Python
April 20, 2015
Python MARC record cleaner
# -*- coding: UTF-8 -*-
"""Clean supplier MARC records from the command line.

Reads the MARC file named in ``sys.argv[1]``, tidies every record (see
``MARCParser.process_record``) and writes the cleaned records to a new file.

The code treats ``Field.subfields`` as a flat ``[code, value, code, value,
...]`` list.  ``print(...)`` is always called with one argument so that it
behaves the same as the print statement it replaces.
"""
from pymarc import MARCReader
from pymarc import Record, Field
import re
import string
import os
import json
import sys
import time
from datetime import date


class bcolors:
    """ANSI escape sequences for printing colours on the CLI."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


class nocolors:
    """Empty stand-ins for ``bcolors`` for when colours are not wanted
    (escape codes end up as garbage when piping to 'more' or into files)."""
    HEADER = ''
    OKBLUE = ''
    OKGREEN = ''
    WARNING = ''
    FAIL = ''
    ENDC = ''
    BOLD = ''
    UNDERLINE = ''


class ChangeLogger:
    """Delegate that collects log messages and returns them as JSON.

    Messages are stored in ``self.list`` as ``{leader: {field: [messages]}}``.
    NOTE(review): because the key is the record leader, two records with an
    identical leader share one log entry.
    """

    def __init__(self):
        # Keep a pre-existing ``list`` attribute (e.g. from a subclass).
        if not hasattr(self, 'list'):
            self.set_list()

    def set_list(self):
        """Set up (or reset) the log store."""
        self.list = {}

    def log(self, leader, field, message):
        """Append ``message`` to the log for ``field`` of record ``leader``."""
        self.list.setdefault(leader, {}).setdefault(field, []).append(message)

    def get_log(self, leader):
        """Return the log for ``leader`` as a JSON string ('{}' if empty)."""
        return json.dumps(self.list.get(leader, {}))


class MARCParser():
    """Generic (maybe later abstract) class with the main MARC parser functions."""

    # Prefix put in front of every 856 $u for eBook records.
    EZPROXY_PREFIX = "http://ezproxy.lib.le.ac.uk/login?url=%s"

    def __init__(self):
        """Instantiate the logger delegate, the colour writer and the
        lists of fields/subfields to delete or add."""
        self.logger = ChangeLogger()
        self.bcolors = nocolors()
        self.deletions = ['520', '533', '655', '776', '505', '500', '588', '590', '980']
        self.additions = [{'tag': "909", 'value': "Coutts MARC record for shelf-ready print. Cleaned PH %s" % date.today().isoformat()}]
        self.sub_deletions = [{'tag': '245', 'sub': 'h'}, {'tag': '490', 'sub': 'x'}]
        self.format = "print"

    def _print_fields(self, record, prefix, suffix):
        """Print every field of ``record`` wrapped in ``prefix``/``suffix``."""
        for field in record.get_fields():
            if field is not None:
                print(prefix + "%s" % field + suffix)

    def process_record(self, record):
        """Clean one record, print before/after views plus the change log,
        and return the amended record."""
        # Important for printing using as_marc(), otherwise you get an
        # encoding error.
        record.force_utf8 = 1
        # Print records must lose their 856 links.  Add the tag only once:
        # this method runs for every record in the file.
        if self.format == "print" and '856' not in self.deletions:
            self.deletions.append('856')
        # Print the leader for comparison *before* we amend it.
        print(self.bcolors.HEADER + "%s" % record.leader + self.bcolors.ENDC)
        # Amend the leader for additions.
        record.leader = record.leader.replace('nam', 'nad')
        record.leader = record.leader.replace('cam', 'nad')
        self.log(record, 'LDR', "Replaced cam/nam with nad in the Leader")
        # Print out the record before we start to amend it, using the
        # yellow WARNING colour.
        self._print_fields(record, self.bcolors.WARNING, self.bcolors.ENDC)
        # Tidy the call number in 082 to remove any separator characters ' /
        record = self.clean_call_number(record)
        # Fix any edition entries and turn them into an ordinal (e.g. 2nd).
        record = self.fix_editions(record)
        # Remove any non-LCSH subject headings based on the value of the
        # second indicator.
        for i in ['1', '2', '3', '4', '5', '6', '7']:
            record = self.nuke_field_based_on_indicator(record, '650', 2, i)
        # Remove the copyright statement in 264 where the second indicator is 4.
        record = self.nuke_field_based_on_indicator(record, '264', 2, '4')
        # Add the additional fields defined in __init__.
        for i in self.additions:
            record = self.add_simple_field(record, i['tag'], i['value'])
        # Delete the subfields defined in sub_deletions (see __init__).
        for i in self.sub_deletions:
            record = self.delete_subfield(record, i['tag'], i['sub'])
        # Delete the whole fields defined in deletions (see __init__).
        for deli in self.deletions:
            record = self.delete_field_by_tag(record, deli)
        # If we have an eBook set, process the 856s.
        if self.format == 'ebooks':
            record = self.set_link_text(record)
            record = self.add_ezproxy(record)
        # Move any 440s into 490s.
        record = self.switch_tag(record, '440', '490')
        print(" ")
        # Print out the processed record without field colours.
        print(self.bcolors.HEADER + "%s" % record.leader + self.bcolors.ENDC)
        self._print_fields(record, "", "")
        print(" ")
        # Print out the log.
        print(self.bcolors.OKGREEN + self.logger.get_log(record.leader) + self.bcolors.ENDC)
        print(" ")
        # Print out the raw MARC.
        print(self.bcolors.OKBLUE + record.as_marc() + self.bcolors.ENDC)
        return record

    def switch_tag(self, record, old, new):
        """Retag every ``old`` field as ``new`` and return the record."""
        fields = record.get_fields(old)
        for field in fields:
            field.tag = new
        if fields:
            self.log(record, old, "Switched contents of %s to %s." % (old, new))
        return record

    def add_ezproxy(self, record):
        """Add the EZProxy prefix to the URL in every 856 $u.

        Each $u is rewritten in place with its own URL; fields without a $u
        are left untouched.
        """
        for field in record.get_fields('856'):
            subs = field.subfields
            prefixed = False
            # Subfields are [code, value, ...]: codes sit at even positions.
            for pos in range(0, len(subs) - 1, 2):
                if subs[pos] == 'u':
                    subs[pos + 1] = self.EZPROXY_PREFIX % subs[pos + 1]
                    prefixed = True
            if prefixed:
                self.log(record, '856', "Prefixed URL with EZPRoxy.")
        return record

    def set_link_text(self, record):
        """Add proper link text for the hyperlink into 856 $z."""
        record = self.delete_subfield(record, '856', 'z')
        fields = record.get_fields('856')
        if not fields:
            return record
        # Strip whitespace and trailing punctuation (backslash, slash, colon).
        title = record['245']['a'].strip(' \r\n\t\\/:')
        for field in fields:
            # Override the indicators.
            field.indicators = ['4', '0']
            field.subfields.append('z')
            field.subfields.append("Access the eBook \"%s\"" % title)
            self.log(record, '856', "Set the link text in 856 $z: Access the eBook \"%s\"" % title)
        return record

    def add_simple_field(self, record, tagl, value):
        """Add a new field with just a $a subfield and blank indicators."""
        self.log(record, tagl, "Added new field %s with value of: %s" % (tagl, value))
        subs = ['a', value]
        record.add_field(Field(
            tag=tagl,
            indicators=[' ', ' '],
            subfields=subs,
        ))
        return record

    def delete_subfield(self, record, tagl, del_sub):
        """Remove every ``del_sub`` subfield from all ``tagl`` fields.

        The remaining subfields keep their original order and repeats, and
        each field is rebuilt from its own subfields only.
        """
        self.log(record, tagl, "Attempting to remove %s subfields from %s" % (del_sub, tagl))
        for field in record.get_fields(tagl):
            subs = field.subfields
            kept = []
            for code, value in zip(subs[::2], subs[1::2]):
                if code != del_sub:
                    kept.append(code)
                    kept.append(value)
            field.subfields = kept
        return record

    def subfields_to_dict(self, l):
        """Turn a flat [code, value, ...] list into a {code: value} dict
        (a repeated code keeps only its last value)."""
        return dict(zip(l[::2], l[1::2]))

    def delete_field_by_tag(self, record, tag):
        """Delete every field with ``tag`` from the record and return it."""
        fields = record.get_fields(tag)
        if not fields:
            self.log(record, tag, "Tried deleting field(s) %s. None found." % tag)
            return record
        for field in fields:
            self.log(record, tag, "Deleted field(s) %s: %s" % (tag, field))
            record.remove_field(field)
        return record

    def clean_call_number(self, record):
        """Strip separators such as ' and / from the first subfield value of
        the 082 field."""
        try:
            subs = record['082'].subfields
        except (KeyError, AttributeError, TypeError):
            # No 082 field (missing key or None, depending on the library).
            return record
        if len(subs) < 2:
            return record
        for strip in ["'", "/"]:
            if strip in subs[1]:
                self.log(record, '082', "Found and stripped %s in 082" % strip)
                subs[1] = subs[1].replace(strip, "")
        return record

    def get_ordinal(self, value):
        """Return ``value`` as an ordinal string (1st, 2nd, 11th, ...).

        Anything that cannot be read as an integer is returned unchanged.
        """
        try:
            value = int(value)
        except (TypeError, ValueError):
            return value
        if (value % 100) // 10 == 1:
            # 10-19 (and 110-119, ...) always take "th".
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")
        return u"%d%s" % (value, suffix)

    def fix_editions(self, record):
        """Turn a purely numeric 250 $a (e.g. "2") into an ordinal ("2nd")."""
        try:
            field = record['250']
        except KeyError:
            field = None
        if not field:
            self.log(record, '250', "Attempting to fix 250 but no 250 found.")
            return record
        try:
            pipe_a = field['a']
        except KeyError:
            pipe_a = None
        self.log(record, '250', "Attempting to fix 250 value to ordinal: %s" % pipe_a)
        if pipe_a is None:
            return record
        subs = field.subfields
        # Rewrite the first $a wherever it sits, not blindly position 1.
        for pos in range(0, len(subs) - 1, 2):
            if subs[pos] == 'a':
                subs[pos + 1] = self.get_ordinal(subs[pos + 1])
                break
        return record

    def print_fields_by_tag(self, record, tag):
        """Print every field with ``tag`` in the WARNING colour."""
        for fields in record.get_fields(tag):
            print(self.bcolors.WARNING + "%s" % fields + self.bcolors.ENDC)

    def nuke_field_based_on_indicator(self, record, tag, indicator, value):
        """Remove every ``tag`` field whose indicator equals ``value``.

        ``indicator`` is 1 or 2 (first or second indicator), e.g. a 650 with
        a second indicator of 7.  Fields that do not match stay where they
        are; fields whose indicator cannot be read are kept and logged.

        Raises:
            ValueError: if ``indicator`` is neither 1 nor 2.
        """
        if indicator not in (1, 2):
            raise ValueError("indicator must be 1 or 2, got %r" % (indicator,))
        for field in record.get_fields(tag):
            try:
                val = field.indicators[indicator - 1]
            except (IndexError, TypeError, AttributeError):
                message = "Kept field %s as indicator %s could not be read - %s" % (tag, indicator, field)
                self.log(record, tag, message)
                continue
            if str(val) == str(value):
                message = "Removed field %s as %s was present in indicator %s - %s" % (tag, value, indicator, field)
                self.log(record, tag, message)
                record.remove_field(field)
        return record

    def log(self, record, field, message):
        """Hand the message off to the logger delegate, keyed by leader."""
        self.logger.log(record.leader, field, message)

    def parse_run(self):
        """Parse ``sys.argv``, clean every record of the input file and
        write the result to ``<input>.final.<format>.CLI.marc``.

        Flags after the file name: -c/--colour/--color, -p/--print,
        -e/--ebooks.  Exits with status 1 when no file name is given.
        """
        if len(sys.argv) < 2:
            print("Usage: python marc-parser.py <file.mrc> [-c for colour] [-p for print records] [-e for eBooks]")
            sys.exit(1)
        for flag in sys.argv[2:]:
            if flag in ("--colour", "--color", "-c"):
                self.bcolors = bcolors()
            elif flag in ("--print", "-p"):
                self.format = "print"
            elif flag in ("--ebooks", "-e"):
                self.format = "ebooks"
        source = sys.argv[1]
        outfile = source.replace('.mrc', ".final.%s.CLI.marc" % self.format)
        if outfile == source:
            # No '.mrc' in the name: never let the output path be the input
            # file, which would be removed below while it is being read.
            outfile = "%s.final.%s.CLI.marc" % (source, self.format)
        counter = 0
        try:
            with open(source, 'rb') as fh:
                reader = MARCReader(fh)
                if os.path.isfile(outfile):
                    os.remove(outfile)
                with open(outfile, 'wb') as out:
                    for record in reader:
                        counter = counter + 1
                        print(" ")
                        print(self.bcolors.FAIL + "%s" % counter)
                        record = self.process_record(record)
                        out.write(record.as_marc())
            print("Wrote file:" + self.bcolors.FAIL + " %s" % outfile + self.bcolors.ENDC)
        except IOError as e:
            print(self.bcolors.FAIL + "Unable to open file %s!" % sys.argv[1] + self.bcolors.ENDC)


if __name__ == "__main__":
    parser = MARCParser()
    parser.parse_run()
Posted by pj at 08:52 PM | Comments (0)
September 17, 2011
I love Python
import glob, os, re, string


def rename(dir, pattern):
    """Rename files in ``dir`` matching ``pattern`` to lower-case slugs.

    Every run of non-word characters in the file title becomes a single
    '-', the title is lower-cased and the extension is kept as it is.  The
    new title is printed for every matching file.  A file is left alone
    (with a message) when the new name already belongs to a different
    file, so nothing is overwritten.
    """
    for path_filename in glob.glob(os.path.join(dir, pattern)):
        title, ext = os.path.splitext(os.path.basename(path_filename))
        new_title = re.sub(r'[\W]+', '-', title).lower()
        print(new_title)
        target = os.path.join(dir, new_title + ext)
        if target == path_filename:
            continue
        # samefile() lets case-only renames through on case-insensitive
        # file systems, where the target "exists" but is the source itself.
        if os.path.exists(target) and not os.path.samefile(path_filename, target):
            print("Skipping %s: %s already exists" % (path_filename, target))
            continue
        os.rename(path_filename, target)


if __name__ == "__main__":
    rename(r'/home/phollan2/crc_papers_2011', r'*.pdf')
Posted by pj at 01:28 PM | Comments (0)
September 03, 2011
Cosign with Django
Django | Authentication using REMOTE_USER | Django documentation
Posted by pj at 06:53 PM | Comments (0)
October 09, 2009
PostgreSQL version of do_sql.py
import sys
import postgresql
from read_config import read_config
import time
import string
import re


def parse_insert_sql(sql):
    """Classify ``sql`` and make INSERT statements return their new id.

    Returns a dictionary with:
      'is_insert' -- 1 when the first word of the statement is "insert",
                     otherwise 0;
      'sql'       -- the statement to run.  For an INSERT without a
                     RETURNING clause, " returning <table>_id as insert_id"
                     is appended, where <table> is the third token of the
                     lower-cased statement.
    """
    response = {'is_insert': 0, 'sql': sql}
    tokens = re.split(r"[\s\(\)]+", sql.lower().strip())
    if tokens[0] == 'insert':
        response['is_insert'] = 1
        # Tokens are single words, so look for the keyword itself; adding
        # a second RETURNING clause would make the statement invalid.
        if 'returning' not in tokens and len(tokens) > 2:
            response['sql'] = sql + " returning %s_id as insert_id" % (tokens[2])
    return response


def do_sql_query(db_config_file, sql, debug):
    """Run ``sql`` against the database described in ``<db_config_file>.ini``.

    Takes the config file name (without '.ini'), your SQL and a debug
    parameter (1 prints failures).  Returns a dictionary with:
      'status'    -- 1 on success, 0 on failure;
      'results'   -- the prepared statement returned by
                     ``connection.prepare`` (an empty tuple on failure);
      'insert_id' -- for INSERTs, the value of ``results.first()``;
      'error'     -- on failure, a message containing the SQL;
      'detail'    -- on failure, the text of the exception raised.
    """
    response = {}
    results = ()
    connection_map = read_config('%s.ini' % db_config_file, debug)
    parameters = connection_map['connection_parameters']
    (host, user, passwd, db) = (parameters['host'],
                                parameters['user'],
                                parameters['password'],
                                parameters['db'])
    # NOTE(review): user and password go into the URI unquoted; characters
    # such as '@' or '/' in them presumably need escaping - confirm.
    connection = postgresql.open("pq://%s:%s@%s/%s" % (user, passwd, host, db))
    insert_response = parse_insert_sql(sql)
    if insert_response['is_insert'] == 1:
        sql = insert_response['sql']
    try:
        results = connection.prepare(sql)
        response['results'] = results
        response['status'] = 1
        if insert_response['is_insert'] == 1:
            response['insert_id'] = results.first()
        return(response)
    except Exception as err:
        if debug == 1:
            print ("\nPostgreSQL error: %s\n" % (sql))
        response['status'] = 0
        response['error'] = "\nPostgreSQL error: %s\n" % (sql)
        response['detail'] = str(err)
        response['results'] = results
        return(response)


if __name__ == "__main__":
    response = do_sql_query('waf_common', "insert into stuff(stuff_id, nonsense) values(nextval('stuff_seq'::regclass), 'Glug')", 0)
    print (response['insert_id'])
    response = do_sql_query('waf_common', "select * from staff limit 10", 0)
    print (response['results'].first())
    for record in response['results']:
        for column in record.column_names:
            print("%s : %s" % (column, record[column]))
        print("\n ----------- \n")
Posted by pj at 05:00 PM | Comments (0)
March 29, 2009
Getting MySQL-python-1.2.2 to work with XAMPP
I'm trying to get the Python MySQLdb library to talk to my XAMPP MySQL. Here's how:
1. Before you build the db adaptor, change the site.cfg
file to point to XAMPP's mysql_config
# The path to mysql_config.
# Only use this if mysql_config is not on your PATH, or you have some weird
# setup that requires it.
mysql_config = /Applications/xampp/xamppfiles/bin/mysql_config
2. Link the dylib
# Copy XAMPP's MySQL client library to the _r name under /usr/local/mysql.
cp /Applications/xampp/xamppfiles/lib/mysql/libmysqlclient.15.dylib /usr/local/mysql/lib/mysql/libmysqlclient_r.15.dylib
# Create XAMPP's include directory (-p: no error if it already exists).
mkdir -p /Applications/xampp/xamppfiles/include
# Expose the MySQL 5.1.32 headers as XAMPP's include/mysql.
ln -s /usr/local/mysql-5.1.32-osx10.5-powerpc/include /Applications/xampp/xamppfiles/include/mysql
3. You have to point your script to the localhost
using the machine's actual IP address, as if it were a remote server.
Posted by pj at 11:59 AM | Comments (0)
CherryPy HTMLTemplate
HTMLTemplate - CherryPy Tools - Trac
Posted by pj at 02:15 AM | Comments (0)
March 26, 2009
Python script for producing svn diffs
import os
import subprocess


def diff_filename(path):
    """Return the diff file name for ``path``: '/' becomes '_' and
    '.diff' is appended."""
    return "_".join(path.split('/')) + ".diff"


def write_diffs(list_file='changed_files.txt', revision='455'):
    """Write ``svn diff -r <revision> <path>`` to one .diff file per path.

    ``list_file`` holds one path per line; blank lines are skipped.  Each
    diff is written to the current directory under ``diff_filename(path)``
    and the equivalent shell command is printed.
    """
    with open(list_file, 'r') as my_file:
        paths = [line.strip() for line in my_file]
    for path in paths:
        if not path:
            continue
        target = diff_filename(path)
        # An argument list avoids handing unquoted file names to a shell.
        with open(target, 'wb') as out:
            subprocess.call(["svn", "diff", "-r", str(revision), path], stdout=out)
        print("svn diff -r %s %s > %s" % (revision, path, target))


if __name__ == "__main__":
    write_diffs()
Posted by pj at 09:31 PM | Comments (0)
March 24, 2009
CherryPy does ZPT
ChoosingATemplatingLanguage - CherryPy - Trac
Posted by pj at 11:14 AM | Comments (0)
April 15, 2008
Python script for getting changed files
The following Python script gets you a list of files changed between two subversion revisions:
import sys
import os
import re


def changed_paths(outputs, marker="svn-repository"):
    """Return the sorted, de-duplicated lines that match ``marker``.

    ``outputs`` is an iterable of iterables of lines (one per ``svn log``
    run); ``marker`` is a regular expression searched for in each line.
    Lines are returned exactly as read, including their line endings.
    """
    pattern = re.compile(marker)
    matches = set()
    for output in outputs:
        for line in output:
            if pattern.search(line):
                matches.add(line)
    return sorted(matches)


def read_log(revision):
    """Return the lines printed by ``svn log -vv -r <revision>``."""
    pipe = os.popen('/usr/local/bin/svn log -vv -r ' + str(revision))
    try:
        return pipe.readlines()
    finally:
        pipe.close()


def main(argv):
    """Print the changed-file lines for the revision range given in
    ``argv[1]`` and ``argv[2]``."""
    if len(argv) < 3:
        print("Usage: python %s <first revision> <last revision>" % argv[0])
        sys.exit(1)
    # NOTE(review): the range starts one revision *before* argv[1], as the
    # original script did - confirm that this is intended.
    first = int(argv[1]) - 1
    last = int(argv[2]) + 1
    outputs = (read_log(revision) for revision in range(first, last))
    print(" " + " ".join(changed_paths(outputs)))


if __name__ == "__main__":
    main(sys.argv)
Posted by pj at 05:06 PM | Comments (0)
December 14, 2007
pysvn Programmer's Guide
pysvn: pysvn Programmer's Guide
Posted by pj at 09:03 PM | Comments (0)
August 11, 2006
More about the Python in PHP project
Posted by pj at 10:06 AM | Comments (0)
Python interpreter embedded in PHP
Posted by pj at 10:03 AM | Comments (0)
August 03, 2006
Parsing multiple date formats in Python
def parse_date(value, formats=("%d/%m/%Y", "%B %Y", "%Y")):
    """Return ``value`` parsed with the first matching format, else None.

    The formats are tried in order: day/month/year, month name and year,
    year only.  Only year, month, day, hour and minute are carried into
    the resulting ``datetime.datetime``.
    """
    for date_format in formats:
        try:
            return datetime.datetime(*time.strptime(value, date_format)[0:5])
        except (ValueError, TypeError):
            # Not this format (or not a string): try the next one.
            continue
    return None


# date_time is None when no format matched, so the line below always has
# a value to print.
date_time = parse_date(this_val)
print("<!--" + str(date_time) + "-->")
Posted by pj at 02:58 PM | Comments (0)
Very cunning Python based Universal Feed Parser
Posted by pj at 12:51 PM | Comments (0)
May 22, 2006
Notes for using Python urllib2
Posted by pj at 12:43 PM | Comments (0)
January 16, 2006
Python charting interface
PyGDChart2 - http://www.nullcube.com/software/pygdchart2/doc/index.html
Posted by pj at 08:32 PM
November 16, 2005
Parsing dates in Python
Posted by pj at 03:42 PM
September 14, 2005
Parsing binary data with Python
4.3 struct -- Interpret strings as packed binary data
Another alternative library is also available:
- http://www.nightmare.com/software.html
Posted by pj at 11:49 AM
August 09, 2005
mysql_robot2.py site indexing script
I've uploaded a copy of my mysql_robot2.py site indexing script for safe keeping.
And here's the DB structure:
Posted by pj at 10:40 AM
July 17, 2005
Yet another Python Web Framework
Django | The Web framework for perfectionists with deadlines
Posted by pj at 02:38 PM
May 27, 2005
A web crawler written in Python
http://www.newton.cx/~peter/software/crawler.py
Posted by pj at 01:20 PM
Example of how to go from Unicode to HTML
Unicode to HTML - tiddly-pom.com
Posted by pj at 12:54 PM
Information about the Python urlparse module
The urlparse module ::: The Standard Python Library (2005) ::: www.effbot.org
Posted by pj at 12:46 PM
A Python based HTML parser which handles tag soup and does tidying automaticamente
Beautiful Soup: We called him Tortoise because he taught us.
Posted by pj at 11:59 AM
May 26, 2005
Rare urllib2 example for handling response codes
ASPN : Python Cookbook : urllib2 for actions depending on http response codes
Posted by pj at 12:11 PM
May 23, 2005
Handling Unicode encoding in XML with Python
Posted by pj at 10:02 PM
April 20, 2005
Testing counters to see if they are odd or even
The following is a simple Python script to test whether a number is odd or even:
test_figure = float(test_figure) half_of_it = float(test_figure/2) #print str(half_of_it) if int(half_of_it) == float(half_of_it): return 'even' else: return 'odd'
It relies on the fact that casting a floating point number to an integer truncates it (the fractional part is discarded), and that halving an odd number always leaves a fractional part of .5, whereas halving an even number gives a whole number.
Posted by pj at 11:14 AM
February 15, 2005
Running JavaScript in Python
But why?
Posted by pj at 02:43 PM
October 30, 2004
Blogging iTunes playlists - RSS feeds
Found this posting by Kimbro Staken about a Python script he's written which posts his iTunes playlist to his blog using in-line AppleScript and the MT XML-RPC.
< http://www.xmldatabases.org/movabletype/archives/000159.html >
Should be straightforward to retask it to post 'now playing' titles too?
Posted by pj at 10:58 AM