Python
April 20, 2015
Python MARC record cleaner
# -*- coding: UTF-8 -*-
from pymarc import MARCReader
from pymarc import Record, Field
#from abc import ABCMeta, abstractmethod
import re, string, os, json
import sys, time
from datetime import date
"""
Class for printing out colours on the CLI
"""
class bcolors:
    """
    ANSI escape sequences for printing colours on the CLI.

    Each attribute is a string that is concatenated in front of the text to
    print; ENDC resets the terminal back to its default rendering.
    """

    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
"""
Class for when we don't want colours (monkeys with 'more' use and puts garbage into files)
"""
class nocolors:
    """
    Colour table with the same attribute names as the ANSI colour class, but
    with every value empty.

    Used when colours are not wanted, e.g. when the output is paged or
    redirected into a file, where escape sequences would show up as garbage.
    """

    HEADER = ''
    OKBLUE = ''
    OKGREEN = ''
    WARNING = ''
    FAIL = ''
    ENDC = ''
    BOLD = ''
    UNDERLINE = ''
class ChangeLogger:
    """
    Class for delegating logging functions to.

    Messages are collected in ``self.list``, a dictionary shaped like
    ``{leader: {field: [message, ...]}}``. Returns the log as JSON.
    """

    def __init__(self):
        # Only create the log when the instance does not already expose one
        # (e.g. through a subclass attribute).
        if not hasattr(self, 'list'):
            self.set_list()

    def set_list(self):
        """Set up (or reset) the log to an empty dictionary."""
        self.list = {}

    def log(self, leader, field, message):
        """
        Main function to add to the log.

        Appends ``message`` to the list kept for ``field`` under ``leader``,
        creating the intermediate entries on first use.
        """
        self.list.setdefault(leader, {}).setdefault(field, []).append(message)

    def get_log(self, leader):
        """
        Return everything logged for ``leader`` as a JSON string.

        Raises KeyError when nothing has been logged for ``leader``.
        """
        return json.dumps(self.list[leader])
class MARCParser():
    """
    Generic (maybe later abstract) class with main MARC parser functions.

    The methods treat ``field.subfields`` as a flat
    ``[code, value, code, value, ...]`` list and ``field.indicators`` as a
    two-item sequence.
    NOTE(review): newer pymarc releases model subfields/indicators
    differently - confirm against the installed pymarc version.
    """

    def __init__(self):
        """
        Instantiate the logger delegate object, our colour writer and the
        tables that drive the clean-up (deletions, additions, sub-deletions).
        """
        self.logger = ChangeLogger()
        self.bcolors = nocolors()
        # Tags of the fields that are deleted outright.
        self.deletions = ['520', '533', '655', '776', '505', '500', '588', '590', '980']
        # Fields (tag plus $a value) that are added to every record.
        self.additions = [{
            'tag': "909",
            'value': "Coutts MARC record for shelf-ready print. Cleaned PH %s" % date.today().isoformat(),
        }]
        # Single subfields that are stripped from the given tags.
        self.sub_deletions = [{'tag': '245', 'sub': 'h'}, {'tag': '490', 'sub': 'x'}]
        self.format = "print"

    def process_record(self, record):
        """
        Function for processing each record. This should be moved to the
        concrete classes only (later).

        Prints the record before and after the clean-up, the change log and
        the raw MARC, and returns the amended record.
        """
        # Important for printing using as_marc(), otherwise you get an encoding error.
        record.force_utf8 = 1
        # Print records lose their 856s; register the tag only once rather
        # than once per processed record.
        if self.format == "print" and '856' not in self.deletions:
            self.deletions.append('856')
        # Print the leader for comparison *before* we amend it.
        print(self.bcolors.HEADER + "%s" % record.leader + self.bcolors.ENDC)
        # Amend the leaders for additions
        record.leader = record.leader.replace('nam', 'nad')
        record.leader = record.leader.replace('cam', 'nad')
        self.log(record, 'LDR', "Replaced cam/nam with nad in the Leader")
        # Print out the record before we start to amend it using the yellow WARNING colour:
        for field in record.get_fields():
            if field is not None:
                print(self.bcolors.WARNING + "%s" % field + self.bcolors.ENDC)
        # Tidy the call number in 082 to remove any separator characters ' /
        record = self.clean_call_number(record)
        # Fix any editions entries and turn into an ordinal (e.g. 2nd) if necessary.
        record = self.fix_editions(record)
        # Remove any non-LCSH subject headings based on the value in the second indicator.
        for i in ['1', '2', '3', '4', '5', '6', '7']:
            record = self.nuke_field_based_on_indicator(record, '650', 2, i)
        # Remove copyright statement in 264 where the second indicator is a 4
        record = self.nuke_field_based_on_indicator(record, '264', 2, '4')
        # Add any additional fields as defined in our hash set in the __init__ function
        for addition in self.additions:
            record = self.add_simple_field(record, addition['tag'], addition['value'])
        # Delete those sub-fields defined in our sub_deletions array. See __init__
        for sub_deletion in self.sub_deletions:
            record = self.delete_subfield(record, sub_deletion['tag'], sub_deletion['sub'])
        # Delete those whole fields defined in our deletions array. See __init__
        for deli in self.deletions:
            record = self.delete_field_by_tag(record, deli)
        # If we have an eBook set, process the 856s
        if self.format == 'ebooks':
            record = self.set_link_text(record)
            record = self.add_ezproxy(record)
        # Move any 440s into 490s
        record = self.switch_tag(record, '440', '490')
        print(" ")
        # Print out the processed record in white
        print(self.bcolors.HEADER + "%s" % record.leader + self.bcolors.ENDC)
        for field in record.get_fields():
            if field is not None:
                print("%s" % field)
        print(" ")
        # Print out the log
        print(self.bcolors.OKGREEN + self.logger.get_log(record.leader) + self.bcolors.ENDC)
        print(" ")
        # Print out the raw MARC. as_marc() may hand back bytes, which cannot
        # be concatenated with the colour strings, so decode for display only.
        raw = record.as_marc()
        if not isinstance(raw, str):
            raw = raw.decode('utf-8', 'replace')
        print(self.bcolors.OKBLUE + raw + self.bcolors.ENDC)
        return record

    def switch_tag(self, record, old, new):
        """
        Re-tag every ``old`` field of the record as ``new`` and log it when
        at least one field was switched.
        """
        fields = record.get_fields(old)
        for field in fields:
            field.tag = new
        if fields:
            self.log(record, old, "Switched contents of %s to %s." % (old, new))
        return record

    def add_ezproxy(self, record):
        """
        Add the EZProxy prefix to the URL in 856 $u

        Every $u of every 856 is prefixed in place with its own URL; values
        that already carry the prefix are left alone.
        """
        prefix = "http://ezproxy.lib.le.ac.uk/login?url="
        changed = False
        for field in record.get_fields('856'):
            subs = field.subfields
            # Walk the (code, value) pairs; values live at the odd positions.
            for pos in range(1, len(subs), 2):
                if subs[pos - 1] == 'u' and not subs[pos].startswith(prefix):
                    subs[pos] = prefix + subs[pos]
                    changed = True
        if changed:
            self.log(record, '856', "Prefixed URL with EZPRoxy.")
        return record

    def set_link_text(self, record):
        """
        Add proper link text for hyperlink into 856 $z

        Any existing $z is removed first. The text is built from 245 $a, so
        the record is assumed to carry a 245 with a $a.
        """
        record = self.delete_subfield(record, '856', 'z')
        fields = record.get_fields('856')
        # Strip whitespace and the trailing punctuation (backslash, slash, colon).
        title = record['245']['a'].strip(' \r\n\t\\/:')
        for field in fields:
            # Override
            field.indicators = ['4', '0']
            field.subfields.append('z')
            field.subfields.append("Access the eBook \"%s\"" % title)
            self.log(record, '856', "Set the link text in 856 $z: Access the eBook \"%s\"" % title)
        return record

    def add_simple_field(self, record, tagl, value):
        """
        Add a new field with just a $a subfield and blank indicators
        """
        self.log(record, tagl, "Added new field %s with value of: %s" % (tagl, value))
        record.add_field(Field(
            tag=tagl,
            indicators=[' ', ' '],
            subfields=['a', value],
        ))
        return record

    def delete_subfield(self, record, tagl, del_sub):
        """
        Remove every ``del_sub`` subfield from each ``tagl`` field.

        Each field is rebuilt on its own, keeping the remaining subfields in
        their original order and keeping repeated subfield codes.
        """
        self.log(record, tagl, "Attempting to remove %s subfields from %s" % (del_sub, tagl))
        for field in record.get_fields(tagl):
            subs = field.subfields
            kept = []
            for code, value in zip(subs[::2], subs[1::2]):
                if code != del_sub:
                    kept.append(code)
                    kept.append(value)
            field.subfields = kept
        return record

    def subfields_to_dict(self, l):
        """
        Turn a flat ``[code, value, ...]`` list into a ``{code: value}`` dict.
        A repeated code keeps only its last value.
        """
        return dict(zip(l[::2], l[1::2]))

    def delete_field_by_tag(self, record, tag):
        """
        Pass the record and a tag and delete those fields from the record
        """
        for field in record.get_fields(tag):
            self.log(record, tag, "Deleted field(s) %s: %s" % (tag, field))
            record.remove_field(field)
        return record

    def clean_call_number(self, record):
        """
        Strip separators such as ' and / from the call number in the 082 field

        Only the value of the first subfield of the first 082 is cleaned.
        """
        fields = record.get_fields('082')
        if not fields:
            return record
        subs = fields[0].subfields
        if not subs or len(subs) < 2:
            return record
        for strip in ["'", "/"]:
            if strip in subs[1]:
                self.log(record, '082', "Found and stripped %s in 082" % strip)
                subs[1] = subs[1].replace(strip, "")
        return record

    def get_ordinal(self, value):
        """
        Return ``value`` as an ordinal string (1st, 2nd, 3rd, 4th, 11th ...).
        A value that int() rejects with ValueError is returned unchanged.
        """
        try:
            number = int(value)
        except ValueError:
            return value
        if number % 100 // 10 == 1:
            # 10-19 (and 110-119 ...) always take "th".
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")
        return u"%d%s" % (number, suffix)

    def fix_editions(self, record):
        """
        Turn the 250 $a of the first 250 field into an ordinal where it is a
        plain number; the record is returned untouched when there is no 250
        or the 250 has no $a.
        """
        fields = record.get_fields('250')
        if not fields:
            self.log(record, '250', "Attempting to fix 250 but no 250 found.")
            return record
        field = fields[0]
        values = field.get_subfields('a')
        subs = field.subfields
        if not values or len(subs) < 2:
            self.log(record, '250', "Attempting to fix 250 but no $a found.")
            return record
        pipe_a = values[0]
        self.log(record, '250', "Attempting to fix 250 value to ordinal: %s" % pipe_a)
        subs[1] = subs[1].replace(pipe_a, self.get_ordinal(pipe_a))
        return record

    def print_fields_by_tag(self, record, tag):
        """Print every field with the given tag in the WARNING colour."""
        for field in record.get_fields(tag):
            print(self.bcolors.WARNING + "%s" % field + self.bcolors.ENDC)

    def nuke_field_based_on_indicator(self, record, tag, indicator, value):
        """
        Given an indicator (1st or 2nd), nuke the field if it has the value in that position.
        i.e. 650 with a second indicator of 7 (MESH heading)

        Only the matching fields are removed; all other fields stay where
        they are in the record. Raises ValueError when ``indicator`` is
        neither 1 nor 2.
        """
        if indicator not in (1, 2):
            raise ValueError("indicator must be 1 or 2, got %r" % (indicator,))
        position = indicator - 1
        for field in record.get_fields(tag):
            try:
                current = field.indicators[position]
            except (IndexError, TypeError):
                # The indicator cannot be read, so there is nothing to compare: keep the field.
                message = "Could not read indicator %s of field %s, field kept - %s" % (indicator, tag, field)
                self.log(record, tag, message)
                continue
            if str(current) == str(value):
                message = "Removed field %s as %s was present in indicator %s - %s" % (tag, value, indicator, field)
                self.log(record, tag, message)
                record.remove_field(field)
        return record

    def log(self, record, field, message):
        """
        Function which hands off to logger delegate object
        """
        self.logger.log(record.leader, field, message)

    def _apply_cli_flags(self, flags):
        """Apply the colour / format switches found in ``flags``; later flags win."""
        for flag in flags:
            if flag in ("--colour", "--color", "-c"):
                self.bcolors = bcolors()
            elif flag in ("--print", "-p"):
                self.format = "print"
            elif flag in ("--ebooks", "-e"):
                self.format = "ebooks"

    def parse_run(self):
        """
        Command line entry point.

        ``sys.argv[1]`` is the MARC file to clean, the remaining arguments are
        the optional switches. Every record is processed and written to an
        output file next to the input. Exits with status 1 when no file was
        given.
        """
        if len(sys.argv) < 2:
            print("Usage: python marc-parser.py <file.mrc> [-c for colour] [-p for print records] [-e for eBooks]")
            sys.exit(1)
        self._apply_cli_flags(sys.argv[2:])
        infile = sys.argv[1]
        suffix = ".final.%s.CLI.marc" % self.format
        outfile = infile.replace('.mrc', suffix)
        if outfile == infile:
            # No '.mrc' in the name: never let the output path be the input itself.
            outfile = infile + suffix
        try:
            with open(infile, 'rb') as fh:
                reader = MARCReader(fh)
                if os.path.isfile(outfile):
                    os.remove(outfile)
                with open(outfile, 'wb') as out:
                    for counter, record in enumerate(reader, 1):
                        print(" ")
                        print(self.bcolors.FAIL + "%s" % counter)
                        record = self.process_record(record)
                        out.write(record.as_marc())
            print("Wrote file:" + self.bcolors.FAIL + " %s" % outfile + self.bcolors.ENDC)
        except IOError:
            print(self.bcolors.FAIL + "Unable to open file %s!" % sys.argv[1] + self.bcolors.ENDC)
if __name__ == "__main__":
    # Script entry point: build the parser and let it read its file name and
    # switches from the command line.
    parser = MARCParser()
    parser.parse_run()
Posted by pj at 08:52 PM | Comments (0)
September 17, 2011
I love Python
import glob, os, re, string
def rename(dir, pattern):
    """
    Rename the files in ``dir`` that match the glob ``pattern`` to a
    lower-case name in which every run of non-word characters in the title
    becomes a single '-'. The extension is kept as it is.

    The new title is printed for every matching file. A file whose new name
    is already taken by a different file is skipped (and reported) so that
    nothing gets overwritten.
    """
    for path_filename in glob.glob(os.path.join(dir, pattern)):
        title, ext = os.path.splitext(os.path.basename(path_filename))
        new_title = re.sub(r'\W+', '-', title).lower()
        print(new_title)
        target = os.path.join(dir, new_title + ext)
        # samefile() lets through the "same file" case (unchanged name, or a
        # case-only change on a case-insensitive file system).
        if os.path.exists(target) and not os.path.samefile(path_filename, target):
            print("Skipped %s: %s already exists" % (path_filename, target))
            continue
        os.rename(path_filename, target)


if __name__ == "__main__":
    rename(r'/home/phollan2/crc_papers_2011', r'*.pdf')
Posted by pj at 01:28 PM | Comments (0)
September 03, 2011
Cosign with Django
Django | Authentication using REMOTE_USER | Django documentation
Posted by pj at 06:53 PM | Comments (0)
October 09, 2009
PostgreSQL version of do_sql.py
import sys
import postgresql
from read_config import read_config
import time
import string
import re
def parse_insert_sql(sql):
    """
    Work out whether ``sql`` is an INSERT and make sure it hands back the new id.

    Returns a dictionary with two keys:
      is_insert - 1 when the first word of the statement is "insert", else 0
      sql       - the statement to run. For an insert that does not already
                  contain "returning <table>_id", the clause
                  " returning <table>_id as insert_id" is appended, where
                  <table> is the third word of the statement.
    Any other statement (and an insert too short to name a table) is returned
    unchanged.
    """
    # Tokenise a lower-cased copy on whitespace and parentheses; the
    # statement itself is never altered by the analysis.
    tokens = [token.rstrip(';') for token in re.split(r"[\s()]+", sql.lower().strip())]
    response = {'is_insert': 0, 'sql': sql}
    if tokens[0] != 'insert':
        return response
    response['is_insert'] = 1
    if len(tokens) < 3:
        return response
    id_column = "%s_id" % tokens[2]
    # The clause spans two tokens, so look for the adjacent pair.
    already_returning = any(
        first == 'returning' and second == id_column
        for first, second in zip(tokens, tokens[1:])
    )
    if not already_returning:
        response['sql'] = sql + " returning %s as insert_id" % id_column
    return response
def do_sql_query(db_config_file, sql, debug):
    """
    Takes config file name, your SQL and a debug parameter.

    Reads the connection parameters from '<db_config_file>.ini', opens a
    connection and prepares the statement.

    Returns a dictionary with:
      status    - 1 on success, 0 on failure
      results   - the object returned by connection.prepare() (an empty
                  tuple when preparing failed)
      insert_id - for insert statements only: the value of results.first()
      error     - on failure only: an error string containing the SQL

    With debug == 1 the error string is also printed.
    """
    response = {}
    results = ()
    connection_map = read_config('%s.ini' % db_config_file, debug)
    parameters = connection_map['connection_parameters']
    (host, user, passwd, db) = (parameters['host'],
                                parameters['user'],
                                parameters['password'],
                                parameters['db'])
    # NOTE(review): user and password are placed into the URI as they are -
    # confirm how special characters in them should be escaped.
    connection = postgresql.open("pq://%s:%s@%s/%s" % (user, passwd, host, db))
    insert_response = parse_insert_sql(sql)
    is_insert = insert_response['is_insert'] == 1
    if is_insert:
        # Inserts run with the "returning" clause so the new id can be read.
        sql = insert_response['sql']
    try:
        results = connection.prepare(sql)
        response['results'] = results
        response['status'] = 1
        if is_insert:
            response['insert_id'] = results.first()
        return response
    except Exception:
        # Report the failure through the response; KeyboardInterrupt and
        # SystemExit are deliberately not trapped here.
        if debug == 1:
            print("\nPostgreSQL error: %s\n" % (sql))
        response['status'] = 0
        response['error'] = "\nPostgreSQL error: %s\n" % (sql)
        response['results'] = results
        return response
if __name__ == "__main__":
    # Demo: one insert and one select. The response only carries
    # 'insert_id' / usable 'results' when status is 1, so check it first.
    response = do_sql_query('waf_common', "insert into stuff(stuff_id, nonsense) values(nextval('stuff_seq'::regclass), 'Glug')", 0)
    if response['status'] == 1:
        print(response['insert_id'])
    else:
        print(response['error'])
    response = do_sql_query('waf_common', "select * from staff limit 10", 0)
    if response['status'] == 1:
        print(response['results'].first())
        for record in response['results']:
            for column in record.column_names:
                print("%s : %s" % (column, record[column]))
            print("\n ----------- \n")
    else:
        print(response['error'])
Posted by pj at 05:00 PM | Comments (0)
March 29, 2009
Getting MySQL-python-1.2.2 to work with XAMPP
I'm trying to get the Python MySQLdb library to talk to my XAMPP MySQL. Here's how:
1. Before you build the db adaptor, change the site.cfg file to point to XAMPP's mysql_config
# The path to mysql_config.
# Only use this if mysql_config is not on your PATH, or you have some weird
# setup that requires it.
mysql_config = /Applications/xampp/xamppfiles/bin/mysql_config
2. Link the dylib
cp /Applications/xampp/xamppfiles/lib/mysql/libmysqlclient.15.dylib /usr/local/mysql/lib/mysql/libmysqlclient_r.15.dylib
mkdir /Applications/xampp/xamppfiles/include
ln -s /usr/local/mysql-5.1.32-osx10.5-powerpc/include /Applications/xampp/xamppfiles/include/mysql
3. You have to point your script at the local machine using its actual IP address, as if it were a remote server.
Posted by pj at 11:59 AM | Comments (0)
CherryPy HTMLTemplate
HTMLTemplate - CherryPy Tools - Trac
Posted by pj at 02:15 AM | Comments (0)
March 26, 2009
Python script for producing svn diffs
import os
import subprocess

# Revision every listed file is diffed against.
REVISION = "455"

# changed_files.txt holds one path per line.
with open('changed_files.txt', 'r') as my_file:
    lines = my_file.readlines()

for line in lines:
    path = line.strip()
    if not path:
        # Blank lines carry no path to diff.
        continue
    # Flatten the path into a single file name: a/b/c.py -> a_b_c.py.diff
    diff_name = "_".join(path.split('/')) + ".diff"
    with open(diff_name, 'w') as diff_file:
        # Argument list instead of a shell string, so odd characters in a
        # path cannot be interpreted by the shell.
        subprocess.call(["svn", "diff", "-r", REVISION, path], stdout=diff_file)
    print("svn diff -r " + REVISION + " " + path + " > " + diff_name)
Posted by pj at 09:31 PM | Comments (0)
March 24, 2009
CherryPy does ZPT
ChoosingATemplatingLanguage - CherryPy - Trac
Posted by pj at 11:14 AM | Comments (0)
April 15, 2008
Python script for getting changed files
The following Python script gets you a list of files changed between two subversion revisions:
import sys
import os
import re
# Usage: <script> FIRST_REVISION LAST_REVISION
# Collects every "svn log -vv" output line that mentions "svn-repository"
# for the revisions FIRST_REVISION - 1 up to and including LAST_REVISION.
# NOTE(review): the range starts one revision before the first argument -
# confirm that this is intended.
tally = set()
for counter in range(int(sys.argv[1]) - 1, int(sys.argv[2]) + 1):
    log_output = os.popen('/usr/local/bin/svn log -vv -r ' + str(counter))
    try:
        for line in log_output:
            if "svn-repository" in line:
                # A set keeps each changed path line only once.
                tally.add(line)
    finally:
        log_output.close()
print(" " + " ".join(sorted(tally)))
Posted by pj at 05:06 PM | Comments (0)
December 14, 2007
pysvn Programmer's Guide
pysvn: pysvn Programmer's Guide
Posted by pj at 09:03 PM | Comments (0)
August 11, 2006
More about the Python in PHP project
Posted by pj at 10:06 AM | Comments (0)
Python interpreter embedded in PHP
Posted by pj at 10:03 AM | Comments (0)
August 03, 2006
Parsing multiple date formats in Python
# Try each known format in turn; date_time stays None when none of them fits.
date_time = None
for date_format in ("%d/%m/%Y", "%B %Y", "%Y"):
    try:
        # The first five fields of the parsed time are year, month, day, hour, minute.
        date_time = datetime.datetime(*time.strptime(this_val, date_format)[0:5])
    except (ValueError, TypeError):
        continue
    break
print("<!--" + str(date_time) + "-->")
Posted by pj at 02:58 PM | Comments (0)
Very cunning Python based Universal Feed Parser
Posted by pj at 12:51 PM | Comments (0)
May 22, 2006
Notes for using Python urllib2
Posted by pj at 12:43 PM | Comments (0)
January 16, 2006
Python charting interface
PyGDChart2 - http://www.nullcube.com/software/pygdchart2/doc/index.html
Posted by pj at 08:32 PM
November 16, 2005
Parsing dates in Python
Posted by pj at 03:42 PM
September 14, 2005
Parsing binary data with Python
4.3 struct -- Interpret strings as packed binary data
Another alternative library is also available:
- http://www.nightmare.com/software.html
Posted by pj at 11:49 AM
August 09, 2005
mysql_robot2.py site indexing script
I've uploaded a copy of my mysql_robot2.py site indexing script for safe keeping.
And here's the DB structure:
Posted by pj at 10:40 AM
July 17, 2005
Yet another Python Web Framework
Django | The Web framework for perfectionists with deadlines
Posted by pj at 02:38 PM
May 27, 2005
A web crawler written in Python
http://www.newton.cx/~peter/software/crawler.py
Posted by pj at 01:20 PM
Example of how to go from Unicode to HTML
Unicode to HTML - tiddly-pom.com
Posted by pj at 12:54 PM
Information about the Python urlparse module
The urlparse module ::: The Standard Python Library (2005) ::: www.effbot.org
Posted by pj at 12:46 PM
A Python based HTML parser which handles tag soup and does tidying automaticamente
Beautiful Soup: We called him Tortoise because he taught us.
Posted by pj at 11:59 AM
May 26, 2005
Rare urllib2 example for handling response codes
ASPN : Python Cookbook : urllib2 for actions depending on http response codes
Posted by pj at 12:11 PM
May 23, 2005
Handling Unicode encoding in XML with Python
Posted by pj at 10:02 PM
April 20, 2005
Testing counters to see if they are odd or even
The following is a simple Python script to test whether a number is odd or even:
def odd_or_even(test_figure):
    """
    Return 'even' or 'odd' for the given number (anything float() accepts).

    Half of an even number is a whole number, so converting it to an integer
    changes nothing; half of an odd number ends in .5, which the conversion
    drops.
    """
    test_figure = float(test_figure)
    half_of_it = float(test_figure / 2)
    if int(half_of_it) == float(half_of_it):
        return 'even'
    else:
        return 'odd'
It relies on the fact that converting a floating point number to an integer discards its fractional part, and that halving an odd number always leaves a fractional part of .5, whereas halving an even number gives a whole number.
Posted by pj at 11:14 AM
February 15, 2005
Running JavaScript in Python
But why?
Posted by pj at 02:43 PM
October 30, 2004
Blogging iTunes playlists - RSS feeds
Found this posting by Kimbro Staken about a Python script he's written which posts his iTunes playlist to his blog using in-line AppleScript and the MT XML-RPC.
< http://www.xmldatabases.org/movabletype/archives/000159.html >
Should be straight forward to retask it to post 'now playing' titles too?
Posted by pj at 10:58 AM