Some basic code for Zotero, LibraryThing, and Amazon wishlist

pull/1/head
Raymond Yee 2011-10-28 10:54:06 -07:00
parent 07248f16c8
commit 41b955245a
3 changed files with 176 additions and 0 deletions

29
amazon_wishlist.py Normal file
View File

@ -0,0 +1,29 @@
# scrape my amazon wishlist
import requests
import lxml.html
from lxml.cssselect import CSSSelector
def net_text(e):
    """Return all text contained in element ``e``, in document order.

    The result is the element's own leading text followed, for each direct
    child in order, by that child's text (collected recursively) and the tail
    text that follows the child inside ``e``.  ``None`` text or tail values
    count as empty strings.  The tail of ``e`` itself is not included, since
    it belongs to the content of ``e``'s parent.

    Earlier, only the text of leaf elements was collected, so for mixed
    content such as ``<td>Hello <b>big</b> world</td>`` everything except
    ``big`` was silently dropped.
    """
    parts = [e.text or '']
    # Iterating an element yields its direct children.
    for child in e:
        parts.append(net_text(child))
        parts.append(child.tail or '')
    return "".join(parts)
wishlist_id = '1U5EXVPVS3WP5'
url = "http://www.amazon.com/wishlist/%s/ref=cm_wl_act_print_o?_encoding=UTF8&layout=standard-print&disableNav=1&visitor-view=1&items-per-page=500&page=1" % (wishlist_id)
r = requests.get(url)
html = lxml.html.fromstring(r.content.decode("UTF-8"))
sel = CSSSelector('#itemsTable tr')
elems = sel(html)
# just realized no isbn in print version....need to do more work
for (i, tr) in enumerate(elems):
print i,
for td in tr.findall('td'):
print net_text(td),
print

47
my_librarything_books.py Normal file
View File

@ -0,0 +1,47 @@
import mechanize
import csv
import HTMLParser
import argparse
# Shared parser instance; parse_csv uses its unescape() on CSV fields.
h = HTMLParser.HTMLParser()

# Front page opened before logging in, and the address of the CSV export.
LT_url = "https://www.librarything.com"
LT_csv_file_url = "http://www.librarything.com/export-csv"

# Command line: two required positionals, the username then the password.
parser = argparse.ArgumentParser(
    description='Download and parse LibraryThing booklist.')
parser.add_argument('user',
                    help='LibraryThing username')
parser.add_argument('password',
                    help='LibraryThing password')
args = parser.parse_args()

# Credentials handed to retrieve_book_list by the __main__ section.
USERNAME, PW = args.user, args.password
def retrieve_book_list(user, password):
    """Log in to LibraryThing and return the response for the CSV export.

    Opens ``LT_url`` in a new ``mechanize.Browser``, fills ``user`` and
    ``password`` into the page's second form, submits it, and then opens
    ``LT_csv_file_url`` with the same browser object.

    Returns whatever ``Browser.open`` returns for the export URL.
    """
    browser = mechanize.Browser()
    browser.open(LT_url)

    # nr=1 selects the second form on the page.
    browser.select_form(nr=1)
    for field, value in (("formusername", user), ("formpassword", password)):
        browser[field] = value
    browser.submit()

    # Request the CSV export through the browser that just submitted the login.
    return browser.open(LT_csv_file_url)
def parse_csv(f):
reader = csv.DictReader(f)
for (i,row) in enumerate(reader):
print i, h.unescape(row["'TITLE'"]), h.unescape(row["'AUTHOR (first, last)'"]), row["'ISBNs'"], row["'COMMENT'"], row["'TAGS'"], row["'COLLECTIONS'"], h.unescape(row["'REVIEWS'"])
if __name__ == '__main__':
    # dynamic=True downloads the export from LibraryThing; set it to False to
    # parse a previously downloaded export file from the path below instead.
    dynamic = True
    if dynamic:
        f = retrieve_book_list(USERNAME, PW)
    else:
        fname = "/Users/raymondyee/Downloads/LibraryThing_export.csv"
        f = open(fname, "rb")
    try:
        parse_csv(f)
    finally:
        # Release the HTTP response or file handle even if parsing fails.
        f.close()

100
zotero_books.py Normal file
View File

@ -0,0 +1,100 @@
from zoteroconf import user_id, user_key
from itertools import islice
from functools import partial
from pyzotero.zotero import Zotero
class Zotero2(Zotero):
    """Zotero client whose listing methods return auto-paging generators.

    Looking up any method named in ``_PAGED_METHODS`` on an instance returns
    a callable that, instead of a single page of results, produces a
    generator which keeps requesting further pages (advancing the ``start``
    parameter) until a request comes back empty.

    Request parameters that should apply to those paged calls are stored
    with ``set_parameters``.
    """

    # Inherited method names that get wrapped by _to_iterator.  Kept as a
    # class-level frozenset so __getattribute__, which runs on every
    # attribute access, does a constant-time lookup instead of rebuilding
    # and scanning a list each time.
    _PAGED_METHODS = frozenset((
        'items', 'item', 'top', 'children', 'tag_items',
        'group_items', 'group_top', 'group_item', 'group_item_children',
        'group_items_tag', 'group_collection_items', 'group_collection_item',
        'group_collection_top', 'collection_items', 'get_subset',
        'collections', 'collections_sub', 'group_collections',
        'group_collection',
        'groups', 'tags', 'item_tags', 'group_tags', 'group_item_tags',
    ))

    def __init__(self, user_id=None, user_key=None):
        """Create the client; ``user_id``/``user_key`` go to the base class."""
        # Set before the base initializer runs so the attribute always exists.
        self.__params = {}
        super(Zotero2, self).__init__(user_id, user_key)

    def set_parameters(self, **kwargs):
        """Merge ``kwargs`` into the parameters used by later paged calls."""
        self.__params.update(kwargs)

    def _to_iterator(self, f, *args, **kwargs):
        """Yield every item produced by repeatedly calling ``f(*args, **kwargs)``.

        Before each call the stored parameters, plus the current ``start``
        offset, are passed to ``self.add_parameters``.  ``start`` begins at
        the stored value (0 when none was set) and grows by the number of
        items each call returned.  Iteration ends after the first call that
        returns no items.
        """
        # Work on a copy so paging does not alter the stored parameters.
        params = self.__params.copy()
        params.setdefault("start", 0)
        while True:
            self.add_parameters(**params)
            items = f(*args, **kwargs)
            for item in items:
                yield item
            fetched = len(items)
            if fetched == 0:
                break
            params["start"] += fetched

    def __getattribute__(self, name):
        """Return a paging wrapper for listed methods, else the normal attribute."""
        # Look the set up on the class, not on self: going through self would
        # re-enter this method and recurse without end.
        if name in Zotero2._PAGED_METHODS:
            f = getattr(super(Zotero2, self), name)
            return partial(self._to_iterator, f)
        return super(Zotero2, self).__getattribute__(name)
#def items(self):
# f = super(Zotero2,self).items
# return self._to_iterator(f)
#def items0(self):
# current_start = self._params.get("start", 1)
# params = self._params
#
# params["start"] = current_start
# more_items = True
#
# while more_items:
# self.add_parameters(**params)
# items = super(Zotero2,self).items()
# for item in items:
# yield item
# params["start"] += len(items)
# if len(items) == 0:
# more_items = False
class MyZotero(Zotero2):
def __init__(self, user_id=user_id, user_key=user_key):
super(MyZotero,self).__init__(user_id,user_key)
def items_in_unglue_it_collection(self):
return self.collection_items('3RKQ23IP')
def compare_keys(self,max,pagesize1,pagesize2):
set1 = self.item_keys(max,pagesize1)
set2 = self.item_keys(max,pagesize2)
print "length: ", len(set1), len(set2)
print set1 ^ set2
def item_keys(self, max, page_size):
self.set_parameters(limit=page_size)
items = self.items()
item_set = set()
for (i, item) in enumerate(islice(items,max)):
item_set.add((item["group_id"], item["key"], item["title"]))
print i, (item["group_id"], item["key"], item["title"])
return item_set
def get_all_items(self):
print len(self.item_keys(5000,99))
def get_all_books(self,max):
self.set_parameters(sort="type")
items = self.items()
book_set = set()
for (i, item) in enumerate(islice(items,max)):
if item.get("itemType") == 'book':
print i, (item["group_id"], item["key"], item["title"], item.get("itemType"), item.get("ISBN", None))
book_set.add((item["group_id"], item["key"], item["title"]))
print len(book_set)
return book_set
zot = MyZotero()
#zot.compare_keys(24,7,3)
to_unglue = list(zot.items_in_unglue_it_collection())
print len(to_unglue), [item["title"] for item in to_unglue]
zot.get_all_books(50)
#print zot.get_all_items()