Some basic code for Zotero, LibraryThing, and Amazon wishlist
parent
07248f16c8
commit
41b955245a
|
@ -0,0 +1,29 @@
|
|||
# scrape my amazon wishlist
|
||||
|
||||
import requests
|
||||
import lxml.html
|
||||
from lxml.cssselect import CSSSelector
|
||||
|
||||
def net_text(e):
    """Return all text contained in element *e*, in document order.

    The element's own leading text, the text of every descendant, and the
    tail text following each child are all included, so mixed content such
    as ``<td>Hello <b>world</b>!</td>`` yields ``"Hello world!"``.  The tail
    of *e* itself is left out because it lies outside the element.

    Args:
        e: an lxml / ElementTree style element: iterable over its direct
            children and exposing ``text`` and ``tail`` attributes.

    Returns:
        The concatenated text, or an empty string if there is none.
    """
    # Iterating an element yields its direct children, which avoids the
    # deprecated getchildren() call.
    pieces = [e.text or '']
    for child in e:
        pieces.append(net_text(child))
        # Text between this child's end tag and the next sibling belongs
        # to the parent's content, so it must be collected here.
        pieces.append(child.tail or '')
    return "".join(pieces)
|
||||
|
||||
# Identifier of the Amazon wishlist to scrape.
wishlist_id = '1U5EXVPVS3WP5'
# Print-layout view of the wishlist; the query string asks for 500 items
# per page and the first page.
url = "http://www.amazon.com/wishlist/%s/ref=cm_wl_act_print_o?_encoding=UTF8&layout=standard-print&disableNav=1&visitor-view=1&items-per-page=500&page=1" % (wishlist_id)

# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops it from silently parsing an HTTP error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
html = lxml.html.fromstring(r.content.decode("UTF-8"))

# Every table row inside the element whose id is "itemsTable".
sel = CSSSelector('#itemsTable tr')
elems = sel(html)

# TODO: the print version of the page carries no ISBN, so more work is
# needed to obtain one.

# One output line per table row: the row index followed by the text of each
# <td> cell, separated by single spaces.  The line is built as one string and
# printed with a single-argument print so it runs on Python 2 and Python 3.
for (i, tr) in enumerate(elems):
    cells = [net_text(td) for td in tr.findall('td')]
    print(" ".join([str(i)] + cells))
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
import mechanize
|
||||
import csv
|
||||
import HTMLParser
|
||||
import argparse
|
||||
|
||||
# Shared parser instance; parse_csv() calls its unescape() method to decode
# HTML entities in exported fields.
h = HTMLParser.HTMLParser()

# Command-line interface: the account name and the password are both
# required positional arguments.
# NOTE(review): a password passed on the command line can end up in shell
# history and process listings -- consider prompting for it instead.
parser = argparse.ArgumentParser(
    description='Download and parse LibraryThing booklist.')
parser.add_argument('user', help='LibraryThing username')
parser.add_argument('password', help='LibraryThing password')
args = parser.parse_args()

USERNAME, PW = args.user, args.password

# Page opened first to sign in, and the address of the CSV export.
# NOTE(review): the export address uses plain http while the sign-in page
# uses https -- confirm this is intended.
LT_url = "https://www.librarything.com"
LT_csv_file_url = "http://www.librarything.com/export-csv"
|
||||
|
||||
def retrieve_book_list(user,password):
    """Sign in to LibraryThing and open the CSV export of the library.

    Args:
        user: LibraryThing username.
        password: LibraryThing password.

    Returns:
        The response object the browser returns for the CSV export URL.
    """
    browser = mechanize.Browser()
    browser.open(LT_url)

    # nr is zero-based, so nr=1 picks the second form on the page
    # (presumably the sign-in form -- verify if the page layout changes).
    browser.select_form(nr=1)
    browser["formusername"] = user
    browser["formpassword"] = password
    browser.submit()

    # The same browser session is reused to fetch the CSV export.
    return browser.open(LT_csv_file_url)
|
||||
|
||||
def parse_csv(f):
|
||||
reader = csv.DictReader(f)
|
||||
for (i,row) in enumerate(reader):
|
||||
print i, h.unescape(row["'TITLE'"]), h.unescape(row["'AUTHOR (first, last)'"]), row["'ISBNs'"], row["'COMMENT'"], row["'TAGS'"], row["'COLLECTIONS'"], h.unescape(row["'REVIEWS'"])
|
||||
|
||||
if __name__ == '__main__':
    # True: sign in and download a fresh export.
    # False: parse a previously downloaded export from the local path below.
    dynamic = True
    if not dynamic:
        fname = "/Users/raymondyee/Downloads/LibraryThing_export.csv"
        f = open(fname,"rb")
    else:
        f = retrieve_book_list(USERNAME, PW)
    parse_csv(f)
|
|
@ -0,0 +1,100 @@
|
|||
from zoteroconf import user_id, user_key
|
||||
|
||||
from itertools import islice
|
||||
from functools import partial
|
||||
|
||||
from pyzotero.zotero import Zotero
|
||||
|
||||
class Zotero2(Zotero):
    """Zotero client whose listing methods return auto-paging iterators.

    Attribute access is intercepted: for the method names listed in
    ``__getattribute__`` the caller receives a callable that, when invoked,
    yields results one by one and requests further pages as needed.
    """

    def __init__(self, user_id = None, user_key = None):
        # Query parameters that are re-applied before every page request.
        self.__params = {}
        super(Zotero2,self).__init__(user_id,user_key)

    def set_parameters(self, **kwargs):
        """Merge *kwargs* into the stored query parameters."""
        self.__params.update(kwargs)

    def _to_iterator(self, f, *args, **kwargs):
        """Yield every result of ``f(*args, **kwargs)`` across all pages.

        Before each call the stored parameters plus the current ``start``
        offset are handed to ``add_parameters``.  ``start`` begins at the
        stored value (0 if none is stored) and advances by the size of each
        page; iteration stops after the first empty page.
        """
        query = dict(self.__params)
        query.setdefault("start", 0)

        while True:
            self.add_parameters(**query)
            page = f(*args, **kwargs)
            for entry in page:
                yield entry
            fetched = len(page)
            query["start"] += fetched
            if fetched == 0:
                break

    def __getattribute__(self, name):
        """Wrap the listing methods named below in the paging iterator.

        Any other attribute is looked up normally.
        """
        paged_methods = (
            'items', 'item', 'top', 'children', 'tag_items',
            'group_items', 'group_top',
            'group_item', 'group_item_children', 'group_items_tag',
            'group_collection_items', 'group_collection_item',
            'group_collection_top', 'collection_items', 'get_subset',
            'collections', 'collections_sub', 'group_collections',
            'group_collection',
            'groups', 'tags', 'item_tags', 'group_tags', 'group_item_tags',
        )
        if name not in paged_methods:
            return super(Zotero2,self).__getattribute__(name)
        # Fetch the base-class implementation and bind it as the first
        # argument of the generator; calling the result starts the paging.
        fetch = getattr(super(Zotero2,self),name)
        return partial(self._to_iterator,fetch)
|
||||
#def items(self):
|
||||
# f = super(Zotero2,self).items
|
||||
# return self._to_iterator(f)
|
||||
#def items0(self):
|
||||
# current_start = self._params.get("start", 1)
|
||||
# params = self._params
|
||||
#
|
||||
# params["start"] = current_start
|
||||
# more_items = True
|
||||
#
|
||||
# while more_items:
|
||||
# self.add_parameters(**params)
|
||||
# items = super(Zotero2,self).items()
|
||||
# for item in items:
|
||||
# yield item
|
||||
# params["start"] += len(items)
|
||||
# if len(items) == 0:
|
||||
# more_items = False
|
||||
|
||||
class MyZotero(Zotero2):
    """Zotero2 that defaults to the credentials imported from zoteroconf,
    with a few exploratory helpers that print what they find."""

    def __init__(self, user_id=user_id, user_key=user_key):
        super(MyZotero,self).__init__(user_id,user_key)

    def items_in_unglue_it_collection(self):
        """Return an iterator over the items of collection '3RKQ23IP'."""
        return self.collection_items('3RKQ23IP')

    def compare_keys(self,max,pagesize1,pagesize2):
        """Collect up to *max* item keys with two page sizes and print the
        size of each set followed by their symmetric difference."""
        first = self.item_keys(max,pagesize1)
        second = self.item_keys(max,pagesize2)

        print(" ".join(["length: ", str(len(first)), str(len(second))]))
        print(first ^ second)

    def item_keys(self, max, page_size):
        """Return the set of (group_id, key, title) tuples of the first
        *max* items, fetched *page_size* at a time.

        Every tuple is also printed, prefixed with its running index.  The
        ``limit`` parameter stored here stays in effect for later calls.
        """
        self.set_parameters(limit=page_size)
        seen = set()
        for (i, item) in enumerate(islice(self.items(),max)):
            ident = (item["group_id"], item["key"], item["title"])
            seen.add(ident)
            print(" ".join([str(i), str(ident)]))
        return seen

    def get_all_items(self):
        """Print how many distinct items are found among the first 5000,
        using a page size of 99."""
        print(len(self.item_keys(5000,99)))

    def get_all_books(self,max):
        """Return the set of (group_id, key, title) tuples for the items of
        type 'book' among the first *max* items, requested with sort="type".

        Each book is printed as it is found, and the final count is printed
        at the end.
        """
        self.set_parameters(sort="type")
        books = set()
        for (i, item) in enumerate(islice(self.items(),max)):
            if item.get("itemType") != 'book':
                continue
            print(" ".join([str(i), str((item["group_id"], item["key"], item["title"], item.get("itemType"), item.get("ISBN", None)))]))
            books.add((item["group_id"], item["key"], item["title"]))
        print(len(books))
        return books
|
||||
|
||||
|
||||
# Client using the default credentials from zoteroconf.
zot = MyZotero()
#zot.compare_keys(24,7,3)

# Materialise the whole collection, then print its size and the titles.
to_unglue = list(zot.items_in_unglue_it_collection())
print(" ".join([str(len(to_unglue)), str([item["title"] for item in to_unglue])]))

# Report the books found among the first 50 items.
zot.get_all_books(50)
#print zot.get_all_items()
|
Loading…
Reference in New Issue