30 lines
817 B
Python
30 lines
817 B
Python
|
# Scrape my Amazon wishlist.
|
||
|
|
||
|
import requests
|
||
|
import lxml.html
|
||
|
from lxml.cssselect import CSSSelector
|
||
|
|
||
|
def net_text(e):
    """Return all text contained in element ``e``, in document order.

    The result is the element's own leading text, followed for each child
    by the child's text (collected recursively) and the tail text that
    follows that child. Missing pieces (``None``) count as empty strings.
    The tail of ``e`` itself is not included, because it lies outside ``e``.

    :param e: an ElementTree-style element (iterable over its children,
        with ``text`` and ``tail`` attributes), e.g. an lxml element.
    :returns: the concatenated text, or ``''`` if there is none.
    """
    # Start with the text that precedes the first child (or the whole
    # text of a leaf element).
    parts = [e.text or '']
    # Iterate the element directly; getchildren() is deprecated.
    for child in e:
        parts.append(net_text(child))
        # Text between this child's end tag and the next sibling belongs
        # to the parent's content and must not be dropped.
        parts.append(child.tail or '')
    return "".join(parts)
|
||
|
|
||
|
# Wishlist to scrape and the URL of its print-friendly view.
wishlist_id = '1U5EXVPVS3WP5'
url = "http://www.amazon.com/wishlist/%s/ref=cm_wl_act_print_o?_encoding=UTF8&layout=standard-print&disableNav=1&visitor-view=1&items-per-page=500&page=1" % (wishlist_id)

# requests has no default timeout; without one a stalled connection would
# hang the script indefinitely.
r = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page
# (which would just produce no rows).
r.raise_for_status()
html = lxml.html.fromstring(r.content.decode("UTF-8"))

# Select every row of the element with id "itemsTable".
sel = CSSSelector('#itemsTable tr')
elems = sel(html)
|
||
|
|
||
|
# NOTE: the print version of the wishlist has no ISBNs, so more work is needed to get them.
|
||
|
|
||
|
# Print one line per table row: the row index followed by the text of each
# <td> cell, separated by single spaces.
for (i, tr) in enumerate(elems):
    cells = [net_text(td) for td in tr.findall('td')]
    # A single-argument print(...) call behaves the same on Python 2 and
    # Python 3, unlike the Python-2-only "print x," statement form.
    print(" ".join([str(i)] + cells))
|
||
|
|