Fix the LT (LibraryThing) scraper -- it now needs to hit the front page first to pick up session-like cookies

pull/1/head
Raymond Yee 2014-06-11 08:46:24 -04:00
parent d12d31cf4a
commit d17836fb58
1 changed file with 5 additions and 0 deletions

View File

@@ -154,6 +154,10 @@ class LibraryThing(object):
next_page = True
offset = 0
cookies = None
# go to the front page of LibraryThing first to pick up relevant session-like cookies
r = requests.get("https://www.librarything.com/")
cookies = r.cookies
while next_page:
url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username,
@@ -179,6 +183,7 @@ class LibraryThing(object):
total = int(re.search(r'(\d+)$',count_text).group(1))
logger.info('total: %d', total)
except Exception, e: # assume for now that if we can't grab this text, there is no page bar and no books
logger.info('Exception {0}'.format(e))
total = 0
# to do paging we can either look for a next link or just increase the offset by the number of rows.