Merge pull request #364 from Gluejar/fix_lt_scraper

put in fix to LT scraper
2014-06-11 08:48:16 -04:00 · 2014-06-11 08:48:16 -04:00 · 3491ee152b
parent d12d31cf4a d17836fb58
commit 3491ee152b
1 changed file with 5 additions and 0 deletions
--- a/core/librarything.py
+++ b/core/librarything.py
@@ -155,6 +155,10 @@ class LibraryThing(object):
        offset = 0
        cookies = None
                
+        # go to the front page of LibraryThing first to pick up relevant session-like cookies
+        r = requests.get("https://www.librarything.com/")
+        cookies = r.cookies
+        
        while next_page:
            url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username,
                                        view_style, COLLECTION, offset)
@@ -179,6 +183,7 @@ class LibraryThing(object):
                total = int(re.search(r'(\d+)$',count_text).group(1))
                logger.info('total: %d', total)
            except Exception, e:  # assume for now that if we can't grab this text, there is no page bar and no books
+                logger.info('Exception {0}'.format(e))
                total = 0
                
            # to do paging we can either look for a next link or just increase the offset by the number of rows.