From 957cf615b4388080fdd8489f53aa6fceaffdd1c6 Mon Sep 17 00:00:00 2001
From: eric
Date: Thu, 22 Feb 2018 11:29:52 -0500
Subject: [PATCH] delint

---
 core/librarything.py | 190 ++++++++++++++++++++++++++-----------------
 1 file changed, 116 insertions(+), 74 deletions(-)

diff --git a/core/librarything.py b/core/librarything.py
index 4ba80a38..ba38f1fe 100644
--- a/core/librarything.py
+++ b/core/librarything.py
@@ -2,11 +2,12 @@ import csv
 import HTMLParser
 import httplib
 import logging
-import mechanize
 import re
+from datetime import datetime
+
+import mechanize
 import requests
-from datetime import datetime
 
 from regluit.core import models
 
 logger = logging.getLogger(__name__)
@@ -20,7 +21,7 @@ class LibraryThing(object):
     """
     url = "https://www.librarything.com"
     csv_file_url = "https://www.librarything.com/export-csv"
-    
+
     def __init__(self, username=None, password=None):
         self.username = username
         self.password = password
@@ -40,77 +41,98 @@ class LibraryThing(object):
     def parse_csv(self):
         h = HTMLParser.HTMLParser()
         reader = csv.DictReader(self.csv_handle)
-        # There are more fields to be parsed out. Note that there is a second author column to handle
-        for (i,row) in enumerate(reader):
+        # There are more fields to be parsed out. Note that there is a
+        # second author column to handle
+        for (i, row) in enumerate(reader):
             # ISBNs are written like '[123456789x]' in the CSV, suggesting possibility of a list
             m = re.match(r'^\[(.*)\]$', row["'ISBNs'"])
             if m:
                 isbn = m.group(1).split()
             else:
                 isbn = []
-            yield {'title':h.unescape(row["'TITLE'"]), 'author':h.unescape(row["'AUTHOR (first, last)'"]),
-                   'isbn':isbn, 'comment':row["'COMMENT'"],
-                   'tags':row["'TAGS'"], 'collections':row["'COLLECTIONS'"],
-                   'reviews':h.unescape(row["'REVIEWS'"])}
+            yield {
+                'title':h.unescape(row["'TITLE'"]),
+                'author':h.unescape(row["'AUTHOR (first, last)'"]),
+                'isbn':isbn,
+                'comment':row["'COMMENT'"],
+                'tags':row["'TAGS'"],
+                'collections':row["'COLLECTIONS'"],
+                'reviews':h.unescape(row["'REVIEWS'"])
+            }
     def viewstyle_1(self, rows):
-        
-        for (i,row) in enumerate(rows):
+
+        for (i, row) in enumerate(rows):
             book_data = {}
             cols = row.xpath('td')
             # cover
-            book_data["cover"] = {"cover_id":cols[0].attrib["id"],
-                                  "image": {"width":cols[0].xpath('.//img')[0].attrib['width'],
-                                            "src": cols[0].xpath('.//img')[0].attrib['src']}
+            book_data["cover"] = {
+                "cover_id":cols[0].attrib["id"],
+                "image": {
+                    "width":cols[0].xpath('.//img')[0].attrib['width'],
+                    "src": cols[0].xpath('.//img')[0].attrib['src']
+                }
                                   }
             # title
-            book_data["title"] = {"href":cols[1].xpath('.//a')[0].attrib['href'],
-                                  "title":cols[1].xpath('.//a')[0].text}
-            
+            book_data["title"] = {
+                "href":cols[1].xpath('.//a')[0].attrib['href'],
+                "title":cols[1].xpath('.//a')[0].text
+            }
+
             # extract work_id and book_id from href
             try:
-                (book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups()
+                (book_data["work_id"], book_data["book_id"]) = re.match(
+                    "^/work/(.*)/book/(.*)$",
+                    book_data["title"]["href"]
+                ).groups()
             except:
                 (book_data["work_id"], book_data["book_id"]) = (None, None)
-            
+
             # author -- what if there is more than 1? or none?
             try:
-                book_data["author"] = {"display_name":cols[2].xpath('.//a')[0].text,
-                                       "href":cols[2].xpath('.//a')[0].attrib['href'],
-                                       "name":cols[2].xpath('div')[0].text}
+                book_data["author"] = {
+                    "display_name":cols[2].xpath('.//a')[0].text,
+                    "href":cols[2].xpath('.//a')[0].attrib['href'],
+                    "name":cols[2].xpath('div')[0].text
+                }
             except:
                 book_data["author"] = None
-            
+
             # date
             book_data["date"] = cols[3].xpath('span')[0].text
-            
+
             # tags: grab tags that are not empty strings
             tag_links = cols[4].xpath('.//a')
             book_data["tags"] = filter(lambda x: x is not None, [a.text for a in tag_links])
-            
+
             # rating -- count # of stars
             book_data["rating"] = len(cols[5].xpath('.//img[@alt="*"]'))
-            
+
             # entry date
-            book_data["entry_date"] = datetime.date(datetime.strptime(cols[6].xpath('span')[0].text, "%b %d, %Y"))
-            
+            book_data["entry_date"] = datetime.date(
+                datetime.strptime(cols[6].xpath('span')[0].text, "%b %d, %Y")
+            )
+
             yield book_data
-        
+
     def viewstyle_5(self, rows):
         # implement this view to get at the ISBNs
-        for (i,row) in enumerate(rows):
+        for (i, row) in enumerate(rows):
             book_data = {}
             cols = row.xpath('td')
-            
+
             # title
             book_data["title"] = {"href":cols[0].xpath('.//a')[0].attrib['href'],
                                   "title":cols[0].xpath('.//a')[0].text}
-            
+
             # extract work_id and book_id from href
             try:
-                (book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups()
+                (book_data["work_id"], book_data["book_id"]) = re.match(
+                    "^/work/(.*)/book/(.*)$",
+                    book_data["title"]["href"]
+                ).groups()
             except:
                 (book_data["work_id"], book_data["book_id"]) = (None, None)
-            
+
             # tags
             tag_links = cols[1].xpath('.//a')
             book_data["tags"] = filter(lambda x: x is not None, [a.text for a in tag_links])
@@ -121,13 +143,13 @@ class LibraryThing(object):
             except Exception, e:
                 logger.info("no lc call number for: %s %s", book_data["title"], e)
                 book_data["lc_call_number"] = None
-            
+
             # subject
-            
+
             subjects = cols[3].xpath('.//div[@class="subjectLine"]')
             book_data["subjects"] = [{'href':s.xpath('a')[0].attrib['href'],
                                       'text':s.xpath('a')[0].text} for s in subjects]
-            
+
             # isbn
             try:
                 book_data["isbn"] = cols[4].xpath('.//span')[0].text
@@ -136,88 +158,94 @@ class LibraryThing(object):
                 book_data["isbn"] = None
             except Exception, e:
                 book_data["isbn"] = None
-            
+
             yield book_data
-        
+
     def parse_user_catalog(self, view_style=1):
         from lxml import html
-        
+
         # we can vary viewstyle to get different info
-        
-        IMPLEMENTED_STYLES = [1,5]
+
+        IMPLEMENTED_STYLES = [1, 5]
         COLLECTION = 2 # set to get All Collections
-        
+
         if view_style not in IMPLEMENTED_STYLES:
             raise NotImplementedError()
-        style_parser = getattr(self,"viewstyle_%s" % view_style)
+        style_parser = getattr(self, "viewstyle_%s" % view_style)
         next_page = True
         offset = 0
         cookies = None
-        
+
         # go to the front page of LibraryThing first to pick up relevant session-like cookies
         r = requests.get("https://www.librarything.com/")
         cookies = r.cookies
-        
+
         while next_page:
-            url = "https://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username,
-                                                                          view_style, COLLECTION, offset)
+            url = "https://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (
+                self.username, view_style, COLLECTION, offset
+            )
            logger.info("url: %s", url)
             if cookies is None:
                 r = requests.get(url)
             else:
                 r = requests.get(url, cookies=cookies)
-            
+
             if r.status_code != httplib.OK:
                 raise LibraryThingException("Error accessing %s: status %s" % (url, r.status_code))
             etree = html.fromstring(r.content)
             cookies = r.cookies # retain the cookies
-            
+
             # look for a page bar
             # try to grab the total number of books
             # 1 - 50 of 82
             try:
                 count_text = etree.xpath('//td[@class="pbGroup"]')[0].text
-                total = int(re.search(r'(\d+)$',count_text).group(1))
+                total = int(re.search(r'(\d+)$', count_text).group(1))
                 logger.info('total: %d', total)
-            except Exception, e: # assume for now that if we can't grab this text, there is no page bar and no books
+            except Exception, e:
+                # assume for now that if we can't grab this text,
+                # there is no page bar and no books
                 logger.info('Exception {0}'.format(e))
                 total = 0
-            
-            # to do paging we can either look for a next link or just increase the offset by the number of rows.
+
+            # to do paging we can either look for a next link or just increase the offset
+            # by the number of rows.
             # Let's try the latter
             # possible_next_link = etree.xpath('//a[@class="pageShuttleButton"]')[0]
-            
+
             rows_xpath = '//table[@id="lt_catalog_list"]/tbody/tr'
-            
+
             # deal with page 1 first and then working on paging through the collection
             rows = etree.xpath(rows_xpath)
-            
-            i = -1 # have to account for the problem of style_parser(rows) returning nothing
-            
-            for (i,row) in enumerate(style_parser(rows)):
-                yield row
-            
-            # page size = 50, first page offset = 0, second page offset = 50 -- if total = 50 no need to go
-            offset += i + 1
+            i = -1  # have to account for the problem of style_parser(rows) returning nothing
+
+            for (i, row) in enumerate(style_parser(rows)):
+                yield row
+
+            # page size = 50, first page offset = 0, second page offset = 50
+            # -- if total = 50 no need to go
+
+            offset += i + 1
             if offset >= total:
                 next_page = False
 
 
 
 def load_librarything_into_wishlist(user, lt_username, max_books=None):
     """
-    Load a specified LibraryThing shelf (by default: all the books from the LibraryThing account associated with user)
+    Load a specified LibraryThing shelf (by default: all the books
+    from the LibraryThing account associated with user)
     """
-    
+
     from regluit.core import bookloader
     from regluit.core import tasks
     from itertools import islice
-    
+
     logger.info("Entering into load_librarything_into_wishlist")
     lt = LibraryThing(lt_username)
-    
-    
-    for (i,book) in enumerate(islice(lt.parse_user_catalog(view_style=5),max_books)):
+
+
+    for (i, book) in enumerate(islice(lt.parse_user_catalog(view_style=5), max_books)):
         isbn = book["isbn"] # grab the first one
         logger.info("%d %s %s", i, book["title"]["title"], isbn)
         try:
@@ -227,13 +255,27 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None):
             if not edition:
                 continue
             # add the librarything ids to the db since we know them now
-            identifier= models.Identifier.get_or_add(type = 'thng', value = book['book_id'], edition = edition, work = edition.work)
-            identifier= models.Identifier.get_or_add(type = 'ltwk', value = book['work_id'], work = edition.work)
+            identifier = models.Identifier.get_or_add(
+                type='thng',
+                value=book['book_id'],
+                edition=edition,
+                work=edition.work
+            )
+            identifier = models.Identifier.get_or_add(
+                type='ltwk',
+                value=book['work_id'],
+                work=edition.work
+            )
             if book['lc_call_number']:
-                identifier= models.Identifier.get_or_add(type = 'lccn', value = book['lc_call_number'], edition = edition, work = edition.work)
+                identifier = models.Identifier.get_or_add(
+                    type='lccn',
+                    value=book['lc_call_number'],
+                    edition=edition,
+                    work=edition.work
+                )
             user.wishlist.add_work(edition.work, 'librarything', notify=True)
             if edition.new:
                 tasks.populate_edition.delay(edition.isbn_13)
             logger.info("Work with isbn %s added to wishlist.", isbn)
         except Exception, e:
-            logger.info ("error adding ISBN %s: %s", isbn, e)
+            logger.info("error adding ISBN %s: %s", isbn, e)
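
Usage note (illustrative, not part of the patch): a minimal sketch of how the scraper touched by this delint is typically driven. The import path regluit.core.librarything follows the repo layout, the username is hypothetical, and parse_user_catalog() scrapes the live LibraryThing site, so results depend on its current markup.

    from itertools import islice

    from regluit.core.librarything import LibraryThing

    # Hypothetical LibraryThing account with a public catalog.
    lt = LibraryThing(username='some_lt_user')

    # View style 5 is the one that exposes ISBN, LC call number and subjects.
    for (i, book) in enumerate(islice(lt.parse_user_catalog(view_style=5), 5)):
        print i, book['title']['title'], book['isbn']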