344 lines
13 KiB
Python
344 lines
13 KiB
Python
# external library imports
|
|
|
|
from itertools import islice
|
|
import logging
|
|
import re
|
|
import http.client as httplib
|
|
from urllib.parse import urlencode, urljoin, parse_qsl
|
|
from xml.etree import ElementTree as ET
|
|
|
|
import oauth2 as oauth
|
|
from requests import request
|
|
|
|
|
|
# django imports
|
|
|
|
from django.conf import settings
|
|
|
|
# regluit imports
|
|
|
|
from . import bookloader, models
|
|
from . import tasks
|
|
|
|
|
|
# Module-level logger used by GoodreadsClient and load_goodreads_shelf_into_wishlist.
logger = logging.getLogger(__name__)
|
|
|
|
# QUESTION: should the request_token and access_token be part of the state of the client?
# For simplicity, for now they are kept as part of the state of GoodreadsClient.
|
|
|
|
class GoodreadsException(Exception):
    """Base class for errors raised while talking to the Goodreads API."""
|
|
|
|
class GoodreadsAuthorizationRequired(GoodreadsException):
    """Raised when a call that needs an OAuth access token is made without one."""
|
|
|
|
def filter_none(d):
    """Return a new dict holding the items of *d* whose value is not None.

    Only ``None`` is dropped; other falsy values (0, '', False) are kept.
    The input mapping is not modified.
    """
    return {k: v for k, v in d.items() if v is not None}
|
|
|
|
def safe_strip(a_string):
    """Return ``a_string.strip()``, or '' when the value cannot be stripped.

    Used on XML element text, which is None for empty elements.
    """
    try:
        return a_string.strip()
    except (AttributeError, TypeError):
        # None (or any other non-string) has no usable strip(); fall back to
        # an empty string instead of swallowing every exception.
        return ''
|
|
|
|
class GoodreadsClient(object):
    """Client for the Goodreads web API at https://www.goodreads.com.

    Holds the OAuth consumer built from the developer key/secret, an
    ``oauth.Client`` used to sign requests and, once authorization has
    completed (or a stored token was loaded), the user's access token.
    """

    url = 'https://www.goodreads.com'
    request_token_url = urljoin(url, '/oauth/request_token')
    authorize_url = urljoin(url, '/oauth/authorize')
    access_token_url = urljoin(url, '/oauth/access_token')

    def __init__(self, key, secret, user=None, access_token=None):
        """Create a client.

        key, secret: Goodreads developer key and secret.
        user: optional user whose ``profile.goodreads_auth_token`` and
            ``profile.goodreads_auth_secret`` are loaded as the access token.
        access_token: optional dict with 'oauth_token' and
            'oauth_token_secret' keys.

        When both ``access_token`` and ``user`` are given, the user's stored
        token is loaded last and therefore wins.
        """
        self.key = key
        self.secret = secret
        self.consumer = oauth.Consumer(key=self.key, secret=self.secret)
        self.client = oauth.Client(self.consumer)

        if access_token is not None:
            self.__load_access_token(access_token)
        else:
            self.access_token = None

        if user is not None:
            self.load_user_access_token(user)

    @property
    def is_authorized(self):
        """True when an access token has been loaded into this client."""
        return self.access_token is not None

    def begin_authorization(self, callback_url=None):
        """Fetch a request token and build the link the user must visit.

        Returns a ``(authorize_link, request_token)`` tuple, where
        ``request_token`` is the dict parsed from the response body.
        Raises GoodreadsException when the response status is not 200.
        """
        response, content = self.client.request(GoodreadsClient.request_token_url, 'GET')
        if int(response['status']) != httplib.OK:
            raise GoodreadsException('Invalid response: %s' % response['status'])

        request_token = dict(parse_qsl(str(content, 'utf-8')))

        query = {'oauth_token': request_token['oauth_token']}
        if callback_url is not None:
            query['oauth_callback'] = callback_url

        authorize_link = GoodreadsClient.authorize_url + '?' + urlencode(query)
        return (authorize_link, request_token)

    def complete_authorization(self, request_token):
        """Exchange an authorized request token for an access token.

        ``request_token`` is the dict returned by ``begin_authorization``.
        The resulting access token is loaded into this client and also
        returned as a dict. Raises GoodreadsException when the response
        status is not 200.
        """
        token = oauth.Token(request_token['oauth_token'], request_token['oauth_token_secret'])

        # The exchange request has to be signed with the request token.
        self.client = oauth.Client(self.consumer, token)
        response, content = self.client.request(GoodreadsClient.access_token_url, 'POST')
        if int(response['status']) != httplib.OK:
            raise GoodreadsException('Invalid response: %s' % response['status'])

        access_token = dict(parse_qsl(str(content, 'utf-8')))
        self.__load_access_token(access_token)
        return access_token

    def load_user_access_token(self, user):
        """Load the access token stored on ``user.profile`` into this client."""
        access_token = {
            'oauth_token': user.profile.goodreads_auth_token,
            'oauth_token_secret': user.profile.goodreads_auth_secret,
        }
        self.__load_access_token(access_token)

    def __load_access_token(self, access_token):
        """Store the token from ``access_token`` and sign future requests with it."""
        token = oauth.Token(access_token['oauth_token'],
                            access_token['oauth_token_secret'])
        self.access_token = token
        self.client = oauth.Client(self.consumer, self.access_token)

    def __clear_access_token(self):
        """Forget the access token and go back to a consumer-only client."""
        self.access_token = None
        self.consumer = oauth.Consumer(key=self.key, secret=self.secret)
        # Rebuild the client as well, otherwise it would keep signing
        # requests with the token that was just cleared.
        self.client = oauth.Client(self.consumer)

    def auth_user(self):
        """Return the Goodreads user the access token belongs to.

        Returns a dict with 'userid', 'name' and 'link'.
        Raises GoodreadsAuthorizationRequired when no access token is loaded
        and GoodreadsException when the response status is not 200.
        """
        if not self.is_authorized:
            raise GoodreadsAuthorizationRequired('Attempt to access auth_user without authorization.')

        response, content = self.client.request(
            '%s/api/auth_user' % GoodreadsClient.url,
            'GET'
        )
        if int(response['status']) != httplib.OK:
            raise GoodreadsException('Error authenticating Goodreads user ')

        doc = ET.fromstring(content)
        user = doc.find('user')
        return {
            'userid': user.get('id'),
            'name': user.find('name').text,
            'link': user.find('link').text,
        }

    def add_book(self, book_id=871441, shelf_name='to-read'):
        """Add ``book_id`` to the shelf named ``shelf_name``.

        The default book id 871441 is "Moby-Dick: A Pop-Up Book".
        Returns True on success; raises GoodreadsException when the response
        status is not 201 (Created).
        """
        body = urlencode({'name': shelf_name, 'book_id': book_id})
        headers = {'content-type': 'application/x-www-form-urlencoded'}
        response, content = self.client.request(
            '%s/shelf/add_to_shelf.xml' % GoodreadsClient.url,
            'POST',
            body,
            headers
        )
        # check that the new resource has been created
        if int(response['status']) != httplib.CREATED:
            logger.info('response, content: %s | %s ', response, content)
            raise GoodreadsException('Cannot create resource: %s' % response['status'])
        return True

    @staticmethod
    def _review_to_dict(review):
        """Convert one <review> element into the dict yielded by the review listings."""
        return {
            'id': review.find('id').text,
            'book': {
                'id': safe_strip(review.find('book/id').text),
                'isbn10': review.find('book/isbn').text,
                'isbn13': review.find('book/isbn13').text,
                'title': safe_strip(review.find('book/title').text),
                'text_reviews_count': safe_strip(review.find('book/text_reviews_count').text),
                'link': safe_strip(review.find('book/link').text),
                'small_image_url': safe_strip(review.find('book/small_image_url').text),
                'ratings_count': safe_strip(review.find('book/ratings_count').text),
                'description': safe_strip(review.find('book/description').text),
            },
        }

    def _iter_review_pages(self, params, fetch):
        """Yield review dicts page after page until a page has no reviews.

        ``fetch`` is called with ``params`` and must return the XML body of
        that page; ``params['page']`` is incremented after every non-empty page.
        """
        while True:
            doc = ET.fromstring(fetch(params))
            reviews = doc.findall('reviews/review')
            if not reviews:
                return
            for review in reviews:
                yield self._review_to_dict(review)
            params['page'] += 1

    def review_list_unauth(
            self, user_id, shelf='all', page=1, sort=None, per_page=20, order='a', search=None, v=2
    ):
        """Iterate over the reviews on a user's shelf using only the developer key.

        Generator of dicts (review 'id' plus a 'book' dict); pages are fetched
        lazily, starting at ``page``, until an empty page is returned.
        Parameters that are None are left out of the query.
        Raises GoodreadsException when a page does not come back with status 200.
        """
        params = filter_none({
            'id': user_id, 'shelf': shelf, 'page': page, 'sort': sort,
            'per_page': per_page, 'order': order,
            'search': search, 'v': v
        })
        params["key"] = self.key

        request_url = urljoin(GoodreadsClient.url, "/review/list.xml")
        # NOTE: params includes the developer key, so it ends up in the log.
        logger.info("request_url:%s, params: %s", request_url, params)

        def fetch(query):
            r = request("GET", request_url, params=query)
            if r.status_code != httplib.OK:
                raise GoodreadsException(
                    'Error in review_list_unauth, http status_code: {0}'.format(r.status_code)
                )
            return r.content

        yield from self._iter_review_pages(params, fetch)

    def review_list(self, user_id, shelf='all', page=1, sort=None, per_page=20, order='a', search=None, v=2):
        """Iterate over the reviews on a user's shelf using the signed OAuth client.

        have to account for situation in which we might need authorized access
        for now: assume no need for auth

        sort: available_for_swap, position, num_pages, votes, recommender, rating, shelves, format,
        avg_rating, date_pub, isbn, comments, author, title, notes,
        cover, isbn13, review, date_pub_edition,
        condition, asin, date_started, owned, random, date_read, year_pub, read_count, date_added,
        date_purchased, num_ratings, purchase_location, date_updated (optional)

        Generator of dicts (review 'id' plus a 'book' dict); pages are fetched
        lazily, starting at ``page``, until an empty page is returned.
        Raises GoodreadsException when a page does not come back with status 200.
        """
        params = filter_none({
            'id': user_id, 'shelf': shelf, 'page': page, 'sort': sort,
            'per_page': per_page, 'order': order, 'search': search, 'v': v
        })
        request_url = urljoin(GoodreadsClient.url, "/review/list.xml")

        def fetch(query):
            response, content = self.client.request(
                '%s?%s' % (request_url, urlencode(query)), "GET"
            )
            if int(response['status']) != httplib.OK:
                raise GoodreadsException('Error in review_list: %s' % response['status'])
            return content

        yield from self._iter_review_pages(params, fetch)

    @staticmethod
    def _parse_shelves(shelves):
        """Turn a <shelves> element into the dict returned by ``shelves_list``."""
        summary = {name: int(value) for name, value in shelves.attrib.items()}

        user_shelves = []
        for shelf in shelves.findall('user_shelf'):
            description = shelf.find('description')
            user_shelves.append({
                'name': shelf.find('name').text,
                'book_count': int(shelf.find('book_count').text),
                # Compare against None: an Element without children is falsy,
                # so a truthiness test would always discard the description.
                'description': description.text if description is not None else None,
                'exclusive_flag': shelf.find('exclusive_flag').text == 'true',
            })
        summary["user_shelves"] = user_shelves

        # Only exclusive shelves are summed into the total.
        summary["total_book_count"] = sum(
            shelf['book_count'] for shelf in user_shelves if shelf['exclusive_flag']
        )
        return summary

    def shelves_list(self, user_id, page=1):
        """Return one page of a user's shelves as a dict.

        The dict holds the integer attributes of the <shelves> element,
        'user_shelves' (a list of dicts with 'name', 'book_count',
        'description' and 'exclusive_flag') and 'total_book_count' (the sum of
        'book_count' over the exclusive shelves).
        Raises GoodreadsException when the response status is not 200.

        BUG to fix: should go through all the pages, not just the requested one.
        """
        params = {'user_id': user_id, 'page': page}
        params["key"] = self.key
        request_url = urljoin(GoodreadsClient.url, "/shelf/list.xml")

        r = request("GET", request_url, params=params)

        if r.status_code != httplib.OK:
            raise GoodreadsException('Error in shelves_list: %s ' % (r.headers))
        logger.info('headers: %s', r.headers)

        doc = ET.fromstring(r.content)
        return self._parse_shelves(doc.find('shelves'))
|
|
|
|
|
|
def load_goodreads_shelf_into_wishlist(
        user, shelf_name='all', goodreads_user_id=None, max_books=None, expected_number_of_books=None
):
    """
    Load a specified Goodreads shelf (by default:
    all the books from the Goodreads account associated with user)
    into the user's wishlist.

    user: the user whose stored Goodreads token is used and whose wishlist is filled.
    shelf_name: name of the Goodreads shelf to read.
    goodreads_user_id: Goodreads account to read from; defaults to
        ``user.profile.goodreads_user_id``.
    max_books: upper bound on the number of reviews processed (None = no limit).
    expected_number_of_books: only logged; not otherwise used here.

    Returns ``user.wishlist``. Raises GoodreadsException when no Goodreads
    user id can be determined. Failures on individual books are logged and
    do not stop the import.
    """
    logger.info(
        'Entering load_goodreads_shelf_into_wishlist. user: %s, shelf_name: %s,\n'
        'goodreads_user_id: %s, max_books: %s, expected_number_of_books: %s',
        user, shelf_name, goodreads_user_id, max_books, expected_number_of_books
    )
    gc = GoodreadsClient(
        key=settings.GOODREADS_API_KEY, secret=settings.GOODREADS_API_SECRET, user=user
    )

    if goodreads_user_id is None:
        goodreads_user_id = user.profile.goodreads_user_id
        if goodreads_user_id is None:
            raise GoodreadsException("No Goodreads user_id is associated with user.")

    logger.info('computed goodreads_user_id: %s ', goodreads_user_id)

    reviews = islice(gc.review_list(goodreads_user_id, shelf=shelf_name), max_books)
    for (i, review) in enumerate(reviews):
        book = review["book"]
        # Prefer the ISBN-10 and fall back to the ISBN-13.
        isbn = book["isbn10"] if book["isbn10"] is not None else book["isbn13"]
        logger.info("%d %s %s %s ", i, book["title"], isbn, book["small_image_url"])
        try:
            edition = bookloader.add_by_isbn(isbn)
            if not edition:
                continue
            # save the goodreads id since we know it at this point
            # we need to extract it from the link since review['id']
            # is the id for a users review, not the book
            link = book['link']
            match = re.search(r'/show/(\d+)', link)
            if match:
                models.Identifier.get_or_add(
                    type='gdrd', value=match.group(1), edition=edition, work=edition.work
                )
                user.wishlist.add_work(edition.work, 'goodreads', notify=True)
                logger.info("Work with isbn %s added to wishlist.", isbn)
            else:
                logger.error("unable to extract goodreads id from %s", link)
            # NOTE(review): nesting reconstructed from a listing without
            # indentation; presumably this check applies to every edition,
            # not only to the branch above -- confirm.
            if edition.new:
                tasks.populate_edition.delay(edition.isbn_13)
        except Exception as e:
            # Best effort: one bad book must not abort the import, but keep
            # the traceback so the failure can be diagnosed.
            logger.info("Exception adding ISBN %s: %s", isbn, e, exc_info=True)

    logger.info(
        'Leaving load_goodreads_shelf_into_wishlist. Length of wishlist for user %s is %s',
        user,
        len(user.wishlist.works.all())
    )

    return user.wishlist
|