2011-12-05 05:56:24 +00:00
|
|
|
import re
|
2011-10-25 01:09:30 +00:00
|
|
|
import json
|
|
|
|
import logging
|
2011-12-05 05:56:24 +00:00
|
|
|
from itertools import islice
|
2011-10-25 01:09:30 +00:00
|
|
|
from urllib import urlencode
|
2011-12-05 05:56:24 +00:00
|
|
|
from urlparse import urlparse, urlunparse, urljoin
|
|
|
|
|
2011-10-25 01:09:30 +00:00
|
|
|
import httplib
|
2011-12-05 05:56:24 +00:00
|
|
|
import oauth2 as oauth
|
|
|
|
from requests import request
|
|
|
|
from xml.etree import ElementTree as ET
|
2011-12-22 06:17:16 +00:00
|
|
|
import django.utils.encoding
|
2011-12-05 05:56:24 +00:00
|
|
|
|
2011-11-01 00:26:05 +00:00
|
|
|
import regluit.core
|
2011-12-05 05:56:24 +00:00
|
|
|
from regluit.core import bookloader
|
2012-01-17 00:34:35 +00:00
|
|
|
from regluit.core import models
|
2011-11-01 00:26:05 +00:00
|
|
|
|
2011-10-25 01:09:30 +00:00
|
|
|
# import parse_qsl from cgi if it doesn't exist in urlparse
try:
    from urlparse import parse_qsl
except ImportError:
    # older Pythons only ship parse_qsl in cgi
    from cgi import parse_qsl

from django.conf import settings

logger = logging.getLogger(__name__)

# QUESTION: should the request_token, access_token be part of the state of the client?
# for simplicity for now, I will make them part of the state of GoodReadsClient

class GoodreadsException(Exception):
    """Base class for errors raised while talking to the Goodreads API."""
    pass
class GoodreadsAuthorizationRequired(GoodreadsException):
    """Raised when an operation needs OAuth authorization that is absent."""
    pass
def filter_none(d):
    """Return a copy of dict *d* with all None-valued entries removed."""
    # items() (rather than iteritems) keeps this working on Python 3 too;
    # the parameter dicts handled here are tiny, so the copy is negligible
    return dict((k, v) for (k, v) in d.items() if v is not None)
def safe_strip(a_string):
    """Strip *a_string* when it is truthy; otherwise return ''."""
    # guards against None (and '') coming back from ElementTree .text
    return a_string.strip() if a_string else ''
class GoodreadsClient(object):
    """OAuth client for the Goodreads web API.

    The OAuth consumer and (optionally) the access token are kept as
    instance state.  Pass either a ``user`` (whose profile stores a
    token) or a raw ``access_token`` dict to authorize the client.
    """

    url = 'http://www.goodreads.com'
    request_token_url = urljoin(url, 'oauth/request_token')
    authorize_url = urljoin(url, '/oauth/authorize')
    access_token_url = urljoin(url, '/oauth/access_token')

    def __init__(self, key, secret, user=None, access_token=None):
        """Set up the OAuth consumer and client.

        key, secret: Goodreads API key/secret.
        user: optional user whose profile holds a stored access token.
        access_token: optional dict with 'oauth_token' and
            'oauth_token_secret' keys.
        """
        self.key = key
        self.secret = secret
        self.consumer = oauth.Consumer(key=self.key, secret=self.secret)
        self.client = oauth.Client(self.consumer)

        if access_token is not None:
            self.__load_access_token(access_token)
        else:
            self.access_token = None

        # a user-supplied token overrides any access_token passed above
        if user is not None:
            self.load_user_access_token(user)

    @property
    def is_authorized(self):
        """True once an OAuth access token has been loaded."""
        return (self.access_token is not None)

    def begin_authorization(self, callback_url=None):
        """Fetch a request token and return (authorize_link, request_token).

        callback_url: optional URL Goodreads should redirect to after the
            user authorizes the request token.
        Raises GoodreadsException on a non-200 response.
        """
        response, content = self.client.request(GoodreadsClient.request_token_url, 'GET')
        if int(response['status']) != httplib.OK:
            raise GoodreadsException('Invalid response: %s' % response['status'])

        request_token = dict(parse_qsl(content))

        q = {'oauth_token': request_token['oauth_token']}
        if callback_url is not None:
            q['oauth_callback'] = callback_url

        authorize_link = GoodreadsClient.authorize_url + '?' + urlencode(q)
        return (authorize_link, request_token)

    def complete_authorization(self, request_token):
        """Exchange an authorized request token for an access token.

        Loads the token into this client and returns the raw token dict.
        Raises GoodreadsException on a non-200 response.
        """
        token = oauth.Token(request_token['oauth_token'],
                            request_token['oauth_token_secret'])
        self.client = oauth.Client(self.consumer, token)
        response, content = self.client.request(GoodreadsClient.access_token_url, 'POST')
        if int(response['status']) != httplib.OK:
            raise GoodreadsException('Invalid response: %s' % response['status'])

        access_token_raw = dict(parse_qsl(content))
        self.__load_access_token(access_token_raw)
        return access_token_raw

    def load_user_access_token(self, user):
        """Load the access token stored on user.profile into this client."""
        access_token = {'oauth_token': user.profile.goodreads_auth_token,
                        'oauth_token_secret': user.profile.goodreads_auth_secret}
        self.__load_access_token(access_token)

    def __load_access_token(self, access_token):
        # rebuild the client so subsequent requests are signed with the token
        token = oauth.Token(access_token['oauth_token'],
                            access_token['oauth_token_secret'])
        self.access_token = token
        self.client = oauth.Client(self.consumer, self.access_token)

    def __clear_access_token(self):
        # NOTE(review): self.client is not rebuilt here, so an existing
        # client keeps its old token -- confirm whether that is intended
        self.access_token = None
        self.consumer = oauth.Consumer(key=self.key, secret=self.secret)

    def auth_user(self):
        """Return {'userid', 'name', 'link'} for the authorized user.

        Raises GoodreadsAuthorizationRequired when no access token is
        loaded, GoodreadsException on an HTTP error.
        """
        if not self.is_authorized:
            raise GoodreadsAuthorizationRequired('Attempt to access auth_user without authorization.')
        response, content = self.client.request('%s/api/auth_user' % GoodreadsClient.url,
                                                'GET')
        if int(response['status']) != httplib.OK:
            raise GoodreadsException('Error authenticating Goodreads user ' )
        doc = ET.fromstring(content)
        user = doc.find('user')
        userid = user.get('id')
        name = user.find('name').text
        link = user.find('link').text
        return {'userid': userid, 'name': name, 'link': link}

    def add_book(self, book_id=871441, shelf_name='to-read'):
        """Add *book_id* to the authorized user's shelf *shelf_name*.

        The default book is "Moby-Dick: A Pop-Up Book" (871441).  Returns
        True on success; raises GoodreadsException otherwise.
        """
        # BUG FIX: shelf_name was previously ignored ('to-read' hard-coded)
        body = urlencode({'name': shelf_name, 'book_id': book_id})
        headers = {'content-type': 'application/x-www-form-urlencoded'}
        response, content = self.client.request('%s/shelf/add_to_shelf.xml' % GoodreadsClient.url,
                                                'POST', body, headers)
        # check that the new resource has been created
        if int(response['status']) != httplib.CREATED:
            # log before raising -- this logging was unreachable before,
            # sitting after the raise
            logger.info('response,content: %s | %s ' % (response, content))
            raise GoodreadsException('Cannot create resource: %s' % response['status'])
        return True

    @staticmethod
    def _review_to_dict(review):
        # Map one <review> element into the dict shape callers expect.
        # safe_strip tolerates elements whose .text is None (empty tags),
        # which previously crashed review_list with AttributeError.
        return {'id': review.find('id').text,
                'book': {'id': safe_strip(review.find('book/id').text),
                         'isbn10': review.find('book/isbn').text,
                         'isbn13': review.find('book/isbn13').text,
                         'title': safe_strip(review.find('book/title').text),
                         'text_reviews_count': safe_strip(review.find('book/text_reviews_count').text),
                         'link': safe_strip(review.find('book/link').text),
                         'small_image_url': safe_strip(review.find('book/small_image_url').text),
                         'ratings_count': safe_strip(review.find('book/ratings_count').text),
                         'description': safe_strip(review.find('book/description').text)}
                }

    def review_list_unauth(self, user_id, shelf='all',page=1,sort=None,per_page=20,order='a',search=None,v=2):
        """Yield review dicts for *user_id* using key-only (unauthenticated)
        access, paging through /review/list.xml until an empty page.
        """
        path = "/review/list.xml"
        method = "GET"
        # BUG FIX: the v parameter is now passed through instead of a literal 2
        params = filter_none({'id':user_id,'shelf':shelf,'page':page,'sort':sort,'per_page':per_page,'order':order,
                              'search':search, 'v':v})
        params["key"] = self.key

        request_url = urljoin(GoodreadsClient.url, path)
        logger.info("request_url:{0}, params: {1}".format(request_url, params))

        more_pages = True
        while more_pages:
            r = request(method, request_url, params=params)
            if r.status_code != httplib.OK:
                raise GoodreadsException('Error in review_list_unauth, http status_code: {0}'.format(r.status_code))
            doc = ET.fromstring(r.content.encode('utf-8'))
            reviews = doc.findall('reviews/review')
            for review in reviews:
                yield self._review_to_dict(review)
            # an empty page means we have consumed every review
            if len(reviews) == 0:
                more_pages = False
            else:
                params["page"] += 1

    def review_list(self, user_id, shelf='all',page=1,sort=None,per_page=20,order='a',search=None,v=2):
        """Yield review dicts for *user_id* via the (possibly OAuth-signed)
        client, paging until an empty page.

        have to account for situation in which we might need authorized access
        for now: assume no need for auth

        sort: available_for_swap, position, num_pages, votes, recommender, rating, shelves, format,
        avg_rating, date_pub, isbn, comments, author, title, notes, cover, isbn13, review, date_pub_edition,
        condition, asin, date_started, owned, random, date_read, year_pub, read_count, date_added,
        date_purchased, num_ratings, purchase_location, date_updated (optional)
        """
        path = "/review/list.xml"
        method = "GET"
        # BUG FIX: the v parameter is now passed through instead of a literal 2
        params = filter_none({'id':user_id,'shelf':shelf,'page':page,'sort':sort,'per_page':per_page,'order':order,
                              'search':search, 'v':v})
        request_url = urljoin(GoodreadsClient.url, path)

        more_pages = True
        while more_pages:
            response, content = self.client.request('%s?%s' % (request_url, urlencode(params)),
                                                    method)
            if int(response['status']) != httplib.OK:
                raise GoodreadsException('Error in review_list: ' )
            # BUG FIX: parsing now goes through _review_to_dict/safe_strip so
            # empty elements (text is None) no longer raise AttributeError
            doc = ET.fromstring(content)
            reviews = doc.findall('reviews/review')
            for review in reviews:
                yield self._review_to_dict(review)
            if len(reviews) == 0:
                more_pages = False
            else:
                params["page"] += 1

    def shelves_list(self, user_id, page=1):
        """Return a dict describing *user_id*'s shelves.

        BUG to fix: should go through all the pages, not just page 1.
        """
        path = "/shelf/list.xml"
        params = {'user_id': user_id, 'page': page}
        params["key"] = self.key
        method = "GET"
        request_url = urljoin(GoodreadsClient.url, path)

        r = request(method, request_url, params=params)
        if r.status_code != httplib.OK:
            raise GoodreadsException('Error in shelves_list: %s ' % (r.headers))

        logger.info('headers: %s' % (r.headers))
        doc = ET.fromstring(r.content.encode('utf-8'))
        shelves = doc.find('shelves')
        # coerce every attribute of <shelves> to int (simple parse to a dict)
        d = dict([(k, int(shelves.attrib[k])) for k in shelves.attrib])
        d["user_shelves"] = [{'name': shelf.find('name').text,
                              'book_count': int(shelf.find('book_count').text),
                              'description': shelf.find('description').text if shelf.find('description').attrib['nil'] != 'true' else None,
                              'exclusive_flag': shelf.find('exclusive_flag').text}
                             for shelf in shelves.findall('user_shelf')]
        # each book sits on exactly one exclusive shelf, so summing those
        # counts gives the number of distinct books
        d["total_book_count"] = sum([shelf['book_count'] if shelf['exclusive_flag'] == 'true' else 0 for shelf in d["user_shelves"]])
        return d
def load_goodreads_shelf_into_wishlist(user, shelf_name='all', goodreads_user_id=None, max_books=None, expected_number_of_books=None):
    """
    Load a specified Goodreads shelf (by default: all the books from the
    Goodreads account associated with user) into user's wishlist.

    user: the user whose wishlist is updated.
    shelf_name: Goodreads shelf to read ('all' by default).
    goodreads_user_id: defaults to the id stored on user.profile; raises
        Exception if neither is available.
    max_books: optional cap on the number of reviews processed.
    expected_number_of_books: currently unused here -- kept for interface
        compatibility.  # NOTE(review): confirm whether callers rely on it

    Returns user.wishlist.
    """
    logger.info('Entering load_goodreads_shelf_into_wishlist. user: %s, shelf_name: %s, goodreads_user_id: %s, max_books: %s, expected_number_of_books: %s',
                user, shelf_name, goodreads_user_id, max_books, expected_number_of_books)
    gc = GoodreadsClient(key=settings.GOODREADS_API_KEY, secret=settings.GOODREADS_API_SECRET, user=user)

    if goodreads_user_id is None:
        if user.profile.goodreads_user_id is not None:
            goodreads_user_id = user.profile.goodreads_user_id
        else:
            raise Exception("No Goodreads user_id is associated with user.")

    logger.info('computed goodreads_user_id: %s ', goodreads_user_id)

    for (i, review) in enumerate(islice(gc.review_list(goodreads_user_id, shelf=shelf_name), max_books)):
        # prefer the 10-digit ISBN, falling back to the 13-digit one
        isbn = review["book"]["isbn10"] if review["book"]["isbn10"] is not None else review["book"]["isbn13"]
        logger.info("%d %s %s %s ", i, review["book"]["title"], isbn, review["book"]["small_image_url"])
        try:
            edition = bookloader.add_by_isbn(isbn)
            if not edition:
                continue
            # save the goodreads id since we know it at this point
            # we need to extract it from the link since review['id']
            # is the id for a users review, not the book
            link = review['book']['link']
            match = re.search(r'/show/(\d+)', link)
            if match:
                identifier = models.Identifier.get_or_add(type='gdrd', value=match.group(1), edition=edition, work=edition.work)
                user.wishlist.add_work(edition.work, 'goodreads')
                logger.info("Work with isbn %s added to wishlist.", isbn)
            else:
                logger.error("unable to extract goodreads id from %s", link)
            if edition.new:
                # kick off asynchronous enrichment of a freshly created edition
                regluit.core.tasks.populate_edition.delay(edition.isbn_13)
        except Exception as e:
            # best-effort: a failure on one ISBN must not abort the whole shelf
            logger.info("Exception adding ISBN %s: %s", isbn, e)

    logger.info('Leaving load_goodreads_shelf_into_wishlist. Length of wishlist for user %s is %s', user, len(user.wishlist.works.all()))
    return user.wishlist