regluit/core/goodreads.py

import json
import logging
from xml.etree import ElementTree as ET
from requests import request
import oauth2 as oauth
from urlparse import urlparse, urlunparse, urljoin
from urllib import urlencode
import httplib
from itertools import islice
from regluit.core import bookloader
import regluit.core

# import parse_qsl from cgi if it doesn't exist in urlparse
try:
  from urlparse import parse_qsl
except:
  from cgi import parse_qsl

from django.conf import settings

logger = logging.getLogger(__name__)

# QUESTION: should the request_token, access_token be part of the state of the client?
# for simplicity for now, I will make them part of the state of GoodReadsClient

class GoodreadsException(Exception):
    pass

class GoodreadsAuthorizationRequired(GoodreadsException):
    pass

def filter_none(d):
    d2 = {}
    for (k,v) in d.iteritems():
        if v is not None:
            d2[k] = v
    return d2

class GoodreadsClient(object):
    
    url = 'http://www.goodreads.com'
    request_token_url = urljoin(url,'oauth/request_token/')
    authorize_url = urljoin(url, '/oauth/authorize/')
    access_token_url = urljoin(url,'/oauth/access_token/')  
  
    def __init__(self,key,secret,access_token=None):
        self.key = key
        self.secret = secret
        self.consumer = oauth.Consumer(key=self.key,
                              secret=self.secret)
        
        self.client = oauth.Client(self.consumer)
        self.unauth_client = None
        
        if access_token is not None:
          self.__load_access_token(access_token)
        else:
          self.access_token = None
          
    @property     
    def is_authorized(self):
        return (self.access_token is not None)
    def begin_authorization (self, callback_url=None):
        # get request token
        response, content = self.client.request(GoodreadsClient.request_token_url, 'GET')
        
        if int(response['status']) != httplib.OK:
          raise Exception('Invalid response: %s' % response['status'])
        
        request_token = dict(parse_qsl(content))
        
        q = {'oauth_token':request_token['oauth_token']}
        if callback_url is not None:
          q['oauth_callback'] = callback_url
        
        authorize_link = GoodreadsClient.authorize_url + '?' + urlencode(q)
        return (authorize_link, request_token)
        
    def complete_authorization(self, request_token):
      token = oauth.Token(request_token['oauth_token'],
                          request_token['oauth_token_secret'])
      
      self.client = oauth.Client(self.consumer, token)
      response, content = self.client.request(GoodreadsClient.access_token_url, 'POST')
      if int(response['status']) != httplib.OK:
          raise Exception('Invalid response: %s' % response['status'])
      
      access_token_raw = dict(parse_qsl(content))
      self.__load_access_token(access_token_raw)
      return access_token_raw
      
    def __load_access_token(self, access_token):
        token = oauth.Token(access_token['oauth_token'],
                            access_token['oauth_token_secret'])
        self.access_token = token
        self.client = oauth.Client(self.consumer, self.access_token)        
  
    def __clear_access_token(self):
        self.access_token = None
        self.consumer = oauth.Consumer(key=self.key,
                              secret=self.secret)

    def auth_user(self):
        if self.is_authorized:
            response, content = self.client.request('%s/api/auth_user' % GoodreadsClient.url,
                                         'GET')
            if int(response['status']) != httplib.OK:
                raise GoodreadsException('Error authenticating Goodreads user: %s ' % response)
            else:
                doc = ET.fromstring(content)
                user = doc.find('user')
                userid = user.get('id')
                name = user.find('name').text
                link = user.find('link').text
                return({'userid':userid, 'name':name, 'link':link})
        else:
            raise GoodreadsAuthorizationRequired('Attempt to access auth_user without authorization.')
      
    def add_book(self, book_id=871441, shelf_name='to-read'):
      # the book is: "Moby-Dick: A Pop-Up Book" 871441
      body = urlencode({'name': 'to-read', 'book_id': book_id})
      headers = {'content-type': 'application/x-www-form-urlencoded'}
      response, content = self.client.request('%s/shelf/add_to_shelf.xml' % GoodreadsClient.url,
                                         'POST', body, headers)
      # check that the new resource has been created
      if int(response['status']) != httplib.CREATED:
          raise GoodreadsException('Cannot create resource: %s' % response['status'])
          logger.info('response,content: %s | %s ' % (response,content))
      else:
          return True
        
    def review_list(self, user_id, shelf='all',page=1,sort=None,per_page=20,order='a',search=None,v=2):
        """have to account for situation in which we might need authorized access
        for now:  assume no need for auth
        sort: available_for_swap, position, num_pages, votes, recommender, rating, shelves, format,
        avg_rating, date_pub, isbn, comments, author, title, notes, cover, isbn13, review, date_pub_edition,
        condition, asin, date_started, owned, random, date_read, year_pub, read_count, date_added,
        date_purchased, num_ratings, purchase_location, date_updated (optional)
        """
        
        path="/review/list/"
        params = filter_none({'id':user_id,'shelf':shelf,'page':page,'sort':sort,'per_page':per_page,'order':order,
                  'search':search, 'v':2})
        params["key"] = self.key
        method = "GET"
        
        request_url = urljoin(GoodreadsClient.url, path)
        
        # loop through all the pages starting with page
        
        more_pages = True
        
        while (more_pages):
        
            logger.info('request to review_list: %s %s', request_url, params)
            r = request(method,request_url,params=params)
            
            if r.status_code != httplib.OK:
                 raise GoodreadsException('Error in review_list: %s %s ' % (r.headers, r.content))
            else:
                #logger.info('headers, content: %s | %s ' % (r.headers,r.content))
                doc = ET.fromstring(r.content)
                # for the moment convert to a iterable of book data presented as dict -- one the way to paging through all results
                reviews = doc.findall('reviews/review')
                for review in reviews:
                    yield ({'id':review.find('id').text,
                            'book': {'id': review.find('book/id').text.strip(),
                                     'isbn10':review.find('book/isbn').text,
                                     'isbn13':review.find('book/isbn13').text,
                                     'title':review.find('book/title').text.strip(),
                                     'text_reviews_count':review.find('book/text_reviews_count').text.strip(),
                                     'link':review.find('book/link').text.strip(),
                                     'small_image_url':review.find('book/small_image_url').text.strip(),
                                     'ratings_count':review.find('book/ratings_count').text.strip(),
                                     'description':review.find('book/description').text.strip()}
                            })
                if len(reviews) == 0:
                    more_pages = False
                else:
                    params["page"] += 1

        
    def shelves_list(self,user_id,page=1):
        path = "/shelf/list.xml"
        params = {'user_id':user_id, 'page':page}
        params["key"] = self.key
        method = "GET"
        request_url = urljoin(GoodreadsClient.url, path)
        
        r = request(method,request_url,params=params)
        
        if r.status_code != httplib.OK:
            logger.info('headers, content: %s | %s ' % (r.headers,r.content))
            raise GoodreadsException('Error in shelves_list: %s %s ' % (r.headers, r.content))
        else:
            logger.info('headers, content: %s | %s ' % (r.headers,r.content))
            doc = ET.fromstring(r.content)
            shelves = doc.find('shelves')
            # do a simple parsing to a dictionary
            
            d = dict(  [ (k,int(shelves.attrib[k])) for k in shelves.attrib ]  )
            d["user_shelves"] = [{'name':shelf.find('name').text,
                                  'book_count':int(shelf.find('book_count').text),
                                  'description':shelf.find('description').text if shelf.find('description').attrib['nil'] != 'true' else None,
                                  'exclusive_flag':shelf.find('exclusive_flag').text} \
                for shelf in shelves.findall('user_shelf')]
  
            d["total_book_count"] = sum([shelf['book_count'] if shelf['exclusive_flag'] == 'true' else 0 for shelf in d["user_shelves"]])
            return d
        
        
def load_goodreads_shelf_into_wishlist(user, shelf_name='all', goodreads_user_id=None, max_books=None):
    """
    Load a specified Goodreads shelf (by default:  all the books from the Goodreads account associated with user)
    """
    
    gc = GoodreadsClient(key=settings.GOODREADS_API_KEY, secret=settings.GOODREADS_API_SECRET)
        
    if goodreads_user_id is None:
      if user.profile.goodreads_user_id is not None:
        goodreads_user_id = user.profile.goodreads_user_id
      else:
        raise Exception("No Goodreads user_id is associated with user.")
        
    for (i, review) in enumerate(islice(gc.review_list(goodreads_user_id,shelf=shelf_name),max_books)):
        isbn = review["book"]["isbn10"] if review["book"]["isbn10"] is not None else review["book"]["isbn13"]
        logger.info("%d %s %s %s ", i, review["book"]["title"], isbn, review["book"]["small_image_url"])
        try:
            edition = bookloader.add_by_isbn(isbn)
            # let's not trigger too much traffic to Google books for now
            # regluit.core.tasks.add_related.delay(isbn)
            user.wishlist.works.add(edition.work)
            logger.info("Work with isbn %s added to wishlist.", isbn)
        except Exception, e:
            logger.info ("error adding ISBN %s: %s", isbn, e)
First take on Goodreads integration. Can get userid, username, and display shelves info Using sessions to store user info right now. 2011-10-25 01:09:30 +00:00			`import json`
			`import logging`
			`from xml.etree import ElementTree as ET`
			`from requests import request`
			`import oauth2 as oauth`
			`from urlparse import urlparse, urlunparse, urljoin`
			`from urllib import urlencode`
			`import httplib`
Integrated Goodreads shelf loading to wishlist into UI 2011-11-01 00:26:05 +00:00			`from itertools import islice`
			`from regluit.core import bookloader`
			`import regluit.core`

First take on Goodreads integration. Can get userid, username, and display shelves info Using sessions to store user info right now. 2011-10-25 01:09:30 +00:00			`# import parse_qsl from cgi if it doesn't exist in urlparse`
			`try:`
			`from urlparse import parse_qsl`
			`except:`
			`from cgi import parse_qsl`

			`from django.conf import settings`

			`logger = logging.getLogger(__name__)`

			`# QUESTION: should the request_token, access_token be part of the state of the client?`
			`# for simplicity for now, I will make them part of the state of GoodReadsClient`

			`class GoodreadsException(Exception):`
			`pass`

			`class GoodreadsAuthorizationRequired(GoodreadsException):`
			`pass`

First pass at reading list of books from user 2011-10-25 21:20:10 +00:00			`def filter_none(d):`
			`d2 = {}`
			`for (k,v) in d.iteritems():`
			`if v is not None:`
			`d2[k] = v`
			`return d2`

First take on Goodreads integration. Can get userid, username, and display shelves info Using sessions to store user info right now. 2011-10-25 01:09:30 +00:00			`class GoodreadsClient(object):`

			`url = 'http://www.goodreads.com'`
			`request_token_url = urljoin(url,'oauth/request_token/')`
			`authorize_url = urljoin(url, '/oauth/authorize/')`
			`access_token_url = urljoin(url,'/oauth/access_token/')`

			`def __init__(self,key,secret,access_token=None):`
			`self.key = key`
			`self.secret = secret`
			`self.consumer = oauth.Consumer(key=self.key,`
			`secret=self.secret)`

			`self.client = oauth.Client(self.consumer)`
			`self.unauth_client = None`

			`if access_token is not None:`
			`self.__load_access_token(access_token)`
			`else:`
			`self.access_token = None`

			`@property`
			`def is_authorized(self):`
			`return (self.access_token is not None)`
			`def begin_authorization (self, callback_url=None):`
			`# get request token`
			`response, content = self.client.request(GoodreadsClient.request_token_url, 'GET')`

			`if int(response['status']) != httplib.OK:`
			`raise Exception('Invalid response: %s' % response['status'])`

			`request_token = dict(parse_qsl(content))`

			`q = {'oauth_token':request_token['oauth_token']}`
			`if callback_url is not None:`
			`q['oauth_callback'] = callback_url`

			`authorize_link = GoodreadsClient.authorize_url + '?' + urlencode(q)`
			`return (authorize_link, request_token)`

			`def complete_authorization(self, request_token):`
			`token = oauth.Token(request_token['oauth_token'],`
			`request_token['oauth_token_secret'])`

			`self.client = oauth.Client(self.consumer, token)`
			`response, content = self.client.request(GoodreadsClient.access_token_url, 'POST')`
			`if int(response['status']) != httplib.OK:`
			`raise Exception('Invalid response: %s' % response['status'])`

			`access_token_raw = dict(parse_qsl(content))`
			`self.__load_access_token(access_token_raw)`
			`return access_token_raw`

			`def __load_access_token(self, access_token):`
			`token = oauth.Token(access_token['oauth_token'],`
			`access_token['oauth_token_secret'])`
			`self.access_token = token`
			`self.client = oauth.Client(self.consumer, self.access_token)`

			`def __clear_access_token(self):`
			`self.access_token = None`
			`self.consumer = oauth.Consumer(key=self.key,`
			`secret=self.secret)`

			`def auth_user(self):`
			`if self.is_authorized:`
			`response, content = self.client.request('%s/api/auth_user' % GoodreadsClient.url,`
			`'GET')`
			`if int(response['status']) != httplib.OK:`
			`raise GoodreadsException('Error authenticating Goodreads user: %s ' % response)`
			`else:`
			`doc = ET.fromstring(content)`
			`user = doc.find('user')`
			`userid = user.get('id')`
			`name = user.find('name').text`
			`link = user.find('link').text`
			`return({'userid':userid, 'name':name, 'link':link})`
			`else:`
			`raise GoodreadsAuthorizationRequired('Attempt to access auth_user without authorization.')`

			`def add_book(self, book_id=871441, shelf_name='to-read'):`
			`# the book is: "Moby-Dick: A Pop-Up Book" 871441`
			`body = urlencode({'name': 'to-read', 'book_id': book_id})`
			`headers = {'content-type': 'application/x-www-form-urlencoded'}`
			`response, content = self.client.request('%s/shelf/add_to_shelf.xml' % GoodreadsClient.url,`
			`'POST', body, headers)`
			`# check that the new resource has been created`
			`if int(response['status']) != httplib.CREATED:`
			`raise GoodreadsException('Cannot create resource: %s' % response['status'])`
			`logger.info('response,content: %s \| %s ' % (response,content))`
			`else:`
			`return True`

First pass at reading list of books from user 2011-10-25 21:20:10 +00:00			`def review_list(self, user_id, shelf='all',page=1,sort=None,per_page=20,order='a',search=None,v=2):`
In the works: a method to retrieve books from a shelf 2011-10-25 14:10:59 +00:00			`"""have to account for situation in which we might need authorized access`
First pass at reading list of books from user 2011-10-25 21:20:10 +00:00			`for now: assume no need for auth`
In the works: a method to retrieve books from a shelf 2011-10-25 14:10:59 +00:00			`sort: available_for_swap, position, num_pages, votes, recommender, rating, shelves, format,`
			`avg_rating, date_pub, isbn, comments, author, title, notes, cover, isbn13, review, date_pub_edition,`
			`condition, asin, date_started, owned, random, date_read, year_pub, read_count, date_added,`
			`date_purchased, num_ratings, purchase_location, date_updated (optional)`
			`"""`
First pass at reading list of books from user 2011-10-25 21:20:10 +00:00
In the works: a method to retrieve books from a shelf 2011-10-25 14:10:59 +00:00			`path="/review/list/"`
First pass at reading list of books from user 2011-10-25 21:20:10 +00:00			`params = filter_none({'id':user_id,'shelf':shelf,'page':page,'sort':sort,'per_page':per_page,'order':order,`
			`'search':search, 'v':2})`
			`params["key"] = self.key`
			`method = "GET"`

			`request_url = urljoin(GoodreadsClient.url, path)`

Displaying some of the books from user shelf. 2011-10-25 23:07:44 +00:00			`# loop through all the pages starting with page`
First pass at reading list of books from user 2011-10-25 21:20:10 +00:00
Displaying some of the books from user shelf. 2011-10-25 23:07:44 +00:00			`more_pages = True`

			`while (more_pages):`

Integrated Goodreads shelf loading to wishlist into UI 2011-11-01 00:26:05 +00:00			`logger.info('request to review_list: %s %s', request_url, params)`
Displaying some of the books from user shelf. 2011-10-25 23:07:44 +00:00			`r = request(method,request_url,params=params)`

			`if r.status_code != httplib.OK:`
			`raise GoodreadsException('Error in review_list: %s %s ' % (r.headers, r.content))`
			`else:`
			`#logger.info('headers, content: %s \| %s ' % (r.headers,r.content))`
			`doc = ET.fromstring(r.content)`
			`# for the moment convert to a iterable of book data presented as dict -- one the way to paging through all results`
			`reviews = doc.findall('reviews/review')`
			`for review in reviews:`
			`yield ({'id':review.find('id').text,`
			`'book': {'id': review.find('book/id').text.strip(),`
dded a command to load a shelf to a user's wishlist 2011-10-26 16:49:54 +00:00			`'isbn10':review.find('book/isbn').text,`
Displaying some of the books from user shelf. 2011-10-25 23:07:44 +00:00			`'isbn13':review.find('book/isbn13').text,`
			`'title':review.find('book/title').text.strip(),`
			`'text_reviews_count':review.find('book/text_reviews_count').text.strip(),`
			`'link':review.find('book/link').text.strip(),`
			`'small_image_url':review.find('book/small_image_url').text.strip(),`
			`'ratings_count':review.find('book/ratings_count').text.strip(),`
			`'description':review.find('book/description').text.strip()}`
			`})`
			`if len(reviews) == 0:`
			`more_pages = False`
			`else:`
			`params["page"] += 1`

In the works: a method to retrieve books from a shelf 2011-10-25 14:10:59 +00:00
First take on Goodreads integration. Can get userid, username, and display shelves info Using sessions to store user info right now. 2011-10-25 01:09:30 +00:00			`def shelves_list(self,user_id,page=1):`
			`path = "/shelf/list.xml"`
			`params = {'user_id':user_id, 'page':page}`
			`params["key"] = self.key`
			`method = "GET"`
			`request_url = urljoin(GoodreadsClient.url, path)`

			`r = request(method,request_url,params=params)`

			`if r.status_code != httplib.OK:`
			`logger.info('headers, content: %s \| %s ' % (r.headers,r.content))`
			`raise GoodreadsException('Error in shelves_list: %s %s ' % (r.headers, r.content))`
			`else:`
			`logger.info('headers, content: %s \| %s ' % (r.headers,r.content))`
			`doc = ET.fromstring(r.content)`
			`shelves = doc.find('shelves')`
			`# do a simple parsing to a dictionary`
In the works: a method to retrieve books from a shelf 2011-10-25 14:10:59 +00:00
			`d = dict( [ (k,int(shelves.attrib[k])) for k in shelves.attrib ] )`
First take on Goodreads integration. Can get userid, username, and display shelves info Using sessions to store user info right now. 2011-10-25 01:09:30 +00:00			`d["user_shelves"] = [{'name':shelf.find('name').text,`
			`'book_count':int(shelf.find('book_count').text),`
Integrated Goodreads shelf loading to wishlist into UI 2011-11-01 00:26:05 +00:00			`'description':shelf.find('description').text if shelf.find('description').attrib['nil'] != 'true' else None,`
			`'exclusive_flag':shelf.find('exclusive_flag').text} \`
First take on Goodreads integration. Can get userid, username, and display shelves info Using sessions to store user info right now. 2011-10-25 01:09:30 +00:00			`for shelf in shelves.findall('user_shelf')]`
Integrated Goodreads shelf loading to wishlist into UI 2011-11-01 00:26:05 +00:00
			`d["total_book_count"] = sum([shelf['book_count'] if shelf['exclusive_flag'] == 'true' else 0 for shelf in d["user_shelves"]])`
First take on Goodreads integration. Can get userid, username, and display shelves info Using sessions to store user info right now. 2011-10-25 01:09:30 +00:00			`return d`


Integrated Goodreads shelf loading to wishlist into UI 2011-11-01 00:26:05 +00:00			`def load_goodreads_shelf_into_wishlist(user, shelf_name='all', goodreads_user_id=None, max_books=None):`
			`"""`
			`Load a specified Goodreads shelf (by default: all the books from the Goodreads account associated with user)`
			`"""`
First take on Goodreads integration. Can get userid, username, and display shelves info Using sessions to store user info right now. 2011-10-25 01:09:30 +00:00
Integrated Goodreads shelf loading to wishlist into UI 2011-11-01 00:26:05 +00:00			`gc = GoodreadsClient(key=settings.GOODREADS_API_KEY, secret=settings.GOODREADS_API_SECRET)`

			`if goodreads_user_id is None:`
			`if user.profile.goodreads_user_id is not None:`
			`goodreads_user_id = user.profile.goodreads_user_id`
			`else:`
			`raise Exception("No Goodreads user_id is associated with user.")`

			`for (i, review) in enumerate(islice(gc.review_list(goodreads_user_id,shelf=shelf_name),max_books)):`
			`isbn = review["book"]["isbn10"] if review["book"]["isbn10"] is not None else review["book"]["isbn13"]`
			`logger.info("%d %s %s %s ", i, review["book"]["title"], isbn, review["book"]["small_image_url"])`
			`try:`
			`edition = bookloader.add_by_isbn(isbn)`
			`# let's not trigger too much traffic to Google books for now`
			`# regluit.core.tasks.add_related.delay(isbn)`
			`user.wishlist.works.add(edition.work)`
			`logger.info("Work with isbn %s added to wishlist.", isbn)`
			`except Exception, e:`
			`logger.info ("error adding ISBN %s: %s", isbn, e)`