regluit/core/search.py

import re
import json

import requests
from django.conf import settings

import regluit.core.isbn

def gluejar_search(q, user_ip='69.243.24.29', page=1):
    """Normalize Google Books search results into the dicts gluejar uses.

    Args:
        q: the search query string.
        user_ip: IP address passed through to ``googlebooks_search``.
        page: 1-based page of results to fetch.

    Returns:
        A list with one dict per returned volume. Every dict carries the
        keys ``title``, ``description``, ``publisher``, ``googlebooks_id``,
        ``author``, ``authors_short``, ``isbn_13`` and
        ``cover_image_thumbnail``. ``first_epub_url`` and ``first_pdf_url``
        are present only when the volume exposes a download link. The list
        is empty when the search response contains no ``items``.
    """
    results = []
    search_result = googlebooks_search(q, user_ip, page)
    for item in search_result.get('items') or []:
        # Sparse records may lack volumeInfo entirely; treat it as empty
        # instead of raising KeyError.
        v = item.get('volumeInfo') or {}
        r = {
            'title': v.get('title', ""),
            'description': v.get('description', ""),
            'publisher': v.get('publisher', ""),
            'googlebooks_id': item.get('id'),
        }

        # TODO: allow multiple authors
        authors = v.get('authors') or []
        r['author'] = authors[0] if authors else ""
        if len(authors) == 1:
            r['authors_short'] = authors[0]
        elif len(authors) == 2:
            r['authors_short'] = '%s and %s' % (authors[0], authors[1])
        elif len(authors) > 2:
            r['authors_short'] = '%s et al.' % authors[0]
        else:
            # Always set the key so every result has the same shape.
            r['authors_short'] = ""

        # pull out isbns: an ISBN_13 always wins; an ISBN_10 is converted
        # and used only while no ISBN-13 has been recorded yet.
        r['isbn_13'] = None
        for i in v.get('industryIdentifiers', []):
            id_type = i.get('type')
            if id_type == 'ISBN_13':
                r['isbn_13'] = i['identifier']
            elif id_type == 'ISBN_10' and not r['isbn_13']:
                r['isbn_13'] = regluit.core.isbn.convert_10_to_13(i['identifier'])

        # cover image: rewrite plain-http Google Books hosts to the https
        # host; fall back to the generic cover when no thumbnail is given
        # (including an imageLinks object that has no 'thumbnail' entry).
        thumbnail = (v.get('imageLinks') or {}).get('thumbnail')
        if thumbnail:
            r['cover_image_thumbnail'] = re.sub(
                r'http://(bks[0-9]+\.)?books\.google\.com',
                'https://encrypted.google.com',
                thumbnail,
            )
        else:
            r['cover_image_thumbnail'] = "/static/images/generic_cover_larger.png"

        # download links are optional and only copied when present
        access_info = item.get('accessInfo')
        if access_info:
            epub = access_info.get('epub')
            if epub and epub.get('downloadLink'):
                r['first_epub_url'] = epub['downloadLink']
            pdf = access_info.get('pdf')
            if pdf and pdf.get('downloadLink'):
                r['first_pdf_url'] = pdf['downloadLink']
        results.append(r)
    return results


def googlebooks_search(q, user_ip, page):
    """Query the Google Books volumes API and return the decoded response.

    Args:
        q: search query. Queries that are empty/None, shorter than 2
            characters or longer than 2000 characters are not sent.
        user_ip: client IP address, sent as the ``X-Forwarded-For`` header.
        page: 1-based page number; each page holds 10 results.

    Returns:
        The parsed JSON response body, or ``{}`` when the query was
        rejected without calling the API.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            times out.
        ValueError: if the response body is not valid JSON.
    """
    # Reject missing or out-of-range queries before touching the network.
    if not q or len(q) < 2 or len(q) > 2000:
        return {}
    headers = {'X-Forwarded-For': user_ip}
    start = (page - 1) * 10
    params = {'q': q, 'startIndex': start, 'maxResults': 10}
    # The API key is optional; the request is sent without one if unset.
    if hasattr(settings, 'GOOGLE_BOOKS_API_KEY'):
        params['key'] = settings.GOOGLE_BOOKS_API_KEY

    # urls like https://www.googleapis.com/books/v1/volumes?q=invisible+engines&startIndex=0&maxResults=10&key=[key]
    r = requests.get(
        'https://www.googleapis.com/books/v1/volumes',
        params=params,
        headers=headers,
        # requests never times out by default; bound the wait (seconds) so
        # a stalled connection cannot hang the caller indefinitely.
        timeout=10,
    )
    return json.loads(r.content)
need to rewrite cover image urls in googlebooks api search results too 2012-04-03 19:27:33 +00:00			`import re`
got search passably working and a somewhat functional ajax add to list 2011-09-29 06:23:50 +00:00			`import json`

delint 2019-02-28 21:22:23 +00:00			`import requests`
Adding Google books api key to the search 2013-07-02 15:00:01 +00:00			`from django.conf import settings`

delint 2019-02-28 21:22:23 +00:00			`import regluit.core.isbn`

added endless scroll for search results 2012-02-05 00:06:53 +00:00			`def gluejar_search(q, user_ip='69.243.24.29', page=1):`
got search passably working and a somewhat functional ajax add to list 2011-09-29 06:23:50 +00:00			`"""normalizes results from the google books search suitable for gluejar`
			`"""`
			`results = []`
delint 2019-02-28 21:22:23 +00:00			`search_result = googlebooks_search(q, user_ip, page)`
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`if 'items' in search_result.keys():`
			`for item in search_result['items']:`
			`v = item['volumeInfo']`
delint 2019-02-28 21:22:23 +00:00			`r = {'title': v.get('title', ""),`
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`'description': v.get('description', ""),`
			`'publisher': v.get('publisher', ""),`
			`'googlebooks_id': item.get('id')}`
delint 2019-02-28 21:22:23 +00:00
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`# TODO: allow multiple authors`
remove has_key 2020-02-12 16:12:56 +00:00			`if 'authors' in v and len(v['authors']) == 1:`
all authors now display had to harden the work model against works without editions to pass tests Lagos 2060 will need 1st author to change back added work.authors_short to work model and to search work mock model used work #415 as an example of a work with 3 authors 2014-01-18 02:37:19 +00:00			`r['author'] = r['authors_short'] = v['authors'][0]`
remove has_key 2020-02-12 16:12:56 +00:00			`elif 'authors' in v and len(v['authors']) > 2:`
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`r['author'] = v['authors'][0]`
delint 2019-02-28 21:22:23 +00:00			`r['authors_short'] = '%s et al.' % v['authors'][0]`
remove has_key 2020-02-12 16:12:56 +00:00			`elif 'authors' in v and len(v['authors']) == 2:`
all authors now display had to harden the work model against works without editions to pass tests Lagos 2060 will need 1st author to change back added work.authors_short to work model and to search work mock model used work #415 as an example of a work with 3 authors 2014-01-18 02:37:19 +00:00			`r['author'] = v['authors'][0]`
delint 2019-02-28 21:22:23 +00:00			`r['authors_short'] = '%s and %s' % (v['authors'][0], v['authors'][1])`
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`else:`
			`r['author'] = ""`
			`r['isbn_13'] = None`
delint 2019-02-28 21:22:23 +00:00
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`# pull out isbns`
			`for i in v.get('industryIdentifiers', []):`
			`if i['type'] == 'ISBN_13':`
			`r['isbn_13'] = i['identifier']`
removed isbn_10 column and fixed many isbn13 non-compliance issues apply migrations! 2011-12-20 04:26:55 +00:00			`elif i['type'] == 'ISBN_10':`
delint 2019-02-28 21:22:23 +00:00			`if not r['isbn_13']:`
removed isbn_10 column and fixed many isbn13 non-compliance issues apply migrations! 2011-12-20 04:26:55 +00:00			`r['isbn_13'] = regluit.core.isbn.convert_10_to_13(i['identifier'])`
delint 2019-02-28 21:22:23 +00:00
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`# cover image`
remove has_key 2020-02-12 16:12:56 +00:00			`if 'imageLinks' in v:`
need to rewrite cover image urls in googlebooks api search results too 2012-04-03 19:27:33 +00:00			`url = v['imageLinks'].get('thumbnail', "")`
delint 2019-02-28 21:22:23 +00:00			`url = re.sub(`
			`r'http://(bks[0-9]+\.)?books\.google\.com',`
			`'https://encrypted.google.com',`
			`url,`
			`)`
need to rewrite cover image urls in googlebooks api search results too 2012-04-03 19:27:33 +00:00			`r['cover_image_thumbnail'] = url`
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`else:`
refactored search.html to use book_panel.html. required altering labels of some data sources so that they have the same names when they hit book_panel regardless of where it's included from 2011-11-23 17:28:59 +00:00			`r['cover_image_thumbnail'] = "/static/images/generic_cover_larger.png"`
delint 2019-02-28 21:22:23 +00:00
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`access_info = item.get('accessInfo')`
			`if access_info:`
			`epub = access_info.get('epub')`
			`if epub and epub.get('downloadLink'):`
refactored search.html to use book_panel.html. required altering labels of some data sources so that they have the same names when they hit book_panel regardless of where it's included from 2011-11-23 17:28:59 +00:00			`r['first_epub_url'] = epub['downloadLink']`
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`pdf = access_info.get('pdf')`
			`if pdf and pdf.get('downloadLink'):`
refactored search.html to use book_panel.html. required altering labels of some data sources so that they have the same names when they hit book_panel regardless of where it's included from 2011-11-23 17:28:59 +00:00			`r['first_pdf_url'] = pdf['downloadLink']`
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`results.append(r)`
delint 2019-02-28 21:22:23 +00:00			`return results`
got search passably working and a somewhat functional ajax add to list 2011-09-29 06:23:50 +00:00

added endless scroll for search results 2012-02-05 00:06:53 +00:00			`def googlebooks_search(q, user_ip, page):`
harden search 2018-02-01 20:18:14 +00:00			`if len(q) < 2 or len(q) > 2000:`
			`return {}`
delint 2019-02-28 21:22:23 +00:00			`# XXX: need to pass IP address of user in from the frontend`
fixed no results bug in gluejar-search; started passing remote IP address to google api 2011-11-20 14:20:08 +00:00			`headers = {'X-Forwarded-For': user_ip}`
delint 2019-02-28 21:22:23 +00:00			`start = (page - 1) * 10`
added endless scroll for search results 2012-02-05 00:06:53 +00:00			`params = {'q': q, 'startIndex': start, 'maxResults': 10}`
Adding Google books api key to the search 2013-07-02 15:00:01 +00:00			`if hasattr(settings, 'GOOGLE_BOOKS_API_KEY'):`
			`params['key'] = settings.GOOGLE_BOOKS_API_KEY`
delint 2019-02-28 21:22:23 +00:00
			`r = requests.get(`
			`'https://www.googleapis.com/books/v1/volumes',`
			`params=params,`
			`headers=headers`
			`)`
passes all tests without secret keys 2016-12-31 03:26:16 +00:00			`# urls like https://www.googleapis.com/books/v1/volumes?q=invisible+engines&startIndex=0&maxResults=10&key=[key]`
got search passably working and a somewhat functional ajax add to list 2011-09-29 06:23:50 +00:00			`return json.loads(r.content)`