regluit/core/search.py

84 lines
3.1 KiB
Python
Raw Normal View History

import re
import json
2019-02-28 21:22:23 +00:00
import requests
from django.conf import settings
2019-02-28 21:22:23 +00:00
import regluit.core.isbn
# Matches plain-http books.google.com hosts, with or without a "bksN." prefix.
# Compiled once at import time rather than being looked up for every result.
_GOOGLE_BOOKS_HTTP_HOST = re.compile(r'http://(bks[0-9]+\.)?books\.google\.com')

# Cover used when a volume carries no usable thumbnail link.
_GENERIC_COVER = "/static/images/generic_cover_larger.png"


def _author_fields(authors):
    """Return ``(author, authors_short)`` for a volume's list of authors."""
    if not authors:
        # Both fields are always present so every result has the same shape.
        return "", ""
    first = authors[0]
    if len(authors) == 1:
        return first, first
    if len(authors) == 2:
        return first, '%s and %s' % (first, authors[1])
    return first, '%s et al.' % first


def _isbn_13(identifiers):
    """Return an ISBN-13 from ``industryIdentifiers`` entries, or None."""
    isbn_13 = None
    for ident in identifiers:
        kind = ident.get('type')
        value = ident.get('identifier')
        if kind == 'ISBN_13':
            isbn_13 = value
        elif kind == 'ISBN_10' and value and not isbn_13:
            # Only fall back to a converted ISBN-10 while no ISBN-13 is known;
            # a later ISBN_13 entry still overrides the converted value.
            isbn_13 = regluit.core.isbn.convert_10_to_13(value)
    return isbn_13


def _cover_thumbnail(volume_info):
    """Return the thumbnail URL rewritten to https, or the generic cover."""
    thumbnail = (volume_info.get('imageLinks') or {}).get('thumbnail')
    if not thumbnail:
        # Covers both a missing imageLinks entry and one without a thumbnail.
        return _GENERIC_COVER
    return _GOOGLE_BOOKS_HTTP_HOST.sub('https://encrypted.google.com', thumbnail)


def gluejar_search(q, user_ip='69.243.24.29', page=1):
    """Normalize Google Books search results into dicts suitable for gluejar.

    Args:
        q: the search query, forwarded to ``googlebooks_search``.
        user_ip: forwarded to ``googlebooks_search``.
        page: result page number, forwarded to ``googlebooks_search``.

    Returns:
        A list with one dict per item of the search response. Every dict has
        the keys ``title``, ``description``, ``publisher``, ``googlebooks_id``,
        ``author``, ``authors_short``, ``isbn_13`` (None when no ISBN is
        listed) and ``cover_image_thumbnail``. ``first_epub_url`` and
        ``first_pdf_url`` are present only when the item's access info
        carries a download link for that format. The list is empty when the
        response has no ``items``.
    """
    search_result = googlebooks_search(q, user_ip, page)

    results = []
    for item in search_result.get('items') or []:
        volume = item.get('volumeInfo') or {}
        result = {
            'title': volume.get('title', ""),
            'description': volume.get('description', ""),
            'publisher': volume.get('publisher', ""),
            'googlebooks_id': item.get('id'),
        }

        # TODO: allow multiple authors
        result['author'], result['authors_short'] = _author_fields(
            volume.get('authors')
        )
        result['isbn_13'] = _isbn_13(volume.get('industryIdentifiers') or [])
        result['cover_image_thumbnail'] = _cover_thumbnail(volume)

        access_info = item.get('accessInfo') or {}
        for fmt in ('epub', 'pdf'):
            link = (access_info.get(fmt) or {}).get('downloadLink')
            if link:
                result['first_%s_url' % fmt] = link

        results.append(result)

    return results
def googlebooks_search(q, user_ip, page, timeout=10):
    """Query the Google Books volumes API and return the decoded JSON reply.

    Args:
        q: the search query. ``None`` and queries shorter than 2 or longer
            than 2000 characters are rejected without making a request.
        user_ip: value sent in the ``X-Forwarded-For`` request header.
        page: 1-based page number; each page holds 10 results. Values below
            1 are treated as the first page.
        timeout: seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The parsed JSON body of the response, or ``{}`` when the query was
        rejected.

    Raises:
        requests.RequestException: if the request fails or times out.
        ValueError: if the response body is not valid JSON.
    """
    if q is None or not 2 <= len(q) <= 2000:
        return {}

    # XXX: need to pass IP address of user in from the frontend
    headers = {'X-Forwarded-For': user_ip}

    # Clamp so that page 0 or a negative page never yields a negative index.
    start = max(page - 1, 0) * 10
    params = {'q': q, 'startIndex': start, 'maxResults': 10}
    if hasattr(settings, 'GOOGLE_BOOKS_API_KEY'):
        params['key'] = settings.GOOGLE_BOOKS_API_KEY

    # urls like https://www.googleapis.com/books/v1/volumes?q=invisible+engines&startIndex=0&maxResults=10&key=[key]
    response = requests.get(
        'https://www.googleapis.com/books/v1/volumes',
        params=params,
        headers=headers,
        timeout=timeout,
    )
    # Response.json() decodes the body itself instead of handing raw bytes
    # to json.loads.
    return response.json()