regluit/api/opds.py

348 lines
13 KiB
Python

from itertools import islice
from lxml import etree
import datetime
from urllib.parse import urlparse, urlunparse
from django.urls import reverse
from django.utils.http import urlquote
import pytz
import logging
logger = logging.getLogger(__name__)
from regluit.core import models, facets
import regluit.core.cc as cc
# Module-level alias for the license list exported by regluit.core.cc.
licenses = cc.LICENSE_LIST
# Maps an ebook's format to the MIME type advertised in the "type" attribute
# of its acquisition link (see work_node).
# NOTE(review): 'text' maps to "text/html", the same as 'html' -- confirm this
# is intended rather than "text/plain".
FORMAT_TO_MIMETYPE = {'pdf':"application/pdf",
'epub':"application/epub+zip",
'mobi':"application/x-mobipocket-ebook",
'html':"text/html",
'text':"text/html"}
# Base URL prepended to every id and navigation href emitted in the feeds.
UNGLUEIT_URL= 'https://unglue.it'
# Value of the "type" attribute set on every navigation link (see append_navlink).
ACQUISITION = "application/atom+xml;profile=opds-catalog;kind=acquisition"
# "rel" value that marks a navigation link as a facet link.
FACET_RELATION = "http://opds-spec.org/facet"
# Names of the facet classes defined directly in this module; feeds() and
# get_facet_class() resolve them by name through globals().
old_facets= ["creative_commons","active_campaigns"]
def feeds():
    """Yield every facet class this module can build a feed for.

    The classes named in ``old_facets`` come first (resolved by name in this
    module's globals), followed by one generated class per 'Format' facet
    path and then one per 'Keyword' facet path.
    """
    module_namespace = globals()
    yield from (module_namespace[class_name] for class_name in old_facets)
    for group_name in ('Format', 'Keyword'):
        for path in facets.get_all_facets(group_name):
            yield get_facet_facet(path)
def get_facet_class(name):
    """Return the facet class identified by ``name``.

    Names listed in ``old_facets`` resolve to the class of the same name
    defined in this module; any other name is treated as a facet path and
    handed to ``get_facet_facet``.
    """
    if name not in old_facets:
        return get_facet_facet(name)
    return globals()[name]
def text_node(tag, text):
    """Return a new element named ``tag`` whose text content is ``text``."""
    element = etree.Element(tag)
    element.text = text
    return element
def html_node(tag, html):
    """Return a text element for ``html`` flagged with an Atom ``type`` of 'html'.

    The element is built by ``text_node``; the namespaced attribute
    ``{http://www.w3.org/2005/Atom}type`` is then set to ``'html'``.
    """
    element = text_node(tag, html)
    element.set("{http://www.w3.org/2005/Atom}type", 'html')
    return element
def add_query_component(url, qc):
    """Append the component ``qc`` to the query string of ``url``.

    When ``url`` already has a query string, ``qc`` is joined to it with
    ``&``; otherwise ``qc`` becomes the whole query string. Every other part
    of the URL (scheme, host, path, params, fragment) is left as parsed.
    """
    parts = urlparse(url)
    existing_query = parts.query
    if existing_query:
        combined = "&".join((existing_query, qc))
    else:
        combined = qc
    return urlunparse(parts._replace(query=combined))
def isbn_node(isbn):
    """Return a ``dcterms:identifier`` element carrying ``isbn`` as a URN.

    The element's ``xsi:type`` attribute is set to ``'dcterms:URI'`` and its
    text to ``'urn:ISBN:'`` followed by ``isbn``.
    """
    identifier = etree.Element("{http://purl.org/dc/terms/}identifier")
    identifier.set("{http://www.w3.org/2001/XMLSchema-instance}type", 'dcterms:URI')
    identifier.text = 'urn:ISBN:' + isbn
    return identifier
def work_node(work, facet=None):
    """Build the OPDS ``entry`` element describing ``work``.

    Args:
        work: the work to describe. This function reads its ``title``, ``id``,
            ``publication_date``, ``language``, ``description`` and
            ``age_level`` attributes, its ``identifiers`` and ``subjects``
            managers, and calls ``ebooks()``, ``author()``, ``publishers()``,
            ``priority()``, ``get_age_level_display()`` and the cover helpers.
        facet: optional facet object. When truthy, its
            ``filter_model("Ebook", ...)`` is applied to the work's ebooks
            before links are generated.

    Returns:
        The populated ``entry`` element.

    Side effects:
        A visible subject whose name cannot be stored as an XML attribute
        (``ValueError``) is logged and deleted.
    """
    node = etree.Element("entry")
    # title
    node.append(text_node("title", work.title))
    # id
    work_url = reverse('work_identifier', kwargs={'work_id': work.id})
    node.append(text_node('id', "{base}{url}".format(base=UNGLUEIT_URL, url=work_url)))

    # links for all ebooks, one per distinct version label
    ebooks = facet.filter_model("Ebook", work.ebooks()) if facet else work.ebooks()
    updated = None
    versions = set()
    for ebook in ebooks:
        if updated is None:
            # most recent ebook, first ebook in loop
            updated = ebook.created.isoformat()
            node.append(text_node('updated', updated))
        if ebook.version_label not in versions:
            versions.add(ebook.version_label)
            node.append(_ebook_link_node(ebook))

    # get the cover -- assume jpg?
    node.append(_cover_link_node(work.cover_image_small(),
                                 work.cover_filetype(),
                                 "http://opds-spec.org/image/thumbnail"))
    node.append(_cover_link_node(work.cover_image_thumbnail(),
                                 work.cover_filetype(),
                                 "http://opds-spec.org/image"))

    # <dcterms:issued>2012</dcterms:issued>
    node.append(text_node("{http://purl.org/dc/terms/}issued", work.publication_date))

    # author
    # TO DO: include all authors?
    author_node = etree.Element("author")
    author_node.append(text_node("name", work.author()))
    node.append(author_node)

    # publisher
    # <dcterms:publisher>Open Book Publishers</dcterms:publisher>
    # (iterating an empty result is a no-op, so no separate emptiness check)
    for publisher in work.publishers():
        node.append(text_node("{http://purl.org/dc/terms/}publisher", publisher.name.name))

    # language
    # <dcterms:language>en</dcterms:language>
    node.append(text_node("{http://purl.org/dc/terms/}language", work.language))

    # description
    node.append(html_node("{http://www.w3.org/2005/Atom}content", work.description))

    # identifiers
    # NOTE(review): [0:9] yields at most 9 ISBNs, although the original
    # comment said "10 should be more than enough" -- confirm intended cap.
    for isbn in work.identifiers.filter(type='isbn')[0:9]:
        node.append(isbn_node(isbn.value))

    # subject tags
    _append_subject_nodes(node, work)

    # age level
    # <category term="15-18" scheme="http://schema.org/typicalAgeRange" label="Teen - Grade 10-12, Age 15-18"/>
    if work.age_level:
        category_node = etree.Element("category")
        category_node.attrib["scheme"] = 'http://schema.org/typicalAgeRange'
        category_node.attrib["term"] = work.age_level
        category_node.attrib["label"] = work.get_age_level_display()
        node.append(category_node)

    # rating
    rating_node = etree.Element("{http://schema.org/}Rating")
    rating_node.attrib.update({"{http://schema.org/}ratingValue": "{:}".format(work.priority())})
    node.append(rating_node)
    return node


def _ebook_link_node(ebook):
    """Return the open-access acquisition ``link`` element for ``ebook``."""
    mimetype = FORMAT_TO_MIMETYPE.get(ebook.format, "")
    link_node = etree.Element("link")
    # ebook.download_url is an absolute URL with the protocol, domain, and path baked in
    link_node.attrib.update({
        "href": add_query_component(ebook.download_url, "feed=opds"),
        "rel": "http://opds-spec.org/acquisition/open-access",
        "{http://purl.org/dc/terms/}rights": str(ebook.rights),
    })
    if ebook.is_direct():
        link_node.attrib["type"] = mimetype
    else:
        # indirect acquisition, i.e. google books: the link itself is an HTML
        # page and the real format is declared on a nested element
        link_node.attrib["type"] = "text/html"
        indirect = etree.Element("{http://opds-spec.org/}indirectAcquisition")
        indirect.attrib["type"] = mimetype
        link_node.append(indirect)
    if ebook.version_label:
        link_node.attrib.update({"{http://schema.org/}version": ebook.version_label})
    return link_node


def _cover_link_node(href, filetype, rel):
    """Return an image ``link`` element with the given href, image subtype and rel."""
    cover_node = etree.Element("link")
    cover_node.attrib.update({"href": href,
                              "type": "image/" + filetype,
                              "rel": rel})
    return cover_node


def _append_subject_nodes(node, work):
    """Append a ``category`` (and, where useful, a 'related' link) per visible subject."""
    for subject in work.subjects.all():
        if not subject.is_visible:
            continue
        category_node = etree.Element("category")
        # Only the attribute assignment is guarded: it is the statement that
        # rejects control characters, and nothing else should trigger a delete.
        try:
            category_node.attrib["term"] = subject.name
        except ValueError:
            # caused by control chars in subject.name
            logger.warning('Deleting subject: %s', subject.name)
            subject.delete()
            continue
        node.append(category_node)
        try:
            # only show feed if there's another work in it
            subject.works.filter(is_free=True)[1]
        except IndexError:
            continue
        append_navlink(node, 'related', 'kw.' + subject.name, 0, 'popular', title=subject.name)
class Facet:
    """Base class for the facets that back an OPDS feed.

    Subclasses are expected to set ``title``, ``works``, ``feed_path`` and
    ``description`` (the class-level values below are placeholders).
    """
    title = ''
    works = None
    feed_path = ''
    description = ''

    def feed(self, page=None, order_by='newest'):
        """Re-order ``self.works`` by ``order_by`` and return the rendered feed.

        The ordering fields come from ``facets.get_order_by``; the re-ordered
        collection replaces ``self.works`` before the feed is built.
        """
        ordering = facets.get_order_by(order_by)
        self.works = self.works.order_by(*ordering)
        return opds_feed_for_works(self, page=page, order_by=order_by)

    def updated(self):
        """Return an ISO-8601 UTC timestamp for this facet.

        Uses the ``created`` value of the first work, or the current UTC time
        when ``self.works`` is empty.
        """
        # return the creation date for most recently added item
        if self.works:
            moment = self.works[0].created
        else:
            moment = datetime.datetime.utcnow()
        return pytz.utc.localize(moment).isoformat()
def get_facet_facet(facet_path):
    """Return a ``Facet`` subclass bound to ``facet_path``.

    Instances of the returned class look up the facet object for the path,
    take their works and description from it, and use a title of
    "Unglue.it" followed by the title of each facet it reports.
    """
    class Facet_Facet(Facet):
        def __init__(self, facet_path=facet_path):
            facet_object = facets.get_facet_object(facet_path)
            title_words = ["Unglue.it"]
            title_words.extend(part.title for part in facet_object.facets())
            self.feed_path = facet_path
            self.facet_object = facet_object
            self.title = " ".join(title_words)
            self.works = facet_object.get_query_set().distinct()
            self.description = facet_object.description

    return Facet_Facet
class creative_commons(Facet):
    """Facet over works having ebooks whose rights are in ``cc.LICENSE_LIST``."""

    def __init__(self):
        self.feed_path = "creative_commons"
        self.title = "Unglue.it Catalog: Creative Commons Books"
        self.description = "These Creative Commons licensed ebooks are free to read - the people who created them want you to read and share them."
        licensed = models.Work.objects.filter(
            editions__ebooks__isnull=False,
            editions__ebooks__rights__in=cc.LICENSE_LIST,
        )
        # distinct(): the joins through editions/ebooks can repeat a work
        self.works = licensed.distinct()
        self.facet_object = facets.get_facet_object(self.feed_path)
class active_campaigns(Facet):
    """
    Facet over free works that have a campaign whose status is 'ACTIVE';
    used to return the opds feed for works associated with active campaigns
    """

    def __init__(self):
        self.feed_path = "active_campaigns"
        self.title = "Unglue.it Catalog: Books under Active Campaign"
        self.description = "With your help we're raising money to make these books free to the world."
        self.works = models.Work.objects.filter(
            campaigns__status='ACTIVE',
            is_free=True,
        )
        self.facet_object = facets.get_facet_object(self.feed_path)
def opds_feed_for_work(work_id):
    """Return the rendered OPDS feed containing only the work with ``work_id``.

    A ``work_id`` that the lookup rejects yields a feed over an empty set of
    works rather than an error.
    """
    class single_work_facet:
        """Minimal facet-like wrapper around a one-work lookup."""

        def __init__(self, work_id):
            try:
                found = models.Work.objects.filter(id=work_id)
            except (models.Work.DoesNotExist, ValueError):
                # no such work, or not a valid work_id
                found = models.Work.objects.none()
            self.works = found
            self.title = 'Unglue.it work #%s' % work_id
            self.feed_path = ''
            self.facet_object = facets.BaseFacet(None)

    the_facet = single_work_facet(work_id)
    return opds_feed_for_works(the_facet)
def opds_feed_for_works(the_facet, page=None, order_by='newest'):
    """Render one 10-entry page of the OPDS acquisition feed for ``the_facet``.

    Args:
        the_facet: object providing ``works`` (an indexable, sliceable
            collection of works), ``feed_path``, ``title`` and
            ``facet_object``.
        page: zero-based page number, as an int or a string. Missing,
            non-numeric or negative values are treated as page 0.
        order_by: name of the ordering shown in the title and carried on the
            generated links.

    Returns:
        The serialized feed as returned by ``etree.tostring`` with
        ``pretty_print=True``.
    """
    works = the_facet.works
    feed_path = the_facet.feed_path
    title = the_facet.title
    feed_xml = """<feed xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:opds="http://opds-spec.org/"
xmlns="http://www.w3.org/2005/Atom"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:schema="http://schema.org/"
xsi:noNamespaceSchemaLocation="http://www.kbcafe.com/rss/atom.xsd.xml"
xsi:schemaLocation="http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dc.xsd
http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"/>"""
    feed = etree.fromstring(bytes(feed_xml, 'utf-8'))

    # add title
    # TO DO: will need to calculate the number items and where in the feed we are
    feed.append(text_node('title', title + ' - sorted by ' + order_by))

    # id
    feed.append(text_node('id', "{url}/api/opds/{feed_path}/?order_by={order_by}".format(
        url=UNGLUEIT_URL, feed_path=urlquote(feed_path), order_by=order_by)))

    # updated
    # TO DO: fix time zone?
    # also use our wrapped datetime code
    feed.append(text_node('updated',
                          pytz.utc.localize(datetime.datetime.utcnow()).isoformat()))

    # author
    author_node = etree.Element("author")
    author_node.append(text_node('name', 'unglue.it'))
    author_node.append(text_node('uri', UNGLUEIT_URL))
    feed.append(author_node)

    # links: start, self, next/prev (depending what's necessary -- to start with put all CC books)
    # start link
    append_navlink(feed, 'start', feed_path, None, order_by, title="First 10")

    # next link: emitted only when an item exists just past this page
    page = _page_number(page)
    first = 10 * page
    try:
        works[first + 10]
    except IndexError:
        pass
    else:
        append_navlink(feed, 'next', feed_path, page + 1, order_by, title="Next 10")

    # sort facets
    append_navlink(feed, FACET_RELATION, feed_path, None, 'popular', group="Order",
                   active=order_by == 'popular', title="Sorted by popularity")
    append_navlink(feed, FACET_RELATION, feed_path, None, 'newest', group="Order",
                   active=order_by == 'newest', title="Sorted by newest")

    # other facets
    if feed_path not in old_facets:
        for other_group in the_facet.facet_object.get_other_groups():
            for facet_object in other_group.get_facets():
                append_navlink(feed, FACET_RELATION, feed_path + '/' + facet_object.facet_name,
                               None, order_by, group=other_group.title, title=facet_object.title)

    if page > 0:
        append_navlink(feed, 'previous', feed_path, page - 1, order_by, title="Previous 10")

    # Slice the collection directly so only this page's window is requested,
    # instead of iterating from the first item up to the page offset.
    for work in works[first:first + 10]:
        feed.append(work_node(work, facet=the_facet.facet_object))
    return etree.tostring(feed, pretty_print=True)


def _page_number(page):
    """Coerce a raw ``page`` value to a non-negative int, falling back to 0."""
    if not page:
        return 0
    try:
        number = int(page)
    except (TypeError, ValueError):
        # e.g. a non-numeric query-string value
        return 0
    # a negative page would produce negative slice bounds
    return max(number, 0)
def append_navlink(feed, rel, path, page, order_by, group=None, active=None, title=""):
    """Append a navigation ``link`` element to ``feed``.

    The href points at ``/api/opds/<quoted path>/`` on ``UNGLUEIT_URL`` with
    an ``order_by`` query parameter, plus ``page`` when one is given. For
    facet links (``rel == FACET_RELATION``) a truthy ``group`` is recorded as
    the ``opds:facetGroup`` attribute and a truthy ``active`` sets
    ``opds:activeFacet`` to ``'true'``.
    """
    href = UNGLUEIT_URL + "/api/opds/" + urlquote(path) + '/?order_by=' + order_by
    if page is not None:
        href += '&page=' + str(page)

    attributes = {
        "rel": rel,
        "href": href,
        "type": ACQUISITION,
        "title": title,
    }
    link = etree.Element("link")
    link.attrib.update(attributes)

    is_facet_link = rel == FACET_RELATION
    if is_facet_link and group:
        link.attrib['{http://opds-spec.org/}facetGroup'] = group
    if is_facet_link and active:
        link.attrib['{http://opds-spec.org/}activeFacet'] = 'true'
    feed.append(link)