regluit/api/opds.py

from itertools import islice

from lxml import etree
import datetime
import urlparse

import pytz

from regluit.core import models, facets
import regluit.core.cc as cc

# Module-level alias for the license list defined in regluit.core.cc.
licenses = cc.LICENSE_LIST

# Maps an ebook format name to the MIME type put on its acquisition link;
# work_node() falls back to "" for formats that are not listed here.
FORMAT_TO_MIMETYPE = dict(
    pdf="application/pdf",
    epub="application/epub+zip",
    mobi="application/x-mobipocket-ebook",
    html="text/html",
    text="text/html",
)

# Base URL prefixed to every id and navigation link in the feeds.
UNGLUEIT_URL = 'https://unglue.it'

# "type" attribute value used for OPDS navigation links.
NAVIGATION = "application/atom+xml;profile=opds-catalog;kind=navigation"

# Names of the feed classes defined directly in this module; feeds() and
# get_facet_class() look these names up in the module globals.
old_facets = [
    "creative_commons",
    "active_campaigns",
]


def feeds():
    """
    Lazily yield every feed class this module publishes.

    Order: the facet feed class for 'all', then each class named in
    ``old_facets`` (looked up in this module's globals), then one facet feed
    class per path returned by ``facets.get_all_facets('Format')``.
    """
    yield get_facet_facet('all')

    for legacy_name in old_facets:
        legacy_class = globals()[legacy_name]
        yield legacy_class

    for format_path in facets.get_all_facets('Format'):
        format_class = get_facet_facet(format_path)
        yield format_class

def get_facet_class(name):
    """
    Return the feed class registered under ``name``.

    Names listed in ``old_facets`` resolve to the class of the same name in
    this module's globals; any other name is treated as a facet path and
    handed to ``get_facet_facet``.
    """
    if name not in old_facets:
        return get_facet_facet(name)
    return globals()[name]
        
        
def text_node(tag, text):
    """
    Return a new element named ``tag`` whose text content is ``text``.
    """
    element = etree.Element(tag)
    element.text = text
    return element

def add_query_component(url, qc):
    """
    Return ``url`` with the component ``qc`` added to its query string.

    ``qc`` is joined to any existing query with "&"; when the URL has no
    query yet, ``qc`` becomes the whole query. An empty ``qc`` leaves the
    query as it was instead of producing a dangling "&". Other parts of the
    URL (scheme, host, path, params, fragment) are carried over as parsed.
    """
    # urlparse moved to urllib.parse in Python 3; use whichever is available
    # so this helper works on both interpreters.
    try:
        from urllib.parse import urlparse as parse_url, urlunparse as unparse_url
    except ImportError:
        from urlparse import urlparse as parse_url, urlunparse as unparse_url

    parts = list(parse_url(url))
    # index 4 of the parsed 6-tuple is the query string
    parts[4] = "&".join(piece for piece in (parts[4], qc) if piece)
    return unparse_url(parts)

def isbn_node(isbn):
    """
    Return a ``dcterms:identifier`` element for ``isbn``.

    The element carries ``xsi:type="dcterms:URI"`` and its text is the ISBN
    prefixed with ``urn:ISBN:``.
    """
    xsi_type = "{http://www.w3.org/2001/XMLSchema-instance}type"
    identifier = etree.Element("{http://purl.org/dc/terms/}identifier")
    identifier.set(xsi_type, 'dcterms:URI')
    identifier.text = 'urn:ISBN:' + isbn
    return identifier

def _link_node(href, mimetype, rel):
    """Return a ``link`` element with the given href, type and rel attributes."""
    link = etree.Element("link")
    link.attrib.update({"href": href, "type": mimetype, "rel": rel})
    return link


def work_node(work):
    """
    Build the Atom ``entry`` element describing one work.

    The entry carries, in order: title, id (``UNGLUEIT_URL`` plus the work's
    absolute URL), updated (the work's creation date), one acquisition link
    per ebook, a thumbnail and an image cover link, ``dcterms:issued``, the
    author, one ``dcterms:publisher`` per publisher, ``dcterms:language``,
    up to ten ISBN identifiers and one ``category`` per subject.

    :param work: the work to serialize; it must provide the attributes and
        methods read below (``title``, ``created``, ``ebooks()``, ...).
    :returns: the populated ``entry`` element.
    """
    node = etree.Element("entry")

    # title
    node.append(text_node("title", work.title))

    # id
    node.append(text_node('id', "{base}{url}".format(base=UNGLUEIT_URL,
                                                     url=work.get_absolute_url())))

    # updated -- using creation date.  Atom date constructs need a timezone
    # offset, so a naive value is labelled as UTC, the same way
    # Facet.updated() treats it.
    # NOTE(review): assumes naive ``created`` values are stored in UTC -- confirm.
    created = work.created
    if created.tzinfo is None:
        created = pytz.utc.localize(created)
    node.append(text_node('updated', created.isoformat()))

    # links for all ebooks
    # ebook.download_url is an absolute URL with the protocol, domain, and path baked in
    for ebook in work.ebooks():
        node.append(_link_node(add_query_component(ebook.download_url, "feed=opds"),
                               FORMAT_TO_MIMETYPE.get(ebook.format, ""),
                               "http://opds-spec.org/acquisition"))

    # cover links: some feed consumers want images for both rels
    cover_type = "image/" + work.cover_filetype()
    node.append(_link_node(work.cover_image_small(), cover_type,
                           "http://opds-spec.org/image/thumbnail"))
    node.append(_link_node(work.cover_image_thumbnail(), cover_type,
                           "http://opds-spec.org/image"))

    # <dcterms:issued>2012</dcterms:issued>
    node.append(text_node("{http://purl.org/dc/terms/}issued", work.publication_date_year))

    # author
    # TO DO: include all authors?
    author_node = etree.Element("author")
    author_node.append(text_node("name", work.author()))
    node.append(author_node)

    # publisher
    # <dcterms:publisher>Open Book Publishers</dcterms:publisher>
    # (previously emitted under dcterms:issued by mistake)
    for publisher in work.publishers():
        node.append(text_node("{http://purl.org/dc/terms/}publisher", publisher.name.name))

    # language
    # <dcterms:language>en</dcterms:language>
    node.append(text_node("{http://purl.org/dc/terms/}language", work.language))

    # identifiers -- 10 should be more than enough
    for isbn in work.identifiers.filter(type='isbn')[:10]:
        node.append(isbn_node(isbn.value))

    # subject tags
    for subject in work.subjects.all():
        category_node = etree.Element("category")
        category_node.attrib["term"] = subject.name
        node.append(category_node)

    return node

class Facet:
    """
    Base class for an OPDS feed over a collection of works.

    Subclasses provide ``title``, ``works``, ``feed_path`` and
    ``description``; the defaults here are empty.
    """
    # path segment identifying this feed under /api/opds/
    feed_path = ''
    title = ''
    description = ''
    # collection of works in the feed; None until a subclass supplies one
    works = None

    def feed(self, page=None):
        """Return the serialized OPDS feed for ``page`` of this facet's works."""
        return opds_feed_for_works(
            self.works,
            self.feed_path,
            title=self.title,
            page=page,
        )

    def updated(self):
        """
        Return an ISO-8601 UTC timestamp for the feed.

        This is the creation date of the first work in ``works``, or the
        current time when there are no works.
        """
        if self.works:
            first_work = self.works[0]
            return pytz.utc.localize(first_work.created).isoformat()
        now = datetime.datetime.utcnow()
        return pytz.utc.localize(now).isoformat()

def get_facet_facet(facet_path):
    """
    Return a ``Facet`` subclass bound to ``facet_path``.

    Instantiating the returned class looks up the facet object for the path
    and fills in the feed path, title ("Unglue.it" followed by the title of
    each facet in the chain), works and description from it.
    """
    class Facet_Facet(Facet):

        def __init__(self, facet_path=facet_path):
            self.feed_path = facet_path
            facet_object = facets.get_facet_object(facet_path)
            self.facet_object = facet_object
            title_words = ["Unglue.it"]
            title_words.extend(facet.title for facet in facet_object.facets())
            self.title = " ".join(title_words)
            self.works = facet_object.get_query_set().distinct()
            self.description = facet_object.description

    return Facet_Facet

class creative_commons(Facet):
    """
    Feed of works having an ebook whose rights are in ``cc.LICENSE_LIST``.
    """
    feed_path = "creative_commons"
    title = "Unglue.it Catalog:  Creative Commons Books"
    description = "These Creative Commons licensed ebooks are free to read - the people who created them want you to read and share them."
    # Newest works first.
    # NOTE(review): this is built once at class-definition time; if it is a
    # Django QuerySet, its result cache may be reused across requests once
    # evaluated -- confirm.
    works = (
        models.Work.objects
        .filter(editions__ebooks__rights__in=cc.LICENSE_LIST,
                editions__ebooks__isnull=False)
        .distinct()
        .order_by('-created')
    )
    
class active_campaigns(Facet):
    """
    return opds feed for works associated with active campaigns

    Only works that also have at least one ebook are included.
    """
    feed_path = "active_campaigns"
    title = "Unglue.it Catalog:  Books under Active Campaign"
    description = "With your help we're raising money to make these books free to the world."
    # Newest works first.
    # NOTE(review): this is built once at class-definition time; if it is a
    # Django QuerySet, its result cache may be reused across requests once
    # evaluated -- confirm.
    works = (
        models.Work.objects
        .filter(editions__ebooks__isnull=False,
                campaigns__status='ACTIVE')
        .distinct()
        .order_by('-created')
    )

# number of entries placed on each feed page
_FEED_PAGE_SIZE = 10


def _page_number(page):
    """Coerce a raw ``page`` value to a non-negative int, defaulting to 0."""
    try:
        number = int(page)
    except (TypeError, ValueError):
        # None, '' and non-numeric strings all mean "first page"
        return 0
    return max(number, 0)


def opds_feed_for_works(works, feed_path, title="Unglue.it Catalog", page=None):
    """
    Serialize one page of ``works`` as an OPDS (Atom) feed.

    The feed holds a title, id, updated timestamp (the current UTC time) and
    author, followed by navigation links -- always 'start', 'next' when there
    are works beyond this page, 'previous' when ``page`` is greater than 0 --
    and then one entry per work on the page (ten per page).

    :param works: sliceable collection of works (for example a queryset).
    :param feed_path: path segment of the feed, used in the id and the links.
    :param title: feed title.
    :param page: zero-based page number; ``None``, negative or non-numeric
        values are treated as page 0.
    :returns: the pretty-printed XML produced by ``etree.tostring``.
    """
    feed_xml = """<feed xmlns:dcterms="http://purl.org/dc/terms/" 
      xmlns:opds="http://opds-spec.org/"
      xmlns="http://www.w3.org/2005/Atom"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:noNamespaceSchemaLocation="http://www.kbcafe.com/rss/atom.xsd.xml"
      xsi:schemaLocation="http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dc.xsd http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"/>"""

    feed = etree.fromstring(feed_xml)

    # add title
    # TO DO: will need to calculate the number items and where in the feed we are
    feed.append(text_node('title', title))

    # id
    feed.append(text_node('id', "{url}/api/opds/{feed_path}".format(url=UNGLUEIT_URL,
                                                                     feed_path=feed_path)))

    # updated
    # TO DO:  fix time zone?
    # also use our wrapped datetime code
    feed.append(text_node('updated',
                          pytz.utc.localize(datetime.datetime.utcnow()).isoformat()))

    # author
    author_node = etree.Element("author")
    author_node.append(text_node('name', 'unglue.it'))
    author_node.append(text_node('uri', UNGLUEIT_URL))
    feed.append(author_node)

    # links:  start, next/prev (depending what's necessary)
    append_navlink(feed, 'start', feed_path, None)

    page = _page_number(page)
    first = _FEED_PAGE_SIZE * page
    # Fetch one item more than a page holds: its presence tells us whether a
    # 'next' link is needed, without walking the collection from the start.
    window = list(works[first:first + _FEED_PAGE_SIZE + 1])

    if len(window) > _FEED_PAGE_SIZE:
        append_navlink(feed, 'next', feed_path, page + 1)
    if page > 0:
        append_navlink(feed, 'previous', feed_path, page - 1)

    for work in window[:_FEED_PAGE_SIZE]:
        feed.append(work_node(work))

    return etree.tostring(feed, pretty_print=True)

def append_navlink(feed, rel, path, page):
    """
    Append an OPDS navigation ``link`` element to ``feed``.

    :param feed: element the link is appended to.
    :param rel: value of the link's ``rel`` attribute (e.g. 'start', 'next').
    :param path: feed path inserted after ``/api/opds/`` in the href.
    :param page: page number added as ``?page=N``; ``None`` omits the query
        string (page 0 is still written out explicitly).
    """
    href = UNGLUEIT_URL + "/api/opds/" + path + "/"
    if page is not None:
        # str.format works on both Python 2 and 3, unlike unicode()
        href += "?page={0}".format(page)

    link = etree.Element("link")
    link.attrib.update({
        "rel": rel,
        "href": href,
        "type": NAVIGATION,
    })
    feed.append(link)
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00			`from itertools import islice`

			`from lxml import etree`
			`import datetime`
			`import urlparse`

			`import pytz`

add new facet machinery to olds 2014-12-05 23:38:04 +00:00			`from regluit.core import models, facets`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00			`import regluit.core.cc as cc`

			`licenses = cc.LICENSE_LIST`

			`FORMAT_TO_MIMETYPE = {'pdf':"application/pdf",`
			`'epub':"application/epub+zip",`
			`'mobi':"application/x-mobipocket-ebook",`
			`'html':"text/html",`
			`'text':"text/html"}`
ripped out more site/protocol code 2014-07-17 04:33:43 +00:00
			`UNGLUEIT_URL= 'https://unglue.it'`
paginate feeds 2014-11-04 00:36:26 +00:00			`NAVIGATION = "application/atom+xml;profile=opds-catalog;kind=navigation"`
add new facet machinery to olds 2014-12-05 23:38:04 +00:00
			`old_facets= ["creative_commons","active_campaigns"]`


make the feeds into classes 2014-07-17 04:00:16 +00:00			`def feeds():`
revise the top level feeds 2014-12-06 01:37:51 +00:00			`yield get_facet_facet('all')`
add new facet machinery to olds 2014-12-05 23:38:04 +00:00			`for facet in old_facets:`
			`yield globals()[facet]`
revise the top level feeds 2014-12-06 01:37:51 +00:00			`for facet_path in facets.get_all_facets('Format'):`
add new facet machinery to olds 2014-12-05 23:38:04 +00:00			`yield get_facet_facet(facet_path)`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
add new facet machinery to olds 2014-12-05 23:38:04 +00:00			`def get_facet_class(name):`
			`if name in old_facets:`
			`return globals()[name]`
			`else:`
			`return get_facet_facet(name)`


First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00			`def text_node(tag, text):`
			`node = etree.Element(tag)`
			`node.text = text`
			`return node`

Add feed=opds to download URLs for ebooks in OPDS feed 2014-06-25 23:54:01 +00:00			`def add_query_component(url, qc):`
First pass at /opds/{facet} --> now with active_campaigns too 2014-06-26 01:05:57 +00:00			`"""`
			`add component qc to the querystring of url`
			`"""`
Add feed=opds to download URLs for ebooks in OPDS feed 2014-06-25 23:54:01 +00:00			`m = list(urlparse.urlparse(url))`
			`if len(m[4]):`
			`m[4] = "&".join([m[4],qc])`
			`else:`
			`m[4] = qc`
			`return urlparse.urlunparse(m)`

add isbn to opds feed based on feedback from leonardr but limit to first 10 isbns 2014-07-30 02:26:15 +00:00			`def isbn_node(isbn):`
			`node = etree.Element("{http://purl.org/dc/terms/}identifier")`
			`node.attrib.update({"{http://www.w3.org/2001/XMLSchema-instance}type":'dcterms:URI'})`
			`node.text = 'urn:ISBN:'+ isbn`
			`return node`
Add feed=opds to download URLs for ebooks in OPDS feed 2014-06-25 23:54:01 +00:00
ripped out more site/protocol code 2014-07-17 04:33:43 +00:00			`def work_node(work):`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
			`node = etree.Element("entry")`
			`# title`
			`node.append(text_node("title", work.title))`

			`# id`
ripped out more site/protocol code 2014-07-17 04:33:43 +00:00			`node.append(text_node('id', "{base}{url}".format(base=UNGLUEIT_URL,url=work.get_absolute_url())))`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
			`# updated -- using creation date`
			`node.append(text_node('updated', work.created.isoformat()))`

			`# links for all ebooks`

			`for ebook in work.ebooks():`
			`link_node = etree.Element("link")`
Add feed=opds to download URLs for ebooks in OPDS feed 2014-06-25 23:54:01 +00:00
			`# ebook.download_url is an absolute URL with the protocol, domain, and path baked in`

			`link_node.attrib.update({"href":add_query_component(ebook.download_url, "feed=opds"),`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00			`"type":FORMAT_TO_MIMETYPE.get(ebook.format, ""),`
			`"rel":"http://opds-spec.org/acquisition"})`
			`node.append(link_node)`

			`# get the cover -- assume jpg?`

			`cover_node = etree.Element("link")`
			`cover_node.attrib.update({"href":work.cover_image_small(),`
some feed consumers want images for both sizes 2014-11-04 00:57:58 +00:00			`"type":"image/"+work.cover_filetype(),`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00			`"rel":"http://opds-spec.org/image/thumbnail"})`
			`node.append(cover_node)`
some feed consumers want images for both sizes 2014-11-04 00:57:58 +00:00			`cover_node = etree.Element("link")`
			`cover_node.attrib.update({"href":work.cover_image_thumbnail(),`
			`"type":"image/"+work.cover_filetype(),`
			`"rel":"http://opds-spec.org/image"})`
			`node.append(cover_node)`

First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
			`# <dcterms:issued>2012</dcterms:issued>`
			`node.append(text_node("{http://purl.org/dc/terms/}issued", work.publication_date_year))`

			`# author`
			`# TO DO: include all authors?`
			`author_node = etree.Element("author")`
			`author_node.append(text_node("name", work.author()))`
			`node.append(author_node)`

			`# publisher`
			`#<dcterms:publisher>Open Book Publishers</dcterms:publisher>`
			`if len(work.publishers()):`
			`for publisher in work.publishers():`
			`node.append(text_node("{http://purl.org/dc/terms/}issued", publisher.name.name))`

			`# language`
			`#<dcterms:language>en</dcterms:language>`
			`node.append(text_node("{http://purl.org/dc/terms/}language", work.language))`
add isbn to opds feed based on feedback from leonardr but limit to first 10 isbns 2014-07-30 02:26:15 +00:00
			`# identifiers`
			`if work.identifiers.filter(type='isbn'):`
			`for isbn in work.identifiers.filter(type='isbn')[0:9]: #10 should be more than enough`
			`node.append(isbn_node(isbn.value))`

First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00			`# subject tags`
			`# [[subject.name for subject in work.subjects.all()] for work in ccworks if work.subjects.all()]`
			`if work.subjects.all():`
			`for subject in work.subjects.all():`
			`category_node = etree.Element("category")`
			`category_node.attrib["term"] = subject.name`
			`node.append(category_node)`

			`return node`

make the feeds into classes 2014-07-17 04:00:16 +00:00			`class Facet:`
			`title = ''`
			`works = None`
			`feed_path = ''`
bring back descriptions, use them 2014-12-06 01:38:08 +00:00			`description = ''`

paginate feeds 2014-11-04 00:36:26 +00:00			`def feed(self, page=None):`
			`return opds_feed_for_works(self.works, self.feed_path, title=self.title, page=page)`
ripped out more site/protocol code 2014-07-17 04:33:43 +00:00
compute an updated date 2014-07-17 04:34:27 +00:00			`def updated(self):`
			`# return the creation date for most recently added item`
			`if not self.works:`
			`return pytz.utc.localize(datetime.datetime.utcnow()).isoformat()`
			`else:`
			`return pytz.utc.localize(self.works[0].created).isoformat()`
add new facet machinery to olds 2014-12-05 23:38:04 +00:00
			`def get_facet_facet(facet_path):`
			`class Facet_Facet(Facet):`

			`def __init__(self, facet_path=facet_path):`
			`self.feed_path = facet_path`
			`self.facet_object = facets.get_facet_object(facet_path)`
			`self.title = "Unglue.it"`
			`for facet in self.facet_object.facets():`
			`self.title = self.title + " " + facet.title`
			`self.works = self.facet_object.get_query_set().distinct()`
bring back descriptions, use them 2014-12-06 01:38:08 +00:00			`self.description = self.facet_object.description`
add new facet machinery to olds 2014-12-05 23:38:04 +00:00			`return Facet_Facet`
First pass at /opds/{facet} --> now with active_campaigns too 2014-06-26 01:05:57 +00:00
make the feeds into classes 2014-07-17 04:00:16 +00:00			`class creative_commons(Facet):`
			`title = "Unglue.it Catalog: Creative Commons Books"`
			`feed_path = "creative_commons"`
			`works = models.Work.objects.filter(editions__ebooks__isnull=False,`
			`editions__ebooks__rights__in=cc.LICENSE_LIST).distinct().order_by('-created')`
bring back descriptions, use them 2014-12-06 01:38:08 +00:00			`description= "These Creative Commons licensed ebooks are free to read - the people who created them want you to read and share them."`

make the feeds into classes 2014-07-17 04:00:16 +00:00			`class active_campaigns(Facet):`
First pass at /opds/{facet} --> now with active_campaigns too 2014-06-26 01:05:57 +00:00			`"""`
			`return opds feed for works associated with active campaigns`
			`"""`
make the feeds into classes 2014-07-17 04:00:16 +00:00			`title = "Unglue.it Catalog: Books under Active Campaign"`
			`feed_path = "active_campaigns"`
in active_campaigns feed, exclude books w/o ebooks 2014-07-25 20:49:12 +00:00			`works = models.Work.objects.filter(campaigns__status='ACTIVE',`
			`editions__ebooks__isnull=False).distinct().order_by('-created')`
bring back descriptions, use them 2014-12-06 01:38:08 +00:00			`description= "With your help we're raising money to make these books free to the world."`
First pass at /opds/{facet} --> now with active_campaigns too 2014-06-26 01:05:57 +00:00
paginate feeds 2014-11-04 00:36:26 +00:00			`def opds_feed_for_works(works, feed_path, title="Unglue.it Catalog", page=None):`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
			`feed_xml = """<feed xmlns:dcterms="http://purl.org/dc/terms/"`
			`xmlns:opds="http://opds-spec.org/"`
			`xmlns="http://www.w3.org/2005/Atom"`
			`xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"`
			`xsi:noNamespaceSchemaLocation="http://www.kbcafe.com/rss/atom.xsd.xml"`
			`xsi:schemaLocation="http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dc.xsd http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"/>"""`

			`feed = etree.fromstring(feed_xml)`

			`# add title`
			`# TO DO: will need to calculate the number items and where in the feed we are`

First pass at /opds/{facet} --> now with active_campaigns too 2014-06-26 01:05:57 +00:00			`feed.append(text_node('title', title))`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
			`# id`

ripped out more site/protocol code 2014-07-17 04:33:43 +00:00			`feed.append(text_node('id', "{url}/api/opds/{feed_path}".format(url=UNGLUEIT_URL,`
			`feed_path=feed_path)))`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
			`# updated`
			`# TO DO: fix time zone?`
			`# also use our wrapped datetime code`

			`feed.append(text_node('updated',`
			`pytz.utc.localize(datetime.datetime.utcnow()).isoformat()))`

			`# author`

			`author_node = etree.Element("author")`
			`author_node.append(text_node('name', 'unglue.it'))`
ripped out more site/protocol code 2014-07-17 04:33:43 +00:00			`author_node.append(text_node('uri', UNGLUEIT_URL))`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00			`feed.append(author_node)`

			`# links: start, self, next/prev (depending what's necessary -- to start with put all CC books)`

			`# start link`
paginate feeds 2014-11-04 00:36:26 +00:00			`append_navlink(feed, 'start', feed_path, None )`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
paginate feeds 2014-11-04 00:36:26 +00:00			`# next link`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00
paginate feeds 2014-11-04 00:36:26 +00:00			`if not page:`
			`page =0`
			`else:`
			`try:`
			`page=int(page)`
			`except TypeError:`
			`page=0`

			`try:`
			`works[10 * page + 10]`
			`append_navlink(feed, 'next', feed_path, page+1 )`
			`except IndexError:`
			`pass`

			`works = islice(works, 10 * page, 10 * page + 10)`
			`if page > 0:`
			`append_navlink(feed, 'previous', feed_path, page-1)`
			`for work in works:`
ripped out more site/protocol code 2014-07-17 04:33:43 +00:00			`node = work_node(work)`
First draft of opds/creativecommons.xml 2014-06-24 22:27:40 +00:00			`feed.append(node)`

paginate feeds 2014-11-04 00:36:26 +00:00			`return etree.tostring(feed, pretty_print=True)`

			`def append_navlink(feed, rel, path, page):`
			`link = etree.Element("link")`
			`link.attrib.update({"rel":rel,`
			`"href": UNGLUEIT_URL + "/api/opds/" + path + ('/?page=' + unicode(page) if page!=None else '/'),`
			`"type": NAVIGATION,`
			`})`
			`feed.append(link)`