gutenberg1
eric 2020-09-15 14:07:09 -04:00
parent 3e04d27dfd
commit e73d1dbb9d
1 changed files with 46 additions and 46 deletions

View File

@ -14,15 +14,21 @@ Base class for output formatters.
from __future__ import unicode_literals
import base64
import datetime
import os
import re
from six.moves import urllib
import base64
import os
import genshi.output
import genshi.template
from genshi.core import _ensure
from genshi.core import escape, Markup, QName
from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_CDATA, END_CDATA, PI, COMMENT
import genshi.output
from genshi.output import EMPTY, EmptyTagFilter, WhitespaceFilter, \
NamespaceFlattener, DocTypeInserter
import genshi.template
import cherrypy
from libgutenberg import GutenbergGlobals as gg
@ -32,59 +38,59 @@ import BaseSearcher
# use a bit more aggressive whitespace removal than the standard whitespace filter
COLLAPSE_LINES = re.compile('\n[ \t\r\n]+').sub
WHITESPACE_FILTER = genshi.output.WhitespaceFilter ()
WHITESPACE_FILTER = genshi.output.WhitespaceFilter()
DATA_URL_CACHE = {}
class BaseFormatter (object):
class BaseFormatter(object):
""" Base class for formatters. """
CONTENT_TYPE = 'text/html; charset=UTF-8'
def __init__ (self):
def __init__(self):
self.templates = {}
def format (self, page, os):
def format(self, page, os):
""" Abstract method to override. """
pass
def get_serializer (self):
def get_serializer(self):
""" Abstract method to override.
Like this:
return genshi.output.XMLSerializer (doctype = self.DOCTYPE, strip_whitespace = False)
return genshi.output.XMLSerializer(doctype = self.DOCTYPE, strip_whitespace = False)
"""
pass
def send_headers (self):
def send_headers(self):
""" Send HTTP content-type header. """
cherrypy.response.headers['Content-Type'] = self.CONTENT_TYPE
def render (self, page, os):
def render(self, page, os):
""" Render and send to browser. """
self.send_headers ()
self.send_headers()
template = self.templates[page]
ctxt = genshi.template.Context (cherrypy = cherrypy, os = os, bs = BaseSearcher)
ctxt = genshi.template.Context(cherrypy=cherrypy, os=os, bs=BaseSearcher)
stream = template.stream
for filter_ in template.filters:
stream = filter_ (iter (stream), ctxt)
stream = filter_(iter(stream), ctxt)
# there's no easy way in genshi to pass collapse_lines to this filter
stream = WHITESPACE_FILTER (stream, collapse_lines = COLLAPSE_LINES)
stream = WHITESPACE_FILTER(stream, collapse_lines=COLLAPSE_LINES)
return genshi.output.encode (self.get_serializer ()(_ensure (genshi.Stream (stream))),
encoding = 'utf-8')
return genshi.output.encode(self.get_serializer()(_ensure(genshi.Stream(stream))),
encoding='utf-8')
def set_template (self, page, template):
def set_template(self, page, template):
""" Set template for page.
Override this for special handling of template, like adding filters. """
@ -92,7 +98,7 @@ class BaseFormatter (object):
@staticmethod
def format_date (date):
def format_date(date):
""" Format a date. """
if date is None:
@ -100,31 +106,31 @@ class BaseFormatter (object):
try:
# datetime
return date.replace (tzinfo = gg.UTC (), microsecond = 0).isoformat ()
return date.replace(tzinfo=gg.UTC(), microsecond=0).isoformat()
except TypeError:
# date
return datetime.datetime.combine (
date, datetime.time (tzinfo = gg.UTC ())).isoformat ()
return datetime.datetime.combine(
date, datetime.time(tzinfo=gg.UTC())).isoformat()
@staticmethod
def data_url (path):
def data_url(path):
""" Read and convert a file to a data url. """
if path in DATA_URL_CACHE:
return DATA_URL_CACHE[path]
abs_path = os.path.join ('https://' + cherrypy.config['file_host'], path.lstrip ('/'))
abs_path = os.path.join('https://' + cherrypy.config['file_host'], path.lstrip('/'))
data_url = abs_path
try:
f = urllib.request.urlopen (abs_path)
retcode = f.getcode ()
f = urllib.request.urlopen(abs_path)
retcode = f.getcode()
if retcode is None or retcode == 200:
msg = f.info ()
mediatype = msg.get ('Content-Type')
msg = f.info()
mediatype = msg.get('Content-Type')
if mediatype:
mediatype = mediatype.partition (';')[0]
mediatype = mediatype.partition(';')[0]
data_url = ('data:' + mediatype + ';base64,' +
base64.b64encode (f.read ()).decode ('ascii'))
f.close ()
base64.b64encode(f.read()).decode('ascii'))
f.close()
except IOError:
pass
@ -132,25 +138,25 @@ class BaseFormatter (object):
return data_url
def fix_dc (self, dc, os):
def fix_dc(self, dc, os):
""" Add some info to dc for easier templating. """
# obsolete private marc codes for cover art
dc.marcs = [ marc for marc in dc.marcs if not marc.code.startswith ('9') ]
dc.marcs = [ marc for marc in dc.marcs if not marc.code.startswith('9') ]
dc.cover_image = None
dc.cover_thumb = None
# cover image really should not be a property of opensearch,
# but it is accessed in many places and this way we can save a
# lot of iterations later
os.cover_image_url = None
os.cover_thumb_url = None
os.cover_image_url = None
os.cover_thumb_url = None
for file_ in dc.files:
# HACK for https://
if file_.url.startswith ('http://'):
file_.url = 'https' + file_.url[4:]
if file_.url.startswith('http://'):
file_.url = 'https' + file_.url[4:]
file_.dropbox_url = None
# file_.dropbox_filename = None
@ -165,7 +171,7 @@ class BaseFormatter (object):
dc.cover_thumb = file_
os.cover_thumb_url = file_.url
dc.xsd_release_date_time = self.format_date (dc.release_date)
dc.xsd_release_date_time = self.format_date(dc.release_date)
if 'Sound' in dc.categories:
dc.icon = 'audiobook'
@ -175,13 +181,7 @@ class BaseFormatter (object):
# lang is not allowed in xhtml 1.1 which we must use
# because xhtml+rdfa is based on it
from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind
from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \
START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
from genshi.output import EMPTY, EmptyTagFilter, WhitespaceFilter, \
NamespaceFlattener, DocTypeInserter
class XHTMLSerializer (genshi.output.XMLSerializer):
class XHTMLSerializer(genshi.output.XMLSerializer):
"""Produces XHTML text from an event stream.
>>> from genshi.builder import tag