gutenberg1
eric 2020-09-15 14:07:09 -04:00
parent 3e04d27dfd
commit e73d1dbb9d
1 changed files with 46 additions and 46 deletions

View File

@ -14,15 +14,21 @@ Base class for output formatters.
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import datetime import datetime
import os
import re import re
from six.moves import urllib from six.moves import urllib
import base64
import os
import genshi.output
import genshi.template
from genshi.core import _ensure from genshi.core import _ensure
from genshi.core import escape, Markup, QName
from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_CDATA, END_CDATA, PI, COMMENT
import genshi.output
from genshi.output import EMPTY, EmptyTagFilter, WhitespaceFilter, \
NamespaceFlattener, DocTypeInserter
import genshi.template
import cherrypy import cherrypy
from libgutenberg import GutenbergGlobals as gg from libgutenberg import GutenbergGlobals as gg
@ -32,59 +38,59 @@ import BaseSearcher
# The standard whitespace filter is not aggressive enough for our output:
# this substitution callable matches a newline followed by any further run
# of blanks, tabs, carriage returns or newlines.
COLLAPSE_LINES = re.compile('\n[ \t\r\n]+').sub

# One filter instance, applied by hand to every rendered stream.
WHITESPACE_FILTER = genshi.output.WhitespaceFilter()

# Data urls already computed, keyed by the path they were requested for.
DATA_URL_CACHE = {}
class BaseFormatter (object): class BaseFormatter(object):
""" Base class for formatters. """ """ Base class for formatters. """
CONTENT_TYPE = 'text/html; charset=UTF-8' CONTENT_TYPE = 'text/html; charset=UTF-8'
def __init__ (self): def __init__(self):
self.templates = {} self.templates = {}
def format(self, page, os):
    """ Abstract method to override.

    The base implementation ignores both arguments and returns None.
    """

    return None
def get_serializer(self):
    """ Abstract method to override.

    The base implementation returns None.  An override is expected to
    hand back a serializer, for example:

    return genshi.output.XMLSerializer(doctype = self.DOCTYPE, strip_whitespace = False)

    """

    return None
def send_headers(self):
    """ Set the HTTP content-type response header from CONTENT_TYPE. """

    response_headers = cherrypy.response.headers
    response_headers['Content-Type'] = self.CONTENT_TYPE
def render(self, page, os):
    """ Render the template registered for page and return the encoded output.

    The content-type header is sent first.  The template stream is then
    run through the template's own filters and the whitespace filter,
    serialized with get_serializer() and encoded as utf-8.

    page -- key into self.templates
    os   -- object made available to the template under the name `os`
    """

    self.send_headers()

    tmpl = self.templates[page]
    context = genshi.template.Context(cherrypy=cherrypy, os=os, bs=BaseSearcher)

    events = tmpl.stream
    for apply_filter in tmpl.filters:
        events = apply_filter(iter(events), context)

    # genshi offers no easy way to hand collapse_lines to this filter,
    # so the filter is invoked directly here
    events = WHITESPACE_FILTER(events, collapse_lines=COLLAPSE_LINES)

    serializer = self.get_serializer()
    serialized = serializer(_ensure(genshi.Stream(events)))
    return genshi.output.encode(serialized, encoding='utf-8')
def set_template (self, page, template): def set_template(self, page, template):
""" Set template for page. """ Set template for page.
Override this for special handling of template, like adding filters. """ Override this for special handling of template, like adding filters. """
@ -92,7 +98,7 @@ class BaseFormatter (object):
@staticmethod @staticmethod
def format_date (date): def format_date(date):
""" Format a date. """ """ Format a date. """
if date is None: if date is None:
@ -100,31 +106,31 @@ class BaseFormatter (object):
try: try:
# datetime # datetime
return date.replace (tzinfo = gg.UTC (), microsecond = 0).isoformat () return date.replace(tzinfo=gg.UTC(), microsecond=0).isoformat()
except TypeError: except TypeError:
# date # date
return datetime.datetime.combine ( return datetime.datetime.combine(
date, datetime.time (tzinfo = gg.UTC ())).isoformat () date, datetime.time(tzinfo=gg.UTC())).isoformat()
@staticmethod @staticmethod
def data_url (path): def data_url(path):
""" Read and convert a file to a data url. """ """ Read and convert a file to a data url. """
if path in DATA_URL_CACHE: if path in DATA_URL_CACHE:
return DATA_URL_CACHE[path] return DATA_URL_CACHE[path]
abs_path = os.path.join ('https://' + cherrypy.config['file_host'], path.lstrip ('/')) abs_path = os.path.join('https://' + cherrypy.config['file_host'], path.lstrip('/'))
data_url = abs_path data_url = abs_path
try: try:
f = urllib.request.urlopen (abs_path) f = urllib.request.urlopen(abs_path)
retcode = f.getcode () retcode = f.getcode()
if retcode is None or retcode == 200: if retcode is None or retcode == 200:
msg = f.info () msg = f.info()
mediatype = msg.get ('Content-Type') mediatype = msg.get('Content-Type')
if mediatype: if mediatype:
mediatype = mediatype.partition (';')[0] mediatype = mediatype.partition(';')[0]
data_url = ('data:' + mediatype + ';base64,' + data_url = ('data:' + mediatype + ';base64,' +
base64.b64encode (f.read ()).decode ('ascii')) base64.b64encode(f.read()).decode('ascii'))
f.close () f.close()
except IOError: except IOError:
pass pass
@ -132,25 +138,25 @@ class BaseFormatter (object):
return data_url return data_url
def fix_dc (self, dc, os): def fix_dc(self, dc, os):
""" Add some info to dc for easier templating. """ """ Add some info to dc for easier templating. """
# obsolete private marc codes for cover art # obsolete private marc codes for cover art
dc.marcs = [ marc for marc in dc.marcs if not marc.code.startswith ('9') ] dc.marcs = [ marc for marc in dc.marcs if not marc.code.startswith('9') ]
dc.cover_image = None dc.cover_image = None
dc.cover_thumb = None dc.cover_thumb = None
# cover image really should not be a property of opensearch, # cover image really should not be a property of opensearch,
# but it is accessed in many places and this way we can save a # but it is accessed in many places and this way we can save a
# lot of iterations later # lot of iterations later
os.cover_image_url = None os.cover_image_url = None
os.cover_thumb_url = None os.cover_thumb_url = None
for file_ in dc.files: for file_ in dc.files:
# HACK for https:// # HACK for https://
if file_.url.startswith ('http://'): if file_.url.startswith('http://'):
file_.url = 'https' + file_.url[4:] file_.url = 'https' + file_.url[4:]
file_.dropbox_url = None file_.dropbox_url = None
# file_.dropbox_filename = None # file_.dropbox_filename = None
@ -165,7 +171,7 @@ class BaseFormatter (object):
dc.cover_thumb = file_ dc.cover_thumb = file_
os.cover_thumb_url = file_.url os.cover_thumb_url = file_.url
dc.xsd_release_date_time = self.format_date (dc.release_date) dc.xsd_release_date_time = self.format_date(dc.release_date)
if 'Sound' in dc.categories: if 'Sound' in dc.categories:
dc.icon = 'audiobook' dc.icon = 'audiobook'
@ -175,13 +181,7 @@ class BaseFormatter (object):
# lang is not allowed in xhtml 1.1 which we must use # lang is not allowed in xhtml 1.1 which we must use
# because xhtml+rdfa is based on it # because xhtml+rdfa is based on it
from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind class XHTMLSerializer(genshi.output.XMLSerializer):
from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \
START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE
from genshi.output import EMPTY, EmptyTagFilter, WhitespaceFilter, \
NamespaceFlattener, DocTypeInserter
class XHTMLSerializer (genshi.output.XMLSerializer):
"""Produces XHTML text from an event stream. """Produces XHTML text from an event stream.
>>> from genshi.builder import tag >>> from genshi.builder import tag