autocat3/HTMLFormatter.py

188 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python
# -*- mode: python; indent-tabs-mode: nil; -*- coding: iso-8859-1 -*-
"""
HTMLFormatter.py
Copyright 2009-2014 by Marcello Perathoner
Distributable under the GNU General Public License Version 3 or newer.
Produce a HTML page.
"""
from __future__ import unicode_literals
import operator
import cherrypy
import genshi.output
import re
import six
from six.moves import urllib
from libgutenberg.MediaTypes import mediatypes as mt
import BaseSearcher
import BaseFormatter
from i18n_tool import ugettext as _
# filetypes ignored on desktop site
NO_DESKTOP_FILETYPES = 'plucker qioo rdf rst rst.gen rst.master tei cover.medium cover.small'.split ()
# filetypes which are usually handed over to a separate app on mobile devices
HANDOVER_TYPES = (mt.epub, mt.mobi, mt.pdf)
# self-contained files we can send to dropbox
CLOUD_TYPES = (mt.epub, mt.mobi, mt.pdf)
STD_PDF_MATCH = re.compile (r'files/\d+/\d+-pdf.pdf$')
class XMLishFormatter (BaseFormatter.BaseFormatter):
""" Produce XMLish output. """
def __init__ (self):
super (XMLishFormatter, self).__init__ ()
def fix_dc (self, dc, os):
""" Tweak dc. """
def has_std_path (file_obj):
''' so cloudstorage links can be elided when the url is non-standard'''
if file_obj.filetype == 'pdf':
return STD_PDF_MATCH.search (file_obj.url)
return True
super (XMLishFormatter, self).fix_dc (dc, os)
# generated_files always [] AFAICT -esh
for file_ in dc.generated_files:
file_.help_topic = file_.hr_filetype
file_.compression = 'none'
file_.encoding = None
dedupable = {}
for file_ in dc.files:
if file_.filetype and file_.filetype.endswith('images'):
dedupable[file_.filetype] = file_
do_dedupe = False
for ft in ['epub', 'kindle', 'pdf', 'html']:
if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
# because of timestamps, identical files may vary by a bit or 2
if abs (dedupable[ft + '.images'].extent - dedupable[ft + '.noimages'].extent) < 3:
do_dedupe = True
if do_dedupe:
for ft in ['epub', 'kindle', 'pdf', 'html']:
if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
dc.files.remove(dedupable[ft + '.images'])
for file_ in dc.files + dc.generated_files:
type_ = six.text_type (file_.mediatypes[0])
m = type_.partition (';')[0]
if m in CLOUD_TYPES and has_std_path (file_):
file_.dropbox_url = os.url (
'dropbox_send', id = dc.project_gutenberg_id, filetype = file_.filetype)
file_.gdrive_url = os.url (
'gdrive_send', id = dc.project_gutenberg_id, filetype = file_.filetype)
file_.msdrive_url = os.url (
'msdrive_send', id = dc.project_gutenberg_id, filetype = file_.filetype)
# these are used as relative links
if file_.generated and not file_.filetype.startswith ('cover.'):
file_.filename = "ebooks/%d.%s" % (dc.project_gutenberg_id, file_.filetype)
if m in HANDOVER_TYPES:
file_.filename = file_.filename + '?' + urllib.parse.urlencode (
{ 'session_id': str (cherrypy.session.id) } )
for file_ in dc.files:
file_.honeypot_url = os.url (
'honeypot_send', id = dc.project_gutenberg_id, filetype = file_.filetype)
break
def format (self, page, os):
""" Format to HTML. """
for e in os.entries:
if isinstance (e, BaseSearcher.DC):
self.fix_dc (e, os)
# loop again because fix:dc appends things
for e in os.entries:
if isinstance (e, BaseSearcher.Cat):
if e.url:
e.icon2 = e.icon2 or 'next'
else:
e.class_ += 'grayed'
if os.title_icon:
os.class_ += 'icon_' + os.title_icon
os.entries.sort (key = operator.attrgetter ('order'))
return self.render (page, os)
class HTMLFormatter (XMLishFormatter):
""" Produce HTML output. """
CONTENT_TYPE = 'text/html; charset=UTF-8'
DOCTYPE = 'html5'
def __init__ (self):
super (HTMLFormatter, self).__init__ ()
def get_serializer (self):
# return BaseFormatter.XHTMLSerializer (doctype = self.DOCTYPE, strip_whitespace = False)
return genshi.output.HTMLSerializer (doctype = self.DOCTYPE, strip_whitespace = False)
def fix_dc (self, dc, os):
""" Add some info to dc for easier templating.
Also make sure that dc `walks like a cat´. """
super (HTMLFormatter, self).fix_dc (dc, os)
#for author in dc.authors:
# author.authors_page_url = (
# "/browse/authors/%s#a%d" % (author.name[:1].lower (), author.id))
if dc.new_filesystem:
dc.base_dir = "/files/%d/" % dc.project_gutenberg_id
# dc.mirror_dir = gg.archive_dir (dc.project_gutenberg_id)
else:
dc.base_dir = None
# dc.mirror_dir = None
dc.magnetlink = None
# hide all txt files but the first one
txtcount = showncount = 0
for file_ in dc.files + dc.generated_files:
filetype = file_.filetype or ''
file_.hidden = False
if filetype in NO_DESKTOP_FILETYPES:
file_.hidden = True
if file_.compression != 'none':
file_.hidden = True
if filetype.startswith ('txt'):
if txtcount > 0:
file_.hidden = True
txtcount += 1
if filetype != 'txt':
file_.encoding = None
if file_.encoding:
file_.hr_filetype += ' ' + file_.encoding.upper ()
if filetype.startswith ('html') and file_.compression == 'none':
file_.hr_filetype = 'Read this book online: {}'.format (file_.hr_filetype)
if not file_.hidden:
showncount += 1
# if we happened to hide everything, show all files
if showncount == 0:
for file_ in dc.files + dc.generated_files:
file_.hidden = False