2019-03-28 13:45:03 +00:00
|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
# -*- mode: python; indent-tabs-mode: nil; -*- coding: iso-8859-1 -*-
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
HTMLFormatter.py
|
|
|
|
|
|
|
|
|
|
Copyright 2009-2014 by Marcello Perathoner
|
|
|
|
|
|
|
|
|
|
Distributable under the GNU General Public License Version 3 or newer.
|
|
|
|
|
|
|
|
|
|
Produce an HTML page.
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
|
|
import operator
|
|
|
|
|
|
|
|
|
|
import cherrypy
|
|
|
|
|
import genshi.output
|
2019-06-05 15:52:47 +00:00
|
|
|
|
import re
|
2019-03-28 13:45:03 +00:00
|
|
|
|
import six
|
|
|
|
|
from six.moves import urllib
|
|
|
|
|
|
|
|
|
|
from libgutenberg.MediaTypes import mediatypes as mt
|
|
|
|
|
|
|
|
|
|
import BaseSearcher
|
|
|
|
|
import BaseFormatter
|
2019-07-26 15:54:24 +00:00
|
|
|
|
from i18n_tool import ugettext as _
|
2019-03-28 13:45:03 +00:00
|
|
|
|
|
|
|
|
|
# filetypes ignored on desktop site
|
|
|
|
|
NO_DESKTOP_FILETYPES = [
    'plucker',
    'qioo',
    'rdf',
    'rst',
    'rst.gen',
    'rst.master',
    'tei',
    'cover.medium',
    'cover.small',
]
|
|
|
|
|
|
|
|
|
|
# filetypes which are usually handed over to a separate app on mobile devices
|
2019-09-03 19:30:36 +00:00
|
|
|
|
HANDOVER_TYPES = (
    mt.epub,
    mt.mobi,
    mt.pdf,
)
|
2019-03-28 13:45:03 +00:00
|
|
|
|
|
|
|
|
|
# self-contained files we can send to dropbox
|
|
|
|
|
CLOUD_TYPES = (
    mt.epub,
    mt.mobi,
    mt.pdf,
)
|
2019-06-05 15:52:47 +00:00
|
|
|
|
# Matches URLs that end in the standard generated-PDF path,
# e.g. ``files/123/123-pdf.pdf``.  The dot before the extension is escaped
# so that only a literal '.' is accepted there.
STD_PDF_MATCH = re.compile (r'files/\d+/\d+-pdf\.pdf$')
|
2019-03-28 13:45:03 +00:00
|
|
|
|
|
|
|
|
|
class XMLishFormatter(BaseFormatter.BaseFormatter):
    """Common base for formatters that emit XML-like markup."""

    def __init__(self):
        super(XMLishFormatter, self).__init__()

    def fix_dc(self, dc, os):
        """Decorate the files of a dc record with the extras the templates use.

        After delegating to the base class this:

        * resets help topic, compression and encoding on generated files,
        * drops the ``*.images`` variants of epub / kindle / pdf when at
          least one images / noimages pair has the same extent,
        * attaches dropbox / gdrive / msdrive send urls to cloud-capable
          files,
        * rewrites the filename of generated (non-cover) files to a relative
          ``ebooks/<id>.<filetype>`` link, with the session id appended for
          hand-over media types,
        * attaches a honeypot url to the first entry of ``dc.files``.

        dc -- the record to modify in place
        os -- the object whose ``url ()`` method builds the links
        """

        def has_std_path(file_obj):
            """Truthy unless file_obj is a pdf whose url is non-standard.

            Lets the caller leave out the cloud storage links for pdfs that
            do not live at the standard path.
            """
            if file_obj.filetype != 'pdf':
                return True
            return STD_PDF_MATCH.search(file_obj.url)

        def send_url(route, target):
            """Build the url of the `route` action for one file of this dc."""
            return os.url(
                route, id=dc.project_gutenberg_id, filetype=target.filetype)

        super(XMLishFormatter, self).fix_dc(dc, os)

        # generated_files always [] AFAICT -esh
        for generated in dc.generated_files:
            generated.help_topic = generated.hr_filetype
            generated.compression = 'none'
            generated.encoding = None

        # Index every file whose filetype ends in 'images'; this catches
        # both the '.images' and the '.noimages' variants.
        image_variants = {
            candidate.filetype: candidate
            for candidate in dc.files
            if candidate.filetype and candidate.filetype.endswith('images')
        }
        paired = [
            ft for ft in ('epub', 'kindle', 'pdf')
            if ft + '.images' in image_variants
            and ft + '.noimages' in image_variants
        ]
        same_extent = any(
            image_variants[ft + '.images'].extent
            == image_variants[ft + '.noimages'].extent
            for ft in paired
        )
        # One pair of equal extent is enough to drop the '.images' variant
        # of every complete pair, not only of the pair that matched.
        if same_extent:
            for ft in paired:
                dc.files.remove(image_variants[ft + '.images'])

        for entry in dc.files + dc.generated_files:
            # media type with any ';parameter' suffix cut off
            mediatype = six.text_type(entry.mediatypes[0]).partition(';')[0]

            if mediatype in CLOUD_TYPES and has_std_path(entry):
                entry.dropbox_url = send_url('dropbox_send', entry)
                entry.gdrive_url = send_url('gdrive_send', entry)
                entry.msdrive_url = send_url('msdrive_send', entry)

            # these are used as relative links
            if not entry.generated or entry.filetype.startswith('cover.'):
                continue
            entry.filename = "ebooks/%d.%s" % (
                dc.project_gutenberg_id, entry.filetype)
            if mediatype in HANDOVER_TYPES:
                query = urllib.parse.urlencode(
                    {'session_id': str(cherrypy.session.id)})
                entry.filename = entry.filename + '?' + query

        # only the first file gets a honeypot url
        if dc.files:
            first = dc.files[0]
            first.honeypot_url = send_url('honeypot_send', first)

    def format(self, page, os):
        """Fix up the entries of `os`, sort them and render `page`.

        Returns whatever ``self.render (page, os)`` returns.
        """

        for entry in os.entries:
            if isinstance(entry, BaseSearcher.DC):
                self.fix_dc(entry, os)

        # Kept as a separate pass: per the original author's note, fix_dc
        # appends things.
        for entry in os.entries:
            if not isinstance(entry, BaseSearcher.Cat):
                continue
            if entry.url:
                entry.icon2 = entry.icon2 or 'next'
            else:
                entry.class_ += 'grayed'

        if os.title_icon:
            os.class_ += 'icon_' + os.title_icon

        os.entries.sort(key=operator.attrgetter('order'))

        return self.render(page, os)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class HTMLFormatter(XMLishFormatter):
    """Formatter for the HTML pages."""

    CONTENT_TYPE = 'text/html; charset=UTF-8'
    DOCTYPE = 'html5'

    def __init__(self):
        super(HTMLFormatter, self).__init__()

    def get_serializer(self):
        """Return the genshi HTML serializer configured with our DOCTYPE."""
        # Alternative kept for reference:
        # return BaseFormatter.XHTMLSerializer (doctype = self.DOCTYPE, strip_whitespace = False)
        return genshi.output.HTMLSerializer(
            doctype=self.DOCTYPE, strip_whitespace=False)

    def fix_dc(self, dc, os):
        """Add template-friendly attributes to dc and decide file visibility.

        Sets ``dc.base_dir`` and ``dc.magnetlink``, then marks each file as
        hidden or shown and adjusts its human-readable filetype.
        """

        super(HTMLFormatter, self).fix_dc(dc, os)

        # Disabled author links, kept for reference:
        # for author in dc.authors:
        #     author.authors_page_url = (
        #         "/browse/authors/%s#a%d" % (author.name[:1].lower (), author.id))

        # (a disabled dc.mirror_dir assignment used to accompany base_dir:
        #  gg.archive_dir (dc.project_gutenberg_id) / None)
        dc.base_dir = (
            "/files/%d/" % dc.project_gutenberg_id
            if dc.new_filesystem else None)

        dc.magnetlink = None

        all_files = dc.files + dc.generated_files
        txt_seen = 0
        visible = 0

        for entry in all_files:
            kind = entry.filetype or ''

            # hidden on the desktop site: ignored filetypes and anything
            # that is compressed
            entry.hidden = (
                kind in NO_DESKTOP_FILETYPES
                or entry.compression != 'none')

            # hide all txt files but the first one
            if kind.startswith('txt'):
                if txt_seen:
                    entry.hidden = True
                txt_seen += 1

            # the encoding is only kept (and displayed) for plain 'txt'
            if kind != 'txt':
                entry.encoding = None
            elif entry.encoding:
                entry.hr_filetype += ' ' + entry.encoding.upper()

            if kind.startswith('html') and entry.compression == 'none':
                entry.hr_filetype = 'Read this book online: {}'.format(
                    entry.hr_filetype)

            if not entry.hidden:
                visible += 1

        # if we happened to hide everything, show all files
        if not visible:
            for entry in all_files:
                entry.hidden = False
|