autocat3/BaseSearcher.py

954 lines
31 KiB
Python
Raw Normal View History

2019-03-28 13:45:03 +00:00
# -*- mode: python; indent-tabs-mode: nil; -*- coding: iso-8859-1 -*-
"""
BaseSearcher.py
Copyright 2009-2014 by Marcello Perathoner
Distributable under the GNU General Public License Version 3 or newer.
Project Gutenberg Catalog Search
Base class
"""
from __future__ import unicode_literals
from __future__ import division
import datetime
import logging
2020-09-03 18:52:31 +00:00
from six.moves import urllib
2019-03-28 13:45:03 +00:00
import cherrypy
import routes
import babel
import regex # module re does not support \p{L}
import six
from libgutenberg.MediaTypes import mediatypes as mt
from libgutenberg.GutenbergDatabase import xl, DatabaseError
from libgutenberg import DublinCore
from libgutenberg import GutenbergDatabaseDublinCore
from libgutenberg import GutenbergGlobals as gg
2019-07-26 15:54:24 +00:00
from i18n_tool import ugettext as _
from i18n_tool import ungettext as __
2019-03-28 13:45:03 +00:00
import DublinCoreI18n
from SupportedLocales import FB_LANGS, TWITTER_LANGS, GOOGLE_LANGS, PAYPAL_LANGS, FLATTR_LANGS
# protocols accepted from the X-Forwarded-Protocol request header
VALID_PROTOCOLS = ('http', 'https')

# media type -> name of the output format
MEDIATYPE_TO_FORMAT = {
    'text/html': 'html',
    mt.mobile: 'html',
    mt.opds: 'opds',
    mt.json: 'json',
}

# values accepted in the 'format' request parameter
USER_FORMATS = ['html', 'mobile', 'print', 'opds', 'stanza', 'json']

# max no. of results returned by search
MAX_RESULTS = 1000

# sort orders available to the user
USER_SORT_ORDERS = [
    'downloads',
    'release_date',
    'title',
    'alpha',
    'quantity',
    'nentry',
    'random',
]

# internally used sort orders
SORT_ORDERS = USER_SORT_ORDERS + ['nentry']

# fk_categories of sound files
AUDIOBOOK_CATEGORIES = {1, 2, 3, 6}

# updated by cron thread
books_in_archive = 0
2020-09-03 18:52:31 +00:00
class ClassAttr(object):
    """ Holds an XML class attribute.

    The class names are kept in a set (``value``) and rendered as a
    space-separated string.  Names are rendered in sorted order so the
    generated markup is the same on every run.
    """

    __slots__ = 'value'

    def __init__(self, v=None):
        """ Start empty, then merge in `v` (a string or a ClassAttr), if given. """
        self.value = set()
        self.__iadd__(v)

    def __len__(self):
        """ Return the number of distinct class names. """
        return len(self.value)

    def __unicode__(self):
        """ Return the class names joined by spaces ('' when empty). """
        return ' '.join(sorted(self.value))

    def __str__(self):
        """ Return the class names joined by spaces ('' when empty). """
        return ' '.join(sorted(self.value))

    def __iadd__(self, v):
        """ Implements operator +=

        `v` may be a whitespace-separated string of class names or
        another ClassAttr.  Empty values and operands of any other type
        are ignored.  Always returns self, so that `x += v` never
        rebinds `x` to None.
        """
        if not v:
            return self
        if isinstance(v, ClassAttr):
            self.value |= v.value
        elif isinstance(v, six.string_types):
            self.value.update(six.text_type(v).split())
        return self

    def __contains__(self, b):
        """ Return True if class name `b` is set. """
        return b in self.value
2020-09-03 18:52:31 +00:00
class DC(GutenbergDatabaseDublinCore.GutenbergDatabaseDublinCore,
         DublinCoreI18n.DublinCoreI18nMixin):
    """ DublinCore record read from the database, with the i18n mixin added. """

    def __init__(self, pool):
        # Initialize both bases explicitly: the database-backed record
        # gets the connection pool, the mixin takes no arguments.
        database_base = GutenbergDatabaseDublinCore.GutenbergDatabaseDublinCore
        i18n_mixin = DublinCoreI18n.DublinCoreI18nMixin
        database_base.__init__(self, pool)
        i18n_mixin.__init__(self)
2019-03-28 13:45:03 +00:00
2020-09-03 18:52:31 +00:00
class Cat(object):
    """ Data of a single list item in the output. """

    def __init__(self):
        # kind of entry and how it is presented
        self.type = None  # use default
        self.class_ = ClassAttr()
        self.rel = None
        self.order = 0
        self.charset = None

        # texts
        self.header = ''
        self.title = None
        self.subtitle = None
        self.extra = None

        # icons and links
        self.icon = None
        self.icon2 = None
        self.url = None
        self.thumb_url = None

        self.downloads = 1
2020-09-03 18:52:31 +00:00
class SearchUrlFormatter(object):
    """ Callable that builds the url of a search action for a result row. """

    def __init__(self, action):
        # name of the route to generate urls for
        self.action = action

    def __call__(self, row):
        """ Return the url of self.action for the item with id row.pk. """
        opensearch = cherrypy.request.os
        url_params = {
            'format': opensearch.format,
            'id': row.pk,
        }
        return opensearch.url(self.action, **url_params)
2020-09-03 18:52:31 +00:00
class SQLStatement(object):
    """ Class implementing an SQL statement.

    Collects the pieces of a SELECT statement (`query`, `from_`,
    `where`, `groupby`, sort order and paging) together with the bound
    parameters and assembles them in build ().
    """

    # Dict of user-visible prefixes to translate.
    #
    # User-visible prefixes must be easy to type. The dot is on the
    # lowercase keyboard of most phones, so you need no shifting to type
    # these.
    #
    # Internal prefixes exploit the quirks of the tsvec stemmer. Words
    # containing numbers do not get stemmed, so any '*0' prefix searches
    # for the unstemmed word. All other words get stemmed, so any '*x'
    # prefix searches for the stem of the word. 'x' was selected because
    # it is a rare character that will cause few false positives.
    prefix_to_prefix = {
        'a.': 'ax',
        't.': 'tx',
        's.': 'sx',
        'bs.': 'bsx',
        'l.': 'l0',
        '#': 'no.',
        'n.': 'no.',
        'type.': 'y0',
        'lcn.': 'lcnx',
        'lcc.': 'lcc0',
        'cat.': 'cat0',
    }

    # Cache of compiled regexes, keyed by pattern string.
    regex_cache = {}

    def __init__(self):
        self.query = ''
        self.params = {}
        self.from_ = []
        self.where = []
        self.groupby = []
        self.sort_order = None
        self.start_index = 1
        self.items_per_page = -1

    @classmethod
    def sub(cls, regex_, replace, query):
        """ Like re.sub but also compile and cache the regex.

        A `query` that is not a string is replaced by its first element
        if it is a non-empty list, else by the empty string.
        """
        if not isinstance(query, str):
            query = query[0] if isinstance(query, list) and len(query) > 0 else ''

        # Look the pattern up first: dict.setdefault would evaluate (and
        # thus compile) its default argument on every call.
        cregex = cls.regex_cache.get(regex_)
        if cregex is None:
            cregex = regex.compile(regex_, regex.UNICODE | regex.VERSION1)
            cls.regex_cache[regex_] = cregex
        return cregex.sub(replace, query)

    @classmethod
    def preprocess_query(cls, query):
        """ Preprocess query.

        The preprocessed query might get echoed to the user.
        """
        sub = cls.sub

        # strip most not (letter or digit)
        query = sub(r'[\p{Z}\p{P}\p{S}\p{M}\p{C}--.!|()#]', ' ', query)

        # strip operators adjacent to non-whitespace
        # if you want grouping you have to add space on both sides of the parens
        query = sub(r'\b[!)]', ' ', query)
        query = sub(r'[(]\b', ' ', query)

        # insert spaces around operators
        query = sub(r'\s*[|!()]\s*', r' \g<0> ', query)

        # collapse runs of whitespace
        return ' '.join(query.split())

    @staticmethod
    def _balance_parens(query):
        """ Return query with unmatched parens removed. """

        def scan(text, up, down):
            """ Drop every `down` that has no preceding `up`.

            Returns the filtered text and the number of `up`s left open.
            """
            kept = []
            depth = 0
            for char in text:
                if char == down:
                    if depth == 0:
                        continue  # nothing open: drop it
                    depth -= 1
                elif char == up:
                    depth += 1
                kept.append(char)
            return ''.join(kept), depth

        balanced, depth = scan(query, '(', ')')
        if depth:
            # unclosed '(' remain: scan backwards with the roles swapped
            balanced, depth = scan(balanced[::-1], ')', '(')
            balanced = balanced[::-1]
        return balanced

    @classmethod
    def translate_query(cls, query):
        """ Translate query from user syntax to postgres tsvec syntax. """
        sub = cls.sub

        def prefix_sub(match_object):
            """ Translate from user-visible prefix to internal prefix. """
            s = match_object.group(0)
            return cls.prefix_to_prefix.get(s, s)

        # Replace the user-visible prefixes with the internally used prefixes.
        query = sub(r'(\b\w+\.|#)(?=\w)', prefix_sub, query)

        # add wildcards to all words
        query = sub(r'\b(\p{L}+)(\s|$)', r'\1:*\2', query)
        query = query.replace('. ', ' ')

        # if parens aren't balanced, remove them
        query = cls._balance_parens(query)

        # if ! or | are at the wrong ends, remove them
        query = sub(r'(^[ \|]+|[ \|\!]+$)', '', query)

        # replace ' ' with ' & '
        query = ' '.join(query.split())
        query = sub(r'(?<![|!(\s])\s+(?![|)])', ' & ', query)

        return query

    def build(self):
        """ Returns the SQL query string and parameter array.

        Adds 'offset' and 'limit' to self.params when paging applies.
        """
        query = self.query
        if self.from_:
            query += " FROM " + ", ".join(self.from_)
        if self.where:
            query += " WHERE " + " AND ".join(self.where)
        if self.groupby:
            query += " GROUP BY " + ", ".join(self.groupby)

        params = self.params

        # Only names from SORT_ORDERS are ever interpolated into the SQL.
        if self.sort_order in SORT_ORDERS:
            if self.sort_order == 'random':
                query += " ORDER BY random ()"
            elif self.sort_order == 'title':
                query += " ORDER BY filing"
            elif self.sort_order == 'alpha':
                query += " ORDER BY title"
            elif self.sort_order == 'release_date':
                query += " ORDER BY release_date DESC, pk DESC"
            else:
                query += " ORDER BY %s DESC" % (self.sort_order)

        if self.start_index > 1:
            # opensearch is 1-based, SQL is 0-based
            params['offset'] = self.start_index - 1
            query += " OFFSET %(offset)s"

        if self.items_per_page > -1:
            # need one more to know when to display 'next' link
            params['limit'] = self.items_per_page + 1
            query += " LIMIT %(limit)s"

        return query, params

    def split(self, field, query):
        """ Split multiple-term query for sql consumption.

        Adds one regex parameter per whitespace-separated term to
        self.params and returns the list of matching "field ~* param"
        conditions.
        """
        terms = []
        n = len(self.params)
        for i, q in enumerate(query.split()):
            q = q.strip('.,:;')
            key = 'p%d' % (n + i)
            terms.append("%s ~* %%(%s)s" % (field, key))
            # match the term at the start of the field or after a space
            # NOTE(review): q is used as a regex without escaping -- confirm
            # callers only pass preprocessed queries.
            self.params[key] = '(^| )' + q
        return terms

    def split_and_append(self, field, query):
        """ Split multiple-term query for sql consumption and append terms to query. """
        self.where.extend(self.split(field, query))

    def fulltext(self, field, query, stemmer='english'):
        """ Perform fulltext search on query words.

        Does nothing if the query is empty after stripping whitespace.
        """
        query = query.strip()
        if not query:
            return

        query = self.translate_query(query)

        key = 'p%d' % len(self.params)
        self.where.append("%s @@ to_tsquery('%s', %%(%s)s)" % (field, stemmer, key))
        self.params[key] = query
2019-03-28 13:45:03 +00:00
2020-09-03 18:52:31 +00:00
class OpenSearch(object):
    """ Hold search results and lots of other stuff.

    We use this to pass everything we know around and into the
    templating engine.
    """

    # default locale for session languages given as 2-letter codes
    lang_to_default_locale = {
        'en': 'en_US',
        'de': 'de_DE',
        'fr': 'fr_FR',
        'es': 'es_ES',
        'it': 'it_IT',
        'pt': 'pt_BR',
        'ru': 'ru_RU',
    }

    def __init__(self):
        """ Initialize from the current cherrypy request and session.

        Registers itself as cherrypy.request.os.

        Raises cherrypy.HTTPError (400) on an unknown format or sort
        order and on non-numeric id / start_index / items_per_page.
        """
        self.format = None
        self.page = None
        self.template = None
        self.query = None
        self.id = None
        self.sort_order = None
        self.search_terms = None
        self.start_index = 1
        self.items_per_page = 1
        self.total_results = -1
        self.page_mode = 'screen'
        self.user_dialog = ('', '')
        self.opensearch_support = 0  # 0 = none, 1 = full, 2 = fake(Stanza, Aldiko, ...)
        self.books_in_archive = babel.numbers.format_number(
            books_in_archive, locale=str(cherrypy.response.i18n.locale))
        self.breadcrumbs = [
            (_('Project Gutenberg'), _('Go to the Main page.'), '/'),
            (__('1 free ebook', '{count} free ebooks', books_in_archive).format(
                count=self.books_in_archive), _('Start a new search.'), '/ebooks/'),
        ]

        # default output formatting functions
        self.f_format_title = self.format_title
        self.f_format_subtitle = self.format_author
        self.f_format_extra = self.format_none  # depends on sort order, set in fix_sortorder ()
        self.f_format_url = self.format_bibrec_url
        self.f_format_thumb_url = self.format_thumb_url
        self.f_format_icon = self.format_icon  # icon class

        self.user_agent = cherrypy.request.headers.get('User-Agent', '')

        cherrypy.request.os = self
        s = cherrypy.session
        k = cherrypy.request.params

        host = cherrypy.request.headers.get('X-Forwarded-Host', cherrypy.config['host'])
        self.host = host.split(',')[-1].strip()  # keep only the last hub
        # turns out X-Forwarded-Protocol (X-Forwarded-Proto is the de facto standard)
        # is not a thing and has to be set in HAProxy
        self.protocol = cherrypy.request.headers.get('X-Forwarded-Protocol', 'https')

        # sanity check
        if self.host not in (cherrypy.config['all_hosts']):
            self.host = cherrypy.config['host']
        if self.protocol not in VALID_PROTOCOLS:
            self.protocol = 'https'

        self.urlgen = routes.URLGenerator(cherrypy.routes_mapper, {'HTTP_HOST': self.host})

        self.set_format(k.get('format'))

        # query: this param is set when an actual query is requested
        self.query = ''
        if 'query' in k:
            self.query = SQLStatement.preprocess_query(k['query'])

        # search_terms: this is used to carry the last query
        # to display in the search input box
        self.search_terms = self.query or s.get('search_terms', '')

        self.sort_order = k.get('sort_order') or s.get('sort_order') or USER_SORT_ORDERS[0]
        if self.sort_order not in USER_SORT_ORDERS:
            raise cherrypy.HTTPError(400, 'Bad Request. Unknown sort order.')
        s['sort_order'] = self.sort_order

        try:
            self.id = int(k.get('id') or '0')
            # Clamp the paging params: items_per_page = 0 would divide by
            # zero in finalize () and a negative value would drop the
            # LIMIT clause from the SQL query.
            self.start_index = self._clamped_int(k.get('start_index'), 1, 1)
            self.items_per_page = self._clamped_int(k.get('items_per_page'), 25, 1, 100)
        except ValueError as what:
            raise cherrypy.HTTPError(400, 'Bad Request. ' + str(what))

        self.file_host = cherrypy.config['file_host']
        self.now = datetime.datetime.utcnow().replace(microsecond=0).isoformat() + 'Z'
        self.do_animations = 'Kindle/' not in self.user_agent  # no animations on e-ink
        self.ip = cherrypy.request.remote.ip
        self.type_opds = 'application/atom+xml;profile=opds-catalog'

        self.base_url = None
        self.canonical_url = None
        self.entries = []

        # NOTE: For page titles etc.
        self.pg = self.title = _('Project Gutenberg')
        # NOTE: The tagline at the top of every page.
        self.tagline = _('Project Gutenberg offers {count} free ebooks to download.').format(
            count=self.books_in_archive)
        # NOTE: The site's description in the html meta tags.
        self.description = _('Project Gutenberg offers {count} free ebooks for '
                             'Kindle, iPad, Nook, Android, and iPhone.').format(
                                 count=self.books_in_archive)
        # NOTE: The placeholder inside an empty search box.
        self.placeholder = _('Search Project Gutenberg.')

        # these need to be here because they have to be localized
        # NOTE: Msg to user indicating the order of the search results.
        self.sorted_msgs = {
            'downloads': _("sorted by popularity"),
            'release_date': _("sorted by release date"),
            'quantity': _("sorted by quantity of books"),
            'title': _("sorted alphabetically"),
            'alpha': _("sorted alphabetically"),
            'nentry': _("sorted by relevance"),
            'random': _("in random order"),
        }

        self.snippet_image_url = self.url('/pics/logo-144x144.png', host=self.file_host)
        self.og_type = 'website'
        self.class_ = ClassAttr()
        self.title_icon = None
        self.icon = None
        self.sort_orders = []
        self.alternate_sort_orders = []

        lang = self.lang = s.get('_lang_', 'en_US')
        if len(lang) == 2:
            lang = self.lang_to_default_locale.get(lang, 'en_US')
        lang2 = self.lang[:2]

        self.paypal_lang = lang if lang in PAYPAL_LANGS else 'en_US'
        self.flattr_lang = lang if lang in FLATTR_LANGS else 'en_US'

        lang = lang.replace('_', '-')
        self.google_lang = lang if lang in GOOGLE_LANGS else (
            lang2 if lang2 in GOOGLE_LANGS else 'en-US')

        lang = lang.lower()
        self.twitter_lang = lang if lang in TWITTER_LANGS else (
            lang2 if lang2 in TWITTER_LANGS else 'en')

        self.viewport = "width=device-width"  # , initial-scale=1.0"
        self.touch_icon = '/gutenberg/apple-icon.png'
        self.touch_icon_precomposed = None  # not yet used

        # a dialog stored in the session is shown once, then forgotten
        if 'user_dialog' in s:
            self.user_dialog = s['user_dialog']
            del s['user_dialog']

        msg = k.get('msg')
        if msg is not None:
            if msg == 'welcome_stranger':
                self.user_dialog = (
                    _("Welcome to Project Gutenberg. "
                      "You'll find here {count} ebooks completely free of charge.")
                    .format(count=self.books_in_archive),
                    _('Welcome'))

    @staticmethod
    def _clamped_int(raw, default, lowest, highest=None):
        """ Convert a request parameter to an int within [lowest, highest].

        An empty or missing `raw` yields `default`.  `highest` = None
        means no upper bound.  Raises ValueError if `raw` is not a number.
        """
        value = int(raw or default)
        if highest is not None:
            value = min(value, highest)
        return max(value, lowest)

    def finalize(self):
        """ Calculate fields that depend on start_index, items_per_page and total_results.

        start_index, etc. must be set before calling this.
        """
        # FIXME: android browser crashes on XHR with
        # meta name=viewport or link rel=apple-touch-icon
        # see:
        # http://code.google.com/p/android/issues/detail?id=6593
        # http://code.google.com/p/android/issues/detail?id=9261
        #
        # remove this from all browsers because we are caching responses
        if self.start_index > 1:
            self.touch_icon = None
            self.viewport = None

        self.desktop_host = cherrypy.config['host']

        last_page = max((self.total_results - 1) // self.items_per_page, 0)  # 0-based

        self.end_index = min(self.start_index + self.items_per_page - 1, self.total_results)

        self.prev_page_index = max(self.start_index - self.items_per_page, 1)
        self.next_page_index = min(self.start_index + self.items_per_page, self.total_results)
        self.last_page_index = last_page * self.items_per_page + 1

        self.show_prev_page_link = self.start_index > 1
        self.show_next_page_link = (self.end_index < self.total_results)

        self.desktop_search = self.url('search', format=None)

        self.base_url = self.url(host=self.file_host, protocol='https')

        # for google, fb etc.
        self.canonical_url = self.url_carry(host=self.file_host, format=None)

        self.desktop_url = self.url_carry(host=self.desktop_host, format=None)

        self.osd_url = self.qualify('/catalog/osd-books.xml')

        s = cherrypy.session
        # write this late so pages can change it
        s['search_terms'] = self.search_terms

    def url(self, *args, **params):
        """ Generate url carrying the 'format' parameter from self.

        The first positional argument is the route name; without it the
        'route_name' of the current request is used.

        See: http://tools.cherrypy.org/wiki/RoutesUrlGeneration
        """
        # We need to explicitly carry the parameters in the query
        # string (eg. those not matched by routes) because routes has
        # no memory for those.
        #
        # Also route memory is not used when generating named routes
        # eg. url('search').
        params.setdefault('format', str(self.format))

        route_name = args[0] if args else str(cherrypy.request.params['route_name'])
        rn = cherrypy.routes_mapper._routenames  # pylint: disable=protected-access
        if route_name in rn:
            route_obj = rn[route_name]
            if 'id' in route_obj.minkeys:
                params.setdefault('id', str(self.id))

        # Eliminate null and superfluous params.
        for k, v in list(params.items()):
            if (v is None
                    or (k == 'start_index' and int(v) < 2)
                    or (k == 'format' and v == 'html')):
                del params[k]

        return self.urlgen(route_name, **params)

    @staticmethod
    def params(**kwargs):
        """ Get dict of current params with override option. """
        d = cherrypy.request.params.copy()
        # 'fb_locale' is never carried over
        d.pop('fb_locale', None)
        d.update(kwargs)
        return d

    def url_carry(self, *args, **params):
        """ Generate url carrying most params from self. """
        return self.url(*args, **self.params(**params))

    @staticmethod
    def add_amp(url):
        """ Add ? or & to url. """
        if '?' in url:
            return url + '&'
        return url + '?'

    def qualify(self, url):
        """ Join url onto self.base_url. """
        return urllib.parse.urljoin(self.base_url, url)

    def set_format(self, format_):
        """ Sanity check and set the parameter we got from the user.

        Calc format and mediatype to send to the client.  If no format
        was requested, it is guessed from the User-Agent header.

        Raises cherrypy.HTTPError (400) on an unknown format.
        """
        if format_ and format_ not in USER_FORMATS:
            raise cherrypy.HTTPError(400, 'Bad Request. Unknown format.')

        # fold print into html
        if format_ == 'print':
            format_ = 'html'
            self.page_mode = 'print'

        # user explicitly requested format
        if format_:
            self.format = 'html' if format_ == 'mobile' else format_
            self.mediatype = mt[format_]
            self.opensearch_support = 1 if format_ == 'opds' else 2
            return

        # no specific format requested
        ua = self.user_agent
        format_ = 'html'
        mediatype = 'text/html'
        opensearch_support = 0

        # known OPDS consumers
        # 'stanza' is the older opds-ish format supported by stanza et al.
        if ua:
            if ua.startswith('Stanza/'):
                # Stanza/2.1.1 iPhone OS/3.1.3/iPod touch catalog/2.1.1
                # Stanza/3.0 iPhone OS/3.1.3/iPod touch catalog/3.0
                format_ = 'stanza'
                mediatype = mt.opds
                opensearch_support = 2
            elif ua.startswith('FBReader/'):
                # FBReader/0.6.6(java)
                format_ = 'opds'
                mediatype = mt.opds
                opensearch_support = 1
            elif 'Aldiko/' in ua:
                format_ = 'opds'
                mediatype = mt.opds
                opensearch_support = 2
            elif (ua.startswith((
                    'Ibis-Reader/',   # Ibis-Reader/0.1
                    'ouiivo',         # ouiivo
                    # MegaReadLite 1.0 (iPhone Simulator; iPhone OS 4.2; en_US)
                    # QuickReader 2.1.0 (iPhone; iPhone OS 3.1.3; en_US)
                    # QuickRdrLite 3.0.1 (iPhone Simulator; iPhone OS 4.2; en_US)
                    # eBook Search1.0(iPhone Simulator; iPhone OS 4.2; en_US)
                    'QuickR',
                    'Young Reader',
                    'MegaRead',
                    'eBook Search',
                    'CoolReader/',    # CoolReader/3(Android)
                    )) or 'Freda' in ua or 'Duokan' in ua):
                format_ = 'opds'
                mediatype = mt.opds
                opensearch_support = 1

        self.format = format_
        self.mediatype = mediatype
        self.opensearch_support = opensearch_support

    def log_request(self, page):
        """ Log the request params. Now a dummy. """
        pass

    def fix_sortorder(self):
        """ Check selected sort order against available sort orders.

        Does nothing if the page offers no sort orders.  Otherwise falls
        back to the first available order if the selected one is not
        offered, and sets the title suffix and the formatting functions
        that depend on the sort order.
        """
        if not self.sort_orders:
            return

        if not self.sort_order or self.sort_order not in self.sort_orders:
            self.sort_order = self.sort_orders[0]
        self.alternate_sort_orders = [x for x in self.sort_orders
                                      if x != self.sort_order]
        self.sorted_by = self.sorted_msgs[self.sort_order]
        self.title += " (%s)" % self.sorted_by

        # content of extra field depends on sorting
        # sort orders without an entry here (eg. 'nentry') show no extra
        self.f_format_extra = {
            'alpha': self.format_none,
            'title': self.format_none,
            'downloads': self.format_downloads,
            'quantity': self.format_quantity,
            'release_date': self.format_release_date,
            'random': self.format_none,
        }.get(self.sort_order, self.format_none)

        if self.sort_order == 'title':
            self.f_format_title = self.format_title_filing

    @staticmethod
    def _append_languages(title, row):
        """ Append the name of each non-English language of row to title. """
        for lang_id in row.get('fk_langs') or []:
            if lang_id != 'en':
                title += " (%s)" % cherrypy.response.i18n.locale.languages.get(lang_id, lang_id)
        return title

    @staticmethod
    def format_title(row):
        """ Format a book title for display in results. """
        title = gg.cut_at_newline(row.get('title') or 'No Title')
        return OpenSearch._append_languages(title, row)

    @staticmethod
    def format_title_filing(row):
        """ Format a book title for display in results, using the 'filing' field. """
        title = gg.cut_at_newline(row.get('filing') or 'No Title')
        return OpenSearch._append_languages(title, row)

    @staticmethod
    def format_author(row):
        """ Format an author name for display in results. """
        authors = row.get('author')
        if authors is None:
            return None
        authors = [DublinCore.DublinCore.make_pretty_name(a) for a in authors]
        return DublinCore.DublinCore.strunk(authors)

    @staticmethod
    def format_language(row):
        """ Format a language name for display in results. """
        languages = cherrypy.response.i18n.locale.languages
        if row.pk in languages:
            return languages[row.pk]
        return row.title

    @staticmethod
    def format_none(dummy_row):
        """ Output nothing on results. """
        return None

    @staticmethod
    def format_subtitle(row):
        """ Format a book subtitle for display in results. """
        return row.get('subtitle')

    @staticmethod
    def format_downloads(row):
        """ Format the no. of download for display in results. """
        downloads = int(row.get('downloads', 0))
        # NOTE: No. of times a book was downloaded
        return __('1 download', '{0} downloads', downloads).format(downloads)

    @staticmethod
    def format_quantity(row):
        """ Format the quantity of books for display in results. """
        count = int(row.get('quantity', 0))
        # NOTE: No. of books by some author, on a subject, etc.
        return __('1 book', '{0} books', count).format(count)

    @staticmethod
    def format_release_date(row):
        """ Format the release date for display in results. """
        return babel.dates.format_date(row.get('release_date'),
                                       locale=str(cherrypy.response.i18n.locale))

    def format_suggestion(self, row):
        """ Format a suggestion for display in results.

        The last word of the query is replaced by the row title.
        """
        query = ' '.join(self.query.split()[0:-1])
        if query:
            query += ' '
        return query + gg.cut_at_newline(row.get('title') or '')

    @staticmethod
    def format_no_url(dummy_row):
        """ Show no url in results. """
        return None

    def format_bibrec_url(self, row):
        """ Generate a bibrec url """
        return self.url('bibrec', id=row.pk)

    def format_canonical_bibrec_url(self, row):
        """ Generate the rel=canonical bibrec url for a book. """
        return self.url('bibrec', host=self.file_host, protocol='https', id=row.pk, format=None)

    def format_thumb_url(self, row):
        """ Generate the thumb url in results. """
        if row.coverpages:
            return '/' + row.coverpages[0]
        return None

    def format_icon(self, dummy_row):
        """ Show a book icon in results. """
        return self.icon

    def format_icon_titles(self, row):
        """ Show a book icon or audio icon in results. """
        # for 'title' listings, replace book icon with audio icon
        if row.fk_categories and AUDIOBOOK_CATEGORIES.intersection(row.fk_categories):
            return 'audiobook'
        return self.icon
2020-09-03 18:52:31 +00:00
def sql_get(query, **params):
    """ Run query and return the first column of the first row, or None.

    On a DatabaseError the error and the query are logged, the
    connection is detached and the exception is re-raised.
    """
    log_options = {'context': 'REQUEST', 'severity': logging.ERROR}

    conn = cherrypy.engine.pool.connect()
    try:
        c = conn.cursor()
        c.execute(query, params)
        row = c.fetchone()
        return row[0] if row else None

    except DatabaseError as what:
        cherrypy.log("SQL Error: %s\n" % what, **log_options)
        cherrypy.log("Query was: %s\n" % c.mogrify(query, params), **log_options)
        conn.detach()
        raise
2020-09-03 18:52:31 +00:00
class SQLSearcher(object):
    """ Runs SQL statements and turns the result rows into list entries. """

    def search(self, os, sql):
        """ Perform the SQL query and format rows into `Cat` objects.

        Sort order and paging are copied from `os` to `sql`.  The rows
        are formatted with the f_format_* plugin functions of `os` and
        appended to os.entries.  Returns `os`.
        """
        sql.sort_order = os.sort_order
        sql.start_index = os.start_index
        sql.items_per_page = os.items_per_page

        query, params = sql.build()
        # tag the statement with the client ip as trailing SQL comment
        query += ' -- ' + os.ip

        rows = self.execute(query, params)

        # this is not necessarily the size of the result set.
        # if the result set is bigger than this page can show
        # total_results will be last item on page + 1
        os.total_results = min(os.start_index - 1 + len(rows), MAX_RESULTS)

        # the statement may return one row more than fits on the page
        for row in rows[:max(os.items_per_page, 0)]:
            cat = Cat()

            cat.title = os.f_format_title(row)
            cat.subtitle = os.f_format_subtitle(row)
            cat.extra = os.f_format_extra(row)
            cat.url = os.f_format_url(row)
            cat.thumb_url = os.f_format_thumb_url(row)
            cat.icon = os.f_format_icon(row)

            cat.header = row.get('header', '')
            cat.class_ += os.class_
            cat.order = 10

            os.entries.append(cat)

        return os

    @staticmethod
    def mogrify(dummy_os, sql):
        """ Format a query and return it as string without executing it. """
        query, params = sql.build()
        cursor = cherrypy.engine.pool.connect().cursor()
        return cursor.mogrify(query, params).decode('utf-8')

    @staticmethod
    def execute(query, params):
        """ Execute a query and return an array of rows.

        On a DatabaseError the error and the query are logged, the
        connection is detached and the exception is re-raised.
        """
        log_options = {'context': 'REQUEST', 'severity': logging.ERROR}

        conn = cherrypy.engine.pool.connect()
        try:
            c = conn.cursor()
            c.execute(query, params)
            return [xl(c, row) for row in c.fetchall()]

        except DatabaseError as what:
            cherrypy.log("SQL Error: %s\n" % what, **log_options)
            cherrypy.log("Query was: %s\n" % c.mogrify(query, params), **log_options)
            conn.detach()
            raise