autocat3/AdvSearchPage.py

#!/usr/bin/env python
#  -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*-

"""
AdvSearchPage.py

Copyright 2021 by Project Gutenberg

Distributable under the GNU General Public License Version 3 or newer.

Not really "advanced", it reproduces functionality of the old results.php search,
labelled as "Advanced Search", using SQLAlchemy ORM

Differences:
- instead of a link for a new search, a pre-filled advanced search form is shown contextually
- Following our Bibrecord pages, BC -> BCE in dates
- The language selector invokes some language-localization
- Authors are now all in a <ul>
- the first page of results is pageno=1, not 0
- cataloguer mode for listing missing subjects and loccs is no longer supported
    (no authentication in autocat3!)


"""
import cherrypy
import routes

from sqlalchemy import or_, and_, select
from genshi.filters import HTMLFormFiller

from libgutenberg.Models import (
    Alias, Attribute, Author, Book, BookAuthor, Category, File, Lang, Locc, Subject)

import BaseSearcher
from errors import ErrorPage
from Page import Page
from Formatters import formatters


# Shortcut to the global CherryPy configuration.
config = cherrypy.config

# Search-form field name -> path segment under /browse/.  When one of these
# fields is the only search term, the request is redirected to
# /browse/<segment>/<value> instead of running a query.
BROWSE_KEYS = {'lang': 'languages', 'locc': 'loccs', 'category': 'categories'}
# Number of books listed per result page.
PAGESIZE = 100
# When a search matches more than this many books, no entries are listed.
MAX_RESULTS = 1000

_langs = {}
def langname(langcode):
    """ cache of Language names

    Return the language name stored for *langcode*, or *langcode* itself
    when the code is not in the cache.

    The cache (module-level ``_langs``, keyed by ``Lang.id``) is filled from
    the database whenever it is found empty.
    """
    if not _langs:
        session = cherrypy.engine.pool.Session()
        try:
            # Build the mapping first and publish it in one step, so a failure
            # while reading cannot leave a half-filled cache behind.
            _langs.update(
                {lang.id: lang.language for lang in session.query(Lang).all()})
        finally:
            # Always hand the session back, even if the query fails.
            session.close()
    return _langs.get(langcode, langcode)

_cats = {}
def catname(catpk):
    """ cache of category names

    Return the category name stored for primary key *catpk*, or the string
    'Not a valid Category' when the key is not in the cache.

    The cache (module-level ``_cats``, keyed by ``Category.pk``) is filled
    from the database whenever it is found empty.
    """
    if not _cats:
        session = cherrypy.engine.pool.Session()
        try:
            # Build the mapping first and publish it in one step, so a failure
            # while reading cannot leave a half-filled cache behind.
            _cats.update(
                {cat.pk: cat.category for cat in session.query(Category).all()})
        finally:
            # Always hand the session back, even if the query fails.
            session.close()
    return _cats.get(catpk, 'Not a valid Category')


class AdvSearcher(BaseSearcher.OpenSearch):
    """ this object passes the context for the page renderer

    On top of what BaseSearcher.OpenSearch provides, it fixes the page size
    to PAGESIZE and computes the paging attributes ``lastpage``, ``nextpage``
    and ``prevpage`` in finalize().
    """
    def __init__(self):
        super().__init__()
        self.items_per_page = PAGESIZE

    def url(self, *args, **params):
        """ Build a URL via the parent class, always passing 'results' first.

        The keyword parameters are first run through
        BaseSearcher.OpenSearch.params(); 'results' is presumably the route
        name expected by the parent's url() -- confirm in BaseSearcher.
        """
        params = BaseSearcher.OpenSearch.params(**params)
        return super().url('results', *args, **params)

    def finalize(self):
        """ Compute paging info; needs total_results and pageno to be set.

        Sets:
          lastpage -- number of the last page (at least 1)
          nextpage -- pageno + 1, or 0 when there is no following page
          prevpage -- pageno - 1, or 0 when on the first page
        """
        super().finalize()
        # Ceiling division: an exact multiple of PAGESIZE must not produce an
        # extra, empty last page (e.g. 100 results are 1 page, not 2).
        # An empty result set still counts as a single page.
        self.lastpage = max(1, int(-(-self.total_results // PAGESIZE)))
        self.nextpage = self.pageno + 1 if self.pageno < self.lastpage else 0
        # lastpage is always >= 1, so only the first-page check matters here.
        self.prevpage = self.pageno - 1 if self.pageno > 1 else 0


class AdvSearchPage(Page):
    """ search term => list of items

    Reads the search terms from the request parameters, finds the books
    matching every term and renders them with the 'advresults' template.
    """
    def __init__(self):
        super().__init__()
        self.host = cherrypy.config['host']
        self.urlgen = routes.URLGenerator(cherrypy.routes_mapper, {'HTTP_HOST': self.host})
        self.formatter = formatters['html']

    def index (self, **kwargs):
        """ Return the rendered result page for the current request.

        The search terms are taken from cherrypy.request.params; **kwargs is
        accepted but not used.  Recognised terms are 'filetype', 'lang',
        'locc', 'category', 'author', 'title' and 'subject'; any other
        parameter is ignored.  'author', 'title' and 'subject' are split on
        whitespace and every word must match.  The optional 'pageno'
        parameter selects the result page (first page is 1).

        Raises cherrypy.HTTPRedirect when the only term given is one of
        BROWSE_KEYS, sending the client to the matching /browse/ page.
        """
        def entries(results, offset):
            """ results is a collection of book ids, sorted by first Author,
            the query lazily returns book objects (one page of PAGESIZE books
            starting at offset)
            """
            query = session.query(Book).join(
                Book.authors.and_(BookAuthor.heading == 1)).join(BookAuthor.author).filter(
                Book.pk.in_(results)).order_by(Author.name).offset(offset).limit(PAGESIZE)

            yield from query


        os = AdvSearcher()
        params = cherrypy.request.params.copy()
        try:
            pageno = abs(int(params.pop("pageno", 1)))
        except (TypeError, ValueError):
            # non-numeric (or repeated) pageno: fall back to the first page
            pageno = 1
        # pageno=0 would give a negative offset further down
        pageno = max(pageno, 1)
        os.pageno = pageno
        # drop parameters that are not search terms
        for key in ["submit_search", "route_name", "controller", "action"]:
            params.pop(key, None)
        terms = [key for key in params if params[key]]

        # Return a search result page.

        # no terms provided
        if not terms:
            os.total_results = 0
            os.finalize()
            return self.formatter.render('advresults', os)

        # single term, redirect if browsable
        if len(terms) == 1:
            browse_key = BROWSE_KEYS.get(terms[0])
            if browse_key:
                raise cherrypy.HTTPRedirect(
                    "/browse/%s/%s" % (browse_key, params[terms[0]].lower()))

        # multiple terms, create a query
        session = cherrypy.engine.pool.Session()
        try:
            query = session.query(Book.pk)
            selections = []
            resultpks = None
            searchterms = []
            for key in terms:
                if key in ['author', 'title', 'subject']:
                    # free-text fields: every word is a term of its own
                    for word in params[key].split():
                        searchterms.append((key, word))
                else:
                    searchterms.append((key, params[key]))

            # Each term is queried separately; the final result is the
            # intersection of the book ids found for every term.
            for key, val in searchterms:
                if key == 'filetype':
                    pks = query.join(File).filter(File.fk_filetypes == val).all()
                    key = 'Filetype'

                elif key == 'lang':
                    pks = query.join(Book.langs).filter(Lang.id == val).all()
                    val = langname(val)
                    key = 'Language'

                elif key == 'locc':
                    pks = query.join(Book.loccs).filter(Locc.id == val).all()
                    val = val.upper()
                    key = 'LoC Class'

                elif key == 'category':
                    try:
                        val = int(val)
                    except ValueError:
                        # not a category number: ignore this term
                        continue
                    pks = query.join(Book.categories).filter(Category.pk == val).all()
                    val = catname(val)
                    key = 'Category'

                elif key == 'author':
                    # NOTE: val is used verbatim in the pattern, so '%' and '_'
                    # typed by the user act as LIKE wildcards.
                    word = "%{}%".format(val)
                    # match on the author's name or on any of their aliases
                    subq = select(Author.id).join(Author.aliases).filter(
                        Alias.alias.ilike(word))
                    pks = query.join(Book.authors).join(BookAuthor.author).filter(or_(
                        Author.name.ilike(word),
                        Author.id.in_(subq),
                    )).all()
                    key = 'Author'

                elif key == 'title':
                    word = "%{}%".format(val)
                    pks = query.join(Book.attributes).filter(and_(
                        Attribute.fk_attriblist.in_([240, 245, 246, 505]),
                        Attribute.text.ilike(word),
                    )).all()
                    key = 'Title'

                elif key == 'subject':
                    word = "%{}%".format(val)
                    pks = query.join(Book.subjects).filter(
                        Subject.subject.ilike(word),
                    ).all()
                    key = 'Subject'

                else:
                    # Unknown parameter: without this branch `pks` would be
                    # unbound, or still hold the previous term's rows.
                    continue

                pks = {row[0] for row in pks}
                resultpks = resultpks.intersection(pks) if resultpks is not None else pks
                selections.append((key, val, len(pks)))

            if resultpks is None:
                # every term was skipped (invalid category, unknown parameter)
                resultpks = set()

            os.total_results = len(resultpks)
            os.finalize()
            offset = PAGESIZE * (pageno - 1)
            os.start_index = offset + 1
            if os.total_results > MAX_RESULTS:
                # too many hits: list nothing
                os.entries = []
            else:
                os.entries = entries(resultpks, offset)
            os.search_terms = selections
            # pre-fill the search form on the result page with the submitted terms
            instance_filter = HTMLFormFiller(data=params)
            # render before the session is closed: os.entries is a lazy
            # generator that still needs the session
            rendered = self.formatter.render('advresults', os, instance_filter)
        finally:
            # close the session even when a query or the rendering fails
            session.close()
        return rendered
add advanced search page 2021-04-14 21:58:31 +00:00			`#!/usr/bin/env python`
			`# -- mode: python; indent-tabs-mode: nil; -- coding: utf-8 -*-`

			`"""`
			`AdvSearchPage.py`

			`Copyright 2021 by Project Gutenberg`

			`Distributable under the GNU General Public License Version 3 or newer.`

			`Not really "advanced", it reproduces functionality of the old results.php search,`
			`labelled as "Advanced Search", using SQLAlchemy ORM`

			`Differences:`
			`- instead of a link for a new search, a pre-filled advanced search form is shown contextually`
			`- Following our Bibrecord pages, BC -> BCE in dates`
			`- The language selector invokes some language-localization`
			`- Authors are now all in a <ul>`
			`- the first page of results is pageno=1, not 0`
add note on cataloguer mode 2021-04-17 23:03:01 +00:00			`- cataloguer mode to list for missing subjects and loccs no longer supported`
			`(no authentication in autocat3!)`
add advanced search page 2021-04-14 21:58:31 +00:00

			`"""`
			`import cherrypy`
			`import routes`

quell coercion warning on author search 2021-04-17 23:03:40 +00:00			`from sqlalchemy import or_, and_, select`
use htmlfiller filter 2021-04-27 15:59:55 +00:00			`from genshi.filters import HTMLFormFiller`
add advanced search page 2021-04-14 21:58:31 +00:00
			`from libgutenberg.Models import (`
add subject searching 2021-04-17 22:33:14 +00:00			`Alias, Attribute, Author, Book, BookAuthor, Category, File, Lang, Locc, Subject)`
add advanced search page 2021-04-14 21:58:31 +00:00
			`import BaseSearcher`
			`from errors import ErrorPage`
			`from Page import Page`
			`from Formatters import formatters`


			`config = cherrypy.config`

			`BROWSE_KEYS = {'lang': 'languages', 'locc': 'loccs', 'category': 'categories'}`
			`PAGESIZE = 100`
			`MAX_RESULTS = 1000`

			`_langs = {}`
			`def langname(langcode):`
			`""" cache of Language names"""`
			`if not _langs:`
			`session = cherrypy.engine.pool.Session()`
			`for lang in session.query(Lang).all():`
			`_langs[lang.id] = lang.language`
			`return _langs.get(langcode, langcode)`

			`_cats = {}`
			`def catname(catpk):`
			`""" cache of category names"""`
			`if not _cats:`
			`session = cherrypy.engine.pool.Session()`
			`for cat in session.query(Category).all():`
			`_cats[cat.pk] = cat.category`
			`return _cats.get(catpk, 'Not a valid Category')`


			`class AdvSearcher(BaseSearcher.OpenSearch):`
			`""" this object passes the context for the page renderer """`
			`def __init__(self):`
			`super().__init__()`
			`self.items_per_page = PAGESIZE`

			`def url(self, args, *params):`
			`params = BaseSearcher.OpenSearch.params(**params)`
			`return super(AdvSearcher,self).url('results', args, *params)`

			`def finalize(self):`
			`super().finalize()`
			`self.lastpage = int(self.total_results / PAGESIZE) + 1`
			`self.nextpage = self.pageno + 1 if self.pageno + 1 <= self.lastpage else 0`
			`self.prevpage = self.pageno - 1 if self.pageno > 1 <= self.lastpage else 0`


			`class AdvSearchPage(Page):`
			`""" search term => list of items """`
			`def __init__(self):`
			`super().__init__()`
			`self.host = cherrypy.config['host']`
			`self.urlgen = routes.URLGenerator(cherrypy.routes_mapper, {'HTTP_HOST': self.host})`
			`self.formatter = formatters['html']`

			`def index (self, **kwargs):`
			`def entries(results, offset):`
			`""" results is a list of book ids, sorted by first Author,`
			`the query lazily returns book objects`
			`"""`
			`query = session.query(Book).join(`
			`Book.authors.and_(BookAuthor.heading == 1)).join(BookAuthor.author).filter(`
			`Book.pk.in_(results)).order_by(Author.name).offset(offset).limit(PAGESIZE)`

			`for book in query:`
			`yield book`


			`os = AdvSearcher()`
			`params = cherrypy.request.params.copy()`
			`try:`
			`pageno = abs(int(params.pop("pageno", 1)))`
			`except KeyError:`
			`pageno = 1`
			`os.pageno = pageno`
			`for key in ["submit_search", "route_name", "controller", "action"]:`
			`params.pop(key, None)`
			`terms = [key for key in params if params[key]]`

			`# Return a search result page.`

			`# no terms provided`
			`if len(terms) == 0:`
			`os.total_results = 0`
			`os.finalize()`
			`return self.formatter.render('advresults', os)`

			`# single term, redirect if browsable`
			`if len(terms) == 1:`
			`browse_key = BROWSE_KEYS.get(terms[0], None)`
			`if browse_key:`
			`raise cherrypy.HTTPRedirect(`
			`"/browse/%s/%s" % (browse_key, params[terms[0]].lower()))`

			`# multiple terms, create a query`
			`session = cherrypy.engine.pool.Session()`
			`query = session.query(Book.pk)`
			`selections = []`
			`resultpks = None`
			`searchterms = []`
			`for key in terms:`
add subject searching 2021-04-17 22:33:14 +00:00			`if key in ['author', 'title', 'subject']:`
add advanced search page 2021-04-14 21:58:31 +00:00			`for word in params[key].split():`
			`searchterms.append((key, word))`
			`else:`
			`searchterms.append((key, params[key]))`

			`for key, val in searchterms:`
			`if key == 'filetype':`
			`pks = query.join(File).filter(File.fk_filetypes == val).all()`
			`key = 'Filetype'`

			`elif key == 'lang':`
			`pks = query.join(Book.langs).filter(Lang.id == val).all()`
			`val = langname(val)`
			`key = 'Language'`

			`elif key == 'locc':`
			`pks = query.join(Book.loccs).filter(Locc.id == val).all()`
			`val = val.upper()`
			`key = 'LoC Class'`

			`elif key == 'category':`
			`try:`
			`val = int(val)`
			`except ValueError:`
			`continue`
			`pks = query.join(Book.categories).filter(Category.pk == val).all()`
			`val = catname(val)`
			`key = 'Category'`

			`elif key == 'author':`
			`word = "%{}%".format(val)`
quell coercion warning on author search 2021-04-17 23:03:40 +00:00			`subq = select(Author.id).join(Author.aliases).filter(`
			`Alias.alias.ilike(word))`
add advanced search page 2021-04-14 21:58:31 +00:00			`pks = query.join(Book.authors).join(BookAuthor.author).filter(or_(`
			`Author.name.ilike(word),`
			`Author.id.in_(subq),`
			`)).all()`
			`key = 'Author'`

			`elif key == 'title':`
			`word = "%{}%".format(val)`
			`pks = query.join(Book.attributes).filter(and_(`
			`Attribute.fk_attriblist.in_([240, 245, 246, 505]),`
			`Attribute.text.ilike(word),`
			`)).all()`
			`key = 'Title'`

add subject searching 2021-04-17 22:33:14 +00:00			`elif key == 'subject':`
			`word = "%{}%".format(val)`
			`pks = query.join(Book.subjects).filter(`
			`Subject.subject.ilike(word),`
			`).all()`
			`key = 'Subject'`

add advanced search page 2021-04-14 21:58:31 +00:00			`pks = {row[0] for row in pks}`
			`resultpks = resultpks.intersection(pks) if resultpks is not None else pks`
			`num_rows = len(pks)`
			`selections.append((key, val, num_rows))`

			`os.total_results = len(resultpks)`
			`os.finalize()`
			`offset = PAGESIZE * (pageno - 1)`
			`os.start_index = offset + 1`
			`if os.total_results > MAX_RESULTS:`
			`os.entries = []`
			`else:`
			`os.entries = entries(resultpks, offset)`
			`os.search_terms = selections`
use htmlfiller filter 2021-04-27 15:59:55 +00:00			`instance_filter = HTMLFormFiller(data=params)`
			`rendered = self.formatter.render('advresults', os, instance_filter)`
explicitly close sessions 2021-04-21 21:09:32 +00:00			`session.close()`
			`return rendered`