autocat3/CloudStorage.py

316 lines
9.4 KiB
Python
Raw Normal View History

2019-03-28 13:45:03 +00:00
#!/usr/bin/env python
# -*- mode: python; indent-tabs-mode: nil; -*- coding: utf-8 -*-
"""
CloudStorage.py
Copyright 2013-15 by Marcello Perathoner
Distributable under the GNU General Public License Version 3 or newer.
Base classes for uploads to file hosting services.
"""
from __future__ import unicode_literals
from contextlib import closing
from six.moves import urllib
import logging
import re
import os
import cherrypy
import routes
import requests
import requests_oauthlib
from requests import RequestException
from oauthlib.oauth2.rfc6749.errors import OAuth2Error
import BaseSearcher
# pylint: disable=R0921
# Transport adapters created once at import time.  CloudOAuth2Session.__init__
# mounts these same two objects on every session it creates.
http_adapter = requests.adapters.HTTPAdapter ()
https_adapter = requests.adapters.HTTPAdapter ()
# Workaround for a Google Drive quirk: ask oauthlib to relax its token
# scope check.  See:
# https://github.com/idan/oauthlib/commit/ca4811b3087f9d34754d3debf839e247593b8a39
# NOTE: this changes the process environment as a side effect of importing
# this module.
os.environ['OAUTHLIB_RELAX_TOKEN_SCOPE'] = '1'
# Short alias for the global cherrypy configuration.
config = cherrypy.config
# URL generator bound to the cherrypy routes mapper.  The environ dict
# handed to it takes its host from the 'file_host' config entry and its
# HTTPS flag from the 'host_https' config entry.
urlgen = routes.URLGenerator (cherrypy.routes_mapper, {
'HTTP_HOST': config['file_host'],
'HTTPS': config['host_https']
})
2019-03-28 13:45:03 +00:00
def log (msg):
    """ Write msg to the cherrypy log with INFO severity.

    The entry is tagged with the context 'CLOUDSTORAGE'.

    """
    cherrypy.log (
        msg,
        severity = logging.INFO,
        context = 'CLOUDSTORAGE')
def error_log (msg):
    """ Write msg to the cherrypy log with ERROR severity.

    The message is prefixed with 'Error: ' and tagged with the context
    'CLOUDSTORAGE'.  msg must be a string because it is concatenated to
    the prefix.

    """
    text = 'Error: ' + msg
    cherrypy.log (text, severity = logging.ERROR, context = 'CLOUDSTORAGE')
class CloudOAuth2Session (requests_oauthlib.OAuth2Session): # pylint: disable=R0904
    """ An OAuth2 session configured from the cherrypy config.

    Subclasses fill in the class attributes below.  name_prefix selects
    the config entries '<prefix>_client_id' and '<prefix>_client_secret'
    and the route '<prefix>_callback'.

    """

    name_prefix = None            # prefix of the config keys and route name
    oauth2_auth_endpoint = None   # url handed to authorization_url ()
    oauth2_token_endpoint = None  # url handed to fetch_token ()
    oauth2_scope = None           # scope handed to the OAuth2Session constructor

    def __init__ (self, **kwargs):
        """ Initialize session from cherrypy config.

        Any keyword arguments are passed on to OAuth2Session.

        """

        key = self.name_prefix
        app_id = config[key + '_client_id']
        callback_url = urlgen (key + '_callback', host = config['file_host'])

        super (CloudOAuth2Session, self).__init__ (
            client_id = app_id,
            scope = self.oauth2_scope,
            redirect_uri = callback_url,
            **kwargs
        )

        self.client_secret = config[key + '_client_secret']
        # the ebook to send; starts out empty and is set by CloudStorage.index
        self.ebook = None

        # every session mounts the same module-level adapters
        for scheme, adapter in (("http://", http_adapter),
                                ("https://", https_adapter)):
            self.mount (scheme, adapter)


    def oauth_dance (self, kwargs):
        """ Do the OAuth2 dance.

        OAuth 2.0 flow see: http://tools.ietf.org/html/rfc6749

        :param kwargs: dict of request parameters; a 'code' entry is the
                       authorization code handed back by the provider.
        :raises cherrypy.HTTPRedirect: to the authorization endpoint if
                       there is neither a token nor a 'code'.

        """

        if self.token:
            # already authorized, nothing to do
            return

        if 'code' in kwargs:
            # oauth step 2
            # the user's browser just came back with an authorization code
            # get the access_token from the Token Endpoint
            log ('Fetching access token ...')
            self.fetch_token (
                self.oauth2_token_endpoint,
                client_secret = self.client_secret,
                code = kwargs['code'])
            log ('Got access token.')
            return

        # oauth step 1:
        # redirect the user to the Authorization Endpoint
        log ('Building auth url ...')
        target, dummy_state = self.authorization_url (self.oauth2_auth_endpoint)
        log ('Redirecting user to auth endpoint ...')
        raise cherrypy.HTTPRedirect (target)


    def unauthorized (self, msg = 'Unauthorized'):
        """ Called on OAuth2 failure.  Does nothing in this base class. """
class CloudStorage (object):
    """ Base class for uploads to cloud storage providers.

    Subclasses configure the upload through the class attributes below
    and implement upload_file ().

    :cvar name: The name of the cloud service, eg. 'Dropbox'.
    :cvar session_class: The class to use for the oauth session.
    :cvar user_agent: The user agent to make requests to www.gutenberg.org.
    :cvar upload_endpoint: Not used by this base class; presumably the
                           provider's upload url, set by subclasses
                           (TODO confirm).

    """

    name = None
    session_class = CloudOAuth2Session
    user_agent = None
    upload_endpoint = None

    # Characters replaced by fix_filename ().  The backslash must be
    # doubled: inside a character class '\<' is only an escaped '<', so a
    # single backslash would never match a literal backslash.
    re_filename = re.compile (r'[/\\<>:"|?* ]')


    def __init__ (self):
        self.host = cherrypy.config['host']
        self.urlgen = urlgen


    def index (self, **kwargs):
        """ Output the page.

        Runs the OAuth 2.0 flow (see http://tools.ietf.org/html/rfc6749)
        and then streams the selected ebook to the cloud provider.

        :param kwargs: the request parameters.  'id' and 'filetype' select
                       the ebook, 'code' carries the authorization code,
                       'not_approved' or 'error' signal a failure.
        :raises cherrypy.HTTPRedirect: back to the bibrec page when done,
                       or to the authorization endpoint.
        :raises cherrypy.HTTPError: 400 if no ebook is selected, 401 if
                       authorization or the transfer failed.

        """

        session = self.get_or_create_session ()

        if 'id' in kwargs:
            session.ebook = EbookMetaData (kwargs)
        if session.ebook is None:
            raise cherrypy.HTTPError (400, "No ebook selected. Are your cookies enabled?")

        name = self.name

        if 'not_approved' in kwargs or 'error' in kwargs:
            self._dialog (
                _('Sorry. The file could not be sent to {name}.').format (name = name),
                _('Error'))
            # raises HTTPRedirect, so nothing below runs
            self.redirect_done (session)

        try:
            session.oauth_dance (kwargs)

            log ("Sending file %s to %s" % (
                session.ebook.get_source_url (), name))

            with closing (self.request_ebook (session)) as r:
                r.raise_for_status ()
                self.upload_file (session, r)

            log ("File %s sent to %s" % (
                session.ebook.get_source_url (), name))
            self._dialog (
                _('The file has been sent to {name}.').format (name = name),
                _('Sent to {name}').format (name = name))
            self.redirect_done (session)

        except (OAuth2Error, ) as what:
            session.unauthorized (what)
            self.unauthorized ('OAuthError: ' + str (what.urlencoded))

        except (RequestException, IOError, ValueError) as what:
            session.unauthorized (what)
            self.unauthorized ('RequestError: ' + str (what))
            # Fallback: self.unauthorized () raises in this class, so this
            # line is only reached if a subclass overrides it to return.
            raise cherrypy.HTTPError (500, str (what))


    def upload_file (self, oauth_session, response):
        """ Upload the file.

        Must be implemented by subclasses.

        :param oauth_session: the oauth session of the cloud provider.
        :param response: the open streaming response for the ebook file.

        """

        raise NotImplementedError


    def get_or_create_session (self):
        """ Retrieve an ongoing cloud session or create a new one.

        A new session is only constructed if none is stored yet.

        """

        session_name = self.session_class.name_prefix + '_session'
        session = cherrypy.session.get (session_name)
        if session is None:
            session = self.session_class ()
        # always store it back, as the original code did
        cherrypy.session[session_name] = session
        return session


    def delete_session (self):
        """ Delete cloud session.

        Does nothing if there is no cloud session, so that the caller's
        own error response is not replaced by a KeyError.

        """

        session_name = self.session_class.name_prefix + '_session'
        # NOTE: the stored oauth session is not explicitly closed here.
        cherrypy.session.pop (session_name, None)


    def request_ebook (self, session):
        """ Return an open request object for the ebook file.

        The caller is responsible for closing the returned response.

        """

        url = session.ebook.get_source_url ()
        # Caveat: use requests.get, not session.get, because it is an insecure
        # transport. session.get would raise InsecureTransportError
        # turn off server encoding since we're going to re-stream the bytes
        return requests.get (
            url,
            headers = {'user-agent': self.user_agent, 'accept-encoding': ''},
            stream = True
        )


    def fix_filename (self, filename):
        """ Replace characters unsupported by many OSs.

        Each of / \\ < > : " | ? * and the space becomes an underscore.

        """

        return self.re_filename.sub ('_', filename)


    def redirect_done (self, session):
        """ Redirect user back to bibrec page.

        :raises cherrypy.HTTPRedirect: always.

        """

        raise cherrypy.HTTPRedirect (self.urlgen (
            'bibrec', id = session.ebook.id, host = self.host))


    def unauthorized (self, msg = 'Unauthorized'):
        """ Call on OAuth failure.

        Logs the message, drops the cloud session and answers with 401.

        :raises cherrypy.HTTPError: always, with status 401.

        """

        msg = str (msg) # msg may be exception class
        error_log (msg)
        self.delete_session ()
        raise cherrypy.HTTPError (401, msg)


    @staticmethod
    def _dialog (message, title):
        """ Open a user-visible dialog on the next page.

        The dialog is stored as a (message, title) tuple under the
        'user_dialog' key of the cherrypy session.

        """

        cherrypy.session['user_dialog'] = (message, title)
class EbookMetaData (object):
    """ Helper class that holds ebook metadata.

    Holds the numeric ebook id and the requested filetype, and derives
    the filename and the source url from them.

    """

    # the only filetypes accepted in the 'filetype' request parameter
    accepted_filetypes = (
        'epub.images',
        'epub.noimages',
        'kindle.images',
        'kindle.noimages',
        'pdf')


    def __init__ (self, kwargs):
        """ Initialize from the request parameters.

        :param kwargs: dict with the entries 'id' (anything int () accepts)
                       and 'filetype' (one of accepted_filetypes).
        :raises cherrypy.HTTPError: 400 if a parameter is missing or
                       invalid.

        """

        self.id = None
        self.filetype = None

        try:
            self.id = int (kwargs['id'])
            filetype = kwargs['filetype']
            if filetype not in self.accepted_filetypes:
                raise ValueError
            self.filetype = filetype

        except (KeyError, TypeError, ValueError):
            # TypeError: a repeated 'id' parameter arrives as a list, which
            # int () rejects.  That is a bad request too, not a server error.
            raise cherrypy.HTTPError (400, 'Bad Request. Invalid parameters')


    def get_dc (self):
        """ Get a DublinCore struct for the ebook. """

        dc = BaseSearcher.DC (cherrypy.engine.pool)
        dc.load_from_database (self.id)
        return dc


    def get_extension (self):
        """ Get the ebook filename extension.

        This is the part of the filetype before the first dot, except
        that 'kindle' is mapped to 'mobi'.

        """

        ext = self.filetype.split ('.', 1)[0]
        if ext == 'kindle':
            ext = 'mobi'
        return ext


    def get_filename (self):
        """ Get a suitable filename to store the ebook.

        Built from the pretty title of the ebook and the extension, with
        every colon replaced by an underscore.

        """

        filename = self.get_dc ().make_pretty_title () + '.' + self.get_extension ()
        return filename.replace (':', '_')


    def get_source_url (self):
        """ Return the url of the ebook file on gutenberg.org.

        The scheme follows the 'host_https' config entry, the host is the
        'file_host' config entry.

        """

        protocol = 'https://' if cherrypy.config['host_https'] else 'http://'
        base = protocol + str (cherrypy.config['file_host'])

        if self.id == 99999:
            # test filename
            path = 'test.pdf'
        elif self.filetype == 'pdf':
            path = 'files/%d/%d-pdf.pdf' % (self.id, self.id)
        else:
            path = 'ebooks/%d.%s' % (self.id, self.filetype)

        return urllib.parse.urljoin (base, path)