Merge branch 'master' into add-travis
commit
35569dd88b
|
@ -25,7 +25,7 @@ def onix_feed(facet, max=None):
|
|||
editions = facet.facet_object.filter_model("Edition",editions).distinct()
|
||||
for edition in editions:
|
||||
edition_prod = product(edition, facet.facet_object)
|
||||
if edition_prod:
|
||||
if edition_prod is not None:
|
||||
feed.append(edition_prod)
|
||||
return etree.tostring(feed, pretty_print=True)
|
||||
|
||||
|
@ -34,7 +34,7 @@ def onix_feed_for_work(work):
|
|||
feed.append(header(work))
|
||||
for edition in models.Edition.objects.filter(work=work,ebooks__isnull=False).distinct():
|
||||
edition_prod = product(edition)
|
||||
if edition_prod:
|
||||
if edition_prod is not None:
|
||||
feed.append(product(edition))
|
||||
return etree.tostring(feed, pretty_print=True)
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ django imports
|
|||
from django.contrib.auth.models import User
|
||||
from django.test import TestCase
|
||||
from django.test.client import Client
|
||||
from django.utils.timezone import now
|
||||
|
||||
"""
|
||||
regluit imports
|
||||
|
@ -17,7 +18,6 @@ regluit imports
|
|||
import regluit.core.isbn
|
||||
|
||||
from regluit.core import models
|
||||
from regluit.utils.localdatetime import now
|
||||
from regluit.api import models as apimodels
|
||||
|
||||
class ApiTests(TestCase):
|
||||
|
|
|
@ -40,7 +40,7 @@ urlpatterns = [
|
|||
url(r"^onix/(?P<facet>.*)/$", OnixView.as_view(), name="onix"),
|
||||
url(r"^onix/$", OnixView.as_view(), name="onix_all"),
|
||||
url(r'^id/work/(?P<work_id>\w+)/$', negotiate_content, name="work_identifier"),
|
||||
url(r'^loader/yaml$',load_yaml, name="load_yaml"),
|
||||
url(r'^travisci/webhook$',travisci_webhook, name="travisci_webhook"),
|
||||
url(r'^loader/yaml$', load_yaml, name="load_yaml"),
|
||||
url(r'^travisci/webhook$', travisci_webhook, name="travisci_webhook"),
|
||||
url(r'^', include(v1_api.urls)),
|
||||
]
|
||||
|
|
File diff suppressed because one or more lines are too long
54230
bookdata/doab_auths.json
54230
bookdata/doab_auths.json
File diff suppressed because it is too large
Load Diff
|
@ -5,25 +5,24 @@ from urllib import quote
|
|||
from functools import partial
|
||||
from xml.etree import ElementTree
|
||||
|
||||
from django.apps import apps
|
||||
|
||||
from . exceptions import BooXtreamError
|
||||
from . models import Boox
|
||||
|
||||
|
||||
class BooXtream(object):
|
||||
""" ``apikey``
|
||||
|
||||
The API key for your BooXtream account, obtained from BooXtream. Defaults to using
|
||||
The API key for your BooXtream account, obtained from BooXtream. Defaults to using
|
||||
settings.BOOXTREAM_API_KEY
|
||||
|
||||
``apiuser``
|
||||
|
||||
The username key for your BooXtream account, obtained from BooXtream. Defaults to using
|
||||
The username key for your BooXtream account, obtained from BooXtream. Defaults to using
|
||||
settings.BOOXTREAM_API_USER
|
||||
|
||||
|
||||
|
||||
|
||||
``timeout``
|
||||
|
||||
|
||||
passed to requests
|
||||
"""
|
||||
def __init__(self,
|
||||
|
@ -36,58 +35,60 @@ class BooXtream(object):
|
|||
apiuser = settings.BOOXTREAM_API_USER
|
||||
self.endpoint = 'https://service.booxtream.com/'
|
||||
self.postrequest = partial(requests.post, timeout=timeout, auth=(apiuser,apikey))
|
||||
|
||||
|
||||
|
||||
def platform(self, epubfile=None, epub=True, kf8mobi=False, **kwargs):
    """Make an API request to BooXtream and record the result.

    Posts ``epubfile`` and the supplied ``kwargs`` to the ``booxtream.xml``
    endpoint through ``self.postrequest``, deserializes the XML response and
    stores the download links it contains in a new ``Boox`` record.

    ``epubfile``
        file-like object to upload. If it has no ``name`` ending in
        ``.epub``, a random ``.epub`` file name is generated for it.

    ``epub``, ``kf8mobi``
        sent to BooXtream as the ``'1'``/``'0'`` form fields ``epub`` and
        ``kf8mobi``.

    ``kwargs``
        sent as the form data of the request; ``referenceid``,
        ``downloadlimit`` and ``expirydays`` are also copied onto the
        ``Boox`` record.

    Returns the newly created ``Boox`` instance (not a bare link).

    Will raise ``BooXtreamError`` if the response contains any
    ``Response/Error`` element.
    """
    # the model is looked up through the app registry
    Boox = apps.get_model('booxtream', 'Boox')

    url = self.endpoint + 'booxtream.xml'
    kwargs['epub'] = '1' if epub else '0'
    kwargs['kf8mobi'] = '1' if kf8mobi else '0'
    if epubfile:
        if hasattr(epubfile, 'name') and str(epubfile.name).endswith('.epub'):
            files = {'epubfile': (str(epubfile.name), epubfile)}
        else:
            # give it a random file name so that kindlegen doesn't choke
            # needed for in-memory (StringIO) epubs
            files = {'epubfile': ('%012x.epub' % random.randrange(16**12), epubfile)}
    else:
        files = {}

    if settings.LOCAL_TEST:
        # fake it, so you can test other functions without hitting booxtream
        download_link_epub = 'https://github.com/eshellman/42_ebook/blob/master/download/42.epub?raw=true&extra=download.booxtream.com/'
        download_link_mobi = 'https://github.com/eshellman/42_ebook/blob/master/download/42.mobi?raw=true'
    else:
        resp = self.postrequest(url, data=kwargs, files=files)
        doc = ElementTree.fromstring(resp.content)

        # it turns out an Error can have an Error in it
        errors = doc.findall('.//Response/Error')
        if errors:
            raise BooXtreamError(errors)

        # a missing DownloadLink element is recorded as None
        epub_node = doc.find('.//DownloadLink[@type="epub"]')
        download_link_epub = epub_node.text if epub_node is not None else None
        mobi_node = doc.find('.//DownloadLink[@type="mobi"]')
        download_link_mobi = mobi_node.text if mobi_node is not None else None

    # single creation point shared by the fake and the real path
    return Boox.objects.create(
        download_link_epub=download_link_epub,
        download_link_mobi=download_link_mobi,
        referenceid=kwargs.get('referenceid'),
        downloads_remaining=kwargs.get('downloadlimit'),
        expirydays=kwargs.get('expirydays'),
    )
|
||||
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
from django.apps import AppConfig
|
||||
from django.db.models.signals import post_migrate
|
||||
|
||||
from regluit.core.signals import create_notice_types
|
||||
|
||||
class CoreConfig(AppConfig):
    """Django application configuration for ``regluit.core``."""

    name = 'regluit.core'
    verbose_name = ' core objects'

    def ready(self):
        """Hook ``create_notice_types`` up to ``post_migrate`` for this app."""
        # imported inside ready() instead of at module level
        # NOTE(review): presumably so the signals module is only loaded once
        # the app registry is populated -- confirm
        from regluit.core.signals import create_notice_types as notice_handler
        post_migrate.connect(receiver=notice_handler, sender=self)
|
|
@ -23,6 +23,7 @@ from django_comments.models import Comment
|
|||
from github3 import (login, GitHub)
|
||||
from github3.repos.release import Release
|
||||
|
||||
from django.utils.timezone import now
|
||||
from gitenberg.metadata.pandata import Pandata
|
||||
|
||||
# regluit imports
|
||||
|
@ -31,7 +32,6 @@ import regluit
|
|||
import regluit.core.isbn
|
||||
from regluit.core.validation import test_file
|
||||
from regluit.marc.models import inverse_marc_rels
|
||||
from regluit.utils.localdatetime import now
|
||||
|
||||
from . import cc
|
||||
from . import models
|
||||
|
@ -49,7 +49,7 @@ def add_by_oclc(isbn, work=None):
|
|||
|
||||
def add_by_oclc_from_google(oclc):
|
||||
if oclc:
|
||||
logger.info("adding book by oclc %s", oclc)
|
||||
logger.info(u"adding book by oclc %s", oclc)
|
||||
else:
|
||||
return None
|
||||
try:
|
||||
|
@ -59,10 +59,10 @@ def add_by_oclc_from_google(oclc):
|
|||
try:
|
||||
results = _get_json(url, {"q": '"OCLC%s"' % oclc})
|
||||
except LookupFailure, e:
|
||||
logger.exception("lookup failure for %s", oclc)
|
||||
logger.exception(u"lookup failure for %s", oclc)
|
||||
return None
|
||||
if not results.has_key('items') or not results['items']:
|
||||
logger.warn("no google hits for %s", oclc)
|
||||
logger.warn(u"no google hits for %s", oclc)
|
||||
return None
|
||||
|
||||
try:
|
||||
|
@ -70,16 +70,16 @@ def add_by_oclc_from_google(oclc):
|
|||
models.Identifier(type='oclc', value=oclc, edition=e, work=e.work).save()
|
||||
return e
|
||||
except LookupFailure, e:
|
||||
logger.exception("failed to add edition for %s", oclc)
|
||||
logger.exception(u"failed to add edition for %s", oclc)
|
||||
except IntegrityError, e:
|
||||
logger.exception("google books data for %s didn't fit our db", oclc)
|
||||
logger.exception(u"google books data for %s didn't fit our db", oclc)
|
||||
return None
|
||||
|
||||
def valid_isbn(isbn):
    """Run ``isbn`` through the 'isbn' identifier cleaner.

    Returns whatever ``identifier_cleaner('isbn')(isbn)`` returns. If the
    cleaner raises, the failure is logged with its traceback and ``None`` is
    returned instead.
    """
    try:
        return identifier_cleaner('isbn')(isbn)
    except Exception:
        # `except Exception` rather than a bare `except`, so that
        # KeyboardInterrupt and SystemExit are not swallowed here
        logger.exception(u"invalid isbn: %s", isbn)
        return None
|
||||
|
||||
def add_by_isbn(isbn, work=None, language='xx', title=''):
|
||||
|
@ -88,13 +88,17 @@ def add_by_isbn(isbn, work=None, language='xx', title=''):
|
|||
try:
|
||||
e = add_by_isbn_from_google(isbn, work=work)
|
||||
except LookupFailure:
|
||||
logger.exception("failed google lookup for %s", isbn)
|
||||
logger.exception(u"failed google lookup for %s", isbn)
|
||||
# try again some other time
|
||||
return None
|
||||
if e:
|
||||
if e.work.language == 'xx' and language != 'xx':
|
||||
e.work.language == language
|
||||
e.work.save()
|
||||
logger.info('changed language for {} to {}'.format(isbn, language))
|
||||
return e
|
||||
|
||||
logger.info("null came back from add_by_isbn_from_google: %s", isbn)
|
||||
logger.info(u"null came back from add_by_isbn_from_google: %s", isbn)
|
||||
|
||||
# if there's a a title, we want to create stub editions and
|
||||
# works, even if google doesn't know about it # but if it's not valid,
|
||||
|
@ -129,10 +133,10 @@ def get_google_isbn_results(isbn):
|
|||
try:
|
||||
results = _get_json(url, {"q": "isbn:%s" % isbn})
|
||||
except LookupFailure:
|
||||
logger.exception("lookup failure for %s", isbn)
|
||||
logger.exception(u"lookup failure for %s", isbn)
|
||||
return None
|
||||
if not results.has_key('items') or not results['items']:
|
||||
logger.warn("no google hits for %s", isbn)
|
||||
logger.warn(u"no google hits for %s", isbn)
|
||||
return None
|
||||
return results
|
||||
|
||||
|
@ -201,7 +205,7 @@ def update_edition(edition):
|
|||
# if the language of the edition no longer matches that of the parent work,
|
||||
# attach edition to the
|
||||
if edition.work.language != language:
|
||||
logger.info("reconnecting %s since it is %s instead of %s",
|
||||
logger.info(u"reconnecting %s since it is %s instead of %s",
|
||||
googlebooks_id, language, edition.work.language)
|
||||
old_work = edition.work
|
||||
|
||||
|
@ -210,7 +214,7 @@ def update_edition(edition):
|
|||
edition.work = new_work
|
||||
edition.save()
|
||||
for identifier in edition.identifiers.all():
|
||||
logger.info("moving identifier %s", identifier.value)
|
||||
logger.info(u"moving identifier %s", identifier.value)
|
||||
identifier.work = new_work
|
||||
identifier.save()
|
||||
if old_work and old_work.editions.count() == 0:
|
||||
|
@ -256,7 +260,7 @@ def add_by_isbn_from_google(isbn, work=None):
|
|||
edition.new = False
|
||||
return edition
|
||||
|
||||
logger.info("adding new book by isbn %s", isbn)
|
||||
logger.info(u"adding new book by isbn %s", isbn)
|
||||
results = get_google_isbn_results(isbn)
|
||||
if results:
|
||||
try:
|
||||
|
@ -267,9 +271,9 @@ def add_by_isbn_from_google(isbn, work=None):
|
|||
isbn=isbn
|
||||
)
|
||||
except LookupFailure, e:
|
||||
logger.exception("failed to add edition for %s", isbn)
|
||||
logger.exception(u"failed to add edition for %s", isbn)
|
||||
except IntegrityError, e:
|
||||
logger.exception("google books data for %s didn't fit our db", isbn)
|
||||
logger.exception(u"google books data for %s didn't fit our db", isbn)
|
||||
return None
|
||||
return None
|
||||
|
||||
|
@ -320,7 +324,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
|
|||
if results:
|
||||
item = results
|
||||
else:
|
||||
logger.info("loading metadata from google for %s", googlebooks_id)
|
||||
logger.info(u"loading metadata from google for %s", googlebooks_id)
|
||||
url = "https://www.googleapis.com/books/v1/volumes/%s" % googlebooks_id
|
||||
item = _get_json(url)
|
||||
d = item['volumeInfo']
|
||||
|
@ -343,7 +347,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
|
|||
if len(language) > 5:
|
||||
language = language[0:5]
|
||||
if work and work.language != language:
|
||||
logger.info("not connecting %s since it is %s instead of %s",
|
||||
logger.info(u"not connecting %s since it is %s instead of %s",
|
||||
googlebooks_id, language, work.language)
|
||||
work = None
|
||||
# isbn = None
|
||||
|
@ -371,7 +375,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
|
|||
try:
|
||||
e = models.Identifier.objects.get(type='goog', value=googlebooks_id).edition
|
||||
e.new = False
|
||||
logger.warning(" whoa nellie, somebody else created an edition while we were working.")
|
||||
logger.warning(u" whoa nellie, somebody else created an edition while we were working.")
|
||||
if work.new:
|
||||
work.delete()
|
||||
return e
|
||||
|
@ -404,19 +408,19 @@ def relate_isbn(isbn, cluster_size=1):
|
|||
"""add a book by isbn and then see if there's an existing work to add it to so as to make a
|
||||
cluster bigger than cluster_size.
|
||||
"""
|
||||
logger.info("finding a related work for %s", isbn)
|
||||
logger.info(u"finding a related work for %s", isbn)
|
||||
|
||||
edition = add_by_isbn(isbn)
|
||||
if edition is None:
|
||||
return None
|
||||
if edition.work is None:
|
||||
logger.info("didn't add related to null work")
|
||||
logger.info(u"didn't add related to null work")
|
||||
return None
|
||||
if edition.work.editions.count() > cluster_size:
|
||||
return edition.work
|
||||
for other_isbn in thingisbn(isbn):
|
||||
# 979's come back as 13
|
||||
logger.debug("other_isbn: %s", other_isbn)
|
||||
logger.debug(u"other_isbn: %s", other_isbn)
|
||||
if len(other_isbn) == 10:
|
||||
other_isbn = regluit.core.isbn.convert_10_to_13(other_isbn)
|
||||
related_edition = add_by_isbn(other_isbn, work=edition.work)
|
||||
|
@ -427,7 +431,7 @@ def relate_isbn(isbn, cluster_size=1):
|
|||
related_edition.work = edition.work
|
||||
related_edition.save()
|
||||
elif related_edition.work_id != edition.work_id:
|
||||
logger.debug("merge_works path 1 %s %s", edition.work_id, related_edition.work_id)
|
||||
logger.debug(u"merge_works path 1 %s %s", edition.work_id, related_edition.work_id)
|
||||
merge_works(related_edition.work, edition.work)
|
||||
if related_edition.work.editions.count() > cluster_size:
|
||||
return related_edition.work
|
||||
|
@ -438,7 +442,7 @@ def add_related(isbn):
|
|||
The initial seed ISBN will be added if it's not already there.
|
||||
"""
|
||||
# make sure the seed edition is there
|
||||
logger.info("adding related editions for %s", isbn)
|
||||
logger.info(u"adding related editions for %s", isbn)
|
||||
|
||||
new_editions = []
|
||||
|
||||
|
@ -446,14 +450,14 @@ def add_related(isbn):
|
|||
if edition is None:
|
||||
return new_editions
|
||||
if edition.work is None:
|
||||
logger.warning("didn't add related to null work")
|
||||
logger.warning(u"didn't add related to null work")
|
||||
return new_editions
|
||||
# this is the work everything will hang off
|
||||
work = edition.work
|
||||
other_editions = {}
|
||||
for other_isbn in thingisbn(isbn):
|
||||
# 979's come back as 13
|
||||
logger.debug("other_isbn: %s", other_isbn)
|
||||
logger.debug(u"other_isbn: %s", other_isbn)
|
||||
if len(other_isbn) == 10:
|
||||
other_isbn = regluit.core.isbn.convert_10_to_13(other_isbn)
|
||||
related_edition = add_by_isbn(other_isbn, work=work)
|
||||
|
@ -466,7 +470,7 @@ def add_related(isbn):
|
|||
related_edition.work = work
|
||||
related_edition.save()
|
||||
elif related_edition.work_id != work.id:
|
||||
logger.debug("merge_works path 1 %s %s", work.id, related_edition.work_id)
|
||||
logger.debug(u"merge_works path 1 %s %s", work.id, related_edition.work_id)
|
||||
work = merge_works(work, related_edition.work)
|
||||
else:
|
||||
if other_editions.has_key(related_language):
|
||||
|
@ -476,14 +480,14 @@ def add_related(isbn):
|
|||
|
||||
# group the other language editions together
|
||||
for lang_group in other_editions.itervalues():
|
||||
logger.debug("lang_group (ed, work): %s", [(ed.id, ed.work_id) for ed in lang_group])
|
||||
logger.debug(u"lang_group (ed, work): %s", [(ed.id, ed.work_id) for ed in lang_group])
|
||||
if len(lang_group) > 1:
|
||||
lang_edition = lang_group[0]
|
||||
logger.debug("lang_edition.id: %s", lang_edition.id)
|
||||
logger.debug(u"lang_edition.id: %s", lang_edition.id)
|
||||
# compute the distinct set of works to merge into lang_edition.work
|
||||
works_to_merge = set([ed.work for ed in lang_group[1:]]) - set([lang_edition.work])
|
||||
for w in works_to_merge:
|
||||
logger.debug("merge_works path 2 %s %s", lang_edition.work_id, w.id)
|
||||
logger.debug(u"merge_works path 2 %s %s", lang_edition.work_id, w.id)
|
||||
merged_work = merge_works(lang_edition.work, w)
|
||||
models.WorkRelation.objects.get_or_create(
|
||||
to_work=lang_group[0].work,
|
||||
|
@ -498,17 +502,21 @@ def thingisbn(isbn):
|
|||
Library Thing. (takes isbn_10 or isbn_13, returns isbn_10, except for 979 isbns,
|
||||
which come back as isbn_13')
|
||||
"""
|
||||
logger.info("looking up %s at ThingISBN", isbn)
|
||||
logger.info(u"looking up %s at ThingISBN", isbn)
|
||||
url = "https://www.librarything.com/api/thingISBN/%s" % isbn
|
||||
xml = requests.get(url, headers={"User-Agent": settings.USER_AGENT}).content
|
||||
doc = ElementTree.fromstring(xml)
|
||||
return [e.text for e in doc.findall('isbn')]
|
||||
try:
|
||||
doc = ElementTree.fromstring(xml)
|
||||
return [e.text for e in doc.findall('isbn')]
|
||||
except SyntaxError:
|
||||
# LibraryThing down
|
||||
return []
|
||||
|
||||
|
||||
def merge_works(w1, w2, user=None):
|
||||
"""will merge the second work (w2) into the first (w1)
|
||||
"""
|
||||
logger.info("merging work %s into %s", w2.id, w1.id)
|
||||
logger.info(u"merging work %s into %s", w2.id, w1.id)
|
||||
# don't merge if the works are the same or at least one of the works has no id
|
||||
#(for example, when w2 has already been deleted)
|
||||
if w1 is None or w2 is None or w1.id == w2.id or w1.id is None or w2.id is None:
|
||||
|
@ -583,7 +591,7 @@ def detach_edition(e):
|
|||
will detach edition from its work, creating a new stub work. if remerge=true, will see if
|
||||
there's another work to attach to
|
||||
"""
|
||||
logger.info("splitting edition %s from %s", e, e.work)
|
||||
logger.info(u"splitting edition %s from %s", e, e.work)
|
||||
w = models.Work(title=e.title, language=e.work.language)
|
||||
w.save()
|
||||
|
||||
|
@ -618,7 +626,7 @@ def add_openlibrary(work, hard_refresh=False):
|
|||
work.save()
|
||||
|
||||
# find the first ISBN match in OpenLibrary
|
||||
logger.info("looking up openlibrary data for work %s", work.id)
|
||||
logger.info(u"looking up openlibrary data for work %s", work.id)
|
||||
|
||||
e = None # openlibrary edition json
|
||||
w = None # openlibrary work json
|
||||
|
@ -633,7 +641,7 @@ def add_openlibrary(work, hard_refresh=False):
|
|||
try:
|
||||
e = _get_json(url, params, type='ol')
|
||||
except LookupFailure:
|
||||
logger.exception("OL lookup failed for %s", isbn_key)
|
||||
logger.exception(u"OL lookup failed for %s", isbn_key)
|
||||
e = {}
|
||||
if e.has_key(isbn_key):
|
||||
if e[isbn_key].has_key('details'):
|
||||
|
@ -673,7 +681,7 @@ def add_openlibrary(work, hard_refresh=False):
|
|||
)
|
||||
if e[isbn_key]['details'].has_key('works'):
|
||||
work_key = e[isbn_key]['details']['works'].pop(0)['key']
|
||||
logger.info("got openlibrary work %s for isbn %s", work_key, isbn_key)
|
||||
logger.info(u"got openlibrary work %s for isbn %s", work_key, isbn_key)
|
||||
models.Identifier.get_or_add(type='olwk', value=work_key, work=work)
|
||||
try:
|
||||
w = _get_json("https://openlibrary.org" + work_key, type='ol')
|
||||
|
@ -691,14 +699,14 @@ def add_openlibrary(work, hard_refresh=False):
|
|||
if w.has_key('subjects') and len(w['subjects']) > len(subjects):
|
||||
subjects = w['subjects']
|
||||
except LookupFailure:
|
||||
logger.exception("OL lookup failed for %s", work_key)
|
||||
logger.exception(u"OL lookup failed for %s", work_key)
|
||||
if not subjects:
|
||||
logger.warn("unable to find work %s at openlibrary", work.id)
|
||||
logger.warn(u"unable to find work %s at openlibrary", work.id)
|
||||
return
|
||||
|
||||
# add the subjects to the Work
|
||||
for s in subjects:
|
||||
logger.info("adding subject %s to work %s", s, work.id)
|
||||
logger.info(u"adding subject %s to work %s", s, work.id)
|
||||
subject = models.Subject.set_by_name(s, work=work)
|
||||
|
||||
work.save()
|
||||
|
@ -716,9 +724,9 @@ def _get_json(url, params={}, type='gb'):
|
|||
if response.status_code == 200:
|
||||
return json.loads(response.content)
|
||||
else:
|
||||
logger.error("unexpected HTTP response: %s", response)
|
||||
logger.error(u"unexpected HTTP response: %s", response)
|
||||
if response.content:
|
||||
logger.error("response content: %s", response.content)
|
||||
logger.error(u"response content: %s", response.content)
|
||||
raise LookupFailure("GET failed: url=%s and params=%s" % (url, params))
|
||||
|
||||
|
||||
|
@ -766,7 +774,7 @@ def load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, seed_isbn, url
|
|||
ebook = models.Ebook()
|
||||
|
||||
if len(ebooks) > 1:
|
||||
logger.warning("There is more than one Ebook matching url {0}".format(url))
|
||||
logger.warning(u"There is more than one Ebook matching url {0}".format(url))
|
||||
|
||||
|
||||
ebook.format = format
|
||||
|
@ -826,8 +834,6 @@ def edition_for_etype(etype, metadata, default=None):
|
|||
for key in metadata.edition_identifiers.keys():
|
||||
return edition_for_ident(key, metadata.identifiers[key])
|
||||
|
||||
MATCH_LICENSE = re.compile(r'creativecommons.org/licenses/([^/]+)/')
|
||||
|
||||
def load_ebookfile(url, etype):
|
||||
'''
|
||||
return a ContentFile if a new ebook has been loaded
|
||||
|
@ -960,8 +966,7 @@ class BasePandataLoader(object):
|
|||
if contentfile:
|
||||
contentfile_name = '/loaded/ebook_{}.{}'.format(edition.id, key)
|
||||
path = default_storage.save(contentfile_name, contentfile)
|
||||
lic = MATCH_LICENSE.search(metadata.rights_url)
|
||||
license = 'CC {}'.format(lic.group(1).upper()) if lic else ''
|
||||
license = cc.license_from_cc_url(metadata.rights_url)
|
||||
ebf = models.EbookFile.objects.create(
|
||||
format=key,
|
||||
edition=edition,
|
||||
|
|
17
core/cc.py
17
core/cc.py
|
@ -1,8 +1,11 @@
|
|||
# coding=utf-8
|
||||
# mostly constants related to Creative Commons
|
||||
''' mostly constants related to Creative Commons
|
||||
# let's be DRY with these parameters
|
||||
|
||||
## need to add versioned CC entries
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
INFO_CC = (
|
||||
('CC BY-NC-ND', 'by-nc-nd', 'Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported (CC BY-NC-ND 3.0)', 'https://creativecommons.org/licenses/by-nc-nd/3.0/', 'Creative Commons Attribution-NonCommercial-NoDerivs'),
|
||||
|
@ -162,3 +165,15 @@ def match_license(license_string):
|
|||
except ValueError:
|
||||
pass
|
||||
return RIGHTS_ALIAS.get(license_string, None)
|
||||
|
||||
# captures the license code (the path segment after /licenses/, e.g. "by-nc-nd")
# from a Creative Commons license URL; the dot is escaped so that only a
# literal "creativecommons.org" matches
MATCH_LICENSE = re.compile(r'creativecommons\.org/licenses/([^/]+)/')

def license_from_cc_url(rights_url):
    """Derive a license label from a rights URL.

    Returns
      * ``None`` when ``rights_url`` is empty or ``None``,
      * ``'CC <CODE>'`` (code upper-cased, e.g. ``'CC BY-NC-ND'``) when the
        URL contains a Creative Commons license path,
      * ``'OPENEDITION'`` when the URL mentions ``openedition.org``,
      * ``''`` for any other URL.
    """
    if not rights_url:
        return None
    lic = MATCH_LICENSE.search(rights_url)
    if lic:
        return 'CC {}'.format(lic.group(1).upper())
    if 'openedition.org' in rights_url:
        return 'OPENEDITION'
    return ''
|
||||
|
||||
|
|
|
@ -45,10 +45,10 @@ def convert_10_to_13(isbn):
|
|||
except:
|
||||
return None
|
||||
|
||||
ISBN_REGEX = re.compile(r'^(\d{9}|\d{12})(\d|X)$')
|
||||
DASH_REGEX = re.compile(r'[ \-–—]+')
|
||||
ISBN_REGEX = re.compile(r'^(\d{9}[\dX]|\d{13})$')
|
||||
DASH_REGEX = re.compile(u'[ \\-–—‐,;]+') #includes unicode hyphen, endash and emdash
|
||||
def strip(s):
|
||||
"""Strips away any - or spaces. If the remaining string is of length 10 or 13
|
||||
"""Strips away any - or spaces and some punctuation. If the remaining string is of length 10 or 13
|
||||
with digits only in anything but the last
|
||||
check digit (which may be X), then return '' -- otherwise return the remaining string
|
||||
"""
|
||||
|
|
|
@ -2,11 +2,12 @@ import csv
|
|||
import HTMLParser
|
||||
import httplib
|
||||
import logging
|
||||
import mechanize
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import mechanize
|
||||
import requests
|
||||
|
||||
from datetime import datetime
|
||||
from regluit.core import models
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -20,7 +21,7 @@ class LibraryThing(object):
|
|||
"""
|
||||
url = "https://www.librarything.com"
|
||||
csv_file_url = "https://www.librarything.com/export-csv"
|
||||
|
||||
|
||||
def __init__(self, username=None, password=None):
|
||||
self.username = username
|
||||
self.password = password
|
||||
|
@ -40,77 +41,98 @@ class LibraryThing(object):
|
|||
def parse_csv(self):
    """Yield one dict per row of the CSV read from ``self.csv_handle``.

    Each dict has the keys ``title``, ``author``, ``isbn``, ``comment``,
    ``tags``, ``collections`` and ``reviews``. Title, author and reviews are
    HTML-unescaped; ``isbn`` is a (possibly empty) list of strings.
    """
    unescape = HTMLParser.HTMLParser().unescape
    # There are more fields to be parsed out. Note that there is a
    # second author column to handle
    for record in csv.DictReader(self.csv_handle):
        # ISBNs are written like '[123456789x]' in the CSV, suggesting possibility of a list
        bracketed = re.match(r'^\[(.*)\]$', record["'ISBNs'"])
        isbn_list = bracketed.group(1).split() if bracketed else []
        yield {
            'title': unescape(record["'TITLE'"]),
            'author': unescape(record["'AUTHOR (first, last)'"]),
            'isbn': isbn_list,
            'comment': record["'COMMENT'"],
            'tags': record["'TAGS'"],
            'collections': record["'COLLECTIONS'"],
            'reviews': unescape(record["'REVIEWS'"]),
        }
|
||||
def viewstyle_1(self, rows):
    """Yield a dict of book data for each catalog row rendered in view style 1.

    ``rows`` is an iterable of elements supporting ``.xpath()``
    (presumably lxml ``<tr>`` elements -- confirm against the caller).
    Columns are read by position: cover, title, author, date, tags, rating,
    entry date.

    Each yielded dict has the keys ``cover``, ``title``, ``work_id``,
    ``book_id``, ``author``, ``date``, ``tags``, ``rating`` and
    ``entry_date``. ``work_id``/``book_id`` are ``None`` when the title link
    does not look like ``/work/<id>/book/<id>``; ``author`` is ``None`` when
    the author cell cannot be read.
    """
    for row in rows:
        book_data = {}
        cols = row.xpath('td')

        # cover
        cover_id = cols[0].attrib["id"]
        cover_img = cols[0].xpath('.//img')[0]
        book_data["cover"] = {
            "cover_id": cover_id,
            "image": {
                "width": cover_img.attrib['width'],
                "src": cover_img.attrib['src'],
            },
        }

        # title
        title_link = cols[1].xpath('.//a')[0]
        book_data["title"] = {
            "href": title_link.attrib['href'],
            "title": title_link.text,
        }

        # extract work_id and book_id from href
        try:
            (book_data["work_id"], book_data["book_id"]) = re.match(
                "^/work/(.*)/book/(.*)$",
                book_data["title"]["href"]
            ).groups()
        except Exception:
            # no match (or unusable href): still best-effort, but no longer a
            # bare except that would also trap KeyboardInterrupt/SystemExit
            (book_data["work_id"], book_data["book_id"]) = (None, None)

        # author -- what if there is more than 1? or none?
        try:
            author_link = cols[2].xpath('.//a')[0]
            book_data["author"] = {
                "display_name": author_link.text,
                "href": author_link.attrib['href'],
                "name": cols[2].xpath('div')[0].text,
            }
        except Exception:
            # missing link, href or div: record no author
            book_data["author"] = None

        # date
        book_data["date"] = cols[3].xpath('span')[0].text

        # tags: keep only links that actually have text
        tag_links = cols[4].xpath('.//a')
        book_data["tags"] = [a.text for a in tag_links if a.text is not None]

        # rating -- count # of stars
        book_data["rating"] = len(cols[5].xpath('.//img[@alt="*"]'))

        # entry date, e.g. "Jan 02, 2012"
        book_data["entry_date"] = datetime.strptime(
            cols[6].xpath('span')[0].text, "%b %d, %Y"
        ).date()

        yield book_data
|
||||
|
||||
|
||||
def viewstyle_5(self, rows):
|
||||
# implement this view to get at the ISBNs
|
||||
for (i,row) in enumerate(rows):
|
||||
for (i, row) in enumerate(rows):
|
||||
book_data = {}
|
||||
cols = row.xpath('td')
|
||||
|
||||
|
||||
# title
|
||||
book_data["title"] = {"href":cols[0].xpath('.//a')[0].attrib['href'],
|
||||
"title":cols[0].xpath('.//a')[0].text}
|
||||
|
||||
|
||||
# extract work_id and book_id from href
|
||||
try:
|
||||
(book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups()
|
||||
(book_data["work_id"], book_data["book_id"]) = re.match(
|
||||
"^/work/(.*)/book/(.*)$",
|
||||
book_data["title"]["href"]
|
||||
).groups()
|
||||
except:
|
||||
(book_data["work_id"], book_data["book_id"]) = (None, None)
|
||||
|
||||
|
||||
# tags
|
||||
tag_links = cols[1].xpath('.//a')
|
||||
book_data["tags"] = filter(lambda x: x is not None, [a.text for a in tag_links])
|
||||
|
@ -121,13 +143,13 @@ class LibraryThing(object):
|
|||
except Exception, e:
|
||||
logger.info("no lc call number for: %s %s", book_data["title"], e)
|
||||
book_data["lc_call_number"] = None
|
||||
|
||||
|
||||
# subject
|
||||
|
||||
|
||||
subjects = cols[3].xpath('.//div[@class="subjectLine"]')
|
||||
book_data["subjects"] = [{'href':s.xpath('a')[0].attrib['href'],
|
||||
'text':s.xpath('a')[0].text} for s in subjects]
|
||||
|
||||
|
||||
# isbn
|
||||
try:
|
||||
book_data["isbn"] = cols[4].xpath('.//span')[0].text
|
||||
|
@ -136,90 +158,94 @@ class LibraryThing(object):
|
|||
book_data["isbn"] = None
|
||||
except Exception, e:
|
||||
book_data["isbn"] = None
|
||||
|
||||
|
||||
yield book_data
|
||||
|
||||
|
||||
|
||||
def parse_user_catalog(self, view_style=1):
    """
    Generator over the books in the LibraryThing catalog of self.username.

    Fetches catalog_bottom.php page by page and yields whatever the
    viewstyle_<view_style> parser produces for the rows of each page.

    view_style: which catalog view to request; only 1 and 5 are implemented.

    Raises NotImplementedError for any other view_style and
    LibraryThingException when a catalog page does not come back with HTTP OK.
    """
    from lxml import html

    # we can vary viewstyle to get different info
    IMPLEMENTED_STYLES = [1, 5]
    COLLECTION = 2  # set to get All Collections

    if view_style not in IMPLEMENTED_STYLES:
        raise NotImplementedError()
    style_parser = getattr(self, "viewstyle_%s" % view_style)
    offset = 0

    # go to the front page of LibraryThing first to pick up relevant session-like cookies
    r = requests.get("https://www.librarything.com/")
    cookies = r.cookies

    next_page = True
    while next_page:
        url = "https://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (
            self.username, view_style, COLLECTION, offset
        )
        logger.info("url: %s", url)
        r = requests.get(url, cookies=cookies)

        if r.status_code != httplib.OK:
            # no exception object exists here; report the HTTP status instead
            logger.info("Error accessing %s: %s", url, r.status_code)
            raise LibraryThingException("Error accessing %s: status %s" % (url, r.status_code))
        etree = html.fromstring(r.content)
        cookies = r.cookies  # retain the cookies

        # look for a page bar
        # try to grab the total number of books
        # 1 - 50 of 82
        try:
            count_text = etree.xpath('//td[@class="pbGroup"]')[0].text
            total = int(re.search(r'(\d+)$', count_text).group(1))
            logger.info('total: %d', total)
        except Exception as e:
            # assume for now that if we can't grab this text,
            # there is no page bar and no books
            logger.info('Exception {0}'.format(e))
            total = 0

        # to do paging we can either look for a next link or just increase the offset
        # by the number of rows.
        # Let's try the latter
        # possible_next_link = etree.xpath('//a[@class="pageShuttleButton"]')[0]

        rows_xpath = '//table[@id="lt_catalog_list"]/tbody/tr'

        # deal with page 1 first and then working on paging through the collection
        rows = etree.xpath(rows_xpath)

        # count explicitly: style_parser(rows) may yield nothing at all
        page_count = 0
        for row in style_parser(rows):
            page_count += 1
            yield row

        # page size = 50, first page offset = 0, second page offset = 50
        # -- if total = 50 no need to go
        offset += page_count

        # an empty page cannot advance the offset, so stop rather than
        # request the same url forever
        if page_count == 0 or offset >= total:
            next_page = False
|
||||
|
||||
def load_librarything_into_wishlist(user, lt_username, max_books=None):
|
||||
"""
|
||||
Load a specified LibraryThing shelf (by default: all the books from the LibraryThing account associated with user)
|
||||
Load a specified LibraryThing shelf (by default: all the books
|
||||
from the LibraryThing account associated with user)
|
||||
"""
|
||||
|
||||
|
||||
from regluit.core import bookloader
|
||||
from regluit.core import tasks
|
||||
from itertools import islice
|
||||
|
||||
|
||||
logger.info("Entering into load_librarything_into_wishlist")
|
||||
lt = LibraryThing(lt_username)
|
||||
|
||||
|
||||
for (i,book) in enumerate(islice(lt.parse_user_catalog(view_style=5),max_books)):
|
||||
|
||||
|
||||
for (i, book) in enumerate(islice(lt.parse_user_catalog(view_style=5), max_books)):
|
||||
isbn = book["isbn"] # grab the first one
|
||||
logger.info("%d %s %s", i, book["title"]["title"], isbn)
|
||||
try:
|
||||
|
@ -229,13 +255,27 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None):
|
|||
if not edition:
|
||||
continue
|
||||
# add the librarything ids to the db since we know them now
|
||||
identifier= models.Identifier.get_or_add(type = 'thng', value = book['book_id'], edition = edition, work = edition.work)
|
||||
identifier= models.Identifier.get_or_add(type = 'ltwk', value = book['work_id'], work = edition.work)
|
||||
identifier = models.Identifier.get_or_add(
|
||||
type='thng',
|
||||
value=book['book_id'],
|
||||
edition=edition,
|
||||
work=edition.work
|
||||
)
|
||||
identifier = models.Identifier.get_or_add(
|
||||
type='ltwk',
|
||||
value=book['work_id'],
|
||||
work=edition.work
|
||||
)
|
||||
if book['lc_call_number']:
|
||||
identifier= models.Identifier.get_or_add(type = 'lccn', value = book['lc_call_number'], edition = edition, work = edition.work)
|
||||
identifier = models.Identifier.get_or_add(
|
||||
type='lccn',
|
||||
value=book['lc_call_number'],
|
||||
edition=edition,
|
||||
work=edition.work
|
||||
)
|
||||
user.wishlist.add_work(edition.work, 'librarything', notify=True)
|
||||
if edition.new:
|
||||
tasks.populate_edition.delay(edition.isbn_13)
|
||||
logger.info("Work with isbn %s added to wishlist.", isbn)
|
||||
except Exception, e:
|
||||
logger.info ("error adding ISBN %s: %s", isbn, e)
|
||||
logger.info("error adding ISBN %s: %s", isbn, e)
|
||||
|
|
|
@ -16,10 +16,10 @@ from .smashwords import SmashwordsScraper
|
|||
def get_scraper(url):
|
||||
scrapers = [
|
||||
PressbooksScraper,
|
||||
HathitrustScraper,
|
||||
SpringerScraper,
|
||||
UbiquityScraper,
|
||||
SmashwordsScraper,
|
||||
HathitrustScraper,
|
||||
BaseScraper,
|
||||
]
|
||||
for scraper in scrapers:
|
||||
|
@ -52,3 +52,9 @@ def add_by_webpage(url, work=None, user=None):
|
|||
|
||||
def add_by_sitemap(url, maxnum=None):
    """
    hand url and maxnum to scrape_sitemap, then pass what it produces
    to add_from_bookdatas and return that result
    """
    bookdatas = scrape_sitemap(url, maxnum=maxnum)
    return add_from_bookdatas(bookdatas)
|
||||
|
||||
def scrape_language(url):
    """
    return the 'language' entry from the metadata of the scraper
    that get_scraper picks for url
    """
    return get_scraper(url).metadata.get('language')
|
||||
|
||||
|
||||
|
|
|
@ -1,42 +1,54 @@
|
|||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
import logging
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
||||
from itertools import islice
|
||||
|
||||
import requests
|
||||
|
||||
from django.db.models import (Q, F)
|
||||
from django.db.models import Q
|
||||
|
||||
from django.core.files.storage import default_storage
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.files.storage import default_storage
|
||||
|
||||
import regluit
|
||||
from oaipmh.client import Client
|
||||
from oaipmh.error import IdDoesNotExistError
|
||||
from oaipmh.metadata import MetadataRegistry, oai_dc_reader
|
||||
|
||||
from regluit.core import bookloader, cc
|
||||
from regluit.core import models, tasks
|
||||
from regluit.core import bookloader
|
||||
from regluit.core.bookloader import add_by_isbn, merge_works
|
||||
from regluit.core.bookloader import merge_works
|
||||
from regluit.core.isbn import ISBN
|
||||
from regluit.core.validation import valid_subject
|
||||
from regluit.core.loaders.utils import type_for_url
|
||||
from regluit.core.validation import identifier_cleaner, valid_subject
|
||||
|
||||
from . import scrape_language
|
||||
from .doab_utils import doab_lang_to_iso_639_1, online_to_download, url_to_provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
springercover = re.compile(r'ftp.+springer\.de.+(\d{13}\.jpg)$', flags=re.U)
|
||||
def unlist(alist):
    """
    return the first item of alist, or None when alist is empty or None
    """
    return alist[0] if alist else None
|
||||
|
||||
|
||||
SPRINGER_COVER = re.compile(r'ftp.+springer\.de.+(\d{13}\.jpg)$', flags=re.U)
|
||||
SPRINGER_IMAGE = u'https://images.springer.com/sgw/books/medium/{}.jpg'
|
||||
def store_doab_cover(doab_id, redo=False):
|
||||
|
||||
|
||||
"""
|
||||
returns tuple: 1) cover URL, 2) whether newly created (boolean)
|
||||
"""
|
||||
|
||||
cover_file_name= '/doab/%s/cover' % (doab_id)
|
||||
|
||||
|
||||
cover_file_name = '/doab/%s/cover' % (doab_id)
|
||||
|
||||
# if we don't want to redo and the cover exists, return the URL of the cover
|
||||
|
||||
|
||||
if not redo and default_storage.exists(cover_file_name):
|
||||
return (default_storage.url(cover_file_name), False)
|
||||
|
||||
|
||||
# download cover image to cover_file
|
||||
url = "http://www.doabooks.org/doab?func=cover&rid={0}".format(doab_id)
|
||||
try:
|
||||
|
@ -44,29 +56,31 @@ def store_doab_cover(doab_id, redo=False):
|
|||
if r.status_code == 302:
|
||||
redirurl = r.headers['Location']
|
||||
if redirurl.startswith(u'ftp'):
|
||||
springerftp = springercover.match(redirurl)
|
||||
springerftp = SPRINGER_COVER.match(redirurl)
|
||||
if springerftp:
|
||||
redirurl = u'https://images.springer.com/sgw/books/medium/{}.jpg'.format(springerftp.groups(1))
|
||||
redirurl = SPRINGER_IMAGE.format(springerftp.groups(1))
|
||||
r = requests.get(redirurl)
|
||||
else:
|
||||
r = requests.get(url)
|
||||
else:
|
||||
r = requests.get(url)
|
||||
r = requests.get(url)
|
||||
cover_file = ContentFile(r.content)
|
||||
cover_file.content_type = r.headers.get('content-type', '')
|
||||
|
||||
path = default_storage.save(cover_file_name, cover_file)
|
||||
default_storage.save(cover_file_name, cover_file)
|
||||
return (default_storage.url(cover_file_name), True)
|
||||
except Exception, e:
|
||||
# if there is a problem, return None for cover URL
|
||||
logger.warning('Failed to make cover image for doab_id={}: {}'.format(doab_id, e))
|
||||
return (None, False)
|
||||
|
||||
def update_cover_doab(doab_id, edition, store_cover=True):
|
||||
def update_cover_doab(doab_id, edition, store_cover=True, redo=True):
|
||||
"""
|
||||
update the cover url for work with doab_id
|
||||
if store_cover is True, use the cover from our own storage
|
||||
"""
|
||||
if store_cover:
|
||||
(cover_url, new_cover) = store_doab_cover(doab_id)
|
||||
(cover_url, new_cover) = store_doab_cover(doab_id, redo=redo)
|
||||
else:
|
||||
cover_url = "http://www.doabooks.org/doab?func=cover&rid={0}".format(doab_id)
|
||||
|
||||
|
@ -74,131 +88,133 @@ def update_cover_doab(doab_id, edition, store_cover=True):
|
|||
edition.cover_image = cover_url
|
||||
edition.save()
|
||||
return cover_url
|
||||
else:
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
def attach_more_doab_metadata(edition, description, subjects,
                              publication_date, publisher_name=None, language=None, authors=u''):
    """
    for given edition, attach description, subjects, publication date to
    corresponding Edition and Work

    Existing values win: publication date, publisher, description and age
    level are only filled in when they are currently empty.
    subjects may be None (treated as no subjects).
    language is applied to the work unless it is empty or the 'xx' placeholder.
    authors is handed to creator_list; the edition's authors are replaced only
    when that yields more names than the edition already has.

    returns the edition
    """
    # if edition doesn't have a publication date, update it
    if not edition.publication_date:
        edition.publication_date = publication_date

    # if edition.publisher_name is empty, set it
    if not edition.publisher_name:
        edition.set_publisher(publisher_name)

    edition.save()

    # attach description to work if it's not empty
    work = edition.work
    if not work.description:
        work.description = description

    # update subjects; callers pass kwargs.get('subject'), which can be None
    for s in subjects or []:
        if valid_subject(s):
            models.Subject.set_by_name(s, work=work)

    # set reading level of work if it's empty; doab is for adults.
    if not work.age_level:
        work.age_level = '18-'

    if language and language != 'xx':
        work.language = language
    work.save()

    if authors and authors == authors:  # test for authors != NaN
        authlist = creator_list(authors)
        if edition.authors.all().count() < len(authlist):
            edition.authors.clear()
            for rel, auth in authlist:
                edition.add_author(auth, rel)

    return edition
|
||||
|
||||
def add_all_isbns(isbns, work, language=None, title=None):
    """
    call bookloader.add_by_isbn for every isbn, merging works as needed

    When an isbn resolves to an edition on a different work, the two works
    are merged with the earlier-created one passed first to merge_works;
    otherwise the edition's work becomes the current work.

    returns (work, first_edition): the current work after the last isbn and
    the first edition that add_by_isbn returned (None if there was none)
    """
    first_edition = None
    for isbn in isbns:
        edition = bookloader.add_by_isbn(isbn, work, language=language, title=title)
        if not edition:
            continue
        first_edition = first_edition or edition
        if not work or edition.work_id == work.id:
            work = edition.work
        elif work.created < edition.work.created:
            work = merge_works(work, edition.work)
        else:
            work = merge_works(edition.work, work)
    return work, first_edition
|
||||
|
||||
def load_doab_edition(title, doab_id, url, format, rights,
|
||||
language, isbns,
|
||||
provider, **kwargs):
|
||||
|
||||
|
||||
"""
|
||||
load a record from doabooks.org represented by input parameters and return an ebook
|
||||
"""
|
||||
logger.info('load doab {} {} {} {} {}'.format(doab_id, format, rights, language, provider))
|
||||
if language and isinstance(language, list):
|
||||
language = language[0]
|
||||
|
||||
if language == 'xx' and format == 'online':
|
||||
language = scrape_language(url)
|
||||
# check to see whether the Edition hasn't already been loaded first
|
||||
# search by url
|
||||
ebooks = models.Ebook.objects.filter(url=url)
|
||||
|
||||
|
||||
# 1 match
|
||||
# > 1 matches
|
||||
# 0 match
|
||||
|
||||
# simplest case -- if match (1 or more), we could check whether any
|
||||
# ebook.edition.work has a doab id matching given doab_id
|
||||
|
||||
|
||||
# put a migration to force Ebook.url to be unique id
|
||||
|
||||
|
||||
# if yes, then return one of the Edition(s) whose work is doab_id
|
||||
# if no, then
|
||||
# if no, then
|
||||
ebook = None
|
||||
if len(ebooks) > 1:
|
||||
raise Exception("There is more than one Ebook matching url {0}".format(url))
|
||||
elif len(ebooks) == 1:
|
||||
raise Exception("There is more than one Ebook matching url {0}".format(url))
|
||||
elif len(ebooks) == 1:
|
||||
ebook = ebooks[0]
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab',value=doab_id,
|
||||
work=ebook.edition.work)
|
||||
# update the cover id
|
||||
cover_url = update_cover_doab(doab_id, ebook.edition)
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
|
||||
work=ebook.edition.work)
|
||||
if not ebook.rights:
|
||||
ebook.rights = rights
|
||||
ebook.save()
|
||||
|
||||
# update the cover id
|
||||
cover_url = update_cover_doab(doab_id, ebook.edition, redo=False)
|
||||
|
||||
# attach more metadata
|
||||
attach_more_doab_metadata(ebook.edition,
|
||||
description=kwargs.get('description'),
|
||||
subjects=kwargs.get('subject'),
|
||||
publication_date=kwargs.get('date'),
|
||||
publisher_name=kwargs.get('publisher'),
|
||||
language=language,
|
||||
authors=kwargs.get('authors'),)
|
||||
attach_more_doab_metadata(
|
||||
ebook.edition,
|
||||
description=unlist(kwargs.get('description')),
|
||||
subjects=kwargs.get('subject'),
|
||||
publication_date=unlist(kwargs.get('date')),
|
||||
publisher_name=unlist(kwargs.get('publisher')),
|
||||
language=language,
|
||||
authors=kwargs.get('creator'),
|
||||
)
|
||||
# make sure all isbns are added
|
||||
add_all_isbns(isbns, None, language=language, title=title)
|
||||
return ebook
|
||||
|
||||
add_all_isbns(isbns, ebook.edition.work, language=language, title=title)
|
||||
return ebook.edition
|
||||
|
||||
# remaining case --> no ebook, load record, create ebook if there is one.
|
||||
assert len(ebooks) == 0
|
||||
|
||||
assert not ebooks
|
||||
|
||||
|
||||
# we need to find the right Edition/Work to tie Ebook to...
|
||||
|
||||
|
||||
# look for the Edition with which to associate ebook.
|
||||
# loop through the isbns to see whether we get one that is not None
|
||||
work = None
|
||||
edition = add_all_isbns(isbns, None, language=language, title=title)
|
||||
if edition:
|
||||
edition.refresh_from_db()
|
||||
work = edition.work
|
||||
|
||||
work, edition = add_all_isbns(isbns, None, language=language, title=title)
|
||||
if doab_id and not work:
|
||||
# make sure there's not already a doab_id
|
||||
idents = models.Identifier.objects.filter(type='doab', value=doab_id)
|
||||
|
@ -206,16 +222,17 @@ def load_doab_edition(title, doab_id, url, format, rights,
|
|||
edition = ident.work.preferred_edition
|
||||
work = edition.work
|
||||
break
|
||||
|
||||
if edition is not None:
|
||||
# if this is a new edition, then add related editions asynchronously
|
||||
if getattr(edition,'new', False):
|
||||
tasks.populate_edition.delay(edition.isbn_13)
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
|
||||
work=edition.work)
|
||||
|
||||
# we need to create Edition(s) de novo
|
||||
else:
|
||||
if edition is not None:
|
||||
# if this is a new edition, then add related editions SYNCHRONOUSLY
|
||||
if getattr(edition, 'new', False):
|
||||
tasks.populate_edition(edition.isbn_13)
|
||||
edition.refresh_from_db()
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
|
||||
work=edition.work)
|
||||
|
||||
# we need to create Edition(s) de novo
|
||||
else:
|
||||
# if there is a Work with doab_id already, attach any new Edition(s)
|
||||
try:
|
||||
work = models.Identifier.objects.get(type='doab', value=doab_id).work
|
||||
|
@ -226,11 +243,11 @@ def load_doab_edition(title, doab_id, url, format, rights,
|
|||
work = models.Work(language='xx', title=title, age_level='18-')
|
||||
work.save()
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
|
||||
work=work)
|
||||
|
||||
work=work)
|
||||
|
||||
# if work has any ebooks already, attach the ebook to the corresponding edition
|
||||
# otherwise pick the first one
|
||||
# pick the first edition as the one to tie ebook to
|
||||
# pick the first edition as the one to tie ebook to
|
||||
editions_with_ebooks = models.Edition.objects.filter(Q(work__id=work.id) & \
|
||||
Q(ebooks__isnull=False)).distinct()
|
||||
if editions_with_ebooks:
|
||||
|
@ -240,73 +257,41 @@ def load_doab_edition(title, doab_id, url, format, rights,
|
|||
else:
|
||||
edition = models.Edition(work=work, title=title)
|
||||
edition.save()
|
||||
|
||||
|
||||
# make the edition the selected_edition of the work
|
||||
work.selected_edition = edition
|
||||
work.save()
|
||||
|
||||
if format in ('pdf', 'epub', 'mobi'):
|
||||
|
||||
if format in ('pdf', 'epub', 'mobi', 'html', 'online') and rights:
|
||||
ebook = models.Ebook()
|
||||
ebook.format = format
|
||||
ebook.provider = provider
|
||||
ebook.url = url
|
||||
ebook.url = url
|
||||
ebook.rights = rights
|
||||
# tie the edition to ebook
|
||||
ebook.edition = edition
|
||||
if format == "online":
|
||||
ebook.active = False
|
||||
ebook.save()
|
||||
|
||||
|
||||
# update the cover id (could be done separately)
|
||||
cover_url = update_cover_doab(doab_id, edition)
|
||||
|
||||
cover_url = update_cover_doab(doab_id, edition, redo=False)
|
||||
|
||||
# attach more metadata
|
||||
attach_more_doab_metadata(edition,
|
||||
description=kwargs.get('description'),
|
||||
subjects=kwargs.get('subject'),
|
||||
publication_date=kwargs.get('date'),
|
||||
publisher_name=kwargs.get('publisher'),
|
||||
authors=kwargs.get('authors'),)
|
||||
return ebook
|
||||
attach_more_doab_metadata(
|
||||
edition,
|
||||
description=unlist(kwargs.get('description')),
|
||||
subjects=kwargs.get('subject'),
|
||||
publication_date=unlist(kwargs.get('date')),
|
||||
publisher_name=unlist(kwargs.get('publisher')),
|
||||
authors=kwargs.get('creator'),
|
||||
)
|
||||
return edition
|
||||
|
||||
|
||||
def load_doab_records(fname, limit=None):
    """
    load doab records from the json file fname via load_doab_edition

    fname: path of a json file holding a sequence of records; each record is
        turned into a dict and must carry an 'isbns_raw' entry
    limit: stop after this many records (None means all of them)

    A record whose load raises is logged and skipped; the two counters are
    logged at the end.
    """
    success_count = 0
    ebook_count = 0

    # close the file as soon as it has been parsed
    with open(fname) as records_file:
        records = json.load(records_file)

    for book in islice(records, limit):
        d = dict(book)
        d['isbns'] = split_isbns(d['isbns_raw'])  # use stricter isbn string parsing.
        try:
            ebook = load_doab_edition(**d)
            success_count += 1
            if ebook:
                ebook_count += 1
        except Exception as e:
            logger.error(e)
            logger.error(book)

    logger.info("Number of records processed: " + str(success_count))
    logger.info("Number of ebooks processed: " + str(ebook_count))
|
||||
|
||||
"""
|
||||
#
|
||||
#tools to parse the author lists in doab.csv
|
||||
from pandas import DataFrame
|
||||
url = "http://www.doabooks.org/doab?func=csv"
|
||||
df_csv = DataFrame.from_csv(url)
|
||||
#
|
||||
|
||||
out=[]
|
||||
for val in df_csv.values:
|
||||
isbn = split_isbns(val[0])
|
||||
if isbn:
|
||||
auths = []
|
||||
if val[2] == val[2] and val[-2] == val[-2]: # test for NaN auths and licenses
|
||||
auths = creator_list(val[2])
|
||||
out.append(( isbn[0], auths))
|
||||
open("/Users/eric/doab_auths.json","w+").write(json.dumps(out,indent=2, separators=(',', ': ')))
|
||||
"""
|
||||
|
||||
au = re.compile(r'\(Authors?\)', flags=re.U)
|
||||
ed = re.compile(r'\([^\)]*(dir.|[Eeé]ds?.|org.|coord.|Editor|a cura di|archivist)[^\)]*\)', flags=re.U)
|
||||
tr = re.compile(r'\([^\)]*([Tt]rans.|tr.|translated by)[^\)]*\)', flags=re.U)
|
||||
|
@ -326,14 +311,14 @@ def fnf(auth):
|
|||
if len(parts) == 1:
|
||||
return parts[0].strip()
|
||||
elif len(parts) == 2:
|
||||
return u'{} {}'.format(parts[1].strip(),parts[0].strip())
|
||||
return u'{} {}'.format(parts[1].strip(), parts[0].strip())
|
||||
else:
|
||||
if parts[1].strip() in ('der','van', 'von', 'de', 'ter'):
|
||||
return u'{} {} {}'.format(parts[2].strip(),parts[1].strip(),parts[0].strip())
|
||||
if parts[1].strip() in ('der', 'van', 'von', 'de', 'ter'):
|
||||
return u'{} {} {}'.format(parts[2].strip(), parts[1].strip(), parts[0].strip())
|
||||
#print auth
|
||||
#print re.search(namelist,auth).group(0)
|
||||
return u'{} {}, {}'.format(parts[2].strip(),parts[0].strip(),parts[1].strip())
|
||||
|
||||
return u'{} {}, {}'.format(parts[2].strip(), parts[0].strip(), parts[1].strip())
|
||||
|
||||
|
||||
def creator(auth, editor=False):
|
||||
auth = auth.strip()
|
||||
|
@ -349,68 +334,100 @@ def creator(auth, editor=False):
|
|||
return [u'dsr', fnf(ds.sub(u'', auth))]
|
||||
if re.search(cm, auth):
|
||||
return [u'com', fnf(cm.sub(u'', auth))]
|
||||
|
||||
|
||||
auth = au.sub('', auth)
|
||||
return ['aut', fnf(auth)]
|
||||
|
||||
def split_auths(auths):
    """
    split a string of author names into a list of name strings

    A string containing ';' or '/' is split with namesep2. Otherwise the
    string is split with namesep only when it matches namelist and none of
    the particle checks below fire; in every other case it is returned
    whole as a one-item list.
    """
    if ';' in auths or '/' in auths:
        return namesep2.split(auths)

    nl = namelist.match(auths.strip())
    if not nl:
        return [auths]

    head, tail = nl.group(1), nl.group(3)
    keep_whole = (
        tail.endswith((' de', ' da'))
        or ' de ' in tail
        or head.endswith(' Jr.')
        or ' e ' in head
    )
    if keep_whole:
        return [auths]
    return namesep.split(auths)
|
||||
|
||||
def split_isbns(isbns):
    """
    split isbns with isbnsep and return the to_string() form of every
    piece that ISBN reports as valid, in order
    """
    candidates = (ISBN(piece) for piece in isbnsep.split(isbns))
    return [candidate.to_string() for candidate in candidates if candidate.valid]
|
||||
|
||||
def creator_list(creators):
    """
    return the result of creator() for each item of creators, in order
    """
    return [creator(name) for name in creators]
|
||||
|
||||
def load_doab_auths(fname, limit=None):
|
||||
doab_auths = json.load(open(fname))
|
||||
recnum = 0
|
||||
failed = 0
|
||||
for [isbnraw, authlist] in doab_auths:
|
||||
isbn = ISBN(isbnraw).to_string()
|
||||
try:
|
||||
work = models.Identifier.objects.get(type='isbn',value=isbn).work
|
||||
except models.Identifier.DoesNotExist:
|
||||
print 'isbn = {} not found'.format(isbnraw)
|
||||
failed += 1
|
||||
if work.preferred_edition.authors.all().count() < len(authlist):
|
||||
work.preferred_edition.authors.clear()
|
||||
if authlist is None:
|
||||
print "null authlist; isbn={}".format(isbn)
|
||||
DOAB_OAIURL = 'https://www.doabooks.org/oai'
|
||||
DOAB_PATT = re.compile(r'[\./]doabooks\.org/doab\?.*rid:(\d{1,8}).*')
|
||||
mdregistry = MetadataRegistry()
|
||||
mdregistry.registerReader('oai_dc', oai_dc_reader)
|
||||
doab_client = Client(DOAB_OAIURL, mdregistry)
|
||||
isbn_cleaner = identifier_cleaner('isbn', quiet=True)
|
||||
ISBNSEP = re.compile(r'[/]+')
|
||||
|
||||
def add_by_doab(doab_id, record=None):
|
||||
try:
|
||||
record = record if record else doab_client.getRecord(
|
||||
metadataPrefix='oai_dc',
|
||||
identifier='oai:doab-books:{}'.format(doab_id)
|
||||
)
|
||||
metadata = record[1].getMap()
|
||||
isbns = []
|
||||
url = None
|
||||
for ident in metadata.pop('identifier', []):
|
||||
if ident.startswith('ISBN: '):
|
||||
isbn_strings = ISBNSEP.split(ident[6:].strip())
|
||||
for isbn_string in isbn_strings:
|
||||
isbn = isbn_cleaner(isbn_string)
|
||||
if isbn:
|
||||
isbns.append(isbn)
|
||||
elif ident.find('doabooks.org') >= 0:
|
||||
# should already know the doab_id
|
||||
continue
|
||||
for [rel,auth] in authlist:
|
||||
work.preferred_edition.add_author(auth, rel)
|
||||
recnum +=1
|
||||
if limit and recnum > limit:
|
||||
break
|
||||
logger.info("Number of records processed: " + str(recnum))
|
||||
logger.info("Number of missing isbns: " + str(failed))
|
||||
|
||||
else:
|
||||
url = ident
|
||||
language = doab_lang_to_iso_639_1(unlist(metadata.pop('language', None)))
|
||||
urls = online_to_download(url)
|
||||
edition = None
|
||||
title = unlist(metadata.pop('title', None))
|
||||
license = cc.license_from_cc_url(unlist(metadata.pop('rights', None)))
|
||||
for dl_url in urls:
|
||||
format = type_for_url(dl_url)
|
||||
if 'format' in metadata:
|
||||
del metadata['format']
|
||||
edition = load_doab_edition(
|
||||
title,
|
||||
doab_id,
|
||||
dl_url,
|
||||
format,
|
||||
license,
|
||||
language,
|
||||
isbns,
|
||||
url_to_provider(dl_url) if dl_url else None,
|
||||
**metadata
|
||||
)
|
||||
return edition
|
||||
except IdDoesNotExistError:
|
||||
return None
|
||||
|
||||
|
||||
def getdoab(url):
    """
    return the record id that DOAB_PATT finds in url, or False when
    the pattern does not match
    """
    id_match = DOAB_PATT.search(url)
    return id_match.group(1) if id_match else False
|
||||
|
||||
def load_doab_oai(from_year=None, limit=100000):
    '''
    use oai feed to get oai updates

    from_year: harvest records from January 1 of this year; when not given,
        harvest the last 45 days
    limit: stop harvesting once this many doab ids have been handled
    '''
    if from_year:
        from_ = datetime.datetime(year=from_year, month=1, day=1)
    else:
        # last 45 days
        from_ = datetime.datetime.now() - datetime.timedelta(days=45)

    doab_ids = []
    for record in doab_client.listRecords(metadataPrefix='oai_dc', from_=from_):
        if not record[1]:
            continue
        metadata = record[1].getMap()
        if unlist(metadata.get('type', None)) != 'book':
            continue
        # a record without identifiers has nothing to load
        for ident in metadata.get('identifier') or []:
            doab = getdoab(ident)
            if doab:
                doab_ids.append(doab)
                e = add_by_doab(doab, record=record)
                title = e.title if e else None
                logger.info(u'updated:\t{}\t{}'.format(doab, title))
        # >= so that no further record is fetched once limit ids are done
        if len(doab_ids) >= limit:
            break
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
"""
|
||||
doab_utils.py
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
from regluit.utils.lang import get_language_code
|
||||
from .utils import get_soup
|
||||
|
||||
# utility functions for converting lists of individual items into individual items
|
||||
|
||||
# let's do a mapping of the DOAB languages into the language codes used
|
||||
# mostly, we just handle misspellings
|
||||
# also null -> xx
|
||||
|
||||
# lower-cased DOAB language strings that need an explicit mapping,
# paired with the two-letter code to use for them
EXTRA_LANG_MAP = {
    u'chinese': 'zh',
    u'deutsch': 'de',
    u'eng': 'en',
    u'englilsh': 'en',
    u'englisch': 'en',
    u'espanol': 'es',
    u'ger': 'de',
    u'fra': 'fr',
    u'fre': 'fr',
    u'francese': 'fr',
    u'ita': 'it',
    u'italiano': 'it',
    u'norwegian': 'no',
    u'por': 'pt',
    u'portugese': 'pt',
    u'slovene': 'sl',
    u'spa': 'es',
    u'spagnolo': 'es',
}
|
||||
|
||||
sep = re.compile(r'[ \-;^,/]+')
|
||||
def doab_lang_to_iso_639_1(lang):
    """
    map a DOAB language string to a language code

    Empty or missing input gives "xx". Otherwise only the part before the
    first separator is considered: get_language_code is tried first, then
    the lower-cased string is looked up in EXTRA_LANG_MAP, with 'xx' as the
    fallback.
    """
    if not lang:
        return "xx"
    lang = sep.split(lang)[0]
    return get_language_code(lang) or EXTRA_LANG_MAP.get(lang.lower(), 'xx')
|
||||
|
||||
|
||||
DOMAIN_TO_PROVIDER = dict([
|
||||
[u'antropologie.zcu.cz', u'AntropoWeb'],
|
||||
[u'books.mdpi.com', u'MDPI Books'],
|
||||
[u'books.openedition.org', u'OpenEdition Books'],
|
||||
[u'books.scielo.org', u'SciELO'],
|
||||
[u'ccdigitalpress.org', u'Computers and Composition Digital Press'],
|
||||
[u'digitalcommons.usu.edu', u'DigitalCommons, Utah State University'],
|
||||
[u'dl.dropboxusercontent.com', u'Dropbox'],
|
||||
[u'dspace.ucalgary.ca', u'Institutional Repository at the University of Calgary'],
|
||||
[u'dx.doi.org', u'DOI Resolver'],
|
||||
[u'ebooks.iospress.nl', u'IOS Press Ebooks'],
|
||||
[u'hdl.handle.net', u'Handle Proxy'],
|
||||
[u'hw.oeaw.ac.at', u'Austrian Academy of Sciences'],
|
||||
[u'img.mdpi.org', u'MDPI Books'],
|
||||
[u'ledibooks.com', u'LediBooks'],
|
||||
[u'leo.cilea.it', u'LEO '],
|
||||
[u'leo.cineca.it', u'Letteratura Elettronica Online'],
|
||||
[u'link.springer.com', u'Springer'],
|
||||
[u'oapen.org', u'OAPEN Library'],
|
||||
[u'press.openedition.org', u'OpenEdition Press'],
|
||||
[u'windsor.scholarsportal.info', u'Scholars Portal'],
|
||||
[u'www.adelaide.edu.au', u'University of Adelaide'],
|
||||
[u'www.aliprandi.org', u'Simone Aliprandi'],
|
||||
[u'www.antilia.to.it', u'antilia.to.it'],
|
||||
[u'www.aupress.ca', u'Athabasca University Press'],
|
||||
[u'www.bloomsburyacademic.com', u'Bloomsbury Academic'],
|
||||
[u'www.co-action.net', u'Co-Action Publishing'],
|
||||
[u'www.degruyter.com', u'De Gruyter Online'],
|
||||
[u'www.doabooks.org', u'Directory of Open Access Books'],
|
||||
[u'www.dropbox.com', u'Dropbox'],
|
||||
[u'www.ebooks.iospress.nl', u'IOS Press Ebooks'],
|
||||
[u'www.ledizioni.it', u'Ledizioni'],
|
||||
[u'www.maestrantonella.it', u'maestrantonella.it'],
|
||||
[u'www.oapen.org', u'OAPEN Library'],
|
||||
[u'www.openbookpublishers.com', u'Open Book Publishers'],
|
||||
[u'www.palgraveconnect.com', u'Palgrave Connect'],
|
||||
[u'www.scribd.com', u'Scribd'],
|
||||
[u'www.springerlink.com', u'Springer'],
|
||||
[u'www.ubiquitypress.com', u'Ubiquity Press'],
|
||||
[u'www.unimib.it', u'University of Milano-Bicocca'],
|
||||
[u'www.unito.it', u"University of Turin"],
|
||||
])
|
||||
|
||||
def url_to_provider(url):
    """
    return the DOMAIN_TO_PROVIDER name for the network location of url,
    or the network location itself when it is not in the table
    """
    host = urlparse.urlparse(url).netloc
    try:
        return DOMAIN_TO_PROVIDER[host]
    except KeyError:
        return host
|
||||
|
||||
FRONTIERSIN = re.compile(r'frontiersin.org/books/[^/]+/(\d+)')


def online_to_download(url):
    """
    turn the url of an online book page into a list of download urls

    mdpi pdf viewer pages and books.scielo.org pages are fetched with
    get_soup and searched for their file links (possibly giving an empty
    list); frontiersin.org book urls are rewritten into their EPUB and PDF
    GetFile urls; any other url is returned unchanged as a one-item list.

    An empty or missing url gives an empty list.
    """
    urls = []
    if not url:
        # callers may have found no url at all for a record
        return urls

    if u'mdpi.com/books/pdfview/book/' in url:
        doc = get_soup(url)
        if doc:
            obj = doc.find('object', type='application/pdf')
            if obj:
                urls.append(obj['data'].split('#')[0])
    elif u'books.scielo.org/' in url:
        doc = get_soup(url)
        if doc:
            for link_class in ('pdf_file', 'epub_file'):
                obj = doc.find('a', class_=link_class)
                if obj:
                    urls.append(urlparse.urljoin(url, obj['href']))
    else:
        frontiers = FRONTIERSIN.search(url)
        if frontiers:
            booknum = frontiers.group(1)
            urls.append(u'https://www.frontiersin.org/GetFile.aspx?ebook={}&fileformat=EPUB'.format(booknum))
            urls.append(u'https://www.frontiersin.org/GetFile.aspx?ebook={}&fileformat=PDF'.format(booknum))
        else:
            urls.append(url)
    return urls
|
||||
|
|
@ -26,38 +26,54 @@ class HathitrustScraper(BaseScraper):
|
|||
for record in records:
|
||||
self.record = record
|
||||
return
|
||||
self.record = {}
|
||||
|
||||
self.record = None # probably a hdl not pointing at Hathitrust
|
||||
self.record = None
|
||||
|
||||
def get_downloads(self):
|
||||
dl_a = self.doc.select_one('#fullPdfLink')
|
||||
value = dl_a['href'] if dl_a else None
|
||||
if value:
|
||||
self.set(
|
||||
'download_url_{}'.format('pdf'),
|
||||
'https://babel.hathitrust.org{}'.format(value)
|
||||
)
|
||||
if self.record:
|
||||
dl_a = self.doc.select_one('#fullPdfLink')
|
||||
value = dl_a['href'] if dl_a else None
|
||||
if value:
|
||||
self.set(
|
||||
'download_url_{}'.format('pdf'),
|
||||
'https://babel.hathitrust.org{}'.format(value)
|
||||
)
|
||||
return super(HathitrustScraper, self).get_downloads()
|
||||
|
||||
def get_isbns(self):
|
||||
isbn = self.record.get('issn', [])
|
||||
value = identifier_cleaner('isbn', quiet=True)(isbn)
|
||||
return {'print': value} if value else {}
|
||||
if self.record:
|
||||
isbn = self.record.get('issn', [])
|
||||
value = identifier_cleaner('isbn', quiet=True)(isbn)
|
||||
return {'print': value} if value else {}
|
||||
return super(HathitrustScraper, self).get_isbns()
|
||||
|
||||
def get_title(self):
|
||||
self.set('title', self.record.get('title', ''))
|
||||
if self.record:
|
||||
self.set('title', self.record.get('title', ''))
|
||||
return super(HathitrustScraper, self).get_title()
|
||||
|
||||
def get_keywords(self):
|
||||
self.set('subjects', self.record.get('keywords', []))
|
||||
if self.record:
|
||||
self.set('subjects', self.record.get('keywords', []))
|
||||
return super(HathitrustScraper, self).get_keywords()
|
||||
|
||||
def get_publisher(self):
|
||||
self.set('publisher', self.record.get('publisher', ''))
|
||||
if self.record:
|
||||
self.set('publisher', self.record.get('publisher', ''))
|
||||
return super(HathitrustScraper, self).get_publisher()
|
||||
|
||||
def get_pubdate(self):
|
||||
self.set('publication_date', self.record.get('year', ''))
|
||||
if self.record:
|
||||
self.set('publication_date', self.record.get('year', ''))
|
||||
return super(HathitrustScraper, self).get_pubdate()
|
||||
|
||||
def get_description(self):
|
||||
notes = self.record.get('notes', [])
|
||||
self.set('description', '\r'.join(notes))
|
||||
if self.record:
|
||||
notes = self.record.get('notes', [])
|
||||
self.set('description', '\r'.join(notes))
|
||||
return super(HathitrustScraper, self).get_description()
|
||||
|
||||
def get_genre(self):
|
||||
self.set('genre', self.record.get('type_of_reference', '').lower())
|
||||
if self.record:
|
||||
self.set('genre', self.record.get('type_of_reference', '').lower())
|
||||
return super(HathitrustScraper, self).get_genre()
|
||||
|
|
|
@ -110,15 +110,19 @@ class SpringerScraper(BaseScraper):
|
|||
self.set('publisher', 'Springer')
|
||||
|
||||
search_url = 'https://link.springer.com/search/page/{}?facet-content-type=%22Book%22&package=openaccess'
|
||||
def load_springer(num_pages):
|
||||
def springer_open_books(num_pages):
|
||||
for page in range(1, num_pages+1):
|
||||
def load_springer(startpage=1, endpage=None):
|
||||
def springer_open_books(startpage, endpage):
|
||||
endpage = endpage if endpage else startpage + 10
|
||||
for page in range(startpage, endpage + 1):
|
||||
url = search_url.format(page)
|
||||
response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
|
||||
if response.status_code == 200:
|
||||
base = response.url
|
||||
doc = BeautifulSoup(response.content, 'lxml')
|
||||
for link in doc.select('a.title'):
|
||||
book_url = urljoin(base, link['href'])
|
||||
yield SpringerScraper(book_url)
|
||||
return add_from_bookdatas(springer_open_books(num_pages))
|
||||
try:
|
||||
response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
|
||||
if response.status_code == 200:
|
||||
base = response.url
|
||||
doc = BeautifulSoup(response.content, 'lxml')
|
||||
for link in doc.select('a.title'):
|
||||
book_url = urljoin(base, link['href'])
|
||||
yield SpringerScraper(book_url)
|
||||
except requests.exceptions.ConnectionError:
|
||||
print 'couldn\'t connect to %s' % url
|
||||
return add_from_bookdatas(springer_open_books(startpage, endpage))
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
from regluit.core.models import Ebook, Edition, Work
|
||||
from .utils import dl_online
|
||||
|
||||
class LoaderTests(TestCase):
    """Integration tests for harvesting files from 'online' ebook links."""

    def setUp(self):
        pass

    def test_downloads(self):
        """dl_online should produce a sized ebook for Dropbox and JBE links.

        The test downloads from the live sites, so it does nothing unless
        settings.TEST_INTEGRATION is set.
        """
        if not (settings.TEST_INTEGRATION):
            return

        work = Work(title="online work")
        work.save()

        edition = Edition(work=work)
        edition.save()

        dropbox_url = 'https://www.dropbox.com/s/h5jzpb4vknk8n7w/Jakobsson_The_Troll_Inside_You_EBook.pdf?dl=0'
        dropbox_ebook = Ebook.objects.create(format='online', url=dropbox_url, edition=edition)
        # dl_online returns an (EbookFile, created) pair, not a bare EbookFile
        dropbox_ebf, dropbox_new = dl_online(dropbox_ebook)
        self.assertTrue(dropbox_ebf.ebook.filesize)

        jbe_url = 'http://www.jbe-platform.com/content/books/9789027295958'
        jbe_ebook = Ebook.objects.create(format='online', url=jbe_url, edition=edition)
        jbe_ebf, jbe_new = dl_online(jbe_ebook)
        self.assertTrue(jbe_ebf.ebook.filesize)
|
|
@ -1,15 +1,23 @@
|
|||
import csv
|
||||
import re
|
||||
import requests
|
||||
import logging
|
||||
import sys
|
||||
import re
|
||||
import time
|
||||
import unicodedata
|
||||
import urlparse
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.files.base import ContentFile
|
||||
|
||||
from regluit.core.models import Work, Edition, Author, PublisherName, Identifier, Subject
|
||||
from regluit.core.isbn import ISBN
|
||||
from regluit.core.bookloader import add_by_isbn_from_google, merge_works
|
||||
from regluit.api.crosswalks import inv_relator_contrib
|
||||
from regluit.bisac.models import BisacHeading
|
||||
from regluit.core.bookloader import add_by_isbn_from_google, merge_works
|
||||
from regluit.core.isbn import ISBN
|
||||
from regluit.core.models import (
|
||||
Ebook, EbookFile, Edition, Identifier, path_for_file, Subject, Work,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -22,7 +30,7 @@ def utf8_general_ci_norm(s):
|
|||
"""
|
||||
Normalize a la MySQL utf8_general_ci collation
|
||||
(As of 2016.05.24, we're using the utf8_general_ci collation for author names)
|
||||
|
||||
|
||||
https://stackoverflow.com/questions/1036454/what-are-the-diffrences-between-utf8-general-ci-and-utf8-unicode-ci/1036459#1036459
|
||||
|
||||
* converts to Unicode normalization form D for canonical decomposition
|
||||
|
@ -34,79 +42,84 @@ def utf8_general_ci_norm(s):
|
|||
s1 = unicodedata.normalize('NFD', s)
|
||||
return ''.join(c for c in s1 if not unicodedata.combining(c)).upper()
|
||||
|
||||
def get_soup(url):
    """Fetch ``url`` and parse the response body with the lxml parser.

    Returns the BeautifulSoup document, or None when the response status
    is anything other than 200.
    """
    request_headers = {"User-Agent": settings.USER_AGENT}
    page = requests.get(url, headers=request_headers)
    if page.status_code != 200:
        return None
    return BeautifulSoup(page.content, 'lxml')
|
||||
|
||||
def get_authors(book):
    """Return the contributors of ``book`` as a list of (name, role) tuples.

    Two spreadsheet dialects are understood:

    * UMich (recognised by a non-empty 'AuthorsList'): the first three
      contributors come from the numbered Author<N>First/Last/Role columns;
      any further names are taken from 'AuthorsList' with role 'A01'.
    * OBP: up to five 'Contributor <N> ...' column groups.

    A blank role defaults to 'A01'. Reading the numbered columns stops at
    the first contributor whose first and last names are both empty.
    """
    authors = []
    if book.get('AuthorsList', ''):
        #UMich
        # columns exist for authors 1..3; AuthorsList is only consulted for
        # the fourth name onwards, so all three must be read here
        for i in range(1, 4):
            fname = book.get(u'Author{}First'.format(i), '')
            lname = book.get(u'Author{}Last'.format(i), '')
            role = book.get(u'Author{}Role'.format(i), '')
            authname = u'{} {}'.format(fname, lname)
            if authname == u' ':
                break
            authors.append((authname, role if role.strip() else 'A01'))
        authlist = book["AuthorsList"].replace(' and ', ', ').split(', ')
        if len(authlist) > 3:
            for authname in authlist[3:]:
                authors.append((authname, 'A01'))
    else:
        #OBP
        for i in range(1, 6):
            fname = book.get(u'Contributor {} first name'.format(i), '')
            lname = book.get(u'Contributor {} surname'.format(i), '')
            role = book.get(u'ONIX Role Code (List 17){}'.format(i), '')
            authname = u'{} {}'.format(fname, lname)
            if authname == u' ':
                break
            authors.append((authname, role if role.strip() else 'A01'))
    return authors
|
||||
|
||||
def get_subjects(book):
    """Return the BisacHeading objects for the BISAC codes listed in ``book``.

    Code columns 1 to 4 are read, preferring the UMich column name over the
    OBP one. A code with no matching BisacHeading is logged and skipped.
    """
    headings = []
    for position in range(1, 5):
        umich_key = u'BISACCode{}'.format(position) #UMich dialect
        obp_key = u'BISAC subject code {}'.format(position) #OBP dialect
        code = book.get(umich_key, '') or book.get(obp_key, '')
        if code == '':
            continue
        try:
            heading = BisacHeading.objects.get(notation=code)
        except BisacHeading.DoesNotExist:
            logger.warning("Please add BISAC {}".format(code))
        else:
            headings.append(heading)
    return headings
|
||||
|
||||
def add_subject(subject_name, work, authority=''):
    """Attach the Subject named ``subject_name`` to ``work``.

    The Subject is looked up by name; when none exists, one is created
    with the given ``authority``.
    """
    try:
        subject = Subject.objects.get(name=subject_name)
    except Subject.DoesNotExist:
        subject = Subject.objects.create(name=subject_name, authority=authority)
    subject.works.add(work)
|
||||
|
||||
def get_title(book):
    """Return the title of ``book``.

    'FullTitle' (UMich) wins when present; otherwise 'Title' (OBP) is
    used, followed by ': <Subtitle>' when a subtitle is given.
    """
    full_title = book.get('FullTitle', '') #UMICH
    if full_title:
        return full_title
    main_title = book.get('Title', '') #OBP
    subtitle = book.get('Subtitle', '')
    return u'{}: {}'.format(main_title, subtitle) if subtitle else main_title
|
||||
|
||||
def get_cover(book):
|
||||
cover_url = book.get('Cover URL','') #OBP
|
||||
cover_url = book.get('Cover URL', '') #OBP
|
||||
if cover_url:
|
||||
return cover_url
|
||||
url = book['URL']
|
||||
if "10.3998" in url:
|
||||
# code for umich books; can generalize, of course!
|
||||
idmatch= re.search( r'([^/]+)\.(\d+\.\d+\.\d+)', url)
|
||||
idmatch = re.search(r'([^/]+)\.(\d+\.\d+\.\d+)', url)
|
||||
if idmatch:
|
||||
book_id = idmatch.group(2)
|
||||
if idmatch.group(1) == 'ohp':
|
||||
|
@ -116,74 +129,78 @@ def get_cover(book):
|
|||
else:
|
||||
cover_url = "http://quod.lib.umich.edu/d/dculture/images/{}.jpg".format(book_id)
|
||||
cover = requests.head(cover_url)
|
||||
if cover.status_code<400:
|
||||
if cover.status_code < 400:
|
||||
return cover_url
|
||||
else:
|
||||
logger.warning( "bad cover: {} for: {}".format(cover_url, url))
|
||||
|
||||
logger.warning("bad cover: {} for: {}".format(cover_url, url))
|
||||
|
||||
def get_isbns(book):
    """Collect the ISBNs of ``book`` and look for an existing Edition.

    Returns a tuple (isbns, edition): ``isbns`` holds every value from the
    ISBN columns that ISBN(...).to_string() accepts, in column order;
    ``edition`` is the first truthy result of Edition.get_by_isbn over
    those ISBNs, or None when there are no ISBNs.
    """
    #'ISBN 1' is OBP, others are UMICH
    isbn_columns = [
        'eISBN', 'ISBN 3', 'PaperISBN', 'ISBN 2', 'ClothISBN',
        'ISBN 1', 'ISBN 4', 'ISBN 5',
    ]
    isbns = []
    for column in isbn_columns:
        raw = book.get(column, '')
        if raw in ('', 'N/A'):
            continue
        # a cell may hold several comma-separated ISBNs
        for candidate in raw.split(','):
            cleaned = ISBN(candidate).to_string()
            if cleaned:
                isbns.append(cleaned)

    edition = None
    for isbn in isbns:
        edition = Edition.get_by_isbn(isbn)
        if edition:
            break
    return (isbns, edition)
|
||||
|
||||
def get_pubdate(book):
    """Return the publication date of ``book`` as a string.

    'CopyrightYear' (UMich) is returned as is. Otherwise the OBP
    'publication year' / 'month' / 'day' columns are joined with '-',
    using as many parts as are present: year-month-day when a day is
    given, year-month when only a month is given, else the year alone.
    """
    copyright_year = book.get('CopyrightYear', '') #UMICH
    if copyright_year:
        return copyright_year
    year = book.get('publication year', '') #OBP
    month = book.get('publication month', '')
    day = book.get('publication day', '')
    if day:
        return u'{}-{}-{}'.format(year, month, day)
    if month:
        return u'{}-{}'.format(year, month)
    return year
|
||||
|
||||
def get_publisher(book):
    """Return the publisher name of ``book``, or None when it is unknown.

    The 'Publisher' column is used when filled in; failing that, the DOI
    prefix 10.11647 identifies Open Book Publishers.
    """
    publisher = book.get('Publisher', '')
    if not publisher and book.get('DOI prefix', '') == '10.11647':
        publisher = "Open Book Publishers"
    return publisher if publisher else None
|
||||
|
||||
|
||||
def get_url(book):
    """Return the URL of ``book``.

    The 'URL' column is used when filled in; otherwise a doi.org link is
    built from the 'DOI prefix' and 'DOI suffix' columns.
    """
    explicit_url = book.get('URL', '')
    if explicit_url:
        return explicit_url
    doi_prefix = book.get('DOI prefix', '')
    doi_suffix = book.get('DOI suffix', '')
    return u'https://doi.org/{}/{}'.format(doi_prefix, doi_suffix)
|
||||
|
||||
def get_description(book):
    """Return 'DescriptionBrief' of ``book``, else its 'Plain Text Blurb'."""
    brief = book.get('DescriptionBrief', '')
    if brief:
        return brief
    return book.get('Plain Text Blurb', '')
|
||||
|
||||
def get_language(book):
    """Return the 'ISO Language Code' column of ``book`` ('' when absent)."""
    return book.get('ISO Language Code', '')
|
||||
|
||||
|
||||
|
||||
def load_from_books(books):
|
||||
''' books is an iterator of book dicts.
|
||||
each book must have attributes
|
||||
(umich dialect)
|
||||
eISBN, ClothISBN, PaperISBN, Publisher, FullTitle, Title, Subtitle, AuthorsList,
|
||||
Author1Last, Author1First, Author1Role, Author2Last, Author2First, Author2Role, Author3Last,
|
||||
Author3First, Author3Role, AuthorBio, TableOfContents, Excerpt, DescriptionLong,
|
||||
DescriptionBrief, BISACCode1, BISACCode2, BISACCode3, CopyrightYear, ePublicationDate,
|
||||
eListPrice, ListPriceCurrencyType, List Price in USD (paper ISBN), eTerritoryRights,
|
||||
eISBN, ClothISBN, PaperISBN, Publisher, FullTitle, Title, Subtitle, AuthorsList,
|
||||
Author1Last, Author1First, Author1Role, Author2Last, Author2First, Author2Role, Author3Last,
|
||||
Author3First, Author3Role, AuthorBio, TableOfContents, Excerpt, DescriptionLong,
|
||||
DescriptionBrief, BISACCode1, BISACCode2, BISACCode3, CopyrightYear, ePublicationDate,
|
||||
eListPrice, ListPriceCurrencyType, List Price in USD (paper ISBN), eTerritoryRights,
|
||||
SubjectListMARC, , Book-level DOI, URL, License
|
||||
|
||||
|
||||
'''
|
||||
|
||||
# Goal: get or create an Edition and Work for each given book
|
||||
|
@ -194,21 +211,21 @@ def load_from_books(books):
|
|||
|
||||
# try first to get an Edition already in DB with by one of the ISBNs in book
|
||||
(isbns, edition) = get_isbns(book)
|
||||
if len(isbns)==0:
|
||||
if not isbns:
|
||||
continue
|
||||
title=get_title(book)
|
||||
title = get_title(book)
|
||||
authors = get_authors(book)
|
||||
|
||||
# if matching by ISBN doesn't work, then create a Work and Edition
|
||||
# if matching by ISBN doesn't work, then create a Work and Edition
|
||||
# with a title and the first ISBN
|
||||
if not edition:
|
||||
work = Work(title=title)
|
||||
work.save()
|
||||
edition= Edition(title=title, work=work)
|
||||
edition = Edition(title=title, work=work)
|
||||
edition.save()
|
||||
Identifier.set(type='isbn', value=isbns[0], edition=edition, work=work)
|
||||
|
||||
work=edition.work
|
||||
work = edition.work
|
||||
|
||||
# at this point, work and edition exist
|
||||
url = get_url(book)
|
||||
|
@ -222,7 +239,7 @@ def load_from_books(books):
|
|||
if edition and edition.work != work:
|
||||
work = merge_works(work, edition.work)
|
||||
if not edition:
|
||||
edition= Edition(title=title, work=work)
|
||||
edition = Edition(title=title, work=work)
|
||||
edition.save()
|
||||
Identifier.set(type='isbn', value=isbn, edition=edition, work=work)
|
||||
|
||||
|
@ -234,18 +251,18 @@ def load_from_books(books):
|
|||
edition.save()
|
||||
edition.set_publisher(get_publisher(book))
|
||||
|
||||
# possibly replace work.description
|
||||
# possibly replace work.description
|
||||
description = get_description(book)
|
||||
if len(description)>len (work.description):
|
||||
if len(description) > len(work.description):
|
||||
work.description = description
|
||||
work.save()
|
||||
|
||||
|
||||
# set language
|
||||
lang= get_language(book)
|
||||
lang = get_language(book)
|
||||
if lang:
|
||||
work.language = lang
|
||||
work.save()
|
||||
|
||||
|
||||
# add a bisac subject (and ancestors) to work
|
||||
for bisacsh in get_subjects(book):
|
||||
while bisacsh:
|
||||
|
@ -258,13 +275,13 @@ def load_from_books(books):
|
|||
results.append((book, work, edition))
|
||||
|
||||
try:
|
||||
logger.info (u"{} {} {}\n".format(i, title, loading_ok))
|
||||
logger.info(u"{} {} {}\n".format(i, title, loading_ok))
|
||||
except Exception as e:
|
||||
logger.info (u"{} {}\n".format(i, title, str(e) ))
|
||||
logger.info(u"{} {} {}\n".format(i, title, str(e)))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
|
||||
def loaded_book_ok(book, work, edition):
|
||||
|
||||
isbns = get_isbns(book)[0]
|
||||
|
@ -277,10 +294,10 @@ def loaded_book_ok(book, work, edition):
|
|||
try:
|
||||
url_id = Identifier.objects.get(type='http', value=get_url(book))
|
||||
if url_id is None:
|
||||
logger.info ("url_id problem: work.id {}, url: {}".format(work.id, get_url(book)))
|
||||
logger.info("url_id problem: work.id {}, url: {}".format(work.id, get_url(book)))
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.info (str(e))
|
||||
logger.info(str(e))
|
||||
return False
|
||||
|
||||
# isbns
|
||||
|
@ -292,15 +309,17 @@ def loaded_book_ok(book, work, edition):
|
|||
try:
|
||||
edition_for_isbn = Identifier.objects.get(type='isbn', value=isbn).edition
|
||||
except Exception as e:
|
||||
print (e)
|
||||
logger.info(e)
|
||||
return False
|
||||
|
||||
# authors
|
||||
# print set([ed.name for ed in edition_for_isbn.authors.all()])
|
||||
|
||||
if (set([utf8_general_ci_norm(author[0]) for author in authors]) !=
|
||||
set([utf8_general_ci_norm(ed.name) for ed in edition_for_isbn.authors.all()])):
|
||||
print "problem with authors"
|
||||
if (
|
||||
set([utf8_general_ci_norm(author[0]) for author in authors]) !=
|
||||
set([utf8_general_ci_norm(ed.name) for ed in edition_for_isbn.authors.all()])
|
||||
):
|
||||
logger.info("problem with authors")
|
||||
return False
|
||||
|
||||
try:
|
||||
|
@ -312,7 +331,7 @@ def loaded_book_ok(book, work, edition):
|
|||
|
||||
# work description
|
||||
description = get_description(book)
|
||||
if not ((work.description == description) or (len(description) <len (work.description))):
|
||||
if not ((work.description == description) or (len(description) < len(work.description))):
|
||||
return False
|
||||
|
||||
# bisac
|
||||
|
@ -331,14 +350,15 @@ def loaded_book_ok(book, work, edition):
|
|||
return True
|
||||
|
||||
ID_URLPATTERNS = {
|
||||
'goog': re.compile(r'[\./]google\.com/books\?.*id=([a-zA-Z0-9\-_]{12})'),
|
||||
'olwk': re.compile(r'[\./]openlibrary\.org(/works/OL\d{1,8}W)'),
|
||||
'gdrd': re.compile(r'[\./]goodreads\.com/book/show/(\d{1,8})'),
|
||||
'ltwk': re.compile(r'[\./]librarything\.com/work/(\d{1,8})'),
|
||||
'oclc': re.compile(r'\.worldcat\.org/.*oclc/(\d{8,12})'),
|
||||
'doi': re.compile(r'[\./]doi\.org/(10\.\d+/\S+)'),
|
||||
'gtbg': re.compile(r'[\./]gutenberg\.org/ebooks/(\d{1,6})'),
|
||||
'glue': re.compile(r'[\./]unglue\.it/work/(\d{1,7})'),
|
||||
'goog': re.compile(r'[\./]google\.com/books\?.*id=(?P<id>[a-zA-Z0-9\-_]{12})'),
|
||||
'olwk': re.compile(r'[\./]openlibrary\.org(?P<id>/works/OL\d{1,8}W)'),
|
||||
'doab': re.compile(r'([\./]doabooks\.org/doab\?.*rid:|=oai:doab-books:)(?P<id>\d{1,8})'),
|
||||
'gdrd': re.compile(r'[\./]goodreads\.com/book/show/(?P<id>\d{1,8})'),
|
||||
'ltwk': re.compile(r'[\./]librarything\.com/work/(?P<id>\d{1,8})'),
|
||||
'oclc': re.compile(r'\.worldcat\.org/.*oclc/(?P<id>\d{8,12})'),
|
||||
'doi': re.compile(r'[\./]doi\.org/(?P<id>10\.\d+/\S+)'),
|
||||
'gtbg': re.compile(r'[\./]gutenberg\.org/ebooks/(?P<id>\d{1,6})'),
|
||||
'glue': re.compile(r'[\./]unglue\.it/work/(?P<id>\d{1,7})'),
|
||||
}
|
||||
|
||||
def ids_from_urls(url):
|
||||
|
@ -346,7 +366,128 @@ def ids_from_urls(url):
|
|||
for ident in ID_URLPATTERNS.keys():
|
||||
id_match = ID_URLPATTERNS[ident].search(url)
|
||||
if id_match:
|
||||
ids[ident] = id_match.group(1)
|
||||
ids[ident] = id_match.group('id')
|
||||
return ids
|
||||
|
||||
|
||||
|
||||
DROPBOX_DL = re.compile(r'"(https://dl.dropboxusercontent.com/content_link/[^"]+)"')
|
||||
|
||||
def dl_online(ebook):
    """Try to harvest a downloadable file for an 'online' ebook.

    Only Dropbox share links and jbe-platform.com book pages are handled;
    the landing page is fetched and searched for a direct download link.

    Returns:
        The (EbookFile, created) pair from make_dl_ebook when a download
        link is found; (None, False) otherwise, including for ebooks whose
        format is not 'online'.
    """
    if ebook.format != 'online':
        return None, False

    if ebook.url.find(u'dropbox.com/s/') >= 0:
        response = requests.get(ebook.url, headers={"User-Agent": settings.USER_AGENT})
        if response.status_code == 200:
            match_dl = DROPBOX_DL.search(response.content)
            if match_dl:
                return make_dl_ebook(match_dl.group(1), ebook)
            # the share page loaded but held no direct-download link
            logger.warning('couldn\'t get dl for {}'.format(ebook.url))
        else:
            # the share page itself could not be fetched
            logger.warning('couldn\'t get {}'.format(ebook.url))

    elif ebook.url.find(u'jbe-platform.com/content/books/') >= 0:
        doc = get_soup(ebook.url)
        if doc:
            obj = doc.select_one('div.fulltexticoncontainer-PDF a')
            if obj:
                dl_url = urlparse.urljoin(ebook.url, obj['href'])
                return make_dl_ebook(dl_url, ebook)
            logger.warning('couldn\'t get dl_url for {}'.format(ebook.url))
        else:
            logger.warning('couldn\'t get soup for {}'.format(ebook.url))

    return None, False
|
||||
|
||||
def make_dl_ebook(url, ebook):
    """Download ``url`` and store it as a hosted copy of ``ebook``.

    Returns a tuple (EbookFile, created). An EbookFile whose source is
    already ``ebook.url`` is returned with created=False and nothing is
    downloaded. When the download fails, or type_for_url classifies it as
    'online', the result is (None, False).
    """
    already_harvested = EbookFile.objects.filter(source=ebook.url)
    if already_harvested:
        return already_harvested[0], False

    response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
    if response.status_code != 200:
        logger.warning('couldn\'t get {}'.format(url))
        return None, False

    # a missing or zero Content-Length is stored as None
    filesize = int(response.headers.get("Content-Length", 0)) or None
    file_format = type_for_url(url, content_type=response.headers.get('content-type'))
    if file_format == 'online':
        logger.warning('download format for {} is not ebook'.format(url))
        return None, False

    new_ebf = EbookFile.objects.create(
        edition=ebook.edition,
        format=file_format,
        source=ebook.url,
    )
    new_ebf.file.save(path_for_file(new_ebf, None), ContentFile(response.content))
    new_ebf.save()
    # the Ebook record points at the stored file and inherits the
    # rights/version fields of the online ebook it was harvested from
    new_ebf.ebook = Ebook.objects.create(
        edition=ebook.edition,
        format=file_format,
        provider='Unglue.it',
        url=new_ebf.file.url,
        rights=ebook.rights,
        filesize=filesize,
        version_label=ebook.version_label,
        version_iter=ebook.version_iter,
    )
    new_ebf.save()
    return new_ebf, True
|
||||
|
||||
def type_for_url(url, content_type=None):
    """Classify the resource at ``url`` as an ebook format name.

    Returns '' for an empty url and 'online' for books.openedition.org
    links. A url already recorded on an Ebook gets that Ebook's format.
    Otherwise the content type (the given one, else looked up through
    contenttyper) is mapped to one of 'pdf', 'epub', 'text', 'html',
    'online' or 'mobi', falling back to 'other'.
    """
    if not url:
        return ''
    if url.find('books.openedition.org') >= 0:
        return 'online'

    known_ebooks = Ebook.objects.filter(url=url)
    if known_ebooks:
        return known_ebooks[0].format

    ct = content_type or contenttyper.calc_type(url)
    if re.search("pdf", ct):
        return "pdf"
    if re.search("octet-stream", ct):
        # generic binary: let the file name in the url decide
        if re.search("pdf", url, flags=re.I):
            return "pdf"
        if re.search("epub", url, flags=re.I):
            return "epub"
    if re.search("text/plain", ct):
        return "text"
    if re.search("text/html", ct):
        return "html" if url.find('oapen.org/view') >= 0 else "online"
    if re.search("epub", ct):
        return "epub"
    if re.search("mobi", ct):
        return "mobi"
    return "other"
|
||||
|
||||
class ContentTyper(object):
    """Look up the content type of URLs, pausing between calls to a host.

    ``last_call`` maps a network location to the time.time() of the most
    recent calc_type call for a url on that host.
    """
    def __init__(self):
        self.last_call = dict()

    def content_type(self, url):
        """Return the content-type header of a HEAD request to ``url``.

        Best effort: '' is returned when the header is absent or the
        request fails.
        """
        try:
            r = requests.head(url)
            return r.headers.get('content-type', '')
        except Exception:
            # not a bare except: KeyboardInterrupt/SystemExit must still
            # be able to stop a long harvesting run
            return ''

    def calc_type(self, url):
        """Return the content type of ``url``, waiting first if needed.

        Sleeps so that at least ``delay`` seconds separate this call from
        the previous one for the same network location.
        """
        delay = 1
        # is there a delay associated with the url
        netloc = urlparse.urlparse(url).netloc

        # wait if necessary
        last_call = self.last_call.get(netloc)
        if last_call is not None:
            now = time.time()
            min_time_next_call = last_call + delay
            if min_time_next_call > now:
                time.sleep(min_time_next_call-now)

        self.last_call[netloc] = time.time()

        # compute the content-type
        return self.content_type(url)
|
||||
|
||||
contenttyper = ContentTyper()
|
||||
|
|
|
@ -5,18 +5,18 @@ from regluit.core.models import Work
|
|||
from regluit.core.loaders.doab import update_cover_doab
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "make covers for doab editions"
|
||||
help = "make covers for doab editions with bad covers"
|
||||
|
||||
def handle(self, **options):
|
||||
|
||||
works = Work.objects.filter(selected_edition__isnull=False, selected_edition__cover_image__isnull=True)
|
||||
#.filter(selected_edition__isnull=False, selected_edition__cover_image__isnull=True)
|
||||
#.exclude(selected_edition__identifiers__type='goog')
|
||||
added = 0
|
||||
for (i, work) in enumerate(works):
|
||||
if work.doab and work.selected_edition.googlebooks_id == '':
|
||||
update_cover_doab(work.doab, work.selected_edition)
|
||||
added += 1
|
||||
print ('\r {}:{}'.format(i, added), end='')
|
||||
|
||||
print('added {} covers'.format(added))
|
||||
works = Work.objects.filter(identifiers__type='doab').distinct()
|
||||
print('checking {} works with doab'.format(works.count()))
|
||||
num = 0
|
||||
for work in works:
|
||||
if not work.cover_image_thumbnail():
|
||||
update_cover_doab(work.doab, work.preferred_edition, store_cover=True)
|
||||
#print(work.doab)
|
||||
num += 1
|
||||
if num % 10 == 0:
|
||||
print('{} doab covers updated'.format(num))
|
||||
#break
|
||||
print('Done: {} doab covers updated'.format(num))
|
|
@ -1,6 +1,7 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
|
||||
from regluit.core.models import Subject
|
||||
from regluit.core.validation import valid_subject
|
||||
|
||||
|
||||
|
||||
|
@ -27,3 +28,8 @@ class Command(BaseCommand):
|
|||
for work in subject.works.all():
|
||||
Subject.set_by_name(subject.name, work=work)
|
||||
subject.delete()
|
||||
|
||||
period_subjects = Subject.objects.filter(name__contains=".")
|
||||
for subject in period_subjects:
|
||||
if not valid_subject(subject.name):
|
||||
subject.delete()
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from regluit.core.loaders import doab
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "load doab auths"
|
||||
args = "<limit> <file_name>"
|
||||
|
||||
def handle(self, limit=None, file_name="../../../bookdata/doab_auths.json", **options):
|
||||
|
||||
command_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(command_dir, file_name)
|
||||
doab.load_doab_auths(file_path, limit=int(limit) if limit else None)
|
|
@ -1,17 +0,0 @@
|
|||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from regluit.core.loaders import doab
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "load doab books"
|
||||
args = "<limit> <file_name>"
|
||||
|
||||
def handle(self, limit=None, file_name="../../../bookdata/doab.json", **options):
|
||||
|
||||
command_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(command_dir, file_name)
|
||||
doab.load_doab_records(file_path, limit=int(limit))
|
|
@ -0,0 +1,21 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
|
||||
from regluit.core.loaders.utils import dl_online
|
||||
from regluit.core.models import Ebook
|
||||
|
||||
class Command(BaseCommand):
    """Management command that harvests files for ebooks of format 'online'."""
    help = "harvest downloadable ebooks from 'online' ebooks"
    args = "<limit>"

    def handle(self, limit=0, **options):
        """Run dl_online over the 'online' ebooks until ``limit`` is reached.

        ``limit`` is the number of newly harvested ebooks after which the
        command stops. With the default of 0 it stops after the first one.
        """
        limit = int(limit) if limit else 0
        onlines = Ebook.objects.filter(format='online')
        done = 0
        for online in onlines:
            new_ebf, new = dl_online(online)
            if new_ebf and new:
                done += 1
                # `>=` so that at most `limit` ebooks are harvested
                if done >= limit:
                    break
        print('harvested {} ebooks'.format(done))
|
||||
|
|
@ -30,9 +30,9 @@ class Command(BaseCommand):
|
|||
books = []
|
||||
for sitemap in content:
|
||||
added = add_by_sitemap(sitemap.strip(), maxnum=max)
|
||||
max = max - len(added)
|
||||
max = max - len(added) if max else max
|
||||
books = books + added
|
||||
if max < 0:
|
||||
if max and max < 0:
|
||||
break
|
||||
else:
|
||||
books = add_by_sitemap(url, maxnum=max)
|
||||
|
|
|
@ -4,9 +4,9 @@ from regluit.core.loaders.springer import load_springer
|
|||
|
||||
class Command(BaseCommand):
|
||||
help = "load books from springer open"
|
||||
args = "<pages>"
|
||||
args = "<startpage> <endpage>"
|
||||
|
||||
|
||||
def handle(self, pages, **options):
|
||||
books = load_springer(int(pages))
|
||||
def handle(self, startpage, endpage=0, **options):
|
||||
books = load_springer(int(startpage), int(endpage))
|
||||
print "loaded {} books".format(len(books))
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
|
||||
from regluit.core.loaders import doab
|
||||
|
||||
class Command(BaseCommand):
    """Management command that loads one DOAB book, given its doab id."""
    help = "load doab books by doab_id via oai"
    args = "<doab_id>"

    def handle(self, doab_id, **options):
        """Pass ``doab_id`` to doab.add_by_doab, which does the loading."""
        doab.add_by_doab(doab_id)
|
|
@ -0,0 +1,18 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
|
||||
from regluit.core.loaders import doab
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "load doab books via oai"
|
||||
args = "<from_year> <limit>"
|
||||
|
||||
def handle(self, from_year= None, limit=None, **options):
|
||||
from_year = int(from_year) if from_year else None
|
||||
limit = int(limit) if limit else None
|
||||
if limit:
|
||||
doab.load_doab_oai(from_year=from_year, limit=limit)
|
||||
else:
|
||||
if from_year:
|
||||
doab.load_doab_oai(from_year=from_year)
|
||||
else:
|
||||
doab.load_doab_oai()
|
|
@ -4,9 +4,9 @@ from random import randint, randrange
|
|||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.utils.timezone import now
|
||||
|
||||
from regluit.core.models import Work, Campaign
|
||||
from regluit.utils.localdatetime import now
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "creates random campaigns for any works that lack one for testing"
|
||||
|
|
|
@ -26,6 +26,7 @@ from django.core.files.base import ContentFile
|
|||
from django.db import models
|
||||
from django.db.models import F, Q
|
||||
from django.db.models.signals import post_save
|
||||
from django.utils.timezone import now
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
#regluit imports
|
||||
|
@ -45,8 +46,9 @@ from regluit.payment.parameters import (
|
|||
TRANSACTION_STATUS_FAILED,
|
||||
TRANSACTION_STATUS_INCOMPLETE
|
||||
)
|
||||
|
||||
from regluit.utils import encryption as crypto
|
||||
from regluit.utils.localdatetime import now, date_today
|
||||
from regluit.utils.localdatetime import date_today
|
||||
|
||||
from regluit.core.parameters import (
|
||||
REWARDS,
|
||||
|
|
|
@ -20,10 +20,10 @@ from django.core.urlresolvers import reverse
|
|||
from django.db import models
|
||||
from django.db.models import F
|
||||
from django.db.models.signals import post_save, pre_delete
|
||||
from django.utils.timezone import now
|
||||
|
||||
import regluit
|
||||
from regluit.marc.models import MARCRecord as NewMARC
|
||||
from regluit.utils.localdatetime import now
|
||||
from questionnaire.models import Landing
|
||||
|
||||
from regluit.core import mobi
|
||||
|
@ -1082,8 +1082,7 @@ class EbookFile(models.Model):
|
|||
asking=self.asking,
|
||||
source=self.file.url
|
||||
)
|
||||
|
||||
new_mobi_ebf.file.save(path_for_file('ebf', None), mobi_cf)
|
||||
new_mobi_ebf.file.save(path_for_file(new_mobi_ebf, None), mobi_cf)
|
||||
new_mobi_ebf.save()
|
||||
if self.ebook:
|
||||
new_ebook = Ebook.objects.create(
|
||||
|
|
|
@ -42,7 +42,7 @@ OTHER_ID_CHOICES = (
|
|||
('edid', 'pragmatic edition ID'),
|
||||
)
|
||||
|
||||
WORK_IDENTIFIERS = ('doi','olwk','glue','ltwk', 'http')
|
||||
WORK_IDENTIFIERS = ('doi','olwk','glue','ltwk', 'http', 'doab')
|
||||
|
||||
ID_CHOICES_MAP = dict(ID_CHOICES)
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ from django.db.utils import DatabaseError
|
|||
from django.dispatch import Signal
|
||||
from django.utils.translation import ugettext_noop as _
|
||||
from django.template.loader import render_to_string
|
||||
from django.utils.timezone import now
|
||||
|
||||
from notification import models as notification
|
||||
|
||||
|
@ -29,9 +30,9 @@ from notification import models as notification
|
|||
regluit imports
|
||||
"""
|
||||
from regluit.payment.signals import transaction_charged, transaction_failed, pledge_modified, pledge_created
|
||||
from regluit.utils.localdatetime import now, date_today
|
||||
from regluit.core.parameters import REWARDS, BUY2UNGLUE, THANKS, LIBRARY, RESERVE, THANKED
|
||||
from regluit.libraryauth.models import Library, LibraryUser
|
||||
from regluit.utils.localdatetime import date_today
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -100,7 +101,7 @@ def create_notice_types( **kwargs):
|
|||
notification.create_notice_type("purchase_notgot_gift", _("Your gift wasn't received."), _("The ebook you sent as a gift has not yet been redeemed."))
|
||||
notification.create_notice_type("donation", _("Your donation was processed."), _("Thank you, your generous donation has been processed."))
|
||||
|
||||
signals.post_syncdb.connect(create_notice_types, sender=notification)
|
||||
signals.post_migrate.connect(create_notice_types, sender=notification)
|
||||
|
||||
# define the notifications and tie them to corresponding signals
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ django imports
|
|||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.mail import send_mail
|
||||
from django.utils.timezone import now
|
||||
from notification.engine import send_all
|
||||
from notification import models as notification
|
||||
|
||||
|
@ -29,8 +30,7 @@ from regluit.core import (
|
|||
from regluit.core.models import Campaign, Acq, Gift
|
||||
from regluit.core.signals import deadline_impending
|
||||
from regluit.core.parameters import RESERVE, REWARDS, THANKS
|
||||
|
||||
from regluit.utils.localdatetime import now, date_today
|
||||
from regluit.utils.localdatetime import date_today
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
845
core/tests.py
845
core/tests.py
File diff suppressed because it is too large
Load Diff
|
@ -19,7 +19,7 @@ ID_VALIDATION = {
|
|||
'http': (re.compile(r"(https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+(/[^\s]*)?$",
|
||||
flags=re.IGNORECASE|re.S),
|
||||
"The Web Address must be a valid http(s) URL."),
|
||||
'isbn': (r'^([\dxX\-–— ]+|delete)$',
|
||||
'isbn': (u'^([\\dxX \\-–—‐,;]+|delete)$', #includes unicode hyphen, endash and emdash
|
||||
"The ISBN must be a valid ISBN-13."),
|
||||
'doab': (r'^(\d{1,6}|delete)$',
|
||||
"The value must be 1-6 digits."),
|
||||
|
@ -44,8 +44,6 @@ ID_VALIDATION = {
|
|||
}
|
||||
|
||||
def isbn_cleaner(value):
|
||||
if value == 'delete':
|
||||
return value
|
||||
if not value:
|
||||
raise ValidationError('no identifier value found')
|
||||
elif value == 'delete':
|
||||
|
@ -132,6 +130,8 @@ def valid_xml_char_ordinal(c):
|
|||
)
|
||||
|
||||
def valid_subject(subject_name):
|
||||
if len(subject_name) > 200:
|
||||
return False
|
||||
num_commas = 0
|
||||
for c in subject_name:
|
||||
if not valid_xml_char_ordinal(c):
|
||||
|
@ -140,6 +140,10 @@ def valid_subject(subject_name):
|
|||
num_commas += 1
|
||||
if num_commas > 2:
|
||||
return False
|
||||
if len(subject_name.split('--')) > 6:
|
||||
return False
|
||||
if len(subject_name.split('. ')) > 4:
|
||||
return False
|
||||
return True
|
||||
|
||||
reverse_name_comma = re.compile(r',(?! *Jr[\., ])')
|
||||
|
|
|
@ -149,14 +149,27 @@ class EditionForm(forms.ModelForm):
|
|||
id_type = self.cleaned_data['id_type']
|
||||
id_value = self.cleaned_data.get('id_value','').strip()
|
||||
if id_value:
|
||||
identifier = Identifier.objects.filter(type=id_type, value=id_value)
|
||||
if identifier:
|
||||
err_msg = "{} is a duplicate for work #{}.".format(identifier[0], identifier[0].work_id)
|
||||
self.add_error('id_value', forms.ValidationError(err_msg))
|
||||
try:
|
||||
self.cleaned_data['id_value'] = identifier_cleaner(id_type)(id_value)
|
||||
id_value = identifier_cleaner(id_type)(id_value)
|
||||
identifier = Identifier.objects.filter(type=id_type, value=id_value)
|
||||
ident = identifier[0] if identifier else None
|
||||
if not ident or not self.instance:
|
||||
self.cleaned_data['id_value'] = id_value
|
||||
elif ident.edition_id == self.instance.id:
|
||||
self.cleaned_data['id_value'] = id_value
|
||||
elif not ident.edition_id and ident.work_id == self.instance.work_id:
|
||||
self.cleaned_data['id_value'] = id_value
|
||||
else:
|
||||
if ident.edition_id:
|
||||
err_msg = "{} is a duplicate for edition #{}.".format(id_value, ident.edition_id)
|
||||
else:
|
||||
err_msg = "{} is a duplicate for work #{}.".format(id_value, ident.work_id)
|
||||
self.add_error('id_value', forms.ValidationError(err_msg))
|
||||
except forms.ValidationError, ve:
|
||||
self.add_error('id_value', forms.ValidationError('{}: {}'.format(ve.message, id_value)))
|
||||
self.add_error(
|
||||
'id_value',
|
||||
forms.ValidationError('{}: {}'.format(ve.message, id_value))
|
||||
)
|
||||
return self.cleaned_data
|
||||
|
||||
class Meta:
|
||||
|
|
|
@ -13,11 +13,11 @@ from django.conf import settings
|
|||
from django.forms.extras.widgets import SelectDateWidget
|
||||
from django.forms.widgets import RadioSelect
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
from django.utils.timezone import now
|
||||
|
||||
from regluit.core.lookups import OwnerLookup
|
||||
from regluit.core.models import Campaign, Edition, Claim, RightsHolder, WasWork
|
||||
from regluit.core.parameters import *
|
||||
from regluit.utils.localdatetime import now
|
||||
|
||||
class RightsHolderForm(forms.ModelForm):
|
||||
email = forms.EmailField(
|
||||
|
|
|
@ -171,7 +171,7 @@
|
|||
<div class="column show-for-medium">
|
||||
<span>Contact</span>
|
||||
<ul>
|
||||
<li> <a href="mailto:info@ebookfoundation.org"><i class="fa fa-envelope fa-2x"></i></a> <a href="https://twitter.com/unglueit"><i class="fa fa-twitter fa-2x"></i></a> <a href="https://facebook/com/unglueit"><i class="fa fa-facebook fa-2x"></i></a></li>
|
||||
<li> <a href="mailto:info@ebookfoundation.org"><i class="fa fa-envelope fa-2x"></i></a> <a href="https://twitter.com/unglueit"><i class="fa fa-twitter fa-2x"></i></a> <a href="https://facebook.com/unglueit"><i class="fa fa-facebook fa-2x"></i></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'work_list.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load lang_utils %}
|
||||
|
||||
{% block title %} Works published by {{ pubname }} {% endblock %}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'base.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load lang_utils %}
|
||||
{% load sass_tags %}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'base.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load lang_utils %}
|
||||
{% load sass_tags %}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
{% block doccontent %}
|
||||
|
||||
<h2>Rights Holder Claim Form </h2>
|
||||
{% if work %}
|
||||
<h3> Rightsholder making claim </h3>
|
||||
{{ rights_holder.rights_holder_name }}
|
||||
<h3> Work being claimed </h3>
|
||||
|
@ -42,4 +43,7 @@
|
|||
<input type="submit" name="submit" value="Confirm Claim">
|
||||
</form>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
Please find a work to claim.
|
||||
{% endif %}
|
||||
{% endblock %}
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'base.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load lang_utils %}
|
||||
{% load sass_tags %}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'base.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load sass_tags %}
|
||||
{% load truncatechars %}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'work_list.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load lang_utils %}
|
||||
|
||||
{% block title %} Books we're recommending. {% endblock %}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'base.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load truncatechars %}
|
||||
{% load sass_tags %}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'base.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load lang_utils %}
|
||||
{% load sass_tags %}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{% extends 'base.html' %}
|
||||
|
||||
{% load endless %}
|
||||
{% load el_pagination_tags %}
|
||||
{% load lang_utils %}
|
||||
{% load sass_tags %}
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from django import template
|
||||
from regluit.utils.localdatetime import now
|
||||
from django.utils.timezone import now
|
||||
|
||||
from regluit.core.parameters import REWARDS, BUY2UNGLUE
|
||||
|
||||
register = template.Library()
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
"""
|
||||
The truncatechars filter is part of Django dev, but we're on 1.3.1
|
||||
The following is the filter and its dependencies
|
||||
To use this filter, put "{% load truncatechars %}" at the beginning of your template,
|
||||
then {{ myvariable|truncatechars:num }}
|
||||
"""
|
||||
import unicodedata
|
||||
|
||||
from django.template.base import Library
|
||||
from django.template import Library
|
||||
from django.template.defaultfilters import stringfilter
|
||||
from django.utils.translation import get_language_info
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from regluit.utils.localdatetime import now
|
||||
from django.utils.timezone import now
|
||||
from django import template
|
||||
register = template.Library()
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from regluit.utils.localdatetime import now
|
||||
from django import template
|
||||
from django.utils.timezone import now
|
||||
|
||||
from regluit.core.models import Acq
|
||||
register = template.Library()
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ then {{ myvariable|truncatechars:num }}
|
|||
import unicodedata
|
||||
|
||||
from django import template
|
||||
from django.template.base import Library
|
||||
from django.template import Library
|
||||
from django.template.defaultfilters import stringfilter
|
||||
from django.utils.encoding import force_unicode
|
||||
from django.utils.functional import allow_lazy, SimpleLazyObject
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"""
|
||||
from urllib import unquote
|
||||
|
||||
from django.template.base import Library
|
||||
from django.template import Library
|
||||
from django.template.defaultfilters import stringfilter
|
||||
|
||||
register = Library()
|
||||
|
|
|
@ -13,6 +13,7 @@ from django.core import mail
|
|||
from django.core.urlresolvers import reverse
|
||||
from django.test import TestCase
|
||||
from django.test.client import Client
|
||||
from django.utils.timezone import now
|
||||
|
||||
from notification.models import Notice
|
||||
|
||||
|
@ -21,7 +22,6 @@ from regluit.core.models import Work, Campaign, RightsHolder, Claim, Subject
|
|||
from regluit.payment.models import Transaction
|
||||
from regluit.payment.manager import PaymentManager
|
||||
from regluit.payment.stripelib import StripeClient, TEST_CARDS, ERROR_TESTING, card
|
||||
from regluit.utils.localdatetime import now
|
||||
|
||||
class WishlistTests(TestCase):
|
||||
fixtures = ['initial_data.json', 'neuromancer.json']
|
||||
|
|
|
@ -35,7 +35,7 @@ urlpatterns = [
|
|||
url(r"^rightsholders/campaign/(?P<id>\d+)/mademobi/$", views.manage_campaign, {'action': 'mademobi'}, name="mademobi"),
|
||||
url(r"^rightsholders/edition/(?P<work_id>\d*)/(?P<edition_id>\d*)$", views.edit_edition, {'by': 'rh'}, name="rh_edition"),
|
||||
url(r"^rightsholders/edition/(?P<edition_id>\d*)/upload/$", views.edition_uploads, name="edition_uploads"),
|
||||
url(r"^rightsholders/claim/$", views.claim, name="claim"),
|
||||
url(r"^rightsholders/claim/$", login_required(views.claim), name="claim"),
|
||||
url(r"^rightsholders/surveys/$", views.surveys, name="surveys"),
|
||||
url(r"^rightsholders/new_survey/(?P<work_id>\d*)/?$", views.new_survey, name="new_survey"),
|
||||
url(r"^rightsholders/surveys/answers_(?P<qid>\d+)_(?P<work_id>\d*).csv$", views.export_surveys, name="survey_answers"),
|
||||
|
|
|
@ -45,6 +45,7 @@ from django.template import TemplateDoesNotExist
|
|||
from django.template.loader import render_to_string
|
||||
from django.utils.http import urlencode
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
from django.utils.timezone import now
|
||||
from django.views.decorators.csrf import csrf_exempt
|
||||
from django.views.decorators.http import require_POST
|
||||
from django.views.generic.edit import FormView
|
||||
|
@ -123,11 +124,11 @@ from regluit.payment.parameters import (
|
|||
COMPANY_TITLE
|
||||
)
|
||||
|
||||
from regluit.utils.localdatetime import now, date_today
|
||||
from regluit.libraryauth.forms import UserNamePass
|
||||
from regluit.libraryauth.views import Authenticator, superlogin, login_user
|
||||
from regluit.libraryauth.models import Library
|
||||
from regluit.marc.views import qs_marc_records
|
||||
from regluit.utils.localdatetime import date_today
|
||||
from questionnaire.models import Landing, Questionnaire
|
||||
from questionnaire.views import export_summary as answer_summary, export_csv as export_answers
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ from regluit.core.bookloader import (
|
|||
from regluit.core.parameters import WORK_IDENTIFIERS
|
||||
|
||||
from regluit.core.loaders import add_by_webpage
|
||||
from regluit.core.loaders.doab import add_by_doab
|
||||
from regluit.core.loaders.utils import ids_from_urls
|
||||
from regluit.frontend.forms import EditionForm, IdentifierForm
|
||||
|
||||
|
@ -106,6 +107,11 @@ def get_edition_for_id(id_type, id_value, user=None):
|
|||
if edition:
|
||||
return user_edition(edition, user)
|
||||
|
||||
if identifiers.has_key('doab'):
|
||||
edition = add_by_doab(identifiers['doab'])
|
||||
if edition:
|
||||
return user_edition(edition, user)
|
||||
|
||||
if identifiers.has_key('oclc'):
|
||||
edition = add_by_oclc(identifiers['oclc'])
|
||||
if edition:
|
||||
|
@ -296,11 +302,17 @@ def edit_edition(request, work_id, edition_id, by=None):
|
|||
|
||||
id_type = form.cleaned_data['id_type']
|
||||
id_val = form.cleaned_data['id_value']
|
||||
if id_val == 'delete':
|
||||
if edition.identifiers.exclude(type=id_type):
|
||||
edition.identifiers.filter(type=id_type).delete()
|
||||
if id_val == 'delete':
|
||||
if id_type in WORK_IDENTIFIERS:
|
||||
if edition.work.identifiers.exclude(type=id_type):
|
||||
edition.work.identifiers.filter(type=id_type).delete()
|
||||
else:
|
||||
alert = ('Can\'t delete identifier - must have at least one left.')
|
||||
else:
|
||||
alert = ('Can\'t delete identifier - must have at least one left.')
|
||||
if edition.identifiers.exclude(type=id_type):
|
||||
edition.identifiers.filter(type=id_type).delete()
|
||||
else:
|
||||
alert = ('Can\'t delete identifier - must have at least one left.')
|
||||
elif id_val:
|
||||
models.Identifier.set(
|
||||
type=id_type,
|
||||
|
|
|
@ -88,6 +88,8 @@ class ClaimView(CreateView):
|
|||
return HttpResponseRedirect(reverse('rightsholders'))
|
||||
|
||||
def get_context_data(self, form):
|
||||
if not form.is_valid():
|
||||
return {'form': form}
|
||||
work = form.cleaned_data['work']
|
||||
rights_holder = form.cleaned_data['rights_holder']
|
||||
active_claims = work.claim.exclude(status = 'release')
|
||||
|
|
|
@ -1 +1,9 @@
|
|||
from . import signals
|
||||
from django.apps import AppConfig
|
||||
|
||||
default_app_config = 'regluit.libraryauth.LibraryAuthConfig'
|
||||
|
||||
class LibraryAuthConfig(AppConfig):
|
||||
name = 'regluit.libraryauth'
|
||||
|
||||
def ready(self):
|
||||
from . import signals
|
|
@ -8,7 +8,7 @@ from django.core import validators
|
|||
from django.db import models
|
||||
from django.db.models import Q
|
||||
from django.db.models.signals import post_save
|
||||
from django.forms import IPAddressField as BaseIPAddressField
|
||||
from django.forms import GenericIPAddressField as BaseIPAddressField
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
from django.core.urlresolvers import reverse
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import unicodedata
|
||||
|
||||
from django.template.base import Library
|
||||
from django.template import Library
|
||||
from .. import models
|
||||
|
||||
register = Library()
|
||||
|
|
|
@ -10,12 +10,12 @@ from datetime import timedelta
|
|||
django imports
|
||||
"""
|
||||
from django.http import HttpResponseForbidden
|
||||
from django.utils.timezone import now
|
||||
|
||||
"""
|
||||
regluit imports
|
||||
"""
|
||||
from regluit.payment.models import PaymentResponse
|
||||
from regluit.utils.localdatetime import now, zuluformat
|
||||
|
||||
class ProcessorError(Exception):
|
||||
"""An abstraction around payment processor exceptions"""
|
||||
|
|
|
@ -18,6 +18,7 @@ django imports
|
|||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.utils.timezone import now
|
||||
|
||||
"""
|
||||
regluit imports
|
||||
|
@ -26,7 +27,6 @@ from regluit.payment import credit
|
|||
from regluit.payment.models import Transaction, Receiver, PaymentResponse, Account
|
||||
from regluit.payment.parameters import *
|
||||
from regluit.payment.signals import transaction_charged, pledge_modified, pledge_created
|
||||
from regluit.utils.localdatetime import now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ from django.db.models import Q
|
|||
from django.contrib.sites.models import Site
|
||||
from django.db.models.signals import post_save, post_delete
|
||||
from django.utils.http import urlquote
|
||||
from django.utils.timezone import now
|
||||
|
||||
## django module imports
|
||||
|
||||
|
@ -42,7 +43,7 @@ from regluit.payment.parameters import (
|
|||
)
|
||||
|
||||
from regluit.payment.signals import credit_balance_added, pledge_created
|
||||
from regluit.utils.localdatetime import now, date_today
|
||||
from regluit.utils.localdatetime import date_today
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -6,12 +6,15 @@ external library imports
|
|||
"""
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
import stripe
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from itertools import islice
|
||||
from pytz import utc
|
||||
import re
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
|
||||
import stripe
|
||||
|
||||
"""
|
||||
django imports
|
||||
|
@ -19,6 +22,7 @@ django imports
|
|||
from django.conf import settings
|
||||
from django.core.mail import send_mail
|
||||
from django.http import HttpResponse
|
||||
from django.utils.timezone import now
|
||||
|
||||
"""
|
||||
regluit imports
|
||||
|
@ -35,7 +39,6 @@ from regluit.payment.parameters import (
|
|||
TRANSACTION_STATUS_CANCELED
|
||||
)
|
||||
from regluit.payment.signals import transaction_charged, transaction_failed
|
||||
from regluit.utils.localdatetime import now, zuluformat
|
||||
|
||||
# as of 2013.07.15
|
||||
# ['charge.disputed', 'coupon.updated'] are legacy events -- don't know whether to
|
||||
|
@ -73,12 +76,6 @@ def grouper(iterable, page_size):
|
|||
class StripelibError(baseprocessor.ProcessorError):
|
||||
pass
|
||||
|
||||
try:
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
except:
|
||||
from django.test import TestCase
|
||||
from django.utils import unittest
|
||||
|
||||
# if customer.id doesn't exist, create one and then charge the customer
|
||||
# we probably should ask our users whether they are ok with our creating a customer id account -- or ask for credit
|
||||
|
|
|
@ -5,6 +5,7 @@ import logging
|
|||
import os
|
||||
import time
|
||||
import traceback
|
||||
import unittest
|
||||
|
||||
from datetime import timedelta
|
||||
from decimal import Decimal as D
|
||||
|
@ -19,7 +20,7 @@ from django.contrib.auth.models import User
|
|||
from django.core.exceptions import ValidationError
|
||||
from django.core.validators import URLValidator
|
||||
from django.test import TestCase
|
||||
from django.utils import unittest
|
||||
from django.utils.timezone import now
|
||||
|
||||
"""
|
||||
regluit imports
|
||||
|
@ -29,7 +30,6 @@ from regluit.core.signals import handle_transaction_charged
|
|||
from regluit.payment.manager import PaymentManager
|
||||
from regluit.payment.models import Transaction, Account
|
||||
from regluit.payment.parameters import *
|
||||
from regluit.utils.localdatetime import now
|
||||
|
||||
def setup_selenium():
|
||||
# Set the display window for our xvfb
|
||||
|
|
|
@ -13,7 +13,7 @@ django imports
|
|||
"""
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.contrib.sites.models import RequestSite
|
||||
from django.contrib.sites.requests import RequestSite
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.http import (
|
||||
HttpResponse,
|
||||
|
@ -24,6 +24,7 @@ from django.http import (
|
|||
from django.shortcuts import render_to_response
|
||||
from django.template import RequestContext
|
||||
from django.test.utils import setup_test_environment
|
||||
from django.utils.timezone import now
|
||||
from django.views.decorators.csrf import csrf_exempt
|
||||
from django.views.generic.edit import FormView
|
||||
from django.views.generic.base import TemplateView
|
||||
|
@ -38,7 +39,6 @@ from regluit.payment.models import Transaction
|
|||
from regluit.payment.parameters import *
|
||||
from regluit.payment.stripelib import STRIPE_PK
|
||||
from regluit.payment.tests import PledgeTest, AuthorizeTest
|
||||
from regluit.utils.localdatetime import now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ django-ckeditor==4.5.1
|
|||
git+git://github.com/eshellman/django-email-change.git@57169bdef1c8a41d122e2bab2dcd8564b8fb231d
|
||||
django-compat==1.0.10
|
||||
django-contrib-comments==1.7.1
|
||||
django-endless-pagination==2.0
|
||||
django-el-pagination==3.2.4
|
||||
django-extensions==1.6.1
|
||||
django-jsonfield==1.0.0
|
||||
#django-kombu==0.9.4
|
||||
|
|
|
@ -165,7 +165,7 @@ INSTALLED_APPS = (
|
|||
'social.apps.django_app.default',
|
||||
'tastypie',
|
||||
'djcelery',
|
||||
'endless_pagination',
|
||||
'el_pagination',
|
||||
'selectable',
|
||||
'regluit.frontend.templatetags',
|
||||
'notification',
|
||||
|
|
|
@ -29,7 +29,9 @@ DATABASES = {
|
|||
'PASSWORD': '',
|
||||
'HOST': '',
|
||||
'PORT': '',
|
||||
'TEST_CHARSET': 'utf8',
|
||||
'TEST': {
|
||||
'CHARSET': 'utf8',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,9 @@ DATABASES = {
|
|||
'PASSWORD': DATABASE_PASSWORD,
|
||||
'HOST': DATABASE_HOST,
|
||||
'PORT': '',
|
||||
'TEST_CHARSET': 'utf8'
|
||||
'TEST': {
|
||||
'CHARSET': 'utf8',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,9 @@ DATABASES = {
|
|||
'PASSWORD': DATABASE_PASSWORD,
|
||||
'HOST': DATABASE_HOST,
|
||||
'PORT': '',
|
||||
'TEST_CHARSET': 'utf8',
|
||||
'TEST': {
|
||||
'CHARSET': 'utf8',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,9 @@ DATABASES = {
|
|||
'PASSWORD': DATABASE_PASSWORD,
|
||||
'HOST': DATABASE_HOST,
|
||||
'PORT': '',
|
||||
'TEST_CHARSET': 'utf8',
|
||||
'TEST': {
|
||||
'CHARSET': 'utf8',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,9 @@ DATABASES = {
|
|||
'PASSWORD': '',
|
||||
'HOST': '',
|
||||
'PORT': '',
|
||||
'TEST_CHARSET': 'utf8',
|
||||
'TEST': {
|
||||
'CHARSET': 'utf8',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
import localdatetime
|
||||
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
from django.conf.global_settings import LANGUAGES
|
||||
|
||||
lang2code = dict([ (lang[1].lower(), lang[0]) for lang in LANGUAGES ])
|
||||
code2lang = dict(LANGUAGES)
|
||||
|
||||
def get_language_code(language):
|
||||
return lang2code.get(language.lower().strip(), '')
|
||||
language = language.lower().strip()
|
||||
if language in code2lang:
|
||||
return language
|
||||
return lang2code.get(language, '')
|
||||
|
|
|
@ -1,140 +1,8 @@
|
|||
"""
|
||||
Utility to return datetime.datetime.utcnow() by default but allows for a custom utcnow() (e.g., for testing)
|
||||
from django.utils.timezone import now
|
||||
|
||||
>>> import regluit
|
||||
>>> from regluit.utils.localdatetime import now
|
||||
>>> now()
|
||||
datetime.datetime(2012, 3, 8, 14, 0, 35, 409270)
|
||||
>>> now()
|
||||
datetime.datetime(2012, 3, 8, 14, 0, 36, 985271)
|
||||
>>> n = now()
|
||||
>>> n
|
||||
datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
|
||||
>>> regluit.utils.localdatetime._now = lambda: n
|
||||
>>> now()
|
||||
datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
|
||||
>>> now()
|
||||
datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
|
||||
>>> now()
|
||||
|
||||
DST handled:
|
||||
|
||||
>>> ptz = pytz.timezone('America/Los_Angeles')
|
||||
>>> make_naive(datetime.datetime(2012,03,11,10,tzinfo=utc), ptz)
|
||||
datetime.datetime(2012, 3, 11, 3, 0)
|
||||
>>> make_naive(datetime.datetime(2012,03,11,9,tzinfo=utc), ptz)
|
||||
datetime.datetime(2012, 3, 11, 1, 0)
|
||||
|
||||
>>> make_aware(datetime.datetime(2012,11,4,1,30), ptz)
|
||||
Traceback (most recent call last):
|
||||
File "<console>", line 1, in <module>
|
||||
File "/Users/raymondyee/C/src/Gluejar/regluit/utils/localdatetime.py", line 90, in make_aware
|
||||
return timezone.localize(value, is_dst=None)
|
||||
File "/Users/raymondyee/.virtualenvs/regluit/lib/python2.7/site-packages/pytz/tzinfo.py", line 349, in localize
|
||||
raise AmbiguousTimeError(dt)
|
||||
AmbiguousTimeError: 2012-11-04 01:30:00
|
||||
|
||||
|
||||
"""
|
||||
|
||||
import pytz
|
||||
import datetime
|
||||
import django
|
||||
from django.conf import settings
|
||||
|
||||
# for Django 1.3.x, return a timestamp naive now()
|
||||
# for Django 1.4 should switch to django.utils.timezone.now()
|
||||
# see https://code.djangoproject.com/browser/django/trunk/django/utils/timezone.py?rev=17642#L232
|
||||
|
||||
def now():
|
||||
if hasattr(settings, 'LOCALDATETIME_NOW') and settings.LOCALDATETIME_NOW is not None:
|
||||
return settings.LOCALDATETIME_NOW()
|
||||
else:
|
||||
try:
|
||||
return django.utils.timezone.now()
|
||||
except AttributeError, e:
|
||||
return datetime.datetime.now()
|
||||
|
||||
# provide a replacement for datetime.date.today()
|
||||
# this will be timezone naive -- is that what we really want?
|
||||
# switch to django.utils.timezone.localdate in django 1.11
|
||||
|
||||
def date_today():
|
||||
return now().date()
|
||||
|
||||
# borrow a lot of the routines/code that will be in Django 1.4+ django.utils.timezone
|
||||
# https://code.djangoproject.com/browser/django/trunk/django/utils/timezone.py
|
||||
|
||||
utc = pytz.utc
|
||||
|
||||
def get_default_timezone():
|
||||
return pytz.timezone(settings.TIME_ZONE)
|
||||
|
||||
def is_aware(value):
|
||||
"""
|
||||
Determines if a given datetime.datetime is aware.
|
||||
|
||||
The logic is described in Python's docs:
|
||||
http://docs.python.org/library/datetime.html#datetime.tzinfo
|
||||
"""
|
||||
return value.tzinfo is not None and value.tzinfo.utcoffset(value) is not None
|
||||
|
||||
def is_naive(value):
|
||||
"""
|
||||
Determines if a given datetime.datetime is naive.
|
||||
|
||||
The logic is described in Python's docs:
|
||||
http://docs.python.org/library/datetime.html#datetime.tzinfo
|
||||
"""
|
||||
return value.tzinfo is None or value.tzinfo.utcoffset(value) is None
|
||||
|
||||
def make_aware(value, timezone):
|
||||
"""
|
||||
Makes a naive datetime.datetime in a given time zone aware.
|
||||
"""
|
||||
if hasattr(timezone, 'localize'):
|
||||
# available for pytz time zones
|
||||
return timezone.localize(value, is_dst=None)
|
||||
else:
|
||||
# may be wrong around DST changes
|
||||
return value.replace(tzinfo=timezone)
|
||||
|
||||
def make_naive(value, timezone):
|
||||
"""
|
||||
Makes an aware datetime.datetime naive in a given time zone.
|
||||
"""
|
||||
value = value.astimezone(timezone)
|
||||
if hasattr(timezone, 'normalize'):
|
||||
# available for pytz time zones
|
||||
value = timezone.normalize(value)
|
||||
return value.replace(tzinfo=None)
|
||||
|
||||
def isoformat(value):
|
||||
"""
|
||||
if value is naive, assume it's in the default_timezone
|
||||
"""
|
||||
if is_naive(value):
|
||||
return make_aware(value, get_default_timezone()).isoformat()
|
||||
else:
|
||||
return value.isoformat()
|
||||
|
||||
def zuluformat(value):
|
||||
"""format value in zulu format -- e.g., 2012-03-26T17:47:22.654449Z"""
|
||||
return "{0}Z".format(as_utc_naive(value).isoformat())
|
||||
|
||||
def as_utc_naive(value):
|
||||
"""
|
||||
if value is naive, assume it's in the default time zone, then convert to UTC but make naive
|
||||
"""
|
||||
if is_naive(value):
|
||||
return make_naive(make_aware(value, get_default_timezone()), utc)
|
||||
else:
|
||||
return make_naive(value, utc)
|
||||
|
||||
def as_default_timezone_naive(value):
|
||||
"""
|
||||
if value is naive, assume it's in UTC and convert to the default tz and make it naive
|
||||
"""
|
||||
if is_naive(value):
|
||||
return make_naive(make_aware(value, utc), get_default_timezone())
|
||||
else:
|
||||
return make_naive(value, get_default_timezone())
|
||||
|
|
Loading…
Reference in New Issue