Merge remote-tracking branch 'Gluejar/master' into catchup

# Conflicts:
#	core/models/__init__.py
#	core/models/bibmodels.py
#	vagrant/host_vars/prod/secrets.yml
pull/85/head
eric 2018-05-23 13:56:27 -04:00
commit 3661faec0a
78 changed files with 1551 additions and 55413 deletions

View File

@ -25,7 +25,7 @@ def onix_feed(facet, max=None):
editions = facet.facet_object.filter_model("Edition",editions).distinct()
for edition in editions:
edition_prod = product(edition, facet.facet_object)
if edition_prod:
if edition_prod is not None:
feed.append(edition_prod)
return etree.tostring(feed, pretty_print=True)
@ -34,7 +34,7 @@ def onix_feed_for_work(work):
feed.append(header(work))
for edition in models.Edition.objects.filter(work=work,ebooks__isnull=False).distinct():
edition_prod = product(edition)
if edition_prod:
if edition_prod is not None:
feed.append(product(edition))
return etree.tostring(feed, pretty_print=True)
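The switch from "if edition_prod:" to "if edition_prod is not None:" presumably guards against lxml's truth semantics (the pretty_print argument implies this feed is built with lxml): an Element with no children is falsy, so the old truthiness test could silently drop a product that exists but is still empty. A minimal sketch of the distinction:

    from lxml import etree

    prod = etree.Element('Product')              # a product element with no children yet
    assert prod is not None                      # the new test passes
    assert not bool(prod)                        # truth value is False while childless
    prod.append(etree.Element('ProductIdentifier'))
    assert bool(prod)                            # truthy once populated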

View File

@ -10,6 +10,7 @@ django imports
from django.contrib.auth.models import User
from django.test import TestCase
from django.test.client import Client
from django.utils.timezone import now
"""
regluit imports
@ -17,7 +18,6 @@ regluit imports
import regluit.core.isbn
from regluit.core import models
from regluit.utils.localdatetime import now
from regluit.api import models as apimodels
class ApiTests(TestCase):

View File

@ -40,7 +40,7 @@ urlpatterns = [
url(r"^onix/(?P<facet>.*)/$", OnixView.as_view(), name="onix"),
url(r"^onix/$", OnixView.as_view(), name="onix_all"),
url(r'^id/work/(?P<work_id>\w+)/$', negotiate_content, name="work_identifier"),
url(r'^loader/yaml$',load_yaml, name="load_yaml"),
url(r'^travisci/webhook$',travisci_webhook, name="travisci_webhook"),
url(r'^loader/yaml$', load_yaml, name="load_yaml"),
url(r'^travisci/webhook$', travisci_webhook, name="travisci_webhook"),
url(r'^', include(v1_api.urls)),
]

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View File

@ -5,10 +5,9 @@ from urllib import quote
from functools import partial
from xml.etree import ElementTree
from django.apps import apps
from . exceptions import BooXtreamError
from . models import Boox
class BooXtream(object):
""" ``apikey``
@ -46,6 +45,8 @@ class BooXtream(object):
Will raise ``BooXtreamError`` if BooXtream returns an exception
code.
"""
Boox = apps.get_model('booxtream', 'Boox')
url = self.endpoint + 'booxtream.xml'
kwargs['epub'] = '1' if epub else '0'
kwargs['kf8mobi'] = '1' if kf8mobi else '0'
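Looking up the model with apps.get_model() inside the method, in place of the module-level "from . models import Boox" import removed above, defers model resolution until Django's app registry is ready, so importing this module can never raise AppRegistryNotReady. A minimal sketch of the pattern (the helper name is illustrative):

    from django.apps import apps

    def _get_boox_model():
        # resolved at call time, after the registry is loaded,
        # instead of at module import time
        return apps.get_model('booxtream', 'Boox')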

View File

@ -1,11 +1,10 @@
from django.apps import AppConfig
from django.db.models.signals import post_migrate
from regluit.core.signals import create_notice_types
class CoreConfig(AppConfig):
name = 'regluit.core'
verbose_name = ' core objects'
def ready(self):
from regluit.core.signals import create_notice_types
post_migrate.connect(create_notice_types, sender=self)

View File

@ -23,6 +23,7 @@ from django_comments.models import Comment
from github3 import (login, GitHub)
from github3.repos.release import Release
from django.utils.timezone import now
from gitenberg.metadata.pandata import Pandata
# regluit imports
@ -31,7 +32,6 @@ import regluit
import regluit.core.isbn
from regluit.core.validation import test_file
from regluit.marc.models import inverse_marc_rels
from regluit.utils.localdatetime import now
from . import cc
from . import models
@ -49,7 +49,7 @@ def add_by_oclc(isbn, work=None):
def add_by_oclc_from_google(oclc):
if oclc:
logger.info("adding book by oclc %s", oclc)
logger.info(u"adding book by oclc %s", oclc)
else:
return None
try:
@ -59,10 +59,10 @@ def add_by_oclc_from_google(oclc):
try:
results = _get_json(url, {"q": '"OCLC%s"' % oclc})
except LookupFailure, e:
logger.exception("lookup failure for %s", oclc)
logger.exception(u"lookup failure for %s", oclc)
return None
if not results.has_key('items') or not results['items']:
logger.warn("no google hits for %s", oclc)
logger.warn(u"no google hits for %s", oclc)
return None
try:
@ -70,16 +70,16 @@ def add_by_oclc_from_google(oclc):
models.Identifier(type='oclc', value=oclc, edition=e, work=e.work).save()
return e
except LookupFailure, e:
logger.exception("failed to add edition for %s", oclc)
logger.exception(u"failed to add edition for %s", oclc)
except IntegrityError, e:
logger.exception("google books data for %s didn't fit our db", oclc)
logger.exception(u"google books data for %s didn't fit our db", oclc)
return None
def valid_isbn(isbn):
try:
return identifier_cleaner('isbn')(isbn)
except:
logger.exception("invalid isbn: %s", isbn)
logger.exception(u"invalid isbn: %s", isbn)
return None
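The u'' prefixes being added to log templates throughout this file keep messages unicode end to end under Python 2, which matters when an identifier or title argument contains non-ASCII text. The templates here are pure ASCII, so this is largely defensive; a sketch of the failure mode presumably being guarded against:

    # Python 2
    title = u'caf\xe9'           # unicode argument with a non-ASCII character
    u"adding %s" % title         # unicode template: always safe
    "adding %s" % title          # ASCII byte template: promoted to unicode, still fine
    "caf\xc3\xa9 %s" % title     # non-ASCII byte template: raises UnicodeDecodeError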
def add_by_isbn(isbn, work=None, language='xx', title=''):
@ -88,13 +88,17 @@ def add_by_isbn(isbn, work=None, language='xx', title=''):
try:
e = add_by_isbn_from_google(isbn, work=work)
except LookupFailure:
logger.exception("failed google lookup for %s", isbn)
logger.exception(u"failed google lookup for %s", isbn)
# try again some other time
return None
if e:
if e.work.language == 'xx' and language != 'xx':
e.work.language = language
e.work.save()
logger.info('changed language for {} to {}'.format(isbn, language))
return e
logger.info("null came back from add_by_isbn_from_google: %s", isbn)
logger.info(u"null came back from add_by_isbn_from_google: %s", isbn)
# if there's a title, we want to create stub editions and
# works, even if google doesn't know about it
# but if it's not valid,
@ -129,10 +133,10 @@ def get_google_isbn_results(isbn):
try:
results = _get_json(url, {"q": "isbn:%s" % isbn})
except LookupFailure:
logger.exception("lookup failure for %s", isbn)
logger.exception(u"lookup failure for %s", isbn)
return None
if not results.has_key('items') or not results['items']:
logger.warn("no google hits for %s", isbn)
logger.warn(u"no google hits for %s", isbn)
return None
return results
@ -201,7 +205,7 @@ def update_edition(edition):
# if the language of the edition no longer matches that of the parent work,
# attach edition to the
if edition.work.language != language:
logger.info("reconnecting %s since it is %s instead of %s",
logger.info(u"reconnecting %s since it is %s instead of %s",
googlebooks_id, language, edition.work.language)
old_work = edition.work
@ -210,7 +214,7 @@ def update_edition(edition):
edition.work = new_work
edition.save()
for identifier in edition.identifiers.all():
logger.info("moving identifier %s", identifier.value)
logger.info(u"moving identifier %s", identifier.value)
identifier.work = new_work
identifier.save()
if old_work and old_work.editions.count() == 0:
@ -256,7 +260,7 @@ def add_by_isbn_from_google(isbn, work=None):
edition.new = False
return edition
logger.info("adding new book by isbn %s", isbn)
logger.info(u"adding new book by isbn %s", isbn)
results = get_google_isbn_results(isbn)
if results:
try:
@ -267,9 +271,9 @@ def add_by_isbn_from_google(isbn, work=None):
isbn=isbn
)
except LookupFailure, e:
logger.exception("failed to add edition for %s", isbn)
logger.exception(u"failed to add edition for %s", isbn)
except IntegrityError, e:
logger.exception("google books data for %s didn't fit our db", isbn)
logger.exception(u"google books data for %s didn't fit our db", isbn)
return None
return None
@ -320,7 +324,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
if results:
item = results
else:
logger.info("loading metadata from google for %s", googlebooks_id)
logger.info(u"loading metadata from google for %s", googlebooks_id)
url = "https://www.googleapis.com/books/v1/volumes/%s" % googlebooks_id
item = _get_json(url)
d = item['volumeInfo']
@ -343,7 +347,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
if len(language) > 5:
language = language[0:5]
if work and work.language != language:
logger.info("not connecting %s since it is %s instead of %s",
logger.info(u"not connecting %s since it is %s instead of %s",
googlebooks_id, language, work.language)
work = None
# isbn = None
@ -371,7 +375,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
try:
e = models.Identifier.objects.get(type='goog', value=googlebooks_id).edition
e.new = False
logger.warning(" whoa nellie, somebody else created an edition while we were working.")
logger.warning(u" whoa nellie, somebody else created an edition while we were working.")
if work.new:
work.delete()
return e
@ -404,19 +408,19 @@ def relate_isbn(isbn, cluster_size=1):
"""add a book by isbn and then see if there's an existing work to add it to so as to make a
cluster bigger than cluster_size.
"""
logger.info("finding a related work for %s", isbn)
logger.info(u"finding a related work for %s", isbn)
edition = add_by_isbn(isbn)
if edition is None:
return None
if edition.work is None:
logger.info("didn't add related to null work")
logger.info(u"didn't add related to null work")
return None
if edition.work.editions.count() > cluster_size:
return edition.work
for other_isbn in thingisbn(isbn):
# 979's come back as 13
logger.debug("other_isbn: %s", other_isbn)
logger.debug(u"other_isbn: %s", other_isbn)
if len(other_isbn) == 10:
other_isbn = regluit.core.isbn.convert_10_to_13(other_isbn)
related_edition = add_by_isbn(other_isbn, work=edition.work)
@ -427,7 +431,7 @@ def relate_isbn(isbn, cluster_size=1):
related_edition.work = edition.work
related_edition.save()
elif related_edition.work_id != edition.work_id:
logger.debug("merge_works path 1 %s %s", edition.work_id, related_edition.work_id)
logger.debug(u"merge_works path 1 %s %s", edition.work_id, related_edition.work_id)
merge_works(related_edition.work, edition.work)
if related_edition.work.editions.count() > cluster_size:
return related_edition.work
@ -438,7 +442,7 @@ def add_related(isbn):
The initial seed ISBN will be added if it's not already there.
"""
# make sure the seed edition is there
logger.info("adding related editions for %s", isbn)
logger.info(u"adding related editions for %s", isbn)
new_editions = []
@ -446,14 +450,14 @@ def add_related(isbn):
if edition is None:
return new_editions
if edition.work is None:
logger.warning("didn't add related to null work")
logger.warning(u"didn't add related to null work")
return new_editions
# this is the work everything will hang off
work = edition.work
other_editions = {}
for other_isbn in thingisbn(isbn):
# 979's come back as 13
logger.debug("other_isbn: %s", other_isbn)
logger.debug(u"other_isbn: %s", other_isbn)
if len(other_isbn) == 10:
other_isbn = regluit.core.isbn.convert_10_to_13(other_isbn)
related_edition = add_by_isbn(other_isbn, work=work)
@ -466,7 +470,7 @@ def add_related(isbn):
related_edition.work = work
related_edition.save()
elif related_edition.work_id != work.id:
logger.debug("merge_works path 1 %s %s", work.id, related_edition.work_id)
logger.debug(u"merge_works path 1 %s %s", work.id, related_edition.work_id)
work = merge_works(work, related_edition.work)
else:
if other_editions.has_key(related_language):
@ -476,14 +480,14 @@ def add_related(isbn):
# group the other language editions together
for lang_group in other_editions.itervalues():
logger.debug("lang_group (ed, work): %s", [(ed.id, ed.work_id) for ed in lang_group])
logger.debug(u"lang_group (ed, work): %s", [(ed.id, ed.work_id) for ed in lang_group])
if len(lang_group) > 1:
lang_edition = lang_group[0]
logger.debug("lang_edition.id: %s", lang_edition.id)
logger.debug(u"lang_edition.id: %s", lang_edition.id)
# compute the distinct set of works to merge into lang_edition.work
works_to_merge = set([ed.work for ed in lang_group[1:]]) - set([lang_edition.work])
for w in works_to_merge:
logger.debug("merge_works path 2 %s %s", lang_edition.work_id, w.id)
logger.debug(u"merge_works path 2 %s %s", lang_edition.work_id, w.id)
merged_work = merge_works(lang_edition.work, w)
models.WorkRelation.objects.get_or_create(
to_work=lang_group[0].work,
@ -498,17 +502,21 @@ def thingisbn(isbn):
Library Thing. (takes isbn_10 or isbn_13, returns isbn_10, except for 979 isbns,
which come back as isbn_13)
"""
logger.info("looking up %s at ThingISBN", isbn)
logger.info(u"looking up %s at ThingISBN", isbn)
url = "https://www.librarything.com/api/thingISBN/%s" % isbn
xml = requests.get(url, headers={"User-Agent": settings.USER_AGENT}).content
try:
doc = ElementTree.fromstring(xml)
return [e.text for e in doc.findall('isbn')]
except SyntaxError:
# LibraryThing down
return []
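A hypothetical round trip (the ISBN is illustrative, and network access is assumed); the SyntaxError branch above means callers see an empty list rather than an exception when LibraryThing returns unparseable XML:

    related = thingisbn('9780441007462')
    # -> a list of related ISBN strings from ThingISBN, or [] if the service is down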
def merge_works(w1, w2, user=None):
"""will merge the second work (w2) into the first (w1)
"""
logger.info("merging work %s into %s", w2.id, w1.id)
logger.info(u"merging work %s into %s", w2.id, w1.id)
# don't merge if the works are the same or at least one of the works has no id
#(for example, when w2 has already been deleted)
if w1 is None or w2 is None or w1.id == w2.id or w1.id is None or w2.id is None:
@ -583,7 +591,7 @@ def detach_edition(e):
will detach edition from its work, creating a new stub work. if remerge=true, will see if
there's another work to attach to
"""
logger.info("splitting edition %s from %s", e, e.work)
logger.info(u"splitting edition %s from %s", e, e.work)
w = models.Work(title=e.title, language=e.work.language)
w.save()
@ -618,7 +626,7 @@ def add_openlibrary(work, hard_refresh=False):
work.save()
# find the first ISBN match in OpenLibrary
logger.info("looking up openlibrary data for work %s", work.id)
logger.info(u"looking up openlibrary data for work %s", work.id)
e = None # openlibrary edition json
w = None # openlibrary work json
@ -633,7 +641,7 @@ def add_openlibrary(work, hard_refresh=False):
try:
e = _get_json(url, params, type='ol')
except LookupFailure:
logger.exception("OL lookup failed for %s", isbn_key)
logger.exception(u"OL lookup failed for %s", isbn_key)
e = {}
if e.has_key(isbn_key):
if e[isbn_key].has_key('details'):
@ -673,7 +681,7 @@ def add_openlibrary(work, hard_refresh=False):
)
if e[isbn_key]['details'].has_key('works'):
work_key = e[isbn_key]['details']['works'].pop(0)['key']
logger.info("got openlibrary work %s for isbn %s", work_key, isbn_key)
logger.info(u"got openlibrary work %s for isbn %s", work_key, isbn_key)
models.Identifier.get_or_add(type='olwk', value=work_key, work=work)
try:
w = _get_json("https://openlibrary.org" + work_key, type='ol')
@ -691,14 +699,14 @@ def add_openlibrary(work, hard_refresh=False):
if w.has_key('subjects') and len(w['subjects']) > len(subjects):
subjects = w['subjects']
except LookupFailure:
logger.exception("OL lookup failed for %s", work_key)
logger.exception(u"OL lookup failed for %s", work_key)
if not subjects:
logger.warn("unable to find work %s at openlibrary", work.id)
logger.warn(u"unable to find work %s at openlibrary", work.id)
return
# add the subjects to the Work
for s in subjects:
logger.info("adding subject %s to work %s", s, work.id)
logger.info(u"adding subject %s to work %s", s, work.id)
subject = models.Subject.set_by_name(s, work=work)
work.save()
@ -716,9 +724,9 @@ def _get_json(url, params={}, type='gb'):
if response.status_code == 200:
return json.loads(response.content)
else:
logger.error("unexpected HTTP response: %s", response)
logger.error(u"unexpected HTTP response: %s", response)
if response.content:
logger.error("response content: %s", response.content)
logger.error(u"response content: %s", response.content)
raise LookupFailure("GET failed: url=%s and params=%s" % (url, params))
@ -766,7 +774,7 @@ def load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, seed_isbn, url
ebook = models.Ebook()
if len(ebooks) > 1:
logger.warning("There is more than one Ebook matching url {0}".format(url))
logger.warning(u"There is more than one Ebook matching url {0}".format(url))
ebook.format = format
@ -826,8 +834,6 @@ def edition_for_etype(etype, metadata, default=None):
for key in metadata.edition_identifiers.keys():
return edition_for_ident(key, metadata.identifiers[key])
MATCH_LICENSE = re.compile(r'creativecommons.org/licenses/([^/]+)/')
def load_ebookfile(url, etype):
'''
return a ContentFile if a new ebook has been loaded
@ -960,8 +966,7 @@ class BasePandataLoader(object):
if contentfile:
contentfile_name = '/loaded/ebook_{}.{}'.format(edition.id, key)
path = default_storage.save(contentfile_name, contentfile)
lic = MATCH_LICENSE.search(metadata.rights_url)
license = 'CC {}'.format(lic.group(1).upper()) if lic else ''
license = cc.license_from_cc_url(metadata.rights_url)
ebf = models.EbookFile.objects.create(
format=key,
edition=edition,

View File

@ -1,8 +1,11 @@
# coding=utf-8
# mostly constants related to Creative Commons
''' mostly constants related to Creative Commons
# let's be DRY with these parameters
## need to add versioned CC entries
'''
import re
INFO_CC = (
('CC BY-NC-ND', 'by-nc-nd', 'Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported (CC BY-NC-ND 3.0)', 'https://creativecommons.org/licenses/by-nc-nd/3.0/', 'Creative Commons Attribution-NonCommercial-NoDerivs'),
@ -162,3 +165,15 @@ def match_license(license_string):
except ValueError:
pass
return RIGHTS_ALIAS.get(license_string, None)
MATCH_LICENSE = re.compile(r'creativecommons.org/licenses/([^/]+)/')
def license_from_cc_url(rights_url):
if not rights_url:
return None
lic = MATCH_LICENSE.search(rights_url)
if lic:
return 'CC {}'.format(lic.group(1).upper())
if rights_url.find('openedition.org') >= 0:
return 'OPENEDITION'
return ''
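Expected behavior of the new helper, following from the regex and the openedition special case above (the URLs are illustrative):

    license_from_cc_url('https://creativecommons.org/licenses/by-nc-nd/3.0/')  # 'CC BY-NC-ND'
    license_from_cc_url('http://books.openedition.org/obp/123')                # 'OPENEDITION'
    license_from_cc_url('https://example.org/terms')                           # ''
    license_from_cc_url(None)                                                  # None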

View File

@ -45,10 +45,10 @@ def convert_10_to_13(isbn):
except:
return None
ISBN_REGEX = re.compile(r'^(\d{9}|\d{12})(\d|X)$')
DASH_REGEX = re.compile(r'[ \-–—]+')
ISBN_REGEX = re.compile(r'^(\d{9}[\dX]|\d{13})$')
DASH_REGEX = re.compile(u'[ \\-–—‐,;]+') #includes unicode hyphen, endash and emdash
def strip(s):
"""Strips away any - or spaces. If the remaining string is of length 10 or 13
"""Strips away any - or spaces and some punctuation. If the remaining string is of length 10 or 13
with digits only in anything but the last
check digit (which may be X), then return the remaining string -- otherwise return ''
"""

View File

@ -2,11 +2,12 @@ import csv
import HTMLParser
import httplib
import logging
import mechanize
import re
from datetime import datetime
import mechanize
import requests
from datetime import datetime
from regluit.core import models
logger = logging.getLogger(__name__)
@ -40,43 +41,59 @@ class LibraryThing(object):
def parse_csv(self):
h = HTMLParser.HTMLParser()
reader = csv.DictReader(self.csv_handle)
# There are more fields to be parsed out. Note that there is a second author column to handle
for (i,row) in enumerate(reader):
# There are more fields to be parsed out. Note that there is a
# second author column to handle
for (i, row) in enumerate(reader):
# ISBNs are written like '[123456789x]' in the CSV, suggesting possibility of a list
m = re.match(r'^\[(.*)\]$', row["'ISBNs'"])
if m:
isbn = m.group(1).split()
else:
isbn = []
yield {'title':h.unescape(row["'TITLE'"]), 'author':h.unescape(row["'AUTHOR (first, last)'"]),
'isbn':isbn, 'comment':row["'COMMENT'"],
'tags':row["'TAGS'"], 'collections':row["'COLLECTIONS'"],
'reviews':h.unescape(row["'REVIEWS'"])}
yield {
'title':h.unescape(row["'TITLE'"]),
'author':h.unescape(row["'AUTHOR (first, last)'"]),
'isbn':isbn,
'comment':row["'COMMENT'"],
'tags':row["'TAGS'"],
'collections':row["'COLLECTIONS'"],
'reviews':h.unescape(row["'REVIEWS'"])
}
def viewstyle_1(self, rows):
for (i,row) in enumerate(rows):
for (i, row) in enumerate(rows):
book_data = {}
cols = row.xpath('td')
# cover
book_data["cover"] = {"cover_id":cols[0].attrib["id"],
"image": {"width":cols[0].xpath('.//img')[0].attrib['width'],
"src": cols[0].xpath('.//img')[0].attrib['src']}
book_data["cover"] = {
"cover_id":cols[0].attrib["id"],
"image": {
"width":cols[0].xpath('.//img')[0].attrib['width'],
"src": cols[0].xpath('.//img')[0].attrib['src']
}
}
# title
book_data["title"] = {"href":cols[1].xpath('.//a')[0].attrib['href'],
"title":cols[1].xpath('.//a')[0].text}
book_data["title"] = {
"href":cols[1].xpath('.//a')[0].attrib['href'],
"title":cols[1].xpath('.//a')[0].text
}
# extract work_id and book_id from href
try:
(book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups()
(book_data["work_id"], book_data["book_id"]) = re.match(
"^/work/(.*)/book/(.*)$",
book_data["title"]["href"]
).groups()
except:
(book_data["work_id"], book_data["book_id"]) = (None, None)
# author -- what if there is more than 1? or none?
try:
book_data["author"] = {"display_name":cols[2].xpath('.//a')[0].text,
book_data["author"] = {
"display_name":cols[2].xpath('.//a')[0].text,
"href":cols[2].xpath('.//a')[0].attrib['href'],
"name":cols[2].xpath('div')[0].text}
"name":cols[2].xpath('div')[0].text
}
except:
book_data["author"] = None
@ -91,13 +108,15 @@ class LibraryThing(object):
book_data["rating"] = len(cols[5].xpath('.//img[@alt="*"]'))
# entry date
book_data["entry_date"] = datetime.date(datetime.strptime(cols[6].xpath('span')[0].text, "%b %d, %Y"))
book_data["entry_date"] = datetime.date(
datetime.strptime(cols[6].xpath('span')[0].text, "%b %d, %Y")
)
yield book_data
def viewstyle_5(self, rows):
# implement this view to get at the ISBNs
for (i,row) in enumerate(rows):
for (i, row) in enumerate(rows):
book_data = {}
cols = row.xpath('td')
@ -107,7 +126,10 @@ class LibraryThing(object):
# extract work_id and book_id from href
try:
(book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups()
(book_data["work_id"], book_data["book_id"]) = re.match(
"^/work/(.*)/book/(.*)$",
book_data["title"]["href"]
).groups()
except:
(book_data["work_id"], book_data["book_id"]) = (None, None)
@ -145,12 +167,12 @@ class LibraryThing(object):
# we can vary viewstyle to get different info
IMPLEMENTED_STYLES = [1,5]
IMPLEMENTED_STYLES = [1, 5]
COLLECTION = 2 # set to get All Collections
if view_style not in IMPLEMENTED_STYLES:
raise NotImplementedError()
style_parser = getattr(self,"viewstyle_%s" % view_style)
style_parser = getattr(self, "viewstyle_%s" % view_style)
next_page = True
offset = 0
cookies = None
@ -160,8 +182,9 @@ class LibraryThing(object):
cookies = r.cookies
while next_page:
url = "https://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username,
view_style, COLLECTION, offset)
url = "https://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (
self.username, view_style, COLLECTION, offset
)
logger.info("url: %s", url)
if cookies is None:
r = requests.get(url)
@ -169,10 +192,8 @@ class LibraryThing(object):
r = requests.get(url, cookies=cookies)
if r.status_code != httplib.OK:
raise LibraryThingException("Error accessing %s: %s" % (url, e))
logger.info("Error accessing %s: %s", url, e)
raise LibraryThingException("Error accessing %s: status %s" % (url, r.status_code))
etree = html.fromstring(r.content)
#logger.info("r.content %s", r.content)
cookies = r.cookies # retain the cookies
# look for a page bar
@ -180,13 +201,16 @@ class LibraryThing(object):
# 1 - 50 of 82
try:
count_text = etree.xpath('//td[@class="pbGroup"]')[0].text
total = int(re.search(r'(\d+)$',count_text).group(1))
total = int(re.search(r'(\d+)$', count_text).group(1))
logger.info('total: %d', total)
except Exception, e: # assume for now that if we can't grab this text, there is no page bar and no books
except Exception, e:
# assume for now that if we can't grab this text,
# there is no page bar and no books
logger.info('Exception {0}'.format(e))
total = 0
# to do paging we can either look for a next link or just increase the offset by the number of rows.
# to do paging we can either look for a next link or just increase the offset
# by the number of rows.
# Let's try the latter
# possible_next_link = etree.xpath('//a[@class="pageShuttleButton"]')[0]
@ -197,10 +221,11 @@ class LibraryThing(object):
i = -1 # have to account for the problem of style_parser(rows) returning nothing
for (i,row) in enumerate(style_parser(rows)):
for (i, row) in enumerate(style_parser(rows)):
yield row
# page size = 50, first page offset = 0, second page offset = 50 -- if total = 50 no need to go
# page size = 50, first page offset = 0, second page offset = 50
# -- if total = 50 no need to go
offset += i + 1
if offset >= total:
@ -208,7 +233,8 @@ class LibraryThing(object):
def load_librarything_into_wishlist(user, lt_username, max_books=None):
"""
Load a specified LibraryThing shelf (by default: all the books from the LibraryThing account associated with user)
Load a specified LibraryThing shelf (by default: all the books
from the LibraryThing account associated with user)
"""
from regluit.core import bookloader
@ -219,7 +245,7 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None):
lt = LibraryThing(lt_username)
for (i,book) in enumerate(islice(lt.parse_user_catalog(view_style=5),max_books)):
for (i, book) in enumerate(islice(lt.parse_user_catalog(view_style=5), max_books)):
isbn = book["isbn"] # grab the first one
logger.info("%d %s %s", i, book["title"]["title"], isbn)
try:
@ -229,13 +255,27 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None):
if not edition:
continue
# add the librarything ids to the db since we know them now
identifier= models.Identifier.get_or_add(type = 'thng', value = book['book_id'], edition = edition, work = edition.work)
identifier= models.Identifier.get_or_add(type = 'ltwk', value = book['work_id'], work = edition.work)
identifier = models.Identifier.get_or_add(
type='thng',
value=book['book_id'],
edition=edition,
work=edition.work
)
identifier = models.Identifier.get_or_add(
type='ltwk',
value=book['work_id'],
work=edition.work
)
if book['lc_call_number']:
identifier= models.Identifier.get_or_add(type = 'lccn', value = book['lc_call_number'], edition = edition, work = edition.work)
identifier = models.Identifier.get_or_add(
type='lccn',
value=book['lc_call_number'],
edition=edition,
work=edition.work
)
user.wishlist.add_work(edition.work, 'librarything', notify=True)
if edition.new:
tasks.populate_edition.delay(edition.isbn_13)
logger.info("Work with isbn %s added to wishlist.", isbn)
except Exception, e:
logger.info ("error adding ISBN %s: %s", isbn, e)
logger.info("error adding ISBN %s: %s", isbn, e)

View File

@ -16,10 +16,10 @@ from .smashwords import SmashwordsScraper
def get_scraper(url):
scrapers = [
PressbooksScraper,
HathitrustScraper,
SpringerScraper,
UbiquityScraper,
SmashwordsScraper,
HathitrustScraper,
BaseScraper,
]
for scraper in scrapers:
@ -52,3 +52,9 @@ def add_by_webpage(url, work=None, user=None):
def add_by_sitemap(url, maxnum=None):
return add_from_bookdatas(scrape_sitemap(url, maxnum=maxnum))
def scrape_language(url):
scraper = get_scraper(url)
return scraper.metadata.get('language')

View File

@ -1,36 +1,48 @@
#!/usr/bin/env python
# encoding: utf-8
import logging
import datetime
import json
import logging
import re
from itertools import islice
import requests
from django.db.models import (Q, F)
from django.db.models import Q
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
import regluit
from oaipmh.client import Client
from oaipmh.error import IdDoesNotExistError
from oaipmh.metadata import MetadataRegistry, oai_dc_reader
from regluit.core import bookloader, cc
from regluit.core import models, tasks
from regluit.core import bookloader
from regluit.core.bookloader import add_by_isbn, merge_works
from regluit.core.bookloader import merge_works
from regluit.core.isbn import ISBN
from regluit.core.validation import valid_subject
from regluit.core.loaders.utils import type_for_url
from regluit.core.validation import identifier_cleaner, valid_subject
from . import scrape_language
from .doab_utils import doab_lang_to_iso_639_1, online_to_download, url_to_provider
logger = logging.getLogger(__name__)
springercover = re.compile(r'ftp.+springer\.de.+(\d{13}\.jpg)$', flags=re.U)
def unlist(alist):
if not alist:
return None
return alist[0]
SPRINGER_COVER = re.compile(r'ftp.+springer\.de.+(\d{13}\.jpg)$', flags=re.U)
SPRINGER_IMAGE = u'https://images.springer.com/sgw/books/medium/{}.jpg'
def store_doab_cover(doab_id, redo=False):
"""
returns tuple: 1) cover URL, 2) whether newly created (boolean)
"""
cover_file_name= '/doab/%s/cover' % (doab_id)
cover_file_name = '/doab/%s/cover' % (doab_id)
# if we don't want to redo and the cover exists, return the URL of the cover
@ -44,29 +56,31 @@ def store_doab_cover(doab_id, redo=False):
if r.status_code == 302:
redirurl = r.headers['Location']
if redirurl.startswith(u'ftp'):
springerftp = springercover.match(redirurl)
springerftp = SPRINGER_COVER.match(redirurl)
if springerftp:
redirurl = u'https://images.springer.com/sgw/books/medium/{}.jpg'.format(springerftp.groups(1))
redirurl = SPRINGER_IMAGE.format(springerftp.group(1))
r = requests.get(redirurl)
else:
r = requests.get(url)
else:
r = requests.get(url)
cover_file = ContentFile(r.content)
cover_file.content_type = r.headers.get('content-type', '')
path = default_storage.save(cover_file_name, cover_file)
default_storage.save(cover_file_name, cover_file)
return (default_storage.url(cover_file_name), True)
except Exception, e:
# if there is a problem, return None for cover URL
logger.warning('Failed to make cover image for doab_id={}: {}'.format(doab_id, e))
return (None, False)
def update_cover_doab(doab_id, edition, store_cover=True):
def update_cover_doab(doab_id, edition, store_cover=True, redo=True):
"""
update the cover url for work with doab_id
if store_cover is True, use the cover from our own storage
"""
if store_cover:
(cover_url, new_cover) = store_doab_cover(doab_id)
(cover_url, new_cover) = store_doab_cover(doab_id, redo=redo)
else:
cover_url = "http://www.doabooks.org/doab?func=cover&rid={0}".format(doab_id)
@ -74,7 +88,6 @@ def update_cover_doab(doab_id, edition, store_cover=True):
edition.cover_image = cover_url
edition.save()
return cover_url
else:
return None
def attach_more_doab_metadata(edition, description, subjects,
@ -108,7 +121,7 @@ def attach_more_doab_metadata(edition, description, subjects,
if not work.age_level:
work.age_level = '18-'
if language:
if language and language != 'xx':
work.language = language
work.save()
@ -117,7 +130,7 @@ def attach_more_doab_metadata(edition, description, subjects,
if edition.authors.all().count() < len(authlist):
edition.authors.clear()
if authlist is not None:
for [rel,auth] in authlist:
for [rel, auth] in authlist:
edition.add_author(auth, rel)
return edition
@ -125,7 +138,6 @@ def attach_more_doab_metadata(edition, description, subjects,
def add_all_isbns(isbns, work, language=None, title=None):
first_edition = None
for isbn in isbns:
first_edition = None
edition = bookloader.add_by_isbn(isbn, work, language=language, title=title)
if edition:
first_edition = first_edition if first_edition else edition
@ -136,7 +148,7 @@ def add_all_isbns(isbns, work, language=None, title=None):
work = merge_works(edition.work, work)
else:
work = edition.work
return first_edition
return work, first_edition
def load_doab_edition(title, doab_id, url, format, rights,
language, isbns,
@ -145,9 +157,11 @@ def load_doab_edition(title, doab_id, url, format, rights,
"""
load a record from doabooks.org represented by input parameters and return an ebook
"""
logger.info('load doab {} {} {} {} {}'.format(doab_id, format, rights, language, provider))
if language and isinstance(language, list):
language = language[0]
if language == 'xx' and format == 'online':
language = scrape_language(url)
# check to see whether the Edition hasn't already been loaded first
# search by url
ebooks = models.Ebook.objects.filter(url=url)
@ -168,37 +182,39 @@ def load_doab_edition(title, doab_id, url, format, rights,
raise Exception("There is more than one Ebook matching url {0}".format(url))
elif len(ebooks) == 1:
ebook = ebooks[0]
doab_identifer = models.Identifier.get_or_add(type='doab',value=doab_id,
doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
work=ebook.edition.work)
if not ebook.rights:
ebook.rights = rights
ebook.save()
# update the cover id
cover_url = update_cover_doab(doab_id, ebook.edition)
cover_url = update_cover_doab(doab_id, ebook.edition, redo=False)
# attach more metadata
attach_more_doab_metadata(ebook.edition,
description=kwargs.get('description'),
attach_more_doab_metadata(
ebook.edition,
description=unlist(kwargs.get('description')),
subjects=kwargs.get('subject'),
publication_date=kwargs.get('date'),
publisher_name=kwargs.get('publisher'),
publication_date=unlist(kwargs.get('date')),
publisher_name=unlist(kwargs.get('publisher')),
language=language,
authors=kwargs.get('authors'),)
authors=kwargs.get('creator'),
)
# make sure all isbns are added
add_all_isbns(isbns, None, language=language, title=title)
return ebook
add_all_isbns(isbns, ebook.edition.work, language=language, title=title)
return ebook.edition
# remaining case --> no ebook, load record, create ebook if there is one.
assert len(ebooks) == 0
assert not ebooks
# we need to find the right Edition/Work to tie Ebook to...
# look for the Edition with which to associate ebook.
# loop through the isbns to see whether we get one that is not None
work = None
edition = add_all_isbns(isbns, None, language=language, title=title)
if edition:
edition.refresh_from_db()
work = edition.work
work, edition = add_all_isbns(isbns, None, language=language, title=title)
if doab_id and not work:
# make sure there's not already a doab_id
idents = models.Identifier.objects.filter(type='doab', value=doab_id)
@ -208,9 +224,10 @@ def load_doab_edition(title, doab_id, url, format, rights,
break
if edition is not None:
# if this is a new edition, then add related editions asynchronously
if getattr(edition,'new', False):
tasks.populate_edition.delay(edition.isbn_13)
# if this is a new edition, then add related editions SYNCHRONOUSLY
if getattr(edition, 'new', False):
tasks.populate_edition(edition.isbn_13)
edition.refresh_from_db()
doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
work=edition.work)
@ -245,7 +262,7 @@ def load_doab_edition(title, doab_id, url, format, rights,
work.selected_edition = edition
work.save()
if format in ('pdf', 'epub', 'mobi'):
if format in ('pdf', 'epub', 'mobi', 'html', 'online') and rights:
ebook = models.Ebook()
ebook.format = format
ebook.provider = provider
@ -253,59 +270,27 @@ def load_doab_edition(title, doab_id, url, format, rights,
ebook.rights = rights
# tie the edition to ebook
ebook.edition = edition
if format == "online":
ebook.active = False
ebook.save()
# update the cover id (could be done separately)
cover_url = update_cover_doab(doab_id, edition)
cover_url = update_cover_doab(doab_id, edition, redo=False)
# attach more metadata
attach_more_doab_metadata(edition,
description=kwargs.get('description'),
attach_more_doab_metadata(
edition,
description=unlist(kwargs.get('description')),
subjects=kwargs.get('subject'),
publication_date=kwargs.get('date'),
publisher_name=kwargs.get('publisher'),
authors=kwargs.get('authors'),)
return ebook
publication_date=unlist(kwargs.get('date')),
publisher_name=unlist(kwargs.get('publisher')),
authors=kwargs.get('creator'),
)
return edition
def load_doab_records(fname, limit=None):
success_count = 0
ebook_count = 0
records = json.load(open(fname))
for (i, book) in enumerate(islice(records,limit)):
d = dict(book)
d['isbns'] = split_isbns(d['isbns_raw']) # use stricter isbn string parsing.
try:
ebook = load_doab_edition(**d)
success_count += 1
if ebook:
ebook_count +=1
except Exception, e:
logger.error(e)
logger.error(book)
logger.info("Number of records processed: " + str(success_count))
logger.info("Number of ebooks processed: " + str(ebook_count))
"""
#
#tools to parse the author lists in doab.csv
from pandas import DataFrame
url = "http://www.doabooks.org/doab?func=csv"
df_csv = DataFrame.from_csv(url)
out=[]
for val in df_csv.values:
isbn = split_isbns(val[0])
if isbn:
auths = []
if val[2] == val[2] and val[-2] == val[-2]: # test for NaN auths and licenses
auths = creator_list(val[2])
out.append(( isbn[0], auths))
open("/Users/eric/doab_auths.json","w+").write(json.dumps(out,indent=2, separators=(',', ': ')))
"""
#
au = re.compile(r'\(Authors?\)', flags=re.U)
ed = re.compile(r'\([^\)]*(dir.|[Eeé]ds?.|org.|coord.|Editor|a cura di|archivist)[^\)]*\)', flags=re.U)
@ -326,13 +311,13 @@ def fnf(auth):
if len(parts) == 1:
return parts[0].strip()
elif len(parts) == 2:
return u'{} {}'.format(parts[1].strip(),parts[0].strip())
return u'{} {}'.format(parts[1].strip(), parts[0].strip())
else:
if parts[1].strip() in ('der','van', 'von', 'de', 'ter'):
return u'{} {} {}'.format(parts[2].strip(),parts[1].strip(),parts[0].strip())
if parts[1].strip() in ('der', 'van', 'von', 'de', 'ter'):
return u'{} {} {}'.format(parts[2].strip(), parts[1].strip(), parts[0].strip())
#print auth
#print re.search(namelist,auth).group(0)
return u'{} {}, {}'.format(parts[2].strip(),parts[0].strip(),parts[1].strip())
return u'{} {}, {}'.format(parts[2].strip(), parts[0].strip(), parts[1].strip())
def creator(auth, editor=False):
@ -353,64 +338,96 @@ def creator(auth, editor=False):
auth = au.sub('', auth)
return ['aut', fnf(auth)]
def split_auths(auths):
if ';' in auths or '/' in auths:
return namesep2.split(auths)
else:
nl = namelist.match(auths.strip())
if nl:
if nl.group(3).endswith(' de') \
or ' de ' in nl.group(3) \
or nl.group(3).endswith(' da') \
or nl.group(1).endswith(' Jr.') \
or ' e ' in nl.group(1):
return [auths]
else:
return namesep.split(auths)
else :
return [auths]
def split_isbns(isbns):
result = []
for isbn in isbnsep.split(isbns):
isbn = ISBN(isbn)
if isbn.valid:
result.append(isbn.to_string())
return result
def creator_list(creators):
auths = []
if re.search(edlist, creators):
for auth in split_auths(edlist.sub(u'', creators)):
if auth:
auths.append(creator(auth, editor=True))
else:
for auth in split_auths(unicode(creators)):
if auth:
for auth in creators:
auths.append(creator(auth))
return auths
def load_doab_auths(fname, limit=None):
doab_auths = json.load(open(fname))
recnum = 0
failed = 0
for [isbnraw, authlist] in doab_auths:
isbn = ISBN(isbnraw).to_string()
try:
work = models.Identifier.objects.get(type='isbn',value=isbn).work
except models.Identifier.DoesNotExist:
print 'isbn = {} not found'.format(isbnraw)
failed += 1
if work.preferred_edition.authors.all().count() < len(authlist):
work.preferred_edition.authors.clear()
if authlist is None:
print "null authlist; isbn={}".format(isbn)
continue
for [rel,auth] in authlist:
work.preferred_edition.add_author(auth, rel)
recnum +=1
if limit and recnum > limit:
break
logger.info("Number of records processed: " + str(recnum))
logger.info("Number of missing isbns: " + str(failed))
DOAB_OAIURL = 'https://www.doabooks.org/oai'
DOAB_PATT = re.compile(r'[\./]doabooks\.org/doab\?.*rid:(\d{1,8}).*')
mdregistry = MetadataRegistry()
mdregistry.registerReader('oai_dc', oai_dc_reader)
doab_client = Client(DOAB_OAIURL, mdregistry)
isbn_cleaner = identifier_cleaner('isbn', quiet=True)
ISBNSEP = re.compile(r'[/]+')
def add_by_doab(doab_id, record=None):
try:
record = record if record else doab_client.getRecord(
metadataPrefix='oai_dc',
identifier='oai:doab-books:{}'.format(doab_id)
)
metadata = record[1].getMap()
isbns = []
url = None
for ident in metadata.pop('identifier', []):
if ident.startswith('ISBN: '):
isbn_strings = ISBNSEP.split(ident[6:].strip())
for isbn_string in isbn_strings:
isbn = isbn_cleaner(isbn_string)
if isbn:
isbns.append(isbn)
elif ident.find('doabooks.org') >= 0:
# should already know the doab_id
continue
else:
url = ident
language = doab_lang_to_iso_639_1(unlist(metadata.pop('language', None)))
urls = online_to_download(url)
edition = None
title = unlist(metadata.pop('title', None))
license = cc.license_from_cc_url(unlist(metadata.pop('rights', None)))
for dl_url in urls:
format = type_for_url(dl_url)
if 'format' in metadata:
del metadata['format']
edition = load_doab_edition(
title,
doab_id,
dl_url,
format,
license,
language,
isbns,
url_to_provider(dl_url) if dl_url else None,
**metadata
)
return edition
except IdDoesNotExistError:
return None
def getdoab(url):
id_match = DOAB_PATT.search(url)
if id_match:
return id_match.group(1)
return False
def load_doab_oai(from_year=None, limit=100000):
'''
use oai feed to get oai updates
'''
if from_year:
from_ = datetime.datetime(year=from_year, month=1, day=1)
else:
# last 45 days
from_ = datetime.datetime.now() - datetime.timedelta(days=45)
doab_ids = []
for record in doab_client.listRecords(metadataPrefix='oai_dc', from_=from_):
if not record[1]:
continue
item_type = unlist(record[1].getMap().get('type', None))
if item_type != 'book':
continue
idents = record[1].getMap()['identifier']
if idents:
for ident in idents:
doab = getdoab(ident)
if doab:
doab_ids.append(doab)
e = add_by_doab(doab, record=record)
title = e.title if e else None
logger.info(u'updated:\t{}\t{}'.format(doab, title))
if len(doab_ids) > limit:
break

core/loaders/doab_utils.py (new file, 128 lines)
View File

@ -0,0 +1,128 @@
"""
doab_utils.py
"""
import re
import urlparse
import requests
from regluit.utils.lang import get_language_code
from .utils import get_soup
# utility functions for converting lists of individual items into individual items
# let's do a mapping of the DOAB languages into the language codes used
# mostly, we just handle misspellings
# also null -> xx
EXTRA_LANG_MAP = dict([
(u'chinese', 'zh'),
(u'deutsch', 'de'),
(u'eng', 'en'),
(u'englilsh', 'en'),
(u'englisch', 'en'),
(u'espanol', 'es'),
(u'ger', 'de'),
(u'fra', 'fr'),
(u'fre', 'fr'),
(u'francese', 'fr'),
(u'ita', 'it'),
(u'italiano', 'it'),
(u'norwegian', 'no'),
(u'por', 'pt'),
(u'portugese', 'pt'),
(u'slovene', 'sl'),
(u'spa', 'es'),
(u'spagnolo', 'es'),
])
sep = re.compile(r'[ \-;^,/]+')
def doab_lang_to_iso_639_1(lang):
if lang is None or not lang:
return "xx"
else:
lang = sep.split(lang)[0]
code = get_language_code(lang)
if code:
return code
else:
return EXTRA_LANG_MAP.get(lang.lower(), 'xx')
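Expected behavior, given the separator split and the fallbacks above (this assumes get_language_code resolves standard names and codes):

    doab_lang_to_iso_639_1(None)                # 'xx', unknown
    doab_lang_to_iso_639_1(u'englisch')         # 'en', via EXTRA_LANG_MAP
    doab_lang_to_iso_639_1(u'English; French')  # splits on the separator, resolves 'English'
    doab_lang_to_iso_639_1(u'klingon')          # 'xx' fallback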
DOMAIN_TO_PROVIDER = dict([
[u'antropologie.zcu.cz', u'AntropoWeb'],
[u'books.mdpi.com', u'MDPI Books'],
[u'books.openedition.org', u'OpenEdition Books'],
[u'books.scielo.org', u'SciELO'],
[u'ccdigitalpress.org', u'Computers and Composition Digital Press'],
[u'digitalcommons.usu.edu', u'DigitalCommons, Utah State University'],
[u'dl.dropboxusercontent.com', u'Dropbox'],
[u'dspace.ucalgary.ca', u'Institutional Repository at the University of Calgary'],
[u'dx.doi.org', u'DOI Resolver'],
[u'ebooks.iospress.nl', u'IOS Press Ebooks'],
[u'hdl.handle.net', u'Handle Proxy'],
[u'hw.oeaw.ac.at', u'Austrian Academy of Sciences'],
[u'img.mdpi.org', u'MDPI Books'],
[u'ledibooks.com', u'LediBooks'],
[u'leo.cilea.it', u'LEO '],
[u'leo.cineca.it', u'Letteratura Elettronica Online'],
[u'link.springer.com', u'Springer'],
[u'oapen.org', u'OAPEN Library'],
[u'press.openedition.org', u'OpenEdition Press'],
[u'windsor.scholarsportal.info', u'Scholars Portal'],
[u'www.adelaide.edu.au', u'University of Adelaide'],
[u'www.aliprandi.org', u'Simone Aliprandi'],
[u'www.antilia.to.it', u'antilia.to.it'],
[u'www.aupress.ca', u'Athabasca University Press'],
[u'www.bloomsburyacademic.com', u'Bloomsbury Academic'],
[u'www.co-action.net', u'Co-Action Publishing'],
[u'www.degruyter.com', u'De Gruyter Online'],
[u'www.doabooks.org', u'Directory of Open Access Books'],
[u'www.dropbox.com', u'Dropbox'],
[u'www.ebooks.iospress.nl', u'IOS Press Ebooks'],
[u'www.ledizioni.it', u'Ledizioni'],
[u'www.maestrantonella.it', u'maestrantonella.it'],
[u'www.oapen.org', u'OAPEN Library'],
[u'www.openbookpublishers.com', u'Open Book Publishers'],
[u'www.palgraveconnect.com', u'Palgrave Connect'],
[u'www.scribd.com', u'Scribd'],
[u'www.springerlink.com', u'Springer'],
[u'www.ubiquitypress.com', u'Ubiquity Press'],
[u'www.unimib.it', u'University of Milano-Bicocca'],
[u'www.unito.it', u"University of Turin"],
])
def url_to_provider(url):
netloc = urlparse.urlparse(url).netloc
return DOMAIN_TO_PROVIDER.get(netloc, netloc)
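Usage sketch (the URLs are illustrative): a known host maps to its display name, and anything else falls back to the bare netloc:

    url_to_provider(u'https://link.springer.com/book/10.1007/978-3-319-00000-0')  # u'Springer'
    url_to_provider(u'https://press.example.edu/catalog/42')  # u'press.example.edu'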
FRONTIERSIN = re.compile(r'frontiersin.org/books/[^/]+/(\d+)')
def online_to_download(url):
urls = []
if url.find(u'mdpi.com/books/pdfview/book/') >= 0:
doc = get_soup(url)
if doc:
obj = doc.find('object', type='application/pdf')
if obj:
urls.append(obj['data'].split('#')[0])
elif url.find(u'books.scielo.org/') >= 0:
doc = get_soup(url)
if doc:
obj = doc.find('a', class_='pdf_file')
if obj:
urls.append(urlparse.urljoin(url, obj['href']))
obj = doc.find('a', class_='epub_file')
if obj:
urls.append(urlparse.urljoin(url, obj['href']))
elif FRONTIERSIN.search(url):
booknum = FRONTIERSIN.search(url).group(1)
urls.append(u'https://www.frontiersin.org/GetFile.aspx?ebook={}&fileformat=EPUB'.format(booknum))
urls.append(u'https://www.frontiersin.org/GetFile.aspx?ebook={}&fileformat=PDF'.format(booknum))
else:
urls.append(url)
return urls
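For the Frontiers case, the regex above pulls out the book number and fans a single landing page out into both download formats; unrecognized URLs pass through unchanged (URL shapes are illustrative):

    online_to_download(u'https://www.frontiersin.org/books/Some_Title/1234')
    # -> [u'https://www.frontiersin.org/GetFile.aspx?ebook=1234&fileformat=EPUB',
    #     u'https://www.frontiersin.org/GetFile.aspx?ebook=1234&fileformat=PDF']
    online_to_download(u'https://example.org/book.pdf')
    # -> [u'https://example.org/book.pdf']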

View File

@ -26,10 +26,11 @@ class HathitrustScraper(BaseScraper):
for record in records:
self.record = record
return
self.record = {}
self.record = None # probably a hdl not pointing at Hathitrust
self.record = None
def get_downloads(self):
if self.record:
dl_a = self.doc.select_one('#fullPdfLink')
value = dl_a['href'] if dl_a else None
if value:
@ -37,27 +38,42 @@ class HathitrustScraper(BaseScraper):
'download_url_{}'.format('pdf'),
'https://babel.hathitrust.org{}'.format(value)
)
return super(HathitrustScraper, self).get_downloads()
def get_isbns(self):
if self.record:
isbn = self.record.get('issn', [])
value = identifier_cleaner('isbn', quiet=True)(isbn)
return {'print': value} if value else {}
return super(HathitrustScraper, self).get_isbns()
def get_title(self):
if self.record:
self.set('title', self.record.get('title', ''))
return super(HathitrustScraper, self).get_title()
def get_keywords(self):
if self.record:
self.set('subjects', self.record.get('keywords', []))
return super(HathitrustScraper, self).get_keywords()
def get_publisher(self):
if self.record:
self.set('publisher', self.record.get('publisher', ''))
return super(HathitrustScraper, self).get_publisher()
def get_pubdate(self):
if self.record:
self.set('publication_date', self.record.get('year', ''))
return super(HathitrustScraper, self).get_pubdate()
def get_description(self):
if self.record:
notes = self.record.get('notes', [])
self.set('description', '\r'.join(notes))
return super(HathitrustScraper, self).get_description()
def get_genre(self):
if self.record:
self.set('genre', self.record.get('type_of_reference', '').lower())
return super(HathitrustScraper, self).get_genre()

View File

@ -110,10 +110,12 @@ class SpringerScraper(BaseScraper):
self.set('publisher', 'Springer')
search_url = 'https://link.springer.com/search/page/{}?facet-content-type=%22Book%22&package=openaccess'
def load_springer(num_pages):
def springer_open_books(num_pages):
for page in range(1, num_pages+1):
def load_springer(startpage=1, endpage=None):
def springer_open_books(startpage, endpage):
endpage = endpage if endpage else startpage + 10
for page in range(startpage, endpage + 1):
url = search_url.format(page)
try:
response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
if response.status_code == 200:
base = response.url
@ -121,4 +123,6 @@ def load_springer(num_pages):
for link in doc.select('a.title'):
book_url = urljoin(base, link['href'])
yield SpringerScraper(book_url)
return add_from_bookdatas(springer_open_books(num_pages))
except requests.exceptions.ConnectionError:
print 'couldn\'t connect to %s' % url
return add_from_bookdatas(springer_open_books(startpage, endpage))
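The signature change swaps a bare page count for an explicit start/end range, so a long crawl can be resumed where it stopped; endpage defaults to ten pages past startpage:

    load_springer()                          # scrapes search pages 1 through 11
    load_springer(startpage=12, endpage=20)  # resume a previous run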

core/loaders/tests.py (new file, 28 lines)
View File

@ -0,0 +1,28 @@
from django.conf import settings
from django.test import TestCase
from regluit.core.models import Ebook, Edition, Work
from .utils import dl_online
class LoaderTests(TestCase):
def setUp(self):
pass
def test_downloads(self):
if not (settings.TEST_INTEGRATION):
return
work = Work(title="online work")
work.save()
edition = Edition(work=work)
edition.save()
dropbox_url = 'https://www.dropbox.com/s/h5jzpb4vknk8n7w/Jakobsson_The_Troll_Inside_You_EBook.pdf?dl=0'
dropbox_ebook = Ebook.objects.create(format='online', url=dropbox_url, edition=edition)
dropbox_ebf = dl_online(dropbox_ebook)
self.assertTrue(dropbox_ebf.ebook.filesize)
jbe_url = 'http://www.jbe-platform.com/content/books/9789027295958'
jbe_ebook = Ebook.objects.create(format='online', url=jbe_url, edition=edition)
jbe_ebf = dl_online(jbe_ebook)
self.assertTrue(jbe_ebf.ebook.filesize)

View File

@ -1,15 +1,23 @@
import csv
import re
import requests
import logging
import sys
import re
import time
import unicodedata
import urlparse
from bs4 import BeautifulSoup
import requests
from django.conf import settings
from django.core.files.base import ContentFile
from regluit.core.models import Work, Edition, Author, PublisherName, Identifier, Subject
from regluit.core.isbn import ISBN
from regluit.core.bookloader import add_by_isbn_from_google, merge_works
from regluit.api.crosswalks import inv_relator_contrib
from regluit.bisac.models import BisacHeading
from regluit.core.bookloader import add_by_isbn_from_google, merge_works
from regluit.core.isbn import ISBN
from regluit.core.models import (
Ebook, EbookFile, Edition, Identifier, path_for_file, Subject, Work,
)
logger = logging.getLogger(__name__)
@ -34,79 +42,84 @@ def utf8_general_ci_norm(s):
s1 = unicodedata.normalize('NFD', s)
return ''.join(c for c in s1 if not unicodedata.combining(c)).upper()
def get_soup(url):
response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
if response.status_code == 200:
return BeautifulSoup(response.content, 'lxml')
return None
def get_authors(book):
authors=[]
if book.get('AuthorsList',''):
authors = []
if book.get('AuthorsList', ''):
#UMich
for i in range(1,3):
fname=u'Author{}First'.format(i)
lname=u'Author{}Last'.format(i)
role=u'Author{}Role'.format(i)
authname = u'{} {}'.format(book[fname],book[lname])
for i in range(1, 3):
fname = u'Author{}First'.format(i)
lname = u'Author{}Last'.format(i)
role = u'Author{}Role'.format(i)
authname = u'{} {}'.format(book[fname], book[lname])
if authname != u' ':
role = book[role] if book[role].strip() else 'A01'
authors.append((authname,role))
authors.append((authname, role))
else:
break
authlist = book["AuthorsList"].replace(' and ', ', ').split(', ')
if len(authlist)>3:
if len(authlist) > 3:
for authname in authlist[3:]:
authors.append((authname, 'A01'))
else:
#OBP
for i in range(1,6):
fname= book.get(u'Contributor {} first name'.format(i), '')
lname= book.get(u'Contributor {} surname'.format(i), '')
role= book.get(u'ONIX Role Code (List 17){}'.format(i), '')
authname = u'{} {}'.format(fname,lname)
for i in range(1, 6):
fname = book.get(u'Contributor {} first name'.format(i), '')
lname = book.get(u'Contributor {} surname'.format(i), '')
role = book.get(u'ONIX Role Code (List 17){}'.format(i), '')
authname = u'{} {}'.format(fname, lname)
if authname != u' ':
role = role if role.strip() else 'A01'
authors.append((authname,role))
authors.append((authname, role))
else:
break
return authors
def get_subjects(book):
subjects=[]
for i in range(1,5):
subjects = []
for i in range(1, 5):
key = u'BISACCode{}'.format(i) #UMich dialect
key2 = u'BISAC subject code {}'.format(i) #OBP dialect
code = book.get(key,'')
code = code if code else book.get(key2,'')
code = book.get(key, '')
code = code if code else book.get(key2, '')
if code != '':
try:
bisac=BisacHeading.objects.get(notation=code)
bisac = BisacHeading.objects.get(notation=code)
subjects.append(bisac)
except BisacHeading.DoesNotExist:
logger.warning( "Please add BISAC {}".format(code))
logger.warning("Please add BISAC {}".format(code))
return subjects
def add_subject(subject_name, work, authority=''):
try:
subject= Subject.objects.get(name=subject_name)
subject = Subject.objects.get(name=subject_name)
except Subject.DoesNotExist:
subject=Subject.objects.create(name=subject_name, authority=authority)
subject = Subject.objects.create(name=subject_name, authority=authority)
subject.works.add(work)
def get_title(book):
title = book.get('FullTitle','') #UMICH
title = book.get('FullTitle', '') #UMICH
if title:
return title
title = book.get('Title','') #OBP
sub = book.get('Subtitle','')
title = book.get('Title', '') #OBP
sub = book.get('Subtitle', '')
if sub:
return u'{}: {}'.format(title,sub)
else:
return u'{}: {}'.format(title, sub)
return title
def get_cover(book):
cover_url = book.get('Cover URL','') #OBP
cover_url = book.get('Cover URL', '') #OBP
if cover_url:
return cover_url
url = book['URL']
if "10.3998" in url:
# code for umich books; can generalize, of course!
idmatch= re.search( r'([^/]+)\.(\d+\.\d+\.\d+)', url)
idmatch = re.search(r'([^/]+)\.(\d+\.\d+\.\d+)', url)
if idmatch:
book_id = idmatch.group(2)
if idmatch.group(1) == 'ohp':
@ -116,60 +129,64 @@ def get_cover(book):
else:
cover_url = "http://quod.lib.umich.edu/d/dculture/images/{}.jpg".format(book_id)
cover = requests.head(cover_url)
if cover.status_code<400:
if cover.status_code < 400:
return cover_url
else:
logger.warning( "bad cover: {} for: {}".format(cover_url, url))
logger.warning("bad cover: {} for: {}".format(cover_url, url))
def get_isbns(book):
isbns = []
edition = None
#'ISBN 1' is OBP, others are UMICH
for code in ['eISBN', 'ISBN 3','PaperISBN', 'ISBN 2', 'ClothISBN', 'ISBN 1', 'ISBN 4', 'ISBN 5']:
if book.get(code, '') not in ('','N/A'):
for code in ['eISBN', 'ISBN 3', 'PaperISBN', 'ISBN 2', 'ClothISBN',
'ISBN 1', 'ISBN 4', 'ISBN 5'
]:
if book.get(code, '') not in ('', 'N/A'):
values = book[code].split(',')
for value in values:
isbn = ISBN(value).to_string()
if isbn:
isbns.append(isbn)
for isbn in isbns :
for isbn in isbns:
if not edition:
edition = Edition.get_by_isbn(isbn)
return (isbns, edition )
return (isbns, edition)
def get_pubdate(book):
value = book.get('CopyrightYear','') #UMICH
value = book.get('CopyrightYear', '') #UMICH
if value:
return value
value = book.get('publication year','') #OBP
sub = book.get('publication month','')
sub2 = book.get('publication day','')
value = book.get('publication year', '') #OBP
sub = book.get('publication month', '')
sub2 = book.get('publication day', '')
if sub2:
return u'{}-{}-{}'.format(value,sub,sub2)
return u'{}-{}-{}'.format(value, sub, sub2)
elif sub:
return u'{}-{}'.format(value,sub,sub2)
else:
return u'{}-{}'.format(value, sub, sub2)
return value
def get_publisher(book):
value = book.get('Publisher','')
value = book.get('Publisher', '')
if value:
return value
if book.get('DOI prefix','')=='10.11647':
if book.get('DOI prefix', '') == '10.11647':
return "Open Book Publishers"
def get_url(book):
url = book.get('URL','')
url = url if url else u'https://doi.org/{}/{}'.format( book.get('DOI prefix',''),book.get('DOI suffix',''))
url = book.get('URL', '')
url = url if url else u'https://doi.org/{}/{}'.format(
book.get('DOI prefix', ''),
book.get('DOI suffix', '')
)
return url
def get_description(book):
value = book.get('DescriptionBrief','')
value = value if value else book.get('Plain Text Blurb','')
value = book.get('DescriptionBrief', '')
value = value if value else book.get('Plain Text Blurb', '')
return value
def get_language(book):
value = book.get('ISO Language Code','')
value = book.get('ISO Language Code', '')
return value
@ -194,9 +211,9 @@ def load_from_books(books):
# try first to get an Edition already in DB by one of the ISBNs in book
(isbns, edition) = get_isbns(book)
if len(isbns)==0:
if not isbns:
continue
title=get_title(book)
title = get_title(book)
authors = get_authors(book)
# if matching by ISBN doesn't work, then create a Work and Edition
@ -204,11 +221,11 @@ def load_from_books(books):
if not edition:
work = Work(title=title)
work.save()
edition= Edition(title=title, work=work)
edition = Edition(title=title, work=work)
edition.save()
Identifier.set(type='isbn', value=isbns[0], edition=edition, work=work)
work=edition.work
work = edition.work
# at this point, work and edition exist
url = get_url(book)
@ -222,7 +239,7 @@ def load_from_books(books):
if edition and edition.work != work:
work = merge_works(work, edition.work)
if not edition:
edition= Edition(title=title, work=work)
edition = Edition(title=title, work=work)
edition.save()
Identifier.set(type='isbn', value=isbn, edition=edition, work=work)
@ -236,12 +253,12 @@ def load_from_books(books):
# possibly replace work.description
description = get_description(book)
if len(description)>len (work.description):
if len(description) > len(work.description):
work.description = description
work.save()
# set language
lang= get_language(book)
lang = get_language(book)
if lang:
work.language = lang
work.save()
@ -258,9 +275,9 @@ def load_from_books(books):
results.append((book, work, edition))
try:
logger.info (u"{} {} {}\n".format(i, title, loading_ok))
logger.info(u"{} {} {}\n".format(i, title, loading_ok))
except Exception as e:
logger.info (u"{} {}\n".format(i, title, str(e) ))
logger.info(u"{} {} {}\n".format(i, title, str(e)))
return results
@ -277,10 +294,10 @@ def loaded_book_ok(book, work, edition):
try:
url_id = Identifier.objects.get(type='http', value=get_url(book))
if url_id is None:
logger.info ("url_id problem: work.id {}, url: {}".format(work.id, get_url(book)))
logger.info("url_id problem: work.id {}, url: {}".format(work.id, get_url(book)))
return False
except Exception as e:
logger.info (str(e))
logger.info(str(e))
return False
# isbns
@ -292,15 +309,17 @@ def loaded_book_ok(book, work, edition):
try:
edition_for_isbn = Identifier.objects.get(type='isbn', value=isbn).edition
except Exception as e:
print (e)
logger.info(e)
return False
# authors
# print set([ed.name for ed in edition_for_isbn.authors.all()])
if (set([utf8_general_ci_norm(author[0]) for author in authors]) !=
set([utf8_general_ci_norm(ed.name) for ed in edition_for_isbn.authors.all()])):
print "problem with authors"
if (
set([utf8_general_ci_norm(author[0]) for author in authors]) !=
set([utf8_general_ci_norm(ed.name) for ed in edition_for_isbn.authors.all()])
):
logger.info("problem with authors")
return False
try:
@ -312,7 +331,7 @@ def loaded_book_ok(book, work, edition):
# work description
description = get_description(book)
if not ((work.description == description) or (len(description) <len (work.description))):
if not ((work.description == description) or (len(description) < len(work.description))):
return False
# bisac
@ -331,14 +350,15 @@ def loaded_book_ok(book, work, edition):
return True
ID_URLPATTERNS = {
'goog': re.compile(r'[\./]google\.com/books\?.*id=([a-zA-Z0-9\-_]{12})'),
'olwk': re.compile(r'[\./]openlibrary\.org(/works/OL\d{1,8}W)'),
'gdrd': re.compile(r'[\./]goodreads\.com/book/show/(\d{1,8})'),
'ltwk': re.compile(r'[\./]librarything\.com/work/(\d{1,8})'),
'oclc': re.compile(r'\.worldcat\.org/.*oclc/(\d{8,12})'),
'doi': re.compile(r'[\./]doi\.org/(10\.\d+/\S+)'),
'gtbg': re.compile(r'[\./]gutenberg\.org/ebooks/(\d{1,6})'),
'glue': re.compile(r'[\./]unglue\.it/work/(\d{1,7})'),
'goog': re.compile(r'[\./]google\.com/books\?.*id=(?P<id>[a-zA-Z0-9\-_]{12})'),
'olwk': re.compile(r'[\./]openlibrary\.org(?P<id>/works/OL\d{1,8}W)'),
'doab': re.compile(r'([\./]doabooks\.org/doab\?.*rid:|=oai:doab-books:)(?P<id>\d{1,8})'),
'gdrd': re.compile(r'[\./]goodreads\.com/book/show/(?P<id>\d{1,8})'),
'ltwk': re.compile(r'[\./]librarything\.com/work/(?P<id>\d{1,8})'),
'oclc': re.compile(r'\.worldcat\.org/.*oclc/(?P<id>\d{8,12})'),
'doi': re.compile(r'[\./]doi\.org/(?P<id>10\.\d+/\S+)'),
'gtbg': re.compile(r'[\./]gutenberg\.org/ebooks/(?P<id>\d{1,6})'),
'glue': re.compile(r'[\./]unglue\.it/work/(?P<id>\d{1,7})'),
}
def ids_from_urls(url):
@ -346,7 +366,128 @@ def ids_from_urls(url):
for ident in ID_URLPATTERNS.keys():
id_match = ID_URLPATTERNS[ident].search(url)
if id_match:
ids[ident] = id_match.group(1)
ids[ident] = id_match.group('id')
return ids
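As a rough sanity check of the named-group patterns above (hypothetical usage; regluit.core.loaders.utils is the module being patched here):
# Hypothetical usage -- not part of the commit.
from regluit.core.loaders.utils import ids_from_urls
ids = ids_from_urls('https://doi.org/10.11647/OBP.0001')
assert ids.get('doi') == '10.11647/OBP.0001'
ids = ids_from_urls('http://www.doabooks.org/doab?func=search&query=rid:15100')
assert ids.get('doab') == '15100'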
DROPBOX_DL = re.compile(r'"(https://dl.dropboxusercontent.com/content_link/[^"]+)"')
def dl_online(ebook):
if ebook.format != 'online':
pass
elif ebook.url.find(u'dropbox.com/s/') >= 0:
response = requests.get(ebook.url, headers={"User-Agent": settings.USER_AGENT})
if response.status_code == 200:
match_dl = DROPBOX_DL.search(response.content)
if match_dl:
return make_dl_ebook(match_dl.group(1), ebook)
else:
logger.warning('couldn\'t find dl link in {}'.format(ebook.url))
else:
logger.warning('couldn\'t get {}'.format(ebook.url))
elif ebook.url.find(u'jbe-platform.com/content/books/') >= 0:
doc = get_soup(ebook.url)
if doc:
obj = doc.select_one('div.fulltexticoncontainer-PDF a')
if obj:
dl_url = urlparse.urljoin(ebook.url, obj['href'])
return make_dl_ebook(dl_url, ebook)
else:
logger.warning('couldn\'t get dl_url for {}'.format(ebook.url))
else:
logger.warning('couldn\'t get soup for {}'.format(ebook.url))
return None, False
def make_dl_ebook(url, ebook):
if EbookFile.objects.filter(source=ebook.url):
return EbookFile.objects.filter(source=ebook.url)[0], False
response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
if response.status_code == 200:
filesize = int(response.headers.get("Content-Length", 0))
filesize = filesize if filesize else None
format = type_for_url(url, content_type=response.headers.get('content-type'))
if format != 'online':
new_ebf = EbookFile.objects.create(
edition=ebook.edition,
format=format,
source=ebook.url,
)
new_ebf.file.save(path_for_file(new_ebf, None), ContentFile(response.content))
new_ebf.save()
new_ebook = Ebook.objects.create(
edition=ebook.edition,
format=format,
provider='Unglue.it',
url=new_ebf.file.url,
rights=ebook.rights,
filesize=filesize,
version_label=ebook.version_label,
version_iter=ebook.version_iter,
)
new_ebf.ebook = new_ebook
new_ebf.save()
return new_ebf, True
else:
logger.warning('download format for {} is not ebook'.format(url))
else:
logger.warning('couldn\'t get {}'.format(url))
return None, False
def type_for_url(url, content_type=None):
if not url:
return ''
if url.find('books.openedition.org') >= 0:
return 'online'
if Ebook.objects.filter(url=url):
return Ebook.objects.filter(url=url)[0].format
ct = content_type if content_type else contenttyper.calc_type(url)
if re.search("pdf", ct):
return "pdf"
elif re.search("octet-stream", ct) and re.search("pdf", url, flags=re.I):
return "pdf"
elif re.search("octet-stream", ct) and re.search("epub", url, flags=re.I):
return "epub"
elif re.search("text/plain", ct):
return "text"
elif re.search("text/html", ct):
if url.find('oapen.org/view') >= 0:
return "html"
return "online"
elif re.search("epub", ct):
return "epub"
elif re.search("mobi", ct):
return "mobi"
return "other"
class ContentTyper(object):
""" """
def __init__(self):
self.last_call = dict()
def content_type(self, url):
try:
r = requests.head(url)
return r.headers.get('content-type', '')
except:
return ''
def calc_type(self, url):
delay = 1
# is there a delay associated with the url
netloc = urlparse.urlparse(url).netloc
# wait if necessary
last_call = self.last_call.get(netloc)
if last_call is not None:
now = time.time()
min_time_next_call = last_call + delay
if min_time_next_call > now:
time.sleep(min_time_next_call-now)
self.last_call[netloc] = time.time()
# compute the content-type
return self.content_type(url)
contenttyper = ContentTyper()
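The per-netloc timestamps mean repeated probes of one host are spaced about a second apart while other hosts pass straight through; a sketch with hypothetical URLs:
# Hypothetical: same-host probes sleep toward the 1s spacing, others don't.
contenttyper.calc_type('http://example.org/a.pdf')
contenttyper.calc_type('http://example.org/b.pdf')  # sleeps up to ~1s
contenttyper.calc_type('http://example.net/c.pdf')  # different netloc, no sleep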
View File
@ -5,18 +5,18 @@ from regluit.core.models import Work
from regluit.core.loaders.doab import update_cover_doab
class Command(BaseCommand):
help = "make covers for doab editions"
help = "make covers for doab editions with bad covers"
def handle(self, **options):
works = Work.objects.filter(selected_edition__isnull=False, selected_edition__cover_image__isnull=True)
#.filter(selected_edition__isnull=False, selected_edition__cover_image__isnull=True)
#.exclude(selected_edition__identifiers__type='goog')
added = 0
for (i, work) in enumerate(works):
if work.doab and work.selected_edition.googlebooks_id == '':
update_cover_doab(work.doab, work.selected_edition)
added += 1
print ('\r {}:{}'.format(i, added), end='')
print('added {} covers'.format(added))
works = Work.objects.filter(identifiers__type='doab').distinct()
print('checking {} works with doab'.format(works.count()))
num = 0
for work in works:
if not work.cover_image_thumbnail():
update_cover_doab(work.doab, work.preferred_edition, store_cover=True)
#print(work.doab)
num += 1
if num % 10 == 0:
print('{} doab covers updated'.format(num))
#break
print('Done: {} doab covers updated'.format(num))
View File
@ -1,6 +1,7 @@
from django.core.management.base import BaseCommand
from regluit.core.models import Subject
from regluit.core.validation import valid_subject
@ -27,3 +28,8 @@ class Command(BaseCommand):
for work in subject.works.all():
Subject.set_by_name(subject.name, work=work)
subject.delete()
period_subjects = Subject.objects.filter(name__contains=".")
for subject in period_subjects:
if not valid_subject(subject.name):
subject.delete()
View File
@ -1,17 +0,0 @@
import os
from django.conf import settings
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand
from regluit.core.loaders import doab
class Command(BaseCommand):
help = "load doab auths"
args = "<limit> <file_name>"
def handle(self, limit=None, file_name="../../../bookdata/doab_auths.json", **options):
command_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(command_dir, file_name)
doab.load_doab_auths(file_path, limit=int(limit) if limit else None)
View File
@ -1,17 +0,0 @@
import os
from django.conf import settings
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand
from regluit.core.loaders import doab
class Command(BaseCommand):
help = "load doab books"
args = "<limit> <file_name>"
def handle(self, limit=None, file_name="../../../bookdata/doab.json", **options):
command_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(command_dir, file_name)
doab.load_doab_records(file_path, limit=int(limit))
View File
@ -0,0 +1,21 @@
from django.core.management.base import BaseCommand
from regluit.core.loaders.utils import dl_online
from regluit.core.models import Ebook
class Command(BaseCommand):
help = "harvest downloadable ebooks from 'online' ebooks"
args = "<limit>"
def handle(self, limit=0, **options):
limit = int(limit) if limit else 0
onlines = Ebook.objects.filter(format='online')
done = 0
for online in onlines:
new_ebf, new = dl_online(online)
if new_ebf and new:
done += 1
if done > limit:
break
print 'harvested {} ebooks'.format(done)
View File
@ -30,9 +30,9 @@ class Command(BaseCommand):
books = []
for sitemap in content:
added = add_by_sitemap(sitemap.strip(), maxnum=max)
max = max - len(added)
max = max - len(added) if max else max
books = books + added
if max < 0:
if max and max < 0:
break
else:
books = add_by_sitemap(url, maxnum=max)
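The reworked guard matters under Python 2 (background, not part of the diff): None compares less than any integer, so an unbounded run (max=None) would otherwise break out after the first sitemap:
# Python 2 gotcha that `if max and max < 0` avoids:
>>> None < 0
True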
View File
@ -4,9 +4,9 @@ from regluit.core.loaders.springer import load_springer
class Command(BaseCommand):
help = "load books from springer open"
args = "<pages>"
args = "<startpage> <endpage>"
def handle(self, pages, **options):
books = load_springer(int(pages))
def handle(self, startpage, endpage=0, **options):
books = load_springer(int(startpage), int(endpage))
print "loaded {} books".format(len(books))
View File
@ -0,0 +1,10 @@
from django.core.management.base import BaseCommand
from regluit.core.loaders import doab
class Command(BaseCommand):
help = "load doab books by doab_id via oai"
args = "<doab_id>"
def handle(self, doab_id, **options):
doab.add_by_doab(doab_id)
View File
@ -0,0 +1,18 @@
from django.core.management.base import BaseCommand
from regluit.core.loaders import doab
class Command(BaseCommand):
help = "load doab books via oai"
args = "<from_year> <limit>"
def handle(self, from_year= None, limit=None, **options):
from_year = int(from_year) if from_year else None
limit = int(limit) if limit else None
if limit:
doab.load_doab_oai(from_year=from_year, limit=limit)
else:
if from_year:
doab.load_doab_oai(from_year=from_year)
else:
doab.load_doab_oai()
View File
@ -4,9 +4,9 @@ from random import randint, randrange
from django.conf import settings
from django.core.management.base import BaseCommand
from django.utils.timezone import now
from regluit.core.models import Work, Campaign
from regluit.utils.localdatetime import now
class Command(BaseCommand):
help = "creates random campaigns for any works that lack one for testing"
View File
@ -26,6 +26,7 @@ from django.core.files.base import ContentFile
from django.db import models
from django.db.models import F, Q
from django.db.models.signals import post_save
from django.utils.timezone import now
from django.utils.translation import ugettext_lazy as _
#regluit imports
@ -45,8 +46,9 @@ from regluit.payment.parameters import (
TRANSACTION_STATUS_FAILED,
TRANSACTION_STATUS_INCOMPLETE
)
from regluit.utils import encryption as crypto
from regluit.utils.localdatetime import now, date_today
from regluit.utils.localdatetime import date_today
from regluit.core.parameters import (
REWARDS,
View File
@ -20,10 +20,10 @@ from django.core.urlresolvers import reverse
from django.db import models
from django.db.models import F
from django.db.models.signals import post_save, pre_delete
from django.utils.timezone import now
import regluit
from regluit.marc.models import MARCRecord as NewMARC
from regluit.utils.localdatetime import now
from questionnaire.models import Landing
from regluit.core import mobi
@ -1082,8 +1082,7 @@ class EbookFile(models.Model):
asking=self.asking,
source=self.file.url
)
new_mobi_ebf.file.save(path_for_file('ebf', None), mobi_cf)
new_mobi_ebf.file.save(path_for_file(new_mobi_ebf, None), mobi_cf)
new_mobi_ebf.save()
if self.ebook:
new_ebook = Ebook.objects.create(
View File
@ -42,7 +42,7 @@ OTHER_ID_CHOICES = (
('edid', 'pragmatic edition ID'),
)
WORK_IDENTIFIERS = ('doi','olwk','glue','ltwk', 'http')
WORK_IDENTIFIERS = ('doi','olwk','glue','ltwk', 'http', 'doab')
ID_CHOICES_MAP = dict(ID_CHOICES)
View File
@ -22,6 +22,7 @@ from django.db.utils import DatabaseError
from django.dispatch import Signal
from django.utils.translation import ugettext_noop as _
from django.template.loader import render_to_string
from django.utils.timezone import now
from notification import models as notification
@ -29,9 +30,9 @@ from notification import models as notification
regluit imports
"""
from regluit.payment.signals import transaction_charged, transaction_failed, pledge_modified, pledge_created
from regluit.utils.localdatetime import now, date_today
from regluit.core.parameters import REWARDS, BUY2UNGLUE, THANKS, LIBRARY, RESERVE, THANKED
from regluit.libraryauth.models import Library, LibraryUser
from regluit.utils.localdatetime import date_today
logger = logging.getLogger(__name__)
@ -100,7 +101,7 @@ def create_notice_types( **kwargs):
notification.create_notice_type("purchase_notgot_gift", _("Your gift wasn't received."), _("The ebook you sent as a gift has not yet been redeemed."))
notification.create_notice_type("donation", _("Your donation was processed."), _("Thank you, your generous donation has been processed."))
signals.post_syncdb.connect(create_notice_types, sender=notification)
signals.post_migrate.connect(create_notice_types, sender=notification)
# define the notifications and tie them to corresponding signals
View File
@ -13,6 +13,7 @@ django imports
from django.conf import settings
from django.contrib.auth.models import User
from django.core.mail import send_mail
from django.utils.timezone import now
from notification.engine import send_all
from notification import models as notification
@ -29,8 +30,7 @@ from regluit.core import (
from regluit.core.models import Campaign, Acq, Gift
from regluit.core.signals import deadline_impending
from regluit.core.parameters import RESERVE, REWARDS, THANKS
from regluit.utils.localdatetime import now, date_today
from regluit.utils.localdatetime import date_today
logger = logging.getLogger(__name__)
View File
@ -1,25 +1,22 @@
# encoding: utf-8
"""
external library imports
"""
#external library imports
import os
from datetime import datetime, timedelta
from decimal import Decimal as D
from math import factorial
from time import sleep, mktime
import unittest
from urlparse import parse_qs, urlparse
from tempfile import NamedTemporaryFile
from celery.task import chord
from time import sleep, mktime
from celery.task.sets import TaskSet
import requests
import requests_mock
import os
"""
django imports
"""
#django imports
from django.conf import settings
from django.contrib.auth.models import User
from django_comments.models import Comment
from django.contrib.contenttypes.models import ContentType
from django.contrib.sites.models import Site
from django.core.files import File as DjangoFile
@ -29,11 +26,12 @@ from django.http import Http404
from django.test import TestCase
from django.test.client import Client
from django.test.utils import override_settings
from django.utils import unittest
from django.utils.timezone import now
from django_comments.models import Comment
#regluit imports
"""
regluit imports
"""
from regluit.core import (
isbn,
bookloader,
@ -56,7 +54,6 @@ from regluit.core.models import (
Premium,
Subject,
Publisher,
PublisherName,
Offer,
EbookFile,
Acq,
@ -69,17 +66,17 @@ from regluit.core.validation import valid_subject
from regluit.frontend.views import safe_get_work
from regluit.payment.models import Transaction
from regluit.payment.parameters import PAYMENT_TYPE_AUTHORIZATION
from regluit.utils.localdatetime import now, date_today
from regluit.pyepub import EPUB
from regluit.utils.localdatetime import date_today
from .epub import test_epub
from .pdf import ask_pdf, test_pdf
from .pdf import test_pdf
TESTDIR = os.path.join(os.path.dirname(__file__), '../test/')
YAML_VERSIONFILE = os.path.join(TESTDIR, 'versiontest.yaml')
YAML_HUCKFILE = os.path.join(TESTDIR, 'raw/master/metadata.yaml')
class BookLoaderTests(TestCase):
fixtures = ['initial_data.json','bookloader.json']
fixtures = ['initial_data.json', 'bookloader.json']
def setUp(self):
self.user = User.objects.create_user('core_test', 'test@example.org', 'core_test')
@ -90,23 +87,27 @@ class BookLoaderTests(TestCase):
noebook_id = bookloader.load_from_yaml(YAML_VERSIONFILE)
noebook = models.Work.objects.get(id=noebook_id)
self.assertEqual( noebook.first_ebook(), None)
self.assertEqual(noebook.first_ebook(), None)
huck_id = bookloader.load_from_yaml(YAML_HUCKFILE, test_mode=True)
huck = models.Work.objects.get(id=huck_id)
self.assertTrue( huck.ebooks().count()>1)
self.assertTrue(huck.ebooks().count() > 1)
def test_add_by_yaml(self):
space_id = bookloader.load_from_yaml('https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml')
huck_id = bookloader.load_from_yaml('https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/raw/master/metadata.yaml')
space_id = bookloader.load_from_yaml(
'https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml'
)
huck_id = bookloader.load_from_yaml(
'https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/raw/master/metadata.yaml'
)
space = models.Work.objects.get(id=space_id)
huck = models.Work.objects.get(id=huck_id)
#test ebook archiving
num_ebf= EbookFile.objects.all().count()
num_ebf = EbookFile.objects.all().count()
for ebook in huck.ebooks().all():
f = ebook.get_archive()
self.assertTrue(EbookFile.objects.all().count()>num_ebf)
self.assertTrue(EbookFile.objects.all().count() > num_ebf)
def test_add_by_isbn_mock(self):
with requests_mock.Mocker(real_http=True) as m:
@ -162,7 +163,7 @@ class BookLoaderTests(TestCase):
if not (mocking or settings.TEST_INTEGRATION):
return
edition = bookloader.add_by_isbn('9787104030126')
self.assertEqual(edition.work.language, 'zh-CN')
self.assertEqual(edition.work.language, u'zh-CN')
def test_update_edition_mock(self):
with requests_mock.Mocker(real_http=True) as m:
@ -175,7 +176,7 @@ class BookLoaderTests(TestCase):
return
w = models.Work(title='silly title', language='xx')
w.save()
e = models.Edition(title=w.title,work=w)
e = models.Edition(title=w.title, work=w)
e.save()
models.Identifier(type='isbn', value='9781449319793', work=w, edition=e).save()
bookloader.update_edition(e)
@ -192,7 +193,7 @@ class BookLoaderTests(TestCase):
self.assertEqual(models.Work.objects.all().count(), before)
def test_missing_isbn(self):
e = bookloader.add_by_isbn_from_google('0139391401')
e = bookloader.add_by_isbn_from_google('9781938616990') #unassigned in gluejar block
self.assertEqual(e, None)
def test_thingisbn_mock(self):
@ -211,10 +212,11 @@ class BookLoaderTests(TestCase):
def test_add_related(self):
# add one edition
edition = bookloader.add_by_isbn('0441007465') #Neuromancer; editions in fixture but not joined
#Neuromancer; editions in fixture not joined
edition = bookloader.add_by_isbn('0441007465')
edbefore = models.Edition.objects.count()
before = models.Work.objects.count()
lang=edition.work.language
lang = edition.work.language
langbefore = models.Work.objects.filter(language=lang).count()
# ask for related editions to be added using the work we just created
with requests_mock.Mocker(real_http=True) as m:
@ -252,12 +254,13 @@ class BookLoaderTests(TestCase):
def test_merge_works_mechanics(self):
"""Make sure then merge_works is still okay when we try to merge works with themselves and with deleted works"""
"""Make sure then merge_works is still okay when we try to merge
works with themselves and with deleted works"""
before = models.Work.objects.count()
wasbefore = models.WasWork.objects.count()
sub1= Subject(name='test1')
sub1 = Subject(name='test1')
sub1.save()
sub2= Subject(name='test2')
sub2 = Subject(name='test2')
sub2.save()
w1 = Work(title="Work 1")
w1.save()
@ -265,7 +268,7 @@ class BookLoaderTests(TestCase):
w2 = Work(title="Work 2")
w2.save()
w2.subjects.add(sub1,sub2)
w2.subjects.add(sub1, sub2)
e1 = Edition(work=w1)
e1.save()
@ -273,7 +276,7 @@ class BookLoaderTests(TestCase):
e2 = Edition(work=w2)
e2.save()
eb1 = Ebook(edition = e2)
eb1 = Ebook(edition=e2)
eb1.save()
e2a = Edition(work=w2)
@ -293,7 +296,7 @@ class BookLoaderTests(TestCase):
w2_id = w2.id
# first try to merge work 1 into itself -- should not do anything
bookloader.merge_works(w1,w1)
bookloader.merge_works(w1, w1)
self.assertEqual(models.Work.objects.count(), before + 2)
# merge the second work into the first
@ -319,11 +322,11 @@ class BookLoaderTests(TestCase):
self.assertEqual(r.status_code, 200)
# if the work has a selected edition, then don't touch the work.
w3= Work(title='work 3')
e_pref= Edition(work=w1)
w1.selected_edition=e_pref
w3 = Work(title='work 3')
e_pref = Edition(work=w1)
w1.selected_edition = e_pref
bookloader.merge_works(w3, w1)
self.assertTrue(w1.title=='Work 1')
self.assertTrue(w1.title == 'Work 1')
def test_merge_works(self):
before = models.Work.objects.count()
@ -398,8 +401,8 @@ class BookLoaderTests(TestCase):
w3 = models.Edition.get_by_isbn(isbn1).work
# and that relevant Campaigns and Wishlists are updated
c1=Campaign.objects.get(pk=c1.pk)
c2=Campaign.objects.get(pk=c2.pk)
c1 = Campaign.objects.get(pk=c1.pk)
c2 = Campaign.objects.get(pk=c2.pk)
self.assertEqual(c1.work, c2.work)
self.assertEqual(user.wishlist.works.all().count(), 1)
@ -417,20 +420,19 @@ class BookLoaderTests(TestCase):
with open(os.path.join(TESTDIR, 'gb_latinlanguage.json')) as gb:
m.get('https://www.googleapis.com/books/v1/volumes', content=gb.read())
edition = bookloader.add_by_oclc('1246014')
# we've seen the public domain status of this book fluctuate -- and the OCLC number can disappear. So if the ebook count is 2 then test
# we've seen the public domain status of this book fluctuate -- and the OCLC
# number can disappear. So if the ebook count is 2 then test
#if edition is not None and edition.ebooks.count() == 2:
self.assertEqual(edition.ebooks.count(), 2)
#ebook_epub = edition.ebooks.all()[0]
ebook_epub = edition.ebooks.filter(format='epub')[0]
self.assertEqual(ebook_epub.format, 'epub')
#self.assertEqual(ebook_epub.url, 'http://books.google.com/books/download/The_Latin_language.epub?id=N1RfAAAAMAAJ&ie=ISO-8859-1&output=epub&source=gbs_api')
self.assertEqual(parse_qs(urlparse(ebook_epub.url).query).get("id"), ['N1RfAAAAMAAJ'])
self.assertEqual(parse_qs(urlparse(ebook_epub.url).query).get("output"), ['epub'])
self.assertEqual(ebook_epub.provider, 'Google Books')
self.assertEqual(ebook_epub.set_provider(), 'Google Books')
ebook_pdf = edition.ebooks.filter(format='pdf')[0]
self.assertEqual(ebook_pdf.format, 'pdf')
#self.assertEqual(ebook_pdf.url, 'http://books.google.com/books/download/The_Latin_language.pdf?id=N1RfAAAAMAAJ&ie=ISO-8859-1&output=pdf&sig=ACfU3U2yLt3nmTncB8ozxOWUc4iHKUznCA&source=gbs_api')
self.assertEqual(parse_qs(urlparse(ebook_pdf.url).query).get("id"), ['N1RfAAAAMAAJ'])
self.assertEqual(parse_qs(urlparse(ebook_pdf.url).query).get("output"), ['pdf'])
self.assertEqual(ebook_pdf.provider, 'Google Books')
@ -441,12 +443,12 @@ class BookLoaderTests(TestCase):
self.assertEqual(w.first_epub_url(), ebook_epub.url)
self.assertEqual(w.first_pdf_url(), ebook_pdf.url)
ebook_pdf.url='https://en.wikisource.org/wiki/Frankenstein'
ebook_pdf.url = 'https://en.wikisource.org/wiki/Frankenstein'
self.assertEqual(ebook_pdf.set_provider(), 'Wikisource')
self.user.wishlist.add_work(w, 'test')
tasks.report_new_ebooks(date_today())
r = self.client.get("/notification/" )
r = self.client.get("/notification/")
self.assertEqual(r.status_code, 200)
ebook_pdf.increment()
@ -471,9 +473,9 @@ class BookLoaderTests(TestCase):
subjects = [s.name for s in work.subjects.all()]
self.assertTrue(len(subjects) > 10)
self.assertTrue('Science fiction' in subjects)
self.assertTrue('/works/OL27258W' in work.identifiers.filter(type='olwk').values_list('value',flat=True) )
self.assertTrue('888628' in work.identifiers.filter(type='gdrd').values_list('value',flat=True))
self.assertTrue('609' in work.identifiers.filter(type='ltwk').values_list('value',flat=True))
self.assertTrue('/works/OL27258W' in work.identifiers.filter(type='olwk').values_list('value', flat=True))
self.assertTrue('888628' in work.identifiers.filter(type='gdrd').values_list('value', flat=True))
self.assertTrue('609' in work.identifiers.filter(type='ltwk').values_list('value', flat=True))
def test_unicode_openlibrary(self):
with requests_mock.Mocker(real_http=True) as m:
@ -493,10 +495,21 @@ class BookLoaderTests(TestCase):
license = 'https://www.gutenberg.org/license'
lang = 'en'
format = 'epub'
publication_date = datetime(2001,7,1)
seed_isbn = '9780142000083' # https://www.amazon.com/Moby-Dick-Whale-Penguin-Classics-Deluxe/dp/0142000086
publication_date = datetime(2001, 7, 1)
# https://www.amazon.com/Moby-Dick-Whale-Penguin-Classics-Deluxe/dp/0142000086
seed_isbn = '9780142000083'
ebook = bookloader.load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, seed_isbn, epub_url, format, license, lang, publication_date)
ebook = bookloader.load_gutenberg_edition(
title,
gutenberg_etext_id,
ol_work_id,
seed_isbn,
epub_url,
format,
license,
lang,
publication_date
)
self.assertEqual(ebook.url, epub_url)
def tearDown(self):
@ -506,8 +519,13 @@ class BookLoaderTests(TestCase):
class SearchTests(TestCase):
def test_search_mock(self):
with requests_mock.Mocker(real_http=True) as m:
with open(os.path.join(TESTDIR, 'gb_melville.json')) as gb, open(os.path.join(TESTDIR, 'gb_melville2.json')) as gb2:
m.get('https://www.googleapis.com/books/v1/volumes', [{'content':gb2.read()}, {'content':gb.read()}])
with open(
os.path.join(TESTDIR, 'gb_melville.json')
) as gb, open(os.path.join(TESTDIR, 'gb_melville2.json')) as gb2:
m.get(
'https://www.googleapis.com/books/v1/volumes',
[{'content':gb2.read()}, {'content':gb.read()}]
)
self.test_pagination(mocking=True)
self.test_basic_search(mocking=True)
self.test_googlebooks_search(mocking=True)
@ -523,7 +541,10 @@ class SearchTests(TestCase):
self.assertTrue(r.has_key('author'))
self.assertTrue(r.has_key('description'))
self.assertTrue(r.has_key('cover_image_thumbnail'))
self.assertTrue(r['cover_image_thumbnail'].startswith('https') or r['cover_image_thumbnail'].startswith('http'))
self.assertTrue(
r['cover_image_thumbnail'].startswith('https')
or r['cover_image_thumbnail'].startswith('http')
)
self.assertTrue(r.has_key('publisher'))
self.assertTrue(r.has_key('isbn_13'))
self.assertTrue(r.has_key('googlebooks_id'))
@ -556,19 +577,19 @@ class CampaignTests(TestCase):
work=w, type=2,
cc_date_initial=datetime(this_year + 100, 1, 1),
)
self.assertTrue(c.set_dollar_per_day()<0.34)
self.assertTrue(c.dollar_per_day>0.31)
t = Transaction(type=1, campaign=c, approved=True, amount= D(6000.1), status="Complete")
self.assertTrue(c.set_dollar_per_day() < 0.34)
self.assertTrue(c.dollar_per_day > 0.31)
t = Transaction(type=1, campaign=c, approved=True, amount=D(6000.1), status="Complete")
t.save()
c.status = 'ACTIVE'
c.save()
c.update_left()
#print(w.percent_of_goal())
self.assertEqual(w.percent_unglued(),3)
self.assertTrue(w.percent_of_goal()>49)
ofr = Offer.objects.create(work=w,price=D(10),active=True)
self.assertTrue(c.days_per_copy <D(32.26))
self.assertTrue(c.days_per_copy >D(29.41))
self.assertEqual(w.percent_unglued(), 3)
self.assertTrue(w.percent_of_goal() > 49)
ofr = Offer.objects.create(work=w, price=D(10), active=True)
self.assertTrue(c.days_per_copy < D(32.26))
self.assertTrue(c.days_per_copy > D(29.41))
def test_required_fields(self):
# a campaign must have a target, deadline and a work
@ -601,20 +622,25 @@ class CampaignTests(TestCase):
w2 = Work()
w2.save()
# INITIALIZED
c1 = Campaign(target=D('1000.00'),deadline=Campaign.latest_ending(),work=w)
c1 = Campaign(target=D('1000.00'), deadline=Campaign.latest_ending(), work=w)
c1.save()
self.assertEqual(c1.status, 'INITIALIZED')
# ACTIVATED
c2 = Campaign(target=D('1000.00'),deadline=datetime(2013,1,1),work=w,description='dummy description')
c2 = Campaign(
target=D('1000.00'),
deadline=datetime(2013, 1, 1),
work=w,
description='dummy description'
)
c2.save()
self.assertEqual(c2.status, 'INITIALIZED')
u = User.objects.create_user('claimer', 'claimer@example.org', 'claimer')
u.save()
rh = RightsHolder(owner = u, rights_holder_name = 'rights holder name')
rh = RightsHolder(owner=u, rights_holder_name='rights holder name')
rh.save()
cl = Claim(rights_holder = rh, work = w, user = u, status = 'active')
cl = Claim(rights_holder=rh, work=w, user=u, status='active')
cl.save()
cl2 = Claim(rights_holder = rh, work = w2, user = u, status = 'active')
cl2 = Claim(rights_holder=rh, work=w2, user=u, status='active')
cl2.save()
c2.activate()
self.assertEqual(c2.status, 'ACTIVE')
@ -624,31 +650,42 @@ class CampaignTests(TestCase):
# RESUMING
c2.resume(reason="for testing")
#self.assertEqual(c2.suspended, None)
self.assertEqual(c2.status,'ACTIVE')
self.assertEqual(c2.status, 'ACTIVE')
# should not let me suspend a campaign that hasn't been initialized
self.assertRaises(UnglueitError, c1.suspend, "for testing")
# UNSUCCESSFUL
c3 = Campaign(target=D('1000.00'),deadline=now() - timedelta(days=1),work=w2,description='dummy description')
c3 = Campaign(
target=D('1000.00'),
deadline=now() - timedelta(days=1),
work=w2,
description='dummy description'
)
c3.save()
c3.activate()
self.assertEqual(c3.status, 'ACTIVE')
# at this point, since the deadline has passed, the status should change and be UNSUCCESSFUL
# at this point, since the deadline has passed,
# the status should change and be UNSUCCESSFUL
self.assertTrue(c3.update_status())
self.assertEqual(c3.status, 'UNSUCCESSFUL')
# premiums
pr1= Premium(type='CU', campaign=c3, amount=10, description='botsnack', limit=1)
pr1 = Premium(type='CU', campaign=c3, amount=10, description='botsnack', limit=1)
pr1.save()
self.assertEqual(pr1.premium_remaining,1)
self.assertEqual(pr1.premium_remaining, 1)
#cloning (note we changed c3 to w2 to make it clonable)
c7= c3.clone()
c7 = c3.clone()
self.assertEqual(c7.status, 'INITIALIZED')
self.assertEqual(c7.premiums.all()[0].description , 'botsnack')
self.assertEqual(c7.premiums.all()[0].description, 'botsnack')
# SUCCESSFUL
c4 = Campaign(target=D('1000.00'),deadline=now() - timedelta(days=1),work=w,description='dummy description')
c4 = Campaign(
target=D('1000.00'),
deadline=now() - timedelta(days=1),
work=w,
description='dummy description'
)
c4.save()
c4.activate()
t = Transaction()
@ -663,7 +700,12 @@ class CampaignTests(TestCase):
self.assertEqual(c4.status, 'SUCCESSFUL')
# WITHDRAWN
c5 = Campaign(target=D('1000.00'),deadline=datetime(2013,1,1),work=w,description='dummy description')
c5 = Campaign(
target=D('1000.00'),
deadline=datetime(2013, 1, 1),
work=w,
description='dummy description'
)
c5.save()
c5.activate().withdraw('testing')
self.assertEqual(c5.status, 'WITHDRAWN')
@ -671,9 +713,14 @@ class CampaignTests(TestCase):
# testing percent-of-goal
w2 = Work()
w2.save()
c6 = Campaign(target=D('1000.00'),deadline=now() + timedelta(days=1),work=w2,description='dummy description')
c6 = Campaign(
target=D('1000.00'),
deadline=now() + timedelta(days=1),
work=w2,
description='dummy description'
)
c6.save()
cl = Claim(rights_holder = rh, work = w2, user = u, status = 'active')
cl = Claim(rights_holder=rh, work=w2, user=u, status='active')
cl.save()
c6.activate()
t = Transaction()
@ -687,7 +734,7 @@ class CampaignTests(TestCase):
self.assertEqual(w2.percent_of_goal(), 23)
self.assertEqual(c1.launchable, False)
c1.description="description"
c1.description = "description"
self.assertEqual(c1.launchable, True)
c1.work.create_offers()
self.assertEqual(c1.work.offers.count(), 2)
@ -695,14 +742,14 @@ class CampaignTests(TestCase):
c1.type = 2
c1.save()
self.assertEqual(c1.launchable, False)
of1=c1.work.offers.get(license=2)
of1.price=D(2)
of1.active=True
of1 = c1.work.offers.get(license=2)
of1.price = D(2)
of1.active = True
of1.save()
self.assertEqual(c1.launchable, False)
e1= models.Edition(title="title",work=c1.work)
e1 = models.Edition(title="title", work=c1.work)
e1.save()
ebf1= models.EbookFile(edition=e1, format=1)
ebf1 = models.EbookFile(edition=e1, format=1)
ebf1.save()
c1.set_cc_date_initial()
self.assertEqual(c1.cc_date, settings.MAX_CC_DATE)
@ -717,7 +764,7 @@ class WishlistTest(TestCase):
user = User.objects.create_user('test', 'test@example.org', 'testpass')
edition = bookloader.add_by_isbn('0441007465')
work = edition.work
num_wishes=work.num_wishes
num_wishes = work.num_wishes
user.wishlist.add_work(work, 'test')
self.assertEqual(user.wishlist.works.count(), 1)
self.assertEqual(work.num_wishes, num_wishes+1)
@ -732,7 +779,7 @@ class CeleryTaskTest(TestCase):
n = 10
task = tasks.fac.delay(n)
result = task.get(timeout=10)
self.assertEqual(result,factorial(n))
self.assertEqual(result, factorial(n))
def test_subtask(self):
n = 30
@ -741,7 +788,7 @@ class CeleryTaskTest(TestCase):
result = job.apply_async()
while not result.ready():
sleep(0.2)
self.assertEqual(result.join(),[factorial(x) for x in range(n)])
self.assertEqual(result.join(), [factorial(x) for x in range(n)])
class GoodreadsTest(TestCase):
@ -751,7 +798,10 @@ class GoodreadsTest(TestCase):
return
# test to see whether the core undeletable shelves are on the list
gr_uid = "767708" # for Raymond Yee
gc = goodreads.GoodreadsClient(key=settings.GOODREADS_API_KEY, secret=settings.GOODREADS_API_SECRET)
gc = goodreads.GoodreadsClient(
key=settings.GOODREADS_API_KEY,
secret=settings.GOODREADS_API_SECRET
)
shelves = gc.shelves_list(gr_uid)
shelf_names = [s['name'] for s in shelves['user_shelves']]
self.assertTrue('currently-reading' in shelf_names)
@ -763,7 +813,10 @@ class GoodreadsTest(TestCase):
if not settings.GOODREADS_API_SECRET:
return
gr_uid = "767708" # for Raymond Yee
gc = goodreads.GoodreadsClient(key=settings.GOODREADS_API_KEY, secret=settings.GOODREADS_API_SECRET)
gc = goodreads.GoodreadsClient(
key=settings.GOODREADS_API_KEY,
secret=settings.GOODREADS_API_SECRET
)
reviews = gc.review_list_unauth(user_id=gr_uid, shelf='read')
# test to see whether there is a book field in each of the review
# url for test is https://www.goodreads.com/review/list.xml?id=767708&shelf=read&page=1&per_page=20&order=a&v=2&key=[key]
@ -776,7 +829,7 @@ class LibraryThingTest(TestCase):
lt_username = 'yujx'
lt = librarything.LibraryThing(username=lt_username)
books = list(lt.parse_user_catalog(view_style=5))
self.assertEqual(len(books),1)
self.assertEqual(len(books), 1)
self.assertEqual(books[0]['isbn'], '0471925675')
self.assertEqual(books[0]['work_id'], '80826')
self.assertEqual(books[0]['book_id'], '79883733')
@ -787,7 +840,7 @@ class ISBNTest(TestCase):
milosz_10 = '006019667X'
milosz_13 = '9780060196677'
python_10 = '0-672-32978-6'
funky = '0672—329 78-6' # endash, mdash, space
funky = u'0672—329 78-6' # endash, mdash, space
python_10_wrong = '0-672-32978-7'
python_13 = '978-0-672-32978-4'
@ -812,19 +865,19 @@ class ISBNTest(TestCase):
self.assertEqual(isbn_python_13.to_string(), bookloader.valid_isbn(python_10_wrong))
# do conversion -- first the outside methods
self.assertEqual(isbn.convert_10_to_13(isbn.strip(python_10)),isbn.strip(python_13))
self.assertEqual(isbn.convert_10_to_13(isbn.strip(python_10)),isbn.strip(python_13))
self.assertEqual(isbn.convert_13_to_10('xxxxxxxxxxxxx'),None)
self.assertEqual(isbn.convert_10_to_13('xxxxxxxxxx'),None)
self.assertEqual(isbn.convert_10_to_13(isbn.strip(python_10)), isbn.strip(python_13))
self.assertEqual(isbn.convert_10_to_13(isbn.strip(python_10)), isbn.strip(python_13))
self.assertEqual(isbn.convert_13_to_10('xxxxxxxxxxxxx'), None)
self.assertEqual(isbn.convert_10_to_13('xxxxxxxxxx'), None)
self.assertEqual(None, bookloader.valid_isbn('xxxxxxxxxxxxx'))
self.assertEqual(None, bookloader.valid_isbn('xxxxxxxxxx'))
# check formatting
self.assertEqual(isbn.ISBN(python_13).to_string(type='13'), '9780672329784')
self.assertEqual(isbn.ISBN(python_13).to_string('13',True), '978-0-672-32978-4')
self.assertEqual(isbn.ISBN(python_13).to_string('13', True), '978-0-672-32978-4')
self.assertEqual(isbn.ISBN(python_13).to_string(type='10'), '0672329786')
self.assertEqual(isbn.ISBN(python_10).to_string(type='13'), '9780672329784')
self.assertEqual(isbn.ISBN(python_10).to_string(10,True), '0-672-32978-6')
self.assertEqual(isbn.ISBN(python_10).to_string(10, True), '0-672-32978-6')
# complain if one tries to get ISBN-10 for a 979 ISBN 13
# making up a 979 ISBN
@ -844,9 +897,12 @@ class ISBNTest(TestCase):
self.assertEqual(isbn.ISBN(python_13).validate(), python_10)
# curious about set membership
self.assertEqual(len(set([isbn.ISBN(milosz_10), isbn.ISBN(milosz_13)])),2)
self.assertEqual(len(set([str(isbn.ISBN(milosz_10)), str(isbn.ISBN(milosz_13))])),2)
self.assertEqual(len(set([isbn.ISBN(milosz_10).to_string(), isbn.ISBN(milosz_13).to_string()])),1)
self.assertEqual(len(set([isbn.ISBN(milosz_10), isbn.ISBN(milosz_13)])), 2)
self.assertEqual(len(set([str(isbn.ISBN(milosz_10)), str(isbn.ISBN(milosz_13))])), 2)
self.assertEqual(
len(set([isbn.ISBN(milosz_10).to_string(), isbn.ISBN(milosz_13).to_string()])),
1
)
class EncryptedKeyTest(TestCase):
def test_create_read_key(self):
@ -880,11 +936,11 @@ class WorkTests(TestCase):
self.w2 = models.Work.objects.create()
def test_preferred_edition(self):
ww = models.WasWork.objects.create(work=self.w1, was= self.w2.id)
ww = models.WasWork.objects.create(work=self.w1, was=self.w2.id)
e1 = models.Edition.objects.create(work=self.w1)
self.assertEqual(e1, self.w1.preferred_edition)
e2 = models.Edition.objects.create(work=self.w1)
self.w1.selected_edition=e2
self.w1.selected_edition = e2
self.w1.save()
self.assertEqual(e2, self.w1.preferred_edition)
self.assertEqual(e2, self.w2.preferred_edition)
@ -944,46 +1000,6 @@ class DownloadPageTest(TestCase):
eb2.delete()
self.assertFalse(eb2.edition.work.is_free)
class LocaldatetimeTest(TestCase):
@override_settings(LOCALDATETIME_NOW=None)
def test_LOCALDATETIME_NOW_none(self):
try:
localdatetime.now
except NameError:
from regluit.utils import localdatetime
else:
reload(localdatetime)
self.assertAlmostEqual(mktime(datetime.now().timetuple()), mktime(localdatetime.now().timetuple()), 1.0)
@override_settings(LOCALDATETIME_NOW=lambda : datetime.now() + timedelta(365))
def test_LOCALDATETIME_NOW_year_ahead(self):
try:
localdatetime.now
except NameError:
from regluit.utils import localdatetime
else:
reload(localdatetime)
self.assertAlmostEqual(mktime((datetime.now() + timedelta(365)).timetuple()), mktime(localdatetime.now().timetuple()), 1.0)
def test_no_time_override(self):
from regluit.utils import localdatetime
self.assertAlmostEqual(mktime(datetime.now().timetuple()), mktime(localdatetime.now().timetuple()), 1.0)
def tearDown(self):
# restore localdatetime.now() to what's in the settings file
try:
localdatetime.now
except NameError:
from regluit.utils import localdatetime
else:
reload(localdatetime)
class MailingListTests(TestCase):
#mostly to check that the MailChimp account is set up correctly
@ -991,7 +1007,7 @@ class MailingListTests(TestCase):
from postmonkey import PostMonkey
pm = PostMonkey(settings.MAILCHIMP_API_KEY)
if settings.TEST_INTEGRATION:
self.assertEqual(pm.ping(),"Everything's Chimpy!" )
self.assertEqual(pm.ping(), "Everything's Chimpy!")
self.user = User.objects.create_user('chimp_test', 'eric@gluejar.com', 'chimp_test')
self.assertTrue(self.user.profile.on_ml)
@ -1009,17 +1025,18 @@ class EbookFileTests(TestCase):
Read the test epub file
"""
w = Work.objects.create(title="Work 1")
e = Edition.objects.create(title=w.title,work=w)
e = Edition.objects.create(title=w.title, work=w)
u = User.objects.create_user('test', 'test@example.org', 'testpass')
rh = RightsHolder.objects.create(owner = u, rights_holder_name = 'rights holder name')
cl = Claim.objects.create(rights_holder = rh, work = w, user = u, status = 'active')
c = Campaign.objects.create(work = w,
type = parameters.BUY2UNGLUE,
cc_date_initial = datetime(2020,1,1),
target = 1000,
deadline = datetime(2020,1,1),
license = 'CC BY',
description = "dummy description",
rh = RightsHolder.objects.create(owner=u, rights_holder_name='rights holder name')
cl = Claim.objects.create(rights_holder=rh, work=w, user=u, status='active')
c = Campaign.objects.create(
work=w,
type=parameters.BUY2UNGLUE,
cc_date_initial=datetime(2020, 1, 1),
target=1000,
deadline=datetime(2020, 1, 1),
license='CC BY',
description="dummy description",
)
# download the test epub into a temp file
temp = NamedTemporaryFile(delete=False)
@ -1033,7 +1050,7 @@ class EbookFileTests(TestCase):
temp_file = open(temp.name)
dj_file = DjangoFile(temp_file)
ebf = EbookFile( format='epub', edition=e, file=dj_file)
ebf = EbookFile(format='epub', edition=e, file=dj_file)
ebf.save()
temp_file.close()
@ -1041,16 +1058,16 @@ class EbookFileTests(TestCase):
# make sure we get rid of temp file
os.remove(temp.name)
test_epub= EPUB(ebf.file, mode='a')
self.assertEqual(len(test_epub.opf) , 4)
test_epub = EPUB(ebf.file, mode='a')
self.assertEqual(len(test_epub.opf), 4)
self.assertTrue(len(test_epub.opf[2]) < 30)
acq=Acq.objects.create(user=u,work=w,license=TESTING)
acq = Acq.objects.create(user=u,work=w,license=TESTING)
self.assertIsNot(acq.nonce, None)
url= acq.get_watermarked().download_link_epub
self.assertRegexpMatches(url,'github.com/eshellman/42_ebook/blob/master/download/42')
#self.assertRegexpMatches(url,'booxtream.com/')
url = acq.get_watermarked().download_link_epub
self.assertRegexpMatches(url, 'github.com/eshellman/42_ebook/blob/master/download/42')
#self.assertRegexpMatches(url, 'booxtream.com/')
with self.assertRaises(UnglueitError) as cm:
c.activate()
@ -1058,23 +1075,23 @@ class EbookFileTests(TestCase):
off.save()
c.activate()
#flip the campaign to success
c.cc_date_initial= datetime(2012,1,1)
c.cc_date_initial = datetime(2012, 1, 1)
c.update_status()
self.assertEqual( c.work.ebooks().count(),2 )
c.do_watermark=False
self.assertEqual(c.work.ebooks().count(), 2)
c.do_watermark = False
c.save()
url= acq.get_watermarked().download_link_epub
url = acq.get_watermarked().download_link_epub
def test_ebookfile_thanks(self):
w = Work.objects.create(title="Work 2")
e = Edition.objects.create(title=w.title,work=w)
e = Edition.objects.create(title=w.title, work=w)
u = User.objects.create_user('test2', 'test@example.org', 'testpass')
rh = RightsHolder.objects.create(owner = u, rights_holder_name = 'rights holder name 2')
cl = Claim.objects.create(rights_holder = rh, work = w, user = u, status = 'active')
c = Campaign.objects.create(work = w,
type = parameters.THANKS,
license = 'CC BY-NC',
description = "Please send me money",
rh = RightsHolder.objects.create(owner=u, rights_holder_name='rights holder name 2')
cl = Claim.objects.create(rights_holder=rh, work=w, user=u, status='active')
c = Campaign.objects.create(work=w,
type=parameters.THANKS,
license='CC BY-NC',
description="Please send me money",
)
# download the test epub into a temp file
temp = NamedTemporaryFile(delete=False)
@ -1087,9 +1104,9 @@ class EbookFileTests(TestCase):
temp_file = open(temp.name)
dj_file = DjangoFile(temp_file)
ebf = EbookFile( format='pdf', edition=e, file=dj_file)
ebf = EbookFile(format='pdf', edition=e, file=dj_file)
ebf.save()
eb = Ebook( format='pdf', edition=e, url=ebf.file.url, provider='Unglue.it')
eb = Ebook(format='pdf', edition=e, url=ebf.file.url, provider='Unglue.it')
eb.save()
ebf.ebook = eb
ebf.save()
@ -1117,9 +1134,9 @@ class EbookFileTests(TestCase):
temp_file = open(temp.name)
dj_file = DjangoFile(temp_file)
ebf = EbookFile( format='epub', edition=e, file=dj_file)
ebf = EbookFile(format='epub', edition=e, file=dj_file)
ebf.save()
eb = Ebook( format='epub', edition=e, url=ebf.file.url, provider='Unglue.it')
eb = Ebook(format='epub', edition=e, url=ebf.file.url, provider='Unglue.it')
eb.save()
ebf.ebook = eb
ebf.save()
@ -1130,15 +1147,15 @@ class EbookFileTests(TestCase):
os.remove(temp.name)
#test the ask-appender
c.add_ask_to_ebfs()
self.assertTrue( c.work.ebookfiles().filter(asking = True, format='epub').count() > 0)
self.assertTrue(c.work.ebookfiles().filter(asking=True, format='epub').count() > 0)
if settings.MOBIGEN_URL:
self.assertTrue( c.work.ebookfiles().filter(asking = True, format='mobi').count() > 0)
self.assertTrue( c.work.ebookfiles().filter(asking = True, ebook__active=True).count() > 0)
self.assertTrue( c.work.ebookfiles().filter(asking = False, ebook__active=True).count() == 0)
self.assertTrue(c.work.ebookfiles().filter(asking=True, format='mobi').count() > 0)
self.assertTrue(c.work.ebookfiles().filter(asking=True, ebook__active=True).count() > 0)
self.assertTrue(c.work.ebookfiles().filter(asking=False, ebook__active=True).count() == 0)
#test the unasker
c.revert_asks()
self.assertTrue( c.work.ebookfiles().filter(asking = True, ebook__active=True).count() == 0)
self.assertTrue( c.work.ebookfiles().filter(asking = False, ebook__active=True).count() > 0)
self.assertTrue(c.work.ebookfiles().filter(asking=True, ebook__active=True).count() == 0)
self.assertTrue(c.work.ebookfiles().filter(asking=False, ebook__active=True).count() > 0)
class MobigenTests(TestCase):
def test_convert_to_mobi(self):
@ -1147,10 +1164,11 @@ class MobigenTests(TestCase):
"""
from regluit.core.mobigen import convert_to_mobi
if settings.TEST_INTEGRATION:
output = convert_to_mobi("https://github.com/GITenberg/Moby-Dick--Or-The-Whale_2701/releases/download/0.2.0/Moby-Dick-Or-The-Whale.epub")
self.assertTrue(len(output)>2207877)
output = convert_to_mobi(
"https://github.com/GITenberg/Moby-Dick--Or-The-Whale_2701/releases/download/0.2.0/Moby-Dick-Or-The-Whale.epub"
)
self.assertTrue(len(output) > 2207877)
from .signals import handle_transaction_charged
@override_settings(LOCAL_TEST=True)
class LibTests(TestCase):
fixtures = ['initial_data.json']
@ -1159,33 +1177,47 @@ class LibTests(TestCase):
def test_purchase(self):
w = Work.objects.create(title="Work 1")
e = Edition.objects.create(title=w.title,work=w)
e = Edition.objects.create(title=w.title, work=w)
u = User.objects.create_user('test', 'test@example.org', 'testpass')
lu = User.objects.create_user('library', 'testu@example.org', 'testpass')
lib = Library.objects.create(user=lu,owner=u)
c = Campaign.objects.create(work=w, type = parameters.BUY2UNGLUE, cc_date_initial= datetime(2020,1,1),target=1000, deadline=datetime(2020,1,1))
lib = Library.objects.create(user=lu, owner=u)
c = Campaign.objects.create(
work=w,
type=parameters.BUY2UNGLUE,
cc_date_initial=datetime(2020, 1, 1),
target=1000,
deadline=datetime(2020, 1, 1)
)
new_acq = Acq.objects.create(user=lib.user,work=c.work,license= LIBRARY)
new_acq = Acq.objects.create(user=lib.user, work=c.work, license=LIBRARY)
self.assertTrue(new_acq.borrowable)
reserve_acq = Acq.objects.create(user=u,work=c.work,license= RESERVE, lib_acq = new_acq)
reserve_acq = Acq.objects.create(user=u, work=c.work, license=RESERVE, lib_acq=new_acq)
self.assertTrue(reserve_acq.borrowable)
self.assertFalse(new_acq.borrowable)
self.assertTrue(reserve_acq.expires< now()+timedelta(hours=25))
self.assertTrue(reserve_acq.expires < now() + timedelta(hours=25))
reserve_acq.borrow()
self.assertTrue(reserve_acq.expires> now()+timedelta(hours=25))
self.assertTrue(reserve_acq.expires > now() + timedelta(hours=25))
u2 = User.objects.create_user('user2', 'test2@example.org', 'testpass')
Hold.objects.get_or_create(library=lib,work=w,user=u2)
Hold.objects.get_or_create(library=lib, work=w, user=u2)
reserve_acq.expire_in(timedelta(seconds=0))
tasks.refresh_acqs()
self.assertEqual(reserve_acq.holds.count(),0)
self.assertEqual(reserve_acq.holds.count(), 0)
class GitHubTests(TestCase):
def test_ebooks_in_github_release(self):
(repo_owner, repo_name, repo_tag) = ('GITenberg', 'Adventures-of-Huckleberry-Finn_76', '0.0.50')
ebooks = bookloader.ebooks_in_github_release(repo_owner, repo_name,
tag=repo_tag, token=settings.GITHUB_PUBLIC_TOKEN)
(repo_owner, repo_name, repo_tag) = (
'GITenberg',
'Adventures-of-Huckleberry-Finn_76',
'0.0.50'
)
ebooks = bookloader.ebooks_in_github_release(
repo_owner,
repo_name,
tag=repo_tag,
token=settings.GITHUB_PUBLIC_TOKEN
)
expected_set = set([
('epub', u'Adventures-of-Huckleberry-Finn.epub'),
('mobi', u'Adventures-of-Huckleberry-Finn.mobi'),
@ -1197,7 +1229,8 @@ class GitHubTests(TestCase):
class OnixLoaderTests(TestCase):
fixtures = ['initial_data.json']
def test_load(self):
TEST_BOOKS = [{'': u'',
TEST_BOOKS = [{
'': u'',
'Author1First': u'Joseph',
'Author1Last': u'Necvatal',
'Author1Role': u'',
@ -1232,8 +1265,9 @@ class OnixLoaderTests(TestCase):
'eISBN': u'N/A',
'eListPrice': u'N/A',
'ePublicationDate': u'',
'eTerritoryRights': u''},
{'': u'',
'eTerritoryRights': u''
}, {
'': u'',
'CAD price eub': u'9.95',
'Title': u'That Greece Might Still Be Free',
'USD price epub': u'9.95',
@ -1289,12 +1323,9 @@ class OnixLoaderTests(TestCase):
'GBP price mobi': u'5.95', 'Format 1': u'Paperback ', 'EUR price PDF': u'7.95', 'Format 3': u'pdf',
'Format 2': u'Hardback', 'Format 5': u'mobi', 'Format 4': u'epub', 'MARC Code1': u'aut',
'MARC Code2': u'aui', 'MARC Code3': u'', 'MARC Code4': u'', 'MARC Code5': u'',
'MARC Code6': u'', 'ISO Language Code': u'en'}
]
'MARC Code6': u'', 'ISO Language Code': u'en'
}]
results = load_from_books(TEST_BOOKS)
for (book, work, edition) in results:
assert (loaded_book_ok(book, work, edition))
assert loaded_book_ok(book, work, edition)
View File
@ -19,7 +19,7 @@ ID_VALIDATION = {
'http': (re.compile(r"(https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+(/[^\s]*)?$",
flags=re.IGNORECASE|re.S),
"The Web Address must be a valid http(s) URL."),
'isbn': (r'^([\dxX\-–— ]+|delete)$',
'isbn': (u'^([\\dxX \\-–—‐,;]+|delete)$', #includes unicode hyphen, endash and emdash
"The ISBN must be a valid ISBN-13."),
'doab': (r'^(\d{1,6}|delete)$',
"The value must be 1-6 digits."),
@ -44,8 +44,6 @@ ID_VALIDATION = {
}
def isbn_cleaner(value):
if value == 'delete':
return value
if not value:
raise ValidationError('no identifier value found')
elif value == 'delete':
@ -132,6 +130,8 @@ def valid_xml_char_ordinal(c):
)
def valid_subject(subject_name):
if len(subject_name) > 200:
return False
num_commas = 0
for c in subject_name:
if not valid_xml_char_ordinal(c):
@ -140,6 +140,10 @@ def valid_subject(subject_name):
num_commas += 1
if num_commas > 2:
return False
if len(subject_name.split('--')) > 6:
return False
if len(subject_name.split('. ')) > 4:
return False
return True
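Concretely, an illustrative check of the added rules (ordinary ASCII input assumed):
# Illustration of the new rejections -- not from the commit:
assert not valid_subject(u'x' * 201)              # over 200 characters
assert not valid_subject(u'a, b, c, d')           # more than 2 commas
assert not valid_subject(u'A--B--C--D--E--F--G')  # more than 6 '--' segments
assert valid_subject(u'Science fiction')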
reverse_name_comma = re.compile(r',(?! *Jr[\., ])')
View File
@ -149,14 +149,27 @@ class EditionForm(forms.ModelForm):
id_type = self.cleaned_data['id_type']
id_value = self.cleaned_data.get('id_value','').strip()
if id_value:
identifier = Identifier.objects.filter(type=id_type, value=id_value)
if identifier:
err_msg = "{} is a duplicate for work #{}.".format(identifier[0], identifier[0].work_id)
self.add_error('id_value', forms.ValidationError(err_msg))
try:
self.cleaned_data['id_value'] = identifier_cleaner(id_type)(id_value)
id_value = identifier_cleaner(id_type)(id_value)
identifier = Identifier.objects.filter(type=id_type, value=id_value)
ident = identifier[0] if identifier else None
if not ident or not self.instance:
self.cleaned_data['id_value'] = id_value
elif ident.edition_id == self.instance.id:
self.cleaned_data['id_value'] = id_value
elif not ident.edition_id and ident.work_id == self.instance.work_id:
self.cleaned_data['id_value'] = id_value
else:
if ident.edition_id:
err_msg = "{} is a duplicate for edition #{}.".format(id_value, ident.edition_id)
else:
err_msg = "{} is a duplicate for work #{}.".format(id_value, ident.work_id)
self.add_error('id_value', forms.ValidationError(err_msg))
except forms.ValidationError, ve:
self.add_error('id_value', forms.ValidationError('{}: {}'.format(ve.message, id_value)))
self.add_error(
'id_value',
forms.ValidationError('{}: {}'.format(ve.message, id_value))
)
return self.cleaned_data
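Informally, the branches above now accept a resubmitted identifier instead of flagging it as its own duplicate:
# Informal restatement of the clean() branches -- not code from the commit:
# accept when no matching Identifier exists, or the form has no instance;
# accept when the match already points at this edition (a no-op resave);
# accept when it is a work-level match (no edition set) on this same work;
# otherwise report "duplicate for edition #N" / "duplicate for work #N".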
class Meta:
View File
@ -13,11 +13,11 @@ from django.conf import settings
from django.forms.extras.widgets import SelectDateWidget
from django.forms.widgets import RadioSelect
from django.utils.translation import ugettext_lazy as _
from django.utils.timezone import now
from regluit.core.lookups import OwnerLookup
from regluit.core.models import Campaign, Edition, Claim, RightsHolder, WasWork
from regluit.core.parameters import *
from regluit.utils.localdatetime import now
class RightsHolderForm(forms.ModelForm):
email = forms.EmailField(
View File
@ -168,7 +168,7 @@
<div class="column show-for-medium">
<span>Contact</span>
<ul>
<li> <a href="mailto:info@ebookfoundation.org"><i class="fa fa-envelope fa-2x"></i></a> <a href="https://twitter.com/unglueit"><i class="fa fa-twitter fa-2x"></i></a> <a href="https://facebook/com/unglueit"><i class="fa fa-facebook fa-2x"></i></a></li>
<li> <a href="mailto:info@ebookfoundation.org"><i class="fa fa-envelope fa-2x"></i></a> <a href="https://twitter.com/unglueit"><i class="fa fa-twitter fa-2x"></i></a> <a href="https://facebook.com/unglueit"><i class="fa fa-facebook fa-2x"></i></a></li>
</ul>
</div>
</div>
View File
@ -1,6 +1,6 @@
{% extends 'work_list.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load lang_utils %}
{% block title %} Works published by {{ pubname }} {% endblock %}
View File
@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load lang_utils %}
{% load sass_tags %}
View File
@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load lang_utils %}
{% load sass_tags %}
View File
@ -4,6 +4,7 @@
{% block doccontent %}
<h2>Rights Holder Claim Form </h2>
{% if work %}
<h3> Rightsholder making claim </h3>
{{ rights_holder.rights_holder_name }}
<h3> Work being claimed </h3>
@ -42,4 +43,7 @@
<input type="submit" name="submit" value="Confirm Claim">
</form>
{% endif %}
{% else %}
Please find a work to claim.
{% endif %}
{% endblock %}
View File
@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load lang_utils %}
{% load sass_tags %}
View File
@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load sass_tags %}
{% load truncatechars %}
View File
@ -1,6 +1,6 @@
{% extends 'work_list.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load lang_utils %}
{% block title %} Books we're recommending. {% endblock %}
View File
@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load truncatechars %}
{% load sass_tags %}
View File
@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load lang_utils %}
{% load sass_tags %}
View File
@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% load endless %}
{% load el_pagination_tags %}
{% load lang_utils %}
{% load sass_tags %}
View File
@ -1,5 +1,6 @@
from django import template
from regluit.utils.localdatetime import now
from django.utils.timezone import now
from regluit.core.parameters import REWARDS, BUY2UNGLUE
register = template.Library()
View File
@ -1,12 +1,6 @@
"""
The truncatechars filter is part of Django dev, but we're on 1.3.1
The following is the filter and its dependencies
To use this filter, put "{% load truncatechars %}" at the beginning of your template,
then {{ myvariable|truncatechars:num }}
"""
import unicodedata
from django.template.base import Library
from django.template import Library
from django.template.defaultfilters import stringfilter
from django.utils.translation import get_language_info
View File
@ -1,4 +1,4 @@
from regluit.utils.localdatetime import now
from django.utils.timezone import now
from django import template
register = template.Library()
View File
@ -1,5 +1,6 @@
from regluit.utils.localdatetime import now
from django import template
from django.utils.timezone import now
from regluit.core.models import Acq
register = template.Library()
View File
@ -7,7 +7,7 @@ then {{ myvariable|truncatechars:num }}
import unicodedata
from django import template
from django.template.base import Library
from django.template import Library
from django.template.defaultfilters import stringfilter
from django.utils.encoding import force_unicode
from django.utils.functional import allow_lazy, SimpleLazyObject

View File

@@ -3,7 +3,7 @@
"""
from urllib import unquote
from django.template.base import Library
from django.template import Library
from django.template.defaultfilters import stringfilter
register = Library()

View File

@@ -13,6 +13,7 @@ from django.core import mail
from django.core.urlresolvers import reverse
from django.test import TestCase
from django.test.client import Client
from django.utils.timezone import now
from notification.models import Notice
@@ -21,7 +22,6 @@ from regluit.core.models import Work, Campaign, RightsHolder, Claim, Subject
from regluit.payment.models import Transaction
from regluit.payment.manager import PaymentManager
from regluit.payment.stripelib import StripeClient, TEST_CARDS, ERROR_TESTING, card
from regluit.utils.localdatetime import now
class WishlistTests(TestCase):
fixtures = ['initial_data.json', 'neuromancer.json']

View File

@@ -35,7 +35,7 @@ urlpatterns = [
url(r"^rightsholders/campaign/(?P<id>\d+)/mademobi/$", views.manage_campaign, {'action': 'mademobi'}, name="mademobi"),
url(r"^rightsholders/edition/(?P<work_id>\d*)/(?P<edition_id>\d*)$", views.edit_edition, {'by': 'rh'}, name="rh_edition"),
url(r"^rightsholders/edition/(?P<edition_id>\d*)/upload/$", views.edition_uploads, name="edition_uploads"),
url(r"^rightsholders/claim/$", views.claim, name="claim"),
url(r"^rightsholders/claim/$", login_required(views.claim), name="claim"),
url(r"^rightsholders/surveys/$", views.surveys, name="surveys"),
url(r"^rightsholders/new_survey/(?P<work_id>\d*)/?$", views.new_survey, name="new_survey"),
url(r"^rightsholders/surveys/answers_(?P<qid>\d+)_(?P<work_id>\d*).csv$", views.export_surveys, name="survey_answers"),

View File

@@ -45,6 +45,7 @@ from django.template import TemplateDoesNotExist
from django.template.loader import render_to_string
from django.utils.http import urlencode
from django.utils.translation import ugettext_lazy as _
from django.utils.timezone import now
from django.views.decorators.csrf import csrf_exempt
from django.views.decorators.http import require_POST
from django.views.generic.edit import FormView
@@ -123,11 +124,11 @@ from regluit.payment.parameters import (
COMPANY_TITLE
)
from regluit.utils.localdatetime import now, date_today
from regluit.libraryauth.forms import UserNamePass
from regluit.libraryauth.views import Authenticator, superlogin, login_user
from regluit.libraryauth.models import Library
from regluit.marc.views import qs_marc_records
from regluit.utils.localdatetime import date_today
from questionnaire.models import Landing, Questionnaire
from questionnaire.views import export_summary as answer_summary, export_csv as export_answers

View File

@@ -21,6 +21,7 @@ from regluit.core.bookloader import (
from regluit.core.parameters import WORK_IDENTIFIERS
from regluit.core.loaders import add_by_webpage
from regluit.core.loaders.doab import add_by_doab
from regluit.core.loaders.utils import ids_from_urls
from regluit.frontend.forms import EditionForm, IdentifierForm
@@ -106,6 +107,11 @@ def get_edition_for_id(id_type, id_value, user=None):
if edition:
return user_edition(edition, user)
if identifiers.has_key('doab'):
edition = add_by_doab(identifiers['doab'])
if edition:
return user_edition(edition, user)
if identifiers.has_key('oclc'):
edition = add_by_oclc(identifiers['oclc'])
if edition:
@@ -297,6 +303,12 @@ def edit_edition(request, work_id, edition_id, by=None):
id_type = form.cleaned_data['id_type']
id_val = form.cleaned_data['id_value']
if id_val == 'delete':
if id_type in WORK_IDENTIFIERS:
if edition.work.identifiers.exclude(type=id_type):
edition.work.identifiers.filter(type=id_type).delete()
else:
alert = ('Can\'t delete identifier - must have at least one left.')
else:
if edition.identifiers.exclude(type=id_type):
edition.identifiers.filter(type=id_type).delete()
else:
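
The exclude(type=id_type) test above reads as "other identifiers would remain after this delete". An equivalent spelling with an explicit exists() call, which issues a cheap EXISTS query instead of evaluating the queryset for truthiness (a sketch of the same logic):

    others = edition.identifiers.exclude(type=id_type)
    if others.exists():
        edition.identifiers.filter(type=id_type).delete()
    else:
        alert = "Can't delete identifier - must have at least one left."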

View File

@@ -88,6 +88,8 @@ class ClaimView(CreateView):
return HttpResponseRedirect(reverse('rightsholders'))
def get_context_data(self, form):
if not form.is_valid():
return {'form': form}
work = form.cleaned_data['work']
rights_holder = form.cleaned_data['rights_holder']
active_claims = work.claim.exclude(status = 'release')

View File

@@ -1 +1,9 @@
from . import signals
from django.apps import AppConfig
default_app_config = 'regluit.libraryauth.LibraryAuthConfig'
class LibraryAuthConfig(AppConfig):
name = 'regluit.libraryauth'
def ready(self):
from . import signals
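
Moving the signals import into AppConfig.ready() is the standard Django 1.7+ pattern: at module import time the app registry is not yet populated, so receivers that touch models must connect in ready(). The same shape for any app (names hypothetical):

    from django.apps import AppConfig

    class ExampleConfig(AppConfig):
        name = 'regluit.example'  # hypothetical app

        def ready(self):
            # deferred import: receivers connect only after the app
            # registry is fully loaded
            from . import signals  # noqa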

View File

@@ -8,7 +8,7 @@ from django.core import validators
from django.db import models
from django.db.models import Q
from django.db.models.signals import post_save
from django.forms import IPAddressField as BaseIPAddressField
from django.forms import GenericIPAddressField as BaseIPAddressField
from django.utils.translation import ugettext_lazy as _
from django.core.urlresolvers import reverse
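
forms.IPAddressField (IPv4-only) was deprecated in Django 1.7 and removed in 1.9; GenericIPAddressField validates both IPv4 and IPv6. If the old IPv4-only behavior matters, it can be pinned back (a sketch; form and field names hypothetical):

    from django import forms

    class LibraryIPForm(forms.Form):
        # protocol defaults to 'both'; 'IPv4' reproduces the old field
        ip = forms.GenericIPAddressField(protocol='IPv4')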

View File

@@ -1,6 +1,6 @@
import unicodedata
from django.template.base import Library
from django.template import Library
from .. import models
register = Library()

View File

@@ -10,12 +10,12 @@ from datetime import timedelta
django imports
"""
from django.http import HttpResponseForbidden
from django.utils.timezone import now
"""
regluit imports
"""
from regluit.payment.models import PaymentResponse
from regluit.utils.localdatetime import now, zuluformat
class ProcessorError(Exception):
"""An abstraction around payment processor exceptions"""

View File

@@ -18,6 +18,7 @@ django imports
from django.conf import settings
from django.contrib.auth.models import User
from django.core.urlresolvers import reverse
from django.utils.timezone import now
"""
regluit imports
@@ -26,7 +27,6 @@ from regluit.payment import credit
from regluit.payment.models import Transaction, Receiver, PaymentResponse, Account
from regluit.payment.parameters import *
from regluit.payment.signals import transaction_charged, pledge_modified, pledge_created
from regluit.utils.localdatetime import now
logger = logging.getLogger(__name__)

View File

@@ -18,6 +18,7 @@ from django.db.models import Q
from django.contrib.sites.models import Site
from django.db.models.signals import post_save, post_delete
from django.utils.http import urlquote
from django.utils.timezone import now
## django module imports
@@ -42,7 +43,7 @@ from regluit.payment.parameters import (
)
from regluit.payment.signals import credit_balance_added, pledge_created
from regluit.utils.localdatetime import now, date_today
from regluit.utils.localdatetime import date_today
logger = logging.getLogger(__name__)

View File

@@ -6,12 +6,15 @@ external library imports
"""
import logging
import json
import re
import stripe
from datetime import datetime, timedelta
from itertools import islice
from pytz import utc
import re
import unittest
from unittest import TestCase
import stripe
"""
django imports
@@ -19,6 +22,7 @@ django imports
from django.conf import settings
from django.core.mail import send_mail
from django.http import HttpResponse
from django.utils.timezone import now
"""
regluit imports
@@ -35,7 +39,6 @@ from regluit.payment.parameters import (
TRANSACTION_STATUS_CANCELED
)
from regluit.payment.signals import transaction_charged, transaction_failed
from regluit.utils.localdatetime import now, zuluformat
# as of 2013.07.15
# ['charge.disputed', 'coupon.updated'] are legacy events -- don't know whether to
@@ -73,12 +76,6 @@ def grouper(iterable, page_size):
class StripelibError(baseprocessor.ProcessorError):
pass
try:
import unittest
from unittest import TestCase
except:
from django.test import TestCase
from django.utils import unittest
# if customer.id doesn't exist, create one and then charge the customer
# we probably should ask our users whether they are ok with our creating a customer id account -- or ask for credit
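
django.utils.unittest was a bundled unittest2 for Python 2.6; it was deprecated in Django 1.7 and removed in 1.9, and on Python 2.7+ the stdlib module is equivalent, so the try/except fallback can simply go:

    # stdlib unittest suffices on Python 2.7+
    import unittest
    from unittest import TestCase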

View File

@@ -5,6 +5,7 @@ import logging
import os
import time
import traceback
import unittest
from datetime import timedelta
from decimal import Decimal as D
@@ -19,7 +20,7 @@ from django.contrib.auth.models import User
from django.core.exceptions import ValidationError
from django.core.validators import URLValidator
from django.test import TestCase
from django.utils import unittest
from django.utils.timezone import now
"""
regluit imports
@@ -29,7 +30,6 @@ from regluit.core.signals import handle_transaction_charged
from regluit.payment.manager import PaymentManager
from regluit.payment.models import Transaction, Account
from regluit.payment.parameters import *
from regluit.utils.localdatetime import now
def setup_selenium():
# Set the display window for our xvfb

View File

@@ -13,7 +13,7 @@ django imports
"""
from django.conf import settings
from django.contrib.auth.models import User
from django.contrib.sites.models import RequestSite
from django.contrib.sites.requests import RequestSite
from django.core.urlresolvers import reverse
from django.http import (
HttpResponse,
@@ -24,6 +24,7 @@ from django.http import (
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.test.utils import setup_test_environment
from django.utils.timezone import now
from django.views.decorators.csrf import csrf_exempt
from django.views.generic.edit import FormView
from django.views.generic.base import TemplateView
@@ -38,7 +39,6 @@ from regluit.payment.models import Transaction
from regluit.payment.parameters import *
from regluit.payment.stripelib import STRIPE_PK
from regluit.payment.tests import PledgeTest, AuthorizeTest
from regluit.utils.localdatetime import now
logger = logging.getLogger(__name__)
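
RequestSite moved out of django.contrib.sites.models in Django 1.7, and the old alias was dropped in 1.9. It builds a Site-like object from the request alone, which is why these payment views can use it without the sites table. A small usage sketch (helper name hypothetical):

    from django.contrib.sites.requests import RequestSite

    def current_domain(request):
        # .domain and .name are derived from the request's Host header;
        # no database row is involved
        return RequestSite(request).domain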

View File

@@ -3,19 +3,14 @@ Fabric==1.6.0
MySQL-python==1.2.5
Pillow==3.4.2
PyJWT==1.4.1
PyPDF2==1.23
PyPDF2==1.26
PyGithub==1.15.0
PyYAML==3.11
git+git://github.com/urschrei/pyzotero.git@v0.9.51
SPARQLWrapper==1.6.4
WebOb==1.2.3
WebTest==1.4.0
amqp==1.4.9
anyjson==0.3.3
billiard==3.3.0.23
awscli==1.10.26
boto==2.42.0
#git+ssh://git@github.com/Gluejar/boto.git@2.3.0
celery==3.1.23
certifi==2016.2.28
# pip installing pillow seems to delete distribute
@@ -24,36 +19,34 @@ certifi==2016.2.28
django-celery==3.1.17
django-ckeditor==4.5.1
#django-email-change==0.2.3
git+git://github.com/eshellman/django-email-change.git@1e71dd320504d56b1fc7d447ce4cffb550cedce7
git+git://github.com/eshellman/django-email-change.git@57169bdef1c8a41d122e2bab2dcd8564b8fb231d
django-compat==1.0.10
django-contrib-comments==1.7.1
django-endless-pagination==2.0
django-el-pagination==3.2.4
django-extensions==1.6.1
django-jsonfield==1.0.0
#django-kombu==0.9.4
django-maintenancemode==0.11.2
django-mptt==0.8.5
#django-nose-selenium==0.7.3
#django-notification==0.2
git+git://github.com/eshellman/django-notification.git@412c7a03a327195a1017c2be92c8e2caabc880b6
git+git://github.com/eshellman/django-notification.git@a4620e893e2da220994e0189bf5d980bfbdcf0ad
django-registration==2.1.2
django-selectable==0.9.0
django-smtp-ssl==1.0
django-storages==1.4.1
django-tastypie==0.13.3
django-transmeta==0.7.3
feedparser==5.1.2
#django-transmeta==0.7.3
git+git://github.com/resulto/django-transmeta.git@ad4d7278ba330dcf8c8446f8ae9b2c769ae8684e
fef-questionnaire==4.0.1
freebase==1.0.8
#gitenberg.metadata==0.1.6
git+https://github.com/gitenberg-dev/gitberg-build
#git+ssh://git@github.com/gitenberg-dev/metadata.git@0.1.11
github3.py==0.9.5
html5lib==1.0b3
html5lib==1.0.1
httplib2==0.7.5
isodate==0.5.1
kombu==3.0.35
lxml==2.3.5
lxml==4.2.1
defusedxml==0.4.1
mechanize==0.2.5
mimeparse==0.1.3
@@ -66,6 +59,7 @@ paramiko==1.14.1
postmonkey==1.0b
pycrypto==2.6
pymarc==3.0.2
pyoai==2.5.0
pyparsing==2.0.3
python-dateutil==2.5.3
python-mimeparse==0.1.4
@@ -75,12 +69,12 @@ pytz==2016.6.1
rdflib==4.2.0
rdflib-jsonld==0.3
redis==2.10.3
reportlab==3.1.8
reportlab==3.4.0
requests==2.10.0
requests-mock==1.2.0
requests-oauthlib==0.6.2
selenium==2.53.1
six==1.9.0
six==1.11.0
sorl-thumbnail==12.3
ssh==1.7.14
stevedore==1.12.0
@@ -89,7 +83,8 @@ virtualenv==1.4.9
# virtualenv-clone==0.2.4 not sure why I have this in my env
#virtualenvwrapper==3.6
wsgiref==0.1.2
xhtml2pdf==0.0.6
xhtml2pdf==0.2.2
webencodings==0.5.1
#for urllib3 secure
cffi==1.7.0
cryptography==2.1.4

View File

@@ -165,7 +165,7 @@ INSTALLED_APPS = (
'social.apps.django_app.default',
'tastypie',
'djcelery',
'endless_pagination',
'el_pagination',
'selectable',
'regluit.frontend.templatetags',
'notification',

View File

@@ -29,7 +29,9 @@ DATABASES = {
'PASSWORD': '',
'HOST': '',
'PORT': '',
'TEST_CHARSET': 'utf8',
'TEST': {
'CHARSET': 'utf8',
}
}
}
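
Django 1.7 consolidated the per-database TEST_CHARSET / TEST_COLLATION / TEST_NAME keys into a single TEST dict, which these settings files adopt. The full shape, for reference (values hypothetical):

    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.mysql',
            'NAME': 'regluit',
            'TEST': {
                'CHARSET': 'utf8',  # was TEST_CHARSET
                'NAME': None,       # was TEST_NAME; None means 'test_' + NAME
            },
        }
    }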

View File

@@ -20,7 +20,9 @@ DATABASES = {
'PASSWORD': 'regluit',
'HOST': '',
'PORT': '',
'TEST_CHARSET': 'utf8',
'TEST': {
'CHARSET': 'utf8',
}
}
}

View File

@@ -22,7 +22,9 @@ DATABASES = {
'PASSWORD': DATABASE_PASSWORD,
'HOST': DATABASE_HOST,
'PORT': '',
'TEST_CHARSET': 'utf8'
'TEST': {
'CHARSET': 'utf8',
}
}
}

View File

@@ -21,7 +21,9 @@ DATABASES = {
'PASSWORD': DATABASE_PASSWORD,
'HOST': DATABASE_HOST,
'PORT': '',
'TEST_CHARSET': 'utf8',
'TEST': {
'CHARSET': 'utf8',
}
}
}

View File

@@ -23,7 +23,9 @@ DATABASES = {
'PASSWORD': DATABASE_PASSWORD,
'HOST': DATABASE_HOST,
'PORT': '',
'TEST_CHARSET': 'utf8',
'TEST': {
'CHARSET': 'utf8',
}
}
}

View File

@@ -1 +1 @@
import localdatetime

View File

@@ -1,6 +1,10 @@
from django.conf.global_settings import LANGUAGES
lang2code = dict([ (lang[1].lower(), lang[0]) for lang in LANGUAGES ])
code2lang = dict(LANGUAGES)
def get_language_code(language):
return lang2code.get(language.lower().strip(), '')
language = language.lower().strip()
if language in code2lang:
return language
return lang2code.get(language, '')
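
The updated helper now accepts either an English language name or an ISO code and normalizes both; unknown input still falls back to the empty string. A usage sketch (assuming the module is imported):

    get_language_code('French')   # -> 'fr'  (name lookup, as before)
    get_language_code(' FR ')     # -> 'fr'  (new: codes pass through, case- and space-insensitive)
    get_language_code('Klingon')  # -> ''    (unknown values map to '')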

View File

@@ -1,140 +1,8 @@
"""
Utility to return datetime.datetime.utcnow() by default but allows for a custom utcnow() (e.g., for testing)
>>> import regluit
>>> from regluit.utils.localdatetime import now
>>> now()
datetime.datetime(2012, 3, 8, 14, 0, 35, 409270)
>>> now()
datetime.datetime(2012, 3, 8, 14, 0, 36, 985271)
>>> n = now()
>>> n
datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
>>> regluit.utils.localdatetime._now = lambda: n
>>> now()
datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
>>> now()
datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
>>> now()
DST handled:
>>> ptz = pytz.timezone('America/Los_Angeles')
>>> make_naive(datetime.datetime(2012,03,11,10,tzinfo=utc), ptz)
datetime.datetime(2012, 3, 11, 3, 0)
>>> make_naive(datetime.datetime(2012,03,11,9,tzinfo=utc), ptz)
datetime.datetime(2012, 3, 11, 1, 0)
>>> make_aware(datetime.datetime(2012,11,4,1,30), ptz)
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "/Users/raymondyee/C/src/Gluejar/regluit/utils/localdatetime.py", line 90, in make_aware
return timezone.localize(value, is_dst=None)
File "/Users/raymondyee/.virtualenvs/regluit/lib/python2.7/site-packages/pytz/tzinfo.py", line 349, in localize
raise AmbiguousTimeError(dt)
AmbiguousTimeError: 2012-11-04 01:30:00
from django.utils.timezone import now
"""
import pytz
import datetime
import django
from django.conf import settings
# for Django 1.3.x, return a timestamp naive now()
# for Django 1.4 should switch to django.utils.timezone.now()
# see https://code.djangoproject.com/browser/django/trunk/django/utils/timezone.py?rev=17642#L232
def now():
if hasattr(settings, 'LOCALDATETIME_NOW') and settings.LOCALDATETIME_NOW is not None:
return settings.LOCALDATETIME_NOW()
else:
try:
return django.utils.timezone.now()
except AttributeError, e:
return datetime.datetime.now()
# provide a replacement for datetime.date.today()
# this will be timezone naive -- is that what we really want?
# switch to django.utils.timezone.localdate in django 1.11
def date_today():
return now().date()
# borrow a lot of the routines/code that will be in Django 1.4+ django.utils.timezone
# https://code.djangoproject.com/browser/django/trunk/django/utils/timezone.py
utc = pytz.utc
def get_default_timezone():
return pytz.timezone(settings.TIME_ZONE)
def is_aware(value):
"""
Determines if a given datetime.datetime is aware.
The logic is described in Python's docs:
http://docs.python.org/library/datetime.html#datetime.tzinfo
"""
return value.tzinfo is not None and value.tzinfo.utcoffset(value) is not None
def is_naive(value):
"""
Determines if a given datetime.datetime is naive.
The logic is described in Python's docs:
http://docs.python.org/library/datetime.html#datetime.tzinfo
"""
return value.tzinfo is None or value.tzinfo.utcoffset(value) is None
def make_aware(value, timezone):
"""
Makes a naive datetime.datetime in a given time zone aware.
"""
if hasattr(timezone, 'localize'):
# available for pytz time zones
return timezone.localize(value, is_dst=None)
else:
# may be wrong around DST changes
return value.replace(tzinfo=timezone)
def make_naive(value, timezone):
"""
Makes an aware datetime.datetime naive in a given time zone.
"""
value = value.astimezone(timezone)
if hasattr(timezone, 'normalize'):
# available for pytz time zones
value = timezone.normalize(value)
return value.replace(tzinfo=None)
def isoformat(value):
"""
if value is naive, assume it's in the default_timezone
"""
if is_naive(value):
return make_aware(value, get_default_timezone()).isoformat()
else:
return value.isoformat()
def zuluformat(value):
"""format value in zulu format -- e.g., 2012-03-26T17:47:22.654449Z"""
return "{0}Z".format(as_utc_naive(value).isoformat())
def as_utc_naive(value):
"""
if value is naive, assume it's in the default time zone, then convert to UTC but make naive
"""
if is_naive(value):
return make_naive(make_aware(value, get_default_timezone()), utc)
else:
return make_naive(value, utc)
def as_default_timezone_naive(value):
"""
if value is naive, assume it's in UTC and convert to the default tz and make it naive
"""
if is_naive(value):
return make_naive(make_aware(value, utc), get_default_timezone())
else:
return make_naive(value, get_default_timezone())
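
The bulk of this module is deleted because django.utils.timezone now covers it; only the thin date_today() wrapper stays behind, as the surviving imports earlier in this diff show, and the zuluformat imports are dropped in the payment hunks above. A rough mapping of the removed helpers onto their replacements (assuming USE_TZ = True; sample values hypothetical):

    from datetime import datetime
    from django.utils import timezone

    value = datetime(2012, 3, 8, 14, 0, 35)   # naive sample datetime
    tz = timezone.get_default_timezone()

    timezone.now()                          # was localdatetime.now()
    timezone.now().date()                   # was localdatetime.date_today()
    timezone.is_aware(value)                # was is_aware(value)
    timezone.is_naive(value)                # was is_naive(value)
    aware = timezone.make_aware(value, tz)  # was make_aware(value, tz)
    timezone.make_naive(aware, tz)          # was make_naive(aware, tz)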