Merge remote-tracking branch 'Gluejar/master' into catchup

# Conflicts:
#	core/models/__init__.py
#	core/models/bibmodels.py
#	vagrant/host_vars/prod/secrets.yml
2018-05-23 13:56:27 -04:00 · 2018-05-23 13:56:27 -04:00 · 3661faec0a
parent ad7aff9664 71ba8dc9fa
commit 3661faec0a
78 changed files with 1551 additions and 55413 deletions
--- a/api/onix.py
+++ b/api/onix.py
@ -25,7 +25,7 @@ def onix_feed(facet, max=None):
        editions = facet.facet_object.filter_model("Edition",editions).distinct()
        for edition in editions:
            edition_prod = product(edition, facet.facet_object)
-            if edition_prod:
+            if edition_prod is not None:
                feed.append(edition_prod)    
    return etree.tostring(feed, pretty_print=True)
    
@ -34,7 +34,7 @@ def onix_feed_for_work(work):
    feed.append(header(work))
    for edition in models.Edition.objects.filter(work=work,ebooks__isnull=False).distinct():
        edition_prod = product(edition)
-        if edition_prod:
+        if edition_prod is not None:
            feed.append(product(edition))
    return etree.tostring(feed, pretty_print=True)
    
--- a/api/tests.py
+++ b/api/tests.py
@ -10,6 +10,7 @@ django imports
 from django.contrib.auth.models import User
 from django.test import TestCase
 from django.test.client import Client
+from django.utils.timezone import now

 """
 regluit imports
@ -17,7 +18,6 @@ regluit imports
 import regluit.core.isbn

 from regluit.core import models
-from regluit.utils.localdatetime import now
 from regluit.api import models as apimodels

 class ApiTests(TestCase):
--- a/bookdata/doab.json
+++ b/bookdata/doab.json
--- a/bookdata/doab_auths.json
+++ b/bookdata/doab_auths.json
--- a/booxtream/init.py
+++ b/booxtream/init.py
@ -5,10 +5,9 @@ from urllib import quote
 from functools import partial
 from xml.etree import ElementTree

+from django.apps import apps

 from . exceptions import BooXtreamError
-from . models import Boox
-

 class BooXtream(object):
    """ ``apikey``
@ -46,6 +45,8 @@ class BooXtream(object):
        Will raise ``BooXtreamError`` if BooXtream returns an exception
        code.
        """
+        Boox = apps.get_model('booxtream', 'Boox')
+
        url = self.endpoint + 'booxtream.xml'
        kwargs['epub'] =  '1' if epub else '0'
        kwargs['kf8mobi'] = '1' if kf8mobi else '0'
--- a/core/apps.py
+++ b/core/apps.py
@ -1,11 +1,10 @@
 from django.apps import AppConfig
 from django.db.models.signals import post_migrate

-from regluit.core.signals import create_notice_types
-
 class CoreConfig(AppConfig):
    name = 'regluit.core'
    verbose_name = ' core objects'

    def ready(self):
+        from regluit.core.signals import create_notice_types
        post_migrate.connect(create_notice_types, sender=self)
--- a/core/bookloader.py
+++ b/core/bookloader.py
@ -23,6 +23,7 @@ from django_comments.models import Comment
 from github3 import (login, GitHub)
 from github3.repos.release import Release

+from django.utils.timezone import now
 from gitenberg.metadata.pandata import Pandata

 # regluit imports
@ -31,7 +32,6 @@ import regluit
 import regluit.core.isbn
 from regluit.core.validation import test_file
 from regluit.marc.models import inverse_marc_rels
-from regluit.utils.localdatetime import now

 from . import cc
 from . import models
@ -49,7 +49,7 @@ def add_by_oclc(isbn, work=None):

 def add_by_oclc_from_google(oclc):
    if oclc:
-        logger.info("adding book by oclc %s", oclc)
+        logger.info(u"adding book by oclc %s", oclc)
    else:
        return None
    try:
@ -59,10 +59,10 @@ def add_by_oclc_from_google(oclc):
        try:
            results = _get_json(url, {"q": '"OCLC%s"' % oclc})
        except LookupFailure, e:
-            logger.exception("lookup failure for %s", oclc)
+            logger.exception(u"lookup failure for %s", oclc)
            return None
        if not results.has_key('items') or not results['items']:
-            logger.warn("no google hits for %s", oclc)
+            logger.warn(u"no google hits for %s", oclc)
            return None

        try:
@ -70,16 +70,16 @@ def add_by_oclc_from_google(oclc):
            models.Identifier(type='oclc', value=oclc, edition=e, work=e.work).save()
            return e
        except LookupFailure, e:
-            logger.exception("failed to add edition for %s", oclc)
+            logger.exception(u"failed to add edition for %s", oclc)
        except IntegrityError, e:
-            logger.exception("google books data for %s didn't fit our db", oclc)
+            logger.exception(u"google books data for %s didn't fit our db", oclc)
        return None

 def valid_isbn(isbn):
    try:
        return identifier_cleaner('isbn')(isbn)
    except:
-        logger.exception("invalid isbn: %s", isbn)
+        logger.exception(u"invalid isbn: %s", isbn)
        return None

 def add_by_isbn(isbn, work=None, language='xx', title=''):
@ -88,13 +88,17 @@ def add_by_isbn(isbn, work=None, language='xx', title=''):
    try:
        e = add_by_isbn_from_google(isbn, work=work)
    except LookupFailure:
-        logger.exception("failed google lookup for %s", isbn)
+        logger.exception(u"failed google lookup for %s", isbn)
        # try again some other time
        return None
    if e:
+        if e.work.language == 'xx' and language != 'xx':
+            e.work.language = language  # replace the 'xx' placeholder with the caller's language
+            e.work.save()
+            logger.info(u"changed language for %s to %s", isbn, language)
        return e

-    logger.info("null came back from add_by_isbn_from_google: %s", isbn)
+    logger.info(u"null came back from add_by_isbn_from_google: %s", isbn)

    # if there's a a title, we want to create stub editions and
    # works, even if google doesn't know about it # but if it's not valid,
@ -129,10 +133,10 @@ def get_google_isbn_results(isbn):
    try:
        results = _get_json(url, {"q": "isbn:%s" % isbn})
    except LookupFailure:
-        logger.exception("lookup failure for %s", isbn)
+        logger.exception(u"lookup failure for %s", isbn)
        return None
    if not results.has_key('items') or not results['items']:
-        logger.warn("no google hits for %s", isbn)
+        logger.warn(u"no google hits for %s", isbn)
        return None
    return results

@ -201,7 +205,7 @@ def update_edition(edition):
    # if the language of the edition no longer matches that of the parent work,
    # attach edition to the
    if edition.work.language != language:
-        logger.info("reconnecting %s since it is %s instead of %s",
+        logger.info(u"reconnecting %s since it is %s instead of %s",
            googlebooks_id, language, edition.work.language)
        old_work = edition.work

@ -210,7 +214,7 @@ def update_edition(edition):
        edition.work = new_work
        edition.save()
        for identifier in edition.identifiers.all():
-            logger.info("moving identifier %s", identifier.value)
+            logger.info(u"moving identifier %s", identifier.value)
            identifier.work = new_work
            identifier.save()
        if old_work and old_work.editions.count() == 0:
@ -256,7 +260,7 @@ def add_by_isbn_from_google(isbn, work=None):
        edition.new = False
        return edition

-    logger.info("adding new book by isbn %s", isbn)
+    logger.info(u"adding new book by isbn %s", isbn)
    results = get_google_isbn_results(isbn)
    if results:
        try:
@ -267,9 +271,9 @@ def add_by_isbn_from_google(isbn, work=None):
                isbn=isbn
            )
        except LookupFailure, e:
-            logger.exception("failed to add edition for %s", isbn)
+            logger.exception(u"failed to add edition for %s", isbn)
        except IntegrityError, e:
-            logger.exception("google books data for %s didn't fit our db", isbn)
+            logger.exception(u"google books data for %s didn't fit our db", isbn)
        return None
    return None

@ -320,7 +324,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
    if results:
        item = results
    else:
-        logger.info("loading metadata from google for %s", googlebooks_id)
+        logger.info(u"loading metadata from google for %s", googlebooks_id)
        url = "https://www.googleapis.com/books/v1/volumes/%s" % googlebooks_id
        item = _get_json(url)
    d = item['volumeInfo']
@ -343,7 +347,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
    if len(language) > 5:
        language = language[0:5]
    if work and work.language != language:
-        logger.info("not connecting %s since it is %s instead of %s",
+        logger.info(u"not connecting %s since it is %s instead of %s",
                    googlebooks_id, language, work.language)
        work = None
    # isbn = None
@ -371,7 +375,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
    try:
        e = models.Identifier.objects.get(type='goog', value=googlebooks_id).edition
        e.new = False
-        logger.warning(" whoa nellie, somebody else created an edition while we were working.")
+        logger.warning(u" whoa nellie, somebody else created an edition while we were working.")
        if work.new:
            work.delete()
        return e
@ -404,19 +408,19 @@ def relate_isbn(isbn, cluster_size=1):
    """add a book by isbn and then see if there's an existing work to add it to so as to make a
    cluster bigger than cluster_size.
    """
-    logger.info("finding a related work for %s", isbn)
+    logger.info(u"finding a related work for %s", isbn)

    edition = add_by_isbn(isbn)
    if edition is None:
        return None
    if edition.work is None:
-        logger.info("didn't add related to null work")
+        logger.info(u"didn't add related to null work")
        return None
    if edition.work.editions.count() > cluster_size:
        return edition.work
    for other_isbn in thingisbn(isbn):
        # 979's come back as 13
-        logger.debug("other_isbn: %s", other_isbn)
+        logger.debug(u"other_isbn: %s", other_isbn)
        if len(other_isbn) == 10:
            other_isbn = regluit.core.isbn.convert_10_to_13(other_isbn)
        related_edition = add_by_isbn(other_isbn, work=edition.work)
@ -427,7 +431,7 @@ def relate_isbn(isbn, cluster_size=1):
                    related_edition.work = edition.work
                    related_edition.save()
                elif related_edition.work_id != edition.work_id:
-                    logger.debug("merge_works path 1 %s %s", edition.work_id, related_edition.work_id)
+                    logger.debug(u"merge_works path 1 %s %s", edition.work_id, related_edition.work_id)
                    merge_works(related_edition.work, edition.work)
                if related_edition.work.editions.count() > cluster_size:
                    return related_edition.work
@ -438,7 +442,7 @@ def add_related(isbn):
    The initial seed ISBN will be added if it's not already there.
    """
    # make sure the seed edition is there
-    logger.info("adding related editions for %s", isbn)
+    logger.info(u"adding related editions for %s", isbn)

    new_editions = []

@ -446,14 +450,14 @@ def add_related(isbn):
    if edition is None:
        return new_editions
    if edition.work is None:
-        logger.warning("didn't add related to null work")
+        logger.warning(u"didn't add related to null work")
        return new_editions
    # this is the work everything will hang off
    work = edition.work
    other_editions = {}
    for other_isbn in thingisbn(isbn):
        # 979's come back as 13
-        logger.debug("other_isbn: %s", other_isbn)
+        logger.debug(u"other_isbn: %s", other_isbn)
        if len(other_isbn) == 10:
            other_isbn = regluit.core.isbn.convert_10_to_13(other_isbn)
        related_edition = add_by_isbn(other_isbn, work=work)
@ -466,7 +470,7 @@ def add_related(isbn):
                    related_edition.work = work
                    related_edition.save()
                elif related_edition.work_id != work.id:
-                    logger.debug("merge_works path 1 %s %s", work.id, related_edition.work_id)
+                    logger.debug(u"merge_works path 1 %s %s", work.id, related_edition.work_id)
                    work = merge_works(work, related_edition.work)
            else:
                if other_editions.has_key(related_language):
@ -476,14 +480,14 @@ def add_related(isbn):

    # group the other language editions together
    for lang_group in other_editions.itervalues():
-        logger.debug("lang_group (ed, work): %s", [(ed.id, ed.work_id) for ed in lang_group])
+        logger.debug(u"lang_group (ed, work): %s", [(ed.id, ed.work_id) for ed in lang_group])
        if len(lang_group) > 1:
            lang_edition = lang_group[0]
-            logger.debug("lang_edition.id: %s", lang_edition.id)
+            logger.debug(u"lang_edition.id: %s", lang_edition.id)
            # compute the distinct set of works to merge into lang_edition.work
            works_to_merge = set([ed.work for ed in lang_group[1:]]) - set([lang_edition.work])
            for w in works_to_merge:
-                logger.debug("merge_works path 2 %s %s", lang_edition.work_id, w.id)
+                logger.debug(u"merge_works path 2 %s %s", lang_edition.work_id, w.id)
                merged_work = merge_works(lang_edition.work, w)
        models.WorkRelation.objects.get_or_create(
            to_work=lang_group[0].work,
@ -498,17 +502,21 @@ def thingisbn(isbn):
    Library Thing. (takes isbn_10 or isbn_13, returns isbn_10, except for 979 isbns,
    which come back as isbn_13')
    """
-    logger.info("looking up %s at ThingISBN", isbn)
+    logger.info(u"looking up %s at ThingISBN", isbn)
    url = "https://www.librarything.com/api/thingISBN/%s" % isbn
    xml = requests.get(url, headers={"User-Agent": settings.USER_AGENT}).content
+    try:
        doc = ElementTree.fromstring(xml)
        return [e.text for e in doc.findall('isbn')]
+    except SyntaxError:
+        # LibraryThing down
+        return []


 def merge_works(w1, w2, user=None):
    """will merge the second work (w2) into the first (w1)
    """
-    logger.info("merging work %s into %s", w2.id, w1.id)
+    logger.info(u"merging work %s into %s", w2.id, w1.id)
    # don't merge if the works are the same or at least one of the works has no id
    #(for example, when w2 has already been deleted)
    if w1 is None or w2 is None or w1.id == w2.id or w1.id is None or w2.id is None:
@ -583,7 +591,7 @@ def detach_edition(e):
    will detach edition from its work, creating a new stub work. if remerge=true, will see if
    there's another work to attach to
    """
-    logger.info("splitting edition %s from %s", e, e.work)
+    logger.info(u"splitting edition %s from %s", e, e.work)
    w = models.Work(title=e.title, language=e.work.language)
    w.save()

@ -618,7 +626,7 @@ def add_openlibrary(work, hard_refresh=False):
    work.save()

    # find the first ISBN match in OpenLibrary
-    logger.info("looking up openlibrary data for work %s", work.id)
+    logger.info(u"looking up openlibrary data for work %s", work.id)

    e = None # openlibrary edition json
    w = None # openlibrary work json
@ -633,7 +641,7 @@ def add_openlibrary(work, hard_refresh=False):
        try:
            e = _get_json(url, params, type='ol')
        except LookupFailure:
-            logger.exception("OL lookup failed for  %s", isbn_key)
+            logger.exception(u"OL lookup failed for  %s", isbn_key)
            e = {}
        if e.has_key(isbn_key):
            if e[isbn_key].has_key('details'):
@ -673,7 +681,7 @@ def add_openlibrary(work, hard_refresh=False):
                        )
                if e[isbn_key]['details'].has_key('works'):
                    work_key = e[isbn_key]['details']['works'].pop(0)['key']
-                    logger.info("got openlibrary work %s for isbn %s", work_key, isbn_key)
+                    logger.info(u"got openlibrary work %s for isbn %s", work_key, isbn_key)
                    models.Identifier.get_or_add(type='olwk', value=work_key, work=work)
                    try:
                        w = _get_json("https://openlibrary.org" + work_key, type='ol')
@ -691,14 +699,14 @@ def add_openlibrary(work, hard_refresh=False):
                        if w.has_key('subjects') and len(w['subjects']) > len(subjects):
                            subjects = w['subjects']
                    except LookupFailure:
-                        logger.exception("OL lookup failed for  %s", work_key)
+                        logger.exception(u"OL lookup failed for  %s", work_key)
    if not subjects:
-        logger.warn("unable to find work %s at openlibrary", work.id)
+        logger.warn(u"unable to find work %s at openlibrary", work.id)
        return

    # add the subjects to the Work
    for s in subjects:
-        logger.info("adding subject %s to work %s", s, work.id)
+        logger.info(u"adding subject %s to work %s", s, work.id)
        subject = models.Subject.set_by_name(s, work=work)

    work.save()
@ -716,9 +724,9 @@ def _get_json(url, params={}, type='gb'):
    if response.status_code == 200:
        return json.loads(response.content)
    else:
-        logger.error("unexpected HTTP response: %s", response)
+        logger.error(u"unexpected HTTP response: %s", response)
        if response.content:
-            logger.error("response content: %s", response.content)
+            logger.error(u"response content: %s", response.content)
        raise LookupFailure("GET failed: url=%s and params=%s" % (url, params))


@ -766,7 +774,7 @@ def load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, seed_isbn, url
        ebook = models.Ebook()

    if len(ebooks) > 1:
-        logger.warning("There is more than one Ebook matching url {0}".format(url))
+        logger.warning(u"There is more than one Ebook matching url {0}".format(url))


    ebook.format = format
@ -826,8 +834,6 @@ def edition_for_etype(etype, metadata, default=None):
        for key in metadata.edition_identifiers.keys():
            return edition_for_ident(key, metadata.identifiers[key])

-MATCH_LICENSE = re.compile(r'creativecommons.org/licenses/([^/]+)/')
-
 def load_ebookfile(url, etype):
    '''
    return a ContentFile if a new ebook has been loaded
@ -960,8 +966,7 @@ class BasePandataLoader(object):
                    if contentfile:
                        contentfile_name = '/loaded/ebook_{}.{}'.format(edition.id, key)
                        path = default_storage.save(contentfile_name, contentfile)
-                        lic = MATCH_LICENSE.search(metadata.rights_url)
-                        license = 'CC {}'.format(lic.group(1).upper()) if lic else ''
+                        license = cc.license_from_cc_url(metadata.rights_url)
                        ebf = models.EbookFile.objects.create(
                            format=key,
                            edition=edition,
--- a/core/cc.py
+++ b/core/cc.py
@ -1,8 +1,11 @@
 # coding=utf-8
-# mostly constants related to Creative Commons
+''' mostly constants related to Creative Commons
 # let's be DRY with these parameters

 ## need to add versioned CC  entries
+'''
+
+import re

 INFO_CC = (
    ('CC BY-NC-ND', 'by-nc-nd', 'Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported (CC BY-NC-ND 3.0)', 'https://creativecommons.org/licenses/by-nc-nd/3.0/', 'Creative Commons Attribution-NonCommercial-NoDerivs'),     
@ -162,3 +165,15 @@ def match_license(license_string):
    except ValueError:
        pass
    return RIGHTS_ALIAS.get(license_string, None)
+
+MATCH_LICENSE = re.compile(r'creativecommons.org/licenses/([^/]+)/')
+def license_from_cc_url(rights_url):
+    if not rights_url:
+        return None
+    lic = MATCH_LICENSE.search(rights_url)
+    if lic:
+        return 'CC {}'.format(lic.group(1).upper())
+    if rights_url.find('openedition.org') >= 0:
+        return 'OPENEDITION'
+    return ''
+
--- a/core/isbn.py
+++ b/core/isbn.py
@ -45,10 +45,10 @@ def convert_10_to_13(isbn):
    except:
        return None

-ISBN_REGEX = re.compile(r'^(\d{9}|\d{12})(\d|X)$')
-DASH_REGEX = re.compile(r'[ \-–—]+')
+ISBN_REGEX = re.compile(r'^(\d{9}[\dX]|\d{13})$')
+DASH_REGEX = re.compile(u'[ \\-–—‐,;]+')  #includes unicode hyphen, endash and emdash
 def strip(s):
-    """Strips away any - or spaces.  If the remaining string is of length 10 or 13
+    """Strips away any - or spaces and some punctuation.  If the remaining string is of length 10 or 13
    with digits only in anything but the last
    check digit (which may be X), then return '' -- otherwise return the remaining string
    """
--- a/core/librarything.py
+++ b/core/librarything.py
@ -2,11 +2,12 @@ import csv
 import HTMLParser
 import httplib
 import logging
-import mechanize
 import re
+from datetime import datetime
+
+import mechanize
 import requests

-from datetime import datetime
 from regluit.core import models

 logger = logging.getLogger(__name__)
@ -40,7 +41,8 @@ class LibraryThing(object):
    def parse_csv(self):
        h = HTMLParser.HTMLParser()
        reader = csv.DictReader(self.csv_handle)
-        # There are more fields to be parsed out.  Note that there is a second author column to handle
+        # There are more fields to be parsed out.  Note that there is a
+        # second author column to handle
        for (i, row) in enumerate(reader):
            # ISBNs are written like '[123456789x]' in the CSV, suggesting possibility of a list
            m = re.match(r'^\[(.*)\]$', row["'ISBNs'"])
@ -48,35 +50,50 @@ class LibraryThing(object):
                isbn = m.group(1).split()
            else:
                isbn = []
-            yield {'title':h.unescape(row["'TITLE'"]), 'author':h.unescape(row["'AUTHOR (first, last)'"]),
-                   'isbn':isbn, 'comment':row["'COMMENT'"],
-                   'tags':row["'TAGS'"], 'collections':row["'COLLECTIONS'"],
-                    'reviews':h.unescape(row["'REVIEWS'"])}
+            yield {
+                'title':h.unescape(row["'TITLE'"]),
+                'author':h.unescape(row["'AUTHOR (first, last)'"]),
+                'isbn':isbn,
+                'comment':row["'COMMENT'"],
+                'tags':row["'TAGS'"],
+                'collections':row["'COLLECTIONS'"],
+                'reviews':h.unescape(row["'REVIEWS'"])
+            }
    def viewstyle_1(self, rows):

        for (i, row) in enumerate(rows):
            book_data = {}
            cols = row.xpath('td')
            # cover
-            book_data["cover"] = {"cover_id":cols[0].attrib["id"],
-                                  "image": {"width":cols[0].xpath('.//img')[0].attrib['width'],
-                                    "src": cols[0].xpath('.//img')[0].attrib['src']}
+            book_data["cover"] = {
+                "cover_id":cols[0].attrib["id"],
+                "image": {
+                    "width":cols[0].xpath('.//img')[0].attrib['width'],
+                    "src": cols[0].xpath('.//img')[0].attrib['src']
+                }
            }
            # title
-            book_data["title"] = {"href":cols[1].xpath('.//a')[0].attrib['href'],
-                                  "title":cols[1].xpath('.//a')[0].text}
+            book_data["title"] = {
+                "href":cols[1].xpath('.//a')[0].attrib['href'],
+                "title":cols[1].xpath('.//a')[0].text
+            }

            # extract work_id and book_id from href
            try:
-                (book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups()
+                (book_data["work_id"], book_data["book_id"]) = re.match(
+                    "^/work/(.*)/book/(.*)$",
+                    book_data["title"]["href"]
+                ).groups()
            except:
                (book_data["work_id"], book_data["book_id"]) = (None, None)

            # author -- what if there is more than 1?  or none?
            try:
-                book_data["author"] = {"display_name":cols[2].xpath('.//a')[0].text,
+                book_data["author"] = {
+                    "display_name":cols[2].xpath('.//a')[0].text,
                    "href":cols[2].xpath('.//a')[0].attrib['href'],
-                                       "name":cols[2].xpath('div')[0].text}
+                    "name":cols[2].xpath('div')[0].text
+                }
            except:
                book_data["author"] = None

@ -91,7 +108,9 @@ class LibraryThing(object):
            book_data["rating"] = len(cols[5].xpath('.//img[@alt="*"]'))

            # entry date
-            book_data["entry_date"] = datetime.date(datetime.strptime(cols[6].xpath('span')[0].text, "%b %d, %Y"))
+            book_data["entry_date"] = datetime.date(
+                datetime.strptime(cols[6].xpath('span')[0].text, "%b %d, %Y")
+            )

            yield book_data

@ -107,7 +126,10 @@ class LibraryThing(object):

            # extract work_id and book_id from href
            try:
-                (book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups()
+                (book_data["work_id"], book_data["book_id"]) = re.match(
+                    "^/work/(.*)/book/(.*)$",
+                    book_data["title"]["href"]
+                ).groups()
            except:
                (book_data["work_id"], book_data["book_id"]) = (None, None)

@ -160,8 +182,9 @@ class LibraryThing(object):
        cookies = r.cookies

        while next_page:
-            url = "https://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username,
-                                        view_style, COLLECTION, offset)
+            url = "https://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (
+                self.username, view_style, COLLECTION, offset
+            )
            logger.info("url: %s", url)
            if cookies is None:
                r = requests.get(url)
@ -169,10 +192,8 @@ class LibraryThing(object):
                r = requests.get(url, cookies=cookies)

            if r.status_code != httplib.OK:
-                raise LibraryThingException("Error accessing %s: %s" % (url, e))
-                logger.info("Error accessing %s: %s", url, e)
+                raise LibraryThingException("Error accessing %s: status %s" % (url, r.status_code))
            etree = html.fromstring(r.content)
-            #logger.info("r.content %s", r.content)
            cookies = r.cookies  # retain the cookies

            # look for a page bar
@ -182,11 +203,14 @@ class LibraryThing(object):
                count_text = etree.xpath('//td[@class="pbGroup"]')[0].text
                total = int(re.search(r'(\d+)$', count_text).group(1))
                logger.info('total: %d', total)
-            except Exception, e:  # assume for now that if we can't grab this text, there is no page bar and no books
+            except Exception, e:
+                # assume for now that if we can't grab this text,
+                # there is no page bar and no books
                logger.info('Exception {0}'.format(e))
                total = 0

-            # to do paging we can either look for a next link or just increase the offset by the number of rows.
+            # to do paging we can either look for a next link or just increase the offset
+            # by the number of rows.
            # Let's try the latter
            # possible_next_link = etree.xpath('//a[@class="pageShuttleButton"]')[0]

@ -200,7 +224,8 @@ class LibraryThing(object):
            for (i, row) in enumerate(style_parser(rows)):
                yield row

-            # page size = 50, first page offset = 0, second page offset = 50 -- if total = 50 no need to go
+            # page size = 50, first page offset = 0, second page offset = 50
+            # -- if total = 50 no need to go

            offset += i + 1
            if offset >= total:
@ -208,7 +233,8 @@ class LibraryThing(object):

 def load_librarything_into_wishlist(user, lt_username, max_books=None):
    """
-    Load a specified LibraryThing shelf (by default:  all the books from the LibraryThing account associated with user)
+    Load a specified LibraryThing shelf (by default:  all the books
+    from the LibraryThing account associated with user)
    """

    from regluit.core import bookloader
@ -229,10 +255,24 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None):
            if not edition:
                continue
            # add the librarything ids to the db since we know them now
-            identifier= models.Identifier.get_or_add(type = 'thng', value = book['book_id'], edition = edition, work = edition.work)
-            identifier= models.Identifier.get_or_add(type = 'ltwk', value = book['work_id'], work = edition.work)
+            identifier = models.Identifier.get_or_add(
+                type='thng',
+                value=book['book_id'],
+                edition=edition,
+                work=edition.work
+            )
+            identifier = models.Identifier.get_or_add(
+                type='ltwk',
+                value=book['work_id'],
+                work=edition.work
+            )
            if book['lc_call_number']:
-                identifier= models.Identifier.get_or_add(type = 'lccn', value = book['lc_call_number'], edition = edition, work = edition.work)
+                identifier = models.Identifier.get_or_add(
+                    type='lccn',
+                    value=book['lc_call_number'],
+                    edition=edition,
+                    work=edition.work
+                )
            user.wishlist.add_work(edition.work, 'librarything', notify=True)
            if edition.new:
                tasks.populate_edition.delay(edition.isbn_13)
--- a/core/loaders/init.py
+++ b/core/loaders/init.py
@ -16,10 +16,10 @@ from .smashwords import SmashwordsScraper
 def get_scraper(url):
    scrapers = [
        PressbooksScraper,
-        HathitrustScraper,
        SpringerScraper,
        UbiquityScraper,
        SmashwordsScraper,
+        HathitrustScraper,
        BaseScraper,
    ]
    for scraper in scrapers:
@ -52,3 +52,9 @@ def add_by_webpage(url, work=None, user=None):
        
 def add_by_sitemap(url, maxnum=None):
    return add_from_bookdatas(scrape_sitemap(url, maxnum=maxnum))
+    
+def scrape_language(url):
+    scraper = get_scraper(url)
+    return scraper.metadata.get('language')
+
+
--- a/core/loaders/doab.py
+++ b/core/loaders/doab.py
@ -1,29 +1,41 @@
 #!/usr/bin/env python
 # encoding: utf-8
-import logging
+import datetime
 import json
+import logging
 import re

-from itertools import islice
-
 import requests

-from django.db.models import (Q, F)
+from django.db.models import Q

-from django.core.files.storage import default_storage
 from django.core.files.base import ContentFile
+from django.core.files.storage import default_storage

-import regluit
+from oaipmh.client import Client
+from oaipmh.error import IdDoesNotExistError
+from oaipmh.metadata import MetadataRegistry, oai_dc_reader
+
+from regluit.core import bookloader, cc
 from regluit.core import models, tasks
-from regluit.core import bookloader
-from regluit.core.bookloader import add_by_isbn, merge_works
+from regluit.core.bookloader import merge_works
 from regluit.core.isbn import ISBN
-from regluit.core.validation import valid_subject
+from regluit.core.loaders.utils import type_for_url
+from regluit.core.validation import identifier_cleaner, valid_subject
+
+from . import scrape_language
+from .doab_utils import doab_lang_to_iso_639_1, online_to_download, url_to_provider

 logger = logging.getLogger(__name__)

-springercover = re.compile(r'ftp.+springer\.de.+(\d{13}\.jpg)$', flags=re.U)
+def unlist(alist):
+    if not alist:
+        return None
+    return alist[0]

+
+SPRINGER_COVER = re.compile(r'ftp.+springer\.de.+(\d{13}\.jpg)$', flags=re.U)
+SPRINGER_IMAGE = u'https://images.springer.com/sgw/books/medium/{}.jpg'
 def store_doab_cover(doab_id, redo=False):

    """
@ -44,29 +56,31 @@ def store_doab_cover(doab_id, redo=False):
        if r.status_code == 302:
            redirurl = r.headers['Location']
            if redirurl.startswith(u'ftp'):
-                springerftp = springercover.match(redirurl)
+                springerftp = SPRINGER_COVER.match(redirurl)
                if springerftp:
-                    redirurl = u'https://images.springer.com/sgw/books/medium/{}.jpg'.format(springerftp.groups(1))
+                    redirurl = SPRINGER_IMAGE.format(springerftp.groups(1))
                    r = requests.get(redirurl)
            else:
                r = requests.get(url)
+        else:
+            r = requests.get(url)
        cover_file = ContentFile(r.content)
        cover_file.content_type = r.headers.get('content-type', '')

-        path = default_storage.save(cover_file_name, cover_file)    
+        default_storage.save(cover_file_name, cover_file)
        return (default_storage.url(cover_file_name), True)
    except Exception, e:
        # if there is a problem, return None for cover URL
        logger.warning('Failed to make cover image for doab_id={}: {}'.format(doab_id, e))
        return (None, False)

-def update_cover_doab(doab_id, edition, store_cover=True):
+def update_cover_doab(doab_id, edition, store_cover=True, redo=True):
    """
    update the cover url for work with doab_id
    if store_cover is True, use the cover from our own storage
    """
    if store_cover:
-        (cover_url, new_cover) = store_doab_cover(doab_id)
+        (cover_url, new_cover) = store_doab_cover(doab_id, redo=redo)
    else:
        cover_url = "http://www.doabooks.org/doab?func=cover&rid={0}".format(doab_id)

@ -74,7 +88,6 @@ def update_cover_doab(doab_id, edition, store_cover=True):
        edition.cover_image = cover_url
        edition.save()
        return cover_url
-    else:
    return None

 def attach_more_doab_metadata(edition, description, subjects,
@ -108,7 +121,7 @@ def attach_more_doab_metadata(edition, description, subjects,
    if not work.age_level:
        work.age_level = '18-'

-    if language:
+    if language and language != 'xx':
        work.language = language
    work.save()

@ -125,7 +138,6 @@ def attach_more_doab_metadata(edition, description, subjects,
 def add_all_isbns(isbns, work, language=None, title=None):
    first_edition = None
    for isbn in isbns:
-        first_edition = None
        edition = bookloader.add_by_isbn(isbn, work, language=language, title=title)
        if edition:
            first_edition = first_edition if first_edition else edition
@ -136,7 +148,7 @@ def add_all_isbns(isbns, work, language=None, title=None):
                    work = merge_works(edition.work, work)
            else:
                work = edition.work
-    return first_edition 
+    return work, first_edition

 def load_doab_edition(title, doab_id, url, format, rights,
                      language, isbns,
@ -145,9 +157,11 @@ def load_doab_edition(title, doab_id, url, format, rights,
    """
    load a record from doabooks.org represented by input parameters and return an ebook
    """
+    logger.info('load doab {} {} {} {} {}'.format(doab_id, format, rights, language, provider))
    if language and isinstance(language, list):
        language = language[0]
-        
+    if language == 'xx' and format == 'online':
+        language = scrape_language(url)
    # check to see whether the Edition hasn't already been loaded first
    # search by url
    ebooks = models.Ebook.objects.filter(url=url)
@ -170,35 +184,37 @@ def load_doab_edition(title, doab_id, url, format, rights,
        ebook = ebooks[0]
        doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
                                                      work=ebook.edition.work)
+        if not ebook.rights:
+            ebook.rights = rights
+            ebook.save()
+        
        # update the cover id
-        cover_url = update_cover_doab(doab_id, ebook.edition)
+        cover_url = update_cover_doab(doab_id, ebook.edition, redo=False)

        # attach more metadata
-        attach_more_doab_metadata(ebook.edition, 
-                                  description=kwargs.get('description'),
+        attach_more_doab_metadata(
+            ebook.edition,
+            description=unlist(kwargs.get('description')),
            subjects=kwargs.get('subject'),
-                                  publication_date=kwargs.get('date'),
-                                  publisher_name=kwargs.get('publisher'),
+            publication_date=unlist(kwargs.get('date')),
+            publisher_name=unlist(kwargs.get('publisher')),
            language=language,
-                                  authors=kwargs.get('authors'),)
+            authors=kwargs.get('creator'),
+        )
        # make sure all isbns are added
-        add_all_isbns(isbns, None, language=language, title=title)
-        return ebook
+        add_all_isbns(isbns, ebook.edition.work, language=language, title=title)
+        return ebook.edition

    # remaining case --> no ebook, load record, create ebook if there is one.
-    assert len(ebooks) == 0
+    assert not ebooks


    # we need to find the right Edition/Work to tie Ebook to...

    # look for the Edition with which to associate ebook.
    # loop through the isbns to see whether we get one that is not None
-    work = None
-    edition = add_all_isbns(isbns, None, language=language, title=title)
-    if edition:
-        edition.refresh_from_db()
-        work = edition.work

+    work, edition = add_all_isbns(isbns, None, language=language, title=title)
    if doab_id and not work:
        # make sure there's not already a doab_id
        idents = models.Identifier.objects.filter(type='doab', value=doab_id)
@ -208,9 +224,10 @@ def load_doab_edition(title, doab_id, url, format, rights,
            break

    if edition is not None:
-        # if this is a new edition, then add related editions asynchronously
+        # if this is a new edition, then add related editions SYNCHRONOUSLY
        if getattr(edition, 'new', False):
-            tasks.populate_edition.delay(edition.isbn_13)
+            tasks.populate_edition(edition.isbn_13)
+        edition.refresh_from_db()
        doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
                                                      work=edition.work)

@ -245,7 +262,7 @@ def load_doab_edition(title, doab_id, url, format, rights,
    work.selected_edition = edition
    work.save()

-    if format in ('pdf', 'epub', 'mobi'):
+    if format in ('pdf', 'epub', 'mobi', 'html', 'online') and rights:
        ebook = models.Ebook()
        ebook.format = format
        ebook.provider = provider
@ -253,59 +270,27 @@ def load_doab_edition(title, doab_id, url, format, rights,
        ebook.rights = rights
        # tie the edition to ebook
        ebook.edition = edition
+        if format == "online":
+            ebook.active = False
        ebook.save()

    # update the cover id (could be done separately)
-    cover_url = update_cover_doab(doab_id, edition)
+    cover_url = update_cover_doab(doab_id, edition, redo=False)

    # attach more metadata
-    attach_more_doab_metadata(edition, 
-                              description=kwargs.get('description'),
+    attach_more_doab_metadata(
+        edition,
+        description=unlist(kwargs.get('description')),
        subjects=kwargs.get('subject'),
-                              publication_date=kwargs.get('date'),
-                              publisher_name=kwargs.get('publisher'),
-                              authors=kwargs.get('authors'),)    
-    return ebook
+        publication_date=unlist(kwargs.get('date')),
+        publisher_name=unlist(kwargs.get('publisher')),
+        authors=kwargs.get('creator'),
+    )
+    return edition

-
-def load_doab_records(fname, limit=None):
-    
-    success_count = 0
-    ebook_count = 0
-    
-    records = json.load(open(fname))
-
-    for (i, book) in enumerate(islice(records,limit)):
-        d = dict(book)
-        d['isbns'] = split_isbns(d['isbns_raw']) # use stricter isbn string parsing.
-        try:
-            ebook = load_doab_edition(**d)
-            success_count += 1 
-            if ebook:
-                ebook_count +=1
-        except Exception, e:
-            logger.error(e)
-            logger.error(book)
-            
-    logger.info("Number of records processed: " + str(success_count))
-    logger.info("Number of ebooks processed: " + str(ebook_count))
-
-"""
+#
 #tools to parse the author lists in doab.csv
-from pandas import DataFrame
-url = "http://www.doabooks.org/doab?func=csv"
-df_csv = DataFrame.from_csv(url)
-
-out=[]
-for val in df_csv.values:
-    isbn = split_isbns(val[0])
-    if isbn:
-        auths = []
-        if val[2] == val[2] and val[-2] == val[-2]: # test for NaN auths and licenses
-            auths = creator_list(val[2])
-            out.append(( isbn[0], auths))
-open("/Users/eric/doab_auths.json","w+").write(json.dumps(out,indent=2, separators=(',', ': ')))
-"""
+#

 au = re.compile(r'\(Authors?\)', flags=re.U)
 ed = re.compile(r'\([^\)]*(dir.|[Eeé]ds?.|org.|coord.|Editor|a cura di|archivist)[^\)]*\)', flags=re.U)
@ -353,64 +338,96 @@ def creator(auth, editor=False):
    auth = au.sub('', auth)
    return ['aut', fnf(auth)]

-def split_auths(auths):
-    if ';' in auths or '/' in auths:
-        return namesep2.split(auths)
-    else:
-        nl = namelist.match(auths.strip())
-        if nl:
-            if nl.group(3).endswith(' de') \
-                or ' de ' in nl.group(3) \
-                or nl.group(3).endswith(' da') \
-                or nl.group(1).endswith(' Jr.') \
-                or ' e ' in nl.group(1):
-                return [auths]
-            else:
-                return namesep.split(auths)
-        else :
-            return [auths]
-
-def split_isbns(isbns):
-    result = []
-    for isbn in isbnsep.split(isbns):
-        isbn = ISBN(isbn)
-        if isbn.valid:
-            result.append(isbn.to_string())
-    return result
-
 def creator_list(creators):
    auths = []
-    if re.search(edlist, creators):
-        for auth in split_auths(edlist.sub(u'', creators)):
-            if auth:
-                auths.append(creator(auth, editor=True))
-    else:
-        for auth in split_auths(unicode(creators)):
-            if auth:
+    for auth in creators:
        auths.append(creator(auth))
    return auths

-def load_doab_auths(fname, limit=None):
-    doab_auths = json.load(open(fname))
-    recnum = 0
-    failed = 0
-    for [isbnraw, authlist] in doab_auths:
-        isbn = ISBN(isbnraw).to_string()
-        try:
-            work = models.Identifier.objects.get(type='isbn',value=isbn).work
-        except models.Identifier.DoesNotExist:
-            print 'isbn = {} not found'.format(isbnraw)
-            failed += 1
-        if work.preferred_edition.authors.all().count() < len(authlist):
-            work.preferred_edition.authors.clear()
-            if authlist is None:
-                print "null authlist; isbn={}".format(isbn)
-                continue
-            for [rel,auth] in authlist:
-                work.preferred_edition.add_author(auth, rel)
-        recnum +=1
-        if limit and recnum > limit:
-            break          
-    logger.info("Number of records processed: " + str(recnum))
-    logger.info("Number of missing isbns: " + str(failed))
+# OAI endpoint that doab_client talks to
+DOAB_OAIURL = 'https://www.doabooks.org/oai'
+# captures the numeric id that follows "rid:" in a doabooks.org/doab? URL
+DOAB_PATT = re.compile(r'[\./]doabooks\.org/doab\?.*rid:(\d{1,8}).*')
+# the client is created at import time and reads records with the 'oai_dc' prefix
+mdregistry = MetadataRegistry()
+mdregistry.registerReader('oai_dc', oai_dc_reader)
+doab_client = Client(DOAB_OAIURL, mdregistry)
+isbn_cleaner = identifier_cleaner('isbn', quiet=True)
+# one identifier string may hold several ISBNs separated by slashes
+ISBNSEP = re.compile(r'[/]+')

+def add_by_doab(doab_id, record=None):
+    """
+    load the DOAB record for doab_id and return the edition it was loaded into
+
+    record is an OAI record that has already been fetched; when it is not given,
+    the record is requested from doab_client.
+
+    returns the result of the last load_doab_edition call (one call is made per
+    download url), or None when DOAB has no record with this id or the record
+    has no url to load.
+    """
+    try:
+        record = record if record else doab_client.getRecord(
+            metadataPrefix='oai_dc',
+            identifier='oai:doab-books:{}'.format(doab_id)
+        )
+        metadata = record[1].getMap()
+        isbns = []
+        url = None
+        for ident in metadata.pop('identifier', []):
+            if ident.startswith('ISBN: '):
+                isbn_strings = ISBNSEP.split(ident[6:].strip())
+                for isbn_string in isbn_strings:
+                    isbn = isbn_cleaner(isbn_string)
+                    if isbn:
+                        isbns.append(isbn)
+            elif ident.find('doabooks.org') >= 0:
+                # should already know the doab_id
+                continue
+            else:
+                url = ident
+        language = doab_lang_to_iso_639_1(unlist(metadata.pop('language', None)))
+        # url is still None when the record only has ISBN and doabooks.org
+        # identifiers; there is nothing to resolve then, so don't hand None
+        # to online_to_download
+        urls = online_to_download(url) if url else []
+        edition = None
+        title = unlist(metadata.pop('title', None))
+        license = cc.license_from_cc_url(unlist(metadata.pop('rights', None)))
+        # 'format' is passed positionally below, so it must not also arrive via **metadata
+        metadata.pop('format', None)
+        for dl_url in urls:
+            format = type_for_url(dl_url)
+            edition = load_doab_edition(
+                title,
+                doab_id,
+                dl_url,
+                format,
+                license,
+                language,
+                isbns,
+                url_to_provider(dl_url) if dl_url else None,
+                **metadata
+            )
+        return edition
+    except IdDoesNotExistError:
+        return None
+
+
+def getdoab(url):
+    """Return the doab id captured by DOAB_PATT in url, or False when url does not match."""
+    found = DOAB_PATT.search(url)
+    return found.group(1) if found else False
+
+def load_doab_oai(from_year=None, limit=100000):
+    '''
+    use oai feed to get oai updates
+
+    from_year: load records starting from January 1 of this year; when it is not
+        given, records from the last 45 days are requested
+    limit: stop once this many doab ids have been handled (checked after each record)
+    '''
+    if from_year:
+        from_ = datetime.datetime(year=from_year, month=1, day=1)
+    else:
+        # last 45 days
+        from_ = datetime.datetime.now() - datetime.timedelta(days=45)
+    doab_ids = []
+    for record in doab_client.listRecords(metadataPrefix='oai_dc', from_=from_):
+        if not record[1]:
+            continue
+        metadata = record[1].getMap()
+        item_type = unlist(metadata.get('type', None))
+        if item_type != 'book':
+            continue
+        # .get() so that a record without identifiers is skipped instead of raising KeyError
+        for ident in metadata.get('identifier') or []:
+            doab = getdoab(ident)
+            if doab:
+                doab_ids.append(doab)
+                e = add_by_doab(doab, record=record)
+                title = e.title if e else None
+                logger.info(u'updated:\t{}\t{}'.format(doab, title))
+        # >= rather than >, which let one id more than limit through
+        if len(doab_ids) >= limit:
+            break
--- a/core/loaders/doab_utils.py
+++ b/core/loaders/doab_utils.py
@ -0,0 +1,128 @@
+"""
+doab_utils.py
+
+"""
+
+import re
+import urlparse
+
+import requests
+
+from regluit.utils.lang import get_language_code
+from .utils import get_soup
+
+# utility functions for converting lists of individual items into individual items
+
+# map the language names found in DOAB records to the language codes we use
+# mostly, we just handle misspellings
+# also null -> xx
+
+# lower-cased language strings that get_language_code does not resolve,
+# keyed to the two-letter code to use instead
+EXTRA_LANG_MAP = dict([
+    (u'chinese', 'zh'),
+    (u'deutsch', 'de'),
+    (u'eng', 'en'),
+    (u'englilsh', 'en'),
+    (u'englisch', 'en'),
+    (u'espanol', 'es'),
+    (u'ger', 'de'),
+    (u'fra', 'fr'),
+    (u'fre', 'fr'),
+    (u'francese', 'fr'),
+    (u'ita', 'it'),
+    (u'italiano', 'it'),
+    (u'norwegian', 'no'),
+    (u'por', 'pt'),
+    (u'portugese', 'pt'),
+    (u'slovene', 'sl'),
+    (u'spa', 'es'),
+    (u'spagnolo', 'es'),
+])
+
+sep = re.compile(r'[ \-;^,/]+')
+def doab_lang_to_iso_639_1(lang):
+    """
+    turn a DOAB language string into a language code
+
+    only the part of lang before the first separator is looked at. "xx" is returned
+    for an empty value and for anything that neither get_language_code nor
+    EXTRA_LANG_MAP recognizes.
+    """
+    if not lang:
+        return "xx"
+    first = sep.split(lang)[0]
+    code = get_language_code(first)
+    if not code:
+        return EXTRA_LANG_MAP.get(first.lower(), 'xx')
+    return code
+
+
+# provider name for each host name; url_to_provider looks hosts up here
+# NOTE(review): u'LEO ' (leo.cilea.it) ends with a space -- looks unintentional, confirm
+DOMAIN_TO_PROVIDER = dict([
+    [u'antropologie.zcu.cz', u'AntropoWeb'],
+    [u'books.mdpi.com', u'MDPI Books'],
+    [u'books.openedition.org', u'OpenEdition Books'],
+    [u'books.scielo.org', u'SciELO'],
+    [u'ccdigitalpress.org', u'Computers and Composition Digital Press'],
+    [u'digitalcommons.usu.edu', u'DigitalCommons, Utah State University'],
+    [u'dl.dropboxusercontent.com', u'Dropbox'],
+    [u'dspace.ucalgary.ca', u'Institutional Repository at the University of Calgary'],
+    [u'dx.doi.org', u'DOI Resolver'],
+    [u'ebooks.iospress.nl', u'IOS Press Ebooks'],
+    [u'hdl.handle.net', u'Handle Proxy'],
+    [u'hw.oeaw.ac.at', u'Austrian Academy of Sciences'],
+    [u'img.mdpi.org', u'MDPI Books'],
+    [u'ledibooks.com', u'LediBooks'],
+    [u'leo.cilea.it', u'LEO '],
+    [u'leo.cineca.it', u'Letteratura Elettronica Online'],
+    [u'link.springer.com', u'Springer'],
+    [u'oapen.org', u'OAPEN Library'],
+    [u'press.openedition.org', u'OpenEdition Press'],
+    [u'windsor.scholarsportal.info', u'Scholars Portal'],
+    [u'www.adelaide.edu.au', u'University of Adelaide'],
+    [u'www.aliprandi.org', u'Simone Aliprandi'],
+    [u'www.antilia.to.it', u'antilia.to.it'],
+    [u'www.aupress.ca', u'Athabasca University Press'],
+    [u'www.bloomsburyacademic.com', u'Bloomsbury Academic'],
+    [u'www.co-action.net', u'Co-Action Publishing'],
+    [u'www.degruyter.com', u'De Gruyter Online'],
+    [u'www.doabooks.org', u'Directory of Open Access Books'],
+    [u'www.dropbox.com', u'Dropbox'],
+    [u'www.ebooks.iospress.nl', u'IOS Press Ebooks'],
+    [u'www.ledizioni.it', u'Ledizioni'],
+    [u'www.maestrantonella.it', u'maestrantonella.it'],
+    [u'www.oapen.org', u'OAPEN Library'],
+    [u'www.openbookpublishers.com', u'Open Book Publishers'],
+    [u'www.palgraveconnect.com', u'Palgrave Connect'],
+    [u'www.scribd.com', u'Scribd'],
+    [u'www.springerlink.com', u'Springer'],
+    [u'www.ubiquitypress.com', u'Ubiquity Press'],
+    [u'www.unimib.it', u'University of Milano-Bicocca'],
+    [u'www.unito.it', u"University of Turin"],
+])
+
+def url_to_provider(url):
+    """Return the provider name listed for url's host, or the host itself when it is not listed."""
+    host = urlparse.urlparse(url).netloc
+    if host in DOMAIN_TO_PROVIDER:
+        return DOMAIN_TO_PROVIDER[host]
+    return host
+
+FRONTIERSIN = re.compile(r'frontiersin.org/books/[^/]+/(\d+)')
+
+def online_to_download(url):
+    """
+    return the list of download urls that stand behind an "online" url
+
+    mdpi pdf viewer pages and scielo book pages are fetched and searched for the
+    file links (the list is empty when none are found); frontiersin book urls are
+    rewritten to their EPUB and PDF download urls; any other url is returned
+    unchanged as the only item. An empty or None url gives an empty list.
+    """
+    urls = []
+    if not url:
+        # nothing to resolve; callers just iterate over the result
+        return urls
+    frontiers_match = FRONTIERSIN.search(url)
+    if url.find(u'mdpi.com/books/pdfview/book/') >= 0:
+        doc = get_soup(url)
+        if doc:
+            obj = doc.find('object', type='application/pdf')
+            if obj:
+                # drop the viewer fragment after '#'
+                urls.append(obj['data'].split('#')[0])
+    elif url.find(u'books.scielo.org/') >= 0:
+        doc = get_soup(url)
+        if doc:
+            obj = doc.find('a', class_='pdf_file')
+            if obj:
+                urls.append(urlparse.urljoin(url, obj['href']))
+            obj = doc.find('a', class_='epub_file')
+            if obj:
+                urls.append(urlparse.urljoin(url, obj['href']))
+    elif frontiers_match:
+        booknum = frontiers_match.group(1)
+        urls.append(u'https://www.frontiersin.org/GetFile.aspx?ebook={}&fileformat=EPUB'.format(booknum))
+        urls.append(u'https://www.frontiersin.org/GetFile.aspx?ebook={}&fileformat=PDF'.format(booknum))
+    else:
+        urls.append(url)
+    return urls
+
--- a/core/loaders/hathitrust.py
+++ b/core/loaders/hathitrust.py
@ -26,10 +26,11 @@ class HathitrustScraper(BaseScraper):
            for record in records:
                self.record = record
                return
-            self.record = {}
-
+            self.record = None # probably a hdl not pointing at Hathitrust
+        self.record = None

    def get_downloads(self):
+        if self.record:
            dl_a = self.doc.select_one('#fullPdfLink')
            value = dl_a['href'] if dl_a else None
            if value:
@ -37,27 +38,42 @@ class HathitrustScraper(BaseScraper):
                    'download_url_{}'.format('pdf'),
                    'https://babel.hathitrust.org{}'.format(value)
                )
+        return super(HathitrustScraper, self).get_downloads()

    def get_isbns(self):
+        if self.record:
            isbn = self.record.get('issn', [])
            value = identifier_cleaner('isbn', quiet=True)(isbn)
            return {'print': value} if value else {}
+        return super(HathitrustScraper, self).get_isbns()

    def get_title(self):
+        if self.record:
            self.set('title', self.record.get('title', ''))
+        return super(HathitrustScraper, self).get_title()

    def get_keywords(self):
+        if self.record:
            self.set('subjects', self.record.get('keywords', []))
+        return super(HathitrustScraper, self).get_keywords()

    def get_publisher(self):
+        if self.record:
            self.set('publisher', self.record.get('publisher', ''))
+        return super(HathitrustScraper, self).get_publisher()

    def get_pubdate(self):
+        if self.record:
            self.set('publication_date', self.record.get('year', ''))
+        return super(HathitrustScraper, self).get_pubdate()

    def get_description(self):
+        if self.record:
            notes = self.record.get('notes', [])
            self.set('description', '\r'.join(notes))
+        return super(HathitrustScraper, self).get_description()

    def get_genre(self):
+        if self.record:
            self.set('genre', self.record.get('type_of_reference', '').lower())
+        return super(HathitrustScraper, self).get_genre()
--- a/core/loaders/springer.py
+++ b/core/loaders/springer.py
@ -110,10 +110,12 @@ class SpringerScraper(BaseScraper):
        self.set('publisher', 'Springer')

 search_url = 'https://link.springer.com/search/page/{}?facet-content-type=%22Book%22&package=openaccess'
-def load_springer(num_pages):
-    def springer_open_books(num_pages):
-        for page in range(1, num_pages+1):
+def load_springer(startpage=1, endpage=None):
+    def springer_open_books(startpage, endpage):
+        endpage = endpage if endpage else startpage + 10
+        for page in range(startpage, endpage + 1):
            url = search_url.format(page)
+            try:
                response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
                if response.status_code == 200:
                    base = response.url
@ -121,4 +123,6 @@ def load_springer(num_pages):
                    for link in doc.select('a.title'):
                        book_url = urljoin(base, link['href'])
                        yield SpringerScraper(book_url)
-    return add_from_bookdatas(springer_open_books(num_pages))
+            except requests.exceptions.ConnectionError:
+                print 'couldn\'t connect to %s' % url
+    return add_from_bookdatas(springer_open_books(startpage, endpage))
--- a/core/loaders/tests.py
+++ b/core/loaders/tests.py
@ -0,0 +1,28 @@
+from django.conf import settings
+from django.test import TestCase
+from regluit.core.models import Ebook, Edition, Work
+from .utils import dl_online
+
+class LoaderTests(TestCase):
+    """integration tests for the loader utilities; they need network access"""
+    def setUp(self):
+        pass
+
+    def test_downloads(self):
+        """dl_online should produce a file with a filesize for dropbox and jbe-platform ebooks"""
+        if not (settings.TEST_INTEGRATION):
+            return
+
+        work = Work(title="online work")
+        work.save()
+
+        edition = Edition(work=work)
+        edition.save()
+
+        # dl_online returns a 2-tuple; the ebook file is its first item
+        dropbox_url = 'https://www.dropbox.com/s/h5jzpb4vknk8n7w/Jakobsson_The_Troll_Inside_You_EBook.pdf?dl=0'
+        dropbox_ebook = Ebook.objects.create(format='online', url=dropbox_url, edition=edition)
+        dropbox_ebf, _new = dl_online(dropbox_ebook)
+        self.assertTrue(dropbox_ebf.ebook.filesize)
+
+        jbe_url = 'http://www.jbe-platform.com/content/books/9789027295958'
+        jbe_ebook = Ebook.objects.create(format='online', url=jbe_url, edition=edition)
+        jbe_ebf, _new = dl_online(jbe_ebook)
+        self.assertTrue(jbe_ebf.ebook.filesize)
--- a/core/loaders/utils.py
+++ b/core/loaders/utils.py
@ -1,15 +1,23 @@
 import csv
-import re
-import requests
 import logging
-import sys
+import re
+import time
 import unicodedata
+import urlparse
+
+from bs4 import BeautifulSoup
+import requests
+
+from django.conf import settings
+from django.core.files.base import ContentFile

-from regluit.core.models import Work, Edition, Author, PublisherName, Identifier, Subject
-from regluit.core.isbn import ISBN
-from regluit.core.bookloader import add_by_isbn_from_google, merge_works
 from regluit.api.crosswalks import inv_relator_contrib
 from regluit.bisac.models import BisacHeading
+from regluit.core.bookloader import add_by_isbn_from_google, merge_works
+from regluit.core.isbn import ISBN
+from regluit.core.models import (
+    Ebook, EbookFile, Edition, Identifier, path_for_file, Subject, Work,
+)

 logger = logging.getLogger(__name__)

@ -34,6 +42,12 @@ def utf8_general_ci_norm(s):
    s1 = unicodedata.normalize('NFD', s)
    return ''.join(c for c in s1 if not unicodedata.combining(c)).upper()

+def get_soup(url):
+    """Fetch url and return its content parsed with lxml, or None unless the status is 200."""
+    response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
+    if response.status_code != 200:
+        return None
+    return BeautifulSoup(response.content, 'lxml')
+
 def get_authors(book):
    authors = []
    if book.get('AuthorsList', ''):
@ -96,7 +110,6 @@ def get_title(book):
    sub = book.get('Subtitle', '')
    if sub:
        return u'{}: {}'.format(title, sub)
-    else:
    return title

 def get_cover(book):
@ -125,7 +138,9 @@ def get_isbns(book):
    isbns = []
    edition = None
    #'ISBN 1' is OBP, others are UMICH
-    for code in ['eISBN', 'ISBN 3','PaperISBN', 'ISBN 2', 'ClothISBN', 'ISBN 1', 'ISBN 4', 'ISBN 5']:
+    for code in ['eISBN', 'ISBN 3', 'PaperISBN', 'ISBN 2', 'ClothISBN',
+                 'ISBN 1', 'ISBN 4', 'ISBN 5'
+                ]:
        if book.get(code, '') not in ('', 'N/A'):
            values = book[code].split(',')
            for value in values:
@ -148,7 +163,6 @@ def get_pubdate(book):
        return u'{}-{}-{}'.format(value, sub, sub2)
    elif sub:
        return u'{}-{}'.format(value, sub, sub2)
-    else:
    return value

 def get_publisher(book):
@ -160,7 +174,10 @@ def get_publisher(book):

 def get_url(book):
    url = book.get('URL', '')
-    url = url if url else u'https://doi.org/{}/{}'.format( book.get('DOI prefix',''),book.get('DOI suffix',''))
+    url = url if url else u'https://doi.org/{}/{}'.format(
+        book.get('DOI prefix', ''),
+        book.get('DOI suffix', '')
+    )
    return url

 def get_description(book):
@ -194,7 +211,7 @@ def load_from_books(books):

        # try first to get an Edition already in DB with by one of the ISBNs in book
        (isbns, edition) = get_isbns(book)
-        if len(isbns)==0:
+        if not isbns:
            continue
        title = get_title(book)
        authors = get_authors(book)
@ -260,7 +277,7 @@ def load_from_books(books):
        try:
            logger.info(u"{} {} {}\n".format(i, title, loading_ok))
        except Exception as e:
-            logger.info (u"{} {}\n".format(i, title, str(e) ))
+            logger.info(u"{} {} {}\n".format(i, title, str(e)))

    return results

@ -292,15 +309,17 @@ def loaded_book_ok(book, work, edition):
            try:
                edition_for_isbn = Identifier.objects.get(type='isbn', value=isbn).edition
            except Exception as e:
-                print (e)
+                logger.info(e)
                return False

            # authors
            # print set([ed.name for ed in edition_for_isbn.authors.all()])

-            if (set([utf8_general_ci_norm(author[0]) for author in authors]) != 
-                   set([utf8_general_ci_norm(ed.name) for ed in edition_for_isbn.authors.all()])):
-                print "problem with authors"
+            if (
+                    set([utf8_general_ci_norm(author[0]) for author in authors]) !=
+                    set([utf8_general_ci_norm(ed.name) for ed in edition_for_isbn.authors.all()])
+            ):
+                logger.info("problem with authors")
                return False

            try:
@ -331,14 +350,15 @@ def loaded_book_ok(book, work, edition):
    return True

 ID_URLPATTERNS = {
-    'goog': re.compile(r'[\./]google\.com/books\?.*id=([a-zA-Z0-9\-_]{12})'),
-    'olwk': re.compile(r'[\./]openlibrary\.org(/works/OL\d{1,8}W)'),
-    'gdrd': re.compile(r'[\./]goodreads\.com/book/show/(\d{1,8})'),
-    'ltwk': re.compile(r'[\./]librarything\.com/work/(\d{1,8})'),
-    'oclc': re.compile(r'\.worldcat\.org/.*oclc/(\d{8,12})'),
-    'doi': re.compile(r'[\./]doi\.org/(10\.\d+/\S+)'),
-    'gtbg': re.compile(r'[\./]gutenberg\.org/ebooks/(\d{1,6})'),
-    'glue': re.compile(r'[\./]unglue\.it/work/(\d{1,7})'),
+    'goog': re.compile(r'[\./]google\.com/books\?.*id=(?P<id>[a-zA-Z0-9\-_]{12})'),
+    'olwk': re.compile(r'[\./]openlibrary\.org(?P<id>/works/OL\d{1,8}W)'),
+    'doab': re.compile(r'([\./]doabooks\.org/doab\?.*rid:|=oai:doab-books:)(?P<id>\d{1,8})'),
+    'gdrd': re.compile(r'[\./]goodreads\.com/book/show/(?P<id>\d{1,8})'),
+    'ltwk': re.compile(r'[\./]librarything\.com/work/(?P<id>\d{1,8})'),
+    'oclc': re.compile(r'\.worldcat\.org/.*oclc/(?P<id>\d{8,12})'),
+    'doi': re.compile(r'[\./]doi\.org/(?P<id>10\.\d+/\S+)'),
+    'gtbg': re.compile(r'[\./]gutenberg\.org/ebooks/(?P<id>\d{1,6})'),
+    'glue': re.compile(r'[\./]unglue\.it/work/(?P<id>\d{1,7})'),
 }

 def ids_from_urls(url):
@ -346,7 +366,128 @@ def ids_from_urls(url):
    for ident in ID_URLPATTERNS.keys():
        id_match = ID_URLPATTERNS[ident].search(url)
        if id_match:
-            ids[ident] = id_match.group(1)
+            ids[ident] = id_match.group('id')
    return ids

+# Direct-download link embedded (double-quoted) in a Dropbox share page.
+# The dots in the host name are escaped so they only match a literal '.'.
+DROPBOX_DL = re.compile(r'"(https://dl\.dropboxusercontent\.com/content_link/[^"]+)"')

+def dl_online(ebook):
+    """Try to harvest a downloadable file for an ebook of format 'online'.
+
+    Two kinds of landing page are understood: Dropbox share pages (the
+    download link is pulled out of the page with DROPBOX_DL) and
+    jbe-platform book pages (the PDF link is located with a CSS selector).
+
+    Returns the result of make_dl_ebook() for the link that was found, or
+    ``(None, False)`` when the ebook is not 'online', the URL is of no known
+    kind, or no download link could be extracted.
+    """
+    if ebook.format != 'online':
+        return None, False
+
+    if ebook.url.find(u'dropbox.com/s/') != -1:
+        response = requests.get(ebook.url, headers={"User-Agent": settings.USER_AGENT})
+        if response.status_code != 200:
+            logger.warning('couldn\'t get dl for {}'.format(ebook.url))
+            return None, False
+        match_dl = DROPBOX_DL.search(response.content)
+        if not match_dl:
+            logger.warning('couldn\'t get {}'.format(ebook.url))
+            return None, False
+        return make_dl_ebook(match_dl.group(1), ebook)
+
+    if ebook.url.find(u'jbe-platform.com/content/books/') != -1:
+        doc = get_soup(ebook.url)
+        if not doc:
+            logger.warning('couldn\'t get soup for {}'.format(ebook.url))
+            return None, False
+        pdf_link = doc.select_one('div.fulltexticoncontainer-PDF a')
+        if not pdf_link:
+            logger.warning('couldn\'t get dl_url for {}'.format(ebook.url))
+            return None, False
+        # the href may be relative, so resolve it against the landing page
+        return make_dl_ebook(urlparse.urljoin(ebook.url, pdf_link['href']), ebook)
+
+    return None, False
+
+def make_dl_ebook(url, ebook):
+    """Download ``url`` and store it as an EbookFile with a matching Ebook.
+
+    ``ebook`` is the 'online' ebook the download was discovered from; its
+    url is recorded as the ``source`` of the new EbookFile, and its edition,
+    rights and version fields are copied to the new Ebook.
+
+    Returns a 2-tuple ``(ebook_file, created)``:
+    * ``(existing, False)`` when an EbookFile with this source already exists
+      (nothing is downloaded),
+    * ``(new_ebf, True)`` when a file was downloaded and stored,
+    * ``(None, False)`` when the download failed or its format is 'online'.
+    """
+    # Fetch at most one row, once: testing the queryset for truthiness and
+    # then indexing it again would hit the database twice.
+    existing = list(EbookFile.objects.filter(source=ebook.url)[:1])
+    if existing:
+        return existing[0], False
+
+    response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
+    if response.status_code != 200:
+        logger.warning('couldn\'t get {}'.format(url))
+        return None, False
+
+    # a missing or zero Content-Length is stored as "unknown" (None)
+    filesize = int(response.headers.get("Content-Length", 0)) or None
+    dl_format = type_for_url(url, content_type=response.headers.get('content-type'))
+    if dl_format == 'online':
+        logger.warning('download format for {} is not ebook'.format(url))
+        return None, False
+
+    new_ebf = EbookFile.objects.create(
+        edition=ebook.edition,
+        format=dl_format,
+        source=ebook.url,
+    )
+    new_ebf.file.save(path_for_file(new_ebf, None), ContentFile(response.content))
+    new_ebf.save()
+    new_ebook = Ebook.objects.create(
+        edition=ebook.edition,
+        format=dl_format,
+        provider='Unglue.it',
+        url=new_ebf.file.url,
+        rights=ebook.rights,
+        filesize=filesize,
+        version_label=ebook.version_label,
+        version_iter=ebook.version_iter,
+    )
+    # link the stored file to the ebook record that points at it
+    new_ebf.ebook = new_ebook
+    new_ebf.save()
+    return new_ebf, True
+
+def type_for_url(url, content_type=None):
+    """Return the ebook format label for ``url``.
+
+    The label is one of '', 'online', 'pdf', 'epub', 'text', 'html', 'mobi'
+    or 'other', or the format of an Ebook already stored with this url.
+
+    ``content_type`` is the content-type header if the caller already has
+    it; when it is empty or None the header is looked up via
+    ``contenttyper.calc_type(url)``.
+    """
+    if not url:
+        return ''
+    if url.find('books.openedition.org') >= 0:
+        return 'online'
+    # Fetch at most one row, once, instead of evaluating the queryset for
+    # truthiness and then querying again for the first element.
+    known = list(Ebook.objects.filter(url=url)[:1])
+    if known:
+        return known[0].format
+    ct = content_type if content_type else contenttyper.calc_type(url)
+    if re.search("pdf", ct):
+        return "pdf"
+    elif re.search("octet-stream", ct) and re.search("pdf", url, flags=re.I):
+        # generic binary content-type: fall back on a hint in the url
+        return "pdf"
+    elif re.search("octet-stream", ct) and re.search("epub", url, flags=re.I):
+        return "epub"
+    elif re.search("text/plain", ct):
+        return "text"
+    elif re.search("text/html", ct):
+        # html pages count as 'online' except for oapen.org viewer urls
+        if url.find('oapen.org/view') >= 0:
+            return "html"
+        return "online"
+    elif re.search("epub", ct):
+        return "epub"
+    elif re.search("mobi", ct):
+        return "mobi"
+    return "other"
+
+class ContentTyper(object):
+    """Look up content-types of urls, spacing out lookups to the same host.
+
+    The time of the most recent calc_type() call is kept per network
+    location, and a later call for the same location sleeps until at least
+    one second has passed since it.
+    """
+    def __init__(self):
+        # netloc -> time.time() of the latest calc_type() call for it
+        self.last_call = dict()
+
+    def content_type(self, url):
+        """Return the content-type header of a HEAD request to ``url``.
+
+        Returns '' when the header is absent or the request fails.
+        """
+        try:
+            r = requests.head(url)
+            return r.headers.get('content-type', '')
+        except Exception:
+            # Best effort: any request failure means "unknown". Catching
+            # Exception rather than everything lets KeyboardInterrupt and
+            # SystemExit propagate.
+            return ''
+
+    def calc_type(self, url):
+        """Return the content-type for ``url``, waiting first if its host
+        was queried less than ``delay`` seconds ago."""
+        delay = 1
+        # is there a delay associated with the url
+        netloc = urlparse.urlparse(url).netloc
+
+        # wait if necessary
+        last_call = self.last_call.get(netloc)
+        if last_call is not None:
+            now = time.time()
+            min_time_next_call = last_call + delay
+            if min_time_next_call > now:
+                time.sleep(min_time_next_call-now)
+
+        self.last_call[netloc] = time.time()
+
+        # compute the content-type
+        return self.content_type(url)
+
+contenttyper = ContentTyper()
--- a/core/management/commands/add_missing_doab_covers.py
+++ b/core/management/commands/add_missing_doab_covers.py
@ -5,18 +5,18 @@ from regluit.core.models import Work
 from regluit.core.loaders.doab import update_cover_doab

 class Command(BaseCommand):
-    help = "make covers for doab editions"
+    help = "make covers for doab editions with bad covers"
    
    def handle(self, **options):
-
-        works = Work.objects.filter(selected_edition__isnull=False, selected_edition__cover_image__isnull=True)
-        #.filter(selected_edition__isnull=False, selected_edition__cover_image__isnull=True)
-        #.exclude(selected_edition__identifiers__type='goog')
-        added = 0
-        for (i, work) in enumerate(works):
-            if work.doab and work.selected_edition.googlebooks_id == '':
-                update_cover_doab(work.doab, work.selected_edition)
-                added += 1
-            print ('\r {}:{}'.format(i, added), end='')
- 
-        print('added {} covers'.format(added))
+        works = Work.objects.filter(identifiers__type='doab').distinct()
+        print('checking {} works with doab'.format(works.count()))
+        num = 0
+        for work in works:
+            if not work.cover_image_thumbnail():
+                update_cover_doab(work.doab, work.preferred_edition, store_cover=True)
+                #print(work.doab)
+                num += 1
+                if num % 10 == 0:
+                    print('{} doab covers updated'.format(num))
+                    #break
+        print('Done: {} doab covers updated'.format(num))
--- a/core/management/commands/clean_subjects.py
+++ b/core/management/commands/clean_subjects.py
@ -1,6 +1,7 @@
 from django.core.management.base import BaseCommand

 from regluit.core.models import Subject
+from regluit.core.validation import valid_subject



@ -27,3 +28,8 @@ class Command(BaseCommand):
            for work in subject.works.all():
                Subject.set_by_name(subject.name, work=work)
            subject.delete()
+
+       period_subjects = Subject.objects.filter(name__contains=".")
+       for subject in period_subjects:
+            if not valid_subject(subject.name):
+                subject.delete()
--- a/core/management/commands/doab_load_auths.py
+++ b/core/management/commands/doab_load_auths.py
@ -1,17 +0,0 @@
-import os
-
-from django.conf import settings
-from django.contrib.auth.models import User
-from django.core.management.base import BaseCommand
-
-from regluit.core.loaders import doab
-
-class Command(BaseCommand):
-    help = "load doab auths"
-    args = "<limit> <file_name>"
-    
-    def handle(self, limit=None, file_name="../../../bookdata/doab_auths.json", **options):
-
-        command_dir =  os.path.dirname(os.path.realpath(__file__))
-        file_path = os.path.join(command_dir, file_name)
-        doab.load_doab_auths(file_path, limit=int(limit) if limit else None)
--- a/core/management/commands/doab_load_books.py
+++ b/core/management/commands/doab_load_books.py
@ -1,17 +0,0 @@
-import os
-
-from django.conf import settings
-from django.contrib.auth.models import User
-from django.core.management.base import BaseCommand
-
-from regluit.core.loaders import doab
-
-class Command(BaseCommand):
-    help = "load doab books"
-    args = "<limit> <file_name>"
-    
-    def handle(self, limit=None, file_name="../../../bookdata/doab.json", **options):
-
-        command_dir =  os.path.dirname(os.path.realpath(__file__))
-        file_path = os.path.join(command_dir, file_name)
-        doab.load_doab_records(file_path, limit=int(limit))
--- a/core/management/commands/harvest_online_ebooks.py
+++ b/core/management/commands/harvest_online_ebooks.py
@ -0,0 +1,21 @@
+from django.core.management.base import BaseCommand
+
+from regluit.core.loaders.utils import dl_online
+from regluit.core.models import Ebook
+
+class Command(BaseCommand):
+    """Harvest downloadable files for ebooks whose format is 'online'."""
+    help = "harvest downloadable ebooks from 'online' ebooks"
+    args = "<limit>"
+
+    def handle(self, limit=0, **options):
+        """Run dl_online() over the 'online' ebooks until ``limit`` new
+        files have been harvested, then print how many were harvested.
+
+        Only results that dl_online() reports as newly created are counted.
+        """
+        limit = int(limit) if limit else 0
+        done = 0
+        for online in Ebook.objects.filter(format='online'):
+            new_ebf, new = dl_online(online)
+            if new_ebf and new:
+                done += 1
+                # Stop as soon as `limit` files exist; the previous `>` test
+                # harvested one more than requested.
+                # NOTE(review): with no limit given (0) the run still stops
+                # after the first new file, as before -- confirm whether 0
+                # is meant to be "no limit".
+                if done >= limit:
+                    break
+        print('harvested {} ebooks'.format(done))
+        
--- a/core/management/commands/load_books_from_sitemap.py
+++ b/core/management/commands/load_books_from_sitemap.py
@ -30,9 +30,9 @@ class Command(BaseCommand):
            books = []
            for sitemap in content:
                added = add_by_sitemap(sitemap.strip(), maxnum=max)
-                max = max - len(added)
+                max = max - len(added) if max else max
                books =  books + added
-                if max < 0:
+                if max and max < 0:
                    break
        else:
            books = add_by_sitemap(url, maxnum=max)  
--- a/core/management/commands/load_books_springer.py
+++ b/core/management/commands/load_books_springer.py
@ -4,9 +4,9 @@ from regluit.core.loaders.springer import load_springer

 class Command(BaseCommand):
    help = "load books from springer open"
-    args = "<pages>"
+    args = "<startpage> <endpage>"


-    def handle(self, pages, **options):
-        books = load_springer(int(pages))        
+    def handle(self, startpage, endpage=0, **options):        
+        books = load_springer(int(startpage), int(endpage))       
        print "loaded {} books".format(len(books))
--- a/core/management/commands/load_by_doab.py
+++ b/core/management/commands/load_by_doab.py
@ -0,0 +1,10 @@
+from django.core.management.base import BaseCommand
+
+from regluit.core.loaders import doab
+
+class Command(BaseCommand):
+    """Management command that loads one book given its DOAB id."""
+    args = "<doab_id>"
+    help = "load doab books by doab_id via oai"
+
+    def handle(self, doab_id, **options):
+        """Pass ``doab_id`` to doab.add_by_doab(); its result is discarded."""
+        add_book = doab.add_by_doab
+        add_book(doab_id)
--- a/core/management/commands/load_doab.py
+++ b/core/management/commands/load_doab.py
@ -0,0 +1,18 @@
+from django.core.management.base import BaseCommand
+
+from regluit.core.loaders import doab
+
+class Command(BaseCommand):
+    """Management command that runs the DOAB OAI loader."""
+    help = "load doab books via oai"
+    args = "<from_year> <limit>"
+
+    def handle(self, from_year= None, limit=None, **options):
+        """Call doab.load_doab_oai() with whichever arguments were given.
+
+        Both arguments arrive as strings (or None) and are converted to int.
+        When a limit is given, from_year is passed along with it even if it
+        is None; otherwise from_year is only passed when it is set.
+        """
+        year = int(from_year) if from_year else None
+        max_records = int(limit) if limit else None
+
+        if max_records:
+            loader_kwargs = {'from_year': year, 'limit': max_records}
+        elif year:
+            loader_kwargs = {'from_year': year}
+        else:
+            loader_kwargs = {}
+        doab.load_doab_oai(**loader_kwargs)
--- a/core/management/commands/random_campaigns.py
+++ b/core/management/commands/random_campaigns.py
@ -4,9 +4,9 @@ from random import randint, randrange

 from django.conf import settings
 from django.core.management.base import BaseCommand
+from django.utils.timezone import now

 from regluit.core.models import Work, Campaign
-from regluit.utils.localdatetime import now

 class Command(BaseCommand):
    help = "creates random campaigns for any works that lack one for testing"
--- a/core/models/init.py
+++ b/core/models/init.py
@ -26,6 +26,7 @@ from django.core.files.base import ContentFile
 from django.db import models
 from django.db.models import F, Q
 from django.db.models.signals import post_save
+from django.utils.timezone import now
 from django.utils.translation import ugettext_lazy as _

 #regluit imports
@ -45,8 +46,9 @@ from regluit.payment.parameters import (
    TRANSACTION_STATUS_FAILED,
    TRANSACTION_STATUS_INCOMPLETE
 )
+
 from regluit.utils import encryption as crypto
-from regluit.utils.localdatetime import now, date_today
+from regluit.utils.localdatetime import date_today

 from regluit.core.parameters import (
    REWARDS,
--- a/core/models/bibmodels.py
+++ b/core/models/bibmodels.py
@ -20,10 +20,10 @@ from django.core.urlresolvers import reverse
 from django.db import models
 from django.db.models import F
 from django.db.models.signals import post_save, pre_delete
+from django.utils.timezone import now

 import regluit
 from regluit.marc.models import MARCRecord as NewMARC
-from regluit.utils.localdatetime import now
 from questionnaire.models import Landing

 from regluit.core import mobi
@ -1082,8 +1082,7 @@ class EbookFile(models.Model):
            asking=self.asking,
            source=self.file.url
        )
-
-        new_mobi_ebf.file.save(path_for_file('ebf', None), mobi_cf)
+        new_mobi_ebf.file.save(path_for_file(new_mobi_ebf, None), mobi_cf)
        new_mobi_ebf.save()
        if self.ebook:
            new_ebook = Ebook.objects.create(
--- a/core/parameters.py
+++ b/core/parameters.py
@ -42,7 +42,7 @@ OTHER_ID_CHOICES = (
    ('edid', 'pragmatic edition ID'),
 )

-WORK_IDENTIFIERS = ('doi','olwk','glue','ltwk', 'http')
+WORK_IDENTIFIERS = ('doi','olwk','glue','ltwk', 'http', 'doab')

 ID_CHOICES_MAP = dict(ID_CHOICES)

--- a/core/signals.py
+++ b/core/signals.py
@ -22,6 +22,7 @@ from django.db.utils import DatabaseError
 from django.dispatch import Signal
 from django.utils.translation import ugettext_noop as _
 from django.template.loader import render_to_string
+from django.utils.timezone import now

 from notification import models as notification

@ -29,9 +30,9 @@ from notification import models as notification
 regluit imports
 """
 from regluit.payment.signals import transaction_charged, transaction_failed, pledge_modified, pledge_created
-from regluit.utils.localdatetime import now, date_today
 from regluit.core.parameters import REWARDS, BUY2UNGLUE, THANKS, LIBRARY, RESERVE, THANKED
 from regluit.libraryauth.models import Library, LibraryUser
+from regluit.utils.localdatetime import date_today

 logger = logging.getLogger(__name__)

@ -100,7 +101,7 @@ def create_notice_types( **kwargs):
    notification.create_notice_type("purchase_notgot_gift", _("Your gift wasn't received."), _("The ebook you sent as a gift has not yet been redeemed."))
    notification.create_notice_type("donation", _("Your donation was processed."), _("Thank you, your generous donation has been processed."))
    
-signals.post_syncdb.connect(create_notice_types, sender=notification)
+signals.post_migrate.connect(create_notice_types, sender=notification)

 # define the notifications and tie them to corresponding signals

--- a/core/tasks.py
+++ b/core/tasks.py
@ -13,6 +13,7 @@ django imports
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.core.mail import send_mail
+from django.utils.timezone import now
 from notification.engine import send_all
 from notification import models as notification

@ -29,8 +30,7 @@ from regluit.core import (
 from regluit.core.models import Campaign, Acq, Gift
 from regluit.core.signals import deadline_impending
 from regluit.core.parameters import RESERVE, REWARDS, THANKS
-
-from regluit.utils.localdatetime import now, date_today
+from regluit.utils.localdatetime import date_today

 logger = logging.getLogger(__name__)

--- a/core/tests.py
+++ b/core/tests.py
@ -1,25 +1,22 @@
 # encoding: utf-8
-"""
-external library imports
-"""
+#external library imports
+
+import os
 from datetime import datetime, timedelta
 from decimal import Decimal as D
 from math import factorial
-from time import sleep, mktime
+import unittest
 from urlparse import parse_qs, urlparse
 from tempfile import NamedTemporaryFile
-from celery.task import chord
+from time import sleep, mktime
+
 from celery.task.sets import TaskSet
 import requests
 import requests_mock
-import os

-"""
-django imports
-"""
+#django imports
 from django.conf import settings
 from django.contrib.auth.models import User
-from django_comments.models import Comment
 from django.contrib.contenttypes.models import ContentType
 from django.contrib.sites.models import Site
 from django.core.files import File as DjangoFile
@ -29,11 +26,12 @@ from django.http import Http404
 from django.test import TestCase
 from django.test.client import Client
 from django.test.utils import override_settings
-from django.utils import unittest
+from django.utils.timezone import now
+
+from django_comments.models import Comment
+
+#regluit imports

-"""
-regluit imports
-"""
 from regluit.core import (
    isbn,
    bookloader,
@ -56,7 +54,6 @@ from regluit.core.models import (
    Premium,
    Subject,
    Publisher,
-    PublisherName,
    Offer,
    EbookFile,
    Acq,
@ -69,10 +66,10 @@ from regluit.core.validation import valid_subject
 from regluit.frontend.views import safe_get_work
 from regluit.payment.models import Transaction
 from regluit.payment.parameters import PAYMENT_TYPE_AUTHORIZATION
-from regluit.utils.localdatetime import now, date_today
 from regluit.pyepub import EPUB
+from regluit.utils.localdatetime import date_today
 from .epub import test_epub
-from .pdf import ask_pdf, test_pdf
+from .pdf import test_pdf

 TESTDIR = os.path.join(os.path.dirname(__file__), '../test/')
 YAML_VERSIONFILE = os.path.join(TESTDIR, 'versiontest.yaml')
@ -97,8 +94,12 @@ class BookLoaderTests(TestCase):


    def test_add_by_yaml(self):
-        space_id = bookloader.load_from_yaml('https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml')
-        huck_id = bookloader.load_from_yaml('https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/raw/master/metadata.yaml')
+        space_id = bookloader.load_from_yaml(
+            'https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml'
+        )
+        huck_id = bookloader.load_from_yaml(
+            'https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/raw/master/metadata.yaml'
+        )
        space = models.Work.objects.get(id=space_id)
        huck = models.Work.objects.get(id=huck_id)

@ -162,7 +163,7 @@ class BookLoaderTests(TestCase):
        if not (mocking or settings.TEST_INTEGRATION):
            return
        edition = bookloader.add_by_isbn('9787104030126')
-        self.assertEqual(edition.work.language, 'zh-CN')
+        self.assertEqual(edition.work.language, u'zh-CN')

    def test_update_edition_mock(self):
        with requests_mock.Mocker(real_http=True) as m:
@ -192,7 +193,7 @@ class BookLoaderTests(TestCase):
        self.assertEqual(models.Work.objects.all().count(), before)

    def test_missing_isbn(self):
-        e = bookloader.add_by_isbn_from_google('0139391401')
+        e = bookloader.add_by_isbn_from_google('9781938616990') #unassigned in gluejar block
        self.assertEqual(e, None)

    def test_thingisbn_mock(self):
@ -211,7 +212,8 @@ class BookLoaderTests(TestCase):

    def test_add_related(self):
        # add one edition
-        edition = bookloader.add_by_isbn('0441007465') #Neuromancer; editions in fixture but not joined
+        #Neuromancer; editions in fixture not joined
+        edition = bookloader.add_by_isbn('0441007465')
        edbefore = models.Edition.objects.count()
        before = models.Work.objects.count()
        lang = edition.work.language
@ -252,7 +254,8 @@ class BookLoaderTests(TestCase):


    def test_merge_works_mechanics(self):
-        """Make sure then merge_works is still okay when we try to merge works with themselves and with deleted works"""
+        """Make sure then merge_works is still okay when we try to merge
+        works with themselves and with deleted works"""
        before = models.Work.objects.count()
        wasbefore = models.WasWork.objects.count()
        sub1 = Subject(name='test1')
@ -417,20 +420,19 @@ class BookLoaderTests(TestCase):
            with open(os.path.join(TESTDIR, 'gb_latinlanguage.json')) as gb:
                m.get('https://www.googleapis.com/books/v1/volumes', content=gb.read())
                edition = bookloader.add_by_oclc('1246014')
-            # we've seen the public domain status of this book fluctuate -- and the OCLC number can disappear. So if the ebook count is 2 then test 
+            # we've seen the public domain status of this book fluctuate -- and the OCLC
+            # number can disappear. So if the ebook count is 2 then test
            #if edition is not None and edition.ebooks.count() == 2:
                self.assertEqual(edition.ebooks.count(), 2)
                #ebook_epub = edition.ebooks.all()[0]
                ebook_epub = edition.ebooks.filter(format='epub')[0]
                self.assertEqual(ebook_epub.format, 'epub')
-                #self.assertEqual(ebook_epub.url, 'http://books.google.com/books/download/The_Latin_language.epub?id=N1RfAAAAMAAJ&ie=ISO-8859-1&output=epub&source=gbs_api')
                self.assertEqual(parse_qs(urlparse(ebook_epub.url).query).get("id"), ['N1RfAAAAMAAJ'])
                self.assertEqual(parse_qs(urlparse(ebook_epub.url).query).get("output"), ['epub'])
                self.assertEqual(ebook_epub.provider, 'Google Books')
                self.assertEqual(ebook_epub.set_provider(), 'Google Books')
                ebook_pdf = edition.ebooks.filter(format='pdf')[0]
                self.assertEqual(ebook_pdf.format, 'pdf')
-                #self.assertEqual(ebook_pdf.url, 'http://books.google.com/books/download/The_Latin_language.pdf?id=N1RfAAAAMAAJ&ie=ISO-8859-1&output=pdf&sig=ACfU3U2yLt3nmTncB8ozxOWUc4iHKUznCA&source=gbs_api')
                self.assertEqual(parse_qs(urlparse(ebook_pdf.url).query).get("id"), ['N1RfAAAAMAAJ'])
                self.assertEqual(parse_qs(urlparse(ebook_pdf.url).query).get("output"), ['pdf'])
                self.assertEqual(ebook_pdf.provider, 'Google Books')
@ -494,9 +496,20 @@ class BookLoaderTests(TestCase):
        lang = 'en'
        format = 'epub'
        publication_date = datetime(2001, 7, 1)
-        seed_isbn = '9780142000083' # https://www.amazon.com/Moby-Dick-Whale-Penguin-Classics-Deluxe/dp/0142000086
+        # https://www.amazon.com/Moby-Dick-Whale-Penguin-Classics-Deluxe/dp/0142000086
+        seed_isbn = '9780142000083'

-        ebook = bookloader.load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, seed_isbn, epub_url, format, license, lang, publication_date)
+        ebook = bookloader.load_gutenberg_edition(
+            title,
+            gutenberg_etext_id,
+            ol_work_id,
+            seed_isbn,
+            epub_url,
+            format,
+            license,
+            lang,
+            publication_date
+        )
        self.assertEqual(ebook.url, epub_url)

    def tearDown(self):
@ -506,8 +519,13 @@ class BookLoaderTests(TestCase):
 class SearchTests(TestCase):
    def test_search_mock(self):
        with requests_mock.Mocker(real_http=True) as m:
-            with open(os.path.join(TESTDIR, 'gb_melville.json')) as gb,  open(os.path.join(TESTDIR, 'gb_melville2.json')) as gb2:
-                m.get('https://www.googleapis.com/books/v1/volumes', [{'content':gb2.read()}, {'content':gb.read()}])
+            with open(
+                os.path.join(TESTDIR, 'gb_melville.json')
+            ) as gb, open(os.path.join(TESTDIR, 'gb_melville2.json')) as gb2:
+                m.get(
+                    'https://www.googleapis.com/books/v1/volumes',
+                    [{'content':gb2.read()}, {'content':gb.read()}]
+                )
            self.test_pagination(mocking=True)
            self.test_basic_search(mocking=True)
            self.test_googlebooks_search(mocking=True)
@ -523,7 +541,10 @@ class SearchTests(TestCase):
        self.assertTrue(r.has_key('author'))
        self.assertTrue(r.has_key('description'))
        self.assertTrue(r.has_key('cover_image_thumbnail'))
-        self.assertTrue(r['cover_image_thumbnail'].startswith('https') or r['cover_image_thumbnail'].startswith('http'))
+        self.assertTrue(
+            r['cover_image_thumbnail'].startswith('https')
+            or r['cover_image_thumbnail'].startswith('http')
+        )
        self.assertTrue(r.has_key('publisher'))
        self.assertTrue(r.has_key('isbn_13'))
        self.assertTrue(r.has_key('googlebooks_id'))
@ -605,7 +626,12 @@ class CampaignTests(TestCase):
        c1.save()
        self.assertEqual(c1.status, 'INITIALIZED')
        # ACTIVATED
-        c2 = Campaign(target=D('1000.00'),deadline=datetime(2013,1,1),work=w,description='dummy description')
+        c2 = Campaign(
+            target=D('1000.00'),
+            deadline=datetime(2013, 1, 1),
+            work=w,
+            description='dummy description'
+        )
        c2.save()
        self.assertEqual(c2.status, 'INITIALIZED')
        u = User.objects.create_user('claimer', 'claimer@example.org', 'claimer')
@ -628,11 +654,17 @@ class CampaignTests(TestCase):
        # should not let me suspend a campaign that hasn't been initialized
        self.assertRaises(UnglueitError, c1.suspend, "for testing")
        # UNSUCCESSFUL
-        c3 = Campaign(target=D('1000.00'),deadline=now() - timedelta(days=1),work=w2,description='dummy description')
+        c3 = Campaign(
+            target=D('1000.00'),
+            deadline=now() - timedelta(days=1),
+            work=w2,
+            description='dummy description'
+        )
        c3.save()
        c3.activate()
        self.assertEqual(c3.status, 'ACTIVE')
-        # at this point, since the deadline has passed, the status should change and be UNSUCCESSFUL
+        # at this point, since the deadline has passed,
+        # the status should change and be UNSUCCESSFUL
        self.assertTrue(c3.update_status())
        self.assertEqual(c3.status, 'UNSUCCESSFUL')

@ -648,7 +680,12 @@ class CampaignTests(TestCase):


        # SUCCESSFUL
-        c4 = Campaign(target=D('1000.00'),deadline=now() - timedelta(days=1),work=w,description='dummy description')
+        c4 = Campaign(
+            target=D('1000.00'),
+            deadline=now() - timedelta(days=1),
+            work=w,
+            description='dummy description'
+        )
        c4.save()
        c4.activate()
        t = Transaction()
@ -663,7 +700,12 @@ class CampaignTests(TestCase):
        self.assertEqual(c4.status, 'SUCCESSFUL')

        # WITHDRAWN
-        c5 = Campaign(target=D('1000.00'),deadline=datetime(2013,1,1),work=w,description='dummy description')
+        c5 = Campaign(
+            target=D('1000.00'),
+            deadline=datetime(2013, 1, 1),
+            work=w,
+            description='dummy description'
+        )
        c5.save()
        c5.activate().withdraw('testing')
        self.assertEqual(c5.status, 'WITHDRAWN')
@ -671,7 +713,12 @@ class CampaignTests(TestCase):
        # testing percent-of-goal
        w2 = Work()
        w2.save()
-        c6 = Campaign(target=D('1000.00'),deadline=now() + timedelta(days=1),work=w2,description='dummy description')
+        c6 = Campaign(
+            target=D('1000.00'),
+            deadline=now() + timedelta(days=1),
+            work=w2,
+            description='dummy description'
+        )
        c6.save()
        cl = Claim(rights_holder=rh, work=w2, user=u, status='active')
        cl.save()
@ -751,7 +798,10 @@ class GoodreadsTest(TestCase):
            return
        # test to see whether the core undeletable shelves are on the list
        gr_uid = "767708"  # for Raymond Yee
-        gc = goodreads.GoodreadsClient(key=settings.GOODREADS_API_KEY, secret=settings.GOODREADS_API_SECRET)
+        gc = goodreads.GoodreadsClient(
+            key=settings.GOODREADS_API_KEY,
+            secret=settings.GOODREADS_API_SECRET
+        )
        shelves = gc.shelves_list(gr_uid)
        shelf_names = [s['name'] for s in shelves['user_shelves']]
        self.assertTrue('currently-reading' in shelf_names)
@ -763,7 +813,10 @@ class GoodreadsTest(TestCase):
        if not settings.GOODREADS_API_SECRET:
            return
        gr_uid = "767708"  # for Raymond Yee
-        gc = goodreads.GoodreadsClient(key=settings.GOODREADS_API_KEY, secret=settings.GOODREADS_API_SECRET)
+        gc = goodreads.GoodreadsClient(
+            key=settings.GOODREADS_API_KEY,
+            secret=settings.GOODREADS_API_SECRET
+        )
        reviews = gc.review_list_unauth(user_id=gr_uid, shelf='read')
        # test to see whether there is a book field in each of the review
        # url for test is https://www.goodreads.com/review/list.xml?id=767708&shelf=read&page=1&per_page=20&order=a&v=2&key=[key]
@ -787,7 +840,7 @@ class ISBNTest(TestCase):
        milosz_10 = '006019667X'
        milosz_13 = '9780060196677'
        python_10 = '0-672-32978-6'
-        funky = '0–672—329 78-6' # endash, mdash, space
+        funky = u'0–672—329 78-6' # endash, mdash, space
        python_10_wrong = '0-672-32978-7'
        python_13 = '978-0-672-32978-4'

@ -846,7 +899,10 @@ class ISBNTest(TestCase):
        # curious about set membership
        self.assertEqual(len(set([isbn.ISBN(milosz_10), isbn.ISBN(milosz_13)])), 2)
        self.assertEqual(len(set([str(isbn.ISBN(milosz_10)), str(isbn.ISBN(milosz_13))])), 2)
-        self.assertEqual(len(set([isbn.ISBN(milosz_10).to_string(), isbn.ISBN(milosz_13).to_string()])),1)
+        self.assertEqual(
+            len(set([isbn.ISBN(milosz_10).to_string(), isbn.ISBN(milosz_13).to_string()])),
+            1
+        )

 class EncryptedKeyTest(TestCase):
    def test_create_read_key(self):
@ -944,46 +1000,6 @@ class DownloadPageTest(TestCase):
        eb2.delete()
        self.assertFalse(eb2.edition.work.is_free)

-
-class LocaldatetimeTest(TestCase):
-    @override_settings(LOCALDATETIME_NOW=None)
-    def test_LOCALDATETIME_NOW_none(self):
-        
-        try:
-            localdatetime.now
-        except NameError:
-            from regluit.utils import localdatetime
-        else:
-            reload(localdatetime)
-            
-        self.assertAlmostEqual(mktime(datetime.now().timetuple()), mktime(localdatetime.now().timetuple()), 1.0)
-        
-    @override_settings(LOCALDATETIME_NOW=lambda : datetime.now() + timedelta(365))
-    def test_LOCALDATETIME_NOW_year_ahead(self):
-                
-        try:
-            localdatetime.now
-        except NameError:
-            from regluit.utils import localdatetime
-        else:
-            reload(localdatetime)
-            
-        self.assertAlmostEqual(mktime((datetime.now() + timedelta(365)).timetuple()), mktime(localdatetime.now().timetuple()), 1.0)
-        
-    def test_no_time_override(self):
-
-        from regluit.utils import localdatetime
-        self.assertAlmostEqual(mktime(datetime.now().timetuple()), mktime(localdatetime.now().timetuple()), 1.0)
-    
-    def tearDown(self):
-        # restore localdatetime.now() to what's in the settings file
-        try:
-            localdatetime.now
-        except NameError:
-            from regluit.utils import localdatetime
-        else:
-            reload(localdatetime)
-        
 class MailingListTests(TestCase):
    #mostly to check that MailChimp account is setp correctly

@ -1013,7 +1029,8 @@ class EbookFileTests(TestCase):
        u = User.objects.create_user('test', 'test@example.org', 'testpass')
        rh = RightsHolder.objects.create(owner=u, rights_holder_name='rights holder name')
        cl = Claim.objects.create(rights_holder=rh, work=w, user=u, status='active')
-        c = Campaign.objects.create(work = w, 
+        c = Campaign.objects.create(
+            work=w,
            type=parameters.BUY2UNGLUE,
            cc_date_initial=datetime(2020, 1, 1),
            target=1000,
@ -1147,10 +1164,11 @@ class MobigenTests(TestCase):
        """
        from regluit.core.mobigen import convert_to_mobi
        if settings.TEST_INTEGRATION:
-            output = convert_to_mobi("https://github.com/GITenberg/Moby-Dick--Or-The-Whale_2701/releases/download/0.2.0/Moby-Dick-Or-The-Whale.epub")
+            output = convert_to_mobi(
+                "https://github.com/GITenberg/Moby-Dick--Or-The-Whale_2701/releases/download/0.2.0/Moby-Dick-Or-The-Whale.epub"
+            )
            self.assertTrue(len(output) > 2207877)

-from .signals import handle_transaction_charged
@override_settings(LOCAL_TEST=True)
 class LibTests(TestCase):
    fixtures = ['initial_data.json']
@ -1163,7 +1181,13 @@ class LibTests(TestCase):
        u = User.objects.create_user('test', 'test@example.org', 'testpass')
        lu = User.objects.create_user('library', 'testu@example.org', 'testpass')
        lib = Library.objects.create(user=lu, owner=u)
-        c = Campaign.objects.create(work=w, type = parameters.BUY2UNGLUE, cc_date_initial= datetime(2020,1,1),target=1000, deadline=datetime(2020,1,1))
+        c = Campaign.objects.create(
+            work=w,
+            type=parameters.BUY2UNGLUE,
+            cc_date_initial=datetime(2020, 1, 1),
+            target=1000,
+            deadline=datetime(2020, 1, 1)
+        )

        new_acq = Acq.objects.create(user=lib.user, work=c.work, license=LIBRARY)
        self.assertTrue(new_acq.borrowable)
@ -1183,9 +1207,17 @@ class LibTests(TestCase):

 class GitHubTests(TestCase):
    def test_ebooks_in_github_release(self):
-        (repo_owner, repo_name, repo_tag) = ('GITenberg', 'Adventures-of-Huckleberry-Finn_76', '0.0.50')
-        ebooks = bookloader.ebooks_in_github_release(repo_owner, repo_name,
-                                                tag=repo_tag, token=settings.GITHUB_PUBLIC_TOKEN)
+        (repo_owner, repo_name, repo_tag) = (
+            'GITenberg',
+            'Adventures-of-Huckleberry-Finn_76',
+            '0.0.50'
+        )
+        ebooks = bookloader.ebooks_in_github_release(
+            repo_owner,
+            repo_name,
+            tag=repo_tag,
+            token=settings.GITHUB_PUBLIC_TOKEN
+        )
        expected_set = set([
            ('epub', u'Adventures-of-Huckleberry-Finn.epub'),
            ('mobi', u'Adventures-of-Huckleberry-Finn.mobi'),
@ -1197,7 +1229,8 @@ class GitHubTests(TestCase):
 class OnixLoaderTests(TestCase):
    fixtures = ['initial_data.json']
    def test_load(self):
-        TEST_BOOKS = [{'': u'',
+        TEST_BOOKS = [{
+            '': u'',
            'Author1First': u'Joseph',
            'Author1Last': u'Necvatal',
            'Author1Role': u'',
@ -1232,8 +1265,9 @@ class OnixLoaderTests(TestCase):
            'eISBN': u'N/A',
            'eListPrice': u'N/A',
            'ePublicationDate': u'',
-             'eTerritoryRights': u''},
-            {'': u'', 
+            'eTerritoryRights': u''
+        }, {
+            '': u'',
            'CAD price eub': u'9.95',
            'Title': u'That Greece Might Still Be Free',
            'USD price epub': u'9.95',
@ -1289,12 +1323,9 @@ class OnixLoaderTests(TestCase):
            'GBP price mobi': u'5.95', 'Format 1': u'Paperback ', 'EUR price PDF': u'7.95', 'Format 3': u'pdf',
            'Format 2': u'Hardback', 'Format 5': u'mobi', 'Format 4': u'epub', 'MARC Code1': u'aut',
            'MARC Code2': u'aui', 'MARC Code3': u'', 'MARC Code4': u'', 'MARC Code5': u'',
-            'MARC Code6': u'', 'ISO Language Code': u'en'}
-        ]
+            'MARC Code6': u'', 'ISO Language Code': u'en'
+        }]

        results = load_from_books(TEST_BOOKS)
        for (book, work, edition) in results:
-            assert (loaded_book_ok(book, work, edition))
-
-
-        
+            assert loaded_book_ok(book, work, edition)
--- a/core/validation.py
+++ b/core/validation.py
@ -19,7 +19,7 @@ ID_VALIDATION = {
    'http': (re.compile(r"(https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+(/[^\s]*)?$",
                        flags=re.IGNORECASE|re.S),
             "The Web Address must be a valid http(s) URL."),
-    'isbn':  (r'^([\dxX\-–— ]+|delete)$',
+    'isbn':  (u'^([\\dxX \\-–—‐,;]+|delete)$', #includes unicode hyphen, endash and emdash
              "The ISBN must be a valid ISBN-13."),
    'doab': (r'^(\d{1,6}|delete)$',
             "The value must be 1-6 digits."),
@ -44,8 +44,6 @@ ID_VALIDATION = {
 }

 def isbn_cleaner(value):
-    if value == 'delete':
-        return value
    if not value:
        raise ValidationError('no identifier value found')
    elif value == 'delete':
@ -132,6 +130,8 @@ def valid_xml_char_ordinal(c):
        )

 def valid_subject(subject_name):
+    if len(subject_name) > 200:
+        return False
    num_commas = 0
    for c in subject_name:
        if not valid_xml_char_ordinal(c):
@ -140,6 +140,10 @@ def valid_subject(subject_name):
            num_commas += 1
            if num_commas > 2:
                return False
+    if len(subject_name.split('--')) > 6:
+        return False
+    if len(subject_name.split('. ')) > 4:
+        return False
    return True

 reverse_name_comma = re.compile(r',(?! *Jr[\., ])')
--- a/frontend/forms/bibforms.py
+++ b/frontend/forms/bibforms.py
@ -149,14 +149,27 @@ class EditionForm(forms.ModelForm):
        id_type = self.cleaned_data['id_type']
        id_value = self.cleaned_data.get('id_value','').strip()
        if id_value:
-            identifier = Identifier.objects.filter(type=id_type, value=id_value)
-            if identifier:
-                err_msg = "{} is a duplicate for work #{}.".format(identifier[0], identifier[0].work_id)
-                self.add_error('id_value', forms.ValidationError(err_msg))
            try:
-                self.cleaned_data['id_value'] = identifier_cleaner(id_type)(id_value)
+                id_value = identifier_cleaner(id_type)(id_value)
+                identifier = Identifier.objects.filter(type=id_type, value=id_value)
+                ident = identifier[0] if identifier else None
+                if not ident or not self.instance:
+                    self.cleaned_data['id_value'] = id_value
+                elif ident.edition_id == self.instance.id:
+                    self.cleaned_data['id_value'] = id_value
+                elif not ident.edition_id and ident.work_id == self.instance.work_id:
+                    self.cleaned_data['id_value'] = id_value
+                else:
+                    if ident.edition_id:
+                        err_msg = "{} is a duplicate for edition #{}.".format(id_value, ident.edition_id)
+                    else:
+                        err_msg = "{} is a duplicate for work #{}.".format(id_value, ident.work_id)
+                    self.add_error('id_value', forms.ValidationError(err_msg))
            except forms.ValidationError, ve:
-                self.add_error('id_value', forms.ValidationError('{}: {}'.format(ve.message, id_value)))
+                self.add_error(
+                    'id_value',
+                    forms.ValidationError('{}: {}'.format(ve.message, id_value))
+                )
        return self.cleaned_data

    class Meta:
--- a/frontend/forms/rh_forms.py
+++ b/frontend/forms/rh_forms.py
@ -13,11 +13,11 @@ from django.conf import settings
 from django.forms.extras.widgets import SelectDateWidget
 from django.forms.widgets import RadioSelect
 from django.utils.translation import ugettext_lazy as _
+from django.utils.timezone import now

 from regluit.core.lookups import OwnerLookup
 from regluit.core.models import Campaign, Edition, Claim, RightsHolder, WasWork
 from regluit.core.parameters import *
-from regluit.utils.localdatetime import now

 class RightsHolderForm(forms.ModelForm):
    email = forms.EmailField(
--- a/frontend/templates/base.html
+++ b/frontend/templates/base.html
@ -168,7 +168,7 @@
    <div class="column show-for-medium">
        <span>Contact</span>
        <ul>
-            <li> <a href="mailto:info@ebookfoundation.org"><i class="fa fa-envelope fa-2x"></i></a> <a href="https://twitter.com/unglueit"><i class="fa fa-twitter fa-2x"></i></a> <a href="https://facebook/com/unglueit"><i class="fa fa-facebook fa-2x"></i></a></li>
+            <li> <a href="mailto:info@ebookfoundation.org"><i class="fa fa-envelope fa-2x"></i></a> <a href="https://twitter.com/unglueit"><i class="fa fa-twitter fa-2x"></i></a> <a href="https://facebook.com/unglueit"><i class="fa fa-facebook fa-2x"></i></a></li>
        </ul>
    </div>
 </div>
--- a/frontend/templates/bypub_list.html
+++ b/frontend/templates/bypub_list.html
@ -1,6 +1,6 @@
 {% extends 'work_list.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load lang_utils %}

 {% block title %} Works published by {{ pubname }} {% endblock %}
--- a/frontend/templates/campaign_list.html
+++ b/frontend/templates/campaign_list.html
@ -1,6 +1,6 @@
 {% extends 'base.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load lang_utils %}
 {% load sass_tags %}

--- a/frontend/templates/cc_list.html
+++ b/frontend/templates/cc_list.html
@ -1,6 +1,6 @@
 {% extends 'base.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load lang_utils %}
 {% load sass_tags %}

--- a/frontend/templates/claim.html
+++ b/frontend/templates/claim.html
@ -4,6 +4,7 @@
 {% block doccontent %}

 <h2>Rights Holder Claim Form </h2>
+{% if work %}
 <h3> Rightsholder making claim </h3>
 {{ rights_holder.rights_holder_name }}
 <h3> Work being claimed  </h3>
@ -42,4 +43,7 @@
        <input type="submit" name="submit" value="Confirm Claim">
    </form>
 {% endif %}
+{% else %}
+Please find a work to claim.
+{% endif %}
 {% endblock %}
--- a/frontend/templates/faceted_list.html
+++ b/frontend/templates/faceted_list.html
@ -1,6 +1,6 @@
 {% extends 'base.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load lang_utils %}
 {% load sass_tags %}

--- a/frontend/templates/libraryauth/library.html
+++ b/frontend/templates/libraryauth/library.html
@ -1,6 +1,6 @@
 {% extends 'base.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load sass_tags %}
 {% load truncatechars %}

--- a/frontend/templates/recommended.html
+++ b/frontend/templates/recommended.html
@ -1,6 +1,6 @@
 {% extends 'work_list.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load lang_utils %}

 {% block title %} Books we're recommending. {% endblock %}
--- a/frontend/templates/supporter.html
+++ b/frontend/templates/supporter.html
@ -1,6 +1,6 @@
 {% extends 'base.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load truncatechars %}
 {% load sass_tags %}

--- a/frontend/templates/unglued_list.html
+++ b/frontend/templates/unglued_list.html
@ -1,6 +1,6 @@
 {% extends 'base.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load lang_utils %}
 {% load sass_tags %}

--- a/frontend/templates/work_list.html
+++ b/frontend/templates/work_list.html
@ -1,6 +1,6 @@
 {% extends 'base.html' %}

-{% load endless %}
+{% load el_pagination_tags %}
 {% load lang_utils %}
 {% load sass_tags %}

--- a/frontend/templatetags/bookpanel.py
+++ b/frontend/templatetags/bookpanel.py
@ -1,5 +1,6 @@
 from django import template
-from regluit.utils.localdatetime import now
+from django.utils.timezone import now
+
 from regluit.core.parameters import REWARDS, BUY2UNGLUE

 register = template.Library()
--- a/frontend/templatetags/lang_utils.py
+++ b/frontend/templatetags/lang_utils.py
@ -1,12 +1,6 @@
-"""
-The truncatechars filter is part of Django dev, but we're on 1.3.1
-The following is the filter and its dependencies
-To use this filter, put "{% load truncatechars %}" at the beginning of your template, 
-then {{ myvariable|truncatechars:num }}
-"""
 import unicodedata

-from django.template.base import Library
+from django.template import Library
 from django.template.defaultfilters import stringfilter
 from django.utils.translation import get_language_info

--- a/frontend/templatetags/lib_acqs.py
+++ b/frontend/templatetags/lib_acqs.py
@ -1,4 +1,4 @@
-from regluit.utils.localdatetime import now
+from django.utils.timezone import now
 from django import template
 register = template.Library()

--- a/frontend/templatetags/purchased.py
+++ b/frontend/templatetags/purchased.py
@ -1,5 +1,6 @@
-from regluit.utils.localdatetime import now
 from django import template
+from django.utils.timezone import now
+
 from regluit.core.models import Acq
 register = template.Library()

--- a/frontend/templatetags/truncatechars.py
+++ b/frontend/templatetags/truncatechars.py
@ -7,7 +7,7 @@ then {{ myvariable|truncatechars:num }}
 import unicodedata

 from django import template
-from django.template.base import Library
+from django.template import Library
 from django.template.defaultfilters import stringfilter
 from django.utils.encoding import force_unicode
 from django.utils.functional import allow_lazy, SimpleLazyObject
--- a/frontend/templatetags/urldecode.py
+++ b/frontend/templatetags/urldecode.py
@ -3,7 +3,7 @@
 """
 from urllib import unquote

-from django.template.base import Library
+from django.template import Library
 from django.template.defaultfilters import stringfilter

 register = Library()
--- a/frontend/tests.py
+++ b/frontend/tests.py
@ -13,6 +13,7 @@ from django.core import mail
 from django.core.urlresolvers import reverse
 from django.test import TestCase
 from django.test.client import Client
+from django.utils.timezone import now

 from notification.models import Notice

@ -21,7 +22,6 @@ from regluit.core.models import Work, Campaign, RightsHolder, Claim, Subject
 from regluit.payment.models import Transaction
 from regluit.payment.manager import PaymentManager
 from regluit.payment.stripelib import StripeClient, TEST_CARDS, ERROR_TESTING, card
-from regluit.utils.localdatetime import now

 class WishlistTests(TestCase):
    fixtures = ['initial_data.json', 'neuromancer.json']
--- a/frontend/urls.py
+++ b/frontend/urls.py
@ -35,7 +35,7 @@ urlpatterns = [
    url(r"^rightsholders/campaign/(?P<id>\d+)/mademobi/$", views.manage_campaign, {'action': 'mademobi'}, name="mademobi"),
    url(r"^rightsholders/edition/(?P<work_id>\d*)/(?P<edition_id>\d*)$", views.edit_edition, {'by': 'rh'}, name="rh_edition"),
    url(r"^rightsholders/edition/(?P<edition_id>\d*)/upload/$", views.edition_uploads, name="edition_uploads"),
-    url(r"^rightsholders/claim/$", views.claim, name="claim"), 
+    url(r"^rightsholders/claim/$", login_required(views.claim), name="claim"), 
    url(r"^rightsholders/surveys/$", views.surveys, name="surveys"), 
    url(r"^rightsholders/new_survey/(?P<work_id>\d*)/?$", views.new_survey, name="new_survey"),
    url(r"^rightsholders/surveys/answers_(?P<qid>\d+)_(?P<work_id>\d*).csv$", views.export_surveys, name="survey_answers"),
--- a/frontend/views/init.py
+++ b/frontend/views/init.py
@ -45,6 +45,7 @@ from django.template import TemplateDoesNotExist
 from django.template.loader import render_to_string
 from django.utils.http import urlencode
 from django.utils.translation import ugettext_lazy as _
+from django.utils.timezone import now
 from django.views.decorators.csrf import csrf_exempt
 from django.views.decorators.http import require_POST
 from django.views.generic.edit import FormView
@ -123,11 +124,11 @@ from regluit.payment.parameters import (
    COMPANY_TITLE
 )

-from regluit.utils.localdatetime import now, date_today
 from regluit.libraryauth.forms import UserNamePass
 from regluit.libraryauth.views import Authenticator, superlogin, login_user
 from regluit.libraryauth.models import Library
 from regluit.marc.views import qs_marc_records
+from regluit.utils.localdatetime import date_today
 from questionnaire.models import Landing, Questionnaire
 from questionnaire.views import export_summary as answer_summary, export_csv as export_answers

--- a/frontend/views/bibedit.py
+++ b/frontend/views/bibedit.py
@ -21,6 +21,7 @@ from regluit.core.bookloader import (
 from regluit.core.parameters import WORK_IDENTIFIERS

 from regluit.core.loaders import add_by_webpage
+from regluit.core.loaders.doab import add_by_doab
 from regluit.core.loaders.utils import ids_from_urls
 from regluit.frontend.forms import EditionForm, IdentifierForm

@ -106,6 +107,11 @@ def get_edition_for_id(id_type, id_value, user=None):
        if edition:
            return user_edition(edition, user)
    
+    if identifiers.has_key('doab'):
+        edition = add_by_doab(identifiers['doab'])
+        if edition:
+            return user_edition(edition, user)
+    
    if identifiers.has_key('oclc'):
        edition = add_by_oclc(identifiers['oclc'])
        if edition:
@ -297,6 +303,12 @@ def edit_edition(request, work_id, edition_id, by=None):
                id_type = form.cleaned_data['id_type']
                id_val = form.cleaned_data['id_value']
                if id_val == 'delete':
+                    if id_type in WORK_IDENTIFIERS:
+                        if edition.work.identifiers.exclude(type=id_type):
+                            edition.work.identifiers.filter(type=id_type).delete()
+                        else:
+                            alert = ('Can\'t delete identifier -  must have at least one left.')
+                    else:
                        if edition.identifiers.exclude(type=id_type):
                            edition.identifiers.filter(type=id_type).delete()
                        else:
--- a/frontend/views/rh_views.py
+++ b/frontend/views/rh_views.py
@ -88,6 +88,8 @@ class ClaimView(CreateView):
        return HttpResponseRedirect(reverse('rightsholders'))

    def get_context_data(self, form):
+        if not form.is_valid():
+            return {'form': form}
        work = form.cleaned_data['work']
        rights_holder = form.cleaned_data['rights_holder']
        active_claims = work.claim.exclude(status = 'release')
--- a/libraryauth/init.py
+++ b/libraryauth/init.py
@ -1 +1,9 @@
+from django.apps import AppConfig
+
+default_app_config = 'regluit.libraryauth.LibraryAuthConfig'
+
+class LibraryAuthConfig(AppConfig):
+    name = 'regluit.libraryauth'
+
+    def ready(self):
        from . import signals
--- a/libraryauth/models.py
+++ b/libraryauth/models.py
@ -8,7 +8,7 @@ from django.core import validators
 from django.db import models
 from django.db.models import Q
 from django.db.models.signals import post_save
-from django.forms import IPAddressField as BaseIPAddressField
+from django.forms import GenericIPAddressField as BaseIPAddressField
 from django.utils.translation import ugettext_lazy as _
 from django.core.urlresolvers import reverse

--- a/libraryauth/templatetags/libraryauthtags.py
+++ b/libraryauth/templatetags/libraryauthtags.py
@ -1,6 +1,6 @@
 import unicodedata

-from django.template.base import Library
+from django.template import Library
 from .. import models

 register = Library()
--- a/payment/baseprocessor.py
+++ b/payment/baseprocessor.py
@ -10,12 +10,12 @@ from datetime import timedelta
 django imports
 """
 from django.http import  HttpResponseForbidden
+from django.utils.timezone import now

 """
 regluit imports
 """
 from regluit.payment.models import PaymentResponse
-from regluit.utils.localdatetime import now, zuluformat

 class ProcessorError(Exception):
    """An abstraction around payment processor exceptions"""
--- a/payment/manager.py
+++ b/payment/manager.py
@ -18,6 +18,7 @@ django imports
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.core.urlresolvers import reverse
+from django.utils.timezone import now

 """
 regluit imports
@ -26,7 +27,6 @@ from regluit.payment import credit
 from regluit.payment.models import Transaction, Receiver, PaymentResponse, Account
 from regluit.payment.parameters import *
 from regluit.payment.signals import transaction_charged, pledge_modified, pledge_created
-from regluit.utils.localdatetime import now

 logger = logging.getLogger(__name__)

--- a/payment/models.py
+++ b/payment/models.py
@ -18,6 +18,7 @@ from django.db.models import Q
 from django.contrib.sites.models import Site
 from django.db.models.signals import post_save, post_delete
 from django.utils.http import urlquote
+from django.utils.timezone import now

 ## django module imports

@ -42,7 +43,7 @@ from regluit.payment.parameters import (
 )

 from regluit.payment.signals import credit_balance_added, pledge_created
-from regluit.utils.localdatetime import now, date_today
+from regluit.utils.localdatetime import date_today

 logger = logging.getLogger(__name__)

--- a/payment/stripelib.py
+++ b/payment/stripelib.py
@ -6,12 +6,15 @@ external library imports
 """
 import logging
 import json
-import re
-import stripe

 from datetime import datetime, timedelta
 from itertools import islice
 from pytz import utc
+import re
+import unittest
+from unittest import TestCase  
+  
+import stripe

 """
 django imports
@ -19,6 +22,7 @@ django imports
 from django.conf import settings
 from django.core.mail import send_mail
 from django.http import HttpResponse
+from django.utils.timezone import now

 """
 regluit imports
@ -35,7 +39,6 @@ from regluit.payment.parameters import (
    TRANSACTION_STATUS_CANCELED
 )
 from regluit.payment.signals import transaction_charged, transaction_failed
-from regluit.utils.localdatetime import now, zuluformat

 # as of 2013.07.15
 # ['charge.disputed', 'coupon.updated'] are legacy events -- don't know whether to
@ -73,12 +76,6 @@ def grouper(iterable, page_size):
 class StripelibError(baseprocessor.ProcessorError):
    pass

-try:
-    import unittest
-    from unittest import TestCase    
-except:
-    from django.test import TestCase
-    from django.utils import unittest

 # if customer.id doesn't exist, create one and then charge the customer
 # we probably should ask our users whether they are ok with our creating a customer id account -- or ask for credit
--- a/payment/tests.py
+++ b/payment/tests.py
@ -5,6 +5,7 @@ import logging
 import os
 import time
 import traceback
+import unittest

 from datetime import timedelta
 from decimal import Decimal as D
@ -19,7 +20,7 @@ from django.contrib.auth.models import User
 from django.core.exceptions import ValidationError
 from django.core.validators import URLValidator
 from django.test import TestCase
-from django.utils import unittest
+from django.utils.timezone import now

 """
 regluit imports
@ -29,7 +30,6 @@ from regluit.core.signals import handle_transaction_charged
 from regluit.payment.manager import PaymentManager
 from regluit.payment.models import Transaction, Account
 from regluit.payment.parameters import *
-from regluit.utils.localdatetime import now

 def setup_selenium():
    # Set the display window for our xvfb
--- a/payment/views.py
+++ b/payment/views.py
@ -13,7 +13,7 @@ django imports
 """
 from django.conf import settings
 from django.contrib.auth.models import User
-from django.contrib.sites.models import RequestSite
+from django.contrib.sites.requests import RequestSite
 from django.core.urlresolvers import reverse
 from django.http import (
    HttpResponse,
@ -24,6 +24,7 @@ from django.http import (
 from django.shortcuts import render_to_response
 from django.template import RequestContext
 from django.test.utils import setup_test_environment
+from django.utils.timezone import now
 from django.views.decorators.csrf import csrf_exempt
 from django.views.generic.edit import FormView
 from django.views.generic.base import TemplateView
@ -38,7 +39,6 @@ from regluit.payment.models import Transaction
 from regluit.payment.parameters import *
 from regluit.payment.stripelib import STRIPE_PK
 from regluit.payment.tests import PledgeTest, AuthorizeTest
-from regluit.utils.localdatetime import now

 logger = logging.getLogger(__name__)

--- a/requirements_versioned.pip
+++ b/requirements_versioned.pip
@ -3,19 +3,14 @@ Fabric==1.6.0
 MySQL-python==1.2.5
 Pillow==3.4.2
 PyJWT==1.4.1
-PyPDF2==1.23
+PyPDF2==1.26
 PyGithub==1.15.0
 PyYAML==3.11
-git+git://github.com/urschrei/pyzotero.git@v0.9.51
-SPARQLWrapper==1.6.4
-WebOb==1.2.3
-WebTest==1.4.0
 amqp==1.4.9
 anyjson==0.3.3
 billiard==3.3.0.23
 awscli==1.10.26
 boto==2.42.0
-#git+ssh://git@github.com/Gluejar/boto.git@2.3.0
 celery==3.1.23
 certifi==2016.2.28
 # pip installing pillow seems to delete distribute
@ -24,36 +19,34 @@ certifi==2016.2.28
 django-celery==3.1.17
 django-ckeditor==4.5.1
 #django-email-change==0.2.3
-git+git://github.com/eshellman/django-email-change.git@1e71dd320504d56b1fc7d447ce4cffb550cedce7
+git+git://github.com/eshellman/django-email-change.git@57169bdef1c8a41d122e2bab2dcd8564b8fb231d
 django-compat==1.0.10
 django-contrib-comments==1.7.1
-django-endless-pagination==2.0
+django-el-pagination==3.2.4
 django-extensions==1.6.1
 django-jsonfield==1.0.0
 #django-kombu==0.9.4
 django-maintenancemode==0.11.2
 django-mptt==0.8.5
-#django-nose-selenium==0.7.3
 #django-notification==0.2
-git+git://github.com/eshellman/django-notification.git@412c7a03a327195a1017c2be92c8e2caabc880b6
+git+git://github.com/eshellman/django-notification.git@a4620e893e2da220994e0189bf5d980bfbdcf0ad
 django-registration==2.1.2
 django-selectable==0.9.0
 django-smtp-ssl==1.0
 django-storages==1.4.1
 django-tastypie==0.13.3
-django-transmeta==0.7.3
-feedparser==5.1.2
+#django-transmeta==0.7.3 
+git+git://github.com/resulto/django-transmeta.git@ad4d7278ba330dcf8c8446f8ae9b2c769ae8684e
 fef-questionnaire==4.0.1
-freebase==1.0.8
 #gitenberg.metadata==0.1.6
 git+https://github.com/gitenberg-dev/gitberg-build
 #git+ssh://git@github.com/gitenberg-dev/metadata.git@0.1.11
 github3.py==0.9.5
-html5lib==1.0b3
+html5lib==1.0.1
 httplib2==0.7.5
 isodate==0.5.1
 kombu==3.0.35
-lxml==2.3.5
+lxml==4.2.1
 defusedxml==0.4.1
 mechanize==0.2.5
 mimeparse==0.1.3
@ -66,6 +59,7 @@ paramiko==1.14.1
 postmonkey==1.0b
 pycrypto==2.6
 pymarc==3.0.2
+pyoai==2.5.0
 pyparsing==2.0.3
 python-dateutil==2.5.3
 python-mimeparse==0.1.4
@ -75,12 +69,12 @@ pytz==2016.6.1
 rdflib==4.2.0
 rdflib-jsonld==0.3
 redis==2.10.3
-reportlab==3.1.8
+reportlab==3.4.0
 requests==2.10.0
 requests-mock==1.2.0
 requests-oauthlib==0.6.2
 selenium==2.53.1
-six==1.9.0
+six==1.11.0
 sorl-thumbnail==12.3
 ssh==1.7.14
 stevedore==1.12.0
@ -89,7 +83,8 @@ virtualenv==1.4.9
 # virtualenv-clone==0.2.4 not sure why I have this in my env
 #virtualenvwrapper==3.6
 wsgiref==0.1.2
-xhtml2pdf==0.0.6
+xhtml2pdf==0.2.2
+webencodings==0.5.1
 #for urllib3 secure
 cffi==1.7.0
 cryptography==2.1.4
--- a/settings/common.py
+++ b/settings/common.py
@ -165,7 +165,7 @@ INSTALLED_APPS = (
    'social.apps.django_app.default',
    'tastypie',
    'djcelery',
-    'endless_pagination',
+    'el_pagination',
    'selectable',
    'regluit.frontend.templatetags',
    'notification',
--- a/settings/dev.py
+++ b/settings/dev.py
@ -29,7 +29,9 @@ DATABASES = {
        'PASSWORD': '',
        'HOST': '',
        'PORT': '',
-        'TEST_CHARSET': 'utf8',
+        'TEST': {
+            'CHARSET': 'utf8',
+        }
    }
 }

--- a/settings/jenkins.py
+++ b/settings/jenkins.py
@ -20,7 +20,9 @@ DATABASES = {
        'PASSWORD': 'regluit',
        'HOST': '',
        'PORT': '',
-        'TEST_CHARSET': 'utf8',
+        'TEST': {
+            'CHARSET': 'utf8',
+        }
    }
 }

--- a/settings/just.py
+++ b/settings/just.py
@ -22,7 +22,9 @@ DATABASES = {
        'PASSWORD': DATABASE_PASSWORD,
        'HOST': DATABASE_HOST,
        'PORT': '',
-        'TEST_CHARSET': 'utf8'
+        'TEST': {
+            'CHARSET': 'utf8',
+        }
    }
 }

--- a/settings/please.py
+++ b/settings/please.py
@ -21,7 +21,9 @@ DATABASES = {
        'PASSWORD': DATABASE_PASSWORD,
        'HOST': DATABASE_HOST,
        'PORT': '',
-        'TEST_CHARSET': 'utf8',
+        'TEST': {
+            'CHARSET': 'utf8',
+        }
    }
 }

--- a/settings/prod.py
+++ b/settings/prod.py
@ -23,7 +23,9 @@ DATABASES = {
        'PASSWORD': DATABASE_PASSWORD,
        'HOST': DATABASE_HOST,
        'PORT': '',
-        'TEST_CHARSET': 'utf8',
+        'TEST': {
+            'CHARSET': 'utf8',
+        }
    }
 }

--- a/utils/init.py
+++ b/utils/init.py
@ -1 +1 @@
-import localdatetime
+
--- a/utils/lang.py
+++ b/utils/lang.py
@ -1,6 +1,10 @@
 from django.conf.global_settings import LANGUAGES

 lang2code = dict([ (lang[1].lower(), lang[0]) for lang in LANGUAGES ])
+code2lang = dict(LANGUAGES)

 def get_language_code(language):
-    return lang2code.get(language.lower().strip(), '')
+    language = language.lower().strip()
+    if language in code2lang:
+        return language
+    return lang2code.get(language, '')
--- a/utils/localdatetime.py
+++ b/utils/localdatetime.py
@ -1,140 +1,8 @@
-"""
-Utility to return datetime.datetime.utcnow() by default but allows for a custom utcnow() (e.g., for testing)
-
->>> import regluit
->>> from regluit.utils.localdatetime import now
->>> now()
-datetime.datetime(2012, 3, 8, 14, 0, 35, 409270)
->>> now()
-datetime.datetime(2012, 3, 8, 14, 0, 36, 985271)
->>> n = now()
->>> n
-datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
->>> regluit.utils.localdatetime._now = lambda: n
->>> now()
-datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
->>> now()
-datetime.datetime(2012, 3, 8, 14, 1, 54, 650679)
->>> now()
-
-DST handled:
-
->>> ptz = pytz.timezone('America/Los_Angeles')
->>> make_naive(datetime.datetime(2012,03,11,10,tzinfo=utc), ptz)
-datetime.datetime(2012, 3, 11, 3, 0)
->>> make_naive(datetime.datetime(2012,03,11,9,tzinfo=utc), ptz)
-datetime.datetime(2012, 3, 11, 1, 0)
-
->>> make_aware(datetime.datetime(2012,11,4,1,30), ptz)
-Traceback (most recent call last):
-  File "<console>", line 1, in <module>
-  File "/Users/raymondyee/C/src/Gluejar/regluit/utils/localdatetime.py", line 90, in make_aware
-    return timezone.localize(value, is_dst=None)
-  File "/Users/raymondyee/.virtualenvs/regluit/lib/python2.7/site-packages/pytz/tzinfo.py", line 349, in localize
-    raise AmbiguousTimeError(dt)
-AmbiguousTimeError: 2012-11-04 01:30:00
+from django.utils.timezone import now

    
-"""
-
-import pytz
-import datetime
-import django
-from django.conf import settings
-
-# for Django 1.3.x, return a timestamp naive now()
-# for Django 1.4 should switch to django.utils.timezone.now()
-# see https://code.djangoproject.com/browser/django/trunk/django/utils/timezone.py?rev=17642#L232
-
-def now():
-    if hasattr(settings, 'LOCALDATETIME_NOW') and settings.LOCALDATETIME_NOW is not None:
-        return settings.LOCALDATETIME_NOW()
-    else:
-        try:
-            return django.utils.timezone.now()
-        except AttributeError, e:
-            return datetime.datetime.now()    
-    
-# provide a replacement for datetime.date.today()
-# this will be timezone naive -- is that what we really want?
+# TODO: switch to django.utils.timezone.localdate() after upgrading to Django 1.11

 def date_today():
    return now().date()

-# borrow a lot of the routines/code that will be in Django 1.4+ django.utils.timezone
-# https://code.djangoproject.com/browser/django/trunk/django/utils/timezone.py
-
-utc = pytz.utc
-
-def get_default_timezone():
-    return pytz.timezone(settings.TIME_ZONE)
-    
-def is_aware(value):
-    """
-    Determines if a given datetime.datetime is aware.
-
-    The logic is described in Python's docs:
-    http://docs.python.org/library/datetime.html#datetime.tzinfo
-    """
-    return value.tzinfo is not None and value.tzinfo.utcoffset(value) is not None
-
-def is_naive(value):
-    """
-    Determines if a given datetime.datetime is naive.
-
-    The logic is described in Python's docs:
-    http://docs.python.org/library/datetime.html#datetime.tzinfo
-    """
-    return value.tzinfo is None or value.tzinfo.utcoffset(value) is None
-
-def make_aware(value, timezone):
-    """
-    Makes a naive datetime.datetime in a given time zone aware.
-    """
-    if hasattr(timezone, 'localize'):
-        # available for pytz time zones
-        return timezone.localize(value, is_dst=None)
-    else:
-        # may be wrong around DST changes
-        return value.replace(tzinfo=timezone)
-
-def make_naive(value, timezone):
-    """
-    Makes an aware datetime.datetime naive in a given time zone.
-    """
-    value = value.astimezone(timezone)
-    if hasattr(timezone, 'normalize'):
-        # available for pytz time zones
-        value = timezone.normalize(value)
-    return value.replace(tzinfo=None)
-
-def isoformat(value):
-    """
-    if value is naive, assume it's in the default_timezone
-    """
-    if is_naive(value):
-        return make_aware(value, get_default_timezone()).isoformat()
-    else:
-        return value.isoformat()
-
-def zuluformat(value):
-    """format value in zulu format -- e.g., 2012-03-26T17:47:22.654449Z"""
-    return "{0}Z".format(as_utc_naive(value).isoformat())
-
-def as_utc_naive(value):
-    """
-    if value is naive, assume it's in the default time zone, then convert to UTC but make naive 
-    """
-    if is_naive(value):
-        return make_naive(make_aware(value, get_default_timezone()), utc)
-    else:
-        return make_naive(value, utc)
-    
-def as_default_timezone_naive(value):
-    """
-    if value is naive, assume it's in UTC and convert to the default tz and make it naive
-    """
-    if is_naive(value):
-        return make_naive(make_aware(value, utc), get_default_timezone())
-    else:
-        return make_naive(value, get_default_timezone())