update doab loader
parent
d4f47b2a5e
commit
167c7fc60a
|
@ -10,7 +10,7 @@ from django.core.files.storage import default_storage
|
|||
from django.core.files.base import ContentFile
|
||||
|
||||
import regluit
|
||||
from regluit.core import models
|
||||
from regluit.core import models, tasks
|
||||
from regluit.core import bookloader
|
||||
from regluit.core.bookloader import add_by_isbn
|
||||
|
||||
|
@ -62,15 +62,14 @@ def update_cover_doab(doab_id, store_cover=True):
|
|||
else:
|
||||
return None
|
||||
|
||||
def attach_more_doab_metadata(ebook, description, subjects,
|
||||
def attach_more_doab_metadata(edition, description, subjects,
|
||||
publication_date, publisher_name=None):
|
||||
|
||||
"""
|
||||
for given ebook, attach description, subjects, publication date to
|
||||
for given edition, attach description, subjects, publication date to
|
||||
corresponding Edition and Work
|
||||
"""
|
||||
# if edition doesn't have a publication date, update it
|
||||
edition = ebook.edition
|
||||
edition_to_save = False
|
||||
|
||||
if not edition.publication_date:
|
||||
|
@ -94,8 +93,13 @@ def attach_more_doab_metadata(ebook, description, subjects,
|
|||
for s in subjects:
|
||||
if bookloader.valid_subject(s):
|
||||
work.subjects.add(models.Subject.objects.get_or_create(name=s)[0])
|
||||
|
||||
return ebook
|
||||
|
||||
# set reading level of work if it's empty; doab is for adults.
|
||||
if not work.age_level:
|
||||
work.age_level = '18-'
|
||||
work.save()
|
||||
|
||||
return edition
|
||||
|
||||
def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
|
||||
language, isbns,
|
||||
|
@ -104,7 +108,6 @@ def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
|
|||
"""
|
||||
load a record from doabooks.org represented by input parameters and return an ebook
|
||||
"""
|
||||
from regluit.core import tasks
|
||||
|
||||
# check to see whether the Edition hasn't already been loaded first
|
||||
# search by url
|
||||
|
@ -121,7 +124,7 @@ def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
|
|||
|
||||
# if yes, then return one of the Edition(s) whose work is doab_id
|
||||
# if no, then
|
||||
|
||||
ebook = None
|
||||
if len(ebooks) > 1:
|
||||
raise Exception("There is more than one Ebook matching url {0}".format(url))
|
||||
elif len(ebooks) == 1:
|
||||
|
@ -132,55 +135,55 @@ def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
|
|||
cover_url = update_cover_doab(doab_id)
|
||||
|
||||
# attach more metadata
|
||||
attach_more_doab_metadata(ebook,
|
||||
attach_more_doab_metadata(ebook.edition,
|
||||
description=kwargs.get('description'),
|
||||
subjects=kwargs.get('subject'),
|
||||
publication_date=kwargs.get('date'),
|
||||
publisher_name=kwargs.get('publisher'))
|
||||
|
||||
# make sure all isbns are added
|
||||
for isbn in isbns:
|
||||
bookloader.add_by_isbn(isbn, ebook.edition.work)
|
||||
|
||||
return ebook
|
||||
|
||||
# remaining case --> need to create a new Ebook
|
||||
# remaining case --> no ebook, load record, create ebook if there is one.
|
||||
assert len(ebooks) == 0
|
||||
|
||||
# make sure we have isbns to work with before creating ebook
|
||||
if len(isbns) == 0:
|
||||
return None
|
||||
|
||||
ebook = models.Ebook()
|
||||
ebook.format = format
|
||||
ebook.provider = provider
|
||||
ebook.url = url
|
||||
ebook.rights = rights
|
||||
|
||||
# we still need to find the right Edition/Work to tie Ebook to...
|
||||
|
||||
# we need to find the right Edition/Work to tie Ebook to...
|
||||
|
||||
# look for the Edition with which to associate ebook.
|
||||
# loop through the isbns to see whether we get one that is not None
|
||||
|
||||
|
||||
for isbn in isbns:
|
||||
edition = bookloader.add_by_isbn(isbn)
|
||||
if edition is not None: break
|
||||
if edition is not None:
|
||||
break
|
||||
|
||||
if doab_id and not edition:
|
||||
# make sure there's not already a doab_id
|
||||
idents = models.Identifier.objects.filter(type='doab', value=doab_id)
|
||||
for ident in idents:
|
||||
edition = ident.edition if ident.edition else ident.work.preferred_edition
|
||||
break
|
||||
|
||||
if edition is not None:
|
||||
# if this is a new edition, then add related editions asynchronously
|
||||
if getattr(edition,'new', False):
|
||||
tasks.populate_edition.delay(edition.isbn_13)
|
||||
|
||||
# QUESTION: Is this good enough?
|
||||
# what's going to happen to edition.work if there's merging
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab',value=doab_id,
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
|
||||
work=edition.work)
|
||||
|
||||
# we need to create Edition(s) de novo
|
||||
else:
|
||||
# if there is a Work with doab_id already, attach any new Edition(s)
|
||||
try:
|
||||
work = models.Identifier.objects.get(type='doab',value=doab_id).work
|
||||
work = models.Identifier.objects.get(type='doab', value=doab_id).work
|
||||
except models.Identifier.DoesNotExist:
|
||||
work = models.Work(language=language,title=title)
|
||||
work = models.Work(language=language, title=title, age_level='18-')
|
||||
work.save()
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab',value=doab_id,
|
||||
doab_identifer = models.Identifier.get_or_add(type='doab', value=doab_id,
|
||||
work=work)
|
||||
|
||||
|
||||
|
@ -190,7 +193,7 @@ def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
|
|||
edition = models.Edition(title=title, work=work)
|
||||
edition.save()
|
||||
|
||||
isbn_id = models.Identifier.get_or_add(type='isbn',value=isbn,work=work)
|
||||
isbn_id = models.Identifier.get_or_add(type='isbn', value=isbn, work=work)
|
||||
|
||||
editions.append(edition)
|
||||
|
||||
|
@ -209,15 +212,21 @@ def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
|
|||
work.selected_edition = edition
|
||||
work.save()
|
||||
|
||||
# tie the edition to ebook
|
||||
ebook.edition = edition
|
||||
ebook.save()
|
||||
if format in ('pdf', 'epub', 'mobi'):
|
||||
ebook = models.Ebook()
|
||||
ebook.format = format
|
||||
ebook.provider = provider
|
||||
ebook.url = url
|
||||
ebook.rights = rights
|
||||
# tie the edition to ebook
|
||||
ebook.edition = edition
|
||||
ebook.save()
|
||||
|
||||
# update the cover id (could be done separately)
|
||||
cover_url = update_cover_doab(doab_id)
|
||||
|
||||
# attach more metadata
|
||||
attach_more_doab_metadata(ebook,
|
||||
attach_more_doab_metadata(edition,
|
||||
description=kwargs.get('description'),
|
||||
subjects=kwargs.get('subject'),
|
||||
publication_date=kwargs.get('date'),
|
||||
|
@ -227,30 +236,21 @@ def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
|
|||
|
||||
def load_doab_records(fname, limit=None, async=True):
|
||||
|
||||
from regluit.core import tasks
|
||||
success_count = 0
|
||||
ebook_count = 0
|
||||
|
||||
records = json.load(open(fname))
|
||||
|
||||
for (i, book) in enumerate(islice(records,limit)):
|
||||
d = dict(book)
|
||||
if d['format'] == 'pdf':
|
||||
try:
|
||||
if async:
|
||||
task_id = tasks.load_doab_edition.delay(**dict(book))
|
||||
|
||||
ct = models.CeleryTask()
|
||||
ct.task_id = task_id
|
||||
ct.function_name = "load_doab_edition"
|
||||
ct.user = None
|
||||
ct.description = "Loading DOAB %s " % (dict(book)['doab_id'])
|
||||
ct.save()
|
||||
|
||||
else:
|
||||
edition = load_doab_edition(**dict(book))
|
||||
success_count += 1
|
||||
except Exception, e:
|
||||
logger.warning(e)
|
||||
try:
|
||||
ebook = load_doab_edition(**dict(book))
|
||||
success_count += 1
|
||||
if ebook:
|
||||
ebook_count +=1
|
||||
except Exception, e:
|
||||
logger.warning(e)
|
||||
|
||||
logger.info("Number of books successfully uploaded: " + str(success_count))
|
||||
logger.info("Number of records processed: " + str(success_count))
|
||||
logger.info("Number of ebooks processed: " + str(ebook_count))
|
||||
|
|
@ -26,7 +26,6 @@ from regluit.core import (
|
|||
librarything,
|
||||
mobigen
|
||||
)
|
||||
from regluit.core.loaders import doab
|
||||
from regluit.core.models import Campaign, Acq, Gift
|
||||
from regluit.core.signals import deadline_impending
|
||||
from regluit.core.parameters import RESERVE, REWARDS, THANKS
|
||||
|
@ -176,13 +175,6 @@ def refresh_acqs():
|
|||
else:
|
||||
acq.refreshed = True
|
||||
|
||||
@task
|
||||
def load_doab_edition(title, doab_id, seed_isbn, url, format, rights, language, isbns,
|
||||
provider='Directory of Open Access Books', **kwargs):
|
||||
|
||||
return doab.load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
|
||||
language, isbns, provider, **kwargs)
|
||||
|
||||
@task
|
||||
def convert_to_mobi(input_url, input_format="application/epub+zip"):
|
||||
return mobigen.convert_to_mobi(input_url, input_format)
|
||||
|
|
Loading…
Reference in New Issue