regluit/core/models/bibmodels.py

1324 lines
47 KiB
Python
Raw Normal View History

2016-07-30 06:36:01 +00:00
import logging
import math
import re
import urllib
import urllib2
import uuid
from decimal import Decimal
import unicodedata
from urlparse import urlparse
from sorl.thumbnail import get_thumbnail
from PIL import ImageFile
2016-07-30 06:36:01 +00:00
from django.conf import settings
from django.contrib.auth.models import User
from django.contrib.contenttypes.fields import GenericRelation
from django.core.files.base import ContentFile
from django.core.urlresolvers import reverse
from django.db import models
from django.db.models import F
from django.db.models.signals import post_save, pre_delete
2018-04-19 16:24:34 +00:00
from django.utils.timezone import now
2016-07-30 06:36:01 +00:00
from django_comments.models import Comment
2016-07-30 06:36:01 +00:00
import regluit
from regluit.marc.models import MARCRecord as NewMARC
2017-06-20 15:08:14 +00:00
from questionnaire.models import Landing
2016-07-30 06:36:01 +00:00
from regluit.core import mobi
2016-07-30 06:36:01 +00:00
import regluit.core.cc as cc
from regluit.core.epub import test_epub
2017-08-16 19:46:19 +00:00
from regluit.core.links import id_url
from regluit.core.validation import valid_subject
2016-07-30 06:36:01 +00:00
from regluit.core.parameters import (
2016-08-05 19:53:29 +00:00
AGE_LEVEL_CHOICES,
2016-07-30 06:36:01 +00:00
BORROWED,
BUY2UNGLUE,
ID_CHOICES_MAP,
2016-07-30 06:36:01 +00:00
INDIVIDUAL,
LIBRARY,
OFFER_CHOICES,
TESTING,
2016-08-05 19:53:29 +00:00
TEXT_RELATION_CHOICES,
2016-07-30 06:36:01 +00:00
THANKED,
THANKS,
WORK_IDENTIFIERS,
2016-07-30 06:36:01 +00:00
)
2017-07-27 19:13:04 +00:00
# fix truncated file problems per https://stackoverflow.com/questions/12984426/python-pil-ioerror-image-file-truncated-with-big-images
ImageFile.LOAD_TRUNCATED_IMAGES = True
2016-07-30 06:36:01 +00:00
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Names of ebook providers singled out as "good".
# NOTE(review): the consumer of this tuple is not visible in this part of the file.
good_providers = ('Internet Archive', 'Unglue.it', 'Github', 'OAPEN Library')
2016-07-30 06:36:01 +00:00
def id_for(obj, type):
    """Return the value of the first identifier of the given type on obj.

    An object without a primary key, or without any identifier of that
    type, yields the empty string.
    """
    if obj.pk:
        matches = obj.identifiers.filter(type=type)
        try:
            return matches[0].value
        except IndexError:
            pass
    return ''
2016-07-30 06:36:01 +00:00
class Identifier(models.Model):
    """An external identifier attached to a Work and, optionally, an Edition."""

    # type codes: olib, ltwk, goog, gdrd, thng, isbn, oclc, olwk, doab, gtbg, glue, doi
    type = models.CharField(max_length=4, null=False)
    value = models.CharField(max_length=250, null=False)
    work = models.ForeignKey("Work", related_name="identifiers", null=False)
    edition = models.ForeignKey("Edition", related_name="identifiers", null=True)

    class Meta:
        unique_together = ("type", "value")

    @staticmethod
    def set(type=None, value=None, edition=None, work=None):
        """Point the (type, value) identifier at the given work and edition.

        The identifier is created when it does not exist yet; types listed
        in WORK_IDENTIFIERS are created without an edition. An existing
        identifier is re-pointed when it refers to a different work or
        edition. Any other identifier with the same type, work and edition
        but a different value is deleted. Returns the identifier.
        """
        existing = Identifier.objects.filter(type=type, value=value)
        try:
            identifier = existing[0]
        except IndexError:
            create_args = {'type': type, 'value': value, 'work': work}
            if type not in WORK_IDENTIFIERS:
                create_args['edition'] = edition
            identifier = Identifier.objects.create(**create_args)

        if identifier.work_id != work.id:
            identifier.work = work
            identifier.save()
        # the edition is only re-pointed when the identifier already has one
        if identifier.edition and edition and identifier.edition_id != edition.id:
            identifier.edition = edition
            identifier.save()

        superseded = Identifier.objects.filter(
            type=type, work=work, edition=edition
        ).exclude(value=value)
        for stale in superseded:
            stale.delete()
        return identifier

    @staticmethod
    def get_or_add(type='goog', value=None, edition=None, work=None):
        """Return the (type, value) identifier, saving a new one if absent."""
        try:
            found = Identifier.objects.get(type=type, value=value)
        except Identifier.DoesNotExist:
            found = Identifier(type=type, value=value, edition=edition, work=work)
            found.save()
        return found

    def __unicode__(self):
        return u'{0}:{1}'.format(self.type, self.value)

    def label(self):
        """Return the display label for this type, falling back to the code."""
        return ID_CHOICES_MAP.get(self.type, self.type)

    def url(self):
        """Return the result of id_url() for this identifier's type and value."""
        return id_url(self.type, self.value)
2016-07-30 06:36:01 +00:00
class Work(models.Model):
    """A bibliographic work, grouping editions, identifiers and campaigns.

    Most accessors delegate to the "preferred edition" (see
    preferred_edition) and fall back to work-level data.
    """

    created = models.DateTimeField(auto_now_add=True, db_index=True,)
    title = models.CharField(max_length=1000)
    language = models.CharField(max_length=5, default="en", null=False, db_index=True,)
    openlibrary_lookup = models.DateTimeField(null=True, blank=True)
    num_wishes = models.IntegerField(default=0, db_index=True)
    description = models.TextField(default='', null=True, blank=True)
    selected_edition = models.ForeignKey("Edition", related_name='selected_works', null=True)
    # repurposed earliest_publication to actually be publication range
    publication_range = models.CharField(max_length=50, null=True, blank=True)
    featured = models.DateTimeField(null=True, blank=True, db_index=True,)
    is_free = models.BooleanField(default=False)
    landings = GenericRelation(Landing, related_query_name='works')
    related = models.ManyToManyField('self', symmetrical=False, blank=True, through='WorkRelation', related_name='reverse_related')
    age_level = models.CharField(max_length=5, choices=AGE_LEVEL_CHOICES, default='', blank=True)

    class Meta:
        ordering = ['title']

    def __unicode__(self):
        return self.title

    def __init__(self, *args, **kwargs):
        # NOTE(review): last_campaign() caches under "_last_campaign_" (trailing
        # underscore), so this attribute is not read anywhere in this class.
        self._last_campaign = None
        super(Work, self).__init__(*args, **kwargs)

    def delete(self, cascade=True, *args, **kwargs):
        """Delete the work, optionally cleaning up dependent objects first.

        With cascade=True the deletion is silently skipped when the work has
        any offers, claims, campaigns, acqs, holds or landings. Otherwise
        wishlist entries, contributor links, identifiers, comments, editions
        (with their ebooks and ebook files) and work relations are removed
        before the work itself.
        """
        if cascade:
            blockers = (
                self.offers, self.claim, self.campaigns,
                self.acqs, self.holds, self.landings,
            )
            if any(manager.exists() for manager in blockers):
                return
            for wishlist in self.wishlists.all():
                wishlist.remove_work(self)
            for userprofile in self.contributors.all():
                userprofile.works.remove(self)
            for identifier in self.identifiers.all():
                identifier.delete()
            for comment in Comment.objects.for_model(self):
                comment.delete()
            for edition in self.editions.all():
                for ebook in edition.ebooks.all():
                    ebook.delete()
                for ebookfile in edition.ebook_files.all():
                    ebookfile.delete()
                edition.delete()
            for work_relation in self.works_related_to.all():
                work_relation.delete()
            for work_relation in self.works_related_from.all():
                work_relation.delete()
        super(Work, self).delete(*args, **kwargs)  # Call the "real" delete() method.

    def id_for(self, type):
        """Return the value of this work's first identifier of the given type, or ''."""
        return id_for(self, type)

    @property
    def gtbg(self):
        return id_for(self, 'gtbg')

    @property
    def doab(self):
        return id_for(self, 'doab')

    @property
    def doi(self):
        return self.id_for('doi')

    @property
    def http_id(self):
        return self.id_for('http')

    @property
    def googlebooks_id(self):
        """Google Books id of the preferred edition, else of the work, else ''."""
        try:
            preferred_id = self.preferred_edition.googlebooks_id
            # note that there should always be a preferred edition
        except AttributeError:
            # this work has no edition.
            return ''
        if preferred_id:
            return preferred_id
        try:
            return self.identifiers.filter(type='goog')[0].value
        except IndexError:
            return ''

    @property
    def googlebooks_url(self):
        return id_url('goog', self.googlebooks_id)

    @property
    def goodreads_id(self):
        """Goodreads id of the preferred edition, else of the work, else ''."""
        preferred = self.preferred_edition
        if preferred is None:
            # a work without editions has no id; same guard as first_oclc()
            return ''
        preferred_id = preferred.goodreads_id
        if preferred_id:
            return preferred_id
        try:
            return self.identifiers.filter(type='gdrd')[0].value
        except IndexError:
            return ''

    @property
    def goodreads_url(self):
        return id_url('gdrd', self.goodreads_id)

    @property
    def librarything_id(self):
        return self.id_for('ltwk')

    @property
    def librarything_url(self):
        return id_url('ltwk', self.librarything_id)

    @property
    def openlibrary_id(self):
        return self.id_for('olwk')

    @property
    def openlibrary_url(self):
        return id_url('olwk', self.openlibrary_id)

    def cover_filetype(self):
        """Guess the cover image type ('jpeg', 'png', 'gif' or 'image')."""
        if self.uses_google_cover():
            return 'jpeg'
        # consider the path only and not the params, query, or fragment
        path = urlparse(self.cover_image_small().lower()).path
        if path.endswith('.png'):
            return 'png'
        if path.endswith('.gif'):
            return 'gif'
        if path.endswith(('.jpg', '.jpeg')):
            return 'jpeg'
        return 'image'

    def work_ids(self):
        """Return the identifiers that are not tied to any edition."""
        return self.identifiers.filter(edition__isnull=True)

    def uses_google_cover(self):
        """Return False if the preferred edition has its own cover_image,
        otherwise the (possibly empty) Google Books id."""
        preferred = self.preferred_edition
        if preferred and preferred.cover_image:
            return False
        return self.googlebooks_id

    def cover_image_large(self):
        preferred = self.preferred_edition
        if preferred and preferred.has_cover_image():
            return preferred.cover_image_large()
        return "/static/images/generic_cover_larger.png"

    def cover_image_small(self):
        preferred = self.preferred_edition
        if preferred and preferred.has_cover_image():
            return preferred.cover_image_small()
        return "/static/images/generic_cover_larger.png"

    def cover_image_thumbnail(self):
        try:
            preferred = self.preferred_edition
            if preferred and preferred.has_cover_image():
                return preferred.cover_image_thumbnail()
        except IndexError:
            pass
        return "/static/images/generic_cover_larger.png"

    def authors(self):
        """Authors of the preferred edition, else of the first edition that has any."""
        # assumes that they come out in the same order they go in!
        preferred = self.preferred_edition
        if preferred and preferred.authors.all().exists():
            return preferred.authors.all()
        for edition in self.editions.all():
            if edition.authors.all().exists():
                return edition.authors.all()
        return Author.objects.none()

    def relators(self):
        """Relators of the preferred edition, else of the first edition that has any."""
        # assumes that they come out in the same order they go in!
        preferred = self.preferred_edition
        if preferred and preferred.relators.all().exists():
            return preferred.relators.all()
        for edition in self.editions.all():
            if edition.relators.all().exists():
                return edition.relators.all()
        return Relator.objects.none()

    def author(self):
        """Return the display name of the first relator, or ''."""
        # assumes that they come out in the same order they go in!
        relators = self.relators()
        if relators.count() > 0:
            return relators[0].name
        return ''

    def authors_short(self):
        """Return a short credit line: one name, "A and B", or "A et al."."""
        # assumes that they come out in the same order they go in!
        # resolve the relators once instead of re-running the lookup per access
        relators = self.relators()
        count = relators.count()
        if count == 0:
            return ''
        first = relators[0]
        if count == 1:
            return first.name
        if count == 2:
            second = relators[1]
            if first.relation == second.relation:
                if first.relation.code == 'aut':
                    return "%s and %s" % (first.author.name, second.author.name)
                return "%s and %s, %ss" % (first.author.name, second.author.name, first.relation.name)
            return "%s (%s) and %s (%s)" % (
                first.author.name, first.relation.name,
                second.author.name, second.relation.name,
            )
        if first.relation.code == 'aut':
            return "%s et al." % first.author.name
        return "%s et al. (%ss)" % (first.author.name, first.relation.name)

    def kindle_safe_title(self):
        """
        Removes accents, keeps letters and numbers, replaces non-Latin characters with "#", and replaces punctuation with "_"
        """
        pieces = []
        nkfd_form = unicodedata.normalize('NFKD', self.title)  # unaccent accented letters
        for c in nkfd_form:
            ccat = unicodedata.category(c)
            if ccat.startswith('L') or ccat.startswith('N'):  # only letters and numbers
                # '#' stands in for a non latin script letter or number
                pieces.append('#' if ord(c) > 127 else c)
            elif not unicodedata.combining(c):  # not accents (combining forms)
                pieces.append('_')  # punctuation
        return u''.join(pieces)

    def last_campaign(self):
        """Return the most recently created campaign, or None (cached per instance)."""
        # stash away the last campaign to prevent repeated lookups
        if hasattr(self, '_last_campaign_'):
            return self._last_campaign_
        try:
            self._last_campaign_ = self.campaigns.order_by('-created')[0]
        except IndexError:
            self._last_campaign_ = None
        return self._last_campaign_

    @property
    def preferred_edition(self):
        """Return the edition that represents this work, or None.

        Order of preference: the stored selected_edition, the edition of the
        last campaign, then the first edition ordered by '-cover_image',
        '-created'. The latter two are saved as selected_edition. A work
        without editions defers to the work recorded for it in WasWork.
        """
        if self.selected_edition:
            return self.selected_edition
        campaign = self.last_campaign()
        if campaign and campaign.edition:
            self.selected_edition = campaign.edition
            self.save()
            return campaign.edition
        try:
            # prefer editions with covers
            chosen = self.editions.all().order_by('-cover_image', '-created')[0]
        except IndexError:
            #should only happen if there are no editions for the work,
            #which can happen when works are being merged
            try:
                return WasWork.objects.get(was=self.id).work.preferred_edition
            except WasWork.DoesNotExist:
                #should not happen
                logger.warning('work {} has no edition'.format(self.id))
                return None
        self.selected_edition = chosen
        self.save()
        return self.selected_edition

    def last_campaign_status(self):
        campaign = self.last_campaign()
        if campaign:
            return campaign.status
        if self.first_ebook():
            return "Available"
        return "No campaign yet"

    def percent_unglued(self):
        """Return campaign progress as an integer capped at 6 (6 = done)."""
        campaign = self.last_campaign()
        if campaign is None:
            return 0
        if campaign.status == 'SUCCESSFUL':
            return 6
        if campaign.status != 'ACTIVE':
            return 0
        if campaign.target is None:
            #shouldn't happen, but did once because of a failed pdf conversion
            return 0
        target = float(campaign.target)
        if target <= 0:
            return 6
        if campaign.type == BUY2UNGLUE:
            status = int(6 - 6*campaign.left/campaign.target)
        else:
            status = int(float(campaign.current_total)*6/target)
        return min(status, 6)

    def percent_of_goal(self):
        campaign = self.last_campaign()
        return 0 if campaign is None else campaign.percent_of_goal()

    def ebooks_all(self):
        return self.ebooks(all=True)

    def ebooks(self, all=False):
        """Return this work's ebooks, newest first; only active ones unless all=True."""
        found = Ebook.objects.filter(edition__work=self)
        if not all:
            found = found.filter(active=True)
        return found.order_by('-created')

    def ebookfiles(self):
        return EbookFile.objects.filter(edition__work=self).exclude(file='').order_by('-created')

    def epubfiles(self):
        # filter out non-epub because that's what booxtream accepts
        return EbookFile.objects.filter(edition__work=self, format='epub').exclude(file='').order_by('-created')

    def mobifiles(self):
        return EbookFile.objects.filter(edition__work=self, format='mobi').exclude(file='').order_by('-created')

    def pdffiles(self):
        return EbookFile.objects.filter(edition__work=self, format='pdf').exclude(file='').order_by('-created')

    def versions(self):
        """Return the distinct non-empty version labels, in ebooks_all() order."""
        version_labels = []
        for ebook in self.ebooks_all():
            if ebook.version_label and ebook.version_label not in version_labels:
                version_labels.append(ebook.version_label)
        return version_labels

    def formats(self):
        """Return the formats for which at least one active ebook exists."""
        active = self.ebooks()
        return [
            fmt for fmt in ['pdf', 'epub', 'mobi', 'html']
            if active.filter(format=fmt).exists()
        ]

    def remove_old_ebooks(self):
        """Deactivate superseded ebooks and delete file-less EbookFile rows."""
        # this method is triggered after an file upload or new ebook saved
        old = Ebook.objects.filter(edition__work=self, active=True).order_by('-version_iter', '-created')

        # keep highest version ebook for each format and version label
        done_format_versions = set()
        for eb in old:
            format_version = '{}_{}'.format(eb.format, eb.version_label)
            if format_version in done_format_versions:
                eb.deactivate()
            else:
                done_format_versions.add(format_version)

        # check for failed uploads.
        null_files = EbookFile.objects.filter(edition__work=self, file='')
        for ebf in null_files:
            ebf.file.delete()
            ebf.delete()

    @property
    def download_count(self):
        return sum(ebook.download_count for ebook in self.ebooks(all=True))

    def first_pdf(self):
        return self.first_ebook('pdf')

    def first_epub(self):
        return self.first_ebook('epub')

    def first_pdf_url(self):
        """Return the URL of the newest active pdf ebook, or None."""
        ebook = self.first_ebook('pdf')
        return None if ebook is None else ebook.url

    def first_epub_url(self):
        """Return the URL of the newest active epub ebook, or None."""
        ebook = self.first_ebook('epub')
        return None if ebook is None else ebook.url

    def first_ebook(self, ebook_format=None):
        """Return the newest active ebook (optionally of one format), or None."""
        candidates = self.ebooks()
        if ebook_format:
            candidates = candidates.filter(format=ebook_format)
        # ebooks() is ordered by '-created', so first() is the newest
        return candidates.first()

    def wished_by(self, excluding=None):
        """Return users wishing for this work, optionally excluding a created-date range."""
        wishers = User.objects.filter(wishlist__wishes__work=self)
        if excluding:
            return wishers.exclude(wishlist__wishes__created__range=excluding)
        return wishers

    def update_num_wishes(self):
        self.num_wishes = self.wishes.count()
        self.save()

    def priority(self):
        """Return a 0-5 score: 5 with a campaign, else free-ness plus log of wishes."""
        if self.last_campaign():
            return 5
        freedom = 1 if self.is_free else 0
        wishing = int(math.log(self.num_wishes)) + 1 if self.num_wishes else 0
        return min(freedom + wishing, 5)

    def first_oclc(self):
        preferred = self.preferred_edition
        if preferred is None:
            return ''
        preferred_id = preferred.oclc
        if preferred_id:
            return preferred_id
        try:
            return self.identifiers.filter(type='oclc')[0].value
        except IndexError:
            return ''

    def first_isbn_13(self):
        preferred = self.preferred_edition
        if preferred is None:
            return ''
        preferred_id = preferred.isbn_13
        if preferred_id:
            return preferred_id
        try:
            return self.identifiers.filter(type='isbn')[0].value
        except IndexError:
            return ''

    @property
    def earliest_publication_date(self):
        """Return the lowest-sorting edition publication_date of 4+ characters, or None."""
        dated = Edition.objects.filter(work=self, publication_date__isnull=False)
        for edition in dated.order_by('publication_date'):
            if edition.publication_date and len(edition.publication_date) >= 4:
                return edition.publication_date
        return None

    @property
    def publication_date(self):
        """Return the publication year or "first-last" year range, or ''.

        A stored publication_range wins. Otherwise the range is computed
        from the first four characters of the editions' publication dates,
        saved on the work, and returned.
        """
        if self.publication_range:
            return self.publication_range
        dated = Edition.objects.filter(work=self, publication_date__isnull=False)
        earliest = next(
            (e.publication_date[:4] for e in dated.order_by('publication_date') if e.publication_date),
            None
        )
        if earliest is None:
            return ''
        # fall back to the earliest year so the concatenation below never sees None
        latest = next(
            (e.publication_date[:4] for e in dated.order_by('-publication_date') if e.publication_date),
            earliest
        )
        if earliest == latest:
            publication_range = earliest
        else:
            publication_range = earliest + "-" + latest
        self.publication_range = publication_range
        self.save()
        return publication_range

    @property
    def has_unglued_edition(self):
        """
        allows us to distinguish successful campaigns with ebooks still in progress from successful campaigns with ebooks available
        """
        return self.ebooks().filter(edition__unglued=True).exists()

    @property
    def user_with_rights(self):
        """
        return the user (there should be at most one) who acts for the rights holder with an active claim to the work, or False if there is none
        """
        claims = self.claim.filter(status='active')
        assert claims.count() < 2, "There is more than one active claim on %r" % self.title
        try:
            return claims[0].user
        except IndexError:
            return False

    def get_absolute_url(self):
        return reverse('work', args=[str(self.id)])

    def publishers(self):
        # returns a set of publishers associated with this Work
        return Publisher.objects.filter(name__editions__work=self).distinct()

    def create_offers(self):
        """Create a price-10 active offer for each missing OFFER_CHOICES license."""
        for choice in OFFER_CHOICES:
            if not self.offers.filter(license=choice[0]).exists():
                self.offers.create(license=choice[0], active=True, price=Decimal(10))
        return self.offers.all()

    def get_lib_license(self, user):
        """Return the user_license for the users of the given user's libraries."""
        lib_user = (lib.user for lib in user.profile.libraries)
        return self.get_user_license(lib_user)

    def borrowable(self, user):
        if user.is_anonymous():
            return False
        lib_license = self.get_lib_license(user)
        return bool(lib_license and lib_license.borrowable)

    def lib_thanked(self, user):
        if user.is_anonymous():
            return False
        lib_license = self.get_lib_license(user)
        return bool(lib_license and lib_license.thanked)

    def in_library(self, user):
        if user.is_anonymous():
            return False
        lib_license = self.get_lib_license(user)
        return bool(lib_license and lib_license.acqs.count())

    @property
    def lib_acqs(self):
        return self.acqs.filter(license=LIBRARY)

    @property
    def test_acqs(self):
        return self.acqs.filter(license=TESTING).order_by('-created')

    class user_license(object):
        """Wrapper around a queryset of acqs, answering license questions about it."""

        acqs = Identifier.objects.none()  # Identifier is just convenient.

        def __init__(self, acqs):
            self.acqs = acqs

        @property
        def is_active(self):
            return (
                self.acqs.filter(expires__isnull=True).exists()
                or self.acqs.filter(expires__gt=now()).exists()
            )

        @property
        def borrowed(self):
            loans = self.acqs.filter(license=BORROWED, expires__gt=now())
            try:
                return loans[0]
            except IndexError:
                return None

        @property
        def purchased(self):
            purchases = self.acqs.filter(license=INDIVIDUAL, expires__isnull=True)
            try:
                return purchases[0]
            except IndexError:
                return None

        @property
        def lib_acqs(self):
            return self.acqs.filter(license=LIBRARY)

        @property
        def next_acq(self):
            """ This is the next available copy in the user's libraries"""
            loans = self.acqs.filter(license=LIBRARY, refreshes__gt=now()).order_by('refreshes')
            try:
                return loans[0]
            except IndexError:
                return None

        @property
        def borrowable(self):
            return self.acqs.filter(license=LIBRARY, refreshes__lt=now()).exists()

        @property
        def thanked(self):
            return self.acqs.filter(license=THANKED).exists()

        @property
        def borrowable_acq(self):
            for acq in self.acqs.filter(license=LIBRARY, refreshes__lt=now()):
                return acq
            return None

        @property
        def is_duplicate(self):
            # does user have two individual licenses?
            pending = self.acqs.filter(license=INDIVIDUAL, expires__isnull=True, gifts__used__isnull=True).count()
            return self.acqs.filter(license=INDIVIDUAL, expires__isnull=True).count() > pending

    def get_user_license(self, user):
        """ This is all the acqs, wrapped in user_license object for the work, user(s) """
        if user is None:
            return None
        if hasattr(user, 'is_anonymous'):
            if user.is_anonymous():
                return None
            return self.user_license(self.acqs.filter(user=user))
        # assume it's several users
        return self.user_license(self.acqs.filter(user__in=user))

    @property
    def has_marc(self):
        return NewMARC.objects.filter(edition__work=self).exists()

    ### for compatibility with MARC output
    def marc_records(self):
        """Return stored MARC records, else the edition of the newest active ebook."""
        record_list = list(NewMARC.objects.filter(edition__work=self))
        if not record_list:
            for ebook in self.ebooks()[:1]:
                record_list.append(ebook.edition)
        return record_list
2016-08-05 19:53:29 +00:00
class WorkRelation(models.Model):
    """Through model for Work.related: a typed, directed link between two works."""

    to_work = models.ForeignKey('Work', related_name='works_related_to')
    from_work = models.ForeignKey('Work', related_name='works_related_from')
    relation = models.CharField(max_length=15, choices=TEXT_RELATION_CHOICES)
2016-07-30 06:36:01 +00:00
class Author(models.Model):
    """A named contributor, linked to editions through Relator."""

    created = models.DateTimeField(auto_now_add=True)
    name = models.CharField(max_length=255, unique=True)
    editions = models.ManyToManyField("Edition", related_name="authors", through="Relator")

    def __unicode__(self):
        return self.name

    @property
    def last_name_first(self):
        """Return the name as "Last, First Middle", treating the final
        whitespace-separated token as the last name."""
        parts = self.name.split()
        if not parts:
            return ''
        if len(parts) == 1:
            return parts[0]
        return parts[-1] + ", " + " ".join(parts[:-1])
class Relation(models.Model):
    """A contributor role, keyed by a unique code of up to three characters (e.g. 'aut')."""

    code = models.CharField(max_length=3, blank=False, db_index=True, unique=True)
    name = models.CharField(max_length=30, blank=True,)
class Relator(models.Model):
    """Links an Author to an Edition in a given Relation (role)."""

    relation = models.ForeignKey('Relation', default=1) #first relation should have code='aut'
    author = models.ForeignKey('Author')
    edition = models.ForeignKey('Edition', related_name='relators')

    class Meta:
        db_table = 'core_author_editions'

    @property
    def name(self):
        """Author name, with the role in parentheses unless the role is 'aut'."""
        author_name = self.author.name
        if self.relation.code == 'aut':
            return author_name
        return "%s (%s)" % (author_name, self.relation.name)

    def set(self, relation_code):
        """Switch to the relation with the given code and save.

        Nothing happens when the code is already current; an unknown code
        is logged as a warning and otherwise ignored.
        """
        if self.relation.code == relation_code:
            return
        try:
            new_relation = Relation.objects.get(code=relation_code)
        except Relation.DoesNotExist:
            logger.warning("relation not found: code = %s" % relation_code)
            return
        self.relation = new_relation
        self.save()
class Subject(models.Model):
    """A subject heading or keyword that can be attached to works."""

    created = models.DateTimeField(auto_now_add=True)
    name = models.CharField(max_length=200, unique=True)
    works = models.ManyToManyField("Work", related_name="subjects")
    is_visible = models.BooleanField(default=True)
    authority = models.CharField(max_length=10, blank=False, default="")

    class Meta:
        ordering = ['name']

    @classmethod
    def set_by_name(cls, subject, work=None, authority=None):
        ''' use this method whenever you would be creating a new subject!

        subject may be a ';'-separated list; every element is processed and
        the Subject for the FIRST element is returned (None if that element
        is rejected by valid_subject). Recognized prefixes:
          "!<authority>:<name>"  sets the authority from the heading
          "nyt:<list>=..."       becomes "NYT Bestseller - <List>"
          "award:<name>=..."     becomes "Award Winner - <Name>"
        When work is given, the subject is attached to it.
        '''
        subject = subject.strip()
        # make sure it's not a ; delineated list
        subjects = subject.split(';')
        for additional_subject in subjects[1:]:
            cls.set_by_name(additional_subject, work, authority)
        # strip again: "a ; b" leaves trailing whitespace on the first element
        subject = subjects[0].strip()

        # make sure there's no heading
        headingmatch = re.match(r'^!(.+):(.+)', subject)
        if headingmatch:
            subject = headingmatch.group(2).strip()
            authority = headingmatch.group(1).strip()
        elif subject.startswith('nyt:'):
            subject = subject[4:].split('=')[0].replace('_', ' ').strip().capitalize()
            subject = 'NYT Bestseller - {}'.format(subject)
            authority = 'nyt'
        elif subject.startswith('award:'):
            subject = subject[6:].split('=')[0].replace('_', ' ').strip().capitalize()
            subject = 'Award Winner - {}'.format(subject)
            authority = 'award'

        if not valid_subject(subject):
            return None
        (subject_obj, created) = cls.objects.get_or_create(name=subject)
        if not subject_obj.authority and authority:
            subject_obj.authority = authority
            subject_obj.save()
        # work defaults to None; adding None to the relation is an error
        if work is not None:
            subject_obj.works.add(work)
        return subject_obj

    def __unicode__(self):
        return self.name

    @property
    def kw(self):
        return 'kw.%s' % self.name

    def free_works(self):
        """Return the works with this subject that are flagged is_free."""
        return self.works.filter(is_free=True)
class Edition(models.Model):
    """A specific published edition of a Work."""

    created = models.DateTimeField(auto_now_add=True)
    title = models.CharField(max_length=1000)
    publisher_name = models.ForeignKey("PublisherName", related_name="editions", null=True, blank=True)
    publication_date = models.CharField(max_length=50, null=True, blank=True, db_index=True)
    work = models.ForeignKey("Work", related_name="editions", null=True)
    cover_image = models.URLField(null=True, blank=True)
    unglued = models.BooleanField(default=False)
    note = models.ForeignKey("EditionNote", null=True, blank=True)

    def __unicode__(self):
        # each identifier property is a database lookup, so resolve lazily and once
        isbn = self.isbn_13
        if isbn:
            return "%s (ISBN %s) %s" % (self.title, isbn, self.publisher)
        oclc = self.oclc
        if oclc:
            return "%s (OCLC %s) %s" % (self.title, oclc, self.publisher)
        goog = self.googlebooks_id
        if goog:
            return "%s (GOOG %s) %s" % (self.title, goog, self.publisher)
        return "%s (GLUE %s) %s" % (self.title, self.id, self.publisher)

    def _cover_thumbnail_url(self, geometry):
        """Return the thumbnail URL of cover_image at geometry, or None on failure."""
        try:
            im = get_thumbnail(self.cover_image, geometry, crop='noop', quality=95)
            if im.exists():
                return im.url
        except IOError:
            pass
        return None

    def cover_image_large(self):
        """Return the URL of a 550 pixel high cover thumbnail, or ''."""
        #550 pixel high image
        if self.cover_image:
            url = self._cover_thumbnail_url('x550')
            if url is not None:
                return url
            # NOTE(review): unlike cover_image_small/_thumbnail there is no
            # Google Books fallback when the edition has its own cover_image.
        elif self.googlebooks_id:
            url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=0" % self.googlebooks_id
            try:
                im = get_thumbnail(url, 'x550', crop='noop', quality=95)
                if not im.exists() or im.storage.size(im.name) == 16392: # check for "image not available" image
                    url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id
                    im = get_thumbnail(url, 'x550', crop='noop', quality=95)
                if im.exists():
                    return im.url
            except IOError:
                pass
        return ''

    def cover_image_small(self):
        """Return the URL of an 80 pixel high cover image, or ''."""
        #80 pixel high image
        if self.cover_image:
            url = self._cover_thumbnail_url('x80')
            if url is not None:
                return url
        if self.googlebooks_id:
            return "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=5" % self.googlebooks_id
        return ''

    def cover_image_thumbnail(self):
        """Return the URL of a 128 pixel wide cover image, or ''."""
        #128 pixel wide image
        if self.cover_image:
            url = self._cover_thumbnail_url('128')
            if url is not None:
                return url
        if self.googlebooks_id:
            return "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id
        return ''

    def has_cover_image(self):
        """Return a truthy value (the cover URL or True) if a cover is available, else False."""
        if self.cover_image:
            return self.cover_image
        if self.googlebooks_id:
            return True
        return False

    @property
    def publisher(self):
        if self.publisher_name:
            return self.publisher_name.name
        return ''

    @property
    def isbn_10(self):
        return regluit.core.isbn.convert_13_to_10(self.isbn_13)

    def id_for(self, type):
        """Return the identifier value of the given type; types listed in
        WORK_IDENTIFIERS are looked up on the work instead of the edition."""
        if type in WORK_IDENTIFIERS:
            return self.work.id_for(type)
        return id_for(self, type)

    @property
    def isbn_13(self):
        return self.id_for('isbn')

    @property
    def googlebooks_id(self):
        return self.id_for('goog')

    @property
    def librarything_id(self):
        return self.id_for('thng')

    @property
    def oclc(self):
        return self.id_for('oclc')

    @property
    def goodreads_id(self):
        return self.id_for('gdrd')

    @staticmethod
    def get_by_isbn(isbn):
        """Return the edition carrying this ISBN (10 or 13 characters), or None."""
        if len(isbn) == 10:
            isbn = regluit.core.isbn.convert_10_to_13(isbn)
        try:
            return Identifier.objects.get(type='isbn', value=isbn).edition
        except Identifier.DoesNotExist:
            return None

    def add_author(self, author_name, relation='aut'):
        """Link the named author to this edition with the given relation code,
        creating the Author, Relation and Relator rows as needed."""
        if not author_name:
            return
        (author, created) = Author.objects.get_or_create(name=author_name)
        (relation, created) = Relation.objects.get_or_create(code=relation)
        (new_relator, created) = Relator.objects.get_or_create(author=author, edition=self)
        if new_relator.relation != relation:
            new_relator.relation = relation
            new_relator.save()

    def remove_author(self, author):
        """Remove the link between author and this edition, if there is one."""
        if not author:
            return
        try:
            relator = Relator.objects.get(author=author, edition=self)
        except Relator.DoesNotExist:
            return
        relator.delete()

    def set_publisher(self, publisher_name):
        """Set the publisher name and save the edition.

        An existing PublisherName that belongs to a Publisher is replaced by
        that publisher's own name object; an unknown name is created. An
        empty publisher_name leaves the edition untouched.
        """
        if not publisher_name:
            return
        try:
            pub_name = PublisherName.objects.get(name=publisher_name)
        except PublisherName.DoesNotExist:
            # create() already saves; no second save() needed
            pub_name = PublisherName.objects.create(name=publisher_name)
        else:
            if pub_name.publisher:
                pub_name = pub_name.publisher.name
        self.publisher_name = pub_name
        self.save()

    #### following methods for compatibility with marc outputter
    def downloads(self):
        return self.ebooks.filter(active=True)

    def download_via_url(self):
        return settings.BASE_URL_SECURE + reverse('download', args=[self.work_id])

    def authnames(self):
        return [auth.last_name_first for auth in self.authors.all()]

    @property
    def license(self):
        """Return the rights of this edition's first ebook, or None without ebooks."""
        try:
            return self.ebooks.all()[0].rights
        except IndexError:
            return None

    @property
    def funding_info(self):
        """Return a sentence explaining why the book is free, or '' without ebooks."""
        try:
            first_ebook = self.ebooks.all()[0]
        except IndexError:
            return ''
        if self.unglued:
            return 'The book is available as a free download thanks to the generous support of interested readers and organizations, who made donations using the crowd-funding website Unglue.it.'
        if first_ebook.rights in cc.LICENSE_LIST:
            return 'The book is available as a free download thanks to a Creative Commons license.'
        return 'The book is available as a free download because it is in the Public Domain.'

    @property
    def description(self):
        return self.work.description
2016-08-16 21:16:44 +00:00
class EditionNote(models.Model):
    """A short note, unique by text, that editions can reference."""

    note = models.CharField(max_length=64, null=True, blank=True, unique=True)

    def __unicode__(self):
        # note is nullable; __unicode__ must return a string, never None
        return self.note or u''
2016-07-30 06:36:01 +00:00
class Publisher(models.Model):
    """A publisher; its name is the PublisherName referenced by `name`."""

    created = models.DateTimeField(auto_now_add=True)
    name = models.ForeignKey('PublisherName', related_name='key_publisher')
    url = models.URLField(max_length=1024, null=True, blank=True)
    logo_url = models.URLField(max_length=1024, null=True, blank=True)
    description = models.TextField(default='', null=True, blank=True)

    def __unicode__(self):
        publisher_name = self.name
        return publisher_name.name
class PublisherName(models.Model):
    """A publisher name string, optionally an alias of a Publisher."""

    name = models.CharField(max_length=255, blank=False, unique=True)
    publisher = models.ForeignKey('Publisher', related_name='alternate_names', null=True)

    def __unicode__(self):
        return self.name

    def save(self, *args, **kwargs):
        """Save, then move editions off this name when it is an alias."""
        super(PublisherName, self).save(*args, **kwargs) # Call the "real" save() method.
        owner = self.publisher
        if not owner:
            return
        canonical = owner.name
        if self != canonical:
            #this name is an alias, repoint all editions with this name to the other.
            for aliased_edition in Edition.objects.filter(publisher_name=self):
                aliased_edition.publisher_name = canonical
                aliased_edition.save()
class WasWork(models.Model):
    """Maps a former work id (``was``) to the ``Work`` that replaces it.

    ``safe_get_work`` consults this table when no Work with the requested
    id exists.
    """

    # the work to hand back in place of the old id
    work = models.ForeignKey('Work')
    # the old work id; each old id can be recorded only once
    was = models.IntegerField(unique=True)
    # set automatically when the row is created
    moved = models.DateTimeField(auto_now_add=True)
    # optional user reference
    user = models.ForeignKey(settings.AUTH_USER_MODEL, null=True)
def safe_get_work(work_id):
    """
    Look up a work by id; use this rather than querying the db directly.

    If no Work has the given id, the WasWork table is checked and the work
    recorded for that former id is returned instead.

    Raises Work.DoesNotExist when neither lookup succeeds or when work_id
    is not a number.
    """
    try:
        return Work.objects.get(id=work_id)
    except Work.DoesNotExist:
        try:
            return WasWork.objects.get(was=work_id).work
        except WasWork.DoesNotExist:
            raise Work.DoesNotExist()
    except ValueError:
        # work_id is not a number
        raise Work.DoesNotExist()
def path_for_file(instance, filename):
    """Return a fresh, random storage path of the form ``ebf/<hex>.<format>``.

    instance -- object whose ``format`` attribute supplies the extension
    filename -- ignored; the generated name never depends on it
    """
    # UUID.hex exists on both Python 2 and Python 3, whereas get_hex() is
    # a Python 2-only method; the produced string is the same 32 hex digits.
    return "ebf/{}.{}".format(uuid.uuid4().hex, instance.format)
class EbookFile(models.Model):
    """A stored ebook file belonging to an edition, optionally tied to an Ebook."""

    file = models.FileField(upload_to=path_for_file)
    format = models.CharField(max_length=25, choices=settings.FORMATS)
    edition = models.ForeignKey('Edition', related_name='ebook_files')
    created = models.DateTimeField(auto_now_add=True)
    asking = models.BooleanField(default=False)
    ebook = models.ForeignKey('Ebook', related_name='ebook_files', null=True)
    source = models.URLField(null=True, blank=True)
    # 0: no conversion recorded, 1: a mobi file was made from this file,
    # -1: a conversion attempt failed
    mobied = models.IntegerField(default=0)
    # plain class attribute, not a database field
    version = None

    def check_file(self):
        """Return the result of ``test_epub`` for epub files, None for other formats."""
        if self.format == 'epub':
            return test_epub(self.file)
        return None

    @property
    def active(self):
        """Return the ``active`` flag of the first Ebook whose url is this file's url.

        Returns False when no such Ebook exists or when this record has no
        file attached.
        """
        try:
            return Ebook.objects.filter(url=self.file.url)[0].active
        except (IndexError, ValueError):
            # IndexError: no Ebook points at this file.
            # ValueError: raised by ``file.url`` when no file is attached.
            return False

    def make_mobi(self):
        """Convert this epub into a mobi EbookFile (and Ebook, when one is linked).

        Returns True on success. Returns False, without retrying, when the
        file is not an epub, when ``settings.MOBIGEN_URL`` is not set, when
        an earlier attempt already failed, or when this conversion fails.
        """
        if self.format != 'epub' or not settings.MOBIGEN_URL:
            return False
        if self.mobied < 0:
            # a previous attempt failed; do not try again
            return False
        try:
            mobi_cf = ContentFile(mobi.convert_to_mobi(self.file.url))
        except Exception:
            # Best effort: any conversion failure marks the file as failed,
            # but the cause is logged instead of being silently discarded,
            # and SystemExit/KeyboardInterrupt are no longer swallowed.
            logger.exception(u'mobi conversion failed for EbookFile {}'.format(self.id))
            self.mobied = -1
            self.save()
            return False
        new_mobi_ebf = EbookFile.objects.create(
            edition=self.edition,
            format='mobi',
            asking=self.asking,
            source=self.file.url
        )
        new_mobi_ebf.file.save(path_for_file(new_mobi_ebf, None), mobi_cf)
        new_mobi_ebf.save()
        if self.ebook:
            # mirror the linked Ebook for the new mobi file
            new_ebook = Ebook.objects.create(
                edition=self.edition,
                format='mobi',
                provider='Unglue.it',
                url=new_mobi_ebf.file.url,
                rights=self.ebook.rights,
                version_label=self.ebook.version_label,
                version_iter=self.ebook.version_iter,
            )
            new_mobi_ebf.ebook = new_ebook
            new_mobi_ebf.save()
        self.mobied = 1
        self.save()
        return True
# Size threshold used by Ebook.kindle_sendable: an ebook whose recorded
# filesize is missing or below this value is reported as sendable.
# (filesize is filled from Content-Length / file.size, so presumably bytes.)
send_to_kindle_limit = 7492232
class Ebook(models.Model):
    """A downloadable ebook (a URL in a given format) offered for an edition."""

    # largest value stored in ``filesize``; bigger sizes are clamped to it
    _MAX_FILESIZE = 2147483647

    url = models.URLField(max_length=1024) #change to unique?
    created = models.DateTimeField(auto_now_add=True, db_index=True,)
    format = models.CharField(max_length=25, choices=settings.FORMATS, blank=False)
    provider = models.CharField(max_length=255)
    download_count = models.IntegerField(default=0)
    active = models.BooleanField(default=True)
    filesize = models.PositiveIntegerField(null=True)
    version_label = models.CharField(max_length=255, default="", blank=True)
    version_iter = models.PositiveIntegerField(default=0)

    # use 'PD-US', 'CC BY', 'CC BY-NC-SA', 'CC BY-NC-ND', 'CC BY-NC', 'CC BY-ND', 'CC BY-SA', 'CC0'
    rights = models.CharField(max_length=255, null=True, choices=cc.CHOICES, db_index=True)
    edition = models.ForeignKey('Edition', related_name='ebooks')
    user = models.ForeignKey(settings.AUTH_USER_MODEL, null=True)

    def kindle_sendable(self):
        """Return True when the filesize is unknown or below ``send_to_kindle_limit``."""
        return not self.filesize or self.filesize < send_to_kindle_limit

    def get_archive(self):
        """Return the archived file of this ebook, opened, or None if unavailable."""
        ebf = self.get_archive_ebf()
        if not ebf:
            return None
        try:
            ebf.file.open()
        except ValueError:
            logger.error(u'couldn\'t open EbookFile {}'.format(ebf.id))
            return None
        except IOError:
            logger.error(u'EbookFile {} does not exist'.format(ebf.id))
            return None
        return ebf.file

    def get_archive_ebf(self):
        """Return an EbookFile holding this ebook's content, or None.

        The newest non-"asking" EbookFile linked to this ebook is used when
        one exists. Otherwise, for providers listed in ``good_providers``,
        the url is downloaded and stored as a new EbookFile. None is
        returned for other providers and when the download fails.
        """
        ebf = self.ebook_files.filter(asking=False).order_by('-created').first()
        if ebf is not None:
            if not self.filesize:
                self.filesize = min(ebf.file.size, self._MAX_FILESIZE)
                self.save()
            return ebf
        if self.provider not in good_providers:
            return None
        try:
            response = urllib2.urlopen(self.url)
            try:
                return self._store_download(response)
            finally:
                # always release the connection, including on errors
                response.close()
        except IOError:
            logger.error(u'could not open {}'.format(self.url))
        return None

    def _store_download(self, response):
        """Save an open HTTP response as a new EbookFile; None for a bad link."""
        try:
            declared_size = int(response.info().getheaders("Content-Length")[0])
        except (IndexError, ValueError):
            # no (or an unparsable) Content-Length header: probably a bad link
            logger.error(u'Bad link error: {}'.format(self.url))
            return None
        self.filesize = min(declared_size, self._MAX_FILESIZE)
        self.save()
        ebf = EbookFile.objects.create(
            edition=self.edition,
            ebook=self,
            format=self.format,
            source=self.url
        )
        ebf.file.save(path_for_file(ebf, None), ContentFile(response.read()))
        ebf.file.close()
        ebf.save()
        return ebf

    def set_provider(self):
        """Set ``provider`` from the url and return it; the caller must save()."""
        self.provider = Ebook.infer_provider(self.url)
        return self.provider

    @property
    def version(self):
        """Return the version as "label.iter", the format ``set_version`` parses."""
        if self.version_label is None:
            return u'.{}'.format(self.version_iter)
        return u'{}.{}'.format(self.version_label, self.version_iter)

    def set_version(self, version):
        """Set version_label and version_iter from one "version.iter" string and save.

        When the string does not end in ".<digits>" the whole string becomes
        the label and version_iter is left unchanged.
        """
        match = re.match(r'(.*)\.(\d+)$', version)
        if match:
            self.version_label = match.group(1)
            # keep the in-memory value an integer, like the stored one
            self.version_iter = int(match.group(2))
        else:
            self.version_label = version
        self.save()

    def set_next_iter(self):
        """Set version_iter to one above the highest used for this version and save.

        "This version" means ebooks with the same edition, version_label,
        format and provider. When no such ebook exists yet, the current
        version_iter is by definition unused and is kept.
        """
        latest = Ebook.objects.filter(
            edition=self.edition,
            version_label=self.version_label,
            format=self.format,
            provider=self.provider
        ).order_by('-version_iter').first()
        if latest is not None:
            self.version_iter = latest.version_iter + 1
        self.save()

    @property
    def rights_badge(self):
        """Return the license badge for ``rights``; missing rights count as 'PD-US'."""
        if self.rights is None:
            return cc.CCLicense.badge('PD-US')
        return cc.CCLicense.badge(self.rights)

    @staticmethod
    def infer_provider(url):
        """Return the provider name derived from url, or None when not recognised."""
        if not url:
            return None
        # dots are escaped so that only the literal host names match
        if re.match(r'https?://books\.google\.com/', url):
            provider = 'Google Books'
        elif re.match(r'https?://www\.gutenberg\.org/', url):
            provider = 'Project Gutenberg'
        elif re.match(r'https?://(www\.|)archive\.org/', url):
            provider = 'Internet Archive'
        elif re.match(r'https?://(hdl\.handle\.net/2027/|babel\.hathitrust\.org/)', url):
            provider = 'Hathitrust'
        elif re.match(r'https?://\w\w\.wikisource\.org/', url):
            provider = 'Wikisource'
        elif re.match(r'https?://\w\w\.wikibooks\.org/', url):
            provider = 'Wikibooks'
        elif re.match(r'https://github\.com/[^/ ]+/[^/ ]+/raw/[^ ]+', url):
            provider = 'Github'
        elif re.match(r'https?://www\.oapen\.org/download', url):
            provider = 'OAPEN Library'
        else:
            provider = None
        return provider

    def increment(self):
        """Add one to download_count with a single database-side update."""
        Ebook.objects.filter(id=self.id).update(download_count=F('download_count') + 1)

    @property
    def download_url(self):
        """Return the absolute URL of the 'download_ebook' view for this ebook."""
        return settings.BASE_URL_SECURE + reverse('download_ebook', args=[self.id])

    def is_direct(self):
        """Return False for Google Books and Project Gutenberg, True otherwise."""
        return self.provider not in ('Google Books', 'Project Gutenberg')

    def __unicode__(self):
        return "%s (%s from %s)" % (self.edition.title, self.format, self.provider)

    def deactivate(self):
        """Clear the active flag and save."""
        self.active = False
        self.save()

    def activate(self):
        """Set the active flag and save."""
        self.active = True
        self.save()
def set_free_flag(sender, instance, created, **kwargs):
    """Keep ``work.is_free`` in step with an Ebook that was just saved.

    A newly created active ebook marks a non-free work as free. For an
    existing ebook, saving it inactive clears the flag once the work's
    ``ebooks()`` count is zero, and saving it active sets the flag when the
    work is not free and the count is above zero.
    """
    work = instance.edition.work
    new_state = None  # None means "leave the flag alone"
    if created:
        if instance.active and not work.is_free:
            new_state = True
    elif not instance.active:
        if work.is_free and work.ebooks().count() == 0:
            new_state = False
    elif not work.is_free and work.ebooks().count() > 0:
        new_state = True
    if new_state is not None:
        work.is_free = new_state
        work.save()
# run set_free_flag after every save of an Ebook
post_save.connect(set_free_flag, sender=Ebook)
def reset_free_flag(sender, instance, **kwargs):
    """Clear ``work.is_free`` when the work's last ebook is about to be deleted.

    If the Work associated with the instance Ebook currently has only one
    ebook, it is no longer a free Work once the instance Ebook is deleted.
    """
    work = instance.edition.work
    if work.ebooks().count() != 1:
        return
    work.is_free = False
    work.save()
# run reset_free_flag just before an Ebook is deleted
pre_delete.connect(reset_free_flag, sender=Ebook)