import logging import math import re import urllib import urllib2 import uuid from decimal import Decimal import unicodedata from urlparse import urlparse from sorl.thumbnail import get_thumbnail from PIL import ImageFile from django.conf import settings from django.contrib.auth.models import User from django.contrib.contenttypes.fields import GenericRelation from django.core.files.base import ContentFile from django.urls import reverse from django.db import models from django.db.models import F from django.db.models.signals import post_save, pre_delete from django.utils.timezone import now from django_comments.models import Comment import regluit from regluit.marc.models import MARCRecord as NewMARC from questionnaire.models import Landing from regluit.core import mobi import regluit.core.cc as cc from regluit.core.epub import test_epub from regluit.core.links import id_url from regluit.core.validation import valid_subject from regluit.core.parameters import ( AGE_LEVEL_CHOICES, BORROWED, BUY2UNGLUE, ID_CHOICES_MAP, INDIVIDUAL, LIBRARY, OFFER_CHOICES, TESTING, TEXT_RELATION_CHOICES, THANKED, THANKS, WORK_IDENTIFIERS, ) # fix truncated file problems per https://stackoverflow.com/questions/12984426/python-pil-ioerror-image-file-truncated-with-big-images ImageFile.LOAD_TRUNCATED_IMAGES = True logger = logging.getLogger(__name__) good_providers = ('Internet Archive', 'Unglue.it', 'Github', 'OAPEN Library') def id_for(obj, type): if not obj.pk: return '' try: return obj.identifiers.filter(type=type)[0].value except IndexError: return '' class Identifier(models.Model): # olib, ltwk, goog, gdrd, thng, isbn, oclc, olwk, doab, gtbg, glue, doi type = models.CharField(max_length=4, null=False) value = models.CharField(max_length=250, null=False) work = models.ForeignKey("Work", on_delete=models.CASCADE, related_name="identifiers", null=False) edition = models.ForeignKey("Edition", on_delete=models.CASCADE, related_name="identifiers", null=True) class Meta: unique_together = ("type", "value") @staticmethod def set(type=None, value=None, edition=None, work=None): # if there's already an id of this type for this work and edition, change it # if not, create it. if the id exists and points to something else, change it. try: identifier = Identifier.objects.filter(type=type, value=value)[0] except IndexError: if type in WORK_IDENTIFIERS: identifier = Identifier.objects.create(type=type, value=value, work=work) else: identifier = Identifier.objects.create(type=type, value=value, work=work, edition=edition) if identifier.work_id != work.id: identifier.work = work identifier.save() if identifier.edition and edition: if identifier.edition_id != edition.id: identifier.edition = edition identifier.save() others = Identifier.objects.filter(type=type, work=work, edition=edition).exclude(value=value) if others.count() > 0: for other in others: other.delete() return identifier @staticmethod def get_or_add(type='goog', value=None, edition=None, work=None): try: return Identifier.objects.get(type=type, value=value) except Identifier.DoesNotExist: i = Identifier(type=type, value=value, edition=edition, work=work) i.save() return i def __unicode__(self): return u'{0}:{1}'.format(self.type, self.value) def label(self): return ID_CHOICES_MAP.get(self.type, self.type) def url(self): return id_url(self.type, self.value) class Work(models.Model): created = models.DateTimeField(auto_now_add=True, db_index=True,) title = models.CharField(max_length=1000) language = models.CharField(max_length=5, default="en", null=False, db_index=True,) openlibrary_lookup = models.DateTimeField(null=True, blank=True) num_wishes = models.IntegerField(default=0, db_index=True) description = models.TextField(default='', null=True, blank=True) selected_edition = models.ForeignKey("Edition", on_delete=models.CASCADE, related_name='selected_works', null=True) # repurposed earliest_publication to actually be publication range publication_range = models.CharField(max_length=50, null=True, blank=True) featured = models.DateTimeField(null=True, blank=True, db_index=True,) is_free = models.BooleanField(default=False) landings = GenericRelation(Landing, related_query_name='works') related = models.ManyToManyField('self', symmetrical=False, blank=True, through='WorkRelation', related_name='reverse_related') age_level = models.CharField(max_length=5, choices=AGE_LEVEL_CHOICES, default='', blank=True) class Meta: ordering = ['title'] def __unicode__(self): return self.title def __init__(self, *args, **kwargs): self._last_campaign = None super(Work, self).__init__(*args, **kwargs) def delete(self, cascade=True, *args, **kwargs): if cascade: if self.offers.all() or self.claim.all() or self.campaigns.all() or self.acqs.all() \ or self.holds.all() or self.landings.all(): return for wishlist in self.wishlists.all(): wishlist.remove_work(self) for userprofile in self.contributors.all(): userprofile.works.remove(self) for identifier in self.identifiers.all(): identifier.delete() for comment in Comment.objects.for_model(self): comment.delete() for edition in self.editions.all(): for ebook in edition.ebooks.all(): ebook.delete() for ebookfile in edition.ebook_files.all(): ebookfile.delete() edition.delete() for work_relation in self.works_related_to.all(): work_relation.delete() for work_relation in self.works_related_from.all(): work_relation.delete() super(Work, self).delete(*args, **kwargs) # Call the "real" save() method. def id_for(self, type): return id_for(self, type) @property def gtbg(self): return id_for(self, 'gtbg') @property def doab(self): return id_for(self, 'doab') @property def doi(self): return self.id_for('doi') @property def http_id(self): return self.id_for('http') @property def googlebooks_id(self): try: preferred_id = self.preferred_edition.googlebooks_id # note that there should always be a preferred edition except AttributeError: # this work has no edition. return '' if preferred_id: return preferred_id try: return self.identifiers.filter(type='goog')[0].value except IndexError: return '' @property def googlebooks_url(self): return id_url('goog', self.googlebooks_id) @property def goodreads_id(self): preferred_id = self.preferred_edition.goodreads_id if preferred_id: return preferred_id try: return self.identifiers.filter(type='gdrd')[0].value except IndexError: return '' @property def goodreads_url(self): return id_url('gdrd', self.goodreads_id) @property def librarything_id(self): return self.id_for('ltwk') @property def librarything_url(self): return id_url('ltwk', self.librarything_id) @property def openlibrary_id(self): return self.id_for('olwk') @property def openlibrary_url(self): return id_url('olwk', self.openlibrary_id) def cover_filetype(self): if self.uses_google_cover(): return 'jpeg' else: # consider the path only and not the params, query, or fragment url = urlparse(self.cover_image_small().lower()).path if url.endswith('.png'): return 'png' elif url.endswith('.gif'): return 'gif' elif url.endswith('.jpg') or url.endswith('.jpeg'): return 'jpeg' else: return 'image' def work_ids(self): return self.identifiers.filter(edition__isnull=True) def uses_google_cover(self): if self.preferred_edition and self.preferred_edition.cover_image: return False else: return self.googlebooks_id def cover_image_large(self): if self.preferred_edition and self.preferred_edition.has_cover_image(): return self.preferred_edition.cover_image_large() return "/static/images/generic_cover_larger.png" def cover_image_small(self): if self.preferred_edition and self.preferred_edition.has_cover_image(): return self.preferred_edition.cover_image_small() return "/static/images/generic_cover_larger.png" def cover_image_thumbnail(self): try: if self.preferred_edition and self.preferred_edition.has_cover_image(): return self.preferred_edition.cover_image_thumbnail() except IndexError: pass return "/static/images/generic_cover_larger.png" def authors(self): # assumes that they come out in the same order they go in! if self.preferred_edition and self.preferred_edition.authors.all().count() > 0: return self.preferred_edition.authors.all() for edition in self.editions.all(): if edition.authors.all().count() > 0: return edition.authors.all() return Author.objects.none() def relators(self): # assumes that they come out in the same order they go in! if self.preferred_edition and self.preferred_edition.relators.all().count() > 0: return self.preferred_edition.relators.all() for edition in self.editions.all(): if edition.relators.all().count() > 0: return edition.relators.all() return Relator.objects.none() def author(self): # assumes that they come out in the same order they go in! if self.relators().count() > 0: return self.relators()[0].name return '' def authors_short(self): # assumes that they come out in the same order they go in! if self.relators().count() == 1: return self.relators()[0].name elif self.relators().count() == 2: if self.relators()[0].relation == self.relators()[1].relation: if self.relators()[0].relation.code == 'aut': return "%s and %s" % (self.relators()[0].author.name, self.relators()[1].author.name) else: return "%s and %s, %ss" % (self.relators()[0].author.name, self.relators()[1].author.name, self.relators()[0].relation.name) else: return "%s (%s) and %s (%s)" % (self.relators()[0].author.name, self.relators()[0].relation.name, self.relators()[1].author.name, self.relators()[1].relation.name) elif self.relators().count() > 2: auths = self.relators() if auths[0].relation.code == 'aut': return "%s et al." % auths[0].author.name else: return "%s et al. (%ss)" % (auths[0].author.name, auths[0].relation.name) return '' def kindle_safe_title(self): """ Removes accents, keeps letters and numbers, replaces non-Latin characters with "#", and replaces punctuation with "_" """ safe = u'' nkfd_form = unicodedata.normalize('NFKD', self.title) #unaccent accented letters for c in nkfd_form: ccat = unicodedata.category(c) #print ccat if ccat.startswith('L') or ccat.startswith('N'): # only letters and numbers if ord(c) > 127: safe = safe + '#' #a non latin script letter or number else: safe = safe + c elif not unicodedata.combining(c): #not accents (combining forms) safe = safe + '_' #punctuation return safe def last_campaign(self): # stash away the last campaign to prevent repeated lookups if hasattr(self, '_last_campaign_'): return self._last_campaign_ try: self._last_campaign_ = self.campaigns.order_by('-created')[0] except IndexError: self._last_campaign_ = None return self._last_campaign_ @property def preferred_edition(self): if self.selected_edition: return self.selected_edition if self.last_campaign(): if self.last_campaign().edition: self.selected_edition = self.last_campaign().edition self.save() return self.last_campaign().edition try: self.selected_edition = self.editions.all().order_by('-cover_image', '-created')[0] # prefer editions with covers self.save() return self.selected_edition except IndexError: #should only happen if there are no editions for the work, #which can happen when works are being merged try: return WasWork.objects.get(was=self.id).work.preferred_edition except WasWork.DoesNotExist: #should not happen logger.warning('work {} has no edition'.format(self.id)) return None def last_campaign_status(self): campaign = self.last_campaign() if campaign: status = campaign.status else: if self.first_ebook(): status = "Available" else: status = "No campaign yet" return status def percent_unglued(self): status = 0 campaign = self.last_campaign() if campaign is not None: if campaign.status == 'SUCCESSFUL': status = 6 elif campaign.status == 'ACTIVE': if campaign.target is not None: target = float(campaign.target) else: #shouldn't happen, but did once because of a failed pdf conversion return 0 if target <= 0: status = 6 else: if campaign.type == BUY2UNGLUE: status = int(6 - 6*campaign.left/campaign.target) else: status = int(float(campaign.current_total)*6/target) if status >= 6: status = 6 return status def percent_of_goal(self): campaign = self.last_campaign() return 0 if campaign is None else campaign.percent_of_goal() def ebooks_all(self): return self.ebooks(all=True) def ebooks(self, all=False): if all: return Ebook.objects.filter(edition__work=self).order_by('-created') else: return Ebook.objects.filter(edition__work=self, active=True).order_by('-created') def ebookfiles(self): return EbookFile.objects.filter(edition__work=self).exclude(file='').order_by('-created') def epubfiles(self): # filter out non-epub because that's what booxtream accepts return EbookFile.objects.filter(edition__work=self, format='epub').exclude(file='').order_by('-created') def mobifiles(self): return EbookFile.objects.filter(edition__work=self, format='mobi').exclude(file='').order_by('-created') def pdffiles(self): return EbookFile.objects.filter(edition__work=self, format='pdf').exclude(file='').order_by('-created') def versions(self): version_labels = [] for ebook in self.ebooks_all(): if ebook.version_label and not ebook.version_label in version_labels: version_labels.append(ebook.version_label) return version_labels def formats(self): fmts = [] for fmt in ['pdf', 'epub', 'mobi', 'html']: for ebook in self.ebooks().filter(format=fmt): fmts.append(fmt) break return fmts def remove_old_ebooks(self): # this method is triggered after an file upload or new ebook saved old = Ebook.objects.filter(edition__work=self, active=True).order_by('-version_iter', '-created') # keep highest version ebook for each format and version label done_format_versions = [] for eb in old: format_version = '{}_{}'.format(eb.format, eb.version_label) if format_version in done_format_versions: eb.deactivate() else: done_format_versions.append(format_version) # check for failed uploads. null_files = EbookFile.objects.filter(edition__work=self, file='') for ebf in null_files: ebf.file.delete() ebf.delete() @property def download_count(self): dlc = 0 for ebook in self.ebooks(all=True): dlc += ebook.download_count return dlc def first_pdf(self): return self.first_ebook('pdf') def first_epub(self): return self.first_ebook('epub') def first_pdf_url(self): try: url = self.first_ebook('pdf').url return url except: return None def first_epub_url(self): try: url = self.first_ebook('epub').url return url except: return None def first_ebook(self, ebook_format=None): if ebook_format: for ebook in self.ebooks().filter(format=ebook_format): return ebook else: for ebook in self.ebooks(): return ebook def wished_by(self, excluding=None): if excluding: return User.objects.filter(wishlist__wishes__work=self).exclude(wishlist__wishes__created__range=excluding) else: return User.objects.filter(wishlist__wishes__work=self) def update_num_wishes(self): self.num_wishes = self.wishes.count() self.save() def priority(self): if self.last_campaign(): return 5 freedom = 1 if self.is_free else 0 wishing = int(math.log(self.num_wishes)) + 1 if self.num_wishes else 0 return min(freedom + wishing, 5) def first_oclc(self): if self.preferred_edition is None: return '' preferred_id = self.preferred_edition.oclc if preferred_id: return preferred_id try: return self.identifiers.filter(type='oclc')[0].value except IndexError: return '' def first_isbn_13(self): if self.preferred_edition is None: return '' preferred_id = self.preferred_edition.isbn_13 if preferred_id: return preferred_id try: return self.identifiers.filter(type='isbn')[0].value except IndexError: return '' @property def earliest_publication_date(self): for edition in Edition.objects.filter(work=self, publication_date__isnull=False).order_by('publication_date'): if edition.publication_date and len(edition.publication_date) >= 4: return edition.publication_date @property def publication_date(self): if self.publication_range: return self.publication_range for edition in Edition.objects.filter(work=self, publication_date__isnull=False).order_by('publication_date'): if edition.publication_date: try: earliest_publication = edition.publication_date[:4] except IndexError: continue latest_publication = None for edition in Edition.objects.filter(work=self, publication_date__isnull=False).order_by('-publication_date'): if edition.publication_date: try: latest_publication = edition.publication_date[:4] except IndexError: continue break if earliest_publication == latest_publication: publication_range = earliest_publication else: publication_range = earliest_publication + "-" + latest_publication self.publication_range = publication_range self.save() return publication_range return '' @property def has_unglued_edition(self): """ allows us to distinguish successful campaigns with ebooks still in progress from successful campaigns with ebooks available """ if self.ebooks().filter(edition__unglued=True): return True return False @property def user_with_rights(self): """ return queryset of users (should be at most one) who act for rights holders with active claims to the work """ claims = self.claim.filter(status='active') assert claims.count() < 2, "There is more than one active claim on %r" % self.title try: return claims[0].user except: return False def get_absolute_url(self): return reverse('work', args=[str(self.id)]) def publishers(self): # returns a set of publishers associated with this Work return Publisher.objects.filter(name__editions__work=self).distinct() def create_offers(self): for choice in OFFER_CHOICES: if not self.offers.filter(license=choice[0]): self.offers.create(license=choice[0], active=True, price=Decimal(10)) return self.offers.all() def get_lib_license(self, user): lib_user = (lib.user for lib in user.profile.libraries) return self.get_user_license(lib_user) def borrowable(self, user): if user.is_anonymous: return False lib_license = self.get_lib_license(user) if lib_license and lib_license.borrowable: return True return False def lib_thanked(self, user): if user.is_anonymous: return False lib_license = self.get_lib_license(user) if lib_license and lib_license.thanked: return True return False def in_library(self, user): if user.is_anonymous: return False lib_license = self.get_lib_license(user) if lib_license and lib_license.acqs.count(): return True return False @property def lib_acqs(self): return self.acqs.filter(license=LIBRARY) @property def test_acqs(self): return self.acqs.filter(license=TESTING).order_by('-created') class user_license: acqs = Identifier.objects.none() # Identifier is just convenient. def __init__(self, acqs): self.acqs = acqs @property def is_active(self): return self.acqs.filter(expires__isnull=True).count() > 0 or self.acqs.filter(expires__gt=now()).count() > 0 @property def borrowed(self): loans = self.acqs.filter(license=BORROWED, expires__gt=now()) if loans.count() == 0: return None else: return loans[0] @property def purchased(self): purchases = self.acqs.filter(license=INDIVIDUAL, expires__isnull=True) if purchases.count() == 0: return None else: return purchases[0] @property def lib_acqs(self): return self.acqs.filter(license=LIBRARY) @property def next_acq(self): """ This is the next available copy in the user's libraries""" loans = self.acqs.filter(license=LIBRARY, refreshes__gt=now()).order_by('refreshes') if loans.count() == 0: return None else: return loans[0] @property def borrowable(self): return self.acqs.filter(license=LIBRARY, refreshes__lt=now()).count() > 0 @property def thanked(self): return self.acqs.filter(license=THANKED).count() > 0 @property def borrowable_acq(self): for acq in self.acqs.filter(license=LIBRARY, refreshes__lt=now()): return acq @property def is_duplicate(self): # does user have two individual licenses? pending = self.acqs.filter(license=INDIVIDUAL, expires__isnull=True, gifts__used__isnull=True).count() return self.acqs.filter(license=INDIVIDUAL, expires__isnull=True).count() > pending def get_user_license(self, user): """ This is all the acqs, wrapped in user_license object for the work, user(s) """ if user is None: return None if hasattr(user, 'is_anonymous'): if user.is_anonymous: return None return self.user_license(self.acqs.filter(user=user)) else: # assume it's several users return self.user_license(self.acqs.filter(user__in=user)) @property def has_marc(self): for record in NewMARC.objects.filter(edition__work=self): return True return False ### for compatibility with MARC output def marc_records(self): record_list = [] record_list.extend(NewMARC.objects.filter(edition__work=self)) for obj in record_list: break else: for ebook in self.ebooks(): record_list.append(ebook.edition) break return record_list class WorkRelation(models.Model): to_work = models.ForeignKey('Work', on_delete=models.CASCADE, related_name='works_related_to') from_work= models.ForeignKey('Work', on_delete=models.CASCADE, related_name='works_related_from') relation = models.CharField(max_length=15, choices=TEXT_RELATION_CHOICES) class Author(models.Model): created = models.DateTimeField(auto_now_add=True) name = models.CharField(max_length=255, unique=True) editions = models.ManyToManyField("Edition", related_name="authors", through="Relator") def __unicode__(self): return self.name @property def last_name_first(self): names = self.name.rsplit() if len(names) == 0: return '' elif len(names) == 1: return names[0] elif len(names) == 2: return names[1] + ", " + names[0] else: reversed_name = names[-1]+"," for name in names[0:-1]: reversed_name += " " reversed_name += name return reversed_name class Relation(models.Model): code = models.CharField(max_length=3, blank=False, db_index=True, unique=True) name = models.CharField(max_length=30, blank=True,) class Relator(models.Model): relation = models.ForeignKey('Relation', on_delete=models.CASCADE, default=1) #first relation should have code='aut' author = models.ForeignKey('Author', on_delete=models.CASCADE) edition = models.ForeignKey('Edition', on_delete=models.CASCADE, related_name='relators') class Meta: db_table = 'core_author_editions' @property def name(self): if self.relation.code == 'aut': return self.author.name else: return "%s (%s)" % (self.author.name, self.relation.name) def set(self, relation_code): if self.relation.code != relation_code: try: self.relation = Relation.objects.get(code=relation_code) self.save() except Relation.DoesNotExist: logger.warning("relation not found: code = %s" % relation_code) class Subject(models.Model): created = models.DateTimeField(auto_now_add=True) name = models.CharField(max_length=200, unique=True) works = models.ManyToManyField("Work", related_name="subjects") is_visible = models.BooleanField(default=True) authority = models.CharField(max_length=10, blank=False, default="") class Meta: ordering = ['name'] @classmethod def set_by_name(cls, subject, work=None, authority=None): ''' use this method whenever you would be creating a new subject!''' subject = subject.strip() # make sure it's not a ; delineated list subjects = subject.split(';') for additional_subject in subjects[1:]: cls.set_by_name(additional_subject, work, authority) subject = subjects[0] # make sure there's no heading headingmatch = re.match(r'^!(.+):(.+)', subject) if headingmatch: subject = headingmatch.group(2).strip() authority = headingmatch.group(1).strip() elif subject.startswith('nyt:'): subject = subject[4:].split('=')[0].replace('_', ' ').strip().capitalize() subject = 'NYT Bestseller - {}'.format(subject) authority = 'nyt' elif subject.startswith('award:'): subject = subject[6:].split('=')[0].replace('_', ' ').strip().capitalize() subject = 'Award Winner - {}'.format(subject) authority = 'award' if valid_subject(subject): (subject_obj, created) = cls.objects.get_or_create(name=subject) if not subject_obj.authority and authority: subject_obj.authority = authority subject_obj.save() subject_obj.works.add(work) return subject_obj else: return None def __unicode__(self): return self.name @property def kw(self): return 'kw.%s' % self.name def free_works(self): return self.works.filter(is_free=True) class Edition(models.Model): created = models.DateTimeField(auto_now_add=True) title = models.CharField(max_length=1000) publisher_name = models.ForeignKey("PublisherName", on_delete=models.CASCADE, related_name="editions", null=True, blank=True) publication_date = models.CharField(max_length=50, null=True, blank=True, db_index=True) work = models.ForeignKey("Work", on_delete=models.CASCADE, related_name="editions", null=True) cover_image = models.URLField(null=True, blank=True) unglued = models.BooleanField(default=False) note = models.ForeignKey("EditionNote", on_delete=models.CASCADE, null=True, blank=True) def __unicode__(self): if self.isbn_13: return "%s (ISBN %s) %s" % (self.title, self.isbn_13, self.publisher) if self.oclc: return "%s (OCLC %s) %s" % (self.title, self.oclc, self.publisher) if self.googlebooks_id: return "%s (GOOG %s) %s" % (self.title, self.googlebooks_id, self.publisher) else: return "%s (GLUE %s) %s" % (self.title, self.id, self.publisher) def cover_image_large(self): #550 pixel high image if self.cover_image: try: im = get_thumbnail(self.cover_image, 'x550', crop='noop', quality=95) if im.exists(): return im.url except IOError: pass elif self.googlebooks_id: url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=0" % self.googlebooks_id try: im = get_thumbnail(url, 'x550', crop='noop', quality=95) if not im.exists() or im.storage.size(im.name) == 16392: # check for "image not available" image url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id im = get_thumbnail(url, 'x550', crop='noop', quality=95) if im.exists(): return im.url except IOError: pass return '' def cover_image_small(self): #80 pixel high image if self.cover_image: try: im = get_thumbnail(self.cover_image, 'x80', crop='noop', quality=95) if im.exists(): return im.url except IOError: pass if self.googlebooks_id: return "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=5" % self.googlebooks_id return '' def cover_image_thumbnail(self): #128 pixel wide image if self.cover_image: try: im = get_thumbnail(self.cover_image, '128', crop='noop', quality=95) if im.exists(): return im.url except IOError: pass if self.googlebooks_id: return "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id else: return '' def has_cover_image(self): if self.cover_image: return self.cover_image elif self.googlebooks_id: return True else: return False @property def publisher(self): if self.publisher_name: return self.publisher_name.name return '' @property def isbn_10(self): return regluit.core.isbn.convert_13_to_10(self.isbn_13) def id_for(self, type): if type in WORK_IDENTIFIERS: return self.work.id_for(type) return id_for(self, type) @property def isbn_13(self): return self.id_for('isbn') @property def googlebooks_id(self): return self.id_for('goog') @property def librarything_id(self): return self.id_for('thng') @property def oclc(self): return self.id_for('oclc') @property def goodreads_id(self): return self.id_for('gdrd') @staticmethod def get_by_isbn(isbn): if len(isbn) == 10: isbn = regluit.core.isbn.convert_10_to_13(isbn) try: return Identifier.objects.get(type='isbn', value=isbn).edition except Identifier.DoesNotExist: return None def add_author(self, author_name, relation='aut'): if author_name: (author, created) = Author.objects.get_or_create(name=author_name) (relation, created) = Relation.objects.get_or_create(code=relation) (new_relator, created) = Relator.objects.get_or_create(author=author, edition=self) if new_relator.relation != relation: new_relator.relation = relation new_relator.save() def remove_author(self, author): if author: try: relator = Relator.objects.get(author=author, edition=self) relator.delete() except Relator.DoesNotExist: pass def set_publisher(self, publisher_name): if publisher_name and publisher_name != '': try: pub_name = PublisherName.objects.get(name=publisher_name) if pub_name.publisher: pub_name = pub_name.publisher.name except PublisherName.DoesNotExist: pub_name = PublisherName.objects.create(name=publisher_name) pub_name.save() self.publisher_name = pub_name self.save() #### following methods for compatibility with marc outputter def downloads(self): return self.ebooks.filter(active=True) def download_via_url(self): return settings.BASE_URL_SECURE + reverse('download', args=[self.work_id]) def authnames(self): return [auth.last_name_first for auth in self.authors.all()] @property def license(self): try: return self.ebooks.all()[0].rights except: return None @property def funding_info(self): if self.ebooks.all().count() == 0: return '' if self.unglued: return 'The book is available as a free download thanks to the generous support of interested readers and organizations, who made donations using the crowd-funding website Unglue.it.' else: if self.ebooks.all()[0].rights in cc.LICENSE_LIST: return 'The book is available as a free download thanks to a Creative Commons license.' else: return 'The book is available as a free download because it is in the Public Domain.' @property def description(self): return self.work.description class EditionNote(models.Model): note = models.CharField(max_length=64, null=True, blank=True, unique=True) def __unicode__(self): return self.note class Publisher(models.Model): created = models.DateTimeField(auto_now_add=True) name = models.ForeignKey('PublisherName', on_delete=models.CASCADE, related_name='key_publisher') url = models.URLField(max_length=1024, null=True, blank=True) logo_url = models.URLField(max_length=1024, null=True, blank=True) description = models.TextField(default='', null=True, blank=True) def __unicode__(self): return self.name.name class PublisherName(models.Model): name = models.CharField(max_length=255, blank=False, unique=True) publisher = models.ForeignKey('Publisher', on_delete=models.CASCADE, related_name='alternate_names', null=True) def __unicode__(self): return self.name def save(self, *args, **kwargs): super(PublisherName, self).save(*args, **kwargs) # Call the "real" save() method. if self.publisher and self != self.publisher.name: #this name is an alias, repoint all editions with this name to the other. for edition in Edition.objects.filter(publisher_name=self): edition.publisher_name = self.publisher.name edition.save() class WasWork(models.Model): work = models.ForeignKey('Work', on_delete=models.CASCADE) was = models.IntegerField(unique=True) moved = models.DateTimeField(auto_now_add=True) user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True) def safe_get_work(work_id): """ use this rather than querying the db directly for a work by id """ try: work = Work.objects.get(id=work_id) except Work.DoesNotExist: try: work = WasWork.objects.get(was=work_id).work except WasWork.DoesNotExist: raise Work.DoesNotExist() except ValueError: #work_id is not a number raise Work.DoesNotExist() return work def path_for_file(instance, filename): return "ebf/{}.{}".format(uuid.uuid4().get_hex(), instance.format) class EbookFile(models.Model): file = models.FileField(upload_to=path_for_file) format = models.CharField(max_length=25, choices=settings.FORMATS) edition = models.ForeignKey('Edition', on_delete=models.CASCADE, related_name='ebook_files') created = models.DateTimeField(auto_now_add=True) asking = models.BooleanField(default=False) ebook = models.ForeignKey('Ebook', on_delete=models.CASCADE, related_name='ebook_files', null=True) source = models.URLField(null=True, blank=True) mobied = models.IntegerField(default=0) #-1 indicates a failed conversion attempt version = None def check_file(self): if self.format == 'epub': return test_epub(self.file) return None @property def active(self): try: return Ebook.objects.filter(url=self.file.url)[0].active except: return False def make_mobi(self): if not self.format == 'epub' or not settings.MOBIGEN_URL: return False if self.mobied < 0: return False try: mobi_cf = ContentFile(mobi.convert_to_mobi(self.file.url)) except: self.mobied = -1 self.save() return False new_mobi_ebf = EbookFile.objects.create( edition=self.edition, format='mobi', asking=self.asking, source=self.file.url ) new_mobi_ebf.file.save(path_for_file(new_mobi_ebf, None), mobi_cf) new_mobi_ebf.save() if self.ebook: new_ebook = Ebook.objects.create( edition=self.edition, format='mobi', provider='Unglue.it', url=new_mobi_ebf.file.url, rights=self.ebook.rights, version_label=self.ebook.version_label, version_iter=self.ebook.version_iter, ) new_mobi_ebf.ebook = new_ebook new_mobi_ebf.save() self.mobied = 1 self.save() return True send_to_kindle_limit = 7492232 class Ebook(models.Model): url = models.URLField(max_length=1024) #change to unique? created = models.DateTimeField(auto_now_add=True, db_index=True,) format = models.CharField(max_length=25, choices=settings.FORMATS, blank=False) provider = models.CharField(max_length=255) download_count = models.IntegerField(default=0) active = models.BooleanField(default=True) filesize = models.PositiveIntegerField(null=True) version_label = models.CharField(max_length=255, default="", blank=True) version_iter = models.PositiveIntegerField(default=0) # use 'PD-US', 'CC BY', 'CC BY-NC-SA', 'CC BY-NC-ND', 'CC BY-NC', 'CC BY-ND', 'CC BY-SA', 'CC0' rights = models.CharField(max_length=255, null=True, choices=cc.CHOICES, db_index=True) edition = models.ForeignKey('Edition', on_delete=models.CASCADE, related_name='ebooks') user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True) def kindle_sendable(self): if not self.filesize or self.filesize < send_to_kindle_limit: return True else: return False def get_archive(self): # returns an open file ebf = self.get_archive_ebf() if not ebf: return None try: ebf.file.open() except ValueError: logger.error(u'couldn\'t open EbookFile {}'.format(ebf.id)) return None except IOError: logger.error(u'EbookFile {} does not exist'.format(ebf.id)) return None return ebf.file def get_archive_ebf(self): # returns an ebf if not self.ebook_files.filter(asking=False): if not self.provider in good_providers: return None try: r = urllib2.urlopen(self.url) try: self.filesize = int(r.info().getheaders("Content-Length")[0]) if self.save: self.filesize = self.filesize if self.filesize < 2147483647 else 2147483647 # largest safe positive integer self.save() ebf = EbookFile.objects.create( edition=self.edition, ebook=self, format=self.format, source=self.url ) ebf.file.save(path_for_file(ebf, None), ContentFile(r.read())) ebf.file.close() ebf.save() return ebf except IndexError: # response has no Content-Length header probably a bad link logging.error('Bad link error: {}'.format(self.url)) except IOError: logger.error(u'could not open {}'.format(self.url)) else: ebf = self.ebook_files.filter(asking=False).order_by('-created')[0] if not self.filesize: self.filesize = ebf.file.size self.save() return ebf def set_provider(self): self.provider = Ebook.infer_provider(self.url) return self.provider @property def version(self): if self.version_label is None: return '.{}'.format(self.version_iter) else: return '().{}'.format(self.version_label, self.version_iter) def set_version(self, version): #set both version_label and version_iter with one string with format "version.iter" version_pattern = r'(.*)\.(\d+)$' match = re.match(version_pattern,version) if match: (self.version_label, self.version_iter) = (match.group(1), match.group(2)) else: self.version_label = version self.save() def set_next_iter(self): # set the version iter to the next unused iter for that version for ebook in Ebook.objects.filter( edition=self.edition, version_label=self.version_label, format=self.format, provider=self.provider ).order_by('-version_iter'): iter = ebook.version_iter break self.version_iter = iter + 1 self.save() @property def rights_badge(self): if self.rights is None: return cc.CCLicense.badge('PD-US') return cc.CCLicense.badge(self.rights) @staticmethod def infer_provider(url): if not url: return None # provider derived from url. returns provider value. remember to call save() afterward if re.match(r'https?://books.google.com/', url): provider = 'Google Books' elif re.match(r'https?://www.gutenberg.org/', url): provider = 'Project Gutenberg' elif re.match(r'https?://(www\.|)archive.org/', url): provider = 'Internet Archive' elif url.startswith('http://hdl.handle.net/2027/') or url.startswith('http://babel.hathitrust.org/'): provider = 'Hathitrust' elif re.match(r'https?://\w\w\.wikisource\.org/', url): provider = 'Wikisource' elif re.match(r'https?://\w\w\.wikibooks\.org/', url): provider = 'Wikibooks' elif re.match(r'https://github\.com/[^/ ]+/[^/ ]+/raw/[^ ]+', url): provider = 'Github' elif re.match(r'https?://www\.oapen\.org/download', url): provider = 'OAPEN Library' else: provider = None return provider def increment(self): Ebook.objects.filter(id=self.id).update(download_count=F('download_count') +1) @property def download_url(self): return settings.BASE_URL_SECURE + reverse('download_ebook', args=[self.id]) def is_direct(self): return self.provider not in ('Google Books', 'Project Gutenberg') def __unicode__(self): return "%s (%s from %s)" % (self.edition.title, self.format, self.provider) def deactivate(self): self.active = False self.save() def activate(self): self.active = True self.save() def set_free_flag(sender, instance, created, **kwargs): if created: if not instance.edition.work.is_free and instance.active: instance.edition.work.is_free = True instance.edition.work.save() elif not instance.active and instance.edition.work.is_free and instance.edition.work.ebooks().count() == 0: instance.edition.work.is_free = False instance.edition.work.save() elif instance.active and not instance.edition.work.is_free and instance.edition.work.ebooks().count() > 0: instance.edition.work.is_free = True instance.edition.work.save() post_save.connect(set_free_flag, sender=Ebook) def reset_free_flag(sender, instance, **kwargs): # if the Work associated with the instance Ebook currenly has only 1 Ebook, then it's no longer a free Work # once the instance Ebook is deleted. if instance.edition.work.ebooks().count() == 1: instance.edition.work.is_free = False instance.edition.work.save() pre_delete.connect(reset_free_flag, sender=Ebook)