Merge pull request #585 from Gluejar/push

FTP Push seconds on the onix file is a good idea
pull/1/head
eshellman 2016-05-30 10:30:34 -04:00
commit f99b621c19
19 changed files with 452 additions and 27 deletions

View File

@ -315,4 +315,9 @@ admin_site.register(QuestionSet, QuestionSetAdmin)
admin_site.register(QSubject, QSubjectAdmin)
admin_site.register(RunInfo, RunInfoAdmin)
admin_site.register(RunInfoHistory, RunInfoHistoryAdmin)
admin_site.register(Answer, AnswerAdmin)
admin_site.register(Answer, AnswerAdmin)
from regluit.distro.admin import Target
from regluit.distro.admin import TargetAdmin
admin_site.register(Target, TargetAdmin)

View File

@ -61,11 +61,13 @@ def product(edition, facet=None):
ident_node.append(text_node("ProductIDType", "01" )) #proprietary
ident_node.append(text_node("IDTypeName", "unglue.it edition id" )) #proprietary
ident_node.append(text_node("IDValue", unicode(edition.id) ))
if edition.isbn_13:
# wrong isbn better than no isbn
isbn = edition.isbn_13 if edition.isbn_13 else edition.work.first_isbn_13()
if isbn:
ident_node = etree.SubElement(product_node, "ProductIdentifier")
ident_node.append(text_node("ProductIDType", "03" )) #proprietary
ident_node.append(text_node("IDValue", edition.isbn_13 ))
ident_node.append(text_node("IDValue", isbn ))
# Descriptive Detail Block
descriptive_node = etree.SubElement(product_node, "DescriptiveDetail")
@ -163,10 +165,13 @@ def product(edition, facet=None):
pub_node.append(text_node("PublishingRole", '01')) #publisher
pub_node.append(text_node("PublisherName", edition.publisher_name.name))
pubdetail_node.append(text_node("PublishingStatus", '00')) #unspecified
if edition.publication_date:
#consumers really want a pub date
publication_date = edition.publication_date if edition.publication_date else edition.work.earliest_publication_date
if publication_date:
pubdate_node = etree.SubElement(pubdetail_node, "PublishingDate")
pubdate_node.append(text_node("PublishingDateRole", '01')) #nominal pub date
pubdate_node.append(text_node("Date", edition.publication_date.replace('-','')))
pubdate_node.append(text_node("Date", publication_date.replace('-','')))
# Product Supply Block
supply_node = etree.SubElement(product_node,"ProductSupply")

View File

@ -1001,8 +1001,8 @@ class Campaign(models.Model):
self.work.make_ebooks_from_ebfs(add_ask=True)
def make_unglued_ebf(self, format, watermarked):
ebf=EbookFile.objects.create(edition=self.work.preferred_edition, format=format)
r=urllib2.urlopen(watermarked.download_link(format))
ebf=EbookFile.objects.create(edition=self.work.preferred_edition, format=format)
ebf.file.save(path_for_file(ebf,None),ContentFile(r.read()))
ebf.file.close()
ebf.save()
@ -1197,6 +1197,11 @@ class Work(models.Model):
else:
return self.googlebooks_id
def cover_image_large(self):
    """Return the URL of a large cover image for this work.

    Delegates to the preferred edition when there is one and it reports
    having a cover image; otherwise returns the path of the generic
    large cover placeholder.
    """
    # Guard clause: no usable edition cover, so hand back the placeholder.
    if not (self.preferred_edition and self.preferred_edition.has_cover_image()):
        return "/static/images/generic_cover_larger.png"
    return self.preferred_edition.cover_image_large()
def cover_image_small(self):
if self.preferred_edition and self.preferred_edition.has_cover_image():
return self.preferred_edition.cover_image_small()
@ -1360,6 +1365,14 @@ class Work(models.Model):
def pdffiles(self):
    """Return this work's 'pdf' EbookFiles that have a non-empty file, newest first."""
    pdf_records = EbookFile.objects.filter(edition__work=self, format='pdf')
    # Records whose file field is empty have nothing to serve.
    with_file = pdf_records.exclude(file='')
    return with_file.order_by('-created')
def formats(self):
    """Return the ebook formats available for this work.

    Checks 'pdf', 'epub', 'mobi' and 'html' in that order and returns a
    list of those for which at least one ebook exists.
    """
    # assumes self.ebooks() returns a Django QuerySet (it is filtered below)
    ebooks = self.ebooks()
    # exists() only asks whether a matching row is present, instead of
    # iterating the result set just to break after the first item.
    return [
        fmt for fmt in ('pdf', 'epub', 'mobi', 'html')
        if ebooks.filter(format=fmt).exists()
    ]
def make_ebooks_from_ebfs(self, add_ask=True):
# either the ebf has been uploaded or a created (perhaps an ask was added or mobi generated)
if self.last_campaign().type != THANKS: # just to make sure that ebf's can be unglued by mistake
@ -1477,7 +1490,13 @@ class Work(models.Model):
return self.identifiers.filter(type='isbn')[0].value
except IndexError:
return ''
@property
def earliest_publication_date(self):
    """Return the earliest usable publication date among this work's editions.

    Editions with a non-null publication_date are scanned in ascending
    publication_date order; the first value that is non-empty and at
    least 4 characters long is returned. Returns None when no edition
    qualifies.
    """
    dated_editions = Edition.objects.filter(work=self, publication_date__isnull=False)
    for candidate in dated_editions.order_by('publication_date'):
        # Skip empty or too-short values (fewer than 4 characters).
        if not candidate.publication_date or len(candidate.publication_date) < 4:
            continue
        return candidate.publication_date
@property
def publication_date(self):
if self.publication_range:
@ -1765,12 +1784,32 @@ class Edition(models.Model):
else:
return "%s (GLUE %s) %s" % (self.title, self.id, self.publisher)
def cover_image_large(self):
    """Return the URL of a 550 pixel high cover image, or '' if none is available.

    The edition's own cover_image is tried first. If it is missing, or
    its thumbnail does not exist, the Google Books cover is tried (when
    a googlebooks_id is set). Always returns a string.
    """
    #550 pixel high image
    if self.cover_image:
        im = get_thumbnail(self.cover_image, 'x550', crop='noop', quality=95)
        if im.exists():
            return im.url
    # Deliberately 'if', not 'elif': when the stored cover yields no
    # thumbnail we still fall back to Google Books, as cover_image_small
    # and cover_image_thumbnail do, instead of returning None.
    if self.googlebooks_id:
        url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=0" % self.googlebooks_id
        im = get_thumbnail(url, 'x550', crop='noop', quality=95)
        if not im.exists() or im.storage.size(im.name)==16392: # check for "image not available" image
            # retry with zoom=1 when the zoom=0 image is unusable
            url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id
            im = get_thumbnail(url, 'x550', crop='noop', quality=95)
        if im.exists():
            return im.url
    return ''
def cover_image_small(self):
#80 pixel high image
if self.cover_image:
im = get_thumbnail(self.cover_image, 'x80', crop='noop', quality=95)
return im.url
elif self.googlebooks_id:
if im.exists():
return im.url
if self.googlebooks_id:
return "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=5" % self.googlebooks_id
else:
return ''
@ -1779,8 +1818,9 @@ class Edition(models.Model):
#128 pixel wide image
if self.cover_image:
im = get_thumbnail(self.cover_image, '128', crop='noop', quality=95)
return im.url
elif self.googlebooks_id:
if im.exists():
return im.url
if self.googlebooks_id:
return "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id
else:
return ''
@ -2005,7 +2045,46 @@ class Ebook(models.Model):
return True
else:
return False
def get_archive(self):
    """Return an open file holding this ebook's content, archiving it first if needed.

    If the edition already has an EbookFile in this ebook's format, the
    most recently created one is opened and its file returned.

    Otherwise, for ebooks not provided by Unglue.it, the content is
    downloaded from self.url, self.filesize is updated from the
    Content-Length header (capped at 2147483647) and saved, a new
    EbookFile is created from the download, and its re-opened file is
    returned. For Unglue.it ebooks with no EbookFile the url is opened
    directly and that handle is returned.

    Returns None when the url cannot be opened or read, when the
    response has no Content-Length header, or when the archived file
    cannot be opened.
    """
    archived = self.edition.ebook_files.filter(format=self.format)
    if archived.count() == 0:
        # '!=' rather than 'is not': identity comparison against a string
        # literal does not test equality.
        if self.provider != 'Unglue.it':
            try:
                r = urllib2.urlopen(self.url)
            except IOError:
                logger.error(u'could not open {}'.format(self.url) )
                return None
            try:
                try:
                    size = int(r.info().getheaders("Content-Length")[0])
                except IndexError:
                    # response has no Content-Length header probably a bad link
                    logger.error(u'Bad link error: {}'.format(self.url) )
                    return None
                # largest safe positive integer
                self.filesize = size if size < 2147483647 else 2147483647
                self.save()
                ebf = EbookFile.objects.create(edition=self.edition, format=self.format)
                ebf.file.save(path_for_file(ebf,None),ContentFile(r.read()))
                ebf.file.close()
                ebf.save()
                # hand the caller a freshly opened file
                ebf.file.open()
                return ebf.file
            except IOError:
                logger.error(u'could not open {}'.format(self.url) )
                return None
            finally:
                # always release the HTTP response
                r.close()
        else:
            # this shouldn't happen, except in testing perhaps
            logger.error(u'couldn\'t find ebookfile for {}'.format(self.url) )
            # try the url instead
            f = urllib.urlopen(self.url)
            return f
    else:
        ebf = archived.order_by('-created')[0]
        try:
            ebf.file.open()
        except ValueError:
            logger.error(u'couldn\'t open EbookFile {}'.format(ebf.id) )
            return None
        except IOError:
            logger.error(u'EbookFile {} does not exist'.format(ebf.id) )
            return None
        return ebf.file
def set_provider(self):
    """Infer the provider from this ebook's url, store it on self.provider and return it."""
    inferred = Ebook.infer_provider(self.url)
    self.provider = inferred
    return self.provider

View File

@ -89,11 +89,18 @@ class BookLoaderTests(TestCase):
huck = models.Work.objects.get(id=huck_id)
self.assertTrue( huck.ebooks().count()>1)
def test_add_by_yaml(self):
    """Load two works from YAML metadata, then check that archiving adds EbookFiles."""
    space_id = bookloader.load_from_yaml('https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml')
    huck_id = bookloader.load_from_yaml('https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/raw/master/metadata.yaml')
    space = models.Work.objects.get(id=space_id)
    huck = models.Work.objects.get(id=huck_id)

    #test ebook archiving
    ebf_count_before = EbookFile.objects.all().count()
    for ebook in huck.ebooks().all():
        archived_file = ebook.get_archive()
    ebf_count_after = EbookFile.objects.all().count()
    self.assertTrue(ebf_count_after > ebf_count_before)
def test_valid_subject(self):
self.assertTrue(bookloader.valid_subject('A, valid, suj\xc3t'))
@ -457,7 +464,10 @@ class BookLoaderTests(TestCase):
ebook = bookloader.load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, seed_isbn, epub_url, format, license, lang, publication_date)
self.assertEqual(ebook.url, epub_url)
def tearDown(self):
    """Delete the file attached to every EbookFile after each test."""
    for ebook_file in EbookFile.objects.all():
        ebook_file.file.delete()
class SearchTests(TestCase):
def test_basic_search(self):

0
distro/__init__.py Normal file
View File

8
distro/admin.py Normal file
View File

@ -0,0 +1,8 @@
from django.contrib import admin
from .models import Target
# new in dj1.7
# @admin.register(Target)
class TargetAdmin(admin.ModelAdmin):
    """Admin options for Target; nothing is customized beyond ModelAdmin."""

View File

@ -0,0 +1,23 @@
[
{
"fields": {
"name": "pdf"
},
"model": "distro.format",
"pk": 1
},
{
"fields": {
"name": "epub"
},
"model": "distro.format",
"pk": 2
},
{
"fields": {
"name": "mobi"
},
"model": "distro.format",
"pk": 3
}
]

View File

@ -0,0 +1,33 @@
[
{
"fields": {
"name": "24 Symbols",
"host":"ftp.24symbols.net",
"user":"ebookfoundation",
"pw":"GU0GMNbcjmqILZ9ssdrG",
"protocol":"secure"
},
"model": "distro.target",
"pk": 1
},
{
"fields": {
"name": "Bitlit",
"host":"publishers.bitlit.ca",
"user":"unglueit",
"pw":"9pkgaCZc"
},
"model": "distro.target",
"pk": 2
},
{
"fields": {
"name": "Odilo",
"host":"ftp.odilo.us",
"user":"FreeEbookFundation",
"pw":"Xlh62lCmhd7l"
},
"model": "distro.target",
"pk": 3
}
]

View File

View File

View File

@ -0,0 +1,48 @@
from datetime import datetime
from django.core.management.base import BaseCommand
from regluit.distro.models import Target
from regluit.distro.push import push_all, push_books
class Command(BaseCommand):
    """Management command that pushes ebooks to one named target or to all targets."""
    help = "ftp <max> books to <target> or 'all' <since> date . "
    args = "<max> <target> <since> <new>"

    def handle(self, max=0, target=None, since=None, new=None, *args, **options):
        """Push books to a distribution target.

        max    -- maximum number of books to push; 0 means all available.
        target -- name of a Target, or "all" to push to every Target.
        since  -- optional 'YYYY-MM-DD' date passed on as the start date;
                  anything that does not parse is ignored.
        new    -- the literal string 'new' to skip previously deposited books.
        """
        try:
            max = int(max)
        except (TypeError, ValueError):
            self.stderr.write("max should be number (0 for all available) ")
            return

        new = new == 'new'
        if new:
            self.stdout.write("previously deposited books will not be pushed")

        # A missing (None) or malformed date simply means "no start date".
        try:
            since = datetime.strptime(since, '%Y-%m-%d')
        except (TypeError, ValueError):
            since = None

        # Resolve the target before announcing anything, so that no
        # "pushing ..." message is printed for an undefined target.
        target_name = target
        try:
            target = Target.objects.get(name=target_name)
        except Target.DoesNotExist:
            if target_name != "all":
                self.stderr.write("'{}' is not a defined target".format(target_name))
                return
            target = None

        how_many = max if max else 'all'
        push_kwargs = {'max': max, 'new': new}
        if since:
            # only override the push functions' default start when a date was given
            push_kwargs['start'] = since

        if target is None:
            if since:
                self.stdout.write("pushing {} to all targets".format(how_many))
            else:
                self.stdout.write("pushing {} books to all targets".format(how_many))
            push_all(**push_kwargs)
        else:
            if since:
                self.stdout.write("pushing {} new books since {} to {}".format(how_many, since, target))
            else:
                self.stdout.write("pushing {} books to {}".format(how_many, target_name))
            push_books(target, **push_kwargs)

View File

@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
from south.utils import datetime_utils as datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
    """South schema migration that creates the distro app's tables.

    forwards() creates distro_target, the M2M table for Target.formats,
    distro_deposit and distro_format; backwards() deletes the same tables.
    """

    def forwards(self, orm):
        """Create the Target, Target.formats (M2M), Deposit and Format tables."""
        gf = self.gf

        # Adding model 'Target'
        db.create_table(u'distro_target', (
            (u'id', gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', gf('django.db.models.fields.CharField')(max_length=30)),
            ('host', gf('django.db.models.fields.CharField')(max_length=60)),
            ('pw', gf('django.db.models.fields.CharField')(max_length=30)),
            ('user', gf('django.db.models.fields.CharField')(max_length=30)),
            ('protocol', gf('django.db.models.fields.CharField')(default='ftp', max_length=10)),
        ))
        db.send_create_signal(u'distro', ['Target'])

        # Adding M2M table for field formats on 'Target'
        formats_table = db.shorten_name(u'distro_target_formats')
        db.create_table(formats_table, (
            ('id', models.AutoField(verbose_name='ID', primary_key=True, auto_created=True)),
            ('target', models.ForeignKey(orm[u'distro.target'], null=False)),
            ('format', models.ForeignKey(orm[u'distro.format'], null=False))
        ))
        db.create_unique(formats_table, ['target_id', 'format_id'])

        # Adding model 'Deposit'
        db.create_table(u'distro_deposit', (
            (u'id', gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('target', gf('django.db.models.fields.related.ForeignKey')(related_name='deposits', to=orm['distro.Target'])),
            ('isbn', gf('django.db.models.fields.CharField')(max_length=13)),
            ('format', gf('django.db.models.fields.CharField')(max_length=30)),
            ('updated', gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)),
        ))
        db.send_create_signal(u'distro', ['Deposit'])

        # Adding model 'Format'
        db.create_table(u'distro_format', (
            (u'id', gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', gf('django.db.models.fields.CharField')(max_length=4)),
        ))
        db.send_create_signal(u'distro', ['Format'])

    def backwards(self, orm):
        """Delete every table that forwards() created."""
        # Deleting model 'Target'
        db.delete_table(u'distro_target')

        # Removing M2M table for field formats on 'Target'
        db.delete_table(db.shorten_name(u'distro_target_formats'))

        # Deleting model 'Deposit'
        db.delete_table(u'distro_deposit')

        # Deleting model 'Format'
        db.delete_table(u'distro_format')

    # Frozen model definitions for the distro app at this migration.
    models = {
        u'distro.deposit': {
            'Meta': {'object_name': 'Deposit'},
            'format': ('django.db.models.fields.CharField', [], {'max_length': '30'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'isbn': ('django.db.models.fields.CharField', [], {'max_length': '13'}),
            'target': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'deposits'", 'to': u"orm['distro.Target']"}),
            'updated': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'})
        },
        u'distro.format': {
            'Meta': {'object_name': 'Format'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '4'})
        },
        u'distro.target': {
            'Meta': {'object_name': 'Target'},
            'formats': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'targets'", 'symmetrical': 'False', 'to': u"orm['distro.Format']"}),
            'host': ('django.db.models.fields.CharField', [], {'max_length': '60'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '30'}),
            'protocol': ('django.db.models.fields.CharField', [], {'default': "'ftp'", 'max_length': '10'}),
            'pw': ('django.db.models.fields.CharField', [], {'max_length': '30'}),
            'user': ('django.db.models.fields.CharField', [], {'max_length': '30'})
        }
    }

    complete_apps = ['distro']

View File

69
distro/models.py Normal file
View File

@ -0,0 +1,69 @@
import logging
import requests
from ftplib import FTP, FTP_TLS
from StringIO import StringIO
from django.db import models
logger = logging.getLogger(__name__)
class Target(models.Model):
    """A remote FTP destination that ebook files, covers and feeds are pushed to."""
    name = models.CharField(max_length=30)
    host = models.CharField(max_length=60)
    pw = models.CharField(max_length=30)
    user = models.CharField(max_length=30)
    # 'secure' selects FTP over TLS in get_ftp(); any other value uses plain FTP
    protocol = models.CharField(max_length=10, default='ftp')
    # only ebooks whose format is listed here are pushed
    formats = models.ManyToManyField('Format', related_name='targets')

    def __unicode__(self):
        return self.name

    def get_ftp(self):
        """Open and return a new logged-in FTP connection to this target.

        The caller owns the connection and is responsible for closing it.
        """
        if self.protocol == 'secure':
            _ftp = FTP_TLS(self.host, self.user, self.pw)
            # switch the data connection to TLS as well
            _ftp.prot_p()
        else:
            _ftp = FTP(self.host, self.user, self.pw)
        return _ftp

    def push(self, book, new=False):
        """Push one file per accepted format of `book`, plus its cover, to this target.

        For each ebook of the book that has an ISBN (the edition's isbn_13,
        else the book's first_isbn_13()) and a format this target accepts,
        the archived file is uploaded as '<isbn>.<format>' and a Deposit is
        recorded. Each format is pushed at most once. When `new` is true,
        isbn/format pairs that already have a Deposit for this target are
        skipped. If anything was pushed, the large cover image is uploaded
        as '<isbn>.jpg' for every pushed ISBN.

        Returns the set of ISBNs that were pushed (empty if none).
        """
        pushed_formats = []
        pushed_isbns = set()
        for ebook in book.ebooks():
            isbn = ebook.edition.isbn_13 if ebook.edition.isbn_13 else book.first_isbn_13()
            if isbn and self.formats.filter(name=ebook.format).exists() and ebook.format not in pushed_formats:
                dont_push = new and Deposit.objects.filter(isbn=isbn, target=self, format=ebook.format).exists()
                if not dont_push:
                    ebfile = ebook.get_archive()
                    if ebfile:
                        try:
                            self.push_file(u'{}.{}'.format(isbn,ebook.format), ebfile)
                        finally:
                            # release the file handle even when the upload fails
                            ebfile.close()
                        pushed_formats.append(ebook.format)
                        pushed_isbns.add(isbn)
                        Deposit.objects.create(target=self, isbn=isbn, format=ebook.format)
                    else:
                        logger.error('no file available for ebook {}'.format(ebook.id))
        if pushed_isbns:
            cover = book.cover_image_large()
            if cover:
                # NOTE(review): cover may be a site-relative path rather than
                # an absolute URL, which requests.get would reject -- confirm.
                r = requests.get(cover)
                for isbn in pushed_isbns:
                    self.push_file(u'{}.{}'.format(isbn,'jpg'), StringIO(r.content))
            else:
                logger.error('no cover available for {}'.format(book))
        return pushed_isbns

    def push_file(self, filename, file_to_push):
        """Upload the file object `file_to_push` to this target as `filename`.

        A fresh connection is opened for the upload and always closed
        afterwards, so connections do not accumulate across calls.
        """
        ftp = self.get_ftp()
        try:
            ftp.storbinary(u'STOR {}'.format(filename), file_to_push)
        finally:
            ftp.close()
class Deposit(models.Model):
    """Record of one ISBN/format pair associated with a Target."""
    # reachable from a Target instance as target.deposits
    target = models.ForeignKey(Target, related_name="deposits")
    isbn = models.CharField(max_length=13)
    format = models.CharField(max_length=30)
    # auto_now_add: stamped once, when the row is created
    updated = models.DateTimeField(auto_now_add=True)
class Format(models.Model):
    """A named format (at most 4 characters); displayed by its name."""
    name = models.CharField(max_length=4)

    def __unicode__(self):
        return self.name

63
distro/push.py Normal file
View File

@ -0,0 +1,63 @@
import logging
from datetime import datetime
from StringIO import StringIO
from regluit.core.facets import BaseFacet
from regluit.core.models import Work
from regluit.api.onix import onix_feed
from .models import Target
logger = logging.getLogger(__name__)
def push_books(target, start=datetime(1900,1,1), new=False, max=0):
    """Push the works selected for `target`, then an ONIX feed describing them.

    Works come from the facet built by get_target_facet(). Each is handed
    to target.push(); those for which it returns a truthy value are
    collected. When `max` is non-zero, pushing stops once that many works
    have been pushed. If any work was pushed, the facet's works are
    replaced by the pushed ones and an ONIX feed for them is pushed too.
    """
    facet = get_target_facet(target, start=start, new=new)
    delivered = []
    for work in facet.works:
        if target.push(work, new=new):
            delivered.append(work)
            logger.info(u'{} pushed to {}'.format(work, target))
        else:
            logger.info(u'{} was not pushed to {}'.format(work, target))
        # max == 0 means "no limit"
        if max and len(delivered) >= max:
            break
    # the ONIX feed should only describe what was actually pushed
    facet.works = delivered
    if delivered:
        push_onix(target, facet)
def get_target_facet(target, start=datetime(1900,1,1), new=False):
    """Build and return a facet object selecting the works to push to `target`.

    The facet's `works` are the distinct Works that have an 'isbn'
    identifier and an ebook created after `start`, in one of the target's
    formats, from one of the listed providers, ordered by '-featured'.
    `new` is accepted but not used here.
    """
    formats = [fmt.name for fmt in target.formats.all()]

    def format_filter(ebooks):
        # keep only ebooks in a format the target accepts
        return ebooks.filter(format__in=formats)

    def edition_format_filter(editions):
        # keep only editions with an ebook in a format the target accepts
        return editions.filter(ebooks__format__in=formats)

    class TargetFacet(BaseFacet):
        def __init__(self):
            self.facet_object = self
            self.works = Work.objects.filter(
                editions__ebooks__created__gt = start,
                identifiers__type="isbn",
                editions__ebooks__format__in = formats,
                editions__ebooks__provider__in = ('Internet Archive', 'Unglue.it', 'Github', 'OAPEN Library'),
            ).distinct().order_by('-featured')

        model_filters = {"Ebook": format_filter, "Edition": edition_format_filter}
        outer_facet = None
        title = u"Free Ebooks curated by Unglue.it"
        description = "Unglue.it eBooks for {} distribution.".format( target.name )

    return TargetFacet()
def push_onix(target, facet_class):
    """Push the ONIX feed for `facet_class` to `target` under a timestamped file name."""
    # the file name embeds the current local time as YYYYmmddHHMMSS
    filename = 'unglueit_onix_{:%Y%m%d%H%M%S}.xml'.format(datetime.now())
    feed = StringIO(onix_feed(facet_class))
    target.push_file(filename, feed)
def push_all(start=datetime(1900,1,1), new=False, max=0):
    """Run push_books() with the same options for every Target."""
    for destination in Target.objects.all():
        push_books(destination, start=start, new=new, max=max)

View File

@ -62,11 +62,11 @@
document.getElementById('id_kindle_email').focus()
</script>
</div>
https://www.amazon.com/gp/digital/fiona/manage#manageDevices
<div>
<p>Don't know the email address for your device or reading app? <a href="https://www.amazon.com/myk#manageDevices">Find it here</a>. (If you're not logged in to Amazon, you need to click "Manage Your Devices" in the "Your Kindle Account" section on the lower left side of the page.)</p>
<p>Once we have your Kindle email, you'll be able to send unglued ebooks to your Kindle device or app with one click from any Unglue.it download page.</p>
<p>Once we have your Kindle email, you'll be able to send unglued ebooks to your Kindle device or app with one click from any Unglue.it download page. Note: ebooks that we send will appear in the "Docs" tab, not "Books".</p>
</div>
{% endwith %}
{% endblock %}

View File

@ -6,7 +6,7 @@
{% load lib_acqs %}
{% block title %}&#151;
{% if work.is_free %}
{{ work.title }} is a Free eBook. {% if work.first_pdf %}[PDF]{% endif %}{% if work.first_epub %}[EPUB, MOBI]{% endif %}
{{ work.title }} is a Free eBook. {% for fmt in work.formats %}[{{ fmt }}]{% endfor %}
{% else %}
Help us make {{ work.title }} a Free eBook!
{% endif %}{% ifequal action 'editions' %} All Editions{% endifequal %}

View File

@ -3328,21 +3328,13 @@ def send_to_kindle(request, work_id, javascript='0'):
This won't perfectly measure size of email, but should be safe, and is much faster than doing the check after download.
"""
try:
filehandle = urllib.urlopen(ebook.url)
filehandle = ebook.get_archive()
except IOError:
# problems connection to the ebook source
logger.error("couldn't connect error: %s", ebook.url)
return local_response(request, javascript, context, 5)
if not ebook.filesize:
try:
ebook.filesize = int(filehandle.info().getheaders("Content-Length")[0])
if ebook.save:
ebook.filesize = ebook.filesize if ebook.filesize < 2147483647 else 2147483647 # largest safe positive integer
ebook.save()
except IndexError:
# response has no Content-Length header probably a bad link
logger.error('Bad link error: %s', ebook.url)
return local_response(request, javascript, context, 4)
return local_response(request, javascript, context, 4)
if ebook.filesize > models.send_to_kindle_limit:
logger.info('ebook %s is too large to be emailed' % work.id)
return local_response(request, javascript, context, 0)

View File

@ -169,6 +169,7 @@ INSTALLED_APPS = (
# this must appear *after* django.frontend or else it overrides the
# registration templates in frontend/templates/registration
'django.contrib.admin',
'regluit.distro',
'regluit.booxtream',
'regluit.pyepub',
'regluit.libraryauth',