Merge pull request #292 from Gluejar/unglued_b2u_mobi

Unglued b2u mobi, mobi testing, and require file review
pull/1/head
Raymond Yee 2014-02-28 17:22:35 -08:00
commit 4f124e63f4
12 changed files with 485 additions and 28 deletions

View File

@ -95,7 +95,7 @@ class PremiumAdmin(ModelAdmin):
class CampaignAdmin(ModelAdmin):
list_display = ('work', 'created', 'status')
date_hierarchy = 'created'
exclude = ('edition', 'work', 'managers', 'publisher', 'activated')
exclude = ('edition', 'work', 'managers', 'publisher', 'activated', 'deadline')
search_fields = ['work']
class WorkAdmin(ModelAdmin):

View File

@ -927,6 +927,13 @@ class Campaign(models.Model):
provider="Unglue.it",
url= settings.BASE_URL_SECURE + reverse('download_campaign',args=[self.work.id,format]),
)
old_ebooks = Ebook.objects.exclude(pk=ebook.pk).filter(
format=format,
rights=self.license,
provider="Unglue.it",
)
for old_ebook in old_ebooks:
old_ebook.delete()
return ebook.pk
@ -1070,11 +1077,8 @@ class Work(models.Model):
return "http://openlibrary.org" + self.openlibrary_id
def cover_image_small(self):
try:
if self.preferred_edition.cover_image_small():
return self.preferred_edition.cover_image_small()
except IndexError:
pass
if self.preferred_edition and self.preferred_edition.cover_image_small():
return self.preferred_edition.cover_image_small()
return "/static/images/generic_cover_larger.png"
def cover_image_thumbnail(self):
@ -1303,6 +1307,10 @@ class Work(models.Model):
def lib_acqs(self):
return self.acqs.filter(license=LIBRARY)
@property
def test_acqs(self):
return self.acqs.filter(license=TESTING).order_by('-created')
class user_license:
acqs=Acq.objects.none()
def __init__(self,acqs):

View File

@ -61,6 +61,8 @@ from regluit.core.lookups import (
)
from regluit.utils.localdatetime import now
from regluit.utils.fields import EpubFileField
from regluit.mobi import Mobi
from regluit.pyepub import EPUB
logger = logging.getLogger(__name__)
@ -167,11 +169,16 @@ class EbookFileForm(forms.ModelForm):
the_file = self.cleaned_data.get('file',None)
if the_file and the_file.name:
if format == 'epub':
if not zipfile.is_zipfile(the_file.file):
raise forms.ValidationError(_('%s is not a valid EPUB file' % the_file.name) )
try:
book = EPUB(the_file.file)
except Exception as e:
raise forms.ValidationError(_('Are you sure this is an EPUB file?: %s' % e) )
elif format == 'mobi':
if not zipfile.is_zipfile(the_file.file):
raise forms.ValidationError(_('%s is not a valid MOBI file' % the_file.name) )
try:
book = Mobi(the_file.file)
book.parse()
except Exception as e:
raise forms.ValidationError(_('Are you sure this is a MOBI file?: %s' % e) )
elif format == 'pdf':
try:
doc = PdfFileReader(the_file.file)
@ -400,10 +407,10 @@ class OfferForm(forms.ModelForm):
class Meta:
model = Offer
fields = 'work', 'price', 'license'
widgets = {
'work': forms.HiddenInput,
'license': forms.HiddenInput,
'active': forms.HiddenInput,
}
date_selector=range(date.today().year, settings.MAX_CC_DATE.year+1)

View File

@ -2,7 +2,7 @@
{% block doccontent %}
<div class="work_campaigns">
<b>Title</b>: {{ edition.title}}<br />
<b>Title</b>: <a href="{% url work edition.work.id %}">{{ edition.title}}</a><br />
<b>Publisher</b> : {{ edition.publisher_name }}<br />
<b>Authors</b>:
<ul>
@ -23,7 +23,7 @@
<h2> Ebook Files for this Edition</h2>
<ul>
{% for ebook_file in edition.ebook_files.all %}
<li>{{ebook_file.file}} created {{ebook_file.created}} </li>
<li><a href="{{ebook_file.file.url}}">{{ebook_file.file}}</a> created {{ebook_file.created}} </li>
{% endfor %}
</ul>
{% endif %}
@ -33,16 +33,16 @@
{% if watermarked %}
<p> Reference id: <b>{{watermarked.referenceid}}</b></p>
<ul>
<li><a href="{{watermarked.download_link_epub}}">Watermarked epub for testing</a></li>
<li><a href="{{watermarked.download_link_mobi}}">Watermarked mobi for testing</a></li>
<li><a href="{{watermarked.download_link_epub}}">Processed epub for testing</a></li>
<li><a href="{{watermarked.download_link_mobi}}">Processed mobi (kindle) for testing</a></li>
</ul>
{% else %}
{% else %}{% if upload_error %}
<p>
<span class="yikes">Unfortunately, your file failed testing.</span>
The error(s) were: <pre>
{{ upload_error }}</pre>
</p>
{% endif %}
{% endif %}{% endif %}
{% endifequal %}
{% ifequal edition.work.last_campaign.type 3 %}
{% if upload_error %}
@ -55,7 +55,7 @@
{{ upload_error }}
<h2>Upload Ebook files</h2>
{% ifequal edition.work.last_campaign.type 2 %}
<p>At this time, we accept only EPUB files for "Buy to Unglue" campaigns.
<p>At this time, we accept only EPUB files for "Buy to Unglue" campaigns. Files for Kindle will be autogenerated.
{% endifequal %}
{% ifequal edition.work.last_campaign.type 3 %}
<p>You can upload PDF, EPUB and MOBI files for "Thanks for Ungluing" campaigns.

View File

@ -121,16 +121,24 @@ Please fix the following before launching your campaign:
<li>Edition: <i>{{ edition }}</i>
<ul>
<li><a href="{% url new_edition edition.work.id edition.id %}"> Edit </a> the edition</li>
{% ifnotequal campaign.type 1 %}
{% if campaign.rh.can_sell %}
<li>You can also <a href="{% url edition_uploads edition.id %}"> Load a file</a> for this edition.</li>
{% endif %}
{% endifnotequal %}
</ul>
</li>
{% endfor %}
</ul>
{% if campaign.work.ebookfiles.0 %}
<p> <b>An Ebook file has been loaded.</b> </p>
<p>Active file: {{campaign.work.ebookfiles.0.file}} created {{campaign.work.ebookfiles.0.created}} </p>
<p>Active file: <a href="{{campaign.work.ebookfiles.0.file.url}}">{{campaign.work.ebookfiles.0.file}}</a> created {{campaign.work.ebookfiles.0.created}} </p>
{% if campaign.work.test_acqs.0 %}
<ul>
<li><a href="{{campaign.work.test_acqs.0.watermarked.download_link_epub}}">Processed epub for testing</a></li>
<li><a href="{{campaign.work.test_acqs.0.watermarked.download_link_mobi}}">Processed mobi (kindle) for testing</a></li>
</ul>
{% endif %}
<p>Edition: <i>{{ campaign.work.ebookfiles.0.edition }}</i> </p>
{% endif %}
<form action="#" method="POST">
@ -387,7 +395,11 @@ Please fix the following before launching your campaign:
{{ offer.offer_form.active }}
{{ offer.offer_form.license }}
{{ offer.offer_form.work }}</span>
{% if offer.active %}
<input type="submit" name="change_offer" value="Change Price" />
{% else %}
<input type="submit" name="change_offer" value="Set Offer" /> <span class="yikes"><i>This offer is not active</i></span>
{% endif %}
</div></form>
<p />
{% endfor %}
@ -437,6 +449,7 @@ Please fix the following before launching your campaign:
<p> Buy To Unglue campaigns can't be launched until ebook files <a class="tabs1">have been loaded</a> and <a class="tabs2">pricing has been set and made active</a></p>
<!-- {{campaign.problems }} -->
{% endifequal %}
{% endif %}
{% else %}

View File

@ -98,6 +98,25 @@
</ul>
</dd>
</dl>
<h2>Ebook Files</h2>
<dd>
<ul class="terms">
<li>{{ ebookfiles.today.count }} have been added today. </li>
<li>{{ ebookfiles.yesterday.count }} were added yesterday.
</li>
<li>{{ ebookfiles.days7.count }} have been added in the past 7 days.{% if request.user.is_staff %}
<ul class="terms">{% for ebook_file in ebookfiles.days7 %}
<li>{{ebook_file.edition.work.title}}: <a href="{{ebook_file.file.url}}">{{ebook_file.file}}</a> created {{ebook_file.created}}</li>
{% endfor %}</ul>{% endif %}
</li>
<li>{{ ebookfiles.month.count }} have been added in this month.
</li>
<li>{{ ebookfiles.all.count }} have been added in total.
</li>
</ul>
</dd>
</dl>
<h2>Pledges</h2>
<dl>

View File

@ -64,6 +64,7 @@
<li><span class="format_display"><img src="/static/images/epub32.png" height="32" alt="epub" title="epub" /> (for iBooks, Readmill, Nook, Kobo) </span></li>
</ul>
</div>
</div>
</div>
</div>

View File

@ -434,16 +434,24 @@ def edition_uploads(request, edition_id):
if form.is_valid() :
logger.info("EbookFileForm is_valid")
form.save()
edition.work.last_campaign().save()
context['uploaded']=True
if campaign_type == BUY2UNGLUE:
# campaign manager gets a copy
test_acq = models.Acq.objects.create(user=request.user,work=edition.work,license= TESTING)
try:
test_acq.get_watermarked()
context['watermarked']= test_acq.watermarked
except Exception as e:
context['upload_error']= e
form.instance.delete()
if edition.work.last_campaign().status == 'SUCCESSFUL':
try:
edition.work.last_campaign().watermark_success()
except Exception as e:
context['upload_error']= e
form.instance.delete()
else:
# campaign manager gets a copy
test_acq = models.Acq.objects.create(user=request.user,work=edition.work,license= TESTING)
try:
test_acq.get_watermarked()
context['watermarked']= test_acq.watermarked
except Exception as e:
context['upload_error']= e
form.instance.delete()
if campaign_type == THANKS:
e = form.instance.check_file()
if e != None:
@ -657,6 +665,8 @@ def manage_campaign(request, id, action='manage'):
offer.offer_form=OfferForm(instance=offer, data = request.POST, prefix='offer_%d'%offer.id)
if offer.offer_form.is_valid():
offer.offer_form.save()
offer.active = True
offer.save()
alerts.append(_('Offer has been changed'))
else:
alerts.append(_('Offer has not been changed'))
@ -2051,6 +2061,12 @@ class InfoPageView(TemplateView):
ebooks.year = ebooks.filter(created__year = date_today().year)
ebooks.month = ebooks.year.filter(created__month = date_today().month)
ebooks.yesterday = ebooks.filter(created__range = (date_today()-timedelta(days=1), date_today()))
ebookfiles = models.EbookFile.objects
ebookfiles.today = ebookfiles.filter(created__range = (date_today(), now()))
ebookfiles.days7 = ebookfiles.filter(created__range = (date_today()-timedelta(days=7), now()))
ebookfiles.year = ebookfiles.filter(created__year = date_today().year)
ebookfiles.month = ebookfiles.year.filter(created__month = date_today().month)
ebookfiles.yesterday = ebookfiles.filter(created__range = (date_today()-timedelta(days=1), date_today()))
wishlists= models.Wishlist.objects.exclude(wishes__isnull=True)
wishlists.today = wishlists.filter(created__range = (date_today(), now()))
wishlists.days7 = wishlists.filter(created__range = (date_today()-timedelta(days=7), now()))
@ -2078,6 +2094,7 @@ class InfoPageView(TemplateView):
'users': users,
'works': works,
'ebooks': ebooks,
'ebookfiles': ebookfiles,
'wishlists': wishlists,
'transactions': transactions,
}

286
mobi/__init__.py Normal file
View File

@ -0,0 +1,286 @@
#!/usr/bin/env python
# encoding: utf-8
"""
Mobi.py
Created by Elliot Kroo on 2009-12-25.
Copyright (c) 2009 Elliot Kroo. All rights reserved.
"""
import sys
import os
import unittest
from struct import *
from pprint import pprint
import utils
from lz77 import uncompress_lz77
class Mobi:
    """Minimal reader for MOBI (PalmDOC container) ebook files.

    Typical use::

        book = Mobi(path_or_file_object)
        book.parse()
        book.title()

    After ``parse()`` the following attributes are populated:

    * ``contents`` -- the raw bytes of the whole file
    * ``header``   -- dict built by ``parseHeader``
    * ``records``  -- dict of record-info dicts keyed by record *index*
    * ``config``   -- dict with the ``'palmdoc'``, ``'mobi'`` and ``'exth'``
      headers (``'exth'`` is ``None`` when the EXTH flag is not set)
    """

    def parse(self):
        """Read in the file, then parse the header, record table and record 0."""
        try:
            self.contents = self.f.read()
        finally:
            # Only close handles this object opened itself; a caller-supplied
            # file object stays under the caller's control.
            if self._owns_file:
                self.f.close()
        self.header = self.parseHeader()
        self.records = self.parseRecordInfoList()
        self.readRecord0()

    def readRecord(self, recordnum, disable_compression=False):
        """Return the data of record ``recordnum``.

        Returns ``None`` when the file has not been parsed yet or when the
        PalmDOC compression type is neither 1 (stored) nor 2 (LZ77).
        With ``disable_compression=True`` the raw record bytes are returned
        regardless of the compression type.
        """
        if not self.config:
            return None
        start = self.records[recordnum]['record Data Offset']
        following = self.records.get(recordnum + 1)
        # The last record has no successor; it runs to the end of the file.
        if following is not None:
            end = following['record Data Offset']
        else:
            end = len(self.contents)
        compression = self.config['palmdoc']['Compression']
        if compression == 1 or disable_compression:
            return self.contents[start:end]
        if compression == 2:
            # Trailing "extra bytes" are not part of the compressed stream.
            return uncompress_lz77(
                self.contents[start:end - self.config['mobi']['extra bytes']])
        return None

    def readImageRecord(self, imgnum):
        """Return the raw bytes of image ``imgnum`` (None if not parsed yet)."""
        if not self.config:
            return None
        recordnum = self.config['mobi']['First Image index'] + imgnum
        return self.readRecord(recordnum, disable_compression=True)

    def author(self):
        "Returns the author of the book"
        # EXTH record type 100 is read as the author.
        # NOTE(review): raises TypeError/KeyError when the file has no EXTH
        # header or no record 100 -- callers should be prepared for that.
        return self.config['exth']['records'][100]

    def title(self):
        "Returns the title of the book"
        return self.config['mobi']['Full Name']

    ########### Private API ###########################

    def __init__(self, filename):
        """Accept either a path or an already opened binary file object.

        Raises IOError (after writing a note to stderr) when a path cannot
        be opened.
        """
        # Initialise everything parse() fills in, so the accessors can test
        # ``self.config`` safely before parse() has been called.
        self.contents = None
        self.header = None
        self.records = None
        self.config = None
        self.offset = 0
        self._owns_file = False
        if hasattr(filename, 'read'):
            self.f = filename
        else:
            try:
                self.f = open(filename, "rb")
            except IOError:
                sys.stderr.write("Could not open %s! " % filename)
                raise
            self._owns_file = True

    def __iter__(self):
        """Yield the content of the book records, one record at a time."""
        if not self.config:
            return
        # NOTE(review): the upper bound stops two short of
        # 'First Non-book index'; confirm this is intended.
        for record in range(1, self.config['mobi']['First Non-book index'] - 1):
            yield self.readRecord(record)

    def parseRecordInfoList(self):
        """Parse the record info list that follows the file header.

        Returns a dict mapping record index -> dict with
        'record Data Offset', 'UniqueID' and 'record Attributes'.
        """
        records = {}
        headerfmt = '>II'
        headerlen = calcsize(headerfmt)
        fields = [
            "record Data Offset",
            "UniqueID",
        ]
        # read in all records in info list
        for recordID in range(self.header['number of records']):
            values = unpack(headerfmt,
                            self.contents[self.offset:self.offset + headerlen])
            # increment offset into file
            self.offset += headerlen
            resultsDict = utils.toDict(zip(fields, values))
            # The top 8 bits of the 32-bit unique ID field are really the
            # "record attributes"; split them off.
            resultsDict['record Attributes'] = (resultsDict['UniqueID'] & 0xFF000000) >> 24
            resultsDict['UniqueID'] = resultsDict['UniqueID'] & 0x00FFFFFF
            # Key by position in the list: every consumer (readRecord,
            # parseMobiHeader, parsePalmDOCHeader) addresses records by
            # index, and unique IDs are not guaranteed to be 0..n-1.
            records[recordID] = resultsDict
        return records

    def parseHeader(self):
        """Parse the fixed-size header at the start of the file into a dict."""
        headerfmt = '>32shhIIIIII4s4sIIH'
        headerlen = calcsize(headerfmt)
        fields = [
            "name",
            "attributes",
            "version",
            "created",
            "modified",
            "backup",
            "modnum",
            "appInfoId",
            "sortInfoID",
            "type",
            "creator",
            "uniqueIDseed",
            "nextRecordListID",
            "number of records"
        ]
        # unpack header, zip up into list of tuples
        results = zip(fields, unpack(headerfmt, self.contents[self.offset:self.offset + headerlen]))
        # increment offset into file
        self.offset += headerlen
        # convert tuple array to dictionary
        return utils.toDict(results)

    def readRecord0(self):
        """Parse the PalmDOC, MOBI and (if flagged) EXTH headers into self.config."""
        palmdocHeader = self.parsePalmDOCHeader()
        MobiHeader = self.parseMobiHeader()
        exthHeader = None
        if MobiHeader['Has EXTH Header']:
            exthHeader = self.parseEXTHHeader()
        self.config = {
            'palmdoc': palmdocHeader,
            'mobi': MobiHeader,
            'exth': exthHeader
        }

    def parseEXTHHeader(self):
        """Parse the EXTH header at self.offset.

        The returned dict has an extra 'records' entry mapping
        record type -> raw record data.
        """
        headerfmt = '>III'
        headerlen = calcsize(headerfmt)
        fields = [
            'identifier',
            'header length',
            'record Count'
        ]
        # unpack header, zip up into list of tuples
        results = zip(fields, unpack(headerfmt, self.contents[self.offset:self.offset + headerlen]))
        # convert tuple array to dictionary
        resultsDict = utils.toDict(results)
        self.offset += headerlen
        resultsDict['records'] = {}
        for record in range(resultsDict['record Count']):
            # Each record is: type (4 bytes), total length (4 bytes), data.
            recordType, recordLen = unpack(">II", self.contents[self.offset:self.offset + 8])
            recordData = self.contents[self.offset + 8:self.offset + recordLen]
            resultsDict['records'][recordType] = recordData
            self.offset += recordLen
        return resultsDict

    def parseMobiHeader(self):
        """Parse the MOBI header at self.offset.

        Adds the derived entries 'Start Offset', 'Full Name', 'Has DRM',
        'Has EXTH Header' and 'extra bytes' to the returned dict.
        """
        headerfmt = '> IIII II 40s III IIIII IIII I 36s IIII 8s HHIIIII'
        headerlen = calcsize(headerfmt)
        fields = [
            "identifier",
            "header length",
            "Mobi type",
            "text Encoding",
            "Unique-ID",
            "Generator version",
            "-Reserved",
            "First Non-book index",
            "Full Name Offset",
            "Full Name Length",
            "Language",
            "Input Language",
            "Output Language",
            "Format version",
            "First Image index",
            "First Huff Record",
            "Huff Record Count",
            "First DATP Record",
            "DATP Record Count",
            "EXTH flags",
            "-36 unknown bytes, if Mobi is long enough",
            "DRM Offset",
            "DRM Count",
            "DRM Size",
            "DRM Flags",
            "-Usually Zeros, unknown 8 bytes",
            "-Unknown",
            "Last Image Record",
            "-Unknown",
            "FCIS record",
            "-Unknown",
            "FLIS record",
            "Unknown"
        ]
        # unpack header, zip up into list of tuples
        results = zip(fields, unpack(headerfmt, self.contents[self.offset:self.offset + headerlen]))
        # convert tuple array to dictionary
        resultsDict = utils.toDict(results)
        resultsDict['Start Offset'] = self.offset
        # The full name is located relative to the start of record 0.
        name_start = self.records[0]['record Data Offset'] + resultsDict['Full Name Offset']
        resultsDict['Full Name'] = self.contents[
            name_start:name_start + resultsDict['Full Name Length']]
        resultsDict['Has DRM'] = resultsDict['DRM Offset'] != 0xFFFFFFFF
        resultsDict['Has EXTH Header'] = (resultsDict['EXTH flags'] & 0x40) != 0
        self.offset += resultsDict['header length']
        # The last two bytes of the header are a 16-bit flag word; 'extra
        # bytes' is twice the number of set bits in it, ignoring bit 0.
        flags = unpack(">H", self.contents[self.offset - 2:self.offset])[0] & 0xFFFE
        resultsDict['extra bytes'] = 2 * bin(flags).count("1")
        return resultsDict

    def parsePalmDOCHeader(self):
        """Parse the PalmDOC header at the start of record 0.

        Leaves self.offset pointing just past it.
        """
        headerfmt = '>HHIHHHH'
        headerlen = calcsize(headerfmt)
        fields = [
            "Compression",
            "Unused",
            "text length",
            "record count",
            "record size",
            "Encryption Type",
            "Unknown"
        ]
        offset = self.records[0]['record Data Offset']
        # create tuple with info
        results = zip(fields, unpack(headerfmt, self.contents[offset:offset + headerlen]))
        # convert tuple array to dictionary
        resultsDict = utils.toDict(results)
        self.offset = offset + headerlen
        return resultsDict
class MobiTests(unittest.TestCase):
    """Smoke tests run against the bundled CharlesDarwin.mobi sample file."""

    def setUp(self):
        self.mobitest = Mobi("../test/CharlesDarwin.mobi")

    def testParse(self):
        """Parsing succeeds; the resulting config is dumped for inspection."""
        self.mobitest.parse()
        pprint(self.mobitest.config)

    def testRead(self):
        """The first four text records can be read and concatenated."""
        self.mobitest.parse()
        content = ""
        for i in range(1, 5):
            content += self.mobitest.readRecord(i)

    def testImage(self):
        """The first four image records are written to imagerecordN.jpg."""
        self.mobitest.parse()
        pprint(self.mobitest.records)
        for record in range(4):
            # Image data is binary: write in 'wb' mode, and let the context
            # manager close the handle even if the read fails.
            with open("imagerecord%d.jpg" % record, 'wb') as f:
                f.write(self.mobitest.readImageRecord(record))

    def testAuthorTitle(self):
        """Author and title match the known values of the sample file."""
        self.mobitest.parse()
        self.assertEqual(self.mobitest.author(), 'Charles Darwin')
        self.assertEqual(self.mobitest.title(), 'The Origin of Species by means ' +
                         'of Natural Selection, 6th Edition')


if __name__ == '__main__':
    unittest.main()

86
mobi/lz77.py Normal file
View File

@ -0,0 +1,86 @@
import struct
# ported directly from the PalmDoc Perl library
# http://kobesearch.cpan.org/htdocs/EBook-Tools/EBook/Tools/PalmDoc.pm.html
def uncompress_lz77(data):
    """Decompress a PalmDOC LZ77-compressed record.

    ``data`` is the compressed byte string (``str`` on Python 2, ``bytes``
    or ``bytearray`` on Python 3).  The decompressed byte string is
    returned.  On malformed input a warning is printed and whatever has
    been decoded so far is returned.
    """
    src = bytearray(data)
    length = len(src)
    offset = 0          # Current offset into src
    # Collect output in a bytearray: appending is amortised O(1), whereas
    # repeated string concatenation is quadratic.
    text = bytearray()

    while offset < length:
        ord_ = src[offset]
        offset += 1

        if ord_ == 0:
            # Nulls are literal
            text.append(ord_)
        elif ord_ <= 8:
            # Next ord_ bytes are literal
            text += src[offset:offset + ord_]
            offset += ord_
        elif ord_ <= 0x7f:
            # Values from 0x09 through 0x7f are literal
            text.append(ord_)
        elif ord_ <= 0xbf:
            # Data is LZ77-compressed.  A length-distance pair is always a
            # two-byte big-endian sequence: 2 ID bits, 11 bits of distance
            # and 3 bits of length.
            offset += 1
            if offset > length:
                print("WARNING: offset to LZ77 bits is outside of the data: %d" % offset)
                return bytes(text)

            lz77 = (ord_ << 8) | src[offset - 1]
            # Leftmost two bits are ID bits and need to be dropped
            lz77 &= 0x3fff
            # Length is rightmost 3 bits + 3
            lz77length = (lz77 & 0x0007) + 3
            # Remaining 11 bits are offset
            lz77offset = lz77 >> 3
            if lz77offset < 1:
                print("WARNING: LZ77 decompression offset is invalid!")
                return bytes(text)

            textpos = len(text) - lz77offset
            if textpos < 0:
                print("WARNING: LZ77 decompression reference is before" +
                      " beginning of text! %x" % lz77)
                # Return the partial output, as the other warning branches
                # do, rather than None.
                return bytes(text)

            # The reference may overlap bytes that this very copy produces
            # (distance < length), so copy one byte at a time.
            for _ in range(lz77length):
                text.append(text[textpos])
                textpos += 1
        else:
            # 0xc0 - 0xff are single characters (XOR 0x80) preceded by
            # a space
            text += b' '
            text.append(ord_ ^ 0x80)

    return bytes(text)

20
mobi/utils.py Normal file
View File

@ -0,0 +1,20 @@
#!/usr/bin/env python
# encoding: utf-8
"""
utils.py
Created by Elliot Kroo on 2009-12-25.
Copyright (c) 2009 Elliot Kroo. All rights reserved.
"""
import sys
import os
import unittest
def toDict(tuples):
    """Build a dict from an iterable of ``(field, value)`` pairs.

    Pairs whose field name is empty or starts with ``"-"`` are dropped;
    the header parsers use a leading dash to mark reserved or unknown
    fields.  When a field name occurs more than once the last value wins.
    """
    return dict(
        (field, value)
        for field, value in tuples
        if field and field[0] != "-"
    )

View File

@ -6,7 +6,7 @@ class Command(BaseCommand):
help = "retotal all campaigns"
def handle(self, *args, **kwargs):
campaigns = regluit.payment.models.Campaign.objects.all()
campaigns = regluit.core.models.Campaign.objects.all()
for c in campaigns:
c.update_left()
print c.left