237 lines
7.4 KiB
Plaintext
237 lines
7.4 KiB
Plaintext
{
|
|
"metadata": {
|
|
"name": "",
|
|
"signature": "sha256:c3439815c10b9795d3d95691a1edd9789672f61dc0b415cd57b8e06d0552f8a5"
|
|
},
|
|
"nbformat": 3,
|
|
"nbformat_minor": 0,
|
|
"worksheets": [
|
|
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# compute whether we can apply mobigen to a given edition to produce a mobi file\n",
|
|
"# need to have an ebook in epub or pdf format \n",
|
|
"# possible return values: already has a mobi file / can generate a mobi file / not possible\n",
|
|
"\n",
|
|
"def edition_mobi_status(edition):\n",
|
|
" \"\"\"\n",
|
|
" for a given edition, return 1 if there is a mobi ebook, 0 if there is none but we have an epub or html to convert from,\n",
|
|
" and -1 for no epub/html to convert from\n",
|
|
" \"\"\"\n",
|
|
" formats = set([ebook.format for ebook in edition.ebooks.all()])\n",
|
|
" if 'mobi' in formats:\n",
|
|
" return 1\n",
|
|
" elif ('epub' in formats) or ('html' in formats):\n",
|
|
" return 0\n",
|
|
" else:\n",
|
|
" return -1"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"from collections import Counter\n",
|
|
"from regluit.core.models import Edition\n",
|
|
"\n",
|
|
"# of all the Editions with ebook, compute the \"mobi status\"\n",
|
|
"Counter([edition_mobi_status(edition) for edition in Edition.objects.filter(ebooks__isnull=False).all()])"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# looking only at Campaign related ebooks\n",
|
|
"\n",
|
|
"from regluit.core.models import Campaign\n",
|
|
"\n",
|
|
"Counter([edition_mobi_status(campaign.edition) for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct()])"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"import requests\n",
|
|
"r = requests.get(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\", verify=False)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"from regluit.core.mobigen import convert_to_mobi\n",
|
|
"\n",
|
|
"output = convert_to_mobi(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\")\n",
|
|
"\n",
|
|
"with open(\"/Users/raymondyee/Downloads/test.mobi\", \"wb\") as f:\n",
|
|
" f.write(output)\n"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stderr",
|
|
"text": [
|
|
"/Users/raymondyee/anaconda/envs/regluit/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py:730: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.org/en/latest/security.html (This warning will only appear once by default.)\n",
|
|
" InsecureRequestWarning)\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 1
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"!ls -lt /Users/raymondyee/Downloads/test.mobi"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# let's remind ourselves about how to write to S3 and integrate an ebook record\n",
|
|
"# https://github.com/Gluejar/regluit/blob/36793b0b118fd97b52ab0d8637e8e34ab6d8672e/core/models.py#L1776\n",
|
|
"\n",
|
|
"#ebf holding the books we generate in the watermarking/custom messaging process, right?\n",
|
|
"\n",
|
|
"from regluit.core.models import EbookFile\n",
|
|
"\n",
|
|
"for ebookfile in EbookFile.objects.all():\n",
|
|
" print (ebookfile, ebookfile.file, ebookfile.edition.title)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# how to work with EbookFile\n",
|
|
"# https://github.com/Gluejar/regluit/blob/792659c325a7bee2b49337408336fdeadab3464a/core/models.py#L904\n",
|
|
"# Campaign."
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct():\n",
|
|
" print (campaign.edition.title, edition_mobi_status(campaign.edition))\n",
|
|
" if edition_mobi_status(campaign.edition) == 0: # possible to generate mobi\n",
|
|
" print(campaign.edition.ebooks.filter(format='epub')[0].url)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"(u'Open Access eBooks', 1)\n",
|
|
"(u'Oral Literature in Africa', 1)\n",
|
|
"(u'The Third Awakening', 1)\n",
|
|
"(u'Feeding the City', 0)\n",
|
|
"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\n",
|
|
"(u'Complex Predicates', -1)\n",
|
|
"(u'Flatland', 1)\n",
|
|
"(u'Green Comet', 1)\n",
|
|
"(u'23rd Century Romance', 1)\n",
|
|
"(u'Moebius Noodles', -1)\n",
|
|
"(u'The Classic Short Story, 1870-1925: Theory of a Genre', -1)\n",
|
|
"(u'The Pains', -1)\n",
|
|
"(u'The Global Librarian', 0)\n",
|
|
"https://unglueit-files.s3.amazonaws.com/ebf/619c98c3192c695caabdce71766e7245.epub\n",
|
|
"(u'Heaven - The Afterlife Series I', 1)\n",
|
|
"(u'Digitization in the Real World', -1)\n",
|
|
"(u'Zero Sum Game', 1)"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"\n",
|
|
"(u'Introduction to High Performance Scientific Computing', -1)\n",
|
|
"(u'Libres conseils : Ce que nous aurions aime\\u0301 savoir avant de commencer', 1)\n",
|
|
"(u'Option Libre', 0)\n",
|
|
"https://unglueit-files.s3.amazonaws.com/ebf/830cac2e0b26dfe576e6658623f6243a.epub\n",
|
|
"(u'Libres conseils. Ce que nous aurions aim\\xe9 savoir avant de commencer', 0)\n",
|
|
"https://unglueit-files.s3.amazonaws.com/ebf/f35b38527140a26cf44aa37bf540f24f.epub\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 10
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"pledge, b2u, t4u"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"c = Campaign.objects.all()[0]\n",
|
|
"c.edition.ebooks.all(), c.type"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"Ebook.objects.filter(format='epub').filter(edition__id=202594)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"from regluit.core.models import Identifier\n",
|
|
"Identifier.objects.filter(edition__isnull=False).filter(~Q(edition__work__id = F('work__id'))).count()"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
}
|
|
],
|
|
"metadata": {}
|
|
}
|
|
]
|
|
} |