work in progress

pull/1/head
Raymond Yee 2014-12-18 14:52:06 -05:00
parent 12450db418
commit eb7bba0eaa
1 changed files with 180 additions and 301 deletions

View File

@ -1,7 +1,7 @@
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:5f0de387b7cbfe304b5cd1aa8db4f63ffa60fc4e3f17b1980a7fe3cb473b02cd" "signature": "sha256:f02ae8a8cd487879963980d4c8030c6f3082ce0cdb55608e6ae28290f1d2fb8c"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
@ -21,7 +21,7 @@
" for a given edition, return 1 if there is a mobi ebook, 0 if there is none but we have an epub or html to convert from,\n", " for a given edition, return 1 if there is a mobi ebook, 0 if there is none but we have an epub or html to convert from,\n",
" and -1 for no epub/html to convert from\n", " and -1 for no epub/html to convert from\n",
" \"\"\"\n", " \"\"\"\n",
" formats = set([ebook.format for ebook in edition.ebooks.all()])\n", " formats = set([ebook.format for ebook in edition.work.ebooks()])\n",
" if 'mobi' in formats:\n", " if 'mobi' in formats:\n",
" return 1\n", " return 1\n",
" elif ('epub' in formats) or ('html' in formats):\n", " elif ('epub' in formats) or ('html' in formats):\n",
@ -31,51 +31,71 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [],
"prompt_number": 3
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"from collections import Counter\n", "# generator for editions to add mobi to\n",
"from regluit.core.models import Edition\n", "# campaigns that can have mobi files but don't yet.\n",
"\n", "\n",
"# of all the Editions with ebook, compute the \"mobi status\"\n", "def editions_to_convert():\n",
"Counter([edition_mobi_status(edition) for edition in Edition.objects.filter(ebooks__isnull=False).all()])" " for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct():\n",
" if edition_mobi_status(campaign.edition) == 0: # possible to generate mobi\n",
" yield campaign.edition\n",
" \n",
" \n",
"list(editions_to_convert())"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"[<Edition: The Global Librarian (ISBN 9781629218335) Metropolitan New York Library Council>,\n",
" <Edition: Option Libre (ISBN 9782953918748) Framasoft>,\n",
" <Edition: Libres conseils. Ce que nous aurions aim\u00e9 savoir avant de commencer (ISBN 9781092674041) Framasoft>]"
]
}
],
"prompt_number": 8
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"# looking only at Campaign related ebooks\n", "# http://127.0.0.1:8000/work/138133/ --> The Global Librarian\n",
"\n", "\n",
"from regluit.core.models import Campaign\n", "from itertools import islice\n",
"\n", "edition = list(islice(editions_to_convert(),1))[0]\n",
"Counter([edition_mobi_status(campaign.edition) for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct()])" "edition.work.ebooks(), edition.work.id"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
}, {
{ "metadata": {},
"cell_type": "code", "output_type": "pyout",
"collapsed": false, "prompt_number": 9,
"input": [ "text": [
"import requests\n", "([<Ebook: The Global Librarian (epub from Unglue.it)>], 138133L)"
"r = requests.get(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\", verify=False)" ]
}
], ],
"language": "python", "prompt_number": 9
"metadata": {},
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"# sample code to use convert_to_mobi \n",
"# write output to file system\n",
"\n",
"from regluit.core.mobigen import convert_to_mobi\n", "from regluit.core.mobigen import convert_to_mobi\n",
"\n", "\n",
"output = convert_to_mobi(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\")\n", "output = convert_to_mobi(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\")\n",
@ -87,34 +107,6 @@
"metadata": {}, "metadata": {},
"outputs": [] "outputs": []
}, },
{
"cell_type": "code",
"collapsed": false,
"input": [
"!ls -lt /Users/raymondyee/Downloads/test.mobi"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# let's remind ourselves about how to write to S3 and integrate an ebook record\n",
"# https://github.com/Gluejar/regluit/blob/36793b0b118fd97b52ab0d8637e8e34ab6d8672e/core/models.py#L1776\n",
"\n",
"#ebf holding the books we generate in the watermarking/custom messaging process, right?\n",
"\n",
"from regluit.core.models import EbookFile\n",
"\n",
"for ebookfile in EbookFile.objects.all():\n",
" print (ebookfile, ebookfile.file, ebookfile.edition.title)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
@ -127,61 +119,6 @@
"metadata": {}, "metadata": {},
"outputs": [] "outputs": []
}, },
{
"cell_type": "code",
"collapsed": false,
"input": [
"for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct():\n",
" #print (campaign.edition.title, edition_mobi_status(campaign.edition))\n",
" if edition_mobi_status(campaign.edition) == 0: # possible to generate mobi\n",
" print(campaign.edition.title, campaign.edition.ebooks.filter(format='epub')[0].url)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pledge, b2u, t4u"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c = Campaign.objects.all()[0]\n",
"c.edition.ebooks.all(), c.type"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Ebook.objects.filter(format='epub').filter(edition__id=202594)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from regluit.core.models import Identifier\n",
"Identifier.objects.filter(edition__isnull=False).filter(~Q(edition__work__id = F('work__id'))).count()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{ {
"cell_type": "heading", "cell_type": "heading",
"level": 1, "level": 1,
@ -222,12 +159,12 @@
"collapsed": false, "collapsed": false,
"input": [ "input": [
"from django.core.files.storage import default_storage\n", "from django.core.files.storage import default_storage\n",
"from django.core.files.base import ContentFile, File\n", "from django.core.files.base import ContentFile, File"
"#from django.core.cache import cache"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [],
"prompt_number": 10
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -237,29 +174,17 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
}, {
{ "metadata": {},
"cell_type": "code", "output_type": "pyout",
"collapsed": false, "prompt_number": 20,
"input": [ "text": [
"# but I can't use exists to test existence of S3 folder\n", "([u'ebf', u'Users', u'doab', u'marc_test'], [u'storage_test'])"
"default_storage.exists('ebf'), default_storage.exists('/ebf'), default_storage.exists('/ebf/')" ]
}
], ],
"language": "python", "prompt_number": 20
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# can test existence of files, but not appa\n",
"default_storage.exists(\"/Users/rdhyee/covers/52/AWizardOfEarthsea(1stEd).jpg\")"
],
"language": "python",
"metadata": {},
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -273,35 +198,12 @@
"outputs": [] "outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "heading",
"collapsed": false, "level": 1,
"input": [
"k = default_storage.bucket.get_all_keys()[0]"
],
"language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "source": [
}, "Sample code for using default_storage"
{ ]
"cell_type": "code",
"collapsed": false,
"input": [
"k.url"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# I was expecting true\n",
"default_storage.exists('ebf')"
],
"language": "python",
"metadata": {},
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -364,7 +266,7 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"# clean up some files\n", "# clean up some files \n",
"\n", "\n",
"print (default_storage.bucket)\n", "print (default_storage.bucket)\n",
"\n", "\n",
@ -404,28 +306,18 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [],
}, "prompt_number": 12
{
"cell_type": "code",
"collapsed": false,
"input": [
"md_book = open(\"/Users/raymondyee/Downloads/hello.mobi\", \"rb\")\n",
"md_s3 = File(md_book)\n",
"md_s3.content_type = \"application/x-mobipocket-ebook\"\n",
"\n",
"default_storage.save(\"/ebf/hello.mobi\", md_s3)"
],
"language": "python",
"metadata": {},
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"def write_file_to_storage(file_object, content_type, path):\n", "def write_file_to_storage(file_object, content_type, path):\n",
" file_s3 = File(file_object)\n", " \"\"\"\n",
" write file_object to the default_storage at given path\n",
" \"\"\"\n",
" file_s3 = ContentFile(file_object)\n",
" file_s3.content_type = content_type\n", " file_s3.content_type = content_type\n",
" \n", " \n",
" default_storage.save(path, file_s3)\n", " default_storage.save(path, file_s3)\n",
@ -433,20 +325,24 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [],
"prompt_number": 13
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"file_ = write_file_to_storage(open(\"/Users/raymondyee/Downloads/hello.mobi\"), \n", "import uuid\n",
"\n",
"file_ = write_file_to_storage(open(\"/Users/raymondyee/Downloads/hello.mobi\").read(), \n",
" \"application/x-mobipocket-ebook\", \n", " \"application/x-mobipocket-ebook\", \n",
" \"/ebf/hello.mobi\")\n", " \"/ebf/{0}.mobi\".format(uuid.uuid4().get_hex()))\n",
"\n" "\n"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [],
"prompt_number": 14
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -456,7 +352,17 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 15,
"text": [
"'/ebf/304dbd385e384e6cbe9fdec019004b69.mobi'"
]
}
],
"prompt_number": 15
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -466,7 +372,17 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 16,
"text": [
"'https://ry-dev-unglueit.s3.amazonaws.com/ebf/304dbd385e384e6cbe9fdec019004b69.mobi'"
]
}
],
"prompt_number": 16
}, },
{ {
"cell_type": "heading", "cell_type": "heading",
@ -487,24 +403,18 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
}, {
{ "output_type": "stream",
"cell_type": "code", "stream": "stdout",
"collapsed": false, "text": [
"input": [ "(u'The Global Librarian', u'https://unglueit-files.s3.amazonaws.com/ebf/619c98c3192c695caabdce71766e7245.epub')\n",
"# generator for editions to add mobi to\n", "(u'Option Libre', u'https://unglueit-files.s3.amazonaws.com/ebf/830cac2e0b26dfe576e6658623f6243a.epub')\n",
"\n", "(u'Libres conseils. Ce que nous aurions aim\\xe9 savoir avant de commencer', u'https://unglueit-files.s3.amazonaws.com/ebf/f35b38527140a26cf44aa37bf540f24f.epub')\n"
"def editions_to_convert():\n", ]
" for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct():\n", }
" #print (campaign.edition.title, edition_mobi_status(campaign.edition))\n",
" if edition_mobi_status(campaign.edition) == 0: # possible to generate mobi\n",
" yield campaign.edition\n",
" "
], ],
"language": "python", "prompt_number": 17
"metadata": {},
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -514,41 +424,53 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
}, {
{ "metadata": {},
"cell_type": "code", "output_type": "pyout",
"collapsed": false, "prompt_number": 18,
"input": [ "text": [
"import uuid" "[<Edition: The Global Librarian (ISBN 9781629218335) Metropolitan New York Library Council>,\n",
" <Edition: Option Libre (ISBN 9782953918748) Framasoft>,\n",
" <Edition: Libres conseils. Ce que nous aurions aim\u00e9 savoir avant de commencer (ISBN 9781092674041) Framasoft>]"
]
}
], ],
"language": "python", "prompt_number": 18
"metadata": {},
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"from itertools import islice\n", "from itertools import islice\n",
"from StringIO import StringIO\n",
"\n",
"from regluit.core.mobigen import convert_to_mobi\n",
"\n",
"import uuid\n",
"\n",
"for edition in islice(editions_to_convert(),1):\n", "for edition in islice(editions_to_convert(),1):\n",
" print (edition)\n", " print (edition, edition.work.id)\n",
" \n", " \n",
" # pull out the sister edition to convert from\n", " # pull out the sister edition to convert from\n",
" sister_ebook = edition.ebooks.filter(format__in=['epub', 'pdf'])[0]\n", " sister_ebook = edition.ebooks.filter(format__in=['epub', 'html'])[0]\n",
" \n", " \n",
" # run the conversion process\n", " # run the conversion process\n",
" # output = StringIO(convert_to_mobi(sister_ebook.url))\n", " \n",
" output = open(\"/Users/raymondyee/Downloads/hello.mobi\")\n", " print(sister_ebook.url)\n",
"\n",
" #output = convert_to_mobi(sister_ebook.url)\n",
" output = open(\"/Users/raymondyee/Downloads/hello.mobi\").read()\n",
" \n",
" file_ = write_file_to_storage(output, \n", " file_ = write_file_to_storage(output, \n",
" \"application/x-mobipocket-ebook\", \n", " \"application/x-mobipocket-ebook\", \n",
" \"/ebf/hello.mobi\")\n", " \"/ebf/{0}.mobi\".format(uuid.uuid4().get_hex()))\n",
" \n", " \n",
" # create a path for the ebookfile: \n", " # create a path for the ebookfile: IS THIS NECESSARY?\n",
" # https://github.com/Gluejar/regluit/blob/25dcb06f464dc11b5e589ab6859dfcc487f8f3ef/core/models.py#L1771\n", " # https://github.com/Gluejar/regluit/blob/25dcb06f464dc11b5e589ab6859dfcc487f8f3ef/core/models.py#L1771\n",
" \n", " \n",
" ebfile = EbookFile(edition=edition, file=file_, format='mobi')\n", " #ebfile = EbookFile(edition=edition, file=file_, format='mobi')\n",
" ebfile.save()\n", " #ebfile.save()\n",
"\n", "\n",
" # maybe need to create an ebook pointing to ebookFile ?\n", " # maybe need to create an ebook pointing to ebookFile ?\n",
" # copy metadata from sister ebook\n", " # copy metadata from sister ebook\n",
@ -565,19 +487,48 @@
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": [
}, {
{ "output_type": "stream",
"cell_type": "code", "stream": "stdout",
"collapsed": false, "text": [
"input": [ "(<Edition: Option Libre (ISBN 9782953918748) Framasoft>, 140699L)\n",
"# from django.db.models import Q\n", "https://unglueit-files.s3.amazonaws.com/ebf/830cac2e0b26dfe576e6658623f6243a.epub\n",
"# Item.objects.filter(creator__in=creators)\n", "send:"
"edition.ebooks.filter(format__in=['epub', 'pdf'])[0]" ]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 'HEAD /ebf/d2a010ffe36a12ebba58f3721db814ad.mobi HTTP/1.1\\r\\nHost: ry-dev-unglueit.s3.amazonaws.com\\r\\nAccept-Encoding: identity\\r\\nDate: Wed, 17 Dec 2014 15:57:56 GMT\\r\\nContent-Length: 0\\r\\nAuthorization: AWS AKIAINIMDFN7LAY3WWKA:3Qb/Xh2ukKpn756OFfv42sKuduw=\\r\\nUser-Agent: Boto/2.8.0 (darwin)\\r\\n\\r\\n'\n",
"reply:"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 'HTTP/1.1 404 Not Found\\r\\n'\n",
"header: x-amz-request-id: 5C64AFB873D76C27\r\n",
"header: x-amz-id-2: BlGzZrubjkmomUW141X0CnBxSMqNQx4ty1+tX8wof/Kb8l1PRyLWjTn39RaEpYbF5IzFR7tfCGc=\r\n",
"header: Content-Type: application/xml\r\n",
"header: Transfer-Encoding: chunked\r\n",
"header: Date: Wed, 17 Dec 2014 15:57:55 GMT\r\n",
"header: Server: AmazonS3\r\n",
"send: 'PUT /ebf/d2a010ffe36a12ebba58f3721db814ad.mobi HTTP/1.1\\r\\nHost: ry-dev-unglueit.s3.amazonaws.com\\r\\nAccept-Encoding: identity\\r\\nContent-MD5: LLhBofTBmSbRkVZFGkZDww==\\r\\nContent-Length: 8749347\\r\\nExpect: 100-Continue\\r\\nDate: Wed, 17 Dec 2014 15:57:56 GMT\\r\\nUser-Agent: Boto/2.8.0 (darwin)\\r\\nContent-Type: application/x-mobipocket-ebook\\r\\nAuthorization: AWS AKIAINIMDFN7LAY3WWKA:+HaOsbKJd1IWoC8fELOTeuTIRWM=\\r\\nx-amz-acl: public-read\\r\\n\\r\\n'\n",
"https://ry-dev-unglueit.s3.amazonaws.com/ebf/1549a778ca974aea89bc125b83e52dcc.mobi"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
], ],
"language": "python", "prompt_number": 22
"metadata": {},
"outputs": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -621,78 +572,6 @@
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [] "outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"edition.ebooks.all()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"edition.ebook_files.all()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ebf.save()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ebf.id"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ebf.edition.ebooks.all()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"edition_mobi_status(edition)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"\n",
"\n",
"ebf1= EbookFile(edition=e1, file=file_, format='mobi')"
],
"language": "python",
"metadata": {},
"outputs": []
} }
], ],
"metadata": {} "metadata": {}