580 lines
17 KiB
Plaintext
580 lines
17 KiB
Plaintext
{
|
|
"metadata": {
|
|
"name": "",
|
|
"signature": "sha256:f02ae8a8cd487879963980d4c8030c6f3082ce0cdb55608e6ae28290f1d2fb8c"
|
|
},
|
|
"nbformat": 3,
|
|
"nbformat_minor": 0,
|
|
"worksheets": [
|
|
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# compute whether we can apply mobigen to a given edition to produce a mobi file\n",
|
|
"# need to have an ebook in epub or html format\n",
|
|
"# possible return values: already has a mobi file / can generate a mobi file / not possible\n",
|
|
"\n",
|
|
"def edition_mobi_status(edition):\n",
"    \"\"\"\n",
"    Classify the work behind `edition` by mobi availability.\n",
"\n",
"    Returns 1 if any ebook of the work is already in mobi format, 0 if there is no mobi\n",
"    but an epub or html ebook exists to convert from, and -1 otherwise.\n",
"    \"\"\"\n",
"    available = frozenset(ebook.format for ebook in edition.work.ebooks())\n",
"    if 'mobi' in available:\n",
"        return 1\n",
"    # no mobi yet: a conversion needs an epub or html source\n",
"    if available & frozenset(['epub', 'html']):\n",
"        return 0\n",
"    return -1"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 3
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# generator for editions to add mobi to\n",
|
|
"# campaigns that can have mobi files but don't yet.\n",
|
|
"\n",
|
|
"def editions_to_convert():\n",
"    \"\"\"Yield the edition of every campaign whose mobi status is 0 (no mobi yet, but convertible).\"\"\"\n",
"    campaigns_with_ebooks = Campaign.objects.filter(edition__ebooks__isnull=False).distinct()\n",
"    for campaign in campaigns_with_ebooks:\n",
"        if edition_mobi_status(campaign.edition) != 0:\n",
"            continue  # already has a mobi, or nothing to convert from\n",
"        yield campaign.edition\n",
|
|
" \n",
|
|
" \n",
|
|
"list(editions_to_convert())"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 8,
|
|
"text": [
|
|
"[<Edition: The Global Librarian (ISBN 9781629218335) Metropolitan New York Library Council>,\n",
|
|
" <Edition: Option Libre (ISBN 9782953918748) Framasoft>,\n",
|
|
" <Edition: Libres conseils. Ce que nous aurions aim\u00e9 savoir avant de commencer (ISBN 9781092674041) Framasoft>]"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 8
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# http://127.0.0.1:8000/work/138133/ --> The Global Librarian\n",
"\n",
"from itertools import islice\n",
"\n",
"# pull only the first edition out of the generator\n",
"first_editions = list(islice(editions_to_convert(), 1))\n",
"edition = first_editions[0]\n",
"(edition.work.ebooks(), edition.work.id)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 9,
|
|
"text": [
|
|
"([<Ebook: The Global Librarian (epub from Unglue.it)>], 138133L)"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 9
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# sample code to use convert_to_mobi \n",
"# write output to file system\n",
"\n",
"import os\n",
"\n",
"from regluit.core.mobigen import convert_to_mobi\n",
"\n",
"output = convert_to_mobi(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\")\n",
"\n",
"# resolve the target under the current user's home directory rather than one hard-coded account\n",
"mobi_path = os.path.expanduser(\"~/Downloads/test.mobi\")\n",
"with open(mobi_path, \"wb\") as f:\n",
"    f.write(output)\n"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# how to work with EbookFile\n",
|
|
"# https://github.com/Gluejar/regluit/blob/792659c325a7bee2b49337408336fdeadab3464a/core/models.py#L904\n",
|
|
"# Campaign."
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "heading",
|
|
"level": 1,
|
|
"metadata": {},
|
|
"source": [
|
|
"Using the low-level Django File Storage API"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"I need to remind myself about how the [File Storage API](https://docs.djangoproject.com/en/1.4/ref/files/storage/) works.\n",
|
|
"\n",
|
|
"We can use `default_storage` directly to read and write [file storage objects](https://docs.djangoproject.com/en/1.4/topics/files/#storage-objects) and to test for their existence.\n",
|
|
"\n",
|
|
"I'm a bit unclear about the relevance of [FileField](https://docs.djangoproject.com/en/1.4/ref/models/fields/#filefield).\n",
|
|
"\n",
|
|
"And how does `EbookFile` work? Look at https://github.com/Gluejar/regluit/blob/f7b796c6a6d220f6475dbfdc0a8aeb16a09e84b1/core/models.py#L1777:\n",
|
|
"\n",
|
|
"```python\n",
|
|
"class EbookFile(models.Model):\n",
|
|
" file = models.FileField(upload_to=path_for_file)\n",
|
|
"``` \n",
|
|
"\n",
|
|
"I should be able to find hints about how to instantiate an `EbookFile` in the right way."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"BTW, is there a tension between the standard API and the extras provided by the S3 backend of django-storages (https://django-storages.readthedocs.org/en/latest/backends/amazon-S3.html)?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"from django.core.files.storage import default_storage\n",
|
|
"from django.core.files.base import ContentFile, File"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 10
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"default_storage.listdir(\"/\")"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 20,
|
|
"text": [
|
|
"([u'ebf', u'Users', u'doab', u'marc_test'], [u'storage_test'])"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 20
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# list all keys in the storage bucket\n",
|
|
"default_storage.bucket.get_all_keys()"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "heading",
|
|
"level": 1,
|
|
"metadata": {},
|
|
"source": [
|
|
"Sample code for using default_storage"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"default_storage.exists('storage_test')"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# NOTE: `file` shadows the Python 2 builtin; the name is kept because later cells reuse it\n",
"file = default_storage.open('storage_test', 'w')\n",
"try:\n",
"    file.write('storage contents')\n",
"finally:\n",
"    # always release the handle, even if the write raises\n",
"    file.close()\n",
"\n",
"default_storage.exists('storage_test')"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# list the attributes available on the storage file object\n",
"dir(file)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"file = default_storage.open('storage_test', 'r')\n",
"try:\n",
"    contents = file.read()\n",
"finally:\n",
"    # always release the handle, even if the read raises\n",
"    file.close()\n",
"\n",
"# end the cell on the data that was read so the notebook displays it\n",
"contents"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"default_storage.delete('storage_test')\n",
|
|
"default_storage.exists('storage_test')"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# clean up some files \n",
"\n",
"print (default_storage.bucket)\n",
"\n",
"# listdir returns (directories, files); only the files directly under /ebf are deleted\n",
"ebf_files = default_storage.listdir(\"/ebf\")[1]\n",
"for filename in ebf_files:\n",
"    print (default_storage.delete(\"/ebf/\" + filename))\n"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "heading",
|
|
"level": 1,
|
|
"metadata": {},
|
|
"source": [
|
|
"How to write the results of the conversion"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"from regluit.core.models import EbookFile\n",
|
|
"from django.core.files.storage import default_storage\n",
|
|
"from django.core.files.base import ContentFile, File\n",
|
|
"\n",
|
|
"# http://stackoverflow.com/a/519653\n",
|
|
"\n",
|
|
"def read_in_chunks(file_object, chunk_size=1024):\n",
"    \"\"\"Generator that lazily yields successive pieces of `file_object`.\n",
"\n",
"    Each piece is whatever `file_object.read(chunk_size)` returns (chunk_size defaults\n",
"    to 1024); iteration stops at the first empty read.\n",
"    \"\"\"\n",
"    piece = file_object.read(chunk_size)\n",
"    while piece:\n",
"        yield piece\n",
"        piece = file_object.read(chunk_size)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 12
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"def write_file_to_storage(file_object, content_type, path):\n",
"    \"\"\"\n",
"    Save content to default_storage at the given path and return the wrapping ContentFile.\n",
"\n",
"    file_object: the raw content to store (despite the name, callers in this notebook pass\n",
"        the result of read(), not an open file); it is wrapped in a ContentFile.\n",
"    content_type: value set as the `content_type` attribute of the ContentFile before saving.\n",
"    path: name to save under in default_storage.\n",
"\n",
"    The returned ContentFile has `name` set to the name reported by default_storage.save().\n",
"    \"\"\"\n",
"    file_s3 = ContentFile(file_object)\n",
"    file_s3.content_type = content_type\n",
"\n",
"    # save() returns the name the content was actually stored under; record it on the\n",
"    # returned object so callers can rely on file_s3.name whatever the storage backend does\n",
"    file_s3.name = default_storage.save(path, file_s3)\n",
"    return file_s3"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 13
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"import os\n",
"import uuid\n",
"\n",
"# read the sample mobi as bytes and make sure the local handle is closed afterwards;\n",
"# the path is resolved under the current user's home directory\n",
"with open(os.path.expanduser(\"~/Downloads/hello.mobi\"), \"rb\") as sample_mobi:\n",
"    sample_bytes = sample_mobi.read()\n",
"\n",
"# uuid4().hex gives the same 32-character hex string as the Python 2-only get_hex()\n",
"file_ = write_file_to_storage(sample_bytes, \n",
"                              \"application/x-mobipocket-ebook\", \n",
"                              \"/ebf/{0}.mobi\".format(uuid.uuid4().hex))\n",
|
|
"\n"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 14
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"file_.name"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 15,
|
|
"text": [
|
|
"'/ebf/304dbd385e384e6cbe9fdec019004b69.mobi'"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 15
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"default_storage.url(file_.name)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 16,
|
|
"text": [
|
|
"'https://ry-dev-unglueit.s3.amazonaws.com/ebf/304dbd385e384e6cbe9fdec019004b69.mobi'"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 16
|
|
},
|
|
{
|
|
"cell_type": "heading",
|
|
"level": 1,
|
|
"metadata": {},
|
|
"source": [
|
|
"Sample: write a sample mobi file as an Ebook for a campaign book"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct():\n",
"    if edition_mobi_status(campaign.edition) == 0: # possible to generate mobi\n",
"        # status 0 is also returned when the only source is html, so take at most one\n",
"        # epub and fall back to None instead of indexing an empty queryset\n",
"        epubs = list(campaign.edition.ebooks.filter(format='epub')[:1])\n",
"        print(campaign.edition.title, epubs[0].url if epubs else None)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"(u'The Global Librarian', u'https://unglueit-files.s3.amazonaws.com/ebf/619c98c3192c695caabdce71766e7245.epub')\n",
|
|
"(u'Option Libre', u'https://unglueit-files.s3.amazonaws.com/ebf/830cac2e0b26dfe576e6658623f6243a.epub')\n",
|
|
"(u'Libres conseils. Ce que nous aurions aim\\xe9 savoir avant de commencer', u'https://unglueit-files.s3.amazonaws.com/ebf/f35b38527140a26cf44aa37bf540f24f.epub')\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 17
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"list(editions_to_convert())"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 18,
|
|
"text": [
|
|
"[<Edition: The Global Librarian (ISBN 9781629218335) Metropolitan New York Library Council>,\n",
|
|
" <Edition: Option Libre (ISBN 9782953918748) Framasoft>,\n",
|
|
" <Edition: Libres conseils. Ce que nous aurions aim\u00e9 savoir avant de commencer (ISBN 9781092674041) Framasoft>]"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 18
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"from itertools import islice\n",
"from StringIO import StringIO\n",
"\n",
"from regluit.core.mobigen import convert_to_mobi\n",
"\n",
"import os\n",
"import uuid\n",
"\n",
"# NOTE(review): Ebook is never imported in this notebook -- presumably the kernel preloads the models; confirm\n",
"\n",
"for edition in islice(editions_to_convert(),1):\n",
"    print (edition, edition.work.id)\n",
"    \n",
"    # pull out the sister ebook to convert from\n",
"    # NOTE(review): edition_mobi_status inspects edition.work.ebooks() while this filters edition.ebooks;\n",
"    # if the two differ, the [0] below can raise IndexError -- confirm\n",
"    sister_ebook = edition.ebooks.filter(format__in=['epub', 'html'])[0]\n",
"    \n",
"    # run the conversion process\n",
"    \n",
"    print(sister_ebook.url)\n",
"\n",
"    #output = convert_to_mobi(sister_ebook.url)\n",
"    # stand-in for the real conversion: read a local sample as bytes, closing the handle afterwards\n",
"    with open(os.path.expanduser(\"~/Downloads/hello.mobi\"), \"rb\") as sample_mobi:\n",
"        output = sample_mobi.read()\n",
"    \n",
"    # uuid4().hex gives the same 32-character hex string as the Python 2-only get_hex()\n",
"    file_ = write_file_to_storage(output, \n",
"                                  \"application/x-mobipocket-ebook\", \n",
"                                  \"/ebf/{0}.mobi\".format(uuid.uuid4().hex))\n",
"    \n",
"    # create a path for the ebookfile: IS THIS NECESSARY?\n",
"    # https://github.com/Gluejar/regluit/blob/25dcb06f464dc11b5e589ab6859dfcc487f8f3ef/core/models.py#L1771\n",
"    \n",
"    #ebfile = EbookFile(edition=edition, file=file_, format='mobi')\n",
"    #ebfile.save()\n",
"\n",
"    # maybe need to create an ebook pointing to ebookFile ?\n",
"    # copy metadata from sister ebook\n",
"    \n",
"    ebfile_url = default_storage.url(file_.name)\n",
"    print (ebfile_url)\n",
"    \n",
"    # register the stored mobi as a new Ebook on the same edition, reusing the source ebook's rights\n",
"    ebook = Ebook(url=ebfile_url,\n",
"                  format=\"mobi\", \n",
"                  provider=\"Unglue.it\",\n",
"                  rights=sister_ebook.rights, \n",
"                  edition=edition)\n",
"    ebook.save()"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"(<Edition: Option Libre (ISBN 9782953918748) Framasoft>, 140699L)\n",
|
|
"https://unglueit-files.s3.amazonaws.com/ebf/830cac2e0b26dfe576e6658623f6243a.epub\n",
|
|
"send:"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
" 'HEAD /ebf/d2a010ffe36a12ebba58f3721db814ad.mobi HTTP/1.1\\r\\nHost: ry-dev-unglueit.s3.amazonaws.com\\r\\nAccept-Encoding: identity\\r\\nDate: Wed, 17 Dec 2014 15:57:56 GMT\\r\\nContent-Length: 0\\r\\nAuthorization: AWS [REDACTED]\\r\\nUser-Agent: Boto/2.8.0 (darwin)\\r\\n\\r\\n'\n",
|
|
"reply:"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
" 'HTTP/1.1 404 Not Found\\r\\n'\n",
|
|
"header: x-amz-request-id: 5C64AFB873D76C27\r\n",
|
|
"header: x-amz-id-2: BlGzZrubjkmomUW141X0CnBxSMqNQx4ty1+tX8wof/Kb8l1PRyLWjTn39RaEpYbF5IzFR7tfCGc=\r\n",
|
|
"header: Content-Type: application/xml\r\n",
|
|
"header: Transfer-Encoding: chunked\r\n",
|
|
"header: Date: Wed, 17 Dec 2014 15:57:55 GMT\r\n",
|
|
"header: Server: AmazonS3\r\n",
|
|
"send: 'PUT /ebf/d2a010ffe36a12ebba58f3721db814ad.mobi HTTP/1.1\\r\\nHost: ry-dev-unglueit.s3.amazonaws.com\\r\\nAccept-Encoding: identity\\r\\nContent-MD5: LLhBofTBmSbRkVZFGkZDww==\\r\\nContent-Length: 8749347\\r\\nExpect: 100-Continue\\r\\nDate: Wed, 17 Dec 2014 15:57:56 GMT\\r\\nUser-Agent: Boto/2.8.0 (darwin)\\r\\nContent-Type: application/x-mobipocket-ebook\\r\\nAuthorization: AWS [REDACTED]\\r\\nx-amz-acl: public-read\\r\\n\\r\\n'\n",
|
|
"https://ry-dev-unglueit.s3.amazonaws.com/ebf/1549a778ca974aea89bc125b83e52dcc.mobi"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 22
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"edition_mobi_status(edition)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# check connection between edition and Ebook, Ebookfile before creating mobi.\n",
|
|
"[(ebook.id, edition.ebooks.all(), edition.ebook_files.all()) for ebook in edition.ebooks.all()]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"[(eb.url, eb.format, eb.provider, eb.rights, eb.edition) for eb in edition.ebooks.all()]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"# checking on relationship between Ebookfile and Ebook\n",
|
|
"\n",
|
|
"[ebf for ebf in EbookFile.objects.all() if ebf.active]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
}
|
|
],
|
|
"metadata": {}
|
|
}
|
|
]
|
|
} |