{ "metadata": { "name": "", "signature": "sha256:c3439815c10b9795d3d95691a1edd9789672f61dc0b415cd57b8e06d0552f8a5" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "# compute whether we can apply mobigen to a given edition to produce a mobi file\n", "# need to have an ebook in epub or html format\n", "# possible return values: already has a mobi file / can generate a mobi file / not possible\n", "\n", "def edition_mobi_status(edition):\n", "    \"\"\"\n", "    Classify whether a mobi ebook exists, or could be generated, for the given edition.\n", "\n", "    edition: object whose `ebooks.all()` yields ebooks that each carry a `format` attribute\n", "\n", "    returns 1 if there is a mobi ebook, 0 if there is none but we have an epub or html to convert from,\n", "    and -1 for no epub/html to convert from\n", "    \"\"\"\n", "    # only membership matters, so collect the distinct formats once\n", "    formats = set(ebook.format for ebook in edition.ebooks.all())\n", "    if 'mobi' in formats:\n", "        return 1\n", "    elif ('epub' in formats) or ('html' in formats):\n", "        return 0\n", "    else:\n", "        return -1" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "from collections import Counter\n", "from regluit.core.models import Edition\n", "\n", "# of all the Editions with ebook, compute the \"mobi status\"\n", "# distinct() because filtering across the ebooks relation yields one row per ebook,\n", "# which would otherwise count an edition once for every ebook it has\n", "Counter([edition_mobi_status(edition) for edition in Edition.objects.filter(ebooks__isnull=False).distinct()])" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "# looking only at Campaign related ebooks\n", "\n", "from regluit.core.models import Campaign\n", "\n", "Counter([edition_mobi_status(campaign.edition) for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct()])" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "import requests\n", "\n", "# leave TLS certificate verification on (the requests default); verify=False would accept any certificate\n", "r = requests.get(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\")" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": 
false, "input": [ "import os\n", "\n", "from regluit.core.mobigen import convert_to_mobi\n", "\n", "# write under the current user's home directory instead of a machine-specific absolute path\n", "MOBI_PATH = os.path.expanduser(\"~/Downloads/test.mobi\")\n", "\n", "output = convert_to_mobi(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\")\n", "\n", "with open(MOBI_PATH, \"wb\") as f:\n", "    f.write(output)\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "/Users/raymondyee/anaconda/envs/regluit/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py:730: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.org/en/latest/security.html (This warning will only appear once by default.)\n", " InsecureRequestWarning)\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "# MOBI_PATH is defined in the conversion cell above; IPython expands {MOBI_PATH} before running the shell command\n", "!ls -lt \"{MOBI_PATH}\"" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "# let's remind ourselves about how to write to S3 and integrate an ebook record\n", "# https://github.com/Gluejar/regluit/blob/36793b0b118fd97b52ab0d8637e8e34ab6d8672e/core/models.py#L1776\n", "\n", "# ebf holding the books we generate in the watermarking/custom messaging process, right?\n", "\n", "from regluit.core.models import EbookFile\n", "\n", "# list every stored ebook file together with its file field and the title of its edition\n", "for ebookfile in EbookFile.objects.all():\n", "    print (ebookfile, ebookfile.file, ebookfile.edition.title)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "# how to work with EbookFile\n", "# https://github.com/Gluejar/regluit/blob/792659c325a7bee2b49337408336fdeadab3464a/core/models.py#L904\n", "# Campaign." 
], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "# for every campaign whose edition has ebooks: show the title and mobi status,\n", "# and when a mobi could be generated, the url of the ebook to convert from\n", "for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct():\n", "    edition = campaign.edition\n", "    status = edition_mobi_status(edition)  # computed once; each call reads the edition's ebooks\n", "    print (edition.title, status)\n", "    if status == 0:  # possible to generate mobi\n", "        # status 0 also covers editions that only have html, so prefer an epub but\n", "        # fall back to html instead of indexing into an empty epub queryset\n", "        sources = list(edition.ebooks.filter(format__in=['epub', 'html']))\n", "        epubs = [ebook for ebook in sources if ebook.format == 'epub']\n", "        print((epubs or sources)[0].url)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(u'Open Access eBooks', 1)\n", "(u'Oral Literature in Africa', 1)\n", "(u'The Third Awakening', 1)\n", "(u'Feeding the City', 0)\n", "https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\n", "(u'Complex Predicates', -1)\n", "(u'Flatland', 1)\n", "(u'Green Comet', 1)\n", "(u'23rd Century Romance', 1)\n", "(u'Moebius Noodles', -1)\n", "(u'The Classic Short Story, 1870-1925: Theory of a Genre', -1)\n", "(u'The Pains', -1)\n", "(u'The Global Librarian', 0)\n", "https://unglueit-files.s3.amazonaws.com/ebf/619c98c3192c695caabdce71766e7245.epub\n", "(u'Heaven - The Afterlife Series I', 1)\n", "(u'Digitization in the Real World', -1)\n", "(u'Zero Sum Game', 1)" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "(u'Introduction to High Performance Scientific Computing', -1)\n", "(u'Libres conseils : Ce que nous aurions aime\\u0301 savoir avant de commencer', 1)\n", "(u'Option Libre', 0)\n", "https://unglueit-files.s3.amazonaws.com/ebf/830cac2e0b26dfe576e6658623f6243a.epub\n", "(u'Libres conseils. 
Ce que nous aurions aim\\xe9 savoir avant de commencer', 0)\n", "https://unglueit-files.s3.amazonaws.com/ebf/f35b38527140a26cf44aa37bf540f24f.epub\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "# NOTE(review): this cell was the bare expression `pledge, b2u, t4u`, which raises NameError\n", "# because none of those names is defined in this notebook; kept as a note instead.\n", "# presumably these are the campaign types behind Campaign.type -- confirm\n", "# pledge, b2u, t4u" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "# inspect one campaign: the ebooks of its edition and its type\n", "c = Campaign.objects.all()[0]\n", "c.edition.ebooks.all(), c.type" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "from regluit.core.models import Ebook\n", "\n", "# epub ebooks attached to one specific edition\n", "Ebook.objects.filter(format='epub').filter(edition__id=202594)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "from django.db.models import F, Q\n", "\n", "from regluit.core.models import Identifier\n", "\n", "# count identifiers that have an edition whose work differs from the identifier's own work\n", "Identifier.objects.filter(edition__isnull=False).filter(~Q(edition__work__id=F('work__id'))).count()" ], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }