regluit/notebooks/mobigen_integrate.ipynb

{
 "metadata": {
  "name": "",
  "signature": "sha256:c3439815c10b9795d3d95691a1edd9789672f61dc0b415cd57b8e06d0552f8a5"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# compute whether we can apply mobigen to a given edition to produce a mobi file\n",
      "# need to have an ebook in epub or pdf format \n",
      "# possible return values:  already has a mobi file / can generate a mobi file / not possible\n",
      "\n",
      "def edition_mobi_status(edition):\n",
      "    \"\"\"\n",
      "    for a given edition, return 1 if there is a mobi ebook, 0 if there is none but we have an epub or html to convert from,\n",
      "    and -1 for no epub/html to convert from\n",
      "    \"\"\"\n",
      "    formats = set([ebook.format for ebook in edition.ebooks.all()])\n",
      "    if 'mobi' in formats:\n",
      "        return 1\n",
      "    elif ('epub' in formats) or ('html' in formats):\n",
      "        return 0\n",
      "    else:\n",
      "        return -1"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from collections import Counter\n",
      "from regluit.core.models import Edition\n",
      "\n",
      "# of all the Editions with ebook, compute the \"mobi status\"\n",
      "Counter([edition_mobi_status(edition) for edition in Edition.objects.filter(ebooks__isnull=False).all()])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# looking only at Campaign related ebooks\n",
      "\n",
      "from regluit.core.models import Campaign\n",
      "\n",
      "Counter([edition_mobi_status(campaign.edition) for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct()])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import requests\n",
      "r = requests.get(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\", verify=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from regluit.core.mobigen import convert_to_mobi\n",
      "\n",
      "output = convert_to_mobi(\"https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\")\n",
      "\n",
      "with open(\"/Users/raymondyee/Downloads/test.mobi\", \"wb\") as f:\n",
      "    f.write(output)\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "/Users/raymondyee/anaconda/envs/regluit/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py:730: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.org/en/latest/security.html (This warning will only appear once by default.)\n",
        "  InsecureRequestWarning)\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!ls -lt /Users/raymondyee/Downloads/test.mobi"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# let's remind ourselves about how to write to S3 and integrate an ebook record\n",
      "# https://github.com/Gluejar/regluit/blob/36793b0b118fd97b52ab0d8637e8e34ab6d8672e/core/models.py#L1776\n",
      "\n",
      "#ebf holding the books we generate in the watermarking/custom messaging process, right?\n",
      "\n",
      "from regluit.core.models import EbookFile\n",
      "\n",
      "for ebookfile in EbookFile.objects.all():\n",
      "    print (ebookfile, ebookfile.file, ebookfile.edition.title)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# how to work with EbookFile\n",
      "# https://github.com/Gluejar/regluit/blob/792659c325a7bee2b49337408336fdeadab3464a/core/models.py#L904\n",
      "# Campaign."
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for campaign in Campaign.objects.filter(edition__ebooks__isnull=False).distinct():\n",
      "    print (campaign.edition.title, edition_mobi_status(campaign.edition))\n",
      "    if edition_mobi_status(campaign.edition) == 0: # possible to generate mobi\n",
      "        print(campaign.edition.ebooks.filter(format='epub')[0].url)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "(u'Open Access eBooks', 1)\n",
        "(u'Oral Literature in Africa', 1)\n",
        "(u'The Third Awakening', 1)\n",
        "(u'Feeding the City', 0)\n",
        "https://archive.org/download/Feeding_the_City/9781909254039_Feeding_the_City.epub\n",
        "(u'Complex Predicates', -1)\n",
        "(u'Flatland', 1)\n",
        "(u'Green Comet', 1)\n",
        "(u'23rd Century Romance', 1)\n",
        "(u'Moebius Noodles', -1)\n",
        "(u'The Classic Short Story, 1870-1925: Theory of a Genre', -1)\n",
        "(u'The Pains', -1)\n",
        "(u'The Global Librarian', 0)\n",
        "https://unglueit-files.s3.amazonaws.com/ebf/619c98c3192c695caabdce71766e7245.epub\n",
        "(u'Heaven - The Afterlife Series I', 1)\n",
        "(u'Digitization in the Real World', -1)\n",
        "(u'Zero Sum Game', 1)"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "(u'Introduction to High Performance Scientific Computing', -1)\n",
        "(u'Libres conseils : Ce que nous aurions aime\\u0301 savoir avant de commencer', 1)\n",
        "(u'Option Libre', 0)\n",
        "https://unglueit-files.s3.amazonaws.com/ebf/830cac2e0b26dfe576e6658623f6243a.epub\n",
        "(u'Libres conseils. Ce que nous aurions aim\\xe9 savoir avant de commencer', 0)\n",
        "https://unglueit-files.s3.amazonaws.com/ebf/f35b38527140a26cf44aa37bf540f24f.epub\n"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pledge, b2u, t4u"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "c = Campaign.objects.all()[0]\n",
      "c.edition.ebooks.all(), c.type"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Ebook.objects.filter(format='epub').filter(edition__id=202594)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from regluit.core.models import Identifier\n",
      "Identifier.objects.filter(edition__isnull=False).filter(~Q(edition__work__id = F('work__id'))).count()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}