diff --git a/notebooks/opds_consumption.ipynb b/notebooks/opds_consumption.ipynb new file mode 100644 index 00000000..0097c51b --- /dev/null +++ b/notebooks/opds_consumption.ipynb @@ -0,0 +1,281 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:7865e16e46f11164fb7a3a9954930ceb8071cb6c6842de0bd66aba69c8147ae1" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "Let me see some examples of OPDS in the wild to see how it works:\n", + "\n", + "available feeds: https://code.google.com/p/openpub/wiki/AvailableFeeds\n", + "\n", + "let's look at archive.org, which presumably should have a good feed\n", + "\n", + "* archive.org: http://bookserver.archive.org/catalog/\n", + "* feedbooks.com: http://www.feedbooks.com/catalog.atom\n", + "* oreilly.com: http://opds.oreilly.com/opds/\n" + ] + }, + { + "cell_type": "heading", + "level": 1, + "metadata": {}, + "source": [ + "Some concepts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "http://www.slideshare.net/fullscreen/HadrienGardeur/understanding-opds/7\n", + "\n", + "OPDS is based on\n", + "\n", + "* resources\n", + "* collections \n", + "\n", + "A collection aggregates resources.\n", + "\n", + "Two kinds of resources:\n", + "\n", + "* Navigation link \n", + "* Catalog entry \n", + "\n", + "for two kinds of collections:\n", + "\n", + "* Navigation \n", + "* Acquisition" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Acquisition scenarios" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Multiple acquisition scenarios:\n", + " \n", + "* Open Access\n", + "* Sale\n", + "* Lending\n", + "* Subscription\n", + "* Extract\n", + "* Undefined" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import requests\n", + "from lxml.etree import fromstring\n", + "\n", + "ATOM_NS = 
\"http://www.w3.org/2005/Atom\"\n", + "\n", + "def nsq(url, tag):\n", + " return \"{\" + url +\"}\" + tag\n", + "\n", + "url = \"http://bookserver.archive.org/catalog/\"\n", + " \n", + "r = requests.get(url)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 32 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "doc=fromstring(r.text)\n", + "doc" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 7, + "text": [ + "" + ] + } + ], + "prompt_number": 7 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# get links\n", + "# what types specified in spec?\n", + "\n", + "[link.attrib for link in doc.findall(nsq(ATOM_NS,'link'))]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 34, + "text": [ + "[{'href': 'http://bookserver.archive.org/catalog/', 'type': 'application/atom+xml', 'rel': 'self'},\n", + " {'href': 'http://bookserver.archive.org/catalog/crawlable', 'type': 'application/atom+xml', 'rel': 'http://opds-spec.org/crawlable', 'title': 'Crawlable feed'},\n", + " {'href': 'http://bookserver.archive.org/catalog/opensearch.xml', 'type': 'application/opensearchdescription+xml', 'rel': 'search'}]" + ] + } + ], + "prompt_number": 34 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "it might be useful to use specialized libraries to handle Atom or AtomPub." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "doc.findall(nsq(ATOM_NS, \"entry\"))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 24, + "text": [ + "[,\n", + " ,\n", + " ]" + ] + } + ], + "prompt_number": 24 + }, + { + "cell_type": "heading", + "level": 1, + "metadata": {}, + "source": [ + "Atom feed generation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://github.com/sramana/pyatom\n", + "\n", + " pip install pyatom" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# let's try the basics of pyatom\n", + "# puzzled where come from.\n", + "\n", + "from pyatom import AtomFeed\n", + "import datetime\n", + "\n", + "feed = AtomFeed(title=\"Unglue.it\",\n", + " subtitle=\"Unglue.it OPDS Navigation\",\n", + " feed_url=\"https://unglue.it/opds\",\n", + " url=\"https://unglue.it/\",\n", + " author=\"unglue.it\")\n", + "\n", + "# Do this for each feed entry\n", + "feed.add(title=\"My Post\",\n", + " content=\"Body of my post\",\n", + " content_type=\"html\",\n", + " author=\"Me\",\n", + " url=\"http://example.org/entry1\",\n", + " updated=datetime.datetime.utcnow())\n", + "\n", + "print feed.to_string()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "\n", + " Unglue.it\n", + " https://unglue.it/opds\n", + " 2014-06-13T21:48:34Z\n", + " \n", + " \n", + " \n", + " unglue.it\n", + " \n", + " Unglue.it OPDS Navigation\n", + " PyAtom\n", + " \n", + " My Post\n", + " http://example.org/entry1\n", + " 2014-06-13T21:48:34Z\n", + " \n", + " \n", + " Me\n", + " \n", + " Body of my post\n", + " \n", + "\n", + "\n" + ] + } + ], + "prompt_number": 35 + }, + { + "cell_type": "heading", + "level": 1, + "metadata": {}, + "source": [ + "Appendix: dealing with namespaces in ElementTree" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Maybe come back to http://effbot.org/zone/element-namespaces.htm for more sophisticated ways to register namespaces." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file