342 lines
9.5 KiB
Plaintext
342 lines
9.5 KiB
Plaintext
|
{
|
||
|
"metadata": {
|
||
|
"name": "",
|
||
|
"signature": "sha256:28329b435a70c100ca945af98844f2b43c856584f361e4fc1b5e6304e3b3c830"
|
||
|
},
|
||
|
"nbformat": 3,
|
||
|
"nbformat_minor": 0,
|
||
|
"worksheets": [
|
||
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"Let me see some examples of OPDS in the wild to see how it works:\n",
|
||
|
"\n",
|
||
|
"available feeds: https://code.google.com/p/openpub/wiki/AvailableFeeds\n",
|
||
|
"\n",
|
||
|
"let's look at archive.org, which presumably should have a good feed\n",
|
||
|
"\n",
|
||
|
"* archive.org: http://bookserver.archive.org/catalog/\n",
|
||
|
"* feedbooks.com: http://www.feedbooks.com/catalog.atom\n",
|
||
|
"* oreilly.com: http://opds.oreilly.com/opds/\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "heading",
|
||
|
"level": 1,
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Some concepts"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"http://www.slideshare.net/fullscreen/HadrienGardeur/understanding-opds/7\n",
|
||
|
"\n",
|
||
|
"OPDS is based on\n",
|
||
|
"\n",
|
||
|
"* resources\n",
|
||
|
"* collections \n",
|
||
|
"\n",
|
||
|
"A collection aggregates resources.\n",
|
||
|
"\n",
|
||
|
"Two kinds of resources:\n",
|
||
|
"\n",
|
||
|
"* Navigation link \n",
|
||
|
"* Catalog entry \n",
|
||
|
"\n",
|
||
|
"for two kinds of collections:\n",
|
||
|
"\n",
|
||
|
"* Navigation \n",
|
||
|
"* Acquisition"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "heading",
|
||
|
"level": 2,
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Acquisition scenarios"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Multiple acquisition scenarios:\n",
|
||
|
" \n",
|
||
|
"* Open Access\n",
|
||
|
"* Sale\n",
|
||
|
"* Lending\n",
|
||
|
"* Subscription\n",
|
||
|
"* Extract\n",
|
||
|
"* Undefined"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"collapsed": false,
|
||
|
"input": [
|
||
|
"import requests\n",
|
||
|
"from lxml.etree import fromstring\n",
|
||
|
"\n",
|
||
|
"ATOM_NS = \"http://www.w3.org/2005/Atom\"\n",
|
||
|
"\n",
|
||
|
"def nsq(url, tag):\n",
"    \"\"\"Return `tag` qualified with namespace `url` in ElementTree's {uri}tag notation.\"\"\"\n",
"    return \"\".join((\"{\", url, \"}\", tag))\n",
|
||
|
"\n",
|
||
|
"url = \"http://bookserver.archive.org/catalog/\"\n",
|
||
|
" \n",
|
||
|
"# Bound the request so a stalled server cannot hang the kernel indefinitely.\n",
"r = requests.get(url, timeout=30)"
|
||
|
],
|
||
|
"language": "python",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"prompt_number": 32
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"collapsed": false,
|
||
|
"input": [
|
||
|
"# Parse the raw bytes: lxml rejects unicode input that carries an XML encoding\n",
"# declaration, and bytes let the parser honour the encoding the feed declares.\n",
"doc = fromstring(r.content)\n",
|
||
|
"doc"
|
||
|
],
|
||
|
"language": "python",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"metadata": {},
|
||
|
"output_type": "pyout",
|
||
|
"prompt_number": 7,
|
||
|
"text": [
|
||
|
"<Element {http://www.w3.org/2005/Atom}feed at 0x1034b7320>"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"prompt_number": 7
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"collapsed": false,
|
||
|
"input": [
|
||
|
"# get links\n",
|
||
|
"# what types specified in spec?\n",
|
||
|
"\n",
|
||
|
"[link.attrib for link in doc.findall(nsq(ATOM_NS,'link'))]"
|
||
|
],
|
||
|
"language": "python",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"metadata": {},
|
||
|
"output_type": "pyout",
|
||
|
"prompt_number": 34,
|
||
|
"text": [
|
||
|
"[{'href': 'http://bookserver.archive.org/catalog/', 'type': 'application/atom+xml', 'rel': 'self'},\n",
|
||
|
" {'href': 'http://bookserver.archive.org/catalog/crawlable', 'type': 'application/atom+xml', 'rel': 'http://opds-spec.org/crawlable', 'title': 'Crawlable feed'},\n",
|
||
|
" {'href': 'http://bookserver.archive.org/catalog/opensearch.xml', 'type': 'application/opensearchdescription+xml', 'rel': 'search'}]"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"prompt_number": 34
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"It might be useful to use specialized libraries to handle Atom or AtomPub."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"collapsed": false,
|
||
|
"input": [
|
||
|
"doc.findall(nsq(ATOM_NS, \"entry\"))"
|
||
|
],
|
||
|
"language": "python",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"metadata": {},
|
||
|
"output_type": "pyout",
|
||
|
"prompt_number": 24,
|
||
|
"text": [
|
||
|
"[<Element {http://www.w3.org/2005/Atom}entry at 0x10442d128>,\n",
|
||
|
" <Element {http://www.w3.org/2005/Atom}entry at 0x10442d098>,\n",
|
||
|
" <Element {http://www.w3.org/2005/Atom}entry at 0x10442d050>]"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"prompt_number": 24
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "heading",
|
||
|
"level": 1,
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Atom feed generation"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"https://github.com/sramana/pyatom\n",
|
||
|
"\n",
|
||
|
" pip install pyatom"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"collapsed": false,
|
||
|
"input": [
|
||
|
"# let's try the basics of pyatom\n",
|
||
|
"# puzzled where <links> come from.\n",
|
||
|
"\n",
|
||
|
"from pyatom import AtomFeed\n",
|
||
|
"import datetime\n",
|
||
|
"\n",
|
||
|
"feed = AtomFeed(title=\"Unglue.it\",\n",
|
||
|
" subtitle=\"Unglue.it OPDS Navigation\",\n",
|
||
|
" feed_url=\"https://unglue.it/opds\",\n",
|
||
|
" url=\"https://unglue.it/\",\n",
|
||
|
" author=\"unglue.it\")\n",
|
||
|
"\n",
|
||
|
"# Do this for each feed entry\n",
|
||
|
"feed.add(title=\"My Post\",\n",
|
||
|
" content=\"Body of my post\",\n",
|
||
|
" content_type=\"html\",\n",
|
||
|
" author=\"Me\",\n",
|
||
|
" url=\"http://example.org/entry1\",\n",
|
||
|
" updated=datetime.datetime.utcnow())\n",
|
||
|
"\n",
|
||
|
"print feed.to_string()"
|
||
|
],
|
||
|
"language": "python",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"stream": "stdout",
|
||
|
"text": [
|
||
|
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",
|
||
|
"<feed xmlns=\"http://www.w3.org/2005/Atom\">\n",
|
||
|
" <title type=\"text\">Unglue.it</title>\n",
|
||
|
" <id>https://unglue.it/opds</id>\n",
|
||
|
" <updated>2014-06-13T21:48:34Z</updated>\n",
|
||
|
" <link href=\"https://unglue.it/\" />\n",
|
||
|
" <link href=\"https://unglue.it/opds\" rel=\"self\" />\n",
|
||
|
" <author>\n",
|
||
|
" <name>unglue.it</name>\n",
|
||
|
" </author>\n",
|
||
|
" <subtitle type=\"text\">Unglue.it OPDS Navigation</subtitle>\n",
|
||
|
" <generator>PyAtom</generator>\n",
|
||
|
" <entry xml:base=\"https://unglue.it/opds\">\n",
|
||
|
" <title type=\"text\">My Post</title>\n",
|
||
|
" <id>http://example.org/entry1</id>\n",
|
||
|
" <updated>2014-06-13T21:48:34Z</updated>\n",
|
||
|
" <link href=\"http://example.org/entry1\" />\n",
|
||
|
" <author>\n",
|
||
|
" <name>Me</name>\n",
|
||
|
" </author>\n",
|
||
|
" <content type=\"html\">Body of my post</content>\n",
|
||
|
" </entry>\n",
|
||
|
"</feed>\n",
|
||
|
"\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"prompt_number": 35
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "heading",
|
||
|
"level": 1,
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Creating navigation feed"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"template: https://gist.github.com/rdhyee/94d82f6639809fb7796f#file-unglueit_nav_opds-xml"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"collapsed": false,
|
||
|
"input": [
|
||
|
"from lxml import etree\n",
|
||
|
"\n",
|
||
|
"feed_xml = \"\"\"<feed xmlns:dcterms=\"http://purl.org/dc/terms/\" \n",
|
||
|
" xmlns:opds=\"http://opds-spec.org/\"\n",
|
||
|
" xmlns=\"http://www.w3.org/2005/Atom\"\n",
|
||
|
" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n",
|
||
|
" xsi:noNamespaceSchemaLocation=\"http://www.kbcafe.com/rss/atom.xsd.xml\"\n",
|
||
|
" xsi:schemaLocation=\"http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dc.xsd http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd\"/>\"\"\"\n",
|
||
|
"\n",
|
||
|
"feed = etree.fromstring(feed_xml)\n",
|
||
|
"\n",
|
||
|
"# add title\n",
|
||
|
"\n",
|
||
|
"# Create the element in the Atom namespace so it matches the feed root's default\n",
"# namespace in the tree itself, not only after serialization.\n",
"title_node = etree.Element(\"{http://www.w3.org/2005/Atom}title\")\n",
|
||
|
"title_node.text = \"Unglue.it Catalog\"\n",
|
||
|
"feed.append(title_node)\n",
|
||
|
"\n",
|
||
|
"# id \n",
|
||
|
"\n",
|
||
|
"# Create the element in the Atom namespace so namespace-qualified lookups on the\n",
"# feed (e.g. '{http://www.w3.org/2005/Atom}id') can find it.\n",
"id_node = etree.Element(\"{http://www.w3.org/2005/Atom}id\")\n",
|
||
|
"id_node.text = \"https://unglue.it/opds\"\n",
|
||
|
"feed.append(id_node)\n",
|
||
|
"\n",
|
||
|
"print etree.tostring(feed, pretty_print=True)\n"
|
||
|
],
|
||
|
"language": "python",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"stream": "stdout",
|
||
|
"text": [
|
||
|
"<feed xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:opds=\"http://opds-spec.org/\" xmlns=\"http://www.w3.org/2005/Atom\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"http://www.kbcafe.com/rss/atom.xsd.xml\" xsi:schemaLocation=\"http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dc.xsd http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd\">\n",
|
||
|
" <title>Unglue.it Catalog</title>\n",
|
||
|
" <id>https://unglue.it/opds</id>\n",
|
||
|
"</feed>\n",
|
||
|
"\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"prompt_number": 18
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "heading",
|
||
|
"level": 1,
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Appendix: dealing with namespaces in ElementTree"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Maybe come back to http://effbot.org/zone/element-namespaces.htm for more sophisticated ways to register namespaces."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {}
|
||
|
}
|
||
|
]
|
||
|
}
|