makes a valid file!

validator at
https://www.readyet.net/tools/onixFileValidation/3
pull/1/head
eric 2015-08-26 21:35:29 -04:00
parent 40cdd1b141
commit 2a3a1546fd
6 changed files with 23618 additions and 1 deletions

290
api/crosswalks.py Normal file
View File

@ -0,0 +1,290 @@
# Crosswalk from MARC relator codes to ONIX contributor role codes
# (ONIX code list 17), used by the ONIX feed to fill <ContributorRole>.
#
# The table used to list several ONIX codes under the same MARC key. In a
# dict literal only the LAST duplicate survives, so e.g. "aut" silently
# resolved to "A38" rather than "A01". Each MARC code now maps to exactly one
# ONIX code; where several candidates existed the most general role was kept
# and the dropped alternatives are noted beside the entry.
# NOTE(review): role meanings in the comments are taken from ONIX list 17 --
# confirm the chosen defaults against the current issue of the code list.
relator_contrib = {
    "act": "E01",
    "adp": "B05",  # adapted by (was shadowed by B99)
    "aft": "A19",  # afterword by (was shadowed by A22)
    "ant": "A37",
    "arr": "B25",
    "art": "A07",
    "aui": "A24",  # introduction by (candidates: A15, A16, A23, A24, A29)
    "aus": "A03",
    "aut": "A01",  # by (author) (was shadowed by A38)
    "clb": "A33",
    "cmm": "E04",  # commentator (A21 stays available through "cwt")
    "cmp": "A06",
    "cnd": "D03",
    "com": "C01",  # compiled by (candidates: C01, C02, C99)
    "cov": "A36",
    "cre": "A09",  # created by (candidates: A09, A10, A99)
    "ctb": "A32",  # contributions by (candidates: A02, A14, A17, A18, A20, A25, A27, A32, A34)
    "ctg": "A39",
    "cwt": "A21",
    "dnc": "E02",
    "drt": "D02",  # director (was shadowed by D99)
    "dsr": "A11",
    "edt": "B01",  # edited by (candidates: B01, B02, B04, B09, B12-B16, B19-B21, B23, B24)
    "ill": "A12",  # illustrated by (candidates: A12, A35, A40)
    "itr": "E06",
    "ive": "A44",
    "ivr": "A43",
    "lbt": "A04",
    "lyr": "A05",  # lyrics by (was shadowed by A31)
    "nrt": "E03",  # narrator (candidates: B03, B07, E03, E07)
    "pbd": "B11",
    "pbl": "B18",
    "pht": "A13",  # photographs by (candidates: A08, A13, F01)
    "prf": "E99",  # performed by (candidates: E08, E99)
    "prg": "A30",
    "pro": "D01",
    "trl": "B06",  # translated by (candidates: B06, B08, B10)
    "voc": "E05",
}
# Two-letter language code -> three-letter language code, as emitted in the
# ONIX <LanguageCode> element. Entries are grouped by the first letter of the
# two-letter key; every key appears exactly once.
iso639 = {
    # a
    "aa": "aar", "ab": "abk", "ae": "ave", "af": "afr", "ak": "aka", "am": "amh",
    "an": "arg", "ar": "ara", "as": "asm", "av": "ava", "ay": "aym", "az": "aze",
    # b
    "ba": "bak", "be": "bel", "bg": "bul", "bh": "bih", "bi": "bis", "bm": "bam",
    "bn": "ben", "bo": "tib", "br": "bre", "bs": "bos",
    # c
    "ca": "cat", "ce": "che", "ch": "cha", "co": "cos", "cr": "cre", "cs": "cze",
    "cu": "chu", "cv": "chv", "cy": "wel",
    # d
    "da": "dan", "de": "ger", "dv": "div", "dz": "dzo",
    # e
    "ee": "ewe", "el": "gre", "en": "eng", "eo": "epo", "es": "spa", "et": "est",
    "eu": "baq",
    # f
    "fa": "per", "ff": "ful", "fi": "fin", "fj": "fij", "fo": "fao", "fr": "fre",
    "fy": "fry",
    # g
    "ga": "gle", "gd": "gla", "gl": "glg", "gn": "grn", "gu": "guj", "gv": "glv",
    # h
    "ha": "hau", "he": "heb", "hi": "hin", "ho": "hmo", "hr": "hrv", "ht": "hat",
    "hu": "hun", "hy": "arm", "hz": "her",
    # i
    "ia": "ina", "id": "ind", "ie": "ile", "ig": "ibo", "ii": "iii", "ik": "ipk",
    "io": "ido", "is": "ice", "it": "ita", "iu": "iku",
    # j
    "ja": "jpn", "jv": "jav",
    # k
    "ka": "geo", "kg": "kon", "ki": "kik", "kj": "kua", "kk": "kaz", "kl": "kal",
    "km": "khm", "kn": "kan", "ko": "kor", "kr": "kau", "ks": "kas", "ku": "kur",
    "kv": "kom", "kw": "cor", "ky": "kir",
    # l
    "la": "lat", "lb": "ltz", "lg": "lug", "li": "lim", "ln": "lin", "lo": "lao",
    "lt": "lit", "lu": "lub", "lv": "lav",
    # m
    "mg": "mlg", "mh": "mah", "mi": "mao", "mk": "mac", "ml": "mal", "mn": "mon",
    "mr": "mar", "ms": "may", "mt": "mlt", "my": "bur",
    # n
    "na": "nau", "nb": "nob", "nd": "nde", "ne": "nep", "ng": "ndo", "nl": "dut",
    "nn": "nno", "no": "nor", "nr": "nbl", "nv": "nav", "ny": "nya",
    # o
    "oc": "oci", "oj": "oji", "om": "orm", "or": "ori", "os": "oss",
    # p
    "pa": "pan", "pi": "pli", "pl": "pol", "ps": "pus", "pt": "por",
    # q
    "qu": "que",
    # r
    "rm": "roh", "rn": "run", "ro": "rum", "ru": "rus", "rw": "kin",
    # s
    "sa": "san", "sc": "srd", "sd": "snd", "se": "sme", "sg": "sag", "si": "sin",
    "sk": "slo", "sl": "slv", "sm": "smo", "sn": "sna", "so": "som", "sq": "alb",
    "sr": "srp", "ss": "ssw", "st": "sot", "su": "sun", "sv": "swe", "sw": "swa",
    # t
    "ta": "tam", "te": "tel", "tg": "tgk", "th": "tha", "ti": "tir", "tk": "tuk",
    "tl": "tgl", "tn": "tsn", "to": "ton", "tr": "tur", "ts": "tso", "tt": "tat",
    "tw": "twi", "ty": "tah",
    # u
    "ug": "uig", "uk": "ukr", "ur": "urd", "uz": "uzb",
    # v
    "ve": "ven", "vi": "vie", "vo": "vol",
    # w
    "wa": "wln", "wo": "wol",
    # x
    "xh": "xho",
    # y
    "yi": "yid", "yo": "yor",
    # z
    "za": "zha", "zh": "chi", "zu": "zul",
}

175
api/onix.py Normal file
View File

@ -0,0 +1,175 @@
import datetime
from itertools import islice

import pytz
from lxml import etree

from regluit.core import models
from regluit.core.cc import ccinfo
from regluit.bisac import Bisac

from .crosswalks import relator_contrib, iso639
# Skeleton ONIX 3.0 message: an empty <ONIXMessage> root in the ONIX
# reference-names namespace. onix_feed() and onix_feed_for_work() parse this
# string and append the Header and Product elements to the resulting root.
feed_xml = """<?xml version="1.0" encoding="UTF-8"?>
<ONIXMessage release="3.0" xmlns="http://ns.editeur.org/onix/3.0/reference" />
"""
# Module-level Bisac instance; product() calls bisac.code() to obtain the
# SubjectCode for subjects whose authority is 'bisacsh'.
bisac = Bisac()
def text_node(tag, text, attrib=None):
    """Return a new lxml element named *tag* whose text content is *text*.

    tag    -- element name
    text   -- value assigned to the element's ``.text``
    attrib -- optional mapping of attribute names to values; omitted or
              None means the element gets no attributes
    """
    # None replaces the former shared mutable ``{}`` default;
    # etree.Element itself defaults attrib to None.
    node = etree.Element(tag, attrib=attrib)
    node.text = text
    return node
def onix_feed(facet, page=None, order_by='newest'):
    """Return a pretty-printed ONIX 3.0 message for the works of *facet*.

    facet    -- object exposing ``works`` (iterable of works) and ``title``
                (read by header())
    page     -- when greater than 0, only works 10*page .. 10*page+9 are
                included; None, 0 or a negative value includes every work
    order_by -- accepted for signature compatibility with the caller; it is
                not used by this function

    One <Product> is emitted for every edition of each selected work that
    has at least one ebook.
    """
    feed = etree.fromstring(feed_xml)
    feed.append(header(facet))
    # Explicit None check: "None > 0" only happens to be False on Python 2.
    if page is not None and page > 0:
        # islice comes from itertools (imported at the top of the module).
        works = islice(facet.works, 10 * page, 10 * page + 10)
    else:
        works = facet.works
    for work in works:
        editions = models.Edition.objects.filter(work=work, ebooks__isnull=False).distinct()
        for edition in editions:
            feed.append(product(edition))
    return etree.tostring(feed, pretty_print=True)
def onix_feed_for_work(work):
    """Return a pretty-printed ONIX 3.0 message covering a single work.

    The message holds a Header built from *work* followed by one Product per
    edition of *work* that has at least one ebook.
    """
    root = etree.fromstring(feed_xml)
    root.append(header(work))
    # distinct() because the ebooks join can yield the same edition repeatedly
    editions_with_ebooks = models.Edition.objects.filter(
        work=work,
        ebooks__isnull=False,
    ).distinct()
    for ed in editions_with_ebooks:
        product_node = product(ed)
        root.append(product_node)
    return etree.tostring(root, pretty_print=True)
def header(facet=None):
    """Build the ONIX <Header> element.

    The header carries the fixed unglue.it sender details, the current UTC
    time as SentDateTime, and a MessageNote taken from ``facet.title`` (or a
    generic note when *facet* is falsy).
    """
    sender = etree.Element("Sender")
    for tag, value in (("SenderName", "unglue.it"),
                       ("EmailAddress", "support@gluejar.com")):
        sender.append(text_node(tag, value))

    root = etree.Element("Header")
    root.append(sender)

    sent_at = pytz.utc.localize(datetime.datetime.utcnow())
    root.append(text_node("SentDateTime", sent_at.strftime('%Y%m%dT%H%M%SZ')))

    if facet:
        note = facet.title
    else:
        note = "Unglue.it Editions"
    root.append(text_node("MessageNote", note))
    return root
def product(edition):
    """Build the ONIX <Product> element describing *edition*.

    The record holds the identifiers (unglue.it edition id and, when
    present, the ISBN-13) followed by the DescriptiveDetail, CollateralDetail,
    PublishingDetail and ProductSupply blocks, each built by its own helper.
    Returns the lxml element; nothing is serialized here.
    """
    work = edition.work
    product_node = etree.Element("Product")
    product_node.append(text_node("RecordReference", "it.unglue.work.%s.%s" % (work.id, edition.id)))
    product_node.append(text_node("NotificationType", "03"))  # final

    ident_node = etree.SubElement(product_node, "ProductIdentifier")
    ident_node.append(text_node("ProductIDType", "01"))  # proprietary
    ident_node.append(text_node("IDTypeName", "unglue.it edition id"))
    ident_node.append(text_node("IDValue", unicode(edition.id)))
    if edition.isbn_13:
        ident_node = etree.SubElement(product_node, "ProductIdentifier")
        # NOTE(review): in ONIX code list 5, "03" is GTIN-13 (an ISBN-13 is a
        # valid GTIN-13); "15" is the dedicated ISBN-13 type -- confirm choice.
        ident_node.append(text_node("ProductIDType", "03"))
        ident_node.append(text_node("IDValue", edition.isbn_13))

    _append_descriptive_detail(product_node, edition)
    _append_collateral_detail(product_node, edition)
    _append_publishing_detail(product_node, edition)
    _append_product_supply(product_node, edition)
    return product_node


def _append_descriptive_detail(product_node, edition):
    """Append DescriptiveDetail: form, license, title, contributors, language, subjects."""
    work = edition.work
    descriptive_node = etree.SubElement(product_node, "DescriptiveDetail")
    descriptive_node.append(text_node("ProductComposition", "00"))  # single item
    descriptive_node.append(text_node("ProductForm", "ED"))  # download

    form_details = {'epub': "E101", 'pdf': "E107", 'mobi': "E116"}
    ebook = None
    for ebook in edition.ebooks.all():
        detail = form_details.get(ebook.format)
        if detail:
            descriptive_node.append(text_node("ProductFormDetail", detail))

    # The license is taken from the last ebook iterated; skip the block
    # entirely when the edition has no ebooks instead of failing on None.
    if ebook is not None:
        license_node = etree.SubElement(descriptive_node, "EpubLicense")
        license_node.append(text_node("EpubLicenseName", ebook.rights))
        lic_expr_node = etree.SubElement(license_node, "EpubLicenseExpression")
        lic_expr_node.append(text_node("EpubLicenseExpressionType", '01'))  # human readable
        lic_expr_node.append(text_node("EpubLicenseExpressionLink", ccinfo(ebook.rights).url))

    title_node = etree.SubElement(descriptive_node, "TitleDetail")
    title_node.append(text_node("TitleType", '01'))  # distinctive title
    title_el = etree.SubElement(title_node, "TitleElement")
    title_el.append(text_node("TitleElementLevel", '01'))
    title_el.append(text_node("TitleText", edition.title))

    for sequence, contrib in enumerate(edition.relators.all(), 1):
        contrib_node = etree.SubElement(descriptive_node, "Contributor")
        contrib_node.append(text_node("SequenceNumber", unicode(sequence)))
        # unknown relator codes fall back to an empty role, as before
        contrib_node.append(text_node("ContributorRole", relator_contrib.get(contrib.relation.code, "")))
        contrib_node.append(text_node("PersonName", contrib.author.name))

    lang, locale = work.language, None
    if lang and '_' in lang:
        # maxsplit=1 so a value with several underscores cannot break the unpacking
        lang, locale = lang.split('_', 1)
    if lang and len(lang) == 2:
        lang = iso639.get(lang, None)
    if lang:
        lang_node = etree.SubElement(descriptive_node, "Language")
        lang_node.append(text_node("LanguageRole", "01"))
        lang_node.append(text_node("LanguageCode", lang))
        if locale:
            lang_node.append(text_node("CountryCode", locale))

    for subject in work.subjects.all():
        subj_node = etree.SubElement(descriptive_node, "Subject")
        if subject.authority == 'lcsh':
            subj_node.append(text_node("SubjectSchemeIdentifier", "04"))
            subj_node.append(text_node("SubjectHeadingText", subject.name))
        elif subject.authority == 'lcc':
            subj_node.append(text_node("SubjectSchemeIdentifier", "03"))
            subj_node.append(text_node("SubjectCode", subject.name))
        elif subject.authority == 'bisacsh':
            subj_node.append(text_node("SubjectSchemeIdentifier", "10"))
            subj_node.append(text_node("SubjectCode", bisac.code(subject.name)))
            subj_node.append(text_node("SubjectHeadingText", subject.name))
        else:
            subj_node.append(text_node("SubjectSchemeIdentifier", "20"))
            subj_node.append(text_node("SubjectHeadingText", subject.name))


def _append_collateral_detail(product_node, edition):
    """Append CollateralDetail: the work description and the cover image link."""
    work = edition.work
    coll_node = etree.SubElement(product_node, "CollateralDetail")
    desc_node = etree.SubElement(coll_node, "TextContent")
    desc_node.append(text_node("TextType", '03'))  # description
    desc_node.append(text_node("ContentAudience", '00'))  # unrestricted
    # "or ''" keeps a missing description from breaking the concatenation
    desc = (work.description or '') + '<br /><br />Listed by <a href="https://unglue.it/work/%s/">Unglue.it</a>.' % work.id
    try:
        content = etree.XML("<div>" + desc + "</div>")
    except etree.XMLSyntaxError:
        # Not well-formed XML: ship it as an HTML CDATA section instead.
        # The element name "Text" was missing from this call before, which
        # made this fallback raise TypeError.
        content_node = etree.SubElement(desc_node, "Text", attrib={"textformat": "02"})  # html
        content_node.text = etree.CDATA(desc)
    else:
        content_node = etree.SubElement(desc_node, "Text", attrib={"textformat": "05"})  # xhtml
        content_node.append(content)

    supp_node = etree.SubElement(coll_node, "SupportingResource")
    supp_node.append(text_node("ResourceContentType", '01'))  # front cover
    supp_node.append(text_node("ContentAudience", '00'))  # unrestricted
    supp_node.append(text_node("ResourceMode", '03'))  # image
    cover_node = etree.SubElement(supp_node, "ResourceVersion")
    cover_node.append(text_node("ResourceForm", '01'))  # linkable
    coverfeat_node = etree.SubElement(cover_node, "ResourceVersionFeature")
    coverfeat_node.append(text_node("ResourceVersionFeatureType", '01'))  # image format
    coverfeat_node.append(text_node("FeatureValue", 'D502'))  # jpeg
    cover_node.append(text_node("ResourceLink", edition.cover_image_thumbnail()))  # link


def _append_publishing_detail(product_node, edition):
    """Append PublishingDetail: publisher (when known), status and publication date."""
    pubdetail_node = etree.SubElement(product_node, "PublishingDetail")
    # Skip the Publisher composite rather than fail when no publisher is set.
    if edition.publisher_name:
        pub_node = etree.SubElement(pubdetail_node, "Publisher")
        pub_node.append(text_node("PublishingRole", '01'))  # publisher
        pub_node.append(text_node("PublisherName", edition.publisher_name.name))
    pubdetail_node.append(text_node("PublishingStatus", '00'))  # unspecified
    if edition.publication_date:
        pubdate_node = etree.SubElement(pubdetail_node, "PublishingDate")
        pubdate_node.append(text_node("PublishingDateRole", '01'))  # nominal pub date
        # the stored date string has its dashes stripped for ONIX
        pubdate_node.append(text_node("Date", edition.publication_date.replace('-', '')))


def _append_product_supply(product_node, edition):
    """Append ProductSupply: worldwide market, download links and a zero price."""
    supply_node = etree.SubElement(product_node, "ProductSupply")
    market_node = etree.SubElement(supply_node, "Market")
    terr_node = etree.SubElement(market_node, "Territory")
    terr_node.append(text_node("RegionsIncluded", 'WORLD'))
    supply_detail_node = etree.SubElement(supply_node, "SupplyDetail")
    supplier_node = etree.SubElement(supply_detail_node, "Supplier")
    supplier_node.append(text_node("SupplierRole", '11'))  # non-exclusive distributor
    supplier_node.append(text_node("SupplierName", 'Unglue.it'))
    for ebook in edition.ebooks.all():
        website_node = etree.SubElement(supplier_node, "Website")
        website_node.append(text_node("WebsiteRole", '29'))  # full content
        website_node.append(text_node("WebsiteDescription", '%s file download' % ebook.format, attrib={'textformat': '06'}))
        website_node.append(text_node("WebsiteLink", ebook.download_url))
    supply_detail_node.append(text_node("ProductAvailability", '20'))  # available
    price_node = etree.SubElement(supply_detail_node, "Price")
    price_node.append(text_node("PriceType", '01'))  # retail excluding tax
    price_node.append(text_node("PriceAmount", '0.00'))
    price_node.append(text_node("CurrencyCode", 'USD'))

View File

@ -6,6 +6,7 @@ from django.views.generic.base import TemplateView
from regluit.api import resources
from regluit.api.views import ApiHelpView
from regluit.api.views import OPDSNavigationView, OPDSAcquisitionView
from regluit.api.views import OnixView
v1_api = Api(api_name='v1')
@ -24,6 +25,7 @@ urlpatterns = patterns('',
url(r'^widget/(?P<isbn>\w+)/$','regluit.api.views.widget', name="widget"),
url(r"^opds/$", OPDSNavigationView.as_view(template_name="opds.xml"), name="opds"),
url(r"^opds/(?P<facet>.*)/$", OPDSAcquisitionView.as_view(), name="opds_acqusition"),
url(r"^onix/(?P<facet>.*)/$", OnixView.as_view(), name="onix"),
url(r'^id/work/(?P<work_id>\w+)/$', 'regluit.api.views.negotiate_content', name="work_identifier"),
url(r'^loader/yaml$','regluit.api.views.load_yaml', name="load_yaml"),
(r'^', include(v1_api.urls)),

View File

@ -16,7 +16,7 @@ from django.http import (
import regluit.core.isbn
from regluit.core.bookloader import load_from_yaml
from regluit.api import opds
from regluit.api import opds, onix
from regluit.api.models import repo_allowed
from regluit.core import models
@ -142,3 +142,22 @@ class OPDSAcquisitionView(View):
facet_class = opds.get_facet_class(facet)()
return HttpResponse(facet_class.feed(page,order_by),
content_type="application/atom+xml;profile=opds-catalog;kind=acquisition")
class OnixView(View):
    """Serve ONIX 3.0 XML: one work when ``?work=<id>`` is given, else a facet feed."""

    def get(self, request, *args, **kwargs):
        """Return the ONIX feed as a text/xml HttpResponse.

        Query parameters:
          work     -- id of a single work; when present the facet is ignored
          page     -- page number for the facet feed; anything that is not an
                      integer is treated as "no paging"
          order_by -- passed through to onix.onix_feed (default 'newest')

        Raises Http404 when ``work`` is not an integer or matches no work.
        """
        # Imported locally so this view does not depend on which django
        # names the module's top-level imports happen to provide.
        from django.http import Http404
        from django.shortcuts import get_object_or_404

        work_id = request.GET.get('work', None)
        if work_id:
            # onix_feed_for_work needs a Work model (it filters editions by
            # it and reads its title), not the raw query-string value.
            try:
                work_pk = int(work_id)
            except (TypeError, ValueError):
                raise Http404("invalid work id")
            work = get_object_or_404(models.Work, id=work_pk)
            return HttpResponse(onix.onix_feed_for_work(work),
                                content_type="text/xml")

        facet = kwargs.get('facet')
        page = request.GET.get('page', None)
        order_by = request.GET.get('order_by', 'newest')
        try:
            page = int(page)
        except (TypeError, ValueError):
            # missing or non-numeric page: fall back to the unpaged feed
            page = None
        facet_class = opds.get_facet_class(facet)()
        return HttpResponse(onix.onix_feed(facet_class, page, order_by),
                            content_type="text/xml")

23122
bisac/__init__.py Normal file

File diff suppressed because it is too large Load Diff

9
bisac/tests.py Normal file
View File

@ -0,0 +1,9 @@
import unittest
from . import Bisac
class TestBisac(unittest.TestCase):
    """Checks the heading-to-code lookup offered by Bisac."""

    def setUp(self):
        # every test method gets its own Bisac instance
        self.bisac = Bisac()

    def test_code(self):
        """The 'Religion' heading resolves to the code 'REL000000'."""
        religion_code = self.bisac.code('Religion')
        self.assertEqual(religion_code, 'REL000000')