Merge pull request #551 from Gluejar/multi_formats_in_load_from_yaml
updating load_from_yaml to handle formats other than epub
pull/1/head
commit
f15cfdcc7f
|
@ -3,14 +3,22 @@ from django.db import models
|
|||
|
||||
|
||||
#https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/raw/master/metadata.yaml
|
||||
|
||||
#or https://raw.githubusercontent.com/GITenberg/Free-Russia_51117/master/metadata.yaml
|
||||
def repo_allowed(repo_url):
|
||||
if not repo_url.startswith('https://github.com/'):
|
||||
return (False, "repo url must start with 'https://github.com/'")
|
||||
try:
|
||||
(org,repo_name,raw,branch,filename) = repo_url[19:].split('/')
|
||||
except ValueError:
|
||||
return (False, "repo url must be well formed, metadata at top repo level")
|
||||
if repo_url.startswith('https://github.com/'):
|
||||
try:
|
||||
(org,repo_name,raw,branch,filename) = repo_url[19:].split('/')
|
||||
except ValueError:
|
||||
return (False, "repo url must be well formed, metadata at top repo level")
|
||||
elif repo_url.startswith('https://raw.githubusercontent.com/'):
|
||||
try:
|
||||
(org,repo_name,branch,filename) = repo_url[34:].split('/')
|
||||
raw = 'raw'
|
||||
except ValueError:
|
||||
return (False, "repo url must be well formed, metadata at top repo level")
|
||||
|
||||
else:
|
||||
return (False, "repo url must start with 'https://github.com/' or 'https://raw.githubusercontent.com/'")
|
||||
if not raw == 'raw':
|
||||
return (False, "repo url must point at 'raw' file")
|
||||
if not filename == 'metadata.yaml':
|
||||
|
|
|
@ -6,9 +6,10 @@ import logging
|
|||
import requests
|
||||
|
||||
from datetime import timedelta
|
||||
from itertools import izip, islice
|
||||
from itertools import (izip, islice)
|
||||
from xml.etree import ElementTree
|
||||
from urlparse import urljoin
|
||||
from urlparse import (urljoin, urlparse)
|
||||
|
||||
|
||||
"""
|
||||
django imports
|
||||
|
@ -18,6 +19,9 @@ from django.contrib.comments.models import Comment
|
|||
from django.db import IntegrityError
|
||||
from django.db.models import Q
|
||||
|
||||
from github3 import (login, GitHub)
|
||||
from github3.repos.release import Release
|
||||
|
||||
from gitenberg.metadata.pandata import Pandata
|
||||
from ..marc.models import inverse_marc_rels
|
||||
|
||||
|
@ -800,7 +804,10 @@ def unreverse(name):
|
|||
return '%s %s, %s' % (first.strip(),last.strip(),rest.strip())
|
||||
|
||||
|
||||
def load_from_yaml(yaml_url):
|
||||
def load_from_yaml(yaml_url, test_mode=False):
|
||||
"""
|
||||
if test_mode is True, don't construct the list of ebooks from a GitHub release -- rather assume a single epub
|
||||
"""
|
||||
all_metadata = Pandata(yaml_url)
|
||||
for metadata in all_metadata.get_edition_list():
|
||||
#find a work to associate
|
||||
|
@ -863,24 +870,85 @@ def load_from_yaml(yaml_url):
|
|||
edition.cover_image=urljoin(yaml_url,cover['image_path'])
|
||||
break
|
||||
edition.save()
|
||||
# if there is a version, assume there is an ebook. if not, not.
|
||||
# create Ebook for any ebook in the corresponding GitHub release
|
||||
# assuming yaml_url of form (from GitHub, though not necessarily GITenberg)
|
||||
# https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/raw/master/metadata.yaml
|
||||
|
||||
url_path = urlparse(yaml_url).path.split("/")
|
||||
(repo_owner, repo_name) = (url_path[1], url_path[2])
|
||||
repo_tag = metadata._version
|
||||
# allow for there not to be a token in the settings
|
||||
try:
|
||||
token = settings.GITHUB_PUBLIC_TOKEN
|
||||
except:
|
||||
token = None
|
||||
|
||||
if metadata._version and not metadata._version.startswith('0.0.'):
|
||||
#there should be an ebook to link to!
|
||||
(ebook, created)= models.Ebook.objects.get_or_create(
|
||||
url=git_download_from_yaml_url(yaml_url,metadata._version,edition_name=metadata._edition ),
|
||||
provider='Github',
|
||||
rights = metadata.rights if metadata.rights in cc.LICENSE_LIST_ALL else None,
|
||||
format = 'epub',
|
||||
edition = edition,
|
||||
# version = metadata._version
|
||||
)
|
||||
# use GitHub API to compute the ebooks in the release unless we're in test mode
|
||||
if test_mode:
|
||||
# not using ebook_name in this code
|
||||
ebooks_in_release = [('epub', None)]
|
||||
else:
|
||||
ebooks_in_release = ebooks_in_github_release(repo_owner, repo_name, repo_tag, token=token)
|
||||
|
||||
for (ebook_format, ebook_name) in ebooks_in_release:
|
||||
(ebook, created)= models.Ebook.objects.get_or_create(
|
||||
url=git_download_from_yaml_url(yaml_url,metadata._version,edition_name=metadata._edition,
|
||||
format_= ebook_format),
|
||||
provider='Github',
|
||||
rights = metadata.rights if metadata.rights in cc.LICENSE_LIST_ALL else None,
|
||||
format = ebook_format,
|
||||
edition = edition,
|
||||
# version = metadata._version
|
||||
)
|
||||
|
||||
return work.id
|
||||
|
||||
def git_download_from_yaml_url(yaml_url, version, edition_name='book'):
|
||||
def git_download_from_yaml_url(yaml_url, version, edition_name='book', format_='epub'):
|
||||
# go from https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/raw/master/metadata.yaml
|
||||
# to https://github.com/GITenberg/Adventures-of-Huckleberry-Finn_76/releases/download/v0.0.3/Adventures-of-Huckleberry-Finn.epub
|
||||
if yaml_url.endswith('raw/master/metadata.yaml'):
|
||||
repo_url = yaml_url[0:-24]
|
||||
#print (repo_url,version,edition_name)
|
||||
ebook_url = repo_url + 'releases/download/' + version + '/' + edition_name + '.epub'
|
||||
return ebook_url
|
||||
ebook_url = repo_url + 'releases/download/' + version + '/' + edition_name + '.' + format_
|
||||
return ebook_url
|
||||
|
||||
|
||||
def release_from_tag(repo, tag_name):
    """Look up a GitHub release by its tag name.

    ``release()`` on a repository returns a release with a specified release
    id; this helper instead returns the release carrying the specified tag.

    Adapted from github3.py:
    https://github.com/sigmavirus24/github3.py/blob/38de787e465bffc63da73d23dc51f50d86dc903d/github3/repos/repo.py#L1781-L1793

    :param repo: repository object; its ``_build_url``, ``_api``, ``_get``
        and ``_json`` members are used to issue the request
    :param str tag_name: (required) name of tag
    :returns: :class:`Release <github3.repos.release.Release>`, or ``None``
        when the decoded response is empty
    """
    # <api base>/releases/tags/<tag_name>
    tag_url = repo._build_url('releases', 'tags', tag_name,
                              base_url=repo._api)
    payload = repo._json(repo._get(tag_url), 200)
    if not payload:
        return None
    return Release(payload, repo)
|
||||
|
||||
|
||||
def ebooks_in_github_release(repo_owner, repo_name, tag, token=None):
    """
    returns a list of (book_type, book_name) for a given GitHub release (specified by
    owner, name, tag). token is a GitHub authorization token -- useful for accessing
    higher rate limit in the GitHub API

    Returns an empty list when no tag is given, or when the repository or the
    release for that tag cannot be found, so callers can iterate over the
    result unconditionally.

    :param str repo_owner: owner (user or organization) of the repository
    :param str repo_name: name of the repository
    :param str tag: tag name of the release to inspect
    :param token: optional GitHub token; anonymous access is used when None
    :returns: list of (file extension, asset name) tuples, one per release
        asset whose content type appears in settings.CONTENT_TYPES
    """
    # without a tag there is no release to look up
    if not tag:
        return []

    # map mimetype to file extension
    # (settings.CONTENT_TYPES is taken to be {extension: mimetype})
    ext_by_mimetype = dict(
        (mimetype, ext) for (ext, mimetype) in settings.CONTENT_TYPES.items())

    if token is not None:
        gh = login(token=token)
    else:
        # anonymous access
        gh = GitHub()

    repo = gh.repository(repo_owner, repo_name)
    if repo is None:
        return []

    # release_from_tag returns None when there is no release for the tag
    release = release_from_tag(repo, tag)
    if release is None:
        return []

    ebooks = []
    for asset in release.iter_assets():
        ebook_format = ext_by_mimetype.get(asset.content_type)
        # skip assets that are not a known ebook format
        if ebook_format is not None:
            ebooks.append((ebook_format, asset.name))
    return ebooks
|
||||
|
|
|
@ -83,7 +83,7 @@ class BookLoaderTests(TestCase):
|
|||
noebook_id = bookloader.load_from_yaml(YAML_VERSIONFILE)
|
||||
noebook = models.Work.objects.get(id=noebook_id)
|
||||
self.assertEqual( noebook.first_ebook(), None)
|
||||
huck_id = bookloader.load_from_yaml(YAML_HUCKFILE)
|
||||
huck_id = bookloader.load_from_yaml(YAML_HUCKFILE, test_mode=True)
|
||||
huck = models.Work.objects.get(id=huck_id)
|
||||
self.assertTrue( huck.ebooks().count()>1)
|
||||
|
||||
|
@ -1041,4 +1041,15 @@ class LibTests(TestCase):
|
|||
tasks.refresh_acqs()
|
||||
self.assertEqual(reserve_acq.holds.count(),0)
|
||||
|
||||
|
||||
class GitHubTests(TestCase):
    """Tests for bookloader's GitHub release helpers."""

    def test_ebooks_in_github_release(self):
        """The 0.0.50 release of the Huckleberry Finn repo lists epub, mobi and pdf."""
        owner = 'GITenberg'
        name = 'Adventures-of-Huckleberry-Finn_76'
        tag_name = '0.0.50'

        found = bookloader.ebooks_in_github_release(
            owner, name, tag=tag_name, token=settings.GITHUB_PUBLIC_TOKEN)

        expected = {
            ('epub', u'Adventures-of-Huckleberry-Finn.epub'),
            ('mobi', u'Adventures-of-Huckleberry-Finn.mobi'),
            ('pdf', u'Adventures-of-Huckleberry-Finn.pdf'),
        }
        # order of assets is irrelevant, so compare as sets
        self.assertEqual(set(found), expected)
|
||||
|
|
|
@ -42,6 +42,7 @@ feedparser==5.1.2
|
|||
freebase==1.0.8
|
||||
#gitenberg.metadata==0.1.6
|
||||
git+ssh://git@github.com/gitenberg-dev/metadata.git@0.1.11
|
||||
github3.py==0.9.5
|
||||
html5lib==1.0b3
|
||||
httplib2==0.7.5
|
||||
isodate==0.5.1
|
||||
|
|
|
@ -451,6 +451,10 @@ FILE_UPLOAD_MAX_MEMORY_SIZE = 20971520 #20MB
|
|||
DROPBOX_KEY = '4efhwty5aph52bd' #for unglue.it, just.unglue.it
|
||||
#DROPBOX_KEY = '6uefhocpvp0s1ep' #for localhost
|
||||
|
||||
# for reading GITenberg releases
|
||||
# generated from rdhyee account
|
||||
GITHUB_PUBLIC_TOKEN = 'f702409f913d7f9046f93c677710f829e2b599c9'
|
||||
|
||||
SOUTH_MIGRATION_MODULES = {
|
||||
'default': 'social.apps.django_app.default.south_migrations'
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue