pull/94/head
eric 2019-03-27 21:46:25 -04:00
parent c142533898
commit 14346ed868
3 changed files with 20 additions and 38 deletions

View File

@@ -18,12 +18,12 @@ from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.db import IntegrityError
from django.forms import ValidationError
from django.utils.timezone import now
from django_comments.models import Comment
from github3 import (login, GitHub)
from github3.repos.release import Release
from django.utils.timezone import now
from gitenberg.metadata.pandata import Pandata
# regluit imports
@@ -207,7 +207,7 @@ def update_edition(edition):
# attach edition to the
if edition.work.language != language:
logger.info(u"reconnecting %s since it is %s instead of %s",
googlebooks_id, language, edition.work.language)
googlebooks_id, language, edition.work.language)
old_work = edition.work
new_work = models.Work(title=title, language=language)
@@ -251,9 +251,6 @@ def get_isbn_item(items, isbn):
for ident in industryIdentifiers:
if ident['identifier'] == isbn:
return item
else:
return None # no items
return item
def add_by_isbn_from_google(isbn, work=None):
"""add a book to the UnglueIt database from google based on ISBN. The work parameter
@@ -350,7 +347,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
else:
title = ''
if not title:
# need a title to make an edition record; some crap records in GB.
# need a title to make an edition record; some crap records in GB.
# use title from parent if available
if work:
title = work.title
@@ -541,7 +538,7 @@ def merge_works(w1, w2, user=None):
#don't merge if the works are related.
if w2 in w1.works_related_to.all() or w1 in w2.works_related_to.all():
return w1
# check if one of the works is a series with parts (that have their own isbn)
if w1.works_related_from.filter(relation='part'):
models.WorkRelation.objects.get_or_create(to_work=w2, from_work=w1, relation='part')
@@ -549,8 +546,8 @@ def merge_works(w1, w2, user=None):
if w2.works_related_from.filter(relation='part'):
models.WorkRelation.objects.get_or_create(to_work=w1, from_work=w2, relation='part')
return w1
if w2.selected_edition is not None and w1.selected_edition is None:
#the merge should be reversed
temp = w1
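
Reviewer note: the ordering of these guards matters: already-related works bail out, a series with parts gets a WorkRelation instead of a merge, and only then may the merge direction be reversed so the surviving work is the one that already has a selected edition. The swap itself (the lines after temp = w1 are outside the hunk) is shown here with toy stand-ins, not the Django models:

class W(object):
    def __init__(self, title, selected_edition=None):
        self.title = title
        self.selected_edition = selected_edition

def pick_survivor(w1, w2):
    # keep the work that already has a selected edition as the merge target
    if w2.selected_edition is not None and w1.selected_edition is None:
        w1, w2 = w2, w1   # the tidier spelling of the temp-variable swap
    return w1, w2

survivor, absorbed = pick_survivor(W('dup'), W('canonical', 'ed-1'))
# survivor.title == 'canonical'
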
@@ -970,7 +967,7 @@ class BasePandataLoader(object):
for yaml_subject in metadata.subjects: #always add yaml subjects (don't clear)
if isinstance(yaml_subject, tuple):
(authority, heading) = yaml_subject
elif isinstance(yaml_subject, str) or isinstance(yaml_subject, unicode) :
elif isinstance(yaml_subject, str) or isinstance(yaml_subject, unicode):
(authority, heading) = ('', yaml_subject)
else:
continue
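
Reviewer note: whitespace fix aside, this loop accepts subjects in two shapes, an (authority, heading) tuple or a bare string with no authority, and skips everything else. A Python 2-era sketch of that normalization on its own (normalize_subject is a hypothetical name, not in the codebase; the unicode type only exists on Python 2):

def normalize_subject(yaml_subject):
    # (authority, heading) tuples pass through unchanged
    if isinstance(yaml_subject, tuple):
        return yaml_subject
    # bare strings get an empty authority; unicode is the Python 2 text type
    if isinstance(yaml_subject, (str, unicode)):
        return ('', yaml_subject)
    # anything else is skipped by the caller
    return None
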

View File

@@ -1,7 +1,6 @@
#!/usr/bin/env python
# encoding: utf-8
import datetime
import json
import logging
import re
@@ -19,7 +18,6 @@ from oaipmh.metadata import MetadataRegistry, oai_dc_reader
from regluit.core import bookloader, cc
from regluit.core import models, tasks
from regluit.core.bookloader import merge_works
from regluit.core.isbn import ISBN
from regluit.core.loaders.utils import type_for_url
from regluit.core.validation import identifier_cleaner, valid_subject
@@ -67,10 +65,10 @@ def store_doab_cover(doab_id, redo=False):
cover_file = ContentFile(r.content)
content_type = r.headers.get('content-type', '')
if u'text/html' in content_type:
logger.warning('Cover return html for doab_id={}: {}'.format(doab_id, e))
logger.warning('Cover return html for doab_id={}'.format(doab_id))
return (None, False)
cover_file.content_type = content_type
default_storage.save(cover_file_name, cover_file)
return (default_storage.url(cover_file_name), True)
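
Reviewer note: the old warning interpolated an e that does not appear to be defined in this branch, so the guard itself would have raised NameError the moment a DOAB cover URL answered with an HTML error page. The corrected pattern, reduced to a hedged standalone sketch (fetch_cover is an illustrative name; the real function also builds cover_file_name and returns a (url, created) pair):

import requests
from django.core.files.base import ContentFile

def fetch_cover(url):
    r = requests.get(url)
    content_type = r.headers.get('content-type', '')
    if 'text/html' in content_type:
        # an HTML body is an error page, not a cover image; don't store it
        return None
    cover_file = ContentFile(r.content)
    cover_file.content_type = content_type
    return cover_file
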
@@ -194,7 +192,7 @@ def load_doab_edition(title, doab_id, url, format, rights,
if not ebook.rights:
ebook.rights = rights
ebook.save()
# update the cover id
cover_url = update_cover_doab(doab_id, ebook.edition, redo=False)
@@ -408,20 +406,6 @@ def add_by_doab(doab_id, record=None):
url_to_provider(dl_url) if dl_url else None,
**metadata
)
else:
if 'format' in metadata:
del metadata['format']
edition = load_doab_edition(
title,
doab_id,
'',
'',
license,
language,
isbns,
None,
**metadata
)
return edition
except IdDoesNotExistError:
return None
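
Reviewer note: the deleted else branch repeated the whole load_doab_edition(...) call just to pass empty url/format when there is no download link; folding the difference into the arguments (url_to_provider(dl_url) if dl_url else None) lets one call serve both cases. A generic, stubbed illustration of that de-duplication (the names below are stand-ins, not the regluit API):

def load_edition(title, doab_id, url, fmt, provider, **metadata):
    # stand-in for load_doab_edition: just echo the arguments
    return dict(title=title, doab_id=doab_id, url=url, format=fmt,
                provider=provider, **metadata)

def add_record(title, doab_id, dl_url=None, fmt='', **metadata):
    # compute the arguments that differ up front, then make a single call
    provider = 'example-provider' if dl_url else None   # url_to_provider stand-in
    return load_edition(title, doab_id, dl_url or '', fmt if dl_url else '',
                        provider, **metadata)
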
@@ -439,7 +423,7 @@ def load_doab_oai(from_year=None, limit=100000):
'''
if from_year:
from_ = datetime.datetime(year=from_year, month=1, day=1)
else:
else:
# last 15 days
from_ = datetime.datetime.now() - datetime.timedelta(days=15)
doab_ids = []
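
Reviewer note: the default harvest window is worth calling out: with no from_year, only the last 15 days are re-harvested. A self-contained restatement of that default (harvest_start is an illustrative name):

import datetime

def harvest_start(from_year=None, default_days=15):
    # an explicit year harvests from January 1 of that year...
    if from_year:
        return datetime.datetime(year=from_year, month=1, day=1)
    # ...otherwise only the recent window is re-harvested
    return datetime.datetime.now() - datetime.timedelta(days=default_days)

harvest_start(2018)   # datetime.datetime(2018, 1, 1, 0, 0)
harvest_start()       # now minus 15 days
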

View File

@@ -1,7 +1,7 @@
import re
from django.conf.global_settings import LANGUAGES
lang2code = dict([ (lang[1].lower(), lang[0]) for lang in LANGUAGES ])
lang2code = dict([(lang[1].lower(), lang[0]) for lang in LANGUAGES])
code2lang = dict(LANGUAGES)
iso639 = re.compile(r'^[a-z][a-z][a-z]?$')
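
Reviewer note: for anyone reading this file cold, the two dicts invert Django's LANGUAGES setting and the regex accepts bare two- or three-letter lower-case codes. Roughly (exact contents depend on the Django version):

import re
iso639 = re.compile(r'^[a-z][a-z][a-z]?$')   # same pattern as above
# lang2code maps lower-cased English names to codes, code2lang the reverse:
#   lang2code['english'] == 'en'; lang2code['german'] == 'de'
#   code2lang['fr'] == 'French'
iso639.match('en')    # matches
iso639.match('fre')   # matches: three-letter codes accepted too
iso639.match('Eng')   # None: upper case is rejected
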
@@ -13,21 +13,21 @@ def get_language_code(language):
language = sep.split(language)[0].strip()
if language in code2lang:
return language
# language names (english)
if language in lang2code:
return lang2code.get(language)
# mispellings and language names
if language in EXTRA_LANG_MAP:
return EXTRA_LANG_MAP.get(language)
# accept 2 and 3 letter codes
if iso639.match(language):
return language
return language
return ''
# let's do a mapping of the DOAB languages into the language codes used
# let's do a mapping of the DOAB languages into the language codes used
# mostly, we just handle mispellings
# also null -> xx
sep = re.compile(r'[ ;^,/\|\'\"\]\[\t\n\r\-]+')
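
Reviewer note: the lookup order in get_language_code is codes first, English names second, the local misspelling map third, and finally a permissive pass-through for anything that merely looks like a 2- or 3-letter code. Illustrative calls against this module (expected results assume Django's stock LANGUAGES; any case handling before these checks is outside the hunk):

get_language_code('en')       # already a Django language code -> 'en'
get_language_code('german')   # lower-cased English name -> 'de'
get_language_code('xx; yy')   # sep keeps only 'xx', which passes the iso639 regex -> 'xx'
get_language_code('!!')       # nothing matches -> ''
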
@@ -63,7 +63,7 @@ def lang_to_language_code(lang):
if lang is None:
return ''
lang = lang.strip()
#get codes like en-US
if lang_and_locale.match(lang):
return lang
@@ -77,4 +77,5 @@ def lang_to_language_code(lang):
code = get_language_code(lang)
if code:
return code
return ''
return ''
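
Reviewer note: the added line looks like a final fallback return, so lang_to_language_code should now always return a string. Quick usage sketch against this module (the en-US pass-through assumes the lang_and_locale pattern, defined outside this hunk, matches xx-XX style codes; expected results are noted in the comments):

lang_to_language_code(None)       # -> ''
lang_to_language_code(' en-US ')  # stripped; locale-qualified code passed through -> 'en-US'
lang_to_language_code('en')       # falls through to get_language_code -> 'en'
lang_to_language_code('??')       # no match anywhere -> ''
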