commit
71ba8dc9fa
|
@ -1,7 +1,6 @@
|
|||
import csv
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import unicodedata
|
||||
import urlparse
|
||||
|
@ -17,7 +16,7 @@ from regluit.bisac.models import BisacHeading
|
|||
from regluit.core.bookloader import add_by_isbn_from_google, merge_works
|
||||
from regluit.core.isbn import ISBN
|
||||
from regluit.core.models import (
|
||||
Author, Ebook, EbookFile, Edition, Identifier, path_for_file, PublisherName, Subject, Work,
|
||||
Ebook, EbookFile, Edition, Identifier, path_for_file, Subject, Work,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -111,7 +110,6 @@ def get_title(book):
|
|||
sub = book.get('Subtitle', '')
|
||||
if sub:
|
||||
return u'{}: {}'.format(title, sub)
|
||||
else:
|
||||
return title
|
||||
|
||||
def get_cover(book):
|
||||
|
@ -140,7 +138,9 @@ def get_isbns(book):
|
|||
isbns = []
|
||||
edition = None
|
||||
#'ISBN 1' is OBP, others are UMICH
|
||||
for code in ['eISBN', 'ISBN 3','PaperISBN', 'ISBN 2', 'ClothISBN', 'ISBN 1', 'ISBN 4', 'ISBN 5']:
|
||||
for code in ['eISBN', 'ISBN 3', 'PaperISBN', 'ISBN 2', 'ClothISBN',
|
||||
'ISBN 1', 'ISBN 4', 'ISBN 5'
|
||||
]:
|
||||
if book.get(code, '') not in ('', 'N/A'):
|
||||
values = book[code].split(',')
|
||||
for value in values:
|
||||
|
@ -163,7 +163,6 @@ def get_pubdate(book):
|
|||
return u'{}-{}-{}'.format(value, sub, sub2)
|
||||
elif sub:
|
||||
return u'{}-{}'.format(value, sub, sub2)
|
||||
else:
|
||||
return value
|
||||
|
||||
def get_publisher(book):
|
||||
|
@ -175,7 +174,10 @@ def get_publisher(book):
|
|||
|
||||
def get_url(book):
|
||||
url = book.get('URL', '')
|
||||
url = url if url else u'https://doi.org/{}/{}'.format( book.get('DOI prefix',''),book.get('DOI suffix',''))
|
||||
url = url if url else u'https://doi.org/{}/{}'.format(
|
||||
book.get('DOI prefix', ''),
|
||||
book.get('DOI suffix', '')
|
||||
)
|
||||
return url
|
||||
|
||||
def get_description(book):
|
||||
|
@ -209,7 +211,7 @@ def load_from_books(books):
|
|||
|
||||
# try first to get an Edition already in DB with by one of the ISBNs in book
|
||||
(isbns, edition) = get_isbns(book)
|
||||
if len(isbns)==0:
|
||||
if not isbns:
|
||||
continue
|
||||
title = get_title(book)
|
||||
authors = get_authors(book)
|
||||
|
@ -275,7 +277,7 @@ def load_from_books(books):
|
|||
try:
|
||||
logger.info(u"{} {} {}\n".format(i, title, loading_ok))
|
||||
except Exception as e:
|
||||
logger.info (u"{} {}\n".format(i, title, str(e) ))
|
||||
logger.info(u"{} {} {}\n".format(i, title, str(e)))
|
||||
|
||||
return results
|
||||
|
||||
|
@ -307,15 +309,17 @@ def loaded_book_ok(book, work, edition):
|
|||
try:
|
||||
edition_for_isbn = Identifier.objects.get(type='isbn', value=isbn).edition
|
||||
except Exception as e:
|
||||
print (e)
|
||||
logger.info(e)
|
||||
return False
|
||||
|
||||
# authors
|
||||
# print set([ed.name for ed in edition_for_isbn.authors.all()])
|
||||
|
||||
if (set([utf8_general_ci_norm(author[0]) for author in authors]) !=
|
||||
set([utf8_general_ci_norm(ed.name) for ed in edition_for_isbn.authors.all()])):
|
||||
print "problem with authors"
|
||||
if (
|
||||
set([utf8_general_ci_norm(author[0]) for author in authors]) !=
|
||||
set([utf8_general_ci_norm(ed.name) for ed in edition_for_isbn.authors.all()])
|
||||
):
|
||||
logger.info("problem with authors")
|
||||
return False
|
||||
|
||||
try:
|
||||
|
@ -369,14 +373,18 @@ DROPBOX_DL = re.compile(r'"(https://dl.dropboxusercontent.com/content_link/[^"]+
|
|||
|
||||
def dl_online(ebook):
|
||||
if ebook.format != 'online':
|
||||
return None, False
|
||||
|
||||
if ebook.url.find(u'dropbox.com/s/') >= 0:
|
||||
pass
|
||||
elif ebook.url.find(u'dropbox.com/s/') >= 0:
|
||||
response = requests.get(ebook.url, headers={"User-Agent": settings.USER_AGENT})
|
||||
if response.status_code == 200:
|
||||
match_dl = DROPBOX_DL.search(response.content)
|
||||
if match_dl:
|
||||
return make_dl_ebook(match_dl.group(1), ebook)
|
||||
else:
|
||||
logger.warning('couldn\'t get {}'.format(ebook.url))
|
||||
else:
|
||||
logger.warning('couldn\'t get dl for {}'.format(ebook.url))
|
||||
|
||||
elif ebook.url.find(u'jbe-platform.com/content/books/') >= 0:
|
||||
doc = get_soup(ebook.url)
|
||||
if doc:
|
||||
|
@ -384,6 +392,12 @@ def dl_online(ebook):
|
|||
if obj:
|
||||
dl_url = urlparse.urljoin(ebook.url, obj['href'])
|
||||
return make_dl_ebook(dl_url, ebook)
|
||||
else:
|
||||
logger.warning('couldn\'t get dl_url for {}'.format(ebook.url))
|
||||
else:
|
||||
logger.warning('couldn\'t get soup for {}'.format(ebook.url))
|
||||
|
||||
return None, False
|
||||
|
||||
def make_dl_ebook(url, ebook):
|
||||
if EbookFile.objects.filter(source=ebook.url):
|
||||
|
@ -414,12 +428,17 @@ def make_dl_ebook(url, ebook):
|
|||
new_ebf.ebook = new_ebook
|
||||
new_ebf.save()
|
||||
return new_ebf, True
|
||||
else:
|
||||
logger.warning('download format for {} is not ebook'.format(url))
|
||||
else:
|
||||
logger.warning('couldn\'t get {}'.format(url))
|
||||
return None, False
|
||||
|
||||
def type_for_url(url, content_type=None):
|
||||
if not url:
|
||||
return ''
|
||||
if url.find('books.openedition.org') >= 0:
|
||||
return ('online')
|
||||
return 'online'
|
||||
if Ebook.objects.filter(url=url):
|
||||
return Ebook.objects.filter(url=url)[0].format
|
||||
ct = content_type if content_type else contenttyper.calc_type(url)
|
||||
|
|
Loading…
Reference in New Issue