change doab ids to doab handles
parent
97ce6bfa49
commit
c7e2ae7b25
|
@ -338,7 +338,7 @@ def loaded_book_ok(book, work, edition):
|
|||
ID_URLPATTERNS = {
|
||||
'goog': re.compile(r'[\./]google\.com/books\?.*id=(?P<id>[a-zA-Z0-9\-_]{12})'),
|
||||
'olwk': re.compile(r'[\./]openlibrary\.org(?P<id>/works/OL\d{1,8}W)'),
|
||||
'doab': re.compile(r'([\./]doabooks\.org/doab\?.*rid:|=oai:doab-books:)(?P<id>\d{1,8})'),
|
||||
'doab': re.compile(r'([\./]directory\.doabooks\.org/handle/)(?P<id>20\.500\.12854/\d{5,8})'),
|
||||
'gdrd': re.compile(r'[\./]goodreads\.com/book/show/(?P<id>\d{1,8})'),
|
||||
'ltwk': re.compile(r'[\./]librarything\.com/work/(?P<id>\d{1,8})'),
|
||||
'oclc': re.compile(r'\.worldcat\.org/.*oclc/(?P<id>\d{8,12})'),
|
||||
|
|
|
@ -2,15 +2,13 @@ from django.core.management.base import BaseCommand
|
|||
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from regluit.core.models import Edition
|
||||
from regluit.core.loaders.doab import store_doab_cover
|
||||
|
||||
to_fix = ['16198', '16199', '16201', '16202', '16204', '16205', '16206', '16207', '16208', '16209', '16210', '16213', '16279', '16287', '16302', '17116', '17117', '17121', '17129', '17149', '17154', '19501', '20186', '20238', '20280', '20395', '20447', '20504', '20706', '20750', '21317', '21319', '21332', '21338', '21343', '21345', '21348', '21356', '21643', '21814', '23509', '23510', '23516', '23517', '23518', '23521', '23523', '23593', '23596', '24216', '24282', '24587', '24865', '24867', '25496', '25497', '25655', '26181', '26308', '26508', '26509', '26510', '26511', '26512', '26513', '26514', '26515', '26516', '26517', '26518', '26523', '26524', '26710', '26995', '31899', '31902', '31908', '31924', '31930', '31933', '32327', '44061']
|
||||
COVER_PATTERN = re.compile(r'doabooks.org/doab\?func=cover\&rid=(\d+)')
|
||||
to_fix = []
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
""" To repair covers, will need a new refresh_cover method"""
|
||||
help = "fix bad covers for doab"
|
||||
|
||||
def add_arguments(self, parser):
|
||||
|
@ -26,28 +24,23 @@ class Command(BaseCommand):
|
|||
def fix_doab_cover(self, doab):
|
||||
eds = Edition.objects.filter(cover_image__contains='amazonaws.com/doab/%s/cover' % doab)
|
||||
|
||||
resp = requests.get('https://unglueit-files.s3.amazonaws.com/doab/%s/cover' % doab)
|
||||
if resp.status_code == 200 and 'text/html' in resp.headers['Content-Type']:
|
||||
doc = BeautifulSoup(resp.content, 'lxml')
|
||||
link = doc.find('a')
|
||||
if link:
|
||||
self.stdout.write(link['href'])
|
||||
new_doab = COVER_PATTERN.search(link['href'])
|
||||
if new_doab:
|
||||
(cover_url, new_cover) = store_doab_cover(new_doab.group(1), redo=True)
|
||||
if cover_url:
|
||||
for e in eds:
|
||||
e.cover_image = cover_url
|
||||
e.save()
|
||||
if e.cover_image_small() and e.cover_image_thumbnail():
|
||||
self.stdout.write('fixed %s using %s' % (doab, new_doab.group(1)))
|
||||
else:
|
||||
self.stdout.write('bad thumbnails for %s' % new_doab.group(1))
|
||||
return False
|
||||
return True
|
||||
cover_url = self.refresh_cover(doab)
|
||||
if cover_url:
|
||||
for e in eds:
|
||||
e.cover_image = cover_url
|
||||
e.save()
|
||||
if e.cover_image_small() and e.cover_image_thumbnail():
|
||||
self.stdout.write('fixed %s using %s' % (doab, new_doab.group(1)))
|
||||
else:
|
||||
self.stdout.write('bad thumbnails for %s' % new_doab.group(1))
|
||||
return False
|
||||
return True
|
||||
self.stdout.write('removing bad cover for %s' % doab)
|
||||
|
||||
for e in eds:
|
||||
e.cover_image = None
|
||||
e.save()
|
||||
return False
|
||||
|
||||
def refresh_cover(self, doab):
|
||||
return False
|
||||
|
|
|
@ -6,7 +6,7 @@ class Command(BaseCommand):
|
|||
help = "load doab books by doab_id via oai"
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('doab_ids', nargs='+', type=int, default=1, help="doab ids to add")
|
||||
parser.add_argument('doab_ids', nargs='+', type=str, default=1, help="doab ids to add")
|
||||
|
||||
def handle(self, doab_ids, **options):
|
||||
for doab_id in doab_ids:
|
||||
|
|
|
@ -12,7 +12,7 @@ class Command(BaseCommand):
|
|||
def add_arguments(self, parser):
|
||||
parser.add_argument('from_date', nargs='?', type=timefromiso,
|
||||
default=None, help="YYYY-MM-DD to start")
|
||||
parser.add_argument('--from_id', nargs='?', type=int, default=0, help="id to start with")
|
||||
parser.add_argument('--from_id', nargs='?', type=str, default='', help="handle to start with")
|
||||
parser.add_argument('--max', nargs='?', type=int, default=None, help="max desired records")
|
||||
|
||||
def handle(self, **options):
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
import csv
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from regluit.core.models import Identifier
|
||||
|
||||
class Command(BaseCommand):
    """Management command that rewrites stored 'doab' identifiers as handles.

    Each file named on the command line is read as JSON and used as a lookup
    table keyed by the current identifier value. Every ``Identifier`` of type
    'doab' whose value is a key in the table has its value replaced by the
    mapped entry; every other 'doab' identifier is deleted.

    NOTE(review): presumably each file holds one JSON object mapping old
    numeric DOAB ids (as strings) to DOAB handles -- confirm against the
    file that is actually supplied.
    NOTE(review): identifiers whose value is not a key of the table are
    deleted, so running the command a second time with the same mapping
    would presumably delete the identifiers it has just rewritten -- confirm
    before re-running.
    """
    help = "translate doab ids to handles"

    def add_arguments(self, parser):
        """Register the positional ``filename`` argument (one or more paths)."""
        parser.add_argument('filename', nargs='+', help="filename")

    def handle(self, filename, **options):
        """Load the mapping file(s) and rewrite or delete 'doab' identifiers.

        ``filename`` is the list produced by ``nargs='+'``; a single path
        string (e.g. passed programmatically) is accepted as well.
        """
        # Imported here because the module header imports csv, not json.
        import json

        # nargs='+' always yields a list, which open() cannot take directly.
        filenames = filename if isinstance(filename, (list, tuple)) else [filename]

        # Merge all files into one table; later files win on duplicate keys.
        newdoab = {}
        for path in filenames:
            with open(path, 'r') as jsonfile:
                newdoab.update(json.load(jsonfile))

        for doab in Identifier.objects.filter(type='doab'):
            if doab.value in newdoab:
                doab.value = newdoab[doab.value]
                doab.save()
            else:
                # No translation available for this identifier: drop it.
                doab.delete()
        self.stdout.write("new doab ids loaded!")
|
||||
|
|
@ -27,7 +27,7 @@ TEXT_RELATION_CHOICES = (
|
|||
ID_CHOICES = (
|
||||
('http', 'Web Address'),
|
||||
('isbn', 'ISBN'),
|
||||
('doab', 'DOABooks ID'),
|
||||
('doab', 'DOABooks handle'),
|
||||
('gtbg', 'Project Gutenberg Number'),
|
||||
('doi', 'Digital Object Identifier'),
|
||||
('oclc', 'OCLC Number'),
|
||||
|
@ -43,7 +43,7 @@ OTHER_ID_CHOICES = (
|
|||
('edid', 'pragmatic edition ID'),
|
||||
)
|
||||
|
||||
WORK_IDENTIFIERS = ('doi','olwk','glue','ltwk', 'http', 'doab')
|
||||
WORK_IDENTIFIERS = ('doi', 'olwk', 'glue', 'ltwk', 'http', 'doab')
|
||||
|
||||
ID_CHOICES_MAP = dict(ID_CHOICES)
|
||||
|
||||
|
|
|
@ -24,8 +24,8 @@ ID_VALIDATION = {
|
|||
"The Web Address must be a valid http(s) URL."),
|
||||
'isbn': (u'^([\\dxX \\-–—‐,;]+|delete)$', #includes unicode hyphen, endash and emdash
|
||||
"The ISBN must be a valid ISBN-13."),
|
||||
'doab': (r'^(\d{1,6}|delete)$',
|
||||
"The value must be 1-6 digits."),
|
||||
'doab': (r'^20.500.12854/(\d{5,8}|delete)$',
|
||||
"The value must be a handle, starting with 20.500.12854/, followed by 5-8 digits."),
|
||||
'gtbg': (r'^(\d{1,6}|delete)$',
|
||||
"The Gutenberg number must be 1-6 digits."),
|
||||
'doi': (r'^(https?://dx\.doi\.org/|https?://doi\.org/)?(10\.\d+/\S+|delete)$',
|
||||
|
|
|
@ -5,13 +5,13 @@
|
|||
These books are included in <a href="https://www.gutenberg.org">Project Gutenberg</a>. They are archived and can be improved at <a href="https://www.gitenberg.org">GITenberg</a>.
|
||||
{% endif %}
|
||||
{% if facet.facet_name == 'doab' %}
|
||||
These books are included in the <a href="http://doabooks.org/">Directory of Open Access Books</a>. This means that they have been peer-reviewed and are of interest to scholars.
|
||||
These books are included in the <a href="https://doabooks.org/">Directory of Open Access Books</a>. This means that they have been peer-reviewed and are of interest to scholars.
|
||||
{% endif %}
|
||||
{% if facet.facet_name == '-gtbg' %}
|
||||
These books are not included in <a href="https://www.gutenberg.org">Project Gutenberg</a>.
|
||||
{% endif %}
|
||||
{% if facet.facet_name == '-doab' %}
|
||||
These books do not seem to be included in the <a href="http://doabooks.org/">Directory of Open Access Books</a>.
|
||||
These books do not seem to be included in the <a href="https://doabooks.org/">Directory of Open Access Books</a>.
|
||||
{% endif %}
|
||||
</p>
|
||||
</div>
|
|
@ -300,7 +300,7 @@
|
|||
{% endfor %}
|
||||
{% if work.doab %}
|
||||
<p>
|
||||
This book is included in <a href="http://www.doabooks.org/doab?func=search&query=rid%3A{{ work.doab }}">DOAB</a>.
|
||||
This book is included in <a href="https://directory.doabooks.org/handle/{{ work.doab }}">DOAB</a>.
|
||||
</p>
|
||||
{% endif %}
|
||||
{% if work.gtbg %}
|
||||
|
|
Loading…
Reference in New Issue