Add tracking for google_id_not_found to add_missing_isbn_to_editions

Add a django-admin command to run add_missing_isbn_to_editions
pull/1/head
Raymond Yee 2012-02-16 10:44:13 -08:00
parent 4752181ace
commit fb5ad2da76
1 changed file with 25 additions and 18 deletions

View File

@ -570,18 +570,20 @@ def load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, seed_isbn, url
def add_missing_isbn_to_editions(max_num=None, confirm=False):
"""For each of the editions with Google Books ids, do a lookup and attach ISBNs. Set confirm to True to check db changes made correctly"""
print "Number of editions with Google Books IDs but not ISBNs", \
models.Edition.objects.filter(identifiers__type='goog').exclude(identifiers__type='isbn').count()
logger.info("Number of editions with Google Books IDs but not ISBNs (before): %d",
models.Edition.objects.filter(identifiers__type='goog').exclude(identifiers__type='isbn').count())
from regluit.experimental import bookdata
gb = bookdata.GoogleBooks(key=settings.GOOGLE_BOOKS_API_KEY)
new_isbns = []
google_id_not_found = []
no_isbn_found = []
editions_to_merge = []
exceptions = []
for (i, ed) in enumerate(islice(models.Edition.objects.filter(identifiers__type='goog').exclude(identifiers__type='isbn'), max_num)):
try:
g_id = ed.identifiers.get(type='goog').value
@ -593,27 +595,32 @@ def add_missing_isbn_to_editions(max_num=None, confirm=False):
# try to get ISBN from Google Books
try:
isbn = gb.volumeid(g_id)['isbn']
logger.info("g_id, isbn: %s %s", g_id, isbn)
if isbn is not None:
# check to see whether the isbn is actually already in the db but attached to another Edition
existing_isbn_ids = models.Identifier.objects.filter(type='isbn', value=isbn)
if len(existing_isbn_ids):
# don't try to merge editions right now, just note the need to merge
ed2 = existing_isbn_ids[0].edition
editions_to_merge.append((ed.id, g_id, isbn, ed2.id))
else:
new_id = models.Identifier(type='isbn', value=isbn, edition=ed, work=ed.work)
new_id.save()
new_isbns.append((ed.id, g_id, isbn))
vol_id = gb.volumeid(g_id)
if vol_id is None:
google_id_not_found.append((ed.id, g_id))
logger.debug("g_id not found: %s", g_id)
else:
no_isbn_found.append((ed.id, g_id, None))
isbn = vol_id.get('isbn')
logger.info("g_id, isbn: %s %s", g_id, isbn)
if isbn is not None:
# check to see whether the isbn is actually already in the db but attached to another Edition
existing_isbn_ids = models.Identifier.objects.filter(type='isbn', value=isbn)
if len(existing_isbn_ids):
# don't try to merge editions right now, just note the need to merge
ed2 = existing_isbn_ids[0].edition
editions_to_merge.append((ed.id, g_id, isbn, ed2.id))
else:
new_id = models.Identifier(type='isbn', value=isbn, edition=ed, work=ed.work)
new_id.save()
new_isbns.append((ed.id, g_id, isbn))
else:
no_isbn_found.append((ed.id, g_id, None))
except Exception, e:
logger.exception("add_missing_isbn_to_editions for edition.id %s: %s", ed.id, e)
exceptions.append((ed.id, g_id, None, e))
print "Number of editions with Google Books IDs but not ISBNs", \
models.Edition.objects.filter(identifiers__type='goog').exclude(identifiers__type='isbn').count()
logger.info("Number of editions with Google Books IDs but not ISBNs (after): %d",
models.Edition.objects.filter(identifiers__type='goog').exclude(identifiers__type='isbn').count())
ok = None