Check in current progress so that I can focus on a change in the master branch to add missing ISBNs to Editions
parent
8c397f4953
commit
a8f1c157be
|
@ -6,8 +6,9 @@ from pprint import pprint
|
||||||
from itertools import islice, izip, repeat
|
from itertools import islice, izip, repeat
|
||||||
import logging
|
import logging
|
||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
|
import random
|
||||||
|
|
||||||
|
random.seed()
|
||||||
|
|
||||||
import sys, os
|
import sys, os
|
||||||
|
|
||||||
|
@ -44,6 +45,7 @@ RY_OLID = 'OL4264806A'
|
||||||
SURFACING_WORK_OLID = 'OL675829W'
|
SURFACING_WORK_OLID = 'OL675829W'
|
||||||
SURFACING_EDITION_OLID = 'OL8075248M'
|
SURFACING_EDITION_OLID = 'OL8075248M'
|
||||||
SURFACING_ISBN = '9780446311076'
|
SURFACING_ISBN = '9780446311076'
|
||||||
|
SURFACING_LT_WORK_ID = '18997'
|
||||||
|
|
||||||
USER_AGENT = "rdhyee@gluejar.com"
|
USER_AGENT = "rdhyee@gluejar.com"
|
||||||
|
|
||||||
|
@ -96,12 +98,17 @@ def lt_whatwork(isbn=None, title=None, author=None):
|
||||||
http://www.librarything.com/blogs/thingology/2009/03/new-api-what-work/
|
http://www.librarything.com/blogs/thingology/2009/03/new-api-what-work/
|
||||||
"""
|
"""
|
||||||
logger.info("looking up at lt_whatwork (isbn, title, author): %s %s %s" ,isbn, title, author)
|
logger.info("looking up at lt_whatwork (isbn, title, author): %s %s %s" ,isbn, title, author)
|
||||||
url = "http://www.librarything.com/api/whatwork.php?"
|
url = "http://www.librarything.com/api/whatwork.php"
|
||||||
url = "http://www.librarything.com/api/thingISBN/%s" % isbn
|
params=dict([(k,v) for (k,v) in {'isbn':isbn, 'title':title, 'author':author}.items() if v is not None])
|
||||||
xml = requests.get(url, headers={"User-Agent": USER_AGENT}).content
|
|
||||||
|
xml = requests.get(url, params=params, headers={"User-Agent": USER_AGENT}).content
|
||||||
doc = ElementTree.fromstring(xml)
|
doc = ElementTree.fromstring(xml)
|
||||||
return [e.text for e in doc.findall('isbn')]
|
|
||||||
|
|
||||||
|
work = doc.find('work')
|
||||||
|
if work is not None:
|
||||||
|
return work.text
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
def hathi_bib(id, id_type='isbn', detail_level='brief'):
|
def hathi_bib(id, id_type='isbn', detail_level='brief'):
|
||||||
url = "http://catalog.hathitrust.org/api/volumes/brief/%s/%s.json" % (id_type, id)
|
url = "http://catalog.hathitrust.org/api/volumes/brief/%s/%s.json" % (id_type, id)
|
||||||
|
@ -763,21 +770,30 @@ class GoogleBooksTest(TestCase):
|
||||||
results = gb.volumeid(g_id, glossed=True)
|
results = gb.volumeid(g_id, glossed=True)
|
||||||
print results
|
print results
|
||||||
|
|
||||||
|
class LibraryThingTest(TestCase):
|
||||||
|
|
||||||
class thingISBNTest(TestCase):
|
|
||||||
def test_lt_isbn(self):
|
def test_lt_isbn(self):
|
||||||
|
|
||||||
isbns = thingisbn(SURFACING_ISBN)
|
isbns = thingisbn(SURFACING_ISBN)
|
||||||
# convert to isbn-13
|
# convert to isbn-13
|
||||||
isbns = map(lambda x: isbn_mod.ISBN(x).to_string('13'), isbns)
|
isbns = map(lambda x: isbn_mod.ISBN(x).to_string('13'), isbns)
|
||||||
print isbns
|
self.assertTrue(SURFACING_ISBN in isbns)
|
||||||
|
|
||||||
|
# grab a random ISBN from the list, issue another call and then check that the new list is the same
|
||||||
|
isbns1 = map(lambda x: isbn_mod.ISBN(x).to_string('13'), thingisbn(random.sample(isbns,1)[0]))
|
||||||
|
self.assertEqual(set(isbns), set(isbns1))
|
||||||
|
def test_whatwork(self):
|
||||||
|
work_id = lt_whatwork(isbn=SURFACING_ISBN)
|
||||||
|
self.assertEqual(work_id, SURFACING_LT_WORK_ID)
|
||||||
|
work_id = lt_whatwork(title='Hamlet', author='Shakespeare')
|
||||||
|
self.assertEqual(work_id, '2199')
|
||||||
|
|
||||||
|
|
||||||
def suite():
|
def suite():
|
||||||
|
|
||||||
#testcases = [WorkMapperTest,FreebaseBooksTest, OpenLibraryTest,GoogleBooksTest]
|
#testcases = [WorkMapperTest,FreebaseBooksTest, OpenLibraryTest,GoogleBooksTest]
|
||||||
testcases = []
|
testcases = []
|
||||||
suites = unittest.TestSuite([unittest.TestLoader().loadTestsFromTestCase(testcase) for testcase in testcases])
|
suites = unittest.TestSuite([unittest.TestLoader().loadTestsFromTestCase(testcase) for testcase in testcases])
|
||||||
suites.addTest(GoogleBooksTest('test_volumeid'))
|
suites.addTest(LibraryThingTest('test_whatwork'))
|
||||||
#suites.addTest(SettingsTest('test_dev_me_alignment')) # give option to test this alignment
|
#suites.addTest(SettingsTest('test_dev_me_alignment')) # give option to test this alignment
|
||||||
return suites
|
return suites
|
||||||
|
|
||||||
|
|
|
@ -90,6 +90,7 @@ def get_or_create(session, model, defaults=None, **kwargs):
|
||||||
|
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
|
|
||||||
|
|
||||||
class SeedISBN(Base):
|
class SeedISBN(Base):
|
||||||
|
|
||||||
__tablename__ = 'SeedISBN'
|
__tablename__ = 'SeedISBN'
|
||||||
|
|
|
@ -89,14 +89,19 @@ def cluster_status():
|
||||||
"""Look at the current Work, Edition instances to figure out what needs to be fixed"""
|
"""Look at the current Work, Edition instances to figure out what needs to be fixed"""
|
||||||
results = OrderedDict([
|
results = OrderedDict([
|
||||||
('number of Works', models.Work.objects.count()),
|
('number of Works', models.Work.objects.count()),
|
||||||
|
('number of Editions', models.Edition.objects.count())
|
||||||
('number of Edition that have both Google Books id and ISBNs',
|
('number of Edition that have both Google Books id and ISBNs',
|
||||||
models.Edition.objects.filter(identifiers__type='isbn').filter(identifiers__type='goog').count()),
|
models.Edition.objects.filter(identifiers__type='isbn').filter(identifiers__type='goog').count()),
|
||||||
('number of Editions with Google Books IDs but not ISBNs',
|
('number of Editions with Google Books IDs but not ISBNs',
|
||||||
models.Edition.objects.filter(identifiers__type='goog').exclude(identifiers__type='isbn').count()),
|
models.Edition.objects.filter(identifiers__type='goog').exclude(identifiers__type='isbn').count()),
|
||||||
|
|
||||||
])
|
])
|
||||||
|
|
||||||
# Are there Edition without ISBNs? Are they all singletons?
|
# What needs to be done to recluster editions?
|
||||||
|
|
||||||
|
# Are there Editions without ISBNs? If so, look up the corresponding ISBNs from Google Books. Are they all singletons?
|
||||||
|
|
||||||
|
# identify Editions that should be merged (e.g., if one Edition has a Google Books ID and another Edition has one with
|
||||||
|
# an ISBN tied to that Google Books ID)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue