Add g_seed_isbn.json, which holds the Gutenberg editions I'm loading.

pull/1/head
Raymond Yee 2012-02-27 13:19:58 -08:00
parent 446907109f
commit c98258e459
3 changed files with 8 additions and 3 deletions

File diff suppressed because one or more lines are too long

View File

@ -1293,7 +1293,12 @@ if __name__ == '__main__':
#print list(gutenberg_and_seed_isbn(max=10))
print list(repick_seed_isbn(10))
#print list(repick_seed_isbn(10))
# output a filtered gutenberg list
# The thresholds 0.56 and 0.7 were chosen by eyeballing the results in Google Refine
y = list(filtered_gutenberg_and_seed_isbn(min_l_ratio=0.56, min_dominance=0.7))
export_to_json(y,fname="g_seed_isbn.json")
#suites = suite()
#suites = unittest.defaultTestLoader.loadTestsFromModule(__import__('__main__'))

View File

@ -83,7 +83,7 @@ def load_gutenberg_moby_dick():
epub_url, format, license, lang, publication_date)
return ebook
def load_gutenberg_books(fname="/Users/raymondyee/D/Document/Gluejar/Gluejar.github/regluit/experimental/gutenberg/g_seed_isbn.json",
def load_gutenberg_books(fname="{0}/gutenberg/g_seed_isbn.json".format(experimental.__path__[0]),
max_num=None):
headers = ()