From 3a30e22814295f98d1831ef6a5b746766dc76db4 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Tue, 31 Jan 2012 18:09:01 -0800 Subject: [PATCH 1/5] A stepping stone towards loading Gutenberg books. Loading Moby Dick seems to be working more or less --- test/booktests.py | 89 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/test/booktests.py b/test/booktests.py index 0d4c3799..b1535679 100644 --- a/test/booktests.py +++ b/test/booktests.py @@ -1,7 +1,12 @@ -from regluit.core import librarything, bookloader +from regluit.core import librarything, bookloader, models import itertools import django +from django.db.models import Q, F +from regluit.core import bookloader +import warnings +import datetime + def ry_lt_books(): """return parsing of rdhyee's LibraryThing collection""" lt = librarything.LibraryThing('rdhyee') @@ -25,4 +30,84 @@ def ry_wish_list_equal_loadable_lt_books(): editions = editions_for_lt(ry_lt_books()) # assume only one user -- and that we have run a LT book loading process for that user ry = django.contrib.auth.models.User.objects.all()[0] - return set([ed.work for ed in filter(None, editions)]) == set(ry.wishlist.works.all()) \ No newline at end of file + return set([ed.work for ed in filter(None, editions)]) == set(ry.wishlist.works.all()) + +def clear_works_editions_ebooks(): + models.Ebook.objects.all().delete() + models.Work.objects.all().delete() + models.Edition.objects.all().delete() + +def load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, url, format, license, lang, publication_date): + + # let's start with instantiating the relevant Work and Edition if they don't already exist + + works = models.Work.objects.filter(Q(identifiers__type='olwk') & Q(identifiers__value=ol_work_id)) + + try: + work = models.Identifier.objects.get(type='olwk',value=ol_work_id).work + except models.Identifier.DoesNotExist: # create a new work + work = models.Work() + work.title = title + work.language = lang + work.openlibrary_lookup = None + work.save() + + work_id = models.Identifier.get_or_add(type='olwk',value=ol_work_id, work=work) + + # Now pull out any existing Gutenberg editions tied to the work with the proper Gutenberg ID + try: + edition = models.Identifier.objects.get( type='gtbg', value=gutenberg_etext_id ).edition + except models.Identifier.DoesNotExist: + edition = models.Edition() + edition.title = title + edition.work = work + + edition.save() + edition_id = models.Identifier.get_or_add(type='gtbg',value=gutenberg_etext_id, edition=edition, work=work) + + # check to see whether the Edition hasn't already been loaded first + # search by url + ebooks = models.Ebook.objects.filter(url=url) + + # format: what's the controlled vocab? -- from Google -- alternative would be mimetype + + if len(ebooks): + ebook = ebooks[0] + elif len(ebooks) == 0: # need to create new ebook + ebook = models.Ebook() + + if len(ebooks) > 1: + warnings.warn("There is more than one Ebook matching url {0}".format(url)) + + + ebook.format = format + ebook.provider = 'gutenberg' + ebook.url = url + ebook.rights = license + + # is an Ebook instantiable without a corresponding Edition? (No, I think) + + ebook.edition = edition + ebook.save() + + return ebook + + # get associated info from OL + # book_loader.add_openlibrary(work) + + +def load_moby_dick(): + """Let's try this out for Moby Dick""" + + title = "Moby Dick" + ol_work_id = "/works/OL102749W" + gutenberg_etext_id = 2701 + epub_url = "http://www.gutenberg.org/cache/epub/2701/pg2701.epub" + license = 'http://www.gutenberg.org/license' + lang = 'en' + format = 'epub' + publication_date = datetime.datetime(2001,7,1) + + result = load_gutenberg_edition(title, gutenberg_etext_id, ol_work_id, epub_url, format, license, lang, publication_date) + return result + \ No newline at end of file From 020f4266d374cf75c172ea50a0a23d589d47bc1a Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 1 Feb 2012 08:27:21 -0800 Subject: [PATCH 2/5] Adding a test to make sure that new and popular are correct --- test/querytests.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 test/querytests.py diff --git a/test/querytests.py b/test/querytests.py new file mode 100644 index 00000000..2b7f30df --- /dev/null +++ b/test/querytests.py @@ -0,0 +1,23 @@ +from regluit.core import models +from django.db.models import Q, Count, Sum +from regluit.core import userlists + +from itertools import izip + +def list_popular(): + work_set = models.Work.objects.annotate(wished=Count('wishlists')).order_by('-wished') + print work_set + + counts={} + counts['unglued'] = work_set.filter(editions__ebooks__isnull=False).distinct().count() + counts['unglueing'] = work_set.filter(campaigns__status='ACTIVE').count() + counts['wished'] = work_set.count() - counts['unglued'] - counts['unglueing'] + print counts + + ungluers = userlists.work_list_users(work_set,5) + print ungluers + +def list_new(): + works1 = models.Work.objects.filter(wishlists__isnull=False).distinct().order_by('-created') + print works1.count() + \ No newline at end of file From 17c8de9473ae70e8cd415c92a37b4e50a67f821a Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 1 Feb 2012 08:33:11 -0800 Subject: [PATCH 3/5] Add a bit more detail to querytests.list_new --- test/querytests.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/querytests.py b/test/querytests.py index 2b7f30df..8cbc7e5f 100644 --- a/test/querytests.py +++ b/test/querytests.py @@ -18,6 +18,9 @@ def list_popular(): print ungluers def list_new(): - works1 = models.Work.objects.filter(wishlists__isnull=False).distinct().order_by('-created') - print works1.count() - \ No newline at end of file + w1 = models.Work.objects.filter(wishlists__isnull=False).distinct().order_by('-created') + w0 = [w for w in models.Work.objects.order_by('-created') if w.wishlists.count()] + + print w1.count() + print len(w0) + print w0 == w1 \ No newline at end of file From da552d75db74ca0fb5aa7be1b292707b4c097bf5 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 1 Feb 2012 17:41:22 +0000 Subject: [PATCH 4/5] Two tests to demonstrate that the queryset queries using distinct() and order_by() generate the correct results --- test/querytests.py | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/test/querytests.py b/test/querytests.py index 8cbc7e5f..17606ef5 100644 --- a/test/querytests.py +++ b/test/querytests.py @@ -1,26 +1,39 @@ from regluit.core import models -from django.db.models import Q, Count, Sum -from regluit.core import userlists +from django.db.models import Count from itertools import izip def list_popular(): - work_set = models.Work.objects.annotate(wished=Count('wishlists')).order_by('-wished') - print work_set + """Compare calculating popular works using QuerySets + distinct() and order_by() with an alternate approach """ - counts={} - counts['unglued'] = work_set.filter(editions__ebooks__isnull=False).distinct().count() - counts['unglueing'] = work_set.filter(campaigns__status='ACTIVE').count() - counts['wished'] = work_set.count() - counts['unglued'] - counts['unglueing'] - print counts + w1 = models.Work.objects.filter(wishlists__isnull=False). \ + distinct().annotate(wished=Count('wishlists')).order_by('-wished', 'id') + + # create a list of tuples of Works + the wishlist count, filter by non-zero wishlist counts, sort the list by descending + # number of wishlists + Work.id and then blot out the wishlist count + + w0 = map (lambda x: x[0], + sorted( + filter(lambda x: x[1] > 0, + [(w, w.wishlists.count()) for w in models.Work.objects.all()] + ) , + key=lambda x: (-x[1],x[0].id) + ) + ) + + print w1.count() + print len(w0) + print list(w1.all()) == w0 + + print "difference: ", filter(lambda item: item[1][0] != item[1][1], enumerate(izip(w0,w1))) - ungluers = userlists.work_list_users(work_set,5) - print ungluers - def list_new(): - w1 = models.Work.objects.filter(wishlists__isnull=False).distinct().order_by('-created') - w0 = [w for w in models.Work.objects.order_by('-created') if w.wishlists.count()] + """Compare calculating new works using QuerySets + distinct() and order_by() with an alternate approach """ + w1 = models.Work.objects.filter(wishlists__isnull=False).distinct().order_by('-created', 'id') + w0 = [w for w in models.Work.objects.order_by('-created', 'id') if w.wishlists.count()] print w1.count() print len(w0) - print w0 == w1 \ No newline at end of file + print list(w1.all()) == w0 + + print "difference: ", filter(lambda item: item[1][0] != item[1][1], enumerate(izip(w0,w1))) \ No newline at end of file From fb77d1a08ddc3a830c7c009d3ae873c2a6fe724b Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 1 Feb 2012 09:43:46 -0800 Subject: [PATCH 5/5] Resolve ambiguity in Work lists order -- sort by Work.id as a secondary sort key --- frontend/views.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frontend/views.py b/frontend/views.py index 945b97dc..845da62e 100755 --- a/frontend/views.py +++ b/frontend/views.py @@ -201,13 +201,13 @@ class WorkListView(ListView): def get_queryset(self): facet = self.kwargs['facet'] if (facet == 'popular'): - return models.Work.objects.filter(wishlists__isnull=False).distinct().annotate(wished=Count('wishlists')).order_by('-wished') + return models.Work.objects.filter(wishlists__isnull=False).distinct().annotate(wished=Count('wishlists')).order_by('-wished', 'id') elif (facet == 'recommended'): return models.Work.objects.filter(wishlists__user=recommended_user) elif (facet == 'new'): - return models.Work.objects.filter(wishlists__isnull=False).distinct().order_by('-created') + return models.Work.objects.filter(wishlists__isnull=False).distinct().order_by('-created', 'id') else: - return models.Work.objects.all().order_by('-created') + return models.Work.objects.all().order_by('-created', 'id') def get_context_data(self, **kwargs): context = super(WorkListView, self).get_context_data(**kwargs)