Merge pull request #363 from Gluejar/doab_load

Doab load
pull/1/head
Raymond Yee 2014-07-25 15:17:54 -07:00
commit 550abf2cd2
5 changed files with 872 additions and 1 deletions

1
bookdata/doab.json Normal file

File diff suppressed because one or more lines are too long

254
core/doab.py Normal file
View File

@ -0,0 +1,254 @@
import logging
import json
from itertools import islice
import requests
from django.db.models import (Q, F)
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
import regluit
from regluit.core import models
from regluit.core import bookloader
from regluit.core.bookloader import add_by_isbn
logger = logging.getLogger(__name__)
def store_doab_cover(doab_id, redo=False):
    """
    Fetch the cover image for a DOAB record and save it in default storage.

    doab_id: the DOAB record id used both in the storage path and the
        doabooks.org cover URL
    redo: if True, re-download the cover even if it is already stored

    returns tuple: 1) cover URL (None if the download/store failed),
    2) whether newly created (boolean)
    """
    cover_file_name = '/doab/%s/cover' % (doab_id)
    # if we don't want to redo and the cover exists, return the URL of the cover
    if not redo and default_storage.exists(cover_file_name):
        return (default_storage.url(cover_file_name), False)
    # download cover image to cover_file
    url = "http://www.doabooks.org/doab?func=cover&rid={0}".format(doab_id)
    try:
        r = requests.get(url)
        cover_file = ContentFile(r.content)
        cover_file.content_type = r.headers.get('content-type', '')
        default_storage.save(cover_file_name, cover_file)
        return (default_storage.url(cover_file_name), True)
    except Exception as e:
        # if there is a problem, log it and return None for cover URL
        logger.warning("Failed to store cover for doab_id %s: %s", doab_id, e)
        return (None, False)
def update_cover_doab(doab_id, store_cover=True):
    """
    Update the cover url for the work identified by doab_id.

    If store_cover is True, use the cover from our own storage; otherwise
    point directly at the doabooks.org cover URL. Returns the cover URL,
    or None when no cover could be stored.
    """
    work = models.Identifier.objects.get(type='doab', value=doab_id).work
    edition = work.preferred_edition
    if store_cover:
        cover_url = store_doab_cover(doab_id)[0]
    else:
        cover_url = "http://www.doabooks.org/doab?func=cover&rid={0}".format(doab_id)
    # bail out without touching the edition when storing the cover failed
    if cover_url is None:
        return None
    edition.cover_image = cover_url
    edition.save()
    return cover_url
def attach_more_doab_metadata(ebook, description, subjects,
        publication_date, publisher_name=None):
    """
    For the given ebook, attach description, subjects, and publication date to
    the corresponding Edition and Work. Missing (None/empty) values are
    skipped rather than written. Returns the ebook for chaining.
    """
    # if edition doesn't have a publication date, update it
    edition = ebook.edition
    edition_to_save = False
    if not edition.publication_date and publication_date:
        edition.publication_date = publication_date
        edition_to_save = True
    # if edition.publisher_name is empty, set it
    # NOTE(review): set_publisher appears to persist the edition itself --
    # confirm; otherwise edition_to_save should also be set True here
    if not edition.publisher_name:
        edition.set_publisher(publisher_name)
    if edition_to_save:
        edition.save()
    # attach description to work if work doesn't have one and it's not empty
    work = edition.work
    if not work.description and description:
        work.description = description
        work.save()
    # update subjects; guard against subjects being None (e.g. a record
    # without a 'subject' key), which would otherwise raise TypeError
    if subjects:
        work.subjects.add(*[models.Subject.objects.get_or_create(name=s)[0]
                            for s in subjects])
    return ebook
def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
        language, isbns,
        provider, **kwargs):
    """
    Load a record from doabooks.org represented by the input parameters and
    return an Ebook.

    Returns the existing Ebook if one already matches url, a newly created
    one otherwise, or None when there are no isbns to work with. Raises
    Exception if more than one Ebook matches url.
    """
    from regluit.core import tasks
    # check to see whether the Edition hasn't already been loaded first
    # search by url
    ebooks = models.Ebook.objects.filter(url=url)
    # simplest case -- if match (1 or more), we could check whether any
    # ebook.edition.work has a doab id matching given doab_id
    # TODO: put a migration to force Ebook.url to be a unique id
    if len(ebooks) > 1:
        raise Exception("There is more than one Ebook matching url {0}".format(url))
    elif len(ebooks) == 1:
        # already loaded: make sure doab id, cover and metadata are attached
        ebook = ebooks[0]
        models.Identifier.get_or_add(type='doab', value=doab_id,
                                     work=ebook.edition.work)
        # update the cover id
        cover_url = update_cover_doab(doab_id)
        # attach more metadata
        attach_more_doab_metadata(ebook,
                                  description=kwargs.get('description'),
                                  subjects=kwargs.get('subject'),
                                  publication_date=kwargs.get('date'),
                                  publisher_name=kwargs.get('publisher'))
        return ebook
    # remaining case --> need to create a new Ebook
    # make sure we have isbns to work with before creating ebook
    if len(isbns) == 0:
        return None
    ebook = models.Ebook()
    ebook.format = format
    ebook.provider = provider
    ebook.url = url
    ebook.rights = rights
    # we still need to find the right Edition/Work to tie Ebook to...
    # look for the Edition with which to associate ebook:
    # loop through the isbns to see whether we get one that is not None
    edition = None
    for isbn in isbns:
        edition = bookloader.add_by_isbn(isbn)
        if edition is not None:
            break
    if edition is not None:
        # if this is a new edition, then add related editions asynchronously
        if getattr(edition, 'new', False):
            tasks.populate_edition.delay(edition.isbn_13)
        # QUESTION: Is this good enough?
        # what's going to happen to edition.work if there's merging
        models.Identifier.get_or_add(type='doab', value=doab_id,
                                     work=edition.work)
    else:
        # we need to create Edition(s) de novo.
        # if there is a Work with doab_id already, attach any new Edition(s)
        try:
            work = models.Identifier.objects.get(type='doab', value=doab_id).work
        except models.Identifier.DoesNotExist:
            work = models.Work(language=language, title=title)
            work.save()
            models.Identifier.get_or_add(type='doab', value=doab_id, work=work)
        # create an Edition for each of the isbns from the input info
        editions = []
        for isbn in isbns:
            edition = models.Edition(title=title, work=work)
            edition.save()
            models.Identifier.get_or_add(type='isbn', value=isbn, work=work)
            editions.append(edition)
        # if work has any ebooks already, attach the ebook to the
        # corresponding edition; otherwise pick the first one
        editions_with_ebooks = models.Edition.objects.filter(Q(work__id=work.id) & \
                                                             Q(ebooks__isnull=False)).distinct()
        if editions_with_ebooks:
            edition = editions_with_ebooks[0]
        else:
            edition = editions[0]
    # make the edition the selected_edition of the work
    work = edition.work
    work.selected_edition = edition
    work.save()
    # tie the edition to ebook
    ebook.edition = edition
    ebook.save()
    # update the cover id (could be done separately)
    cover_url = update_cover_doab(doab_id)
    # attach more metadata
    attach_more_doab_metadata(ebook,
                              description=kwargs.get('description'),
                              subjects=kwargs.get('subject'),
                              publication_date=kwargs.get('date'),
                              publisher_name=kwargs.get('publisher'))
    return ebook
def load_doab_records(fname, limit=None, async=True):
from regluit.core import (doab, tasks)
success_count = 0
records = json.load(open(fname))
for (i, book) in enumerate(islice(records,limit)):
d = dict(book)
if d['format'] == 'pdf':
try:
if async:
task_id = tasks.load_doab_edition.delay(**dict(book))
ct = models.CeleryTask()
ct.task_id = task_id
ct.function_name = "load_doab_edition"
ct.user = None
ct.description = "Loading DOAB %s " % (dict(book)['doab_id'])
ct.save()
else:
edition = load_doab_edition(**dict(book))
success_count += 1
except Exception, e:
logger.warning(e)
logger.info("Number of books successfully uploaded: " + str(success_count))

View File

@ -0,0 +1,17 @@
import os
from django.conf import settings
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand
from regluit.core import doab
class Command(BaseCommand):
help = "load doab books"
args = "<limit> <file_name> <async>"
def handle(self, limit=None, file_name="../../../bookdata/doab.json", async=True, **options):
command_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(command_dir, file_name)
doab.load_doab_records(file_path, limit=int(limit), async=async)

View File

@ -23,7 +23,8 @@ from regluit.core import (
bookloader,
models,
goodreads,
librarything
librarything,
doab
)
from regluit.core.models import Campaign, Acq
from regluit.core.signals import deadline_impending
@ -151,6 +152,13 @@ def refresh_acqs():
else:
acq.refreshed = True
@task
def load_doab_edition(title, doab_id, seed_isbn, url, format, rights, language, isbns,
    provider='Directory of Open Access Books', **kwargs):
    """Celery task wrapper: delegate to doab.load_doab_edition and return its result."""
    return doab.load_doab_edition(title, doab_id, seed_isbn, url, format, rights,
        language, isbns, provider, **kwargs)
from postmonkey import PostMonkey, MailChimpException
pm = PostMonkey(settings.MAILCHIMP_API_KEY)

View File

@ -0,0 +1,591 @@
{
"metadata": {
"name": "",
"signature": "sha256:ed8b1c5d25bddcabb988082214fa205de769890440e681157f4177e2836b0053"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from __future__ import print_function"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# optionally copy doab.json from DOAB repo\n",
"\n",
"import shutil\n",
"\n",
"# toggle the boolean to set whether to copy\n",
"if (False):\n",
" shutil.copyfile(\"/Users/raymondyee/D/Document/Gluejar/Gluejar.github/DOAB/doab.json\",\n",
" \"../bookdata/doab.json\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Loading the list of books"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import json\n",
"import codecs\n",
"s = codecs.open(\"../bookdata/doab.json\", encoding='UTF-8').read()\n",
"records = json.loads(s)\n",
"records[:1]"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# how many PDFs are there to load?\n",
"\n",
"pdf_records = [record for record in records if dict(record).get('format') == 'pdf']\n",
"len(pdf_records)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from collections import Counter\n",
"\n",
"# doab_ids unique for the PDFs?\n",
"[c for c in Counter([dict(r).get('doab_id') for r in pdf_records]).items() if c[1] > 1]\n",
"\n",
"# 2 of the doab records have more than 1 pdf\n",
"# doab_id of 15968 and 15969 "
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# do the loading from the local version of load_doab_editon or core.doab\n",
"\n",
"from regluit.core import models\n",
"\n",
"def attach_more_doab_metadata(ebook, description, subjects,\n",
" publication_date, publisher_name):\n",
" \n",
" # if edition doesn't have a publication date, update it\n",
" edition = ebook.edition\n",
" edition_to_save = False\n",
" \n",
" if not edition.publication_date:\n",
" edition.publication_date = publication_date\n",
" edition_to_save = True\n",
" \n",
" # if edition.publisher_name is empty, set it\n",
" if not edition.publisher_name:\n",
" edition.publisher_name = models.PublisherName.objects.get_or_create(name=publisher_name)[0]\n",
" edition_to_save = True\n",
" \n",
" if edition_to_save:\n",
" edition.save()\n",
" \n",
" # attach description to work if it's not empty\n",
" work = edition.work\n",
" if not work.description:\n",
" work.description = description\n",
" work.save()\n",
" \n",
" # update subjects\n",
" work.subjects.add(*[models.Subject.objects.get_or_create(name=s)[0] for s in subjects])\n",
" \n",
" return ebook\n",
"\n",
"def load_doab_edition(title, doab_id, seed_isbn, url, format, rights,\n",
" language, isbns,\n",
" provider='Directory of Open Access Books', **kwargs):\n",
" \n",
"\n",
" from django.db.models import (Q, F)\n",
" \n",
" from regluit.core import tasks\n",
" from regluit.core import (models, bookloader)\n",
" \n",
" from regluit.core.doab import update_cover_doab \n",
" \n",
" # check to see whether the Edition hasn't already been loaded first\n",
" # search by url\n",
" ebooks = models.Ebook.objects.filter(url=url)\n",
" \n",
" # 1 match\n",
" # > 1 matches\n",
" # 0 match\n",
"\n",
" # simplest case -- if match (1 or more), we could check whether any\n",
" # ebook.edition.work has a doab id matching given doab_id\n",
" \n",
" # put a migration to force Ebook.url to be unique id\n",
" \n",
" # if yes, then return one of the Edition(s) whose work is doab_id\n",
" # if no, then \n",
" \n",
" if len(ebooks) > 1:\n",
" raise Exception(\"There is more than one Ebook matching url {0}\".format(url)) \n",
" elif len(ebooks) == 1: \n",
" ebook = ebooks[0]\n",
" doab_identifer = models.Identifier.get_or_add(type='doab',value=doab_id, \n",
" work=ebook.edition.work)\n",
" # update the cover id \n",
" cover_url = update_cover_doab(doab_id)\n",
" \n",
" # attach more metadata\n",
" attach_more_doab_metadata(ebook, \n",
" description=kwargs.get('description'),\n",
" subjects=kwargs.get('subject'),\n",
" publication_date=kwargs.get('date'),\n",
" publisher_name=kwargs.get('publisher'))\n",
" \n",
" return ebook\n",
" \n",
" # remaining case --> need to create a new Ebook \n",
" assert len(ebooks) == 0\n",
" \n",
" # make sure we have isbns to work with before creating ebook\n",
" if len(isbns) == 0:\n",
" return None\n",
" \n",
" ebook = models.Ebook()\n",
" ebook.format = format\n",
" ebook.provider = provider\n",
" ebook.url = url\n",
" ebook.rights = rights\n",
"\n",
" # we still need to find the right Edition/Work to tie Ebook to...\n",
" \n",
" # look for the Edition with which to associate ebook.\n",
" # loop through the isbns to see whether we get one that is not None\n",
" \n",
" for isbn in isbns:\n",
" edition = bookloader.add_by_isbn(isbn)\n",
" if edition is not None: break \n",
" \n",
" if edition is not None:\n",
" # if this is a new edition, then add related editions asynchronously\n",
" if getattr(edition,'new', False):\n",
" tasks.populate_edition.delay(edition.isbn_13)\n",
" \n",
" # QUESTION: Is this good enough?\n",
" # what's going to happen to edition.work if there's merging \n",
" doab_identifer = models.Identifier.get_or_add(type='doab',value=doab_id, \n",
" work=edition.work)\n",
"\n",
" # we need to create Edition(s) de novo \n",
" else: \n",
" # if there is a Work with doab_id already, attach any new Edition(s)\n",
" try:\n",
" work = models.Identifier.objects.get(type='doab',value=doab_id).work\n",
" except models.Identifier.DoesNotExist:\n",
" work = models.Work(language=language,title=title)\n",
" work.save()\n",
" doab_identifer = models.Identifier.get_or_add(type='doab',value=doab_id, \n",
" work=work)\n",
" \n",
" \n",
" # create Edition(s) for each of the isbn from the input info\n",
" editions = []\n",
" for isbn in isbns:\n",
" edition = models.Edition(title=title, work=work)\n",
" edition.save()\n",
" \n",
" isbn_id = models.Identifier.get_or_add(type='isbn',value=isbn,work=work)\n",
" \n",
" editions.append(edition)\n",
" \n",
" # if work has any ebooks already, attach the ebook to the corresponding edition\n",
" # otherwise pick the first one\n",
" # pick the first edition as the one to tie ebook to \n",
" editions_with_ebooks = models.Edition.objects.filter(Q(work__id=work.id) & \\\n",
" Q(ebooks__isnull=False)).distinct()\n",
" if editions_with_ebooks:\n",
" edition = editions_with_ebooks[0]\n",
" else:\n",
" edition = editions[0]\n",
" \n",
" # make the edition the selected_edition of the work\n",
" edition.work.selected_edition = edition\n",
" \n",
" # tie the edition to ebook\n",
" \n",
" ebook.edition = edition\n",
" ebook.save()\n",
" \n",
" # update the cover id (could be done separately)\n",
" cover_url = update_cover_doab(doab_id)\n",
" \n",
" # attach more metadata\n",
" attach_more_doab_metadata(ebook, \n",
" description=kwargs.get('description'),\n",
" subjects=kwargs.get('subject'),\n",
" publication_date=kwargs.get('date'),\n",
" publisher_name=kwargs.get('publisher')) \n",
" return ebook\n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from itertools import islice\n",
"\n",
"import json\n",
"import codecs\n",
"\n",
"from regluit.core import doab\n",
"\n",
"USE_LOCAL_DOAB_FUNCTION = False\n",
"\n",
"loading_problems = []\n",
"\n",
"s = codecs.open(\"../bookdata/doab.json\", encoding='UTF-8').read()\n",
"records = json.loads(s)\n",
"\n",
"# filter out the pdf records\n",
"\n",
"pdf_records = [record for record in records if dict(record).get('format') == 'pdf']\n",
"records_to_load = list(islice(pdf_records,100))\n",
"\n",
"for (i, book) in enumerate(records_to_load):\n",
" print (i,) \n",
" d = dict(book)\n",
" try:\n",
" if USE_LOCAL_DOAB_FUNCTION:\n",
" edition = load_doab_edition(**dict(book))\n",
" else:\n",
" edition = doab.load_doab_edition(**dict(book))\n",
" print (\"success\")\n",
" except Exception, e:\n",
" import sys\n",
" import traceback\n",
" loading_problems.append((d, e))\n",
" exc_type, exc_value, exc_traceback = sys.exc_info()\n",
" print (\"*** print_tb:\")\n",
" traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)\n",
" print (\"*** print_exception:\")\n",
" traceback.print_exception(exc_type, exc_value, exc_traceback,\n",
" limit=2, file=sys.stdout)\n",
" print (\"*** print_exc:\")\n",
"# traceback.print_exc()\n",
"# print \"*** format_exc, first and last line:\"\n",
"# formatted_lines = traceback.format_exc().splitlines()\n",
"# print formatted_lines[0]\n",
"# print formatted_lines[-1]\n",
"# print \"*** format_exception:\"\n",
"# print repr(traceback.format_exception(exc_type, exc_value,\n",
"# exc_traceback))\n",
"# print \"*** extract_tb:\"\n",
"# print repr(traceback.extract_tb(exc_traceback))\n",
"# print \"*** format_tb:\"\n",
"# print repr(traceback.format_tb(exc_traceback))\n",
"# print \"*** tb_lineno:\", exc_traceback.tb_lineno\n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I need to remind myself of how to check that there are no outstanding celery jobs after I do this loading. \n",
"\n",
"I have a technique for using `django-celery` monitoring that works on redis (what we use on just and production) -- but not laptop (http://stackoverflow.com/a/5451479/7782). I think a workable way is to look at the celery_taskmeta table."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import djcelery\n",
"[t.status for t in djcelery.models.TaskMeta.objects.all()]"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tests for the loading\n",
"\n",
" * can we find all the the URLs?\n",
" * is it associated with the the right doab_id?\n",
" * all the ISBNs loaded?\n",
" * which books are not matched with Google Books IDs -- and therefore might require URLs for covers?\n",
" * did I make sure the edition I'm attaching the ebooks to is the \"selected edition\"?\n",
" * for editions that I create [and maybe all editions?], attach a cover_image from DOAB.\n",
" * all clustered around the same work? (or do I need to do further merging?)\n",
" * are we creating extraneous works?\n",
" * subject metadata\n",
" * are we loading all the useful metadata? \n",
" * is the loading script idempotent?\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## important limit to testing\n",
"\n",
"I have written code to handle the loading of all associated ISBNs with DOAB records -- but we upload only records with non-null licenses, we will have only one ISBN per DOAB record for records with known licenses. So the loading of works for which we know the license won't exercise the code in question:\n",
"https://github.com/Gluejar/regluit/blob/5b3a8d7b1302bc1b1985c675add06c345567a7a1/core/doab.py#L91\n",
"I also checked that there is no intersection of DOAB ids betwen records with known licenses and those that don't."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from regluit.core.models import Work, Edition, Ebook, Identifier\n",
"from regluit.core.isbn import ISBN\n",
"\n",
"tests_exceptions = []\n",
"no_google_book_id = []\n",
"all_problems = []\n",
"cover_problems = []\n",
"\n",
"for record in islice(records_to_load, None ):\n",
" d = dict(record)\n",
" ebooks = Ebook.objects.filter(url=d.get('url'))\n",
" \n",
" problems = []\n",
" \n",
" try:\n",
" # check only one ebook with this URL.\n",
" if len(ebooks) != 1:\n",
" problems.append(\"len(ebooks): \" + len(ebooks))\n",
" \n",
" # ebook.edition.work is the work with the doab_id\n",
" if not(ebooks[0].edition.work == Identifier.objects.get(type='doab', \n",
" value=d.get('doab_id')).work):\n",
" problems.append(\"ebook.edition.work is the work with the doab_id\")\n",
" # all the ISBNs loaded?\n",
" # this code might be a bit inefficient given there might only be one isbn per record\n",
" \n",
" isbns = [ISBN(i).to_string() for i in d.get('isbns')]\n",
" if not(set(isbns) == set([id_.value for id_ in Identifier.objects.filter(type=\"isbn\", \n",
" value__in=isbns)])):\n",
" problems.append(\"isbns not matching\")\n",
" \n",
" if problems:\n",
" all_problems.append((d, problems))\n",
" \n",
" # check on presence of Google books id\n",
" if len(ebooks[0].edition.identifiers.filter(type=\"goog\")) < 1:\n",
" no_google_book_id.append(d)\n",
"\n",
" # check on the cover URLs\n",
" #print (ebooks[0].edition.work.cover_image_small())\n",
" if ebooks[0].edition.work.cover_image_small().find(\"amazonaws\") < 0:\n",
" cover_problems.append((d))\n",
" \n",
" except Exception, e:\n",
" tests_exceptions.append((d, e))\n",
" \n",
"print (\"number of records loaded\", len(records_to_load))\n",
"print ()\n",
"print (\"all_problems\", all_problems)\n",
"print ()\n",
"print (\"tests_exceptions\", tests_exceptions)\n",
"print ()\n",
"print (\"no_google_book_id\", no_google_book_id)\n",
"print ()\n",
"print (\"cover problems\", cover_problems)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Stop"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"raise Exception(\"Stop here\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# invalid ISBNs?\n",
"\n",
"for (d, p) in all_problems:\n",
" print (d['isbns'][0], ISBN(d['isbns'][0]).valid)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"[(d['doab_id'], d['isbns'][0]) for d in no_google_book_id]"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# it is possible to do a query for a whole set of values, a technique I might make use of.\n",
"# http://stackoverflow.com/a/9304968\n",
"# e.g., Blog.objects.filter(pk__in=[1,4,7])\n",
"\n",
"urls = [dict(record).get('url') for record in records_to_load]\n",
"set([ebook.url for ebook in Ebook.objects.filter(url__in=urls)]) == set(urls)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Code I was working out to use Django querysets to pull out relationships among ebooks, editions, works"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from regluit.core.models import (Ebook, Edition, Work)\n",
"from django.db.models import (Q, F)\n",
"\n",
"# models.Identifier.objects.filter(edition__isnull=False).filter(~Q(edition__work__id = F('work__id'))).count()\n",
"\n",
"editions_with_ebooks = Edition.objects.filter(ebooks__isnull=False)\n",
"editions_with_ebooks\n",
"\n",
"edition = editions_with_ebooks[0]\n",
"print (edition.work_id)\n",
"work = edition.work\n",
"print (work.editions.all())\n",
"# didn't know you should use distinct()\n",
"Edition.objects.filter(Q(work__id=edition.work_id) & Q(ebooks__isnull=False)).distinct()\n",
"#Edition.objects.filter(Q(work__id=edition.work_id))\n",
"#work.objects.filter(editions__ebooks__isnull=False)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# let me grab ebooks and look at their parent works\n",
"\n",
"from regluit.core.models import Ebook\n",
"\n",
"[eb.edition for eb in Ebook.objects.all()]"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Extra"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"raise Exception(\"Stop here\")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Checking Celery Results"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Checking the results of a local celery task \n",
"from regluit.core import tasks\n",
"\n",
"task_id = \"28982485-efc3-44d7-9cf6-439645180d5d\"\n",
"result = tasks.fac.AsyncResult(task_id)\n",
"result.get()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}