From bb257451e41d4af9fbe732fb625b213ccbf54c84 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 25 Jan 2012 14:31:10 -0800 Subject: [PATCH 01/22] Updated setting for please --- README.md | 4 ++-- settings/please.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e60957d5..8e5ab342 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Production Deployment Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, and talking to an Amazon Relational Data Store instance. -1. create an ubuntu LTS ec2 instance +1. create an ubuntu ec2 instance (e.g, go http://alestic.com/ to find various ubuntu images) 1. `sudo aptitude update` 1. `sudo aptitude upgrade` 1. `sudo aptitude install git-core apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server python-lxml postfix` @@ -54,7 +54,7 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, 1. create an Amazon RDS instance 1. connect to it, e.g. `mysql -u root -h gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com -p` 1. `CREATE DATABASE unglueit CHARSET utf8;` -1. `GRANT ALL ON unglueit.\* TO ‘unglueit’@’ip-10-244-250-168.ec2.internal’ IDENTIFIED BY 'unglueit' REQUIRE SSL` +1. `GRANT ALL ON unglueit.\* TO ‘unglueit’@’ip-10-244-250-168.ec2.internal’ IDENTIFIED BY 'unglueit' REQUIRE SSL;` 1. update settings/prod.py with database credentials 1. `virtualenv ENV` 1. 
`source ENV/bin/activate` diff --git a/settings/please.py b/settings/please.py index cc83b031..ec8d41b9 100644 --- a/settings/please.py +++ b/settings/please.py @@ -20,7 +20,7 @@ DATABASES = { 'NAME': 'please', 'USER': 'please', 'PASSWORD': 'unglueit', - 'HOST': 'gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com', + 'HOST': 'domU-12-31-39-14-F6-46.compute-1.internal', 'PORT': '', } } From f626ecc772bd393072873e201c9dcd92c614a822 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 25 Jan 2012 15:40:43 -0800 Subject: [PATCH 02/22] I had to change the instance again. --- settings/please.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings/please.py b/settings/please.py index ec8d41b9..b97d4370 100644 --- a/settings/please.py +++ b/settings/please.py @@ -20,7 +20,7 @@ DATABASES = { 'NAME': 'please', 'USER': 'please', 'PASSWORD': 'unglueit', - 'HOST': 'domU-12-31-39-14-F6-46.compute-1.internal', + 'HOST': 'ip-10-204-97-89.ec2.internal', 'PORT': '', } } From 63ce2a34878a57bc6d79e9434a28c5376254c8d7 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 25 Jan 2012 17:16:53 -0800 Subject: [PATCH 03/22] Trying a user please2 which doesn't require SSL --- settings/please.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings/please.py b/settings/please.py index b97d4370..3bea6791 100644 --- a/settings/please.py +++ b/settings/please.py @@ -18,7 +18,7 @@ DATABASES = { 'default': { 'ENGINE': 'django.db.backends.mysql', 'NAME': 'please', - 'USER': 'please', + 'USER': 'please2', 'PASSWORD': 'unglueit', 'HOST': 'ip-10-204-97-89.ec2.internal', 'PORT': '', From 11ba724508288553830a591689d2d90f2b9b7e15 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 25 Jan 2012 17:19:28 -0800 Subject: [PATCH 04/22] Still trying to get please db parameters correct --- settings/please.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings/please.py b/settings/please.py index 3bea6791..784bb832 100644 --- 
a/settings/please.py +++ b/settings/please.py @@ -20,7 +20,7 @@ DATABASES = { 'NAME': 'please', 'USER': 'please2', 'PASSWORD': 'unglueit', - 'HOST': 'ip-10-204-97-89.ec2.internal', + 'HOST': 'pleasedb.cboagmr25pjs.us-east-1.rds.amazonaws.com', 'PORT': '', } } From 84c37a988a127520c5119d643d7e11660e5f8f16 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Wed, 25 Jan 2012 17:48:58 -0800 Subject: [PATCH 05/22] Turn off the godaddy bundle temporarily --- deploy/please.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/please.conf b/deploy/please.conf index 28992b89..86acdd0a 100644 --- a/deploy/please.conf +++ b/deploy/please.conf @@ -31,7 +31,7 @@ Alias /static /var/www/static SSLEngine on SSLCertificateFile /etc/ssl/certs/server.crt SSLCertificateKeyFile /etc/ssl/private/server.key -SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt +#SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt WSGIDaemonProcess regluit-ssl processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi From 744b751279589951767bebe2ea7141c879ed19f0 Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Thu, 26 Jan 2012 02:12:23 +0000 Subject: [PATCH 06/22] We need separate wsgi file for please --- deploy/please.conf | 4 ++-- deploy/please.wsgi | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 deploy/please.wsgi diff --git a/deploy/please.conf b/deploy/please.conf index 86acdd0a..7a5043e1 100644 --- a/deploy/please.conf +++ b/deploy/please.conf @@ -12,7 +12,7 @@ RewriteRule /admin(.*) https://please.unglueit.com/admin$1 [R=301] RewriteRule /accounts(.*) https://please.unglueit.com/accounts$1 [R=301] WSGIDaemonProcess regluit processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs -WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi +WSGIScriptAlias / /opt/regluit/deploy/please.wsgi Options Indexes FollowSymLinks @@ -34,7 +34,7 @@ SSLCertificateKeyFile /etc/ssl/private/server.key 
#SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt WSGIDaemonProcess regluit-ssl processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs -WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi +WSGIScriptAlias / /opt/regluit/deploy/please.wsgi Options Indexes FollowSymLinks diff --git a/deploy/please.wsgi b/deploy/please.wsgi new file mode 100644 index 00000000..bfc2b5bd --- /dev/null +++ b/deploy/please.wsgi @@ -0,0 +1,9 @@ +#!/usr/bin/env python + +import os + +import django.core.handlers.wsgi + +os.environ['CELERY_LOADER'] = 'django' +os.environ['DJANGO_SETTINGS_MODULE'] = 'regluit.settings.please' +application = django.core.handlers.wsgi.WSGIHandler() From dcf477c34a45415eddbd776fcdcee23244a59820 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 26 Jan 2012 07:29:46 -0800 Subject: [PATCH 07/22] Adding a site for ry-dev to initial_data.json Creating a separate celeryd_please.conf to handle please Added SITE_ID to dev.py More instructions for README.md --- README.md | 11 +++++++---- core/fixtures/initial_data.json | 8 ++++++++ deploy/celeryd_please.conf | 11 +++++++++++ settings/dev.py | 3 +++ 4 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 deploy/celeryd_please.conf diff --git a/README.md b/README.md index 8e5ab342..af65726e 100644 --- a/README.md +++ b/README.md @@ -61,14 +61,17 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, 1. `pip install -r requirements.pip` 1. `echo "/opt/" > ENV/lib/python2.7/site-packages/regluit.pth` 1. `django-admin.py syncdb --migrate --settings regluit.settings.prod` -1. `mkdir /var/www/static` -1. `chown ubuntu:ubuntu /var/www/static` +1. `sudo mkdir /var/www/static` +1. `sudo chown ubuntu:ubuntu /var/www/static` 1. `django-admin.py collectstatic --settings regluit.settings.prod` 1. `sudo ln -s /opt/regluit/deploy/regluit.conf /etc/apache2/sites-available/regluit` 1. `sudo a2ensite regluit` 1. `sudo a2enmod ssl rewrite` +1. `cd /home/ubuntu` +1. 
copy SSL server key to `/etc/ssl/private/server.key` +1. copy SSL certificate to `/etc/ssl/certs/server.crt` 1. `sudo /etc/init.d/apache2 restart` -1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` +1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` (just enter return for all?) 1. `sudo cp deploy/celeryd /etc/init.d/celeryd` 1. `sudo chmod 755 /etc/init.d/celeryd` 1. `sudo cp deploy/celeryd.conf /etc/default/celeryd` @@ -76,7 +79,7 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, 1. `sudo chown celery:celery /var/log/celery` 1. `sudo /etc/init.d/celeryd start` -OS X Develper Notes +OS X Developer Notes ------------------- To run regluit on OS X you should have XCode installed diff --git a/core/fixtures/initial_data.json b/core/fixtures/initial_data.json index afe77bf9..502cbdc4 100644 --- a/core/fixtures/initial_data.json +++ b/core/fixtures/initial_data.json @@ -23,6 +23,14 @@ "name": "unglue.it local development" } }, + { + "pk": 4, + "model": "sites.site", + "fields": { + "domain": "ry-dev.dyndns.org", + "name": "ry-dev development" + } + }, { "pk": 1, "model": "core.premium", diff --git a/deploy/celeryd_please.conf b/deploy/celeryd_please.conf new file mode 100644 index 00000000..8c41c95a --- /dev/null +++ b/deploy/celeryd_please.conf @@ -0,0 +1,11 @@ +CELERYD_NODES="w1" +CELERYD_CHDIR="/opt/regluit/" +CELERYD_LOG_FILE="/var/log/celery/%n.log" +CELERYD_PID_FILE="/var/log/celery/%n.pid" +CELERYD_USER="celery" +CELERYD_GROUP="celery" +CELERYD="/opt/regluit/ENV/bin/django-admin.py celeryd" +CELERYD_MULTI="/opt/regluit/ENV/bin/django-admin.py celeryd_multi" + +VIRTUALENV_ACTIVATE="/opt/regluit/ENV/bin/activate" +export DJANGO_SETTINGS_MODULE="regluit.settings.please" diff --git a/settings/dev.py b/settings/dev.py index 41f8d9e7..866fba2b 100644 --- a/settings/dev.py +++ b/settings/dev.py @@ -6,6 +6,9 @@ TEMPLATE_DEBUG = DEBUG # if you're doing development work, you'll want 
this to be zero IS_PREVIEW = False +# SITE_ID for your particular site -- must be configured in /core/fixtures/initial_data.json +SITE_ID = 3 + ADMINS = ( ('Ed Summers', 'ehs@pobox.com'), ) From e4337b5986e38d4d209261e35d1b7f7bff6201a4 Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Thu, 26 Jan 2012 15:53:59 +0000 Subject: [PATCH 08/22] Change update-regluit to refer to settings.please not settings.prod --- deploy/update-regluit | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/update-regluit b/deploy/update-regluit index 8905787b..8523bd15 100755 --- a/deploy/update-regluit +++ b/deploy/update-regluit @@ -9,9 +9,9 @@ cd /opt/regluit sudo -u ubuntu /usr/bin/git pull source ENV/bin/activate -pip install -r requirements.pip -django-admin.py syncdb --migrate --settings regluit.settings.prod -django-admin.py collectstatic --noinput --settings regluit.settings.prod +#pip install -r requirements.pip +django-admin.py syncdb --migrate --settings regluit.settings.please +django-admin.py collectstatic --noinput --settings regluit.settings.please sudo /etc/init.d/apache2 restart sudo /etc/init.d/celeryd restart touch /opt/regluit/deploy/last-update From 01ae5011033bd5a1a83a30431ab43674ea14e46c Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 26 Jan 2012 09:24:35 -0800 Subject: [PATCH 09/22] Noted that the instructions for setting up please are slightly different from production -- details to come. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index af65726e..7e9590f5 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Production Deployment --------------------- Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, and talking to an Amazon Relational Data Store instance. +Instructions for setting please are slightly different. 1. create an ubuntu ec2 instance (e.g, go http://alestic.com/ to find various ubuntu images) 1. 
`sudo aptitude update` From 9500fb2b8f25a7fd948ef32fe124b2900fea3cee Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 19 Jan 2012 19:57:51 -0500 Subject: [PATCH 10/22] fixed bug where first_ebook() is always None --- core/models.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/core/models.py b/core/models.py index fe684ca3..e21a3094 100755 --- a/core/models.py +++ b/core/models.py @@ -323,7 +323,7 @@ class Work(models.Model): if campaign: status = campaign.status else: - if self.first_ebook() or self.first_epub(): + if self.first_ebook(): status = "Available" else: status = "No campaign yet" @@ -386,9 +386,13 @@ class Work(models.Model): return None def first_ebook(self, ebook_format=None): - for ebook in Ebook.objects.filter(edition__work=self, - format=ebook_format): - return ebook + if ebook_format: + for ebook in Ebook.objects.filter(edition__work=self, + format=ebook_format): + return ebook + else: + for ebook in Ebook.objects.filter(edition__work=self): + return ebook return None def wished_by(self): @@ -462,8 +466,8 @@ class Edition(models.Model): def cover_image_thumbnail(self): if self.googlebooks_id: - server_id = random.randint(0, 9) - return "http://bks%s.books.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % (server_id, self.googlebooks_id) + server_id = random.randint(0, 9) + return "http://bks%s.books.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % (server_id, self.googlebooks_id) else: return '' From 28e18229cd079f5f40a4628289e3c26e7d583fc1 Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 19 Jan 2012 21:11:28 -0500 Subject: [PATCH 11/22] fixed broken image on about page, tweaked bios. 
--- frontend/templates/about.html | 16 +++++----------- .../headshots/{amanda.JPG => amanda2.jpg} | Bin 2 files changed, 5 insertions(+), 11 deletions(-) rename static/images/headshots/{amanda.JPG => amanda2.jpg} (100%) diff --git a/frontend/templates/about.html b/frontend/templates/about.html index e8e27246..1a234308 100644 --- a/frontend/templates/about.html +++ b/frontend/templates/about.html @@ -16,11 +16,11 @@
-
Eric Hellman, President of Gluejar, is a technologist, entrepreneur, and writer. After 10 years at Bell Labs in physics research, Eric became interested in technologies surrounding e-journals and libraries. His first business, Openly Informatics, developed OpenURL linking software and knowledgebases, and was acquired by OCLC in 1996. At OCLC, he led the effort to productize and expand the xISBN service, and began the development of OCLC's Electronic Resource Management offerings. After leaving OCLC, Eric began blogging at Go To Hellman. He covers the intersection of technology, libraries and ebooks, and has written extensively on the Semantic Web and Linked Data.
+
Eric Hellman, President of Gluejar, is a technologist, entrepreneur, and writer. After 10 years at Bell Labs in physics research, Eric became interested in technologies surrounding e-journals and libraries. His first business, Openly Informatics, developed OpenURL linking software and knowledgebases, and was acquired by OCLC in 1996. At OCLC, he led the effort to productize and expand the xISBN service, and began the development of OCLC's Electronic Resource Management offerings. After leaving OCLC, Eric began blogging at Go To Hellman. He covers the intersection of technology, libraries and ebooks, and has written extensively on the Semantic Web and Linked Data. Eric has a B.S.E. from Princeton University, and a Ph.D. in Electrical Engineering from Stanford University.
-
+
Amanda Mecke is an expert in literary rights management. Before founding her own literary agency, Amanda was VP, Director of Subsidiary Rights for Bantam Dell, a division of Random House Inc. from 1989-2003, where she led a department that sold international and domestic book rights and pioneered early electronic licenses for subscription databases, CD-ROMs, audiobooks, and ebooks. She was also a co-leader of the Random House/SAP Contracts and Royalties software development team. Prior to joining Bantam Dell, Amanda ran the New York marketing office of the University of California Press. While there she served the board of the American Association of University Presses and was President of Women in Scholarly Publishing. Amanda has been a speaker at the Frankfurt Book Messe Rights Workshop, NYU Summer Publishing Program, American Independent Writers conference, and the International Women’s Writers Guild. She has a B.A. from Pitzer College, Claremont, California and a Ph.D. in English from UCLA. Amanda will continue to represent original work by her literary agency clients.

Although our founding team will be playing many roles at once, Amanda will be spending much of her time reaching out to rights holders and identifying works that will attract financial support from book lovers who want to see the ebooks available for free to anyone, anywhere. Her experience in both trade and academic publishing, together with her keen insight into the world of book rights, stood her above a lot of great people who expressed interest in working for Gluejar.
@@ -41,18 +41,12 @@
-
-
Stefan Fabry made the site pretty. His bio is TBA.
+
Design Anthem helped us make the site pretty.
-
-
Jason Kace wrote code. His bio is TBA.
-
- -
-
-
Ed Summers wrote more code. His bio is TBA.
+
Jason Kace wrote code. Ed Summers wrote some more code. Both of them helped us write even more code. +
{% endblock %} \ No newline at end of file diff --git a/static/images/headshots/amanda.JPG b/static/images/headshots/amanda2.jpg similarity index 100% rename from static/images/headshots/amanda.JPG rename to static/images/headshots/amanda2.jpg From 6fffbcd3d01609d34c03aac834c6308a8e5c2395 Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 27 Jan 2012 09:35:00 -0500 Subject: [PATCH 12/22] added guard against concurrent threads adding same googlebooks id --- core/bookloader.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/core/bookloader.py b/core/bookloader.py index 3de03866..7c64fcea 100755 --- a/core/bookloader.py +++ b/core/bookloader.py @@ -163,13 +163,21 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None): isbn = i['identifier'] # now check to see if there's an existing Work - if not work: + if isbn and not work: work = get_work_by_id(type='isbn',value=isbn) if not work: work = models.Work.objects.create(title=d['title'], language=language) work.new = True work.save() + + # going off to google can take some time, so we want to make sure this edition has not + # been created in another thread while we were waiting + try: + return models.Identifier.objects.get(type='goog', value=googlebooks_id).edition + except models.Identifier.DoesNotExist: + pass + # because this is a new google id, we have to create a new edition e = models.Edition(work=work) From a7d867bbf8f1b5d126a22663621871c21d71e898 Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 19 Jan 2012 23:20:06 -0500 Subject: [PATCH 13/22] populate edition was never running for works imported from library thing. started saving lccn. 
186 errors today --- core/librarything.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/librarything.py b/core/librarything.py index af49ec3d..dfd2c6af 100644 --- a/core/librarything.py +++ b/core/librarything.py @@ -201,6 +201,7 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None): """ from regluit.core import bookloader + from regluit.core import tasks from itertools import islice logger.info("Entering into load_librarything_into_wishlist") @@ -217,10 +218,11 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None): # add the librarything ids to the db since we know them now identifier= models.Identifier.get_or_add(type = 'thng', value = book['book_id'], edition = edition, work = edition.work) identifier= models.Identifier.get_or_add(type = 'ltwk', value = book['work_id'], work = edition.work) - + if book['lc_call_number']: + identifier= models.Identifier.get_or_add(type = 'lccn', value = book['lc_call_number'], edition = edition, work = edition.work) user.wishlist.add_work(edition.work, 'librarything') if edition.new: - regluit.core.tasks.populate_edition.delay(edition) + tasks.populate_edition.delay(edition) logger.info("Work with isbn %s added to wishlist.", isbn) except Exception, e: logger.info ("error adding ISBN %s: %s", isbn, e) From b3bfa2edc2900e2b10ea0543813aae6449606401 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 27 Jan 2012 15:45:58 +0000 Subject: [PATCH 14/22] Added a site for ry-dyndns --- core/fixtures/initial_data.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/fixtures/initial_data.json b/core/fixtures/initial_data.json index afe77bf9..502cbdc4 100644 --- a/core/fixtures/initial_data.json +++ b/core/fixtures/initial_data.json @@ -23,6 +23,14 @@ "name": "unglue.it local development" } }, + { + "pk": 4, + "model": "sites.site", + "fields": { + "domain": "ry-dev.dyndns.org", + "name": "ry-dev development" + } + }, { "pk": 1, "model": "core.premium", 
From 5fba8be6a69aaba773bc6b60c6c4e2b85c84be0b Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 27 Jan 2012 16:16:46 -0800 Subject: [PATCH 15/22] Changed the URL so that "all collections" loaded from Librarything, not just "my library" In add_related, make sure edition is not None before trying to add related editions A demo test --- core/bookloader.py | 6 +++++- core/librarything.py | 10 ++++++++-- test/booktests.py | 17 +++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 test/booktests.py diff --git a/core/bookloader.py b/core/bookloader.py index 3de03866..bcdc895b 100755 --- a/core/bookloader.py +++ b/core/bookloader.py @@ -215,11 +215,15 @@ def add_related(isbn): """ # make sure the seed edition is there logger.info("adding related editions for %s", isbn) + + new_editions = [] + edition = add_by_isbn(isbn) + if edition is None: + return new_editions # this is the work everything will hang off work = edition.work - new_editions = [] other_editions = {} for other_isbn in thingisbn(isbn): # 979's come back as 13 diff --git a/core/librarything.py b/core/librarything.py index dfd2c6af..e18d52e0 100644 --- a/core/librarything.py +++ b/core/librarything.py @@ -131,6 +131,9 @@ class LibraryThing(object): # isbn try: book_data["isbn"] = cols[4].xpath('.//span')[0].text + # check for   + if book_data["isbn"] == u'\xA0': + book_data["isbn"] = None except Exception, e: book_data["isbn"] = None @@ -143,6 +146,8 @@ class LibraryThing(object): # we can vary viewstyle to get different info IMPLEMENTED_STYLES = [1,5] + COLLECTION = 2 # set to get All Collections + if view_style not in IMPLEMENTED_STYLES: raise NotImplementedError() style_parser = getattr(self,"viewstyle_%s" % view_style) @@ -151,8 +156,8 @@ class LibraryThing(object): cookies = None while next_page: - url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&offset=%d" % (self.username, - view_style, offset) + url = 
"http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username, + view_style, COLLECTION, offset) logger.info("url: %s", url) if cookies is None: r = requests.get(url) @@ -163,6 +168,7 @@ class LibraryThing(object): raise LibraryThingException("Error accessing %s: %s" % (url, e)) logger.info("Error accessing %s: %s", url, e) etree = html.fromstring(r.content) + #logger.info("r.content %s", r.content) cookies = r.cookies # retain the cookies # look for a page bar diff --git a/test/booktests.py b/test/booktests.py new file mode 100644 index 00000000..a13d9257 --- /dev/null +++ b/test/booktests.py @@ -0,0 +1,17 @@ +from regluit.core import librarything, bookloader +import itertools + +def ry_lt_books(): + lt = librarything.LibraryThing('rdhyee') + books = lt.parse_user_catalog(view_style=5) + return books + +def editions_for_lt(books): + editions = [bookloader.add_by_isbn(b["isbn"]) for b in books] + return editions + +def ry_lt_unloaded(): + books = list(ry_lt_books()) + editions = editions_for_lt(books) + unloaded_books = [b for (b, ed) in itertools.izip(books, editions) if ed is None] + return unloaded_books \ No newline at end of file From 069985e02c4eba1763fbfc3d936cca6ba2ad4bc5 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Fri, 27 Jan 2012 17:06:10 -0800 Subject: [PATCH 16/22] Comment on the "tests" --- test/booktests.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test/booktests.py b/test/booktests.py index a13d9257..0d4c3799 100644 --- a/test/booktests.py +++ b/test/booktests.py @@ -1,17 +1,28 @@ from regluit.core import librarything, bookloader import itertools +import django def ry_lt_books(): + """return parsing of rdhyee's LibraryThing collection""" lt = librarything.LibraryThing('rdhyee') books = lt.parse_user_catalog(view_style=5) return books def editions_for_lt(books): + """return the Editions that correspond to the list of LibraryThing books""" editions = 
[bookloader.add_by_isbn(b["isbn"]) for b in books] return editions -def ry_lt_unloaded(): +def ry_lt_not_loaded(): + """Calculate which of the books on rdhyee's librarything list don't yield Editions""" books = list(ry_lt_books()) editions = editions_for_lt(books) - unloaded_books = [b for (b, ed) in itertools.izip(books, editions) if ed is None] - return unloaded_books \ No newline at end of file + not_loaded_books = [b for (b, ed) in itertools.izip(books, editions) if ed is None] + return not_loaded_books + +def ry_wish_list_equal_loadable_lt_books(): + """returnwhether the set of works in the user's wishlist is the same as the works in a user's loadable editions from LT""" + editions = editions_for_lt(ry_lt_books()) + # assume only one user -- and that we have run a LT book loading process for that user + ry = django.contrib.auth.models.User.objects.all()[0] + return set([ed.work for ed in filter(None, editions)]) == set(ry.wishlist.works.all()) \ No newline at end of file From be988f43f3eb634781073785a0505a7a08a7f587 Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 27 Jan 2012 21:18:00 -0500 Subject: [PATCH 17/22] minor Librarything tweaks --- core/librarything.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/librarything.py b/core/librarything.py index dfd2c6af..9eccea25 100644 --- a/core/librarything.py +++ b/core/librarything.py @@ -119,7 +119,7 @@ class LibraryThing(object): try: book_data["lc_call_number"] = cols[2].xpath('.//span')[0].text except Exception, e: - logger.info("book lc call number exception: %s %s", book_data["title"], e) + logger.info("no lc call number for: %s %s", book_data["title"], e) book_data["lc_call_number"] = None # subject @@ -197,7 +197,7 @@ class LibraryThing(object): def load_librarything_into_wishlist(user, lt_username, max_books=None): """ - Load a specified Goodreads shelf (by default: all the books from the Goodreads account associated with user) + Load a specified LibraryThing shelf (by 
default: all the books from the LibraryThing account associated with user) """ from regluit.core import bookloader @@ -212,6 +212,8 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None): isbn = book["isbn"] # grab the first one logger.info("%d %s %s", i, book["title"]["title"], isbn) try: + if not isbn: + continue edition = bookloader.add_by_isbn(isbn) if not edition: continue From efa8da19ff10fc8dfa250c7d75dd80ec2853777c Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 27 Jan 2012 21:44:02 -0500 Subject: [PATCH 18/22] pass isbn to add_by_googlebooks_id to address occasional missing isbns --- core/bookloader.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/core/bookloader.py b/core/bookloader.py index 4f603fcd..1806e0f5 100755 --- a/core/bookloader.py +++ b/core/bookloader.py @@ -105,7 +105,7 @@ def add_by_isbn_from_google(isbn, work=None): return None try: - return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0]) + return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0], isbn=isbn) except LookupFailure, e: logger.exception("failed to add edition for %s", isbn) except IntegrityError, e: @@ -127,10 +127,10 @@ def get_edition_by_id(type,value): return None -def add_by_googlebooks_id(googlebooks_id, work=None, results=None): +def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None): """add a book to the UnglueIt database based on the GoogleBooks ID. The work parameter is optional, and if not supplied the edition will be - associated with a stub work. + associated with a stub work. 
isbn can be passed because sometimes passed data won't include it """ # don't ping google again if we already know about the edition @@ -155,12 +155,13 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None): logger.info("not connecting %s since it is %s instead of %s" % (googlebooks_id, language, work.language)) work = None - isbn = None - for i in d.get('industryIdentifiers', []): - if i['type'] == 'ISBN_10' and not isbn: - isbn = regluit.core.isbn.convert_10_to_13(i['identifier']) - elif i['type'] == 'ISBN_13': - isbn = i['identifier'] + # isbn = None + if not isbn: + for i in d.get('industryIdentifiers', []): + if i['type'] == 'ISBN_10' and not isbn: + isbn = regluit.core.isbn.convert_10_to_13(i['identifier']) + elif i['type'] == 'ISBN_13': + isbn = i['identifier'] # now check to see if there's an existing Work if isbn and not work: From 4259e055d0960528b4387503950887aca74b1f67 Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 27 Jan 2012 22:09:58 -0500 Subject: [PATCH 19/22] management command to remove orphans (works with no ids pointing at them) --- .../management/commands/remove_orphan_works.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 core/management/commands/remove_orphan_works.py diff --git a/core/management/commands/remove_orphan_works.py b/core/management/commands/remove_orphan_works.py new file mode 100644 index 00000000..ce1adbf1 --- /dev/null +++ b/core/management/commands/remove_orphan_works.py @@ -0,0 +1,18 @@ +# no, not that kind of orphan works. removes works with no connected identifiers. 
+ +from django.core.management.base import BaseCommand + +from regluit.core import models + +class Command(BaseCommand): + help = "removes works with no connected identifiers" + + def handle(self, **options): + numworks=0 + deleted=0 + for work in models.Work.objects.all(): + if work.identifiers.count()==0: + work.delete() + deleted=deleted+1 + numworks=numworks+1 + print "%s deleted from %s total" % (deleted, numworks) From 85366cc21a21b085e1071e3b8cf30c2b4fc21c61 Mon Sep 17 00:00:00 2001 From: eric Date: Sat, 28 Jan 2012 22:16:14 -0500 Subject: [PATCH 20/22] handle locale in language --- core/bookloader.py | 2 ++ core/tests.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/core/bookloader.py b/core/bookloader.py index 1806e0f5..490a860f 100755 --- a/core/bookloader.py +++ b/core/bookloader.py @@ -151,6 +151,8 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None): # don't add the edition to a work with a different language # https://www.pivotaltracker.com/story/show/17234433 language = d['language'] + if len(language)>2: + language= language[0:2] if work and work.language != language: logger.info("not connecting %s since it is %s instead of %s" % (googlebooks_id, language, work.language)) diff --git a/core/tests.py b/core/tests.py index 3d547c1f..c68c9e0f 100755 --- a/core/tests.py +++ b/core/tests.py @@ -40,6 +40,9 @@ class BookLoaderTests(TestCase): # work self.assertTrue(edition.work) + # locale in language + edition = bookloader.add_by_isbn('9787500676911') + self.assertEqual(edition.work.language, 'zh') def test_double_add(self): bookloader.add_by_isbn('0441012035') From b92ce71ea0357985d827947fdb110e7322c3a489 Mon Sep 17 00:00:00 2001 From: eric Date: Sat, 28 Jan 2012 22:17:11 -0500 Subject: [PATCH 21/22] make load_wishlist safer --- core/management/commands/load_wishlist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/management/commands/load_wishlist.py b/core/management/commands/load_wishlist.py 
index 5ca96a00..14f452ca 100644 --- a/core/management/commands/load_wishlist.py +++ b/core/management/commands/load_wishlist.py @@ -13,8 +13,8 @@ class Command(BaseCommand): for isbn in open(filename): isbn = isbn.strip() edition = bookloader.add_by_isbn(isbn) - bookloader.add_related(isbn) if edition: + bookloader.add_related(isbn) user.wishlist.add_work(edition.work, source="user") print "loaded %s as %s for %s" % (isbn, edition, user) else: From 8dcca8c1e77d7f6f64ea6bf5c043e85c2b9bb7dc Mon Sep 17 00:00:00 2001 From: eric Date: Sat, 28 Jan 2012 22:17:58 -0500 Subject: [PATCH 22/22] deprecated managemetn command --- core/management/commands/random_campaigns.py~ | 38 ------------------- 1 file changed, 38 deletions(-) delete mode 100644 core/management/commands/random_campaigns.py~ diff --git a/core/management/commands/random_campaigns.py~ b/core/management/commands/random_campaigns.py~ deleted file mode 100644 index 5787a201..00000000 --- a/core/management/commands/random_campaigns.py~ +++ /dev/null @@ -1,38 +0,0 @@ -from decimal import Decimal -from random import randint, randrange -from datetime import datetime, timedelta - -from django.core.management.base import BaseCommand - -from regluit.core.models import Work, Campaign - -class Command(BaseCommand): - help = "creates random campaigns for any works that lack one for testing" - - def handle(self, *args, **options): - for work in Work.objects.all(): - if work.campaigns.all().count() > 0: - continue - campaign = Campaign() - campaign.name = work.title - campaign.work = work - campaign.description = "Test Campaign" - - # random campaign target between $200 and $10,000 - campaign.target = float(randint(200,10000)) - - # random deadline between 5 days from now and 180 days from now - now = datetime.now() - campaign.deadline = random_date(now + timedelta(days=5), - now + timedelta(days=180)) - - campaign.save() - print "created %s" % campaign - - -def random_date(start, end): - delta = end - start - int_delta = 
(delta.days * 24 * 60 * 60) + delta.seconds - random_second = randrange(int_delta) - return (start + timedelta(seconds=random_second)) -