diff --git a/README.md b/README.md index e60957d5..7e9590f5 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,9 @@ Production Deployment --------------------- Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, and talking to an Amazon Relational Data Store instance. +Instructions for setting please are slightly different. -1. create an ubuntu LTS ec2 instance +1. create an ubuntu ec2 instance (e.g, go http://alestic.com/ to find various ubuntu images) 1. `sudo aptitude update` 1. `sudo aptitude upgrade` 1. `sudo aptitude install git-core apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server python-lxml postfix` @@ -54,21 +55,24 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, 1. create an Amazon RDS instance 1. connect to it, e.g. `mysql -u root -h gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com -p` 1. `CREATE DATABASE unglueit CHARSET utf8;` -1. `GRANT ALL ON unglueit.\* TO ‘unglueit’@’ip-10-244-250-168.ec2.internal’ IDENTIFIED BY 'unglueit' REQUIRE SSL` +1. `GRANT ALL ON unglueit.\* TO ‘unglueit’@’ip-10-244-250-168.ec2.internal’ IDENTIFIED BY 'unglueit' REQUIRE SSL;` 1. update settings/prod.py with database credentials 1. `virtualenv ENV` 1. `source ENV/bin/activate` 1. `pip install -r requirements.pip` 1. `echo "/opt/" > ENV/lib/python2.7/site-packages/regluit.pth` 1. `django-admin.py syncdb --migrate --settings regluit.settings.prod` -1. `mkdir /var/www/static` -1. `chown ubuntu:ubuntu /var/www/static` +1. `sudo mkdir /var/www/static` +1. `sudo chown ubuntu:ubuntu /var/www/static` 1. `django-admin.py collectstatic --settings regluit.settings.prod` 1. `sudo ln -s /opt/regluit/deploy/regluit.conf /etc/apache2/sites-available/regluit` 1. `sudo a2ensite regluit` 1. `sudo a2enmod ssl rewrite` +1. `cd /home/ubuntu` +1. copy SSL server key to `/etc/ssl/private/server.key` +1. copy SSL certificate to `/etc/ssl/certs/server.crt` 1. `sudo /etc/init.d/apache2 restart` -1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` +1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` (just enter return for all?) 1. `sudo cp deploy/celeryd /etc/init.d/celeryd` 1. `sudo chmod 755 /etc/init.d/celeryd` 1. `sudo cp deploy/celeryd.conf /etc/default/celeryd` @@ -76,7 +80,7 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, 1. `sudo chown celery:celery /var/log/celery` 1. `sudo /etc/init.d/celeryd start` -OS X Develper Notes +OS X Developer Notes ------------------- To run regluit on OS X you should have XCode installed diff --git a/core/bookloader.py b/core/bookloader.py index 3de03866..490a860f 100755 --- a/core/bookloader.py +++ b/core/bookloader.py @@ -105,7 +105,7 @@ def add_by_isbn_from_google(isbn, work=None): return None try: - return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0]) + return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0], isbn=isbn) except LookupFailure, e: logger.exception("failed to add edition for %s", isbn) except IntegrityError, e: @@ -127,10 +127,10 @@ def get_edition_by_id(type,value): return None -def add_by_googlebooks_id(googlebooks_id, work=None, results=None): +def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None): """add a book to the UnglueIt database based on the GoogleBooks ID. The work parameter is optional, and if not supplied the edition will be - associated with a stub work. + associated with a stub work. isbn can be passed because sometimes passed data won't include it """ # don't ping google again if we already know about the edition @@ -151,25 +151,36 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None): # don't add the edition to a work with a different language # https://www.pivotaltracker.com/story/show/17234433 language = d['language'] + if len(language)>2: + language= language[0:2] if work and work.language != language: logger.info("not connecting %s since it is %s instead of %s" % (googlebooks_id, language, work.language)) work = None - isbn = None - for i in d.get('industryIdentifiers', []): - if i['type'] == 'ISBN_10' and not isbn: - isbn = regluit.core.isbn.convert_10_to_13(i['identifier']) - elif i['type'] == 'ISBN_13': - isbn = i['identifier'] + # isbn = None + if not isbn: + for i in d.get('industryIdentifiers', []): + if i['type'] == 'ISBN_10' and not isbn: + isbn = regluit.core.isbn.convert_10_to_13(i['identifier']) + elif i['type'] == 'ISBN_13': + isbn = i['identifier'] # now check to see if there's an existing Work - if not work: + if isbn and not work: work = get_work_by_id(type='isbn',value=isbn) if not work: work = models.Work.objects.create(title=d['title'], language=language) work.new = True work.save() + + # going off to google can take some time, so we want to make sure this edition has not + # been created in another thread while we were waiting + try: + return models.Identifier.objects.get(type='goog', value=googlebooks_id).edition + except models.Identifier.DoesNotExist: + pass + # because this is a new google id, we have to create a new edition e = models.Edition(work=work) @@ -215,11 +226,15 @@ def add_related(isbn): """ # make sure the seed edition is there logger.info("adding related editions for %s", isbn) + + new_editions = [] + edition = add_by_isbn(isbn) + if edition is None: + return new_editions # this is the work everything will hang off work = edition.work - new_editions = [] other_editions = {} for other_isbn in thingisbn(isbn): # 979's come back as 13 diff --git a/core/fixtures/initial_data.json b/core/fixtures/initial_data.json index afe77bf9..502cbdc4 100644 --- a/core/fixtures/initial_data.json +++ b/core/fixtures/initial_data.json @@ -23,6 +23,14 @@ "name": "unglue.it local development" } }, + { + "pk": 4, + "model": "sites.site", + "fields": { + "domain": "ry-dev.dyndns.org", + "name": "ry-dev development" + } + }, { "pk": 1, "model": "core.premium", diff --git a/core/librarything.py b/core/librarything.py index dfd2c6af..d22041f1 100644 --- a/core/librarything.py +++ b/core/librarything.py @@ -119,7 +119,7 @@ class LibraryThing(object): try: book_data["lc_call_number"] = cols[2].xpath('.//span')[0].text except Exception, e: - logger.info("book lc call number exception: %s %s", book_data["title"], e) + logger.info("no lc call number for: %s %s", book_data["title"], e) book_data["lc_call_number"] = None # subject @@ -131,6 +131,9 @@ class LibraryThing(object): # isbn try: book_data["isbn"] = cols[4].xpath('.//span')[0].text + # check for   + if book_data["isbn"] == u'\xA0': + book_data["isbn"] = None except Exception, e: book_data["isbn"] = None @@ -143,6 +146,8 @@ class LibraryThing(object): # we can vary viewstyle to get different info IMPLEMENTED_STYLES = [1,5] + COLLECTION = 2 # set to get All Collections + if view_style not in IMPLEMENTED_STYLES: raise NotImplementedError() style_parser = getattr(self,"viewstyle_%s" % view_style) @@ -151,8 +156,8 @@ class LibraryThing(object): cookies = None while next_page: - url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&offset=%d" % (self.username, - view_style, offset) + url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username, + view_style, COLLECTION, offset) logger.info("url: %s", url) if cookies is None: r = requests.get(url) @@ -163,6 +168,7 @@ class LibraryThing(object): raise LibraryThingException("Error accessing %s: %s" % (url, e)) logger.info("Error accessing %s: %s", url, e) etree = html.fromstring(r.content) + #logger.info("r.content %s", r.content) cookies = r.cookies # retain the cookies # look for a page bar @@ -197,7 +203,7 @@ class LibraryThing(object): def load_librarything_into_wishlist(user, lt_username, max_books=None): """ - Load a specified Goodreads shelf (by default: all the books from the Goodreads account associated with user) + Load a specified LibraryThing shelf (by default: all the books from the LibraryThing account associated with user) """ from regluit.core import bookloader @@ -212,6 +218,8 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None): isbn = book["isbn"] # grab the first one logger.info("%d %s %s", i, book["title"]["title"], isbn) try: + if not isbn: + continue edition = bookloader.add_by_isbn(isbn) if not edition: continue diff --git a/core/management/commands/load_wishlist.py b/core/management/commands/load_wishlist.py index 5ca96a00..14f452ca 100644 --- a/core/management/commands/load_wishlist.py +++ b/core/management/commands/load_wishlist.py @@ -13,8 +13,8 @@ class Command(BaseCommand): for isbn in open(filename): isbn = isbn.strip() edition = bookloader.add_by_isbn(isbn) - bookloader.add_related(isbn) if edition: + bookloader.add_related(isbn) user.wishlist.add_work(edition.work, source="user") print "loaded %s as %s for %s" % (isbn, edition, user) else: diff --git a/core/management/commands/random_campaigns.py~ b/core/management/commands/random_campaigns.py~ deleted file mode 100644 index 5787a201..00000000 --- a/core/management/commands/random_campaigns.py~ +++ /dev/null @@ -1,38 +0,0 @@ -from decimal import Decimal -from random import randint, randrange -from datetime import datetime, timedelta - -from django.core.management.base import BaseCommand - -from regluit.core.models import Work, Campaign - -class Command(BaseCommand): - help = "creates random campaigns for any works that lack one for testing" - - def handle(self, *args, **options): - for work in Work.objects.all(): - if work.campaigns.all().count() > 0: - continue - campaign = Campaign() - campaign.name = work.title - campaign.work = work - campaign.description = "Test Campaign" - - # random campaign target between $200 and $10,000 - campaign.target = float(randint(200,10000)) - - # random deadline between 5 days from now and 180 days from now - now = datetime.now() - campaign.deadline = random_date(now + timedelta(days=5), - now + timedelta(days=180)) - - campaign.save() - print "created %s" % campaign - - -def random_date(start, end): - delta = end - start - int_delta = (delta.days * 24 * 60 * 60) + delta.seconds - random_second = randrange(int_delta) - return (start + timedelta(seconds=random_second)) - diff --git a/core/management/commands/remove_orphan_works.py b/core/management/commands/remove_orphan_works.py new file mode 100644 index 00000000..ce1adbf1 --- /dev/null +++ b/core/management/commands/remove_orphan_works.py @@ -0,0 +1,18 @@ +# no, not that kind of orphan works. removes works with no connected identifiers. + +from django.core.management.base import BaseCommand + +from regluit.core import models + +class Command(BaseCommand): + help = "removes works with no connected identifiers" + + def handle(self, **options): + numworks=0 + deleted=0 + for work in models.Work.objects.all(): + if work.identifiers.count()==0: + work.delete() + deleted=deleted+1 + numworks=numworks+1 + print "%s deleted from %s total" % (deleted, numworks) diff --git a/core/tests.py b/core/tests.py index 3d547c1f..c68c9e0f 100755 --- a/core/tests.py +++ b/core/tests.py @@ -40,6 +40,9 @@ class BookLoaderTests(TestCase): # work self.assertTrue(edition.work) + # locale in language + edition = bookloader.add_by_isbn('9787500676911') + self.assertEqual(edition.work.language, 'zh') def test_double_add(self): bookloader.add_by_isbn('0441012035') diff --git a/deploy/celeryd_please.conf b/deploy/celeryd_please.conf new file mode 100644 index 00000000..8c41c95a --- /dev/null +++ b/deploy/celeryd_please.conf @@ -0,0 +1,11 @@ +CELERYD_NODES="w1" +CELERYD_CHDIR="/opt/regluit/" +CELERYD_LOG_FILE="/var/log/celery/%n.log" +CELERYD_PID_FILE="/var/log/celery/%n.pid" +CELERYD_USER="celery" +CELERYD_GROUP="celery" +CELERYD="/opt/regluit/ENV/bin/django-admin.py celeryd" +CELERYD_MULTI="/opt/regluit/ENV/bin/django-admin.py celeryd_multi" + +VIRTUALENV_ACTIVATE="/opt/regluit/ENV/bin/activate" +export DJANGO_SETTINGS_MODULE="regluit.settings.please" diff --git a/deploy/please.conf b/deploy/please.conf index 28992b89..7a5043e1 100644 --- a/deploy/please.conf +++ b/deploy/please.conf @@ -12,7 +12,7 @@ RewriteRule /admin(.*) https://please.unglueit.com/admin$1 [R=301] RewriteRule /accounts(.*) https://please.unglueit.com/accounts$1 [R=301] WSGIDaemonProcess regluit processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs -WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi +WSGIScriptAlias / /opt/regluit/deploy/please.wsgi Options Indexes FollowSymLinks @@ -31,10 +31,10 @@ Alias /static /var/www/static SSLEngine on SSLCertificateFile /etc/ssl/certs/server.crt SSLCertificateKeyFile /etc/ssl/private/server.key -SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt +#SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt WSGIDaemonProcess regluit-ssl processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs -WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi +WSGIScriptAlias / /opt/regluit/deploy/please.wsgi Options Indexes FollowSymLinks diff --git a/deploy/please.wsgi b/deploy/please.wsgi new file mode 100644 index 00000000..bfc2b5bd --- /dev/null +++ b/deploy/please.wsgi @@ -0,0 +1,9 @@ +#!/usr/bin/env python + +import os + +import django.core.handlers.wsgi + +os.environ['CELERY_LOADER'] = 'django' +os.environ['DJANGO_SETTINGS_MODULE'] = 'regluit.settings.please' +application = django.core.handlers.wsgi.WSGIHandler() diff --git a/deploy/update-regluit b/deploy/update-regluit index 8905787b..8523bd15 100755 --- a/deploy/update-regluit +++ b/deploy/update-regluit @@ -9,9 +9,9 @@ cd /opt/regluit sudo -u ubuntu /usr/bin/git pull source ENV/bin/activate -pip install -r requirements.pip -django-admin.py syncdb --migrate --settings regluit.settings.prod -django-admin.py collectstatic --noinput --settings regluit.settings.prod +#pip install -r requirements.pip +django-admin.py syncdb --migrate --settings regluit.settings.please +django-admin.py collectstatic --noinput --settings regluit.settings.please sudo /etc/init.d/apache2 restart sudo /etc/init.d/celeryd restart touch /opt/regluit/deploy/last-update diff --git a/settings/dev.py b/settings/dev.py index 41f8d9e7..866fba2b 100644 --- a/settings/dev.py +++ b/settings/dev.py @@ -6,6 +6,9 @@ TEMPLATE_DEBUG = DEBUG # if you're doing development work, you'll want this to be zero IS_PREVIEW = False +# SITE_ID for your particular site -- must be configured in /core/fixtures/initial_data.json +SITE_ID = 3 + ADMINS = ( ('Ed Summers', 'ehs@pobox.com'), ) diff --git a/settings/please.py b/settings/please.py index cc83b031..784bb832 100644 --- a/settings/please.py +++ b/settings/please.py @@ -18,9 +18,9 @@ DATABASES = { 'default': { 'ENGINE': 'django.db.backends.mysql', 'NAME': 'please', - 'USER': 'please', + 'USER': 'please2', 'PASSWORD': 'unglueit', - 'HOST': 'gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com', + 'HOST': 'pleasedb.cboagmr25pjs.us-east-1.rds.amazonaws.com', 'PORT': '', } } diff --git a/test/booktests.py b/test/booktests.py new file mode 100644 index 00000000..0d4c3799 --- /dev/null +++ b/test/booktests.py @@ -0,0 +1,28 @@ +from regluit.core import librarything, bookloader +import itertools +import django + +def ry_lt_books(): + """return parsing of rdhyee's LibraryThing collection""" + lt = librarything.LibraryThing('rdhyee') + books = lt.parse_user_catalog(view_style=5) + return books + +def editions_for_lt(books): + """return the Editions that correspond to the list of LibraryThing books""" + editions = [bookloader.add_by_isbn(b["isbn"]) for b in books] + return editions + +def ry_lt_not_loaded(): + """Calculate which of the books on rdhyee's librarything list don't yield Editions""" + books = list(ry_lt_books()) + editions = editions_for_lt(books) + not_loaded_books = [b for (b, ed) in itertools.izip(books, editions) if ed is None] + return not_loaded_books + +def ry_wish_list_equal_loadable_lt_books(): + """returnwhether the set of works in the user's wishlist is the same as the works in a user's loadable editions from LT""" + editions = editions_for_lt(ry_lt_books()) + # assume only one user -- and that we have run a LT book loading process for that user + ry = django.contrib.auth.models.User.objects.all()[0] + return set([ed.work for ed in filter(None, editions)]) == set(ry.wishlist.works.all()) \ No newline at end of file