Merge branch 'master' of github.com:Gluejar/regluit

pull/1/head
Andromeda Yelton 2012-01-30 10:12:29 -05:00
commit a9c3cc8301
15 changed files with 137 additions and 68 deletions

View File

@ -37,8 +37,9 @@ Production Deployment
--------------------- ---------------------
Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, and talking to an Amazon Relational Data Store instance. Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, and talking to an Amazon Relational Data Store instance.
Instructions for setting up the please.unglueit.com instance are slightly different.
1. create an ubuntu LTS ec2 instance 1. create an ubuntu ec2 instance (e.g., go to http://alestic.com/ to find various ubuntu images)
1. `sudo aptitude update` 1. `sudo aptitude update`
1. `sudo aptitude upgrade` 1. `sudo aptitude upgrade`
1. `sudo aptitude install git-core apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server python-lxml postfix` 1. `sudo aptitude install git-core apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server python-lxml postfix`
@ -54,21 +55,24 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi,
1. create an Amazon RDS instance 1. create an Amazon RDS instance
1. connect to it, e.g. `mysql -u root -h gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com -p` 1. connect to it, e.g. `mysql -u root -h gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com -p`
1. `CREATE DATABASE unglueit CHARSET utf8;` 1. `CREATE DATABASE unglueit CHARSET utf8;`
1. `GRANT ALL ON unglueit.\* TO unglueit@ip-10-244-250-168.ec2.internal IDENTIFIED BY 'unglueit' REQUIRE SSL` 1. `GRANT ALL ON unglueit.\* TO unglueit@ip-10-244-250-168.ec2.internal IDENTIFIED BY 'unglueit' REQUIRE SSL;`
1. update settings/prod.py with database credentials 1. update settings/prod.py with database credentials
1. `virtualenv ENV` 1. `virtualenv ENV`
1. `source ENV/bin/activate` 1. `source ENV/bin/activate`
1. `pip install -r requirements.pip` 1. `pip install -r requirements.pip`
1. `echo "/opt/" > ENV/lib/python2.7/site-packages/regluit.pth` 1. `echo "/opt/" > ENV/lib/python2.7/site-packages/regluit.pth`
1. `django-admin.py syncdb --migrate --settings regluit.settings.prod` 1. `django-admin.py syncdb --migrate --settings regluit.settings.prod`
1. `mkdir /var/www/static` 1. `sudo mkdir /var/www/static`
1. `chown ubuntu:ubuntu /var/www/static` 1. `sudo chown ubuntu:ubuntu /var/www/static`
1. `django-admin.py collectstatic --settings regluit.settings.prod` 1. `django-admin.py collectstatic --settings regluit.settings.prod`
1. `sudo ln -s /opt/regluit/deploy/regluit.conf /etc/apache2/sites-available/regluit` 1. `sudo ln -s /opt/regluit/deploy/regluit.conf /etc/apache2/sites-available/regluit`
1. `sudo a2ensite regluit` 1. `sudo a2ensite regluit`
1. `sudo a2enmod ssl rewrite` 1. `sudo a2enmod ssl rewrite`
1. `cd /home/ubuntu`
1. copy SSL server key to `/etc/ssl/private/server.key`
1. copy SSL certificate to `/etc/ssl/certs/server.crt`
1. `sudo /etc/init.d/apache2 restart` 1. `sudo /etc/init.d/apache2 restart`
1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` 1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` (just enter return for all?)
1. `sudo cp deploy/celeryd /etc/init.d/celeryd` 1. `sudo cp deploy/celeryd /etc/init.d/celeryd`
1. `sudo chmod 755 /etc/init.d/celeryd` 1. `sudo chmod 755 /etc/init.d/celeryd`
1. `sudo cp deploy/celeryd.conf /etc/default/celeryd` 1. `sudo cp deploy/celeryd.conf /etc/default/celeryd`
@ -76,7 +80,7 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi,
1. `sudo chown celery:celery /var/log/celery` 1. `sudo chown celery:celery /var/log/celery`
1. `sudo /etc/init.d/celeryd start` 1. `sudo /etc/init.d/celeryd start`
OS X Develper Notes OS X Developer Notes
------------------- -------------------
To run regluit on OS X you should have XCode installed To run regluit on OS X you should have XCode installed

View File

@ -105,7 +105,7 @@ def add_by_isbn_from_google(isbn, work=None):
return None return None
try: try:
return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0]) return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0], isbn=isbn)
except LookupFailure, e: except LookupFailure, e:
logger.exception("failed to add edition for %s", isbn) logger.exception("failed to add edition for %s", isbn)
except IntegrityError, e: except IntegrityError, e:
@ -127,10 +127,10 @@ def get_edition_by_id(type,value):
return None return None
def add_by_googlebooks_id(googlebooks_id, work=None, results=None): def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
"""add a book to the UnglueIt database based on the GoogleBooks ID. The """add a book to the UnglueIt database based on the GoogleBooks ID. The
work parameter is optional, and if not supplied the edition will be work parameter is optional, and if not supplied the edition will be
associated with a stub work. associated with a stub work. isbn can be passed because sometimes passed data won't include it
""" """
# don't ping google again if we already know about the edition # don't ping google again if we already know about the edition
@ -151,11 +151,14 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None):
# don't add the edition to a work with a different language # don't add the edition to a work with a different language
# https://www.pivotaltracker.com/story/show/17234433 # https://www.pivotaltracker.com/story/show/17234433
language = d['language'] language = d['language']
if len(language)>2:
language= language[0:2]
if work and work.language != language: if work and work.language != language:
logger.info("not connecting %s since it is %s instead of %s" % logger.info("not connecting %s since it is %s instead of %s" %
(googlebooks_id, language, work.language)) (googlebooks_id, language, work.language))
work = None work = None
isbn = None # isbn = None
if not isbn:
for i in d.get('industryIdentifiers', []): for i in d.get('industryIdentifiers', []):
if i['type'] == 'ISBN_10' and not isbn: if i['type'] == 'ISBN_10' and not isbn:
isbn = regluit.core.isbn.convert_10_to_13(i['identifier']) isbn = regluit.core.isbn.convert_10_to_13(i['identifier'])
@ -163,7 +166,7 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None):
isbn = i['identifier'] isbn = i['identifier']
# now check to see if there's an existing Work # now check to see if there's an existing Work
if not work: if isbn and not work:
work = get_work_by_id(type='isbn',value=isbn) work = get_work_by_id(type='isbn',value=isbn)
if not work: if not work:
work = models.Work.objects.create(title=d['title'], language=language) work = models.Work.objects.create(title=d['title'], language=language)
@ -171,6 +174,14 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None):
work.save() work.save()
# going off to google can take some time, so we want to make sure this edition has not
# been created in another thread while we were waiting
try:
return models.Identifier.objects.get(type='goog', value=googlebooks_id).edition
except models.Identifier.DoesNotExist:
pass
# because this is a new google id, we have to create a new edition # because this is a new google id, we have to create a new edition
e = models.Edition(work=work) e = models.Edition(work=work)
e.title = d.get('title') e.title = d.get('title')
@ -215,11 +226,15 @@ def add_related(isbn):
""" """
# make sure the seed edition is there # make sure the seed edition is there
logger.info("adding related editions for %s", isbn) logger.info("adding related editions for %s", isbn)
new_editions = []
edition = add_by_isbn(isbn) edition = add_by_isbn(isbn)
if edition is None:
return new_editions
# this is the work everything will hang off # this is the work everything will hang off
work = edition.work work = edition.work
new_editions = []
other_editions = {} other_editions = {}
for other_isbn in thingisbn(isbn): for other_isbn in thingisbn(isbn):
# 979's come back as 13 # 979's come back as 13

View File

@ -23,6 +23,14 @@
"name": "unglue.it local development" "name": "unglue.it local development"
} }
}, },
{
"pk": 4,
"model": "sites.site",
"fields": {
"domain": "ry-dev.dyndns.org",
"name": "ry-dev development"
}
},
{ {
"pk": 1, "pk": 1,
"model": "core.premium", "model": "core.premium",

View File

@ -119,7 +119,7 @@ class LibraryThing(object):
try: try:
book_data["lc_call_number"] = cols[2].xpath('.//span')[0].text book_data["lc_call_number"] = cols[2].xpath('.//span')[0].text
except Exception, e: except Exception, e:
logger.info("book lc call number exception: %s %s", book_data["title"], e) logger.info("no lc call number for: %s %s", book_data["title"], e)
book_data["lc_call_number"] = None book_data["lc_call_number"] = None
# subject # subject
@ -131,6 +131,9 @@ class LibraryThing(object):
# isbn # isbn
try: try:
book_data["isbn"] = cols[4].xpath('.//span')[0].text book_data["isbn"] = cols[4].xpath('.//span')[0].text
# check for &nbsp
if book_data["isbn"] == u'\xA0':
book_data["isbn"] = None
except Exception, e: except Exception, e:
book_data["isbn"] = None book_data["isbn"] = None
@ -143,6 +146,8 @@ class LibraryThing(object):
# we can vary viewstyle to get different info # we can vary viewstyle to get different info
IMPLEMENTED_STYLES = [1,5] IMPLEMENTED_STYLES = [1,5]
COLLECTION = 2 # set to get All Collections
if view_style not in IMPLEMENTED_STYLES: if view_style not in IMPLEMENTED_STYLES:
raise NotImplementedError() raise NotImplementedError()
style_parser = getattr(self,"viewstyle_%s" % view_style) style_parser = getattr(self,"viewstyle_%s" % view_style)
@ -151,8 +156,8 @@ class LibraryThing(object):
cookies = None cookies = None
while next_page: while next_page:
url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&offset=%d" % (self.username, url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username,
view_style, offset) view_style, COLLECTION, offset)
logger.info("url: %s", url) logger.info("url: %s", url)
if cookies is None: if cookies is None:
r = requests.get(url) r = requests.get(url)
@ -163,6 +168,7 @@ class LibraryThing(object):
raise LibraryThingException("Error accessing %s: %s" % (url, e)) raise LibraryThingException("Error accessing %s: %s" % (url, e))
logger.info("Error accessing %s: %s", url, e) logger.info("Error accessing %s: %s", url, e)
etree = html.fromstring(r.content) etree = html.fromstring(r.content)
#logger.info("r.content %s", r.content)
cookies = r.cookies # retain the cookies cookies = r.cookies # retain the cookies
# look for a page bar # look for a page bar
@ -197,7 +203,7 @@ class LibraryThing(object):
def load_librarything_into_wishlist(user, lt_username, max_books=None): def load_librarything_into_wishlist(user, lt_username, max_books=None):
""" """
Load a specified Goodreads shelf (by default: all the books from the Goodreads account associated with user) Load a specified LibraryThing shelf (by default: all the books from the LibraryThing account associated with user)
""" """
from regluit.core import bookloader from regluit.core import bookloader
@ -212,6 +218,8 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None):
isbn = book["isbn"] # grab the first one isbn = book["isbn"] # grab the first one
logger.info("%d %s %s", i, book["title"]["title"], isbn) logger.info("%d %s %s", i, book["title"]["title"], isbn)
try: try:
if not isbn:
continue
edition = bookloader.add_by_isbn(isbn) edition = bookloader.add_by_isbn(isbn)
if not edition: if not edition:
continue continue

View File

@ -13,8 +13,8 @@ class Command(BaseCommand):
for isbn in open(filename): for isbn in open(filename):
isbn = isbn.strip() isbn = isbn.strip()
edition = bookloader.add_by_isbn(isbn) edition = bookloader.add_by_isbn(isbn)
bookloader.add_related(isbn)
if edition: if edition:
bookloader.add_related(isbn)
user.wishlist.add_work(edition.work, source="user") user.wishlist.add_work(edition.work, source="user")
print "loaded %s as %s for %s" % (isbn, edition, user) print "loaded %s as %s for %s" % (isbn, edition, user)
else: else:

View File

@ -1,38 +0,0 @@
from decimal import Decimal
from random import randint, randrange
from datetime import datetime, timedelta
from django.core.management.base import BaseCommand
from regluit.core.models import Work, Campaign
class Command(BaseCommand):
    """Management command that gives every campaign-less work a test campaign."""

    help = "creates random campaigns for any works that lack one for testing"

    def handle(self, *args, **options):
        """Create and save one randomised campaign for each work that has none.

        Works that already have at least one campaign are left untouched.
        A line is printed for every campaign created.
        """
        for work in Work.objects.all():
            already_has_campaign = work.campaigns.all().count() > 0
            if not already_has_campaign:
                new_campaign = Campaign()
                new_campaign.name = work.title
                new_campaign.work = work
                new_campaign.description = "Test Campaign"

                # random campaign target between $200 and $10,000
                new_campaign.target = float(randint(200, 10000))

                # random deadline between 5 days from now and 180 days from now
                today = datetime.now()
                earliest = today + timedelta(days=5)
                latest = today + timedelta(days=180)
                new_campaign.deadline = random_date(earliest, latest)

                new_campaign.save()
                print("created %s" % new_campaign)
def random_date(start, end):
    """Return a random datetime between ``start`` and ``end``.

    The result is ``start`` plus a uniformly chosen whole number of seconds,
    so it lies in the half-open interval ``[start, end)`` whenever the two
    bounds are at least one second apart.

    start -- earliest datetime that may be returned
    end   -- upper bound (exclusive); must not be earlier than ``start``

    If the bounds are less than one second apart there is nothing to choose
    from, so ``start`` itself is returned instead of letting ``randrange(0)``
    fail. Raises ValueError when ``end`` is earlier than ``start``.
    """
    span = end - start
    # whole seconds in the window; sub-second remainders are ignored
    span_seconds = (span.days * 24 * 60 * 60) + span.seconds
    if span_seconds < 0:
        raise ValueError("end must not be earlier than start")
    if span_seconds == 0:
        return start
    return start + timedelta(seconds=randrange(span_seconds))

View File

@ -0,0 +1,18 @@
# no, not that kind of orphan works. removes works with no connected identifiers.
from django.core.management.base import BaseCommand
from regluit.core import models
class Command(BaseCommand):
    """Management command that deletes every Work with no identifiers attached."""

    help = "removes works with no connected identifiers"

    def handle(self, **options):
        """Scan all works, delete the identifier-less ones, and print a tally."""
        total_seen = 0
        removed = 0
        for candidate in models.Work.objects.all():
            total_seen += 1
            if candidate.identifiers.count() != 0:
                # still referenced by at least one identifier; keep it
                continue
            candidate.delete()
            removed += 1
        print("%s deleted from %s total" % (removed, total_seen))

View File

@ -40,6 +40,9 @@ class BookLoaderTests(TestCase):
# work # work
self.assertTrue(edition.work) self.assertTrue(edition.work)
# locale in language
edition = bookloader.add_by_isbn('9787500676911')
self.assertEqual(edition.work.language, 'zh')
def test_double_add(self): def test_double_add(self):
bookloader.add_by_isbn('0441012035') bookloader.add_by_isbn('0441012035')

View File

@ -0,0 +1,11 @@
# Settings for the celeryd worker.
# NOTE(review): presumably this is deploy/celeryd.conf, which the README copies
# to /etc/default/celeryd for the /etc/init.d/celeryd script -- confirm.
CELERYD_NODES="w1"
# Directory of the regluit checkout.
CELERYD_CHDIR="/opt/regluit/"
# Log and pid files both go under /var/log/celery (the README chowns that
# directory to celery:celery). Presumably %n expands to the node name -- verify.
CELERYD_LOG_FILE="/var/log/celery/%n.log"
CELERYD_PID_FILE="/var/log/celery/%n.pid"
# Account created in the README with `adduser ... celery`.
CELERYD_USER="celery"
CELERYD_GROUP="celery"
# Worker commands are run through the virtualenv's django-admin.py.
CELERYD="/opt/regluit/ENV/bin/django-admin.py celeryd"
CELERYD_MULTI="/opt/regluit/ENV/bin/django-admin.py celeryd_multi"
VIRTUALENV_ACTIVATE="/opt/regluit/ENV/bin/activate"
# Django settings module used by the worker.
export DJANGO_SETTINGS_MODULE="regluit.settings.please"

View File

@ -12,7 +12,7 @@ RewriteRule /admin(.*) https://please.unglueit.com/admin$1 [R=301]
RewriteRule /accounts(.*) https://please.unglueit.com/accounts$1 [R=301] RewriteRule /accounts(.*) https://please.unglueit.com/accounts$1 [R=301]
WSGIDaemonProcess regluit processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs WSGIDaemonProcess regluit processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs
WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi WSGIScriptAlias / /opt/regluit/deploy/please.wsgi
<Directory /opt/regluit/static> <Directory /opt/regluit/static>
Options Indexes FollowSymLinks Options Indexes FollowSymLinks
@ -31,10 +31,10 @@ Alias /static /var/www/static
SSLEngine on SSLEngine on
SSLCertificateFile /etc/ssl/certs/server.crt SSLCertificateFile /etc/ssl/certs/server.crt
SSLCertificateKeyFile /etc/ssl/private/server.key SSLCertificateKeyFile /etc/ssl/private/server.key
SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt #SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt
WSGIDaemonProcess regluit-ssl processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs WSGIDaemonProcess regluit-ssl processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs
WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi WSGIScriptAlias / /opt/regluit/deploy/please.wsgi
<Directory /opt/regluit/static> <Directory /opt/regluit/static>
Options Indexes FollowSymLinks Options Indexes FollowSymLinks

9
deploy/please.wsgi Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
import os
import django.core.handlers.wsgi
# Configure the environment before the handler is created: the Celery loader
# name and the Django settings module for this deployment.
os.environ['CELERY_LOADER'] = 'django'
os.environ['DJANGO_SETTINGS_MODULE'] = 'regluit.settings.please'
# Module-level WSGI handler object; the Apache config's WSGIScriptAlias
# entries point at deploy/please.wsgi.
application = django.core.handlers.wsgi.WSGIHandler()

View File

@ -9,9 +9,9 @@
cd /opt/regluit cd /opt/regluit
sudo -u ubuntu /usr/bin/git pull sudo -u ubuntu /usr/bin/git pull
source ENV/bin/activate source ENV/bin/activate
pip install -r requirements.pip #pip install -r requirements.pip
django-admin.py syncdb --migrate --settings regluit.settings.prod django-admin.py syncdb --migrate --settings regluit.settings.please
django-admin.py collectstatic --noinput --settings regluit.settings.prod django-admin.py collectstatic --noinput --settings regluit.settings.please
sudo /etc/init.d/apache2 restart sudo /etc/init.d/apache2 restart
sudo /etc/init.d/celeryd restart sudo /etc/init.d/celeryd restart
touch /opt/regluit/deploy/last-update touch /opt/regluit/deploy/last-update

View File

@ -6,6 +6,9 @@ TEMPLATE_DEBUG = DEBUG
# if you're doing development work, you'll want this to be zero # if you're doing development work, you'll want this to be zero
IS_PREVIEW = False IS_PREVIEW = False
# SITE_ID for your particular site -- must be configured in /core/fixtures/initial_data.json
SITE_ID = 3
ADMINS = ( ADMINS = (
('Ed Summers', 'ehs@pobox.com'), ('Ed Summers', 'ehs@pobox.com'),
) )

View File

@ -18,9 +18,9 @@ DATABASES = {
'default': { 'default': {
'ENGINE': 'django.db.backends.mysql', 'ENGINE': 'django.db.backends.mysql',
'NAME': 'please', 'NAME': 'please',
'USER': 'please', 'USER': 'please2',
'PASSWORD': 'unglueit', 'PASSWORD': 'unglueit',
'HOST': 'gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com', 'HOST': 'pleasedb.cboagmr25pjs.us-east-1.rds.amazonaws.com',
'PORT': '', 'PORT': '',
} }
} }

28
test/booktests.py Normal file
View File

@ -0,0 +1,28 @@
from regluit.core import librarything, bookloader
import itertools
import django
def ry_lt_books():
    """Return the parsed books of rdhyee's LibraryThing catalog (view style 5)."""
    return librarything.LibraryThing('rdhyee').parse_user_catalog(view_style=5)
def editions_for_lt(books):
    """Return the Editions that correspond to the list of LibraryThing books.

    books -- iterable of parsed LibraryThing book dicts, each with an "isbn" key

    The returned list has one entry per book, in the same order. An entry is
    None when the book has no ISBN (no lookup is attempted, matching the
    guard in the LibraryThing wishlist loader) or when the lookup itself
    yields nothing.
    """
    editions = []
    for book in books:
        isbn = book["isbn"]
        # the catalog parser can yield a missing ISBN; don't hand that to the loader
        editions.append(bookloader.add_by_isbn(isbn) if isbn else None)
    return editions
def ry_lt_not_loaded():
    """Return the books on rdhyee's LibraryThing list that don't yield Editions."""
    catalog = list(ry_lt_books())
    loaded = editions_for_lt(catalog)
    missing = []
    # books and editions line up one-to-one, so pair them positionally
    for book, edition in itertools.izip(catalog, loaded):
        if edition is None:
            missing.append(book)
    return missing
def ry_wish_list_equal_loadable_lt_books():
    """Return whether the wishlist matches the loadable LibraryThing works.

    Compares the set of works in the user's wishlist with the set of works
    behind the editions that could be loaded from the LibraryThing catalog.
    """
    # a bare `import django` does not load the django.contrib.auth.models
    # submodule, so import the User model explicitly
    from django.contrib.auth.models import User

    editions = editions_for_lt(ry_lt_books())
    # assume only one user -- and that we have run a LT book loading process for that user
    ry = User.objects.all()[0]
    loadable_works = set(ed.work for ed in editions if ed)
    return loadable_works == set(ry.wishlist.works.all())