Merge branch 'master' of github.com:Gluejar/regluit

pull/1/head
Andromeda Yelton 2012-01-30 10:12:29 -05:00
commit a9c3cc8301
15 changed files with 137 additions and 68 deletions

View File

@ -37,8 +37,9 @@ Production Deployment
---------------------
Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, and talking to an Amazon Relational Data Store instance.
Instructions for setting up please are slightly different.
1. create an ubuntu LTS ec2 instance
1. create an ubuntu ec2 instance (e.g., go to http://alestic.com/ to find various ubuntu images)
1. `sudo aptitude update`
1. `sudo aptitude upgrade`
1. `sudo aptitude install git-core apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server python-lxml postfix`
@ -54,21 +55,24 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi,
1. create an Amazon RDS instance
1. connect to it, e.g. `mysql -u root -h gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com -p`
1. `CREATE DATABASE unglueit CHARSET utf8;`
1. `GRANT ALL ON unglueit.\* TO unglueit@ip-10-244-250-168.ec2.internal IDENTIFIED BY 'unglueit' REQUIRE SSL`
1. `GRANT ALL ON unglueit.\* TO unglueit@ip-10-244-250-168.ec2.internal IDENTIFIED BY 'unglueit' REQUIRE SSL;`
1. update settings/prod.py with database credentials
1. `virtualenv ENV`
1. `source ENV/bin/activate`
1. `pip install -r requirements.pip`
1. `echo "/opt/" > ENV/lib/python2.7/site-packages/regluit.pth`
1. `django-admin.py syncdb --migrate --settings regluit.settings.prod`
1. `mkdir /var/www/static`
1. `chown ubuntu:ubuntu /var/www/static`
1. `sudo mkdir /var/www/static`
1. `sudo chown ubuntu:ubuntu /var/www/static`
1. `django-admin.py collectstatic --settings regluit.settings.prod`
1. `sudo ln -s /opt/regluit/deploy/regluit.conf /etc/apache2/sites-available/regluit`
1. `sudo a2ensite regluit`
1. `sudo a2enmod ssl rewrite`
1. `cd /home/ubuntu`
1. copy SSL server key to `/etc/ssl/private/server.key`
1. copy SSL certificate to `/etc/ssl/certs/server.crt`
1. `sudo /etc/init.d/apache2 restart`
1. `sudo adduser --no-create-home celery --disabled-password --disabled-login`
1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` (just enter return for all?)
1. `sudo cp deploy/celeryd /etc/init.d/celeryd`
1. `sudo chmod 755 /etc/init.d/celeryd`
1. `sudo cp deploy/celeryd.conf /etc/default/celeryd`
@ -76,7 +80,7 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi,
1. `sudo chown celery:celery /var/log/celery`
1. `sudo /etc/init.d/celeryd start`
OS X Develper Notes
OS X Developer Notes
-------------------
To run regluit on OS X you should have XCode installed

View File

@ -105,7 +105,7 @@ def add_by_isbn_from_google(isbn, work=None):
return None
try:
return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0])
return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0], isbn=isbn)
except LookupFailure, e:
logger.exception("failed to add edition for %s", isbn)
except IntegrityError, e:
@ -127,10 +127,10 @@ def get_edition_by_id(type,value):
return None
def add_by_googlebooks_id(googlebooks_id, work=None, results=None):
def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
"""add a book to the UnglueIt database based on the GoogleBooks ID. The
work parameter is optional, and if not supplied the edition will be
associated with a stub work.
associated with a stub work. isbn can be passed because sometimes passed data won't include it
"""
# don't ping google again if we already know about the edition
@ -151,25 +151,36 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None):
# don't add the edition to a work with a different language
# https://www.pivotaltracker.com/story/show/17234433
language = d['language']
if len(language)>2:
language= language[0:2]
if work and work.language != language:
logger.info("not connecting %s since it is %s instead of %s" %
(googlebooks_id, language, work.language))
work = None
isbn = None
for i in d.get('industryIdentifiers', []):
if i['type'] == 'ISBN_10' and not isbn:
isbn = regluit.core.isbn.convert_10_to_13(i['identifier'])
elif i['type'] == 'ISBN_13':
isbn = i['identifier']
# isbn = None
if not isbn:
for i in d.get('industryIdentifiers', []):
if i['type'] == 'ISBN_10' and not isbn:
isbn = regluit.core.isbn.convert_10_to_13(i['identifier'])
elif i['type'] == 'ISBN_13':
isbn = i['identifier']
# now check to see if there's an existing Work
if not work:
if isbn and not work:
work = get_work_by_id(type='isbn',value=isbn)
if not work:
work = models.Work.objects.create(title=d['title'], language=language)
work.new = True
work.save()
# going off to google can take some time, so we want to make sure this edition has not
# been created in another thread while we were waiting
try:
return models.Identifier.objects.get(type='goog', value=googlebooks_id).edition
except models.Identifier.DoesNotExist:
pass
# because this is a new google id, we have to create a new edition
e = models.Edition(work=work)
@ -215,11 +226,15 @@ def add_related(isbn):
"""
# make sure the seed edition is there
logger.info("adding related editions for %s", isbn)
new_editions = []
edition = add_by_isbn(isbn)
if edition is None:
return new_editions
# this is the work everything will hang off
work = edition.work
new_editions = []
other_editions = {}
for other_isbn in thingisbn(isbn):
# 979's come back as 13

View File

@ -23,6 +23,14 @@
"name": "unglue.it local development"
}
},
{
"pk": 4,
"model": "sites.site",
"fields": {
"domain": "ry-dev.dyndns.org",
"name": "ry-dev development"
}
},
{
"pk": 1,
"model": "core.premium",

View File

@ -119,7 +119,7 @@ class LibraryThing(object):
try:
book_data["lc_call_number"] = cols[2].xpath('.//span')[0].text
except Exception, e:
logger.info("book lc call number exception: %s %s", book_data["title"], e)
logger.info("no lc call number for: %s %s", book_data["title"], e)
book_data["lc_call_number"] = None
# subject
@ -131,6 +131,9 @@ class LibraryThing(object):
# isbn
try:
book_data["isbn"] = cols[4].xpath('.//span')[0].text
# check for &nbsp
if book_data["isbn"] == u'\xA0':
book_data["isbn"] = None
except Exception, e:
book_data["isbn"] = None
@ -143,6 +146,8 @@ class LibraryThing(object):
# we can vary viewstyle to get different info
IMPLEMENTED_STYLES = [1,5]
COLLECTION = 2 # set to get All Collections
if view_style not in IMPLEMENTED_STYLES:
raise NotImplementedError()
style_parser = getattr(self,"viewstyle_%s" % view_style)
@ -151,8 +156,8 @@ class LibraryThing(object):
cookies = None
while next_page:
url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&offset=%d" % (self.username,
view_style, offset)
url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username,
view_style, COLLECTION, offset)
logger.info("url: %s", url)
if cookies is None:
r = requests.get(url)
@ -163,6 +168,7 @@ class LibraryThing(object):
raise LibraryThingException("Error accessing %s: %s" % (url, e))
logger.info("Error accessing %s: %s", url, e)
etree = html.fromstring(r.content)
#logger.info("r.content %s", r.content)
cookies = r.cookies # retain the cookies
# look for a page bar
@ -197,7 +203,7 @@ class LibraryThing(object):
def load_librarything_into_wishlist(user, lt_username, max_books=None):
"""
Load a specified Goodreads shelf (by default: all the books from the Goodreads account associated with user)
Load a specified LibraryThing shelf (by default: all the books from the LibraryThing account associated with user)
"""
from regluit.core import bookloader
@ -212,6 +218,8 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None):
isbn = book["isbn"] # grab the first one
logger.info("%d %s %s", i, book["title"]["title"], isbn)
try:
if not isbn:
continue
edition = bookloader.add_by_isbn(isbn)
if not edition:
continue

View File

@ -13,8 +13,8 @@ class Command(BaseCommand):
for isbn in open(filename):
isbn = isbn.strip()
edition = bookloader.add_by_isbn(isbn)
bookloader.add_related(isbn)
if edition:
bookloader.add_related(isbn)
user.wishlist.add_work(edition.work, source="user")
print "loaded %s as %s for %s" % (isbn, edition, user)
else:

View File

@ -1,38 +0,0 @@
from decimal import Decimal
from random import randint, randrange
from datetime import datetime, timedelta
from django.core.management.base import BaseCommand
from regluit.core.models import Work, Campaign
class Command(BaseCommand):
    """Management command that attaches a test campaign to every work without one."""
    help = "creates random campaigns for any works that lack one for testing"

    def handle(self, *args, **options):
        """Create, save and report one random campaign per campaign-less work."""
        for work in Work.objects.all():
            has_campaign = work.campaigns.all().count() > 0
            if not has_campaign:
                campaign = Campaign()
                campaign.name = work.title
                campaign.work = work
                campaign.description = "Test Campaign"

                # random campaign target between $200 and $10,000
                campaign.target = float(randint(200, 10000))

                # random deadline between 5 days from now and 180 days from now
                now = datetime.now()
                earliest = now + timedelta(days=5)
                latest = now + timedelta(days=180)
                campaign.deadline = random_date(earliest, latest)

                campaign.save()
                print("created %s" % campaign)
def random_date(start, end):
    """Return ``start`` plus a random whole number of seconds, landing before ``end``.

    The offset is drawn with ``randrange`` from the number of whole seconds
    between ``start`` and ``end`` (microseconds of the difference are ignored),
    so ``randrange`` raises ``ValueError`` when that span is not positive.
    """
    span = end - start
    whole_seconds = span.days * 24 * 60 * 60 + span.seconds
    offset = timedelta(seconds=randrange(whole_seconds))
    return start + offset

View File

@ -0,0 +1,18 @@
# no, not that kind of orphan works. removes works with no connected identifiers.
from django.core.management.base import BaseCommand
from regluit.core import models
class Command(BaseCommand):
    """Management command that deletes every work with no connected identifiers."""
    help = "removes works with no connected identifiers"

    def handle(self, **options):
        """Delete identifier-less works, then print how many were deleted out of the total."""
        total = 0
        removed = 0
        for work in models.Work.objects.all():
            if not work.identifiers.count():
                # nothing identifies this work any more, so drop it
                work.delete()
                removed += 1
            total += 1
        print("%s deleted from %s total" % (removed, total))

View File

@ -40,6 +40,9 @@ class BookLoaderTests(TestCase):
# work
self.assertTrue(edition.work)
# locale in language
edition = bookloader.add_by_isbn('9787500676911')
self.assertEqual(edition.work.language, 'zh')
def test_double_add(self):
bookloader.add_by_isbn('0441012035')

View File

@ -0,0 +1,11 @@
# Settings for the celery worker daemon, written as shell variable assignments.
# NOTE(review): presumably sourced by the /etc/init.d/celeryd init script -- confirm.
# Worker node name(s) to start.
CELERYD_NODES="w1"
# Directory to change into before starting the worker (the regluit checkout).
CELERYD_CHDIR="/opt/regluit/"
# Log and pid file locations; %n is presumably replaced with the node name -- confirm against the Celery docs.
CELERYD_LOG_FILE="/var/log/celery/%n.log"
CELERYD_PID_FILE="/var/log/celery/%n.pid"
# Unix user and group the worker is meant to run as.
CELERYD_USER="celery"
CELERYD_GROUP="celery"
# Commands used to launch the worker, taken from the virtualenv under /opt/regluit/ENV.
CELERYD="/opt/regluit/ENV/bin/django-admin.py celeryd"
CELERYD_MULTI="/opt/regluit/ENV/bin/django-admin.py celeryd_multi"
# Path to the virtualenv activation script.
VIRTUALENV_ACTIVATE="/opt/regluit/ENV/bin/activate"
# Django settings module for this deployment, exported so child processes see it.
export DJANGO_SETTINGS_MODULE="regluit.settings.please"

View File

@ -12,7 +12,7 @@ RewriteRule /admin(.*) https://please.unglueit.com/admin$1 [R=301]
RewriteRule /accounts(.*) https://please.unglueit.com/accounts$1 [R=301]
WSGIDaemonProcess regluit processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs
WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi
WSGIScriptAlias / /opt/regluit/deploy/please.wsgi
<Directory /opt/regluit/static>
Options Indexes FollowSymLinks
@ -31,10 +31,10 @@ Alias /static /var/www/static
SSLEngine on
SSLCertificateFile /etc/ssl/certs/server.crt
SSLCertificateKeyFile /etc/ssl/private/server.key
SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt
#SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt
WSGIDaemonProcess regluit-ssl processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs
WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi
WSGIScriptAlias / /opt/regluit/deploy/please.wsgi
<Directory /opt/regluit/static>
Options Indexes FollowSymLinks

9
deploy/please.wsgi Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
import os
import django.core.handlers.wsgi
# Configure the process environment before the WSGI handler is created.
os.environ.update({
    'CELERY_LOADER': 'django',
    'DJANGO_SETTINGS_MODULE': 'regluit.settings.please',
})

# WSGI entry point: mod_wsgi looks up the module-level name ``application``.
application = django.core.handlers.wsgi.WSGIHandler()

View File

@ -9,9 +9,9 @@
cd /opt/regluit
sudo -u ubuntu /usr/bin/git pull
source ENV/bin/activate
pip install -r requirements.pip
django-admin.py syncdb --migrate --settings regluit.settings.prod
django-admin.py collectstatic --noinput --settings regluit.settings.prod
#pip install -r requirements.pip
django-admin.py syncdb --migrate --settings regluit.settings.please
django-admin.py collectstatic --noinput --settings regluit.settings.please
sudo /etc/init.d/apache2 restart
sudo /etc/init.d/celeryd restart
touch /opt/regluit/deploy/last-update

View File

@ -6,6 +6,9 @@ TEMPLATE_DEBUG = DEBUG
# if you're doing development work, you'll want this to be zero
IS_PREVIEW = False
# SITE_ID for your particular site -- must be configured in /core/fixtures/initial_data.json
SITE_ID = 3
ADMINS = (
('Ed Summers', 'ehs@pobox.com'),
)

View File

@ -18,9 +18,9 @@ DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'please',
'USER': 'please',
'USER': 'please2',
'PASSWORD': 'unglueit',
'HOST': 'gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com',
'HOST': 'pleasedb.cboagmr25pjs.us-east-1.rds.amazonaws.com',
'PORT': '',
}
}

28
test/booktests.py Normal file
View File

@ -0,0 +1,28 @@
from regluit.core import librarything, bookloader
import itertools
import django
def ry_lt_books():
    """Return the parsed LibraryThing catalog of user 'rdhyee' (view style 5)."""
    return librarything.LibraryThing('rdhyee').parse_user_catalog(view_style=5)
def editions_for_lt(books):
    """Look up each LibraryThing book by its "isbn" entry and return the results.

    The returned list is in the same order as ``books``; each item is whatever
    ``bookloader.add_by_isbn`` returned for that book's ISBN.
    """
    editions = []
    for book in books:
        editions.append(bookloader.add_by_isbn(book["isbn"]))
    return editions
def ry_lt_not_loaded():
    """Return the books on rdhyee's LibraryThing list for which no Edition was obtained."""
    # materialise the catalog once so books and editions can be walked in lockstep
    books = list(ry_lt_books())
    editions = editions_for_lt(books)

    missing = []
    for book, edition in itertools.izip(books, editions):
        if edition is None:
            missing.append(book)
    return missing
def ry_wish_list_equal_loadable_lt_books():
    """Return whether the user's wishlist works match the works loadable from LT.

    Compares the set of works behind the editions obtained for rdhyee's
    LibraryThing books (books that yielded no edition are ignored) with the
    set of works on the first user's wishlist.

    Raises IndexError if there are no users in the database.
    """
    # Import the submodule explicitly: a bare ``import django`` does not load
    # ``django.contrib.auth.models``, so attribute access on the package only
    # works if some other module happened to import it first.
    from django.contrib.auth.models import User

    editions = editions_for_lt(ry_lt_books())
    # assume only one user -- and that we have run a LT book loading process for that user
    ry = User.objects.all()[0]
    loadable_works = set(ed.work for ed in editions if ed)
    return loadable_works == set(ry.wishlist.works.all())