Merge branch 'master' of github.com:Gluejar/regluit

2012-01-30 10:12:29 -05:00 · 2012-01-30 10:12:29 -05:00 · a9c3cc8301
parent 4929e90108 8dcca8c1e7
commit a9c3cc8301
15 changed files with 137 additions and 68 deletions
--- a/README.md
+++ b/README.md
@ -37,8 +37,9 @@ Production Deployment
 ---------------------

 Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, and talking to an Amazon Relational Data Store instance.
+Instructions for setting up the "please" server (please.unglueit.com) are slightly different.

-1. create an ubuntu LTS ec2 instance
+1. create an ubuntu ec2 instance (e.g., go to http://alestic.com/ to find various ubuntu images)
 1. `sudo aptitude update`
 1. `sudo aptitude upgrade`
 1. `sudo aptitude install git-core apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server python-lxml postfix`
@ -54,21 +55,24 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi,
 1. create an Amazon RDS instance
 1. connect to it, e.g. `mysql -u root -h gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com -p`
 1. `CREATE DATABASE unglueit CHARSET utf8;`
-1. `GRANT ALL ON unglueit.\* TO ‘unglueit’@’ip-10-244-250-168.ec2.internal’ IDENTIFIED BY 'unglueit' REQUIRE SSL`
+1. `GRANT ALL ON unglueit.\* TO 'unglueit'@'ip-10-244-250-168.ec2.internal' IDENTIFIED BY 'unglueit' REQUIRE SSL;`
 1. update settings/prod.py with database credentials
 1. `virtualenv ENV`
 1. `source ENV/bin/activate`
 1. `pip install -r requirements.pip`
 1. `echo "/opt/" > ENV/lib/python2.7/site-packages/regluit.pth`
 1. `django-admin.py syncdb --migrate --settings regluit.settings.prod`
-1. `mkdir /var/www/static`
-1. `chown ubuntu:ubuntu /var/www/static`
+1. `sudo mkdir /var/www/static`
+1. `sudo chown ubuntu:ubuntu /var/www/static`
 1. `django-admin.py collectstatic --settings regluit.settings.prod`
 1. `sudo ln -s /opt/regluit/deploy/regluit.conf /etc/apache2/sites-available/regluit`
 1. `sudo a2ensite regluit`
 1. `sudo a2enmod ssl rewrite`
+1. `cd /home/ubuntu`
+1. copy SSL server key to `/etc/ssl/private/server.key`
+1. copy SSL certificate to `/etc/ssl/certs/server.crt`
 1. `sudo /etc/init.d/apache2 restart`
-1. `sudo adduser --no-create-home celery --disabled-password --disabled-login`
+1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` (just press return at each prompt?)
 1. `sudo cp deploy/celeryd /etc/init.d/celeryd`
 1. `sudo chmod 755 /etc/init.d/celeryd`
 1. `sudo cp deploy/celeryd.conf /etc/default/celeryd`
@ -76,7 +80,7 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi,
 1. `sudo chown celery:celery /var/log/celery`
 1. `sudo /etc/init.d/celeryd start`

-OS X Develper Notes
+OS X Developer Notes
 -------------------

 To run regluit on OS X you should have XCode installed
--- a/core/bookloader.py
+++ b/core/bookloader.py
@ -105,7 +105,7 @@ def add_by_isbn_from_google(isbn, work=None):
        return None

    try:
-        return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0])
+        return add_by_googlebooks_id(results['items'][0]['id'], work=work, results=results['items'][0], isbn=isbn)
    except LookupFailure, e:
        logger.exception("failed to add edition for %s", isbn)
    except IntegrityError, e:
@ -127,10 +127,10 @@ def get_edition_by_id(type,value):
            return None


-def add_by_googlebooks_id(googlebooks_id, work=None, results=None):
+def add_by_googlebooks_id(googlebooks_id, work=None, results=None, isbn=None):
    """add a book to the UnglueIt database based on the GoogleBooks ID. The
    work parameter is optional, and if not supplied the edition will be 
-    associated with a stub work. 
+    associated with a stub work. isbn can be passed because sometimes passed data won't include it 
    
    """
    # don't ping google again if we already know about the edition
@ -151,19 +151,22 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None):
    # don't add the edition to a work with a different language
    # https://www.pivotaltracker.com/story/show/17234433
    language = d['language']
+    if len(language)>2:
+        language= language[0:2]
    if work and work.language != language:
        logger.info("not connecting %s since it is %s instead of %s" %
                (googlebooks_id, language, work.language))
        work = None
-    isbn = None
-    for i in d.get('industryIdentifiers', []):
-        if i['type'] == 'ISBN_10' and not isbn:
-            isbn = regluit.core.isbn.convert_10_to_13(i['identifier'])
-        elif i['type'] == 'ISBN_13':
-            isbn = i['identifier']
+    # isbn = None
+    if not isbn: 
+        for i in d.get('industryIdentifiers', []):
+            if i['type'] == 'ISBN_10' and not isbn:
+                isbn = regluit.core.isbn.convert_10_to_13(i['identifier'])
+            elif i['type'] == 'ISBN_13':
+                isbn = i['identifier']

    # now check to see if there's an existing Work
-    if not work:
+    if isbn and not work:
        work = get_work_by_id(type='isbn',value=isbn)
    if not work:
        work = models.Work.objects.create(title=d['title'], language=language)
@ -171,6 +174,14 @@ def add_by_googlebooks_id(googlebooks_id, work=None, results=None):
        work.save()


+    # going off to google can take some time, so we want to make sure this edition has not
+    # been created in another thread while we were waiting
+    try:
+        return models.Identifier.objects.get(type='goog', value=googlebooks_id).edition
+    except models.Identifier.DoesNotExist:
+        pass
+    
+    
    # because this is a new google id, we have to create a new edition
    e = models.Edition(work=work)
    e.title = d.get('title')
@ -215,11 +226,15 @@ def add_related(isbn):
    """
    # make sure the seed edition is there
    logger.info("adding related editions for %s", isbn)
+    
+    new_editions = []
+
    edition = add_by_isbn(isbn)
+    if edition is None:
+        return new_editions

    # this is the work everything will hang off
    work = edition.work
-    new_editions = []
    other_editions = {}
    for other_isbn in thingisbn(isbn):
        # 979's come back as 13
--- a/core/fixtures/initial_data.json
+++ b/core/fixtures/initial_data.json
@ -23,6 +23,14 @@
      "name": "unglue.it local development"
    }
  },
+  {
+    "pk": 4, 
+    "model": "sites.site", 
+    "fields": {
+      "domain": "ry-dev.dyndns.org", 
+      "name": "ry-dev development"
+    }
+  },  
  {
    "pk": 1, 
    "model": "core.premium", 
--- a/core/librarything.py
+++ b/core/librarything.py
@ -119,7 +119,7 @@ class LibraryThing(object):
            try:
                book_data["lc_call_number"] = cols[2].xpath('.//span')[0].text
            except Exception, e:
-                logger.info("book lc call number exception: %s %s", book_data["title"], e)
+                logger.info("no lc call number for: %s %s", book_data["title"], e)
                book_data["lc_call_number"] = None
                
            # subject
@ -131,6 +131,9 @@ class LibraryThing(object):
            # isbn
            try:
                book_data["isbn"] = cols[4].xpath('.//span')[0].text
+                # check for &nbsp
+                if book_data["isbn"] == u'\xA0':
+                    book_data["isbn"] = None
            except Exception, e:
                book_data["isbn"] = None
            
@ -143,6 +146,8 @@ class LibraryThing(object):
        # we can vary viewstyle to get different info
        
        IMPLEMENTED_STYLES = [1,5]
+        COLLECTION = 2 # set to get All Collections
+        
        if view_style not in IMPLEMENTED_STYLES:
            raise NotImplementedError()
        style_parser = getattr(self,"viewstyle_%s" % view_style)
@ -151,8 +156,8 @@ class LibraryThing(object):
        cookies = None
        
        while next_page:
-            url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&offset=%d" % (self.username,
-                                        view_style, offset)
+            url = "http://www.librarything.com/catalog_bottom.php?view=%s&viewstyle=%d&collection=%d&offset=%d" % (self.username,
+                                        view_style, COLLECTION, offset)
            logger.info("url: %s", url)
            if cookies is None:
                r = requests.get(url)
@ -163,6 +168,7 @@ class LibraryThing(object):
                raise LibraryThingException("Error accessing %s: %s" % (url, e))
                logger.info("Error accessing %s: %s", url, e)
            etree = html.fromstring(r.content)
+            #logger.info("r.content %s", r.content)
            cookies = r.cookies  # retain the cookies
            
            # look for a page bar
@ -197,7 +203,7 @@ class LibraryThing(object):

 def load_librarything_into_wishlist(user, lt_username, max_books=None):
    """
-    Load a specified Goodreads shelf (by default:  all the books from the Goodreads account associated with user)
+    Load a specified LibraryThing shelf (by default:  all the books from the LibraryThing account associated with user)
    """
   
    from regluit.core import bookloader
@ -212,6 +218,8 @@ def load_librarything_into_wishlist(user, lt_username, max_books=None):
        isbn = book["isbn"]  # grab the first one
        logger.info("%d %s %s", i, book["title"]["title"], isbn)
        try:
+            if not isbn:
+                continue
            edition = bookloader.add_by_isbn(isbn)
            if not edition:
                continue
--- a/core/management/commands/load_wishlist.py
+++ b/core/management/commands/load_wishlist.py
@ -13,8 +13,8 @@ class Command(BaseCommand):
        for isbn in open(filename):
            isbn = isbn.strip()
            edition = bookloader.add_by_isbn(isbn)
-            bookloader.add_related(isbn)
            if edition:
+                bookloader.add_related(isbn)
                user.wishlist.add_work(edition.work, source="user")
                print "loaded %s as %s for %s" % (isbn, edition, user)
            else:
--- a/core/management/commands/random_campaigns.py~
+++ b/core/management/commands/random_campaigns.py~
@ -1,38 +0,0 @@
-from decimal import Decimal
-from random import randint, randrange
-from datetime import datetime, timedelta
-
-from django.core.management.base import BaseCommand
-
-from regluit.core.models import Work, Campaign
-
-class Command(BaseCommand):
-    help = "creates random campaigns for any works that lack one for testing"
-
-    def handle(self, *args, **options):
-        for work in Work.objects.all():
-            if work.campaigns.all().count() > 0:
-                continue
-            campaign = Campaign()
-            campaign.name = work.title
-            campaign.work = work
-            campaign.description = "Test Campaign"
-
-            # random campaign target between $200 and $10,000
-            campaign.target = float(randint(200,10000))
-
-            # random deadline between 5 days from now and 180 days from now
-            now = datetime.now()
-            campaign.deadline = random_date(now + timedelta(days=5),
-                                            now + timedelta(days=180))
-
-            campaign.save()
-            print "created %s" % campaign
-
-
-def random_date(start, end):
-    delta = end - start
-    int_delta = (delta.days * 24 * 60 * 60) + delta.seconds
-    random_second = randrange(int_delta)
-    return (start + timedelta(seconds=random_second))
-
--- a/core/management/commands/remove_orphan_works.py
+++ b/core/management/commands/remove_orphan_works.py
@ -0,0 +1,18 @@
+# no, not that kind of orphan works. removes works with no connected identifiers.
+
+from django.core.management.base import BaseCommand
+
+from regluit.core import models
+
+class Command(BaseCommand):
+    help = "removes works with no connected identifiers"
+
+    def handle(self,  **options):
+        numworks=0
+        deleted=0
+        for work in models.Work.objects.all():
+            if work.identifiers.count()==0:
+                work.delete()
+                deleted=deleted+1
+            numworks=numworks+1
+        print "%s deleted from %s total" % (deleted, numworks)
--- a/core/tests.py
+++ b/core/tests.py
@ -40,6 +40,9 @@ class BookLoaderTests(TestCase):
        # work
        self.assertTrue(edition.work)
        
+        # locale in language
+        edition = bookloader.add_by_isbn('9787500676911')
+        self.assertEqual(edition.work.language, 'zh')

    def test_double_add(self):
        bookloader.add_by_isbn('0441012035')
--- a/deploy/celeryd_please.conf
+++ b/deploy/celeryd_please.conf
@ -0,0 +1,11 @@
+CELERYD_NODES="w1"
+CELERYD_CHDIR="/opt/regluit/"
+CELERYD_LOG_FILE="/var/log/celery/%n.log"
+CELERYD_PID_FILE="/var/log/celery/%n.pid"
+CELERYD_USER="celery"
+CELERYD_GROUP="celery"
+CELERYD="/opt/regluit/ENV/bin/django-admin.py celeryd"
+CELERYD_MULTI="/opt/regluit/ENV/bin/django-admin.py celeryd_multi"
+
+VIRTUALENV_ACTIVATE="/opt/regluit/ENV/bin/activate"
+export DJANGO_SETTINGS_MODULE="regluit.settings.please"
--- a/deploy/please.conf
+++ b/deploy/please.conf
@ -12,7 +12,7 @@ RewriteRule /admin(.*) https://please.unglueit.com/admin$1 [R=301]
 RewriteRule /accounts(.*) https://please.unglueit.com/accounts$1 [R=301]

 WSGIDaemonProcess regluit processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs
-WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi
+WSGIScriptAlias / /opt/regluit/deploy/please.wsgi

 <Directory /opt/regluit/static>
  Options Indexes FollowSymLinks
@ -31,10 +31,10 @@ Alias /static /var/www/static
 SSLEngine on
 SSLCertificateFile    /etc/ssl/certs/server.crt
 SSLCertificateKeyFile /etc/ssl/private/server.key
-SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt
+#SSLCertificateChainFile /etc/ssl/certs/gd_bundle.crt

 WSGIDaemonProcess regluit-ssl processes=4 threads=4 python-eggs=/tmp/regluit-python-eggs
-WSGIScriptAlias / /opt/regluit/deploy/regluit.wsgi
+WSGIScriptAlias / /opt/regluit/deploy/please.wsgi

 <Directory /opt/regluit/static>
  Options Indexes FollowSymLinks
--- a/deploy/please.wsgi
+++ b/deploy/please.wsgi
@ -0,0 +1,9 @@
+#!/usr/bin/env python
+
+import os
+
+import django.core.handlers.wsgi
+
+os.environ['CELERY_LOADER'] = 'django'
+os.environ['DJANGO_SETTINGS_MODULE'] = 'regluit.settings.please'
+application = django.core.handlers.wsgi.WSGIHandler()
--- a/deploy/update-regluit
+++ b/deploy/update-regluit
@ -9,9 +9,9 @@
 cd /opt/regluit
 sudo -u ubuntu /usr/bin/git pull
 source ENV/bin/activate
-pip install -r requirements.pip
-django-admin.py syncdb --migrate --settings regluit.settings.prod
-django-admin.py collectstatic --noinput --settings regluit.settings.prod
+#pip install -r requirements.pip
+django-admin.py syncdb --migrate --settings regluit.settings.please
+django-admin.py collectstatic --noinput --settings regluit.settings.please
 sudo /etc/init.d/apache2 restart
 sudo /etc/init.d/celeryd restart
 touch /opt/regluit/deploy/last-update
--- a/settings/dev.py
+++ b/settings/dev.py
@ -6,6 +6,9 @@ TEMPLATE_DEBUG = DEBUG
 # if you're doing development work, you'll want this to be zero
 IS_PREVIEW = False

+# SITE_ID for your particular site -- must be configured in /core/fixtures/initial_data.json
+SITE_ID = 3
+
 ADMINS = (
    ('Ed Summers', 'ehs@pobox.com'),
 )
--- a/settings/please.py
+++ b/settings/please.py
@ -18,9 +18,9 @@ DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': 'please',
-        'USER': 'please',
+        'USER': 'please2',
        'PASSWORD': 'unglueit',
-        'HOST': 'gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com',
+        'HOST': 'pleasedb.cboagmr25pjs.us-east-1.rds.amazonaws.com',
        'PORT': '',
    }
 }
--- a/test/booktests.py
+++ b/test/booktests.py
@ -0,0 +1,28 @@
+from regluit.core import librarything, bookloader
+import itertools
+import django
+
+def ry_lt_books():
+    """return parsing of rdhyee's LibraryThing collection"""
+    lt = librarything.LibraryThing('rdhyee')
+    books = lt.parse_user_catalog(view_style=5)
+    return books
+
+def editions_for_lt(books):
+    """return the Editions that correspond to the list of LibraryThing books"""
+    editions = [bookloader.add_by_isbn(b["isbn"]) for b in books]
+    return editions
+
+def ry_lt_not_loaded():
+    """Calculate which of the books on rdhyee's librarything list don't yield Editions"""
+    books = list(ry_lt_books())
+    editions = editions_for_lt(books)
+    not_loaded_books = [b for (b, ed) in itertools.izip(books, editions) if ed is None]
+    return not_loaded_books
+
+def ry_wish_list_equal_loadable_lt_books():
+    """return whether the set of works in the user's wishlist is the same as the works in a user's loadable editions from LT"""
+    editions = editions_for_lt(ry_lt_books())
+    # assume only one user -- and that we have run a LT book loading process for that user
+    ry = django.contrib.auth.models.User.objects.all()[0]
+    return set([ed.work for ed in filter(None, editions)]) == set(ry.wishlist.works.all())