diff --git a/README.md b/README.md index 7b70504c..3aef8c27 100644 --- a/README.md +++ b/README.md @@ -16,11 +16,11 @@ Here are some instructions for setting up regluit for development on an Ubuntu system. If you are on OS X see notes below to install python-setuptools in step 1: -1. `aptitude install python-setuptools git` +1. `aptitude install python-setuptools git python-lxml` 1. `sudo easy_install virtualenv virtualenvwrapper` 1. `git clone git@github.com:Gluejar/regluit.git` 1. `cd regluit` -1. `mkvirtualenv --no-site-packages regluit` +1. `mkvirtualenv regluit` 1. `pip install -r requirements.pip` 1. `add2virtualenv ..` 1. `cp settings/dev.py settings/me.py` @@ -41,23 +41,22 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, 1. create an ubuntu natty ec2 instance using ami-1aad5273 1. `sudo aptitude update` 1. `sudo aptitude upgrade` -1. `sudo aptitude install git apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server` +1. `sudo aptitude install git apache libapache2-mod-wsgi mysql-client python-virtualenv python-mysqldb redis-server python-lxml` 1. `sudo mkdir /opt/regluit` 1. `sudo chown ubuntu:ubuntu /opt/regluit` 1. `cd /opt` 1. `git config --global user.name "Ed Summers"` 1. `git config --global user.email "ehs@pobox.com"` 1. `ssh-keygen` -1. add `~/.ssh/id_rsa.pub` as a deploy key on github +1. add `~/.ssh/id_rsa.pub` as a deploy key on github https://github.com/Gluejar/regluit/admin/keys 1. `git clone git@github.com:Gluejar/regluit.git` 1. `cd /opt/regluit` -1. `cp settings/dev.py settings/prod.py` 1. create an Amazon RDS instance 1. connect to it, e.g. `mysql -u root -h gluejardb.cboagmr25pjs.us-east-1.rds.amazonaws.com -p` 1. `CREATE DATABASE unglueit CHARSET utf8;` 1. `GRANT ALL ON unglueit.* TO ‘unglueit’@’ip-10-244-250-168.ec2.internal’ IDENTIFIED BY 'unglueit' REQUIRE SSL` 1. update settings/prod.py with database credentials -1. `virtualenv --no-site-packages ENV` +1. `virtualenv ENV` 1. `source ENV/bin/activate` 1. `pip install -r requirements.pip` 1. `echo "/opt/" > ENV/lib/python2.7/site-packages/regluit.pth` @@ -66,9 +65,9 @@ Below are the steps for getting regluit running on EC2 with Apache and mod_wsgi, 1. `sudo a2ensite regluit` 1. `sudo /etc/init.d/apache2 restart` 1. `sudo adduser --no-create-home celery --disabled-password --disabled-login` -1. `sudo cp celeryd /etc/init.d/celeryd` +1. `sudo cp deploy/celeryd /etc/init.d/celeryd` 1. `sudo chmod 755 /etc/init.d/celeryd` -1. `sudo cp celeryd.conf /etc/default/celeryd` +1. `sudo cp deploy/celeryd.conf /etc/default/celeryd` 1. `sudo mkdir /var/log/celery` 1. `sudo chown celery:celery /var/log/celery` 1. `sudo mkdir /var/run/celery` diff --git a/core/librarything.py b/core/librarything.py index 5de52443..5f0a21a3 100644 --- a/core/librarything.py +++ b/core/librarything.py @@ -62,6 +62,13 @@ class LibraryThing(object): # title book_data["title"] = {"href":cols[1].xpath('.//a')[0].attrib['href'], "title":cols[1].xpath('.//a')[0].text} + + # extract work_id and book_id from href + try: + (book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups() + except: + (book_data["work_id"], book_data["book_id"]) = (None, None) + # author -- what if there is more than 1? or none? try: book_data["author"] = {"display_name":cols[2].xpath('.//a')[0].text, @@ -86,7 +93,46 @@ class LibraryThing(object): yield book_data def viewstyle_5(self, rows): - raise NotImplementedError() + # implement this view to get at the ISBNs + for (i,row) in enumerate(rows): + book_data = {} + cols = row.xpath('td') + + # title + book_data["title"] = {"href":cols[0].xpath('.//a')[0].attrib['href'], + "title":cols[0].xpath('.//a')[0].text} + + # extract work_id and book_id from href + try: + (book_data["work_id"], book_data["book_id"]) = re.match("^/work/(.*)/book/(.*)$",book_data["title"]["href"]).groups() + except: + (book_data["work_id"], book_data["book_id"]) = (None, None) + + # tags + tag_links = cols[1].xpath('.//a') + book_data["tags"] = filter(lambda x: x is not None, [a.text for a in tag_links]) + + # lc classification + try: + book_data["lc_call_number"] = cols[2].xpath('.//span')[0].text + except Exception, e: + logger.info("book lc call number exception: %s %s", book_data["title"], e) + book_data["lc_call_number"] = None + + # subject + + subjects = cols[3].xpath('.//div[@class="subjectLine"]') + book_data["subjects"] = [{'href':s.xpath('a')[0].attrib['href'], + 'text':s.xpath('a')[0].text} for s in subjects] + + # isbn + try: + book_data["isbn"] = cols[4].xpath('.//span')[0].text + except Exception, e: + book_data["isbn"] = None + + yield book_data + def parse_user_catalog(self, view_style=1): from lxml import html diff --git a/core/management/commands/librarything_load_books_2.py b/core/management/commands/librarything_load_books_2.py index 9c6b3fbf..d7c02672 100644 --- a/core/management/commands/librarything_load_books_2.py +++ b/core/management/commands/librarything_load_books_2.py @@ -11,6 +11,6 @@ class Command(BaseCommand): def handle(self, lt_username, **options): lt = librarything.LibraryThing(username=lt_username) - for (i, book) in enumerate(lt.parse_user_catalog()): - print i, book["title"] + for (i, book) in enumerate(lt.parse_user_catalog(view_style=5)): + print i, book["title"], book["isbn"], book["work_id"], book["book_id"] \ No newline at end of file diff --git a/core/models.py b/core/models.py index 2db43f82..4c133ef4 100755 --- a/core/models.py +++ b/core/models.py @@ -17,7 +17,7 @@ class CeleryTask(models.Model): user = models.ForeignKey(User, related_name="tasks", null=True) description = models.CharField(max_length=2048, null=True) # a description of what the task is function_name = models.CharField(max_length=1024) # used to reconstitute the AsyncTask with which to get status - function_args = models.IntegerField() # not full generalized here -- takes only a single arg for now. + function_args = models.IntegerField(null=True) # not full generalized here -- takes only a single arg for now. active = models.NullBooleanField(default=True) def __unicode__(self): @@ -43,13 +43,15 @@ class CeleryTask(models.Model): class Claim(models.Model): rights_holder = models.ForeignKey("RightsHolder", related_name="claim", null=False ) work = models.ForeignKey("Work", related_name="claim", null=False ) - user = models.ForeignKey(User, related_name="user", null=False ) + user = models.ForeignKey(User, related_name="claim", null=False ) created = models.DateTimeField(auto_now_add=True) class RightsHolder(models.Model): email = models.CharField(max_length=100, blank=True) rights_holder_name = models.CharField(max_length=100, blank=True) owner = models.ForeignKey(User, related_name="rights_holder", null=False ) + def __unicode__(self): + return self.rights_holder_name class Premium(models.Model): PREMIUM_TYPES = ((u'00', u'Default'),(u'CU', u'Custom')) diff --git a/frontend/forms.py b/frontend/forms.py index 652a8a5d..03dc94fc 100644 --- a/frontend/forms.py +++ b/frontend/forms.py @@ -1,9 +1,15 @@ from django import forms from django.db import models -from regluit.core.models import UserProfile, RightsHolder from django.contrib.auth.models import User from django.utils.translation import ugettext_lazy as _ from decimal import Decimal as D +from regluit.core.models import UserProfile, RightsHolder, Claim + +class ClaimForm(forms.ModelForm): + i_agree=forms.BooleanField() + class Meta: + model = Claim + widgets = { 'user': forms.HiddenInput, 'work': forms.HiddenInput } class RightsHolderForm(forms.ModelForm): class Meta: diff --git a/frontend/templates/claim.html b/frontend/templates/claim.html new file mode 100644 index 00000000..10be3679 --- /dev/null +++ b/frontend/templates/claim.html @@ -0,0 +1,20 @@ +{% extends "basedocumentation.html" %} + +{% block doccontent %} + +
Author: {{claim.work.author }}
+On Behalf of: {{ claim.rights_holder.rights_holder_name }}
+PSA #: {{ claim.rights_holder.id }}
+ + {% endfor %} +{% endif %}This work has been claimed by:
+