From 2f532b97f9ecc399048cddf876c7046e180b5c4e Mon Sep 17 00:00:00 2001 From: eric Date: Mon, 9 Jul 2018 15:46:36 -0400 Subject: [PATCH 01/13] scrape multiple books from one url --- core/loaders/doab_utils.py | 3 + core/loaders/multiscrape.py | 94 ++++++++++++++++++++++++++++ core/loaders/scrape.py | 29 +++++---- core/management/commands/load_edp.py | 10 +++ 4 files changed, 123 insertions(+), 13 deletions(-) create mode 100644 core/loaders/multiscrape.py create mode 100644 core/management/commands/load_edp.py diff --git a/core/loaders/doab_utils.py b/core/loaders/doab_utils.py index ceef8bb7..4db1c42b 100644 --- a/core/loaders/doab_utils.py +++ b/core/loaders/doab_utils.py @@ -124,6 +124,9 @@ def online_to_download(url): booknum = FRONTIERSIN.search(url).group(1) urls.append(u'https://www.frontiersin.org/GetFile.aspx?ebook={}&fileformat=EPUB'.format(booknum)) urls.append(u'https://www.frontiersin.org/GetFile.aspx?ebook={}&fileformat=PDF'.format(booknum)) + elif url.find(u'edp-open.org/books-in-') >= 0: + # pages needing multi-scrape + return urls else: urls.append(url) return urls diff --git a/core/loaders/multiscrape.py b/core/loaders/multiscrape.py new file mode 100644 index 00000000..ba98ad81 --- /dev/null +++ b/core/loaders/multiscrape.py @@ -0,0 +1,94 @@ +import logging +import re +from urlparse import urljoin + +from bs4 import BeautifulSoup +import requests + +from django.conf import settings + +from regluit.core.bookloader import add_from_bookdatas +from regluit.core.loaders.scrape import BaseScraper +from regluit.core.validation import identifier_cleaner + +logger = logging.getLogger(__name__) +''' +use for web pages with multiple books +returns an iterator of scrapers +''' + +class BaseMultiScraper(BaseScraper): + def __init__(self, url, doc): + self.metadata = {} + self.identifiers = {'http': url} + self.doc = doc + self.base = url + self.get_all() + if not self.metadata.get('title', None): + self.set('title', '!!! 
missing title !!!')
+        if not self.metadata.get('language', None):
+            self.set('language', 'en')
+        self.metadata['identifiers'] = self.identifiers
+
+def multiscrape(url, divider, scraper_class=BaseMultiScraper):
+    try:
+        response = requests.get(url, headers={"User-Agent": settings.USER_AGENT})
+        if response.status_code == 200:
+            doc = BeautifulSoup(response.content, 'lxml')
+            sections = divider(doc)
+            for section in sections:
+                yield scraper_class(url, section)
+    except requests.exceptions.RequestException as e:
+        logger.error(e)
+        return
+
+
+# following is code specific to edp-open.org; refactor when we add another
+
+def divider(doc):
+    return doc.select('article.Bk')
+
+ISBNMATCH = re.compile(r'([\d\-]+)')
+class EDPMultiScraper(BaseMultiScraper):
+    def get_isbns(self):
+        '''return a dict of edition keys and ISBNs'''
+        isbns = {}
+        isbn_cleaner = identifier_cleaner('isbn', quiet=True)
+        labels = ['epub', 'pdf', 'paper']
+        info = self.doc.select_one('p.nfo').text
+        isbntexts = re.split('ISBN', info)
+        for isbntext in isbntexts[1:]:
+            isbnmatch = ISBNMATCH.search(isbntext)
+            if isbnmatch:
+                isbn = isbn_cleaner(isbnmatch.group(0))
+                isbns[labels.pop()] = isbn
+        return isbns
+
+    def get_downloads(self):
+        dl = self.doc.select_one('nav.dl')
+        links = dl.select('a.fulldl')
+        for link in links:
+            href = urljoin(self.base, link['href'])
+            if href.endswith('.pdf'):
+                self.set('download_url_pdf', href)
+            elif href.endswith('.epub'):
+                self.set('download_url_epub', href)
+
+    def get_language(self):
+        self.set('language', 'fr')
+
+    def get_title(self):
+        value = self.doc.select_one('h2').text
+        book_id = self.doc.select_one('h2')['id']
+        self.identifiers['http'] = u'{}#{}'.format(self.base, book_id)
+        self.set('title', value)
+
+def edp_scrape():
+    edp_urls = [
+        'https://www.edp-open.org/books-in-french',
+        'https://www.edp-open.org/books-in-english',
+    ]
+    for url in edp_urls:
+        scrapers = multiscrape(url, divider, scraper_class=EDPMultiScraper)
+        add_from_bookdatas(scrapers)
+
diff --git a/core/loaders/scrape.py b/core/loaders/scrape.py
index 04a40e70..521748fd 100644
--- a/core/loaders/scrape.py
+++ b/core/loaders/scrape.py
@@ -51,19 +51,7 @@ class BaseScraper(object):
             self.doc = BeautifulSoup(response.content, 'lxml')
             for review in self.doc.find_all(itemtype="http://schema.org/Review"):
                 review.clear()
-            self.setup()
-            self.get_genre()
-            self.get_title()
-            self.get_language()
-            self.get_description()
-            self.get_identifiers()
-            self.get_keywords()
-            self.get_publisher()
-            self.get_pubdate()
-            self.get_authors()
-            self.get_cover()
-            self.get_downloads()
-            self.get_license()
+            self.get_all()
         if not self.metadata.get('title', None):
             self.set('title', '!!! missing title !!!')
         if not self.metadata.get('language', None):
@@ -140,6 +128,21 @@ class BaseScraper(object):
            elif el.has_key('content'):
                value_list.append(el['content'])
        return value_list
+
+    def get_all(self):
+        self.setup()
+        self.get_genre()
+        self.get_title()
+        self.get_language()
+        self.get_description()
+        self.get_identifiers()
+        self.get_keywords()
+        self.get_publisher()
+        self.get_pubdate()
+        self.get_authors()
+        self.get_cover()
+        self.get_downloads()
+        self.get_license()
 
     def setup(self):
         # use this method to get auxiliary resources based on doc
diff --git a/core/management/commands/load_edp.py b/core/management/commands/load_edp.py
new file mode 100644
index 00000000..55961052
--- /dev/null
+++ b/core/management/commands/load_edp.py
@@ -0,0 +1,10 @@
+from django.core.management.base import BaseCommand
+
+from regluit.core.loaders.multiscrape import edp_scrape
+
+
+class Command(BaseCommand):
+    help = "load books from edp-open"
+
+    def handle(self, **options):
+        edp_scrape()

From ec3d26118e3325363c21097e4ca163acf649f177 Mon Sep 17 00:00:00 2001
From: eric
Date: Tue, 10 Jul 2018 13:58:06 -0400
Subject: [PATCH 02/13] fr/en

---
 core/loaders/multiscrape.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/core/loaders/multiscrape.py b/core/loaders/multiscrape.py
index ba98ad81..21e2140e 100644
--- a/core/loaders/multiscrape.py
+++ b/core/loaders/multiscrape.py
@@ -75,6 +75,8 @@ class EDPMultiScraper(BaseMultiScraper):
                 self.set('download_url_epub', href)
 
     def get_language(self):
+        if 'english' in self.base:
+            self.set('language', 'en')
         self.set('language', 'fr')
 
     def get_title(self):

From 40794ee3f9de897fa51689fe96b18b6eda1318de Mon Sep 17 00:00:00 2001
From: eric
Date: Tue, 10 Jul 2018 13:58:38 -0400
Subject: [PATCH 03/13] use rights info to set rights

---
 core/bookloader.py   |  2 +-
 core/loaders/doab.py | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/core/bookloader.py b/core/bookloader.py
index a9fed0c2..688a0edf 100755
--- a/core/bookloader.py
+++ b/core/bookloader.py
@@ -986,6 +986,7 @@ class BasePandataLoader(object):
 
     def load_ebooks(self, metadata, edition, test_mode=False, user=None):
         default_edition = edition
+        license = cc.license_from_cc_url(metadata.rights_url)
         for key in ['epub', 'pdf', 'mobi']:
             url = metadata.metadata.get('download_url_{}'.format(key), None)
             if url:
@@ -995,7 +996,6 @@ class BasePandataLoader(object):
                 if contentfile:
                     contentfile_name = '/loaded/ebook_{}.{}'.format(edition.id, key)
                     path = default_storage.save(contentfile_name, contentfile)
-                    license = cc.license_from_cc_url(metadata.rights_url)
                     ebf = models.EbookFile.objects.create(
                         format=key,
                         edition=edition,
diff --git a/core/loaders/doab.py b/core/loaders/doab.py
index 6d364328..c45b02c6 100644
--- a/core/loaders/doab.py
+++ b/core/loaders/doab.py
@@ -65,7 +65,12 @@ def store_doab_cover(doab_id, redo=False):
     else:
         r = requests.get(url)
         cover_file = ContentFile(r.content)
-        cover_file.content_type = r.headers.get('content-type', '')
+        content_type = r.headers.get('content-type', '')
+        if u'text/html' in content_type:
+            logger.warning('Cover URL returned HTML for doab_id={}: {}'.format(doab_id, url))
+            return (None, False)
+        cover_file.content_type = content_type
+
         default_storage.save(cover_file_name, cover_file)
         return (default_storage.url(cover_file_name), True)
 
@@ -287,6 +292,12 @@ def load_doab_edition(title, doab_id, url, format, rights,
         publisher_name=unlist(kwargs.get('publisher')),
         authors=kwargs.get('creator'),
     )
+    if rights:
+        for ebook in edition.ebooks.all():
+            if 
not ebook.rights: + ebook.rights = rights + ebook.save() + return edition # From 1b4beb0b0b600a9f975c4f277093a5863bd4c8d6 Mon Sep 17 00:00:00 2001 From: eric Date: Tue, 10 Jul 2018 13:59:05 -0400 Subject: [PATCH 04/13] fix thumbnail fails --- core/models/bibmodels.py | 48 +++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/core/models/bibmodels.py b/core/models/bibmodels.py index 4347c6f5..d804eba7 100644 --- a/core/models/bibmodels.py +++ b/core/models/bibmodels.py @@ -866,39 +866,47 @@ class Edition(models.Model): def cover_image_large(self): #550 pixel high image if self.cover_image: - im = get_thumbnail(self.cover_image, 'x550', crop='noop', quality=95) - if im.exists(): - return im.url + try: + im = get_thumbnail(self.cover_image, 'x550', crop='noop', quality=95) + if im.exists(): + return im.url + except IOError: + pass elif self.googlebooks_id: url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=0" % self.googlebooks_id - im = get_thumbnail(url, 'x550', crop='noop', quality=95) - if not im.exists() or im.storage.size(im.name) == 16392: # check for "image not available" image - url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id + try: im = get_thumbnail(url, 'x550', crop='noop', quality=95) - if im.exists(): - return im.url - else: - return '' - else: - return '' + if not im.exists() or im.storage.size(im.name) == 16392: # check for "image not available" image + url = "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id + im = get_thumbnail(url, 'x550', crop='noop', quality=95) + if im.exists(): + return im.url + except IOError: + pass + return '' def cover_image_small(self): #80 pixel high image if self.cover_image: - im = get_thumbnail(self.cover_image, 'x80', crop='noop', quality=95) - if im.exists(): - return im.url + try: + im = get_thumbnail(self.cover_image, 'x80', crop='noop', quality=95) + if im.exists(): + return im.url + except IOError: + pass if self.googlebooks_id: return "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=5" % self.googlebooks_id - else: - return '' + return '' def cover_image_thumbnail(self): #128 pixel wide image if self.cover_image: - im = get_thumbnail(self.cover_image, '128', crop='noop', quality=95) - if im.exists(): - return im.url + try: + im = get_thumbnail(self.cover_image, '128', crop='noop', quality=95) + if im.exists(): + return im.url + except IOError: + pass if self.googlebooks_id: return "https://encrypted.google.com/books?id=%s&printsec=frontcover&img=1&zoom=1" % self.googlebooks_id else: From da601a77f6379ccb920150941ec101dbe19ddb9e Mon Sep 17 00:00:00 2001 From: eric Date: Wed, 11 Jul 2018 13:41:52 -0400 Subject: [PATCH 05/13] final fixes --- core/loaders/multiscrape.py | 5 ++--- requirements_versioned.pip | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/core/loaders/multiscrape.py b/core/loaders/multiscrape.py index 21e2140e..1a42a24b 100644 --- a/core/loaders/multiscrape.py +++ b/core/loaders/multiscrape.py @@ -26,8 +26,6 @@ class BaseMultiScraper(BaseScraper): self.get_all() if not self.metadata.get('title', None): self.set('title', '!!! 
missing title !!!') - if not self.metadata.get('language', None): - self.set('language', 'en') self.metadata['identifiers'] = self.identifiers def multiscrape(url, divider, scraper_class=BaseMultiScraper): @@ -77,7 +75,8 @@ class EDPMultiScraper(BaseMultiScraper): def get_language(self): if 'english' in self.base: self.set('language', 'en') - self.set('language', 'fr') + else: + self.set('language', 'fr') def get_title(self): value = self.doc.select_one('h2').text diff --git a/requirements_versioned.pip b/requirements_versioned.pip index f1e0f02d..38481e0e 100644 --- a/requirements_versioned.pip +++ b/requirements_versioned.pip @@ -41,7 +41,7 @@ django-tastypie==0.13.3 git+git://github.com/resulto/django-transmeta.git@ad4d7278ba330dcf8c8446f8ae9b2c769ae8684e fef-questionnaire==4.0.1 #gitenberg.metadata==0.1.6 -git+https://github.com/gitenberg-dev/gitberg-build +git+git://github.com/gitenberg-dev/gitberg-build.git@61a5fb0011e1a547b1eac14dd845ce37dbb5f85a #git+ssh://git@github.com/gitenberg-dev/metadata.git@0.1.11 github3.py==0.9.5 html5lib==1.0.1 From ee03d2d434440096f4adae9d42444be135df3650 Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 12 Jul 2018 12:56:09 -0400 Subject: [PATCH 06/13] add hosts --- bookdata/sitemaps.txt | 10 +++++++++- core/loaders/ubiquity.py | 4 +++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/bookdata/sitemaps.txt b/bookdata/sitemaps.txt index b91f2814..8f31f2a0 100644 --- a/bookdata/sitemaps.txt +++ b/bookdata/sitemaps.txt @@ -6,4 +6,12 @@ https://oa.psupress.org/sitemap.xml https://www.larcommons.net/sitemap.xml https://www.uwestminsterpress.co.uk/sitemap.xml https://www.stockholmuniversitypress.se/sitemap.xml -https://www.luminosoa.org/sitemap.xml \ No newline at end of file +https://www.luminosoa.org/sitemap.xml +https://iitikship.iiti.ac.in/sitemap.xml +https://aperio.press/sitemap.xml +https://press.lse.ac.uk/sitemap.xml +https://press.sjms.nu/sitemap.xml +https://trystingtree.library.oregonstate.edu/sitemap.xml +https://publishing.vt.edu/sitemap.xml +https://universitypress.whiterose.ac.uk/sitemap.xml +https://www.winchesteruniversitypress.org/sitemap.xml \ No newline at end of file diff --git a/core/loaders/ubiquity.py b/core/loaders/ubiquity.py index c346cec4..05334b88 100644 --- a/core/loaders/ubiquity.py +++ b/core/loaders/ubiquity.py @@ -8,7 +8,9 @@ from . import BaseScraper HAS_EDS = re.compile(r'\(eds?\.\)') UBIQUITY_HOSTS = ["ubiquitypress.com", "kriterium.se", "oa.finlit.fi", "humanities-map.net", "oa.psupress.org", "larcommons.net", "uwestminsterpress.co.uk", "stockholmuniversitypress.se", - "luminosoa.org", + "luminosoa.org", "iitikship.iiti.ac.in", "aperio.press", "press.lse.ac.uk", "press.sjms.nu", + "trystingtree.library.oregonstate.edu", "publishing.vt.edu", "universitypress.whiterose.ac.uk", + "www.winchesteruniversitypress.org", ] class UbiquityScraper(BaseScraper): From 311d6fa0be5dd29c6a4652c311528dc078ba45ca Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 12 Jul 2018 12:56:57 -0400 Subject: [PATCH 07/13] fix rare merge issue --- core/bookloader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/bookloader.py b/core/bookloader.py index 688a0edf..ea88aafb 100755 --- a/core/bookloader.py +++ b/core/bookloader.py @@ -909,10 +909,9 @@ class BasePandataLoader(object): if work and id.work and id.work_id is not work.id: # dangerous! 
merge newer into older if work.id < id.work_id: - merge_works(work, id.work) + work = merge_works(work, id.work) else: - merge_works(id.work, work) - work = id.work + work = merge_works(id.work, work) else: work = id.work if id.edition and not edition: From 9f98ddddfe4eb02e50ad69227d3ac9597e393fbb Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 19 Jul 2018 17:38:36 -0400 Subject: [PATCH 08/13] update to factored social_auth addresses facebook's limits on passing state in redirect; (also addresses dj111 compatibility) updated httplib2 fixes goodreads issue --- libraryauth/auth.py | 10 +++++----- libraryauth/urls.py | 2 +- requirements_versioned.pip | 7 ++++--- settings/common.py | 30 +++++++++++++++--------------- settings/dummy/host.py | 1 + 5 files changed, 26 insertions(+), 24 deletions(-) diff --git a/libraryauth/auth.py b/libraryauth/auth.py index 3876fd49..4c326601 100644 --- a/libraryauth/auth.py +++ b/libraryauth/auth.py @@ -4,11 +4,11 @@ from django.http import HttpResponse from django.shortcuts import redirect from django.utils.http import urlquote -from social.pipeline.social_auth import associate_by_email -from social.apps.django_app.default.models import UserSocialAuth -from social.apps.django_app.middleware import SocialAuthExceptionMiddleware -from social.exceptions import (AuthAlreadyAssociated,SocialAuthBaseException) -from social.utils import social_logger +from social_core.pipeline.social_auth import associate_by_email +from social_django.models import UserSocialAuth +from social_django.middleware import SocialAuthExceptionMiddleware +from social_core.exceptions import (AuthAlreadyAssociated, SocialAuthBaseException) +from social_core.utils import social_logger ANONYMOUS_AVATAR = '/static/images/header/avatar.png' (NO_AVATAR, GRAVATAR, TWITTER, FACEBOOK, PRIVATETAR) = (0, 1, 2, 3, 4) diff --git a/libraryauth/urls.py b/libraryauth/urls.py index a14b3cd3..6b1e889c 100644 --- a/libraryauth/urls.py +++ b/libraryauth/urls.py @@ -65,7 +65,7 @@ urlpatterns = [ 'password_reset_form': forms.SocialAwarePasswordResetForm}, name='libraryauth_password_reset'), - url(r'^socialauth/', include('social.apps.django_app.urls', namespace='social')), + url(r'^socialauth/', include('social_django.urls', namespace='social')), url('accounts/', include('email_change.urls')), url(r'^accounts/', include('registration.backends.model_activation.urls')), url(r'^accounts/', include('django.contrib.auth.urls')), diff --git a/requirements_versioned.pip b/requirements_versioned.pip index 38481e0e..e0916ca3 100644 --- a/requirements_versioned.pip +++ b/requirements_versioned.pip @@ -45,7 +45,7 @@ git+git://github.com/gitenberg-dev/gitberg-build.git@61a5fb0011e1a547b1eac14dd84 #git+ssh://git@github.com/gitenberg-dev/metadata.git@0.1.11 github3.py==0.9.5 html5lib==1.0.1 -httplib2==0.7.5 +httplib2==0.11.3 isodate==0.5.1 kombu==3.0.35 lxml==4.2.1 @@ -54,7 +54,7 @@ mechanize==0.2.5 mimeparse==0.1.3 nose==1.1.2 numpy==1.11.2 -oauth2==1.5.211 +oauth2==1.9.0.post1 oauthlib==1.1.2 pandas==0.19.1 paramiko==1.14.1 @@ -66,7 +66,8 @@ pyparsing==2.0.3 python-dateutil==2.5.3 python-mimeparse==0.1.4 python-openid==2.2.5 -python-social-auth==0.2.21 +social-auth-core==1.7.0 +social-auth-app-django==2.1.0 pytz==2016.6.1 rdflib==4.2.0 rdflib-jsonld==0.3 diff --git a/settings/common.py b/settings/common.py index e9d2016b..8dc750be 100644 --- a/settings/common.py +++ b/settings/common.py @@ -157,7 +157,7 @@ INSTALLED_APPS = ( 'regluit.payment', 'regluit.utils', 'registration', - 'social.apps.django_app.default', + 
'social_django', 'tastypie', 'djcelery', 'el_pagination', @@ -243,11 +243,11 @@ SESSION_COOKIE_AGE = 3628800 # 6 weeks # django-socialauth AUTHENTICATION_BACKENDS = ( - 'social.backends.google.GoogleOAuth2', - 'social.backends.twitter.TwitterOAuth', - 'social.backends.yahoo.YahooOpenId', - 'social.backends.facebook.FacebookOAuth2', - 'social.backends.open_id.OpenIdAuth', + 'social_core.backends.google.GoogleOAuth2', + 'social_core.backends.twitter.TwitterOAuth', + 'social_core.backends.yahoo.YahooOpenId', + 'social_core.backends.facebook.FacebookOAuth2', + 'social_core.backends.open_id.OpenIdAuth', 'django.contrib.auth.backends.ModelBackend', ) @@ -265,50 +265,50 @@ SOCIAL_AUTH_PIPELINE = ( # format to create the user instance later. On some cases the details are # already part of the auth response from the provider, but sometimes this # could hit a provider API. - 'social.pipeline.social_auth.social_details', + 'social_core.pipeline.social_auth.social_details', # Get the social uid from whichever service we're authing thru. The uid is # the unique identifier of the given user in the provider. - 'social.pipeline.social_auth.social_uid', + 'social_core.pipeline.social_auth.social_uid', # Verifies that the current auth process is valid within the current # project, this is were emails and domains whitelists are applied (if # defined). - 'social.pipeline.social_auth.auth_allowed', + 'social_core.pipeline.social_auth.auth_allowed', # Checks if the current social-account is already associated in the site. 'regluit.libraryauth.auth.selective_social_user', # Make up a username for this person, appends a random string at the end if # there's any collision. - 'social.pipeline.user.get_username', + 'social_core.pipeline.user.get_username', # make username < 222 in length 'regluit.libraryauth.auth.chop_username', # Send a validation email to the user to verify its email address. # Disabled by default. - # 'social.pipeline.mail.mail_validation', + # 'social_core.pipeline.mail.mail_validation', # Associates the current social details with another user account with # a similar email address. don't use twitter or facebook to log in 'regluit.libraryauth.auth.selectively_associate_by_email', # Create a user account if we haven't found one yet. - 'social.pipeline.user.create_user', + 'social_core.pipeline.user.create_user', # Create the record that associated the social account with this user. - 'social.pipeline.social_auth.associate_user', + 'social_core.pipeline.social_auth.associate_user', # Populate the extra_data field in the social record with the values # specified by settings (and the default ones like access_token, etc). - 'social.pipeline.social_auth.load_extra_data', + 'social_core.pipeline.social_auth.load_extra_data', # add extra data to user profile 'regluit.libraryauth.auth.deliver_extra_data', # Update the user record with any changed info from the auth service. 
- 'social.pipeline.user.user_details' + 'social_core.pipeline.user.user_details' ) SOCIAL_AUTH_TWITTER_EXTRA_DATA = [('profile_image_url_https', 'profile_image_url_https'),('screen_name','screen_name')] diff --git a/settings/dummy/host.py b/settings/dummy/host.py index 9819dc7f..39d19c6f 100644 --- a/settings/dummy/host.py +++ b/settings/dummy/host.py @@ -22,6 +22,7 @@ EMAIL_HOST_PASSWORD = os.environ.get("EMAIL_HOST_PASSWORD", '012345678901234567 # twitter auth # you'll need to create a new Twitter application to fill in these blanks # https://dev.twitter.com/apps/new +# the field for redirect url must be filled in with https://unglue.it/socialauth/complete/twitter/? SOCIAL_AUTH_TWITTER_KEY = os.environ.get("SOCIAL_AUTH_TWITTER_KEY", '0123456789012345678901234') SOCIAL_AUTH_TWITTER_SECRET = os.environ.get("SOCIAL_AUTH_TWITTER_SECRET", '01234567890123456789012345678901234567890123456789') From f453555bf32c5c4ca8b2745b8040d65f53742e2e Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 20 Jul 2018 07:54:14 -0400 Subject: [PATCH 09/13] redirects should send user back to supporter page --- frontend/templates/supporter.html | 8 ++++---- frontend/views/__init__.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/frontend/templates/supporter.html b/frontend/templates/supporter.html index 8c06deb5..a533e3ca 100644 --- a/frontend/templates/supporter.html +++ b/frontend/templates/supporter.html @@ -186,16 +186,16 @@ function highlightTarget(targetdiv) {
{% if supporter.profile.twitter_id %} - Update your Twitter connection
or disconnect Twitter: {{ profile_form.clear_twitter }} + Update your Twitter connection
or disconnect Twitter: {{ profile_form.clear_twitter }} {% else %} - Connect your Twitter account to Unglue.it + Connect your Twitter account to Unglue.it {% endif %}
{% if supporter.profile.facebook_id %} - Update your Facebook connection
or disconnect Facebook: {{ profile_form.clear_facebook }} + Update your Facebook connection
or disconnect Facebook: {{ profile_form.clear_facebook }} {% else %} - Connect your Facebook account to Unglue.it + Connect your Facebook account to Unglue.it {% endif %}
diff --git a/frontend/views/__init__.py b/frontend/views/__init__.py index 84e764bc..8f583d99 100755 --- a/frontend/views/__init__.py +++ b/frontend/views/__init__.py @@ -2236,7 +2236,7 @@ def goodreads_cb(request): profile.save() # is this needed? # redirect to the Goodreads display page -- should observe some next later - return HttpResponseRedirect(reverse('home')) + return HttpResponseRedirect(reverse('supporter', args=[request.user])) @require_POST @login_required From 26d65e8793b7ff05a75fdb66cd76c1ec1171db68 Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 20 Jul 2018 13:03:51 -0400 Subject: [PATCH 10/13] facebook id not useful; get pic instead --- core/models/__init__.py | 7 ++----- libraryauth/auth.py | 20 +++++++++----------- settings/common.py | 4 +++- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/core/models/__init__.py b/core/models/__init__.py index 662ddca3..1641b61a 100755 --- a/core/models/__init__.py +++ b/core/models/__init__.py @@ -1314,7 +1314,7 @@ class UserProfile(models.Model): @property def avatar_url(self): - if self.avatar_source is None or self.avatar_source is TWITTER: + if self.avatar_source is None or self.avatar_source in (TWITTER, FACEBOOK): if self.pic_url: return self.pic_url else: @@ -1323,10 +1323,7 @@ class UserProfile(models.Model): return self.unglueitar() elif self.avatar_source == GRAVATAR: return self.gravatar() - elif self.avatar_source == FACEBOOK and self.facebook_id != None: - return 'https://graph.facebook.com/v2.3/' + str(self.facebook_id) + '/picture?redirect=true' - else: - return ANONYMOUS_AVATAR + return ANONYMOUS_AVATAR @property def social_auths(self): diff --git a/libraryauth/auth.py b/libraryauth/auth.py index 4c326601..bd337f1f 100644 --- a/libraryauth/auth.py +++ b/libraryauth/auth.py @@ -8,7 +8,6 @@ from social_core.pipeline.social_auth import associate_by_email from social_django.models import UserSocialAuth from social_django.middleware import SocialAuthExceptionMiddleware from social_core.exceptions import (AuthAlreadyAssociated, SocialAuthBaseException) -from social_core.utils import social_logger ANONYMOUS_AVATAR = '/static/images/header/avatar.png' (NO_AVATAR, GRAVATAR, TWITTER, FACEBOOK, PRIVATETAR) = (0, 1, 2, 3, 4) @@ -30,19 +29,18 @@ def selectively_associate_by_email(backend, details, user=None, *args, **kwargs) return None return associate_by_email(backend, details, user=None, *args, **kwargs) -def facebook_extra_values( user, extra_data): +def facebook_extra_values(user, extra_data): try: - facebook_id = extra_data.get('id') - user.profile.facebook_id = facebook_id + user.profile.pic_url = extra_data['picture']['data']['url'] if user.profile.avatar_source is None or user.profile.avatar_source is PRIVATETAR: user.profile.avatar_source = FACEBOOK user.profile.save() return True - except Exception,e: - logger.error(e) - return False + except Exception, e: + logger.exception(e) + return -def twitter_extra_values( user, extra_data): +def twitter_extra_values(user, extra_data): try: twitter_id = extra_data.get('screen_name') profile_image_url = extra_data.get('profile_image_url_https') @@ -57,11 +55,11 @@ def twitter_extra_values( user, extra_data): logger.error(e) return False -def deliver_extra_data(backend, user, social, *args, **kwargs): +def deliver_extra_data(backend, user, social, response, *args, **kwargs): if backend.name is 'twitter': twitter_extra_values( user, social.extra_data) if backend.name is 'facebook': - facebook_extra_values( user, social.extra_data) + facebook_extra_values( user, 
response) # following is needed because of length limitations in a unique constrain for MySQL def chop_username(username, *args, **kwargs): @@ -98,7 +96,7 @@ class SocialAuthExceptionMiddlewareWithoutMessages(SocialAuthExceptionMiddleware backend_name = getattr(backend, 'name', 'unknown-backend') message = self.get_message(request, exception) - social_logger.error(message) + logger.warning(message) url = self.get_redirect_uri(request, exception) url += ('?' in url and '&' or '?') + \ diff --git a/settings/common.py b/settings/common.py index 8dc750be..aef82e98 100644 --- a/settings/common.py +++ b/settings/common.py @@ -254,11 +254,13 @@ AUTHENTICATION_BACKENDS = ( SOCIAL_AUTH_ENABLED_BACKENDS = ['google', 'facebook', 'twitter'] #SOCIAL_AUTH_ASSOCIATE_BY_MAIL = True SOCIAL_AUTH_NEW_USER_REDIRECT_URL = '/' -FACEBOOK_SOCIAL_AUTH_BACKEND_ERROR_URL = '/' SOCIAL_AUTH_SLUGIFY_USERNAMES = True SOCIAL_AUTH_NONCE_SERVER_URL_LENGTH = 200 SOCIAL_AUTH_ASSOCIATION_SERVER_URL_LENGTH = 135 SOCIAL_AUTH_ASSOCIATION_HANDLE_LENGTH = 125 +SOCIAL_AUTH_FACEBOOK_PROFILE_EXTRA_PARAMS = {'fields': 'picture'} +SOCIAL_AUTH_FACEBOOK_LOGIN_ERROR_URL = '/' +SOCIAL_AUTH_TWITTER_LOGIN_ERROR_URL = '/' SOCIAL_AUTH_PIPELINE = ( # Get the information we can about the user and return it in a simple From 5455c21d269abedcf65c6b1b91ad80589d458009 Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 20 Jul 2018 15:10:38 -0400 Subject: [PATCH 11/13] cleanup after the facebook changes --- core/management/commands/fix_avatars.py | 22 +++++++++++++++ .../commands/fix_twitter_avatars.py | 16 ----------- core/migrations/0015_auto_20180720_1413.py | 25 +++++++++++++++++ core/models/__init__.py | 5 ++-- frontend/forms/__init__.py | 14 ++-------- frontend/templates/supporter.html | 2 ++ libraryauth/auth.py | 27 +++++++++++++++++-- 7 files changed, 78 insertions(+), 33 deletions(-) create mode 100644 core/management/commands/fix_avatars.py delete mode 100644 core/management/commands/fix_twitter_avatars.py create mode 100644 core/migrations/0015_auto_20180720_1413.py diff --git a/core/management/commands/fix_avatars.py b/core/management/commands/fix_avatars.py new file mode 100644 index 00000000..f7d69f8d --- /dev/null +++ b/core/management/commands/fix_avatars.py @@ -0,0 +1,22 @@ +import string +from django.core.management.base import BaseCommand +from regluit.core.models import FACEBOOK, UNGLUEITAR +from regluit.libraryauth.auth import pic_storage_url + +from regluit.core import models + +class Command(BaseCommand): + help = "fix avatar urls and settings" + + def handle(self, **options): + for profile in models.UserProfile.objects.exclude(pic_url=''): + print "updating user %s" % profile.user + if not profile.pic_url.startswith('https://unglueit'): + profile.pic_url = pic_storage_url(profile.user, 'twitter', profile.pic_url) + profile.save() + for profile in models.UserProfile.objects.filter(avatar_source=FACEBOOK): + print "updating user %s" % profile.user + profile.facebook_id = '' + if not profile.pic_url: + profile.avatar_source = UNGLUEITAR + profile.save() diff --git a/core/management/commands/fix_twitter_avatars.py b/core/management/commands/fix_twitter_avatars.py deleted file mode 100644 index 52051309..00000000 --- a/core/management/commands/fix_twitter_avatars.py +++ /dev/null @@ -1,16 +0,0 @@ -import string -from django.core.management.base import BaseCommand -from regluit.core.models import TWITTER - -from regluit.core import models - -class Command(BaseCommand): - help = "fix old twitter avatar urls" - - def handle(self, 
**options): - print "Number of users affected with : %s" % models.UserProfile.objects.filter( pic_url__contains='//si0.twimg.com').count() - - for profile in models.UserProfile.objects.filter(pic_url__contains='//si0.twimg.com'): - print "updating user %s" % profile.user - profile.pic_url = string.replace( profile.pic_url, '//si0.twimg.com','//pbs.twimg.com') - profile.save() diff --git a/core/migrations/0015_auto_20180720_1413.py b/core/migrations/0015_auto_20180720_1413.py new file mode 100644 index 00000000..e9bf39bf --- /dev/null +++ b/core/migrations/0015_auto_20180720_1413.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0014_auto_20180618_1646'), + ] + + operations = [ + migrations.AlterField( + model_name='userprofile', + name='avatar_source', + field=models.PositiveSmallIntegerField(default=4, null=True, choices=[(0, b'No Avatar, Please'), (1, b'Gravatar'), (2, b'Twitter/Facebook'), (4, b'Unglueitar')]), + ), + migrations.AlterField( + model_name='userprofile', + name='facebook_id', + field=models.CharField(default='', max_length=31, blank=True), + preserve_default=False, + ), + ] diff --git a/core/models/__init__.py b/core/models/__init__.py index 1641b61a..76de86dc 100755 --- a/core/models/__init__.py +++ b/core/models/__init__.py @@ -1163,7 +1163,7 @@ class UserProfile(models.Model): pic_url = models.URLField(blank=True) home_url = models.URLField(blank=True) twitter_id = models.CharField(max_length=15, blank=True) - facebook_id = models.BigIntegerField(null=True, blank=True) + facebook_id = models.CharField(max_length=31, blank=True) librarything_id = models.CharField(max_length=31, blank=True) badges = models.ManyToManyField('Badge', related_name='holders', blank=True) kindle_email = models.EmailField(max_length=254, blank=True) @@ -1183,8 +1183,7 @@ class UserProfile(models.Model): choices=( (NO_AVATAR, 'No Avatar, Please'), (GRAVATAR, 'Gravatar'), - (TWITTER, 'Twitter'), - (FACEBOOK, 'Facebook'), + (TWITTER, 'Twitter/Facebook'), (UNGLUEITAR, 'Unglueitar'), ) ) diff --git a/frontend/forms/__init__.py b/frontend/forms/__init__.py index 64233dea..050bb9e0 100644 --- a/frontend/forms/__init__.py +++ b/frontend/forms/__init__.py @@ -188,7 +188,7 @@ class ProfileForm(forms.ModelForm): class Meta: model = UserProfile - fields = 'tagline', 'librarything_id', 'home_url', 'clear_facebook', 'clear_twitter', 'clear_goodreads', 'avatar_source' + fields = 'tagline', 'librarything_id', 'facebook_id', 'home_url', 'clear_facebook', 'clear_twitter', 'clear_goodreads', 'avatar_source' widgets = { 'tagline': forms.Textarea(attrs={'rows': 5, 'onKeyUp': "counter(this, 140)", 'onBlur': "counter(this, 140)"}), } @@ -198,22 +198,12 @@ class ProfileForm(forms.ModelForm): super(ProfileForm, self).__init__(*args, **kwargs) choices = [] for choice in self.fields['avatar_source'].choices : - if choice[0] == FACEBOOK and not profile.facebook_id: - pass - elif choice[0] == TWITTER and not profile.twitter_id: + if choice[0] == TWITTER and not profile.pic_url: pass else: choices.append(choice) self.fields['avatar_source'].choices = choices - def clean(self): - # check that if a social net is cleared, we're not using it a avatar source - if self.cleaned_data.get("clear_facebook", False) and self.cleaned_data.get("avatar_source", None) == FACEBOOK: - self.cleaned_data["avatar_source"] == UNGLUEITAR - if self.cleaned_data.get("clear_twitter", False) and 
self.cleaned_data.get("avatar_source", None) == TWITTER: - self.cleaned_data["avatar_source"] == UNGLUEITAR - return self.cleaned_data - def getTransferCreditForm(maximum, data=None, *args, **kwargs ): class TransferCreditForm(forms.Form): recipient = AutoCompleteSelectField( diff --git a/frontend/templates/supporter.html b/frontend/templates/supporter.html index a533e3ca..674c8c51 100644 --- a/frontend/templates/supporter.html +++ b/frontend/templates/supporter.html @@ -197,6 +197,8 @@ function highlightTarget(targetdiv) { {% else %} Connect your Facebook account to Unglue.it {% endif %} + + {{ profile_form.facebook_id }}{{ profile_form.facebook_id.errors }}
                     {% if user.profile.goodreads_user_id %}
diff --git a/libraryauth/auth.py b/libraryauth/auth.py
index bd337f1f..9ee036c3 100644
--- a/libraryauth/auth.py
+++ b/libraryauth/auth.py
@@ -1,9 +1,12 @@
 import logging
+import requests
 
 from django.http import HttpResponse
 from django.shortcuts import redirect
 from django.utils.http import urlquote
+from django.core.files.base import ContentFile
+from django.core.files.storage import default_storage
 
 from social_core.pipeline.social_auth import associate_by_email
 from social_django.models import UserSocialAuth
 from social_django.middleware import SocialAuthExceptionMiddleware
@@ -12,8 +15,27 @@ from social_core.exceptions import (AuthAlreadyAssociated, SocialAuthBaseExcepti
 ANONYMOUS_AVATAR = '/static/images/header/avatar.png'
 (NO_AVATAR, GRAVATAR, TWITTER, FACEBOOK, PRIVATETAR) = (0, 1, 2, 3, 4)
 AVATARS = (NO_AVATAR, GRAVATAR, TWITTER, FACEBOOK, PRIVATETAR)
+
 logger = logging.getLogger(__name__)
 
+def pic_storage_url(user, backend, url):
+    pic_file_name = '/pic/{}/{}'.format(backend, user)
+    # download the profile pic to pic_file
+    try:
+        r = requests.get(url)
+        pic_file = ContentFile(r.content)
+        content_type = r.headers.get('content-type', '')
+        if u'text' in content_type:
+            logger.warning('Pic URL returned text for url={}'.format(url))
+            return None
+        pic_file.content_type = content_type
+        default_storage.save(pic_file_name, pic_file)
+        return default_storage.url(pic_file_name)
+    except Exception, e:
+        # if there is a problem, return None for the pic URL
+        logger.warning('Failed to store pic for username={}'.format(user))
+        return None
+
 
 def selectively_associate_by_email(backend, details, user=None, *args, **kwargs):
     """
@@ -31,7 +53,8 @@ def selectively_associate_by_email(backend, details, user=None, *args, **kwargs)
 
 def facebook_extra_values(user, extra_data):
     try:
-        user.profile.pic_url = extra_data['picture']['data']['url']
+        profile_image_url = extra_data['picture']['data']['url']
+        user.profile.pic_url = pic_storage_url(user, 'facebook', profile_image_url)
         if user.profile.avatar_source is None or user.profile.avatar_source is PRIVATETAR:
             user.profile.avatar_source = FACEBOOK
         user.profile.save()
@@ -46,7 +69,7 @@ def twitter_extra_values(user, extra_data):
         profile_image_url = extra_data.get('profile_image_url_https')
         user.profile.twitter_id = twitter_id
         if user.profile.avatar_source is None or user.profile.avatar_source in (TWITTER, PRIVATETAR):
-            user.profile.pic_url = profile_image_url
+            user.profile.pic_url = pic_storage_url(user, 'twitter', profile_image_url)
         if user.profile.avatar_source is None or user.profile.avatar_source is PRIVATETAR:
             user.profile.avatar_source = TWITTER
         user.profile.save()

From 456a341885e014ce72f03215b0823479ad147f88 Mon Sep 17 00:00:00 2001
From: eric
Date: Fri, 20 Jul 2018 15:23:14 -0400
Subject: [PATCH 12/13] fix test fixture

---
 core/fixtures/basic_campaign_test.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/core/fixtures/basic_campaign_test.json b/core/fixtures/basic_campaign_test.json
index cdc498a2..9617386b 100644
--- a/core/fixtures/basic_campaign_test.json
+++ b/core/fixtures/basic_campaign_test.json
@@ -65,7 +65,7 @@
     "goodreads_auth_token": null,
     "goodreads_user_link": null,
     "user": 1,
-    "facebook_id": null,
+    "facebook_id": "",
     "librarything_id": "",
     "home_url": "",
     "pic_url": "",
@@ -85,7 +85,7 @@
     "goodreads_auth_token": null,
     "goodreads_user_link": null,
     "user": 2,
-    "facebook_id": null,
+    "facebook_id": "",
     "librarything_id": "",
     "home_url": "",
     "pic_url": "",
@@ 
-105,7 +105,7 @@ "goodreads_auth_token": null, "goodreads_user_link": null, "user": 3, - "facebook_id": null, + "facebook_id": "", "librarything_id": "", "home_url": "", "pic_url": "", From 725f616811394e3d5c38f4b77c9e6fd3936fb7e0 Mon Sep 17 00:00:00 2001 From: eric Date: Fri, 20 Jul 2018 15:41:48 -0400 Subject: [PATCH 13/13] privacy updates --- frontend/templates/privacy.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/templates/privacy.html b/frontend/templates/privacy.html index 8c45a10a..ac0b3d6d 100644 --- a/frontend/templates/privacy.html +++ b/frontend/templates/privacy.html @@ -86,7 +86,7 @@ We use Stripe to collect payments. As a result
  • -We use avatar images from Twitter, Facebook, and Automattic's Gravatar service. If you see an avatar on a page at unglue.it and you use an older web browser that doesn't use Referrer meta tags, one or more of these companies can tell what page on our site you're looking at. As you're probably aware, facebook doesn't put much stock in privacy. You can judge privacy policies at Twitter and Automattic for yourself. +We use avatar images from Automattic's Gravatar service. If you see an avatar on a page at unglue.it and you use an older web browser that doesn't use Referrer meta tags, Automattic can tell what page on our site you're looking at. You can judge the privacy policy at Automattic for yourself. We no longer use images from Facebook or Twitter services.
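
Usage sketch: with PATCH 01 applied, the EDP multi-scrape flow can be exercised
through the new management command or from a Django shell. The names below are
taken from the patches above; the shell invocation itself is illustrative, not
part of the series.

    # run the management command added in core/management/commands/load_edp.py
    python manage.py load_edp

    # or drive the loader directly; multiscrape() yields one EDPMultiScraper
    # per 'article.Bk' section found on the page
    from regluit.core.bookloader import add_from_bookdatas
    from regluit.core.loaders.multiscrape import multiscrape, divider, EDPMultiScraper

    scrapers = multiscrape('https://www.edp-open.org/books-in-english',
                           divider, scraper_class=EDPMultiScraper)
    add_from_bookdatas(scrapers)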