Merge branch 'scrub_descriptions'
commit
861d8355b3
|
@ -488,6 +488,17 @@ def merge_works(w1, w2, user=None):
|
||||||
|
|
||||||
w2.delete()
|
w2.delete()
|
||||||
|
|
||||||
|
def despam_description(description):
    """Remove known free-ebook promotional text from a description.

    A lot of descriptions from openlibrary have free-book promotion text;
    this removes some of it.  All matching is case-insensitive, so that a
    description selected by a case-insensitive query (for example Django's
    ``description__icontains``) is also recognised here.

    Rules, applied in this order:

    * ``None`` or an empty description yields ``""``.
    * If the text mentions ``GeneralBooksClub.com`` or
      ``AkashaPublishing.Com``, the whole description is discarded and
      ``""`` is returned.
    * If the text contains ``1stWorldLibrary.ORG -``, the text that follows
      the first occurrence (up to the next occurrence, if any) is returned.
    * Otherwise the same is done for ``a million books for free.``.
    * Otherwise the description is returned unchanged.

    The returned piece is not stripped, so it may start with whitespace.
    """
    # Local import: the module's top-of-file import block is not visible from
    # here, so the dependency is brought into scope inside the function.
    import re

    if not description:
        # Callers take len() of the result, so normalise None to "".
        return ""

    lowered = description.lower()
    for blocked_site in ("generalbooksclub.com", "akashapublishing.com"):
        if blocked_site in lowered:
            return ""

    for marker in ("1stWorldLibrary.ORG -", "a million books for free."):
        # Split with a regex on the original text instead of slicing by
        # offsets found in ``lowered``: lower-casing is not guaranteed to
        # preserve string length for every character.
        pieces = re.compile(re.escape(marker), re.IGNORECASE).split(description)
        if len(pieces) > 1:
            return pieces[1]

    return description
|
||||||
|
|
||||||
def add_openlibrary(work, hard_refresh = False):
|
def add_openlibrary(work, hard_refresh = False):
|
||||||
if (not hard_refresh) and work.openlibrary_lookup is not None:
|
if (not hard_refresh) and work.openlibrary_lookup is not None:
|
||||||
|
@ -541,6 +552,7 @@ def add_openlibrary(work, hard_refresh = False):
|
||||||
if isinstance(description,dict):
|
if isinstance(description,dict):
|
||||||
if description.has_key('value'):
|
if description.has_key('value'):
|
||||||
description=description['value']
|
description=description['value']
|
||||||
|
description=despam_description(description)
|
||||||
if not work.description or work.description.startswith('{') or len(description) > len(work.description):
|
if not work.description or work.description.startswith('{') or len(description) > len(work.description):
|
||||||
work.description = description
|
work.description = description
|
||||||
work.save()
|
work.save()
|
||||||
|
@ -710,3 +722,4 @@ def add_missing_isbn_to_editions(max_num=None, confirm=False):
|
||||||
|
|
||||||
class LookupFailure(Exception):
    """Exception type declared by this module; adds nothing to ``Exception``."""
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from regluit.core import models, bookloader
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that scans Work descriptions for free-ebook spam."""

    help = "check description db for free ebook spam"

    def handle(self, **options):
        """Report and clean works whose description contains a spam marker.

        For each marker string, prints how many Works contain it
        (case-insensitive match), then for every matching work replaces the
        in-memory description with the despammed text and re-runs the
        openlibrary lookup with ``hard_refresh=True``.
        """
        markers = (
            "1stWorldLibrary.ORG",
            "GeneralBooksClub.com",
            "million-books.com",
            "AkashaPublishing.Com",
        )
        for marker in markers:
            matching_works = models.Work.objects.filter(description__icontains=marker)
            print("Number of Works with %s in description: %s" % (marker, matching_works.count()))

            for work in matching_works:
                cleaned = bookloader.despam_description(work.description)
                work.description = cleaned
                print("updating work %s" % work)
                # NOTE(review): the cleaned description is not saved here;
                # persistence is left to add_openlibrary -- confirm it always
                # saves the work.
                bookloader.add_openlibrary(work, hard_refresh=True)
|
Loading…
Reference in New Issue