despam descriptions in db
I wasn't really happy with this: https://unglue.it/work/66938/ so I looked for other spammy descriptions in the db.
pull/1/head
parent
14174ecb17
commit
05d37163ea
|
@ -488,6 +488,17 @@ def merge_works(w1, w2, user=None):
|
|||
|
||||
w2.delete()
|
||||
|
||||
def despam_description(description):
    """Strip free-book promotional text from a work description.

    A lot of descriptions from openlibrary have free-book promotion text;
    this removes some of it.

    Rules, applied in order (all matching is case-sensitive):

    * An empty or ``None`` description is returned unchanged.
    * A description containing "GeneralBooksClub.com" or
      "AkashaPublishing.Com" is treated as pure spam and replaced by "".
    * If the description contains "1stWorldLibrary.ORG -", or failing that
      "a million books for free.", the text between the first occurrence
      of that marker and the next occurrence (or the end) is returned.
    * Anything else is returned unchanged.
    """
    if not description:
        # Nothing to clean; also avoids an AttributeError when the caller
        # passes None instead of a string.
        return description

    if "GeneralBooksClub.com" in description or "AkashaPublishing.Com" in description:
        return ""

    # Promo boilerplate precedes these markers; keep what follows the first hit.
    for marker in ("1stWorldLibrary.ORG -", "a million books for free."):
        pieces = description.split(marker)
        if len(pieces) > 1:
            return pieces[1]

    return description
|
||||
|
||||
def add_openlibrary(work, hard_refresh = False):
|
||||
if (not hard_refresh) and work.openlibrary_lookup is not None:
|
||||
|
@ -541,6 +552,7 @@ def add_openlibrary(work, hard_refresh = False):
|
|||
if isinstance(description,dict):
|
||||
if description.has_key('value'):
|
||||
description=description['value']
|
||||
description=despam_description(description)
|
||||
if not work.description or work.description.startswith('{') or len(description) > len(work.description):
|
||||
work.description = description
|
||||
work.save()
|
||||
|
@ -710,3 +722,4 @@ def add_missing_isbn_to_editions(max_num=None, confirm=False):
|
|||
|
||||
class LookupFailure(Exception):
    """Exception subclass named for lookup failures (raise sites are not visible here)."""
|
||||
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
|
||||
from django.core.management.base import BaseCommand
|
||||
from regluit.core import models, bookloader
|
||||
|
||||
class Command(BaseCommand):
    """Management command that reports and cleans free-ebook spam in Work descriptions."""

    help = "check description db for free ebook spam"

    def handle(self, **options):
        """Report and despam every Work whose description matches a spam marker.

        For each marker, the matching works are counted and printed; each
        match then gets its description run through
        ``bookloader.despam_description`` and is passed to
        ``bookloader.add_openlibrary`` with ``hard_refresh=True``.
        """
        markers = (
            "1stWorldLibrary.ORG",
            "GeneralBooksClub.com",
            "million-books.com",
            "AkashaPublishing.Com",
        )
        for marker in markers:
            # icontains is a case-insensitive substring match on the description.
            matches = models.Work.objects.filter(description__icontains=marker)
            print("Number of Works with %s in description: %s" % (marker, matches.count()))

            for work in matches:
                cleaned = bookloader.despam_description(work.description)
                # NOTE(review): the cleaned text is only assigned here, never
                # saved directly; presumably add_openlibrary persists the
                # work -- confirm.
                work.description = cleaned
                print("updating work %s" % work)
                bookloader.add_openlibrary(work, hard_refresh=True)
|
Loading…
Reference in New Issue