Merge pull request #897 from Gluejar/production

tweak cmp harvest
pull/94/head
Eric Hellman 2020-08-01 22:56:59 -04:00 committed by GitHub
commit 55cb3dbe49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 0 deletions

View File

@ -489,6 +489,9 @@ def harvest_fahce(ebook):
def harvest_cmp(ebook):
def selector(doc):
    """Choose the download links to harvest from a parsed page.

    Looks at the download links located under ``.tab-content``. When those
    links point at more than one distinct ``href`` an empty list is
    returned; otherwise every ``a.cmp_download_link[href]`` element found
    anywhere in ``doc`` is returned.
    """
    tabbed_links = doc.select('.tab-content a.cmp_download_link[href]')
    distinct_targets = set()
    for link in tabbed_links:
        distinct_targets.add(link['href'])
    # Several different targets inside the tabs: give up on this page.
    if len(distinct_targets) > 1:
        return []
    return doc.select('a.cmp_download_link[href]')
def dl(url):
    """Turn a viewer URL into its inline-download counterpart.

    Every occurrence of ``'view'`` in ``url`` is replaced with
    ``'download'`` and ``'?inline=1'`` is appended.
    """
    # NOTE(review): str.replace rewrites *every* 'view' substring, not only
    # a path segment -- confirm that is acceptable for the harvested URLs.
    download_url = url.replace('view', 'download')
    return download_url + '?inline=1'

View File

@ -0,0 +1,23 @@
from django.core.management.base import BaseCommand
from regluit.core.models import EbookFile, Ebook
from regluit.core.loaders.utils import get_soup
class Command(BaseCommand):
    """Remove ebooks that were harvested from tabbed download links.

    For every ``online``-format ``Ebook`` whose provider is
    ``editorial.uniagustiniana.edu.co`` the ebook's page is fetched and the
    download links under ``.tab-content`` are collected. Each ``EbookFile``
    whose ``source`` equals one of those links is deleted, together with its
    stored file and the ``Ebook`` it is attached to.
    """

    help = (
        "Delete EbookFile records (and their ebooks) whose source is a "
        "tab-content download link on editorial.uniagustiniana.edu.co pages."
    )

    def handle(self, **options):
        """Run the cleanup; the command takes no arguments of its own."""
        prov = 'editorial.uniagustiniana.edu.co'
        for ebook in Ebook.objects.filter(provider=prov, format='online'):
            print(ebook.url)
            doc = get_soup(ebook.url)
            if not doc:
                # Page could not be fetched/parsed; nothing to inspect.
                continue
            objs = doc.select('.tab-content a.cmp_download_link[href]')
            for obj in objs:
                for ebf in EbookFile.objects.filter(source=obj['href']):
                    # Grab the related ebook before the file record is gone.
                    bad_ebook = ebf.ebook
                    try:
                        ebf.file.delete()
                    except Exception as err:
                        # Removing the stored file is best-effort, but a
                        # bare ``except`` would also swallow
                        # KeyboardInterrupt/SystemExit and hide the cause.
                        self.stderr.write(
                            'could not delete file for %s: %s'
                            % (obj['href'], err)
                        )
                    ebf.delete()
                    # Guard in case the file record has no ebook attached.
                    if bad_ebook is not None:
                        bad_ebook.delete()