commit
55cb3dbe49
|
@ -489,6 +489,9 @@ def harvest_fahce(ebook):
|
||||||
|
|
||||||
def harvest_cmp(ebook):
|
def harvest_cmp(ebook):
|
||||||
def selector(doc):
|
def selector(doc):
|
||||||
|
objs = doc.select('.tab-content a.cmp_download_link[href]')
|
||||||
|
if (len({obj['href'] for obj in objs})) > 1:
|
||||||
|
return []
|
||||||
return doc.select('a.cmp_download_link[href]')
|
return doc.select('a.cmp_download_link[href]')
|
||||||
def dl(url):
|
def dl(url):
|
||||||
return url.replace('view', 'download') + '?inline=1'
|
return url.replace('view', 'download') + '?inline=1'
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from regluit.core.models import EbookFile, Ebook
|
||||||
|
from regluit.core.loaders.utils import get_soup
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Delete ebooks and ebook files tied to tabbed download links.

    For every ``Ebook`` with ``format='online'`` from the provider
    ``editorial.uniagustiniana.edu.co``, the ebook's page is fetched and
    each ``.tab-content a.cmp_download_link[href]`` anchor is collected.
    Any ``EbookFile`` whose ``source`` equals one of those hrefs is removed,
    together with its stored file and the ``Ebook`` it points at.

    NOTE(review): presumably these records were harvested from per-tab
    links in error -- confirm before running against production data,
    because the deletions are not reversible.
    """

    help = (
        "Remove EbookFiles (and their Ebooks) whose source is a "
        "'.tab-content' download link on editorial.uniagustiniana.edu.co"
    )

    def handle(self, **options):
        """Run the cleanup; takes no command-specific options."""
        prov = 'editorial.uniagustiniana.edu.co'
        for ebook in Ebook.objects.filter(provider=prov, format='online'):
            # Progress output goes through the command's own stream so it
            # can be captured or redirected by callers of the command.
            self.stdout.write(str(ebook.url))
            doc = get_soup(ebook.url)
            if not doc:
                # Nothing could be parsed for this page; move on.
                continue
            objs = doc.select('.tab-content a.cmp_download_link[href]')
            for obj in objs:
                for ebf in EbookFile.objects.filter(source=obj['href']):
                    # Grab the related ebook before the file row is deleted.
                    bad_ebook = ebf.ebook
                    try:
                        ebf.file.delete()
                    except Exception as err:
                        # Best effort: a failure to remove the stored file
                        # must not stop the database cleanup, but it should
                        # be visible. Catching Exception (not a bare except)
                        # keeps Ctrl-C and SystemExit working.
                        self.stderr.write(
                            'could not delete stored file for EbookFile '
                            '%s: %s' % (ebf.pk, err)
                        )
                    ebf.delete()
                    # Guard in case the relation is empty -- whether the
                    # foreign key is nullable is not visible here.
                    if bad_ebook is not None:
                        bad_ebook.delete()
|
Loading…
Reference in New Issue