diff --git a/core/admin.py b/core/admin.py index 962c6736..af3962eb 100644 --- a/core/admin.py +++ b/core/admin.py @@ -225,8 +225,8 @@ class EbookFileAdmin(ModelAdmin): list_display = ('created', 'format', 'ebook_link', 'asking') date_hierarchy = 'created' ordering = ('edition__work',) - fields = ('file', 'format', 'edition', 'edition_link', 'ebook', 'ebook_link', 'source') - readonly_fields = ('file', 'edition_link', 'ebook_link',) + fields = ('file', 'format', 'edition', 'edition_link', 'ebook', 'ebook_link', 'source', 'mobied') + readonly_fields = ('file', 'edition_link', 'ebook_link', 'source') def edition_link(self, obj): if obj.edition: link = reverse("admin:core_edition_change", args=[obj.edition_id]) diff --git a/core/loaders/__init__.py b/core/loaders/__init__.py index e47ca29b..c43ed6e7 100755 --- a/core/loaders/__init__.py +++ b/core/loaders/__init__.py @@ -55,6 +55,7 @@ def add_by_sitemap(url, maxnum=None): def scrape_language(url): scraper = get_scraper(url) - return scraper.metadata.get('language') + language = scraper.metadata.get('language') + return language if language else 'xx' diff --git a/core/loaders/harvest.py b/core/loaders/harvest.py index f4a72a59..ff68c90d 100644 --- a/core/loaders/harvest.py +++ b/core/loaders/harvest.py @@ -158,8 +158,14 @@ def make_harvested_ebook(content, ebook, format, filesize=0): format=format, source=ebook.url, ) - new_ebf.file.save(path_for_file(new_ebf, None), ContentFile(content)) - new_ebf.save() + try: + new_ebf.file.save(path_for_file(new_ebf, None), ContentFile(content)) + new_ebf.save() + except MemoryError: #huge pdf files cause problems here + logger.error("memory error saving ebook file for %s", ebook.url) + new_ebf.delete() + return None, False + new_ebook = Ebook.objects.create( edition=ebook.edition, format=format, diff --git a/core/management/commands/harvest_online_ebooks.py b/core/management/commands/harvest_online_ebooks.py index bc6a8c91..cfa941b9 100644 --- a/core/management/commands/harvest_online_ebooks.py +++ b/core/management/commands/harvest_online_ebooks.py @@ -20,8 +20,8 @@ class Command(BaseCommand): if new_ebf and new: done += 1 self.stdout.write(unicode(new_ebf.edition.work.title)) - if done == limit or done == 50: + if done == limit or done == 100: break self.stdout.write('harvested {} ebooks'.format(done)) - if done == 50: - self.stdout.write('50 is the maximum; repeat to do more') + if done == 100: + self.stdout.write('100 is the maximum; repeat to do more') diff --git a/requirements_versioned.pip b/requirements_versioned.pip index e9427c70..a53afd75 100644 --- a/requirements_versioned.pip +++ b/requirements_versioned.pip @@ -5,7 +5,8 @@ Jinja2>=2.7 MySQL-python==1.2.5 Pillow==3.4.2 PyJWT==1.4.1 -PyPDF2==1.26 +#PyPDF2==1.26 +git+git://github.com/mstamy2/PyPDF2.git@18a2627adac13124d4122c8b92aaa863ccfb8c29 PyGithub==1.15.0 PyYAML==3.13 amqp==1.4.9