Merge pull request #40 from gutenbergtools/dedupe-files

remove dupes. addresses #34
gutenberg1
eshellman 2019-09-13 11:08:49 -04:00 committed by GitHub
commit 380dc0c6b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 14 additions and 0 deletions

View File

@@ -64,6 +64,20 @@ class XMLishFormatter (BaseFormatter.BaseFormatter):
file_.compression = 'none'
file_.encoding = None
dedupable = {}
for file_ in dc.files:
if file_.filetype.endswith('images'):
dedupable[file_.filetype] = file_
do_dedupe = False
for ft in ['epub', 'kindle', 'pdf']:
if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
if dedupable[ft + '.images'].extent == dedupable[ft + '.noimages'].extent:
do_dedupe = True
if do_dedupe:
for ft in ['epub', 'kindle', 'pdf']:
if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
dc.files.remove(dedupable[ft + '.images'])
for file_ in dc.files + dc.generated_files:
type_ = six.text_type (file_.mediatypes[0])
m = type_.partition (';')[0]