fix dedupe

Generated HTML files with images were not being deduped. Also dedupe when the file sizes of the images and noimages variants differ by only 1 or 2 bytes.
master
eric 2020-12-08 15:11:09 -05:00
parent 4081280bae
commit 14971e3a47
1 changed file with 4 additions and 3 deletions

View File

@@ -66,12 +66,13 @@ class XMLishFormatter (BaseFormatter.BaseFormatter):
if file_.filetype and file_.filetype.endswith('images'):
dedupable[file_.filetype] = file_
do_dedupe = False
for ft in ['epub', 'kindle', 'pdf']:
for ft in ['epub', 'kindle', 'pdf', 'html']:
if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
if dedupable[ft + '.images'].extent == dedupable[ft + '.noimages'].extent:
# because of timestamps, identical files may vary by a bit or 2
if abs (dedupable[ft + '.images'].extent - dedupable[ft + '.noimages'].extent) < 3:
do_dedupe = True
if do_dedupe:
for ft in ['epub', 'kindle', 'pdf']:
for ft in ['epub', 'kindle', 'pdf', 'html']:
if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
dc.files.remove(dedupable[ft + '.images'])