fix dedupe
Generated HTML images were not deduped. Also dedupe if the file sizes differ by only 1 or 2 bits.
master
parent
4081280bae
commit
14971e3a47
|
@ -66,12 +66,13 @@ class XMLishFormatter (BaseFormatter.BaseFormatter):
|
|||
if file_.filetype and file_.filetype.endswith('images'):
|
||||
dedupable[file_.filetype] = file_
|
||||
do_dedupe = False
|
||||
for ft in ['epub', 'kindle', 'pdf']:
|
||||
for ft in ['epub', 'kindle', 'pdf', 'html']:
|
||||
if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
|
||||
if dedupable[ft + '.images'].extent == dedupable[ft + '.noimages'].extent:
|
||||
# because of timestamps, identical files may vary by a bit or 2
|
||||
if abs (dedupable[ft + '.images'].extent - dedupable[ft + '.noimages'].extent) < 3:
|
||||
do_dedupe = True
|
||||
if do_dedupe:
|
||||
for ft in ['epub', 'kindle', 'pdf']:
|
||||
for ft in ['epub', 'kindle', 'pdf', 'html']:
|
||||
if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
|
||||
dc.files.remove(dedupable[ft + '.images'])
|
||||
|
||||
|
|
Loading…
Reference in New Issue