fix dedupe

Generated HTML images were not deduped. Also dedupe when the file sizes differ by only 1 or 2 bytes (timestamps can make otherwise identical files vary slightly).
2020-12-08 15:11:09 -05:00 · 2020-12-08 15:11:09 -05:00 · 14971e3a47
parent 4081280bae
commit 14971e3a47
1 changed file with 4 additions and 3 deletions
--- a/HTMLFormatter.py
+++ b/HTMLFormatter.py
@@ -66,12 +66,13 @@ class XMLishFormatter (BaseFormatter.BaseFormatter):
            if file_.filetype and file_.filetype.endswith('images'):
                dedupable[file_.filetype] = file_
        do_dedupe = False
-        for ft in ['epub', 'kindle', 'pdf']:
+        for ft in ['epub', 'kindle', 'pdf', 'html']:
            if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
-                if dedupable[ft + '.images'].extent == dedupable[ft + '.noimages'].extent:
+                # because of timestamps, identical files may vary by a bit or 2
+                if abs (dedupable[ft + '.images'].extent - dedupable[ft + '.noimages'].extent) < 3:
                    do_dedupe = True
        if do_dedupe:
-            for ft in ['epub', 'kindle', 'pdf']:
+            for ft in ['epub', 'kindle', 'pdf', 'html']:
                if ft + '.images' in dedupable and ft + '.noimages' in dedupable:
                    dc.files.remove(dedupable[ft + '.images'])