ethan branch

dev-ethan
kethan351 2024-02-09 14:07:16 -05:00
parent b1ab270a7e
commit c164a5b255
4 changed files with 486187 additions and 0 deletions

485943
empty_alt_text.txt Normal file

File diff suppressed because it is too large Load Diff

200
empty_alt_text_sample.TXT Normal file
View File

@ -0,0 +1,200 @@
15 cache/epub/15/images/mark.jpg
54 cache/epub/54/images/enlarge.jpg
76 cache/epub/76/images/c01-02.jpg
76 cache/epub/76/images/c01-18.jpg
76 cache/epub/76/images/c01-19.jpg
76 cache/epub/76/images/c01-21.jpg
76 cache/epub/76/images/c02-22.jpg
76 cache/epub/76/images/c02-24.jpg
76 cache/epub/76/images/c02-25.jpg
76 cache/epub/76/images/c02-28.jpg
76 cache/epub/76/images/c03-29.jpg
76 cache/epub/76/images/c03-31.jpg
76 cache/epub/76/images/c03-33.jpg
76 cache/epub/76/images/c04-34.jpg
76 cache/epub/76/images/c04-36.jpg
76 cache/epub/76/images/c04-37.jpg
76 cache/epub/76/images/c05-39.jpg
76 cache/epub/76/images/c05-41.jpg
76 cache/epub/76/images/c05-43.jpg
76 cache/epub/76/images/c05-44.jpg
76 cache/epub/76/images/c06-45.jpg
76 cache/epub/76/images/c06-46.jpg
76 cache/epub/76/images/c06-48.jpg
76 cache/epub/76/images/c06-51.jpg
76 cache/epub/76/images/c07-53.jpg
76 cache/epub/76/images/c07-54.jpg
76 cache/epub/76/images/c07-56.jpg
76 cache/epub/76/images/c07-59.jpg
76 cache/epub/76/images/c08-61.jpg
76 cache/epub/76/images/c08-63.jpg
76 cache/epub/76/images/c08-64.jpg
76 cache/epub/76/images/c08-67.jpg
76 cache/epub/76/images/c08-72.jpg
76 cache/epub/76/images/c09-74.jpg
76 cache/epub/76/images/c09-75.jpg
76 cache/epub/76/images/c09-77.jpg
76 cache/epub/76/images/c10-79.jpg
76 cache/epub/76/images/c10-80.jpg
76 cache/epub/76/images/c10-81.jpg
76 cache/epub/76/images/c10-82.jpg
76 cache/epub/76/images/c11-84.jpg
76 cache/epub/76/images/c11-87.jpg
76 cache/epub/76/images/c11-90.jpg
76 cache/epub/76/images/c11-91.jpg
76 cache/epub/76/images/c12-093.jpg
76 cache/epub/76/images/c12-095.jpg
76 cache/epub/76/images/c12-098.jpg
76 cache/epub/76/images/c12-100.jpg
76 cache/epub/76/images/c12-101.jpg
76 cache/epub/76/images/c13-102.jpg
76 cache/epub/76/images/c13-104.jpg
76 cache/epub/76/images/c13-107.jpg
76 cache/epub/76/images/c13-108.jpg
76 cache/epub/76/images/c14-109.jpg
76 cache/epub/76/images/c14-110.jpg
76 cache/epub/76/images/c14-112.jpg
76 cache/epub/76/images/c15-115.jpg
76 cache/epub/76/images/c15-117.jpg
76 cache/epub/76/images/c15-118.jpg
76 cache/epub/76/images/c16-122.jpg
76 cache/epub/76/images/c16-126.jpg
76 cache/epub/76/images/c16-127.jpg
76 cache/epub/76/images/c16-131.jpg
76 cache/epub/76/images/c17-132.jpg
76 cache/epub/76/images/c17-134.jpg
76 cache/epub/76/images/c17-138.jpg
76 cache/epub/76/images/c17-139.jpg
76 cache/epub/76/images/c17-140.jpg
76 cache/epub/76/images/c17-142.jpg
76 cache/epub/76/images/c18-143.jpg
76 cache/epub/76/images/c18-145.jpg
76 cache/epub/76/images/c18-146.jpg
76 cache/epub/76/images/c18-149.jpg
76 cache/epub/76/images/c18-153.jpg
76 cache/epub/76/images/c19-157.jpg
76 cache/epub/76/images/c19-160.jpg
76 cache/epub/76/images/c19-163.jpg
76 cache/epub/76/images/c19-165.jpg
76 cache/epub/76/images/c19-166.jpg
76 cache/epub/76/images/c20-167.jpg
76 cache/epub/76/images/c20-170.jpg
76 cache/epub/76/images/c20-172.jpg
76 cache/epub/76/images/c20-174.jpg
76 cache/epub/76/images/c20-175.jpg
76 cache/epub/76/images/c21-177.jpg
76 cache/epub/76/images/c21-178.jpg
76 cache/epub/76/images/c21-182.jpg
76 cache/epub/76/images/c21-185.jpg
76 cache/epub/76/images/c21-187.jpg
76 cache/epub/76/images/c22-189.jpg
76 cache/epub/76/images/c22-191.jpg
76 cache/epub/76/images/c22-193.jpg
76 cache/epub/76/images/c23-196.jpg
76 cache/epub/76/images/c23-198.jpg
76 cache/epub/76/images/c23-200.jpg
76 cache/epub/76/images/c24-203.jpg
76 cache/epub/76/images/c24-205.jpg
76 cache/epub/76/images/c24-207.jpg
76 cache/epub/76/images/c24-209.jpg
76 cache/epub/76/images/c25-211.jpg
76 cache/epub/76/images/c25-212.jpg
76 cache/epub/76/images/c25-215.jpg
76 cache/epub/76/images/c25-216.jpg
76 cache/epub/76/images/c25-218.jpg
76 cache/epub/76/images/c25-219.jpg
76 cache/epub/76/images/c26-220.jpg
76 cache/epub/76/images/c26-221.jpg
76 cache/epub/76/images/c26-224.jpg
76 cache/epub/76/images/c26-226.jpg
76 cache/epub/76/images/c26-229.jpg
76 cache/epub/76/images/c27-230.jpg
76 cache/epub/76/images/c27-232.jpg
76 cache/epub/76/images/c27-233.jpg
76 cache/epub/76/images/c27-235.jpg
76 cache/epub/76/images/c27-237.jpg
76 cache/epub/76/images/c28-239.jpg
76 cache/epub/76/images/c28-241.jpg
76 cache/epub/76/images/c28-242.jpg
76 cache/epub/76/images/c28-244.jpg
76 cache/epub/76/images/c28-246.jpg
76 cache/epub/76/images/c28-248.jpg
76 cache/epub/76/images/c29-250.jpg
76 cache/epub/76/images/c29-252.jpg
76 cache/epub/76/images/c29-255.jpg
76 cache/epub/76/images/c29-257.jpg
76 cache/epub/76/images/c29-260.jpg
76 cache/epub/76/images/c30-261.jpg
76 cache/epub/76/images/c30-263.jpg
76 cache/epub/76/images/c31-266.jpg
76 cache/epub/76/images/c31-269.jpg
76 cache/epub/76/images/c31-271.jpg
76 cache/epub/76/images/c31-274.jpg
76 cache/epub/76/images/c31-275.jpg
76 cache/epub/76/images/c32-277.jpg
76 cache/epub/76/images/c32-279.jpg
76 cache/epub/76/images/c32-283.jpg
76 cache/epub/76/images/c33-284.jpg
76 cache/epub/76/images/c33-287.jpg
76 cache/epub/76/images/c33-290.jpg
76 cache/epub/76/images/c33-291.jpg
76 cache/epub/76/images/c34-293.jpg
76 cache/epub/76/images/c34-296.jpg
76 cache/epub/76/images/c34-299.jpg
76 cache/epub/76/images/c35-300.jpg
76 cache/epub/76/images/c35-302.jpg
76 cache/epub/76/images/c35-305.jpg
76 cache/epub/76/images/c35-307.jpg
76 cache/epub/76/images/c36-309.jpg
76 cache/epub/76/images/c36-311.jpg
76 cache/epub/76/images/c36-314.jpg
76 cache/epub/76/images/c37-316.jpg
76 cache/epub/76/images/c37-318.jpg
76 cache/epub/76/images/c37-321.jpg
76 cache/epub/76/images/c37-322.jpg
76 cache/epub/76/images/c38-324.jpg
76 cache/epub/76/images/c38-327.jpg
76 cache/epub/76/images/c38-329.jpg
76 cache/epub/76/images/c38-331.jpg
76 cache/epub/76/images/c39-333.jpg
76 cache/epub/76/images/c39-335.jpg
76 cache/epub/76/images/c39-337.jpg
76 cache/epub/76/images/c40-339.jpg
76 cache/epub/76/images/c40-341.jpg
76 cache/epub/76/images/c40-343.jpg
76 cache/epub/76/images/c40-345.jpg
76 cache/epub/76/images/c41-347.jpg
76 cache/epub/76/images/c41-348.jpg
76 cache/epub/76/images/c41-350.jpg
76 cache/epub/76/images/c41-353.jpg
76 cache/epub/76/images/c42-355.jpg
76 cache/epub/76/images/c42-357.jpg
76 cache/epub/76/images/c42-361.jpg
76 cache/epub/76/images/c42-362.jpg
76 cache/epub/76/images/c43-364.jpg
76 cache/epub/76/images/c43-365.jpg
76 cache/epub/76/images/c43-366.jpg
76 cache/epub/76/images/cover.jpg
76 cache/epub/76/images/frontispiece.jpg
76 cache/epub/76/images/frontispiece2.jpg
76 cache/epub/76/images/titlepage.jpg
86 cache/epub/86/images/32-413.jpg
86 cache/epub/86/images/cover.jpg
86 cache/epub/86/images/frontispiece.jpg
86 cache/epub/86/images/titlepage.jpg
120 cache/epub/120/images/enlarge.jpg
142 cache/epub/142/images/enlarge.jpg
201 cache/epub/201/images/ill_010.png
203 cache/epub/203/images/001.jpg
203 cache/epub/203/images/002.jpg
203 cache/epub/203/images/003.jpg
203 cache/epub/203/images/004.jpg
203 cache/epub/203/images/005.jpg
203 cache/epub/203/images/006.jpg
203 cache/epub/203/images/titlepage.jpg
236 cache/epub/236/images/enlarge.jpg
376 cache/epub/376/images/img1.jpg
421 cache/epub/421/images/enlarge.jpg
423 cache/epub/423/images/cover.jpg
580 cache/epub/580/images/enlarge.jpg
586 cache/epub/586/images/zill_011_2.png

0
generated_alt_texts.TXT Normal file
View File

44
src/alttext/automate.py Normal file
View File

@ -0,0 +1,44 @@
import os
from pathlib import Path
from alttext import genAltTextV2
from descengine import genDesc
from ocrengine import genChars
from langengine import refineDesc, refineOCR #need to implement these
def read_paths_from_file(file_path):
    """Read (book number, image path) pairs from a listing file.

    Each non-blank line must hold a book number followed by an image path,
    separated by a tab or by spaces. Blank lines are skipped.

    Args:
        file_path: Path of the listing file to read.

    Returns:
        A list of ``(book_num, image_path)`` tuples of strings, in file order.

    Raises:
        ValueError: If a non-blank line does not contain both fields.
        OSError: If the file cannot be opened.
    """
    paths = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line_no, line in enumerate(file, start=1):
            entry = line.strip()
            if not entry:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            # Split on the first whitespace run only, so tab- and
            # space-separated listings both work and the path stays intact.
            fields = entry.split(None, 1)
            if len(fields) != 2:
                raise ValueError(
                    f"{file_path}:{line_no}: expected '<book> <path>', "
                    f"got {entry!r}"
                )
            paths.append((fields[0], fields[1]))
    return paths
def generate_alt_text_for_images(image_paths):
    """Generate alt-text for every listed image.

    Args:
        image_paths: Iterable of entries. Each entry is either a
            ``(book_num, image_path)`` pair or a single string holding both
            fields separated by whitespace. ``image_path`` may be a bare
            file name or may already start with
            ``cache/epub/<book_num>/images/``.

    Returns:
        A list of ``(book_num, image_path, alt_text)`` tuples in input order,
        where ``alt_text`` is whatever ``genAltTextV2`` returned.
    """
    alt_texts = []
    for path_info in image_paths:
        if isinstance(path_info, str):
            # Raw, unsplit line: separate the book number from the path.
            book_num, image_path = path_info.strip().split(None, 1)
        else:
            # Already-split pair, as produced by read_paths_from_file.
            book_num, image_path = path_info

        # Only prepend the cache directory when the listing did not already
        # include it; otherwise the directory would appear twice.
        image_dir = f"cache/epub/{book_num}/images/"
        if image_path.startswith(image_dir):
            full_image_path = image_path
        else:
            full_image_path = f"{image_dir}{image_path}"

        # Call the imported function directly; the result must not be looked
        # up on the (not yet assigned) local ``alt_text``.
        # NOTE(review): assumes genAltTextV2 is a module-level callable that
        # accepts an image path -- confirm against the alttext module.
        alt_text = genAltTextV2(full_image_path)
        alt_texts.append((book_num, image_path, alt_text))
    return alt_texts
def main():
    """Generate alt-text for the sample image listing and save the results.

    Reads the listing with ``read_paths_from_file``, passes the entries to
    ``generate_alt_text_for_images``, and writes one result per line to the
    output file. Both paths are relative to the current working directory.
    """
    # File names use the same spelling and case as the listing/result files
    # added alongside this script (the input previously ended in ".text").
    input_file = '../empty_alt_text_sample.TXT'  # Update this path
    output_file = '../generated_alt_texts.TXT'  # Update this path

    image_paths = read_paths_from_file(input_file)
    alt_texts = generate_alt_text_for_images(image_paths)

    # Explicit encoding so the output does not depend on the platform default.
    with open(output_file, 'w', encoding='utf-8') as file:
        # Each line is the string form of one (book, path, alt_text) tuple.
        file.writelines(f'{alt_text}\n' for alt_text in alt_texts)


if __name__ == '__main__':
    main()