import sys

# Make the repository root importable so the ``src.alttext`` package resolves.
# NOTE: "../" is relative to the current working directory, so this script is
# expected to be launched from the directory that contains it.
sys.path.append("../")

import src.alttext.alttext as alttext
import src.alttext.descengine as descengine
import src.alttext.ocrengine as ocrengine
import src.alttext.langengine as langengine
import keys

# HTML BOOK FILEPATHS
# (relative to the current working directory, like the sys.path entry above)
HTML_BIRD = "../books/pg30221-h/pg30221-images.html"
HTML_HUNTING = "../books/pg37122-h/pg37122-images.html"
HTML_MECHANIC = "../books/pg71856-h/pg71856-images.html"
HTML_INFINITY = "../books/pg71859-h/pg71859-images.html"

# EPUB BOOK FILEPATHS
EPUB1 = "../books/pg71856-images-3.epub"
EPUB2 = "../books/pg71908-images-3.epub"
EPUB3 = "../books/seuss.epub"

# LANGUAGE ENGINE HOST (passed to langengine.PrivateGPT in testHTML)
HOST1 = "http://127.0.0.1:8001"


def testHTML():
    """Manually exercise the HTML alt-text pipeline on a single image.

    Builds an ``alttext.AltTextHTML`` from ``descengine.BlipLocal``,
    ``ocrengine.Tesseract`` and ``langengine.PrivateGPT(HOST1)``, parses
    ``HTML_HUNTING``, then prints the ``src`` attribute of the image at
    index 4 followed by whatever ``genAltText`` returns for that source.

    Takes no arguments and returns nothing; results go to stdout.

    NOTE(review): ``imgs[4]`` assumes the parsed book yields at least five
    images; with fewer, the indexing fails (IndexError if ``getAllImgs``
    returns a list -- confirm).
    """
    print("TESTING HTML")

    # Alternative configurations kept for reference (currently disabled):
    #
    # alt: alttext.AltTextHTML = alttext.AltTextHTML(
    #     # descengine.ReplicateAPI(keys.ReplicateEricKey(), "blip"),
    #     # ocrengine.Tesseract(),
    #     # langengine.PrivateGPT(HOST1),
    # )
    #
    # alt: alttext.AltTextHTML = alttext.AltTextHTML(
    #     descengine.BlipLocal("C:/Users/dacru/Desktop/Codebase/ALT/image-captioning"),
    #     options={"version": 1},
    # )

    # NOTE: the BlipLocal argument is an absolute path on one developer's
    # machine; it must be adjusted before running anywhere else.
    alt: alttext.AltTextHTML = alttext.AltTextHTML(
        descengine.BlipLocal("C:/Users/dacru/Desktop/Codebase/ALT/image-captioning"),
        ocrengine.Tesseract(),
        langengine.PrivateGPT(HOST1),
    )

    alt.parseFile(HTML_HUNTING)
    imgs = alt.getAllImgs()

    # Only one image is processed: the one at index 4.
    src = imgs[4].attrs["src"]
    print(src)
    print(alt.genAltText(src))

    # Other checks kept for reference (currently disabled):
    #
    # desc = alt.genDesc(alt.getImgData(src), src)
    # print(desc)
    #
    # associations = alt.genAltAssociations(imgs)
    # print(associations)


if __name__ == "__main__":
    # Run the manual HTML check only when this file is executed as a script,
    # not when it is imported.
    testHTML()