reworking with changes in alttext
parent
59feace10b
commit
7a0631551d
|
@ -6,9 +6,10 @@ import bs4
|
|||
import ebooklib
|
||||
from ebooklib import epub
|
||||
|
||||
from .descengine import DescEngine
|
||||
from .ocrengine import OCREngine
|
||||
from .langengine import LangEngine
|
||||
|
||||
from descengine import DescEngine
|
||||
from ocrengine import OCREngine
|
||||
from langengine import LangEngine
|
||||
|
||||
|
||||
DEFOPTIONS = {
|
||||
|
@ -83,7 +84,7 @@ class AltText(ABC):
|
|||
|
||||
# PARSING METHODS
|
||||
@abstractmethod
|
||||
def parse(self, data: str) -> typing.Union[bs4.BeautifulSoup, epub.EpubBook]:
|
||||
def parse(self, data: str) -> bs4.BeautifulSoup | epub.EpubBook:
|
||||
"""Parses data into a BeautifulSoup or EpubBook object.
|
||||
|
||||
Args:
|
||||
|
@ -95,7 +96,7 @@ class AltText(ABC):
|
|||
pass
|
||||
|
||||
@abstractmethod
|
||||
def parseFile(self, filepath: str) -> typing.Union[bs4.BeautifulSoup, epub.EpubBook]:
|
||||
def parseFile(self, filepath: str) -> bs4.BeautifulSoup | epub.EpubBook:
|
||||
"""Parses data from a file into a BeautifulSoup or EpubBook object.
|
||||
|
||||
Args:
|
||||
|
@ -162,7 +163,7 @@ class AltText(ABC):
|
|||
pass
|
||||
|
||||
@abstractmethod
|
||||
def export(self) -> typing.Union[str, epub.EpubBook]:
|
||||
def export(self) -> str | epub.EpubBook:
|
||||
"""Exports the current data.
|
||||
|
||||
Returns:
|
||||
|
@ -428,8 +429,8 @@ class AltTextHTML(AltText):
|
|||
|
||||
def parseFile(self, filepath: str) -> bs4.BeautifulSoup:
|
||||
with open(filepath, encoding="utf8") as html:
|
||||
self.filepath = filepath
|
||||
l = filepath.split("/")
|
||||
self.filepath = filepath.replace("\\", "/")
|
||||
l = self.filepath.split("/")
|
||||
self.filename = l.pop()
|
||||
self.filedir = "/".join(l) + "/"
|
||||
return self.parse(html)
|
||||
|
@ -516,20 +517,17 @@ class AltTextHTML(AltText):
|
|||
text = elem.text.strip()
|
||||
context[0] = text
|
||||
except:
|
||||
print("error 0")
|
||||
context[0] = None
|
||||
elem = tag
|
||||
text = ""
|
||||
try:
|
||||
text = elem.text.strip()
|
||||
while text == "":
|
||||
elem = elem.previous_element
|
||||
elem = elem.next_element
|
||||
text = elem.text.strip()
|
||||
context[1] = text
|
||||
except:
|
||||
print("error 1")
|
||||
context[1] = None
|
||||
print(context)
|
||||
return context
|
||||
|
||||
def genChars(self, imgData: bytes, src: str) -> str:
|
||||
|
@ -564,7 +562,6 @@ class AltTextHTML(AltText):
|
|||
if self.options["withContext"]:
|
||||
context = self.getContext(self.getImg(src))
|
||||
desc = self.genDesc(imgdata, src, context)
|
||||
|
||||
chars = ""
|
||||
if self.ocrEngine != None:
|
||||
chars = self.genChars(imgdata, src).strip()
|
||||
|
@ -684,4 +681,4 @@ class AltTextEPUB(AltText):
|
|||
|
||||
def exportToFile(self, path: str) -> str:
|
||||
epub.write_epub(path, self.export())
|
||||
return path
|
||||
return path
|
|
@ -0,0 +1,29 @@
|
|||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import uuid
|
||||
|
||||
from .descengine import DescEngine
|
||||
|
||||
class BlipLocal(DescEngine):
    """Description engine that captions images with a local BLIP checkout.

    Every call writes the image into a scratch folder inside ``self.path``,
    runs ``inference.py`` from that directory and reads the caption back
    from the ``0_captions.txt`` file expected in the scratch folder.
    """

    def __init__(self, path: str) -> None:
        """Remember the directory that contains ``inference.py``.

        Args:
            path (str): Directory used both as scratch space and as the
                working directory of the inference run.
        """
        self.__setPath(path)
        return None

    def __setPath(self, path: str) -> str:
        """Store ``path`` on the instance and return it."""
        self.path = path
        return self.path

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Generate a description for one image.

        Args:
            imgData (bytes): Raw image bytes.
            src (str): Image source; only the text after its last "." is
                used, as the extension of the scratch image file.
            context (str, optional): Accepted for interface compatibility;
                not used by this engine.

        Returns:
            str: The second comma-separated field of ``0_captions.txt``
                (presumably the caption column -- confirm against
                ``inference.py``).
        """
        folderName = uuid.uuid4()
        ext = src.split(".")[-1]
        workDir = f"{self.path}/{folderName}"
        os.makedirs(workDir)
        try:
            with open(f"{workDir}/image.{ext}", "wb+") as imgFile:
                imgFile.write(imgData)
            # An argument list (rather than one command string) also works on
            # platforms where a string without shell=True is taken to be the
            # program name.
            subprocess.call(
                [
                    "py",
                    "inference.py",
                    "-i",
                    f"./{folderName}",
                    "--batch",
                    "1",
                    "--gpu",
                    "0",
                ],
                cwd=f"{self.path}",
            )
            with open(f"{workDir}/0_captions.txt", "r") as captions:
                desc = captions.read()
        finally:
            # Remove the scratch folder even when inference or reading fails.
            shutil.rmtree(workDir)
        desc = desc.split(",")
        return desc[1]
|
|
@ -0,0 +1,37 @@
|
|||
import os
|
||||
import vertexai
|
||||
from vertexai.vision_models import ImageTextModel, Image
|
||||
|
||||
from .descengine import DescEngine
|
||||
|
||||
class GoogleVertexAPI(DescEngine):
    """Description engine that asks a Vertex AI image-text model for a caption."""

    def __init__(self, project_id: str, location: str, gac_path: str) -> None:
        """Initialise the Vertex AI SDK and export the credentials path.

        Args:
            project_id (str): Project passed to ``vertexai.init``.
            location (str): Location passed to ``vertexai.init``.
            gac_path (str): Value stored in the
                ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable.
        """
        self.project_id, self.location = project_id, location
        vertexai.init(project=self.project_id, location=self.location)

        self.gac_path = gac_path
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.gac_path

    def __setProject(self, project_id: str):
        """Change the project and re-initialise the SDK."""
        self.project_id = project_id
        vertexai.init(project=self.project_id, location=self.location)

    def __setLocation(self, location: str):
        """Change the location and re-initialise the SDK."""
        self.location = location
        vertexai.init(project=self.project_id, location=self.location)

    def __setGAC(self, gac_path: str):
        """Change the credentials path and re-export it to the environment."""
        self.gac_path = gac_path
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.gac_path

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Request a single English caption for the image.

        Args:
            imgData (bytes): Raw image bytes.
            src (str): Image source; not used by this engine.
            context (str, optional): Not used by this engine.

        Returns:
            str: The first caption returned by the model.
        """
        captioner = ImageTextModel.from_pretrained("imagetext@001")
        results = captioner.get_captions(
            image=Image(imgData),
            number_of_results=1,
            language="en",
        )
        return results[0]
|
|
@ -0,0 +1,51 @@
|
|||
import replicate
|
||||
import base64
|
||||
import os
|
||||
|
||||
from .descengine import DescEngine
|
||||
|
||||
# Short model names accepted by ReplicateAPI, mapped to the pinned
# "owner/model:version" identifier that is handed to replicate.run.
REPLICATE_MODELS = {
    "blip": "salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746",
    "clip_prefix_caption": "rmokady/clip_prefix_caption:9a34a6339872a03f45236f114321fb51fc7aa8269d38ae0ce5334969981e4cd8",
    "clip-caption-reward": "j-min/clip-caption-reward:de37751f75135f7ebbe62548e27d6740d5155dfefdf6447db35c9865253d7e06",
    "img2prompt": "methexis-inc/img2prompt:50adaf2d3ad20a6f911a8a9e3ccf777b263b8596fbd2c8fc26e8888f8a0edbb5",
    "minigpt4": "daanelson/minigpt-4:b96a2f33cc8e4b0aa23eacfce731b9c41a7d9466d9ed4e167375587b54db9423",
    "image-captioning-with-visual-attention": "nohamoamary/image-captioning-with-visual-attention:9bb60a6baa58801aa7cd4c4fafc95fcf1531bf59b84962aff5a718f4d1f58986",
}
|
||||
|
||||
class ReplicateAPI(DescEngine):
    """Description engine backed by a model hosted on Replicate."""

    def __init__(self, key: str, model: str = "blip") -> None:
        """Store the API token and select the model.

        Args:
            key (str): Replicate API token.
            model (str, optional): A key of REPLICATE_MODELS. Defaults to "blip".

        Raises:
            ValueError: If ``model`` is not a key of REPLICATE_MODELS.
        """
        self.__setKey(key)
        self.__setModel(model)
        return None

    def __getModel(self) -> str:
        """Return the full identifier of the selected model."""
        return self.model

    def __setModel(self, modelName: str) -> str:
        """Select a model by its short name and return its full identifier.

        Raises:
            ValueError: If ``modelName`` is not a key of REPLICATE_MODELS.
        """
        if modelName not in REPLICATE_MODELS:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError(
                f"{modelName} is not a valid model. Please choose from {list(REPLICATE_MODELS.keys())}"
            )
        self.model = REPLICATE_MODELS[modelName]
        return self.model

    def __getKey(self) -> str:
        """Return the stored API token."""
        return self.key

    def __setKey(self, key: str) -> str:
        """Store the token and export it as ``REPLICATE_API_TOKEN``."""
        self.key = key
        os.environ["REPLICATE_API_TOKEN"] = key
        return self.key

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Send the image to the selected model as a base64 data URL.

        Args:
            imgData (bytes): Raw image bytes.
            src (str): Image source; the text after its last "." selects
                the image subtype of the data URL.
            context (str, optional): Extra text appended to the prompt.

        Returns:
            str: Whatever ``replicate.run`` returns for the model.
        """
        base64_utf8_str = base64.b64encode(imgData).decode("utf-8")
        model = self.__getModel()
        ext = src.split(".")[-1]
        # "jpg" is a file extension only; the registered media type is image/jpeg.
        if ext.lower() == "jpg":
            ext = "jpeg"
        prompt = "Create alternative-text for this image."
        if context is not None:
            prompt = f"Create alternative-text for this image given the following context...\n{context}"

        dataurl = f"data:image/{ext};base64,{base64_utf8_str}"
        output = replicate.run(model, input={"image": dataurl, "prompt": prompt})
        return output
|
|
@ -0,0 +1,119 @@
|
|||
import requests
|
||||
|
||||
from .langengine import LangEngine
|
||||
|
||||
class PrivateGPT(LangEngine):
    """Language engine that talks to a PrivateGPT server over HTTP."""

    def __init__(self, host) -> None:
        """Store the base URL of the server (no trailing slash is added or removed)."""
        self.host = host

    def __setHost(self, host) -> bool:
        """Replace the base URL of the server."""
        self.host = host
        return True

    def _completion(self, prompt: str) -> str:
        """POST a prompt to ``/v1/completions`` and return the stripped reply text.

        Args:
            prompt (str): Prompt sent to the server.

        Returns:
            str: ``choices[0].message.content`` of the JSON response, stripped.
        """
        body = {
            "include_sources": False,
            "prompt": prompt,
            "stream": False,
            "use_context": False,
        }
        r = requests.post(f"{self.host}/v1/completions", json=body)
        r = r.json()
        return r["choices"][0]["message"]["content"].strip()

    def refineDesc(self, description: str) -> str:
        """Ask the model to refine an image-captioning result into alt-text.

        Args:
            description (str): Caption produced by a description engine.

        Returns:
            str: The model's reply.
        """
        prompt = f"""The following string surrounded with '///' was generated by an Image Captioning AI when ran on some arbitrary image.
///{description}///

Your goal is to refine the string to be inserted as alt-text for an image in an Ebook.

Here are guidelines to follow...
1. Prioritize information in text alternative:
Aim to put the most important information at the beginning.
2. Length of the text alternative:
The alt text should be the most concise description possible of the image's purpose. If anything more than a short phrase or sentence is needed, it would be better to use one of the long description methods discussed in complex images.
3. Superfluous information in the text alternative:
Usually, there's no need to include words like “image”, “icon”, or “picture” in the alt text. People who can see will know this already, and screen readers announce the presence of an image. In some situations, it may be important to distinguish between paintings, photographs, or illustrations, etc., but it's best to avoid the more generic use of the terms.

Format your response as...
The refined string is: <refined_string>

If the string is empty, simply respond with...
The refined string is: N/A"""
        return self._completion(prompt)

    def refineOCR(self, chars: str) -> str:
        """Ask the model to clean up an OCR result.

        Args:
            chars (str): Characters produced by an OCR engine.

        Returns:
            str: The model's reply.
        """
        prompt = f"""The following string surrounded with '///' was generated by an Optical Character Recognition software when ran on some arbitrary image.
///
{chars}
///

Your goal is to refine the string.
There may be random/excess spaces or other characters in the string, please remove them.
Do not surround the refined string in quotation marks.

Format your response as...
The refined string is: <refined_string>

If the string is empty, simply respond with...
The refined string is: N/A"""
        return self._completion(prompt)

    def genPrompt(self, desc: str, chars: str, context: list[str], caption: str) -> str:
        """Build the alt-text prompt from every piece of available information.

        Args:
            desc (str): Caption produced by a description engine.
            chars (str): OCR text; skipped when None or empty.
            context (list[str]): ``[text before, text after]`` the image.
                Entries that are None or empty are skipped; None is treated
                as "no context".
            caption (str): Caption found in the document; skipped when None
                or empty.

        Returns:
            str: The assembled prompt.
        """
        # refineAlt defaults context to None; indexing None used to raise TypeError.
        if context is None:
            context = [None, None]

        ocr = ""
        if chars is not None and chars != "":
            ocr = f"\nThe following string surrounded with '///' was generated by an Optical Character Recognition software when ran on the image.\n///{chars}///"
        before = ""
        if context[0] is not None and context[0] != "":
            before = f"\nThe following string surrounded with '///' is the nearest text found before the image.\n///{context[0]}///"
        after = ""
        if context[1] is not None and context[1] != "":
            after = f"\nThe following string surrounded with '///' is the nearest text found after the image.\n///{context[1]}///"
        cap = ""
        if caption is not None and caption != "":
            cap = f"\nThe following string surrounded with '///' is a caption in the Ebook for the image.\n///{caption}///"

        prompt = f"""There following information is regarding an image found in an Ebook with no alternative-text.
The following string surrounded with '///' was generated by an Image Captioning AI when ran on the image.
///{desc}///{ocr}{cap}{before}{after}

Your goal is to create alternative-text for the image given the prior information.

Here are guidelines to follow to create quality alt-text...
1. Prioritize information in text alternative:
Aim to put the most important information at the beginning.
2. Length of the text alternative:
The alt text should be the most concise description possible of the image's purpose. If anything more than a short phrase or sentence is needed, it would be better to use one of the long description methods discussed in complex images.
3. Superfluous information in the text alternative:
Usually, there's no need to include words like “image”, “icon”, or “picture” in the alt text. People who can see will know this already, and screen readers announce the presence of an image. In some situations, it may be important to distinguish between paintings, photographs, or illustrations, etc., but it's best to avoid the more generic use of the terms.

Using all of the information stated, please generate alt-text for the image.
In your response, please only give the alt-text."""
        return prompt

    def refineAlt(
        self,
        desc: str,
        chars: str = None,
        context: list[str] = None,
        caption: str = None,
    ) -> str:
        """Generate alt-text from a description plus optional OCR, context and caption.

        Args:
            desc (str): Caption produced by a description engine.
            chars (str, optional): OCR text.
            context (list[str], optional): ``[text before, text after]`` the image.
            caption (str, optional): Caption found in the document.

        Returns:
            str: The model's reply.
        """
        prompt = self.genPrompt(
            desc,
            chars,
            context,
            caption,
        )
        return self._completion(prompt)

    def ingest(self, filename: str, binary) -> bool:
        """Upload a document to ``/v1/ingest``.

        Args:
            filename (str): Name reported for the uploaded file; the text
                after its last "." becomes the ``application/<ext>`` type.
            binary: File object or bytes handed to ``requests`` as the body.

        Returns:
            bool: Always True; the HTTP response is not inspected.
        """
        # Use the last suffix so "my.book.html" maps to "html"; a name without
        # any "." falls back to the generic binary type instead of IndexError.
        ext = filename.rsplit(".", 1)[-1] if "." in filename else "octet-stream"
        files = {"file": (filename, binary, f"application/{ext}")}
        headers = {"accept": "application/json"}
        requests.post(f"{self.host}/v1/ingest", files=files, headers=headers)
        return True

    def degest(self, filename: str) -> bool:
        """Ask the server to delete a previously ingested document.

        Args:
            filename (str): Name appended to ``/v1/ingest/``.

        Returns:
            bool: Always True; the HTTP response is not inspected.
        """
        headers = {"accept": "application/json"}
        requests.delete(f"{self.host}/v1/ingest/{filename}", headers=headers)
        return True
|
|
@ -0,0 +1,20 @@
|
|||
from PIL import Image
|
||||
from io import BytesIO
|
||||
import pytesseract
|
||||
|
||||
from .ocrengine import OCREngine
|
||||
|
||||
class Tesseract(OCREngine):
    """OCR engine backed by pytesseract."""

    def __init__(self, path: str = None) -> None:
        """Optionally point pytesseract at a specific tesseract executable.

        Args:
            path (str, optional): Path to the tesseract command. When
                omitted, pytesseract's current setting is left untouched.
        """
        # Always defined, so the attribute can be read even without a custom path.
        self.customPath = None
        if path is not None:
            self._setTesseract(path)
        return None

    def _setTesseract(self, path: str) -> bool:
        """Record ``path`` and install it as pytesseract's command (module-wide)."""
        self.customPath = path
        pytesseract.pytesseract.tesseract_cmd = path
        return True

    def genChars(self, imgData: bytes, src: str, context: str = None) -> str:
        """Run OCR over an image.

        Args:
            imgData (bytes): Raw image bytes.
            src (str): Image source; not used by this engine.
            context (str, optional): Not used by this engine.

        Returns:
            str: Text recognised by ``pytesseract.image_to_string``.
        """
        # The context manager releases the decoded image once OCR is done.
        with Image.open(BytesIO(imgData)) as image:
            return pytesseract.image_to_string(image)
|
|
@ -0,0 +1,687 @@
|
|||
from abc import ABC, abstractmethod
|
||||
import typing
|
||||
from threading import Thread
|
||||
|
||||
import bs4
|
||||
import ebooklib
|
||||
from ebooklib import epub
|
||||
|
||||
from .descengine import DescEngine
|
||||
from .ocrengine import OCREngine
|
||||
from .langengine import LangEngine
|
||||
|
||||
|
||||
# Default option values; AltTextHTML starts from these and applies the
# caller's overrides on top.
DEFOPTIONS = {
    # Collect the text around an image (getContext) before describing it.
    "withContext": True,
    # Add a "hash" of the image bytes to every generated association.
    "withHash": True,
    # Generate associations with one thread per img tag instead of sequentially.
    "multiThreaded": True,
    # Dataflow selector: 1 runs genAltTextV1, any other value runs genAltTextV2.
    "version": 2,
}
|
||||
|
||||
|
||||
### ALTTEXT CLASSES
|
||||
class AltText(ABC):
    """Abstract interface shared by the alt-text generators of this module.

    Implementations hold a parsed document in ``self.data``, locate its img
    tags, generate alt-text through the configured engines and export the
    modified document.
    """

    @abstractmethod
    def setDescEngine(self, descEngine: DescEngine) -> bool:
        """Replace the description engine.

        Args:
            descEngine (DescEngine): Engine used to describe images.

        Returns:
            bool: True when the engine was set.
        """

    @abstractmethod
    def setOCREngine(self, ocrEngine: OCREngine) -> bool:
        """Replace the OCR engine.

        Args:
            ocrEngine (OCREngine): Engine used to read characters in images.

        Returns:
            bool: True when the engine was set.
        """

    @abstractmethod
    def setLangEngine(self, langEngine: LangEngine) -> bool:
        """Replace the language engine.

        Args:
            langEngine (LangEngine): Engine used to refine generated text.

        Returns:
            bool: True when the engine was set.
        """

    @abstractmethod
    def setOptions(self, options: dict) -> bool:
        """Update the current options.

        Args:
            options (dict): Any subset of the fields found in DEFOPTIONS.

        Returns:
            bool: True when the options were applied.
        """

    @abstractmethod
    def checkData(self) -> bool:
        """Verify that a document has been loaded.

        Returns:
            bool: True when data is present.

        Raises:
            Exception: When no data has been loaded.
        """

    # PARSING METHODS
    @abstractmethod
    def parse(self, data: str) -> typing.Union[bs4.BeautifulSoup, epub.EpubBook]:
        """Load a document from in-memory data.

        Args:
            data (str): HTML or EPUB data.

        Returns:
            bs4.BeautifulSoup | epub.EpubBook: The parsed object, also kept
                in self.data.
        """

    @abstractmethod
    def parseFile(self, filepath: str) -> typing.Union[bs4.BeautifulSoup, epub.EpubBook]:
        """Load a document from a file.

        Args:
            filepath (str): Location of the HTML or EPUB file.

        Returns:
            bs4.BeautifulSoup | epub.EpubBook: The parsed object, also kept
                in self.data.
        """

    @abstractmethod
    def getAllImgs(self) -> typing.List[bs4.element.Tag]:
        """Collect every img tag of the document.

        Returns:
            typing.List[bs4.element.Tag]: All img tags.
        """

    @abstractmethod
    def getNoAltImgs(self) -> typing.List[bs4.element.Tag]:
        """Collect the img tags that lack usable alt-text.

        A tag qualifies when it has no alt attribute or when alt.strip()
        is the empty string.

        Returns:
            typing.List[bs4.element.Tag]: The img tags without alt-text.
        """

    @abstractmethod
    def getImg(self, src: str) -> bs4.element.Tag:
        """Look up an img tag by its src.

        Args:
            src (str): Image source.

        Returns:
            bs4.element.Tag: The matching img tag.
        """

    @abstractmethod
    def setAlt(self, src: str, text: str) -> bs4.element.Tag:
        """Write alt-text onto the img tag with the given src.

        Args:
            src (str): Image source.
            text (str): Alt-text to store.

        Returns:
            bs4.element.Tag: The img tag after modification.
        """

    @abstractmethod
    def setAlts(self, associations: list[dict]) -> list[bs4.element.Tag]:
        """Write alt-text onto several img tags at once.

        Args:
            associations (list[dict]): Associations, each carrying the keys
                "src" and "alt".

        Returns:
            list[bs4.element.Tag]: The img tags after modification.
        """

    @abstractmethod
    def export(self) -> typing.Union[str, epub.EpubBook]:
        """Return the current document.

        Returns:
            str | epub.EpubBook: HTML text or an epub.EpubBook object.
        """

    @abstractmethod
    def exportToFile(self, path: str) -> str:
        """Write the current document to disk.

        Args:
            path (str): Destination file.

        Returns:
            str: The path that was written.
        """

    # GENERATIVE METHODS
    @abstractmethod
    def ingest(self) -> bool:
        """Upload the current document to the language engine.

        Ingestion lets the language engine reference the document.

        Returns:
            bool: True on success.

        Raises:
            Exception: When no langEngine is set.
        """

    @abstractmethod
    def degest(self) -> bool:
        """Remove the current document from the language engine.

        Returns:
            bool: True on success.

        Raises:
            Exception: When no langEngine is set.
        """

    @abstractmethod
    def getImgData(self, src: str) -> bytes:
        """Fetch the bytes of the image with the given src.

        Args:
            src (str): Image source.

        Returns:
            bytes: The image data.
        """

    @abstractmethod
    def getContext(self, tag: bs4.Tag) -> list[str]:
        """Find the text surrounding an img tag.

        Args:
            tag (bs4.Tag): The img tag to inspect.

        Returns:
            list[str]: Two elements: the text immediately before the tag,
                then the text immediately after it.
        """

    @abstractmethod
    def genChars(self, imgData: bytes, src: str) -> str:
        """Look for characters inside an image.

        Args:
            imgData (bytes): The image data.
            src (str): Image source.

        Returns:
            str: The characters found.
        """

    @abstractmethod
    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Describe an image.

        Args:
            imgData (bytes): The image data.
            src (str): Image source.
            context (str, optional): Context of the image, see getContext.
                Defaults to None.

        Returns:
            str: The description.
        """

    @abstractmethod
    def genAltTextV1(self, src: str) -> str:
        """Generate alt-text with the V1 dataflow.

        In V1 the description and the characters are generated, and
        optionally refined, independently of each other.

        Args:
            src (str): Image source.

        Returns:
            str: The generated alt-text.
        """

    @abstractmethod
    def genAltTextV2(self, src: str) -> str:
        """Generate alt-text with the V2 dataflow.

        In V2 the description and the characters are generated first and
        the alt-text is then produced from both of them together.

        Args:
            src (str): Image source.

        Returns:
            str: The generated alt-text.
        """

    @abstractmethod
    def genAltText(self, src: str) -> str:
        """Generate alt-text using the dataflow selected by the options.

        Args:
            src (str): Image source.

        Returns:
            str: The generated alt-text.
        """

    @abstractmethod
    def genAssociation(self, tag: bs4.element.Tag) -> dict:
        """Generate alt-text for one img tag and wrap it in an association.

        Args:
            tag (bs4.element.Tag): The img tag to process.

        Returns:
            dict: Association with the keys "src" and "alt", plus "hash"
                when the "withHash" option is True.
        """

    @abstractmethod
    def _genAltAssociationsST(self, tags: list[bs4.element.Tag]) -> list[dict]:
        """Generate associations for several img tags on a single thread.

        Args:
            tags (list[bs4.element.Tag]): The img tags to process.

        Returns:
            list[dict]: Associations with the keys "src" and "alt", plus
                "hash" when the "withHash" option is True.
        """

    @abstractmethod
    def _genAltAssociationsMT(self, tags: list[bs4.element.Tag]) -> list[dict]:
        """Generate associations for several img tags on multiple threads.

        Args:
            tags (list[bs4.element.Tag]): The img tags to process.

        Returns:
            list[dict]: Associations with the keys "src" and "alt", plus
                "hash" when the "withHash" option is True.
        """

    @abstractmethod
    def genAltAssociations(self, tags: list[bs4.element.Tag]) -> list[dict]:
        """Generate associations for several img tags.

        The multi or single threaded implementation is chosen from the
        current options.

        Args:
            tags (list[bs4.element.Tag]): The img tags to process.

        Returns:
            list[dict]: Associations with the keys "src" and "alt", plus
                "hash" when the "withHash" option is True.
        """
|
||||
|
||||
|
||||
### HELPER METHODS
|
||||
def getSoup(content: str) -> bs4.BeautifulSoup:
    """Parse markup with the HTML parser, falling back to the XML parser.

    Args:
        content (str): Markup to parse.

    Returns:
        bs4.BeautifulSoup: The parsed document.

    Raises:
        Exception: When both parsers fail; the message carries both errors.
    """
    try:
        soup = bs4.BeautifulSoup(content, "html.parser")
    except Exception as htmlFailure:
        # Second attempt happens inside the handler so both errors are in scope.
        try:
            soup = bs4.BeautifulSoup(content, features="xml")
        except Exception as xmlFailure:
            message = f"Failed to parse the document as HTML: {htmlFailure}\nFailed to parse the document as XML: {xmlFailure}"
            raise Exception(message)
    return soup
|
||||
|
||||
|
||||
### IMPLEMENTATIONS
|
||||
class AltTextHTML(AltText):
|
||||
def __init__(
|
||||
self,
|
||||
descEngine: DescEngine,
|
||||
ocrEngine: OCREngine = None,
|
||||
langEngine: LangEngine = None,
|
||||
options: dict = {},
|
||||
) -> None:
|
||||
self.data = None
|
||||
self.filename = None
|
||||
self.filedir = None
|
||||
|
||||
self.descEngine = descEngine
|
||||
self.ocrEngine = ocrEngine
|
||||
self.langEngine = langEngine
|
||||
|
||||
self.options = DEFOPTIONS
|
||||
for key in dict.keys(options):
|
||||
self.options[key] = options[key]
|
||||
|
||||
return None
|
||||
|
||||
def setDescEngine(self, descEngine: DescEngine) -> bool:
|
||||
self.descEngine = descEngine
|
||||
return True
|
||||
|
||||
def setOCREngine(self, ocrEngine: OCREngine) -> bool:
|
||||
self.descEngine = ocrEngine
|
||||
return True
|
||||
|
||||
def setLangEngine(self, langEngine: LangEngine) -> bool:
|
||||
self.descEngine = langEngine
|
||||
return True
|
||||
|
||||
def setOptions(self, options: dict) -> bool:
|
||||
for key in dict.keys(options):
|
||||
self.options[key] = options[key]
|
||||
return True
|
||||
|
||||
def checkData(self) -> bool:
|
||||
if not hasattr(self, "data") or self.data == None:
|
||||
raise Exception("no data set. please use .parse or .parseFile")
|
||||
return True
|
||||
|
||||
# PARSING METHODS
|
||||
def parse(self, html: str) -> bs4.BeautifulSoup:
|
||||
soup = getSoup(html)
|
||||
self.data = soup
|
||||
return soup
|
||||
|
||||
def parseFile(self, filepath: str) -> bs4.BeautifulSoup:
|
||||
with open(filepath, encoding="utf8") as html:
|
||||
self.filepath = filepath
|
||||
l = filepath.split("/")
|
||||
self.filename = l.pop()
|
||||
self.filedir = "/".join(l) + "/"
|
||||
return self.parse(html)
|
||||
|
||||
def getAllImgs(self) -> typing.List[bs4.element.Tag]:
|
||||
self.checkData()
|
||||
imgs = self.data.find_all("img")
|
||||
return imgs
|
||||
|
||||
def getNoAltImgs(self) -> typing.List[bs4.element.Tag]:
|
||||
imgs = self.getAllImgs()
|
||||
noalt = []
|
||||
for img in imgs:
|
||||
if not "alt" in img.attrs.keys() or img.attrs["alt"].strip() == "":
|
||||
noalt.append(img)
|
||||
return noalt
|
||||
|
||||
def getImg(self, src: str) -> bs4.element.Tag:
|
||||
self.checkData()
|
||||
img = self.data.find("img", src=src)
|
||||
return img
|
||||
|
||||
def setAlt(self, src: str, text: str) -> bs4.element.Tag:
|
||||
self.checkData()
|
||||
img = self.data.find("img", src=src)
|
||||
img.attrs["alt"] = text
|
||||
return img
|
||||
|
||||
def setAlts(self, associations: list[dict]) -> list[bs4.element.Tag]:
|
||||
self.checkData()
|
||||
tags = []
|
||||
for association in associations:
|
||||
tags.append(self.setAlt(association["src"], association["alt"]))
|
||||
return tags
|
||||
|
||||
def export(self) -> str:
|
||||
self.checkData()
|
||||
html = self.data.prettify()
|
||||
return html
|
||||
|
||||
def exportToFile(self, path: str) -> str:
|
||||
html = self.export()
|
||||
with open(path, "w", encoding="utf-8") as file:
|
||||
file.write(html)
|
||||
return path
|
||||
|
||||
# GENERATIVE METHODS
|
||||
def ingest(self) -> bool:
|
||||
if self.langEngine == None:
|
||||
raise Exception(
|
||||
"To use ingest, you must have an appropriate langEngine set."
|
||||
)
|
||||
with open(self.filepath, "rb") as html:
|
||||
self.langEngine.ingest(self.filename, html)
|
||||
return True
|
||||
|
||||
def degest(self) -> bool:
|
||||
if self.langEngine == None:
|
||||
raise Exception(
|
||||
"To use degest, you must have an appropriate langEngine set."
|
||||
)
|
||||
self.langEngine.degest(self.filename)
|
||||
return True
|
||||
|
||||
def __getImgFilePath(self, src: str) -> str:
|
||||
self.checkData()
|
||||
path = f"{self.filedir}{src}"
|
||||
return path
|
||||
|
||||
def getImgData(self, src: str) -> bytes:
|
||||
path = self.__getImgFilePath(src)
|
||||
with open(path, "rb") as bin:
|
||||
bin = bin.read()
|
||||
return bin
|
||||
|
||||
def getContext(self, tag: bs4.Tag) -> list[str]:
|
||||
context = [None, None]
|
||||
elem = tag
|
||||
text = ""
|
||||
try:
|
||||
text = elem.text.strip()
|
||||
while text == "":
|
||||
elem = elem.previous_element
|
||||
text = elem.text.strip()
|
||||
context[0] = text
|
||||
except:
|
||||
print("error 0")
|
||||
context[0] = None
|
||||
elem = tag
|
||||
text = ""
|
||||
try:
|
||||
text = elem.text.strip()
|
||||
while text == "":
|
||||
elem = elem.previous_element
|
||||
text = elem.text.strip()
|
||||
context[1] = text
|
||||
except:
|
||||
print("error 1")
|
||||
context[1] = None
|
||||
print(context)
|
||||
return context
|
||||
|
||||
def genChars(self, imgData: bytes, src: str) -> str:
|
||||
text = self.ocrEngine.genChars(imgData, src)
|
||||
return text
|
||||
|
||||
def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
|
||||
alt = self.descEngine.genDesc(imgData, src, context)
|
||||
return alt
|
||||
|
||||
def genAltTextV1(self, src: str) -> str:
|
||||
imgdata = self.getImgData(src)
|
||||
context = None
|
||||
if self.options["withContext"]:
|
||||
context = self.getContext(self.getImg(src))
|
||||
desc = self.genDesc(imgdata, src, context)
|
||||
if self.langEngine != None:
|
||||
chars = self.langEngine.refineDesc(desc)
|
||||
|
||||
alt = f"IMAGE CAPTION: {desc}"
|
||||
if self.ocrEngine != None:
|
||||
chars = self.genChars(imgdata, src)
|
||||
if self.langEngine != None:
|
||||
chars = self.langEngine.refineOCR(chars)
|
||||
alt = f"{alt}\nTEXT IN IMAGE: {chars}"
|
||||
|
||||
return alt
|
||||
|
||||
def genAltTextV2(self, src: str) -> str:
|
||||
imgdata = self.getImgData(src)
|
||||
context = [None, None]
|
||||
if self.options["withContext"]:
|
||||
context = self.getContext(self.getImg(src))
|
||||
desc = self.genDesc(imgdata, src, context)
|
||||
|
||||
chars = ""
|
||||
if self.ocrEngine != None:
|
||||
chars = self.genChars(imgdata, src).strip()
|
||||
|
||||
if self.langEngine == None:
|
||||
raise Exception("To use version 2, you must have a langEngine set.")
|
||||
|
||||
return self.langEngine.refineAlt(desc, chars, context, None)
|
||||
|
||||
def genAltText(self, src: str) -> str:
|
||||
if self.options["version"] == 1:
|
||||
return self.genAltTextV1(src)
|
||||
return self.genAltTextV2(src)
|
||||
|
||||
def genAssociation(
|
||||
self,
|
||||
tag: bs4.element.Tag,
|
||||
) -> dict:
|
||||
src = tag.attrs["src"]
|
||||
alt = self.genAltText(src)
|
||||
association = {"src": src, "alt": alt}
|
||||
if self.options["withHash"]:
|
||||
data = self.getImgData(src)
|
||||
association["hash"] = hash(data)
|
||||
return association
|
||||
|
||||
def _genAltAssociationsST(self, tags: list[bs4.element.Tag]) -> list[dict]:
|
||||
associations = []
|
||||
for tag in tags:
|
||||
associations.append(self.genAssociation(tag))
|
||||
return associations
|
||||
|
||||
def _genAltAssociationsMT(
|
||||
self,
|
||||
tags: list[bs4.element.Tag],
|
||||
) -> list[dict]:
|
||||
associations = []
|
||||
|
||||
def genAppend(tag):
|
||||
associations.append(self.genAssociation(tag))
|
||||
|
||||
threads: list[Thread] = []
|
||||
for tag in tags:
|
||||
thread = Thread(
|
||||
target=genAppend,
|
||||
args=(tag,),
|
||||
)
|
||||
thread.start()
|
||||
threads.append(thread)
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
return associations
|
||||
|
||||
def genAltAssociations(
    self,
    tags: list[bs4.element.Tag],
) -> list[dict]:
    """Generate associations for ``tags``, threaded or not per the options.

    Args:
        tags: Image tags to process.

    Returns:
        The result of ``_genAltAssociationsMT`` when
        ``self.options["multiThreaded"]`` is truthy, otherwise the result of
        ``_genAltAssociationsST``.
    """
    worker = (
        self._genAltAssociationsMT
        if self.options["multiThreaded"]
        else self._genAltAssociationsST
    )
    return worker(tags)
|
||||
|
||||
|
||||
class AltTextEPUB(AltText):
    """Alt-text helper that operates on an ``epub.EpubBook``.

    The book is held in ``self.data`` once ``parse`` or ``parseFile`` has
    been called.
    """

    def __init__(self) -> None:
        """Create an instance with no book loaded."""

    def checkData(self) -> bool:
        """Verify that a book has been loaded.

        Returns:
            True when ``self.data`` exists.

        Raises:
            Exception: If neither ``parse`` nor ``parseFile`` has set
                ``self.data`` yet.
        """
        if not hasattr(self, "data"):
            raise Exception("no data set. please use .parse or .parseFile")
        return True

    def parse(self, epub: epub.EpubBook) -> epub.EpubBook:
        """Adopt an already loaded book as the current data.

        Args:
            epub: The book to work on.

        Returns:
            The same book, now stored in ``self.data``.
        """
        self.data = epub
        return self.data

    def parseFile(self, filepath: str) -> epub.EpubBook:
        """Read an EPUB file and make it the current data.

        Args:
            filepath: Path handed to ``epub.read_epub``.

        Returns:
            The loaded book, also stored in ``self.data``.
        """
        book = epub.read_epub(filepath, {"ignore_ncx": True})
        self.data = book
        return book

    def getAllImgs(self) -> typing.List[bs4.element.Tag]:
        """Collect every ``<img>`` tag from all document items of the book.

        Returns:
            The tags in document order, then in order within each document.

        Raises:
            Exception: If no book has been loaded (see ``checkData``).
        """
        # Same guard as setAlt/export, so a missing book produces the
        # descriptive error instead of a bare AttributeError.
        self.checkData()
        imgs = []
        for doc in self.data.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            # features="xml"
            soup = getSoup(doc.get_content())
            imgs.extend(soup.find_all("img"))
        return imgs

    def getNoAltImgs(self) -> typing.List[bs4.element.Tag]:
        """Return the images whose ``alt`` is missing or only whitespace.

        Returns:
            The subset of ``getAllImgs()`` lacking usable alt-text.
        """
        return [
            img
            for img in self.getAllImgs()
            if "alt" not in img.attrs or img.attrs["alt"].strip() == ""
        ]

    def setAlt(self, src: str, text: str):
        """Set the alt-text of the first image whose ``src`` equals ``src``.

        The containing document is re-serialised with ``soup.prettify()``
        and written back to the book item as UTF-8.

        Args:
            src: Exact ``src`` attribute value to look for.
            text: New alt-text.

        Raises:
            Exception: If no book is loaded, or no image has that ``src``.
        """
        self.checkData()
        for doc in self.data.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            soup = getSoup(doc.get_content())
            for img in soup.find_all("img"):
                # Skip <img> tags without a src instead of dying with KeyError
                # before the image being searched for is reached.
                if "src" in img.attrs and img.attrs["src"] == src:
                    img.attrs["alt"] = text
                    doc.set_content(soup.prettify().encode("utf-8"))
                    return
        raise Exception(f"unable to find image with src '{src}'")

    def export(self) -> epub.EpubBook:
        """Return the current book.

        Raises:
            Exception: If no book has been loaded.
        """
        self.checkData()
        return self.data

    def exportToFile(self, path: str) -> str:
        """Write the current book to ``path`` with ``epub.write_epub``.

        Args:
            path: Destination file path.

        Returns:
            ``path``, unchanged.

        Raises:
            Exception: If no book has been loaded.
        """
        epub.write_epub(path, self.export())
        return path
|
|
@ -12,6 +12,10 @@ import importlib
|
|||
sys.path.append("c:/Users/ketha/Code/Senior D") #This will need to be changed system to system
|
||||
AltTextHTML = importlib.import_module("alt-text.src.alttext.alttext").AltTextHTML
|
||||
PrivateGPT = importlib.import_module("alt-text.src.alttext.langengine").PrivateGPT
|
||||
descengine_path = 'c:/Users/ketha/Code/Senior D/alt-text/src/alttext/descengine.py'
|
||||
|
||||
|
||||
|
||||
|
||||
# access downloaded books and go thru all of them
|
||||
# 1. parse html file to find img src to get the before and after context (using get context funct)
|
||||
|
@ -24,10 +28,10 @@ class AltTextGenerator(AltTextHTML):
|
|||
# uses the class from alttext.py
|
||||
# adds relevant benchmarking and saving methods
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
def __init__(self, api_key, descengine):
|
||||
super().__init__(descengine)
|
||||
self.benchmark_records = []
|
||||
|
||||
self.api_key = api_key
|
||||
#Use genAltTextV2
|
||||
#ADD benchmark time stamps
|
||||
def genAltTextV2(self, src: str) -> str:
|
||||
|
@ -109,26 +113,35 @@ class AltTextGenerator(AltTextHTML):
|
|||
writer.writerow(record)
|
||||
print(f"CSV file has been generated at: {csv_file_path}")
|
||||
|
||||
def import_descengine():
    """Load ``descengine.py`` from ``descengine_path`` as module ``descengine``.

    The module is registered in ``sys.modules`` under the name
    ``"descengine"`` before it is executed, then returned.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for
            ``descengine_path``.
        Exception: Whatever executing the module raises (for example
            ``FileNotFoundError`` when the path does not exist).
    """
    # `import importlib` alone does not guarantee the `util` submodule is
    # loaded, so import it explicitly where it is used.
    import importlib.util

    spec = importlib.util.spec_from_file_location("descengine", descengine_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load descengine from {descengine_path!r}")

    descengine = importlib.util.module_from_spec(spec)
    sys.modules["descengine"] = descengine
    try:
        spec.loader.exec_module(descengine)
    except BaseException:
        # Do not leave a half-initialised module behind for later imports.
        sys.modules.pop("descengine", None)
        raise
    return descengine
|
||||
|
||||
def automate_process(extr_folder : str):
    """Run alt-text generation over every HTML file of every book folder.

    Each sub-directory of ``extr_folder`` is treated as one book. Every
    ``.html`` file directly inside it is parsed and passed to the generator,
    and the collected benchmark records are then written to
    ``test_benchmark.csv``.

    Args:
        extr_folder: Directory holding one sub-directory per book. If it
            does not exist, no files are processed.
    """
    # Iterate through all images in a folder to produce a table (csv) with benchmarking
    descengine = import_descengine()
    minigpt4_key = descengine.REPLICATE_MODELS['minigpt4']

    generator = AltTextGenerator(minigpt4_key, descengine)

    # Iterate thru each book in folder (ex. downloaded_books)
    if os.path.exists(extr_folder):
        for book_id in os.listdir(extr_folder):
            book_path = os.path.join(extr_folder, book_id)
            if os.path.isdir(book_path):
                for filename in os.listdir(book_path):
                    filepath = os.path.join(book_path, filename)

                    # Check if the file is an HTML file
                    if filepath.endswith(".html"):

                        # Use the parseFile method to parse the HTML file for the genAltText function
                        # NOTE(review): genAltText is declared as taking an
                        # image `src: str` in alttext.py, but receives the
                        # parse result here -- confirm this is intended.
                        soup = generator.parseFile(filepath)
                        generator.genAltText(soup)

    generator.generate_csv('test_benchmark.csv', generator.benchmark_records)
|
||||
|
||||
|
|
Loading…
Reference in New Issue