Reformatted engines into folders with their own files. Fixed a small bug regarding finding context.

pull/12/head
xxmistacruzxx 2024-02-14 19:39:56 -05:00
parent 3ab8838903
commit 35b978b492
12 changed files with 282 additions and 299 deletions

View File

@ -6,9 +6,9 @@ import bs4
import ebooklib
from ebooklib import epub
from .descengine import DescEngine
from .ocrengine import OCREngine
from .langengine import LangEngine
from .descengine.descengine import DescEngine
from .ocrengine.ocrengine import OCREngine
from .langengine.langengine import LangEngine
DEFOPTIONS = {
@ -523,7 +523,7 @@ class AltTextHTML(AltText):
try:
text = elem.text.strip()
while text == "":
elem = elem.previous_element
elem = elem.next_element
text = elem.text.strip()
context[1] = text
except:
@ -564,7 +564,6 @@ class AltTextHTML(AltText):
if self.options["withContext"]:
context = self.getContext(self.getImg(src))
desc = self.genDesc(imgdata, src, context)
chars = ""
if self.ocrEngine != None:
chars = self.genChars(imgdata, src).strip()

View File

@ -1,133 +0,0 @@
from abc import ABC, abstractmethod
import base64
import os
import shutil
import subprocess
import uuid
import replicate
import vertexai
from vertexai.vision_models import ImageTextModel, Image
### DESCENGINE CLASSES
class DescEngine(ABC):
    """Abstract interface for image-description (captioning) engines."""

    @abstractmethod
    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Generates description for an image.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Source of image.
            context (str, optional): Context of image. See getContext in alttext for more information. Defaults to None.

        Returns:
            str: Generated description of the image.
        """
        pass
### IMPLEMENTATIONS
# Maps short, user-facing model names to fully-qualified Replicate model
# identifiers ("owner/name:version-hash") accepted by replicate.run.
REPLICATE_MODELS = {
    "blip": "salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746",
    "clip_prefix_caption": "rmokady/clip_prefix_caption:9a34a6339872a03f45236f114321fb51fc7aa8269d38ae0ce5334969981e4cd8",
    "clip-caption-reward": "j-min/clip-caption-reward:de37751f75135f7ebbe62548e27d6740d5155dfefdf6447db35c9865253d7e06",
    "img2prompt": "methexis-inc/img2prompt:50adaf2d3ad20a6f911a8a9e3ccf777b263b8596fbd2c8fc26e8888f8a0edbb5",
    "minigpt4": "daanelson/minigpt-4:b96a2f33cc8e4b0aa23eacfce731b9c41a7d9466d9ed4e167375587b54db9423",
    "image-captioning-with-visual-attention": "nohamoamary/image-captioning-with-visual-attention:9bb60a6baa58801aa7cd4c4fafc95fcf1531bf59b84962aff5a718f4d1f58986",
}
class ReplicateAPI(DescEngine):
    """DescEngine implementation that calls hosted models through the Replicate API."""

    def __init__(self, key: str, model: str = "blip") -> None:
        """Initializes the engine.

        Args:
            key (str): Replicate API token; exported to the environment for the client library.
            model (str, optional): Short model name; must be a key of REPLICATE_MODELS. Defaults to "blip".
        """
        self.__setKey(key)
        self.__setModel(model)

    def __getModel(self) -> str:
        # Fully-qualified model identifier currently in use.
        return self.model

    def __setModel(self, modelName: str) -> str:
        # Resolve a short model name to its full Replicate identifier.
        if modelName not in REPLICATE_MODELS:
            # ValueError is more precise than a bare Exception and is still
            # caught by any existing `except Exception` callers.
            raise ValueError(
                f"{modelName} is not a valid model. Please choose from {list(REPLICATE_MODELS.keys())}"
            )
        self.model = REPLICATE_MODELS[modelName]
        return self.model

    def __getKey(self) -> str:
        return self.key

    def __setKey(self, key: str) -> str:
        # The replicate client library reads the token from this env var.
        self.key = key
        os.environ["REPLICATE_API_TOKEN"] = key
        return self.key

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Generates a description by sending the image (as a data URL) to the model.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Source of image; only used to derive the file extension.
            context (str, optional): Extra context appended to the prompt. Defaults to None.

        Returns:
            str: Model output. NOTE(review): some Replicate models return
            iterators/streams rather than a plain string — confirm per model.
        """
        base64_utf8_str = base64.b64encode(imgData).decode("utf-8")
        model = self.__getModel()
        ext = src.split(".")[-1]
        prompt = "Create alternative-text for this image."
        # `is not None` instead of `!= None`: identity check, immune to __eq__ overrides.
        if context is not None:
            prompt = f"Create alternative-text for this image given the following context...\n{context}"
        dataurl = f"data:image/{ext};base64,{base64_utf8_str}"
        output = replicate.run(model, input={"image": dataurl, "prompt": prompt})
        return output
class BlipLocal(DescEngine):
    """DescEngine that shells out to a local BLIP image-captioning checkout."""

    def __init__(self, path: str) -> None:
        """Args:
            path (str): Folder containing the BLIP checkout (with inference.py).
        """
        self.__setPath(path)

    def __setPath(self, path: str) -> str:
        self.path = path
        return self.path

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Generates a description by running BLIP's inference.py over the image.

        Writes the image into a unique scratch folder, invokes inference.py on
        it, reads the caption file back, and always cleans the folder up.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Source of image; only used to derive the file extension.
            context (str, optional): Unused by this engine. Defaults to None.

        Returns:
            str: The caption portion of BLIP's "<file>,<caption>" output line.
        """
        folderName = uuid.uuid4()  # unique scratch dir so concurrent calls don't collide
        ext = src.split(".")[-1]
        workdir = f"{self.path}/{folderName}"
        os.makedirs(workdir)
        try:
            # Context manager guarantees the image file is flushed and closed
            # before inference.py reads it (original leaked the handle).
            with open(f"{workdir}/image.{ext}", "wb") as imgfile:
                imgfile.write(imgData)
            # Argument-list form avoids shell parsing; a plain command string
            # without shell=True only works on Windows.
            subprocess.call(
                ["python", f"{self.path}/inference.py", "-i", f"./{folderName}", "--batch", "1", "--gpu", "0"],
                cwd=f"{self.path}",
            )
            with open(f"{workdir}/0_captions.txt", "r") as capfile:
                desc = capfile.read()
        finally:
            # Remove the scratch folder even if inference or reading fails.
            shutil.rmtree(workdir)
        return desc.split(",")[1]
class GoogleVertexAPI(DescEngine):
    """DescEngine implementation backed by Google Vertex AI's image captioning model."""

    def __init__(self, project_id: str, location: str, gac_path: str) -> None:
        """Initializes the Vertex AI SDK and credential discovery.

        Args:
            project_id (str): Google Cloud project id.
            location (str): Google Cloud region for Vertex AI.
            gac_path (str): Path to a service-account credentials JSON file.
        """
        self.project_id = project_id
        self.location = location
        vertexai.init(project=self.project_id, location=self.location)
        self.gac_path = gac_path
        # Google client libraries discover credentials through this env var.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.gac_path
        return None

    def __setProject(self, project_id: str):
        # Re-initializes the SDK when the project changes.
        self.project_id = project_id
        vertexai.init(project=self.project_id, location=self.location)

    def __setLocation(self, location: str):
        # Re-initializes the SDK when the region changes.
        self.location = location
        vertexai.init(project=self.project_id, location=self.location)

    def __setGAC(self, gac_path: str):
        # Updates the credentials file used by Google client libraries.
        self.gac_path = gac_path
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.gac_path

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Generates a caption for the image using Vertex AI's imagetext model.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Source of image (unused by this engine).
            context (str, optional): Unused by this engine. Defaults to None.

        Returns:
            str: The single best caption returned by the model.
        """
        model = ImageTextModel.from_pretrained("imagetext@001")
        source_image = Image(imgData)
        captions = model.get_captions(
            image=source_image,
            number_of_results=1,
            language="en",
        )
        return captions[0]

View File

@ -0,0 +1,29 @@
import os
import shutil
import subprocess
import uuid
from .descengine import DescEngine
class BlipLocal(DescEngine):
    """DescEngine that shells out to a local BLIP image-captioning checkout."""

    def __init__(self, path: str) -> None:
        """Args:
            path (str): Folder containing the BLIP checkout (with inference.py).
        """
        self.__setPath(path)

    def __setPath(self, path: str) -> str:
        self.path = path
        return self.path

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Generates a description by running BLIP's inference.py over the image.

        Writes the image into a unique scratch folder, invokes inference.py on
        it, reads the caption file back, and always cleans the folder up.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Source of image; only used to derive the file extension.
            context (str, optional): Unused by this engine. Defaults to None.

        Returns:
            str: The caption portion of BLIP's "<file>,<caption>" output line.
        """
        folderName = uuid.uuid4()  # unique scratch dir so concurrent calls don't collide
        ext = src.split(".")[-1]
        workdir = f"{self.path}/{folderName}"
        os.makedirs(workdir)
        try:
            # Context manager guarantees the image file is flushed and closed
            # before inference.py reads it (original leaked the handle).
            with open(f"{workdir}/image.{ext}", "wb") as imgfile:
                imgfile.write(imgData)
            # Argument-list form avoids shell parsing; a plain command string
            # without shell=True only works on Windows.
            # NOTE(review): "py" is the Windows launcher — confirm target platform.
            subprocess.call(
                ["py", "inference.py", "-i", f"./{folderName}", "--batch", "1", "--gpu", "0"],
                cwd=f"{self.path}",
            )
            with open(f"{workdir}/0_captions.txt", "r") as capfile:
                desc = capfile.read()
        finally:
            # Remove the scratch folder even if inference or reading fails.
            shutil.rmtree(workdir)
        return desc.split(",")[1]

View File

@ -0,0 +1,17 @@
from abc import ABC, abstractmethod
### DESCENGINE CLASSES
class DescEngine(ABC):
    """Abstract interface for image-description (captioning) engines."""

    @abstractmethod
    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Generates description for an image.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Source of image.
            context (str, optional): Context of image. See getContext in alttext for more information. Defaults to None.

        Returns:
            str: Generated description of the image.
        """
        pass

View File

@ -0,0 +1,37 @@
import os
import vertexai
from vertexai.vision_models import ImageTextModel, Image
from .descengine import DescEngine
class GoogleVertexAPI(DescEngine):
    """Caption images with Google Vertex AI's hosted image-to-text model."""

    def __init__(self, project_id: str, location: str, gac_path: str) -> None:
        """Set up the Vertex AI SDK and credential discovery.

        Args:
            project_id (str): Google Cloud project id.
            location (str): Google Cloud region for Vertex AI.
            gac_path (str): Path to a service-account credentials JSON file.
        """
        self.project_id = project_id
        self.location = location
        vertexai.init(project=self.project_id, location=self.location)
        self.gac_path = gac_path
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.gac_path

    def __setProject(self, project_id: str):
        """Switch projects and re-initialize the SDK."""
        self.project_id = project_id
        vertexai.init(project=self.project_id, location=self.location)

    def __setLocation(self, location: str):
        """Switch regions and re-initialize the SDK."""
        self.location = location
        vertexai.init(project=self.project_id, location=self.location)

    def __setGAC(self, gac_path: str):
        """Point Google client libraries at a different credentials file."""
        self.gac_path = gac_path
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.gac_path

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Return the single best caption the imagetext@001 model produces."""
        captioner = ImageTextModel.from_pretrained("imagetext@001")
        results = captioner.get_captions(
            image=Image(imgData),
            number_of_results=1,
            language="en",
        )
        return results[0]

View File

@ -0,0 +1,51 @@
import replicate
import base64
import os
from .descengine import DescEngine
# Maps short, user-facing model names to fully-qualified Replicate model
# identifiers ("owner/name:version-hash") accepted by replicate.run.
REPLICATE_MODELS = {
    "blip": "salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746",
    "clip_prefix_caption": "rmokady/clip_prefix_caption:9a34a6339872a03f45236f114321fb51fc7aa8269d38ae0ce5334969981e4cd8",
    "clip-caption-reward": "j-min/clip-caption-reward:de37751f75135f7ebbe62548e27d6740d5155dfefdf6447db35c9865253d7e06",
    "img2prompt": "methexis-inc/img2prompt:50adaf2d3ad20a6f911a8a9e3ccf777b263b8596fbd2c8fc26e8888f8a0edbb5",
    "minigpt4": "daanelson/minigpt-4:b96a2f33cc8e4b0aa23eacfce731b9c41a7d9466d9ed4e167375587b54db9423",
    "image-captioning-with-visual-attention": "nohamoamary/image-captioning-with-visual-attention:9bb60a6baa58801aa7cd4c4fafc95fcf1531bf59b84962aff5a718f4d1f58986",
}
class ReplicateAPI(DescEngine):
    """DescEngine implementation that calls hosted models through the Replicate API."""

    def __init__(self, key: str, model: str = "blip") -> None:
        """Initializes the engine.

        Args:
            key (str): Replicate API token; exported to the environment for the client library.
            model (str, optional): Short model name; must be a key of REPLICATE_MODELS. Defaults to "blip".
        """
        self.__setKey(key)
        self.__setModel(model)

    def __getModel(self) -> str:
        # Fully-qualified model identifier currently in use.
        return self.model

    def __setModel(self, modelName: str) -> str:
        # Resolve a short model name to its full Replicate identifier.
        if modelName not in REPLICATE_MODELS:
            # ValueError is more precise than a bare Exception and is still
            # caught by any existing `except Exception` callers.
            raise ValueError(
                f"{modelName} is not a valid model. Please choose from {list(REPLICATE_MODELS.keys())}"
            )
        self.model = REPLICATE_MODELS[modelName]
        return self.model

    def __getKey(self) -> str:
        return self.key

    def __setKey(self, key: str) -> str:
        # The replicate client library reads the token from this env var.
        self.key = key
        os.environ["REPLICATE_API_TOKEN"] = key
        return self.key

    def genDesc(self, imgData: bytes, src: str, context: str = None) -> str:
        """Generates a description by sending the image (as a data URL) to the model.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Source of image; only used to derive the file extension.
            context (str, optional): Extra context appended to the prompt. Defaults to None.

        Returns:
            str: Model output. NOTE(review): some Replicate models return
            iterators/streams rather than a plain string — confirm per model.
        """
        base64_utf8_str = base64.b64encode(imgData).decode("utf-8")
        model = self.__getModel()
        ext = src.split(".")[-1]
        prompt = "Create alternative-text for this image."
        # `is not None` instead of `!= None`: identity check, immune to __eq__ overrides.
        if context is not None:
            prompt = f"Create alternative-text for this image given the following context...\n{context}"
        dataurl = f"data:image/{ext};base64,{base64_utf8_str}"
        output = replicate.run(model, input={"image": dataurl, "prompt": prompt})
        return output

View File

@ -0,0 +1,102 @@
from abc import ABC, abstractmethod
class LangEngine(ABC):
    """Abstract interface for language-model engines used to refine alt-text."""

    @abstractmethod
    def _completion(self, prompt: str) -> str:
        """Sends message to language model and returns its response.

        Args:
            prompt (str): Prompt to send to language model.

        Returns:
            str: Response from language model.
        """
        pass

    @abstractmethod
    def refineDesc(self, description: str) -> str:
        """Refines description of an image.

        Used in V1 Dataflow.

        Args:
            description (str): Description of an image.

        Returns:
            str: Refinement of description.
        """
        pass

    @abstractmethod
    def refineOCR(self, chars: str) -> str:
        """Refines characters found in an image.

        Used in V1 Dataflow.

        Args:
            chars (str): Characters found in an image.

        Returns:
            str: Refinement of characters.
        """
        pass

    @abstractmethod
    def genPrompt(self, desc: str, chars: str, context: list[str], caption: str) -> str:
        """Generates prompt to send to language model in V2 Dataflow.

        Args:
            desc (str): Description of an image.
            chars (str): Characters found in an image.
            context (list[str]): Context of an image. See getContext in alttext for more information.
            caption (str): Caption of an image.

        Returns:
            str: Prompt to send to language model.
        """
        pass

    @abstractmethod
    def refineAlt(
        self,
        desc: str,
        chars: str = None,
        context: list[str] = None,
        caption: str = None,
    ) -> str:
        """Generates alt-text for an image.

        Used in V2 Dataflow.

        Args:
            desc (str): Description of an image.
            chars (str, optional): Characters found in an image. Defaults to None.
            context (list[str], optional): Context of an image. See getContext in alttext for more information. Defaults to None.
            caption (str, optional): Caption of an image. Defaults to None.

        Returns:
            str: Alt-text for an image.
        """
        pass

    @abstractmethod
    def ingest(self, filename: str, binary) -> bool:
        """Ingests a file into the language model.

        Args:
            filename (str): Name of file.
            binary: Data of file (presumably a bytes-like object or an open
                binary stream — confirm against concrete implementations).

        Returns:
            bool: True if successful.
        """
        pass

    @abstractmethod
    def degest(self, filename: str) -> bool:
        """Removes a previously ingested file from the language model.

        NOTE(review): "degest" looks like a typo (for "digest"/"de-ingest"),
        but renaming would break existing implementers and callers, so kept.

        Args:
            filename (str): Name of file.

        Returns:
            bool: True if successful.
        """
        pass

View File

@ -1,111 +1,7 @@
from abc import ABC, abstractmethod
import requests
from .langengine import LangEngine
### LANGENGINE CLASSES
class LangEngine(ABC):
    """Abstract interface for language-model engines used to refine alt-text."""

    @abstractmethod
    def _completion(self, prompt: str) -> str:
        """Sends message to language model and returns its response.

        Args:
            prompt (str): Prompt to send to language model.

        Returns:
            str: Response from language model.
        """
        pass

    @abstractmethod
    def refineDesc(self, description: str) -> str:
        """Refines description of an image.

        Used in V1 Dataflow.

        Args:
            description (str): Description of an image.

        Returns:
            str: Refinement of description.
        """
        pass

    @abstractmethod
    def refineOCR(self, chars: str) -> str:
        """Refines characters found in an image.

        Used in V1 Dataflow.

        Args:
            chars (str): Characters found in an image.

        Returns:
            str: Refinement of characters.
        """
        pass

    @abstractmethod
    def genPrompt(self, desc: str, chars: str, context: list[str], caption: str) -> str:
        """Generates prompt to send to language model in V2 Dataflow.

        Args:
            desc (str): Description of an image.
            chars (str): Characters found in an image.
            context (list[str]): Context of an image. See getContext in alttext for more information.
            caption (str): Caption of an image.

        Returns:
            str: Prompt to send to language model.
        """
        pass

    @abstractmethod
    def refineAlt(
        self,
        desc: str,
        chars: str = None,
        context: list[str] = None,
        caption: str = None,
    ) -> str:
        """Generates alt-text for an image.

        Used in V2 Dataflow.

        Args:
            desc (str): Description of an image.
            chars (str, optional): Characters found in an image. Defaults to None.
            context (list[str], optional): Context of an image. See getContext in alttext for more information. Defaults to None.
            caption (str, optional): Caption of an image. Defaults to None.

        Returns:
            str: Alt-text for an image.
        """
        pass

    @abstractmethod
    def ingest(self, filename: str, binary) -> bool:
        """Ingests a file into the language model.

        Args:
            filename (str): Name of file.
            binary: Data of file (presumably a bytes-like object or an open
                binary stream — confirm against concrete implementations).

        Returns:
            bool: True if successful.
        """
        pass

    @abstractmethod
    def degest(self, filename: str) -> bool:
        """Removes a previously ingested file from the language model.

        NOTE(review): "degest" looks like a typo (for "digest"/"de-ingest"),
        but renaming would break existing implementers and callers, so kept.

        Args:
            filename (str): Name of file.

        Returns:
            bool: True if successful.
        """
        pass
### IMPLEMENTATIONS
class PrivateGPT(LangEngine):
def __init__(self, host) -> None:
self.host = host

View File

@ -1,38 +0,0 @@
from abc import ABC, abstractmethod
from PIL import Image
from io import BytesIO
import pytesseract
### OCRENGINE ABSTRACT
class OCREngine(ABC):
    """Abstract interface for OCR engines that extract text from images."""

    @abstractmethod
    def genChars(self, imgData: bytes, src: str, context: str = None) -> str:
        """Searches for characters in an image.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Image source.
            context (str, optional): Context of an image. See getContext in alttext for more information. Defaults to None.

        Returns:
            str: Characters found in an image.
        """
        pass
### IMPLEMENTATIONS
class Tesseract(OCREngine):
    """OCREngine backed by the pytesseract wrapper around Tesseract-OCR."""

    def __init__(self) -> None:
        # No custom tesseract binary configured until _setTesseract is called.
        self.customPath = None

    def _setTesseract(self, path: str) -> bool:
        """Point pytesseract at a specific tesseract executable; returns True."""
        self.customPath = path
        pytesseract.pytesseract.tesseract_cmd = path
        return True

    def genChars(self, imgData: bytes, src: str, context: str = None) -> str:
        """Decode the raw bytes into an image and return the OCR'd text."""
        buffer = BytesIO(imgData)
        picture = Image.open(buffer)
        return pytesseract.image_to_string(picture)

View File

@ -0,0 +1,16 @@
from abc import ABC, abstractmethod
class OCREngine(ABC):
    """Abstract interface for OCR engines that extract text from images."""

    @abstractmethod
    def genChars(self, imgData: bytes, src: str, context: str = None) -> str:
        """Searches for characters in an image.

        Args:
            imgData (bytes): Image data in bytes.
            src (str): Image source.
            context (str, optional): Context of an image. See getContext in alttext for more information. Defaults to None.

        Returns:
            str: Characters found in an image.
        """
        pass

View File

@ -0,0 +1,19 @@
from PIL import Image
from io import BytesIO
import pytesseract
from .ocrengine import OCREngine
class Tesseract(OCREngine):
    """Extracts text from images using the Tesseract OCR engine via pytesseract."""

    def __init__(self) -> None:
        # Path to a custom tesseract binary; None means use the default lookup.
        self.customPath = None
        return None

    def _setTesseract(self, path: str) -> bool:
        """Configure pytesseract to use the tesseract binary at *path*."""
        self.customPath = path
        pytesseract.pytesseract.tesseract_cmd = path
        return True

    def genChars(self, imgData: bytes, src: str, context: str = None) -> str:
        """Run OCR over the raw image bytes and return the recognized text."""
        return pytesseract.image_to_string(Image.open(BytesIO(imgData)))

View File

@ -2,9 +2,9 @@ import sys
sys.path.append("../")
import src.alttext.alttext as alttext
import src.alttext.descengine as descengine
import src.alttext.ocrengine as ocrengine
import src.alttext.langengine as langengine
from src.alttext.descengine.bliplocal import BlipLocal
from src.alttext.ocrengine.tesseract import Tesseract
from src.alttext.langengine.privategpt import PrivateGPT
import keys
# HTML BOOK FILEPATHS
@ -23,22 +23,10 @@ HOST1 = "http://127.0.0.1:8001"
def testHTML():
print("TESTING HTML")
# alt: alttext.AltTextHTML = alttext.AltTextHTML(
# # descengine.ReplicateAPI(keys.ReplicateEricKey(), "blip"),
# # ocrengine.Tesseract(),
# # langengine.PrivateGPT(HOST1),
# )
# alt: alttext.AltTextHTML = alttext.AltTextHTML(
# descengine.BlipLocal("C:/Users/dacru/Desktop/Codebase/ALT/image-captioning"),
# options={"version": 1},
# )
alt: alttext.AltTextHTML = alttext.AltTextHTML(
descengine.BlipLocal("C:/Users/dacru/Desktop/Codebase/ALT/image-captioning"),
ocrengine.Tesseract(),
langengine.PrivateGPT(HOST1),
BlipLocal("C:/Users/dacru/Desktop/ALT/image-captioning"),
Tesseract(),
PrivateGPT(HOST1),
)
alt.parseFile(HTML_HUNTING)