diff --git a/src/alttext/langengine/openaiapi.py b/src/alttext/langengine/openaiapi.py
new file mode 100644
index 0000000..35828a8
--- /dev/null
+++ b/src/alttext/langengine/openaiapi.py
@@ -0,0 +1,140 @@
+import openai
+import os
+
+# Alt-text guidelines shared by the refineDesc and genPrompt prompts.
+_ALT_TEXT_GUIDELINES = """1. Prioritize information in text alternative:
+Aim to put the most important information at the beginning.
+2. Length of the text alternative:
+The alt text should be the most concise description possible of the image's purpose. If anything more than a short phrase or sentence is needed, it would be better to use one of the long description methods discussed in complex images.
+3. Superfluous information in the text alternative:
+Usually, there's no need to include words like “image”, “icon”, or “picture” in the alt text. People who can see will know this already, and screen readers announce the presence of an image. In some situations, it may be important to distinguish between paintings, photographs, or illustrations, etc., but it's best to avoid the more generic use of the terms."""
+
+
+class OpenAIAPI:
+    """Language engine backed by the OpenAI chat completions API."""
+
+    def __init__(self, key: str, model: str) -> None:
+        """Store the API key and model name, then create the client."""
+        self.__setKey(key)
+        self.__setModel(model)
+        # Pass the key explicitly rather than relying on the environment
+        # variable that __setKey exports.
+        self.client = openai.OpenAI(api_key=self.key)
+
+    def __setKey(self, key: str) -> bool:
+        """Keep the key and export it as OPENAI_API_KEY; returns True."""
+        self.key = key
+        os.environ["OPENAI_API_KEY"] = key
+        return True
+
+    def __setModel(self, model: str) -> bool:
+        """Keep the chat model name; returns True."""
+        self.model = model
+        return True
+
+    def _completion(self, prompt: str) -> str:
+        """Send prompt as the user message; return the first choice's text."""
+        completion = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": prompt},
+            ],
+        )
+
+        return completion.choices[0].message.content
+
+    def refineDesc(self, description: str) -> str:
+        """Ask the model to refine an image-captioning result into alt-text."""
+        prompt = f"""The following string surrounded with '///' was generated by an Image Captioning AI when ran on some arbitrary image.
+///{description}///
+
+Your goal is to refine the string to be inserted as alt-text for an image in an Ebook.
+
+Here are guidelines to follow...
+{_ALT_TEXT_GUIDELINES}
+
+Format your response as...
+The refined string is:
+
+If the string is empty, simply respond with...
+The refined string is: N/A"""
+        return self._completion(prompt)
+
+    def refineOCR(self, chars: str) -> str:
+        """Ask the model to clean up text produced by OCR."""
+        prompt = f"""The following string surrounded with '///' was generated by an Optical Character Recognition software when ran on some arbitrary image.
+///
+{chars}
+///
+
+Your goal is to refine the string.
+There may be random/excess spaces or other characters in the string, please remove them.
+Do not surround the refined string in quotation marks.
+
+Format your response as...
+The refined string is:
+
+If the string is empty, simply respond with...
+The refined string is: N/A"""
+        return self._completion(prompt)
+
+    def genPrompt(self, desc: str, chars: str, context: list[str], caption: str) -> str:
+        """Build the alt-text prompt; empty or missing inputs are left out.
+
+        context is [text before the image, text after the image]. It may be
+        None or shorter than two items, in which case the missing side is
+        skipped instead of raising.
+        """
+        before_text = context[0] if context else None
+        after_text = context[1] if context and len(context) > 1 else None
+
+        ocr = ""
+        if chars:
+            ocr = f"\nThe following string surrounded with '///' was generated by an Optical Character Recognition software when ran on the image.\n///{chars}///"
+        before = ""
+        if before_text:
+            before = f"\nThe following string surrounded with '///' is the nearest text found before the image.\n///{before_text}///"
+        after = ""
+        if after_text:
+            after = f"\nThe following string surrounded with '///' is the nearest text found after the image.\n///{after_text}///"
+        cap = ""
+        if caption:
+            cap = f"\nThe following string surrounded with '///' is a caption in the Ebook for the image.\n///{caption}///"
+
+        prompt = f"""The following information is regarding an image found in an Ebook with no alternative-text.
+The following string surrounded with '///' was generated by an Image Captioning AI when ran on the image.
+///{desc}///{ocr}{cap}{before}{after}
+
+Your goal is to create alternative-text for the image given the prior information.
+
+Here are guidelines to follow to create quality alt-text...
+{_ALT_TEXT_GUIDELINES}
+
+Using all of the information stated, please generate alt-text for the image.
+In your response, please only give the alt-text."""
+        return prompt
+
+    def refineAlt(
+        self,
+        desc: str,
+        chars: str = None,
+        context: list[str] = None,
+        caption: str = None,
+    ) -> str:
+        """Build the alt-text prompt from the given inputs and return the model's reply."""
+        prompt = self.genPrompt(
+            desc,
+            chars,
+            context,
+            caption,
+        )
+        return self._completion(prompt)
+
+    def ingest(self, filename: str, binary) -> bool:
+        """Not implemented for this engine."""
+        raise NotImplementedError
+
+    def degest(self, filename: str) -> bool:
+        """Not implemented for this engine."""
+        raise NotImplementedError
diff --git a/tests/test.py b/tests/test.py
index 0e89ed3..fbac329 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -3,8 +3,10 @@ import sys
 sys.path.append("../")
 import src.alttext.alttext as alttext
 from src.alttext.descengine.bliplocal import BlipLocal
+from src.alttext.descengine.replicateapi import ReplicateAPI
 from src.alttext.ocrengine.tesseract import Tesseract
 from src.alttext.langengine.privategpt import PrivateGPT
+from src.alttext.langengine.openaiapi import OpenAIAPI
 import keys
 
 # HTML BOOK FILEPATHS
@@ -24,14 +26,16 @@ HOST1 = "http://127.0.0.1:8001"
 def testHTML():
     print("TESTING HTML")
     alt: alttext.AltTextHTML = alttext.AltTextHTML(
-        BlipLocal("C:/Users/dacru/Desktop/ALT/image-captioning"),
+        # BlipLocal("C:/Users/dacru/Desktop/ALT/image-captioning"),
+        ReplicateAPI(keys.ReplicateEricKey()),
         Tesseract(),
-        PrivateGPT(HOST1),
+        # PrivateGPT(HOST1),
+        OpenAIAPI(keys.OpenAIKey(), "gpt-3.5-turbo"),
     )
 
     alt.parseFile(HTML_HUNTING)
     imgs = alt.getAllImgs()
-    src = imgs[4].attrs["src"]
+    src = imgs[7].attrs["src"]
     print(src)
     print(alt.genAltText(src))
 