diff --git a/.env b/.env deleted file mode 100644 index e69de29..0000000 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..7f850e4 --- /dev/null +++ b/.env.example @@ -0,0 +1,39 @@ +# DATABASE CONFIG +DATABASE_NAME=postgres +DATABASE_HOST=127.0.0.1 +DATABASE_USER=postgres +DATABASE_PASSWORD=testpassword +DATABASE_PORT=5432 + +# GENERAL CONFIG +ALT_WITH_CONTEXT=1 +ALT_WITH_HASH=1 +ALT_MULTITHREADED=0 +## ALT_VERSION OPTIONS: 1, 2 +ALT_VERSION=2 + +## DESC_ENGINE OPTIONS: replicateapi, bliplocal, googlevertexapi +DESC_ENGINE=replicateapi +## OCR_ENGINE OPTIONS: tesseract +OCR_ENGINE=tesseract +## LANG_ENGINE OPTIONS: privategpt +LANG_ENGINE=privategpt + +# DESC_ENGINE CONFIG OPTIONS +## REPLICATEAPI +REPLICATE_KEY=example_key +## BLIPLOCAL +BLIPLOCAL_DIR=/path/to/image-captioning +## GOOGLEVERTEXAPI +VERTEX_PROJECT_ID=example-123456 +### VERTEX_LOCATION OPTIONS: https://cloud.google.com/vertex-ai/docs/general/locations +VERTEX_LOCATION=us-central1 +VERTEX_GAC_PATH=/path/to/vertex-key.json + +# OCR_ENGINE CONFIG OPTIONS +## TESSERACT +TESSERACT_PATH=/path/to/tesseract.exe + +# LANG_ENGINE CONFIG OPTIONS +## PRIVATEGPT +PRIVATEGPT_HOST=http://localhost:8001 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 2e4a69f..ebd18da 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -*/__pycache__/ +**/__pycache__/ +.env /books /covers \ No newline at end of file diff --git a/alttextbackend/__pycache__/__init__.cpython-311.pyc b/alttextbackend/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 897ce98..0000000 Binary files a/alttextbackend/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/alttextbackend/__pycache__/settings.cpython-311.pyc b/alttextbackend/__pycache__/settings.cpython-311.pyc deleted file mode 100644 index c11f49a..0000000 Binary files a/alttextbackend/__pycache__/settings.cpython-311.pyc and /dev/null differ diff --git a/alttextbackend/__pycache__/urls.cpython-311.pyc 
b/alttextbackend/__pycache__/urls.cpython-311.pyc deleted file mode 100644 index 5b4ab8d..0000000 Binary files a/alttextbackend/__pycache__/urls.cpython-311.pyc and /dev/null differ diff --git a/alttextbackend/__pycache__/wsgi.cpython-311.pyc b/alttextbackend/__pycache__/wsgi.cpython-311.pyc deleted file mode 100644 index f57697c..0000000 Binary files a/alttextbackend/__pycache__/wsgi.cpython-311.pyc and /dev/null differ diff --git a/alttextbackend/data/analyze.py b/alttextbackend/data/analyze.py new file mode 100644 index 0000000..21912a1 --- /dev/null +++ b/alttextbackend/data/analyze.py @@ -0,0 +1,132 @@ +import os +import threading + +import bs4 +from alttext import alttext +from alttext.descengine.bliplocal import BlipLocal +from alttext.descengine.replicateapi import ReplicateAPI +from alttext.langengine.privategpt import PrivateGPT +from alttext.ocrengine.tesseract import Tesseract +from django.core.files.storage import default_storage + +from .postgres import books, images + +# from alttext.descengine.googlevertexapi import GoogleVertexAPI + + +def createAnalyzer(): + descEngine = None + match os.environ["DESC_ENGINE"].lower(): + case "replicateapi": + descEngine = ReplicateAPI(os.environ["REPLICATE_KEY"]) + case "bliplocal": + descEngine = BlipLocal(os.environ["BLIPLOCAL_DIR"]) + # case "googlevertexapi": + # descEngine = GoogleVertexAPI(os.environ["VERTEX_PROJECT_ID"], os.environ["VERTEX_LOCATION"], os.environ["VERTEX_GAC_PATH"]) + case _: + raise ValueError("Invalid description engine") + + ocrEngine = None + match os.environ["OCR_ENGINE"].lower(): + case "tesseract": + ocrEngine = Tesseract() + case _: + raise ValueError("Invalid OCR engine") + + langEngine = None + match os.environ["LANG_ENGINE"].lower(): + case "privategpt": + langEngine = PrivateGPT(os.environ["PRIVATEGPT_HOST"]) + case _: + raise ValueError("Invalid language engine") + + options = { + "withContext": bool(int(os.environ["ALT_WITH_CONTEXT"])), + "withHash": 
def getSize(path: str):
    """Return the combined size, in bytes, of every file found beneath *path*.

    The directory tree is walked recursively with ``os.walk``; a path that
    does not exist yields no entries and therefore a size of ``0``.
    """
    return sum(
        os.path.getsize(os.path.join(folder, entry))
        for folder, _, entries in os.walk(path)
        for entry in entries
    )
def jsonifyBook(book: tuple):
    """Convert a ``books`` row tuple into a JSON-friendly dictionary.

    The tuple is read positionally in the column order of the ``books``
    table: id, title, size, status, numImages, coverExt.
    """
    columns = ("id", "title", "size", "status", "numImages", "coverExt")
    # Index explicitly (rather than zip) so a short row still raises IndexError.
    return {column: book[position] for position, column in enumerate(columns)}
def updateBook(id: str, title: str = None, status: str = None, coverExt: str = None):
    """Update the title, status and/or cover extension of the book ``id``.

    Only arguments with a truthy value are written; ``None`` (or an empty
    string) leaves the corresponding column untouched. When nothing needs to
    be written the function returns without opening a database connection.

    The connection is always closed, even if the query or commit raises.
    """
    assignments = []
    params = []
    # Column names come from this fixed tuple, never from caller input.
    for column, value in (("title", title), ("status", status), ("coverext", coverExt)):
        if value:
            assignments.append(f"{column} = %s")
            params.append(value)

    if not assignments:
        return

    query = f"UPDATE books SET {', '.join(assignments)} WHERE id = %s"
    params.append(id)

    db = Database()
    try:
        db.sendQuery(query, params)
        db.commit()
    finally:
        db.close()
def getImageByBook(bookid: str, src: str):
    """Fetch the single image row identified by ``(bookid, src)``.

    Returns the row tuple from the ``images`` table, or ``None`` when no
    such image exists.

    The original referenced ``db.close`` without calling it, so the
    connection and cursor were never released; the connection is now closed
    in a ``finally`` block.
    """
    db = Database()
    try:
        query = "SELECT * FROM images WHERE bookid = %s AND src = %s"
        db.sendQuery(query, (bookid, src))
        return db.fetchOne()
    finally:
        db.close()
def updateImage(
    bookid: str,
    src: str,
    status: str = None,
    alt: str = None,
    genAlt: str = None,
    genImageCaption: str = None,
    ocr: str = None,
    beforeContext: str = None,
    afterContext: str = None,
    additionalContext: str = None,
):
    """Update selected columns of the image row identified by ``(bookid, src)``.

    Only arguments with a truthy value are written; ``None`` (or an empty
    string) leaves the corresponding column untouched. When nothing needs to
    be written the function returns without opening a database connection.

    Values are truncated to the width of their column, mirroring what
    ``addImage`` does on insert, so an over-long generated alt text or
    context no longer makes the UPDATE fail.

    The connection is always closed, even if the query or commit raises.
    """
    # (column name, new value, column width from the CREATE TABLE statement)
    fields = (
        ("status", status, 255),
        ("alt", alt, 1000),
        ("genalt", genAlt, 1000),
        ("genimagecaption", genImageCaption, 1000),
        ("ocr", ocr, 1000),
        ("beforecontext", beforeContext, 2000),
        ("aftercontext", afterContext, 2000),
        ("additionalcontext", additionalContext, 1000),
    )

    assignments = []
    params = []
    # Column names come from the fixed tuple above, never from caller input.
    for column, value, width in fields:
        if value:
            assignments.append(f"{column} = %s")
            params.append(value[:width])

    if not assignments:
        return

    query = f"UPDATE images SET {', '.join(assignments)} WHERE bookid = %s AND src = %s"
    params.append(bookid)
    params.append(src)

    db = Database()
    try:
        db.sendQuery(query, params)
        db.commit()
    finally:
        db.close()
varchar(255), originalAlt varchar(255), genAlt varchar(255), genImageCaption varchar(255), ocr varchar(255), beforeContext varchar(255), afterContext varchar(255), additionalContext varchar(255), CONSTRAINT PK_Image PRIMARY KEY (bookid, src), FOREIGN KEY (bookid) REFERENCES books(id) ON DELETE CASCADE);" +""" + +# db.sendQuery("SELECT * FROM books") +# print(db.fetchOne()) + +# addBook(title="Harry Potter", size="300kb", numImages=25) +""" +addBook(title="Harry Potter", size="300kb", numImages=25) +addBook(title="Harraoeu", size="300kb", numImages=25) +addBook(title="Hartter", size="300kb", numImages=25) +""" + +# getBooks(titleQ="Harry Potter", limit=1, skip=2) + +""" +addImage( + bookid="f1ac43cc-9f6d-4dc8-ac4f-aea0c4af5198", + src="sampleSrcMEOW", + hash="brown", + status="available", +) +""" + +# getImagesByBook("fa47d830-586a-485f-a579-67b33fd3eae3") + +# print(getImagesByHash("brown")) + +updateImage( + bookid="f1ac43cc-9f6d-4dc8-ac4f-aea0c4af5198", + src="sampleSrcMEOW", + status="bruh2", + beforeContext="before context be like", +) + + +# updateBook(id="72950", title="Test Title Two", status="available") + +db = Database() +# db.sendQuery("SELECT * FROM images;") +# print(db.fetchAll()) +db.close() diff --git a/alttextbackend/views/__pycache__/books.cpython-311.pyc b/alttextbackend/views/__pycache__/books.cpython-311.pyc index ffa3687..7bf92c3 100644 Binary files a/alttextbackend/views/__pycache__/books.cpython-311.pyc and b/alttextbackend/views/__pycache__/books.cpython-311.pyc differ diff --git a/alttextbackend/views/__pycache__/books_bookid.cpython-311.pyc b/alttextbackend/views/__pycache__/books_bookid.cpython-311.pyc index c98368b..271bf1c 100644 Binary files a/alttextbackend/views/__pycache__/books_bookid.cpython-311.pyc and b/alttextbackend/views/__pycache__/books_bookid.cpython-311.pyc differ diff --git a/alttextbackend/views/__pycache__/books_bookid_export.cpython-311.pyc b/alttextbackend/views/__pycache__/books_bookid_export.cpython-311.pyc index 
0a3a2ce..daf9c43 100644 Binary files a/alttextbackend/views/__pycache__/books_bookid_export.cpython-311.pyc and b/alttextbackend/views/__pycache__/books_bookid_export.cpython-311.pyc differ diff --git a/alttextbackend/views/__pycache__/books_bookid_image.cpython-311.pyc b/alttextbackend/views/__pycache__/books_bookid_image.cpython-311.pyc index 2208826..326fb71 100644 Binary files a/alttextbackend/views/__pycache__/books_bookid_image.cpython-311.pyc and b/alttextbackend/views/__pycache__/books_bookid_image.cpython-311.pyc differ diff --git a/alttextbackend/views/__pycache__/books_bookid_images.cpython-311.pyc b/alttextbackend/views/__pycache__/books_bookid_images.cpython-311.pyc index 2e827d6..8ac905e 100644 Binary files a/alttextbackend/views/__pycache__/books_bookid_images.cpython-311.pyc and b/alttextbackend/views/__pycache__/books_bookid_images.cpython-311.pyc differ diff --git a/alttextbackend/views/__pycache__/books_bookid_src.cpython-311.pyc b/alttextbackend/views/__pycache__/books_bookid_src.cpython-311.pyc deleted file mode 100644 index 6e54407..0000000 Binary files a/alttextbackend/views/__pycache__/books_bookid_src.cpython-311.pyc and /dev/null differ diff --git a/alttextbackend/views/__pycache__/images_hash.cpython-311.pyc b/alttextbackend/views/__pycache__/images_hash.cpython-311.pyc index e7d5bad..d5a388a 100644 Binary files a/alttextbackend/views/__pycache__/images_hash.cpython-311.pyc and b/alttextbackend/views/__pycache__/images_hash.cpython-311.pyc differ diff --git a/alttextbackend/views/books.py b/alttextbackend/views/books.py index 5bc55d7..2af9958 100644 --- a/alttextbackend/views/books.py +++ b/alttextbackend/views/books.py @@ -1,35 +1,41 @@ -from rest_framework.views import APIView -from rest_framework.response import Response -from rest_framework import status, permissions, serializers -from rest_framework.exceptions import ValidationError -from rest_framework.parsers import FormParser, MultiPartParser -from django.core.files.storage import 
class AddBookSerializer(serializers.Serializer):
    # Validates the form fields accepted by BooksView.post when a book is uploaded.

    # Optional caller-chosen book id; BooksView.post falls back to a fresh
    # uuid4 when it is omitted and rejects ids that are already in use.
    id = serializers.CharField(required=False)
    # Required, non-blank title of the book.
    title = serializers.CharField(required=True, allow_blank=False)
    # Required upload; BooksView.post rejects file names not ending in ".zip".
    book = serializers.FileField(required=True)
    # Optional cover image; its file extension is stored as the book's coverExt.
    cover = serializers.ImageField(required=False)
validated_data = serializer.validated_data - title_query = validated_data.get('titleQ') - author_query = validated_data.get('authorQ') - sort_by = validated_data.get('sortBy') - sort_order = validated_data.get('sortOrder') - limit = validated_data.get('limit') - skip = validated_data.get('skip') + titleQ = validated_data.get("titleQ", None) + limit = validated_data.get("limit", None) + skip = validated_data.get("skip", None) - # TODO: perform logic + # get array of books + result = books.getBooks(titleQ, limit, skip) - # TODO: return books - return Response(validated_data, status=status.HTTP_200_OK) + return Response(map(books.jsonifyBook, result), status=status.HTTP_200_OK) def post(self, request, *args, **kwargs): # validate request data @@ -61,31 +64,72 @@ class BooksView(APIView): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) validated_data = serializer.validated_data + id = validated_data.get("id", uuid4()) + # check if id is already in use + book = books.getBook(id) + if book: + return Response( + {"error": "id already in use"}, status=status.HTTP_400_BAD_REQUEST + ) + # perform initial book processing - file = validated_data["file"] + file = validated_data["book"] if not file.name.endswith(".zip"): return Response( {"file": ["file must be a zip"]}, status=status.HTTP_400_BAD_REQUEST ) - id = uuid4() - books_path = "./books/" - default_storage.save(f"{books_path}{str(id)}.zip", ContentFile(file.read())) + book_path = f"./books/{str(id)}" + default_storage.save(f"{book_path}.zip", ContentFile(file.read())) + with zipfile.ZipFile(default_storage.path(f"{book_path}.zip"), "r") as zip_ref: + zip_ref.extractall(default_storage.path(f"{book_path}")) + default_storage.delete(f"{book_path}.zip") - # TODO: ensure book has valid root html file - - # TODO: analyze book and images, store them in database + # ensure book has valid root html file + html_file = analyze.findHTML(book_path) + if html_file == None: + default_storage.delete(book_path) + 
return Response( + {"error": "No HTML file found in the extracted folder"}, + status=status.HTTP_400_BAD_REQUEST, + ) # save cover image - covers_path = "./covers/" - default_storage.save( - f"{covers_path}{str(id)}.{validated_data['cover'].name.split('.')[-1]}", - ContentFile(validated_data["cover"].read()), - ) + coverExt = None + if "cover" in validated_data and validated_data["cover"] is not None: + coverExt = validated_data["cover"].name.split(".")[-1] + default_storage.save( + f"./covers/{str(id)}.{coverExt}", + ContentFile(validated_data["cover"].read()), + ) + alt = analyze.createAnalyzer() + alt.parseFile(html_file) + # store basic book info into database + size = analyze.getSize(book_path) + imgs = alt.getAllImgs() + books.addBook( + title=validated_data["title"], + size=str(size), + numImages=len(imgs), + id=id, + coverExt=coverExt, + ) + # store info for all images in database + for img in imgs: + context = alt.getContext(img) + thisHash = hash(alt.getImgData(img["src"])) + images.addImage( + bookid=id, + src=img["src"], + hash=thisHash, + alt=img["alt"], + originalAlt=img["alt"], + beforeContext=context[0], + afterContext=context[1], + ) + + book = books.getBook(id) return Response( - { - "book": validated_data.get("title"), - "description": validated_data.get("description"), - }, + books.jsonifyBook(book), status=status.HTTP_201_CREATED, ) diff --git a/alttextbackend/views/books_bookid.py b/alttextbackend/views/books_bookid.py index 16ec09a..990d525 100644 --- a/alttextbackend/views/books_bookid.py +++ b/alttextbackend/views/books_bookid.py @@ -1,102 +1,204 @@ -from rest_framework.views import APIView -from rest_framework.response import Response -from rest_framework import status, permissions, serializers -from rest_framework.exceptions import ValidationError -from rest_framework.parsers import FormParser, MultiPartParser +import copy +import os +import shutil +import threading +import time + +import alttextbackend.data.analyze as analyzer +import 
class BooksBookidView(APIView):
    """Endpoint for one book: fetch (GET), edit (PATCH), analyze (PUT), delete (DELETE)."""

    parser_classes = (FormParser, MultiPartParser)
    serializer_class = UpdateBookSerialzer

    def get_serializer_class(self):
        """Return the serializer class matching the current HTTP method.

        Falls back to ``serializer_class`` for any other method; ``APIView``
        itself does not define ``get_serializer_class``, so delegating to
        ``super()`` would raise ``AttributeError``.
        """
        by_method = {
            "GET": GetBookSerializer,
            "PATCH": UpdateBookSerialzer,
            "PUT": AnalyzeBookSerializer,
            "DELETE": DeleteBookSerializer,
        }
        return by_method.get(self.request.method, self.serializer_class)

    def get(self, request, *args, **kwargs):
        """Return the stored record of the book named by the ``bookid`` URL kwarg."""
        serializer_class = self.get_serializer_class()
        serializer = serializer_class(data={"bookid": kwargs.get("bookid")})
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data

        # get book from database
        book = books.getBook(validated_data.get("bookid"))
        if not book:
            return Response(
                {"error": "No book of that id was found in database."},
                status=status.HTTP_404_NOT_FOUND,
            )

        return Response(books.jsonifyBook(book), status=status.HTTP_200_OK)

    def patch(self, request, *args, **kwargs):
        """Update the book's title and/or cover image and return the new record."""
        serializer_class = self.get_serializer_class()
        # request.data is an immutable QueryDict when the form carries no files,
        # so take a mutable *shallow* copy before injecting the URL kwarg
        # (a deep copy would also try to duplicate any uploaded file objects).
        data = copy.copy(request.data)
        data["bookid"] = kwargs.get("bookid")
        serializer = serializer_class(data=data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        bookid = validated_data.get("bookid")

        # check if book exists in database
        book = books.getBook(bookid)
        if not book:
            return Response(
                {"error": "No book of that id was found in database."},
                status=status.HTTP_404_NOT_FOUND,
            )
        book = books.jsonifyBook(book)

        # update book title and cover
        title = validated_data.get("title", None)
        coverExt = None
        if "cover" in validated_data and validated_data["cover"] is not None:
            coverExt = validated_data["cover"].name.split(".")[-1]
            # Remove the previous cover (if any) so a change of extension
            # does not leave an orphaned file behind.
            if book["coverExt"]:
                default_storage.delete(f"./covers/{str(bookid)}.{book['coverExt']}")
            default_storage.save(
                f"./covers/{str(bookid)}.{coverExt}",
                ContentFile(validated_data["cover"].read()),
            )

        books.updateBook(bookid, title=title, coverExt=coverExt)

        book = books.jsonifyBook(books.getBook(bookid))

        return Response(book, status=status.HTTP_200_OK)

    def put(self, request, *args, **kwargs):
        """Generate alt text for the book's images.

        Query parameters: ``missingOnly`` (default true) restricts analysis
        to images returned by ``getNoAltImgs``; ``waitForAnalysis`` (default
        false) runs the analysis inline instead of in a background thread.
        """
        serializer_class = self.get_serializer_class()
        data = copy.deepcopy(request.query_params)
        data["bookid"] = kwargs.get("bookid")
        serializer = serializer_class(data=data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data

        bookid = validated_data.get("bookid")
        # check for book's existence
        book = books.getBook(bookid)
        if not book:
            return Response(
                {"error": "Book not found in database."},
                status=status.HTTP_404_NOT_FOUND,
            )

        html_file = analyzer.findHTML(f"./books/{str(bookid)}")
        if html_file is None:
            return Response(
                {"error": "Failed to find HTML file in book directory."},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

        alt = analyzer.createAnalyzer()
        alt.parseFile(html_file)
        if validated_data.get("missingOnly"):
            imgs = alt.getNoAltImgs()
        else:
            imgs = alt.getAllImgs()

        # analyzeImagesV2 marks the book and its images as "processing"
        # and flips the book back to "available" when it finishes.
        if validated_data.get("waitForAnalysis"):
            analyzer.analyzeImagesV2(alt, imgs, bookid)
        else:
            threading.Thread(
                target=analyzer.analyzeImagesV2, args=(alt, imgs, bookid)
            ).start()

        book = books.jsonifyBook(books.getBook(bookid))
        if not validated_data.get("waitForAnalysis"):
            # The background thread may not have updated the row yet.
            book["status"] = "processing"

        return Response(book, status=status.HTTP_200_OK)

    def delete(self, request, *args, **kwargs):
        """Delete the book row, its extracted directory and its cover image.

        Returns the deleted book's record with ``status`` set to "deleted".
        """
        serializer_class = self.get_serializer_class()
        serializer = serializer_class(data={"bookid": kwargs.get("bookid")})
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        bookid = validated_data.get("bookid")

        # check for book's existence
        book = books.getBook(bookid)
        if not book:
            return Response(
                {"error": "Book not found in database."},
                status=status.HTTP_404_NOT_FOUND,
            )
        book = books.jsonifyBook(book)
        book["status"] = "deleted"

        # delete book from table (this cascades to images table as well)
        books.deleteBook(bookid)

        # delete book directory and cover image
        try:
            folder_path = f"./books/{str(bookid)}"
            if default_storage.exists(folder_path):
                shutil.rmtree(default_storage.path(folder_path))
                if book["coverExt"]:
                    try:
                        default_storage.delete(
                            f"./covers/{str(bookid)}.{book['coverExt']}"
                        )
                    except Exception:
                        return Response(
                            {"error": "Failed to delete cover image."},
                            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
                        )
            else:
                return Response(
                    {"error": "Failed to find book directory."},
                    status=status.HTTP_500_INTERNAL_SERVER_ERROR,
                )
        except Exception:
            return Response(
                {"error": "Failed to delete book directory."},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

        return Response(
            book,
            status=status.HTTP_200_OK,
        )
b/alttextbackend/views/books_bookid_export.py @@ -1,25 +1,102 @@ -from rest_framework.views import APIView -from rest_framework.response import Response -from rest_framework import status, permissions, serializers -from rest_framework.exceptions import ValidationError -from rest_framework.parsers import FormParser, MultiPartParser +import copy +import os +import shutil +import zipfile + +import alttextbackend.data.analyze as analyze +import alttextbackend.data.postgres.books as books +import alttextbackend.data.postgres.images as images from django.core.files.storage import default_storage -from django.core.files.base import ContentFile -from uuid import uuid4 +from django.http import HttpResponse +from rest_framework import serializers, status +from rest_framework.parsers import FormParser, MultiPartParser +from rest_framework.response import Response +from rest_framework.views import APIView + class ExportBookSerializer(serializers.Serializer): bookid = serializers.CharField(required=True) + name = serializers.CharField(required=False) + class BooksBookidExportView(APIView): parser_classes = (FormParser, MultiPartParser) serializer_class = ExportBookSerializer def get(self, request, *args, **kwargs): - serializer = self.serializer_class(data={"bookid": kwargs.get('bookid')}) + """Return a zip download of the book with the database alt-texts applied to its HTML.""" + data = copy.deepcopy(request.query_params) + data["bookid"] = kwargs.get("bookid") + serializer = self.serializer_class(data=data) if not serializer.is_valid(): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) validated_data = serializer.validated_data - # TODO: IMPLEMENT LOGIC + bookid = validated_data.get("bookid") + # check if book exists in database + book = books.getBook(bookid) + if not book: + return Response( + {"error": "Book not found"}, status=status.HTTP_404_NOT_FOUND + ) - return Response(validated_data, status=status.HTTP_200_OK) + # find HTML file + bookid = str(validated_data.get("bookid")) + html_file = analyze.findHTML(f"./books/{bookid}") + if
html_file is None: + return Response( + {"error": "Failed to find HTML file in book directory."}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + # apply the alt-text stored in the database to every image tag in the book + alt = analyze.createAnalyzer() + alt.parseFile(html_file) + imgs = alt.getAllImgs() + for img in imgs: + databaseImg = images.jsonifyImage(images.getImageByBook(bookid, img["src"])) + alt.setAlt(img["src"], databaseImg["alt"]) + + try: + shutil.copytree( + default_storage.path(f"./books/{bookid}"), f"./books/{bookid}-t" + ) + except Exception: + return Response( + {"error": "Failed to copy book into temp folder."}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + html_file = analyze.findHTML(f"./books/{bookid}-t") + if html_file is None: + return Response( + {"error": "Failed to find HTML file in temp book directory."}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + default_storage.delete(html_file) + alt.exportToFile(html_file) + + # Zip the temp folder + zip_filename = f"./books/{bookid}-t.zip" + with zipfile.ZipFile(zip_filename, "w") as zipf: + for root, _, files in os.walk(f"./books/{bookid}-t"): + for file in files: + zipf.write( + os.path.join(root, file), + os.path.relpath( + os.path.join(root, file), f"./books/{bookid}-t" + ), + ) + + # Send the zip file as a response + filename = validated_data.get("name", f"{bookid}") + response = None + with open(zip_filename, "rb") as f: + response = HttpResponse(f, content_type="application/zip") + response["Content-Disposition"] = f"attachment; filename={filename}.zip" + + # Delete the temp zip and folder + os.remove(zip_filename) + shutil.rmtree(f"./books/{bookid}-t") + + return response diff --git a/alttextbackend/views/books_bookid_image.py b/alttextbackend/views/books_bookid_image.py index 6cdbc05..34365fd 100644 --- a/alttextbackend/views/books_bookid_image.py +++ b/alttextbackend/views/books_bookid_image.py @@ -1,74 +1,158 @@ -from rest_framework.views import APIView -from
rest_framework.response import Response -from rest_framework import status, permissions, serializers -from rest_framework.exceptions import ValidationError +import copy +import threading + +import alttextbackend.data.analyze as analyze +import alttextbackend.data.postgres.books as books +import alttextbackend.data.postgres.images as images +from rest_framework import serializers, status from rest_framework.parsers import FormParser, MultiPartParser -from django.core.files.storage import default_storage -from django.core.files.base import ContentFile -from uuid import uuid4 +from rest_framework.response import Response +from rest_framework.views import APIView + class GetImageBySrc(serializers.Serializer): bookid = serializers.CharField(required=True) src = serializers.CharField(required=True) + class UpdateImageBySrc(serializers.Serializer): bookid = serializers.CharField(required=True) src = serializers.CharField(required=True) alt = serializers.CharField(required=True) beforeContext = serializers.CharField(required=False) afterContext = serializers.CharField(required=False) + additionalContext = serializers.CharField(required=False) + class AnalyzeImageBySrc(serializers.Serializer): bookid = serializers.CharField(required=True) src = serializers.CharField(required=True) + waitForAnalysis = serializers.BooleanField(required=False, default=False) + class BooksBookidImageView(APIView): parser_classes = (FormParser, MultiPartParser) + def get_serializer_class(self): - if self.request.method == 'GET': + if self.request.method == "GET": return GetImageBySrc - elif self.request.method == 'PATCH': + elif self.request.method == "PATCH": return UpdateImageBySrc - elif self.request.method == 'PUT': + elif self.request.method == "PUT": return AnalyzeImageBySrc return super().get_serializer_class() def get(self, request, *args, **kwargs): serializer_class = self.get_serializer_class() - data = request.query_params - data['bookid'] = kwargs.get('bookid') + data =
copy.deepcopy(request.query_params) + data["bookid"] = kwargs.get("bookid") serializer = serializer_class(data=data) if not serializer.is_valid(): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) validated_data = serializer.validated_data - # TODO: IMPLEMENT LOGIC + # check if book exists in database + book = books.getBook(validated_data.get("bookid")) + if not book: + return Response( + {"error": "Book not found"}, status=status.HTTP_404_NOT_FOUND + ) + + # get image from database + img = images.getImageByBook( + validated_data.get("bookid"), validated_data.get("src") + ) + if img is None: + return Response( + {"error": "Image not found"}, status=status.HTTP_404_NOT_FOUND + ) + + return Response( + images.jsonifyImage(img), + status=status.HTTP_200_OK, + ) - return Response(validated_data, status=status.HTTP_200_OK) - def patch(self, request, *args, **kwargs): serializer_class = self.get_serializer_class() - data = request.data + data = copy.deepcopy(request.data) data.update(request.query_params) - data['bookid'] = kwargs.get('bookid') + data["bookid"] = kwargs.get("bookid") serializer = serializer_class(data=data) if not serializer.is_valid(): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) validated_data = serializer.validated_data - # TODO: IMPLEMENT LOGIC + alt = validated_data.get("alt", None) + beforeContext = validated_data.get("beforeContext", None) + afterContext = validated_data.get("afterContext", None) + additionalContext = validated_data.get("additionalContext", None) + + img = images.getImageByBook( + validated_data.get("bookid"), validated_data.get("src") + ) + if img is None: + return Response( + {"error": "Image not found"}, status=status.HTTP_404_NOT_FOUND + ) + + # update image in database + images.updateImage( + bookid=validated_data.get("bookid"), + src=validated_data.get("src"), + alt=alt, + beforeContext=beforeContext, + afterContext=afterContext, + additionalContext=additionalContext, + ) + +
img = images.getImageByBook( + validated_data.get("bookid"), validated_data.get("src") + ) + + return Response(images.jsonifyImage(img), status=status.HTTP_200_OK) - return Response(validated_data, status=status.HTTP_200_OK) - def put(self, request, *args, **kwargs): serializer_class = self.get_serializer_class() - data = request.query_params - data['bookid'] = kwargs.get('bookid') + data = copy.deepcopy(request.query_params) + data["bookid"] = kwargs.get("bookid") serializer = serializer_class(data=data) if not serializer.is_valid(): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) validated_data = serializer.validated_data - # TODO: IMPLEMENT LOGIC + # find HTML file + bookid = str(validated_data.get("bookid")) + html_file = analyze.findHTML(f"./books/{bookid}") + if html_file is None: + return Response( + {"error": "Failed to find HTML file in book directory."}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) - return Response(validated_data, status=status.HTTP_200_OK) + # generate alt for image + alt = analyze.createAnalyzer() + alt.parseFile(html_file) + img = alt.getImg(validated_data.get("src")) + if img is None: + return Response( + {"error": "Failed to find image in book."}, + status=status.HTTP_400_BAD_REQUEST, + ) + + if validated_data.get("waitForAnalysis"): + analyze.analyzeSingularImageV2(alt, img, bookid) + else: + threading.Thread( + target=analyze.analyzeSingularImageV2, args=(alt, img, bookid) + ).start() + + image = images.jsonifyImage( + images.getImageByBook( + validated_data.get("bookid"), validated_data.get("src") + ) + ) + + if not validated_data.get("waitForAnalysis"): + image["status"] = "processing" + + return Response(image, status=status.HTTP_200_OK) diff --git a/alttextbackend/views/books_bookid_images.py b/alttextbackend/views/books_bookid_images.py index 7036cf0..a0fd432 100644 --- a/alttextbackend/views/books_bookid_images.py +++ b/alttextbackend/views/books_bookid_images.py @@ -1,25 +1,38 @@ -from
rest_framework.views import APIView -from rest_framework.response import Response -from rest_framework import status, permissions, serializers -from rest_framework.exceptions import ValidationError +import sys + +import alttextbackend.data.postgres.books as books +import alttextbackend.data.postgres.images as images +from rest_framework import serializers, status from rest_framework.parsers import FormParser, MultiPartParser -from django.core.files.storage import default_storage -from django.core.files.base import ContentFile -from uuid import uuid4 +from rest_framework.response import Response +from rest_framework.views import APIView + +sys.path.append("../") + class ImagesFromBookSerializer(serializers.Serializer): bookid = serializers.CharField(required=True) + class BooksBookidImagesView(APIView): parser_classes = (FormParser, MultiPartParser) serializer_class = ImagesFromBookSerializer def get(self, request, *args, **kwargs): - serializer = self.serializer_class(data={"bookid": kwargs.get('bookid')}) + serializer = self.serializer_class(data={"bookid": kwargs.get("bookid")}) if not serializer.is_valid(): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) validated_data = serializer.validated_data + bookid = validated_data.get("bookid") - # TODO: IMPLEMENT LOGIC + # check if book exists in database + book = books.getBook(bookid) + if not book: + return Response( + {"error": "Book not found"}, status=status.HTTP_404_NOT_FOUND + ) - return Response(validated_data, status=status.HTTP_200_OK) + # get images from database; a concrete list (not a one-shot map) is the response payload + imgs = images.getImagesByBook(bookid) + + return Response([images.jsonifyImage(img) for img in imgs], status=status.HTTP_200_OK) diff --git a/alttextbackend/views/images_hash.py b/alttextbackend/views/images_hash.py index 0503f91..04b2fde 100644 --- a/alttextbackend/views/images_hash.py +++ b/alttextbackend/views/images_hash.py @@ -1,26 +1,26 @@ -from rest_framework.views import APIView -from rest_framework.response import Response -from
rest_framework import status, permissions, serializers -from rest_framework.exceptions import ValidationError +from rest_framework import serializers, status from rest_framework.parsers import FormParser, MultiPartParser -from django.core.files.storage import default_storage -from django.core.files.base import ContentFile -from uuid import uuid4 +from rest_framework.response import Response +from rest_framework.views import APIView + +import alttextbackend.data.postgres.images as images + class GetImagesByHashSerializer(serializers.Serializer): hash = serializers.CharField(required=True) + class ImagesHashView(APIView): parser_classes = (FormParser, MultiPartParser) serializer_class = GetImagesByHashSerializer def get(self, request, *args, **kwargs): - image_hash = kwargs.get('hash') - data = {'hash': image_hash} + image_hash = kwargs.get("hash") + data = {"hash": image_hash} serializer = self.serializer_class(data=data) if not serializer.is_valid(): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) - # TODO: IMPLEMENT LOGIC + imgs = images.getImagesByHash(image_hash) - return Response(data, status=status.HTTP_200_OK) + return Response([images.jsonifyImage(img) for img in imgs], status=status.HTTP_200_OK) diff --git a/openapi.yaml b/openapi.yaml index e0f97b4..743b9fb 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -3,18 +3,12 @@ info: title: Alt-text Backend API description: |- This is the Alt-text Backend API based on the OpenAPI 3.0 specification.
- # termsOfService: http://swagger.io/terms/ contact: email: da.cruz@aol.com - # license: - # name: Apache 2.0 - # url: http://www.apache.org/licenses/LICENSE-2.0.html version: 1.0.11 externalDocs: description: Find out more about Alt-text url: https://github.com/EbookFoundation/alt-text -# servers: -# - url: https://petstore3.swagger.io/api/v3 tags: - name: Books description: Everything regarding books @@ -36,31 +30,6 @@ paths: explode: true schema: type: string - - name: authorQ - in: query - description: String to match the author to. - required: false - explode: true - schema: - type: string - - name: sortBy - in: query - description: Field to sort by. - required: false - explode: true - schema: - type: string - enum: ["title", "author"] - default: "title" - - name: sortOrder - in: query - description: Order to sort by. - required: false - explode: true - schema: - type: string - enum: ["asc", "desc"] - default: "asc" - name: limit in: query description: Max number of books to return. @@ -100,15 +69,12 @@ paths: schema: type: object properties: + id: + type: string + description: Id of the book (optional). title: type: string description: Title of the book. - author: - type: string - description: Author of the book. - description: - type: string - description: Description of the book (optional). book: type: string description: Zip file of the book. @@ -167,12 +133,6 @@ paths: title: type: string description: Title of the book (optional). - author: - type: string - description: Author of the book (optional). - description: - type: string - description: Description of the book (optional). cover: type: string description: Cover image for the book (optional). @@ -195,32 +155,25 @@ paths: summary: Re-analyze an entire book. description: Re-analyze an entire book and overwrite current image data by its id. 
operationId: analyzeBook - responses: - '200': - description: Successful operation - content: - application/json: - schema: - $ref: '#/components/schemas/Book' - '500': - description: Internal Server Error - post: - tags: - - Books - summary: Upload a new book file to a book object. - description: Upload a new book to a given book object (by its id), and re-analyze it (essentially creating a new book, except keeping the same bookid). - operationId: overwriteBook - requestBody: - required: true - content: - multipart/form-data: - schema: - type: object - properties: - book: - type: string - description: Zip file of the book. - format: binary + parameters: + - name: missingOnly + in: query + description: Whether to analyze only the images without alt-text. + required: false + explode: true + schema: + type: boolean + example: true + default: true + - name: waitForAnalysis + in: query + description: Whether to wait for the analysis to complete before returning a response. + required: false + explode: true + schema: + type: boolean + example: false + default: false responses: '200': description: Successful operation @@ -255,6 +208,15 @@ paths: schema: type: string example: "123e4567-e89b-12d3-a456-426614174000" + - name: name + in: query + description: Alternative name for file download. + required: false + explode: true + schema: + type: string + default: "{bookid}" + example: "harry_potter" get: tags: - Books @@ -270,12 +232,6 @@ paths: type: string example: |- content of the file - # headers: - # Content-Disposition: - # description: File name to prompt for download - # schema: - # type: string - # example: attachment; filename="example.txt" '500': description: Internal Server Error /books/{bookid}/images: @@ -383,6 +339,9 @@ paths: afterContext: type: string description: New afterContext for the image (optional). + additionalContext: + type: string + description: New additionalContext for the image (optional).
responses: '200': description: Successful operation @@ -398,6 +357,16 @@ paths: summary: Re-analyze an image. description: Generate an image's alt-text (written to genAlt field in image object). operationId: analyzeImageBySrc + parameters: + - name: waitForAnalysis + in: query + description: Whether to wait for the analysis to complete before returning a response (default = false). + required: false + explode: true + schema: + type: boolean + example: false + default: false responses: '200': description: Successful operation @@ -445,12 +414,6 @@ components: title: type: string example: "Diary of an Oxygen Thief" - author: - type: string - example: "Anonymous" - description: - type: string - example: "Hurt people hurt people." size: type: string example: "1.16MB" @@ -458,39 +421,53 @@ components: type: string example: "processing" enum: ["available", "processing", "deleted"] + default: "available" numImages: type: integer example: 4 Image: type: object properties: + bookid: + type: string + example: "123e4567-e89b-12d3-a456-426614174000" src: type: string example: "images/cover.png" hash: type: string example: "" - size: + status: type: string - example: "24KB" + example: "processing" + enum: ["available", "processing", "deleted"] + default: "available" alt: type: string example: "" + default: "originalAlt" originalAlt: type: string example: "" genAlt: type: string example: "" + default: "" genImageCaption: type: string example: "" + default: "" ocr: type: string example: "" + default: "" beforeContext: type: string example: "" afterContext: type: string - example: "" \ No newline at end of file + example: "" + additionalContext: + type: string + example: "" + default: "" \ No newline at end of file