HUGE COMMIT. Implemented essentially every necessary route. Integrated database functionality. Set up .env-based configuration. Updated the openapi.yaml.

tyler
xxmistacruzxx 2024-02-29 19:01:00 -05:00
parent 7afe1045c4
commit 49c8d29a79
28 changed files with 1113 additions and 220 deletions

0
.env
View File

39
.env.example Normal file
View File

@ -0,0 +1,39 @@
# DATABASE CONFIG
DATABASE_NAME=postgres
DATABASE_HOST=127.0.0.1
DATABASE_USER=postgres
DATABASE_PASSWORD=testpassword
DATABASE_PORT=5432
# GENERAL CONFIG
ALT_WITH_CONTEXT=1
ALT_WITH_HASH=1
ALT_MULTITHREADED=0
## ALT_VERSION OPTIONS: 1, 2
ALT_VERSION=2
## DESC_ENGINE OPTIONS: replicateapi, bliplocal, googlevertexapi
DESC_ENGINE=replicateapi
## OCR_ENGINE OPTIONS: tesseract
OCR_ENGINE=tesseract
## LANG_ENGINE OPTIONS: privategpt
LANG_ENGINE=privategpt
# DESC_ENGINE CONFIG OPTIONS
## REPLICATEAPI
REPLICATE_KEY=example_key
## BLIPLOCAL
BLIPLOCAL_DIR=/path/to/image-captioning
## GOOGLEVERTEXAPI
VERTEX_PROJECT_ID=example-123456
### VERTEX_LOCATION OPTIONS: https://cloud.google.com/vertex-ai/docs/general/locations
VERTEX_LOCATION=us-central1
VERTEX_GAC_PATH=/path/to/vertex-key.json
# OCR_ENGINE CONFIG OPTIONS
## TESSERACT
TESSERACT_PATH=/path/to/tesseract.exe
# LANG_ENGINE CONFIG OPTIONS
## PRIVATEGPT
PRIVATEGPT_HOST=http://localhost:8001

3
.gitignore vendored
View File

@ -1,4 +1,5 @@
*/__pycache__/
**/__pycache__/
.env
/books
/covers

View File

@ -0,0 +1,132 @@
import os
import threading
import bs4
from alttext import alttext
from alttext.descengine.bliplocal import BlipLocal
from alttext.descengine.replicateapi import ReplicateAPI
from alttext.langengine.privategpt import PrivateGPT
from alttext.ocrengine.tesseract import Tesseract
from django.core.files.storage import default_storage
from .postgres import books, images
# from alttext.descengine.googlevertexapi import GoogleVertexAPI
def createAnalyzer():
    """Build an ``alttext.AltTextHTML`` analyzer from environment settings.

    ``DESC_ENGINE``, ``OCR_ENGINE`` and ``LANG_ENGINE`` (compared
    case-insensitively) select the engines; the ``ALT_*`` variables become
    the analyzer options.

    Raises:
        KeyError: a required environment variable is not set.
        ValueError: an engine name is not one of the supported values.
    """
    env = os.environ

    # Description engine ("googlevertexapi" is not wired up here).
    descName = env["DESC_ENGINE"].lower()
    if descName == "replicateapi":
        descEngine = ReplicateAPI(env["REPLICATE_KEY"])
    elif descName == "bliplocal":
        descEngine = BlipLocal(env["BLIPLOCAL_DIR"])
    else:
        raise ValueError("Invalid description engine")

    # OCR engine.
    if env["OCR_ENGINE"].lower() == "tesseract":
        ocrEngine = Tesseract()
    else:
        raise ValueError("Invalid OCR engine")

    # Language engine.
    if env["LANG_ENGINE"].lower() == "privategpt":
        langEngine = PrivateGPT(env["PRIVATEGPT_HOST"])
    else:
        raise ValueError("Invalid language engine")

    def flag(name):
        # Flags are stored as "0"/"1" strings in the environment.
        return bool(int(env[name]))

    options = {
        "withContext": flag("ALT_WITH_CONTEXT"),
        "withHash": flag("ALT_WITH_HASH"),
        "multiThreaded": flag("ALT_MULTITHREADED"),
        "version": int(env["ALT_VERSION"]),
    }
    return alttext.AltTextHTML(descEngine, ocrEngine, langEngine, options)
def findHTML(path: str):
    """Return the storage path of the first ``.html`` file found under *path*.

    Walks *path* with ``os.walk`` and stops at the first file name ending in
    ``.html``; the joined path is resolved through ``default_storage.path``.
    Returns ``None`` when no such file exists.
    """
    for folder, _, names in os.walk(path):
        found = next((name for name in names if name.endswith(".html")), None)
        if found is not None:
            return default_storage.path(os.path.join(folder, found))
    return None
def getSize(path: str):
    """Return the combined size in bytes of every file under *path*."""
    return sum(
        os.path.getsize(os.path.join(folder, name))
        for folder, _, names in os.walk(path)
        for name in names
    )
def analyzeImageV2(alt: alttext.AltTextHTML, img: bs4.element.Tag, bookid: str):
    """Generate alt text for a single image and store the results.

    Loads the image's stored before/after context, asks *alt* for a
    description and OCR text, refines both through ``alt.langEngine`` and
    writes everything back with status "available".

    Returns:
        The refreshed image record as a dict.
    """
    src = img["src"]
    stored = images.jsonifyImage(images.getImageByBook(bookid, src))
    context = [stored["beforeContext"], stored["afterContext"]]

    imgData = alt.getImgData(src)
    caption = alt.genDesc(imgData, src, context)
    ocrText = alt.genChars(imgData, src).strip()
    refinedAlt = alt.langEngine.refineAlt(caption, ocrText, context, None)

    images.updateImage(
        bookid,
        src,
        status="available",
        genAlt=refinedAlt,
        genImageCaption=caption,
        ocr=ocrText,
        beforeContext=context[0],
        afterContext=context[1],
    )
    return images.jsonifyImage(images.getImageByBook(bookid, src))
def analyzeSingularImageV2(alt: alttext.AltTextHTML, img: bs4.element.Tag, bookid: str):
    """Analyze one image of a book, tracking "processing" status around it.

    The book and the image are flagged "processing" while the analysis runs.
    The book is always returned to "available" afterwards, and the image is
    reset to "available" if the analysis raises, so a failure cannot leave
    either record stuck in "processing". The original exception propagates.

    Returns:
        The refreshed image record as a dict.
    """
    src = img["src"]
    books.updateBook(bookid, status="processing")
    try:
        images.updateImage(bookid, src, status="processing")
        analyzeImageV2(alt, img, bookid)
    except Exception:
        # analyzeImageV2 only marks the image "available" on success.
        images.updateImage(bookid, src, status="available")
        raise
    finally:
        books.updateBook(bookid, status="available")
    return images.jsonifyImage(images.getImageByBook(bookid, src))
def analyzeImagesV2(alt: alttext.AltTextHTML, imgs: list[bs4.element.Tag], bookid: str):
    """Analyze a batch of images belonging to one book.

    Marks the book and every image "processing", then runs ``analyzeImageV2``
    for each image: one thread per image when ``ALT_MULTITHREADED`` is
    enabled, otherwise sequentially. The book is returned to "available" in
    a ``finally`` block so an exception on the sequential path does not leave
    it stuck in "processing".

    Returns:
        The refreshed book record as a dict.
    """
    books.updateBook(bookid, status="processing")
    try:
        for img in imgs:
            images.updateImage(bookid, img["src"], status="processing")

        if bool(int(os.environ["ALT_MULTITHREADED"])):
            # TODO: TEST WITH OPENAI API
            threads = []
            for img in imgs:
                thread = threading.Thread(
                    target=analyzeImageV2, args=(alt, img, bookid)
                )
                thread.start()
                threads.append(thread)
            # Wait for every worker before reporting the book as done.
            for thread in threads:
                thread.join()
        else:
            for img in imgs:
                analyzeImageV2(alt, img, bookid)
    finally:
        books.updateBook(bookid, status="available")
    return books.jsonifyBook(books.getBook(bookid))

View File

@ -0,0 +1,127 @@
import uuid
try:
from .config import Database
except ImportError:
from config import Database
"""
BOOKS DATABASE ATTRIBUTES
*id: str
title: str
size: str
status: str
numImages: int
coverExt: str
"""
def createBookTable():
    """Create the ``books`` table.

    The connection is closed even if the statement fails (for example when
    the table already exists).
    """
    query = "CREATE TABLE books (id varchar(255) NOT NULL PRIMARY KEY, title varchar(255), size varchar(255), status varchar(255), numImages int, coverExt varchar(255));"
    db = Database()
    try:
        db.sendQuery(query)
        db.commit()
    finally:
        db.close()
def jsonifyBook(book: tuple):
    """Convert a ``books`` row tuple into a dict keyed by attribute name.

    The tuple is read positionally in the table's column order:
    id, title, size, status, numImages, coverExt.
    """
    columns = ("id", "title", "size", "status", "numImages", "coverExt")
    return {name: book[position] for position, name in enumerate(columns)}
def getBook(id: str):
    """Fetch one book row by id.

    Returns:
        The row tuple, or ``None`` when no book has that id.
    """
    db = Database()
    try:
        db.sendQuery("SELECT * FROM books WHERE id = %s", (id,))
        return db.fetchOne()
    finally:
        # Release the connection even if the query raises.
        db.close()
def getBooks(titleQ: str = None, limit: int = None, skip: int = None):
    """List book rows, optionally filtered by title and paginated.

    Args:
        titleQ: case-insensitive substring to look for in the title.
        limit: maximum number of rows to return.
        skip: number of rows to skip before the first returned row.

    Returns:
        A list of row tuples ordered by title, then id.
    """
    params = []
    query = "SELECT * FROM books"
    if titleQ:
        query += " WHERE LOWER(title) LIKE %s"
        params.append(f"%{titleQ.lower()}%")
    # Without ORDER BY the row order is unspecified, so LIMIT/OFFSET pages
    # could overlap or skip rows between requests.
    query += " ORDER BY title, id"
    if limit is not None:
        query += " LIMIT %s"
        params.append(limit)
    if skip is not None:
        query += " OFFSET %s"
        params.append(skip)

    db = Database()
    try:
        db.sendQuery(query, params)
        return db.fetchAll()
    finally:
        db.close()
def addBook(
    title: str,
    size: str,
    numImages: int,
    id: str = None,
    status: str = "available",
    coverExt: str = None,
):
    """Insert a book row and return it.

    Args:
        title: book title.
        size: size of the book, stored as text.
        numImages: number of images in the book.
        id: primary key; a random UUID4 string is generated when omitted.
        status: initial status.
        coverExt: file extension of the cover image, if any.

    Returns:
        The inserted row tuple as read back by ``getBook``.
    """
    if id is None:
        id = str(uuid.uuid4())
    query = "INSERT INTO books (id, title, status, numimages, size, coverext) VALUES (%s, %s, %s, %s, %s, %s);"
    params = (id, title, status, numImages, size, coverExt)
    db = Database()
    try:
        db.sendQuery(query, params)
        db.commit()
    finally:
        # Release the connection even if the insert fails (e.g. duplicate id).
        db.close()
    return getBook(id)
def deleteBook(id: str):
    """Delete the book row with the given id (no error if it does not exist)."""
    db = Database()
    try:
        db.sendQuery("DELETE FROM books WHERE id = %s", (id,))
        db.commit()
    finally:
        # Release the connection even if the delete fails.
        db.close()
def updateBook(id: str, title: str = None, status: str = None, coverExt: str = None):
    """Update the given fields of a book; falsy arguments are left untouched.

    When no field is supplied the function returns without opening a
    database connection.
    """
    assignments = []
    params = []
    # Column names are fixed literals, so building the SET clause is safe.
    for column, value in (
        ("title", title),
        ("status", status),
        ("coverext", coverExt),
    ):
        if value:
            assignments.append(f"{column} = %s")
            params.append(value)
    if not assignments:
        return

    query = f"UPDATE books SET {', '.join(assignments)} WHERE id = %s"
    params.append(id)
    db = Database()
    try:
        db.sendQuery(query, params)
        db.commit()
    finally:
        db.close()

View File

@ -0,0 +1,32 @@
import psycopg2
import os
class Database:
    """Thin wrapper around one psycopg2 connection and its cursor.

    Connection settings come from the ``DATABASE_*`` environment variables.
    Instances can be used as context managers (``with Database() as db:``),
    which closes the cursor and connection on exit; explicit ``close()``
    calls keep working as before.
    """

    def __init__(self):
        self.conn = psycopg2.connect(
            database=os.environ['DATABASE_NAME'],
            host=os.environ['DATABASE_HOST'],
            user=os.environ['DATABASE_USER'],
            password=os.environ['DATABASE_PASSWORD'],
            port=os.environ['DATABASE_PORT']
        )
        self.cursor = self.conn.cursor()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never suppress the caller's exception.
        return False

    def sendQuery(self, query: str, params=None):
        """Execute *query* with optional bound *params* on the cursor."""
        self.cursor.execute(query, params)

    def commit(self):
        """Commit the current transaction."""
        self.conn.commit()

    def fetchOne(self):
        """Return the next result row, or ``None`` when exhausted."""
        return self.cursor.fetchone()

    def fetchAll(self):
        """Return all remaining result rows."""
        return self.cursor.fetchall()

    def fetchMany(self, size: int):
        """Return up to *size* remaining result rows."""
        return self.cursor.fetchmany(size=size)

    def close(self):
        """Close the cursor and the connection."""
        try:
            self.cursor.close()
        finally:
            # Close the connection even if closing the cursor raised.
            self.conn.close()

View File

@ -0,0 +1,208 @@
try:
from .config import Database
except ImportError:
from config import Database
"""
IMAGE DATABASE ATTRIBUTES
*bookid: str
*src: str
hash: str
status: str
alt: str
originalAlt: str
genAlt: str
genImageCaption: str
ocr: str
beforeContext: str
afterContext: str
additionalContext: str
"""
def createImageTable():
    """Create the ``images`` table (composite key ``bookid`` + ``src``).

    Rows reference ``books(id)`` with ``ON DELETE CASCADE``. The connection
    is closed even if the statement fails.
    """
    query = "CREATE TABLE images (bookid varchar(255) NOT NULL, src varchar(255) NOT NULL, hash varchar(255), status varchar(255), alt varchar(1000), originalAlt varchar(1000), genAlt varchar(1000), genImageCaption varchar(1000), ocr varchar(1000), beforeContext varchar(2000), afterContext varchar(2000), additionalContext varchar(1000), CONSTRAINT PK_Image PRIMARY KEY (bookid, src), FOREIGN KEY (bookid) REFERENCES books(id) ON DELETE CASCADE);"
    db = Database()
    try:
        db.sendQuery(query)
        db.commit()
    finally:
        db.close()
def jsonifyImage(image: tuple):
    """Convert an ``images`` row tuple into a dict keyed by attribute name.

    The tuple is read positionally in the table's column order.
    """
    columns = (
        "bookid",
        "src",
        "hash",
        "status",
        "alt",
        "originalAlt",
        "genAlt",
        "genImageCaption",
        "ocr",
        "beforeContext",
        "afterContext",
        "additionalContext",
    )
    return {name: image[position] for position, name in enumerate(columns)}
def getImageByBook(bookid: str, src: str):
    """Fetch one image row by its composite key.

    Returns:
        The row tuple, or ``None`` when the book has no image with that src.
    """
    db = Database()
    try:
        db.sendQuery(
            "SELECT * FROM images WHERE bookid = %s AND src = %s", (bookid, src)
        )
        return db.fetchOne()
    finally:
        # The original wrote `db.close` without calling it, so the
        # connection was never closed.
        db.close()
def getImagesByBook(bookid: str):
    """Return every image row belonging to the given book."""
    db = Database()
    try:
        db.sendQuery("SELECT * FROM images WHERE bookid = %s", (bookid,))
        return db.fetchAll()
    finally:
        # Release the connection even if the query raises.
        db.close()
def getImagesByHash(hash: str):
    """Return every image row whose stored hash equals *hash*."""
    db = Database()
    try:
        db.sendQuery("SELECT * FROM images WHERE hash = %s", (hash,))
        return db.fetchAll()
    finally:
        # Release the connection even if the query raises.
        db.close()
def addImage(
    bookid: str,
    src: str,
    hash: str = None,
    status: str = "available",
    alt: str = "",
    originalAlt: str = None,
    genAlt: str = None,
    genImageCaption: str = None,
    ocr: str = None,
    beforeContext: str = None,
    afterContext: str = None,
    additionalContext: str = None,
):
    """Insert an image row and return it.

    Any status other than "available" or "processing" is stored as
    "available". Text fields are truncated to their column widths (1000
    characters, 2000 for the before/after context) so the insert cannot fail
    on over-long values.

    Returns:
        The inserted row tuple as read back by ``getImageByBook``.
    """
    query = "INSERT INTO images (bookid, src, hash, status, alt, originalalt, genalt, genimagecaption, ocr, beforecontext, aftercontext, additionalcontext) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
    if status not in ("available", "processing"):
        status = "available"

    def clip(value, limit):
        # None means "no value" and is passed through unchanged.
        return value if value is None else value[:limit]

    params = (
        bookid,
        src,
        hash,
        status,
        clip(alt, 1000),
        clip(originalAlt, 1000),
        clip(genAlt, 1000),
        clip(genImageCaption, 1000),
        clip(ocr, 1000),
        clip(beforeContext, 2000),
        clip(afterContext, 2000),
        clip(additionalContext, 1000),
    )
    db = Database()
    try:
        db.sendQuery(query, params)
        db.commit()
    finally:
        # Release the connection even if the insert fails.
        db.close()
    return getImageByBook(bookid, src)
def deleteImage(bookid: str, src: str):
    """Delete the image row identified by (*bookid*, *src*), if present."""
    db = Database()
    try:
        db.sendQuery(
            "DELETE FROM images WHERE bookid = %s AND src = %s;", (bookid, src)
        )
        db.commit()
    finally:
        # Release the connection even if the delete fails.
        db.close()
def updateImage(
    bookid: str,
    src: str,
    status: str = None,
    alt: str = None,
    genAlt: str = None,
    genImageCaption: str = None,
    ocr: str = None,
    beforeContext: str = None,
    afterContext: str = None,
    additionalContext: str = None,
):
    """Update the given fields of an image; falsy arguments are left untouched.

    Text fields are truncated to the same widths ``addImage`` applies (1000
    characters, 2000 for the before/after context), so long OCR or generated
    text cannot make the UPDATE fail. When no field is supplied the function
    returns without opening a database connection.
    """
    # (column, value, max length or None for "do not truncate")
    fields = (
        ("status", status, None),
        ("alt", alt, 1000),
        ("genalt", genAlt, 1000),
        ("genimagecaption", genImageCaption, 1000),
        ("ocr", ocr, 1000),
        ("beforecontext", beforeContext, 2000),
        ("aftercontext", afterContext, 2000),
        ("additionalcontext", additionalContext, 1000),
    )
    assignments = []
    params = []
    for column, value, limit in fields:
        if not value:
            continue
        assignments.append(f"{column} = %s")
        params.append(value if limit is None else value[:limit])
    if not assignments:
        return

    # Column names are fixed literals; only values are bound as parameters.
    query = (
        f"UPDATE images SET {', '.join(assignments)}"
        " WHERE bookid = %s AND src = %s"
    )
    params.extend((bookid, src))
    db = Database()
    try:
        db.sendQuery(query, params)
        db.commit()
    finally:
        db.close()

View File

@ -0,0 +1,57 @@
import dotenv
from books import addBook, getBooks, getBook, updateBook
from images import (
addImage,
getImagesByBook,
getImageByBook,
getImagesByHash,
updateImage,
)
from config import Database
# Manual scratch script for poking at the postgres helpers. Everything below
# runs at import time; most experiments are parked in comments or in bare
# string literals, which are evaluated and discarded without running.

# Load settings from a .env file into the environment before any helper
# opens a connection.
dotenv.load_dotenv()
# Reference copies of the table DDL (bare string literal, never executed).
# NOTE(review): every text column here is varchar(255); confirm these still
# match the DDL the table-creation helpers actually run.
"""
createBookTable = "CREATE TABLE books (id varchar(255) NOT NULL PRIMARY KEY, title varchar(255), size varchar(255), status varchar(255), numImages int, coverExt varchar(255));"
createImageTable = "CREATE TABLE images (bookid varchar(255) NOT NULL, src varchar(255) NOT NULL, hash varchar(255), status varchar(255), alt varchar(255), originalAlt varchar(255), genAlt varchar(255), genImageCaption varchar(255), ocr varchar(255), beforeContext varchar(255), afterContext varchar(255), additionalContext varchar(255), CONSTRAINT PK_Image PRIMARY KEY (bookid, src), FOREIGN KEY (bookid) REFERENCES books(id) ON DELETE CASCADE);"
"""
# db.sendQuery("SELECT * FROM books")
# print(db.fetchOne())
# addBook(title="Harry Potter", size="300kb", numImages=25)
# Disabled sample inserts for the books table (string literal, not executed).
"""
addBook(title="Harry Potter", size="300kb", numImages=25)
addBook(title="Harraoeu", size="300kb", numImages=25)
addBook(title="Hartter", size="300kb", numImages=25)
"""
# getBooks(titleQ="Harry Potter", limit=1, skip=2)
# Disabled sample insert for the images table (string literal, not executed).
"""
addImage(
bookid="f1ac43cc-9f6d-4dc8-ac4f-aea0c4af5198",
src="sampleSrcMEOW",
hash="brown",
status="available",
)
"""
# getImagesByBook("fa47d830-586a-485f-a579-67b33fd3eae3")
# print(getImagesByHash("brown"))
# The only live experiment: update one image's status and before-context.
# NOTE(review): "bruh2" is a throwaway status value; presumably the row with
# this bookid/src must already exist for the update to change anything.
updateImage(
bookid="f1ac43cc-9f6d-4dc8-ac4f-aea0c4af5198",
src="sampleSrcMEOW",
status="bruh2",
beforeContext="before context be like",
)
# updateBook(id="72950", title="Test Title Two", status="available")
# Open and close a raw connection; the queries in between are commented out.
db = Database()
# db.sendQuery("SELECT * FROM images;")
# print(db.fetchAll())
db.close()

View File

@ -1,35 +1,41 @@
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status, permissions, serializers
from rest_framework.exceptions import ValidationError
from rest_framework.parsers import FormParser, MultiPartParser
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
import sys
import zipfile
from uuid import uuid4
import alttextbackend.data.analyze as analyze
import alttextbackend.data.postgres.books as books
import alttextbackend.data.postgres.images as images
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from rest_framework import serializers, status
from rest_framework.parsers import FormParser, MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView
sys.path.append("../")
class GetBooksSerializer(serializers.Serializer):
titleQ = serializers.CharField(required=False)
authorQ = serializers.CharField(required=False)
sortBy = serializers.ChoiceField(choices=['title', 'author'], style={'base_template': 'radio.html'}, default = 'title')
sortOrder = serializers.ChoiceField(choices=['asc', 'desc'], style={'base_template': 'radio.html'}, default = 'asc')
limit = serializers.IntegerField(min_value=1, required=False)
skip = serializers.IntegerField(min_value=0, required=False)
class AddBookSerializer(serializers.Serializer):
id = serializers.CharField(required=False)
title = serializers.CharField(required=True, allow_blank=False)
author = serializers.CharField(required=True, allow_blank=False)
description = serializers.CharField(required=False, allow_blank=True)
file = serializers.FileField(required=True)
book = serializers.FileField(required=True)
cover = serializers.ImageField(required=False)
class BooksView(APIView):
parser_classes = (FormParser, MultiPartParser)
serializer_class = AddBookSerializer
def get_serializer_class(self):
if self.request.method == 'GET':
if self.request.method == "GET":
return GetBooksSerializer
elif self.request.method == 'POST':
elif self.request.method == "POST":
return AddBookSerializer
return super().get_serializer_class()
@ -41,17 +47,14 @@ class BooksView(APIView):
# Access validated data
validated_data = serializer.validated_data
title_query = validated_data.get('titleQ')
author_query = validated_data.get('authorQ')
sort_by = validated_data.get('sortBy')
sort_order = validated_data.get('sortOrder')
limit = validated_data.get('limit')
skip = validated_data.get('skip')
titleQ = validated_data.get("titleQ", None)
limit = validated_data.get("limit", None)
skip = validated_data.get("skip", None)
# TODO: perform logic
# get array of books
result = books.getBooks(titleQ, limit, skip)
# TODO: return books
return Response(validated_data, status=status.HTTP_200_OK)
return Response(map(books.jsonifyBook, result), status=status.HTTP_200_OK)
def post(self, request, *args, **kwargs):
# validate request data
@ -61,31 +64,72 @@ class BooksView(APIView):
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
validated_data = serializer.validated_data
id = validated_data.get("id", uuid4())
# check if id is already in use
book = books.getBook(id)
if book:
return Response(
{"error": "id already in use"}, status=status.HTTP_400_BAD_REQUEST
)
# perform initial book processing
file = validated_data["file"]
file = validated_data["book"]
if not file.name.endswith(".zip"):
return Response(
{"file": ["file must be a zip"]}, status=status.HTTP_400_BAD_REQUEST
)
id = uuid4()
books_path = "./books/"
default_storage.save(f"{books_path}{str(id)}.zip", ContentFile(file.read()))
book_path = f"./books/{str(id)}"
default_storage.save(f"{book_path}.zip", ContentFile(file.read()))
with zipfile.ZipFile(default_storage.path(f"{book_path}.zip"), "r") as zip_ref:
zip_ref.extractall(default_storage.path(f"{book_path}"))
default_storage.delete(f"{book_path}.zip")
# TODO: ensure book has valid root html file
# TODO: analyze book and images, store them in database
# ensure book has valid root html file
html_file = analyze.findHTML(book_path)
if html_file == None:
default_storage.delete(book_path)
return Response(
{"error": "No HTML file found in the extracted folder"},
status=status.HTTP_400_BAD_REQUEST,
)
# save cover image
covers_path = "./covers/"
default_storage.save(
f"{covers_path}{str(id)}.{validated_data['cover'].name.split('.')[-1]}",
ContentFile(validated_data["cover"].read()),
)
coverExt = None
if "cover" in validated_data and validated_data["cover"] is not None:
coverExt = validated_data["cover"].name.split(".")[-1]
default_storage.save(
f"./covers/{str(id)}.{coverExt}",
ContentFile(validated_data["cover"].read()),
)
alt = analyze.createAnalyzer()
alt.parseFile(html_file)
# store basic book info into database
size = analyze.getSize(book_path)
imgs = alt.getAllImgs()
books.addBook(
title=validated_data["title"],
size=str(size),
numImages=len(imgs),
id=id,
coverExt=coverExt,
)
# store info for all images in database
for img in imgs:
context = alt.getContext(img)
thisHash = hash(alt.getImgData(img["src"]))
images.addImage(
bookid=id,
src=img["src"],
hash=thisHash,
alt=img["alt"],
originalAlt=img["alt"],
beforeContext=context[0],
afterContext=context[1],
)
book = books.getBook(id)
return Response(
{
"book": validated_data.get("title"),
"description": validated_data.get("description"),
},
books.jsonifyBook(book),
status=status.HTTP_201_CREATED,
)

View File

@ -1,102 +1,204 @@
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status, permissions, serializers
from rest_framework.exceptions import ValidationError
from rest_framework.parsers import FormParser, MultiPartParser
import copy
import os
import shutil
import threading
import time
import alttextbackend.data.analyze as analyzer
import alttextbackend.data.postgres.books as books
import alttextbackend.data.postgres.images as images
from django.core.files.storage import default_storage
from rest_framework import serializers, status
from rest_framework.parsers import FormParser, MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView
from django.core.files.base import ContentFile
from uuid import uuid4
class GetBookSerializer(serializers.Serializer):
bookid = serializers.CharField(required=True)
class UpdateBookSerialzer(serializers.Serializer):
bookid = serializers.CharField(required=True)
title = serializers.CharField(required=False, allow_blank=False)
author = serializers.CharField(required=False, allow_blank=False)
description = serializers.CharField(required=False, allow_blank=True)
cover = serializers.ImageField(required=False)
class AnalyzeBookSerializer(serializers.Serializer):
bookid = serializers.CharField(required=True)
missingOnly = serializers.BooleanField(required=False, default=True)
waitForAnalysis = serializers.BooleanField(required=False, default=False)
class OverwriteBookSerializer(serializers.Serializer):
bookid = serializers.CharField(required=True)
file = serializers.FileField(required=True)
class DeleteBookSerializer(serializers.Serializer):
bookid = serializers.CharField(required=True)
class BooksBookidView(APIView):
parser_classes = (FormParser, MultiPartParser)
serializer_class = UpdateBookSerialzer
def get_serializer_class(self):
if self.request.method == 'GET':
if self.request.method == "GET":
return GetBookSerializer
elif self.request.method == 'PATCH':
elif self.request.method == "PATCH":
return UpdateBookSerialzer
elif self.request.method == 'PUT':
elif self.request.method == "PUT":
return AnalyzeBookSerializer
elif self.request.method == 'DELETE':
elif self.request.method == "DELETE":
return DeleteBookSerializer
return super().get_serializer_class()
def get(self, request, *args, **kwargs):
    """Return the stored record for the book named by the URL's ``bookid``.

    Responds 400 when validation fails and 404 when no such book exists.
    """
    serializer_class = self.get_serializer_class()
    serializer = serializer_class(data={"bookid": kwargs.get("bookid")})
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
    validated_data = serializer.validated_data

    # get book from database
    book = books.getBook(validated_data.get("bookid"))
    if not book:
        # rest_framework.status defines HTTP_404_NOT_FOUND; the previous
        # HTTP_404_BAD_REQUEST name does not exist and raised AttributeError.
        return Response(
            {"error": "No book of that id was found in database."},
            status=status.HTTP_404_NOT_FOUND,
        )

    return Response(books.jsonifyBook(book), status=status.HTTP_200_OK)
def patch(self, request, *args, **kwargs):
    """Update a book's title and/or cover image and return the updated record.

    Responds 400 when validation fails and 404 when no such book exists.
    """
    serializer_class = self.get_serializer_class()
    # NOTE(review): this writes into request.data in place; confirm the
    # parsed data is mutable for every parser this view accepts.
    data = request.data
    data["bookid"] = kwargs.get("bookid")
    serializer = serializer_class(data=data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
    validated_data = serializer.validated_data
    bookid = validated_data.get("bookid")

    # check if book exists in database
    book = books.getBook(bookid)
    if not book:
        # rest_framework.status defines HTTP_404_NOT_FOUND; the previous
        # HTTP_404_BAD_REQUEST name does not exist and raised AttributeError.
        return Response(
            {"error": "No book of that id was found in database."},
            status=status.HTTP_404_NOT_FOUND,
        )
    book = books.jsonifyBook(book)

    # update book title and cover
    title = validated_data.get("title", None)
    coverExt = None
    if "cover" in validated_data and validated_data["cover"] is not None:
        coverExt = validated_data["cover"].name.split(".")[-1]
        # Only remove an old cover when one was actually recorded.
        if book["coverExt"]:
            default_storage.delete(f"./covers/{str(bookid)}.{book['coverExt']}")
        default_storage.save(
            f"./covers/{str(bookid)}.{coverExt}",
            ContentFile(validated_data["cover"].read()),
        )
    books.updateBook(bookid, title=title, coverExt=coverExt)

    book = books.jsonifyBook(books.getBook(bookid))
    return Response(book, status=status.HTTP_200_OK)
def put(self, request, *args, **kwargs):
    """Start (or run) alt-text analysis for a book's images.

    Query parameters ``missingOnly`` and ``waitForAnalysis`` choose which
    images are analyzed and whether the request blocks until analysis is
    finished. Responds 400 on validation errors, 404 when the book is
    unknown and 500 when the book folder holds no HTML file.
    """
    serializer_class = self.get_serializer_class()
    data = copy.deepcopy(request.query_params)
    data["bookid"] = kwargs.get("bookid")
    serializer = serializer_class(data=data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
    validated_data = serializer.validated_data
    bookid = validated_data.get("bookid")

    # check for book's existence
    book = books.getBook(bookid)
    if not book:
        # rest_framework.status defines HTTP_404_NOT_FOUND; the previous
        # HTTP_404_BAD_REQUEST name does not exist and raised AttributeError.
        return Response(
            {"error": "Book not found in database."},
            status=status.HTTP_404_NOT_FOUND,
        )

    html_file = analyzer.findHTML(f"./books/{str(validated_data.get('bookid'))}")
    if html_file is None:
        return Response(
            {"error": "Failed to find HTML file in book directory."},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    alt = analyzer.createAnalyzer()
    alt.parseFile(html_file)
    if validated_data.get("missingOnly"):
        imgs = alt.getNoAltImgs()
    else:
        imgs = alt.getAllImgs()

    # analyzeImagesV2 sets the book and all images to "processing" status
    if validated_data.get("waitForAnalysis"):
        analyzer.analyzeImagesV2(alt, imgs, bookid)
    else:
        threading.Thread(
            target=analyzer.analyzeImagesV2, args=(alt, imgs, bookid)
        ).start()

    book = books.jsonifyBook(books.getBook(bookid))
    if not validated_data.get("waitForAnalysis"):
        # The background thread may not have updated the row yet; report
        # the status the book is about to have.
        book["status"] = "processing"
    return Response(book, status=status.HTTP_200_OK)
def delete(self, request, *args, **kwargs):
    """Delete a book, its images, its extracted folder and its cover.

    Responds 400 on validation errors, 404 when the book is unknown and 500
    when the files cannot be removed. On success the deleted book's record
    is returned with status "deleted".
    """
    serializer_class = self.get_serializer_class()
    serializer = serializer_class(data={"bookid": kwargs.get("bookid")})
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
    validated_data = serializer.validated_data
    bookid = validated_data.get("bookid")

    # check for book's existence
    book = books.getBook(bookid)
    if not book:
        # rest_framework.status defines HTTP_404_NOT_FOUND; the previous
        # HTTP_404_BAD_REQUEST name does not exist and raised AttributeError.
        return Response(
            {"error": "Book not found in database."},
            status=status.HTTP_404_NOT_FOUND,
        )
    book = books.jsonifyBook(book)
    book["status"] = "deleted"

    # delete book from table (this cascades to images table as well)
    books.deleteBook(bookid)

    # delete book directory and cover image
    # NOTE(review): nesting reconstructed from the error messages — the cover
    # is only removed when the book directory exists; confirm that is intended.
    try:
        folder_path = f"./books/{str(bookid)}"
        if default_storage.exists(folder_path):
            shutil.rmtree(default_storage.path(folder_path))
            if book["coverExt"]:
                try:
                    default_storage.delete(
                        f"./covers/{str(bookid)}.{book['coverExt']}"
                    )
                except Exception:
                    return Response(
                        {"error": "Failed to delete cover image."},
                        status=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    )
        else:
            return Response(
                {"error": "Failed to find book directory."},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )
    except Exception:
        return Response(
            {"error": "Failed to delete book directory."},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    return Response(
        book,
        status=status.HTTP_200_OK,
    )

View File

@ -1,25 +1,102 @@
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status, permissions, serializers
from rest_framework.exceptions import ValidationError
from rest_framework.parsers import FormParser, MultiPartParser
import copy
import os
import shutil
import zipfile
import alttextbackend.data.analyze as analyze
import alttextbackend.data.postgres.books as books
import alttextbackend.data.postgres.images as images
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
from uuid import uuid4
from django.http import HttpResponse
from rest_framework import serializers, status
from rest_framework.parsers import FormParser, MultiPartParser
from rest_framework.response import Response
from rest_framework.views import APIView
class ExportBookSerializer(serializers.Serializer):
bookid = serializers.CharField(required=True)
name = serializers.CharField(required=False)
class BooksBookidExportView(APIView):
parser_classes = (FormParser, MultiPartParser)
serializer_class = ExportBookSerializer
def get(self, request, *args, **kwargs):
    """Export a book as a zip with the stored alt text written into its HTML.

    The book folder is copied to a temporary ``<bookid>-t`` folder, the HTML
    file there is rewritten with each image's stored ``alt``, and the folder
    is zipped and returned as an attachment. The temporary folder and zip
    are removed afterwards, including when an intermediate step fails.

    Responds 400 on validation errors, 404 when the book is unknown and 500
    when the HTML file or the temporary copy cannot be produced.
    """
    data = copy.deepcopy(request.query_params)
    data["bookid"] = kwargs.get("bookid")
    serializer = self.serializer_class(data=data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
    validated_data = serializer.validated_data
    bookid = str(validated_data.get("bookid"))

    # check if book exists in database
    book = books.getBook(bookid)
    if not book:
        return Response(
            {"error": "Book not found"}, status=status.HTTP_404_NOT_FOUND
        )

    # find HTML file
    html_file = analyze.findHTML(f"./books/{bookid}")
    if html_file is None:
        return Response(
            {"error": "Failed to find HTML file in book directory."},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    # apply the stored alt text to every image tag in the book
    alt = analyze.createAnalyzer()
    alt.parseFile(html_file)
    for img in alt.getAllImgs():
        record = images.getImageByBook(bookid, img["src"])
        if record is None:
            # No stored row for this tag: leave its alt text as it is.
            continue
        alt.setAlt(img["src"], images.jsonifyImage(record)["alt"])

    # NOTE(review): the temp copy uses paths relative to the working
    # directory while the source goes through default_storage.path();
    # confirm both resolve under the same root.
    temp_dir = f"./books/{bookid}-t"
    zip_filename = f"./books/{bookid}-t.zip"
    try:
        shutil.copytree(default_storage.path(f"./books/{bookid}"), temp_dir)
    except Exception:
        return Response(
            {"error": "Failed to copy book into temp folder."},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    try:
        html_file = analyze.findHTML(temp_dir)
        if html_file is None:
            return Response(
                {"error": "Failed to find HTML file in temp book directory."},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )
        default_storage.delete(html_file)
        alt.exportToFile(html_file)

        # Zip the temp folder
        with zipfile.ZipFile(zip_filename, "w") as zipf:
            for root, _, files in os.walk(temp_dir):
                for file in files:
                    full_path = os.path.join(root, file)
                    zipf.write(full_path, os.path.relpath(full_path, temp_dir))

        # Send the zip file as a response
        filename = validated_data.get("name", f"{bookid}")
        with open(zip_filename, "rb") as f:
            response = HttpResponse(f, content_type="application/zip")
        response["Content-Disposition"] = f"attachment; filename={filename}.zip"
        return response
    finally:
        # Delete the temp zip and folder, even when a step above failed.
        if os.path.exists(zip_filename):
            os.remove(zip_filename)
        shutil.rmtree(temp_dir, ignore_errors=True)

View File

@ -1,74 +1,158 @@
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status, permissions, serializers
from rest_framework.exceptions import ValidationError
import copy
import threading
import alttextbackend.data.analyze as analyze
import alttextbackend.data.postgres.books as books
import alttextbackend.data.postgres.images as images
from rest_framework import serializers, status
from rest_framework.parsers import FormParser, MultiPartParser
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
from uuid import uuid4
from rest_framework.response import Response
from rest_framework.views import APIView
class GetImageBySrc(serializers.Serializer):
bookid = serializers.CharField(required=True)
src = serializers.CharField(required=True)
class UpdateImageBySrc(serializers.Serializer):
bookid = serializers.CharField(required=True)
src = serializers.CharField(required=True)
alt = serializers.CharField(required=True)
beforeContext = serializers.CharField(required=False)
afterContext = serializers.CharField(required=False)
additionalContext = serializers.CharField(required=False)
class AnalyzeImageBySrc(serializers.Serializer):
    """Input schema for (re-)analyzing one image addressed by bookid and src."""

    bookid = serializers.CharField(required=True)
    src = serializers.CharField(required=True)
    # Optional flag, False when omitted; the view runs the analysis inline
    # when it is true and on a background thread otherwise.
    waitForAnalysis = serializers.BooleanField(required=False, default=False)
class BooksBookidImageView(APIView):
    """Read, update and re-analyze a single image of a book.

    The image is addressed by the ``bookid`` URL keyword argument together
    with a ``src`` request parameter.

    * ``GET``   - return the stored image record.
    * ``PATCH`` - update alt-text / context fields, return the new record.
    * ``PUT``   - (re-)generate the alt-text for the image.
    """

    parser_classes = (FormParser, MultiPartParser)

    def get_serializer_class(self):
        """Return the serializer matching the current HTTP method."""
        by_method = {
            "GET": GetImageBySrc,
            "PATCH": UpdateImageBySrc,
            "PUT": AnalyzeImageBySrc,
        }
        serializer_class = by_method.get(self.request.method)
        if serializer_class is not None:
            return serializer_class
        # NOTE(review): plain APIView does not define get_serializer_class
        # (GenericAPIView does), so this fallback raises AttributeError for
        # any other method. Only GET/PATCH/PUT handlers exist on this view.
        return super().get_serializer_class()

    def get(self, request, *args, **kwargs):
        """Return the image record for ``bookid``/``src``.

        Responds 400 on invalid input and 404 when either the book or the
        image is unknown.
        """
        serializer_class = self.get_serializer_class()
        # request.query_params is immutable; work on a mutable copy.
        data = copy.deepcopy(request.query_params)
        data["bookid"] = kwargs.get("bookid")
        serializer = serializer_class(data=data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        bookid = validated_data.get("bookid")
        src = validated_data.get("src")

        # check if book exists in database
        book = books.getBook(bookid)
        if not book:
            return Response(
                {"error": "Book not found"}, status=status.HTTP_404_NOT_FOUND
            )

        # get image from database
        img = images.getImageByBook(bookid, src)
        if img is None:
            return Response(
                {"error": "Image not found"}, status=status.HTTP_404_NOT_FOUND
            )

        return Response(images.jsonifyImage(img), status=status.HTTP_200_OK)

    def patch(self, request, *args, **kwargs):
        """Update an image's alt-text and context fields.

        Body fields and query parameters are merged before validation.
        Responds 400 on invalid input, 404 when the image is unknown, and
        otherwise 200 with the freshly re-read image record.
        """
        serializer_class = self.get_serializer_class()
        data = copy.deepcopy(request.data)
        data.update(request.query_params)
        data["bookid"] = kwargs.get("bookid")
        serializer = serializer_class(data=data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        bookid = validated_data.get("bookid")
        src = validated_data.get("src")

        img = images.getImageByBook(bookid, src)
        if img is None:
            return Response(
                {"error": "Image not found"}, status=status.HTTP_404_NOT_FOUND
            )

        # update image in database; omitted optional fields are passed as None
        images.updateImage(
            bookid=bookid,
            src=src,
            alt=validated_data.get("alt", None),
            beforeContext=validated_data.get("beforeContext", None),
            afterContext=validated_data.get("afterContext", None),
            additionalContext=validated_data.get("additionalContext", None),
        )

        # re-read so the response reflects what is actually stored
        img = images.getImageByBook(bookid, src)
        return Response(images.jsonifyImage(img), status=status.HTTP_200_OK)

    def put(self, request, *args, **kwargs):
        """Generate alt-text for one image of a book.

        With ``waitForAnalysis`` true the analysis runs inline; otherwise it
        is started on a background thread and the returned record is marked
        with ``status = "processing"``.

        Responds 400 on invalid input or when the image is not present in
        the book's HTML, and 500 when no HTML file is found for the book.
        """
        serializer_class = self.get_serializer_class()
        data = copy.deepcopy(request.query_params)
        data["bookid"] = kwargs.get("bookid")
        serializer = serializer_class(data=data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        src = validated_data.get("src")
        wait_for_analysis = validated_data.get("waitForAnalysis")

        # find HTML file
        bookid = str(validated_data.get("bookid"))
        html_file = analyze.findHTML(f"./books/{bookid}")
        if html_file is None:
            return Response(
                {"error": "Failed to find HTML file in book directory."},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

        # generate alt for image
        alt = analyze.createAnalyzer()
        alt.parseFile(html_file)
        img = alt.getImg(src)
        if img is None:
            return Response(
                {"error": "Failed to find image in book."},
                status=status.HTTP_400_BAD_REQUEST,
            )

        if wait_for_analysis:
            analyze.analyzeSingularImageV2(alt, img, bookid)
        else:
            threading.Thread(
                target=analyze.analyzeSingularImageV2, args=(alt, img, bookid)
            ).start()

        # NOTE(review): getImageByBook can return None (get/patch guard
        # against it); confirm jsonifyImage tolerates that when no record
        # exists yet for this image.
        image = images.jsonifyImage(
            images.getImageByBook(validated_data.get("bookid"), src)
        )
        if not wait_for_analysis:
            image["status"] = "processing"
        return Response(image, status=status.HTTP_200_OK)

View File

@ -1,25 +1,38 @@
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status, permissions, serializers
from rest_framework.exceptions import ValidationError
import sys
import alttextbackend.data.postgres.books as books
import alttextbackend.data.postgres.images as images
from rest_framework import serializers, status
from rest_framework.parsers import FormParser, MultiPartParser
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
from uuid import uuid4
from rest_framework.response import Response
from rest_framework.views import APIView
sys.path.append("../")
class ImagesFromBookSerializer(serializers.Serializer):
    """Input schema for listing a book's images: only the book id is needed."""

    bookid = serializers.CharField(required=True)
class BooksBookidImagesView(APIView):
    """List every stored image record belonging to one book."""

    parser_classes = (FormParser, MultiPartParser)
    serializer_class = ImagesFromBookSerializer

    def get(self, request, *args, **kwargs):
        """Return the image records of the book given by the ``bookid`` kwarg.

        Responds 400 on invalid input and 404 when the book is unknown.
        """
        serializer = self.serializer_class(data={"bookid": kwargs.get("bookid")})
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        # Named book_id rather than id so the builtin is not shadowed.
        book_id = validated_data.get("bookid")

        # check if book exists in database
        book = books.getBook(book_id)
        if not book:
            return Response(
                {"error": "Book not found"}, status=status.HTTP_404_NOT_FOUND
            )

        # get images from database
        imgs = images.getImagesByBook(book_id)
        # Build a real list: a map object is a one-shot iterator, so
        # response.data would be empty once it has been rendered.
        return Response(
            [images.jsonifyImage(img) for img in imgs], status=status.HTTP_200_OK
        )

View File

@ -1,26 +1,26 @@
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status, permissions, serializers
from rest_framework.exceptions import ValidationError
from rest_framework import serializers, status
from rest_framework.parsers import FormParser, MultiPartParser
from django.core.files.storage import default_storage
from django.core.files.base import ContentFile
from uuid import uuid4
from rest_framework.response import Response
from rest_framework.views import APIView
import alttextbackend.data.postgres.images as images
class GetImagesByHashSerializer(serializers.Serializer):
    """Input schema for looking images up by hash: the hash is mandatory."""

    hash = serializers.CharField(required=True)
class ImagesHashView(APIView):
    """List every stored image record that has a given hash."""

    parser_classes = (FormParser, MultiPartParser)
    serializer_class = GetImagesByHashSerializer

    def get(self, request, *args, **kwargs):
        """Return the image records matching the ``hash`` URL kwarg.

        Responds 400 when the hash fails validation, otherwise 200 with a
        list of records.
        """
        image_hash = kwargs.get("hash")
        data = {"hash": image_hash}
        serializer = self.serializer_class(data=data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        imgs = images.getImagesByHash(image_hash)
        # Build a real list: a map object is a one-shot iterator, so
        # response.data would be empty once it has been rendered.
        return Response(
            [images.jsonifyImage(img) for img in imgs], status=status.HTTP_200_OK
        )

View File

@ -3,18 +3,12 @@ info:
title: Alt-text Backend API
description: |-
This is the Alt-text Backend API based on the OpenAPI 3.0 specification.
# termsOfService: http://swagger.io/terms/
contact:
email: da.cruz@aol.com
# license:
# name: Apache 2.0
# url: http://www.apache.org/licenses/LICENSE-2.0.html
version: 1.0.11
externalDocs:
description: Find out more about Alt-text
url: https://github.com/EbookFoundation/alt-text
# servers:
# - url: https://petstore3.swagger.io/api/v3
tags:
- name: Books
description: Everything regarding books
@ -36,31 +30,6 @@ paths:
explode: true
schema:
type: string
- name: authorQ
in: query
description: String to match the author to.
required: false
explode: true
schema:
type: string
- name: sortBy
in: query
description: Field to sort by.
required: false
explode: true
schema:
type: string
enum: ["title", "author"]
default: "title"
- name: sortOrder
in: query
description: Order to sort by.
required: false
explode: true
schema:
type: string
enum: ["asc", "desc"]
default: "asc"
- name: limit
in: query
description: Max number of books to return.
@ -100,15 +69,12 @@ paths:
schema:
type: object
properties:
id:
type: string
description: Id of the book (optional).
title:
type: string
description: Title of the book.
author:
type: string
description: Author of the book.
description:
type: string
description: Description of the book (optional).
book:
type: string
description: Zip file of the book.
@ -167,12 +133,6 @@ paths:
title:
type: string
description: Title of the book (optional).
author:
type: string
description: Author of the book (optional).
description:
type: string
description: Description of the book (optional).
cover:
type: string
description: Cover image for the book (optional).
@ -195,32 +155,25 @@ paths:
summary: Re-analyze an entire book.
description: Re-analyze an entire book and overwrite current image data by its id.
operationId: analyzeBook
responses:
'200':
description: Successful operation
content:
application/json:
schema:
$ref: '#/components/schemas/Book'
'500':
description: Internal Server Error
post:
tags:
- Books
summary: Upload a new book file to a book object.
description: Upload a new book to a given book object (by its id), and re-analyze it (essentially creating a new book, except keeping the same bookid).
operationId: overwriteBook
requestBody:
required: true
content:
multipart/form-data:
schema:
type: object
properties:
book:
type: string
description: Zip file of the book.
format: binary
parameters:
- name: missingOnly
in: query
description: Whether to analyze only the images without alt-text.
required: false
explode: true
schema:
type: boolean
example: true
default: true
- name: waitForAnalysis
in: query
description: Whether to wait for the analysis to complete before returning a response.
required: false
explode: true
schema:
type: boolean
example: false
default: false
responses:
'200':
description: Successful operation
@ -255,6 +208,15 @@ paths:
schema:
type: string
example: "123e4567-e89b-12d3-a456-426614174000"
- name: name
in: query
description: Alternative name for file download.
required: false
explode: true
schema:
type: string
default: "{bookid}"
example: "harry_potter"
get:
tags:
- Books
@ -270,12 +232,6 @@ paths:
type: string
example: |-
content of the file
# headers:
# Content-Disposition:
# description: File name to prompt for download
# schema:
# type: string
# example: attachment; filename="example.txt"
'500':
description: Internal Server Error
/books/{bookid}/images:
@ -383,6 +339,9 @@ paths:
afterContext:
type: string
description: New afterContext for the image (optional).
additionalContext:
type: string
description: New additionalContext for the image (optional).
responses:
'200':
description: Successful operation
@ -398,6 +357,16 @@ paths:
summary: Re-analyze an image.
description: Generate an image's alt-text (written to genAlt field in image object).
operationId: analyzeImageBySrc
parameters:
- name: waitForAnalysis
in: query
description: Whether to wait for the analysis to complete before returning a response (default = false).
required: false
explode: true
schema:
type: boolean
example: false
default: false
responses:
'200':
description: Successful operation
@ -445,12 +414,6 @@ components:
title:
type: string
example: "Diary of an Oxygen Thief"
author:
type: string
example: "Anonymous"
description:
type: string
example: "Hurt people hurt people."
size:
type: string
example: "1.16MB"
@ -458,39 +421,53 @@ components:
type: string
example: "processing"
enum: ["available", "processing", "deleted"]
default: "available"
numImages:
type: integer
example: 4
Image:
type: object
properties:
bookid:
type: string
example: "123e4567-e89b-12d3-a456-426614174000"
src:
type: string
example: "images/cover.png"
hash:
type: string
example: ""
size:
status:
type: string
example: "24KB"
example: "processing"
enum: ["available", "processing", "deleted"]
default: "available"
alt:
type: string
example: ""
default: "originalAlt"
originalAlt:
type: string
example: ""
genAlt:
type: string
example: ""
default: ""
genImageCaption:
type: string
example: ""
default: ""
ocr:
type: string
example: ""
default: ""
beforeContext:
type: string
example: ""
afterContext:
type: string
example: ""
example: ""
additionalContext:
type: string
example: ""
default: ""