From 507f261f7c961e0a5828586dbec636fd41c34ce2 Mon Sep 17 00:00:00 2001 From: kethan351 Date: Fri, 16 Feb 2024 12:09:45 -0500 Subject: [PATCH] get,download,automate --- tests/automate.py | 77 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 7 deletions(-) diff --git a/tests/automate.py b/tests/automate.py index b1cba4d..c97b316 100644 --- a/tests/automate.py +++ b/tests/automate.py @@ -7,6 +7,7 @@ from bs4 import BeautifulSoup import time from ..src.alttext.alttext import getImgData, getContext, genDesc, genChars from ..src.alttext.langengine import refineAlt +import csv class BookParser: def __init__(self): @@ -46,22 +47,84 @@ def process_books(extraction_folder): title = soup.find('title').get_text() if soup.find('title') else 'No title' print(f"Book ID: {book_id}, File: {filename}, Title: {title}") -#Use genAltTextV2 -#ADD benchmark time stamps -def genAltTextV2(self, src: str) -> str: +class AltTextGenerator: + def __init__(self): + self.benchmark_records = [] + + #Use genAltTextV2 + #ADD benchmark time stamps + def genAltTextV2(self, src: str) -> str: + # Start total timing + total_start_time = time.time() + + # Image data extraction timing + imgdata_start_time = time.time() imgdata = self.getImgData(src) + imgdata_end_time = time.time() + imgdata_total_time = imgdata_end_time - imgdata_start_time + + # Context extraction timing context = [None, None] + context_start_time = time.time() if self.options["withContext"]: context = self.getContext(self.getImg(src)) + context_end_time = time.time() + context_total_time = context_end_time - context_start_time + beforeContext = context[0] + afterContext = context[1] + + # Description generation timing + genDesc_start_time = time.time() desc = self.genDesc(imgdata, src, context) + genDesc_end_time = time.time() + genDesc_total_time = genDesc_end_time - genDesc_start_time + # OCR processing timing + ocr_start_time = time.time() chars = "" - if self.ocrEngine != None: + if self.ocrEngine is not None: chars = self.genChars(imgdata, src).strip() + ocr_end_time = time.time() + ocr_total_time = ocr_end_time - ocr_start_time - if self.langEngine == None: + # Refinement processing timing + refine_start_time = time.time() + if self.langEngine is None: raise Exception("To use version 2, you must have a langEngine set.") + refined_desc = self.langEngine.refineAlt(desc, chars, context, None) + refine_end_time = time.time() + refine_total_time = refine_end_time - refine_start_time - return self.langEngine.refineAlt(desc, chars, context, None) + # End total timing + total_end_time = time.time() + total_overall_time = total_end_time - total_start_time -#Add .csv generation for benchmark variables \ No newline at end of file + #Record dictionary to store all the timing data + record = { + "Image Data Extraction Time": imgdata_total_time, + "Context Extraction Time": context_total_time, + "Description Generation Time": genDesc_total_time, + "OCR Processing Time": ocr_total_time, + "Refinement Processing Time": refine_total_time, + "Total Overall Time": total_overall_time + } + # Add record to benchmark_records for later CSV generation + self.benchmark_records.append(record) + + return refined_desc + + #CSV generation + def generate_csv(benchmark_records, csv_file_path): + if not benchmark_records: + print("No benchmark data available.") + return + + # Determine the CSV field names from the keys of the first record + fieldnames = benchmark_records[0].keys() + + with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for record in benchmark_records: + writer.writerow(record) + print(f"CSV file has been generated at: {csv_file_path}") \ No newline at end of file