Hi @all,
as mentioned in the title, I tried to get a script for detecting handwritten Text in a PDF-file.
Because I am absolutely new to coding, but wanted my idea to be realized, I used ChatGPT to get the basic script done.
The script does not work reliable at the moment. I was able to do some adjustments, so that sometimes the script works kind of properly but sometimes it does not.
When it doesn’t, then an Error appears, that the new generated .png-file cannot be found.
But it indeed it was created and is present in the folder.
Maybe deletion of this created .png-file has to be called at some other point?
Furthermore I would like to remove entries from listbox via double click.
This works just visually. Nevertheless the dropped file will still be processed.
Does anyone have some detailed tips?
This is my code so far:
import os
import csv
from pdf2image import convert_from_path
from google.cloud import vision
from google.cloud.vision_v1 import types
import tkinter as tk
from tkinterdnd2 import DND_FILES, TkinterDnD
#Google Vision API - authetification
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "C:/Users/***/Desktop/HTR/GoogleCloudVision.json"
client = vision.ImageAnnotatorClient()
processing_queue = []
def process_single_file(pdf_path):
png_path = pdf_path.replace(".pdf", ".png")
pdf_images = convert_from_path(pdf_path)
png_filename = os.path.basename(png_path) #Filename
png_folder = os.path.dirname(pdf_path) #Folderpath
png_path = os.path.join(png_folder, png_filename) #creating new Filepath
pdf_images[0].convert("L").save(png_path)
with open(png_path, "rb") as image_file:
content = image_file.read()
image = types.Image(content=content)
response = client.document_text_detection(image=image)
text = response.full_text_annotation.text
with open("C:/Users/***/Desktop/HTR/keywords.txt", "r") as keywords_file:
keywords = keywords_file.read().splitlines()
matched_keywords = [keyword for keyword in keywords if keyword in text]
if matched_keywords:
with open("C:/Users/***/Desktop/HTR/daten.csv", "r") as csv_file:
csv_reader = csv.DictReader(csv_file)
for row in csv_reader:
if row["Keyword"] in matched_keywords:
new_filename = f"{row['Nachname']}, {row['Vorname']}.png"
new_filename_png = new_filename
os.rename(png_path, os.path.join(png_folder, new_filename))
new_pdf_path = os.path.join(png_folder, new_filename.replace(".png", ".pdf"))
pdf_images[0].save(new_pdf_path, "PDF", resolution=100.0, save_all=True)
processing_queue.remove(pdf_path)
os.remove(pdf_path) #remove original PDF-File
os.remove(new_filename_png) #delete the temporary created PNG file
break
else:
os.remove(png_path)
return
else:
os.remove(png_path)
return
def start_processing():
queue_copy = processing_queue.copy() #create a copy of the queue
for file_path in queue_copy:
process_single_file(file_path)
listbox.delete(listbox.get(0, tk.END).index(file_path))
def on_drop(event):
file_paths = event.data.split()
for file_path in file_paths:
processing_queue.append(file_path)
listbox.insert(tk.END, file_path)
root = TkinterDnD.Tk()
root.title("HTR with GUI")
root.geometry("510x220")
label = tk.Label(root, text="Drag&Drop PDF-Dateien:")
label.pack()
listbox = tk.Listbox(root, width=70,height=10,font='ansifixed')
listbox.pack()
start_button = tk.Button(root, text="Verarbeitung starten", command=start_processing)
start_button.pack(padx='5', pady='10')
def on_select(event):
selected_index = listbox.curselection()[0]
selected_file = listbox.get(selected_index)
listbox.delete(selected_index)
listbox.bind('<Double-Button-1>', on_select)
root.drop_target_register(DND_FILES)
root.dnd_bind('<<Drop>>', lambda event: on_drop(event))
root.mainloop()
I think it’s necessary to convert the .pdf in .png, because Google Vision API only works with images.
So for me it is also necessary to reconvert the file to .pdf and delete the original .pdf-file and also the created .png-file, that was only created for text recognition.
Best regards
Fisatec
PS: This is the Error I am talking about