#for color and font styles
import colorama
from colorama import init, Fore, Style, Back
#for pdf manipulation
import fitz
#for input csv
import csv
#for random colors
import random
#for time delay
import time
#for case sensitive
import re
# Initialize colorama before any colored output is printed.
init()
print(Fore.GREEN + Style.BRIGHT + "Hello!" + Style.RESET_ALL)

# Ask the user for the PDF to highlight. Surrounding whitespace (common when a
# path is pasted into the console) is removed so the path can be opened as typed.
pdf_file = input(
    "\n" + Fore.BLUE + Style.BRIGHT
    + "Please enter the path of the PDF file: " + Style.RESET_ALL
).strip()

# Ask the user for the CSV file that holds the search terms.
search_terms_csv = input(
    "\n" + Fore.BLUE + Style.BRIGHT
    + "Please enter the path of the Search terms CSV file: " + Style.RESET_ALL
).strip()

# Ask whether the search must respect upper/lower case. The answer is
# normalized so that "y", "Y", " y " and "yes" are all accepted.
match_case = input(
    "\n" + Fore.BLUE + Style.BRIGHT
    + "Do you want to Match Case? (Y/N): " + Style.RESET_ALL
).strip().lower()

if match_case in ("y", "yes"):
    case_sensitive = True
elif match_case in ("n", "no"):
    case_sensitive = False
else:
    print("Invalid input")
    # Exit with a non-zero status so callers can tell that nothing was done;
    # SystemExit also works where the interactive-only exit() helper is absent.
    raise SystemExit(1)

print("\n" + Fore.GREEN + Style.BRIGHT + "Processing......" + Style.RESET_ALL)
# Open PDF file
pdf_doc = fitz.open(pdf_file)

# Read the (term, tag) pairs from the CSV file. Only rows with exactly two
# columns are used; any other non-empty row is reported and skipped.
search_terms = []
try:
    # "utf-8-sig" reads plain UTF-8 as well as UTF-8 with a byte-order mark;
    # with plain "utf-8" the mark would stay attached to the first term and
    # that term could never be found. newline="" is the mode the csv module
    # documents for files handed to csv.reader.
    with open(search_terms_csv, "r", encoding="utf-8-sig", newline="") as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for blank lines; they are not errors.
                continue
            if len(row) == 2:
                search_terms.append((row[0], row[1]))
            else:
                print(f"Error: Incorrect number of columns in row: {row}")
except FileNotFoundError:
    print("Error: Search terms CSV file not found.")
    pdf_doc.close()
    # Non-zero status: the run failed and no output file was produced.
    raise SystemExit(1)
# One random RGB triple (each component drawn from random.random()) per search
# term; the triple at index i is used for the highlights of search term i.
colors = [
    tuple(random.random() for _channel in range(3))
    for _term in search_terms
]
num_colors = len(colors)
def _has_exact_case(page, rect, term):
    """Return True when the text under *rect* agrees with *term* in letter case.

    Whitespace runs are collapsed on both sides before comparing. The text
    under the rectangle may carry neighbouring characters, so containment is
    tested instead of equality. The reverse containment keeps the pieces of a
    term that wraps over several lines, where each rectangle covers only part
    of the term.
    """
    found = " ".join(page.get_textbox(rect).split())
    wanted = " ".join(term.split())
    return bool(found) and (wanted in found or found in wanted)


# Go through every page and highlight each occurrence of every search term.
for page in pdf_doc:
    for i, (term, tag) in enumerate(search_terms):
        if not term:
            # An empty first column gives nothing to search for.
            continue
        # Per the PyMuPDF documentation, search_for() has no "ignore_case"
        # option: its matching already ignores upper/lower case. Case-sensitive
        # mode is therefore applied afterwards by checking the text under each hit.
        matches = page.search_for(term, quads=False)
        if case_sensitive:
            matches = [rect for rect in matches if _has_exact_case(page, rect, term)]
        # All occurrences of one term share that term's color.
        color = colors[i]
        for match in matches:
            highlight = page.add_highlight_annot(match)
            highlight.set_colors(stroke=color)
            # The annotation's content (its pop-up text) records term and tag.
            highlight.set_info(content=f"{term} : {tag}")
            # update() must come after the property changes; calling it before
            # set_colors() leaves the annotation drawn in its default color.
            highlight.update()

# Save the highlighted document next to the input file. The ".pdf" suffix is
# removed only when it is really there, so paths with another or no extension
# are not truncated.
base_path = pdf_file[:-4] if pdf_file.lower().endswith(".pdf") else pdf_file
pdf_doc.save(f"{base_path}_Highlighted.pdf")
pdf_doc.close()
print("\n" + Fore.RED + Style.BRIGHT + "Highlighted PDF Generated" + Style.RESET_ALL)
# Short pause before the script ends.
time.sleep(2)
- Fix the formatting of your post so that all the code is inside the “code fences”.
```
These code-fences
```
- Provide details of the errors you are seeing so that we have a clue about how to help.
Unless you tell us what errors you are getting, we cannot help you.