Greeting all. Newbie here. I am trying to go through thousands of XML files in a specific folder using Python in Excel to populate a worksheet identifying the file name and what key words were found.
Here is my code, followed by the error message I am getting. I feel like I am missing something really obvious. If anyone out there can take a glance and point me in the right direction, I would be forever grateful.
python
# Parse through files and populate an Excel worksheet
import os
import xml.etree.ElementTree as ET
import openpyxl
# Define the folder path and keywords
folder_path = r"C:\Users\PETestUser"
keywords = ['Gorilla', 'Dinosaur', 'Unicorn'] # Example list of keywords
# Initialize the Excel workbook and worksheet
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "XML Keyword Results"
ws.append(['Filename', 'Keywords Found']) # Header row for the Excel sheet
# Loop through XML files in the specified folder
for filename in os.listdir(folder_path):
if filename.endswith(".xml"): # Check if the file is an XML
file_path = os.path.join(folder_path, filename)
# Parse the XML file
try:
tree = ET.parse(file_path)
root = tree.getroot()
# Check for keywords in the XML content
found_keywords = []
for keyword in keywords:
if any(keyword in elem.text for elem in root.iter() if elem.text): # Search each element for the keyword
found_keywords.append(keyword)
# If any keywords were found, write the results to the Excel sheet
if found_keywords:
ws.append([filename, ', '.join(found_keywords)])
except ET.ParseError:
print(f"Error parsing {filename}")
continue
# Save the Excel file
excel_output_path = r"C:\Users\First.Last\Documents\PETest\keyword_results.xlsx"
wb.save(excel_output_path)
print(f"Excel file saved at: {excel_output_path}")
Error Message:
FileNotFoundError: [Errno 2] No such file or directory: ‘C:\Users\first.last\Documents\PETestUser’