I’m trying to convert the following Python code into an executable file. I’ve experimented with various methods, such as auto-py-to-exe and py2exe, but I consistently encounter the same issue: the conversion process requires manual input.
import csv
import os
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
# Pick where the date range comes from: a frozen (bundled) executable uses
# fixed defaults, while a normal interpreter run asks on the console.
is_frozen = getattr(sys, 'frozen', False)
if not is_frozen:
    # Development mode: prompt for each value in turn
    start_date = input("Enter the start date (format: YYMMDD, e.g., 240901): ")
    end_date = input("Enter the end date (format: YYMMDD, e.g., 240915): ")
    year = input("Enter the year (format: YYYY, e.g., 2024): ")
else:
    # Executable mode: no prompts, use the built-in defaults
    start_date, end_date, year = "240901", "240915", "2024"
# Generate the list of dates in the provided range
def _date_range(first, last, fmt="%y%m%d"):
    """Return every calendar day from *first* to *last*, inclusive.

    Args:
        first: First day of the range, formatted according to *fmt*.
        last: Last day of the range, formatted according to *fmt*.
        fmt: ``strptime``/``strftime`` pattern used for both parsing and
            output; defaults to YYMMDD.

    Returns:
        A list of date strings in *fmt*. Empty when *last* is before *first*.

    Raises:
        ValueError: If either bound is not a valid date in *fmt*.
    """
    current = datetime.strptime(first.strip(), fmt).date()
    final = datetime.strptime(last.strip(), fmt).date()
    days = []
    # Step through real calendar days so ranges that cross a month (or year)
    # boundary are handled, instead of counting on the day digits alone.
    while current <= final:
        days.append(current.strftime(fmt))
        current += timedelta(days=1)
    return days


dates = _date_range(start_date, end_date)
# Specify the directory and output file name.
# USERPROFILE is only set on Windows; fall back to the generic home-directory
# lookup so os.path.join never receives None.
home_dir = os.getenv('USERPROFILE') or os.path.expanduser("~")
directory = os.path.join(home_dir, "Documents", "00_PJBC", "Data")
file_name = f"Data_{start_date}_{end_date}.csv"
file_path = os.path.join(directory, file_name)

# Create the directory (and any missing parents) if it doesn't exist
os.makedirs(directory, exist_ok=True)
# Function to process each URL
def process_url(date, timeout=30):
    """Download one page and return its non-empty text fragments.

    Args:
        date: Date string in YYMMDD form; it is inserted into the page URL
            together with the module-level ``year``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block a worker thread indefinitely.

    Returns:
        A list of ``[text, date]`` rows, one per matching element that
        contains text. Any error is printed rather than raised, and the rows
        collected up to that point (possibly none) are returned.
    """
    url = f"https://www.pjbc.gob.mx/boletinj/{year}/my_html/bc{date}.htm"
    data = []
    try:
        # Request the webpage
        response = requests.get(url, timeout=timeout)
        # Treat HTTP error statuses as failures instead of scraping the
        # server's error page as if it were data.
        response.raise_for_status()
        response.encoding = response.apparent_encoding  # Use automatically detected encoding

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find paragraphs and other elements that may contain the text
        paragraphs = soup.find_all(['p', 'div', 'span'], class_=['MsoNormal', None])

        # Extract relevant data
        for para in paragraphs:
            text = para.get_text(separator=" ").strip()  # Extract the text and clean spaces
            if text:
                data.append([text, date])
    except Exception as e:
        # Deliberately broad: this runs inside a worker thread, and one bad
        # page must not abort the whole download run.
        print(f"Error processing {url}: {e}")
    return data
# Size of the worker pool used for downloads (adjust based on available resources)
num_threads = 5

# newline='' leaves line-ending handling to the csv module
with open(file_path, mode='w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    # Header row: scraped text first, then the date it came from
    csv_writer.writerow(["Data", "Date"])

    # The progress bar and the thread pool share one statement; the pool is
    # entered last and therefore shut down first, as with nested blocks.
    with tqdm(total=len(dates), desc="Downloading pages", unit="page") as progress_bar, \
            ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Queue one download task per date
        futures = {executor.submit(process_url, day): day for day in dates}

        # Write each page's rows as soon as its task finishes
        for finished in as_completed(futures):
            rows = finished.result()
            if rows:
                csv_writer.writerows(rows)
            progress_bar.update(1)

print("Download complete, data saved to", file_path)
When using auto-py-to-exe with the ‘one file’ and ‘Console Based’ settings, the process stalls at ‘145067 INFO: Looking for dynamic libraries’ and prompts me for input. I’ve waited over 20 minutes, but there’s no progress.
I’m seeking guidance on resolving this problem or understanding if I might be missing a crucial step, as I’m new to this process.
Note: I have a local folder at C:\00_PJBC\Data, and my plan is to create the same path on the user's computer.