Python auto-py-to-exe issue

luispacheco · October 2, 2024, 5:35pm

I’m trying to convert the following Python code into an executable file. I’ve experimented with various methods like auto-py-to-exe and py2exe, but I consistently encounter the same issue: the conversion process requires manual input.

import csv
import os
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Choose where the date range comes from: the console while developing,
# fixed defaults when running as a bundled executable (sys.frozen is set).
if not getattr(sys, 'frozen', False):
    # Development mode: prompt for each value, in this order
    start_date = input("Enter the start date (format: YYMMDD, e.g., 240901): ")
    end_date = input("Enter the end date (format: YYMMDD, e.g., 240915): ")
    year = input("Enter the year (format: YYYY, e.g., 2024): ")
else:
    # Executable mode: no prompts, fall back to the built-in defaults
    start_date, end_date, year = "240901", "240915", "2024"

# Generate the list of dates in the provided range (both ends included).
# Real calendar arithmetic is used instead of counting over the last two
# digits, so a range that crosses a month boundary (e.g. 240925 to 241005)
# produces every day in between. strptime raises ValueError for an impossible
# date such as 240931 instead of silently building a URL for it.
# An end date earlier than the start date yields an empty list.
# NOTE: `year` is entered separately and used for the URL, so the range is
# expected to stay within a single year.
first_day = datetime.strptime(start_date, "%y%m%d")
last_day = datetime.strptime(end_date, "%y%m%d")
dates = [
    (first_day + timedelta(days=offset)).strftime("%y%m%d")
    for offset in range((last_day - first_day).days + 1)
]

# Specify the directory and output file name.
# expanduser("~") is used instead of os.getenv('USERPROFILE'): getenv returns
# None when the variable is missing (always the case outside Windows), which
# makes os.path.join raise TypeError. On Windows expanduser still resolves to
# the USERPROFILE folder, so the resulting path is unchanged there.
directory = os.path.join(os.path.expanduser("~"), "Documents", "00_PJBC", "Data")
file_name = f"Data_{start_date}_{end_date}.csv"
file_path = os.path.join(directory, file_name)

# Create the directory (and any missing parents) if it doesn't exist
os.makedirs(directory, exist_ok=True)

# Function to process each URL
def process_url(date, timeout=30):
    """Download the page for one date and return its text fragments.

    Args:
        date: Date string (YYMMDD) inserted into the page URL and stored next
            to every extracted fragment.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block a worker thread forever.

    Returns:
        A list of ``[text, date]`` rows, one per non-empty element found.
        If the download or parsing fails, the error is printed and whatever
        was collected so far (usually nothing) is returned.
    """
    url = f"https://www.pjbc.gob.mx/boletinj/{year}/my_html/bc{date}.htm"
    data = []

    try:
        # Request the webpage; without a timeout requests would wait indefinitely
        response = requests.get(url, timeout=timeout)
        # Treat HTTP errors (e.g. 404) as failures instead of parsing the
        # error page and writing its text to the CSV as if it were data
        response.raise_for_status()
        response.encoding = response.apparent_encoding  # Use automatically detected encoding

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find paragraphs and other elements that may contain the text
        paragraphs = soup.find_all(['p', 'div', 'span'], class_=['MsoNormal', None])

        # Extract relevant data, skipping elements with no visible text
        for para in paragraphs:
            text = para.get_text(separator=" ").strip()
            if text:
                data.append([text, date])

    except Exception as e:
        # Deliberately broad: this is the per-page boundary of a batch job,
        # so one bad page is reported and skipped rather than aborting the run
        print(f"Error processing {url}: {e}")

    return data

# Size of the worker pool (tune to the resources available)
num_threads = 5

# Stream results into the CSV file as the downloads finish
with open(file_path, mode='w', newline='', encoding='utf-8') as out_file:
    writer = csv.writer(out_file)

    # Header row: extracted text and the date it came from
    writer.writerow(["Data", "Date"])

    # One progress bar tick per page, downloads spread over the thread pool
    with tqdm(total=len(dates), desc="Downloading pages", unit="page") as bar, \
            ThreadPoolExecutor(max_workers=num_threads) as pool:
        # Queue every date up front
        pending = [pool.submit(process_url, date) for date in dates]

        # Handle each page as soon as it is ready (completion order)
        for finished in as_completed(pending):
            rows = finished.result()
            if rows:  # Pages that yielded nothing add no rows
                writer.writerows(rows)
            bar.update(1)

print("Download complete, data saved to", file_path)

When using auto-py-to-exe with the ‘one file’ and ‘Console Based’ settings, the process stalls at ‘145067 INFO: Looking for dynamic libraries’ and prompts me for input. I’ve waited over 20 minutes, but there’s no progress.

I’m seeking guidance on resolving this problem or understanding if I might be missing a crucial step, as I’m new to this process.

Note: I have a local folder at C:\00_PJBC\Data, and my plan is to create the same path on the user's computer.