I’m new to python and programming in general and am having an incredibly difficult time converting local time in multiple time zones to UTC. The difficulty lies in accounting for Daylight Savings Time corrections, and having this correction apply at the appropriate dates.
I currently have a script that reads a file and parses snippets into a csv file for later data manipulation with excel. Specifically, it is an airline flight schedule that I extract the Date, flight number etc from. I have been able to get everything to work correctly in the script except for the UTC conversion.
The code for the whole script is as follows,
import re
import csv
from datetime import datetime
from zoneinfo import ZoneInfo
# Function to get time zone abbreviation from UTC offset
def get_timezone_abbreviation(airport_code):
try:
with open('airport_reference.txt', 'r') as reference_file:
for line in reference_file:
code, tz_name = line.strip().split(',')
if code == airport_code:
return tz_name
except FileNotFoundError:
pass # If the file doesn't exist, continue
# If airport code not found in the reference file, ask the user for IANA time zone name
tz_name = input(f"Enter the IANA time zone name (e.g., 'America/New_York') for {airport_code}: ")
with open('airport_reference.txt', 'a') as reference_file:
reference_file.write(f"{airport_code},{tz_name}\n")
return tz_name
# Get the current year and month
current_date = datetime.now()
current_year = current_date.year
current_month = current_date.month
# Open the file containing the text data
with open('your_file.txt', 'r') as file:
lines = file.readlines()
# Initialize CSV data
csv_data = []
# Initialize variables to hold data
current_flight_number = ''
current_departure = ''
current_arrival = ''
current_dept_time = ''
current_arrival_time = ''
# Define the date format to match "Sun 01-Oct" format
date_format = '%a %d-%b'
# Process each line of text
for index, line in enumerate(lines):
# Use regular expressions to extract relevant data
match_date = re.match(r'(\w{3}\s\d{2}-\w{3})', line)
match_airport = re.findall(r'[A-Z]{3}', line)
match_time = re.findall(r'(\d{2}:\d{2})', line)
# If a valid date is found
if match_date:
# Extract the date and parse it using the defined format
date_str = match_date.group(1)
parsed_date = datetime.strptime(date_str, date_format)
month = parsed_date.month
day = parsed_date.day
year = current_year if month >= current_month else current_year + 1
# Format the date as MM:DD:YY
formatted_date = f"{month:02d}/{day:02d}/{year % 100:02d}"
# Reset all other variables
current_flight_number = ''
current_departure = ''
current_arrival = ''
current_dept_time = ''
current_arrival_time = ''
# Check if the line contains relevant airport codes (excluding "X-1")
has_relevant_airport_codes = any(code != "X-1" for code in match_airport)
# Check if the line contains any time information
has_time_info = bool(match_time)
# Process the line if it has relevant data
if formatted_date and has_relevant_airport_codes and has_time_info:
# Update departure and arrival based on the first relevant airport codes found
if not current_departure:
for code in match_airport:
if code != "X-1":
current_departure = code
break
# Extract Flight Number (numerical string before Departure Airport)
match_flight_number = re.search(r'(\d+)\s+' + re.escape(current_departure), line)
if match_flight_number:
current_flight_number = match_flight_number.group(1)
# Count the number of times the departure airport appears in the line
departure_count = line.count(current_departure)
# If the departure airport appears more than once, retrieve the flight number
# immediately before the second instance
if departure_count > 1:
flight_numbers = re.findall(r'(\d+)\s+' + re.escape(current_departure), line)
if len(flight_numbers) >= 2:
current_flight_number = flight_numbers[1]
# Update arrival based on the first relevant airport codes found
if not current_arrival:
for code in match_airport:
if code != "X-1" and code != current_departure:
current_arrival = code
break
# Extract Departure Time
if not current_dept_time:
# Find all times in the line
match_times = re.findall(r'(\d{1,2}:\d{2})', line)
# Extract the second to last time in the line
if len(match_times) >= 2:
current_dept_time = match_times[-2]
# Extract Arrival Time
if not current_arrival_time:
# Find all times in the line
match_times = re.findall(r'(\d{1,2}:\d{2})', line)
# Extract the last time in the line
if len(match_times) >= 1:
current_arrival_time = match_times[-1]
# If all data is available, create a CSV row
if all([formatted_date, current_flight_number, current_departure, current_arrival, current_dept_time,
current_arrival_time]):
# Convert local times to UTC times using time zone abbreviation
dept_tz_name = get_timezone_abbreviation(current_departure)
arrive_tz_name = get_timezone_abbreviation(current_arrival)
# Parse local times to datetime objects
departure_time = datetime.strptime(current_dept_time, '%H:%M')
arrival_time = datetime.strptime(current_arrival_time, '%H:%M')
# Create time zone objects based on IANA time zone names
departure_tz = ZoneInfo(dept_tz_name)
arrival_tz = ZoneInfo(arrive_tz_name)
# Adjust times to UTC time zone using formatted_date
formatted_date_for_parsing = formatted_date.replace('-', '/').replace(':', '/')
formatted_datetime = datetime.strptime(formatted_date_for_parsing, '%m/%d/%y')
# Calculate the time difference between the local timezone and UTC
departure_utc_offset = departure_tz.utcoffset(departure_time)
arrival_utc_offset = arrival_tz.utcoffset(arrival_time)
# Adjust times to UTC time zone
departure_time_utc = formatted_datetime + (departure_time - datetime(1900, 1, 1)) - departure_utc_offset
arrival_time_utc = formatted_datetime + (arrival_time - datetime(1900, 1, 1)) - arrival_utc_offset
csv_data.append([formatted_date, current_flight_number, current_departure, current_arrival, current_dept_time, current_arrival_time, departure_time_utc.strftime('%H:%M'), arrival_time_utc.strftime('%H:%M')])
current_flight_number = ''
current_departure = ''
current_arrival = ''
current_dept_time = ''
current_arrival_time = ''
# Output the data as a CSV
with open('output.csv', 'w', newline='') as csvfile:
csv_writer = csv.writer(csvfile)
# Write headers
csv_writer.writerow(
["Date", "Flight Number", "Departure", "Arrival", "Dept Time", "Arrival Time", "Dept UTC", "Arrive UTC"])
# Write data
csv_writer.writerows(csv_data)
For simplicity’s sake I am using the follow text in your_file.txt to test the script.
Fri 03-Nov L I30079P-1 PHX 14:19 L 21:47 L 5.38 5.38 7.47 7.47 760 PHX 15:19 L MCI 19:52 L
733 MCI 20:42 L PHX 21:32 L
Sat 04-Nov L I30079P-1 PHX 14:19 L 21:47 L 5.38 5.38 7.47 7.47 760 PHX 15:19 L MCI 19:52 L
733 MCI 20:42 L PHX 21:32 L
Sun 05-Nov L I30079P-1 PHX 14:19 L 21:47 L 5.38 5.38 7.47 7.47 760 PHX 15:19 L MCI 19:52 L
733 MCI 20:42 L PHX 21:32 L
Mon 06-Nov L I30079P-1 PHX 14:19 L 21:47 L 5.38 5.38 7.47 7.47 760 PHX 15:19 L MCI 19:52 L
733 MCI 20:42 L PHX 21:32 L
Lastly, in airport_reference.txt I have the following,
PHX,America/Phoenix
MCI,America/Chicago
My expected output would be,
Date,Flight Number,Departure,Arrival,Dept Time,Arrival Time,Dept UTC,Arrive UTC
11/03/23,760,PHX,MCI,15:19,19:52,22:19,00:52
11/03/23,733,MCI,PHX,20:42,21:32,01:42,04:32
11/04/23,760,PHX,MCI,15:19,19:52,22:19,00:52
11/04/23,733,MCI,PHX,20:42,21:32,01:42,04:32
11/05/23,760,PHX,MCI,15:19,19:52,22:19,01:52
11/05/23,733,MCI,PHX,20:42,21:32,02:42,04:32
11/06/23,760,PHX,MCI,15:19,19:52,22:19,01:52
11/06/23,733,MCI,PHX,20:42,21:32,02:42,04:32
For the dates 11/03 and 11/04 MCI should have a time correction of 5 hours and for 11/05 and 11/06 a time correction of 6 hours. This isn’t happening though and I end up with a time correction for MCI of 6 hours for all 4 dates. PHX is correct for all dates.
Does anyone see what I’m doing wrong? I honestly am splicing bits and pieces of code together while looking to AI for additional help so if things look cobbled together I apologize.