I’m trying to build a project where I pull game and weather data from APIs to determine the likelihood that a game will be canceled, based on historical data. If I pull a few dates, it works. If I pull two years of games, all the results come up as N/A. Here is the code.
import pandas as pd
import requests
from datetime import datetime, timedelta
import time
import statsapi
import csv
def fetch_historical_mlb_games(start_date, end_date):
    """
    Collect MLB games, one day at a time, for an inclusive date range.

    Each day is requested separately through statsapi.schedule. A day whose
    request fails is reported on stdout and skipped; the remaining days are
    still fetched.
    :param start_date: str, first day in 'YYYY-MM-DD' format
    :param end_date: str, last day in 'YYYY-MM-DD' format
    :return: list of dicts with keys date, venue, status, home_team, away_team
    """
    first_day = datetime.strptime(start_date, "%Y-%m-%d")
    last_day = datetime.strptime(end_date, "%Y-%m-%d")
    # Both bounds are parsed at midnight, so the difference is a whole number
    # of days; a reversed range gives a non-positive count and no iterations.
    day_count = (last_day - first_day).days + 1

    collected = []
    for offset in range(day_count):
        day_label = (first_day + timedelta(days=offset)).strftime("%Y-%m-%d")
        print(f"Fetching games for {day_label}...")
        try:
            for entry in statsapi.schedule(date=day_label):
                summary = {
                    "date": entry.get("game_date", "Unknown Date"),
                    "venue": entry.get("venue_name", "Unknown Venue"),
                    "status": entry.get("status", "Unknown Status"),
                    "home_team": entry.get("home_name", "Unknown Team"),
                    "away_team": entry.get("away_name", "Unknown Team"),
                }
                collected.append(summary)
        except Exception as e:
            print(f"Error fetching games for {day_label}: {e}")
        # Pause after every request to avoid API rate limits
        time.sleep(1)
    return collected
def fetch_historical_weather(venue, date, api_key):
    """
    Fetch historical weather for a venue and date using the Visual Crossing timeline API.

    This function never raises: every failure (missing input, network error,
    HTTP error status, unexpected payload) is returned as a dict with a single
    "error" key so callers can treat the lookup as best-effort.
    :param venue: str, location text placed in the request path
    :param date: str, date in 'YYYY-MM-DD' format
    :param api_key: str, Visual Crossing API key
    :return: dict with temperature, humidity, precipitation, wind_speed and
        description (each "N/A" when absent from the response), or
        {"error": message}
    """
    from urllib.parse import quote

    # Without both path segments the URL would be malformed; don't spend a request on it.
    if not venue or not date:
        return {"error": "Venue and date are required."}
    try:
        base_url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline"
        # Percent-encode the path segments so characters such as '/', '?' or '#'
        # inside a venue name cannot change the shape of the URL.
        venue_segment = quote(str(venue), safe="")
        date_segment = quote(str(date), safe="")
        request_url = f"{base_url}/{venue_segment}/{date_segment}"
        # Let requests build and encode the query string.
        query = {"unitGroup": "metric", "key": api_key, "include": "days"}
        response = requests.get(request_url, params=query, timeout=10)  # timeout avoids hanging
        response.raise_for_status()
        weather_data = response.json()
        days = weather_data.get("days") if isinstance(weather_data, dict) else None
        if not days:
            return {"error": "No weather data found for this date."}
        day_data = days[0]
        return {
            "temperature": day_data.get("temp", "N/A"),
            "humidity": day_data.get("humidity", "N/A"),
            "precipitation": day_data.get("precip", "N/A"),
            "wind_speed": day_data.get("windspeed", "N/A"),
            "description": day_data.get("conditions", "N/A"),
        }
    except Exception as e:
        # Deliberately broad: this is the best-effort boundary described above.
        message = str(e)
        # HTTP error messages include the request URL, which carries the key;
        # mask it so it does not end up in logs or output files.
        if api_key:
            message = message.replace(str(api_key), "***")
        return {"error": message}
def save_historical_data_to_csv(historical_data, output_file="historical_games_with_weather.csv"):
    """
    Save historical game and weather data to a CSV file.

    Each game dict contributes one row. Weather values are read from the
    game's optional "weather" dict; any missing game or weather value is
    written as "N/A". Failures are reported on stdout instead of raised.
    :param historical_data: list of dictionaries containing game and weather details
    :param output_file: str, name of the output CSV file
    """
    game_fields = ("date", "venue", "home_team", "away_team", "status")
    weather_fields = ("temperature", "humidity", "precipitation", "wind_speed", "description")
    try:
        # Write UTF-8 explicitly: the file is read back with pandas.read_csv,
        # whose default encoding is UTF-8, so the platform default encoding
        # could make non-ASCII venue names unreadable.
        with open(output_file, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=[*game_fields, *weather_fields])
            writer.writeheader()
            for game in historical_data:
                # "weather" may be absent or None; both mean "no weather known".
                weather = game.get("weather") or {}
                row = {name: game.get(name, "N/A") for name in game_fields}
                row.update((name, weather.get(name, "N/A")) for name in weather_fields)
                writer.writerow(row)
        print(f"Historical data saved to {output_file}")
    except Exception as e:
        # Best-effort by design: report the problem and let the caller continue.
        print(f"Error saving to CSV: {e}")
def calculate_cancellation_likelihood(historical_data_file):
    """
    Calculate the likelihood of game cancellations based on historical data.

    A game counts as canceled when its status text contains "Postponed".
    Rows whose description is missing are left out of the result, because
    groupby drops missing keys by default. Note that pandas.read_csv loads
    the literal text "N/A" as a missing value, so rows saved without weather
    contribute nothing here.
    :param historical_data_file: str, path to the CSV file containing historical data
    :return: dict mapping weather descriptions to cancellation likelihood (percent)
    """
    data = pd.read_csv(historical_data_file)
    # Add cancellation flag (1 if postponed, 0 otherwise). A blank or "N/A"
    # status loads as NaN; na=False counts it as "not postponed" instead of
    # failing on a non-string value.
    postponed = data["status"].astype(str).str.contains("Postponed", regex=False, na=False)
    data["is_canceled"] = postponed.astype(int)
    # Group by weather description and calculate likelihood
    grouped = data.groupby("description").agg(
        total_games=("is_canceled", "count"),
        canceled_games=("is_canceled", "sum"),
    )
    grouped["cancellation_likelihood"] = (grouped["canceled_games"] / grouped["total_games"]) * 100
    # Convert to dictionary
    likelihood_dict = grouped["cancellation_likelihood"].to_dict()
    print("Cancellation likelihood by weather:")
    print(grouped)
    return likelihood_dict
def apply_cancellation_likelihood(data_file, likelihood_dict, output_file):
    """
    Add a cancellation_likelihood column to a games CSV and write the result.

    Every row's weather description is looked up in likelihood_dict; a
    description that is not in the dict gets a likelihood of 0.
    :param data_file: str, path to the CSV file containing games data
    :param likelihood_dict: dict, mapping weather descriptions to cancellation likelihood
    :param output_file: str, path to save the enhanced dataset
    """

    def lookup(description):
        # Fall back to 0 when the description was never seen
        return likelihood_dict.get(description, 0)

    games = pd.read_csv(data_file)
    descriptions = games["description"]
    games["cancellation_likelihood"] = descriptions.map(lookup)
    # Write the enhanced table without the index column
    games.to_csv(output_file, index=False)
    print(f"Enhanced dataset saved to {output_file}")
def _attach_weather(games, api_key):
    """
    Add a "weather" entry to every game dict, in place.

    Lookups are cached per (venue, date) so games that share both trigger a
    single request. Failed lookups are kept as returned (a dict with an
    "error" key) and summarised on stdout, so a run where every weather value
    ends up "N/A" is explained rather than silent.
    :param games: list of game dicts with "venue" and "date" keys
    :param api_key: str, Visual Crossing API key
    :return: the same list, for convenience
    """
    lookups = {}
    failures = 0
    for game in games:
        # NOTE(review): the venue name is passed as the weather location as-is;
        # confirm the weather API resolves such names to the intended place.
        key = (game.get("venue"), game.get("date"))
        if key not in lookups:
            lookups[key] = fetch_historical_weather(key[0], key[1], api_key)
        weather = lookups[key]
        if "error" in weather:
            if failures == 0:
                print(f"First weather lookup failure ({key[0]}, {key[1]}): {weather['error']}")
            failures += 1
        game["weather"] = weather
    if failures:
        print(f"Weather lookup failed for {failures} of {len(games)} games; those rows will show N/A.")
    return games


def main():
    """
    Main function to calculate and apply cancellation likelihood.

    Fetches the 2023 and 2024 schedules, attaches weather to every game when
    the VISUAL_CROSSING_API_KEY environment variable is set, saves both
    seasons to CSV, derives likelihoods from 2023 and applies them to 2024.
    """
    import os

    print("Welcome to the Game Cancellation Prediction Tool!")
    api_key = os.environ.get("VISUAL_CROSSING_API_KEY")
    if not api_key:
        # Without a key no weather can be fetched; the CSVs are still written.
        print("VISUAL_CROSSING_API_KEY is not set; weather columns will be N/A.")
    # Step 1: Fetch historical data for 2023
    print("Fetching 2023 data...")
    historical_games_2023 = fetch_historical_mlb_games("2023-04-01", "2023-10-01")
    if api_key:
        # The save step reads each game's "weather" entry, so attach it first.
        _attach_weather(historical_games_2023, api_key)
    save_historical_data_to_csv(historical_games_2023, "historical_games_with_weather_2023.csv")
    # Step 2: Fetch historical data for 2024
    print("Fetching 2024 data...")
    historical_games_2024 = fetch_historical_mlb_games("2024-04-01", "2024-10-01")
    if api_key:
        _attach_weather(historical_games_2024, api_key)
    save_historical_data_to_csv(historical_games_2024, "historical_games_with_weather_2024.csv")
    # Step 3: Calculate cancellation likelihood from 2023 data
    likelihood_dict = calculate_cancellation_likelihood("historical_games_with_weather_2023.csv")
    # Step 4: Apply cancellation likelihood to 2024 data
    apply_cancellation_likelihood("historical_games_with_weather_2024.csv", likelihood_dict, "enhanced_games_with_weather_2024.csv")


if __name__ == "__main__":
    main()