Hey everyone, this is my code for predicting stock market prices using SVR. I am using the historical data from investing.com. But for some reason the historical prices are appearing weirdly in the graph. Can anyone help rectify this?
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
companies = ['AAPL Historical Data', 'GOOGL Historical Data', 'MSFT Historical Data', 'NVDA Historical Data', 'IBM Historical Data']
predictions = {}
for company in companies:
# Load the company stock data
df = pd.read_csv(f'{company}.csv')
df = df.sort_values('Date')
# Calculate the number of days from the start
df['Date'] = pd.to_datetime(df['Date'])
df['Date'] = (df['Date'] - df['Date'].min()).dt.days
# Prepare the features (Date) and target (Close price)
X = df['Date'].values.reshape(-1,1)
y = df['Price'].values
# Standardize the features and target
scaler_X = StandardScaler().fit(X)
scaler_y = StandardScaler().fit(y.reshape(-1,1))
X = scaler_X.transform(X)
y = scaler_y.transform(y.reshape(-1, 1)).ravel()
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create and train the Support Vector Machine (Regressor)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(X_train, y_train)
# Test the model
svr_rbf_confidence = svr_rbf.score(X_test, y_test)
print(f"svr_rbf confidence for {company}: ", svr_rbf_confidence)
# Predict the future stock prices (Next 6 months, 30 days/month)
x_forecast = np.array(list(range(max(df['Date']), max(df['Date'])+180))).reshape(-1,1)
x_forecast = scaler_X.transform(x_forecast)
forecast_result = svr_rbf.predict(x_forecast)
forecast_result = scaler_y.inverse_transform(forecast_result)
predictions[company] = forecast_result
# Plot the prediction
plt.figure(figsize=(12,6))
plt.plot(df['Date'].values, df['Price'].values, color='blue', label='Historical Close price')
plt.plot(list(range(max(df['Date']), max(df['Date'])+180)), forecast_result, color='red', label='Future Close price')
plt.title(f'{company} Stock Price Prediction')
plt.xlabel('Days from start')
plt.ylabel('Stock Price (Price)')
plt.legend()
plt.show()
The results I get are:
and so on for the other 4 companies
I am not allowed to upload more than 1 image but hope y’all can understand what is wrong