Inconsistent results from Granger causality test

Hi, each time I run my code I get different results from the Granger causality test. Does anybody have an idea why?

Here is my code (I don't know if this is the correct way to show my code):

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from statsmodels.tsa.stattools import adfuller
from scipy import stats
from statsmodels.tsa.api import VAR
from statsmodels.tools.eval_measures import rmse, aic
import pickle
from IPython.display import display

df = pd.read_excel('Stal_sheet.xlsx', sheet_name='Material', usecols=['Steel_price','Oil_price'])

# Take the logarithm of the values
#df = np.log(df)

#df.index = pd.to_datetime(df.index)
df = df.dropna()
#df = df[df.index.month==12]
display(df)

from pandas.plotting import lag_plot
   
f2, (ax4, ax5) = plt.subplots(1, 2, figsize=(15, 5))
f2.tight_layout()

lag_plot(df['Steel_price'], ax=ax4)
ax4.set_title('Price of steel');

lag_plot(df['Oil_price'], ax=ax5)
ax5.set_title('Price of oil');

plt.show()

rawdf = df.copy(deep=True)

df['Steel_price'] = df['Steel_price'] - df['Steel_price'].shift(1)
df['Oil_price'] = df['Oil_price'] - df['Oil_price'].shift(1)
df = df.dropna()
# split df into train and test. We will need this later for VAR analysis
msk = np.random.rand(len(df)) < 0.8
train = df[msk]
test = df[~msk]
## ADF Null hypothesis: there is a unit root, meaning series is non-stationary

X1 = np.array(df['Oil_price'])
X1 = X1[~np.isnan(X1)]

result = adfuller(X1)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))
    
    
X2 = np.array(df['Steel_price'])
X2 = X2[~np.isnan(X2)]

result = adfuller(X2)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

## KPSS Null hypothesis: there is no unit root, meaning series is stationary


f2, (ax4, ax5) = plt.subplots(1, 2, figsize=(15, 5))
f2.tight_layout()

lag_plot(df['Steel_price'], ax=ax4)
ax4.set_title('Price of steel');

lag_plot(df['Oil_price'], ax=ax5)
ax5.set_title('Price of oil');

plt.show()


rawdf = rawdf.dropna()
model = VAR(rawdf) #recall that rawdf is w/o difference operation
for i in range(1, 13):
    try:
        result = model.fit(i)
        print('Lag Order =', i)
        print('AIC : ', result.aic)
        print('BIC : ', result.bic)
        print('FPE : ', result.fpe)
        print('HQIC: ', result.hqic, '\n')
    except Exception:
        continue

model = VAR(train)
model_fitted = model.fit(2)

from statsmodels.stats.stattools import durbin_watson
out = durbin_watson(model_fitted.resid)

for col, val in zip(df.columns, out):
    print(col, ':', round(val, 2))
# There is no significant serial correlation in the residuals


ALPHA = 0.05
from statsmodels.tsa.stattools import coint
score, pvalue, _ = coint(df["Steel_price"], df["Oil_price"], maxlag=2)
print(f'cointegration: score: {score:.2f}')
print(f'cointegration: p-value: {pvalue:.10f}')
print("cointegrated - there MUST be Granger causality" if pvalue < ALPHA else "NOT cointegrated (uncertain about Granger causality)")   

model = VAR(train)
model_fitted = model.fit(2)
#get the lag order
lag_order = model_fitted.k_ar
print(lag_order) 

from statsmodels.tsa.stattools import grangercausalitytests
maxlag = lag_order  # because we got this value before; we are not supposed to add 1 to it
def grangers_causation_matrix(df, variables, test='ssr_chi2test', verbose=False):
    """Check Granger causality of all possible combinations of the time series.
    The rows are the response variables, the columns are the predictors. The values
    in the table are the p-values. A p-value less than the significance level (0.05)
    means the null hypothesis (that the coefficients of the corresponding past
    values are zero, i.e. that X does not Granger-cause Y) can be rejected.

    df        : pandas DataFrame containing the time series variables
    variables : list containing the names of the time series variables
    """
    data = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in data.columns:
        for r in data.index:
            test_result = grangercausalitytests(df[[r, c]], maxlag=maxlag, verbose=False)
            p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
            if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            data.loc[r, c] = min_p_value
    data.columns = [var + '_x' for var in variables]
    data.index = [var + '_y' for var in variables]
    return data

o = grangers_causation_matrix(train, variables=train.columns)
print(o)

The problem is here:

[quote]
# split df into train and test. We will need this later for VAR analysis
msk = np.random.rand(len(df)) < 0.8
train = df[msk]
test = df[~msk]
[/quote]

Each time you run the code, `np.random.rand` draws a new random mask, so `train` and `test` are different subsets of the data on every run. It is therefore not surprising that the Granger causality test gives different results each time. Seed the random number generator if you want reproducible results, or better, split the series chronologically: a random split destroys the temporal ordering that a VAR model depends on.
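For example, a minimal sketch of a reproducible split, assuming `df` is already sorted in time order (the 80/20 ratio is kept from your code):

# chronological 80/20 split: preserves the time ordering and gives
# the same train/test sets on every run
split = int(len(df) * 0.8)
train = df.iloc[:split]
test = df.iloc[split:]

# alternatively, to keep the random split but make it repeatable,
# seed the RNG so the same mask is drawn on every run:
# np.random.seed(42)
# msk = np.random.rand(len(df)) < 0.8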

[quote]

print("cointegrated - there MUST be Granger causality" if pvalue < ALPHA else "NOT cointegrated (uncertain about Granger causality)")

[/quote]

That is incorrect. There is no "MUST" in statistical significance testing. False positives (Type I errors) are **always** possible.

https://en.wikipedia.org/wiki/Type_I_and_type_II_errors
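To illustrate (this simulation is not part of the original analysis), a test at the 0.05 level rejects a true null hypothesis roughly 5% of the time even when the null holds by construction:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
n_trials = 10_000
false_positives = 0

for _ in range(n_trials):
    # two independent samples from the same distribution, so the
    # null hypothesis (equal means) is true by construction
    a = rng.normal(size=100)
    b = rng.normal(size=100)
    _, p = stats.ttest_ind(a, b)
    if p < 0.05:
        false_positives += 1

print(false_positives / n_trials)  # roughly 0.05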

Thank you, this fixed my problem.