I want to estimate the linear slope between ‘x’ and ‘y’ for each 11-day moving window by 1-day stride.
from sklearn import linear_model
import numpy as np
import xarray as xr
import pandas as pd
# Create a dataset as an example
site = np.linspace(0,3,dtype='int8')
time= pd.date_range('2018-01-01','2020-12-31',freq='d')
x = np.random.randint(0,500,size=[len(site), len(time)])
y = np.random.randint(0,500,size=[len(site), len(time)])
_ds = xr.Dataset(data_vars=dict(
x=(["site", "time"], x),
y=(["site", "time"], y)),
coords=dict(
site=site,
time=time))
# define the linear regression model
def ransac_fit(xi,yi, **ransac_kwargs):
Xi = xi.reshape(-1, 1)
yi = yi
ransac = linear_model.RANSACRegressor(**ransac_kwargs)
ransac.fit(Xi, yi)
slope= ransac.estimator_.coef_
b = ransac.estimator_.intercept_
return slope, b
At present I am able to use ‘for’ loop for ‘site’ and ‘time’ to do that, which however is extremely clumsy…
def clc_slope(_ds, window_size=11):
slps =[]
bs =[]
mean_xs =[]
mean_ys=[]
var_x = _ds['x']
var_y = _ds['y']
# for loop for each year and date
for year in np.unique(_ds.time.dt.year.values):
for doy in np.unique(_ds.sel(time=str(year)).time.dt.dayofyear.values):
# define inorg and endrg
inorg = doy-np.int(window_size/2+1)
enorg = doy+np.int(window_size/2)
# calculate mean values of x and y for each moving window
mean_x = np.nanmean(var_x.sel(time=str(year))[inorg:enorg].values)
mean_y = np.nanmean(var_y.sel(time=str(year))[inorg:enorg].values)
mean_xs = np.append(mean_xs, mean_x)
mean_ys = np.append(mean_ys, mean_x)
# start to estimate slope and intercept
_x = var_x.sel(time=str(year))[inorg:enorg].values
_y = var_y.sel(time=str(year))[inorg:enorg].values
# if there is too many nans then assign slope and intcept to be nan
if (np.isfinite(_x) & np.isfinite(_y)).sum()<((np.int(window_size/2)+2)*1):
_slp=_b= np.nan
else:
try:
_slp, _b = ransac_fit(_x,_y, min_samples=0.6, stop_n_inliers=np.int(window_size/2)*1)
except:
_slp=_b = np.nan
slps = np.append(slps,_slp)
bs = np.append(bs, _b)
outs = [slps, bs, mean_xs, mean_ys]
return outs
# run the slope and intercept estimation for each site and concat afterwards
_dss = []
for st in ds.site.values:
_ds = ds.sel(site=st)
outs = clc_slope(_ds)
_ds['slp'] = ('time',outs[0])
_ds['b'] = ('time',outs[1])
_ds['mean_xs']= ('time',outs[2])
_ds['mean_ys']= ('time',outs[3])
_dss.append(_ds)
dss = xr.concat(_dss, dim='site')
I know xarray.apply_ufunc can extremely save time, but I do not get this tricky approach. Would be supper appreciate if you can give a hint! Thank you a million…