LSTM Regression Model¶
In [1]:
Copied!
from deep_river.regression import RollingRegressor
from river import (
metrics,
compose,
preprocessing,
datasets,
stats,
feature_extraction,
)
from torch import nn
from tqdm import tqdm
from deep_river.regression import RollingRegressor
from river import (
metrics,
compose,
preprocessing,
datasets,
stats,
feature_extraction,
)
from torch import nn
from tqdm import tqdm
In [2]:
Copied!
def get_hour(x):
x["hour"] = x["moment"].hour
return x
def get_hour(x):
x["hour"] = x["moment"].hour
return x
Simple RNN Regression Model¶
In [3]:
Copied!
class RnnModule(nn.Module):
def __init__(self, n_features, hidden_size):
super().__init__()
self.n_features = n_features
self.rnn = nn.RNN(
input_size=n_features, hidden_size=hidden_size, num_layers=1
)
self.fc = nn.Linear(in_features=hidden_size, out_features=1)
def forward(self, X, **kwargs):
output, hn = self.rnn(X) # lstm with input, hidden, and internal state
return self.fc(output[-1, :])
class RnnModule(nn.Module):
def __init__(self, n_features, hidden_size):
super().__init__()
self.n_features = n_features
self.rnn = nn.RNN(
input_size=n_features, hidden_size=hidden_size, num_layers=1
)
self.fc = nn.Linear(in_features=hidden_size, out_features=1)
def forward(self, X, **kwargs):
output, hn = self.rnn(X) # lstm with input, hidden, and internal state
return self.fc(output[-1, :])
In [4]:
Copied!
dataset = datasets.Bikes()
metric = metrics.MAE()
model_pipeline = compose.Select(
"clouds", "humidity", "pressure", "temperature", "wind"
)
model_pipeline += get_hour | feature_extraction.TargetAgg(
by=["station", "hour"], how=stats.Mean()
)
model_pipeline |= preprocessing.StandardScaler()
model_pipeline |= RollingRegressor(
module=RnnModule,
loss_fn="mse",
optimizer_fn="sgd",
window_size=20,
lr=1e-2,
hidden_size=32, # parameters of MyModule can be overwritten
append_predict=True,
)
model_pipeline
dataset = datasets.Bikes()
metric = metrics.MAE()
model_pipeline = compose.Select(
"clouds", "humidity", "pressure", "temperature", "wind"
)
model_pipeline += get_hour | feature_extraction.TargetAgg(
by=["station", "hour"], how=stats.Mean()
)
model_pipeline |= preprocessing.StandardScaler()
model_pipeline |= RollingRegressor(
module=RnnModule,
loss_fn="mse",
optimizer_fn="sgd",
window_size=20,
lr=1e-2,
hidden_size=32, # parameters of MyModule can be overwritten
append_predict=True,
)
model_pipeline
Out[4]:
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
(
clouds
humidity
pressure
temperature
wind
)
get_hour
def get_hour(x):
x['hour'] = x['moment'].hour
return x
y_mean_by_station_and_hour
(
by=['station', 'hour']
how=Mean ()
target_name="y"
)
StandardScaler
(
with_std=True
)
RollingRegressor
(
module=None
loss_fn="mse_loss"
optimizer_fn=<class 'torch.optim.sgd.SGD'>
lr=0.01
window_size=20
append_predict=True
device="cpu"
seed=42
)
In [5]:
Copied!
for x, y in tqdm(dataset.take(5000)):
y_pred = model_pipeline.predict_one(x)
metric.update(y_true=y, y_pred=y_pred)
model_pipeline.learn_one(x=x, y=y)
print(f"MAE: {metric.get():.2f}")
for x, y in tqdm(dataset.take(5000)):
y_pred = model_pipeline.predict_one(x)
metric.update(y_true=y, y_pred=y_pred)
model_pipeline.learn_one(x=x, y=y)
print(f"MAE: {metric.get():.2f}")
5000it [00:11, 451.42it/s]
MAE: 3.94
LSTM Regression Model¶
In [6]:
Copied!
class LstmModule(nn.Module):
def __init__(self, n_features, hidden_size=1):
super().__init__()
self.n_features = n_features
self.hidden_size = hidden_size
self.lstm = nn.LSTM(
input_size=n_features,
hidden_size=hidden_size,
num_layers=1,
bidirectional=False,
)
self.fc = nn.Linear(in_features=hidden_size, out_features=1)
def forward(self, X, **kwargs):
output, (hn, cn) = self.lstm(
X
) # lstm with input, hidden, and internal state
return self.fc(output[-1, :])
class LstmModule(nn.Module):
def __init__(self, n_features, hidden_size=1):
super().__init__()
self.n_features = n_features
self.hidden_size = hidden_size
self.lstm = nn.LSTM(
input_size=n_features,
hidden_size=hidden_size,
num_layers=1,
bidirectional=False,
)
self.fc = nn.Linear(in_features=hidden_size, out_features=1)
def forward(self, X, **kwargs):
output, (hn, cn) = self.lstm(
X
) # lstm with input, hidden, and internal state
return self.fc(output[-1, :])
In [7]:
Copied!
dataset = datasets.Bikes()
metric = metrics.MAE()
model_pipeline = compose.Select(
"clouds", "humidity", "pressure", "temperature", "wind"
)
model_pipeline += get_hour | feature_extraction.TargetAgg(
by=["station", "hour"], how=stats.Mean()
)
model_pipeline |= preprocessing.StandardScaler()
model_pipeline |= RollingRegressor(
module=LstmModule,
loss_fn="mse",
optimizer_fn="sgd",
window_size=20,
lr=1e-2,
hidden_size=32, # parameters of MyModule can be overwritten
append_predict=True,
)
model_pipeline
dataset = datasets.Bikes()
metric = metrics.MAE()
model_pipeline = compose.Select(
"clouds", "humidity", "pressure", "temperature", "wind"
)
model_pipeline += get_hour | feature_extraction.TargetAgg(
by=["station", "hour"], how=stats.Mean()
)
model_pipeline |= preprocessing.StandardScaler()
model_pipeline |= RollingRegressor(
module=LstmModule,
loss_fn="mse",
optimizer_fn="sgd",
window_size=20,
lr=1e-2,
hidden_size=32, # parameters of MyModule can be overwritten
append_predict=True,
)
model_pipeline
Out[7]:
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
(
clouds
humidity
pressure
temperature
wind
)
get_hour
def get_hour(x):
x['hour'] = x['moment'].hour
return x
y_mean_by_station_and_hour
(
by=['station', 'hour']
how=Mean ()
target_name="y"
)
StandardScaler
(
with_std=True
)
RollingRegressor
(
module=None
loss_fn="mse_loss"
optimizer_fn=<class 'torch.optim.sgd.SGD'>
lr=0.01
window_size=20
append_predict=True
device="cpu"
seed=42
)
In [8]:
Copied!
for x, y in tqdm(dataset.take(5000)):
y_pred = model_pipeline.predict_one(x)
metric.update(y_true=y, y_pred=y_pred)
model_pipeline.learn_one(x=x, y=y)
print(f"MAE: {metric.get():.2f}")
for x, y in tqdm(dataset.take(5000)):
y_pred = model_pipeline.predict_one(x)
metric.update(y_true=y, y_pred=y_pred)
model_pipeline.learn_one(x=x, y=y)
print(f"MAE: {metric.get():.2f}")
5000it [00:22, 225.22it/s]
MAE: 2.81