Simple Regression Model¶
In [1]:
Copied!
from river import (
metrics,
compose,
preprocessing,
datasets,
stats,
feature_extraction,
)
from deep_river.regression import Regressor
from torch import nn
from pprint import pprint
from tqdm import tqdm
from river import (
metrics,
compose,
preprocessing,
datasets,
stats,
feature_extraction,
)
from deep_river.regression import Regressor
from torch import nn
from pprint import pprint
from tqdm import tqdm
In [2]:
Copied!
dataset = datasets.Bikes()
for x, y in dataset:
pprint(x)
print(f"Number of available bikes: {y}")
break
dataset = datasets.Bikes()
for x, y in dataset:
pprint(x)
print(f"Number of available bikes: {y}")
break
{'clouds': 75, 'description': 'light rain', 'humidity': 81, 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7), 'pressure': 1017.0, 'station': 'metro-canal-du-midi', 'temperature': 6.54, 'wind': 9.3} Number of available bikes: 1
In [3]:
Copied!
class MyModule(nn.Module):
def __init__(self, n_features):
super(MyModule, self).__init__()
self.dense0 = nn.Linear(n_features, 5)
self.nonlin = nn.ReLU()
self.dense1 = nn.Linear(5, 1)
self.softmax = nn.Softmax(dim=-1)
def forward(self, X, **kwargs):
X = self.nonlin(self.dense0(X))
X = self.nonlin(self.dense1(X))
X = self.softmax(X)
return X
def get_hour(x):
x["hour"] = x["moment"].hour
return x
class MyModule(nn.Module):
def __init__(self, n_features):
super(MyModule, self).__init__()
self.dense0 = nn.Linear(n_features, 5)
self.nonlin = nn.ReLU()
self.dense1 = nn.Linear(5, 1)
self.softmax = nn.Softmax(dim=-1)
def forward(self, X, **kwargs):
X = self.nonlin(self.dense0(X))
X = self.nonlin(self.dense1(X))
X = self.softmax(X)
return X
def get_hour(x):
x["hour"] = x["moment"].hour
return x
In [4]:
Copied!
metric = metrics.MAE()
model_pipeline = compose.Select(
"clouds", "humidity", "pressure", "temperature", "wind"
)
model_pipeline += get_hour | feature_extraction.TargetAgg(
by=["station", "hour"], how=stats.Mean()
)
model_pipeline |= preprocessing.StandardScaler()
model_pipeline |= Regressor(module=MyModule, loss_fn="mse", optimizer_fn="sgd")
model_pipeline
metric = metrics.MAE()
model_pipeline = compose.Select(
"clouds", "humidity", "pressure", "temperature", "wind"
)
model_pipeline += get_hour | feature_extraction.TargetAgg(
by=["station", "hour"], how=stats.Mean()
)
model_pipeline |= preprocessing.StandardScaler()
model_pipeline |= Regressor(module=MyModule, loss_fn="mse", optimizer_fn="sgd")
model_pipeline
Out[4]:
['clouds', 'humidity', 'pressure', 'temperature', 'wind']
(
clouds
humidity
pressure
temperature
wind
)
get_hour
def get_hour(x):
x['hour'] = x['moment'].hour
return x
y_mean_by_station_and_hour
(
by=['station', 'hour']
how=Mean ()
target_name="y"
)
StandardScaler
(
with_std=True
)
Regressor
(
module=None
loss_fn="mse_loss"
optimizer_fn=<class 'torch.optim.sgd.SGD'>
lr=0.001
device="cpu"
seed=42
)
In [5]:
Copied!
for x, y in tqdm(dataset.take(5000)):
y_pred = model_pipeline.predict_one(x)
metric.update(y_true=y, y_pred=y_pred)
model_pipeline.learn_one(x=x, y=y)
print(f"MAE: {metric.get():.2f}")
for x, y in tqdm(dataset.take(5000)):
y_pred = model_pipeline.predict_one(x)
metric.update(y_true=y, y_pred=y_pred)
model_pipeline.learn_one(x=x, y=y)
print(f"MAE: {metric.get():.2f}")
5000it [00:04, 1029.49it/s]
MAE: 6.83