Simple Regression Model¶

In [1]:

Copied!





from river import (
    metrics,
    compose,
    preprocessing,
    datasets,
    stats,
    feature_extraction,
)
from deep_river.regression import RegressorInitialized
from torch import nn
from pprint import pprint
from tqdm import tqdm
from river import (
    metrics,
    compose,
    preprocessing,
    datasets,
    stats,
    feature_extraction,
)
from deep_river.regression import RegressorInitialized
from torch import nn
from pprint import pprint
from tqdm import tqdm

In [2]:

Copied!





dataset = datasets.Bikes()

for x, y in dataset:
    pprint(x)
    print(f"Number of available bikes: {y}")
    break
dataset = datasets.Bikes()

for x, y in dataset:
    pprint(x)
    print(f"Number of available bikes: {y}")
    break

{'clouds': 75,
 'description': 'light rain',
 'humidity': 81,
 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),
 'pressure': 1017.0,
 'station': 'metro-canal-du-midi',
 'temperature': 6.54,
 'wind': 9.3}
Number of available bikes: 1

In [3]:

Copied!





class MyModule(nn.Module):
    def __init__(self, n_features):
        super(MyModule, self).__init__()
        self.dense0 = nn.Linear(n_features, 5)
        self.nonlin = nn.ReLU()
        self.dense1 = nn.Linear(5, 1)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.nonlin(self.dense1(X))
        X = self.softmax(X)
        return X


def get_hour(x):
    x["hour"] = x["moment"].hour
    return x
class MyModule(nn.Module):
    def __init__(self, n_features):
        super(MyModule, self).__init__()
        self.dense0 = nn.Linear(n_features, 5)
        self.nonlin = nn.ReLU()
        self.dense1 = nn.Linear(5, 1)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.nonlin(self.dense1(X))
        X = self.softmax(X)
        return X


def get_hour(x):
    x["hour"] = x["moment"].hour
    return x

In [4]:

Copied!





metric = metrics.MAE()

model_pipeline = compose.Select(
    "clouds", "humidity", "pressure", "temperature", "wind"
)
model_pipeline += get_hour | feature_extraction.TargetAgg(
    by=["station", "hour"], how=stats.Mean()
)
model_pipeline |= preprocessing.StandardScaler()
model_pipeline |= RegressorInitialized(module=MyModule(10), loss_fn="mse", optimizer_fn="sgd")
model_pipeline
metric = metrics.MAE()

model_pipeline = compose.Select(
    "clouds", "humidity", "pressure", "temperature", "wind"
)
model_pipeline += get_hour | feature_extraction.TargetAgg(
    by=["station", "hour"], how=stats.Mean()
)
model_pipeline |= preprocessing.StandardScaler()
model_pipeline |= RegressorInitialized(module=MyModule(10), loss_fn="mse", optimizer_fn="sgd")
model_pipeline

Out[4]:

['clouds', [...]

Select (
  clouds
  humidity
  pressure
  temperature
  wind
)

get_hour


def get_hour(x):
    x["hour"] = x["moment"].hour
    return x

y_mean_by_station_and_hour

TargetAgg (
  by=['station', 'hour']
  how=Mean ()
  target_name="y"
)

StandardScaler

StandardScaler (
  with_std=True
)

RegressorInitialized

RegressorInitialized (
  module=MyModule(
  (dense0): Linear(in_features=10, out_features=5, bias=True)
  (nonlin): ReLU()
  (dense1): Linear(in_features=5, out_features=1, bias=True)
  (softmax): Softmax(dim=-1)
)
  loss_fn="mse"
  optimizer_fn="sgd"
  lr=0.001
  output_is_logit=True
  is_feature_incremental=False
  device="cpu"
  seed=42
)

In [5]:

Copied!





for x, y in tqdm(dataset.take(5000)):
    y_pred = model_pipeline.predict_one(x)
    metric.update(y_true=y, y_pred=y_pred)
    model_pipeline.learn_one(x=x, y=y)
print(f"MAE: {metric.get():.2f}")
for x, y in tqdm(dataset.take(5000)):
    y_pred = model_pipeline.predict_one(x)
    metric.update(y_true=y, y_pred=y_pred)
    model_pipeline.learn_one(x=x, y=y)
print(f"MAE: {metric.get():.2f}")

0it [00:00, ?it/s]

83it [00:00, 828.38it/s]

215it [00:00, 1113.79it/s]

356it [00:00, 1247.43it/s]

494it [00:00, 1293.06it/s]

635it [00:00, 1332.09it/s]

781it [00:00, 1370.73it/s]

929it [00:00, 1404.25it/s]

1071it [00:00, 1407.97it/s]

1225it [00:00, 1448.75it/s]

1374it [00:01, 1461.33it/s]

1527it [00:01, 1479.61it/s]

1681it [00:01, 1495.33it/s]

1832it [00:01, 1499.63it/s]

1982it [00:01, 1414.77it/s]

2125it [00:01, 1416.94it/s]

2273it [00:01, 1434.12it/s]

2422it [00:01, 1447.89it/s]

2568it [00:01, 1434.99it/s]

2714it [00:01, 1439.62it/s]

2859it [00:02, 1435.47it/s]

3003it [00:02, 1429.22it/s]

3147it [00:02, 1429.40it/s]

3291it [00:02, 1413.45it/s]

3433it [00:02, 1407.83it/s]

3574it [00:02, 1393.92it/s]

3714it [00:02, 1352.39it/s]

3850it [00:02, 1239.74it/s]

4002it [00:02, 1315.11it/s]

4158it [00:02, 1383.08it/s]

4305it [00:03, 1407.78it/s]

4459it [00:03, 1445.25it/s]

4607it [00:03, 1454.45it/s]

4755it [00:03, 1460.43it/s]

4902it [00:03, 1449.62it/s]

5000it [00:03, 1404.67it/s]

MAE: 6.83