Skip to content

Example Mini Batches

Open in Colab Binder

import pandas as pd
from river import datasets
from deep_river import regression
from torch import nn
from river import compose
from river import preprocessing
from itertools import islice
from pprint import pprint
from sklearn import metrics
class MyModule(nn.Module):
    """Small feed-forward network emitting one regression value per sample.

    Architecture: ``Linear(n_features, 5) -> ReLU -> Linear(5, 1) -> ReLU``.

    Args:
        n_features: Number of input features per sample.
    """

    def __init__(self, n_features: int):
        super().__init__()
        self.dense0 = nn.Linear(n_features, 5)
        self.nonlin = nn.ReLU()
        self.dense1 = nn.Linear(5, 1)

    def forward(self, X, **kwargs):
        """Compute the prediction for a batch of samples.

        Args:
            X: Input tensor whose last dimension has ``n_features`` entries.
            **kwargs: Accepted for call compatibility; not used here.

        Returns:
            Tensor with a last dimension of size 1 holding the non-negative
            prediction for each sample.
        """
        hidden = self.nonlin(self.dense0(X))
        # Deliberately no softmax on the output: softmax over a dimension of
        # size 1 is identically 1.0, which would make every prediction the
        # constant 1.0 and zero out all gradients. The final ReLU only clamps
        # the prediction to be non-negative.
        return self.nonlin(self.dense1(hidden))


def batcher(iterable, batch_size):
    """Yield consecutive lists of up to ``batch_size`` items from ``iterable``.

    The final list may be shorter than ``batch_size``. Nothing is yielded
    once the underlying iterator produces no further items.
    """
    source = iter(iterable)
    while True:
        chunk = list(islice(source, batch_size))
        if not chunk:
            break
        yield chunk
dataset = datasets.Bikes()

# Show only the first (features, target) pair of the stream.
for features, target in islice(dataset, 1):
    pprint(features)
    print(f"Number of available bikes: {target}")
{'clouds': 75,
 'description': 'light rain',
 'humidity': 81,
 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),
 'pressure': 1017.0,
 'station': 'metro-canal-du-midi',
 'temperature': 6.54,
 'wind': 9.3}
Number of available bikes: 1
dataset = datasets.Bikes()

# Columns kept by the selector; the network's input width is derived from
# this tuple so the two cannot drift apart.
selected_features = ("clouds", "humidity", "pressure", "temperature", "wind")

regressor = regression.Regressor(
    module=MyModule(len(selected_features)),
    loss_fn="mse",
    optimizer_fn="sgd",
)

# Pipeline: select the feature columns, then pass them to the regressor.
model_pipeline = compose.Select(*selected_features)
model_pipeline |= regressor
model_pipeline
['clouds', [...]
Select ( clouds humidity pressure temperature wind )
Regressor
Regressor ( module=MyModule( (dense0): Linear(in_features=5, out_features=5, bias=True) (nonlin): ReLU() (dense1): Linear(in_features=5, out_features=1, bias=True) (softmax): Softmax(dim=-1) ) loss_fn="mse" optimizer_fn="sgd" lr=0.001 is_feature_incremental=False device="cpu" seed=42 )
# Score each mini-batch before learning from it, accumulating targets and
# predictions for a single error figure at the end.
y_trues, y_preds = [], []
for chunk in batcher(dataset.take(5000), 5):
    # Each chunk is a list of (features, target) pairs; split it column-wise.
    feature_rows, targets = zip(*chunk)
    frame = pd.DataFrame(feature_rows)
    y_trues.extend(targets)
    # Predict first so the batch is evaluated by a model that has not seen it.
    batch_predictions = model_pipeline.predict_many(X=frame)
    y_preds.extend(batch_predictions.values)
    model_pipeline.learn_many(X=frame, y=pd.Series(targets))
metrics.mean_squared_error(y_true=y_trues, y_pred=y_preds)
102.4412