Skip to content

Mini Batches

Open in Colab Binder

Iterate over a data stream in mini batches

import pandas as pd
from river import datasets
from deep_river import classification
from torch import nn
from river import compose
from river import preprocessing
from itertools import islice
from sklearn import metrics
# Data stream used by this example; it is consumed in mini-batches further down.
dataset = datasets.Phishing()
class MyModule(nn.Module):
    """Tiny feed-forward network: ``n_features`` -> 5 -> 2.

    The forward pass applies a ReLU after *both* linear layers and then a
    softmax over the last dimension, so every output row sums to 1.
    """

    def __init__(self, n_features):
        """Create the layers.

        Args:
            n_features: Size of the last dimension of the input tensor.
        """
        super().__init__()
        # Attribute names double as the keys in ``state_dict()`` and in the
        # module's repr.
        self.dense0 = nn.Linear(n_features, 5)
        self.nonlin = nn.ReLU()
        self.dense1 = nn.Linear(5, 2)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X, **kwargs):
        """Return softmax scores for ``X``.

        Extra keyword arguments are accepted and ignored.
        """
        hidden = self.nonlin(self.dense0(X))
        # The ReLU is applied to the output layer as well, so the values fed
        # into the softmax are never negative.
        scores = self.nonlin(self.dense1(hidden))
        return self.softmax(scores)


def batcher(iterable, batch_size):
    """Yield successive lists of up to ``batch_size`` items from ``iterable``.

    Every batch is full except possibly the last one. Nothing is yielded
    when ``iterable`` is empty.
    """
    stream = iter(iterable)
    while True:
        chunk = list(islice(stream, batch_size))
        if not chunk:
            # The stream is exhausted (or no items were requested): stop.
            return
        yield chunk
# Two-step pipeline: scale the features, then feed them to the classifier
# that wraps MyModule. The ``|`` operator chains the steps into a pipeline.
model = preprocessing.StandardScaler() | classification.Classifier(
    module=MyModule(10),
    loss_fn="binary_cross_entropy",
    optimizer_fn="sgd",
)
# Bare expression: in a notebook cell this displays the pipeline.
model
StandardScaler
StandardScaler ( with_std=True )
Classifier
Classifier ( module=MyModule( (dense0): Linear(in_features=10, out_features=5, bias=True) (nonlin): ReLU() (dense1): Linear(in_features=5, out_features=2, bias=True) (softmax): Softmax(dim=-1) ) loss_fn="binary_cross_entropy" optimizer_fn="sgd" lr=0.001 output_is_logit=True is_class_incremental=False is_feature_incremental=False device="cpu" seed=42 )
# Labels and predictions accumulated across all mini-batches, in stream order.
y_trues = []
y_preds = []
for batch in batcher(dataset, 5):
    # Transpose the batch of pairs into two parallel tuples.
    x, y = zip(*batch)
    x = pd.DataFrame(x)
    y_trues.extend(y)
    y = pd.Series(y)
    # Predict BEFORE learning, so each batch is scored by a model that has
    # not yet been updated with it.
    y_preds.extend(model.predict_many(x))
    model.learn_many(x, y)  # then update the model with this batch
# Both sides are cast to str before comparison -- presumably because the
# predictions and the labels may differ in type; confirm if this matters.
metrics.accuracy_score(
    y_pred=[str(i) for i in y_preds], y_true=[str(i) for i in y_trues]
)
0.436