Mini Batches¶

Iterate over a data stream in mini batches

In [1]:

Copied!





import pandas as pd
from river import datasets
from deep_river import classification
from torch import nn
from river import compose
from river import preprocessing
from itertools import islice
from sklearn import metrics
import pandas as pd
from river import datasets
from deep_river import classification
from torch import nn
from river import compose
from river import preprocessing
from itertools import islice
from sklearn import metrics

In [2]:

Copied!

dataset = datasets.Phishing()
dataset = datasets.Phishing()

In [3]:

Copied!





class MyModule(nn.Module):
    def __init__(self, n_features):
        super(MyModule, self).__init__()
        self.dense0 = nn.Linear(n_features, 5)
        self.nonlin = nn.ReLU()
        self.dense1 = nn.Linear(5, 2)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.nonlin(self.dense1(X))
        X = self.softmax(X)
        return X


def batcher(iterable, batch_size):
    iterator = iter(iterable)
    while batch := list(islice(iterator, batch_size)):
        yield batch
class MyModule(nn.Module):
    def __init__(self, n_features):
        super(MyModule, self).__init__()
        self.dense0 = nn.Linear(n_features, 5)
        self.nonlin = nn.ReLU()
        self.dense1 = nn.Linear(5, 2)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.nonlin(self.dense1(X))
        X = self.softmax(X)
        return X


def batcher(iterable, batch_size):
    iterator = iter(iterable)
    while batch := list(islice(iterator, batch_size)):
        yield batch

In [4]:

Copied!





model = compose.Pipeline(
    preprocessing.StandardScaler(),
    classification.ClassifierInitialized(
        module=MyModule(10), loss_fn="binary_cross_entropy", optimizer_fn="sgd"
    ),
)
model
model = compose.Pipeline(
    preprocessing.StandardScaler(),
    classification.ClassifierInitialized(
        module=MyModule(10), loss_fn="binary_cross_entropy", optimizer_fn="sgd"
    ),
)
model

Out[4]:

StandardScaler

StandardScaler (
  with_std=True
)

ClassifierInitialized

ClassifierInitialized (
  module=MyModule(
  (dense0): Linear(in_features=10, out_features=5, bias=True)
  (nonlin): ReLU()
  (dense1): Linear(in_features=5, out_features=2, bias=True)
  (softmax): Softmax(dim=-1)
)
  loss_fn="binary_cross_entropy"
  optimizer_fn="sgd"
  lr=0.001
  output_is_logit=True
  is_class_incremental=False
  is_feature_incremental=False
  device="cpu"
  seed=42
)

In [5]:

Copied!





y_trues = []
y_preds = []
for batch in batcher(dataset, 5):
    x, y = zip(*batch)
    x = pd.DataFrame(x)
    y_trues.extend(y)
    y = pd.Series(y)
    y_preds.extend(model.predict_many(x))
    model.learn_many(x, y)  # make the model learn
y_trues = []
y_preds = []
for batch in batcher(dataset, 5):
    x, y = zip(*batch)
    x = pd.DataFrame(x)
    y_trues.extend(y)
    y = pd.Series(y)
    y_preds.extend(model.predict_many(x))
    model.learn_many(x, y)  # make the model learn

In [6]:

Copied!

metrics.accuracy_score(
    y_pred=[str(i) for i in y_preds], y_true=[str(i) for i in y_trues]
)
metrics.accuracy_score(
    y_pred=[str(i) for i in y_preds], y_true=[str(i) for i in y_trues]
)

Out[6]:

0.4144