Mini Batches¶
Iterate over a data stream in mini batches
In [18]:
Copied!
import pandas as pd
from river import datasets
from deep_river import classification
from torch import nn
from river import compose
from river import preprocessing
from itertools import islice
from sklearn import metrics
import pandas as pd
from river import datasets
from deep_river import classification
from torch import nn
from river import compose
from river import preprocessing
from itertools import islice
from sklearn import metrics
In [19]:
Copied!
dataset = datasets.Phishing()
dataset = datasets.Phishing()
In [20]:
Copied!
class MyModule(nn.Module):
def __init__(self, n_features):
super(MyModule, self).__init__()
self.dense0 = nn.Linear(n_features, 5)
self.nonlin = nn.ReLU()
self.dense1 = nn.Linear(5, 2)
self.softmax = nn.Softmax(dim=-1)
def forward(self, X, **kwargs):
X = self.nonlin(self.dense0(X))
X = self.nonlin(self.dense1(X))
X = self.softmax(X)
return X
def batcher(iterable, batch_size):
iterator = iter(iterable)
while batch := list(islice(iterator, batch_size)):
yield batch
class MyModule(nn.Module):
def __init__(self, n_features):
super(MyModule, self).__init__()
self.dense0 = nn.Linear(n_features, 5)
self.nonlin = nn.ReLU()
self.dense1 = nn.Linear(5, 2)
self.softmax = nn.Softmax(dim=-1)
def forward(self, X, **kwargs):
X = self.nonlin(self.dense0(X))
X = self.nonlin(self.dense1(X))
X = self.softmax(X)
return X
def batcher(iterable, batch_size):
iterator = iter(iterable)
while batch := list(islice(iterator, batch_size)):
yield batch
In [21]:
Copied!
model = compose.Pipeline(
preprocessing.StandardScaler(),
classification.Classifier(
module=MyModule, loss_fn="binary_cross_entropy", optimizer_fn="sgd"
),
)
model
model = compose.Pipeline(
preprocessing.StandardScaler(),
classification.Classifier(
module=MyModule, loss_fn="binary_cross_entropy", optimizer_fn="sgd"
),
)
model
Out[21]:
StandardScaler
(
with_std=True
)
Classifier
(
module=None
loss_fn="binary_cross_entropy"
optimizer_fn=<class 'torch.optim.sgd.SGD'>
lr=0.001
output_is_logit=True
is_class_incremental=False
device="cpu"
seed=42
)
In [22]:
Copied!
y_trues = []
y_preds = []
for batch in batcher(dataset, 5):
x, y = zip(*batch)
x = pd.DataFrame(x)
y_trues.extend(y)
y = pd.Series(y)
y_preds.extend(model.predict_many(X=x))
model = model.learn_many(x, y) # make the model learn
y_trues = []
y_preds = []
for batch in batcher(dataset, 5):
x, y = zip(*batch)
x = pd.DataFrame(x)
y_trues.extend(y)
y = pd.Series(y)
y_preds.extend(model.predict_many(X=x))
model = model.learn_many(x, y) # make the model learn
In [23]:
Copied!
metrics.accuracy_score(
y_pred=[str(i) for i in y_preds], y_true=[str(i) for i in y_trues]
)
metrics.accuracy_score(
y_pred=[str(i) for i in y_preds], y_true=[str(i) for i in y_trues]
)
Out[23]:
0.4192