Skip to content


Bases: feature_selection

Incremental variance-based feature selection.

This class selects features based on their variance, either by applying a threshold or selecting a fixed number of features with the highest variance. It supports incremental updates to the variance estimates.


Name Type Description
threshold float

The minimum variance threshold for feature selection.

v Tensor or None

The current variance estimates for each feature.

t int or None

The iteration count for incremental updates.


Name Description
update_n_feature

Update the number of features to select.

update_threshold

Update the variance threshold for feature selection.

update_v

Incrementally update the variance estimates.

fit

Compute variance estimates for the input data.

transform

Select features based on the variance estimates.

Source code in tinybig/koala/machine_learning/feature_selection/
class incremental_variance_threshold(feature_selection):
    """
    Incremental variance-based feature selection.

    This class selects features based on their variance, either by applying a threshold or selecting a fixed number
    of features with the highest variance. It supports incremental updates to the variance estimates.

    Attributes
    ----------
    threshold : float
        The minimum variance threshold for feature selection.
    v : torch.Tensor or None
        The current variance estimates for each feature.
    t : int or None
        The iteration count for incremental updates.

    Methods
    -------
    update_n_feature(new_n_feature)
        Update the number of features to select.
    update_threshold(new_threshold)
        Update the variance threshold for feature selection.
    update_v(new_v)
        Incrementally update the variance estimates.
    fit(X, device='cpu', *args, **kwargs)
        Compute variance estimates for the input data.
    transform(X, device='cpu', *args, **kwargs)
        Select features based on the variance estimates.

    Notes
    -----
    The methods read ``self.n_feature``, ``self.incremental``, ``self.t_threshold`` and
    ``self.incremental_stop_threshold``; none of them is assigned in ``__init__`` here, so they are
    presumably provided by the ``feature_selection`` base class (TODO confirm).
    """

    def __init__(self, threshold: float = 0.0, name: str = 'incremental_variance_threshold', *args, **kwargs):
        """
        Initialize the incremental variance threshold feature selection class.

        Parameters
        ----------
        threshold : float, optional
            The minimum variance threshold for feature selection. Default is 0.0.
        name : str, optional
            The name of the feature selection method. Default is 'incremental_variance_threshold'.
        *args, **kwargs
            Additional arguments for the base class.
        """
        super().__init__(name=name, *args, **kwargs)

        self.threshold = threshold
        # No variance estimate exists until the first call to fit()/update_v().
        self.v = None
        self.t = None

    def update_n_feature(self, new_n_feature: int):
        """
        Update the number of features to select.

        The stored variance estimates and iteration count are reset.

        Parameters
        ----------
        new_n_feature : int
            The new number of features to select. Must be positive.
        """
        assert new_n_feature > 0
        # Actually store the new value; transform() reads self.n_feature.
        self.n_feature = new_n_feature
        self.v = None
        self.t = None

    def update_threshold(self, new_threshold: float):
        """
        Update the variance threshold for feature selection.

        The stored variance estimates and iteration count are reset.

        Parameters
        ----------
        new_threshold : float
            The new variance threshold.
        """
        self.threshold = new_threshold
        self.v = None
        self.t = None

    def update_v(self, new_v: torch.Tensor):
        """
        Incrementally update the variance estimates.

        While ``self.incremental`` is true, ``self.v`` is the running mean of every ``new_v`` seen so far.
        Incremental mode is switched off once ``self.t`` reaches ``self.t_threshold`` or the estimate moves by
        less than ``self.incremental_stop_threshold``. When incremental mode is off, ``new_v`` simply replaces
        the current estimate.

        Parameters
        ----------
        new_v : torch.Tensor
            The new variance estimates to update or replace the current estimates.
        """
        if self.incremental:
            if self.v is None:
                self.v = torch.zeros_like(new_v)
                self.t = 0

            assert new_v.shape == self.v.shape and self.t >= 0
            self.t += 1
            old_v = self.v
            # Running mean over the t batches seen so far.
            self.v = ((self.t - 1) * self.v + new_v) / self.t

            if self.t >= self.t_threshold or euclidean_distance(x=old_v, x2=self.v) < self.incremental_stop_threshold:
                self.incremental = False
        else:
            # Replace only when not averaging; doing it unconditionally would discard the running mean.
            self.v = new_v

    def fit(self, X: Union[np.ndarray, torch.Tensor], device: str = 'cpu', *args, **kwargs):
        """
        Compute variance estimates for the input data.

        Parameters
        ----------
        X : Union[np.ndarray, torch.Tensor]
            The input data for feature selection.
        device : str, optional
            The device to use for computation ('cpu' or 'cuda'). Default is 'cpu'.
            Accepted for interface compatibility; it is not used by this method.
        *args, **kwargs
            Additional arguments for the fitting process.
        """
        # as_tensor accepts arrays and tensors alike without the copy/warning of torch.tensor(tensor).
        X = torch.as_tensor(X)
        new_v = batch_variance(X, dim=0)
        # Fold the batch variance into the stored estimate; without this fit() has no effect.
        self.update_v(new_v)

    def transform(self, X: Union[np.ndarray, torch.Tensor], device: str = 'cpu', *args, **kwargs):
        """
        Select features based on the variance estimates.

        If ``self.n_feature`` is set, the ``n_feature`` columns with the highest variance are kept (at most
        all columns); otherwise the columns whose variance is at least ``self.threshold`` are kept. If no
        column qualifies, all columns are kept.

        Parameters
        ----------
        X : Union[np.ndarray, torch.Tensor]
            The input data to transform.
        device : str, optional
            The device to use for computation ('cpu' or 'cuda'). Default is 'cpu'.
            Accepted for interface compatibility; it is not used by this method.
        *args, **kwargs
            Additional arguments for the transformation process.

        Returns
        -------
        Union[np.ndarray, torch.Tensor]
            The input data with selected features (a numpy array if ``X`` was one, else a tensor).
        """
        input_X = torch.as_tensor(X)

        assert self.v is not None and self.v.shape[0] == input_X.shape[1]

        variances = torch.as_tensor(self.v)
        if self.n_feature is not None:
            n = min(self.n_feature, input_X.shape[1])
            # argsort is ascending, so the last n entries are the highest-variance columns.
            indices = torch.argsort(variances)[-n:]
        else:
            indices = torch.nonzero(variances >= self.threshold, as_tuple=True)[0]

        if len(indices) == 0:
            # Nothing qualified: fall back to keeping every feature.
            indices = torch.arange(variances.shape[0])

        X_selected = input_X[:, indices.to(input_X.device)]

        if isinstance(X, np.ndarray):
            return X_selected.detach().cpu().numpy()
        return X_selected

__init__(threshold=0.0, name='incremental_variance_threshold', *args, **kwargs)

Initialize the incremental variance threshold feature selection class.


Name Type Description Default
threshold float

The minimum variance threshold for feature selection. Default is 0.0.

name str

The name of the feature selection method. Default is 'incremental_variance_threshold'.


*args

Additional positional arguments for the base class.

**kwargs

Additional keyword arguments for the base class.

Source code in tinybig/koala/machine_learning/feature_selection/
def __init__(self, threshold: float = 0.0, name: str = 'incremental_variance_threshold', *args, **kwargs):
    """
    Initialize the incremental variance threshold feature selection class.

    Parameters
    ----------
    threshold : float, optional
        The minimum variance threshold for feature selection. Default is 0.0.
    name : str, optional
        The name of the feature selection method. Default is 'incremental_variance_threshold'.
    *args, **kwargs
        Additional arguments for the base class.
    """
    super().__init__(name=name, *args, **kwargs)

    self.threshold = threshold
    # No variance estimate exists until the first call to fit()/update_v().
    self.v = None
    self.t = None

fit(X, device='cpu', *args, **kwargs)

Compute variance estimates for the input data.


Name Type Description Default
X Union[ndarray, Tensor]

The input data for feature selection.

device str

The device to use for computation ('cpu' or 'cuda'). Default is 'cpu'.


*args

Additional positional arguments for the fitting process.

**kwargs

Additional keyword arguments for the fitting process.

Source code in tinybig/koala/machine_learning/feature_selection/
def fit(self, X: Union[np.ndarray, torch.Tensor], device: str = 'cpu', *args, **kwargs):
    """
    Compute variance estimates for the input data.

    Parameters
    ----------
    X : Union[np.ndarray, torch.Tensor]
        The input data for feature selection.
    device : str, optional
        The device to use for computation ('cpu' or 'cuda'). Default is 'cpu'.
        Accepted for interface compatibility; it is not used by this method.
    *args, **kwargs
        Additional arguments for the fitting process.
    """
    # as_tensor accepts arrays and tensors alike without the copy/warning of torch.tensor(tensor).
    X = torch.as_tensor(X)
    new_v = batch_variance(X, dim=0)
    # Fold the batch variance into the stored estimate; without this fit() has no effect.
    self.update_v(new_v)

transform(X, device='cpu', *args, **kwargs)

Select features based on the variance estimates.


Name Type Description Default
X Union[ndarray, Tensor]

The input data to transform.

device str

The device to use for computation ('cpu' or 'cuda'). Default is 'cpu'.


*args

Additional positional arguments for the transformation process.

**kwargs

Additional keyword arguments for the transformation process.



Type Description
Union[ndarray, Tensor]

The input data with selected features.

Source code in tinybig/koala/machine_learning/feature_selection/
def transform(self, X: Union[np.ndarray, torch.Tensor], device: str = 'cpu', *args, **kwargs):
    """
    Select features based on the variance estimates.

    If ``self.n_feature`` is set, the ``n_feature`` columns with the highest variance are kept (at most
    all columns); otherwise the columns whose variance is at least ``self.threshold`` are kept. If no
    column qualifies, all columns are kept.

    Parameters
    ----------
    X : Union[np.ndarray, torch.Tensor]
        The input data to transform.
    device : str, optional
        The device to use for computation ('cpu' or 'cuda'). Default is 'cpu'.
        Accepted for interface compatibility; it is not used by this method.
    *args, **kwargs
        Additional arguments for the transformation process.

    Returns
    -------
    Union[np.ndarray, torch.Tensor]
        The input data with selected features (a numpy array if ``X`` was one, else a tensor).
    """
    input_X = torch.as_tensor(X)

    assert self.v is not None and self.v.shape[0] == input_X.shape[1]

    variances = torch.as_tensor(self.v)
    if self.n_feature is not None:
        n = min(self.n_feature, input_X.shape[1])
        # argsort is ascending, so the last n entries are the highest-variance columns.
        indices = torch.argsort(variances)[-n:]
    else:
        indices = torch.nonzero(variances >= self.threshold, as_tuple=True)[0]

    if len(indices) == 0:
        # Nothing qualified: fall back to keeping every feature.
        indices = torch.arange(variances.shape[0])

    X_selected = input_X[:, indices.to(input_X.device)]

    if isinstance(X, np.ndarray):
        return X_selected.detach().cpu().numpy()
    return X_selected


update_n_feature(new_n_feature)

Update the number of features to select.


Name Type Description Default
new_n_feature int

The new number of features to select.

Source code in tinybig/koala/machine_learning/feature_selection/
def update_n_feature(self, new_n_feature: int):
    """
    Update the number of features to select.

    The stored variance estimates and iteration count are reset.

    Parameters
    ----------
    new_n_feature : int
        The new number of features to select. Must be positive.
    """
    assert new_n_feature > 0
    # Actually store the new value; transform() reads self.n_feature.
    self.n_feature = new_n_feature
    self.v = None
    self.t = None


update_threshold(new_threshold)

Update the variance threshold for feature selection.


Name Type Description Default
new_threshold float

The new variance threshold.

Source code in tinybig/koala/machine_learning/feature_selection/
def update_threshold(self, new_threshold: float):
    """
    Update the variance threshold for feature selection.

    The stored variance estimates and iteration count are reset.

    Parameters
    ----------
    new_threshold : float
        The new variance threshold.
    """
    self.threshold = new_threshold
    self.v = None
    self.t = None


update_v(new_v)

Incrementally update the variance estimates.


Name Type Description Default
new_v Tensor

The new variance estimates to update or replace the current estimates.

Source code in tinybig/koala/machine_learning/feature_selection/
def update_v(self, new_v: torch.Tensor):
    """
    Incrementally update the variance estimates.

    While ``self.incremental`` is true, ``self.v`` is the running mean of every ``new_v`` seen so far.
    Incremental mode is switched off once ``self.t`` reaches ``self.t_threshold`` or the estimate moves by
    less than ``self.incremental_stop_threshold``. When incremental mode is off, ``new_v`` simply replaces
    the current estimate.

    Parameters
    ----------
    new_v : torch.Tensor
        The new variance estimates to update or replace the current estimates.
    """
    if self.incremental:
        if self.v is None:
            self.v = torch.zeros_like(new_v)
            self.t = 0

        assert new_v.shape == self.v.shape and self.t >= 0
        self.t += 1
        old_v = self.v
        # Running mean over the t batches seen so far.
        self.v = ((self.t - 1) * self.v + new_v) / self.t

        if self.t >= self.t_threshold or euclidean_distance(x=old_v, x2=self.v) < self.incremental_stop_threshold:
            self.incremental = False
    else:
        # Replace only when not averaging; doing it unconditionally would discard the running mean.
        self.v = new_v