incremental_variance_threshold

Bases: feature_selection

Incremental variance-based feature selection.

This class selects features based on their variance, either by applying a threshold or selecting a fixed number of features with the highest variance. It supports incremental updates to the variance estimates.

Attributes:

Name	Type	Description
`threshold`	`float`	The minimum variance threshold for feature selection.
`v`	`Tensor or None`	The current variance estimates for each feature.
`t`	`int or None`	The iteration count for incremental updates.

Methods:

Name	Description
`update_n_feature`	Update the number of features to select.
`update_threshold`	Update the variance threshold for feature selection.
`update_v`	Incrementally update the variance estimates.
`fit`	Compute variance estimates for the input data.
`transform`	Select features based on the variance estimates.

Source code in tinybig/koala/machine_learning/feature_selection/incremental_variance_threshold.py

class incremental_variance_threshold(feature_selection):
    """
        Incremental variance-based feature selection.

        Features are ranked by their per-feature variance. Either the `n_feature` highest-variance features are
        kept (when `n_feature` is set), or every feature whose variance is at least `threshold`. The variance
        estimates can be refined batch by batch as a running mean.

        NOTE(review): `n_feature`, `set_n_feature`, `incremental`, `t_threshold` and `incremental_stop_threshold`
        are read here but not defined in this class; presumably they come from `feature_selection` -- confirm.

        Attributes
        ----------
        threshold : float
            The minimum variance threshold for feature selection.
        v : torch.Tensor or None
            The current variance estimates for each feature.
        t : int or None
            The iteration count for incremental updates.

        Methods
        -------
        update_n_feature(new_n_feature)
            Update the number of features to select.
        update_threshold(new_threshold)
            Update the variance threshold for feature selection.
        update_v(new_v)
            Incrementally update the variance estimates.
        fit(X, device='cpu', *args, **kwargs)
            Compute variance estimates for the input data.
        transform(X, device='cpu', *args, **kwargs)
            Select features based on the variance estimates.
    """
    def __init__(self, threshold: float = 0.0, name: str = 'incremental_variance_threshold', *args, **kwargs):
        """
            Initialize the incremental variance threshold feature selection class.

            Parameters
            ----------
            threshold : float, optional
                The minimum variance threshold for feature selection. Default is 0.0.
            name : str, optional
                The name of the feature selection method. Default is 'incremental_variance_threshold'.
            *args, **kwargs
                Additional arguments for the base class.
        """
        super().__init__(*args, name=name, **kwargs)

        self.threshold = threshold
        # No variance estimate exists until the first call to `update_v`.
        self.v, self.t = None, None

    def update_n_feature(self, new_n_feature: int):
        """
            Update the number of features to select.

            The stored variance estimates and iteration count are discarded.

            Parameters
            ----------
            new_n_feature : int
                The new number of features to select. Must be positive.
        """
        assert new_n_feature > 0
        self.set_n_feature(new_n_feature)
        self.v, self.t = None, None

    def update_threshold(self, new_threshold: float):
        """
            Update the variance threshold for feature selection.

            The stored variance estimates and iteration count are discarded.

            Parameters
            ----------
            new_threshold : float
                The new variance threshold.
        """
        self.threshold = new_threshold
        self.v, self.t = None, None

    def update_v(self, new_v: torch.Tensor):
        """
            Incrementally update the variance estimates.

            While `self.incremental` is truthy, `self.v` becomes the running mean of all `new_v` values seen so
            far. Incremental mode is switched off once `self.t` reaches `self.t_threshold` or the estimate moves
            by less than `self.incremental_stop_threshold`. Outside incremental mode `new_v` replaces `self.v`.

            Parameters
            ----------
            new_v : torch.Tensor
                The new variance estimates to update or replace the current estimates.
        """
        if not self.incremental:
            self.v = new_v
            return

        if self.v is None:
            # First batch: start from zeros so the running mean below yields new_v.
            self.v = torch.zeros_like(new_v)
            self.t = 0

        assert new_v.shape == self.v.shape and self.t >= 0
        previous = self.v
        self.t += 1
        self.v = ((self.t - 1) * previous + new_v) / self.t

        # The iteration cap is tested first so the distance is only computed when needed.
        reached_cap = self.t >= self.t_threshold
        if reached_cap or euclidean_distance(x=previous, x2=self.v) < self.incremental_stop_threshold:
            self.incremental = False

    def fit(self, X: Union[np.ndarray, torch.Tensor], device: str = 'cpu', *args, **kwargs):
        """
            Compute variance estimates for the input data.

            Parameters
            ----------
            X : Union[np.ndarray, torch.Tensor]
                The input data for feature selection.
            device : str, optional
                Accepted for interface compatibility; it is not used by this method. Default is 'cpu'.
            *args, **kwargs
                Additional arguments for the fitting process (unused here).
        """
        # `torch.tensor(tensor)` emits a copy-construct UserWarning; clone().detach() is the
        # documented equivalent for tensor inputs.
        data = X.clone().detach() if isinstance(X, torch.Tensor) else torch.tensor(X)
        self.update_v(batch_variance(data, dim=0))

    def transform(self, X: Union[np.ndarray, torch.Tensor], device: str = 'cpu', *args, **kwargs):
        """
            Select features based on the variance estimates.

            When `self.n_feature` is set, the `min(n_feature, n_columns)` highest-variance columns are kept,
            ordered by ascending variance. Otherwise every column whose variance is at least `self.threshold`
            is kept; if no column qualifies, all columns are returned.

            Parameters
            ----------
            X : Union[np.ndarray, torch.Tensor]
                The input data to transform; columns are features.
            device : str, optional
                Accepted for interface compatibility; it is not used by this method. Default is 'cpu'.
            *args, **kwargs
                Additional arguments for the transformation process (unused here).

            Returns
            -------
            Union[np.ndarray, torch.Tensor]
                The input data with selected features; a NumPy array when `X` is a NumPy array.

            Raises
            ------
            AssertionError
                If no variance estimate is available or its length differs from the number of columns of `X`.
        """
        input_X = X.clone().detach() if isinstance(X, torch.Tensor) else torch.tensor(X)

        assert self.v is not None and self.v.shape[0] == input_X.shape[1]

        # Rank on a host-side NumPy copy: NumPy functions applied directly to a tensor fail
        # when it requires grad or lives on a non-CPU device.
        if isinstance(self.v, torch.Tensor):
            variances = self.v.detach().cpu().numpy()
        else:
            variances = np.asarray(self.v)

        if self.n_feature is not None:
            n = min(self.n_feature, input_X.shape[1])
            indices = np.argsort(variances)[-n:]
        else:
            indices = np.where(variances >= self.threshold)[0]

        if len(indices) == 0:
            # Nothing passed the threshold: fall back to keeping every feature.
            indices = np.arange(variances.shape[0])

        column_index = torch.as_tensor(indices, dtype=torch.long, device=input_X.device)
        X_selected = input_X[:, column_index]

        # The selected width is len(indices); it equals n_feature only in top-n mode with
        # enough columns, so it is deliberately not asserted against n_feature.
        return X_selected.detach().cpu().numpy() if isinstance(X, np.ndarray) else X_selected

`__init__(threshold=0.0, name='incremental_variance_threshold', *args, **kwargs)`

Initialize the incremental variance threshold feature selection class.

Parameters:

Name	Type	Description	Default
`threshold`	`float`	The minimum variance threshold for feature selection. Default is 0.0.	`0.0`
`name`	`str`	The name of the feature selection method. Default is 'incremental_variance_threshold'.	`'incremental_variance_threshold'`
`*args`		Additional arguments for the base class.	`()`
`**kwargs`		Additional arguments for the base class.	`{}`

Source code in tinybig/koala/machine_learning/feature_selection/incremental_variance_threshold.py

def __init__(self, threshold: float = 0.0, name: str = 'incremental_variance_threshold', *args, **kwargs):
    """
        Set up the selector with a variance threshold and an empty variance estimate.

        Parameters
        ----------
        threshold : float, optional
            The minimum variance threshold for feature selection. Default is 0.0.
        name : str, optional
            The name of the feature selection method. Default is 'incremental_variance_threshold'.
        *args, **kwargs
            Forwarded unchanged to the base class initializer.
    """
    super().__init__(*args, name=name, **kwargs)

    self.threshold = threshold
    # No variance estimate exists until the first call to `update_v`.
    self.v, self.t = None, None

`fit(X, device='cpu', *args, **kwargs)`

Compute variance estimates for the input data.

Parameters:

Name	Type	Description	Default
`X`	`Union[ndarray, Tensor]`	The input data for feature selection.	required
`device`	`str`	The device to use for computation ('cpu' or 'cuda'). Default is 'cpu'.	`'cpu'`
`*args`		Additional arguments for the fitting process.	`()`
`**kwargs`		Additional arguments for the fitting process.	`{}`

Source code in tinybig/koala/machine_learning/feature_selection/incremental_variance_threshold.py

def fit(self, X: Union[np.ndarray, torch.Tensor], device: str = 'cpu', *args, **kwargs):
    """
        Compute variance estimates for the input data.

        Parameters
        ----------
        X : Union[np.ndarray, torch.Tensor]
            The input data for feature selection.
        device : str, optional
            Accepted for interface compatibility; it is not used by this method. Default is 'cpu'.
        *args, **kwargs
            Additional arguments for the fitting process (unused here).
    """
    # `torch.tensor(tensor)` emits a copy-construct UserWarning; clone().detach() is the
    # documented equivalent for tensor inputs.
    data = X.clone().detach() if isinstance(X, torch.Tensor) else torch.tensor(X)
    self.update_v(batch_variance(data, dim=0))

`transform(X, device='cpu', *args, **kwargs)`

Select features based on the variance estimates.

Parameters:

Name	Type	Description	Default
`X`	`Union[ndarray, Tensor]`	The input data to transform.	required
`device`	`str`	The device to use for computation ('cpu' or 'cuda'). Default is 'cpu'.	`'cpu'`
`*args`		Additional arguments for the transformation process.	`()`
`**kwargs`		Additional arguments for the transformation process.	`{}`

Returns:

Type	Description
`Union[ndarray, Tensor]`	The input data with selected features.

Source code in tinybig/koala/machine_learning/feature_selection/incremental_variance_threshold.py

def transform(self, X: Union[np.ndarray, torch.Tensor], device: str = 'cpu', *args, **kwargs):
    """
        Select features based on the variance estimates.

        When `self.n_feature` is set, the `min(n_feature, n_columns)` highest-variance columns are kept,
        ordered by ascending variance. Otherwise every column whose variance is at least `self.threshold`
        is kept; if no column qualifies, all columns are returned.

        Parameters
        ----------
        X : Union[np.ndarray, torch.Tensor]
            The input data to transform; columns are features.
        device : str, optional
            Accepted for interface compatibility; it is not used by this method. Default is 'cpu'.
        *args, **kwargs
            Additional arguments for the transformation process (unused here).

        Returns
        -------
        Union[np.ndarray, torch.Tensor]
            The input data with selected features; a NumPy array when `X` is a NumPy array.

        Raises
        ------
        AssertionError
            If no variance estimate is available or its length differs from the number of columns of `X`.
    """
    # `torch.tensor(tensor)` emits a copy-construct UserWarning; clone().detach() is the
    # documented equivalent for tensor inputs.
    input_X = X.clone().detach() if isinstance(X, torch.Tensor) else torch.tensor(X)

    assert self.v is not None and self.v.shape[0] == input_X.shape[1]

    # Rank on a host-side NumPy copy: NumPy functions applied directly to a tensor fail
    # when it requires grad or lives on a non-CPU device.
    if isinstance(self.v, torch.Tensor):
        variances = self.v.detach().cpu().numpy()
    else:
        variances = np.asarray(self.v)

    if self.n_feature is not None:
        n = min(self.n_feature, input_X.shape[1])
        indices = np.argsort(variances)[-n:]
    else:
        indices = np.where(variances >= self.threshold)[0]

    if len(indices) == 0:
        # Nothing passed the threshold: fall back to keeping every feature.
        # (`Tensor.size` is a method, so the previous `np.arange(self.v.size)` raised TypeError.)
        indices = np.arange(variances.shape[0])

    column_index = torch.as_tensor(indices, dtype=torch.long, device=input_X.device)
    X_selected = input_X[:, column_index]

    # The selected width is len(indices); it equals n_feature only in top-n mode with
    # enough columns, so it is deliberately not asserted against n_feature.
    return X_selected.detach().cpu().numpy() if isinstance(X, np.ndarray) else X_selected

`update_n_feature(new_n_feature)`

Update the number of features to select.

Parameters:

Name	Type	Description	Default
`new_n_feature`	`int`	The new number of features to select.	required

Source code in tinybig/koala/machine_learning/feature_selection/incremental_variance_threshold.py

def update_n_feature(self, new_n_feature: int):
    """
        Change how many features are selected and discard the stored variance state.

        Parameters
        ----------
        new_n_feature : int
            The new number of features to select. Must be positive.
    """
    assert new_n_feature > 0
    self.set_n_feature(new_n_feature)
    # The stored estimates and iteration count are dropped together.
    self.v, self.t = None, None

`update_threshold(new_threshold)`

Update the variance threshold for feature selection.

Parameters:

Name	Type	Description	Default
`new_threshold`	`float`	The new variance threshold.	required

Source code in tinybig/koala/machine_learning/feature_selection/incremental_variance_threshold.py

def update_threshold(self, new_threshold: float):
    """
        Replace the variance threshold and discard the stored variance state.

        Parameters
        ----------
        new_threshold : float
            The new variance threshold.
    """
    self.threshold = new_threshold
    # The stored estimates and iteration count are dropped together.
    self.v, self.t = None, None

`update_v(new_v)`

Incrementally update the variance estimates.

Parameters:

Name	Type	Description	Default
`new_v`	`Tensor`	The new variance estimates to update or replace the current estimates.	required

Source code in tinybig/koala/machine_learning/feature_selection/incremental_variance_threshold.py

def update_v(self, new_v: torch.Tensor):
    """
        Fold a new batch of variance estimates into the stored ones.

        While `self.incremental` is truthy, `self.v` becomes the running mean of all `new_v` values seen so
        far. Incremental mode is switched off once `self.t` reaches `self.t_threshold` or the estimate moves
        by less than `self.incremental_stop_threshold`. Outside incremental mode `new_v` replaces `self.v`.

        Parameters
        ----------
        new_v : torch.Tensor
            The new variance estimates to update or replace the current estimates.
    """
    if not self.incremental:
        self.v = new_v
        return

    if self.v is None:
        # First batch: start from zeros so the running mean below yields new_v.
        self.v = torch.zeros_like(new_v)
        self.t = 0

    assert new_v.shape == self.v.shape and self.t >= 0
    previous = self.v
    self.t += 1
    self.v = ((self.t - 1) * previous + new_v) / self.t

    # The iteration cap is tested first so the distance is only computed when needed.
    reached_cap = self.t >= self.t_threshold
    if reached_cap or euclidean_distance(x=previous, x2=self.v) < self.incremental_stop_threshold:
        self.incremental = False

incremental_variance_threshold

__init__(threshold=0.0, name='incremental_variance_threshold', *args, **kwargs)

fit(X, device='cpu', *args, **kwargs)

transform(X, device='cpu', *args, **kwargs)

update_n_feature(new_n_feature)

update_threshold(new_threshold)

update_v(new_v)

`__init__(threshold=0.0, name='incremental_variance_threshold', *args, **kwargs)`

`fit(X, device='cpu', *args, **kwargs)`

`transform(X, device='cpu', *args, **kwargs)`

`update_n_feature(new_n_feature)`

`update_threshold(new_threshold)`

`update_v(new_v)`