Skip to content

dataloader

The base dataloader class.

It defines the base dataloader class that can be used for loading data from files.

Attributes:

Name Type Description
name str, default = 'base_dataloader'

The name of the base dataloader class.

Methods:

Name Description
__init__

The base dataloader class initialization method.

load

The load method for loading the data from file.

Source code in tinybig/data/base_data.py
class dataloader:
    """
    The base dataloader class.

    It defines the base dataloader class that can be used for loading data from files.

    Attributes
    ----------
    name: str, default = 'base_dataloader'
        The name of the base dataloader class.
    train_batch_size: int
        The training batch size passed at initialization.
    test_batch_size: int
        The testing batch size passed at initialization.

    Methods
    ----------
    __init__
        The base dataloader class initialization method.

    from_config
        Build a dataloader object from a configuration dictionary.

    to_config
        Export the dataloader object as a configuration dictionary.

    encode_str_labels
        Encode a sequence of labels into an integer (or one-hot) tensor.

    load
        The load method for loading the data from file.
    """
    def __init__(self, train_batch_size: int, test_batch_size: int, name: str = 'base_dataloader', *args, **kwargs):
        """
        The initialization method of base dataloader.

        Parameters
        ----------
        train_batch_size: int
            The batch size stored as `self.train_batch_size`.
        test_batch_size: int
            The batch size stored as `self.test_batch_size`.
        name: str, default = 'base_dataloader'
            The name of the base loader class object.
        *args, **kwargs
            Accepted for signature compatibility; not used by this base class.

        Returns
        ----------
        object
            The initialized object of the base dataloader class.
        """
        self.name = name
        self.train_batch_size = train_batch_size
        self.test_batch_size = test_batch_size

    @staticmethod
    def from_config(configs: dict):
        """
        Build a dataloader object from a configuration dictionary.

        Parameters
        ----------
        configs: dict
            The configuration. It must contain the key 'data_class'; the optional
            key 'data_parameters' holds the keyword arguments of the constructor.

        Returns
        ----------
        object
            The object created by calling the class resolved with
            `config.get_obj_from_str(configs['data_class'])` on the parameters.

        Raises
        ----------
        ValueError
            If `configs` is None or does not contain the 'data_class' key.
        """
        if configs is None:
            raise ValueError("configs cannot be None")
        # An explicit raise (instead of `assert`) keeps the check active under `python -O`.
        if 'data_class' not in configs:
            raise ValueError("configs must contain the 'data_class' key")
        class_name = configs['data_class']
        parameters = configs['data_parameters'] if 'data_parameters' in configs else {}
        # A present-but-empty entry (e.g., parsed from a config file as None) means "no parameters".
        if parameters is None:
            parameters = {}
        return config.get_obj_from_str(class_name)(**parameters)

    def to_config(self):
        """
        Export the dataloader object as a configuration dictionary.

        Returns
        ----------
        dict
            A dictionary with the key 'data_class' (the class name of this object)
            and the key 'data_parameters' (all instance attributes and their values).
        """
        # NOTE(review): 'data_class' is the bare class name here; confirm that
        # `config.get_obj_from_str` in `from_config` can resolve it without a module path.
        class_name = self.__class__.__name__
        attributes = {attr: getattr(self, attr) for attr in vars(self)}

        return {
            "data_class": class_name,
            "data_parameters": attributes
        }

    @staticmethod
    def encode_str_labels(labels: Union[List, Tuple, np.ndarray], one_hot: bool = False, device: str = 'cpu'):
        """
        Encode a sequence of labels into an integer tensor.

        Each distinct label is mapped to an index. The indices follow the sorted
        order of the distinct labels, so the encoding is reproducible across runs.

        Parameters
        ----------
        labels: Union[List, Tuple, np.ndarray]
            The non-empty sequence of hashable labels to be encoded.
        one_hot: bool, default = False
            If True, each label is encoded as a one-hot row; otherwise as its class index.
        device: str, default = 'cpu'
            The device of the returned tensor.

        Returns
        ----------
        torch.Tensor
            An int32 tensor of shape (len(labels),) or, if `one_hot` is True,
            of shape (len(labels), number of distinct labels).

        Raises
        ----------
        ValueError
            If `labels` is None or empty.
        """
        if labels is None or len(labels) == 0:
            raise ValueError("labels cannot be None or empty")

        unique_labels = set(labels)
        try:
            # Iterating a set of strings directly yields an order that varies between
            # interpreter runs (hash randomization); sorting makes the mapping stable.
            classes = sorted(unique_labels)
        except TypeError:
            # Labels that cannot be compared with each other: use first-appearance order.
            classes = list(dict.fromkeys(labels))

        class_index = {c: i for i, c in enumerate(classes)}
        indices = np.array([class_index[label] for label in labels])
        if one_hot:
            # Build the identity matrix once and pick one row per label.
            encoded_labels = np.identity(len(classes))[indices]
        else:
            encoded_labels = indices
        return torch.tensor(encoded_labels, dtype=torch.int32, device=device)

    # NOTE: this class does not use an ABCMeta metaclass, so the decorator below only
    # marks the method as abstract; instantiating the class is not prevented by it.
    @abstractmethod
    def load(self, *args, **kwargs):
        """
        The load function of the base dataloader class.

        It loads the data from file in the dataloader class.
        This method is declared to be abstract, and needs to be implemented in the inheriting class.

        """
        pass

__init__(train_batch_size, test_batch_size, name='base_dataloader', *args, **kwargs)

The initialization method of base dataloader.

Parameters:

Name Type Description Default
name str

The name of the base loader class object.

'base_dataloader'

Returns:

Type Description
object

The initialized object of the base dataloader class.

Source code in tinybig/data/base_data.py
def __init__(self, train_batch_size: int, test_batch_size: int, name: str = 'base_dataloader', *args, **kwargs):
    """
    Set up a base dataloader object.

    Parameters
    ----------
    train_batch_size: int
        The value stored as `self.train_batch_size`.
    test_batch_size: int
        The value stored as `self.test_batch_size`.
    name: str, default = 'base_dataloader'
        The name of the base loader class object.
    *args, **kwargs
        Accepted but not used by this method.

    Returns
    ----------
    object
        The initialized object of the base dataloader class.
    """
    # Batch sizes first, then the object name; the three assignments are independent.
    self.train_batch_size = train_batch_size
    self.test_batch_size = test_batch_size
    self.name = name

load(*args, **kwargs) abstractmethod

The load function of the base dataloader class.

It loads the data from file. This method is declared abstract and must be implemented by the inheriting classes.

Source code in tinybig/data/base_data.py
@abstractmethod
def load(self, *args, **kwargs):
    """
    Load the data from file (placeholder of the base dataloader class).

    The base implementation does nothing and returns None. The method is marked
    as abstract and is meant to be implemented by inheriting classes.

    Parameters
    ----------
    *args, **kwargs
        Accepted but not used by the base implementation.
    """
    return None