Skip to content

mnist

Bases: vision_dataloader

A dataloader for the MNIST dataset.

Handles loading and preprocessing of the MNIST dataset.

Attributes:

Name Type Description
name str

Name of the dataloader, default is 'mnist'.

train_batch_size int

Batch size for training data.

test_batch_size int

Batch size for testing data.

Methods:

Name Description
__init__

Initializes the MNIST dataloader.

load

Loads and preprocesses the MNIST dataset.

Source code in tinybig/data/vision_dataloader.py
class mnist(vision_dataloader):
    """
    Dataloader wrapper for the MNIST dataset.

    Builds a shuffled training ``DataLoader`` and an unshuffled testing
    ``DataLoader`` over the ``MNIST`` dataset class, optionally normalizing
    and flattening every image.

    Attributes
    ----------
    name : str
        Name of the dataloader, default is 'mnist' (forwarded to the parent
        initializer).
    train_batch_size : int
        Batch size used by the training loader.
    test_batch_size : int
        Batch size used by the testing loader.

    Methods
    -------
    __init__(...)
        Forwards the name and batch sizes to the parent initializer.
    load(...)
        Builds the train/test loaders and returns them in a dictionary.
    """
    def __init__(self, name='mnist', train_batch_size: int = 64, test_batch_size: int = 64):
        """
        Set up the MNIST dataloader.

        All arguments are forwarded unchanged to the parent initializer.

        Parameters
        ----------
        name : str, optional
            Name of the dataloader, default is 'mnist'.
        train_batch_size : int, optional
            Batch size for training data, default is 64.
        test_batch_size : int, optional
            Batch size for testing data, default is 64.

        Returns
        -------
        None
        """
        super().__init__(
            name=name,
            train_batch_size=train_batch_size,
            test_batch_size=test_batch_size,
        )

    def load(self, cache_dir='./data/', with_transformation: bool = True, *args, **kwargs):
        """
        Build the MNIST train and test dataloaders.

        Parameters
        ----------
        cache_dir : str, optional
            Root directory handed to the dataset (``root=cache_dir``),
            default is './data/'.
        with_transformation : bool, optional
            If True (default), images are converted to tensors, normalized
            and flattened. If False, only the tensor conversion is applied.
        *args, **kwargs
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        dict
            ``{'train_loader': ..., 'test_loader': ...}`` holding the
            shuffled training loader and the unshuffled testing loader.
        """
        # Tensor conversion is always the first step of the pipeline.
        steps = [transforms.ToTensor()]
        if with_transformation:
            # Normalize(mean, std) for the single image channel, then
            # collapse each image tensor into a 1-D vector.
            # NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel
            # mean / std -- confirm.
            steps.append(Normalize((0.1307,), (0.3081,)))
            steps.append(torch.flatten)
        transform = Compose(steps)

        # The same transform object is shared by both splits.
        train_set = MNIST(root=cache_dir, train=True, download=True, transform=transform)
        train_loader = DataLoader(
            train_set,
            batch_size=self.train_batch_size,
            shuffle=True,
        )

        test_set = MNIST(root=cache_dir, train=False, download=True, transform=transform)
        test_loader = DataLoader(
            test_set,
            batch_size=self.test_batch_size,
            shuffle=False,
        )

        return {'train_loader': train_loader, 'test_loader': test_loader}

__init__(name='mnist', train_batch_size=64, test_batch_size=64)

Initializes the MNIST dataloader.

Parameters:

Name Type Description Default
name str

Name of the dataloader, default is 'mnist'.

'mnist'
train_batch_size int

Batch size for training data, default is 64.

64
test_batch_size int

Batch size for testing data, default is 64.

64

Returns:

Type Description
None
Source code in tinybig/data/vision_dataloader.py
def __init__(self, name='mnist', train_batch_size: int = 64, test_batch_size: int = 64):
    """
    Set up the MNIST dataloader.

    All arguments are forwarded unchanged to the parent initializer.

    Parameters
    ----------
    name : str, optional
        Name of the dataloader, default is 'mnist'.
    train_batch_size : int, optional
        Batch size for training data, default is 64.
    test_batch_size : int, optional
        Batch size for testing data, default is 64.

    Returns
    -------
    None
    """
    super().__init__(
        name=name,
        train_batch_size=train_batch_size,
        test_batch_size=test_batch_size,
    )

load(cache_dir='./data/', with_transformation=True, *args, **kwargs)

Loads and preprocesses the MNIST dataset.

Parameters:

Name Type Description Default
cache_dir str

Directory to cache the dataset, default is './data/'.

'./data/'
with_transformation bool

Whether to also normalize and flatten the images after converting them to tensors; if False, only the tensor conversion is applied. Default is True.

True
*args

Additional positional arguments; accepted but not used by this implementation.

()
**kwargs

Additional keyword arguments; accepted but not used by this implementation.

()

Returns:

Type Description
dict

A dictionary containing the train and test dataloaders.

Source code in tinybig/data/vision_dataloader.py
def load(self, cache_dir='./data/', with_transformation: bool = True, *args, **kwargs):
    """
    Build the MNIST train and test dataloaders.

    Parameters
    ----------
    cache_dir : str, optional
        Root directory handed to the dataset (``root=cache_dir``),
        default is './data/'.
    with_transformation : bool, optional
        If True (default), images are converted to tensors, normalized
        and flattened. If False, only the tensor conversion is applied.
    *args, **kwargs
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    dict
        ``{'train_loader': ..., 'test_loader': ...}`` holding the
        shuffled training loader and the unshuffled testing loader.
    """
    # Tensor conversion is always the first step of the pipeline.
    steps = [transforms.ToTensor()]
    if with_transformation:
        # Normalize(mean, std) for the single image channel, then
        # collapse each image tensor into a 1-D vector.
        # NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel
        # mean / std -- confirm.
        steps.append(Normalize((0.1307,), (0.3081,)))
        steps.append(torch.flatten)
    transform = Compose(steps)

    # The same transform object is shared by both splits.
    train_set = MNIST(root=cache_dir, train=True, download=True, transform=transform)
    train_loader = DataLoader(
        train_set,
        batch_size=self.train_batch_size,
        shuffle=True,
    )

    test_set = MNIST(root=cache_dir, train=False, download=True, transform=transform)
    test_loader = DataLoader(
        test_set,
        batch_size=self.test_batch_size,
        shuffle=False,
    )

    return {'train_loader': train_loader, 'test_loader': test_loader}