Skip to content

imagenet

Bases: vision_dataloader

A dataloader for the ImageNet dataset.

Handles loading and preprocessing of the ImageNet dataset.

Attributes:

Name Type Description
name str

Name of the dataloader, default is 'imagenet'.

train_batch_size int

Batch size for training data.

test_batch_size int

Batch size for testing data.

Methods:

Name Description
__init__

Initializes the ImageNet dataloader.

load

Loads and preprocesses the ImageNet dataset.

Source code in tinybig/data/vision_dataloader.py
class imagenet(vision_dataloader):
    """
    A dataloader for the ImageNet dataset.

    Builds training and testing ``DataLoader`` objects over the ImageNet
    'train' and 'val' splits, optionally applying a preprocessing pipeline.

    Attributes
    ----------
    name : str
        Name of the dataloader, default is 'imagenet'.
    train_batch_size : int
        Batch size for training data.
    test_batch_size : int
        Batch size for testing data.

    Methods
    -------
    __init__(...)
        Initializes the ImageNet dataloader.
    load(...)
        Loads and preprocesses the ImageNet dataset.
    """
    def __init__(self, name: str = 'imagenet', train_batch_size: int = 64, test_batch_size: int = 64) -> None:
        """
        Initializes the ImageNet dataloader.

        All arguments are forwarded unchanged to the parent initializer.

        Parameters
        ----------
        name : str, optional
            Name of the dataloader, default is 'imagenet'.
        train_batch_size : int, optional
            Batch size for training data, default is 64.
        test_batch_size : int, optional
            Batch size for testing data, default is 64.

        Returns
        -------
        None
        """
        super().__init__(name=name, train_batch_size=train_batch_size, test_batch_size=test_batch_size)

    def load(self, cache_dir='./data/', with_transformation: bool = True, *args, **kwargs):
        """
        Loads and preprocesses the ImageNet dataset.

        Parameters
        ----------
        cache_dir : str, optional
            Root directory of the ImageNet data, default is './data/'.
        with_transformation : bool, optional
            If True (default), images are resized, cropped to 224, converted
            to tensors, normalized and flattened. The training split uses a
            random resized crop; the validation split uses a deterministic
            center crop so that evaluation is reproducible. If False, images
            are only converted to tensors.
        *args, **kwargs
            Accepted but currently unused by this method.

        Returns
        -------
        dict
            A dictionary with keys 'train_loader' and 'test_loader' holding
            the train and test dataloaders.
        """
        if with_transformation:
            normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            train_transform = transforms.Compose([
                transforms.Resize(256),
                transforms.RandomResizedCrop(224),
                transforms.ToTensor(),
                normalize,
                torch.flatten,
            ])
            # Random augmentation on the validation split would make reported
            # metrics vary from run to run; a center crop yields the same
            # 224x224 output size deterministically.
            test_transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
                torch.flatten,
            ])
        else:
            # Use transforms.Compose here as well, consistent with the branch
            # above, instead of relying on a separately imported bare Compose.
            # NOTE(review): without resizing, images of different sizes may not
            # be batchable by the default collate function -- confirm with callers.
            train_transform = test_transform = transforms.Compose([
                transforms.ToTensor(),
            ])

        train_loader = DataLoader(
            ImageNet(root=cache_dir, split='train', transform=train_transform),
            batch_size=self.train_batch_size, shuffle=True)

        test_loader = DataLoader(
            ImageNet(root=cache_dir, split='val', transform=test_transform),
            batch_size=self.test_batch_size, shuffle=False)

        return {'train_loader': train_loader, 'test_loader': test_loader}

__init__(name='imagenet', train_batch_size=64, test_batch_size=64)

Initializes the ImageNet dataloader.

Parameters:

Name Type Description Default
name str

Name of the dataloader, default is 'imagenet'.

'imagenet'
train_batch_size int

Batch size for training data, default is 64.

64
test_batch_size int

Batch size for testing data, default is 64.

64

Returns:

Type Description
None
Source code in tinybig/data/vision_dataloader.py
def __init__(self, name='imagenet', train_batch_size: int = 64, test_batch_size: int = 64):
    """
    Set up the ImageNet dataloader by delegating to the parent initializer.

    Parameters
    ----------
    name : str, optional
        Name of the dataloader, default is 'imagenet'.
    train_batch_size : int, optional
        Batch size for training data, default is 64.
    test_batch_size : int, optional
        Batch size for testing data, default is 64.

    Returns
    -------
    None
    """
    # Collect the settings once, then hand them to the base class as keywords.
    parent_settings = {
        'name': name,
        'train_batch_size': train_batch_size,
        'test_batch_size': test_batch_size,
    }
    super().__init__(**parent_settings)

load(cache_dir='./data/', with_transformation=True, *args, **kwargs)

Loads and preprocesses the ImageNet dataset.

Parameters:

Name Type Description Default
cache_dir str

Directory to cache the dataset, default is './data/'.

'./data/'
with_transformation bool

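Whether to apply the preprocessing pipeline (resizing, cropping, normalization and flattening) to the training and testing datasets; when False, images are only converted to tensors. Default is True.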

True
*args

Additional positional arguments; accepted but not currently used by this method.

()
**kwargs

Additional keyword arguments; accepted but not currently used by this method.

()

Returns:

Type Description
dict

A dictionary containing the train and test dataloaders.

Source code in tinybig/data/vision_dataloader.py
def load(self, cache_dir='./data/', with_transformation: bool = True, *args, **kwargs):
    """
    Loads and preprocesses the ImageNet dataset.

    Parameters
    ----------
    cache_dir : str, optional
        Root directory of the ImageNet data, default is './data/'.
    with_transformation : bool, optional
        If True (default), images are resized, cropped to 224, converted
        to tensors, normalized and flattened. The training split uses a
        random resized crop; the validation split uses a deterministic
        center crop so that evaluation is reproducible. If False, images
        are only converted to tensors.
    *args, **kwargs
        Accepted but currently unused by this method.

    Returns
    -------
    dict
        A dictionary with keys 'train_loader' and 'test_loader' holding
        the train and test dataloaders.
    """
    if with_transformation:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        train_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomResizedCrop(224),
            transforms.ToTensor(),
            normalize,
            torch.flatten,
        ])
        # Random augmentation on the validation split would make reported
        # metrics vary from run to run; a center crop yields the same
        # 224x224 output size deterministically.
        test_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
            torch.flatten,
        ])
    else:
        # Use transforms.Compose here as well, consistent with the branch
        # above, instead of relying on a separately imported bare Compose.
        # NOTE(review): without resizing, images of different sizes may not
        # be batchable by the default collate function -- confirm with callers.
        train_transform = test_transform = transforms.Compose([
            transforms.ToTensor(),
        ])

    train_loader = DataLoader(
        ImageNet(root=cache_dir, split='train', transform=train_transform),
        batch_size=self.train_batch_size, shuffle=True)

    test_loader = DataLoader(
        ImageNet(root=cache_dir, split='val', transform=test_transform),
        batch_size=self.test_batch_size, shuffle=False)

    return {'train_loader': train_loader, 'test_loader': test_loader}