pubmed

Bases: graph_dataloader

A dataloader class for the PubMed dataset.

This class extends graph_dataloader to handle the PubMed graph dataset, a large-scale dataset used for graph-based learning.

Attributes:

Name	Type	Description
`data_profile`	`dict`	Metadata and download links specific to the PubMed dataset.
`graph`	`graph`	The loaded graph structure for the PubMed dataset.

Methods:

Name	Description
`__init__`	Initializes the dataloader for the PubMed dataset.
`get_train_test_idx`	Generates train and test indices for the PubMed dataset.

Source code in tinybig/data/graph_dataloader.py

class pubmed(graph_dataloader):
    """
    Graph dataloader preconfigured for the PubMed dataset.

    A thin specialization of `graph_dataloader`: it hands the PubMed data
    profile to the parent constructor and provides a fixed train/test node
    split.

    Attributes
    ----------
    data_profile: dict
        Metadata and download links specific to the PubMed dataset.
    graph: graph_class
        The loaded graph structure for the PubMed dataset.

    Methods
    -------
    __init__(name: str = 'pubmed', train_batch_size: int = 64, test_batch_size: int = 64, ...)
        Builds the dataloader using the PubMed data profile.
    get_train_test_idx(X: torch.Tensor = None, y: torch.Tensor = None, ...)
        Returns the fixed train and test node indices.
    """
    def __init__(self, name: str = 'pubmed', train_batch_size: int = 64, test_batch_size: int = 64, *args, **kwargs):
        """
        Builds the dataloader using the PubMed data profile.

        Parameters
        ----------
        name: str, default = 'pubmed'
            Name under which the dataset is registered.
        train_batch_size: int, default = 64
            Number of samples per training batch.
        test_batch_size: int, default = 64
            Number of samples per testing batch.

        Returns
        -------
        None

        Notes
        -----
        Extra positional and keyword arguments are accepted for signature
        compatibility but are not forwarded to the parent constructor.
        """
        super().__init__(
            name=name,
            data_profile=PUBMED_DATA_PROFILE,
            train_batch_size=train_batch_size,
            test_batch_size=test_batch_size,
        )

    def get_train_test_idx(self, X: torch.Tensor = None, y: torch.Tensor = None, *args, **kwargs):
        """
        Returns the fixed train and test node indices for PubMed.

        Parameters
        ----------
        X: torch.Tensor, optional
            Node features; ignored, the split does not depend on them.
        y: torch.Tensor, optional
            Labels; ignored, the split does not depend on them.

        Returns
        -------
        tuple
            `(train_idx, test_idx)`, both `torch.LongTensor`.

        Notes
        -----
        The split is hard-coded: training uses nodes 0 to 59 and testing
        uses nodes 6300 to 7299 (upper bounds of the ranges are exclusive).
        """
        # Hard-coded split boundaries; range() excludes the upper bound.
        train_nodes = range(60)
        test_nodes = range(6300, 7300)
        return torch.LongTensor(train_nodes), torch.LongTensor(test_nodes)

`__init__(name='pubmed', train_batch_size=64, test_batch_size=64, *args, **kwargs)`

Initializes the dataloader for the PubMed dataset.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name of the dataset.	`'pubmed'`
`train_batch_size`	`int`	Batch size for the training dataset.	`64`
`test_batch_size`	`int`	Batch size for the testing dataset.	`64`

Returns:

Type	Description
`None`

Source code in tinybig/data/graph_dataloader.py

def __init__(self, name: str = 'pubmed', train_batch_size: int = 64, test_batch_size: int = 64, *args, **kwargs):
    """
    Builds the dataloader using the PubMed data profile.

    Parameters
    ----------
    name: str, default = 'pubmed'
        Name under which the dataset is registered.
    train_batch_size: int, default = 64
        Number of samples per training batch.
    test_batch_size: int, default = 64
        Number of samples per testing batch.

    Returns
    -------
    None

    Notes
    -----
    Extra positional and keyword arguments are accepted for signature
    compatibility but are not forwarded to the parent constructor.
    """
    super().__init__(
        name=name,
        data_profile=PUBMED_DATA_PROFILE,
        train_batch_size=train_batch_size,
        test_batch_size=test_batch_size,
    )

`get_train_test_idx(X=None, y=None, *args, **kwargs)`

Generates train and test indices for the PubMed dataset.

Parameters:

Name	Type	Description	Default
`X`	`Tensor`	Node features (not used in this method).	`None`
`y`	`Tensor`	Labels (not used in this method).	`None`

Returns:

Type	Description
`tuple`	Train indices (`torch.LongTensor`) and test indices (`torch.LongTensor`).

Notes

The train indices are predefined as the first 60 nodes. The test indices are predefined as nodes 6300 to 7299.

Source code in tinybig/data/graph_dataloader.py

def get_train_test_idx(self, X: torch.Tensor = None, y: torch.Tensor = None, *args, **kwargs):
    """
    Returns the fixed train and test node indices for PubMed.

    Parameters
    ----------
    X: torch.Tensor, optional
        Node features; ignored, the split does not depend on them.
    y: torch.Tensor, optional
        Labels; ignored, the split does not depend on them.

    Returns
    -------
    tuple
        `(train_idx, test_idx)`, both `torch.LongTensor`.

    Notes
    -----
    The split is hard-coded: training uses nodes 0 to 59 and testing
    uses nodes 6300 to 7299 (upper bounds of the ranges are exclusive).
    """
    # Hard-coded split boundaries; range() excludes the upper bound.
    train_nodes = range(60)
    test_nodes = range(6300, 7300)
    return torch.LongTensor(train_nodes), torch.LongTensor(test_nodes)

pubmed

__init__(name='pubmed', train_batch_size=64, test_batch_size=64, *args, **kwargs)

get_train_test_idx(X=None, y=None, *args, **kwargs)

`__init__(name='pubmed', train_batch_size=64, test_batch_size=64, *args, **kwargs)`

`get_train_test_idx(X=None, y=None, *args, **kwargs)`