| 123456789101112131415161718192021222324252627282930 |
- from torch.utils.data import Dataset
- import os
- from random import shuffle
- import numpy as np
- import torch
- import scipy.sparse
-
-
class NGramDataset(Dataset):
    """Dataset of ``.npz`` feature files listed in a CSV index.

    Each non-blank line of the index has the form ``<filepath>,<label>``,
    where ``<label>`` is an integer. The entries are shuffled once, in
    place, when the dataset is constructed.
    """

    def __init__(self, csv_filepath: str):
        """Read the CSV index at *csv_filepath* and shuffle its entries.

        Args:
            csv_filepath: Path to a text file with one ``<filepath>,<label>``
                pair per line. Blank lines are ignored.

        Raises:
            ValueError: If a non-blank line does not contain exactly two
                comma-separated fields or its label is not an integer.
        """
        super().__init__()
        self.all_files = []
        with open(csv_filepath, "r") as f:
            # Stream the file instead of materialising it with readlines().
            for line_no, raw_line in enumerate(f, start=1):
                row = raw_line.strip()
                if not row:
                    # A blank line (e.g. an extra trailing newline) is not
                    # an entry; skipping it avoids an unpacking error.
                    continue
                try:
                    filepath, label = row.split(",")
                    label = int(label)
                except ValueError as err:
                    raise ValueError(
                        f"{csv_filepath}:{line_no}: expected "
                        f"'<filepath>,<int label>', got {row!r}"
                    ) from err
                self.all_files.append((filepath, label))
        shuffle(self.all_files)

    def __len__(self):
        """Return the number of (filepath, label) entries."""
        return len(self.all_files)

    def __getitem__(self, index):
        """Load the sample at *index*.

        Returns:
            A ``(x, y)`` pair: ``x`` is the array stored under ``"arr_0"``
            in the sample's ``.npz`` file as a float tensor with all size-1
            dimensions squeezed out; ``y`` is the integer label as a tensor.
        """
        to_load, y = self.all_files[index]
        # "arr_0" is the name np.savez gives to the first positional array.
        # The context manager closes the archive's file handle right away
        # instead of leaving it open until garbage collection.
        with np.load(to_load) as archive:
            matrix = archive["arr_0"]
        x = torch.tensor(matrix, dtype=torch.float)
        x = x.squeeze()
        return x, torch.tensor(y)
|