import os
from random import shuffle

import numpy as np
import scipy.sparse
import torch
from torch.utils.data import Dataset


class NGramDataset(Dataset):
    """Dataset of ``.npz`` feature files listed in a CSV index.

    Each non-blank line of the index has the form ``<filepath>,<label>``
    where ``<label>`` is an integer. The list of samples is shuffled once,
    in place, when the dataset is constructed (using the global ``random``
    state), so indices do not follow the order of the index file.
    """

    def __init__(self, csv_filepath: str):
        """Read the index file and build the shuffled list of samples.

        Args:
            csv_filepath: Path to a text file with one ``filepath,label``
                pair per line.

        Raises:
            ValueError: If a non-blank line has no comma or its label is
                not an integer.
        """
        super().__init__()
        self.all_files = []
        with open(csv_filepath, "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                # Split on the LAST comma only: the label is always the
                # final field, so file paths containing commas still parse.
                filepath, label = line.rsplit(",", 1)
                self.all_files.append((filepath, int(label)))
        shuffle(self.all_files)

    def __len__(self) -> int:
        """Return the number of samples listed in the index."""
        return len(self.all_files)

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index: Position in the (shuffled) sample list.

        Returns:
            A tuple ``(x, y)``: ``x`` is the array stored under ``"arr_0"``
            in the sample's ``.npz`` file as a float32 tensor with all
            size-1 dimensions squeezed out; ``y`` is the integer label as a
            scalar tensor.
        """
        to_load, y = self.all_files[index]

        # Step 1: Load the .npz file. The archive is used as a context
        # manager so its underlying file handle is closed right away
        # instead of leaking one descriptor per sample.
        with np.load(to_load) as archive:
            matrix = archive["arr_0"]

        # Step 2: Convert the dense matrix to a PyTorch tensor
        x = torch.tensor(matrix, dtype=torch.float)
        x = x.squeeze()

        return x, torch.tensor(y)