#!/usr/bin/python3 # Libraries import argparse import torch import os import sys import json import joblib import numpy as np current = os.path.dirname(os.path.realpath(__file__)) parent = os.path.dirname(current) sys.path.append(parent) from boolean_classifier.architectures.ffnn import FFNN from boolean_classifier.feature_extractors.boolean_ngram_feature_extractor import BooleanNGramFeatureExtractor from boolean_classifier.feature_extractors.ngram_feature_extractor import NGramFeatureExtractor # Functions def get_header(args, input_name, output_name, free_features_indices): '''Get the header for the VNN file''' str = f'; Input file: {args.input_file}\n' str += f'; Free features: {args.free}\n' str += f'; Free features indices:' for i in range(len(free_features_indices)): str += f' {free_features_indices[i]}' str += f'\n' str += f'; Total features: {args.total_features}\n' str += f'; Feature type: {args.feature_type}\n' str += f'; Input name: {input_name}\n' str += f'; Output name: {output_name}\n' str += f'; Epsilon: {args.epsilon}\n' str += f'; Random seed: {args.seed}\n' return str def get_input_vars(args, input_name): '''Get the input variables for the VNN file''' str = f'\n; Input variables:\n\n' for i in range(args.total_features): str += f'(declare-const {input_name}_{i} Real)\n' return str def get_output_vars(output_name): '''Get the output variables for the VNN file''' str = f'\n; Output variables:\n\n' str += f'(declare-const {output_name}_0 Real)\n' str += f'(declare-const {output_name}_1 Real)\n' return str def select_free_features(args, features): '''Select features to be free but only from features that are zero''' if args.list_ff_indices is not None: # If list of free feature indices is provided, use it. Do not check if they are zero or if it matches the number of arts.free features. indices = args.list_ff_indices # assert len(indices) == args.free, "Number of free features does not match the length of the provided indices." assert all(i >= 0 and i < args.total_features for i in indices), "Some indices are out of bounds." else: zero_indices = np.where(features == 0)[1] # For numpy arrays # print(f'Selecting {args.free} out of {len(zero_indices)} features with zero value') assert len(zero_indices) >= args.free, "Not enough zero features to select from." indices = np.random.choice(zero_indices, size=args.free, replace=False) # print('Free features indices:', random_indices) free_features = [False] * args.total_features for i in indices: free_features[i] = True return free_features, indices def get_input_constraints(args, input_name, features, free_features): '''Get the input constraints for the VNN file''' str = f'\n; Input constraints:\n\n' # Set ranges for the free features for i, free in enumerate(free_features): if free: # Standard constraint X >= 0 and <= 1 str += f'(assert (>= {input_name}_{i} {max(0, features[0, i] - args.epsilon)}))\n' str += f'(assert (<= {input_name}_{i} {min(1, features[0, i] + args.epsilon)}))\n' # Additional constraint to standard to ensure 0 or 1 #str += f'(assert (or (<= {input_name}_{i} {max(0, dense_features[0, i] - args.epsilon)})' #str += f' (>= {input_name}_{i} {min(1, dense_features[0, i] + args.epsilon)})))\n' else: str += f'(assert (>= {input_name}_{i} {features[0, i]}))\n' str += f'(assert (<= {input_name}_{i} {features[0, i]}))\n' return str def get_output_constraints(output_name, predicted_label): '''Get the output constraints for the VNN file''' str = f'\n; Output constraints:\n\n' if predicted_label == 1: str += f'(assert (>= {output_name}_0 0.55))\n' str += f'(assert (<= {output_name}_0 1.0))\n' str += f'(assert (>= {output_name}_1 0.0))\n' str += f'(assert (<= {output_name}_1 0.45))\n' else: str += f'(assert (>= {output_name}_0 0.0))\n' str += f'(assert (<= {output_name}_0 0.45))\n' str += f'(assert (>= {output_name}_1 0.55))\n' str += f'(assert (<= {output_name}_1 1.0))\n' return str def load_configuration(configuration_filepath: str) -> dict: with open(configuration_filepath, "r") as configuration_file: configuration = json.load(configuration_file) return configuration class VNNLIBargs(): def __init__(self, input_file, model_path, config_file, feature_type, free, total_features, list_ff_indices, epsilon=1, output_file='out.vnnlib', seed=None): self.input_file = input_file self.model_path = model_path self.config_file = config_file self.feature_type = feature_type self.free = free self.total_features = total_features self.list_ff_indices = list_ff_indices self.epsilon = epsilon self.output_file = output_file self.seed = seed def create_vnnlib(args, features, predicted_label): input_name, output_name = "X", "Y" np.random.seed(args.seed) free_features, free_features_indices = select_free_features(args, features) with open(args.output_file, 'w') as output_file: output_file.write(get_header(args, input_name, output_name, free_features_indices)) output_file.write(get_input_vars(args, input_name)) output_file.write(get_output_vars(output_name)) output_file.write(get_input_constraints(args, input_name, features, free_features)) output_file.write(get_output_constraints(output_name, predicted_label)) # Main if __name__ == '__main__' : # Parse arguments parser = argparse.ArgumentParser(description = 'Generates data.') # Optional arguments parser.add_argument('input_file', type = str, help = 'Input binary file name') parser.add_argument('model_path', type = str, help = 'Path to the model .pth file') parser.add_argument('config_file', type = str, help = 'Configuration file containing the hyperparameters of the model') parser.add_argument('feature_type', type = str, help = 'Type of features to extract. Select one of the following: {BooleanBigrams, Bigrams}') parser.add_argument('free', type = int, help = 'Number of free features') parser.add_argument('total_features', type = int, help = 'Total number of features') parser.add_argument('-l', '--list_ff_indices', nargs = '+', default = None, type = int, help = 'List of free feature indices (default: None)', dest = 'list_ff_indices') parser.add_argument('-e', '--epsilon', default = 1, type = int, help = 'Input epsilon variation (default: 1)', dest = 'epsilon') parser.add_argument('-o', '--output_file', default = 'out.vnnlib', type = str, help = 'output file name (default: out.vnnlib)', dest = 'output_file') parser.add_argument('-s', '--seed', default = None, type = int, help = 'Random seed', dest = 'seed') args = parser.parse_args() # Set device device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print("Device: ", device) configuration = load_configuration(args.config_file) # Load feature extractor if "feature_selector" in configuration: config = '../boolean_classifier/data/BODMAS/feature_selectors/boolean_bigrams/boolean_bigrams_feature_selector_k=1000.pkl' feature_selector = joblib.load(config) else: feature_selector = None # Load model model = FFNN(configuration) model = model.to(device) model.load_state_dict(torch.load(args.model_path, weights_only=True)) model.eval() with open(args.input_file, "rb") as f: bytez = f.read() if args.feature_type == "BooleanBigrams": feature_extractor = BooleanNGramFeatureExtractor(N=2) sparse_features = feature_extractor.feature_vector(bytez) features = sparse_features.todense() elif args.feature_type == "Bigrams": feature_extractor = NGramFeatureExtractor(N=2) features = feature_extractor.feature_vector(bytez) else: raise NotImplementedError("Select one of the following: {BooleanBigrams, Bigrams}") if feature_selector is not None: features = feature_selector.transform(torch.Tensor(features)) x = torch.tensor(features, dtype=torch.float).to(device) probs = model.predict(x) y_pred = probs.argmax(dim=1) print("Predicted label: ", y_pred, probs) create_vnnlib(args, features, y_pred[0].item())