| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193 |
- #!/usr/bin/python3
-
- # Libraries
-
- import argparse
- import torch
- import os
- import sys
- import json
- import joblib
- import numpy as np
-
- current = os.path.dirname(os.path.realpath(__file__))
- parent = os.path.dirname(current)
- sys.path.append(parent)
-
- from boolean_classifier.architectures.ffnn import FFNN
-
- from boolean_classifier.feature_extractors.boolean_ngram_feature_extractor import BooleanNGramFeatureExtractor
- from boolean_classifier.feature_extractors.ngram_feature_extractor import NGramFeatureExtractor
-
- # Functions
-
def get_header(args, input_name, output_name, free_features_indices):
    '''Get the header for the VNN file.

    The header is a block of comment lines (each starting with ``;``) that
    records the settings the property was generated with.

    Args:
        args: Object exposing the ``input_file``, ``free``, ``total_features``,
            ``feature_type``, ``epsilon`` and ``seed`` attributes.
        input_name: Prefix of the input variables, written as-is.
        output_name: Prefix of the output variables, written as-is.
        free_features_indices: Iterable with the indices of the free features.

    Returns:
        The header text; every line, including the last, ends with a newline.
    '''
    # Each index is written with one leading space, so an empty selection
    # leaves the line as just '; Free features indices:'.
    indices_text = ''.join(f' {index}' for index in free_features_indices)
    lines = [
        f'; Input file: {args.input_file}',
        f'; Free features: {args.free}',
        f'; Free features indices:{indices_text}',
        f'; Total features: {args.total_features}',
        f'; Feature type: {args.feature_type}',
        f'; Input name: {input_name}',
        f'; Output name: {output_name}',
        f'; Epsilon: {args.epsilon}',
        f'; Random seed: {args.seed}',
    ]
    return '\n'.join(lines) + '\n'
-
def get_input_vars(args, input_name):
    '''Get the input variables for the VNN file.

    Args:
        args: Object exposing ``total_features``, the number of input
            variables to declare.
        input_name: Prefix of the input variables; variable ``i`` is named
            ``<input_name>_<i>``.

    Returns:
        A section title followed by one ``declare-const ... Real`` line per
        input variable, numbered from 0.
    '''
    declarations = [
        f'(declare-const {input_name}_{i} Real)\n'
        for i in range(args.total_features)
    ]
    return '\n; Input variables:\n\n' + ''.join(declarations)
-
def get_output_vars(output_name):
    '''Get the output variables for the VNN file.

    Args:
        output_name: Prefix of the output variables.

    Returns:
        A section title followed by the ``declare-const ... Real`` lines for
        the two outputs ``<output_name>_0`` and ``<output_name>_1``.
    '''
    return (
        '\n; Output variables:\n\n'
        f'(declare-const {output_name}_0 Real)\n'
        f'(declare-const {output_name}_1 Real)\n'
    )
-
def select_free_features(args, features):
    '''Select the features that are left free in the property.

    If ``args.list_ff_indices`` is provided it is used as-is: its entries are
    only checked to be valid indices. They are not checked to point at zero
    features, nor is their count compared with ``args.free``. Otherwise
    ``args.free`` distinct indices are drawn with ``np.random.choice`` (NumPy's
    global random state) among the positions where ``features`` equals zero.

    Args:
        args: Object exposing ``list_ff_indices``, ``free`` and
            ``total_features``.
        features: Array indexable with two axes; the zero positions are taken
            along the second axis.

    Returns:
        A tuple ``(free_features, indices)`` where ``free_features`` is a list
        of ``args.total_features`` booleans (``True`` for a free feature) and
        ``indices`` holds the selected indices (the provided list, or the
        NumPy array that was drawn).

    Raises:
        AssertionError: If a provided index is outside
            ``[0, args.total_features)`` or if fewer than ``args.free`` zero
            features are available.
    '''
    # The checks are raised explicitly rather than written as `assert` so they
    # still run under `python -O`; AssertionError is kept so that existing
    # callers observe the same exception type.
    if args.list_ff_indices is not None:
        indices = args.list_ff_indices
        if not all(0 <= i < args.total_features for i in indices):
            raise AssertionError("Some indices are out of bounds.")
    else:
        # Second element of the np.where result: positions along axis 1.
        zero_indices = np.where(features == 0)[1]
        if len(zero_indices) < args.free:
            raise AssertionError("Not enough zero features to select from.")
        indices = np.random.choice(zero_indices, size=args.free, replace=False)
    free_features = [False] * args.total_features
    for i in indices:
        free_features[i] = True
    return free_features, indices
-
def get_input_constraints(args, input_name, features, free_features):
    '''Get the input constraints for the VNN file.

    Two assertions (a lower and an upper bound) are written per feature:

    * a free feature may range over ``[value - epsilon, value + epsilon]``,
      clipped to ``[0, 1]``;
    * any other feature is pinned to its current value (both bounds equal).

    Args:
        args: Object exposing ``epsilon``.
        input_name: Prefix of the input variables.
        features: Array indexed as ``features[0, i]`` to read feature ``i``.
        free_features: Sequence of truthy/falsy flags, one per feature.

    Returns:
        A section title followed by the ``assert`` lines, in feature order.
    '''
    parts = ['\n; Input constraints:\n\n']
    for i, free in enumerate(free_features):
        value = features[0, i]
        if free:
            # Argument order of max/min is kept (literal first) because it
            # decides which operand is printed when both compare equal.
            lower = max(0, value - args.epsilon)
            upper = min(1, value + args.epsilon)
            # A stricter variant (currently disabled) would additionally
            # force the variable onto one of the two bounds with an `or`
            # assertion, i.e. make the free feature effectively binary.
        else:
            lower = upper = value
        parts.append(f'(assert (>= {input_name}_{i} {lower}))\n')
        parts.append(f'(assert (<= {input_name}_{i} {upper}))\n')
    return ''.join(parts)
-
def get_output_constraints(output_name, predicted_label):
    '''Get the output constraints for the VNN file.

    One of the two outputs is constrained to the band ``[0.55, 1.0]`` and the
    other to ``[0.0, 0.45]``. When ``predicted_label`` equals 1, output 0 gets
    the high band and output 1 the low band; for any other label the bands
    are swapped.

    NOTE(review): the high band goes to the output that does *not* match the
    predicted label, presumably because the property describes the region
    where the prediction flips. Confirm against the intended verifier query.

    Args:
        output_name: Prefix of the output variables.
        predicted_label: Label predicted by the model for the sample.

    Returns:
        A section title followed by four ``assert`` lines (output 0 first).
    '''
    high_band = ('0.55', '1.0')
    low_band = ('0.0', '0.45')
    if predicted_label == 1:
        bands = (high_band, low_band)
    else:
        bands = (low_band, high_band)
    parts = ['\n; Output constraints:\n\n']
    for index, (lower, upper) in enumerate(bands):
        parts.append(f'(assert (>= {output_name}_{index} {lower}))\n')
        parts.append(f'(assert (<= {output_name}_{index} {upper}))\n')
    return ''.join(parts)
-
def load_configuration(configuration_filepath: str) -> dict:
    '''Load a JSON configuration file.

    Args:
        configuration_filepath: Path of the JSON file to read.

    Returns:
        The object decoded by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    '''
    # JSON text is UTF-8; without an explicit encoding, open() falls back to
    # the locale's preferred encoding, which may differ between hosts.
    with open(configuration_filepath, "r", encoding="utf-8") as configuration_file:
        return json.load(configuration_file)
-
class VNNLIBargs:
    '''Plain container for the settings read by the VNN file helpers.

    It exposes the same attribute names as the namespace produced by this
    script's argument parser, so the generator functions can be called
    without going through the command line.

    Attributes:
        input_file: Input binary file name.
        model_path: Path to the model file.
        config_file: Path to the model configuration file.
        feature_type: Name of the feature type.
        free: Number of free features.
        total_features: Total number of features.
        list_ff_indices: Explicit list of free feature indices, or ``None``.
        epsilon: Input variation allowed on free features.
        output_file: Name of the file the property is written to.
        seed: Random seed, or ``None``.
    '''

    def __init__(self, input_file, model_path, config_file, feature_type, free,
                 total_features, list_ff_indices, epsilon=1,
                 output_file='out.vnnlib', seed=None):
        self.input_file = input_file
        self.model_path = model_path
        self.config_file = config_file
        self.feature_type = feature_type
        self.free = free
        self.total_features = total_features
        self.list_ff_indices = list_ff_indices
        self.epsilon = epsilon
        self.output_file = output_file
        self.seed = seed

    def __repr__(self):
        '''Return a debugging representation listing every attribute.'''
        fields = ', '.join(f'{name}={value!r}' for name, value in vars(self).items())
        return f'{type(self).__name__}({fields})'
-
def create_vnnlib(args, features, predicted_label):
    '''Write the VNN file for one sample to ``args.output_file``.

    The file is made of, in order: the header, the input variable
    declarations, the output variable declarations, the input constraints and
    the output constraints. Inputs are named ``X_<i>`` and outputs ``Y_<i>``.

    Args:
        args: Settings object; ``seed`` and ``output_file`` are read here and
            the object is forwarded to the section helpers.
        features: Feature array of the sample, forwarded to the free-feature
            selection and to the input constraints.
        predicted_label: Label predicted for the sample, forwarded to the
            output constraints.

    Side effects:
        Reseeds NumPy's global random state with ``args.seed`` and overwrites
        ``args.output_file``.
    '''
    input_name, output_name = "X", "Y"
    np.random.seed(args.seed)
    free_features, free_features_indices = select_free_features(args, features)
    # Build every section before opening the file: if any helper raises, an
    # existing output file is left untouched instead of being truncated or
    # partially written.
    sections = [
        get_header(args, input_name, output_name, free_features_indices),
        get_input_vars(args, input_name),
        get_output_vars(output_name),
        get_input_constraints(args, input_name, features, free_features),
        get_output_constraints(output_name, predicted_label),
    ]
    with open(args.output_file, 'w') as output_file:
        output_file.writelines(sections)
-
-
- # Main
-
if __name__ == '__main__':
    def int_or_float(text):
        '''Parse the epsilon given on the command line.

        Integer input is kept as ``int`` so that invocations which already
        worked produce the same output; anything else is parsed as ``float``
        so that fractional epsilons are accepted too.
        '''
        try:
            return int(text)
        except ValueError:
            return float(text)

    # Parse arguments
    parser = argparse.ArgumentParser(description='Generates data.')
    # Positional (required) arguments
    parser.add_argument('input_file', type=str, help='Input binary file name')
    parser.add_argument('model_path', type=str, help='Path to the model .pth file')
    parser.add_argument('config_file', type=str, help='Configuration file containing the hyperparameters of the model')
    parser.add_argument('feature_type', type=str, help='Type of features to extract. Select one of the following: {BooleanBigrams, Bigrams}')
    parser.add_argument('free', type=int, help='Number of free features')
    parser.add_argument('total_features', type=int, help='Total number of features')
    # Optional arguments
    parser.add_argument('-l', '--list_ff_indices', nargs='+', default=None, type=int, help='List of free feature indices (default: None)', dest='list_ff_indices')
    parser.add_argument('-e', '--epsilon', default=1, type=int_or_float, help='Input epsilon variation (default: 1)', dest='epsilon')
    parser.add_argument('-o', '--output_file', default='out.vnnlib', type=str, help='output file name (default: out.vnnlib)', dest='output_file')
    parser.add_argument('-s', '--seed', default=None, type=int, help='Random seed', dest='seed')
    args = parser.parse_args()

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device: ", device)

    configuration = load_configuration(args.config_file)

    # Load feature selector (only when the configuration mentions one)
    if "feature_selector" in configuration:
        # NOTE(review): this path is hard-coded and resolved against the
        # current working directory, and the value stored under
        # "feature_selector" in the configuration is not used - confirm
        # whether it should provide the path instead.
        # NOTE: joblib.load unpickles the file; only load selectors that come
        # from a trusted source.
        feature_selector_path = '../boolean_classifier/data/BODMAS/feature_selectors/boolean_bigrams/boolean_bigrams_feature_selector_k=1000.pkl'
        feature_selector = joblib.load(feature_selector_path)
    else:
        feature_selector = None

    # Load model
    model = FFNN(configuration)
    model = model.to(device)
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
    # host (and the other way round) instead of failing while deserializing.
    model.load_state_dict(torch.load(args.model_path, map_location=device, weights_only=True))
    model.eval()

    with open(args.input_file, "rb") as f:
        bytez = f.read()

    # Extract the features of the input file
    if args.feature_type == "BooleanBigrams":
        feature_extractor = BooleanNGramFeatureExtractor(N=2)
        sparse_features = feature_extractor.feature_vector(bytez)
        features = sparse_features.todense()
    elif args.feature_type == "Bigrams":
        feature_extractor = NGramFeatureExtractor(N=2)
        features = feature_extractor.feature_vector(bytez)
    else:
        raise NotImplementedError("Select one of the following: {BooleanBigrams, Bigrams}")

    if feature_selector is not None:
        features = feature_selector.transform(torch.Tensor(features))

    # Predict the label of the sample; it decides the output constraints
    x = torch.tensor(features, dtype=torch.float).to(device)
    probs = model.predict(x)
    y_pred = probs.argmax(dim=1)
    print("Predicted label: ", y_pred, probs)

    create_vnnlib(args, features, y_pred[0].item())
|