josep
/
OpenData


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
							#!/usr/bin/python3

# Libraries

import argparse
import torch
import os
import sys
import json
import joblib
import numpy as np

current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)

from boolean_classifier.architectures.ffnn import FFNN

from boolean_classifier.feature_extractors.boolean_ngram_feature_extractor import BooleanNGramFeatureExtractor
from boolean_classifier.feature_extractors.ngram_feature_extractor import NGramFeatureExtractor

# Functions

def get_header(args, input_name, output_name, free_features_indices):
    '''Get the header for the VNN file'''
    str = f'; Input file: {args.input_file}\n'
    str += f'; Free features: {args.free}\n'
    str += f'; Free features indices:'
    for i in range(len(free_features_indices)):
        str += f' {free_features_indices[i]}'
    str += f'\n'
    str += f'; Total features: {args.total_features}\n'
    str += f'; Feature type: {args.feature_type}\n'
    str += f'; Input name: {input_name}\n'
    str += f'; Output name: {output_name}\n'
    str += f'; Epsilon: {args.epsilon}\n'
    str += f'; Random seed: {args.seed}\n'
    return str

def get_input_vars(args, input_name):
    '''Get the input variables for the VNN file'''
    str = f'\n; Input variables:\n\n'
    for i in range(args.total_features):
        str += f'(declare-const {input_name}_{i} Real)\n'
    return str

def get_output_vars(output_name):
    '''Get the output variables for the VNN file'''
    str = f'\n; Output variables:\n\n'
    str += f'(declare-const {output_name}_0 Real)\n'
    str += f'(declare-const {output_name}_1 Real)\n'
    return str

def select_free_features(args, features):
    '''Select features to be free but only from features that are zero'''
    if args.list_ff_indices is not None: # If list of free feature indices is provided, use it. Do not check if they are zero or if it matches the number of arts.free features.
        indices = args.list_ff_indices
        # assert len(indices) == args.free, "Number of free features does not match the length of the provided indices."
        assert all(i >= 0 and i < args.total_features for i in indices), "Some indices are out of bounds."
    else:
        zero_indices = np.where(features == 0)[1] # For numpy arrays
        # print(f'Selecting {args.free} out of {len(zero_indices)} features with zero value')
        assert len(zero_indices) >= args.free, "Not enough zero features to select from."
        indices = np.random.choice(zero_indices, size=args.free, replace=False)
        # print('Free features indices:', random_indices)
    free_features = [False] * args.total_features
    for i in indices:
        free_features[i] = True
    return free_features, indices

def get_input_constraints(args, input_name, features, free_features):
    '''Get the input constraints for the VNN file'''
    str = f'\n; Input constraints:\n\n'
    # Set ranges for the free features
    for i, free in enumerate(free_features):
        if free:
            # Standard constraint X >= 0 and <= 1
            str += f'(assert (>= {input_name}_{i} {max(0, features[0, i] - args.epsilon)}))\n'
            str += f'(assert (<= {input_name}_{i} {min(1, features[0, i] + args.epsilon)}))\n' 
            # Additional constraint to standard to ensure 0 or 1
            #str += f'(assert (or (<= {input_name}_{i} {max(0, dense_features[0, i] - args.epsilon)})'
            #str += f' (>= {input_name}_{i} {min(1, dense_features[0, i] + args.epsilon)})))\n'
        else:
            str += f'(assert (>= {input_name}_{i} {features[0, i]}))\n'
            str += f'(assert (<= {input_name}_{i} {features[0, i]}))\n'
    return str

def get_output_constraints(output_name, predicted_label):
    '''Get the output constraints for the VNN file'''
    str = f'\n; Output constraints:\n\n'
    if predicted_label == 1:
        str += f'(assert (>= {output_name}_0 0.55))\n'
        str += f'(assert (<= {output_name}_0 1.0))\n'
        str += f'(assert (>= {output_name}_1 0.0))\n'
        str += f'(assert (<= {output_name}_1 0.45))\n'
    else:
        str += f'(assert (>= {output_name}_0 0.0))\n'
        str += f'(assert (<= {output_name}_0 0.45))\n'
        str += f'(assert (>= {output_name}_1 0.55))\n'
        str += f'(assert (<= {output_name}_1 1.0))\n'   
    return str

def load_configuration(configuration_filepath: str) -> dict:
    with open(configuration_filepath, "r") as configuration_file:
        configuration = json.load(configuration_file)
    return configuration

class VNNLIBargs():
    def __init__(self, input_file, model_path, config_file, feature_type, free, total_features, list_ff_indices, epsilon=1, output_file='out.vnnlib', seed=None):
        self.input_file = input_file
        self.model_path = model_path
        self.config_file = config_file
        self.feature_type = feature_type
        self.free = free
        self.total_features = total_features
        self.list_ff_indices = list_ff_indices
        self.epsilon = epsilon
        self.output_file = output_file
        self.seed = seed

def create_vnnlib(args, features, predicted_label):
    input_name, output_name = "X", "Y"
    np.random.seed(args.seed)
    free_features, free_features_indices = select_free_features(args, features)
    with open(args.output_file, 'w') as output_file:
        output_file.write(get_header(args, input_name, output_name, free_features_indices))
        output_file.write(get_input_vars(args, input_name))
        output_file.write(get_output_vars(output_name))
        output_file.write(get_input_constraints(args, input_name, features, free_features))
        output_file.write(get_output_constraints(output_name, predicted_label))            


# Main

if __name__ == '__main__' :
    # Parse arguments
    parser = argparse.ArgumentParser(description = 'Generates data.')
    # Optional arguments
    parser.add_argument('input_file', type = str, help = 'Input binary file name')
    parser.add_argument('model_path', type = str, help = 'Path to the model .pth file')
    parser.add_argument('config_file', type = str, help = 'Configuration file containing the hyperparameters of the model')
    parser.add_argument('feature_type', type = str, help = 'Type of features to extract. Select one of the following: {BooleanBigrams, Bigrams}')
    parser.add_argument('free', type = int, help = 'Number of free features')
    parser.add_argument('total_features', type = int, help = 'Total number of features')
    parser.add_argument('-l', '--list_ff_indices', nargs = '+', default = None, type = int, help = 'List of free feature indices (default: None)', dest = 'list_ff_indices')
    parser.add_argument('-e', '--epsilon', default = 1, type = int, help = 'Input epsilon variation (default: 1)', dest = 'epsilon')
    parser.add_argument('-o', '--output_file', default = 'out.vnnlib', type = str, help = 'output file name (default: out.vnnlib)', dest = 'output_file')
    parser.add_argument('-s', '--seed', default = None, type = int, help = 'Random seed', dest = 'seed')
    args = parser.parse_args()

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device: ", device)
    
    configuration = load_configuration(args.config_file)

    # Load feature extractor
    if  "feature_selector" in configuration:
        config = '../boolean_classifier/data/BODMAS/feature_selectors/boolean_bigrams/boolean_bigrams_feature_selector_k=1000.pkl'
        feature_selector = joblib.load(config)
    else:
        feature_selector = None


    # Load model
    model = FFNN(configuration)
    model = model.to(device)
    model.load_state_dict(torch.load(args.model_path, weights_only=True))
    model.eval()

    with open(args.input_file, "rb") as f:
        bytez = f.read()

    if args.feature_type == "BooleanBigrams":
        feature_extractor = BooleanNGramFeatureExtractor(N=2)
        sparse_features = feature_extractor.feature_vector(bytez)
        features = sparse_features.todense()
    elif args.feature_type == "Bigrams":
        feature_extractor = NGramFeatureExtractor(N=2)
        features = feature_extractor.feature_vector(bytez)
    else:
        raise NotImplementedError("Select one of the following: {BooleanBigrams, Bigrams}")

    
    if feature_selector is not None:
        features = feature_selector.transform(torch.Tensor(features))
    x = torch.tensor(features, dtype=torch.float).to(device)
    probs = model.predict(x)
    y_pred = probs.argmax(dim=1)
    print("Predicted label: ", y_pred, probs)

    create_vnnlib(args, features, y_pred[0].item())