Research data available for everyone.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. #!/usr/bin/python3
  2. # Libraries
  3. import argparse
  4. import torch
  5. import os
  6. import sys
  7. import json
  8. import joblib
  9. import numpy as np
  10. current = os.path.dirname(os.path.realpath(__file__))
  11. parent = os.path.dirname(current)
  12. sys.path.append(parent)
  13. from boolean_classifier.architectures.ffnn import FFNN
  14. from boolean_classifier.feature_extractors.boolean_ngram_feature_extractor import BooleanNGramFeatureExtractor
  15. from boolean_classifier.feature_extractors.ngram_feature_extractor import NGramFeatureExtractor
  16. # Functions
  17. def get_header(args, input_name, output_name, free_features_indices):
  18. '''Get the header for the VNN file'''
  19. str = f'; Input file: {args.input_file}\n'
  20. str += f'; Free features: {args.free}\n'
  21. str += f'; Free features indices:'
  22. for i in range(len(free_features_indices)):
  23. str += f' {free_features_indices[i]}'
  24. str += f'\n'
  25. str += f'; Total features: {args.total_features}\n'
  26. str += f'; Feature type: {args.feature_type}\n'
  27. str += f'; Input name: {input_name}\n'
  28. str += f'; Output name: {output_name}\n'
  29. str += f'; Epsilon: {args.epsilon}\n'
  30. str += f'; Random seed: {args.seed}\n'
  31. return str
  32. def get_input_vars(args, input_name):
  33. '''Get the input variables for the VNN file'''
  34. str = f'\n; Input variables:\n\n'
  35. for i in range(args.total_features):
  36. str += f'(declare-const {input_name}_{i} Real)\n'
  37. return str
  38. def get_output_vars(output_name):
  39. '''Get the output variables for the VNN file'''
  40. str = f'\n; Output variables:\n\n'
  41. str += f'(declare-const {output_name}_0 Real)\n'
  42. str += f'(declare-const {output_name}_1 Real)\n'
  43. return str
  44. def select_free_features(args, features):
  45. '''Select features to be free but only from features that are zero'''
  46. if args.list_ff_indices is not None: # If list of free feature indices is provided, use it. Do not check if they are zero or if it matches the number of arts.free features.
  47. indices = args.list_ff_indices
  48. # assert len(indices) == args.free, "Number of free features does not match the length of the provided indices."
  49. assert all(i >= 0 and i < args.total_features for i in indices), "Some indices are out of bounds."
  50. else:
  51. zero_indices = np.where(features == 0)[1] # For numpy arrays
  52. # print(f'Selecting {args.free} out of {len(zero_indices)} features with zero value')
  53. assert len(zero_indices) >= args.free, "Not enough zero features to select from."
  54. indices = np.random.choice(zero_indices, size=args.free, replace=False)
  55. # print('Free features indices:', random_indices)
  56. free_features = [False] * args.total_features
  57. for i in indices:
  58. free_features[i] = True
  59. return free_features, indices
  60. def get_input_constraints(args, input_name, features, free_features):
  61. '''Get the input constraints for the VNN file'''
  62. str = f'\n; Input constraints:\n\n'
  63. # Set ranges for the free features
  64. for i, free in enumerate(free_features):
  65. if free:
  66. # Standard constraint X >= 0 and <= 1
  67. str += f'(assert (>= {input_name}_{i} {max(0, features[0, i] - args.epsilon)}))\n'
  68. str += f'(assert (<= {input_name}_{i} {min(1, features[0, i] + args.epsilon)}))\n'
  69. # Additional constraint to standard to ensure 0 or 1
  70. #str += f'(assert (or (<= {input_name}_{i} {max(0, dense_features[0, i] - args.epsilon)})'
  71. #str += f' (>= {input_name}_{i} {min(1, dense_features[0, i] + args.epsilon)})))\n'
  72. else:
  73. str += f'(assert (>= {input_name}_{i} {features[0, i]}))\n'
  74. str += f'(assert (<= {input_name}_{i} {features[0, i]}))\n'
  75. return str
  76. def get_output_constraints(output_name, predicted_label):
  77. '''Get the output constraints for the VNN file'''
  78. str = f'\n; Output constraints:\n\n'
  79. if predicted_label == 1:
  80. str += f'(assert (>= {output_name}_0 0.55))\n'
  81. str += f'(assert (<= {output_name}_0 1.0))\n'
  82. str += f'(assert (>= {output_name}_1 0.0))\n'
  83. str += f'(assert (<= {output_name}_1 0.45))\n'
  84. else:
  85. str += f'(assert (>= {output_name}_0 0.0))\n'
  86. str += f'(assert (<= {output_name}_0 0.45))\n'
  87. str += f'(assert (>= {output_name}_1 0.55))\n'
  88. str += f'(assert (<= {output_name}_1 1.0))\n'
  89. return str
  90. def load_configuration(configuration_filepath: str) -> dict:
  91. with open(configuration_filepath, "r") as configuration_file:
  92. configuration = json.load(configuration_file)
  93. return configuration
  94. class VNNLIBargs():
  95. def __init__(self, input_file, model_path, config_file, feature_type, free, total_features, list_ff_indices, epsilon=1, output_file='out.vnnlib', seed=None):
  96. self.input_file = input_file
  97. self.model_path = model_path
  98. self.config_file = config_file
  99. self.feature_type = feature_type
  100. self.free = free
  101. self.total_features = total_features
  102. self.list_ff_indices = list_ff_indices
  103. self.epsilon = epsilon
  104. self.output_file = output_file
  105. self.seed = seed
  106. def create_vnnlib(args, features, predicted_label):
  107. input_name, output_name = "X", "Y"
  108. np.random.seed(args.seed)
  109. free_features, free_features_indices = select_free_features(args, features)
  110. with open(args.output_file, 'w') as output_file:
  111. output_file.write(get_header(args, input_name, output_name, free_features_indices))
  112. output_file.write(get_input_vars(args, input_name))
  113. output_file.write(get_output_vars(output_name))
  114. output_file.write(get_input_constraints(args, input_name, features, free_features))
  115. output_file.write(get_output_constraints(output_name, predicted_label))
  116. # Main
  117. if __name__ == '__main__' :
  118. # Parse arguments
  119. parser = argparse.ArgumentParser(description = 'Generates data.')
  120. # Optional arguments
  121. parser.add_argument('input_file', type = str, help = 'Input binary file name')
  122. parser.add_argument('model_path', type = str, help = 'Path to the model .pth file')
  123. parser.add_argument('config_file', type = str, help = 'Configuration file containing the hyperparameters of the model')
  124. parser.add_argument('feature_type', type = str, help = 'Type of features to extract. Select one of the following: {BooleanBigrams, Bigrams}')
  125. parser.add_argument('free', type = int, help = 'Number of free features')
  126. parser.add_argument('total_features', type = int, help = 'Total number of features')
  127. parser.add_argument('-l', '--list_ff_indices', nargs = '+', default = None, type = int, help = 'List of free feature indices (default: None)', dest = 'list_ff_indices')
  128. parser.add_argument('-e', '--epsilon', default = 1, type = int, help = 'Input epsilon variation (default: 1)', dest = 'epsilon')
  129. parser.add_argument('-o', '--output_file', default = 'out.vnnlib', type = str, help = 'output file name (default: out.vnnlib)', dest = 'output_file')
  130. parser.add_argument('-s', '--seed', default = None, type = int, help = 'Random seed', dest = 'seed')
  131. args = parser.parse_args()
  132. # Set device
  133. device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  134. print("Device: ", device)
  135. configuration = load_configuration(args.config_file)
  136. # Load feature extractor
  137. if "feature_selector" in configuration:
  138. config = '../boolean_classifier/data/BODMAS/feature_selectors/boolean_bigrams/boolean_bigrams_feature_selector_k=1000.pkl'
  139. feature_selector = joblib.load(config)
  140. else:
  141. feature_selector = None
  142. # Load model
  143. model = FFNN(configuration)
  144. model = model.to(device)
  145. model.load_state_dict(torch.load(args.model_path, weights_only=True))
  146. model.eval()
  147. with open(args.input_file, "rb") as f:
  148. bytez = f.read()
  149. if args.feature_type == "BooleanBigrams":
  150. feature_extractor = BooleanNGramFeatureExtractor(N=2)
  151. sparse_features = feature_extractor.feature_vector(bytez)
  152. features = sparse_features.todense()
  153. elif args.feature_type == "Bigrams":
  154. feature_extractor = NGramFeatureExtractor(N=2)
  155. features = feature_extractor.feature_vector(bytez)
  156. else:
  157. raise NotImplementedError("Select one of the following: {BooleanBigrams, Bigrams}")
  158. if feature_selector is not None:
  159. features = feature_selector.transform(torch.Tensor(features))
  160. x = torch.tensor(features, dtype=torch.float).to(device)
  161. probs = model.predict(x)
  162. y_pred = probs.argmax(dim=1)
  163. print("Predicted label: ", y_pred, probs)
  164. create_vnnlib(args, features, y_pred[0].item())

Powered by TurnKey Linux.