Research data available for everyone.

extract_EMBER_features.py 1.2KB

123456789101112131415161718192021222324
  1. import sys
  2. sys.path.append("../../../")
  3. from boolean_classifier.feature_extractors.ember_feature_extractor import EMBERFeatureExtractor
  4. training_filepaths = ["bodmas_training_set.csv", "bodmas_validation_set.csv", "bodmas_test_set.csv"]
  5. output_filepaths = ["bodmas_ember_training_set.csv", "bodmas_ember_validation_set.csv", "bodmas_ember_test_set.csv"]
  6. for i, training_filepath in enumerate(training_filepaths):
  7. for output_filepath in output_filepaths:
  8. with open(output_filepath, "w") as output_file:
  9. with open(training_filepath, "r") as f:
  10. lines = f.readlines()
  11. for j, line in enumerate(lines):
  12. exe_filepath, label = line.strip().split(",")
  13. print(j, exe_filepath, label)
  14. with open(exe_filepath, "rb") as exe_file:
  15. bytez = exe_file.read()
  16. try:
  17. features = EMBERFeatureExtractor().feature_vector(bytez)
  18. for feature in features:
  19. output_file.write("{},".format(feature))
  20. output_file.write(str(label)+"\n")
  21. except ValueError as e:
  22. print(e)

Powered by TurnKey Linux.