| 123456789101112131415161718192021222324 |
- import sys
- sys.path.append("../../../")
- from boolean_classifier.feature_extractors.ember_feature_extractor import EMBERFeatureExtractor
-
# Input manifests, one per dataset split. Each non-blank line is expected to
# be "<exe_filepath>,<label>".
training_filepaths = ["bodmas_training_set.csv", "bodmas_validation_set.csv", "bodmas_test_set.csv"]
# Output CSVs; output_filepaths[k] receives the features of training_filepaths[k].
output_filepaths = ["bodmas_ember_training_set.csv", "bodmas_ember_validation_set.csv", "bodmas_ember_test_set.csv"]


def _extract_split(manifest_filepath, output_filepath, extractor_factory):
    """Write one feature row per executable listed in a manifest.

    Each output row is the comma-separated feature vector followed by the
    label taken from the manifest line.

    Args:
        manifest_filepath: Text file with one "<exe_filepath>,<label>" entry
            per line. Blank lines are skipped.
        output_filepath: Destination CSV; created or truncated.
        extractor_factory: Zero-argument callable returning an object with a
            ``feature_vector(bytez)`` method.

    Raises:
        ValueError: If a non-blank manifest line does not split into exactly
            two comma-separated fields.
        OSError: If the manifest, the output or a listed executable cannot
            be opened.
    """
    # The manifest is opened first so that a missing manifest does not
    # truncate an existing output file.
    with open(manifest_filepath, "r") as manifest, open(output_filepath, "w") as output_file:
        for j, line in enumerate(manifest):
            row = line.strip()
            if not row:
                continue
            exe_filepath, label = row.split(",")
            print(j, exe_filepath, label)
            with open(exe_filepath, "rb") as exe_file:
                bytez = exe_file.read()
            try:
                # NOTE(review): a fresh extractor is built per sample, as in
                # the original script; hoist it if it is confirmed stateless.
                features = extractor_factory().feature_vector(bytez)
            except ValueError as e:
                # Best effort: report the sample that failed and move on.
                print(e)
                continue
            # Assemble the whole row before writing so that a failure can
            # never leave a partial row in the output.
            fields = ["{}".format(feature) for feature in features]
            fields.append(label)
            output_file.write(",".join(fields) + "\n")


def main(input_filepaths=None, result_filepaths=None, extractor_factory=None):
    """Convert every manifest into its matching feature CSV.

    Inputs and outputs are paired by position. The earlier nested loops wrote
    every manifest into every output in "w" mode, so all outputs ended up
    holding only the last manifest's features.

    Args:
        input_filepaths: Manifests to read; defaults to ``training_filepaths``.
        result_filepaths: CSVs to write; defaults to ``output_filepaths``.
        extractor_factory: Zero-argument callable producing a feature
            extractor; defaults to ``EMBERFeatureExtractor``.

    Raises:
        ValueError: If the two path lists differ in length.
    """
    if input_filepaths is None:
        input_filepaths = training_filepaths
    if result_filepaths is None:
        result_filepaths = output_filepaths
    if extractor_factory is None:
        extractor_factory = EMBERFeatureExtractor
    if len(input_filepaths) != len(result_filepaths):
        raise ValueError("input and output file lists must have the same length")
    for manifest_filepath, output_filepath in zip(input_filepaths, result_filepaths):
        _extract_split(manifest_filepath, output_filepath, extractor_factory)


if __name__ == "__main__":
    main()
|