josep
/
OpenData


			
				
					
						
						
							123456789101112131415161718192021222324
							import sys
sys.path.append("../../../")
from boolean_classifier.feature_extractors.ember_feature_extractor import EMBERFeatureExtractor

# Input splits: each row of these CSV files is "<path to executable>,<label>".
training_filepaths = ["bodmas_training_set.csv", "bodmas_validation_set.csv", "bodmas_test_set.csv"]
# Destination feature files, index-aligned with training_filepaths.
output_filepaths = ["bodmas_ember_training_set.csv", "bodmas_ember_validation_set.csv", "bodmas_ember_test_set.csv"]


def _write_feature_rows(input_filepath, output_filepath):
    """Extract a feature vector for every executable listed in one split.

    Reads ``input_filepath`` line by line; each non-empty line must be
    ``<exe_filepath>,<label>``. For each executable, the raw bytes are passed
    to ``EMBERFeatureExtractor().feature_vector`` and one row is written to
    ``output_filepath`` consisting of every feature followed by a comma, then
    the label and a newline.

    Samples for which feature extraction raises ``ValueError`` are reported
    on stdout and skipped. Errors opening or reading files, and rows that do
    not split into exactly two comma-separated fields, propagate to the
    caller.
    """
    # Open the input first so a missing input does not truncate the output.
    with open(input_filepath, "r") as input_file, open(output_filepath, "w") as output_file:
        for j, line in enumerate(input_file):
            row = line.strip()
            if not row:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            exe_filepath, label = row.split(",")
            print(j, exe_filepath, label)
            with open(exe_filepath, "rb") as exe_file:
                bytez = exe_file.read()
            try:
                # A fresh extractor per sample; whether one instance can be
                # safely reused is not visible from this script.
                features = EMBERFeatureExtractor().feature_vector(bytez)
                # Build the whole row before writing so that a failure while
                # iterating the features never leaves a partial row on disk.
                feature_columns = "".join("{},".format(feature) for feature in features)
            except ValueError as e:
                print(e)
            else:
                output_file.write(feature_columns + str(label) + "\n")


# Pair each split with its own output file. Iterating the two lists in
# lockstep is required: nesting the loops would write every split into every
# output file, leaving only the last split's data in all of them.
for training_filepath, output_filepath in zip(training_filepaths, output_filepaths):
    _write_feature_rows(training_filepath, output_filepath)