|
def run_naive_folds_train(self, model, num_folds):
    """
    Train the naive baseline once per fold and save a per-fold result table.

    For every fold the model is trained through ``self.basic_training`` and
    saved as ``model<fold>.pt`` in the destination folder. One result row is
    recorded per fold, followed by an ``Overall`` row, and the table is
    written to ``fold_results.tsv`` in the same folder.

    Parameters
    ----------
    model : NaiveModel object
        Naive Baseline model
    num_folds : Int
        Number of folds to run.
    """
    # `folder` and `destination_folder4` are not defined in this method;
    # presumably module-level settings -- TODO confirm.
    dish_list = sorted(
        dish for dish in os.listdir(folder) if not dish.startswith(".")
    )
    destination_folder = destination_folder4

    result_columns = [
        "Fold",
        "Test_Accuracy",
        "Correct_Predictions",
        "Num_Actions",
    ]
    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append was removed in pandas 2.0 and copied the
    # whole frame on every call.
    fold_rows = []

    overall_predictions = 0
    overall_actions = 0

    for fold in range(num_folds):
        start = datetime.now()

        # Model saved path
        saved_file_path = os.path.join(
            destination_folder, "model" + str(fold + 1) + ".pt"
        )

        # NOTE(review): every fold receives the complete dish list, so no
        # dish is held out and all folds train on identical data. Confirm
        # whether a per-fold train/test split is intended here.
        train_dish_list = dish_list.copy()
        print("Fold [{}/{}]".format(fold + 1, num_folds))

        print("-------Training-------")

        self.basic_training(
            model,
            train_dish_list,
            saved_file_path,
        )

        # NOTE(review): `total_correct_predictions`, `total_actions` and
        # `test_accuracy` are never assigned in this method. Unless they are
        # module-level names, the lines below raise NameError; an evaluation
        # step that produces them appears to be missing after training.
        overall_predictions += total_correct_predictions
        overall_actions += total_actions

        fold_rows.append(
            {
                "Fold": fold + 1,
                "Test_Accuracy": test_accuracy,
                "Correct_Predictions": total_correct_predictions,
                "Num_Actions": total_actions,
            }
        )

        elapsed_time = datetime.now() - start
        elapsed_duration = divmod(elapsed_time.total_seconds(), 60)

        print(
            "Time elapsed: {} mins and {:.2f} secs".format(
                elapsed_duration[0], elapsed_duration[1]
            )
        )
        print("--------------")

    # Guard against division by zero when nothing was evaluated
    # (e.g. num_folds == 0); report 0.0 instead of crashing.
    if overall_actions:
        overall_accuracy = overall_predictions * 100 / overall_actions
    else:
        overall_accuracy = 0.0

    print("Overall Model Accuracy: {:.2f}".format(overall_accuracy))

    fold_rows.append(
        {
            "Fold": 'Overall',
            "Test_Accuracy": overall_accuracy,
            "Correct_Predictions": overall_predictions,
            "Num_Actions": overall_actions,
        }
    )

    fold_result_df = pd.DataFrame(fold_rows, columns=result_columns)

    save_result_path = os.path.join(destination_folder, "fold_results.tsv")

    # Saving the results
    fold_result_df.to_csv(save_result_path, sep="\t", index=False, encoding="utf-8")

    print("Fold Results saved in ==>" + save_result_path)
As far as I can see, there is no difference between the folds: every iteration trains on a full copy of `dish_list`, so nothing is held out. (We would expect a different train/dev/test split in each fold.)
alignment-models/train.py
Lines 812 to 916 in 8165592