import numpy as np
from collections import defaultdict

# Default configuration (unchanged from the original script).
PREDICTION_FILE = "/vulcanscratch/lzhao/data/DSDI20/predictions/frames_top_preds.txt"
REFERENCE_FILE = "/vulcanscratch/lzhao/data/DSDI20/references.txt"
THRESHOLD = 10
FEATURES = [
    "damage", "flooding or water damage", "landslide", "road washout",
    "rubble or debris", "smoke or fire", "dirt", "grass", "lava", "rocks",
    "sand", "shrubs", "snow or ice", "trees", "bridge", "building",
    "dam or levee", "pipes", "utility or power lines or electric towers",
    "railway", "wireless or radio communication towers", "water tower",
    "aircraft", "boat", "car", "truck", "flooding", "lake or pond", "ocean",
    "puddle", "river or stream", "road",
]


def load_references(reference_file):
    """Parse the reference file into {shot_id: [label, ...]}.

    Each line is expected to be "<shot_id> <label>, <label>, ...".
    """
    shot2ref = {}
    with open(reference_file) as f_ref:
        for line in f_ref:
            shot_id, ref_text = line.split(" ", 1)
            shot2ref[shot_id] = ref_text.strip().split(", ")
    return shot2ref


def select_top_predictions(pred_list, score_list, threshold):
    """Return the predicted labels whose score is >= threshold.

    Scores arrive as strings (straight from the file) and are converted
    with float(). Pairs labels with scores positionally; extra trailing
    scores (or labels) are ignored.
    """
    return [label for label, score in zip(pred_list, score_list)
            if float(score) >= threshold]


def binarize(labels, features):
    """Return a 0/1 indicator vector over `features` for `labels`."""
    return [1 if feature in labels else 0 for feature in features]


def collect_indicator_vectors(prediction_file, shot2ref, features, threshold):
    """Read the prediction file and build per-feature indicator vectors.

    The prediction file alternates two kinds of lines:
      * "shot<...> <label>, <label>, ..."  -- remembered until the next line;
      * "<id> <score>, <score>, ..."       -- scores matching the labels above.

    Returns (pred_dict, ref_dict, count) where pred_dict[feature] and
    ref_dict[feature] are parallel 0/1 lists (one entry per score line) and
    count is the number of shot lines seen.

    NOTE(review): assumes every score line is preceded by a shot line and that
    every shot id appears in shot2ref -- otherwise this raises, exactly as the
    original script did.
    """
    pred_dict, ref_dict = defaultdict(list), defaultdict(list)
    count = 0
    with open(prediction_file) as f_pred:
        for line in f_pred:
            shot_id, text = line.split(" ", 1)
            if shot_id.startswith("shot"):
                # Label line: remember the labels and the shot id for the
                # score line that follows.
                pred_list = text.split(", ")
                count += 1
                curr_shot_id = shot_id
                continue
            # Score line: threshold it against the most recent label line.
            top_pred_list = select_top_predictions(
                pred_list, text.split(", "), threshold)
            for feature, flag in zip(features, binarize(top_pred_list, features)):
                pred_dict[feature].append(flag)
            for feature, flag in zip(
                    features, binarize(shot2ref[curr_shot_id], features)):
                ref_dict[feature].append(flag)
    return pred_dict, ref_dict, count


def report_metrics(pred_dict, ref_dict, features, count):
    """Print per-feature and overall F1 / precision / recall.

    Precision and recall are sklearn macro averages over the {0, 1} classes;
    F1 is the harmonic mean of those two macro values (NOT sklearn's macro
    F1 -- this matches the original script's deliberate choice, see its
    commented-out f1_score call).
    """
    # Imported lazily: only this reporting step needs sklearn, so the rest
    # of the module stays importable without it. (The original also imported
    # f1_score, which was never used.)
    from sklearn.metrics import precision_score, recall_score

    print("Number of outputs: ", count)
    print("F1, precision, recall for each feature:")
    f1_list, precision_list, recall_list = [], [], []
    for feature in features:
        precision = precision_score(
            ref_dict[feature], pred_dict[feature],
            average="macro", zero_division=0)
        precision_list.append(precision)
        recall = recall_score(
            ref_dict[feature], pred_dict[feature],
            average="macro", zero_division=0)
        recall_list.append(recall)
        # Harmonic mean, guarding the all-zero case.
        if precision + recall != 0.0:
            f1 = (2 * precision * recall) / (precision + recall)
        else:
            f1 = 0.0
        f1_list.append(f1)
        print(feature)
        print("{:.2f}\t{:.2f}\t{:.2f}".format(f1, precision, recall))
    print("\nOverall f1, precision, recall:")
    print("{:.2f}\t{:.2f}\t{:.2f}".format(
        np.mean(f1_list), np.mean(precision_list), np.mean(recall_list)))


def main(prediction_file=PREDICTION_FILE, reference_file=REFERENCE_FILE,
         threshold=THRESHOLD, features=FEATURES):
    """Run the full evaluation (defaults reproduce the original script)."""
    shot2ref = load_references(reference_file)
    pred_dict, ref_dict, count = collect_indicator_vectors(
        prediction_file, shot2ref, features, threshold)
    report_metrics(pred_dict, ref_dict, features, count)


if __name__ == "__main__":
    main()