"""Report the top-k highest- and lowest-scoring shots for each DSDI feature.

Reads a prediction dump whose lines alternate between a label line
("shotNNN label1, label2, ...") and a score line (id "s1, s2, ..."),
groups (score, shot_id) pairs by predicted label, and prints the
``top_k`` best and worst shots per feature.
"""
import numpy as np
from collections import defaultdict
# NOTE(review): numpy and the sklearn metrics below are unused in this
# script; kept in case a later revision computes P/R/F1 from these scores.
from sklearn.metrics import f1_score, precision_score, recall_score, precision_recall_fscore_support

base_dir = "/vulcanscratch/lzhao/data/DSDI20/"
prediction_file = base_dir + "predictions/frames_top_preds.txt"

# The DSDI feature vocabulary that can appear as predicted labels.
features = ["damage", "flooding or water damage", "landslide", "road washout",
            "rubble or debris", "smoke or fire", "dirt", "grass", "lava",
            "rocks", "sand", "shrubs", "snow or ice", "trees", "bridge",
            "building", "dam or levee", "pipes",
            "utility or power lines or electric towers", "railway",
            "wireless or radio communication towers", "water tower",
            "aircraft", "boat", "car", "truck", "flooding", "lake or pond",
            "ocean", "puddle", "river or stream", "road"]

top_k = 10  # number of highest/lowest scoring shots reported per feature


def _load_scores(lines):
    """Parse alternating label/score lines into per-feature score lists.

    Args:
        lines: iterable of text lines; a line whose first token starts with
            "shot" carries the predicted labels, and the following line
            carries the matching comma-separated scores.

    Returns:
        (feature2score, count) where feature2score maps each predicted
        label to a list of (score, shot_id) tuples and count is the number
        of shot (label) lines seen.
    """
    feature2score = defaultdict(list)
    count = 0
    pred_list = None
    curr_shot_id = None
    for line in lines:
        # Strip the trailing newline BEFORE splitting: otherwise the last
        # label of every shot keeps "\n" and e.g. "road" / "road\n" end up
        # as two distinct features (this was a real bug).
        shot_id, text = line.rstrip("\n").split(" ", 1)
        if shot_id.startswith("shot"):
            # Label line: remember labels; scores arrive on the next line.
            pred_list = text.split(", ")
            curr_shot_id = shot_id
            count += 1
            continue
        if pred_list is None:
            # Score line before any shot line: malformed input — skip
            # instead of raising NameError as the original did.
            continue
        for label, score in zip(pred_list, text.split(", ")):
            feature2score[label].append((float(score), curr_shot_id))
    return feature2score, count


def _report(feature2score, count):
    """Print, for every feature, the top_k highest and lowest scoring shots."""
    print("Number of outputs: ", count)
    for feature in feature2score:
        sorted_list = sorted(feature2score[feature], key=lambda x: x[0])
        low_k = sorted_list[:top_k]
        high_k = sorted_list[-top_k:]
        print("\nFeature {} highest predictions:".format(feature))
        for score, name in reversed(high_k):
            print("{}: {}".format(name, str(score)))
        print("\nFeature {} lowest predictions:".format(feature))
        for score, name in low_k:
            print("{}: {}".format(name, str(score)))


def main():
    """Load the prediction file and print the per-feature rankings."""
    with open(prediction_file) as f_pred:
        feature2score, count = _load_scores(f_pred)
    _report(feature2score, count)


if __name__ == "__main__":
    main()