"""Report the top-k highest- and lowest-scoring shots for each DSDI feature.

Reads a prediction dump whose lines alternate between a label line
("shotNNN label1, label2, ...") and a score line (id "s1, s2, ..."),
groups (score, shot_id) pairs by predicted label, and prints the
``top_k`` best and worst shots per feature.
"""
import numpy as np
from collections import defaultdict
# NOTE(review): numpy and the sklearn metrics below are unused in this
# script; kept in case a later revision computes P/R/F1 from these scores.
from sklearn.metrics import f1_score, precision_score, recall_score, precision_recall_fscore_support

base_dir = "/vulcanscratch/lzhao/data/DSDI20/"
prediction_file = base_dir + "predictions/frames_top_preds.txt"

# The DSDI feature vocabulary that can appear as predicted labels.
features = ["damage", "flooding or water damage", "landslide", "road washout",
            "rubble or debris", "smoke or fire", "dirt", "grass", "lava",
            "rocks", "sand", "shrubs", "snow or ice", "trees", "bridge",
            "building", "dam or levee", "pipes",
            "utility or power lines or electric towers", "railway",
            "wireless or radio communication towers", "water tower",
            "aircraft", "boat", "car", "truck", "flooding", "lake or pond",
            "ocean", "puddle", "river or stream", "road"]

top_k = 10  # number of highest/lowest scoring shots reported per feature


def _load_scores(lines):
    """Parse alternating label/score lines into per-feature score lists.

    Args:
        lines: iterable of text lines; a line whose first token starts with
            "shot" carries the predicted labels, and the following line
            carries the matching comma-separated scores.

    Returns:
        (feature2score, count) where feature2score maps each predicted
        label to a list of (score, shot_id) tuples and count is the number
        of shot (label) lines seen.
    """
    feature2score = defaultdict(list)
    count = 0
    pred_list = None
    curr_shot_id = None
    for line in lines:
        # Strip the trailing newline BEFORE splitting: otherwise the last
        # label of every shot keeps "\n" and e.g. "road" / "road\n" end up
        # as two distinct features (this was a real bug).
        shot_id, text = line.rstrip("\n").split(" ", 1)
        if shot_id.startswith("shot"):
            # Label line: remember labels; scores arrive on the next line.
            pred_list = text.split(", ")
            curr_shot_id = shot_id
            count += 1
            continue
        if pred_list is None:
            # Score line before any shot line: malformed input — skip
            # instead of raising NameError as the original did.
            continue
        for label, score in zip(pred_list, text.split(", ")):
            feature2score[label].append((float(score), curr_shot_id))
    return feature2score, count


def _report(feature2score, count):
    """Print, for every feature, the top_k highest and lowest scoring shots."""
    print("Number of outputs: ", count)
    for feature in feature2score:
        sorted_list = sorted(feature2score[feature], key=lambda x: x[0])
        low_k = sorted_list[:top_k]
        high_k = sorted_list[-top_k:]
        print("\nFeature {} highest predictions:".format(feature))
        for score, name in reversed(high_k):
            print("{}: {}".format(name, str(score)))
        print("\nFeature {} lowest predictions:".format(feature))
        for score, name in low_k:
            print("{}: {}".format(name, str(score)))


def main():
    """Load the prediction file and print the per-feature rankings."""
    with open(prediction_file) as f_pred:
        feature2score, count = _load_scores(f_pred)
    _report(feature2score, count)


if __name__ == "__main__":
    main()