"""Build a plain-text reference file of positive labels per image.

Reads ``eccv_val.json`` from ``dataset_dir``, collects for every image the
categories whose label equals 1, prints a few summary statistics, and writes
one ``"<image> <cat1>, <cat2>, ..."`` line per image to ``output_ref_file``.
"""
import json
import pprint  # not used by this script; retained from the original imports
from collections import defaultdict

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic: fall back to a pass-through so the
    # script still runs where tqdm is not installed.
    def tqdm(iterable, *args, **kwargs):
        return iterable


base_dir = "/cfarhomes/lzhao/data/IncidentsDataset/"
dataset_dir = base_dir + "data/"
output_ref_file = base_dir + "val_incident_reference.txt"
label_types = ["incidents"]


def collect_labels(dataset, label_types):
    """Gather positive categories per image plus per-category tallies.

    Args:
        dataset: Mapping of image name to a mapping of label type to a
            ``{category: label}`` mapping.
        label_types: Label-type keys to read from every image entry.

    Returns:
        A tuple ``(img2labels, counters_positive, counters_negative)``:
        ``img2labels`` maps the image name (with ``/`` replaced by ``_``) to
        the list of categories labelled 1; the two counters hold, per
        category, how many labels were equal to 1 and how many were not.

    Raises:
        KeyError: If an image entry lacks one of ``label_types``.
    """
    img2labels = defaultdict(list)
    counters_positive = defaultdict(int)
    counters_negative = defaultdict(int)

    for image_name in tqdm(dataset):
        flat_name = image_name.replace("/", "_")
        for label_type in label_types:
            for category, label in dataset[image_name][label_type].items():
                if label == 1:
                    # An entry is created only on the first positive label,
                    # so images without any positive label get no entry.
                    img2labels[flat_name].append(category)
                    counters_positive[category] += 1
                else:
                    counters_negative[category] += 1

    return img2labels, counters_positive, counters_negative


def format_reference_line(image_name, categories):
    """Return one output line: the image name, a space, then the categories."""
    return "{} {}\n".format(image_name, ", ".join(categories))


def main():
    """Load the dataset, print summary statistics, write the reference file."""
    # Explicit encoding keeps reading and writing independent of the locale.
    with open(dataset_dir + "eccv_val.json", "r", encoding="utf-8") as fp:
        dataset = json.load(fp)

    img2labels, counters_positive, counters_negative = collect_labels(
        dataset, label_types
    )

    print("Number of entries in json: ", len(dataset))
    print(counters_positive)
    print(counters_negative)
    positive = set(counters_positive)
    print("Number of labels:", len(positive))
    print(positive)

    with open(output_ref_file, "w", encoding="utf-8") as f_out:
        for shot, feature_list in img2labels.items():
            f_out.write(format_reference_line(shot, feature_list))


if __name__ == "__main__":
    main()