bepuca · June 14, 2022 15:19
diff --git a/load_coco_dataset.py b/load_coco_dataset.py
 # Copyright © 2022 Bernat Puig Camps
 import json
 from pathlib import Path
 from typing import Tuple

 import pandas as pd

 # Default dataset directory; it is expected to contain "annotations.json".
 DATA_PATH = Path("./coco_val2017")


 def load_dataset(
    data_path: Path = DATA_PATH,
 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the COCO style json dataset and transform it into convenient DataFrames

    The file ``<data_path>/annotations.json`` is parsed and its "images" and
    "annotations" lists are converted to one DataFrame each. Every "bbox" is
    read as ``[xmin, ymin, width, height]`` and converted to corner
    coordinates (``xmax = xmin + width``, ``ymax = ymin + height``).

    :param data_path: Directory that contains "annotations.json". A plain
        string is accepted as well as a ``Path``.
    :return (images_df, targets_df):
        images_df: Columns "image_id" and "file_name"
        targets_df: Columns
            "target_id", "image_id", "label_id", "xmin", "ymin", "xmax", "ymax"
            where "target_id" is the position of the annotation in the file
            and "label_id" is the annotation's "category_id".
        An empty "images" or "annotations" list yields an empty DataFrame
        that still has the columns listed above.
    :raises FileNotFoundError: If "annotations.json" does not exist.
    :raises KeyError: If the json lacks the "images" or "annotations" key, or
        a non-empty list lacks one of the required fields.
    """
    image_columns = ["image_id", "file_name"]
    target_columns = [
        "target_id", "image_id", "label_id", "xmin", "ymin", "xmax", "ymax"
    ]
    annotations_path = Path(data_path) / "annotations.json"

    # JSON interchange text is UTF-8 (RFC 8259); do not rely on the platform's
    # locale encoding, which is not UTF-8 everywhere.
    with open(annotations_path, "r", encoding="utf-8") as f:
        targets_json = json.load(f)

    image_records = targets_json["images"]
    if image_records:
        images_df = pd.DataFrame.from_records(image_records)
        images_df = images_df.rename(columns={"id": "image_id"})
        images_df = images_df[image_columns]
    else:
        # An empty record list produces a frame without any columns, so the
        # selection above would fail; build the documented schema directly.
        images_df = pd.DataFrame(columns=image_columns)

    annotation_records = targets_json["annotations"]
    if not annotation_records:
        # Same reasoning as for images: there is no "bbox" column to expand.
        return images_df, pd.DataFrame(columns=target_columns)

    targets_df = pd.DataFrame.from_records(annotation_records)
    # Expand each [xmin, ymin, w, h] list into four columns.
    targets_df[["xmin", "ymin", "w", "h"]] = targets_df["bbox"].tolist()
    targets_df["xmax"] = targets_df["xmin"] + targets_df["w"]
    targets_df["ymax"] = targets_df["ymin"] + targets_df["h"]
    # The frame has a fresh 0..n-1 index, so the index is the row position.
    targets_df["target_id"] = targets_df.index
    targets_df = targets_df.rename(columns={"category_id": "label_id"})
    targets_df = targets_df[target_columns]

    return images_df, targets_df
	# Copyright © 2022 Bernat Puig Camps
	import json
	from pathlib import Path
	from typing import Tuple

	import pandas as pd

	# Default dataset directory; it is expected to contain "annotations.json".
	DATA_PATH = Path("./coco_val2017")


	def load_dataset(
	    data_path: Path = DATA_PATH,
	) -> Tuple[pd.DataFrame, pd.DataFrame]:
	    """Read the COCO style json dataset and transform it into convenient DataFrames

	    The file ``<data_path>/annotations.json`` is parsed and its "images" and
	    "annotations" lists are converted to one DataFrame each. Every "bbox" is
	    read as ``[xmin, ymin, width, height]`` and converted to corner
	    coordinates (``xmax = xmin + width``, ``ymax = ymin + height``).

	    :param data_path: Directory that contains "annotations.json". A plain
	        string is accepted as well as a ``Path``.
	    :return (images_df, targets_df):
	        images_df: Columns "image_id" and "file_name"
	        targets_df: Columns
	            "target_id", "image_id", "label_id", "xmin", "ymin", "xmax", "ymax"
	            where "target_id" is the position of the annotation in the file
	            and "label_id" is the annotation's "category_id".
	        An empty "images" or "annotations" list yields an empty DataFrame
	        that still has the columns listed above.
	    :raises FileNotFoundError: If "annotations.json" does not exist.
	    :raises KeyError: If the json lacks the "images" or "annotations" key, or
	        a non-empty list lacks one of the required fields.
	    """
	    image_columns = ["image_id", "file_name"]
	    target_columns = [
	        "target_id", "image_id", "label_id", "xmin", "ymin", "xmax", "ymax"
	    ]
	    annotations_path = Path(data_path) / "annotations.json"

	    # JSON interchange text is UTF-8 (RFC 8259); do not rely on the platform's
	    # locale encoding, which is not UTF-8 everywhere.
	    with open(annotations_path, "r", encoding="utf-8") as f:
	        targets_json = json.load(f)

	    image_records = targets_json["images"]
	    if image_records:
	        images_df = pd.DataFrame.from_records(image_records)
	        images_df = images_df.rename(columns={"id": "image_id"})
	        images_df = images_df[image_columns]
	    else:
	        # An empty record list produces a frame without any columns, so the
	        # selection above would fail; build the documented schema directly.
	        images_df = pd.DataFrame(columns=image_columns)

	    annotation_records = targets_json["annotations"]
	    if not annotation_records:
	        # Same reasoning as for images: there is no "bbox" column to expand.
	        return images_df, pd.DataFrame(columns=target_columns)

	    targets_df = pd.DataFrame.from_records(annotation_records)
	    # Expand each [xmin, ymin, w, h] list into four columns.
	    targets_df[["xmin", "ymin", "w", "h"]] = targets_df["bbox"].tolist()
	    targets_df["xmax"] = targets_df["xmin"] + targets_df["w"]
	    targets_df["ymax"] = targets_df["ymin"] + targets_df["h"]
	    # The frame has a fresh 0..n-1 index, so the index is the row position.
	    targets_df["target_id"] = targets_df.index
	    targets_df = targets_df.rename(columns={"category_id": "label_id"})
	    targets_df = targets_df[target_columns]

	    return images_df, targets_df