Skip to content

utils

coco_ground_truth_to_df(ground_truth_file, max_images=200000)

Loads COCO ground truth data and transforms it into a pd.DataFrame object.

Parameters:

Name Type Description Default
ground_truth_file str

Path of ground truth file.

required
max_images int

Maximum number of images to process.

200000

Returns:

Type Description
pd.DataFrame

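pd.DataFrame with one row per annotation: the annotation fields (including the bounding box split into columns) plus the file name, width and height of the image it belongs to.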

Source code in pyodi/core/utils.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def coco_ground_truth_to_df(
    ground_truth_file: str, max_images: int = 200000
) -> pd.DataFrame:
    """Load COCO ground truth data and transform it into a pd.DataFrame.

    Each row of the result is one annotation, extended with the file name and
    size of the image it belongs to. Only the first ``max_images`` images are
    considered; annotations that do not belong to one of them are dropped.

    Args:
        ground_truth_file: Path of ground truth file.
        max_images: Maximum number of images to process.

    Returns:
        pd.DataFrame with the columns ``image_id``, ``area``, ``id``,
        ``category``, ``col_left``, ``row_top``, ``width``, ``height``,
        ``iscrowd`` (only when the annotations carry it), ``img_file_name``,
        ``img_width`` and ``img_height``. When the file holds no annotations
        the frame is empty but still has these columns (without ``iscrowd``).

    Raises:
        KeyError: If the file lacks the ``images``, ``annotations`` or
            ``categories`` sections, or their entries lack a required field.

    """
    logger.info("Loading Ground Truth File")
    # JSON is UTF-8 encoded; do not depend on the platform default encoding.
    with open(ground_truth_file, encoding="utf-8") as gt:
        coco_ground_truth = json.load(gt)

    if len(coco_ground_truth["images"]) > max_images:
        logger.warning(
            f"Number of images {len(coco_ground_truth['images'])} exceeds maximum: "
            f"{max_images}.\nAll the exceeding images will be ignored."
        )

    logger.info("Converting COCO Ground Truth to pd.DataFrame")
    df_images = pd.DataFrame(coco_ground_truth["images"][:max_images])[
        ["id", "file_name", "width", "height"]
    ]
    df_images = df_images.add_prefix("img_")

    bbox_columns = ["col_left", "row_top", "width", "height"]
    column_names = ["image_id", "area", "id", "category"] + bbox_columns

    df_annotations = pd.DataFrame(coco_ground_truth["annotations"])
    if df_annotations.empty:
        # A frame built from an empty list has no columns at all, so every
        # lookup below would raise KeyError. Return the expected (empty) schema.
        image_columns = [name for name in df_images.columns if name != "img_id"]
        return pd.DataFrame(columns=column_names + image_columns)

    # Replace label with category name. One dictionary lookup per row; ids
    # without a matching category are kept unchanged.
    categories = {x["id"]: x["name"] for x in coco_ground_truth["categories"]}
    df_annotations["category"] = df_annotations["category_id"].map(
        lambda category_id: categories.get(category_id, category_id)
    )

    # Add bbox columns: COCO stores boxes as [left, top, width, height]
    df_annotations[bbox_columns] = pd.DataFrame(
        df_annotations.bbox.tolist(), index=df_annotations.index
    )

    # Filter columns by name
    if "iscrowd" in df_annotations.columns:
        column_names.append("iscrowd")

    # Join with images; the inner join drops annotations of ignored images
    df_annotations = df_annotations[column_names].join(
        df_images.set_index("img_id"), how="inner", on="image_id"
    )

    return df_annotations

load_coco_ground_truth_from_StringIO(string_io)

Returns COCO object from StringIO.

Parameters:

Name Type Description Default
string_io TextIO

IO stream in text mode.

required

Returns:

Type Description
COCO

COCO object.

Source code in pyodi/core/utils.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
def load_coco_ground_truth_from_StringIO(string_io: TextIO) -> COCO:
    """Build an indexed COCO object from a text stream.

    The stream content is parsed as JSON, stored as the ``dataset`` of a
    freshly created COCO object, and ``createIndex`` is then called on it.

    Args:
        string_io: IO stream in text mode.

    Returns:
        COCO object.

    """
    coco = COCO()
    # Parse the stream straight into the dataset attribute, then index it.
    coco.dataset = json.load(string_io)
    coco.createIndex()

    return coco