Last active
June 14, 2022 15:20
-
-
Save bepuca/6094d73509f37000c8bea2574ddf32e8 to your computer and use it in GitHub Desktop.
object detection error analysis blog - Get predictions and losses of a Faster RCNN model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright © 2022 Bernat Puig Camps | |
from pathlib import Path | |
import pandas as pd | |
from PIL import Image | |
import torch | |
import torchvision | |
def get_predictions(
    images_path: Path, images_df: pd.DataFrame, targets_df: pd.DataFrame
) -> pd.DataFrame:
    """Get predictions and per-image losses of a pretrained Faster-RCNN.

    A torchvision ``fasterrcnn_resnet50_fpn`` model is instantiated inside
    this function and run on every image listed in `images_df`, on the first
    CUDA device when one is available and on the CPU otherwise.

    :param images_path: Directory that contains the image files. Each image
        is opened from ``images_path / file_name``.
    :param images_df: DataFrame with images. Must provide the columns
        "image_id" and "file_name".
    :param targets_df: DataFrame with ground truth target for images. Must
        provide the columns "image_id", "label_id", "xmin", "ymin", "xmax"
        and "ymax".
    :return preds_df: DataFrame with one row per predicted box and columns
        [
            "pred_id", "image_id", "label_id", "xmin", "ymin", "xmax",
            "ymax", "score", "image_loss"
        ]
        It has no rows when `images_df` has no rows.
    """
    columns = [
        "pred_id",
        "image_id",
        "label_id",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
        "score",
        "image_loss",
    ]
    if len(images_df) == 0:
        # Nothing to predict: skip loading the model and avoid calling
        # pd.concat on an empty list, which raises ValueError.
        return pd.DataFrame(columns=columns)

    # NOTE(review): recent torchvision releases appear to prefer the
    # `weights=` argument over `pretrained=` - confirm against the installed
    # version before changing this call.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True
    )
    device = (
        torch.device("cuda:0")
        if torch.cuda.is_available()
        else torch.device("cpu")
    )
    model = model.to(device)

    # The transform is stateless, so build it once instead of once per image.
    to_tensor = torchvision.transforms.ToTensor()

    preds_dfs = []
    for sample in images_df.itertuples():
        # We iterate over single samples (batch size of 1) because we need one
        # loss per image and PyTorch Faster-RCNN outputs losses per batch,
        # not sample.
        t_df = targets_df.query("image_id == @sample.image_id")

        # Use a context manager so the underlying file is closed as soon as
        # the pixels have been converted, instead of whenever it is
        # garbage collected.
        with Image.open(images_path / sample.file_name) as image_file:
            image_tensor = to_tensor(image_file.convert("RGB")).to(device)

        if len(t_df) == 0:
            # PyTorch Faster-RCNN expects targets to be tensors that fulfill
            # len(boxes.shape) == 2 & boxes.shape[-1] == 4. Build both empty
            # tensors explicitly so the result does not depend on the dtype
            # pandas gives to an empty selection.
            boxes = torch.empty((0, 4), dtype=torch.float32)
            labels = torch.empty((0,), dtype=torch.int64)
        else:
            boxes = torch.as_tensor(
                t_df[["xmin", "ymin", "xmax", "ymax"]].values,
                dtype=torch.float32,
            )
            labels = torch.as_tensor(
                t_df["label_id"].values, dtype=torch.int64
            )
        targets = {"boxes": boxes.to(device), "labels": labels.to(device)}

        with torch.no_grad():
            # Faster-RCNN outputs losses only when train mode
            model.train()
            losses = model([image_tensor], [targets])
            # Faster-RCNN outputs predictions only when eval mode
            model.eval()
            preds = model([image_tensor])

        # Unify all sublosses into one (this is just one way of doing it)
        loss = sum(losses.values()).item()

        # Convert to NumPy explicitly so the resulting column dtypes do not
        # depend on how pandas coerces torch tensors.
        pred = preds[0]
        pred_boxes = pred["boxes"].detach().cpu().numpy()
        preds_dfs.append(
            pd.DataFrame(
                {
                    "image_id": sample.image_id,
                    "image_loss": loss,
                    "label_id": pred["labels"].detach().cpu().numpy(),
                    "score": pred["scores"].detach().cpu().numpy(),
                    "xmin": pred_boxes[:, 0],
                    "ymin": pred_boxes[:, 1],
                    "xmax": pred_boxes[:, 2],
                    "ymax": pred_boxes[:, 3],
                }
            )
        )

    preds_df = pd.concat(preds_dfs, ignore_index=True)
    # The fresh 0..n-1 index doubles as a unique prediction identifier.
    preds_df = preds_df.reset_index().rename(columns={"index": "pred_id"})
    return preds_df[columns]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment