"""
bbox.py
Provides the `Bbox` class and utility functions for manipulating bounding boxes
in various coordinate formats (Pascal VOC, COCO, YOLO, etc.). Supports
transformations, geometric operations, and conversions.
"""
from __future__ import annotations
from typing import List, Optional, Self, Sequence, Tuple
from pydantic import BaseModel, model_validator, __version__ as pydantic_version
[docs]
class Bbox(BaseModel):
"""
A class to represent a Bbox (inherits from Pydantic BaseModel).
The bbox is stored in Pascal_VOC format:
top-left, bottom-right with a top-left origin (PIL coord system).
(meaning that top < bottom)
The bottom and right edges are considered excluded from the Bbox for compatibility with array
slicing and PIL image cropping features (in case of Int Bboxes).
Attributes:
left (float): The left coordinate of the bounding box.
top (float): The top coordinate of the bounding box.
right (float): The right coordinate of the bounding box.
bottom (float): The bottom coordinate of the bounding box.
"""
left: float
top: float
right: float
bottom: float
if pydantic_version >= "2.12.0":
@model_validator(mode="after")
def check_passwords_match(self) -> Self:
if self.left > self.right or self.top > self.bottom:
raise ValueError("The Bbox is not valid (negative width or height).")
return self
else:
# Ensure compatibility with previous pydantic versions
[docs]
@model_validator(mode="after") # type: ignore[arg-type]
@classmethod
def check_passwords_match(cls, model: "Bbox") -> "Bbox":
if model.left > model.right or model.top > model.bottom:
raise ValueError("The Bbox is not valid (negative width or height).")
return model
# region From methods
[docs]
@classmethod
def from_tlbr(cls, tlbr: Sequence[float]) -> Bbox:
"""
Initializes the bounding box from top-left and bottom-right coordinates.
Args:
tlbr (Sequence[float]): A sequence containing the top-left and bottom-right coordinates
of the bounding box in the format (left, top, right, bottom).
Returns:
Bbox: The Bbox instance.
Raises:
ValueError: If the length of the sequence is not 4, or if the Bbox is not valid
(ie `left > right` or `top > bottom`).
Example:
>>> bbox = Bbox.from_tlbr((10, 20, 30, 40))
>>> print(bbox.left, bbox.top, bbox.right, bbox.bottom)
10 20 30 40
"""
_assert_sequence_len(seq=tlbr)
return cls(left=tlbr[0], top=tlbr[1], right=tlbr[2], bottom=tlbr[3])
[docs]
@classmethod
def from_tlwh(cls, tlwh: Sequence[float]) -> Bbox:
"""
Initializes the bounding box from top-left and width-height coordinates.
Args:
tlwh (Sequence[float]): A sequence containing the top-left and width-height coordinates
of the bounding box in the format (left, top, width, height).
Returns:
Bbox: The Bbox instance.
Raises:
ValueError: If the length of the sequence is not 4, or if the Bbox is not valid
(ie `width < 0` or `height < 0`).
Example:
>>> bbox = Bbox.from_tlwh((10, 20, 20, 30))
>>> print(bbox.left, bbox.top, bbox.right, bbox.bottom)
10 20 30 50
"""
_assert_sequence_len(seq=tlwh)
return cls(
left=tlwh[0],
top=tlwh[1],
right=tlwh[0] + tlwh[2],
bottom=tlwh[1] + tlwh[3],
)
[docs]
@classmethod
def from_cwh(cls, cwh: Sequence[float]) -> Bbox:
"""
Initializes the bounding box from center and width-height coordinates.
Args:
cwh (Sequence[float]): A sequence containing the center and width-height coordinates
of the bounding box in the format (center_x, center_y, width, height).
Returns:
Bbox: The Bbox instance.
Raises:
ValueError: If the length of the sequence is not 4, or if the Bbox is not valid
(ie `width < 0` or `height < 0`).
Example:
>>> bbox = Bbox.from_cwh((20, 35, 20, 30))
>>> print(bbox.left, bbox.top, bbox.right, bbox.bottom)
10 20 30 50
"""
_assert_sequence_len(seq=cwh)
half_width = cwh[2] / 2
half_height = cwh[3] / 2
return cls(
left=cwh[0] - half_width,
top=cwh[1] - half_height,
right=cwh[0] + half_width,
bottom=cwh[1] + half_height,
)
from_xyxy = from_tlbr
from_pascal_voc = from_tlbr
from_list = from_tlbr
from_coco = from_tlwh
# endregion
# region To methods
[docs]
def to_tlbr(self) -> List[float]:
"""
Returns the bounding box coordinates in Top-Left, Bottom-Right format.
Returns:
List[float]: The bounding box coordinates [x_min, y_min, x_max, y_max].
x_min and y_min are the coordinates of the top-left corner of the bounding box.
x_max and y_max are the coordinates of the bottom-right corner of the bounding box.
"""
return [self.left, self.top, self.right, self.bottom]
[docs]
def to_norm_tlbr(self, img_w: int, img_h: int) -> List[float]:
"""
Returns the bounding box coordinates in Top-Left, Bottom-Right format, normalized
based on the image dimensions.
Args:
img_w (int): The image width in pixels.
img_h (int): The image height in pixels.
Returns:
List[float]: The bounding box coordinates [x_min, y_min, x_max, y_max].
x_min and y_min are the coordinates of the top-left corner of the bounding box.
x_max and y_max are the coordinates of the bottom-right corner of the bounding box.
All the returned values are **NORMALIZED** based on the image dimensions.
"""
return [
self.left / img_w,
self.top / img_h,
self.right / img_w,
self.bottom / img_h,
]
[docs]
def to_tlwh(self) -> List[float]:
"""
Returns the bounding box coordinates in Top-Left, Width-Height format.
Returns:
List[float]: The bounding box coordinates [x_min, y_min, width, height].
x_min and y_min are coordinates of the top-left corner of the bounding box.
"""
return [self.left, self.top, self.width, self.height]
[docs]
def to_norm_tlwh(self, img_w: int, img_h: int) -> List[float]:
"""
Returns the bounding box coordinates in Top-Left, Width-Height format, normalized
based on the image dimensions.
Args:
img_w (int): The image width in pixels.
img_h (int): The image height in pixels.
Returns:
List[float]: The bounding box coordinates [x_min, y_min, width, height].
x_min and y_min are the coordinates of the top-left
corner of the bounding box.
All the returned values are **NORMALIZED** based on the image dimensions.
"""
return [
self.left / img_w,
self.top / img_h,
self.right / img_w,
self.bottom / img_h,
]
[docs]
def to_cwh(self) -> List[float]:
"""
Returns the bounding box coordinates in Center, Width-Height format.
Returns:
List[float]: The bounding box coordinates [x_center, y_center, width, height].
"""
return [*self.center, self.width, self.height]
[docs]
def to_norm_cwh(self, img_w: int, img_h: int) -> List[float]:
"""
Returns the bounding box coordinates in Center, Width-Height format, normalized
based on the image dimensions.
Args:
img_w (int): The image width in pixels.
img_h (int): The image height in pixels.
Returns:
List[float]: The NORMALIZED bounding box coordinates [x_center, y_center, width,
height].
"""
cx, cy = self.center
return [cx / img_w, cy / img_h, self.width / img_w, self.height / img_h]
[docs]
def to_polygon(self) -> List[Tuple[float, float]]:
"""
Returns the bounding box corners as points.
Returns:
List[Tuple[float, float]]: The corners coordinates in (x, y) format.
The order is `top_left > top_right > bottom_right > bottom_left`
"""
return [
(self.left, self.top),
(self.right, self.top),
(self.right, self.bottom),
(self.left, self.bottom),
]
to_pascal_voc = to_tlbr
to_xyxy = to_tlbr
to_list = to_tlbr
to_albu = to_norm_tlbr
to_coco = to_tlwh
to_yolo = to_norm_cwh
# endregion
# region Transformations
[docs]
def shift(self, horizontal_shift: float = 0, vertical_shift: float = 0) -> Bbox:
"""
Return a shifted Bbox by the specified horizontal and vertical amounts.
Args:
horizontal_shift (float, optional): The amount to shift the bounding box horizontally.
Defaults to 0.
vertical_shift (float, optional): The amount to shift the bounding box vertically.
Defaults to 0.
Returns:
Bbox: The shifted Bbox instance.
"""
return Bbox(
left=self.left + horizontal_shift,
top=self.top + vertical_shift,
right=self.right + horizontal_shift,
bottom=self.bottom + vertical_shift,
)
[docs]
def scale(self, scale_factor: float) -> Bbox:
"""
Return a scaled Bbox by the specified scale factor. The scaling will be from the center.
Args:
scale_factor (float): The factor to scale the bounding box by. Width and height will
be scaled by this factor.
Returns:
Bbox: The scaled Bbox instance.
Raises:
ValueError: If the scale is strictly negative.
"""
if scale_factor < 0:
raise ValueError(
"Scaling with a negative value would result in an invalid Bbox."
)
cx, cy = self.center
new_width = self.width * scale_factor
new_height = self.height * scale_factor
return Bbox(
left=cx - new_width / 2,
right=cx + new_width / 2,
top=cy - new_height / 2,
bottom=cy + new_height / 2,
)
[docs]
def expand(
self, left: float = 0, top: float = 0, right: float = 0, bottom: float = 0
) -> Bbox:
"""
Return an expanded Bbox by the specified padding for each side.
Args:
left (float, optional): The amount to expand the left side of the bounding box by.
Defaults to 0.
top (float, optional): The amount to expand the top side of the bounding box by.
Defaults to 0.
right (float, optional): The amount to expand the right side of the bounding box by.
Defaults to 0.
bottom (float, optional): The amount to expand the bottom side of the bounding box by.
Defaults to 0.
Returns:
Bbox: The expanded Bbox instance.
"""
return Bbox(
left=self.left - left,
right=self.right + right,
top=self.top - top,
bottom=self.bottom + bottom,
)
[docs]
def pad_to_square(self) -> Bbox:
"""Returns a padded Bbox to make it a square."""
width = self.width
height = self.height
if width > height:
diff = (width - height) / 2
return Bbox(
left=self.left,
right=self.right,
top=self.top - diff,
bottom=self.bottom + diff,
)
if height > width:
diff = (height - width) / 2
return Bbox(
left=self.left - diff,
right=self.right + diff,
top=self.top,
bottom=self.bottom,
)
return self.model_copy()
[docs]
def pad_to_aspect_ratio(self, target_ratio: float) -> Bbox:
"""
Returns a padded Bbox to achieve the target aspect ratio.
Args:
target_ratio (float): The target aspect ratio.
Returns:
Bbox: A Bbox instance padded to the correct ratio.
Raises:
ValueError: If target_ratio is <= 0.
"""
if target_ratio <= 0:
raise ValueError(
f"Target ratio cannot be negative or zero. Received {target_ratio}"
)
current_ratio = self.aspect_ratio
if current_ratio > target_ratio:
# Need to increase height
new_height = self.width / target_ratio
diff = (new_height - self.height) / 2
return Bbox(
left=self.left,
right=self.right,
top=self.top - diff,
bottom=self.bottom + diff,
)
if current_ratio < target_ratio:
# Need to increase width
new_width = self.height * target_ratio
diff = (new_width - self.width) / 2
return Bbox(
left=self.left - diff,
right=self.right + diff,
top=self.top,
bottom=self.bottom,
)
return self.model_copy()
[docs]
def clip_to_img(self, img_w: int, img_h: int) -> Bbox:
"""
Returns a clipped Bbox to the image dimensions.
Remember that the bottom and right edges are inclusive, so
`Bbox(left=-10, top=-20, right=100, bottom=120).clipt_to_img(img_w=32, img_h=64)`
returns `Bbox(left=0, top=0, right=31, bottom=63)`
Args:
img_w (int): The image width in pixels.
img_h (int): The image height in pixels.
Returns:
Bbox: The clipped Bbox.
"""
return Bbox(
left=max(0, self.left),
top=max(0, self.top),
right=min(img_w, self.right),
bottom=min(img_h, self.bottom),
)
# endregion
[docs]
def overlaps(self, other: Bbox) -> bool:
"""
Checks if the current bounding box overlaps with another bounding box.
Two bboxes are considered as overlapping if they intersect with a non-zero area.
Args:
other (Bbox): The other bounding box to check for overlap.
Returns:
bool: True if the bounding boxes overlap, False otherwise.
"""
inter = self.intersection(other)
return inter is not None and inter.area > 0
[docs]
def contains_point(self, x: float, y: float) -> bool:
"""
Checks if a point is inside the bounding box.
Args:
x (float): The x-coordinate of the point.
y (float): The y-coordinate of the point.
Returns:
bool: True if the point is inside the bounding box, False otherwise.
"""
return self.left <= x <= self.right and self.top <= y <= self.bottom
[docs]
def union(self, other: Bbox) -> Bbox:
"""
Calculates the minimal Bbox that englobes this one AND the other.
Args:
other (Bbox): The other bounding box to calculate the union with.
Returns:
Bbox: The minimal englobing Bbox.
"""
return Bbox(
left=min(self.left, other.left),
top=min(self.top, other.top),
right=max(self.right, other.right),
bottom=max(self.bottom, other.bottom),
)
[docs]
def intersection(self, other: Bbox) -> Optional[Bbox]:
"""
Calculates the intersection with another Bbox. If the resulting Bbox is not valid (ie
`left > right` or `top > bottom`, returns None.
Args:
other (Bbox): The other bounding box to calculate the intersection with.
Returns:
Optional[Bbox]: The intersection of the two bounding boxes if valid.
"""
left = max(self.left, other.left)
top = max(self.top, other.top)
right = min(self.right, other.right)
bottom = min(self.bottom, other.bottom)
if left > right or top > bottom:
return None
return Bbox(left=left, top=top, right=right, bottom=bottom)
[docs]
def iou(self, other: Bbox) -> float:
"""Calculates the Intersection over Union (IoU) with another bounding box.
Args:
other (Bbox): The other Bbox.
Returns:
float: The IoU between the two bounding boxes.
"""
# Calculate the intersection area
inter = self.intersection(other)
intersection_area = inter.area if inter is not None else 0
# Calculate the union area
union_area = self.area + other.area - intersection_area
if union_area == 0:
return 0
# Calculate the IoU
return intersection_area / union_area
[docs]
def distance_to_point(self, x: float, y: float) -> float:
"""
Calculates the distance from the bounding box to a point.
Args:
x (float): The x-coordinate of the point.
y (float): The y-coordinate of the point.
Returns:
float: The distance from the bounding box to the point.
"""
dx = max(self.left - x, 0, x - self.right)
dy = max(self.top - y, 0, y - self.bottom)
return (dx**2 + dy**2) ** 0.5
@property
def width(self) -> float:
"""The width of the Bbox."""
return self.right - self.left
@property
def height(self):
"""The height of the Bbox."""
return self.bottom - self.top
@property
def area(self) -> float:
"""The area of the Bbox"""
return self.width * self.height
@property
def center(self) -> Tuple[float, float]:
"""The center of the Bbox in (x, y) format."""
return (self.left + self.right) / 2, (self.top + self.bottom) / 2
@property
def aspect_ratio(self) -> float:
"""The aspect ratio of the Bbox (width over height)."""
return self.width / self.height
__or__ = union
__and__ = intersection
def _assert_sequence_len(seq: Sequence[float], target_len: int = 4) -> None:
"""
Asserts that the length of the sequence is 4.
Args:
seq (Sequence[float]): The sequence to check the length of.
target_len (int, optional): The target length of the sequence. Defaults to 4.
Raises:
ValueError: If the length of the sequence is not the target one.
"""
if len(seq) != target_len:
raise ValueError(
f"A sequence of len {len(seq)} has been passed. Need a sequence of len {target_len}."
)