Skip to content

Commit c9201d0

Browse files
committed
feat: add show_progress option to dataset loaders and savers
Add optional tqdm progress bars to all time-consuming dataset operations. Addresses #183.

- load_coco_annotations / DetectionDataset.from_coco
- load_pascal_voc_annotations / DetectionDataset.from_pascal_voc
- load_yolo_annotations / DetectionDataset.from_yolo
- save_dataset_images / DetectionDataset.as_coco / as_yolo / as_pascal_voc

The show_progress parameter defaults to False for full backward compatibility. Uses tqdm.auto so progress bars work in both terminal and Jupyter notebook environments.
1 parent d94db74 commit c9201d0

File tree

5 files changed

+196
-74
lines changed

5 files changed

+196
-74
lines changed

src/supervision/dataset/core.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ def as_pascal_voc(
334334
min_image_area_percentage: float = 0.0,
335335
max_image_area_percentage: float = 1.0,
336336
approximation_percentage: float = 0.0,
337+
show_progress: bool = False,
337338
) -> None:
338339
"""
339340
Exports the dataset to PASCAL VOC format. This method saves the images
@@ -357,11 +358,13 @@ def as_pascal_voc(
357358
approximation_percentage: The percentage of
358359
polygon points to be removed from the input polygon,
359360
in the range [0, 1). Argument is used only for segmentation datasets.
361+
show_progress: If `True`, display a progress bar while saving images.
360362
"""
361363
if images_directory_path:
362364
save_dataset_images(
363365
dataset=self,
364366
images_directory_path=images_directory_path,
367+
show_progress=show_progress,
365368
)
366369
if annotations_directory_path:
367370
Path(annotations_directory_path).mkdir(parents=True, exist_ok=True)
@@ -390,6 +393,7 @@ def from_pascal_voc(
390393
images_directory_path: str,
391394
annotations_directory_path: str,
392395
force_masks: bool = False,
396+
show_progress: bool = False,
393397
) -> DetectionDataset:
394398
"""
395399
Creates a Dataset instance from PASCAL VOC formatted data.
@@ -400,6 +404,7 @@ def from_pascal_voc(
400404
containing the PASCAL VOC XML annotations.
401405
force_masks: If True, forces masks to
402406
be loaded for all annotations, regardless of whether they are present.
407+
show_progress: If `True`, display a progress bar while loading images.
403408
404409
Returns:
405410
A DetectionDataset instance containing
@@ -432,6 +437,7 @@ def from_pascal_voc(
432437
images_directory_path=images_directory_path,
433438
annotations_directory_path=annotations_directory_path,
434439
force_masks=force_masks,
440+
show_progress=show_progress,
435441
)
436442

437443
return DetectionDataset(
@@ -446,6 +452,7 @@ def from_yolo(
446452
data_yaml_path: str,
447453
force_masks: bool = False,
448454
is_obb: bool = False,
455+
show_progress: bool = False,
449456
) -> DetectionDataset:
450457
"""
451458
Creates a Dataset instance from YOLO formatted data.
@@ -463,6 +470,7 @@ def from_yolo(
463470
is_obb: If True, loads the annotations in OBB format.
464471
OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`,
465472
where pairs of [x, y] are box corners.
473+
show_progress: If `True`, display a progress bar while loading images.
466474
467475
Returns:
468476
A DetectionDataset instance
@@ -496,6 +504,7 @@ def from_yolo(
496504
data_yaml_path=data_yaml_path,
497505
force_masks=force_masks,
498506
is_obb=is_obb,
507+
show_progress=show_progress,
499508
)
500509
return DetectionDataset(
501510
classes=classes, images=image_paths, annotations=annotations
@@ -509,6 +518,7 @@ def as_yolo(
509518
min_image_area_percentage: float = 0.0,
510519
max_image_area_percentage: float = 1.0,
511520
approximation_percentage: float = 0.0,
521+
show_progress: bool = False,
512522
) -> None:
513523
"""
514524
Exports the dataset to YOLO format. This method saves the
@@ -537,10 +547,13 @@ def as_yolo(
537547
be removed from the input polygon, in the range [0, 1).
538548
This is useful for simplifying the annotations.
539549
Argument is used only for segmentation datasets.
550+
show_progress: If `True`, display a progress bar while saving images.
540551
"""
541552
if images_directory_path is not None:
542553
save_dataset_images(
543-
dataset=self, images_directory_path=images_directory_path
554+
dataset=self,
555+
images_directory_path=images_directory_path,
556+
show_progress=show_progress,
544557
)
545558
if annotations_directory_path is not None:
546559
save_yolo_annotations(
@@ -559,6 +572,7 @@ def from_coco(
559572
images_directory_path: str,
560573
annotations_path: str,
561574
force_masks: bool = False,
575+
show_progress: bool = False,
562576
) -> DetectionDataset:
563577
"""
564578
Creates a Dataset instance from COCO formatted data.
@@ -570,6 +584,7 @@ def from_coco(
570584
force_masks: If True,
571585
forces masks to be loaded for all annotations,
572586
regardless of whether they are present.
587+
show_progress: If `True`, display a progress bar while loading images.
573588
Returns:
574589
A DetectionDataset instance containing
575590
the loaded images and annotations.
@@ -599,6 +614,7 @@ def from_coco(
599614
images_directory_path=images_directory_path,
600615
annotations_path=annotations_path,
601616
force_masks=force_masks,
617+
show_progress=show_progress,
602618
)
603619
return DetectionDataset(classes=classes, images=images, annotations=annotations)
604620

@@ -609,6 +625,7 @@ def as_coco(
609625
min_image_area_percentage: float = 0.0,
610626
max_image_area_percentage: float = 1.0,
611627
approximation_percentage: float = 0.0,
628+
show_progress: bool = False,
612629
) -> None:
613630
"""
614631
Exports the dataset to COCO format. This method saves the
@@ -645,10 +662,13 @@ def as_coco(
645662
to be removed from the input polygon,
646663
in the range [0, 1). This is useful for simplifying the annotations.
647664
Argument is used only for segmentation datasets.
665+
show_progress: If `True`, display a progress bar while saving images.
648666
"""
649667
if images_directory_path is not None:
650668
save_dataset_images(
651-
dataset=self, images_directory_path=images_directory_path
669+
dataset=self,
670+
images_directory_path=images_directory_path,
671+
show_progress=show_progress,
652672
)
653673
if annotations_path is not None:
654674
save_coco_annotations(

src/supervision/dataset/formats/coco.py

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import numpy as np
88
import numpy.typing as npt
9+
from tqdm.auto import tqdm
910

1011
from supervision.dataset.utils import (
1112
approximate_mask_with_polygons,
@@ -254,6 +255,7 @@ def load_coco_annotations(
254255
annotations_path: str,
255256
force_masks: bool = False,
256257
use_iscrowd: bool = True,
258+
show_progress: bool = False,
257259
) -> tuple[list[str], list[str], dict[str, Detections]]:
258260
"""
259261
Load COCO annotations and convert them to `Detections`.
@@ -267,9 +269,21 @@ def load_coco_annotations(
267269
annotations_path: Path to COCO JSON annotations.
268270
force_masks: If `True`, always attempt to load masks.
269271
use_iscrowd: If `True`, include `iscrowd` and `area` in detection data.
272+
show_progress: If `True`, display a progress bar while loading images.
270273
271274
Returns:
272275
A tuple of `(classes, image_paths, annotations)`.
276+
277+
Examples:
278+
```python
279+
import supervision as sv
280+
281+
ds = sv.DetectionDataset.from_coco(
282+
images_directory_path="images/train",
283+
annotations_path="images/train/_annotations.coco.json",
284+
show_progress=True,
285+
)
286+
```
273287
"""
274288
coco_data = read_json_file(file_path=annotations_path)
275289
classes = coco_categories_to_classes(coco_categories=coco_data["categories"])
@@ -286,32 +300,38 @@ def load_coco_annotations(
286300
images = []
287301
annotations = {}
288302

289-
for coco_image in coco_images:
290-
image_name, image_width, image_height = (
291-
coco_image["file_name"],
292-
coco_image["width"],
293-
coco_image["height"],
294-
)
295-
image_annotations = coco_annotations_groups.get(coco_image["id"], [])
296-
image_path = os.path.join(images_directory_path, image_name)
303+
with tqdm(
304+
total=len(coco_images),
305+
desc="Loading COCO annotations",
306+
disable=not show_progress,
307+
) as progress_bar:
308+
for coco_image in coco_images:
309+
image_name, image_width, image_height = (
310+
coco_image["file_name"],
311+
coco_image["width"],
312+
coco_image["height"],
313+
)
314+
image_annotations = coco_annotations_groups.get(coco_image["id"], [])
315+
image_path = os.path.join(images_directory_path, image_name)
297316

298-
with_masks = force_masks or any(
299-
_with_seg_mask(annotation) for annotation in image_annotations
300-
)
301-
annotation = coco_annotations_to_detections(
302-
image_annotations=image_annotations,
303-
resolution_wh=(image_width, image_height),
304-
with_masks=with_masks,
305-
use_iscrowd=use_iscrowd,
306-
)
317+
with_masks = force_masks or any(
318+
_with_seg_mask(annotation) for annotation in image_annotations
319+
)
320+
annotation = coco_annotations_to_detections(
321+
image_annotations=image_annotations,
322+
resolution_wh=(image_width, image_height),
323+
with_masks=with_masks,
324+
use_iscrowd=use_iscrowd,
325+
)
307326

308-
annotation = map_detections_class_id(
309-
source_to_target_mapping=class_index_mapping,
310-
detections=annotation,
311-
)
327+
annotation = map_detections_class_id(
328+
source_to_target_mapping=class_index_mapping,
329+
detections=annotation,
330+
)
312331

313-
images.append(image_path)
314-
annotations[image_path] = annotation
332+
images.append(image_path)
333+
annotations[image_path] = annotation
334+
progress_bar.update(1)
315335

316336
return classes, images, annotations
317337

src/supervision/dataset/formats/pascal_voc.py

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import numpy.typing as npt
1010
from defusedxml.ElementTree import parse, tostring
1111
from defusedxml.minidom import parseString
12+
from tqdm.auto import tqdm
1213

1314
from supervision.dataset.utils import approximate_mask_with_polygons
1415
from supervision.detection.core import Detections
@@ -149,6 +150,7 @@ def load_pascal_voc_annotations(
149150
images_directory_path: str,
150151
annotations_directory_path: str,
151152
force_masks: bool = False,
153+
show_progress: bool = False,
152154
) -> tuple[list[str], list[str], dict[str, Detections]]:
153155
"""
154156
Loads PASCAL VOC XML annotations and returns the image name,
@@ -160,11 +162,23 @@ def load_pascal_voc_annotations(
160162
PASCAL VOC annotation files.
161163
force_masks: If True, forces masks to be loaded for all
162164
annotations, regardless of whether they are present.
165+
show_progress: If `True`, display a progress bar while loading images.
163166
164167
Returns:
165168
A tuple with a list
166169
of class names, a list of paths to images, and a dictionary with image
167170
paths as keys and corresponding Detections instances as values.
171+
172+
Examples:
173+
```python
174+
import supervision as sv
175+
176+
ds = sv.DetectionDataset.from_pascal_voc(
177+
images_directory_path="images/train",
178+
annotations_directory_path="images/train/labels",
179+
show_progress=True,
180+
)
181+
```
168182
"""
169183

170184
image_paths = [
@@ -177,24 +191,33 @@ def load_pascal_voc_annotations(
177191
classes: list[str] = []
178192
annotations = {}
179193

180-
for image_path in image_paths:
181-
image_stem = Path(image_path).stem
182-
annotation_path = os.path.join(annotations_directory_path, f"{image_stem}.xml")
183-
if not os.path.exists(annotation_path):
184-
annotations[image_path] = Detections.empty()
185-
continue
186-
187-
tree = parse(annotation_path)
188-
root = tree.getroot()
189-
190-
image = cv2.imread(image_path)
191-
if image is None:
192-
raise ValueError(f"Could not read image from path: {image_path}")
193-
resolution_wh = (image.shape[1], image.shape[0])
194-
annotation, classes = detections_from_xml_obj(
195-
root, classes, resolution_wh, force_masks
196-
)
197-
annotations[image_path] = annotation
194+
with tqdm(
195+
total=len(image_paths),
196+
desc="Loading Pascal VOC annotations",
197+
disable=not show_progress,
198+
) as progress_bar:
199+
for image_path in image_paths:
200+
image_stem = Path(image_path).stem
201+
annotation_path = os.path.join(
202+
annotations_directory_path, f"{image_stem}.xml"
203+
)
204+
if not os.path.exists(annotation_path):
205+
annotations[image_path] = Detections.empty()
206+
progress_bar.update(1)
207+
continue
208+
209+
tree = parse(annotation_path)
210+
root = tree.getroot()
211+
212+
image = cv2.imread(image_path)
213+
if image is None:
214+
raise ValueError(f"Could not read image from path: {image_path}")
215+
resolution_wh = (image.shape[1], image.shape[0])
216+
annotation, classes = detections_from_xml_obj(
217+
root, classes, resolution_wh, force_masks
218+
)
219+
annotations[image_path] = annotation
220+
progress_bar.update(1)
198221

199222
return classes, image_paths, annotations
200223

0 commit comments

Comments
 (0)