Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dagshub/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.6.9"
__version__ = "0.6.10"
Comment thread
deanp70 marked this conversation as resolved.
Outdated
from .logger import DAGsHubLogger, dagshub_logger
from .common.init import init
from .upload.wrapper import upload_files
Expand Down
2 changes: 1 addition & 1 deletion dagshub/auth/token_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]:

def can_renegotiate(self) -> bool:
    """Tell whether the currently held token may be renegotiated.

    Returns:
        ``False`` when ``self._token`` is exactly an ``EnvVarDagshubToken``,
        ``True`` for every other token type.
    """
    # Env var tokens cannot renegotiate, every other token type can.
    # The exact-type identity check is kept on purpose (no isinstance), so the
    # result is the same as before for any subclass of EnvVarDagshubToken.
    return type(self._token) is not EnvVarDagshubToken

Comment thread
deanp70 marked this conversation as resolved.
def renegotiate_token(self):
if not self._token_storage.is_valid_token(self._token, self._host):
Expand Down
14 changes: 10 additions & 4 deletions dagshub/data_engine/annotation/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING, Literal, Optional, Union, Sequence, Mapping, Callable, List

from dagshub_annotation_converter.converters.coco import load_coco_from_file
from dagshub_annotation_converter.converters.cvat import load_cvat_from_zip
from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs
from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask
Expand All @@ -16,7 +17,7 @@
if TYPE_CHECKING:
from dagshub.data_engine.model.datasource import Datasource

AnnotationType = Literal["yolo", "cvat"]
AnnotationType = Literal["yolo", "cvat", "coco"]
AnnotationLocation = Literal["repo", "disk"]


Expand Down Expand Up @@ -85,14 +86,15 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
)
elif self.annotations_type == "cvat":
annotation_dict = load_cvat_from_zip(annotations_file)
elif self.annotations_type == "coco":
annotation_dict, _ = load_coco_from_file(annotations_file)

return annotation_dict

def download_annotations(self, dest_dir: Path):
log_message("Downloading annotations from repository")
repoApi = self.ds.source.repoApi
if self.annotations_type == "cvat":
# Download just the annotation file
Comment thread
deanp70 marked this conversation as resolved.
repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
elif self.annotations_type == "yolo":
# Download the dataset .yaml file and the images + annotations
Expand All @@ -104,6 +106,8 @@ def download_annotations(self, dest_dir: Path):
# Download the annotation data
assert context.path is not None
repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True)
elif self.annotations_type == "coco":
repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)

@staticmethod
def determine_load_location(ds: "Datasource", annotations_path: Union[str, Path]) -> AnnotationLocation:
Expand Down Expand Up @@ -153,8 +157,10 @@ def remap_annotations(
)
continue
for ann in anns:
assert ann.filename is not None
ann.filename = remap_func(ann.filename)
if ann.filename is not None:
ann.filename = remap_func(ann.filename)
else:
ann.filename = new_filename
Comment thread
deanp70 marked this conversation as resolved.
Outdated
remapped[new_filename] = anns

return remapped
Expand Down
22 changes: 22 additions & 0 deletions dagshub/data_engine/annotation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,28 @@ def add_image_pose(
self.annotations.append(ann)
self._update_datapoint()

def add_coco_annotation(
    self,
    coco_json: str,
):
    """
    Attach every annotation found in a COCO-format JSON string to this datapoint.

    All annotations parsed from the string are re-pointed at this datapoint's path,
    appended to ``self.annotations``, and the datapoint is then updated.

    Args:
        coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys.
    """
    from dagshub_annotation_converter.converters.coco import load_coco_from_json_string

    # The loader returns (annotations grouped per key, context); only the annotations are used
    annotations_by_key, _ = load_coco_from_json_string(coco_json)

    # Flatten the groups in iteration order
    parsed = [ann for group in annotations_by_key.values() for ann in group]

    # Whatever file each annotation named in the JSON, it now belongs to this datapoint
    target_path = self.datapoint.path
    for ann in parsed:
        ann.filename = target_path

    self.annotations.extend(parsed)
    log_message(f"Added {len(parsed)} COCO annotation(s) to datapoint {self.datapoint.path}")
    self._update_datapoint()

def add_yolo_annotation(
self,
annotation_type: Literal["bbox", "segmentation", "pose"],
Expand Down
67 changes: 61 additions & 6 deletions dagshub/data_engine/model/query_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import dacite
import dagshub_annotation_converter.converters.yolo
import rich.progress
from dagshub_annotation_converter.converters.coco import export_to_coco_file
from dagshub_annotation_converter.formats.coco import CocoContext
from dagshub_annotation_converter.formats.yolo import YoloContext
from dagshub_annotation_converter.formats.yolo.categories import Categories
from dagshub_annotation_converter.formats.yolo.common import ir_mapping
Expand Down Expand Up @@ -778,6 +780,16 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB
annotations.extend(dp.metadata[annotation_field].annotations)
return annotations

def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str:
if annotation_field is not None:
return annotation_field
annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()])
if len(annotation_fields) == 0:
raise ValueError("No annotation fields found in the datasource")
annotation_field = annotation_fields[0]
log_message(f"Using annotations from field {annotation_field}")
return annotation_field

def export_as_yolo(
self,
download_dir: Optional[Union[str, Path]] = None,
Expand All @@ -803,12 +815,7 @@ def export_as_yolo(
Returns:
The path to the YAML file with the metadata. Pass this path to ``YOLO.train()`` to train a model.
"""
if annotation_field is None:
annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()])
if len(annotation_fields) == 0:
raise ValueError("No annotation fields found in the datasource")
annotation_field = annotation_fields[0]
log_message(f"Using annotations from field {annotation_field}")
annotation_field = self._resolve_annotation_field(annotation_field)

if download_dir is None:
download_dir = Path("dagshub_export")
Expand Down Expand Up @@ -861,6 +868,54 @@ def export_as_yolo(
log_message(f"Done! Saved YOLO Dataset, YAML file is at {yaml_path.absolute()}")
return yaml_path

def export_as_coco(
    self,
    download_dir: Optional[Union[str, Path]] = None,
    annotation_field: Optional[str] = None,
    output_filename: str = "annotations.json",
    classes: Optional[Dict[int, str]] = None,
) -> Path:
    """
    Downloads the files and exports annotations in COCO format.

    Args:
        download_dir: Where to download the files. Defaults to ``./dagshub_export``
        annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field.
        output_filename: Name of the output COCO JSON file. Default is ``annotations.json``.
        classes: Category mapping for the COCO dataset as ``{id: name}``.
            If ``None``, categories will be inferred from the annotations.

    Returns:
        Path to the exported COCO JSON file.

    Raises:
        ValueError: If ``annotation_field`` is None and the datasource has no annotation fields.
        RuntimeError: If the selected field holds no annotations.
    """
    import copy

    annotation_field = self._resolve_annotation_field(annotation_field)

    if download_dir is None:
        download_dir = Path("dagshub_export")
    download_dir = Path(download_dir)

    annotations = self._get_all_annotations(annotation_field)
    if not annotations:
        raise RuntimeError("No annotations found to export")

    context = CocoContext()
    if classes is not None:
        # Copy so the caller's mapping is never shared with the context
        context.categories = dict(classes)

    # Add the source prefix to all annotations.
    # The list holds the very objects stored in the datapoints' metadata, so the
    # prefix is applied to shallow copies: the originals keep their filenames and
    # a second export on the same result does not stack the prefix twice.
    source_prefix = self.datasource.source.source_prefix
    prefixed_annotations = []
    for ann in annotations:
        prefixed = copy.copy(ann)
        prefixed.filename = os.path.join(source_prefix, ann.filename)
        prefixed_annotations.append(prefixed)

    image_download_path = download_dir / "data"
    log_message("Downloading image files...")
    self.download_files(image_download_path)

    output_path = download_dir / output_filename
    log_message("Exporting COCO annotations...")
    result_path = export_to_coco_file(prefixed_annotations, output_path, context=context)
    log_message(f"Done! Saved COCO annotations to {result_path.absolute()}")
    return result_path

def to_voxel51_dataset(self, **kwargs) -> "fo.Dataset":
"""
Creates a voxel51 dataset that can be used with\
Expand Down
9 changes: 7 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import setuptools
import os.path

import setuptools


# Thank you pip contributors
def read(rel_path: str) -> str:
Expand Down Expand Up @@ -41,7 +42,11 @@ def get_version(rel_path: str) -> str:
"python-dateutil",
"boto3",
"semver",
"dagshub-annotation-converter>=0.1.12",
# FIXME: roll back to main after merging
# "dagshub-annotation-converter>=0.1.12",
"dagshub-annotation-converter @ "
+ "git+https://github.com/DagsHub/"
+ "dagshub-annotation-converter@coco_converter#egg=dagshub-annotation-converter",
Comment thread
deanp70 marked this conversation as resolved.
Outdated
]

extras_require = {
Expand Down
Loading
Loading