diff --git a/.dockerignore b/.dockerignore index bdf319e7..8b767f67 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,3 @@ **/build/ +real* +test* diff --git a/docker/Dockerfile b/docker/Dockerfile index 2b03ad71..9c88e253 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -53,7 +53,10 @@ RUN chmod +x /usr/local/bin/link-editable-source \ && uv pip install --no-build-isolation /opt/rcs-src/extensions/rcs_fr3 \ && uv pip install /opt/rcs-src/extensions/rcs_realsense \ && uv pip install /opt/rcs-src/extensions/rcs_robotiq2f85 \ - && uv pip install /opt/rcs-src/extensions/rcs_zed + && uv pip install /opt/rcs-src/extensions/rcs_zed \ + && uv pip install /opt/rcs-src/examples/teleop/SimPublisher \ + && uv pip install /opt/rcs-src/2f85-python-driver \ + && uv pip install /opt/rcs-src/vlagents WORKDIR /workspace/robot-control-stack diff --git a/docker/README.md b/docker/README.md index 78bd0da4..35b4bd13 100644 --- a/docker/README.md +++ b/docker/README.md @@ -23,6 +23,7 @@ Notes: - `~/zed_models` is mounted into `/usr/local/zed/resources` to match the direct `docker run` setup. - `/dev/dri` is masked inside the container so host Mesa/AMD render nodes do not override the NVIDIA runtime devices. - NVIDIA PRIME/GLX environment variables are exported to bias OpenGL/EGL selection toward the NVIDIA stack when using X11 forwarding. -- Python source changes are picked up from the mounted repo, including `extensions/rcs_zed`. +- The simulator still bootstraps EGL for offscreen MuJoCo camera rendering; set `RCS_MUJOCO_DISABLE_EGL=1` only if you intentionally want to disable that path. +- Python source changes are picked up from the mounted repo. - If you change C++ code in `rcs` or `rcs_fr3`, rebuild the image. - For non-GPU hosts, comment out the GPU-related lines in `docker/compose/dev.yml`. diff --git a/docker/link-editable-source.sh b/docker/link-editable-source.sh index 0b1b967a..02bf007e 100644 --- a/docker/link-editable-source.sh +++ b/docker/link-editable-source.sh @@ -10,26 +10,21 @@ fi SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" -link_mixed_package() { +link_compiled_package() { src_dir="$1" dst_dir="$2" - keep_dir_name="${3:-}" if [ ! -d "$src_dir" ] || [ ! -d "$dst_dir" ]; then return fi - # Replace only the Python sources from the mounted repo and keep compiled - # artifacts that were installed into site-packages during image build. - for path in "$src_dir"/* "$src_dir"/.[!.]* "$src_dir"/..?*; do - [ -e "$path" ] || continue - name="$(basename "$path")" - if [ -n "$keep_dir_name" ] && [ "$name" = "$keep_dir_name" ]; then - continue - fi - rm -rf "$dst_dir/$name" - cp -as "$path" "$dst_dir/$name" - done + tmp_keep="$(mktemp -d)" + find "$dst_dir" -maxdepth 1 \( -name '_core*.so' -o -name 'lib*.so*' \) -exec mv {} "$tmp_keep/" \; + rm -rf "$dst_dir" + mkdir -p "$dst_dir" + cp -as "$src_dir/." "$dst_dir/" + find "$tmp_keep" -maxdepth 1 -type f -exec mv {} "$dst_dir/" \; + rmdir "$tmp_keep" } link_pure_python_package() { @@ -44,8 +39,9 @@ link_pure_python_package() { ln -s "$src_dir" "$dst_dir" } -link_mixed_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs" "_core" -link_mixed_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3" "_core" +link_compiled_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs" +link_compiled_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3" link_pure_python_package "$REPO_ROOT/extensions/rcs_realsense/src/rcs_realsense" "$SITE_PACKAGES/rcs_realsense" link_pure_python_package "$REPO_ROOT/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85" "$SITE_PACKAGES/rcs_robotiq2f85" link_pure_python_package "$REPO_ROOT/extensions/rcs_zed/src/rcs_zed" "$SITE_PACKAGES/rcs_zed" +link_pure_python_package "$REPO_ROOT/vlagents" "$SITE_PACKAGES/vlagents" diff --git a/examples/inference/README.md b/examples/inference/README.md new file mode 100644 index 00000000..e69de29b diff --git a/examples/inference/franka.py b/examples/inference/franka.py new file mode 100644 index 00000000..2c2ed6ad --- /dev/null +++ b/examples/inference/franka.py @@ -0,0 +1,441 @@ +import copy +import json +import logging +import threading +from dataclasses import asdict, dataclass, field +from pathlib import Path +from queue import Empty, Queue +from time import sleep +from typing import Any + +from PIL import Image + +from rcs.utils import SimpleFrameRate + +import gymnasium as gym +import numpy as np +from rcs._core.common import BaseCameraConfig, RobotPlatform +from rcs._core.sim import SimConfig +from rcs.envs.base import ControlMode, RelativeTo +from rcs.envs.configs import EmptyWorldFR3Duo +from rcs.envs.storage_wrapper import StorageWrapper +from rcs.envs.tasks import PickTaskConfig + +import rcs +from rcs_duobench.tasks.bin_sort import BinSortEnvConfig +from vlagents.client import RemoteAgent +from vlagents.policies import Act, Obs + +logger = logging.getLogger(__name__) + + +ROBOT2IP = { + "right": "192.168.102.1", + "left": "192.168.101.1", +} +ROBOT2ID = { + "left": "1", + "right": "0", +} + + +ROBOT_INSTANCE = RobotPlatform.SIMULATION +# ROBOT_INSTANCE = RobotPlatform.HARDWARE + +# set camera dict to none disable cameras +CAMERA_DICT = { + "left_wrist": "230422272017", + "right_wrist": "230422271040", + # "side": "243522070385", + # "bird_eye": "243522070364", +} +# CAMERA_DICT = None +ZED_CAMERA_DICT = { + "head": "19928076", +} +INCLUDE_DEPTH = False + +ROBOTIQ_SERIAL = { + "left": "DAAQMPDC", + "right": "DAAQMJHX", +} + +# DIGIT_DICT = { +# "digit_right_left": "D21182", +# "digit_right_right": "D21193" +# } +DIGIT_DICT = None + + +INSTRUCTION = "pick up the black cube with the right arm and place it into the black bowl; pick up the white cube with the left arm and place it into the white bowl" +FPS = 30 +CONTROL_MODE = ControlMode.JOINTS +RELATIVETO = RelativeTo.NONE +RECORD_PATH = "inference_recordings" +MODEL = "lerobot" +IP = "localhost" +PORT = 20000 +CONFIG_PATH = Path(__file__).with_suffix(".json") +MAX_REL_MOV_JOINTS = np.deg2rad(0.5) +MAX_REL_MOV_CART = (0.5, np.deg2rad(90)) + +logging.basicConfig( + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + level=logging.INFO, +) + + +robot2world = { + "right": rcs.common.Pose( + translation=np.array([0, 0, 0]), rpy_vector=np.array([0.89360858, -0.17453293, 0.46425758]) + ), + "left": rcs.common.Pose( + translation=np.array([0, 0, 0]), rpy_vector=np.array([-0.89360858, -0.17453293, -0.46425758]) + ), +} + + +@dataclass +class InferenceConfig: + vlagents_host: str = IP + vlagents_port: int = PORT + vlagents_model: str = MODEL + instruction: str = INSTRUCTION + robot_keys: list[str] = field(default_factory=lambda: ["left", "right"]) + jpeg_encoding: bool = True + on_same_machine: bool = False + fps: int = FPS + record_path: str = RECORD_PATH + n_action_steps: int | None = None + max_rel_mov_joints: float = MAX_REL_MOV_JOINTS + max_rel_mov_cart: tuple[float, float] = MAX_REL_MOV_CART + + +def load_inference_config() -> InferenceConfig: + if not CONFIG_PATH.exists(): + CONFIG_PATH.write_text(json.dumps(asdict(InferenceConfig()), indent=2) + "\n") + return InferenceConfig() + return InferenceConfig(**json.loads(CONFIG_PATH.read_text())) + + + +class ModelInference: + def __init__(self, env: gym.Env, cfg: InferenceConfig): + self.env = env + self.gripper_state = 1 + self._cfg = cfg + self._episode_running = False + self._command_queue: Queue[str] = Queue() + self._shutdown_requested = threading.Event() + self.remote_agent = RemoteAgent( + cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding + ) + self.frame_rate = SimpleFrameRate(self._cfg.fps) + self._action_buffer = [] + + def submit_command(self, command: str) -> None: + self._command_queue.put(command) + + def request_shutdown(self) -> None: + self._shutdown_requested.set() + + def _drain_commands(self) -> tuple[bool, bool, bool, bool, bool]: + start_requested = False + record_requested = False + success_requested = False + stop_requested = False + reload_requested = False + + while True: + try: + command = self._command_queue.get_nowait() + except Empty: + break + + if command == "e": + start_requested = True + elif command == "r": + record_requested = True + elif command == "s": + success_requested = True + elif command == "q": + stop_requested = True + elif command == "o": + reload_requested = True + + return start_requested, record_requested, success_requested, stop_requested, reload_requested + + def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs: + cameras = {} + for frame in obs["frames"]: + cameras[frame] = obs["frames"][frame]["rgb"]["data"] + cameras[frame] = np.array(Image.fromarray(cameras[frame]).resize((224, 224), Image.Resampling.LANCZOS)) + + state = [] + for robot in self._cfg.robot_keys: + # TODO: currently hardcoded for joints + state.append(obs[robot]["joints"]) + state.append(obs[robot]["gripper"]) + + return Obs(cameras=cameras, gripper=None, info=info, state=np.concatenate(state)) + + def act(self, obs_dict) -> None: + done = False + if self._cfg.n_action_steps is None: + return self.remote_agent.act(obs_dict) + if len(self._action_buffer) == 0: + action = self.remote_agent.act(obs_dict) + selected_action = action.action[:self._cfg.n_action_steps] + self._action_buffer = selected_action.tolist() + done = action.done + if RELATIVETO == RelativeTo.CONFIGURED_ORIGIN: + for robot in self.env.get_wrapper_attr("envs"): + self.env.get_wrapper_attr("envs")[robot].get_wrapper_attr("set_origin_to_current")() + act = self._action_buffer.pop(0) + return Act(action=act, done=done) + + def action_agents2rcs(self, action: Act) -> dict[str, Any]: + act = {} + for idx, robot in enumerate(self._cfg.robot_keys): + # TODO: this is currently hard coded for franka joints + act[robot] = {} + act[robot]["joints"] = action.action[idx * 8 : idx * 8 + 7] + act[robot]["gripper"] = action.action[idx * 8 + 7 : idx * 8 + 8] + return act + + def loop(self): + obs, _ = self.env.reset() + obs_dict = self.obs_rcs2agents(obs) + logger.info( + "waiting for input: 'e' to start, 'r' to start and record, 's' for success and reset, 'q' to stop and reset, and 'o' to reload config" + ) + + while not self._shutdown_requested.is_set(): + start_requested, record_requested, success_requested, stop_requested, reload_requested = ( + self._drain_commands() + ) + + if reload_requested: + self._cfg = load_inference_config() + try: + self.remote_agent.reconnect( + host=self._cfg.vlagents_host, + port=self._cfg.vlagents_port, + model=self._cfg.vlagents_model, + on_same_machine=self._cfg.on_same_machine, + jpeg_encoding=self._cfg.jpeg_encoding, + ) + logger.info( + "reloaded config from %s with host=%s port=%s model=%s", + CONFIG_PATH, + self._cfg.vlagents_host, + self._cfg.vlagents_port, + self._cfg.vlagents_model, + ) + except Exception: + logger.exception("failed to reconnect after reloading %s", CONFIG_PATH) + if isinstance(self.env, StorageWrapper): + self.env.base_dir = self._cfg.record_path + self.env.set_instruction(self._cfg.instruction) + obs, _ = self.env.reset() + obs_dict = self.obs_rcs2agents(obs) + self._action_buffer = [] + self._episode_running = False + + if success_requested: + if self._episode_running: + logger.info("marking episode successful and resetting environment") + self.env.get_wrapper_attr("success")() + obs, _ = self.env.reset() + obs_dict = self.obs_rcs2agents(obs) + self._action_buffer = [] + self._episode_running = False + + if stop_requested: + if self._episode_running: + logger.info("stopping episode and resetting environment") + obs, _ = self.env.reset() + obs_dict = self.obs_rcs2agents(obs) + self._action_buffer = [] + self._episode_running = False + + if not self._episode_running: + try: + self.remote_agent.ensure_connected() + except Exception: + sleep(0.5) + continue + if start_requested or record_requested: + if isinstance(self.env, StorageWrapper): + self.env.set_instruction(self._cfg.instruction) + if record_requested: + self.env.start_record() + logger.info("starting episode%s", " with recording" if record_requested else "") + self.remote_agent.reset(copy.deepcopy(obs_dict), instruction=self._cfg.instruction) + self._episode_running = True + else: + sleep(0.05) + continue + + action = self.act(copy.deepcopy(obs_dict)) + if action.done: + logger.info("done issued by agent, resetting environment") + obs, _ = self.env.reset() + obs_dict = self.obs_rcs2agents(obs) + self._action_buffer = [] + self._episode_running = False + continue + a = self.action_agents2rcs(action) + obs, _, _, _, info = self.env.step(a) + # print(obs["left"]["joints"], obs["left"]["gripper"], obs["right"]["joints"], obs["right"]["gripper"]) + + obs_dict = self.obs_rcs2agents(obs) + + if ROBOT_INSTANCE == RobotPlatform.HARDWARE: + self.frame_rate() + + +def command_loop(controller: ModelInference) -> None: + prompt = "Command [e=start, r=record, s=success/reset, q=stop/reset, o=reload, x=exit]: " + while True: + try: + command = input(prompt).strip().lower() + except EOFError: + command = "x" + except KeyboardInterrupt: + print() + command = "x" + + if not command: + continue + if command == "x": + controller.request_shutdown() + return + if command in {"e", "r", "s", "q", "o"}: + controller.submit_command(command) + continue + logger.info("unknown command %r", command) + + +def get_env(cfg: InferenceConfig) -> gym.Env: + if ROBOT_INSTANCE == RobotPlatform.HARDWARE: + from rcs_fr3.configs import FrankaDuoEnv + from rcs_fr3.creators import HardwareCameraCreatorConfig + + env_creator = FrankaDuoEnv() + env_creator.left_ip = ROBOT2IP["left"] + env_creator.right_ip = ROBOT2IP["right"] + hw_cfg = env_creator.config() + camera_cfgs: dict[str, HardwareCameraCreatorConfig] = {} + if CAMERA_DICT is not None: + camera_cfgs["realsense"] = HardwareCameraCreatorConfig( + camera_type_id="realsense", + camera_cfgs={ + name: BaseCameraConfig( + identifier=identifier, + resolution_width=1280, + resolution_height=720, + frame_rate=30, + ) + for name, identifier in CAMERA_DICT.items() + }, + ) + if ZED_CAMERA_DICT is not None: + camera_cfgs["zed"] = HardwareCameraCreatorConfig( + camera_type_id="zed", + camera_cfgs={ + name: BaseCameraConfig( + identifier=identifier, + resolution_width=1280, + resolution_height=720, + frame_rate=30, + ) + for name, identifier in ZED_CAMERA_DICT.items() + }, + kwargs={ + "enable_depth": False, + "enable_imu": False, + }, + ) + if DIGIT_DICT is not None: + camera_cfgs["digit"] = HardwareCameraCreatorConfig( + camera_type_id="digit", + camera_cfgs={ + name: BaseCameraConfig( + identifier=identifier, + resolution_width=320, + resolution_height=240, + frame_rate=30, + ) + for name, identifier in DIGIT_DICT.items() + }, + ) + hw_cfg.camera_cfgs = camera_cfgs or None + hw_cfg.control_mode = CONTROL_MODE + hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH + hw_cfg.max_relative_movement = cfg.max_rel_mov_joints if CONTROL_MODE == ControlMode.JOINTS else cfg.max_rel_mov_cart + hw_cfg.relative_to = RELATIVETO + hw_cfg.robot_to_shared_base_frame = robot2world + hw_cfg.robot_cfgs["left"].ignore_realtime = True + hw_cfg.robot_cfgs["right"].ignore_realtime = True + hw_cfg.robot_cfgs["left"].speed_factor = 0.1 + hw_cfg.robot_cfgs["right"].speed_factor = 0.1 + hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"] + hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"] + env_rel = env_creator.create_env(hw_cfg) + else: + # FR3 + + scene = BinSortEnvConfig() + sim_cfg_data = scene.config() + sim_cfg_data.sim_cfg = SimConfig( + async_control=True, realtime=False, frequency=cfg.fps, max_convergence_steps=500 + ) + sim_cfg_data.relative_to = RelativeTo.CONFIGURED_ORIGIN + sim_cfg_data.wrapper_cfg.include_depth = INCLUDE_DEPTH + sim_cfg_data.control_mode = ControlMode.JOINTS + sim_cfg_data.relative_to = RELATIVETO + sim_cfg_data.wrapper_cfg.binary_gripper = True + sim_cfg_data.max_relative_movement = cfg.max_rel_mov_joints if CONTROL_MODE == ControlMode.JOINTS else cfg.max_rel_mov_cart + + + # if sim_cfg_data.root_frame_objects is None: + # sim_cfg_data.root_frame_objects = {} + # sim_cfg_data.task_cfg = PickTaskConfig(robot_name="right") + + env_rel = scene.create_env(sim_cfg_data) + + return StorageWrapper( + env_rel, + cfg.record_path, + cfg.instruction, + batch_size=32, + max_rows_per_group=2, + max_rows_per_file=10, + allow_wrapper_instruction=False + ) + + +def main(): + cfg = load_inference_config() + env_rel = get_env(cfg) + env_rel.reset() + + # Path(VIDEO_PATH).mkdir(parents=True, exist_ok=True) + # timestamp = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + + # camera_set = env_rel.get_wrapper_attr("camera_set") + # camera_set.record_video(Path(VIDEO_PATH), timestamp) + + # env = RHCWrapper(env, exec_horizon=1) + + controller = ModelInference(env_rel, cfg) + with env_rel: + worker = threading.Thread(target=controller.loop, name="model-inference", daemon=True) + worker.start() + command_loop(controller) + worker.join() + + +if __name__ == "__main__": + main() diff --git a/examples/inference/requirements.txt b/examples/inference/requirements.txt new file mode 100644 index 00000000..cb843744 --- /dev/null +++ b/examples/inference/requirements.txt @@ -0,0 +1 @@ +vlagents @ git+https://github.com/RobotControlStack/vlagents.git@lerobot \ No newline at end of file diff --git a/examples/teleop/franka.py b/examples/teleop/franka.py index 2f0d6ede..f3951c98 100644 --- a/examples/teleop/franka.py +++ b/examples/teleop/franka.py @@ -26,22 +26,26 @@ "right": "0", } - -ROBOT_INSTANCE = RobotPlatform.SIMULATION -# ROBOT_INSTANCE = RobotPlatform.HARDWARE +# use `udevadm info -a -n /dev/ttyUSB0 | grep serial` +# to find out the serial numbers +ROBOTIQ_SERIAL = { + "left": "DAAQMPDC", + "right": "DAAQMJHX", +} RECORD_FPS = 30 # set camera dict to none disable cameras -# CAMERA_DICT = { -# "left_wrist": "230422272017", -# "right_wrist": "230422271040", -# "side": "243522070385", -# "bird_eye": "243522070364", -# } -CAMERA_DICT = None +CAMERA_DICT = { + "right_wrist": "230422272017", + "left_wrist": "230422271040", + # "side": "243522070385", + # "bird_eye": "243522070364", +} +# CAMERA_DICT = None ZED_CAMERA_DICT = { - "zed": "19928076", + "head": "19928076", } +# ZED_CAMERA_DICT = None MQ3_ADDR = "10.42.0.1" INCLUDE_DEPTH = False @@ -117,8 +121,9 @@ def get_env(): for name, identifier in ZED_CAMERA_DICT.items() }, kwargs={ - "enable_depth": False, + "enable_depth": INCLUDE_DEPTH, "enable_imu": False, + "include_right": True, }, ) if DIGIT_DICT is not None: @@ -137,11 +142,19 @@ def get_env(): hw_cfg.camera_cfgs = camera_cfgs or None hw_cfg.control_mode = config.operator_class.control_mode[0] hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH + hw_cfg.wrapper_cfg.binary_gripper = False + # hw_cfg.wrapper_cfg.binary_gripper = True hw_cfg.max_relative_movement = ( 0.5 if config.operator_class.control_mode[0] == ControlMode.JOINTS else (0.5, np.deg2rad(90)) ) hw_cfg.relative_to = config.operator_class.control_mode[1] hw_cfg.robot_to_shared_base_frame = robot2world + hw_cfg.robot_cfgs["left"].ignore_realtime = True + hw_cfg.robot_cfgs["right"].ignore_realtime = True + hw_cfg.robot_cfgs["left"].speed_factor = 0.3 + hw_cfg.robot_cfgs["right"].speed_factor = 0.3 + hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"] + hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"] env_rel = env_creator.create_env(hw_cfg) operator = GelloOperator(config) if isinstance(config, GelloConfig) else QuestOperator(config) else: diff --git a/extensions/rcs_fr3/src/hw/Franka.cpp b/extensions/rcs_fr3/src/hw/Franka.cpp index 1fbb140d..4d0eb285 100644 --- a/extensions/rcs_fr3/src/hw/Franka.cpp +++ b/extensions/rcs_fr3/src/hw/Franka.cpp @@ -220,6 +220,11 @@ void Franka::check_for_background_errors() { } } +void Franka::clear_background_error() { + std::lock_guard lock(this->exception_mutex); + this->background_exception = nullptr; +} + void Franka::osc_set_cartesian_position( const common::Pose& desired_pose_EE_in_base_frame) { this->check_for_background_errors(); @@ -335,8 +340,7 @@ void Franka::osc() { // torques handler if (this->running_controller.load() == Controller::none) { - franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}; - return franka::MotionFinished(zero_torques); + return franka::MotionFinished(franka::Torques(robot_state.tau_J_d)); } // TO BE replaced // if (!this->control_thread_running && dq.maxCoeff() < 0.0001) { @@ -541,9 +545,7 @@ void Franka::joint_controller() { // torques handler if (this->running_controller.load() == Controller::none) { - // TODO: test if this also works when the robot is moving - franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}; - return franka::MotionFinished(zero_torques); + return franka::MotionFinished(franka::Torques(robot_state.tau_J_d)); } common::Vector7d desired_q; @@ -667,6 +669,7 @@ void Franka::automatic_error_recovery() { void Franka::reset() { this->stop_control_thread(); this->automatic_error_recovery(); + this->clear_background_error(); } void Franka::wait_milliseconds(int milliseconds) { diff --git a/extensions/rcs_fr3/src/hw/Franka.h b/extensions/rcs_fr3/src/hw/Franka.h index 6054b924..9601d343 100644 --- a/extensions/rcs_fr3/src/hw/Franka.h +++ b/extensions/rcs_fr3/src/hw/Franka.h @@ -93,6 +93,7 @@ class Franka : public common::Robot { void joint_controller(); void zero_torque_controller(); void check_for_background_errors(); + void clear_background_error(); public: Franka(const FrankaConfig& cfg, diff --git a/extensions/rcs_fr3/src/rcs_fr3/configs.py b/extensions/rcs_fr3/src/rcs_fr3/configs.py index 3eaa8b0a..baf47eaf 100644 --- a/extensions/rcs_fr3/src/rcs_fr3/configs.py +++ b/extensions/rcs_fr3/src/rcs_fr3/configs.py @@ -152,6 +152,7 @@ def config(self) -> FR3MultiHardwareEnvCreatorConfig: class FrankaDuoEnv(DefaultFR3MultiHardwareEnv): + # use `udevadm info -a -n /dev/ttyUSB0 | grep serial` to find out the serial numbers left_gripper_serial_number = "DAAQMJHX" right_gripper_serial_number = "DAAQMPDC" diff --git a/extensions/rcs_fr3/src/rcs_fr3/creators.py b/extensions/rcs_fr3/src/rcs_fr3/creators.py index 2225a577..01386503 100644 --- a/extensions/rcs_fr3/src/rcs_fr3/creators.py +++ b/extensions/rcs_fr3/src/rcs_fr3/creators.py @@ -15,6 +15,7 @@ GripperWrapper, HandWrapper, HardwareEnv, + LimitedAbsoluteAction, MultiRobotWrapper, RelativeActionSpace, RelativeTo, @@ -203,10 +204,12 @@ def create_env(self, cfg: FR3HardwareEnvCreatorConfig) -> gym.Env: camera_set.start() camera_set.wait_for_frames() logger.info("CameraSet started") - env = CameraSetWrapper(env, camera_set) + env = CameraSetWrapper(env, camera_set, cfg.wrapper_cfg.include_depth) if cfg.relative_to != RelativeTo.NONE: env = RelativeActionSpace(env, max_mov=cfg.max_relative_movement, relative_to=cfg.relative_to) + else: + env = LimitedAbsoluteAction(env, max_mov=cfg.max_relative_movement) return CoverWrapper(env) def config(self) -> FR3HardwareEnvCreatorConfig: @@ -236,7 +239,7 @@ def create_env(self, cfg: FR3MultiHardwareEnvCreatorConfig) -> gym.Env: camera_set.start() camera_set.wait_for_frames() logger.info("CameraSet started") - env = CameraSetWrapper(env, camera_set) + env = CameraSetWrapper(env, camera_set, cfg.wrapper_cfg.include_depth) return CoverWrapper(env) def config(self) -> FR3MultiHardwareEnvCreatorConfig: diff --git a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py index a5ed5736..db4be1fa 100644 --- a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py +++ b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py @@ -43,10 +43,12 @@ def __init__(self, cfg: RobotiQ2F85GripperConfig): super().__init__() self._cfg: RobotiQ2F85GripperConfig = cfg self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number) + self._last_normalized_width = 1.0 + self.gripper.reset() def get_normalized_width(self) -> float: - # value between 0 and 1 (0 is closed) - return self.gripper.opening / 85 + # Return the last commanded width to avoid a synchronous Modbus read on every env step. + return self._last_normalized_width def grasp(self) -> None: """ @@ -61,7 +63,7 @@ def open(self) -> None: self.set_normalized_width(1.0) def reset(self) -> None: - self.gripper.reset() + self.open() def set_normalized_width(self, width: float, force: float = 0) -> None: """ @@ -70,6 +72,7 @@ def set_normalized_width(self, width: float, force: float = 0) -> None: if not (0 <= width <= 1): msg = f"Width must be between 0 and 1, got {width}." raise ValueError(msg) + self._last_normalized_width = width abs_width = width * 85 self.gripper.go_to( opening=float(abs_width), diff --git a/extensions/rcs_zed/src/rcs_zed/__main__.py b/extensions/rcs_zed/src/rcs_zed/__main__.py index cfe51fa5..8e2558f4 100644 --- a/extensions/rcs_zed/src/rcs_zed/__main__.py +++ b/extensions/rcs_zed/src/rcs_zed/__main__.py @@ -1,4 +1,6 @@ import logging +from pathlib import Path +from time import monotonic import cv2 import typer @@ -14,6 +16,36 @@ def _display_frame(window_name: str, frame): cv2.imshow(window_name, frame.camera.color.data[:, :, ::-1]) +def _resolve_serial(serial: str | None) -> str: + if serial is not None: + return serial + + try: + devices = ZEDCameraSet.enumerate_connected_devices() + except RuntimeError as exc: + msg = str(exc) + raise typer.BadParameter(msg) from exc + if len(devices) == 0: + msg = "No ZED devices connected." + raise typer.BadParameter(msg) + return next(iter(devices)) + + +def _create_rgb_camera(serial: str, width: int, height: int, fps: int) -> ZEDCameraSet: + return ZEDCameraSet( + cameras={ + "viewer": common.BaseCameraConfig( + identifier=serial, + resolution_width=width, + resolution_height=height, + frame_rate=fps, + ) + }, + enable_depth=False, + enable_imu=False, + ) + + @zed_app.command() def serials(): """Reads out the serial numbers and models of connected ZED devices via the SDK.""" @@ -39,29 +71,8 @@ def rgb_view( window_name: str = typer.Option("ZED RGB", help="OpenCV window title."), ): """Open a live RGB window using the RCS ZED camera interface.""" - if serial is None: - try: - devices = ZEDCameraSet.enumerate_connected_devices() - except RuntimeError as exc: - msg = str(exc) - raise typer.BadParameter(msg) from exc - if len(devices) == 0: - msg = "No ZED devices connected." - raise typer.BadParameter(msg) - serial = next(iter(devices)) - - camera = ZEDCameraSet( - cameras={ - "viewer": common.BaseCameraConfig( - identifier=serial, - resolution_width=width, - resolution_height=height, - frame_rate=fps, - ) - }, - enable_depth=False, - enable_imu=False, - ) + serial = _resolve_serial(serial) + camera = _create_rgb_camera(serial, width, height, fps) try: camera.open() @@ -81,6 +92,53 @@ def rgb_view( cv2.destroyAllWindows() +@zed_app.command("rgb-snapshot") +def rgb_snapshot( + serial: str | None = typer.Argument(None, help="Optional ZED serial number. Uses the first device if omitted."), + output: Path = typer.Option(Path("zed_latest.png"), "--output", "-o", help="PNG file to write."), + width: int = typer.Option(1280, help="Requested capture width."), + height: int = typer.Option(720, help="Requested capture height."), + fps: int = typer.Option(30, help="Requested capture frame rate."), + duration: float = typer.Option(1.0, "--duration", "-d", help="Seconds to read frames before saving."), +): + """Open a ZED camera briefly and save the latest RGB frame as a PNG.""" + if duration <= 0: + msg = "Duration must be greater than zero." + raise typer.BadParameter(msg) + if output.suffix.lower() != ".png": + msg = "Output path must end in .png." + raise typer.BadParameter(msg) + + serial = _resolve_serial(serial) + camera = _create_rgb_camera(serial, width, height, fps) + + try: + camera.open() + except Exception as exc: + msg = f"Could not start ZED camera {serial}: {exc}" + raise typer.BadParameter(msg) from exc + + latest_frame = None + frames_read = 0 + deadline = monotonic() + duration + try: + while latest_frame is None or monotonic() < deadline: + latest_frame = camera.poll_frame("viewer") + frames_read += 1 + finally: + camera.close() + + assert latest_frame is not None + output.parent.mkdir(parents=True, exist_ok=True) + image_bgr = latest_frame.camera.color.data[:, :, ::-1] + print(latest_frame.camera.color.intrinsics) + if not cv2.imwrite(str(output), image_bgr): + msg = f"Could not write PNG to {output}." + raise typer.BadParameter(msg) + + typer.echo(f"Saved {output} from ZED {serial} after reading {frames_read} frame(s).") + + def main(): zed_app() diff --git a/extensions/rcs_zed/src/rcs_zed/camera.py b/extensions/rcs_zed/src/rcs_zed/camera.py index e9fd61dd..881a1368 100644 --- a/extensions/rcs_zed/src/rcs_zed/camera.py +++ b/extensions/rcs_zed/src/rcs_zed/camera.py @@ -30,6 +30,8 @@ class ZEDFrameBundle: color: np.ndarray timestamp: float color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]] + right_color: np.ndarray | None = None + right_color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]] | None = None depth: np.ndarray | None = None accel: np.ndarray | None = None gyro: np.ndarray | None = None @@ -47,12 +49,20 @@ def _intrinsics_matrix(fx: float, fy: float, cx: float, cy: float) -> np.ndarray class PyZEDCameraHandle: - def __init__(self, camera: typing.Any, device_info: ZEDDeviceInfo, color_intrinsics: np.ndarray): + def __init__( + self, + camera: typing.Any, + device_info: ZEDDeviceInfo, + color_intrinsics: np.ndarray, + right_color_intrinsics: np.ndarray | None = None, + ): self.camera = camera self.device_info = device_info self.color_intrinsics = color_intrinsics + self.right_color_intrinsics = right_color_intrinsics self.runtime_parameters = sl.RuntimeParameters() # type: ignore[union-attr] self.image_mat = sl.Mat() # type: ignore[union-attr] + self.right_image_mat = sl.Mat() # type: ignore[union-attr] self.depth_mat = sl.Mat() # type: ignore[union-attr] self.sensors_data = sl.SensorsData() # type: ignore[union-attr] @@ -69,7 +79,7 @@ def _timestamp_seconds(self) -> float: pass return time() - def grab_frame(self) -> ZEDFrameBundle: + def grab_frame(self, include_right: bool = False) -> ZEDFrameBundle: err = self.camera.grab(self.runtime_parameters) if err != sl.ERROR_CODE.SUCCESS: # type: ignore[union-attr] msg = f"Failed to grab ZED frame: {err}" @@ -82,6 +92,15 @@ def grab_frame(self) -> ZEDFrameBundle: raise RuntimeError(msg) color_rgb = color_raw[:, :, :3][:, :, ::-1] if color_raw.shape[2] == 4 else color_raw[:, :, ::-1] + right_color = None + if include_right: + self.camera.retrieve_image(self.right_image_mat, sl.VIEW.RIGHT) # type: ignore[union-attr] + right_raw = np.array(self.right_image_mat.get_data(), copy=True) + if right_raw.ndim != 3: + msg = f"Unexpected ZED right image shape {right_raw.shape}" + raise RuntimeError(msg) + right_color = right_raw[:, :, :3][:, :, ::-1] if right_raw.shape[2] == 4 else right_raw[:, :, ::-1] + depth = None if self.device_info.has_depth: self.camera.retrieve_measure(self.depth_mat, sl.MEASURE.DEPTH) # type: ignore[union-attr] @@ -105,6 +124,8 @@ def grab_frame(self) -> ZEDFrameBundle: return ZEDFrameBundle( color=color_rgb, + right_color=right_color, + right_color_intrinsics=self.right_color_intrinsics if right_color is not None else None, depth=depth, accel=accel, gyro=gyro, @@ -185,6 +206,7 @@ def open_camera( *, enable_depth: bool, enable_imu: bool, + include_right: bool, ) -> PyZEDCameraHandle: cls._require_sdk() assert sl is not None @@ -206,6 +228,7 @@ def open_camera( information = camera.get_camera_information() calibration = information.camera_configuration.calibration_parameters left_cam = calibration.left_cam + right_cam = calibration.right_cam info = ZEDDeviceInfo( serial=str(config.identifier), model=cls._model_to_string(information.camera_model), @@ -213,7 +236,15 @@ def open_camera( has_imu=enable_imu and cls._device_has_imu(information), ) intrinsics = _intrinsics_matrix(left_cam.fx, left_cam.fy, left_cam.cx, left_cam.cy) - return PyZEDCameraHandle(camera=camera, device_info=info, color_intrinsics=intrinsics) + right_intrinsics = None + if include_right: + right_intrinsics = _intrinsics_matrix(right_cam.fx, right_cam.fy, right_cam.cx, right_cam.cy) + return PyZEDCameraHandle( + camera=camera, + device_info=info, + color_intrinsics=intrinsics, + right_color_intrinsics=right_intrinsics, + ) def __init__( self, @@ -221,6 +252,7 @@ def __init__( calibration_strategy: dict[str, CalibrationStrategy] | None = None, enable_depth: bool = True, enable_imu: bool = True, + include_right: bool = False, ) -> None: self.cameras = cameras if calibration_strategy is None: @@ -228,12 +260,14 @@ def __init__( self.calibration_strategy = calibration_strategy self.enable_depth = enable_depth self.enable_imu = enable_imu + self.include_right = include_right self._logger = logging.getLogger(__name__) - self._camera_names = list(self.cameras.keys()) + self._camera_names = self._logical_camera_names() self._available_devices: dict[str, ZEDDeviceInfo] = {} self._enabled_devices: dict[str, PyZEDCameraHandle] = {} self._frame_buffer_lock: dict[str, threading.Lock] = {} self._frame_buffer: dict[str, list[Frame]] = {} + self._pending_right_bundles: dict[str, ZEDFrameBundle] = {} assert ( len({camera.resolution_width for camera in self.cameras.values()}) == 1 @@ -246,7 +280,63 @@ def camera_names(self) -> list[str]: return self._camera_names def config(self, camera_name) -> common.BaseCameraConfig: - return self.cameras[camera_name] + return self.cameras[self._physical_camera_name(camera_name)] + + @staticmethod + def _right_camera_name(camera_name: str) -> str: + return f"{camera_name}_right" + + def _physical_camera_name(self, camera_name: str) -> str: + if camera_name in self.cameras: + return camera_name + if self.include_right and camera_name.endswith("_right"): + physical_name = camera_name.removesuffix("_right") + if physical_name in self.cameras: + return physical_name + msg = f"Camera {camera_name} not found in the enabled devices" + raise AssertionError(msg) + + def _is_right_camera_name(self, camera_name: str) -> bool: + return camera_name not in self.cameras and self._physical_camera_name(camera_name) != camera_name + + def _logical_camera_names(self) -> list[str]: + names = list(self.cameras.keys()) + if self.include_right: + names.extend(self._right_camera_name(camera_name) for camera_name in self.cameras) + return names + + def _frame_from_bundle(self, camera_name: str, bundle: ZEDFrameBundle) -> Frame: + physical_name = self._physical_camera_name(camera_name) + extrinsics = self.calibration_strategy[physical_name].get_extrinsics() + is_right = self._is_right_camera_name(camera_name) + color_data = bundle.right_color if is_right else bundle.color + color_intrinsics = bundle.right_color_intrinsics if is_right else bundle.color_intrinsics + if color_data is None or color_intrinsics is None: + msg = f"Missing {'right' if is_right else 'left'} image data for {camera_name}" + raise RuntimeError(msg) + + color = DataFrame( + data=color_data, + timestamp=bundle.timestamp, + intrinsics=color_intrinsics, + extrinsics=extrinsics, + ) + depth = None + if not is_right and bundle.depth is not None: + depth = DataFrame( + data=bundle.depth, + timestamp=bundle.timestamp, + intrinsics=bundle.color_intrinsics, + extrinsics=extrinsics, + ) + + accel = DataFrame(data=bundle.accel, timestamp=bundle.timestamp) if bundle.accel is not None else None + gyro = DataFrame(data=bundle.gyro, timestamp=bundle.timestamp) if bundle.gyro is not None else None + return Frame( + camera=CameraFrame(color=color, depth=depth), + imu=IMUFrame(accel=accel, gyro=gyro) if (accel is not None or gyro is not None) else None, + avg_timestamp=bundle.timestamp, + ) def update_available_devices(self): self._available_devices = self.enumerate_connected_devices() @@ -256,6 +346,7 @@ def open(self): self._enabled_devices = {} self._frame_buffer = {} self._frame_buffer_lock = {} + self._pending_right_bundles = {} for camera_name, camera_cfg in self.cameras.items(): if camera_cfg.identifier not in self._available_devices: @@ -267,6 +358,7 @@ def open(self): camera_cfg, enable_depth=self.enable_depth and device_info.has_depth, enable_imu=self.enable_imu and device_info.has_imu, + include_right=self.include_right, ) self._enabled_devices[camera_name] = opened self._frame_buffer[camera_name] = [] @@ -274,44 +366,34 @@ def open(self): def poll_frame(self, camera_name: str) -> Frame: assert camera_name in self.camera_names, f"Camera {camera_name} not found in the enabled devices" - device = self._enabled_devices[camera_name] - bundle = device.grab_frame() - extrinsics = self.calibration_strategy[camera_name].get_extrinsics() - - color = DataFrame( - data=bundle.color, - timestamp=bundle.timestamp, - intrinsics=bundle.color_intrinsics, - extrinsics=extrinsics, - ) - depth = None - if bundle.depth is not None: - depth = DataFrame( - data=bundle.depth, - timestamp=bundle.timestamp, - intrinsics=bundle.color_intrinsics, - extrinsics=extrinsics, - ) - - accel = DataFrame(data=bundle.accel, timestamp=bundle.timestamp) if bundle.accel is not None else None - gyro = DataFrame(data=bundle.gyro, timestamp=bundle.timestamp) if bundle.gyro is not None else None - - frame = Frame( - camera=CameraFrame(color=color, depth=depth), - imu=IMUFrame(accel=accel, gyro=gyro) if (accel is not None or gyro is not None) else None, - avg_timestamp=bundle.timestamp, - ) - - with self._frame_buffer_lock[camera_name]: - if len(self._frame_buffer[camera_name]) >= self.CALIBRATION_FRAME_SIZE: - self._frame_buffer[camera_name].pop(0) - self._frame_buffer[camera_name].append(copy.deepcopy(frame)) + physical_name = self._physical_camera_name(camera_name) + device = self._enabled_devices[physical_name] + + if self._is_right_camera_name(camera_name): + bundle = self._pending_right_bundles.pop(physical_name, None) + if bundle is None: + bundle = device.grab_frame(include_right=True) + else: + bundle = device.grab_frame(include_right=self.include_right) + if self.include_right and bundle.right_color is not None: + self._pending_right_bundles[physical_name] = bundle + else: + self._pending_right_bundles.pop(physical_name, None) + + frame = self._frame_from_bundle(camera_name, bundle) + + if not self._is_right_camera_name(camera_name): + with self._frame_buffer_lock[physical_name]: + if len(self._frame_buffer[physical_name]) >= self.CALIBRATION_FRAME_SIZE: + self._frame_buffer[physical_name].pop(0) + self._frame_buffer[physical_name].append(copy.deepcopy(frame)) return frame def close(self): for device in self._enabled_devices.values(): device.close() self._enabled_devices = {} + self._pending_right_bundles = {} def calibrate(self) -> bool: for camera_name in self.cameras: diff --git a/extensions/rcs_zed/tests/test_zed_extension.py b/extensions/rcs_zed/tests/test_zed_extension.py index 32e8294f..cb17acec 100644 --- a/extensions/rcs_zed/tests/test_zed_extension.py +++ b/extensions/rcs_zed/tests/test_zed_extension.py @@ -20,8 +20,10 @@ def __init__(self, device_info: ZEDDeviceInfo, color_intrinsics: np.ndarray, fra self.color_intrinsics = color_intrinsics self._frame_bundle = frame_bundle self.closed = False + self.grab_calls: list[bool] = [] - def grab_frame(self) -> ZEDFrameBundle: + def grab_frame(self, include_right: bool = False) -> ZEDFrameBundle: + self.grab_calls.append(include_right) return self._frame_bundle def close(self): @@ -31,7 +33,7 @@ def close(self): class PatchZedState(TypedDict): devices: dict[str, ZEDDeviceInfo] opened: dict[str, FakeOpenedZEDCamera] - open_calls: list[tuple[str, bool, bool]] + open_calls: list[tuple[str, bool, bool, bool]] @pytest.fixture() @@ -41,8 +43,8 @@ def patch_zed(monkeypatch) -> PatchZedState: def fake_enumerate(_cls): return state["devices"] - def fake_open(_cls, config: common.BaseCameraConfig, *, enable_depth: bool, enable_imu: bool): - state["open_calls"].append((config.identifier, enable_depth, enable_imu)) + def fake_open(_cls, config: common.BaseCameraConfig, *, enable_depth: bool, enable_imu: bool, include_right: bool): + state["open_calls"].append((config.identifier, enable_depth, enable_imu, include_right)) return state["opened"][config.identifier] monkeypatch.setattr(ZEDCameraSet, "enumerate_connected_devices", classmethod(fake_enumerate)) @@ -78,7 +80,7 @@ def test_zed_frame_mapping_depth_scaling_and_imu_downgrade(patch_zed): frame = camera_set.poll_frame("wrist") assert frame.camera.color.intrinsics is not None - assert patch_zed["open_calls"] == [("123", True, False)] + assert patch_zed["open_calls"] == [("123", True, False, False)] assert np.array_equal(frame.camera.color.data, color) assert np.array_equal(frame.camera.depth.data, depth) # type: ignore[union-attr] assert np.array_equal(frame.camera.color.intrinsics, intrinsics) @@ -113,7 +115,51 @@ def test_zed_enumeration_and_multi_camera_open(patch_zed): }, ) camera_set.open() - assert patch_zed["open_calls"] == [("111", True, False), ("222", True, True)] + assert patch_zed["open_calls"] == [("111", True, False, False), ("222", True, True, False)] camera_set.close() assert opened["111"].closed assert opened["222"].closed + + +def test_zed_include_right_adds_logical_right_camera_without_double_grab(patch_zed): + left_intrinsics = np.array([[100.0, 0.0, 10.0, 0.0], [0.0, 110.0, 20.0, 0.0], [0.0, 0.0, 1.0, 0.0]]) + right_intrinsics = np.array([[120.0, 0.0, 12.0, 0.0], [0.0, 130.0, 22.0, 0.0], [0.0, 0.0, 1.0, 0.0]]) + left_color = np.arange(27, dtype=np.uint8).reshape(3, 3, 3) + right_color = np.arange(27, 54, dtype=np.uint8).reshape(3, 3, 3) + frame_bundle = ZEDFrameBundle( + color=left_color, + right_color=right_color, + timestamp=12.5, + color_intrinsics=left_intrinsics, + right_color_intrinsics=right_intrinsics, + ) + device_info = ZEDDeviceInfo(serial="123", model="ZED Mini", has_depth=True, has_imu=False) + opened = FakeOpenedZEDCamera(device_info=device_info, color_intrinsics=left_intrinsics, frame_bundle=frame_bundle) + patch_zed["devices"] = {"123": device_info} + patch_zed["opened"] = {"123": opened} + + camera_set = ZEDCameraSet( + cameras={ + "wrist": common.BaseCameraConfig( + identifier="123", resolution_width=1280, resolution_height=720, frame_rate=30 + ) + }, + include_right=True, + ) + + assert camera_set.camera_names == ["wrist", "wrist_right"] + assert camera_set.config("wrist_right").identifier == "123" + + camera_set.open() + assert patch_zed["open_calls"] == [("123", True, False, True)] + left_frame = camera_set.poll_frame("wrist") + right_frame = camera_set.poll_frame("wrist_right") + + assert opened.grab_calls == [True] + assert np.array_equal(left_frame.camera.color.data, left_color) + assert np.array_equal(right_frame.camera.color.data, right_color) + assert np.array_equal(left_frame.camera.color.intrinsics, left_intrinsics) + assert np.array_equal(right_frame.camera.color.intrinsics, right_intrinsics) + assert left_frame.avg_timestamp == right_frame.avg_timestamp == 12.5 + assert left_frame.camera.depth is None + assert right_frame.camera.depth is None diff --git a/python/rcs/__main__.py b/python/rcs/__main__.py index 32dd98a4..4f97eae0 100644 --- a/python/rcs/__main__.py +++ b/python/rcs/__main__.py @@ -21,6 +21,7 @@ run_conversion, ) from rcs.sim.replayer import replay as replay_dataset +from rcs.utils import export_episode_videos app = typer.Typer() @@ -195,5 +196,27 @@ def lerobot_convert( ) +@app.command("episode-videos") +def episode_videos( + dataset: Annotated[ + Path, + typer.Argument( + exists=True, + help="Parquet dataset file or directory with parquet parts.", + ), + ], + output: Annotated[ + Path, + typer.Argument( + exists=False, + help="Output directory for episode mp4 files.", + ), + ], + fps: Annotated[int, typer.Option(help="Video frames per second.")] = DEFAULT_FPS, + n: Annotated[int, typer.Option(help="Maximum number of episodes to export. -1 means all.")] = -1, +): + export_episode_videos(dataset=dataset, output=output, fps=fps, n=n) + + if __name__ == "__main__": app() diff --git a/python/rcs/camera/hw.py b/python/rcs/camera/hw.py index 460f26f7..da8dc8d9 100644 --- a/python/rcs/camera/hw.py +++ b/python/rcs/camera/hw.py @@ -179,15 +179,19 @@ def get_timestamp_frames(self, ts: datetime) -> FrameSet | None: def stop(self): """Stops the polling of the cameras.""" self.running = False - assert self._thread is not None - self._thread.join() - self._thread = None + if self._thread is not None: + self._thread.join() + self._thread = None def close(self): - if self.running and self._thread is not None: + if self.running: + self.running = False + for camera in self.cameras: + camera.close() self.stop() - for camera in self.cameras: - camera.close() + else: + for camera in self.cameras: + camera.close() self.stop_video() def start(self, warm_up: bool = True): diff --git a/python/rcs/envs/base.py b/python/rcs/envs/base.py index a13592d2..40d1dbeb 100644 --- a/python/rcs/envs/base.py +++ b/python/rcs/envs/base.py @@ -593,6 +593,94 @@ class RelativeTo(Enum): CONFIGURED_ORIGIN = auto() NONE = auto() +class LimitedAbsoluteAction(ActObsInfoWrapper): + ABSOLUTE_ACTION_KEY = "limited_absolute_action" + + def __init__( + self, + env, + max_mov: float | tuple[float, float] | None = None, + ): + super().__init__(env) + self.joints_key = get_space_keys(LimitedJointsRelDictType)[0] + self.trpy_key = get_space_keys(LimitedTRPYRelDictType)[0] + self.tquat_key = get_space_keys(LimitedTQuatRelDictType)[0] + self._absolute_action: common.Pose | VecType | None = None + self.max_mov = max_mov + + def _get_current(self) -> np.ndarray: + if self.get_wrapper_attr("get_control_mode")() == ControlMode.JOINTS: + return self._robot.get_joint_position() + else: + return self._robot.get_cartesian_position() + + def action(self, action: dict[str, Any]) -> dict[str, Any]: + if self.max_mov is None: + return action + if self.get_wrapper_attr("get_control_mode")() == ControlMode.JOINTS: + assert isinstance(self.max_mov, float) + low, high = self._robot.get_config().joint_limits + curr = self._get_current() + delta = action[self.joints_key] - curr + limited_delta = np.clip(delta, -self.max_mov, self.max_mov) + clipped_joints = np.clip(curr + limited_delta, low, high) + action.update(JointsDictType(joints=clipped_joints)) + self._absolute_action = clipped_joints + + elif self.get_wrapper_attr("get_control_mode")() == ControlMode.CARTESIAN_TRPY: + assert isinstance(self.max_mov, tuple) + curr = cast(common.Pose, self._get_current()) + pose_space = cast(gym.spaces.Box, get_space(TRPYDictType).spaces[self.trpy_key]) + target_pose = common.Pose( + translation=action[self.trpy_key][:3], + rpy_vector=action[self.trpy_key][3:], + ) + pose_delta = common.Pose( + translation=target_pose.translation() - curr.translation(), # type: ignore + rpy_vector=(target_pose * curr.inverse()).rotation_rpy().as_vector(), + ) + limited_delta = pose_delta.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1]) + clipped_pose = np.concatenate( + [ + np.clip(curr.translation() + limited_delta.translation(), pose_space.low[:3], pose_space.high[:3]), + (limited_delta * curr).rotation_rpy().as_vector(), + ] + ) + action.update(TRPYDictType(xyzrpy=clipped_pose)) + self._absolute_action = clipped_pose + + elif self.get_wrapper_attr("get_control_mode")() == ControlMode.CARTESIAN_TQuat: + assert isinstance(self.max_mov, tuple) + curr = cast(common.Pose, self._get_current()) + pose_space = cast(gym.spaces.Box, get_space(TQuatDictType).spaces[self.tquat_key]) + target_pose = common.Pose( + translation=action[self.tquat_key][:3], + quaternion=action[self.tquat_key][3:], + ) + pose_delta = target_pose * curr.inverse() + limited_delta = pose_delta.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1]) + clipped_pose = np.concatenate( + [ + np.clip(curr.translation() + limited_delta.translation(), pose_space.low[:3], pose_space.high[:3]), + (limited_delta * curr).rotation_q(), + ] + ) + action.update(TQuatDictType(tquat=clipped_pose)) # type: ignore + self._absolute_action = clipped_pose + else: + msg = "Control mode not recognized!" + raise ValueError(msg) + return action + + def observation(self, observation: dict, info: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]: + if self._absolute_action is not None: + info[self.ABSOLUTE_ACTION_KEY] = ( + list(self._absolute_action.translation()) + list(self._absolute_action.rotation_q()) + if isinstance(self._absolute_action, common.Pose) + else self._absolute_action + ) + return observation, info + class RelativeActionSpace(ActObsInfoWrapper): DEFAULT_MAX_CART_MOV = 0.5 @@ -941,6 +1029,14 @@ def __init__(self, env, gripper: common.Gripper, binary: bool = True): self.gripper = gripper self._last_gripper_cmd = None + def _command_changed(self, gripper_action: np.ndarray) -> bool: + if self._last_gripper_cmd is None: + return True + last_action = np.asarray(self._last_gripper_cmd, dtype=np.float32) + if self.binary: + return not np.array_equal(gripper_action, last_action) + return not np.allclose(gripper_action, last_action, atol=1e-3, rtol=0.0) + def close(self): self.gripper.close() super().close() @@ -972,13 +1068,14 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]: gripper_action = [gripper_action] # type: ignore if self.binary: gripper_action = np.round(gripper_action) - gripper_action = np.clip(gripper_action, 0.0, 1.0) + gripper_action = np.clip(np.asarray(gripper_action, dtype=np.float32), 0.0, 1.0) - if self.binary: - self.gripper.grasp() if gripper_action == self.BINARY_GRIPPER_CLOSED else self.gripper.open() - else: - self.gripper.set_normalized_width(gripper_action[0]) - self._last_gripper_cmd = gripper_action + if self._command_changed(gripper_action): + if self.binary: + self.gripper.grasp() if gripper_action[0] < 0.5 else self.gripper.open() + else: + self.gripper.set_normalized_width(float(gripper_action[0])) + self._last_gripper_cmd = gripper_action.tolist() del action[self.gripper_key] return action diff --git a/python/rcs/envs/scenes.py b/python/rcs/envs/scenes.py index ef281673..6cedeae3 100644 --- a/python/rcs/envs/scenes.py +++ b/python/rcs/envs/scenes.py @@ -14,6 +14,7 @@ ControlMode, CoverWrapper, GripperWrapper, + LimitedAbsoluteAction, MultiRobotWrapper, RelativeActionSpace, RelativeTo, @@ -363,6 +364,8 @@ def create_env_from_model(self, cfg: SimEnvCreatorConfig, mjmodel: MjModel) -> g env = RelativeActionSpace( env, max_mov=prefixed_cfg.max_relative_movement, relative_to=prefixed_cfg.relative_to ) + else: + env = LimitedAbsoluteAction(env, max_mov=cfg.max_relative_movement) envs[robot_name] = env env = MultiRobotWrapper(envs, prefixed_cfg.robot_to_shared_base_frame) diff --git a/python/rcs/envs/storage_wrapper.py b/python/rcs/envs/storage_wrapper.py index 497a1f03..58929bf3 100644 --- a/python/rcs/envs/storage_wrapper.py +++ b/python/rcs/envs/storage_wrapper.py @@ -182,15 +182,18 @@ def _writer_worker(self): ), ) - def _flush(self): + def _flush(self, keep_last: bool = False): + rows = self.buffer[:-1] if keep_last else self.buffer + if len(rows) == 0: + return if self.schema is None: - temp_batch = pa.RecordBatch.from_pylist(self.buffer) + temp_batch = pa.RecordBatch.from_pylist(rows) self.schema = temp_batch.schema - self.buffer[-1]["success"] = self._success - batch = pa.RecordBatch.from_pylist(self.buffer, schema=self.schema) + rows[-1]["success"] = self._success + batch = pa.RecordBatch.from_pylist(rows, schema=self.schema) self.queue.put(batch) - self.buffer.clear() + self.buffer = self.buffer[-1:] if keep_last else [] def _flatten_arrays(self, d: dict[str, Any]): # NOTE: list / tuples of arrays not supported @@ -284,7 +287,9 @@ def step(self, action): self.step_cnt += 1 if len(self.buffer) == self.batch_size: - self._flush() + # Keep the most recent row in memory so a success() right before reset() + # can still mark the episode as successful even if the batch boundary was hit. + self._flush(keep_last=True) return obs_original, reward, terminated, truncated, info diff --git a/python/rcs/operator/quest.py b/python/rcs/operator/quest.py index 322b870a..158850f0 100644 --- a/python/rcs/operator/quest.py +++ b/python/rcs/operator/quest.py @@ -128,6 +128,14 @@ def _reset_state(self): self._last_controller_pose[controller] = Pose() self._grp_pos[controller] = 1 + def _swap_controller_input(self, input_data: dict[str, Any]) -> dict[str, Any]: + if not self.config.switched_left_right: + return input_data + swapped = copy.copy(input_data) + swapped["left"] = input_data["right"] + swapped["right"] = input_data["left"] + return swapped + @staticmethod def _normalize_axis(value: bool | float | int) -> float: if isinstance(value, bool): @@ -139,13 +147,6 @@ def consume_commands(self) -> TeleopCommands: with self._cmd_lock: cmds = copy.copy(self._commands) self._commands = TeleopCommands() - if self.config.switched_left_right: - swapped_reset_origin_to_current = {} - if "left" in cmds.reset_origin_to_current: - swapped_reset_origin_to_current["right"] = cmds.reset_origin_to_current["left"] - if "right" in cmds.reset_origin_to_current: - swapped_reset_origin_to_current["left"] = cmds.reset_origin_to_current["right"] - cmds.reset_origin_to_current = swapped_reset_origin_to_current return cmds def reset_operator_state(self): @@ -176,11 +177,7 @@ def consume_action(self) -> dict[str, ArmWithGripper]: tquat=np.concatenate([transform.translation(), transform.rotation_q()]), gripper=np.array([self._grp_pos[controller]]), ) - return ( - {"left": transforms["right"], "right": transforms["left"]} - if self.config.switched_left_right - else transforms - ) + return transforms def set_camera(self, observation: dict) -> None: if not self.config.display_cameras: @@ -241,6 +238,8 @@ def run(self): logger.warning("[Quest Reader] packets arriving again") warning_raised = False + input_data = self._swap_controller_input(input_data) + # === Update Semantic Commands === with self._cmd_lock: if input_data[self._start_btn] and (self._prev_data is None or not self._prev_data[self._start_btn]): @@ -293,7 +292,8 @@ def run(self): gripper_axis = self._normalize_axis(input_data[controller][self._grp_btn[controller]]) # convert from IRIS to RCS gripper logic - self._grp_pos[controller] = 1.0 - gripper_axis + with self._resource_lock: + self._grp_pos[controller] = 1.0 - gripper_axis self._prev_data = input_data rate_limiter() diff --git a/python/rcs/utils.py b/python/rcs/utils.py index 9f9b7e7f..1571233c 100644 --- a/python/rcs/utils.py +++ b/python/rcs/utils.py @@ -1,6 +1,20 @@ +import datetime import logging +import math +import os +import subprocess +from pathlib import Path from time import perf_counter, sleep +import duckdb +os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib") +import matplotlib +matplotlib.use("Agg") +import numpy as np +import torch +from matplotlib import pyplot as plt +from torchvision.io import decode_jpeg + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -27,7 +41,6 @@ def __call__(self): if self.t is None: self.t = perf_counter() self._last_print = self.t - sleep(1 / self.frame_rate) return sleep_time = 1 / self.frame_rate - (perf_counter() - self.t) if sleep_time > 0: @@ -45,3 +58,176 @@ def __enter__(self): def __exit__(self, *args): pass + + +def _write_mp4(frames: list[np.ndarray], output_path: Path, fps: int) -> None: + if not frames: + return + + height, width = frames[0].shape[:2] + process = subprocess.Popen( + [ + "ffmpeg", + "-y", + "-f", + "rawvideo", + "-pix_fmt", + "rgb24", + "-s", + f"{width}x{height}", + "-r", + str(fps), + "-i", + "-", + "-an", + "-vf", + "pad=ceil(iw/2)*2:ceil(ih/2)*2", + "-c:v", + "libx264", + "-pix_fmt", + "yuv420p", + "-movflags", + "+faststart", + str(output_path), + ], + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + assert process.stdin is not None + for frame in frames: + process.stdin.write(np.ascontiguousarray(frame).astype(np.uint8).tobytes()) + process.stdin.close() + process.wait() + + +def _render_action_panel( + joint_history: dict[str, np.ndarray], + gripper_history: dict[str, np.ndarray], + step: int, + height: int, + width: int, +) -> np.ndarray: + fig, axes = plt.subplots( + len(joint_history), + 1, + figsize=(width / 100, height / 100), + dpi=100, + squeeze=False, + ) + x = np.arange(len(next(iter(joint_history.values())))) + for axis, robot_key in zip(axes[:, 0], joint_history, strict=True): + joints = joint_history[robot_key] + for joint_idx in range(joints.shape[1]): + axis.plot(x, joints[:, joint_idx], linewidth=1) + axis.axvline(step, color="tab:red", linewidth=2) + axis.set_xlim(0, max(len(x) - 1, 1)) + axis.set_title(robot_key) + axis.set_xlabel("step") + axis.set_ylabel("joint") + + gripper_axis = axis.twinx() + gripper_axis.plot(x, gripper_history[robot_key], color="black", linestyle="--", linewidth=2) + gripper_axis.set_ylabel("gripper") + gripper_axis.set_ylim(-0.1, 1.1) + + fig.tight_layout() + fig.canvas.draw() + image = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy() + plt.close(fig) + return image + + +def export_episode_videos( + dataset: str | Path, + output: str | Path, + fps: int = 30, + n: int = -1, +) -> None: + dataset = Path(dataset) + output = Path(output) + output.mkdir(parents=True, exist_ok=True) + + source = str(dataset / "*.parquet") if dataset.is_dir() else str(dataset) + source_escaped = source.replace("'", "''") + conn = duckdb.connect() + relation = conn.sql(f"SELECT * FROM read_parquet('{source_escaped}')") + frame_struct = relation.select("obs.frames").types[0] + action_struct = relation.select("action").types[0] + camera_names = [name for name, _ in frame_struct.children] + robot_names = [name for name, _ in relation.select("obs").types[0].children if name != "frames"] + action_fields_by_robot = { + robot: {field_name for field_name, _ in robot_struct.children} + for robot, robot_struct in action_struct.children + } + + uuids = conn.execute(f"SELECT DISTINCT uuid FROM read_parquet('{source_escaped}') ORDER BY uuid").fetchall() + for index, (episode_id,) in enumerate(uuids): + if n != -1 and index >= n: + break + + image_selects = ", ".join(f"obs.frames.{camera}.rgb.data AS {camera}" for camera in camera_names) + joint_selects = ", ".join( + ( + f"COALESCE(action.{robot}.joints, obs.{robot}.joints) AS joints_{robot}" + if "joints" in action_fields_by_robot.get(robot, set()) + else f"obs.{robot}.joints AS joints_{robot}" + ) + for robot in robot_names + ) + gripper_selects = ", ".join( + ( + f"COALESCE(CAST(action.{robot}.gripper[1] AS DOUBLE), obs.{robot}.gripper[1]) AS gripper_{robot}" + if "gripper" in action_fields_by_robot.get(robot, set()) + else f"obs.{robot}.gripper[1] AS gripper_{robot}" + ) + for robot in robot_names + ) + state_selects = ", ".join( + [ + joint_selects, + gripper_selects, + ] + ) + not_null_checks = " ".join(f"AND obs.frames.{camera}.rgb.data IS NOT NULL" for camera in camera_names) + rows = conn.execute( + f""" + SELECT timestamp, {image_selects}, {state_selects} + FROM read_parquet('{source_escaped}') + WHERE uuid = ? + {not_null_checks} + ORDER BY step + """, + [episode_id], + ).fetchall() + if not rows: + continue + + timestamp = datetime.datetime.fromtimestamp(float(rows[0][0])).strftime("%Y-%m-%d-%H-%M-%S") + frames = [] + joint_history = { + robot: np.asarray([row[1 + len(camera_names) + robot_idx] for row in rows], dtype=np.float32) + for robot_idx, robot in enumerate(robot_names) + } + gripper_history = { + robot: np.asarray([row[1 + len(camera_names) + len(robot_names) + robot_idx] for row in rows], dtype=np.float32) + for robot_idx, robot in enumerate(robot_names) + } + cols = math.ceil(math.sqrt(len(camera_names) + 1)) + rows_per_frame = math.ceil((len(camera_names) + 1) / cols) + + for step_idx, row in enumerate(rows): + decoded = [ + decode_jpeg(torch.frombuffer(bytearray(image_bytes), dtype=torch.uint8)).permute(1, 2, 0).cpu().numpy() + for image_bytes in row[1 : 1 + len(camera_names)] + ] + height, width = decoded[0].shape[:2] + decoded.append(_render_action_panel(joint_history, gripper_history, step_idx, height, width)) + tiled = np.zeros((rows_per_frame * height, cols * width, 3), dtype=np.uint8) + for camera_index, image in enumerate(decoded): + top = (camera_index // cols) * height + left = (camera_index % cols) * width + tiled[top : top + height, left : left + width] = image + frames.append(tiled) + + _write_mp4(frames, output / f"{timestamp}.mp4", fps=fps) diff --git a/python/tests/test_quest_operator.py b/python/tests/test_quest_operator.py index d5af2a78..55db719a 100644 --- a/python/tests/test_quest_operator.py +++ b/python/tests/test_quest_operator.py @@ -1,6 +1,9 @@ import threading from typing import Any, cast +import numpy as np + +from rcs._core.common import Pose from rcs.operator.interface import TeleopCommands from rcs.operator.quest import QuestOperator @@ -9,13 +12,46 @@ class _Config: switched_left_right = True +def test_swap_controller_input_swaps_left_and_right_packets(): + operator = QuestOperator.__new__(QuestOperator) + operator.config = cast(Any, _Config()) + + input_data = { + "left": {"hand_trigger": 0.1, "index_trigger": 0.2}, + "right": {"hand_trigger": 0.9, "index_trigger": 0.8}, + "A": True, + } + + swapped = QuestOperator._swap_controller_input(operator, input_data) + + assert swapped["left"] == input_data["right"] + assert swapped["right"] == input_data["left"] + assert swapped["A"] is True + + def test_consume_commands_swaps_both_reset_origin_keys(): operator = QuestOperator.__new__(QuestOperator) operator.config = cast(Any, _Config()) operator._cmd_lock = threading.Lock() - operator._commands = TeleopCommands(reset_origin_to_current={"left": True, "right": False}) + operator._commands = TeleopCommands(reset_origin_to_current={"right": True, "left": False}) cmds = QuestOperator.consume_commands(operator) assert cmds.reset_origin_to_current == {"right": True, "left": False} assert operator._commands.reset_origin_to_current == {} + + +def test_consume_action_swaps_gripper_with_controller(): + operator = QuestOperator.__new__(QuestOperator) + operator.config = cast(Any, _Config()) + operator._resource_lock = threading.Lock() + operator.controller_names = ["left", "right"] + operator._last_controller_pose = {key: Pose() for key in operator.controller_names} + operator._offset_pose = {key: Pose() for key in operator.controller_names} + operator._set_frame = {key: Pose() for key in operator.controller_names} + operator._grp_pos = {"left": 0.75, "right": 0.25} + + actions = QuestOperator.consume_action(operator) + + assert np.allclose(actions["left"]["gripper"], np.array([0.75])) + assert np.allclose(actions["right"]["gripper"], np.array([0.25]))