diff --git a/.dockerignore b/.dockerignore
index bdf319e7..8b767f67 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,3 @@
 **/build/
+real*
+test*
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 2b03ad71..9c88e253 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -53,7 +53,10 @@ RUN chmod +x /usr/local/bin/link-editable-source \
     && uv pip install --no-build-isolation /opt/rcs-src/extensions/rcs_fr3 \
     && uv pip install /opt/rcs-src/extensions/rcs_realsense \
     && uv pip install /opt/rcs-src/extensions/rcs_robotiq2f85 \
-    && uv pip install /opt/rcs-src/extensions/rcs_zed
+    && uv pip install /opt/rcs-src/extensions/rcs_zed \
+    && uv pip install /opt/rcs-src/examples/teleop/SimPublisher \
+    && uv pip install /opt/rcs-src/2f85-python-driver \
+    && uv pip install /opt/rcs-src/vlagents
 
 WORKDIR /workspace/robot-control-stack
 
diff --git a/docker/README.md b/docker/README.md
index 78bd0da4..35b4bd13 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -23,6 +23,7 @@ Notes:
 - `~/zed_models` is mounted into `/usr/local/zed/resources` to match the direct `docker run` setup.
 - `/dev/dri` is masked inside the container so host Mesa/AMD render nodes do not override the NVIDIA runtime devices.
 - NVIDIA PRIME/GLX environment variables are exported to bias OpenGL/EGL selection toward the NVIDIA stack when using X11 forwarding.
-- Python source changes are picked up from the mounted repo, including `extensions/rcs_zed`.
+- The simulator still bootstraps EGL for offscreen MuJoCo camera rendering; set `RCS_MUJOCO_DISABLE_EGL=1` only if you intentionally want to disable that path.
+- Python source changes are picked up from the mounted repo.
 - If you change C++ code in `rcs` or `rcs_fr3`, rebuild the image.
 - For non-GPU hosts, comment out the GPU-related lines in `docker/compose/dev.yml`.
diff --git a/docker/link-editable-source.sh b/docker/link-editable-source.sh
index 0b1b967a..02bf007e 100644
--- a/docker/link-editable-source.sh
+++ b/docker/link-editable-source.sh
@@ -10,26 +10,21 @@ fi
 
 SITE_PACKAGES="$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"
 
-link_mixed_package() {
+link_compiled_package() {
     src_dir="$1"
     dst_dir="$2"
-    keep_dir_name="${3:-}"
 
     if [ ! -d "$src_dir" ] || [ ! -d "$dst_dir" ]; then
         return
     fi
 
-    # Replace only the Python sources from the mounted repo and keep compiled
-    # artifacts that were installed into site-packages during image build.
-    for path in "$src_dir"/* "$src_dir"/.[!.]* "$src_dir"/..?*; do
-        [ -e "$path" ] || continue
-        name="$(basename "$path")"
-        if [ -n "$keep_dir_name" ] && [ "$name" = "$keep_dir_name" ]; then
-            continue
-        fi
-        rm -rf "$dst_dir/$name"
-        cp -as "$path" "$dst_dir/$name"
-    done
+    tmp_keep="$(mktemp -d)"
+    find "$dst_dir" -maxdepth 1 \( -name '_core*.so' -o -name 'lib*.so*' \) -exec mv {} "$tmp_keep/" \;
+    rm -rf "$dst_dir"
+    mkdir -p "$dst_dir"
+    cp -as "$src_dir/." "$dst_dir/"
+    find "$tmp_keep" -maxdepth 1 -type f -exec mv {} "$dst_dir/" \;
+    rmdir "$tmp_keep"
 }
 
 link_pure_python_package() {
@@ -44,8 +39,9 @@ link_pure_python_package() {
     ln -s "$src_dir" "$dst_dir"
 }
 
-link_mixed_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs" "_core"
-link_mixed_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3" "_core"
+link_compiled_package "$REPO_ROOT/python/rcs" "$SITE_PACKAGES/rcs"
+link_compiled_package "$REPO_ROOT/extensions/rcs_fr3/src/rcs_fr3" "$SITE_PACKAGES/rcs_fr3"
 link_pure_python_package "$REPO_ROOT/extensions/rcs_realsense/src/rcs_realsense" "$SITE_PACKAGES/rcs_realsense"
 link_pure_python_package "$REPO_ROOT/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85" "$SITE_PACKAGES/rcs_robotiq2f85"
 link_pure_python_package "$REPO_ROOT/extensions/rcs_zed/src/rcs_zed" "$SITE_PACKAGES/rcs_zed"
+link_pure_python_package "$REPO_ROOT/vlagents" "$SITE_PACKAGES/vlagents"
diff --git a/examples/inference/README.md b/examples/inference/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/inference/franka.py b/examples/inference/franka.py
new file mode 100644
index 00000000..2c2ed6ad
--- /dev/null
+++ b/examples/inference/franka.py
@@ -0,0 +1,441 @@
+import copy
+import json
+import logging
+import threading
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from queue import Empty, Queue
+from time import sleep
+from typing import Any
+
+from PIL import Image
+
+from rcs.utils import SimpleFrameRate
+
+import gymnasium as gym
+import numpy as np
+from rcs._core.common import BaseCameraConfig, RobotPlatform
+from rcs._core.sim import SimConfig
+from rcs.envs.base import ControlMode, RelativeTo
+from rcs.envs.configs import EmptyWorldFR3Duo
+from rcs.envs.storage_wrapper import StorageWrapper
+from rcs.envs.tasks import PickTaskConfig
+
+import rcs
+from rcs_duobench.tasks.bin_sort import BinSortEnvConfig
+from vlagents.client import RemoteAgent
+from vlagents.policies import Act, Obs
+
+logger = logging.getLogger(__name__)
+
+
+ROBOT2IP = {
+    "right": "192.168.102.1",
+    "left": "192.168.101.1",
+}
+ROBOT2ID = {
+    "left": "1",
+    "right": "0",
+}
+
+
+ROBOT_INSTANCE = RobotPlatform.SIMULATION
+# ROBOT_INSTANCE = RobotPlatform.HARDWARE
+
+# set camera dict to none disable cameras
+CAMERA_DICT = {
+    "left_wrist": "230422272017",
+    "right_wrist": "230422271040",
+    # "side": "243522070385",
+    # "bird_eye": "243522070364",
+}
+# CAMERA_DICT = None
+ZED_CAMERA_DICT = {
+    "head": "19928076",
+}
+INCLUDE_DEPTH = False
+
+ROBOTIQ_SERIAL = {
+    "left": "DAAQMPDC",
+    "right": "DAAQMJHX",
+}
+
+# DIGIT_DICT = {
+#     "digit_right_left": "D21182",
+#     "digit_right_right": "D21193"
+# }
+DIGIT_DICT = None
+
+
+INSTRUCTION = "pick up the black cube with the right arm and place it into the black bowl; pick up the white cube with the left arm and place it into the white bowl"
+FPS = 30
+CONTROL_MODE = ControlMode.JOINTS
+RELATIVETO = RelativeTo.NONE
+RECORD_PATH = "inference_recordings"
+MODEL = "lerobot"
+IP = "localhost"
+PORT = 20000
+CONFIG_PATH = Path(__file__).with_suffix(".json")
+MAX_REL_MOV_JOINTS = np.deg2rad(0.5)
+MAX_REL_MOV_CART = (0.5, np.deg2rad(90))
+
+logging.basicConfig(
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    level=logging.INFO,
+)
+
+
+robot2world = {
+    "right": rcs.common.Pose(
+        translation=np.array([0, 0, 0]), rpy_vector=np.array([0.89360858, -0.17453293, 0.46425758])
+    ),
+    "left": rcs.common.Pose(
+        translation=np.array([0, 0, 0]), rpy_vector=np.array([-0.89360858, -0.17453293, -0.46425758])
+    ),
+}
+
+
+@dataclass
+class InferenceConfig:
+    vlagents_host: str = IP
+    vlagents_port: int = PORT
+    vlagents_model: str = MODEL
+    instruction: str = INSTRUCTION
+    robot_keys: list[str] = field(default_factory=lambda: ["left", "right"])
+    jpeg_encoding: bool = True
+    on_same_machine: bool = False
+    fps: int = FPS
+    record_path: str = RECORD_PATH
+    n_action_steps: int | None = None
+    max_rel_mov_joints: float = MAX_REL_MOV_JOINTS
+    max_rel_mov_cart: tuple[float, float] = MAX_REL_MOV_CART
+
+
+def load_inference_config() -> InferenceConfig:
+    if not CONFIG_PATH.exists():
+        CONFIG_PATH.write_text(json.dumps(asdict(InferenceConfig()), indent=2) + "\n")
+        return InferenceConfig()
+    return InferenceConfig(**json.loads(CONFIG_PATH.read_text()))
+
+
+
+class ModelInference:
+    def __init__(self, env: gym.Env, cfg: InferenceConfig):
+        self.env = env
+        self.gripper_state = 1
+        self._cfg = cfg
+        self._episode_running = False
+        self._command_queue: Queue[str] = Queue()
+        self._shutdown_requested = threading.Event()
+        self.remote_agent = RemoteAgent(
+            cfg.vlagents_host, cfg.vlagents_port, cfg.vlagents_model, cfg.on_same_machine, cfg.jpeg_encoding
+        )
+        self.frame_rate = SimpleFrameRate(self._cfg.fps)
+        self._action_buffer = []
+
+    def submit_command(self, command: str) -> None:
+        self._command_queue.put(command)
+
+    def request_shutdown(self) -> None:
+        self._shutdown_requested.set()
+
+    def _drain_commands(self) -> tuple[bool, bool, bool, bool, bool]:
+        start_requested = False
+        record_requested = False
+        success_requested = False
+        stop_requested = False
+        reload_requested = False
+
+        while True:
+            try:
+                command = self._command_queue.get_nowait()
+            except Empty:
+                break
+
+            if command == "e":
+                start_requested = True
+            elif command == "r":
+                record_requested = True
+            elif command == "s":
+                success_requested = True
+            elif command == "q":
+                stop_requested = True
+            elif command == "o":
+                reload_requested = True
+
+        return start_requested, record_requested, success_requested, stop_requested, reload_requested
+
+    def obs_rcs2agents(self, obs: dict, info: dict | None = None) -> Obs:
+        cameras = {}
+        for frame in obs["frames"]:
+            cameras[frame] = obs["frames"][frame]["rgb"]["data"]
+            cameras[frame] = np.array(Image.fromarray(cameras[frame]).resize((224, 224), Image.Resampling.LANCZOS))
+
+        state = []
+        for robot in self._cfg.robot_keys:
+            # TODO: currently hardcoded for joints
+            state.append(obs[robot]["joints"])
+            state.append(obs[robot]["gripper"])
+
+        return Obs(cameras=cameras, gripper=None, info=info, state=np.concatenate(state))
+
+    def act(self, obs_dict) -> None:
+        done = False
+        if self._cfg.n_action_steps is None:
+            return self.remote_agent.act(obs_dict)
+        if len(self._action_buffer) == 0:
+            action = self.remote_agent.act(obs_dict)
+            selected_action = action.action[:self._cfg.n_action_steps]
+            self._action_buffer = selected_action.tolist()
+            done = action.done
+            if RELATIVETO == RelativeTo.CONFIGURED_ORIGIN:
+                for robot in self.env.get_wrapper_attr("envs"):
+                    self.env.get_wrapper_attr("envs")[robot].get_wrapper_attr("set_origin_to_current")()
+        act = self._action_buffer.pop(0)
+        return Act(action=act, done=done)
+
+    def action_agents2rcs(self, action: Act) -> dict[str, Any]:
+        act = {}
+        for idx, robot in enumerate(self._cfg.robot_keys):
+            # TODO: this is currently hard coded for franka joints
+            act[robot] = {}
+            act[robot]["joints"] = action.action[idx * 8 : idx * 8 + 7]
+            act[robot]["gripper"] = action.action[idx * 8 + 7 : idx * 8 + 8]
+        return act
+
+    def loop(self):
+        obs, _ = self.env.reset()
+        obs_dict = self.obs_rcs2agents(obs)
+        logger.info(
+            "waiting for input: 'e' to start, 'r' to start and record, 's' for success and reset, 'q' to stop and reset, and 'o' to reload config"
+        )
+
+        while not self._shutdown_requested.is_set():
+            start_requested, record_requested, success_requested, stop_requested, reload_requested = (
+                self._drain_commands()
+            )
+
+            if reload_requested:
+                self._cfg = load_inference_config()
+                try:
+                    self.remote_agent.reconnect(
+                        host=self._cfg.vlagents_host,
+                        port=self._cfg.vlagents_port,
+                        model=self._cfg.vlagents_model,
+                        on_same_machine=self._cfg.on_same_machine,
+                        jpeg_encoding=self._cfg.jpeg_encoding,
+                    )
+                    logger.info(
+                        "reloaded config from %s with host=%s port=%s model=%s",
+                        CONFIG_PATH,
+                        self._cfg.vlagents_host,
+                        self._cfg.vlagents_port,
+                        self._cfg.vlagents_model,
+                    )
+                except Exception:
+                    logger.exception("failed to reconnect after reloading %s", CONFIG_PATH)
+                if isinstance(self.env, StorageWrapper):
+                    self.env.base_dir = self._cfg.record_path
+                    self.env.set_instruction(self._cfg.instruction)
+                obs, _ = self.env.reset()
+                obs_dict = self.obs_rcs2agents(obs)
+                self._action_buffer = []
+                self._episode_running = False
+
+            if success_requested:
+                if self._episode_running:
+                    logger.info("marking episode successful and resetting environment")
+                self.env.get_wrapper_attr("success")()
+                obs, _ = self.env.reset()
+                obs_dict = self.obs_rcs2agents(obs)
+                self._action_buffer = []
+                self._episode_running = False
+
+            if stop_requested:
+                if self._episode_running:
+                    logger.info("stopping episode and resetting environment")
+                obs, _ = self.env.reset()
+                obs_dict = self.obs_rcs2agents(obs)
+                self._action_buffer = []
+                self._episode_running = False
+
+            if not self._episode_running:
+                try:
+                    self.remote_agent.ensure_connected()
+                except Exception:
+                    sleep(0.5)
+                    continue
+                if start_requested or record_requested:
+                    if isinstance(self.env, StorageWrapper):
+                        self.env.set_instruction(self._cfg.instruction)
+                        if record_requested:
+                            self.env.start_record()
+                    logger.info("starting episode%s", " with recording" if record_requested else "")
+                    self.remote_agent.reset(copy.deepcopy(obs_dict), instruction=self._cfg.instruction)
+                    self._episode_running = True
+                else:
+                    sleep(0.05)
+                    continue
+
+            action = self.act(copy.deepcopy(obs_dict))
+            if action.done:
+                logger.info("done issued by agent, resetting environment")
+                obs, _ = self.env.reset()
+                obs_dict = self.obs_rcs2agents(obs)
+                self._action_buffer = []
+                self._episode_running = False
+                continue
+            a = self.action_agents2rcs(action)
+            obs, _, _, _, info = self.env.step(a)
+            # print(obs["left"]["joints"], obs["left"]["gripper"], obs["right"]["joints"], obs["right"]["gripper"])
+
+            obs_dict = self.obs_rcs2agents(obs)
+
+            if ROBOT_INSTANCE == RobotPlatform.HARDWARE:
+                self.frame_rate()
+
+
+def command_loop(controller: ModelInference) -> None:
+    prompt = "Command [e=start, r=record, s=success/reset, q=stop/reset, o=reload, x=exit]: "
+    while True:
+        try:
+            command = input(prompt).strip().lower()
+        except EOFError:
+            command = "x"
+        except KeyboardInterrupt:
+            print()
+            command = "x"
+
+        if not command:
+            continue
+        if command == "x":
+            controller.request_shutdown()
+            return
+        if command in {"e", "r", "s", "q", "o"}:
+            controller.submit_command(command)
+            continue
+        logger.info("unknown command %r", command)
+
+
+def get_env(cfg: InferenceConfig) -> gym.Env:
+    if ROBOT_INSTANCE == RobotPlatform.HARDWARE:
+        from rcs_fr3.configs import FrankaDuoEnv
+        from rcs_fr3.creators import HardwareCameraCreatorConfig
+
+        env_creator = FrankaDuoEnv()
+        env_creator.left_ip = ROBOT2IP["left"]
+        env_creator.right_ip = ROBOT2IP["right"]
+        hw_cfg = env_creator.config()
+        camera_cfgs: dict[str, HardwareCameraCreatorConfig] = {}
+        if CAMERA_DICT is not None:
+            camera_cfgs["realsense"] = HardwareCameraCreatorConfig(
+                camera_type_id="realsense",
+                camera_cfgs={
+                    name: BaseCameraConfig(
+                        identifier=identifier,
+                        resolution_width=1280,
+                        resolution_height=720,
+                        frame_rate=30,
+                    )
+                    for name, identifier in CAMERA_DICT.items()
+                },
+            )
+        if ZED_CAMERA_DICT is not None:
+            camera_cfgs["zed"] = HardwareCameraCreatorConfig(
+                camera_type_id="zed",
+                camera_cfgs={
+                    name: BaseCameraConfig(
+                        identifier=identifier,
+                        resolution_width=1280,
+                        resolution_height=720,
+                        frame_rate=30,
+                    )
+                    for name, identifier in ZED_CAMERA_DICT.items()
+                },
+                kwargs={
+                    "enable_depth": False,
+                    "enable_imu": False,
+                },
+            )
+        if DIGIT_DICT is not None:
+            camera_cfgs["digit"] = HardwareCameraCreatorConfig(
+                camera_type_id="digit",
+                camera_cfgs={
+                    name: BaseCameraConfig(
+                        identifier=identifier,
+                        resolution_width=320,
+                        resolution_height=240,
+                        frame_rate=30,
+                    )
+                    for name, identifier in DIGIT_DICT.items()
+                },
+            )
+        hw_cfg.camera_cfgs = camera_cfgs or None
+        hw_cfg.control_mode = CONTROL_MODE
+        hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH
+        hw_cfg.max_relative_movement = cfg.max_rel_mov_joints if CONTROL_MODE == ControlMode.JOINTS else cfg.max_rel_mov_cart
+        hw_cfg.relative_to = RELATIVETO
+        hw_cfg.robot_to_shared_base_frame = robot2world
+        hw_cfg.robot_cfgs["left"].ignore_realtime = True
+        hw_cfg.robot_cfgs["right"].ignore_realtime = True
+        hw_cfg.robot_cfgs["left"].speed_factor = 0.1
+        hw_cfg.robot_cfgs["right"].speed_factor = 0.1
+        hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"]
+        hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"]
+        env_rel = env_creator.create_env(hw_cfg)
+    else:
+        # FR3
+
+        scene = BinSortEnvConfig()
+        sim_cfg_data = scene.config()
+        sim_cfg_data.sim_cfg = SimConfig(
+            async_control=True, realtime=False, frequency=cfg.fps, max_convergence_steps=500
+        )
+        sim_cfg_data.relative_to = RelativeTo.CONFIGURED_ORIGIN
+        sim_cfg_data.wrapper_cfg.include_depth = INCLUDE_DEPTH
+        sim_cfg_data.control_mode = ControlMode.JOINTS
+        sim_cfg_data.relative_to = RELATIVETO
+        sim_cfg_data.wrapper_cfg.binary_gripper = True
+        sim_cfg_data.max_relative_movement = cfg.max_rel_mov_joints if CONTROL_MODE == ControlMode.JOINTS else cfg.max_rel_mov_cart
+
+
+        # if sim_cfg_data.root_frame_objects is None:
+        #     sim_cfg_data.root_frame_objects = {}
+        # sim_cfg_data.task_cfg = PickTaskConfig(robot_name="right")
+
+        env_rel = scene.create_env(sim_cfg_data)
+
+    return StorageWrapper(
+        env_rel,
+        cfg.record_path,
+        cfg.instruction,
+        batch_size=32,
+        max_rows_per_group=2,
+        max_rows_per_file=10,
+        allow_wrapper_instruction=False
+    )
+
+
+def main():
+    cfg = load_inference_config()
+    env_rel = get_env(cfg)
+    env_rel.reset()
+
+    # Path(VIDEO_PATH).mkdir(parents=True, exist_ok=True)
+    # timestamp = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
+
+    # camera_set = env_rel.get_wrapper_attr("camera_set")
+    # camera_set.record_video(Path(VIDEO_PATH), timestamp)
+
+    # env = RHCWrapper(env, exec_horizon=1)
+
+    controller = ModelInference(env_rel, cfg)
+    with env_rel:
+        worker = threading.Thread(target=controller.loop, name="model-inference", daemon=True)
+        worker.start()
+        command_loop(controller)
+        worker.join()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/inference/requirements.txt b/examples/inference/requirements.txt
new file mode 100644
index 00000000..cb843744
--- /dev/null
+++ b/examples/inference/requirements.txt
@@ -0,0 +1 @@
+vlagents @ git+https://github.com/RobotControlStack/vlagents.git@lerobot
\ No newline at end of file
diff --git a/examples/teleop/franka.py b/examples/teleop/franka.py
index 2f0d6ede..f3951c98 100644
--- a/examples/teleop/franka.py
+++ b/examples/teleop/franka.py
@@ -26,22 +26,26 @@
     "right": "0",
 }
 
-
-ROBOT_INSTANCE = RobotPlatform.SIMULATION
-# ROBOT_INSTANCE = RobotPlatform.HARDWARE
+# use `udevadm info -a -n /dev/ttyUSB0 | grep serial`
+# to find out the serial numbers
+ROBOTIQ_SERIAL = {
+    "left": "DAAQMPDC",
+    "right": "DAAQMJHX",
+}
 
 RECORD_FPS = 30
 # set camera dict to none disable cameras
-# CAMERA_DICT = {
-#     "left_wrist": "230422272017",
-#     "right_wrist": "230422271040",
-#     "side": "243522070385",
-#     "bird_eye": "243522070364",
-# }
-CAMERA_DICT = None
+CAMERA_DICT = {
+    "right_wrist": "230422272017",
+    "left_wrist": "230422271040",
+    # "side": "243522070385",
+    # "bird_eye": "243522070364",
+}
+# CAMERA_DICT = None
 ZED_CAMERA_DICT = {
-    "zed": "19928076",
+    "head": "19928076",
 }
+# ZED_CAMERA_DICT = None
 MQ3_ADDR = "10.42.0.1"
 INCLUDE_DEPTH = False
 
@@ -117,8 +121,9 @@ def get_env():
                     for name, identifier in ZED_CAMERA_DICT.items()
                 },
                 kwargs={
-                    "enable_depth": False,
+                    "enable_depth": INCLUDE_DEPTH,
                     "enable_imu": False,
+                    "include_right": True,
                 },
             )
         if DIGIT_DICT is not None:
@@ -137,11 +142,19 @@ def get_env():
         hw_cfg.camera_cfgs = camera_cfgs or None
         hw_cfg.control_mode = config.operator_class.control_mode[0]
         hw_cfg.wrapper_cfg.include_depth = INCLUDE_DEPTH
+        hw_cfg.wrapper_cfg.binary_gripper = False
+        # hw_cfg.wrapper_cfg.binary_gripper = True
         hw_cfg.max_relative_movement = (
             0.5 if config.operator_class.control_mode[0] == ControlMode.JOINTS else (0.5, np.deg2rad(90))
         )
         hw_cfg.relative_to = config.operator_class.control_mode[1]
         hw_cfg.robot_to_shared_base_frame = robot2world
+        hw_cfg.robot_cfgs["left"].ignore_realtime = True
+        hw_cfg.robot_cfgs["right"].ignore_realtime = True
+        hw_cfg.robot_cfgs["left"].speed_factor = 0.3
+        hw_cfg.robot_cfgs["right"].speed_factor = 0.3
+        hw_cfg.gripper_cfgs["left"].serial_number = ROBOTIQ_SERIAL["left"]
+        hw_cfg.gripper_cfgs["right"].serial_number = ROBOTIQ_SERIAL["right"]
         env_rel = env_creator.create_env(hw_cfg)
         operator = GelloOperator(config) if isinstance(config, GelloConfig) else QuestOperator(config)
     else:
diff --git a/extensions/rcs_fr3/src/hw/Franka.cpp b/extensions/rcs_fr3/src/hw/Franka.cpp
index 1fbb140d..4d0eb285 100644
--- a/extensions/rcs_fr3/src/hw/Franka.cpp
+++ b/extensions/rcs_fr3/src/hw/Franka.cpp
@@ -220,6 +220,11 @@ void Franka::check_for_background_errors() {
   }
 }
 
+void Franka::clear_background_error() {
+  std::lock_guard<std::mutex> lock(this->exception_mutex);
+  this->background_exception = nullptr;
+}
+
 void Franka::osc_set_cartesian_position(
     const common::Pose& desired_pose_EE_in_base_frame) {
   this->check_for_background_errors();
@@ -335,8 +340,7 @@ void Franka::osc() {
 
       // torques handler
       if (this->running_controller.load() == Controller::none) {
-        franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
-        return franka::MotionFinished(zero_torques);
+        return franka::MotionFinished(franka::Torques(robot_state.tau_J_d));
       }
       // TO BE replaced
       // if (!this->control_thread_running && dq.maxCoeff() < 0.0001) {
@@ -541,9 +545,7 @@ void Franka::joint_controller() {
 
       // torques handler
       if (this->running_controller.load() == Controller::none) {
-        // TODO: test if this also works when the robot is moving
-        franka::Torques zero_torques{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
-        return franka::MotionFinished(zero_torques);
+        return franka::MotionFinished(franka::Torques(robot_state.tau_J_d));
       }
 
       common::Vector7d desired_q;
@@ -667,6 +669,7 @@ void Franka::automatic_error_recovery() {
 void Franka::reset() {
   this->stop_control_thread();
   this->automatic_error_recovery();
+  this->clear_background_error();
 }
 
 void Franka::wait_milliseconds(int milliseconds) {
diff --git a/extensions/rcs_fr3/src/hw/Franka.h b/extensions/rcs_fr3/src/hw/Franka.h
index 6054b924..9601d343 100644
--- a/extensions/rcs_fr3/src/hw/Franka.h
+++ b/extensions/rcs_fr3/src/hw/Franka.h
@@ -93,6 +93,7 @@ class Franka : public common::Robot {
   void joint_controller();
   void zero_torque_controller();
   void check_for_background_errors();
+  void clear_background_error();
 
  public:
   Franka(const FrankaConfig& cfg,
diff --git a/extensions/rcs_fr3/src/rcs_fr3/configs.py b/extensions/rcs_fr3/src/rcs_fr3/configs.py
index 3eaa8b0a..baf47eaf 100644
--- a/extensions/rcs_fr3/src/rcs_fr3/configs.py
+++ b/extensions/rcs_fr3/src/rcs_fr3/configs.py
@@ -152,6 +152,7 @@ def config(self) -> FR3MultiHardwareEnvCreatorConfig:
 
 
 class FrankaDuoEnv(DefaultFR3MultiHardwareEnv):
+    # use `udevadm info -a -n /dev/ttyUSB0 | grep serial` to find out the serial numbers
     left_gripper_serial_number = "DAAQMJHX"
     right_gripper_serial_number = "DAAQMPDC"
 
diff --git a/extensions/rcs_fr3/src/rcs_fr3/creators.py b/extensions/rcs_fr3/src/rcs_fr3/creators.py
index 2225a577..01386503 100644
--- a/extensions/rcs_fr3/src/rcs_fr3/creators.py
+++ b/extensions/rcs_fr3/src/rcs_fr3/creators.py
@@ -15,6 +15,7 @@
     GripperWrapper,
     HandWrapper,
     HardwareEnv,
+    LimitedAbsoluteAction,
     MultiRobotWrapper,
     RelativeActionSpace,
     RelativeTo,
@@ -203,10 +204,12 @@ def create_env(self, cfg: FR3HardwareEnvCreatorConfig) -> gym.Env:
             camera_set.start()
             camera_set.wait_for_frames()
             logger.info("CameraSet started")
-            env = CameraSetWrapper(env, camera_set)
+            env = CameraSetWrapper(env, camera_set, cfg.wrapper_cfg.include_depth)
 
         if cfg.relative_to != RelativeTo.NONE:
             env = RelativeActionSpace(env, max_mov=cfg.max_relative_movement, relative_to=cfg.relative_to)
+        else:
+            env = LimitedAbsoluteAction(env, max_mov=cfg.max_relative_movement)
         return CoverWrapper(env)
 
     def config(self) -> FR3HardwareEnvCreatorConfig:
@@ -236,7 +239,7 @@ def create_env(self, cfg: FR3MultiHardwareEnvCreatorConfig) -> gym.Env:
             camera_set.start()
             camera_set.wait_for_frames()
             logger.info("CameraSet started")
-            env = CameraSetWrapper(env, camera_set)
+            env = CameraSetWrapper(env, camera_set, cfg.wrapper_cfg.include_depth)
         return CoverWrapper(env)
 
     def config(self) -> FR3MultiHardwareEnvCreatorConfig:
diff --git a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
index a5ed5736..db4be1fa 100644
--- a/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
+++ b/extensions/rcs_robotiq2f85/src/rcs_robotiq2f85/hw.py
@@ -43,10 +43,12 @@ def __init__(self, cfg: RobotiQ2F85GripperConfig):
         super().__init__()
         self._cfg: RobotiQ2F85GripperConfig = cfg
         self.gripper = Robotiq2F85Driver(serial_number=cfg.serial_number)
+        self._last_normalized_width = 1.0
+        self.gripper.reset()
 
     def get_normalized_width(self) -> float:
-        # value between 0 and 1 (0 is closed)
-        return self.gripper.opening / 85
+        # Return the last commanded width to avoid a synchronous Modbus read on every env step.
+        return self._last_normalized_width
 
     def grasp(self) -> None:
         """
@@ -61,7 +63,7 @@ def open(self) -> None:
         self.set_normalized_width(1.0)
 
     def reset(self) -> None:
-        self.gripper.reset()
+        self.open()
 
     def set_normalized_width(self, width: float, force: float = 0) -> None:
         """
@@ -70,6 +72,7 @@ def set_normalized_width(self, width: float, force: float = 0) -> None:
         if not (0 <= width <= 1):
             msg = f"Width must be between 0 and 1, got {width}."
             raise ValueError(msg)
+        self._last_normalized_width = width
         abs_width = width * 85
         self.gripper.go_to(
             opening=float(abs_width),
diff --git a/extensions/rcs_zed/src/rcs_zed/__main__.py b/extensions/rcs_zed/src/rcs_zed/__main__.py
index cfe51fa5..8e2558f4 100644
--- a/extensions/rcs_zed/src/rcs_zed/__main__.py
+++ b/extensions/rcs_zed/src/rcs_zed/__main__.py
@@ -1,4 +1,6 @@
 import logging
+from pathlib import Path
+from time import monotonic
 
 import cv2
 import typer
@@ -14,6 +16,36 @@ def _display_frame(window_name: str, frame):
     cv2.imshow(window_name, frame.camera.color.data[:, :, ::-1])
 
 
+def _resolve_serial(serial: str | None) -> str:
+    if serial is not None:
+        return serial
+
+    try:
+        devices = ZEDCameraSet.enumerate_connected_devices()
+    except RuntimeError as exc:
+        msg = str(exc)
+        raise typer.BadParameter(msg) from exc
+    if len(devices) == 0:
+        msg = "No ZED devices connected."
+        raise typer.BadParameter(msg)
+    return next(iter(devices))
+
+
+def _create_rgb_camera(serial: str, width: int, height: int, fps: int) -> ZEDCameraSet:
+    return ZEDCameraSet(
+        cameras={
+            "viewer": common.BaseCameraConfig(
+                identifier=serial,
+                resolution_width=width,
+                resolution_height=height,
+                frame_rate=fps,
+            )
+        },
+        enable_depth=False,
+        enable_imu=False,
+    )
+
+
 @zed_app.command()
 def serials():
     """Reads out the serial numbers and models of connected ZED devices via the SDK."""
@@ -39,29 +71,8 @@ def rgb_view(
     window_name: str = typer.Option("ZED RGB", help="OpenCV window title."),
 ):
     """Open a live RGB window using the RCS ZED camera interface."""
-    if serial is None:
-        try:
-            devices = ZEDCameraSet.enumerate_connected_devices()
-        except RuntimeError as exc:
-            msg = str(exc)
-            raise typer.BadParameter(msg) from exc
-        if len(devices) == 0:
-            msg = "No ZED devices connected."
-            raise typer.BadParameter(msg)
-        serial = next(iter(devices))
-
-    camera = ZEDCameraSet(
-        cameras={
-            "viewer": common.BaseCameraConfig(
-                identifier=serial,
-                resolution_width=width,
-                resolution_height=height,
-                frame_rate=fps,
-            )
-        },
-        enable_depth=False,
-        enable_imu=False,
-    )
+    serial = _resolve_serial(serial)
+    camera = _create_rgb_camera(serial, width, height, fps)
 
     try:
         camera.open()
@@ -81,6 +92,53 @@ def rgb_view(
         cv2.destroyAllWindows()
 
 
+@zed_app.command("rgb-snapshot")
+def rgb_snapshot(
+    serial: str | None = typer.Argument(None, help="Optional ZED serial number. Uses the first device if omitted."),
+    output: Path = typer.Option(Path("zed_latest.png"), "--output", "-o", help="PNG file to write."),
+    width: int = typer.Option(1280, help="Requested capture width."),
+    height: int = typer.Option(720, help="Requested capture height."),
+    fps: int = typer.Option(30, help="Requested capture frame rate."),
+    duration: float = typer.Option(1.0, "--duration", "-d", help="Seconds to read frames before saving."),
+):
+    """Open a ZED camera briefly and save the latest RGB frame as a PNG."""
+    if duration <= 0:
+        msg = "Duration must be greater than zero."
+        raise typer.BadParameter(msg)
+    if output.suffix.lower() != ".png":
+        msg = "Output path must end in .png."
+        raise typer.BadParameter(msg)
+
+    serial = _resolve_serial(serial)
+    camera = _create_rgb_camera(serial, width, height, fps)
+
+    try:
+        camera.open()
+    except Exception as exc:
+        msg = f"Could not start ZED camera {serial}: {exc}"
+        raise typer.BadParameter(msg) from exc
+
+    latest_frame = None
+    frames_read = 0
+    deadline = monotonic() + duration
+    try:
+        while latest_frame is None or monotonic() < deadline:
+            latest_frame = camera.poll_frame("viewer")
+            frames_read += 1
+    finally:
+        camera.close()
+
+    assert latest_frame is not None
+    output.parent.mkdir(parents=True, exist_ok=True)
+    image_bgr = latest_frame.camera.color.data[:, :, ::-1]
+    print(latest_frame.camera.color.intrinsics)
+    if not cv2.imwrite(str(output), image_bgr):
+        msg = f"Could not write PNG to {output}."
+        raise typer.BadParameter(msg)
+
+    typer.echo(f"Saved {output} from ZED {serial} after reading {frames_read} frame(s).")
+
+
 def main():
     zed_app()
 
diff --git a/extensions/rcs_zed/src/rcs_zed/camera.py b/extensions/rcs_zed/src/rcs_zed/camera.py
index e9fd61dd..881a1368 100644
--- a/extensions/rcs_zed/src/rcs_zed/camera.py
+++ b/extensions/rcs_zed/src/rcs_zed/camera.py
@@ -30,6 +30,8 @@ class ZEDFrameBundle:
     color: np.ndarray
     timestamp: float
     color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]]
+    right_color: np.ndarray | None = None
+    right_color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]] | None = None
     depth: np.ndarray | None = None
     accel: np.ndarray | None = None
     gyro: np.ndarray | None = None
@@ -47,12 +49,20 @@ def _intrinsics_matrix(fx: float, fy: float, cx: float, cy: float) -> np.ndarray
 
 
 class PyZEDCameraHandle:
-    def __init__(self, camera: typing.Any, device_info: ZEDDeviceInfo, color_intrinsics: np.ndarray):
+    def __init__(
+        self,
+        camera: typing.Any,
+        device_info: ZEDDeviceInfo,
+        color_intrinsics: np.ndarray,
+        right_color_intrinsics: np.ndarray | None = None,
+    ):
         self.camera = camera
         self.device_info = device_info
         self.color_intrinsics = color_intrinsics
+        self.right_color_intrinsics = right_color_intrinsics
         self.runtime_parameters = sl.RuntimeParameters()  # type: ignore[union-attr]
         self.image_mat = sl.Mat()  # type: ignore[union-attr]
+        self.right_image_mat = sl.Mat()  # type: ignore[union-attr]
         self.depth_mat = sl.Mat()  # type: ignore[union-attr]
         self.sensors_data = sl.SensorsData()  # type: ignore[union-attr]
 
@@ -69,7 +79,7 @@ def _timestamp_seconds(self) -> float:
             pass
         return time()
 
-    def grab_frame(self) -> ZEDFrameBundle:
+    def grab_frame(self, include_right: bool = False) -> ZEDFrameBundle:
         err = self.camera.grab(self.runtime_parameters)
         if err != sl.ERROR_CODE.SUCCESS:  # type: ignore[union-attr]
             msg = f"Failed to grab ZED frame: {err}"
@@ -82,6 +92,15 @@ def grab_frame(self) -> ZEDFrameBundle:
             raise RuntimeError(msg)
         color_rgb = color_raw[:, :, :3][:, :, ::-1] if color_raw.shape[2] == 4 else color_raw[:, :, ::-1]
 
+        right_color = None
+        if include_right:
+            self.camera.retrieve_image(self.right_image_mat, sl.VIEW.RIGHT)  # type: ignore[union-attr]
+            right_raw = np.array(self.right_image_mat.get_data(), copy=True)
+            if right_raw.ndim != 3:
+                msg = f"Unexpected ZED right image shape {right_raw.shape}"
+                raise RuntimeError(msg)
+            right_color = right_raw[:, :, :3][:, :, ::-1] if right_raw.shape[2] == 4 else right_raw[:, :, ::-1]
+
         depth = None
         if self.device_info.has_depth:
             self.camera.retrieve_measure(self.depth_mat, sl.MEASURE.DEPTH)  # type: ignore[union-attr]
@@ -105,6 +124,8 @@ def grab_frame(self) -> ZEDFrameBundle:
 
         return ZEDFrameBundle(
             color=color_rgb,
+            right_color=right_color,
+            right_color_intrinsics=self.right_color_intrinsics if right_color is not None else None,
             depth=depth,
             accel=accel,
             gyro=gyro,
@@ -185,6 +206,7 @@ def open_camera(
         *,
         enable_depth: bool,
         enable_imu: bool,
+        include_right: bool,
     ) -> PyZEDCameraHandle:
         cls._require_sdk()
         assert sl is not None
@@ -206,6 +228,7 @@ def open_camera(
         information = camera.get_camera_information()
         calibration = information.camera_configuration.calibration_parameters
         left_cam = calibration.left_cam
+        right_cam = calibration.right_cam
         info = ZEDDeviceInfo(
             serial=str(config.identifier),
             model=cls._model_to_string(information.camera_model),
@@ -213,7 +236,15 @@ def open_camera(
             has_imu=enable_imu and cls._device_has_imu(information),
         )
         intrinsics = _intrinsics_matrix(left_cam.fx, left_cam.fy, left_cam.cx, left_cam.cy)
-        return PyZEDCameraHandle(camera=camera, device_info=info, color_intrinsics=intrinsics)
+        right_intrinsics = None
+        if include_right:
+            right_intrinsics = _intrinsics_matrix(right_cam.fx, right_cam.fy, right_cam.cx, right_cam.cy)
+        return PyZEDCameraHandle(
+            camera=camera,
+            device_info=info,
+            color_intrinsics=intrinsics,
+            right_color_intrinsics=right_intrinsics,
+        )
 
     def __init__(
         self,
@@ -221,6 +252,7 @@ def __init__(
         calibration_strategy: dict[str, CalibrationStrategy] | None = None,
         enable_depth: bool = True,
         enable_imu: bool = True,
+        include_right: bool = False,
     ) -> None:
         self.cameras = cameras
         if calibration_strategy is None:
@@ -228,12 +260,14 @@ def __init__(
         self.calibration_strategy = calibration_strategy
         self.enable_depth = enable_depth
         self.enable_imu = enable_imu
+        self.include_right = include_right
         self._logger = logging.getLogger(__name__)
-        self._camera_names = list(self.cameras.keys())
+        self._camera_names = self._logical_camera_names()
         self._available_devices: dict[str, ZEDDeviceInfo] = {}
         self._enabled_devices: dict[str, PyZEDCameraHandle] = {}
         self._frame_buffer_lock: dict[str, threading.Lock] = {}
         self._frame_buffer: dict[str, list[Frame]] = {}
+        self._pending_right_bundles: dict[str, ZEDFrameBundle] = {}
 
         assert (
             len({camera.resolution_width for camera in self.cameras.values()}) == 1
@@ -246,7 +280,63 @@ def camera_names(self) -> list[str]:
         return self._camera_names
 
     def config(self, camera_name) -> common.BaseCameraConfig:
-        return self.cameras[camera_name]
+        return self.cameras[self._physical_camera_name(camera_name)]
+
+    @staticmethod
+    def _right_camera_name(camera_name: str) -> str:
+        return f"{camera_name}_right"
+
+    def _physical_camera_name(self, camera_name: str) -> str:
+        if camera_name in self.cameras:
+            return camera_name
+        if self.include_right and camera_name.endswith("_right"):
+            physical_name = camera_name.removesuffix("_right")
+            if physical_name in self.cameras:
+                return physical_name
+        msg = f"Camera {camera_name} not found in the enabled devices"
+        raise AssertionError(msg)
+
+    def _is_right_camera_name(self, camera_name: str) -> bool:
+        return camera_name not in self.cameras and self._physical_camera_name(camera_name) != camera_name
+
+    def _logical_camera_names(self) -> list[str]:
+        names = list(self.cameras.keys())
+        if self.include_right:
+            names.extend(self._right_camera_name(camera_name) for camera_name in self.cameras)
+        return names
+
+    def _frame_from_bundle(self, camera_name: str, bundle: ZEDFrameBundle) -> Frame:
+        physical_name = self._physical_camera_name(camera_name)
+        extrinsics = self.calibration_strategy[physical_name].get_extrinsics()
+        is_right = self._is_right_camera_name(camera_name)
+        color_data = bundle.right_color if is_right else bundle.color
+        color_intrinsics = bundle.right_color_intrinsics if is_right else bundle.color_intrinsics
+        if color_data is None or color_intrinsics is None:
+            msg = f"Missing {'right' if is_right else 'left'} image data for {camera_name}"
+            raise RuntimeError(msg)
+
+        color = DataFrame(
+            data=color_data,
+            timestamp=bundle.timestamp,
+            intrinsics=color_intrinsics,
+            extrinsics=extrinsics,
+        )
+        depth = None
+        if not is_right and bundle.depth is not None:
+            depth = DataFrame(
+                data=bundle.depth,
+                timestamp=bundle.timestamp,
+                intrinsics=bundle.color_intrinsics,
+                extrinsics=extrinsics,
+            )
+
+        accel = DataFrame(data=bundle.accel, timestamp=bundle.timestamp) if bundle.accel is not None else None
+        gyro = DataFrame(data=bundle.gyro, timestamp=bundle.timestamp) if bundle.gyro is not None else None
+        return Frame(
+            camera=CameraFrame(color=color, depth=depth),
+            imu=IMUFrame(accel=accel, gyro=gyro) if (accel is not None or gyro is not None) else None,
+            avg_timestamp=bundle.timestamp,
+        )
 
     def update_available_devices(self):
         self._available_devices = self.enumerate_connected_devices()
@@ -256,6 +346,7 @@ def open(self):
         self._enabled_devices = {}
         self._frame_buffer = {}
         self._frame_buffer_lock = {}
+        self._pending_right_bundles = {}
 
         for camera_name, camera_cfg in self.cameras.items():
             if camera_cfg.identifier not in self._available_devices:
@@ -267,6 +358,7 @@ def open(self):
                 camera_cfg,
                 enable_depth=self.enable_depth and device_info.has_depth,
                 enable_imu=self.enable_imu and device_info.has_imu,
+                include_right=self.include_right,
             )
             self._enabled_devices[camera_name] = opened
             self._frame_buffer[camera_name] = []
@@ -274,44 +366,34 @@ def open(self):
 
     def poll_frame(self, camera_name: str) -> Frame:
         assert camera_name in self.camera_names, f"Camera {camera_name} not found in the enabled devices"
-        device = self._enabled_devices[camera_name]
-        bundle = device.grab_frame()
-        extrinsics = self.calibration_strategy[camera_name].get_extrinsics()
-
-        color = DataFrame(
-            data=bundle.color,
-            timestamp=bundle.timestamp,
-            intrinsics=bundle.color_intrinsics,
-            extrinsics=extrinsics,
-        )
-        depth = None
-        if bundle.depth is not None:
-            depth = DataFrame(
-                data=bundle.depth,
-                timestamp=bundle.timestamp,
-                intrinsics=bundle.color_intrinsics,
-                extrinsics=extrinsics,
-            )
-
-        accel = DataFrame(data=bundle.accel, timestamp=bundle.timestamp) if bundle.accel is not None else None
-        gyro = DataFrame(data=bundle.gyro, timestamp=bundle.timestamp) if bundle.gyro is not None else None
-
-        frame = Frame(
-            camera=CameraFrame(color=color, depth=depth),
-            imu=IMUFrame(accel=accel, gyro=gyro) if (accel is not None or gyro is not None) else None,
-            avg_timestamp=bundle.timestamp,
-        )
-
-        with self._frame_buffer_lock[camera_name]:
-            if len(self._frame_buffer[camera_name]) >= self.CALIBRATION_FRAME_SIZE:
-                self._frame_buffer[camera_name].pop(0)
-            self._frame_buffer[camera_name].append(copy.deepcopy(frame))
+        physical_name = self._physical_camera_name(camera_name)
+        device = self._enabled_devices[physical_name]
+
+        if self._is_right_camera_name(camera_name):
+            bundle = self._pending_right_bundles.pop(physical_name, None)
+            if bundle is None:
+                bundle = device.grab_frame(include_right=True)
+        else:
+            bundle = device.grab_frame(include_right=self.include_right)
+            if self.include_right and bundle.right_color is not None:
+                self._pending_right_bundles[physical_name] = bundle
+            else:
+                self._pending_right_bundles.pop(physical_name, None)
+
+        frame = self._frame_from_bundle(camera_name, bundle)
+
+        if not self._is_right_camera_name(camera_name):
+            with self._frame_buffer_lock[physical_name]:
+                if len(self._frame_buffer[physical_name]) >= self.CALIBRATION_FRAME_SIZE:
+                    self._frame_buffer[physical_name].pop(0)
+                self._frame_buffer[physical_name].append(copy.deepcopy(frame))
         return frame
 
     def close(self):
         for device in self._enabled_devices.values():
             device.close()
         self._enabled_devices = {}
+        self._pending_right_bundles = {}
 
     def calibrate(self) -> bool:
         for camera_name in self.cameras:
diff --git a/extensions/rcs_zed/tests/test_zed_extension.py b/extensions/rcs_zed/tests/test_zed_extension.py
index 32e8294f..cb17acec 100644
--- a/extensions/rcs_zed/tests/test_zed_extension.py
+++ b/extensions/rcs_zed/tests/test_zed_extension.py
@@ -20,8 +20,10 @@ def __init__(self, device_info: ZEDDeviceInfo, color_intrinsics: np.ndarray, fra
         self.color_intrinsics = color_intrinsics
         self._frame_bundle = frame_bundle
         self.closed = False
+        self.grab_calls: list[bool] = []
 
-    def grab_frame(self) -> ZEDFrameBundle:
+    def grab_frame(self, include_right: bool = False) -> ZEDFrameBundle:
+        self.grab_calls.append(include_right)
         return self._frame_bundle
 
     def close(self):
@@ -31,7 +33,7 @@ def close(self):
 class PatchZedState(TypedDict):
     devices: dict[str, ZEDDeviceInfo]
     opened: dict[str, FakeOpenedZEDCamera]
-    open_calls: list[tuple[str, bool, bool]]
+    open_calls: list[tuple[str, bool, bool, bool]]
 
 
 @pytest.fixture()
@@ -41,8 +43,8 @@ def patch_zed(monkeypatch) -> PatchZedState:
     def fake_enumerate(_cls):
         return state["devices"]
 
-    def fake_open(_cls, config: common.BaseCameraConfig, *, enable_depth: bool, enable_imu: bool):
-        state["open_calls"].append((config.identifier, enable_depth, enable_imu))
+    def fake_open(_cls, config: common.BaseCameraConfig, *, enable_depth: bool, enable_imu: bool, include_right: bool):
+        state["open_calls"].append((config.identifier, enable_depth, enable_imu, include_right))
         return state["opened"][config.identifier]
 
     monkeypatch.setattr(ZEDCameraSet, "enumerate_connected_devices", classmethod(fake_enumerate))
@@ -78,7 +80,7 @@ def test_zed_frame_mapping_depth_scaling_and_imu_downgrade(patch_zed):
     frame = camera_set.poll_frame("wrist")
     assert frame.camera.color.intrinsics is not None
 
-    assert patch_zed["open_calls"] == [("123", True, False)]
+    assert patch_zed["open_calls"] == [("123", True, False, False)]
     assert np.array_equal(frame.camera.color.data, color)
     assert np.array_equal(frame.camera.depth.data, depth)  # type: ignore[union-attr]
     assert np.array_equal(frame.camera.color.intrinsics, intrinsics)
@@ -113,7 +115,51 @@ def test_zed_enumeration_and_multi_camera_open(patch_zed):
         },
     )
     camera_set.open()
-    assert patch_zed["open_calls"] == [("111", True, False), ("222", True, True)]
+    assert patch_zed["open_calls"] == [("111", True, False, False), ("222", True, True, False)]
     camera_set.close()
     assert opened["111"].closed
     assert opened["222"].closed
+
+
+def test_zed_include_right_adds_logical_right_camera_without_double_grab(patch_zed):
+    left_intrinsics = np.array([[100.0, 0.0, 10.0, 0.0], [0.0, 110.0, 20.0, 0.0], [0.0, 0.0, 1.0, 0.0]])
+    right_intrinsics = np.array([[120.0, 0.0, 12.0, 0.0], [0.0, 130.0, 22.0, 0.0], [0.0, 0.0, 1.0, 0.0]])
+    left_color = np.arange(27, dtype=np.uint8).reshape(3, 3, 3)
+    right_color = np.arange(27, 54, dtype=np.uint8).reshape(3, 3, 3)
+    frame_bundle = ZEDFrameBundle(
+        color=left_color,
+        right_color=right_color,
+        timestamp=12.5,
+        color_intrinsics=left_intrinsics,
+        right_color_intrinsics=right_intrinsics,
+    )
+    device_info = ZEDDeviceInfo(serial="123", model="ZED Mini", has_depth=True, has_imu=False)
+    opened = FakeOpenedZEDCamera(device_info=device_info, color_intrinsics=left_intrinsics, frame_bundle=frame_bundle)
+    patch_zed["devices"] = {"123": device_info}
+    patch_zed["opened"] = {"123": opened}
+
+    camera_set = ZEDCameraSet(
+        cameras={
+            "wrist": common.BaseCameraConfig(
+                identifier="123", resolution_width=1280, resolution_height=720, frame_rate=30
+            )
+        },
+        include_right=True,
+    )
+
+    assert camera_set.camera_names == ["wrist", "wrist_right"]
+    assert camera_set.config("wrist_right").identifier == "123"
+
+    camera_set.open()
+    assert patch_zed["open_calls"] == [("123", True, False, True)]
+    left_frame = camera_set.poll_frame("wrist")
+    right_frame = camera_set.poll_frame("wrist_right")
+
+    assert opened.grab_calls == [True]
+    assert np.array_equal(left_frame.camera.color.data, left_color)
+    assert np.array_equal(right_frame.camera.color.data, right_color)
+    assert np.array_equal(left_frame.camera.color.intrinsics, left_intrinsics)
+    assert np.array_equal(right_frame.camera.color.intrinsics, right_intrinsics)
+    assert left_frame.avg_timestamp == right_frame.avg_timestamp == 12.5
+    assert left_frame.camera.depth is None
+    assert right_frame.camera.depth is None
diff --git a/python/rcs/__main__.py b/python/rcs/__main__.py
index 32dd98a4..4f97eae0 100644
--- a/python/rcs/__main__.py
+++ b/python/rcs/__main__.py
@@ -21,6 +21,7 @@
     run_conversion,
 )
 from rcs.sim.replayer import replay as replay_dataset
+from rcs.utils import export_episode_videos
 
 app = typer.Typer()
 
@@ -195,5 +196,27 @@ def lerobot_convert(
     )
 
 
+@app.command("episode-videos")
+def episode_videos(
+    dataset: Annotated[
+        Path,
+        typer.Argument(
+            exists=True,
+            help="Parquet dataset file or directory with parquet parts.",
+        ),
+    ],
+    output: Annotated[
+        Path,
+        typer.Argument(
+            exists=False,
+            help="Output directory for episode mp4 files.",
+        ),
+    ],
+    fps: Annotated[int, typer.Option(help="Video frames per second.")] = DEFAULT_FPS,
+    n: Annotated[int, typer.Option(help="Maximum number of episodes to export. -1 means all.")] = -1,
+):
+    export_episode_videos(dataset=dataset, output=output, fps=fps, n=n)
+
+
 if __name__ == "__main__":
     app()
diff --git a/python/rcs/camera/hw.py b/python/rcs/camera/hw.py
index 460f26f7..da8dc8d9 100644
--- a/python/rcs/camera/hw.py
+++ b/python/rcs/camera/hw.py
@@ -179,15 +179,19 @@ def get_timestamp_frames(self, ts: datetime) -> FrameSet | None:
     def stop(self):
         """Stops the polling of the cameras."""
         self.running = False
-        assert self._thread is not None
-        self._thread.join()
-        self._thread = None
+        if self._thread is not None:
+            self._thread.join()
+            self._thread = None
 
     def close(self):
-        if self.running and self._thread is not None:
+        if self.running:
+            self.running = False
+            for camera in self.cameras:
+                camera.close()
             self.stop()
-        for camera in self.cameras:
-            camera.close()
+        else:
+            for camera in self.cameras:
+                camera.close()
         self.stop_video()
 
     def start(self, warm_up: bool = True):
diff --git a/python/rcs/envs/base.py b/python/rcs/envs/base.py
index a13592d2..40d1dbeb 100644
--- a/python/rcs/envs/base.py
+++ b/python/rcs/envs/base.py
@@ -593,6 +593,94 @@ class RelativeTo(Enum):
     CONFIGURED_ORIGIN = auto()
     NONE = auto()
 
+class LimitedAbsoluteAction(ActObsInfoWrapper):
+    ABSOLUTE_ACTION_KEY = "limited_absolute_action"
+
+    def __init__(
+        self,
+        env,
+        max_mov: float | tuple[float, float] | None = None,
+    ):
+        super().__init__(env)
+        self.joints_key = get_space_keys(LimitedJointsRelDictType)[0]
+        self.trpy_key = get_space_keys(LimitedTRPYRelDictType)[0]
+        self.tquat_key = get_space_keys(LimitedTQuatRelDictType)[0]
+        self._absolute_action: common.Pose | VecType | None = None
+        self.max_mov = max_mov
+
+    def _get_current(self) -> np.ndarray:
+        if self.get_wrapper_attr("get_control_mode")() == ControlMode.JOINTS:
+            return self._robot.get_joint_position()
+        else:
+            return self._robot.get_cartesian_position()
+
+    def action(self, action: dict[str, Any]) -> dict[str, Any]:
+        if self.max_mov is None:
+            return action
+        if self.get_wrapper_attr("get_control_mode")() == ControlMode.JOINTS:
+            assert isinstance(self.max_mov, float)
+            low, high = self._robot.get_config().joint_limits
+            curr = self._get_current()
+            delta = action[self.joints_key] - curr
+            limited_delta = np.clip(delta, -self.max_mov, self.max_mov)
+            clipped_joints = np.clip(curr + limited_delta, low, high)
+            action.update(JointsDictType(joints=clipped_joints))
+            self._absolute_action = clipped_joints
+
+        elif self.get_wrapper_attr("get_control_mode")() == ControlMode.CARTESIAN_TRPY:
+            assert isinstance(self.max_mov, tuple)
+            curr = cast(common.Pose, self._get_current())
+            pose_space = cast(gym.spaces.Box, get_space(TRPYDictType).spaces[self.trpy_key])
+            target_pose = common.Pose(
+                translation=action[self.trpy_key][:3],
+                rpy_vector=action[self.trpy_key][3:],
+            )
+            pose_delta = common.Pose(
+                translation=target_pose.translation() - curr.translation(),  # type: ignore
+                rpy_vector=(target_pose * curr.inverse()).rotation_rpy().as_vector(),
+            )
+            limited_delta = pose_delta.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1])
+            clipped_pose = np.concatenate(
+                [
+                    np.clip(curr.translation() + limited_delta.translation(), pose_space.low[:3], pose_space.high[:3]),
+                    (limited_delta * curr).rotation_rpy().as_vector(),
+                ]
+            )
+            action.update(TRPYDictType(xyzrpy=clipped_pose))
+            self._absolute_action = clipped_pose
+
+        elif self.get_wrapper_attr("get_control_mode")() == ControlMode.CARTESIAN_TQuat:
+            assert isinstance(self.max_mov, tuple)
+            curr = cast(common.Pose, self._get_current())
+            pose_space = cast(gym.spaces.Box, get_space(TQuatDictType).spaces[self.tquat_key])
+            target_pose = common.Pose(
+                translation=action[self.tquat_key][:3],
+                quaternion=action[self.tquat_key][3:],
+            )
+            pose_delta = target_pose * curr.inverse()
+            limited_delta = pose_delta.limit_translation_length(self.max_mov[0]).limit_rotation_angle(self.max_mov[1])
+            clipped_pose = np.concatenate(
+                [
+                    np.clip(curr.translation() + limited_delta.translation(), pose_space.low[:3], pose_space.high[:3]),
+                    (limited_delta * curr).rotation_q(),
+                ]
+            )
+            action.update(TQuatDictType(tquat=clipped_pose))  # type: ignore
+            self._absolute_action = clipped_pose
+        else:
+            msg = "Control mode not recognized!"
+            raise ValueError(msg)
+        return action
+
+    def observation(self, observation: dict, info: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
+        if self._absolute_action is not None:
+            info[self.ABSOLUTE_ACTION_KEY] = (
+                list(self._absolute_action.translation()) + list(self._absolute_action.rotation_q())
+                if isinstance(self._absolute_action, common.Pose)
+                else self._absolute_action
+            )
+        return observation, info
+
 
 class RelativeActionSpace(ActObsInfoWrapper):
     DEFAULT_MAX_CART_MOV = 0.5
@@ -941,6 +1029,14 @@ def __init__(self, env, gripper: common.Gripper, binary: bool = True):
         self.gripper = gripper
         self._last_gripper_cmd = None
 
+    def _command_changed(self, gripper_action: np.ndarray) -> bool:
+        if self._last_gripper_cmd is None:
+            return True
+        last_action = np.asarray(self._last_gripper_cmd, dtype=np.float32)
+        if self.binary:
+            return not np.array_equal(gripper_action, last_action)
+        return not np.allclose(gripper_action, last_action, atol=1e-3, rtol=0.0)
+
     def close(self):
         self.gripper.close()
         super().close()
@@ -972,13 +1068,14 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
             gripper_action = [gripper_action]  # type: ignore
         if self.binary:
             gripper_action = np.round(gripper_action)
-        gripper_action = np.clip(gripper_action, 0.0, 1.0)
+        gripper_action = np.clip(np.asarray(gripper_action, dtype=np.float32), 0.0, 1.0)
 
-        if self.binary:
-            self.gripper.grasp() if gripper_action == self.BINARY_GRIPPER_CLOSED else self.gripper.open()
-        else:
-            self.gripper.set_normalized_width(gripper_action[0])
-        self._last_gripper_cmd = gripper_action
+        if self._command_changed(gripper_action):
+            if self.binary:
+                self.gripper.grasp() if gripper_action[0] < 0.5 else self.gripper.open()
+            else:
+                self.gripper.set_normalized_width(float(gripper_action[0]))
+            self._last_gripper_cmd = gripper_action.tolist()
         del action[self.gripper_key]
         return action
 
diff --git a/python/rcs/envs/scenes.py b/python/rcs/envs/scenes.py
index ef281673..6cedeae3 100644
--- a/python/rcs/envs/scenes.py
+++ b/python/rcs/envs/scenes.py
@@ -14,6 +14,7 @@
     ControlMode,
     CoverWrapper,
     GripperWrapper,
+    LimitedAbsoluteAction,
     MultiRobotWrapper,
     RelativeActionSpace,
     RelativeTo,
@@ -363,6 +364,8 @@ def create_env_from_model(self, cfg: SimEnvCreatorConfig, mjmodel: MjModel) -> g
                 env = RelativeActionSpace(
                     env, max_mov=prefixed_cfg.max_relative_movement, relative_to=prefixed_cfg.relative_to
                 )
+            else:
+                env = LimitedAbsoluteAction(env, max_mov=cfg.max_relative_movement)
             envs[robot_name] = env
 
         env = MultiRobotWrapper(envs, prefixed_cfg.robot_to_shared_base_frame)
diff --git a/python/rcs/envs/storage_wrapper.py b/python/rcs/envs/storage_wrapper.py
index 497a1f03..58929bf3 100644
--- a/python/rcs/envs/storage_wrapper.py
+++ b/python/rcs/envs/storage_wrapper.py
@@ -182,15 +182,18 @@ def _writer_worker(self):
                 ),
             )
 
-    def _flush(self):
+    def _flush(self, keep_last: bool = False):
+        rows = self.buffer[:-1] if keep_last else self.buffer
+        if len(rows) == 0:
+            return
         if self.schema is None:
-            temp_batch = pa.RecordBatch.from_pylist(self.buffer)
+            temp_batch = pa.RecordBatch.from_pylist(rows)
             self.schema = temp_batch.schema
 
-        self.buffer[-1]["success"] = self._success
-        batch = pa.RecordBatch.from_pylist(self.buffer, schema=self.schema)
+        rows[-1]["success"] = self._success
+        batch = pa.RecordBatch.from_pylist(rows, schema=self.schema)
         self.queue.put(batch)
-        self.buffer.clear()
+        self.buffer = self.buffer[-1:] if keep_last else []
 
     def _flatten_arrays(self, d: dict[str, Any]):
         # NOTE: list / tuples of arrays not supported
@@ -284,7 +287,9 @@ def step(self, action):
 
             self.step_cnt += 1
             if len(self.buffer) == self.batch_size:
-                self._flush()
+                # Keep the most recent row in memory so a success() right before reset()
+                # can still mark the episode as successful even if the batch boundary was hit.
+                self._flush(keep_last=True)
 
         return obs_original, reward, terminated, truncated, info
 
diff --git a/python/rcs/operator/quest.py b/python/rcs/operator/quest.py
index 322b870a..158850f0 100644
--- a/python/rcs/operator/quest.py
+++ b/python/rcs/operator/quest.py
@@ -128,6 +128,14 @@ def _reset_state(self):
                 self._last_controller_pose[controller] = Pose()
                 self._grp_pos[controller] = 1
 
+    def _swap_controller_input(self, input_data: dict[str, Any]) -> dict[str, Any]:
+        if not self.config.switched_left_right:
+            return input_data
+        swapped = copy.copy(input_data)
+        swapped["left"] = input_data["right"]
+        swapped["right"] = input_data["left"]
+        return swapped
+
     @staticmethod
     def _normalize_axis(value: bool | float | int) -> float:
         if isinstance(value, bool):
@@ -139,13 +147,6 @@ def consume_commands(self) -> TeleopCommands:
         with self._cmd_lock:
             cmds = copy.copy(self._commands)
             self._commands = TeleopCommands()
-            if self.config.switched_left_right:
-                swapped_reset_origin_to_current = {}
-                if "left" in cmds.reset_origin_to_current:
-                    swapped_reset_origin_to_current["right"] = cmds.reset_origin_to_current["left"]
-                if "right" in cmds.reset_origin_to_current:
-                    swapped_reset_origin_to_current["left"] = cmds.reset_origin_to_current["right"]
-                cmds.reset_origin_to_current = swapped_reset_origin_to_current
             return cmds
 
     def reset_operator_state(self):
@@ -176,11 +177,7 @@ def consume_action(self) -> dict[str, ArmWithGripper]:
                     tquat=np.concatenate([transform.translation(), transform.rotation_q()]),
                     gripper=np.array([self._grp_pos[controller]]),
                 )
-        return (
-            {"left": transforms["right"], "right": transforms["left"]}
-            if self.config.switched_left_right
-            else transforms
-        )
+        return transforms
 
     def set_camera(self, observation: dict) -> None:
         if not self.config.display_cameras:
@@ -241,6 +238,8 @@ def run(self):
                 logger.warning("[Quest Reader] packets arriving again")
                 warning_raised = False
 
+            input_data = self._swap_controller_input(input_data)
+
             # === Update Semantic Commands ===
             with self._cmd_lock:
                 if input_data[self._start_btn] and (self._prev_data is None or not self._prev_data[self._start_btn]):
@@ -293,7 +292,8 @@ def run(self):
 
                 gripper_axis = self._normalize_axis(input_data[controller][self._grp_btn[controller]])
                 # convert from IRIS to RCS gripper logic
-                self._grp_pos[controller] = 1.0 - gripper_axis
+                with self._resource_lock:
+                    self._grp_pos[controller] = 1.0 - gripper_axis
 
             self._prev_data = input_data
             rate_limiter()
diff --git a/python/rcs/utils.py b/python/rcs/utils.py
index 9f9b7e7f..1571233c 100644
--- a/python/rcs/utils.py
+++ b/python/rcs/utils.py
@@ -1,6 +1,20 @@
+import datetime
 import logging
+import math
+import os
+import subprocess
+from pathlib import Path
 from time import perf_counter, sleep
 
+import duckdb
+os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")
+import matplotlib
+matplotlib.use("Agg")
+import numpy as np
+import torch
+from matplotlib import pyplot as plt
+from torchvision.io import decode_jpeg
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
@@ -27,7 +41,6 @@ def __call__(self):
         if self.t is None:
             self.t = perf_counter()
             self._last_print = self.t
-            sleep(1 / self.frame_rate)
             return
         sleep_time = 1 / self.frame_rate - (perf_counter() - self.t)
         if sleep_time > 0:
@@ -45,3 +58,176 @@ def __enter__(self):
 
     def __exit__(self, *args):
         pass
+
+
+def _write_mp4(frames: list[np.ndarray], output_path: Path, fps: int) -> None:
+    if not frames:
+        return
+
+    height, width = frames[0].shape[:2]
+    process = subprocess.Popen(
+        [
+            "ffmpeg",
+            "-y",
+            "-f",
+            "rawvideo",
+            "-pix_fmt",
+            "rgb24",
+            "-s",
+            f"{width}x{height}",
+            "-r",
+            str(fps),
+            "-i",
+            "-",
+            "-an",
+            "-vf",
+            "pad=ceil(iw/2)*2:ceil(ih/2)*2",
+            "-c:v",
+            "libx264",
+            "-pix_fmt",
+            "yuv420p",
+            "-movflags",
+            "+faststart",
+            str(output_path),
+        ],
+        stdin=subprocess.PIPE,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+    )
+    assert process.stdin is not None
+    for frame in frames:
+        process.stdin.write(np.ascontiguousarray(frame).astype(np.uint8).tobytes())
+    process.stdin.close()
+    process.wait()
+
+
+def _render_action_panel(
+    joint_history: dict[str, np.ndarray],
+    gripper_history: dict[str, np.ndarray],
+    step: int,
+    height: int,
+    width: int,
+) -> np.ndarray:
+    fig, axes = plt.subplots(
+        len(joint_history),
+        1,
+        figsize=(width / 100, height / 100),
+        dpi=100,
+        squeeze=False,
+    )
+    x = np.arange(len(next(iter(joint_history.values()))))
+    for axis, robot_key in zip(axes[:, 0], joint_history, strict=True):
+        joints = joint_history[robot_key]
+        for joint_idx in range(joints.shape[1]):
+            axis.plot(x, joints[:, joint_idx], linewidth=1)
+        axis.axvline(step, color="tab:red", linewidth=2)
+        axis.set_xlim(0, max(len(x) - 1, 1))
+        axis.set_title(robot_key)
+        axis.set_xlabel("step")
+        axis.set_ylabel("joint")
+
+        gripper_axis = axis.twinx()
+        gripper_axis.plot(x, gripper_history[robot_key], color="black", linestyle="--", linewidth=2)
+        gripper_axis.set_ylabel("gripper")
+        gripper_axis.set_ylim(-0.1, 1.1)
+
+    fig.tight_layout()
+    fig.canvas.draw()
+    image = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
+    plt.close(fig)
+    return image
+
+
+def export_episode_videos(
+    dataset: str | Path,
+    output: str | Path,
+    fps: int = 30,
+    n: int = -1,
+) -> None:
+    dataset = Path(dataset)
+    output = Path(output)
+    output.mkdir(parents=True, exist_ok=True)
+
+    source = str(dataset / "*.parquet") if dataset.is_dir() else str(dataset)
+    source_escaped = source.replace("'", "''")
+    conn = duckdb.connect()
+    relation = conn.sql(f"SELECT * FROM read_parquet('{source_escaped}')")
+    frame_struct = relation.select("obs.frames").types[0]
+    action_struct = relation.select("action").types[0]
+    camera_names = [name for name, _ in frame_struct.children]
+    robot_names = [name for name, _ in relation.select("obs").types[0].children if name != "frames"]
+    action_fields_by_robot = {
+        robot: {field_name for field_name, _ in robot_struct.children}
+        for robot, robot_struct in action_struct.children
+    }
+
+    uuids = conn.execute(f"SELECT DISTINCT uuid FROM read_parquet('{source_escaped}') ORDER BY uuid").fetchall()
+    for index, (episode_id,) in enumerate(uuids):
+        if n != -1 and index >= n:
+            break
+
+        image_selects = ", ".join(f"obs.frames.{camera}.rgb.data AS {camera}" for camera in camera_names)
+        joint_selects = ", ".join(
+            (
+                f"COALESCE(action.{robot}.joints, obs.{robot}.joints) AS joints_{robot}"
+                if "joints" in action_fields_by_robot.get(robot, set())
+                else f"obs.{robot}.joints AS joints_{robot}"
+            )
+            for robot in robot_names
+        )
+        gripper_selects = ", ".join(
+            (
+                f"COALESCE(CAST(action.{robot}.gripper[1] AS DOUBLE), obs.{robot}.gripper[1]) AS gripper_{robot}"
+                if "gripper" in action_fields_by_robot.get(robot, set())
+                else f"obs.{robot}.gripper[1] AS gripper_{robot}"
+            )
+            for robot in robot_names
+        )
+        state_selects = ", ".join(
+            [
+                joint_selects,
+                gripper_selects,
+            ]
+        )
+        not_null_checks = " ".join(f"AND obs.frames.{camera}.rgb.data IS NOT NULL" for camera in camera_names)
+        rows = conn.execute(
+            f"""
+            SELECT timestamp, {image_selects}, {state_selects}
+            FROM read_parquet('{source_escaped}')
+            WHERE uuid = ?
+              {not_null_checks}
+            ORDER BY step
+            """,
+            [episode_id],
+        ).fetchall()
+        if not rows:
+            continue
+
+        timestamp = datetime.datetime.fromtimestamp(float(rows[0][0])).strftime("%Y-%m-%d-%H-%M-%S")
+        frames = []
+        joint_history = {
+            robot: np.asarray([row[1 + len(camera_names) + robot_idx] for row in rows], dtype=np.float32)
+            for robot_idx, robot in enumerate(robot_names)
+        }
+        gripper_history = {
+            robot: np.asarray([row[1 + len(camera_names) + len(robot_names) + robot_idx] for row in rows], dtype=np.float32)
+            for robot_idx, robot in enumerate(robot_names)
+        }
+        cols = math.ceil(math.sqrt(len(camera_names) + 1))
+        rows_per_frame = math.ceil((len(camera_names) + 1) / cols)
+
+        for step_idx, row in enumerate(rows):
+            decoded = [
+                decode_jpeg(torch.frombuffer(bytearray(image_bytes), dtype=torch.uint8)).permute(1, 2, 0).cpu().numpy()
+                for image_bytes in row[1 : 1 + len(camera_names)]
+            ]
+            height, width = decoded[0].shape[:2]
+            decoded.append(_render_action_panel(joint_history, gripper_history, step_idx, height, width))
+            tiled = np.zeros((rows_per_frame * height, cols * width, 3), dtype=np.uint8)
+            for camera_index, image in enumerate(decoded):
+                top = (camera_index // cols) * height
+                left = (camera_index % cols) * width
+                tiled[top : top + height, left : left + width] = image
+            frames.append(tiled)
+
+        _write_mp4(frames, output / f"{timestamp}.mp4", fps=fps)
diff --git a/python/tests/test_quest_operator.py b/python/tests/test_quest_operator.py
index d5af2a78..55db719a 100644
--- a/python/tests/test_quest_operator.py
+++ b/python/tests/test_quest_operator.py
@@ -1,6 +1,9 @@
 import threading
 from typing import Any, cast
 
+import numpy as np
+
+from rcs._core.common import Pose
 from rcs.operator.interface import TeleopCommands
 from rcs.operator.quest import QuestOperator
 
@@ -9,13 +12,46 @@ class _Config:
     switched_left_right = True
 
 
+def test_swap_controller_input_swaps_left_and_right_packets():
+    operator = QuestOperator.__new__(QuestOperator)
+    operator.config = cast(Any, _Config())
+
+    input_data = {
+        "left": {"hand_trigger": 0.1, "index_trigger": 0.2},
+        "right": {"hand_trigger": 0.9, "index_trigger": 0.8},
+        "A": True,
+    }
+
+    swapped = QuestOperator._swap_controller_input(operator, input_data)
+
+    assert swapped["left"] == input_data["right"]
+    assert swapped["right"] == input_data["left"]
+    assert swapped["A"] is True
+
+
 def test_consume_commands_swaps_both_reset_origin_keys():
     operator = QuestOperator.__new__(QuestOperator)
     operator.config = cast(Any, _Config())
     operator._cmd_lock = threading.Lock()
-    operator._commands = TeleopCommands(reset_origin_to_current={"left": True, "right": False})
+    operator._commands = TeleopCommands(reset_origin_to_current={"right": True, "left": False})
 
     cmds = QuestOperator.consume_commands(operator)
 
     assert cmds.reset_origin_to_current == {"right": True, "left": False}
     assert operator._commands.reset_origin_to_current == {}
+
+
+def test_consume_action_swaps_gripper_with_controller():
+    operator = QuestOperator.__new__(QuestOperator)
+    operator.config = cast(Any, _Config())
+    operator._resource_lock = threading.Lock()
+    operator.controller_names = ["left", "right"]
+    operator._last_controller_pose = {key: Pose() for key in operator.controller_names}
+    operator._offset_pose = {key: Pose() for key in operator.controller_names}
+    operator._set_frame = {key: Pose() for key in operator.controller_names}
+    operator._grp_pos = {"left": 0.75, "right": 0.25}
+
+    actions = QuestOperator.consume_action(operator)
+
+    assert np.allclose(actions["left"]["gripper"], np.array([0.75]))
+    assert np.allclose(actions["right"]["gripper"], np.array([0.25]))