NVIDIA · jioffe502 · Apr 10, 2026 · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026
@@ -3,6 +3,8 @@
 active:
   dataset: jp20
   preset: single_gpu
+  run_mode: batch
+  auto_tuning: true
   query_csv: data/jp20_query_gt.csv
   input_type: pdf
   recall_required: true

@@ -5,34 +5,116 @@
 from __future__ import annotations
 
 import json
+import re
 import subprocess
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 
 NEMO_RETRIEVER_ROOT = Path(__file__).resolve().parents[3]
 DEFAULT_ARTIFACTS_ROOT = NEMO_RETRIEVER_ROOT / "artifacts"
+_COMMIT_RE = re.compile(r"^[0-9a-fA-F]{7,40}$")
 
 
 def now_timestr() -> str:
+    """Return the current UTC time formatted as ``YYYYMMDD_HHMMSS_UTC``."""
     return datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S_UTC")
 
 
+def _normalize_commit(value: str | None) -> str | None:
+    """Return the first seven characters of *value* if it is a commit hash.
+
+    Surrounding whitespace is ignored. ``None``, an empty string, or any text
+    that does not match ``_COMMIT_RE`` (7 to 40 hexadecimal digits) yields
+    ``None``.
+    """
+    candidate = value.strip() if value else ""
+    return candidate[:7] if _COMMIT_RE.match(candidate) else None
+
+
+def _resolve_git_dir(repo_root: Path) -> Path | None:
+    """Locate the git directory that belongs to *repo_root*.
+
+    ``repo_root/.git`` is returned as-is when it is a directory. When it is a
+    regular file, its content must start with ``gitdir:``; the text after the
+    prefix is taken as the git directory, with ``~`` expanded and relative
+    paths resolved against *repo_root*.
+
+    Returns:
+        The git directory path, or ``None`` when ``.git`` is missing, cannot
+        be read, or does not carry a ``gitdir:`` pointer. The returned path is
+        not checked for existence.
+    """
+    marker = repo_root / ".git"
+    if marker.is_dir():
+        return marker
+    if not marker.is_file():
+        return None
+
+    try:
+        pointer = marker.read_text(encoding="utf-8").strip()
+    except Exception:
+        return None
+
+    prefix = "gitdir:"
+    if not pointer.startswith(prefix):
+        return None
+
+    # Everything after the first colon is the target path.
+    target = Path(pointer[len(prefix):].strip()).expanduser()
+    return target if target.is_absolute() else (repo_root / target).resolve()
+
+
+def _read_packed_ref(git_dir: Path, ref_name: str) -> str | None:
+    """Look up *ref_name* in ``git_dir/packed-refs``.
+
+    Blank lines and lines starting with ``#`` or ``^`` are skipped. Every
+    other line is split at its first space into a hash and a ref name.
+
+    Returns:
+        The normalized hash of the first entry that names *ref_name* and
+        carries a valid hash, or ``None`` when the file is missing, cannot be
+        read, or holds no such entry.
+    """
+    packed_refs_path = git_dir / "packed-refs"
+    if not packed_refs_path.exists():
+        return None
+    try:
+        entries = packed_refs_path.read_text(encoding="utf-8").splitlines()
+        for raw_entry in entries:
+            entry = raw_entry.strip()
+            if not entry or entry.startswith(("#", "^")):
+                continue
+            sha, _, name = entry.partition(" ")
+            if name.strip() != ref_name:
+                continue
+            short = _normalize_commit(sha)
+            if short is not None:
+                return short
+    except Exception:
+        return None
+    return None
+
+
+def _read_head_commit(repo_root: Path) -> str | None:
+    """Resolve HEAD to an abbreviated commit hash by reading git's files.
+
+    This is a fallback that does not need the ``git`` executable. A detached
+    HEAD (a bare hash in ``HEAD``) is normalized directly. A symbolic HEAD
+    (``ref: <name>``) is looked up as a loose ref file first and then in
+    ``packed-refs``.
+
+    Linked worktrees keep ``HEAD`` in their own git directory, but shared
+    refs such as branches live in the common directory named by the
+    ``commondir`` file. That directory is searched after the worktree's own
+    git directory so branch checkouts in a worktree still resolve.
+
+    Args:
+        repo_root: Working-tree root that contains ``.git``.
+
+    Returns:
+        The normalized commit hash, or ``None`` when it cannot be determined.
+    """
+    git_dir = _resolve_git_dir(repo_root)
+    if git_dir is None:
+        return None
+
+    head_path = git_dir / "HEAD"
+    if not head_path.exists():
+        return None
+    try:
+        head_value = head_path.read_text(encoding="utf-8").strip()
+    except Exception:
+        return None
+
+    if not head_value.startswith("ref:"):
+        # Detached HEAD: the file holds the commit hash itself.
+        return _normalize_commit(head_value)
+
+    ref_name = head_value.split(":", 1)[1].strip()
+
+    # Search the git dir first, then the shared common dir if one is declared.
+    search_dirs = [git_dir]
+    commondir_path = git_dir / "commondir"
+    if commondir_path.is_file():
+        try:
+            common_text = commondir_path.read_text(encoding="utf-8").strip()
+        except Exception:
+            common_text = ""
+        if common_text:
+            common_dir = Path(common_text)
+            if not common_dir.is_absolute():
+                # A relative commondir is relative to the git directory.
+                common_dir = (git_dir / common_dir).resolve()
+            if common_dir != git_dir:
+                search_dirs.append(common_dir)
+
+    for candidate_dir in search_dirs:
+        ref_path = candidate_dir / ref_name
+        if ref_path.exists():
+            try:
+                normalized = _normalize_commit(ref_path.read_text(encoding="utf-8"))
+            except Exception:
+                # Best effort: an unreadable loose ref falls through to packed-refs.
+                normalized = None
+            if normalized is not None:
+                return normalized
+        packed = _read_packed_ref(candidate_dir, ref_name)
+        if packed is not None:
+            return packed
+    return None
+
+
 def last_commit() -> str:
+    """Return the abbreviated hash of the repository's current commit.
+
+    Runs ``git rev-parse --short HEAD`` in the parent directory of
+    ``NEMO_RETRIEVER_ROOT``. If the command cannot be run, exits non-zero, or
+    prints something that is not a commit hash, HEAD is resolved from the
+    on-disk git metadata instead.
+
+    Returns:
+        The normalized commit hash, or ``"unknown"`` when neither source
+        yields one.
+    """
+    repo_root = NEMO_RETRIEVER_ROOT.parent
     try:
         result = subprocess.run(
             ["git", "rev-parse", "--short", "HEAD"],
-            cwd=str(NEMO_RETRIEVER_ROOT.parent),
+            cwd=str(repo_root),
             check=False,
             capture_output=True,
             text=True,
         )
     except Exception:
-        return "unknown"
+        # The command could not be run; continue to the file-based fallback.
+        result = None
+
+    if result is not None and result.returncode == 0:
+        normalized = _normalize_commit(result.stdout)
+        if normalized is not None:
+            return normalized
 
-    if result.returncode != 0:
-        return "unknown"
-    return (result.stdout or "").strip() or "unknown"
+    # Fallback: read HEAD and refs directly from the git directory.
+    fallback = _read_head_commit(repo_root)
+    if fallback is not None:
+        return fallback
+    return "unknown"
 
 
 def get_artifacts_root(base_dir: str | None = None) -> Path:

@@ -15,6 +15,7 @@
 REPO_ROOT = NEMO_RETRIEVER_ROOT.parent
 DEFAULT_TEST_CONFIG_PATH = NEMO_RETRIEVER_ROOT / "harness" / "test_configs.yaml"
 DEFAULT_NIGHTLY_CONFIG_PATH = NEMO_RETRIEVER_ROOT / "harness" / "nightly_config.yaml"
+VALID_RUN_MODES = {"batch", "inprocess"}
 VALID_EVALUATION_MODES = {"recall", "beir"}
 VALID_RECALL_ADAPTERS = {"none", "page_plus_one", "financebench_json"}
 VALID_BEIR_LOADERS = {"vidore_hf"}
@@ -54,6 +55,8 @@ class HarnessConfig:
     dataset_dir: str
     dataset_label: str
     preset: str
+    run_mode: str = "batch"
+    auto_tuning: bool = False
 
     query_csv: str | None = None
     input_type: str = "pdf"
@@ -114,6 +117,9 @@ def validate(self) -> list[str]:
         if self.query_csv is not None and not Path(self.query_csv).exists():
             errors.append(f"query_csv does not exist: {self.query_csv}")
 
+        if self.run_mode not in VALID_RUN_MODES:
+            errors.append(f"run_mode must be one of {sorted(VALID_RUN_MODES)}")
+
         if self.evaluation_mode not in VALID_EVALUATION_MODES:
             errors.append(f"evaluation_mode must be one of {sorted(VALID_EVALUATION_MODES)}")
 
@@ -263,6 +269,8 @@ def _apply_env_overrides(config_dict: dict[str, Any]) -> None:
         "HARNESS_DATASET": ("dataset", str),
         "HARNESS_DATASET_DIR": ("dataset_dir", str),
         "HARNESS_PRESET": ("preset", str),
+        "HARNESS_RUN_MODE": ("run_mode", str),
+        "HARNESS_AUTO_TUNING": ("auto_tuning", _parse_bool),
         "HARNESS_QUERY_CSV": ("query_csv", str),
         "HARNESS_INPUT_TYPE": ("input_type", str),
         "HARNESS_RECALL_REQUIRED": ("recall_required", _parse_bool),