feat: add dedicated endpoint support for Model Garden deploys

AlexMikhalev · claude · AlexMikhalev · commit 50604bde791b · 2026-02-22T22:24:17.000+01:00
The VERTEX_AI_ENDPOINT env var enables connecting to Model Garden
one-click deployed endpoints instead of publisher model URLs.
This is required for MedGemma, which doesn't have a serverless
publisher model endpoint on Vertex AI.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/crates/medgemma-client/src/vertex_ai.rs b/crates/medgemma-client/src/vertex_ai.rs
@@ -10,7 +10,8 @@
 //! ```bash
 //! gcloud auth application-default login
 //! export VERTEX_AI_PROJECT=your-project-id
-//! export VERTEX_AI_LOCATION=us-central1  # optional, defaults to us-central1
+//! export VERTEX_AI_LOCATION=europe-west4  # optional, defaults to us-central1
+//! export VERTEX_AI_ENDPOINT=mg-endpoint-xxx  # optional, for Model Garden deploys
 //! ```
 //!
 //! ## Example
@@ -60,6 +61,9 @@ pub struct VertexAiClient {
     project_id: String,
     location: String,
     model_id: String,
+    /// Dedicated endpoint ID from Model Garden one-click deploy.
+    /// When set, uses endpoint URL instead of publisher model URL.
+    endpoint_id: Option<String>,
     access_token: Mutex<String>,
     latency_tracker: LatencyTracker,
 }
@@ -85,6 +89,7 @@ impl VertexAiClient {
     /// Optional:
     /// - `VERTEX_AI_LOCATION` (defaults to us-central1)
     /// - `VERTEX_AI_MODEL` (defaults to medgemma-4b-it)
+    /// - `VERTEX_AI_ENDPOINT` (dedicated endpoint ID from Model Garden deploy)
     pub async fn new() -> Result<Self> {
         let project_id = std::env::var("VERTEX_AI_PROJECT").map_err(|_| {
             ClientError::Api("VERTEX_AI_PROJECT env var not set".to_string())
@@ -96,22 +101,26 @@ impl VertexAiClient {
         let model_id =
             std::env::var("VERTEX_AI_MODEL").unwrap_or_else(|_| DEFAULT_MODEL.to_string());
 
+        let endpoint_id = std::env::var("VERTEX_AI_ENDPOINT").ok();
+
         let access_token = get_access_token()?;
 
         let genai_client = Client::default();
 
         tracing::info!(
-            "Vertex AI client initialized (via genai): project={}, location={}, model={}",
+            "Vertex AI client initialized (via genai): project={}, location={}, model={}, endpoint={:?}",
             project_id,
             location,
-            model_id
+            model_id,
+            endpoint_id,
         );
 
         Ok(Self {
             genai_client,
             project_id,
             location,
             model_id,
+            endpoint_id,
             access_token: Mutex::new(access_token),
             latency_tracker: LatencyTracker::new(),
         })
@@ -123,14 +132,26 @@ impl VertexAiClient {
         Self::new().await
     }
 
-    /// Build the full Vertex AI generateContent URL
+    /// Build the full Vertex AI generateContent URL.
+    ///
+    /// If `VERTEX_AI_ENDPOINT` is set (dedicated endpoint from Model Garden),
+    /// uses the endpoint URL pattern. Otherwise uses the publisher model pattern.
     fn vertex_url(&self) -> String {
-        format!(
-            "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent",
-            location = self.location,
-            project = self.project_id,
-            model = self.model_id,
-        )
+        if let Some(ref endpoint_id) = self.endpoint_id {
+            format!(
+                "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/{endpoint}:generateContent",
+                location = self.location,
+                project = self.project_id,
+                endpoint = endpoint_id,
+            )
+        } else {
+            format!(
+                "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent",
+                location = self.location,
+                project = self.project_id,
+                model = self.model_id,
+            )
+        }
     }
 
     /// Build a ServiceTarget using RequestOverride for Vertex AI auth and URL.
@@ -388,8 +409,8 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_vertex_url_format() {
-        // Verify the full Vertex AI generateContent URL format
+    fn test_vertex_url_publisher_model() {
+        // Verify the publisher model URL format (no VERTEX_AI_ENDPOINT)
         let url = format!(
             "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent",
             location = "us-central1",
@@ -401,6 +422,20 @@ mod tests {
         assert!(url.starts_with("https://us-central1-aiplatform.googleapis.com/v1/"));
     }
 
+    #[test]
+    fn test_vertex_url_dedicated_endpoint() {
+        // Verify the dedicated endpoint URL format (VERTEX_AI_ENDPOINT set)
+        let url = format!(
+            "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/{endpoint}:generateContent",
+            location = "europe-west4",
+            project = "test-project",
+            endpoint = "mg-endpoint-abc123",
+        );
+        assert!(url.contains("europe-west4-aiplatform.googleapis.com"));
+        assert!(url.contains("endpoints/mg-endpoint-abc123:generateContent"));
+        assert!(!url.contains("publishers/google/models"));
+    }
+
     #[test]
     fn test_check_vertex_ai_without_env() {
         std::env::remove_var("VERTEX_AI_PROJECT");