Skip to content

Commit 50604bd

Browse files
AlexMikhalev and claude committed
feat: add dedicated endpoint support for Model Garden deploys
The VERTEX_AI_ENDPOINT env var enables connecting to Model Garden one-click deployed endpoints instead of publisher model URLs. This is required for MedGemma, which doesn't have a serverless publisher model endpoint on Vertex AI.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2020cf6 commit 50604bd

File tree

1 file changed

+47
-12
lines changed

1 file changed

+47
-12
lines changed

crates/medgemma-client/src/vertex_ai.rs

Lines changed: 47 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,8 @@
1010
//! ```bash
1111
//! gcloud auth application-default login
1212
//! export VERTEX_AI_PROJECT=your-project-id
13-
//! export VERTEX_AI_LOCATION=us-central1 # optional, defaults to us-central1
13+
//! export VERTEX_AI_LOCATION=europe-west4 # optional, defaults to us-central1
14+
//! export VERTEX_AI_ENDPOINT=mg-endpoint-xxx # optional, for Model Garden deploys
1415
//! ```
1516
//!
1617
//! ## Example
@@ -60,6 +61,9 @@ pub struct VertexAiClient {
6061
project_id: String,
6162
location: String,
6263
model_id: String,
64+
/// Dedicated endpoint ID from Model Garden one-click deploy.
65+
/// When set, uses endpoint URL instead of publisher model URL.
66+
endpoint_id: Option<String>,
6367
access_token: Mutex<String>,
6468
latency_tracker: LatencyTracker,
6569
}
@@ -85,6 +89,7 @@ impl VertexAiClient {
8589
/// Optional:
8690
/// - `VERTEX_AI_LOCATION` (defaults to us-central1)
8791
/// - `VERTEX_AI_MODEL` (defaults to medgemma-4b-it)
92+
/// - `VERTEX_AI_ENDPOINT` (dedicated endpoint ID from Model Garden deploy)
8893
pub async fn new() -> Result<Self> {
8994
let project_id = std::env::var("VERTEX_AI_PROJECT").map_err(|_| {
9095
ClientError::Api("VERTEX_AI_PROJECT env var not set".to_string())
@@ -96,22 +101,26 @@ impl VertexAiClient {
96101
let model_id =
97102
std::env::var("VERTEX_AI_MODEL").unwrap_or_else(|_| DEFAULT_MODEL.to_string());
98103

104+
let endpoint_id = std::env::var("VERTEX_AI_ENDPOINT").ok();
105+
99106
let access_token = get_access_token()?;
100107

101108
let genai_client = Client::default();
102109

103110
tracing::info!(
104-
"Vertex AI client initialized (via genai): project={}, location={}, model={}",
111+
"Vertex AI client initialized (via genai): project={}, location={}, model={}, endpoint={:?}",
105112
project_id,
106113
location,
107-
model_id
114+
model_id,
115+
endpoint_id,
108116
);
109117

110118
Ok(Self {
111119
genai_client,
112120
project_id,
113121
location,
114122
model_id,
123+
endpoint_id,
115124
access_token: Mutex::new(access_token),
116125
latency_tracker: LatencyTracker::new(),
117126
})
@@ -123,14 +132,26 @@ impl VertexAiClient {
123132
Self::new().await
124133
}
125134

126-
/// Build the full Vertex AI generateContent URL
135+
/// Build the full Vertex AI generateContent URL.
136+
///
137+
/// If `VERTEX_AI_ENDPOINT` is set (dedicated endpoint from Model Garden),
138+
/// uses the endpoint URL pattern. Otherwise uses the publisher model pattern.
127139
fn vertex_url(&self) -> String {
128-
format!(
129-
"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent",
130-
location = self.location,
131-
project = self.project_id,
132-
model = self.model_id,
133-
)
140+
if let Some(ref endpoint_id) = self.endpoint_id {
141+
format!(
142+
"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/{endpoint}:generateContent",
143+
location = self.location,
144+
project = self.project_id,
145+
endpoint = endpoint_id,
146+
)
147+
} else {
148+
format!(
149+
"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent",
150+
location = self.location,
151+
project = self.project_id,
152+
model = self.model_id,
153+
)
154+
}
134155
}
135156

136157
/// Build a ServiceTarget using RequestOverride for Vertex AI auth and URL.
@@ -388,8 +409,8 @@ mod tests {
388409
use super::*;
389410

390411
#[test]
391-
fn test_vertex_url_format() {
392-
// Verify the full Vertex AI generateContent URL format
412+
fn test_vertex_url_publisher_model() {
413+
// Verify the publisher model URL format (no VERTEX_AI_ENDPOINT)
393414
let url = format!(
394415
"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent",
395416
location = "us-central1",
@@ -401,6 +422,20 @@ mod tests {
401422
assert!(url.starts_with("https://us-central1-aiplatform.googleapis.com/v1/"));
402423
}
403424

425+
#[test]
426+
fn test_vertex_url_dedicated_endpoint() {
427+
// Verify the dedicated endpoint URL format (VERTEX_AI_ENDPOINT set)
428+
let url = format!(
429+
"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/{endpoint}:generateContent",
430+
location = "europe-west4",
431+
project = "test-project",
432+
endpoint = "mg-endpoint-abc123",
433+
);
434+
assert!(url.contains("europe-west4-aiplatform.googleapis.com"));
435+
assert!(url.contains("endpoints/mg-endpoint-abc123:generateContent"));
436+
assert!(!url.contains("publishers/google/models"));
437+
}
438+
404439
#[test]
405440
fn test_check_vertex_ai_without_env() {
406441
std::env::remove_var("VERTEX_AI_PROJECT");

0 commit comments

Comments
 (0)