1010//! ```bash
1111//! gcloud auth application-default login
1212//! export VERTEX_AI_PROJECT=your-project-id
13- //! export VERTEX_AI_LOCATION=us-central1 # optional, defaults to us-central1
13+ //! export VERTEX_AI_LOCATION=europe-west4 # optional, defaults to us-central1
14+ //! export VERTEX_AI_ENDPOINT=mg-endpoint-xxx # optional, for Model Garden deploys
1415//! ```
1516//!
1617//! ## Example
@@ -60,6 +61,9 @@ pub struct VertexAiClient {
6061 project_id : String ,
6162 location : String ,
6263 model_id : String ,
64+ /// Dedicated endpoint ID from Model Garden one-click deploy.
65+ /// When set, uses endpoint URL instead of publisher model URL.
66+ endpoint_id : Option < String > ,
6367 access_token : Mutex < String > ,
6468 latency_tracker : LatencyTracker ,
6569}
@@ -85,6 +89,7 @@ impl VertexAiClient {
8589 /// Optional:
8690 /// - `VERTEX_AI_LOCATION` (defaults to us-central1)
8791 /// - `VERTEX_AI_MODEL` (defaults to medgemma-4b-it)
92+ /// - `VERTEX_AI_ENDPOINT` (dedicated endpoint ID from Model Garden deploy)
8893 pub async fn new ( ) -> Result < Self > {
8994 let project_id = std:: env:: var ( "VERTEX_AI_PROJECT" ) . map_err ( |_| {
9095 ClientError :: Api ( "VERTEX_AI_PROJECT env var not set" . to_string ( ) )
@@ -96,22 +101,26 @@ impl VertexAiClient {
96101 let model_id =
97102 std:: env:: var ( "VERTEX_AI_MODEL" ) . unwrap_or_else ( |_| DEFAULT_MODEL . to_string ( ) ) ;
98103
104+ let endpoint_id = std:: env:: var ( "VERTEX_AI_ENDPOINT" ) . ok ( ) ;
105+
99106 let access_token = get_access_token ( ) ?;
100107
101108 let genai_client = Client :: default ( ) ;
102109
103110 tracing:: info!(
104- "Vertex AI client initialized (via genai): project={}, location={}, model={}" ,
111+ "Vertex AI client initialized (via genai): project={}, location={}, model={}, endpoint={:?} " ,
105112 project_id,
106113 location,
107- model_id
114+ model_id,
115+ endpoint_id,
108116 ) ;
109117
110118 Ok ( Self {
111119 genai_client,
112120 project_id,
113121 location,
114122 model_id,
123+ endpoint_id,
115124 access_token : Mutex :: new ( access_token) ,
116125 latency_tracker : LatencyTracker :: new ( ) ,
117126 } )
@@ -123,14 +132,26 @@ impl VertexAiClient {
123132 Self :: new ( ) . await
124133 }
125134
126- /// Build the full Vertex AI generateContent URL
135+ /// Build the full Vertex AI generateContent URL.
136+ ///
137+ /// If `VERTEX_AI_ENDPOINT` is set (dedicated endpoint from Model Garden),
138+ /// uses the endpoint URL pattern. Otherwise uses the publisher model pattern.
127139 fn vertex_url ( & self ) -> String {
128- format ! (
129- "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent" ,
130- location = self . location,
131- project = self . project_id,
132- model = self . model_id,
133- )
140+ if let Some ( ref endpoint_id) = self . endpoint_id {
141+ format ! (
142+ "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/{endpoint}:generateContent" ,
143+ location = self . location,
144+ project = self . project_id,
145+ endpoint = endpoint_id,
146+ )
147+ } else {
148+ format ! (
149+ "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent" ,
150+ location = self . location,
151+ project = self . project_id,
152+ model = self . model_id,
153+ )
154+ }
134155 }
135156
136157 /// Build a ServiceTarget using RequestOverride for Vertex AI auth and URL.
@@ -388,8 +409,8 @@ mod tests {
388409 use super :: * ;
389410
390411 #[ test]
391- fn test_vertex_url_format ( ) {
392- // Verify the full Vertex AI generateContent URL format
412+ fn test_vertex_url_publisher_model ( ) {
413+ // Verify the publisher model URL format (no VERTEX_AI_ENDPOINT)
393414 let url = format ! (
394415 "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent" ,
395416 location = "us-central1" ,
@@ -401,6 +422,20 @@ mod tests {
401422 assert ! ( url. starts_with( "https://us-central1-aiplatform.googleapis.com/v1/" ) ) ;
402423 }
403424
#[test]
fn test_vertex_url_dedicated_endpoint() {
    // Assemble the client by hand (no env vars, no token lookup) so the
    // assertions exercise the real `vertex_url` rather than a copy of its
    // format string.
    let client = VertexAiClient {
        genai_client: Client::default(),
        project_id: "test-project".to_string(),
        location: "europe-west4".to_string(),
        model_id: DEFAULT_MODEL.to_string(),
        endpoint_id: Some("mg-endpoint-abc123".to_string()),
        access_token: Mutex::new("test-token".to_string()),
        latency_tracker: LatencyTracker::new(),
    };

    let url = client.vertex_url();

    // With an endpoint ID present, the endpoint form must be used and the
    // publisher-model path must not appear.
    assert_eq!(
        url,
        "https://europe-west4-aiplatform.googleapis.com/v1/projects/test-project/locations/europe-west4/endpoints/mg-endpoint-abc123:generateContent"
    );
    assert!(!url.contains("publishers/google/models"));
}
438+
404439 #[ test]
405440 fn test_check_vertex_ai_without_env ( ) {
406441 std:: env:: remove_var ( "VERTEX_AI_PROJECT" ) ;
0 commit comments