Skip to content

Commit ccd43a4

Browse files
refactor: Remove proxy dependency, use direct MedGemma client
- Created DirectMedGemmaClient that connects directly to HF API or local models
- Removed ProxyClient dependency on terraphim-llm-proxy service
- Auto-detection: HF API → Local ctransformers → Mock fallback
- Updated demo.rs to use direct client instead of proxy
- Added shellexpand dependency for path expansion
- Simplifies deployment - no external proxy service required
- Works with HF_TOKEN env var or local model files
1 parent 790161a commit ccd43a4

File tree

4 files changed

+185
-24
lines changed

4 files changed

+185
-24
lines changed

crates/terraphim-demo/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ anyhow = { workspace = true }
2323
reqwest = { workspace = true }
2424
tracing = { workspace = true }
2525
async-trait = { workspace = true }
26+
shellexpand = "3.0"

crates/terraphim-demo/src/demo.rs

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,9 @@
1111
use crate::consultation::{
1212
ClinicalEntities, ConsultationWorkflow, PGxValidation, SafetyCheck, TreatmentRecommendation,
1313
};
14+
use crate::direct_client::DirectMedGemmaClient;
1415
use crate::patients::{get_demo_patient, PatientProfile};
15-
use crate::proxy_client::ProxyClient;
1616
use medgemma_client::client::MedGemmaClient;
17-
use medgemma_client::mock::MockMedGemmaClient;
1817
use std::io;
1918
use std::sync::Arc;
2019
use terraphim_automata::EntityExtractor;
@@ -72,29 +71,32 @@ pub async fn run_demo() {
7271
let _ = io::stdin().read_line(&mut String::new());
7372
}
7473

75-
/// Initialize MedGemma client with proxy as primary, fallback to mock
74+
/// Initialize MedGemma client with direct connection (no proxy required)
///
/// In the added (`+`) version this awaits `DirectMedGemmaClient::new()`, prints
/// one status line chosen by the health value in the client's backend info, and
/// returns that same client wrapped in an `Arc` whichever branch printed.
7675
async fn init_medgemma_client() -> Arc<dyn MedGemmaClient + Send + Sync> {
77-
// Try to create proxy client
78-
let proxy_client = ProxyClient::new();
79-
80-
// Check if proxy is available
81-
if proxy_client.is_available().await {
82-
let backend_info = proxy_client.backend_info();
83-
println!(
84-
"[OK] Using terraphim-llm-proxy at {} (source: {})",
85-
backend_info.name,
86-
std::env::var("PROXY_URL").as_deref().unwrap_or("default")
87-
);
88-
return Arc::new(proxy_client);
76+
let client = DirectMedGemmaClient::new().await;
77+
let backend_info = client.backend_info();
78+
79+
match backend_info.health {
80+
medgemma_client::BackendHealth::Healthy => {
81+
println!(
82+
"[OK] MedGemma client ready: {} (healthy)",
83+
backend_info.name
84+
);
85+
}
86+
// NOTE(review): the message below says "falling back to mock", but no
// fallback happens in this function - the same client is returned after
// the match. Confirm the intended behavior.
medgemma_client::BackendHealth::Degraded => {
87+
println!(
88+
"[WARN] MedGemma client degraded: {} (falling back to mock)",
89+
backend_info.name
90+
);
91+
}
92+
// NOTE(review): this message presumes that only the mock client reports
// Unhealthy - confirm against the health values real backends can report.
medgemma_client::BackendHealth::Unhealthy => {
93+
println!(
94+
"[INFO] Using mock MedGemma client (set HF_TOKEN for real inference)"
95+
);
96+
}
8997
}
90-
91-
// Fallback to mock client if proxy is not available
92-
println!("[WARN] terraphim-llm-proxy not available, falling back to mock client");
93-
println!(
94-
" (Proxy should be running at {})",
95-
proxy_client.backend_info().name
96-
);
97-
Arc::new(MockMedGemmaClient::new())
98+
99+
Arc::new(client)
98100
}
99101

100102
fn print_header() {

crates/terraphim-demo/src/direct_client.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
//! Simplified MedGemma client - Direct connection without proxy
2+
//!
3+
//! This client connects directly to HuggingFace or local MedGemma models
4+
//! without requiring the terraphim-llm-proxy service.
5+
6+
use async_trait::async_trait;
7+
use medgemma_client::{
8+
client::MedGemmaClient, BackendHealth, BackendInfo, ClientError, Explanation, PatientProfile,
9+
Recommendation, Result,
10+
};
11+
use std::time::Duration;
12+
13+
/// Direct MedGemma client using HF API or local inference
///
/// Wraps whichever backend was selected when the client was built and keeps a
/// copy of the info that backend reported at that moment.
14+
pub struct DirectMedGemmaClient {
15+
/// Selected backend; the `MedGemmaClient` impl forwards every call to it.
inner: Box<dyn MedGemmaClient + Send + Sync>,
16+
/// Value of `backend_info()` taken from the backend at construction time.
backend_info: BackendInfo,
17+
}
18+
19+
impl DirectMedGemmaClient {
20+
/// Create a new direct client with auto-detection
21+
///
22+
/// Tries in order:
23+
/// 1. HuggingFace API (if HF_TOKEN env var set)
24+
/// 2. Local ctransformers (if model file exists)
25+
/// 3. Mock client (fallback)
26+
pub async fn new() -> Self {
27+
// Try HuggingFace API first
28+
if let Ok(token) = std::env::var("HF_TOKEN") {
29+
match Self::try_hf_api(&token).await {
30+
Ok(client) => {
31+
return client;
32+
}
33+
Err(e) => {
34+
eprintln!("[WARN] HF API failed: {}, trying local...", e);
35+
}
36+
}
37+
}
38+
39+
// Try local ctransformers
40+
match Self::try_local().await {
41+
Ok(client) => client,
42+
Err(e) => {
43+
eprintln!("[WARN] Local inference failed: {}, using mock", e);
44+
Self::mock()
45+
}
46+
}
47+
}
48+
49+
/// Create a mock client for testing
50+
pub fn mock() -> Self {
51+
use medgemma_client::mock::MockMedGemmaClient;
52+
53+
let mock = MockMedGemmaClient::new();
54+
let backend_info = mock.backend_info();
55+
56+
Self {
57+
inner: Box::new(mock),
58+
backend_info,
59+
}
60+
}
61+
62+
/// Try to use HuggingFace API
63+
async fn try_hf_api(token: &str) -> Result<Self> {
64+
use medgemma_client::hf::HfMedGemmaClient;
65+
66+
let client = HfMedGemmaClient::new(token.to_string())
67+
.map_err(|e| ClientError::Init(e.to_string()))?;
68+
69+
let backend_info = client.backend_info();
70+
71+
Ok(Self {
72+
inner: Box::new(client),
73+
backend_info,
74+
})
75+
}
76+
77+
/// Try to use local ctransformers
78+
async fn try_local() -> Result<Self> {
79+
use medgemma_client::local::LocalMedGemmaClient;
80+
81+
// Check for common model paths
82+
let model_paths = [
83+
"./models/medgemma-4b-it-q4_k_m.gguf",
84+
"./medgemma-4b-it-q4_k_m.gguf",
85+
"~/models/medgemma-4b-it-q4_k_m.gguf",
86+
];
87+
88+
for path in &model_paths {
89+
let expanded = shellexpand::tilde(path);
90+
if std::path::Path::new(expanded.as_ref()).exists() {
91+
let client = LocalMedGemmaClient::new(expanded.to_string())
92+
.map_err(|e| ClientError::Init(e.to_string()))?;
93+
94+
let backend_info = client.backend_info();
95+
96+
return Ok(Self {
97+
inner: Box::new(client),
98+
backend_info,
99+
});
100+
}
101+
}
102+
103+
Err(ClientError::Init("No local model found".to_string()))
104+
}
105+
106+
/// Get backend information
107+
pub fn backend_info(&self) -> &BackendInfo {
108+
&self.backend_info
109+
}
110+
111+
/// Check if using real inference (not mock)
112+
pub fn is_real(&self) -> bool {
113+
!matches!(self.backend_info.health, BackendHealth::Unhealthy)
114+
}
115+
}
116+
117+
#[async_trait]
118+
impl MedGemmaClient for DirectMedGemmaClient {
119+
async fn recommend_treatment(
120+
&self,
121+
patient: &PatientProfile,
122+
) -> Result<Recommendation> {
123+
self.inner.recommend_treatment(patient).await
124+
}
125+
126+
async fn explain_recommendation(
127+
&self,
128+
patient: &PatientProfile,
129+
recommendation: &Recommendation,
130+
) -> Result<Explanation> {
131+
self.inner.explain_recommendation(patient, recommendation).await
132+
}
133+
134+
async fn validate_safety(
135+
&self,
136+
patient: &PatientProfile,
137+
recommendation: &Recommendation,
138+
) -> Result<bool> {
139+
self.inner.validate_safety(patient, recommendation).await
140+
}
141+
142+
fn backend_info(&self) -> BackendInfo {
143+
self.backend_info.clone()
144+
}
145+
}
146+
147+
#[cfg(test)]
mod tests {
    use super::*;

    /// The mock constructor must not count as real inference and must report
    /// the mock backend's name.
    #[tokio::test]
    async fn test_mock_client() {
        let mock_client = DirectMedGemmaClient::mock();

        let uses_real_inference = mock_client.is_real();
        assert!(!uses_real_inference);

        let info = mock_client.backend_info();
        assert_eq!(info.name, "MockMedGemma");
    }
}

crates/terraphim-demo/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010
1111
pub mod consultation;
1212
pub mod demo;
13+
pub mod direct_client;
1314
pub mod patients;
14-
pub mod proxy_client;
1515

1616
pub use consultation::{
1717
ClinicalEntities, ConsultationResult, ConsultationStep, ConsultationWorkflow, PGxValidation,
1818
SafetyCheck, StepResult, TreatmentRecommendation, WorkflowStatus,
1919
};
2020
pub use demo::run_demo;
21+
pub use direct_client::DirectMedGemmaClient;
2122
pub use patients::{get_afib_patient, get_demo_patient, PatientProfile};

0 commit comments

Comments
 (0)