Features:
- Recursive file traversal
- LLM-powered code similarity analysis using augmented LLM classes
- JSON-based relationship storage
- Configurable matching strategies
- Progress tracking and error handling
- Automatic LLM provider selection based on API key availability
"""
1415
1516import asyncio
2223from dataclasses import dataclass , asdict
2324from typing import List , Dict , Any
2425
26+ # MCP Agent imports for LLM
27+ from mcp_agent .workflows .llm .augmented_llm_anthropic import AnthropicAugmentedLLM
28+ from mcp_agent .workflows .llm .augmented_llm_openai import OpenAIAugmentedLLM
29+ import yaml
30+
def get_preferred_llm_class(config_path: str = "mcp_agent.secrets.yaml"):
    """
    Automatically select the LLM class based on API key availability.

    Reads the YAML secrets file and returns AnthropicAugmentedLLM when a
    non-empty ``anthropic.api_key`` entry is present; otherwise returns
    OpenAIAugmentedLLM. Any read/parse problem also falls back to OpenAI
    rather than aborting startup.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        class: AnthropicAugmentedLLM or OpenAIAugmentedLLM.
    """
    try:
        if os.path.exists(config_path):
            with open(config_path, "r", encoding="utf-8") as f:
                # safe_load returns None for an empty file; normalize to {}
                # so the .get chain below cannot raise AttributeError.
                config = yaml.safe_load(f) or {}

            anthropic_key = config.get("anthropic", {}).get("api_key", "")

            # Non-empty, non-blank key selects Anthropic. (The truthiness
            # check already covers "", so no separate == "" test is needed.)
            if anthropic_key and anthropic_key.strip():
                return AnthropicAugmentedLLM
            return OpenAIAugmentedLLM

        print(f"🤖 Config file {config_path} not found, using OpenAIAugmentedLLM")
        return OpenAIAugmentedLLM

    except Exception as e:
        # Best-effort selection: report the problem and use the OpenAI default.
        print(f"🤖 Error reading config file {config_path}: {e}")
        print("🤖 Falling back to OpenAIAugmentedLLM")
        return OpenAIAugmentedLLM
66+
67+
def get_default_models(config_path: str = "mcp_agent.config.yaml"):
    """
    Get default model names from the configuration file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        dict: Dictionary with 'anthropic' and 'openai' default model names.
        Falls back to built-in defaults when the file is missing, empty,
        or unreadable.
    """
    # Single source of truth for the built-in fallbacks.
    fallback = {"anthropic": "claude-sonnet-4-20250514", "openai": "o3-mini"}

    try:
        if not os.path.exists(config_path):
            print(f"Config file {config_path} not found, using default models")
            return fallback

        with open(config_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty file; normalize to {}
            # so the .get chain below cannot raise AttributeError.
            config = yaml.safe_load(f) or {}

        return {
            "anthropic": config.get("anthropic", {}).get(
                "default_model", "claude-sonnet-4-20250514"
            ),
            "openai": config.get("openai", {}).get("default_model", "o3-mini"),
        }

    except Exception as e:
        print(f"Error reading config file {config_path}: {e}")
        return fallback
96+
2597
2698@dataclass
2799class FileRelationship :
@@ -78,6 +150,7 @@ def __init__(
78150 self .indexer_config_path = indexer_config_path
79151 self .api_config = self ._load_api_config ()
80152 self .indexer_config = self ._load_indexer_config ()
153+ self .default_models = get_default_models ("mcp_agent.config.yaml" )
81154
82155 # Use config paths if not provided as parameters
83156 paths_config = self .indexer_config .get ("paths" , {})
@@ -301,7 +374,7 @@ def _load_indexer_config(self) -> Dict[str, Any]:
301374 return {}
302375
303376 async def _initialize_llm_client (self ):
304- """Initialize LLM client based on configured provider """
377+ """Initialize LLM client (Anthropic or OpenAI) based on API key availability """
305378 if self .llm_client is not None :
306379 return self .llm_client , self .llm_client_type
307380
@@ -312,89 +385,65 @@ async def _initialize_llm_client(self):
312385 self .llm_client_type = "mock"
313386 return "mock" , "mock"
314387
315- # Try configured provider first
316- if self .model_provider .lower () == "anthropic" :
317- try :
318- anthropic_key = self .api_config .get ("anthropic" , {}).get ("api_key" )
319- if anthropic_key :
320- from anthropic import AsyncAnthropic
321-
322- client = AsyncAnthropic (api_key = anthropic_key )
323- # Test connection
324- await client .messages .create (
325- model = "claude-sonnet-4-20250514" ,
326- max_tokens = 10 ,
327- messages = [{"role" : "user" , "content" : "test" }],
328- )
329- self .logger .info ("Using Anthropic API for code analysis" )
330- self .llm_client = client
331- self .llm_client_type = "anthropic"
332- return client , "anthropic"
333- except Exception as e :
334- self .logger .warning (f"Configured Anthropic API unavailable: { e } " )
388+ # Check which API has available key and try that first
389+ anthropic_key = self .api_config .get ("anthropic" , {}).get ("api_key" , "" )
390+ openai_key = self .api_config .get ("openai" , {}).get ("api_key" , "" )
335391
336- elif self .model_provider .lower () == "openai" :
392+ # Try Anthropic API first if key is available
393+ if anthropic_key and anthropic_key .strip ():
337394 try :
338- openai_key = self .api_config .get ("openai" , {}).get ("api_key" )
339- if openai_key :
340- from openai import AsyncOpenAI
341-
342- client = AsyncOpenAI (api_key = openai_key )
343- # Test connection
344- await client .chat .completions .create (
345- model = "gpt-3.5-turbo" ,
346- max_tokens = 10 ,
347- messages = [{"role" : "user" , "content" : "test" }],
348- )
349- self .logger .info ("Using OpenAI API for code analysis" )
350- self .llm_client = client
351- self .llm_client_type = "openai"
352- return client , "openai"
353- except Exception as e :
354- self .logger .warning (f"Configured OpenAI API unavailable: { e } " )
355-
356- # Fallback: try other provider
357- self .logger .info ("Trying fallback provider..." )
358-
359- # Try Anthropic as fallback
360- try :
361- anthropic_key = self .api_config .get ("anthropic" , {}).get ("api_key" )
362- if anthropic_key :
363395 from anthropic import AsyncAnthropic
364396
365397 client = AsyncAnthropic (api_key = anthropic_key )
398+ # Test connection with default model from config
366399 await client .messages .create (
367- model = "claude-sonnet-4-20250514" ,
400+ model = self . default_models [ "anthropic" ] ,
368401 max_tokens = 10 ,
369402 messages = [{"role" : "user" , "content" : "test" }],
370403 )
371- self .logger .info ("Using Anthropic API as fallback" )
404+ self .logger .info (
405+ f"Using Anthropic API with model: { self .default_models ['anthropic' ]} "
406+ )
372407 self .llm_client = client
373408 self .llm_client_type = "anthropic"
374409 return client , "anthropic"
375- except Exception as e :
376- self .logger .warning (f"Anthropic fallback failed : { e } " )
410+ except Exception as e :
411+ self .logger .warning (f"Anthropic API unavailable : { e } " )
377412
378- # Try OpenAI as fallback
379- try :
380- openai_key = self .api_config .get ("openai" , {}).get ("api_key" )
381- if openai_key :
413+ # Try OpenAI API if Anthropic failed or key not available
414+ if openai_key and openai_key .strip ():
415+ try :
382416 from openai import AsyncOpenAI
383417
384- client = AsyncOpenAI (api_key = openai_key )
418+ # Handle custom base_url if specified
419+ openai_config = self .api_config .get ("openai" , {})
420+ base_url = openai_config .get ("base_url" )
421+
422+ if base_url :
423+ client = AsyncOpenAI (api_key = openai_key , base_url = base_url )
424+ else :
425+ client = AsyncOpenAI (api_key = openai_key )
426+
427+ # Test connection with default model from config
385428 await client .chat .completions .create (
386- model = "gpt-3.5-turbo" ,
429+ model = self . default_models [ "openai" ] ,
387430 max_tokens = 10 ,
388431 messages = [{"role" : "user" , "content" : "test" }],
389432 )
390- self .logger .info ("Using OpenAI API as fallback" )
433+ self .logger .info (
434+ f"Using OpenAI API with model: { self .default_models ['openai' ]} "
435+ )
436+ if base_url :
437+ self .logger .info (f"Using custom base URL: { base_url } " )
391438 self .llm_client = client
392439 self .llm_client_type = "openai"
393440 return client , "openai"
394- except Exception as e :
395- self .logger .warning (f"OpenAI fallback failed : { e } " )
441+ except Exception as e :
442+ self .logger .warning (f"OpenAI API unavailable : { e } " )
396443
397- raise ValueError ("No available LLM API for code analysis" )
444+ raise ValueError (
445+ "No available LLM API - please check your API keys in configuration"
446+ )
398447
399448 async def _call_llm (
400449 self , prompt : str , system_prompt : str = None , max_tokens : int = None
@@ -426,7 +475,7 @@ async def _call_llm(
426475
427476 if client_type == "anthropic" :
428477 response = await client .messages .create (
429- model = "claude-sonnet-4-20250514" ,
478+ model = self . default_models [ "anthropic" ] ,
430479 system = system_prompt ,
431480 messages = [{"role" : "user" , "content" : prompt }],
432481 max_tokens = max_tokens ,
@@ -451,7 +500,7 @@ async def _call_llm(
451500 ]
452501
453502 response = await client .chat .completions .create (
454- model = "gpt-4-1106-preview" ,
503+ model = self . default_models [ "openai" ] ,
455504 messages = messages ,
456505 max_tokens = max_tokens ,
457506 temperature = self .llm_temperature ,
@@ -1043,7 +1092,7 @@ async def process_repository(self, repo_path: Path) -> RepoIndex:
10431092 if r .confidence_score > self .high_confidence_threshold
10441093 ]
10451094 ),
1046- "analyzer_version" : "1.3 .0" , # Updated version to reflect concurrent support
1095+ "analyzer_version" : "1.4 .0" , # Updated version to reflect augmented LLM support
10471096 "pre_filtering_enabled" : self .enable_pre_filtering ,
10481097 "files_before_filtering" : len (all_files ),
10491098 "files_after_filtering" : len (files_to_analyze ),
@@ -1373,7 +1422,7 @@ def generate_statistics_report(self, statistics_data: List[Dict[str, Any]]) -> s
13731422 # Build statistics report
13741423 statistics_report = {
13751424 "report_generation_time" : datetime .now ().isoformat (),
1376- "analyzer_version" : "1.3 .0" ,
1425+ "analyzer_version" : "1.4 .0" ,
13771426 "configuration_used" : {
13781427 "config_file" : self .indexer_config_path ,
13791428 "concurrent_analysis_enabled" : self .enable_concurrent_analysis ,
@@ -1481,11 +1530,12 @@ async def main():
14811530 """Main function to run the code indexer with full configuration support"""
14821531
14831532 # Configuration - can be overridden by config file
1484- config_file = "deepcode-mcp/tools/indexer_config.yaml"
1533+ config_file = "DeepCode/tools/indexer_config.yaml"
1534+ api_config_file = "DeepCode/mcp_agent.secrets.yaml"
14851535
14861536 # You can override these parameters or let them be read from config
1487- code_base_path = None # Will use config file value if None
1488- output_dir = None # Will use config file value if None
1537+ code_base_path = "DeepCode/deepcode_lab/papers/1/code_base/" # Will use config file value if None
1538+ output_dir = "DeepCode/deepcode_lab/papers/1/indexes/" # Will use config file value if None
14891539
14901540 # Target structure - this should be customized for your specific project
14911541 target_structure = """
@@ -1526,21 +1576,24 @@ async def main():
15261576
15271577 print ("🚀 Starting Code Indexer with Enhanced Configuration Support" )
15281578 print (f"📋 Configuration file: { config_file } " )
1579+ print (f"🔑 API configuration file: { api_config_file } " )
15291580
15301581 # Create indexer with full configuration support
15311582 try :
15321583 indexer = CodeIndexer (
15331584 code_base_path = code_base_path , # None = read from config
15341585 target_structure = target_structure , # Required - project specific
15351586 output_dir = output_dir , # None = read from config
1587+ config_path = api_config_file , # API configuration file
15361588 indexer_config_path = config_file , # Configuration file
15371589 enable_pre_filtering = True , # Can be overridden in config
15381590 )
15391591
15401592 # Display configuration information
15411593 print (f"📁 Code base path: { indexer .code_base_path } " )
15421594 print (f"📂 Output directory: { indexer .output_dir } " )
1543- print (f"🤖 Model provider: { indexer .model_provider } " )
1595+ print (f"🤖 Default models: Anthropic={ indexer .default_models ['anthropic' ]} , OpenAI={ indexer .default_models ['openai' ]} " )
1596+ print (f"🔧 Preferred LLM: { get_preferred_llm_class (api_config_file ).__name__ } " )
15441597 print (
15451598 f"⚡ Concurrent analysis: { 'enabled' if indexer .enable_concurrent_analysis else 'disabled' } "
15461599 )
0 commit comments