yesod-mirror/experimental/users/acmcarther/llm/litellm/test_integration_comprehensive.py
Copybara 8157b39ea4
Some checks failed
CI / build (push) Failing after 12s
Project import generated by Copybara.
GitOrigin-RevId: 6370f6ea785709295b6abcf9c60717cacf3ac432
2026-01-20 21:26:21 +00:00

228 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""
Comprehensive Integration Test
Combines all validated capabilities into a single end-to-end test that simulates
real agent usage patterns with the local model.
"""
import os
import sys
import json
from pathlib import Path
import litellm
from dotenv import load_dotenv
class LocalModelAgent:
    """
    Simulates the proposed agent harness integration with a local model.

    Demonstrates manual (prompt-based) tool calling against an
    OpenAI-compatible endpoint (e.g. LM Studio) through litellm: tools are
    advertised in the system prompt, and the model is expected to reply
    with a bare JSON object when it wants to invoke one.
    """

    def __init__(self):
        """Load endpoint configuration from .env and declare available tools."""
        # Resolve the project root relative to this file and load its .env;
        # fall back to LM Studio defaults when the variables are absent.
        project_root = Path(__file__).parent.parent
        dotenv_path = project_root / '.env'
        load_dotenv(dotenv_path=dotenv_path)
        self.api_base = os.getenv('OPENAI_API_BASE', 'http://192.168.0.235:1234/v1')
        self.api_key = os.getenv('OPENAI_API_KEY', 'lm-studio')
        self.model_name = "openai/qwen3-coder-30b-a3b-instruct-mlx"
        # Tool registry advertised to the model via the system prompt
        # (manual function calling -- no native tool-call API is used).
        self.tools = {
            "read_file": {
                "description": "Read the contents of a file",
                "parameters": {"file_path": "string (absolute path)"}
            },
            "list_directory": {
                "description": "List contents of a directory",
                "parameters": {"path": "string (absolute path)"}
            },
            "write_file": {
                "description": "Write content to a file",
                "parameters": {"file_path": "string", "content": "string"}
            }
        }

    def _create_tool_prompt(self):
        """Build the system prompt describing the tools and the JSON call format."""
        tools_json = json.dumps(self.tools, indent=2)
        return f"""
You are an AI assistant with access to the following tools:
{tools_json}
When you need to use a tool, respond with ONLY a JSON object in this format:
{{"tool": "tool_name", "parameters": {{"param_name": "value"}}}}
Do not include any explanatory text. If you don't need to use a tool, respond normally.
"""

    def _is_tool_call(self, response):
        """Return True when *response* parses as a JSON object with a "tool" key.

        Tolerates None or non-string input: message.content can legitimately
        be None, and the original `except json.JSONDecodeError` did not cover
        the resulting AttributeError from .strip().
        """
        try:
            parsed = json.loads(response.strip())
        except (AttributeError, TypeError, json.JSONDecodeError):
            # Not a string, or not valid JSON -> a plain conversational reply.
            return False
        return isinstance(parsed, dict) and "tool" in parsed

    def _execute_tool(self, tool_call):
        """Simulate executing *tool_call*; return a human-readable result string.

        In the real integration this would dispatch to actual tool
        implementations instead of returning canned text.
        """
        tool_name = tool_call["tool"]
        parameters = tool_call.get("parameters", {})
        if tool_name == "read_file":
            file_path = parameters.get("file_path", "")
            return f"[SIMULATED] Read file: {file_path} - Content would be displayed here"
        elif tool_name == "list_directory":
            path = parameters.get("path", "")
            return f"[SIMULATED] Directory listing for: {path} - Files would be listed here"
        elif tool_name == "write_file":
            file_path = parameters.get("file_path", "")
            content = parameters.get("content", "")
            # Only elide content that is actually longer than the 50-char
            # preview (the original appended "..." unconditionally).
            if len(content) > 50:
                content = content[:50] + "..."
            return f"[SIMULATED] Wrote to {file_path}: {content}"
        else:
            return f"[ERROR] Unknown tool: {tool_name}"

    def converse(self, user_message, conversation_history=None):
        """Run one conversation turn, transparently resolving a single tool call.

        Args:
            user_message: the user's text for this turn.
            conversation_history: optional list of prior {"role", "content"}
                messages; not mutated here.

        Returns:
            (reply_text, tool_was_used) — tool_was_used is True only when a
            tool call was detected AND a follow-up completion succeeded.
        """
        if conversation_history is None:
            conversation_history = []
        messages = [
            {"role": "system", "content": self._create_tool_prompt()},
            *conversation_history,
            {"role": "user", "content": user_message},
        ]
        try:
            response = litellm.completion(
                model=self.model_name,
                messages=messages,
                api_key=self.api_key,
                api_base=self.api_base,
                max_tokens=300
            )
            # Guard explicitly against an empty completion instead of letting
            # a NameError on `result` be swallowed by the except clause below.
            if not getattr(response, 'choices', None):
                return "Error: model returned no choices", False
            result = response.choices[0].message.content
            if self._is_tool_call(result):
                tool_call = json.loads(result.strip())
                tool_result = self._execute_tool(tool_call)
                # Feed the simulated tool output back so the model can
                # produce a natural-language final answer.
                messages.append({"role": "assistant", "content": result})
                messages.append({"role": "user", "content": f"Tool result: {tool_result}"})
                final_response = litellm.completion(
                    model=self.model_name,
                    messages=messages,
                    api_key=self.api_key,
                    api_base=self.api_base,
                    max_tokens=300
                )
                if getattr(final_response, 'choices', None):
                    return final_response.choices[0].message.content, True  # tool was used
            return result, False  # no tool used (or follow-up had no choices)
        except Exception as e:
            # Best-effort: surface the failure as text so the test harness
            # can report it instead of crashing.
            return f"Error: {e}", False
def test_comprehensive_integration():
    """Drive the LocalModelAgent through scripted scenarios end to end.

    Prints a per-scenario transcript plus a summary, and returns True when
    at least 80% of the scenarios produce a plausible (non-error) reply.
    """
    print("=== Comprehensive Integration Test ===")
    print("Simulating retrofitted agent harness with local model...\n")

    agent = LocalModelAgent()
    history = []
    tools_used = 0

    # Scripted turns exercising prompting, tool use and context retention.
    scenarios = [
        {
            "name": "System Prompt Compliance",
            "message": "Hi, I need help organizing my project documentation. Can you act as a documentation specialist and give me advice?"
        },
        {
            "name": "Tool Usage - File Reading",
            "message": "Please read the file at /Users/acmcarther/Projects/yesod/README.md to understand the project structure."
        },
        {
            "name": "Context Retention",
            "message": "Based on what you just read, what do you think the main purpose of this project is?"
        },
        {
            "name": "Tool Usage - Directory Listing",
            "message": "Now list the contents of the /Users/acmcarther/Projects/yesod/scripts directory to see what test files we have."
        },
        {
            "name": "Complex Task with Multiple Tools",
            "message": "Create a summary document called 'project_summary.md' that includes the project purpose and the test files available."
        }
    ]

    outcomes = []
    for idx, case in enumerate(scenarios, 1):
        print(f"--- Test {idx}: {case['name']} ---")
        print(f"User: {case['message']}")
        reply, used_tool = agent.converse(case['message'], history)
        preview = reply[:200] + ('...' if len(reply) > 200 else '')
        print(f"Agent: {preview}")
        if used_tool:
            tools_used += 1
            print("🔧 Tool was used in this response")
        # Keep a shared transcript so later scenarios can reference
        # earlier answers.
        history.append({"role": "user", "content": case['message']})
        history.append({"role": "assistant", "content": reply})
        # A reply counts as passing when it is substantive and not an error.
        passed = len(reply) > 20 and not reply.startswith("Error:")
        print("✅ Test passed" if passed else "❌ Test failed")
        outcomes.append(passed)
        print()

    # Summary over all scenarios.
    passed_count = sum(outcomes)
    success_rate = passed_count / len(outcomes)
    print("=== Integration Test Summary ===")
    print(f"Tests passed: {passed_count}/{len(outcomes)} ({success_rate:.1%})")
    print(f"Tools used: {tools_used}")
    print(f"Conversation turns: {len(history) // 2}")

    if success_rate < 0.8:
        print("❌ Comprehensive integration test FAILED")
        print("\nAdditional refinement needed before production deployment.")
        return False
    print("✅ Comprehensive integration test PASSED")
    print("\nThe local model integration is ready for production retrofitting.")
    return True
if __name__ == "__main__":
    # Exit 0 on success, 1 on failure, so CI can gate on this script directly.
    sys.exit(0 if test_comprehensive_integration() else 1)