Some checks failed
CI / build (push) Failing after 12s
GitOrigin-RevId: 6370f6ea785709295b6abcf9c60717cacf3ac432
228 lines
8.3 KiB
Python
228 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Comprehensive Integration Test
|
|
Combines all validated capabilities into a single end-to-end test that simulates
|
|
real agent usage patterns with the local model.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
from pathlib import Path
|
|
import litellm
|
|
from dotenv import load_dotenv
|
|
|
|
class LocalModelAgent:
    """
    Simulates the proposed agent harness integration with local model.

    This demonstrates how the retrofitted system would work: tool use is
    implemented *manually* — the model is prompted to emit a JSON object
    describing the tool call, which we parse and (here) simulate executing.
    """

    def __init__(self):
        # Load environment from the project-root .env so the endpoint/key
        # can be overridden without code changes.
        project_root = Path(__file__).parent.parent
        dotenv_path = project_root / '.env'
        load_dotenv(dotenv_path=dotenv_path)

        # Defaults target a local LM Studio instance on the LAN.
        self.api_base = os.getenv('OPENAI_API_BASE', 'http://192.168.0.235:1234/v1')
        self.api_key = os.getenv('OPENAI_API_KEY', 'lm-studio')
        self.model_name = "openai/qwen3-coder-30b-a3b-instruct-mlx"

        # Define available tools (manual function calling). The schema here is
        # informal — it is rendered into the system prompt, not sent as an
        # OpenAI `tools` payload.
        self.tools = {
            "read_file": {
                "description": "Read the contents of a file",
                "parameters": {"file_path": "string (absolute path)"}
            },
            "list_directory": {
                "description": "List contents of a directory",
                "parameters": {"path": "string (absolute path)"}
            },
            "write_file": {
                "description": "Write content to a file",
                "parameters": {"file_path": "string", "content": "string"}
            }
        }

    def _create_tool_prompt(self):
        """
        Creates the system prompt for manual tool usage.

        Returns the prompt string embedding a JSON dump of ``self.tools`` and
        the exact response format the model must use for a tool call.
        """
        tools_json = json.dumps(self.tools, indent=2)
        return f"""
You are an AI assistant with access to the following tools:

{tools_json}

When you need to use a tool, respond with ONLY a JSON object in this format:
{{"tool": "tool_name", "parameters": {{"param_name": "value"}}}}

Do not include any explanatory text. If you don't need to use a tool, respond normally.
"""

    def _is_tool_call(self, response):
        """
        Checks if a response is a tool call.

        A tool call is any response that parses as a JSON object containing
        a "tool" key; anything unparseable (or a non-dict) is ordinary text.
        """
        try:
            parsed = json.loads(response.strip())
            return isinstance(parsed, dict) and "tool" in parsed
        except json.JSONDecodeError:
            return False

    def _execute_tool(self, tool_call):
        """
        Simulates tool execution (in real implementation, this would call actual tools).

        Args:
            tool_call: dict with "tool" (name) and optional "parameters" keys.

        Returns:
            A "[SIMULATED] ..." result string, or "[ERROR] ..." for an
            unknown tool name.
        """
        tool_name = tool_call["tool"]
        parameters = tool_call.get("parameters", {})

        if tool_name == "read_file":
            file_path = parameters.get("file_path", "")
            return f"[SIMULATED] Read file: {file_path} - Content would be displayed here"

        elif tool_name == "list_directory":
            path = parameters.get("path", "")
            return f"[SIMULATED] Directory listing for: {path} - Files would be listed here"

        elif tool_name == "write_file":
            file_path = parameters.get("file_path", "")
            content = parameters.get("content", "")
            # Fix: only mark truncation when the content actually exceeds the
            # 50-char preview (the original appended "..." unconditionally).
            preview = content[:50] + "..." if len(content) > 50 else content
            return f"[SIMULATED] Wrote to {file_path}: {preview}"

        else:
            return f"[ERROR] Unknown tool: {tool_name}"

    def _complete(self, messages):
        """
        Issues one chat completion against the local model.

        Returns the first choice's message content, or None when the
        response carries no choices. Extracted so `converse` does not
        duplicate the completion-call boilerplate for the follow-up turn.
        """
        response = litellm.completion(
            model=self.model_name,
            messages=messages,
            api_key=self.api_key,
            api_base=self.api_base,
            max_tokens=300
        )
        if hasattr(response, 'choices') and len(response.choices) > 0:
            return response.choices[0].message.content
        return None

    def converse(self, user_message, conversation_history=None):
        """
        Handles a conversation turn with manual tool calling support.

        Args:
            user_message: the user's message for this turn.
            conversation_history: optional list of prior message dicts;
                not mutated here (callers maintain it themselves).

        Returns:
            (response_text, tool_used) — always a 2-tuple; errors are
            reported as an "Error: ..." string with tool_used False.
        """
        if conversation_history is None:
            conversation_history = []

        # Build messages: system tool prompt, prior history, then this turn.
        messages = [
            {"role": "system", "content": self._create_tool_prompt()},
            *conversation_history,
            {"role": "user", "content": user_message}
        ]

        try:
            result = self._complete(messages)
            if result is None:
                # Bug fix: the original fell off the end of the try block
                # here and implicitly returned None, which crashed callers
                # unpacking the (text, tool_used) tuple.
                return "Error: empty response from model", False

            # Check if this is a tool call
            if self._is_tool_call(result):
                tool_call = json.loads(result.strip())
                tool_result = self._execute_tool(tool_call)

                # Continue conversation with tool result
                messages.append({"role": "assistant", "content": result})
                messages.append({"role": "user", "content": f"Tool result: {tool_result}"})

                # Get final response
                final_response = self._complete(messages)
                if final_response is not None:
                    return final_response, True  # True = tool was used

                # Bug fix: a tool *was* executed even if the follow-up
                # completion came back empty — report True, and surface the
                # raw tool-call text instead of falling through to None.
                return result, True

            return result, False  # False = no tool used

        except Exception as e:
            return f"Error: {e}", False
|
|
|
|
def test_comprehensive_integration():
    """
    Runs a comprehensive integration test simulating real agent usage.

    Drives a LocalModelAgent through a fixed sequence of scenarios,
    accumulating conversation history so later scenarios can exercise
    context retention. Returns True when at least 80% of scenarios pass.
    """
    print("=== Comprehensive Integration Test ===")
    print("Simulating retrofitted agent harness with local model...\n")

    agent = LocalModelAgent()
    history = []
    tool_invocations = 0

    # Test scenarios, in dependency order (later ones rely on earlier context).
    scenarios = [
        {
            "name": "System Prompt Compliance",
            "message": "Hi, I need help organizing my project documentation. Can you act as a documentation specialist and give me advice?"
        },
        {
            "name": "Tool Usage - File Reading",
            "message": "Please read the file at /Users/acmcarther/Projects/yesod/README.md to understand the project structure."
        },
        {
            "name": "Context Retention",
            "message": "Based on what you just read, what do you think the main purpose of this project is?"
        },
        {
            "name": "Tool Usage - Directory Listing",
            "message": "Now list the contents of the /Users/acmcarther/Projects/yesod/scripts directory to see what test files we have."
        },
        {
            "name": "Complex Task with Multiple Tools",
            "message": "Create a summary document called 'project_summary.md' that includes the project purpose and the test files available."
        }
    ]

    outcomes = []

    for idx, case in enumerate(scenarios, 1):
        print(f"--- Test {idx}: {case['name']} ---")
        print(f"User: {case['message']}")

        reply, invoked = agent.converse(case['message'], history)
        suffix = '...' if len(reply) > 200 else ''
        print(f"Agent: {reply[:200]}{suffix}")

        if invoked:
            tool_invocations += 1
            print("🔧 Tool was used in this response")

        # Record the exchange so subsequent scenarios see prior context.
        history.append({"role": "user", "content": case['message']})
        history.append({"role": "assistant", "content": reply})

        # Simple validation: non-trivial reply that isn't an error string.
        ok = len(reply) > 20 and not reply.startswith("Error:")
        print("✅ Test passed" if ok else "❌ Test failed")
        outcomes.append(ok)

        print()

    # Summary
    success_rate = sum(outcomes) / len(outcomes)
    print("=== Integration Test Summary ===")
    print(f"Tests passed: {sum(outcomes)}/{len(outcomes)} ({success_rate:.1%})")
    print(f"Tools used: {tool_invocations}")
    print(f"Conversation turns: {len(history) // 2}")

    if success_rate < 0.8:
        print("❌ Comprehensive integration test FAILED")
        print("\nAdditional refinement needed before production deployment.")
        return False

    print("✅ Comprehensive integration test PASSED")
    print("\nThe local model integration is ready for production retrofitting.")
    return True
|
|
|
|
if __name__ == "__main__":
    # Exit code 0 on success, 1 on failure, for CI consumption.
    raise SystemExit(0 if test_comprehensive_integration() else 1)
|