yesod-mirror/experimental/users/acmcarther/llm/litellm/test_integration_comprehensive.py
Copybara 8157b39ea4
Some checks failed
CI / build (push) Failing after 12s
Project import generated by Copybara.
GitOrigin-RevId: 6370f6ea785709295b6abcf9c60717cacf3ac432
2026-01-20 21:26:21 +00:00

228 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""
Comprehensive Integration Test
Combines all validated capabilities into a single end-to-end test that simulates
real agent usage patterns with the local model.
"""
import os
import sys
import json
from pathlib import Path
import litellm
from dotenv import load_dotenv
class LocalModelAgent:
    """
    Simulates the proposed agent harness integration with a local model.

    Demonstrates manual (prompt-based) tool calling against an
    OpenAI-compatible endpoint (e.g. LM Studio) through litellm: tools are
    advertised in the system prompt, and the model is expected to reply
    with a bare JSON object when it wants to invoke one.
    """

    def __init__(self):
        """Load endpoint configuration from .env and declare available tools."""
        # Resolve the project root relative to this file and load its .env;
        # fall back to LM Studio defaults when the variables are absent.
        project_root = Path(__file__).parent.parent
        dotenv_path = project_root / '.env'
        load_dotenv(dotenv_path=dotenv_path)
        self.api_base = os.getenv('OPENAI_API_BASE', 'http://192.168.0.235:1234/v1')
        self.api_key = os.getenv('OPENAI_API_KEY', 'lm-studio')
        self.model_name = "openai/qwen3-coder-30b-a3b-instruct-mlx"
        # Tool registry advertised to the model via the system prompt
        # (manual function calling -- no native tool-call API is used).
        self.tools = {
            "read_file": {
                "description": "Read the contents of a file",
                "parameters": {"file_path": "string (absolute path)"}
            },
            "list_directory": {
                "description": "List contents of a directory",
                "parameters": {"path": "string (absolute path)"}
            },
            "write_file": {
                "description": "Write content to a file",
                "parameters": {"file_path": "string", "content": "string"}
            }
        }

    def _create_tool_prompt(self):
        """Build the system prompt describing the tools and the JSON call format."""
        tools_json = json.dumps(self.tools, indent=2)
        return f"""
You are an AI assistant with access to the following tools:
{tools_json}
When you need to use a tool, respond with ONLY a JSON object in this format:
{{"tool": "tool_name", "parameters": {{"param_name": "value"}}}}
Do not include any explanatory text. If you don't need to use a tool, respond normally.
"""

    def _is_tool_call(self, response):
        """Return True when *response* parses as a JSON object with a "tool" key.

        Tolerates None or non-string input: message.content can legitimately
        be None, and the original `except json.JSONDecodeError` did not cover
        the resulting AttributeError from .strip().
        """
        try:
            parsed = json.loads(response.strip())
        except (AttributeError, TypeError, json.JSONDecodeError):
            # Not a string, or not valid JSON -> a plain conversational reply.
            return False
        return isinstance(parsed, dict) and "tool" in parsed

    def _execute_tool(self, tool_call):
        """Simulate executing *tool_call*; return a human-readable result string.

        In the real integration this would dispatch to actual tool
        implementations instead of returning canned text.
        """
        tool_name = tool_call["tool"]
        parameters = tool_call.get("parameters", {})
        if tool_name == "read_file":
            file_path = parameters.get("file_path", "")
            return f"[SIMULATED] Read file: {file_path} - Content would be displayed here"
        elif tool_name == "list_directory":
            path = parameters.get("path", "")
            return f"[SIMULATED] Directory listing for: {path} - Files would be listed here"
        elif tool_name == "write_file":
            file_path = parameters.get("file_path", "")
            content = parameters.get("content", "")
            # Only elide content that is actually longer than the 50-char
            # preview (the original appended "..." unconditionally).
            if len(content) > 50:
                content = content[:50] + "..."
            return f"[SIMULATED] Wrote to {file_path}: {content}"
        else:
            return f"[ERROR] Unknown tool: {tool_name}"

    def converse(self, user_message, conversation_history=None):
        """Run one conversation turn, transparently resolving a single tool call.

        Args:
            user_message: the user's text for this turn.
            conversation_history: optional list of prior {"role", "content"}
                messages; not mutated here.

        Returns:
            (reply_text, tool_was_used) — tool_was_used is True only when a
            tool call was detected AND a follow-up completion succeeded.
        """
        if conversation_history is None:
            conversation_history = []
        messages = [
            {"role": "system", "content": self._create_tool_prompt()},
            *conversation_history,
            {"role": "user", "content": user_message},
        ]
        try:
            response = litellm.completion(
                model=self.model_name,
                messages=messages,
                api_key=self.api_key,
                api_base=self.api_base,
                max_tokens=300
            )
            # Guard explicitly against an empty completion instead of letting
            # a NameError on `result` be swallowed by the except clause below.
            if not getattr(response, 'choices', None):
                return "Error: model returned no choices", False
            result = response.choices[0].message.content
            if self._is_tool_call(result):
                tool_call = json.loads(result.strip())
                tool_result = self._execute_tool(tool_call)
                # Feed the simulated tool output back so the model can
                # produce a natural-language final answer.
                messages.append({"role": "assistant", "content": result})
                messages.append({"role": "user", "content": f"Tool result: {tool_result}"})
                final_response = litellm.completion(
                    model=self.model_name,
                    messages=messages,
                    api_key=self.api_key,
                    api_base=self.api_base,
                    max_tokens=300
                )
                if getattr(final_response, 'choices', None):
                    return final_response.choices[0].message.content, True  # tool was used
            return result, False  # no tool used (or follow-up had no choices)
        except Exception as e:
            # Best-effort: surface the failure as text so the test harness
            # can report it instead of crashing.
            return f"Error: {e}", False
def test_comprehensive_integration():
    """Drive the LocalModelAgent through scripted scenarios end to end.

    Prints a per-scenario transcript plus a summary, and returns True when
    at least 80% of the scenarios produce a plausible (non-error) reply.
    """
    print("=== Comprehensive Integration Test ===")
    print("Simulating retrofitted agent harness with local model...\n")

    agent = LocalModelAgent()
    history = []
    tools_used = 0

    # Scripted turns exercising prompting, tool use and context retention.
    scenarios = [
        {
            "name": "System Prompt Compliance",
            "message": "Hi, I need help organizing my project documentation. Can you act as a documentation specialist and give me advice?"
        },
        {
            "name": "Tool Usage - File Reading",
            "message": "Please read the file at /Users/acmcarther/Projects/yesod/README.md to understand the project structure."
        },
        {
            "name": "Context Retention",
            "message": "Based on what you just read, what do you think the main purpose of this project is?"
        },
        {
            "name": "Tool Usage - Directory Listing",
            "message": "Now list the contents of the /Users/acmcarther/Projects/yesod/scripts directory to see what test files we have."
        },
        {
            "name": "Complex Task with Multiple Tools",
            "message": "Create a summary document called 'project_summary.md' that includes the project purpose and the test files available."
        }
    ]

    outcomes = []
    for idx, case in enumerate(scenarios, 1):
        print(f"--- Test {idx}: {case['name']} ---")
        print(f"User: {case['message']}")
        reply, used_tool = agent.converse(case['message'], history)
        preview = reply[:200] + ('...' if len(reply) > 200 else '')
        print(f"Agent: {preview}")
        if used_tool:
            tools_used += 1
            print("🔧 Tool was used in this response")
        # Keep a shared transcript so later scenarios can reference
        # earlier answers.
        history.append({"role": "user", "content": case['message']})
        history.append({"role": "assistant", "content": reply})
        # A reply counts as passing when it is substantive and not an error.
        passed = len(reply) > 20 and not reply.startswith("Error:")
        print("✅ Test passed" if passed else "❌ Test failed")
        outcomes.append(passed)
        print()

    # Summary over all scenarios.
    passed_count = sum(outcomes)
    success_rate = passed_count / len(outcomes)
    print("=== Integration Test Summary ===")
    print(f"Tests passed: {passed_count}/{len(outcomes)} ({success_rate:.1%})")
    print(f"Tools used: {tools_used}")
    print(f"Conversation turns: {len(history) // 2}")

    if success_rate < 0.8:
        print("❌ Comprehensive integration test FAILED")
        print("\nAdditional refinement needed before production deployment.")
        return False
    print("✅ Comprehensive integration test PASSED")
    print("\nThe local model integration is ready for production retrofitting.")
    return True
if __name__ == "__main__":
    # Exit 0 on success, 1 on failure, so CI can gate on this script directly.
    sys.exit(0 if test_comprehensive_integration() else 1)