From b80e7bda16d09c117a2979cbf2a9e9d1898047f9 Mon Sep 17 00:00:00 2001
From: Alex McArther
Date: Sun, 5 Oct 2025 14:02:29 -0700
Subject: [PATCH] Add preliminary assistant model

---
 package-lock.json                                  | 32 +++++++++++++++---
 package.json                                       |  2 ++
 packages/core/src/core/client.ts                   |  4 +--
 .../core/openaiContentGenerator/pipeline.ts        | 15 +++++++--
 .../core/src/services/loopDetectionService.ts      | 11 ++++++-
 packages/core/src/tools/web-fetch.ts               | 13 ++++++++
 .../core/src/utils/nextSpeakerChecker.test.ts      | 33 +++++++++++++++++--
 packages/core/src/utils/nextSpeakerChecker.ts      | 14 +++++++-
 packages/core/src/utils/summarizer.ts              | 16 +++++++--
 9 files changed, 125 insertions(+), 15 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 62353a65..8caad795 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -12,6 +12,7 @@
       ],
       "dependencies": {
         "@lvce-editor/ripgrep": "^1.6.0",
+        "@qwen-code/qwen-code": "^0.0.14",
         "simple-git": "^3.28.0",
         "strip-ansi": "^7.1.0"
       },
@@ -19,6 +20,7 @@
         "qwen": "bundle/gemini.js"
       },
       "devDependencies": {
+        "@google/genai": "^1.22.0",
         "@types/marked": "^5.0.2",
         "@types/mime-types": "^3.0.1",
         "@types/minimatch": "^5.1.2",
@@ -998,9 +1000,10 @@
       "link": true
     },
     "node_modules/@google/genai": {
-      "version": "1.13.0",
-      "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
-      "integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
+      "version": "1.22.0",
+      "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.22.0.tgz",
+      "integrity": "sha512-siETS3zTm3EGpTT4+BFc1z20xXBYfueD3gCYfxkOjuAKRk8lt8TJevDHi3zepn1oSI6NhG/LZvy0i+Q3qheObg==",
+      "dev": true,
       "license": "Apache-2.0",
       "dependencies": {
         "google-auth-library": "^9.14.2",
@@ -1010,7 +1013,7 @@
         "node": ">=20.0.0"
       },
       "peerDependencies": {
-        "@modelcontextprotocol/sdk": "^1.11.0"
+        "@modelcontextprotocol/sdk": "^1.11.4"
      },
      "peerDependenciesMeta": {
        "@modelcontextprotocol/sdk": {
@@ -13731,6 +13734,27 @@
         "node-pty": "^1.0.0"
       }
     },
+    "packages/core/node_modules/@google/genai": {
+      "version": "1.13.0",
+      "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
+      "integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "google-auth-library": "^9.14.2",
+        "ws": "^8.18.0"
+      },
+      "engines": {
+        "node": ">=20.0.0"
+      },
+      "peerDependencies": {
+        "@modelcontextprotocol/sdk": "^1.11.0"
+      },
+      "peerDependenciesMeta": {
+        "@modelcontextprotocol/sdk": {
+          "optional": true
+        }
+      }
+    },
     "packages/core/node_modules/ajv": {
       "version": "8.17.1",
       "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
diff --git a/package.json b/package.json
index 4b034fe6..06fc86e6 100644
--- a/package.json
+++ b/package.json
@@ -61,6 +61,7 @@
     "LICENSE"
   ],
   "devDependencies": {
+    "@google/genai": "^1.22.0",
     "@types/marked": "^5.0.2",
     "@types/mime-types": "^3.0.1",
     "@types/minimatch": "^5.1.2",
@@ -96,6 +97,7 @@
   },
   "dependencies": {
     "@lvce-editor/ripgrep": "^1.6.0",
+    "@qwen-code/qwen-code": "^0.0.14",
     "simple-git": "^3.28.0",
     "strip-ansi": "^7.1.0"
   },
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 05be91b6..7977fa5f 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -703,7 +703,7 @@ export class GeminiClient {
      * We should ignore model for now because some calls use `DEFAULT_GEMINI_FLASH_MODEL`
      * which is not available as `qwen3-coder-flash`
      */
-    const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
+    const modelToUse = model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
     try {
       const userMemory = this.config.getUserMemory();
       const finalSystemInstruction = config.systemInstruction
@@ -789,7 +789,7 @@
     abortSignal: AbortSignal,
     model?: string,
   ): Promise<GenerateContentResponse> {
-    const modelToUse = model ?? this.config.getModel();
+    const modelToUse = model || this.config.getModel();
     const configToUse: GenerateContentConfig = {
       ...this.generateContentConfig,
       ...generationConfig,
diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts
index 2ce0e074..05509177 100644
--- a/packages/core/src/core/openaiContentGenerator/pipeline.ts
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.ts
@@ -241,10 +241,17 @@ export class ContentGenerationPipeline {
   ): Promise<OpenAI.Chat.ChatCompletionCreateParams> {
     const messages = this.converter.convertGeminiRequestToOpenAI(request);
 
+    // Check if request has a model override
+    let model = this.contentGeneratorConfig.model;
+    if (request.model) {
+      // Use request-specific model if provided
+      model = request.model;
+    }
+
     // Apply provider-specific enhancements
     const baseRequest: OpenAI.Chat.ChatCompletionCreateParams = {
-      model: this.contentGeneratorConfig.model,
       messages,
+      model,
       ...this.buildSamplingParameters(request),
     };
 
@@ -405,9 +412,13 @@ export class ContentGenerationPipeline {
     userPromptId: string,
     isStreaming: boolean,
   ): RequestContext {
+    // For context logging, we use the default model since we don't have access to a request
+    // This is acceptable since context logging doesn't need request-specific model
+    const model = this.contentGeneratorConfig.model;
+
     return {
       userPromptId,
-      model: this.contentGeneratorConfig.model,
+      model,
       authType: this.contentGeneratorConfig.authType || 'unknown',
       startTime: Date.now(),
       duration: 0,
diff --git a/packages/core/src/services/loopDetectionService.ts b/packages/core/src/services/loopDetectionService.ts
index 97cafb8d..256cda18 100644
--- a/packages/core/src/services/loopDetectionService.ts
+++ b/packages/core/src/services/loopDetectionService.ts
@@ -396,9 +396,18 @@ Please analyze the conversation history to determine the possibility that the co
   };
   let result;
   try {
+    // Get the assistant model from environment variable, falling back to default if not specified
+    let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
+
+    // Check for OPENAI_ASSISTANT_MODEL environment variable
+    const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
+    if (openaiAssistantModel) {
+      assistantModel = openaiAssistantModel;
+    }
+
     result = await this.config
       .getGeminiClient()
-      .generateJson(contents, schema, signal, DEFAULT_QWEN_FLASH_MODEL);
+      .generateJson(contents, schema, signal, assistantModel);
   } catch (e) {
     // Do nothing, treat it as a non-loop.
     this.config.getDebugMode() ? console.error(e) : console.debug(e);
diff --git a/packages/core/src/tools/web-fetch.ts b/packages/core/src/tools/web-fetch.ts
index ce50d5ee..8f90255a 100644
--- a/packages/core/src/tools/web-fetch.ts
+++ b/packages/core/src/tools/web-fetch.ts
@@ -8,6 +8,7 @@ import { convert } from 'html-to-text';
 import { ProxyAgent, setGlobalDispatcher } from 'undici';
 import type { Config } from '../config/config.js';
 import { ApprovalMode } from '../config/config.js';
+import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
 import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
 import { getResponseText } from '../utils/partUtils.js';
 import { ToolErrorType } from './tool-error.js';
@@ -104,10 +105,22 @@ ${textContent}
       `[WebFetchTool] Processing content with prompt: "${this.params.prompt}"`,
     );
 
+    // For assistant decision-making, we want to use a different model than the one used for regular conversation
+    // This helps avoid KV cache invalidation issues when using the same model for both purposes
+
+    // Get the assistant model from environment variable, falling back to default if not specified
+    let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
+
+    // Check for OPENAI_ASSISTANT_MODEL environment variable
+    const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
+    if (openaiAssistantModel) {
+      assistantModel = openaiAssistantModel;
+    }
     const result = await geminiClient.generateContent(
       [{ role: 'user', parts: [{ text: fallbackPrompt }] }],
       {},
       signal,
+      assistantModel
     );
     const resultText = getResponseText(result) || '';
 
diff --git a/packages/core/src/utils/nextSpeakerChecker.test.ts b/packages/core/src/utils/nextSpeakerChecker.test.ts
index e38beb81..9ab93c6d 100644
--- a/packages/core/src/utils/nextSpeakerChecker.test.ts
+++ b/packages/core/src/utils/nextSpeakerChecker.test.ts
@@ -1,4 +1,4 @@
-/**
+/**
  * @license
  * Copyright 2025 Google LLC
  * SPDX-License-Identifier: Apache-2.0
@@ -242,7 +242,7 @@ describe('checkNextSpeaker', () => {
     expect(result).toBeNull();
   });
 
-  it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL', async () => {
+  it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL when OPENAI_ASSISTANT_MODEL is not set', async () => {
     (chatInstance.getHistory as Mock).mockReturnValue([
       { role: 'model', parts: [{ text: 'Some model output.' }] },
     ] as Content[]);
@@ -259,4 +259,31 @@ describe('checkNextSpeaker', () => {
       .calls[0];
     expect(generateJsonCall[3]).toBe(DEFAULT_QWEN_FLASH_MODEL);
   });
-});
+
+  it('should call generateJson with OPENAI_ASSISTANT_MODEL when set', async () => {
+    // Mock the environment variable
+    const originalEnv = process.env['OPENAI_ASSISTANT_MODEL'];
+    process.env['OPENAI_ASSISTANT_MODEL'] = 'qwen2.5-0.5b';
+
+    try {
+      (chatInstance.getHistory as Mock).mockReturnValue([
+        { role: 'model', parts: [{ text: 'Some model output.' }] },
+      ] as Content[]);
+      const mockApiResponse: NextSpeakerResponse = {
+        reasoning: 'Model made a statement, awaiting user input.',
+        next_speaker: 'user',
+      };
+      (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);
+
+      await checkNextSpeaker(chatInstance, mockGeminiClient, abortSignal);
+
+      expect(mockGeminiClient.generateJson).toHaveBeenCalled();
+      const generateJsonCall = (mockGeminiClient.generateJson as Mock).mock
+        .calls[0];
+      expect(generateJsonCall[3]).toBe('qwen2.5-0.5b');
+    } finally {
+      // Restore original environment variable
+      process.env['OPENAI_ASSISTANT_MODEL'] = originalEnv;
+    }
+  });
+});
\ No newline at end of file
diff --git a/packages/core/src/utils/nextSpeakerChecker.ts b/packages/core/src/utils/nextSpeakerChecker.ts
index 24471455..fac3ddb8 100644
--- a/packages/core/src/utils/nextSpeakerChecker.ts
+++ b/packages/core/src/utils/nextSpeakerChecker.ts
@@ -108,11 +108,23 @@ export async function checkNextSpeaker(
   ];
 
   try {
+    // For assistant decision-making, we want to use a different model than the one used for regular conversation
+    // This helps avoid KV cache invalidation issues when using the same model for both purposes
+
+    // Get the assistant model from environment variable, falling back to default if not specified
+    let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
+
+    // Check for OPENAI_ASSISTANT_MODEL environment variable
+    const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
+    if (openaiAssistantModel) {
+      assistantModel = openaiAssistantModel;
+    }
+
     const parsedResponse = (await geminiClient.generateJson(
       contents,
       RESPONSE_SCHEMA,
       abortSignal,
-      DEFAULT_QWEN_FLASH_MODEL,
+      assistantModel,
     )) as unknown as NextSpeakerResponse;
 
     if (
diff --git a/packages/core/src/utils/summarizer.ts b/packages/core/src/utils/summarizer.ts
index 14076b5c..fd22373f 100644
--- a/packages/core/src/utils/summarizer.ts
+++ b/packages/core/src/utils/summarizer.ts
@@ -11,7 +11,7 @@ import type {
   GenerateContentResponse,
 } from '@google/genai';
 import type { GeminiClient } from '../core/client.js';
-import { DEFAULT_GEMINI_FLASH_LITE_MODEL } from '../config/models.js';
+import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
 import { getResponseText, partToString } from './partUtils.js';
 
 /**
@@ -81,12 +81,24 @@ export async function summarizeToolOutput(
   const toolOutputSummarizerConfig: GenerateContentConfig = {
     maxOutputTokens,
   };
+  // For assistant decision-making, we want to use a different model than the one used for regular conversation
+  // This helps avoid KV cache invalidation issues when using the same model for both purposes
+
+  // Get the assistant model from environment variable, falling back to default if not specified
+  let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
+
+  // Check for OPENAI_ASSISTANT_MODEL environment variable
+  const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
+  if (openaiAssistantModel) {
+    assistantModel = openaiAssistantModel;
+  }
+
   try {
     const parsedResponse = (await geminiClient.generateContent(
       contents,
       toolOutputSummarizerConfig,
       abortSignal,
-      DEFAULT_GEMINI_FLASH_LITE_MODEL,
+      assistantModel,
     )) as unknown as GenerateContentResponse;
     return getResponseText(parsedResponse) || textToSummarize;
   } catch (error) {
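
The OPENAI_ASSISTANT_MODEL fallback introduced above is duplicated verbatim across loopDetectionService.ts, web-fetch.ts, nextSpeakerChecker.ts, and summarizer.ts. A minimal sketch of a shared helper a follow-up change could extract — the name getAssistantModel and its home in a shared module are hypothetical, not part of this patch:

    import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';

    // Hypothetical helper (not in this patch): resolve the model used for
    // auxiliary assistant calls (loop detection, next-speaker checks, web-fetch
    // fallback processing, tool-output summarization). A non-empty
    // OPENAI_ASSISTANT_MODEL wins; otherwise fall back to the Qwen flash default.
    export function getAssistantModel(): string {
      return process.env['OPENAI_ASSISTANT_MODEL'] || DEFAULT_QWEN_FLASH_MODEL;
    }

Each call site would then pass getAssistantModel() where this patch threads assistantModel, keeping the cache-friendly model split in one place.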