Add preliminary assistant model

This commit is contained in:
Alex McArther 2025-10-05 14:02:29 -07:00
parent 0922437bd5
commit b80e7bda16
9 changed files with 125 additions and 15 deletions

32
package-lock.json generated
View file

@ -12,6 +12,7 @@
], ],
"dependencies": { "dependencies": {
"@lvce-editor/ripgrep": "^1.6.0", "@lvce-editor/ripgrep": "^1.6.0",
"@qwen-code/qwen-code": "^0.0.14",
"simple-git": "^3.28.0", "simple-git": "^3.28.0",
"strip-ansi": "^7.1.0" "strip-ansi": "^7.1.0"
}, },
@ -19,6 +20,7 @@
"qwen": "bundle/gemini.js" "qwen": "bundle/gemini.js"
}, },
"devDependencies": { "devDependencies": {
"@google/genai": "^1.22.0",
"@types/marked": "^5.0.2", "@types/marked": "^5.0.2",
"@types/mime-types": "^3.0.1", "@types/mime-types": "^3.0.1",
"@types/minimatch": "^5.1.2", "@types/minimatch": "^5.1.2",
@ -998,9 +1000,10 @@
"link": true "link": true
}, },
"node_modules/@google/genai": { "node_modules/@google/genai": {
"version": "1.13.0", "version": "1.22.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz", "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.22.0.tgz",
"integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==", "integrity": "sha512-siETS3zTm3EGpTT4+BFc1z20xXBYfueD3gCYfxkOjuAKRk8lt8TJevDHi3zepn1oSI6NhG/LZvy0i+Q3qheObg==",
"dev": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"dependencies": { "dependencies": {
"google-auth-library": "^9.14.2", "google-auth-library": "^9.14.2",
@ -1010,7 +1013,7 @@
"node": ">=20.0.0" "node": ">=20.0.0"
}, },
"peerDependencies": { "peerDependencies": {
"@modelcontextprotocol/sdk": "^1.11.0" "@modelcontextprotocol/sdk": "^1.11.4"
}, },
"peerDependenciesMeta": { "peerDependenciesMeta": {
"@modelcontextprotocol/sdk": { "@modelcontextprotocol/sdk": {
@ -13731,6 +13734,27 @@
"node-pty": "^1.0.0" "node-pty": "^1.0.0"
} }
}, },
"packages/core/node_modules/@google/genai": {
"version": "1.13.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
"integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
"license": "Apache-2.0",
"dependencies": {
"google-auth-library": "^9.14.2",
"ws": "^8.18.0"
},
"engines": {
"node": ">=20.0.0"
},
"peerDependencies": {
"@modelcontextprotocol/sdk": "^1.11.0"
},
"peerDependenciesMeta": {
"@modelcontextprotocol/sdk": {
"optional": true
}
}
},
"packages/core/node_modules/ajv": { "packages/core/node_modules/ajv": {
"version": "8.17.1", "version": "8.17.1",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",

View file

@ -61,6 +61,7 @@
"LICENSE" "LICENSE"
], ],
"devDependencies": { "devDependencies": {
"@google/genai": "^1.22.0",
"@types/marked": "^5.0.2", "@types/marked": "^5.0.2",
"@types/mime-types": "^3.0.1", "@types/mime-types": "^3.0.1",
"@types/minimatch": "^5.1.2", "@types/minimatch": "^5.1.2",
@ -96,6 +97,7 @@
}, },
"dependencies": { "dependencies": {
"@lvce-editor/ripgrep": "^1.6.0", "@lvce-editor/ripgrep": "^1.6.0",
"@qwen-code/qwen-code": "^0.0.14",
"simple-git": "^3.28.0", "simple-git": "^3.28.0",
"strip-ansi": "^7.1.0" "strip-ansi": "^7.1.0"
}, },

View file

@ -703,7 +703,7 @@ export class GeminiClient {
* We should ignore model for now because some calls use `DEFAULT_GEMINI_FLASH_MODEL` * We should ignore model for now because some calls use `DEFAULT_GEMINI_FLASH_MODEL`
* which is not available as `qwen3-coder-flash` * which is not available as `qwen3-coder-flash`
*/ */
const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL; const modelToUse = model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
try { try {
const userMemory = this.config.getUserMemory(); const userMemory = this.config.getUserMemory();
const finalSystemInstruction = config.systemInstruction const finalSystemInstruction = config.systemInstruction
@ -789,7 +789,7 @@ export class GeminiClient {
abortSignal: AbortSignal, abortSignal: AbortSignal,
model?: string, model?: string,
): Promise<GenerateContentResponse> { ): Promise<GenerateContentResponse> {
const modelToUse = model ?? this.config.getModel(); const modelToUse = model || this.config.getModel();
const configToUse: GenerateContentConfig = { const configToUse: GenerateContentConfig = {
...this.generateContentConfig, ...this.generateContentConfig,
...generationConfig, ...generationConfig,

View file

@ -241,10 +241,17 @@ export class ContentGenerationPipeline {
): Promise<OpenAI.Chat.ChatCompletionCreateParams> { ): Promise<OpenAI.Chat.ChatCompletionCreateParams> {
const messages = this.converter.convertGeminiRequestToOpenAI(request); const messages = this.converter.convertGeminiRequestToOpenAI(request);
// Check if request has a model override
let model = this.contentGeneratorConfig.model;
if (request.model) {
// Use request-specific model if provided
model = request.model;
}
// Apply provider-specific enhancements // Apply provider-specific enhancements
const baseRequest: OpenAI.Chat.ChatCompletionCreateParams = { const baseRequest: OpenAI.Chat.ChatCompletionCreateParams = {
model: this.contentGeneratorConfig.model,
messages, messages,
model,
...this.buildSamplingParameters(request), ...this.buildSamplingParameters(request),
}; };
@ -405,9 +412,13 @@ export class ContentGenerationPipeline {
userPromptId: string, userPromptId: string,
isStreaming: boolean, isStreaming: boolean,
): RequestContext { ): RequestContext {
// For context logging, we use the default model since we don't have access to a request
// This is acceptable since context logging doesn't need request-specific model
const model = this.contentGeneratorConfig.model;
return { return {
userPromptId, userPromptId,
model: this.contentGeneratorConfig.model, model,
authType: this.contentGeneratorConfig.authType || 'unknown', authType: this.contentGeneratorConfig.authType || 'unknown',
startTime: Date.now(), startTime: Date.now(),
duration: 0, duration: 0,

View file

@ -396,9 +396,18 @@ Please analyze the conversation history to determine the possibility that the co
}; };
let result; let result;
try { try {
// Get the assistant model from environment variable, falling back to default if not specified
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
// Check for OPENAI_ASSISTANT_MODEL environment variable
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
if (openaiAssistantModel) {
assistantModel = openaiAssistantModel;
}
result = await this.config result = await this.config
.getGeminiClient() .getGeminiClient()
.generateJson(contents, schema, signal, DEFAULT_QWEN_FLASH_MODEL); .generateJson(contents, schema, signal, assistantModel);
} catch (e) { } catch (e) {
// Do nothing, treat it as a non-loop. // Do nothing, treat it as a non-loop.
this.config.getDebugMode() ? console.error(e) : console.debug(e); this.config.getDebugMode() ? console.error(e) : console.debug(e);

View file

@ -8,6 +8,7 @@ import { convert } from 'html-to-text';
import { ProxyAgent, setGlobalDispatcher } from 'undici'; import { ProxyAgent, setGlobalDispatcher } from 'undici';
import type { Config } from '../config/config.js'; import type { Config } from '../config/config.js';
import { ApprovalMode } from '../config/config.js'; import { ApprovalMode } from '../config/config.js';
import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js'; import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
import { getResponseText } from '../utils/partUtils.js'; import { getResponseText } from '../utils/partUtils.js';
import { ToolErrorType } from './tool-error.js'; import { ToolErrorType } from './tool-error.js';
@ -104,10 +105,22 @@ ${textContent}
`[WebFetchTool] Processing content with prompt: "${this.params.prompt}"`, `[WebFetchTool] Processing content with prompt: "${this.params.prompt}"`,
); );
// For assistant decision-making, we want to use a different model than the one used for regular conversation
// This helps avoid KV cache invalidation issues when using the same model for both purposes
// Get the assistant model from environment variable, falling back to default if not specified
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
// Check for OPENAI_ASSISTANT_MODEL environment variable
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
if (openaiAssistantModel) {
assistantModel = openaiAssistantModel;
}
const result = await geminiClient.generateContent( const result = await geminiClient.generateContent(
[{ role: 'user', parts: [{ text: fallbackPrompt }] }], [{ role: 'user', parts: [{ text: fallbackPrompt }] }],
{}, {},
signal, signal,
assistantModel
); );
const resultText = getResponseText(result) || ''; const resultText = getResponseText(result) || '';

View file

@ -242,7 +242,7 @@ describe('checkNextSpeaker', () => {
expect(result).toBeNull(); expect(result).toBeNull();
}); });
it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL', async () => { it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL when OPENAI_ASSISTANT_MODEL is not set', async () => {
(chatInstance.getHistory as Mock).mockReturnValue([ (chatInstance.getHistory as Mock).mockReturnValue([
{ role: 'model', parts: [{ text: 'Some model output.' }] }, { role: 'model', parts: [{ text: 'Some model output.' }] },
] as Content[]); ] as Content[]);
@ -259,4 +259,31 @@ describe('checkNextSpeaker', () => {
.calls[0]; .calls[0];
expect(generateJsonCall[3]).toBe(DEFAULT_QWEN_FLASH_MODEL); expect(generateJsonCall[3]).toBe(DEFAULT_QWEN_FLASH_MODEL);
}); });
it('should call generateJson with OPENAI_ASSISTANT_MODEL when set', async () => {
  // Override the assistant model through the environment for this test only,
  // remembering the previous value so it can be restored afterwards.
  const originalEnv = process.env['OPENAI_ASSISTANT_MODEL'];
  process.env['OPENAI_ASSISTANT_MODEL'] = 'qwen2.5-0.5b';

  try {
    (chatInstance.getHistory as Mock).mockReturnValue([
      { role: 'model', parts: [{ text: 'Some model output.' }] },
    ] as Content[]);
    const mockApiResponse: NextSpeakerResponse = {
      reasoning: 'Model made a statement, awaiting user input.',
      next_speaker: 'user',
    };
    (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);

    await checkNextSpeaker(chatInstance, mockGeminiClient, abortSignal);

    expect(mockGeminiClient.generateJson).toHaveBeenCalled();
    // The model name is the fourth positional argument of generateJson.
    const generateJsonCall = (mockGeminiClient.generateJson as Mock).mock
      .calls[0];
    expect(generateJsonCall[3]).toBe('qwen2.5-0.5b');
  } finally {
    // Restore the previous environment state. Assigning `undefined` to a
    // process.env key stores the literal string "undefined" (Node coerces
    // values to strings), which is truthy and would leak a bogus model name
    // into later tests. Remove the key instead when it was originally unset.
    if (originalEnv === undefined) {
      delete process.env['OPENAI_ASSISTANT_MODEL'];
    } else {
      process.env['OPENAI_ASSISTANT_MODEL'] = originalEnv;
    }
  }
});
}); });

View file

@ -108,11 +108,23 @@ export async function checkNextSpeaker(
]; ];
try { try {
// For assistant decision-making, we want to use a different model than the one used for regular conversation
// This helps avoid KV cache invalidation issues when using the same model for both purposes
// Get the assistant model from environment variable, falling back to default if not specified
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
// Check for OPENAI_ASSISTANT_MODEL environment variable
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
if (openaiAssistantModel) {
assistantModel = openaiAssistantModel;
}
const parsedResponse = (await geminiClient.generateJson( const parsedResponse = (await geminiClient.generateJson(
contents, contents,
RESPONSE_SCHEMA, RESPONSE_SCHEMA,
abortSignal, abortSignal,
DEFAULT_QWEN_FLASH_MODEL, assistantModel,
)) as unknown as NextSpeakerResponse; )) as unknown as NextSpeakerResponse;
if ( if (

View file

@ -11,7 +11,7 @@ import type {
GenerateContentResponse, GenerateContentResponse,
} from '@google/genai'; } from '@google/genai';
import type { GeminiClient } from '../core/client.js'; import type { GeminiClient } from '../core/client.js';
import { DEFAULT_GEMINI_FLASH_LITE_MODEL } from '../config/models.js'; import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
import { getResponseText, partToString } from './partUtils.js'; import { getResponseText, partToString } from './partUtils.js';
/** /**
@ -81,12 +81,24 @@ export async function summarizeToolOutput(
const toolOutputSummarizerConfig: GenerateContentConfig = { const toolOutputSummarizerConfig: GenerateContentConfig = {
maxOutputTokens, maxOutputTokens,
}; };
// For assistant decision-making, we want to use a different model than the one used for regular conversation
// This helps avoid KV cache invalidation issues when using the same model for both purposes
// Get the assistant model from environment variable, falling back to default if not specified
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
// Check for OPENAI_ASSISTANT_MODEL environment variable
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
if (openaiAssistantModel) {
assistantModel = openaiAssistantModel;
}
try { try {
const parsedResponse = (await geminiClient.generateContent( const parsedResponse = (await geminiClient.generateContent(
contents, contents,
toolOutputSummarizerConfig, toolOutputSummarizerConfig,
abortSignal, abortSignal,
DEFAULT_GEMINI_FLASH_LITE_MODEL, assistantModel,
)) as unknown as GenerateContentResponse; )) as unknown as GenerateContentResponse;
return getResponseText(parsedResponse) || textToSummarize; return getResponseText(parsedResponse) || textToSummarize;
} catch (error) { } catch (error) {