Add preliminary assistant model

This commit is contained in:
Alex McArther 2025-10-05 14:02:29 -07:00
parent 0922437bd5
commit b80e7bda16
9 changed files with 125 additions and 15 deletions

32
package-lock.json generated
View file

@@ -12,6 +12,7 @@
],
"dependencies": {
"@lvce-editor/ripgrep": "^1.6.0",
"@qwen-code/qwen-code": "^0.0.14",
"simple-git": "^3.28.0",
"strip-ansi": "^7.1.0"
},
@@ -19,6 +20,7 @@
"qwen": "bundle/gemini.js"
},
"devDependencies": {
"@google/genai": "^1.22.0",
"@types/marked": "^5.0.2",
"@types/mime-types": "^3.0.1",
"@types/minimatch": "^5.1.2",
@@ -998,9 +1000,10 @@
"link": true
},
"node_modules/@google/genai": {
"version": "1.13.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
"integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
"version": "1.22.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.22.0.tgz",
"integrity": "sha512-siETS3zTm3EGpTT4+BFc1z20xXBYfueD3gCYfxkOjuAKRk8lt8TJevDHi3zepn1oSI6NhG/LZvy0i+Q3qheObg==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"google-auth-library": "^9.14.2",
@@ -1010,7 +1013,7 @@
"node": ">=20.0.0"
},
"peerDependencies": {
"@modelcontextprotocol/sdk": "^1.11.0"
"@modelcontextprotocol/sdk": "^1.11.4"
},
"peerDependenciesMeta": {
"@modelcontextprotocol/sdk": {
@@ -13731,6 +13734,27 @@
"node-pty": "^1.0.0"
}
},
"packages/core/node_modules/@google/genai": {
"version": "1.13.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
"integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
"license": "Apache-2.0",
"dependencies": {
"google-auth-library": "^9.14.2",
"ws": "^8.18.0"
},
"engines": {
"node": ">=20.0.0"
},
"peerDependencies": {
"@modelcontextprotocol/sdk": "^1.11.0"
},
"peerDependenciesMeta": {
"@modelcontextprotocol/sdk": {
"optional": true
}
}
},
"packages/core/node_modules/ajv": {
"version": "8.17.1",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",

View file

@@ -61,6 +61,7 @@
"LICENSE"
],
"devDependencies": {
"@google/genai": "^1.22.0",
"@types/marked": "^5.0.2",
"@types/mime-types": "^3.0.1",
"@types/minimatch": "^5.1.2",
@@ -96,6 +97,7 @@
},
"dependencies": {
"@lvce-editor/ripgrep": "^1.6.0",
"@qwen-code/qwen-code": "^0.0.14",
"simple-git": "^3.28.0",
"strip-ansi": "^7.1.0"
},

View file

@@ -703,7 +703,7 @@ export class GeminiClient {
* We should ignore model for now because some calls use `DEFAULT_GEMINI_FLASH_MODEL`
* which is not available as `qwen3-coder-flash`
*/
const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
const modelToUse = model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
try {
const userMemory = this.config.getUserMemory();
const finalSystemInstruction = config.systemInstruction
@@ -789,7 +789,7 @@
abortSignal: AbortSignal,
model?: string,
): Promise<GenerateContentResponse> {
const modelToUse = model ?? this.config.getModel();
const modelToUse = model || this.config.getModel();
const configToUse: GenerateContentConfig = {
...this.generateContentConfig,
...generationConfig,

View file

@@ -241,10 +241,17 @@ export class ContentGenerationPipeline {
): Promise<OpenAI.Chat.ChatCompletionCreateParams> {
const messages = this.converter.convertGeminiRequestToOpenAI(request);
// Check if request has a model override
let model = this.contentGeneratorConfig.model;
if (request.model) {
// Use request-specific model if provided
model = request.model;
}
// Apply provider-specific enhancements
const baseRequest: OpenAI.Chat.ChatCompletionCreateParams = {
model: this.contentGeneratorConfig.model,
messages,
model,
...this.buildSamplingParameters(request),
};
@@ -405,9 +412,13 @@
userPromptId: string,
isStreaming: boolean,
): RequestContext {
// For context logging, we use the default model since we don't have access to a request
// This is acceptable since context logging doesn't need request-specific model
const model = this.contentGeneratorConfig.model;
return {
userPromptId,
model: this.contentGeneratorConfig.model,
model,
authType: this.contentGeneratorConfig.authType || 'unknown',
startTime: Date.now(),
duration: 0,

View file

@@ -396,9 +396,18 @@ Please analyze the conversation history to determine the possibility that the co
};
let result;
try {
// Get the assistant model from environment variable, falling back to default if not specified
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
// Check for OPENAI_ASSISTANT_MODEL environment variable
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
if (openaiAssistantModel) {
assistantModel = openaiAssistantModel;
}
result = await this.config
.getGeminiClient()
.generateJson(contents, schema, signal, DEFAULT_QWEN_FLASH_MODEL);
.generateJson(contents, schema, signal, assistantModel);
} catch (e) {
// Do nothing, treat it as a non-loop.
this.config.getDebugMode() ? console.error(e) : console.debug(e);

View file

@@ -8,6 +8,7 @@ import { convert } from 'html-to-text';
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import type { Config } from '../config/config.js';
import { ApprovalMode } from '../config/config.js';
import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
import { getResponseText } from '../utils/partUtils.js';
import { ToolErrorType } from './tool-error.js';
@@ -104,10 +105,22 @@
`[WebFetchTool] Processing content with prompt: "${this.params.prompt}"`,
);
// For assistant decision-making, we want to use a different model than the one used for regular conversation
// This helps avoid KV cache invalidation issues when using the same model for both purposes
// Get the assistant model from environment variable, falling back to default if not specified
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
// Check for OPENAI_ASSISTANT_MODEL environment variable
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
if (openaiAssistantModel) {
assistantModel = openaiAssistantModel;
}
const result = await geminiClient.generateContent(
[{ role: 'user', parts: [{ text: fallbackPrompt }] }],
{},
signal,
assistantModel
);
const resultText = getResponseText(result) || '';

View file

@@ -242,7 +242,7 @@ describe('checkNextSpeaker', () => {
expect(result).toBeNull();
});
it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL', async () => {
it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL when OPENAI_ASSISTANT_MODEL is not set', async () => {
(chatInstance.getHistory as Mock).mockReturnValue([
{ role: 'model', parts: [{ text: 'Some model output.' }] },
] as Content[]);
@@ -259,4 +259,31 @@
.calls[0];
expect(generateJsonCall[3]).toBe(DEFAULT_QWEN_FLASH_MODEL);
});
it('should call generateJson with OPENAI_ASSISTANT_MODEL when set', async () => {
// Mock the environment variable
const originalEnv = process.env['OPENAI_ASSISTANT_MODEL'];
process.env['OPENAI_ASSISTANT_MODEL'] = 'qwen2.5-0.5b';
try {
(chatInstance.getHistory as Mock).mockReturnValue([
{ role: 'model', parts: [{ text: 'Some model output.' }] },
] as Content[]);
const mockApiResponse: NextSpeakerResponse = {
reasoning: 'Model made a statement, awaiting user input.',
next_speaker: 'user',
};
(mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);
await checkNextSpeaker(chatInstance, mockGeminiClient, abortSignal);
expect(mockGeminiClient.generateJson).toHaveBeenCalled();
const generateJsonCall = (mockGeminiClient.generateJson as Mock).mock
.calls[0];
expect(generateJsonCall[3]).toBe('qwen2.5-0.5b');
} finally {
// Restore original environment variable
process.env['OPENAI_ASSISTANT_MODEL'] = originalEnv;
}
});
});

View file

@@ -108,11 +108,23 @@ export async function checkNextSpeaker(
];
try {
// For assistant decision-making, we want to use a different model than the one used for regular conversation
// This helps avoid KV cache invalidation issues when using the same model for both purposes
// Get the assistant model from environment variable, falling back to default if not specified
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
// Check for OPENAI_ASSISTANT_MODEL environment variable
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
if (openaiAssistantModel) {
assistantModel = openaiAssistantModel;
}
const parsedResponse = (await geminiClient.generateJson(
contents,
RESPONSE_SCHEMA,
abortSignal,
DEFAULT_QWEN_FLASH_MODEL,
assistantModel,
)) as unknown as NextSpeakerResponse;
if (

View file

@@ -11,7 +11,7 @@ import type {
GenerateContentResponse,
} from '@google/genai';
import type { GeminiClient } from '../core/client.js';
import { DEFAULT_GEMINI_FLASH_LITE_MODEL } from '../config/models.js';
import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
import { getResponseText, partToString } from './partUtils.js';
/**
@@ -81,12 +81,24 @@ export async function summarizeToolOutput(
const toolOutputSummarizerConfig: GenerateContentConfig = {
maxOutputTokens,
};
// For assistant decision-making, we want to use a different model than the one used for regular conversation
// This helps avoid KV cache invalidation issues when using the same model for both purposes
// Get the assistant model from environment variable, falling back to default if not specified
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
// Check for OPENAI_ASSISTANT_MODEL environment variable
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
if (openaiAssistantModel) {
assistantModel = openaiAssistantModel;
}
try {
const parsedResponse = (await geminiClient.generateContent(
contents,
toolOutputSummarizerConfig,
abortSignal,
DEFAULT_GEMINI_FLASH_LITE_MODEL,
assistantModel,
)) as unknown as GenerateContentResponse;
return getResponseText(parsedResponse) || textToSummarize;
} catch (error) {