Add preliminary assistant model
This commit is contained in:
parent
0922437bd5
commit
b80e7bda16
9 changed files with 125 additions and 15 deletions
32
package-lock.json
generated
32
package-lock.json
generated
|
|
@ -12,6 +12,7 @@
|
|||
],
|
||||
"dependencies": {
|
||||
"@lvce-editor/ripgrep": "^1.6.0",
|
||||
"@qwen-code/qwen-code": "^0.0.14",
|
||||
"simple-git": "^3.28.0",
|
||||
"strip-ansi": "^7.1.0"
|
||||
},
|
||||
|
|
@ -19,6 +20,7 @@
|
|||
"qwen": "bundle/gemini.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@google/genai": "^1.22.0",
|
||||
"@types/marked": "^5.0.2",
|
||||
"@types/mime-types": "^3.0.1",
|
||||
"@types/minimatch": "^5.1.2",
|
||||
|
|
@ -998,9 +1000,10 @@
|
|||
"link": true
|
||||
},
|
||||
"node_modules/@google/genai": {
|
||||
"version": "1.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
|
||||
"integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
|
||||
"version": "1.22.0",
|
||||
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.22.0.tgz",
|
||||
"integrity": "sha512-siETS3zTm3EGpTT4+BFc1z20xXBYfueD3gCYfxkOjuAKRk8lt8TJevDHi3zepn1oSI6NhG/LZvy0i+Q3qheObg==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"google-auth-library": "^9.14.2",
|
||||
|
|
@ -1010,7 +1013,7 @@
|
|||
"node": ">=20.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.11.0"
|
||||
"@modelcontextprotocol/sdk": "^1.11.4"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@modelcontextprotocol/sdk": {
|
||||
|
|
@ -13731,6 +13734,27 @@
|
|||
"node-pty": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"packages/core/node_modules/@google/genai": {
|
||||
"version": "1.13.0",
|
||||
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
|
||||
"integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"google-auth-library": "^9.14.2",
|
||||
"ws": "^8.18.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.11.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@modelcontextprotocol/sdk": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"packages/core/node_modules/ajv": {
|
||||
"version": "8.17.1",
|
||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@
|
|||
"LICENSE"
|
||||
],
|
||||
"devDependencies": {
|
||||
"@google/genai": "^1.22.0",
|
||||
"@types/marked": "^5.0.2",
|
||||
"@types/mime-types": "^3.0.1",
|
||||
"@types/minimatch": "^5.1.2",
|
||||
|
|
@ -96,6 +97,7 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"@lvce-editor/ripgrep": "^1.6.0",
|
||||
"@qwen-code/qwen-code": "^0.0.14",
|
||||
"simple-git": "^3.28.0",
|
||||
"strip-ansi": "^7.1.0"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -703,7 +703,7 @@ export class GeminiClient {
|
|||
* Prefer the caller-supplied `model`, then the configured model. Caution: some calls pass `DEFAULT_GEMINI_FLASH_MODEL`,
|
||||
* which is not available as `qwen3-coder-flash`
|
||||
*/
|
||||
const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
|
||||
const modelToUse = model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
|
||||
try {
|
||||
const userMemory = this.config.getUserMemory();
|
||||
const finalSystemInstruction = config.systemInstruction
|
||||
|
|
@ -789,7 +789,7 @@ export class GeminiClient {
|
|||
abortSignal: AbortSignal,
|
||||
model?: string,
|
||||
): Promise<GenerateContentResponse> {
|
||||
const modelToUse = model ?? this.config.getModel();
|
||||
const modelToUse = model || this.config.getModel();
|
||||
const configToUse: GenerateContentConfig = {
|
||||
...this.generateContentConfig,
|
||||
...generationConfig,
|
||||
|
|
|
|||
|
|
@ -241,10 +241,17 @@ export class ContentGenerationPipeline {
|
|||
): Promise<OpenAI.Chat.ChatCompletionCreateParams> {
|
||||
const messages = this.converter.convertGeminiRequestToOpenAI(request);
|
||||
|
||||
// Check if request has a model override
|
||||
let model = this.contentGeneratorConfig.model;
|
||||
if (request.model) {
|
||||
// Use request-specific model if provided
|
||||
model = request.model;
|
||||
}
|
||||
|
||||
// Apply provider-specific enhancements
|
||||
const baseRequest: OpenAI.Chat.ChatCompletionCreateParams = {
|
||||
model: this.contentGeneratorConfig.model,
|
||||
messages,
|
||||
model,
|
||||
...this.buildSamplingParameters(request),
|
||||
};
|
||||
|
||||
|
|
@ -405,9 +412,13 @@ export class ContentGenerationPipeline {
|
|||
userPromptId: string,
|
||||
isStreaming: boolean,
|
||||
): RequestContext {
|
||||
// For context logging, we use the default model since we don't have access to a request
|
||||
// This is acceptable since context logging doesn't need a request-specific model
|
||||
const model = this.contentGeneratorConfig.model;
|
||||
|
||||
return {
|
||||
userPromptId,
|
||||
model: this.contentGeneratorConfig.model,
|
||||
model,
|
||||
authType: this.contentGeneratorConfig.authType || 'unknown',
|
||||
startTime: Date.now(),
|
||||
duration: 0,
|
||||
|
|
|
|||
|
|
@ -396,9 +396,18 @@ Please analyze the conversation history to determine the possibility that the co
|
|||
};
|
||||
let result;
|
||||
try {
|
||||
// Get the assistant model from environment variable, falling back to default if not specified
|
||||
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
|
||||
|
||||
// Check for OPENAI_ASSISTANT_MODEL environment variable
|
||||
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
|
||||
if (openaiAssistantModel) {
|
||||
assistantModel = openaiAssistantModel;
|
||||
}
|
||||
|
||||
result = await this.config
|
||||
.getGeminiClient()
|
||||
.generateJson(contents, schema, signal, DEFAULT_QWEN_FLASH_MODEL);
|
||||
.generateJson(contents, schema, signal, assistantModel);
|
||||
} catch (e) {
|
||||
// Do nothing, treat it as a non-loop.
|
||||
this.config.getDebugMode() ? console.error(e) : console.debug(e);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import { convert } from 'html-to-text';
|
|||
import { ProxyAgent, setGlobalDispatcher } from 'undici';
|
||||
import type { Config } from '../config/config.js';
|
||||
import { ApprovalMode } from '../config/config.js';
|
||||
import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
|
||||
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
|
||||
import { getResponseText } from '../utils/partUtils.js';
|
||||
import { ToolErrorType } from './tool-error.js';
|
||||
|
|
@ -104,10 +105,22 @@ ${textContent}
|
|||
`[WebFetchTool] Processing content with prompt: "${this.params.prompt}"`,
|
||||
);
|
||||
|
||||
// For assistant decision-making, we want to use a different model than the one used for regular conversation
|
||||
// This helps avoid KV cache invalidation issues when using the same model for both purposes
|
||||
|
||||
// Get the assistant model from environment variable, falling back to default if not specified
|
||||
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
|
||||
|
||||
// Check for OPENAI_ASSISTANT_MODEL environment variable
|
||||
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
|
||||
if (openaiAssistantModel) {
|
||||
assistantModel = openaiAssistantModel;
|
||||
}
|
||||
const result = await geminiClient.generateContent(
|
||||
[{ role: 'user', parts: [{ text: fallbackPrompt }] }],
|
||||
{},
|
||||
signal,
|
||||
assistantModel
|
||||
);
|
||||
const resultText = getResponseText(result) || '';
|
||||
|
||||
|
|
|
|||
|
|
@ -242,7 +242,7 @@ describe('checkNextSpeaker', () => {
|
|||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL', async () => {
|
||||
it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL when OPENAI_ASSISTANT_MODEL is not set', async () => {
|
||||
(chatInstance.getHistory as Mock).mockReturnValue([
|
||||
{ role: 'model', parts: [{ text: 'Some model output.' }] },
|
||||
] as Content[]);
|
||||
|
|
@ -259,4 +259,31 @@ describe('checkNextSpeaker', () => {
|
|||
.calls[0];
|
||||
expect(generateJsonCall[3]).toBe(DEFAULT_QWEN_FLASH_MODEL);
|
||||
});
|
||||
|
||||
it('should call generateJson with OPENAI_ASSISTANT_MODEL when set', async () => {
  // Override the environment variable for this test only, remembering the
  // previous value so it can be put back afterwards.
  const originalEnv = process.env['OPENAI_ASSISTANT_MODEL'];
  process.env['OPENAI_ASSISTANT_MODEL'] = 'qwen2.5-0.5b';

  try {
    (chatInstance.getHistory as Mock).mockReturnValue([
      { role: 'model', parts: [{ text: 'Some model output.' }] },
    ] as Content[]);
    const mockApiResponse: NextSpeakerResponse = {
      reasoning: 'Model made a statement, awaiting user input.',
      next_speaker: 'user',
    };
    (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);

    await checkNextSpeaker(chatInstance, mockGeminiClient, abortSignal);

    expect(mockGeminiClient.generateJson).toHaveBeenCalled();
    const generateJsonCall = (mockGeminiClient.generateJson as Mock).mock
      .calls[0];
    // The model name is passed as the fourth positional argument.
    expect(generateJsonCall[3]).toBe('qwen2.5-0.5b');
  } finally {
    // Restore the original environment. Assigning `undefined` to a
    // process.env key stores the string "undefined" (a truthy value that
    // would leak into later tests as a bogus model name), so the key must be
    // deleted when it was not set before this test ran.
    if (originalEnv === undefined) {
      delete process.env['OPENAI_ASSISTANT_MODEL'];
    } else {
      process.env['OPENAI_ASSISTANT_MODEL'] = originalEnv;
    }
  }
});
|
||||
});
|
||||
|
|
@ -108,11 +108,23 @@ export async function checkNextSpeaker(
|
|||
];
|
||||
|
||||
try {
|
||||
// For assistant decision-making, we want to use a different model than the one used for regular conversation
|
||||
// This helps avoid KV cache invalidation issues when using the same model for both purposes
|
||||
|
||||
// Get the assistant model from environment variable, falling back to default if not specified
|
||||
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
|
||||
|
||||
// Check for OPENAI_ASSISTANT_MODEL environment variable
|
||||
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
|
||||
if (openaiAssistantModel) {
|
||||
assistantModel = openaiAssistantModel;
|
||||
}
|
||||
|
||||
const parsedResponse = (await geminiClient.generateJson(
|
||||
contents,
|
||||
RESPONSE_SCHEMA,
|
||||
abortSignal,
|
||||
DEFAULT_QWEN_FLASH_MODEL,
|
||||
assistantModel,
|
||||
)) as unknown as NextSpeakerResponse;
|
||||
|
||||
if (
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import type {
|
|||
GenerateContentResponse,
|
||||
} from '@google/genai';
|
||||
import type { GeminiClient } from '../core/client.js';
|
||||
import { DEFAULT_GEMINI_FLASH_LITE_MODEL } from '../config/models.js';
|
||||
import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
|
||||
import { getResponseText, partToString } from './partUtils.js';
|
||||
|
||||
/**
|
||||
|
|
@ -81,12 +81,24 @@ export async function summarizeToolOutput(
|
|||
const toolOutputSummarizerConfig: GenerateContentConfig = {
|
||||
maxOutputTokens,
|
||||
};
|
||||
// For assistant decision-making, we want to use a different model than the one used for regular conversation
|
||||
// This helps avoid KV cache invalidation issues when using the same model for both purposes
|
||||
|
||||
// Get the assistant model from environment variable, falling back to default if not specified
|
||||
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
|
||||
|
||||
// Check for OPENAI_ASSISTANT_MODEL environment variable
|
||||
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
|
||||
if (openaiAssistantModel) {
|
||||
assistantModel = openaiAssistantModel;
|
||||
}
|
||||
|
||||
try {
|
||||
const parsedResponse = (await geminiClient.generateContent(
|
||||
contents,
|
||||
toolOutputSummarizerConfig,
|
||||
abortSignal,
|
||||
DEFAULT_GEMINI_FLASH_LITE_MODEL,
|
||||
assistantModel,
|
||||
)) as unknown as GenerateContentResponse;
|
||||
return getResponseText(parsedResponse) || textToSummarize;
|
||||
} catch (error) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue