Add preliminary assistant model
This commit is contained in:
parent
0922437bd5
commit
b80e7bda16
9 changed files with 125 additions and 15 deletions
32
package-lock.json
generated
32
package-lock.json
generated
|
|
@ -12,6 +12,7 @@
|
||||||
],
|
],
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@lvce-editor/ripgrep": "^1.6.0",
|
"@lvce-editor/ripgrep": "^1.6.0",
|
||||||
|
"@qwen-code/qwen-code": "^0.0.14",
|
||||||
"simple-git": "^3.28.0",
|
"simple-git": "^3.28.0",
|
||||||
"strip-ansi": "^7.1.0"
|
"strip-ansi": "^7.1.0"
|
||||||
},
|
},
|
||||||
|
|
@ -19,6 +20,7 @@
|
||||||
"qwen": "bundle/gemini.js"
|
"qwen": "bundle/gemini.js"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@google/genai": "^1.22.0",
|
||||||
"@types/marked": "^5.0.2",
|
"@types/marked": "^5.0.2",
|
||||||
"@types/mime-types": "^3.0.1",
|
"@types/mime-types": "^3.0.1",
|
||||||
"@types/minimatch": "^5.1.2",
|
"@types/minimatch": "^5.1.2",
|
||||||
|
|
@ -998,9 +1000,10 @@
|
||||||
"link": true
|
"link": true
|
||||||
},
|
},
|
||||||
"node_modules/@google/genai": {
|
"node_modules/@google/genai": {
|
||||||
"version": "1.13.0",
|
"version": "1.22.0",
|
||||||
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
|
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.22.0.tgz",
|
||||||
"integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
|
"integrity": "sha512-siETS3zTm3EGpTT4+BFc1z20xXBYfueD3gCYfxkOjuAKRk8lt8TJevDHi3zepn1oSI6NhG/LZvy0i+Q3qheObg==",
|
||||||
|
"dev": true,
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"google-auth-library": "^9.14.2",
|
"google-auth-library": "^9.14.2",
|
||||||
|
|
@ -1010,7 +1013,7 @@
|
||||||
"node": ">=20.0.0"
|
"node": ">=20.0.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@modelcontextprotocol/sdk": "^1.11.0"
|
"@modelcontextprotocol/sdk": "^1.11.4"
|
||||||
},
|
},
|
||||||
"peerDependenciesMeta": {
|
"peerDependenciesMeta": {
|
||||||
"@modelcontextprotocol/sdk": {
|
"@modelcontextprotocol/sdk": {
|
||||||
|
|
@ -13731,6 +13734,27 @@
|
||||||
"node-pty": "^1.0.0"
|
"node-pty": "^1.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"packages/core/node_modules/@google/genai": {
|
||||||
|
"version": "1.13.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.13.0.tgz",
|
||||||
|
"integrity": "sha512-BxilXzE8cJ0zt5/lXk6KwuBcIT9P2Lbi2WXhwWMbxf1RNeC68/8DmYQqMrzQP333CieRMdbDXs0eNCphLoScWg==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"google-auth-library": "^9.14.2",
|
||||||
|
"ws": "^8.18.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@modelcontextprotocol/sdk": "^1.11.0"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@modelcontextprotocol/sdk": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"packages/core/node_modules/ajv": {
|
"packages/core/node_modules/ajv": {
|
||||||
"version": "8.17.1",
|
"version": "8.17.1",
|
||||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
|
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,7 @@
|
||||||
"LICENSE"
|
"LICENSE"
|
||||||
],
|
],
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
"@google/genai": "^1.22.0",
|
||||||
"@types/marked": "^5.0.2",
|
"@types/marked": "^5.0.2",
|
||||||
"@types/mime-types": "^3.0.1",
|
"@types/mime-types": "^3.0.1",
|
||||||
"@types/minimatch": "^5.1.2",
|
"@types/minimatch": "^5.1.2",
|
||||||
|
|
@ -96,6 +97,7 @@
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@lvce-editor/ripgrep": "^1.6.0",
|
"@lvce-editor/ripgrep": "^1.6.0",
|
||||||
|
"@qwen-code/qwen-code": "^0.0.14",
|
||||||
"simple-git": "^3.28.0",
|
"simple-git": "^3.28.0",
|
||||||
"strip-ansi": "^7.1.0"
|
"strip-ansi": "^7.1.0"
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -703,7 +703,7 @@ export class GeminiClient {
|
||||||
* We should ignore model for now because some calls use `DEFAULT_GEMINI_FLASH_MODEL`
|
* We should ignore model for now because some calls use `DEFAULT_GEMINI_FLASH_MODEL`
|
||||||
* which is not available as `qwen3-coder-flash`
|
* which is not available as `qwen3-coder-flash`
|
||||||
*/
|
*/
|
||||||
const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
|
const modelToUse = model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
|
||||||
try {
|
try {
|
||||||
const userMemory = this.config.getUserMemory();
|
const userMemory = this.config.getUserMemory();
|
||||||
const finalSystemInstruction = config.systemInstruction
|
const finalSystemInstruction = config.systemInstruction
|
||||||
|
|
@ -789,7 +789,7 @@ export class GeminiClient {
|
||||||
abortSignal: AbortSignal,
|
abortSignal: AbortSignal,
|
||||||
model?: string,
|
model?: string,
|
||||||
): Promise<GenerateContentResponse> {
|
): Promise<GenerateContentResponse> {
|
||||||
const modelToUse = model ?? this.config.getModel();
|
const modelToUse = model || this.config.getModel();
|
||||||
const configToUse: GenerateContentConfig = {
|
const configToUse: GenerateContentConfig = {
|
||||||
...this.generateContentConfig,
|
...this.generateContentConfig,
|
||||||
...generationConfig,
|
...generationConfig,
|
||||||
|
|
|
||||||
|
|
@ -241,10 +241,17 @@ export class ContentGenerationPipeline {
|
||||||
): Promise<OpenAI.Chat.ChatCompletionCreateParams> {
|
): Promise<OpenAI.Chat.ChatCompletionCreateParams> {
|
||||||
const messages = this.converter.convertGeminiRequestToOpenAI(request);
|
const messages = this.converter.convertGeminiRequestToOpenAI(request);
|
||||||
|
|
||||||
|
// Check if request has a model override
|
||||||
|
let model = this.contentGeneratorConfig.model;
|
||||||
|
if (request.model) {
|
||||||
|
// Use request-specific model if provided
|
||||||
|
model = request.model;
|
||||||
|
}
|
||||||
|
|
||||||
// Apply provider-specific enhancements
|
// Apply provider-specific enhancements
|
||||||
const baseRequest: OpenAI.Chat.ChatCompletionCreateParams = {
|
const baseRequest: OpenAI.Chat.ChatCompletionCreateParams = {
|
||||||
model: this.contentGeneratorConfig.model,
|
|
||||||
messages,
|
messages,
|
||||||
|
model,
|
||||||
...this.buildSamplingParameters(request),
|
...this.buildSamplingParameters(request),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -405,9 +412,13 @@ export class ContentGenerationPipeline {
|
||||||
userPromptId: string,
|
userPromptId: string,
|
||||||
isStreaming: boolean,
|
isStreaming: boolean,
|
||||||
): RequestContext {
|
): RequestContext {
|
||||||
|
// For context logging, we use the default model since we don't have access to a request
|
||||||
|
// This is acceptable since context logging doesn't need request-specific model
|
||||||
|
const model = this.contentGeneratorConfig.model;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
userPromptId,
|
userPromptId,
|
||||||
model: this.contentGeneratorConfig.model,
|
model,
|
||||||
authType: this.contentGeneratorConfig.authType || 'unknown',
|
authType: this.contentGeneratorConfig.authType || 'unknown',
|
||||||
startTime: Date.now(),
|
startTime: Date.now(),
|
||||||
duration: 0,
|
duration: 0,
|
||||||
|
|
|
||||||
|
|
@ -396,9 +396,18 @@ Please analyze the conversation history to determine the possibility that the co
|
||||||
};
|
};
|
||||||
let result;
|
let result;
|
||||||
try {
|
try {
|
||||||
|
// Get the assistant model from environment variable, falling back to default if not specified
|
||||||
|
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
|
||||||
|
|
||||||
|
// Check for OPENAI_ASSISTANT_MODEL environment variable
|
||||||
|
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
|
||||||
|
if (openaiAssistantModel) {
|
||||||
|
assistantModel = openaiAssistantModel;
|
||||||
|
}
|
||||||
|
|
||||||
result = await this.config
|
result = await this.config
|
||||||
.getGeminiClient()
|
.getGeminiClient()
|
||||||
.generateJson(contents, schema, signal, DEFAULT_QWEN_FLASH_MODEL);
|
.generateJson(contents, schema, signal, assistantModel);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
// Do nothing, treat it as a non-loop.
|
// Do nothing, treat it as a non-loop.
|
||||||
this.config.getDebugMode() ? console.error(e) : console.debug(e);
|
this.config.getDebugMode() ? console.error(e) : console.debug(e);
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import { convert } from 'html-to-text';
|
||||||
import { ProxyAgent, setGlobalDispatcher } from 'undici';
|
import { ProxyAgent, setGlobalDispatcher } from 'undici';
|
||||||
import type { Config } from '../config/config.js';
|
import type { Config } from '../config/config.js';
|
||||||
import { ApprovalMode } from '../config/config.js';
|
import { ApprovalMode } from '../config/config.js';
|
||||||
|
import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
|
||||||
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
|
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
|
||||||
import { getResponseText } from '../utils/partUtils.js';
|
import { getResponseText } from '../utils/partUtils.js';
|
||||||
import { ToolErrorType } from './tool-error.js';
|
import { ToolErrorType } from './tool-error.js';
|
||||||
|
|
@ -104,10 +105,22 @@ ${textContent}
|
||||||
`[WebFetchTool] Processing content with prompt: "${this.params.prompt}"`,
|
`[WebFetchTool] Processing content with prompt: "${this.params.prompt}"`,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// For assistant decision-making, we want to use a different model than the one used for regular conversation
|
||||||
|
// This helps avoid KV cache invalidation issues when using the same model for both purposes
|
||||||
|
|
||||||
|
// Get the assistant model from environment variable, falling back to default if not specified
|
||||||
|
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
|
||||||
|
|
||||||
|
// Check for OPENAI_ASSISTANT_MODEL environment variable
|
||||||
|
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
|
||||||
|
if (openaiAssistantModel) {
|
||||||
|
assistantModel = openaiAssistantModel;
|
||||||
|
}
|
||||||
const result = await geminiClient.generateContent(
|
const result = await geminiClient.generateContent(
|
||||||
[{ role: 'user', parts: [{ text: fallbackPrompt }] }],
|
[{ role: 'user', parts: [{ text: fallbackPrompt }] }],
|
||||||
{},
|
{},
|
||||||
signal,
|
signal,
|
||||||
|
assistantModel
|
||||||
);
|
);
|
||||||
const resultText = getResponseText(result) || '';
|
const resultText = getResponseText(result) || '';
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -242,7 +242,7 @@ describe('checkNextSpeaker', () => {
|
||||||
expect(result).toBeNull();
|
expect(result).toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL', async () => {
|
it('should call generateJson with DEFAULT_QWEN_FLASH_MODEL when OPENAI_ASSISTANT_MODEL is not set', async () => {
|
||||||
(chatInstance.getHistory as Mock).mockReturnValue([
|
(chatInstance.getHistory as Mock).mockReturnValue([
|
||||||
{ role: 'model', parts: [{ text: 'Some model output.' }] },
|
{ role: 'model', parts: [{ text: 'Some model output.' }] },
|
||||||
] as Content[]);
|
] as Content[]);
|
||||||
|
|
@ -259,4 +259,31 @@ describe('checkNextSpeaker', () => {
|
||||||
.calls[0];
|
.calls[0];
|
||||||
expect(generateJsonCall[3]).toBe(DEFAULT_QWEN_FLASH_MODEL);
|
expect(generateJsonCall[3]).toBe(DEFAULT_QWEN_FLASH_MODEL);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should call generateJson with OPENAI_ASSISTANT_MODEL when set', async () => {
  // Override the variable for this test only, remembering the previous value
  // (which may be undefined) so it can be put back in the finally block.
  const originalEnv = process.env['OPENAI_ASSISTANT_MODEL'];
  process.env['OPENAI_ASSISTANT_MODEL'] = 'qwen2.5-0.5b';

  try {
    (chatInstance.getHistory as Mock).mockReturnValue([
      { role: 'model', parts: [{ text: 'Some model output.' }] },
    ] as Content[]);
    const mockApiResponse: NextSpeakerResponse = {
      reasoning: 'Model made a statement, awaiting user input.',
      next_speaker: 'user',
    };
    (mockGeminiClient.generateJson as Mock).mockResolvedValue(mockApiResponse);

    await checkNextSpeaker(chatInstance, mockGeminiClient, abortSignal);

    expect(mockGeminiClient.generateJson).toHaveBeenCalled();
    const generateJsonCall = (mockGeminiClient.generateJson as Mock).mock
      .calls[0];
    // The model name is passed as the fourth positional argument.
    expect(generateJsonCall[3]).toBe('qwen2.5-0.5b');
  } finally {
    // Restore the original environment. process.env coerces assigned values
    // to strings, so writing `undefined` back would leave the literal string
    // "undefined" behind and leak a bogus model name into later tests; the
    // key must be deleted instead when it was not set originally.
    if (originalEnv === undefined) {
      delete process.env['OPENAI_ASSISTANT_MODEL'];
    } else {
      process.env['OPENAI_ASSISTANT_MODEL'] = originalEnv;
    }
  }
});
|
||||||
});
|
});
|
||||||
|
|
@ -108,11 +108,23 @@ export async function checkNextSpeaker(
|
||||||
];
|
];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// For assistant decision-making, we want to use a different model than the one used for regular conversation
|
||||||
|
// This helps avoid KV cache invalidation issues when using the same model for both purposes
|
||||||
|
|
||||||
|
// Get the assistant model from environment variable, falling back to default if not specified
|
||||||
|
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
|
||||||
|
|
||||||
|
// Check for OPENAI_ASSISTANT_MODEL environment variable
|
||||||
|
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
|
||||||
|
if (openaiAssistantModel) {
|
||||||
|
assistantModel = openaiAssistantModel;
|
||||||
|
}
|
||||||
|
|
||||||
const parsedResponse = (await geminiClient.generateJson(
|
const parsedResponse = (await geminiClient.generateJson(
|
||||||
contents,
|
contents,
|
||||||
RESPONSE_SCHEMA,
|
RESPONSE_SCHEMA,
|
||||||
abortSignal,
|
abortSignal,
|
||||||
DEFAULT_QWEN_FLASH_MODEL,
|
assistantModel,
|
||||||
)) as unknown as NextSpeakerResponse;
|
)) as unknown as NextSpeakerResponse;
|
||||||
|
|
||||||
if (
|
if (
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import type {
|
||||||
GenerateContentResponse,
|
GenerateContentResponse,
|
||||||
} from '@google/genai';
|
} from '@google/genai';
|
||||||
import type { GeminiClient } from '../core/client.js';
|
import type { GeminiClient } from '../core/client.js';
|
||||||
import { DEFAULT_GEMINI_FLASH_LITE_MODEL } from '../config/models.js';
|
import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
|
||||||
import { getResponseText, partToString } from './partUtils.js';
|
import { getResponseText, partToString } from './partUtils.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -81,12 +81,24 @@ export async function summarizeToolOutput(
|
||||||
const toolOutputSummarizerConfig: GenerateContentConfig = {
|
const toolOutputSummarizerConfig: GenerateContentConfig = {
|
||||||
maxOutputTokens,
|
maxOutputTokens,
|
||||||
};
|
};
|
||||||
|
// For assistant decision-making, we want to use a different model than the one used for regular conversation
|
||||||
|
// This helps avoid KV cache invalidation issues when using the same model for both purposes
|
||||||
|
|
||||||
|
// Get the assistant model from environment variable, falling back to default if not specified
|
||||||
|
let assistantModel = DEFAULT_QWEN_FLASH_MODEL;
|
||||||
|
|
||||||
|
// Check for OPENAI_ASSISTANT_MODEL environment variable
|
||||||
|
const openaiAssistantModel = process.env['OPENAI_ASSISTANT_MODEL'];
|
||||||
|
if (openaiAssistantModel) {
|
||||||
|
assistantModel = openaiAssistantModel;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const parsedResponse = (await geminiClient.generateContent(
|
const parsedResponse = (await geminiClient.generateContent(
|
||||||
contents,
|
contents,
|
||||||
toolOutputSummarizerConfig,
|
toolOutputSummarizerConfig,
|
||||||
abortSignal,
|
abortSignal,
|
||||||
DEFAULT_GEMINI_FLASH_LITE_MODEL,
|
assistantModel,
|
||||||
)) as unknown as GenerateContentResponse;
|
)) as unknown as GenerateContentResponse;
|
||||||
return getResponseText(parsedResponse) || textToSummarize;
|
return getResponseText(parsedResponse) || textToSummarize;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue