Fix/qwen3 vl plus highres (#721)
* feat: Add Qwen3-VL-Plus token limits (256K input, 32K output)

  - Added 256K input context window limit for Qwen3-VL-Plus model
  - Updated output token limit from 8K to 32K for Qwen3-VL-Plus
  - Added comprehensive tests for both input and output limits

  As requested by Qwen maintainers for proper model support.

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>

* fix: enable high-res flag for qwen VL models

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
parent
f7841338c4
commit
9fce177bd8
2 changed files with 77 additions and 1 deletions
|
|
@ -688,6 +688,60 @@ describe('DashScopeOpenAICompatibleProvider', () => {
|
||||||
).toBe(true); // Vision-specific parameter should be preserved
|
).toBe(true); // Vision-specific parameter should be preserved
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should set high resolution flag for qwen3-vl-plus', () => {
|
||||||
|
const request: OpenAI.Chat.ChatCompletionCreateParams = {
|
||||||
|
model: 'qwen3-vl-plus',
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'text', text: 'Please inspect the image.' },
|
||||||
|
{
|
||||||
|
type: 'image_url',
|
||||||
|
image_url: { url: 'https://example.com/vl.jpg' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
max_tokens: 50000,
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = provider.buildRequest(request, 'test-prompt-id');
|
||||||
|
|
||||||
|
expect(result.max_tokens).toBe(32768);
|
||||||
|
expect(
|
||||||
|
(result as { vl_high_resolution_images?: boolean })
|
||||||
|
.vl_high_resolution_images,
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should set high resolution flag for the vision-model alias', () => {
|
||||||
|
const request: OpenAI.Chat.ChatCompletionCreateParams = {
|
||||||
|
model: 'vision-model',
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: [
|
||||||
|
{ type: 'text', text: 'Alias payload' },
|
||||||
|
{
|
||||||
|
type: 'image_url',
|
||||||
|
image_url: { url: 'https://example.com/alias.png' },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
max_tokens: 9000,
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = provider.buildRequest(request, 'test-prompt-id');
|
||||||
|
|
||||||
|
expect(result.max_tokens).toBe(8192);
|
||||||
|
expect(
|
||||||
|
(result as { vl_high_resolution_images?: boolean })
|
||||||
|
.vl_high_resolution_images,
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
it('should handle streaming requests with output token limits', () => {
|
it('should handle streaming requests with output token limits', () => {
|
||||||
const request: OpenAI.Chat.ChatCompletionCreateParams = {
|
const request: OpenAI.Chat.ChatCompletionCreateParams = {
|
||||||
model: 'qwen3-coder-plus',
|
model: 'qwen3-coder-plus',
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,7 @@ export class DashScopeOpenAICompatibleProvider
|
||||||
request.model,
|
request.model,
|
||||||
);
|
);
|
||||||
|
|
||||||
if (request.model.startsWith('qwen-vl')) {
|
if (this.isVisionModel(request.model)) {
|
||||||
return {
|
return {
|
||||||
...requestWithTokenLimits,
|
...requestWithTokenLimits,
|
||||||
messages,
|
messages,
|
||||||
|
|
@ -267,6 +267,28 @@ export class DashScopeOpenAICompatibleProvider
|
||||||
return contentArray;
|
return contentArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private isVisionModel(model: string | undefined): boolean {
|
||||||
|
if (!model) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const normalized = model.toLowerCase();
|
||||||
|
|
||||||
|
if (normalized === 'vision-model') {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (normalized.startsWith('qwen-vl')) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (normalized.startsWith('qwen3-vl-plus')) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Apply output token limit to a request's max_tokens parameter.
|
* Apply output token limit to a request's max_tokens parameter.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue