mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 16:30:15 +01:00
🤖 feat: Add Z.AI GLM Context Window & Pricing (#9979)
Some checks are pending
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
Some checks are pending
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions
* fix: update @librechat/agents to v2.4.83 to handle reasoning edge case encountered with GLM models
* feat: GLM Context Window & Pricing Support
* feat: Add support for glm4 model in token values and tests
This commit is contained in:
parent
7288449011
commit
c9103a1708
7 changed files with 287 additions and 8 deletions
|
|
@ -157,6 +157,14 @@ const tokenValues = Object.assign(
|
|||
'gpt-oss-20b': { prompt: 0.05, completion: 0.2 },
|
||||
'gpt-oss:120b': { prompt: 0.15, completion: 0.6 },
|
||||
'gpt-oss-120b': { prompt: 0.15, completion: 0.6 },
|
||||
// GLM models (Zhipu AI)
|
||||
glm4: { prompt: 0.1, completion: 0.1 },
|
||||
'glm-4': { prompt: 0.1, completion: 0.1 },
|
||||
'glm-4-32b': { prompt: 0.1, completion: 0.1 },
|
||||
'glm-4.5': { prompt: 0.35, completion: 1.55 },
|
||||
'glm-4.5v': { prompt: 0.6, completion: 1.8 },
|
||||
'glm-4.5-air': { prompt: 0.14, completion: 0.86 },
|
||||
'glm-4.6': { prompt: 0.5, completion: 1.75 },
|
||||
},
|
||||
bedrockValues,
|
||||
);
|
||||
|
|
|
|||
|
|
@ -404,6 +404,18 @@ describe('getMultiplier', () => {
|
|||
expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion);
|
||||
});
|
||||
});
|
||||
|
||||
it('should return correct multipliers for GLM models', () => {
|
||||
const models = ['glm-4.6', 'glm-4.5v', 'glm-4.5-air', 'glm-4.5', 'glm-4-32b', 'glm-4', 'glm4'];
|
||||
models.forEach((key) => {
|
||||
const expectedPrompt = tokenValues[key].prompt;
|
||||
const expectedCompletion = tokenValues[key].completion;
|
||||
expect(getMultiplier({ valueKey: key, tokenType: 'prompt' })).toBe(expectedPrompt);
|
||||
expect(getMultiplier({ valueKey: key, tokenType: 'completion' })).toBe(expectedCompletion);
|
||||
expect(getMultiplier({ model: key, tokenType: 'prompt' })).toBe(expectedPrompt);
|
||||
expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('AWS Bedrock Model Tests', () => {
|
||||
|
|
@ -782,6 +794,110 @@ describe('Grok Model Tests - Pricing', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('GLM Model Tests', () => {
|
||||
it('should return expected value keys for GLM models', () => {
|
||||
expect(getValueKey('glm-4.6')).toBe('glm-4.6');
|
||||
expect(getValueKey('glm-4.5')).toBe('glm-4.5');
|
||||
expect(getValueKey('glm-4.5v')).toBe('glm-4.5v');
|
||||
expect(getValueKey('glm-4.5-air')).toBe('glm-4.5-air');
|
||||
expect(getValueKey('glm-4-32b')).toBe('glm-4-32b');
|
||||
expect(getValueKey('glm-4')).toBe('glm-4');
|
||||
expect(getValueKey('glm4')).toBe('glm4');
|
||||
});
|
||||
|
||||
it('should match GLM model variations with provider prefixes', () => {
|
||||
expect(getValueKey('z-ai/glm-4.6')).toBe('glm-4.6');
|
||||
expect(getValueKey('z-ai/glm-4.5')).toBe('glm-4.5');
|
||||
expect(getValueKey('z-ai/glm-4.5-air')).toBe('glm-4.5-air');
|
||||
expect(getValueKey('z-ai/glm-4.5v')).toBe('glm-4.5v');
|
||||
expect(getValueKey('z-ai/glm-4-32b')).toBe('glm-4-32b');
|
||||
|
||||
expect(getValueKey('zai/glm-4.6')).toBe('glm-4.6');
|
||||
expect(getValueKey('zai/glm-4.5')).toBe('glm-4.5');
|
||||
expect(getValueKey('zai/glm-4.5-air')).toBe('glm-4.5-air');
|
||||
expect(getValueKey('zai/glm-4.5v')).toBe('glm-4.5v');
|
||||
|
||||
expect(getValueKey('zai-org/GLM-4.6')).toBe('glm-4.6');
|
||||
expect(getValueKey('zai-org/GLM-4.5')).toBe('glm-4.5');
|
||||
expect(getValueKey('zai-org/GLM-4.5-Air')).toBe('glm-4.5-air');
|
||||
expect(getValueKey('zai-org/GLM-4.5V')).toBe('glm-4.5v');
|
||||
expect(getValueKey('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b');
|
||||
});
|
||||
|
||||
it('should match GLM model variations with suffixes', () => {
|
||||
expect(getValueKey('glm-4.6-fp8')).toBe('glm-4.6');
|
||||
expect(getValueKey('zai-org/GLM-4.6-FP8')).toBe('glm-4.6');
|
||||
expect(getValueKey('zai-org/GLM-4.5-Air-FP8')).toBe('glm-4.5-air');
|
||||
});
|
||||
|
||||
it('should prioritize more specific GLM model patterns', () => {
|
||||
expect(getValueKey('glm-4.5-air-something')).toBe('glm-4.5-air');
|
||||
expect(getValueKey('glm-4.5-something')).toBe('glm-4.5');
|
||||
expect(getValueKey('glm-4.5v-something')).toBe('glm-4.5v');
|
||||
});
|
||||
|
||||
it('should return correct multipliers for all GLM models', () => {
|
||||
expect(getMultiplier({ model: 'glm-4.6', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['glm-4.6'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'glm-4.6', tokenType: 'completion' })).toBe(
|
||||
tokenValues['glm-4.6'].completion,
|
||||
);
|
||||
|
||||
expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['glm-4.5v'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'completion' })).toBe(
|
||||
tokenValues['glm-4.5v'].completion,
|
||||
);
|
||||
|
||||
expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['glm-4.5-air'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'completion' })).toBe(
|
||||
tokenValues['glm-4.5-air'].completion,
|
||||
);
|
||||
|
||||
expect(getMultiplier({ model: 'glm-4.5', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['glm-4.5'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'glm-4.5', tokenType: 'completion' })).toBe(
|
||||
tokenValues['glm-4.5'].completion,
|
||||
);
|
||||
|
||||
expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['glm-4-32b'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'completion' })).toBe(
|
||||
tokenValues['glm-4-32b'].completion,
|
||||
);
|
||||
|
||||
expect(getMultiplier({ model: 'glm-4', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['glm-4'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'glm-4', tokenType: 'completion' })).toBe(
|
||||
tokenValues['glm-4'].completion,
|
||||
);
|
||||
|
||||
expect(getMultiplier({ model: 'glm4', tokenType: 'prompt' })).toBe(tokenValues['glm4'].prompt);
|
||||
expect(getMultiplier({ model: 'glm4', tokenType: 'completion' })).toBe(
|
||||
tokenValues['glm4'].completion,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return correct multipliers for GLM models with provider prefixes', () => {
|
||||
expect(getMultiplier({ model: 'z-ai/glm-4.6', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['glm-4.6'].prompt,
|
||||
);
|
||||
expect(getMultiplier({ model: 'zai/glm-4.5-air', tokenType: 'completion' })).toBe(
|
||||
tokenValues['glm-4.5-air'].completion,
|
||||
);
|
||||
expect(getMultiplier({ model: 'zai-org/GLM-4.5V', tokenType: 'prompt' })).toBe(
|
||||
tokenValues['glm-4.5v'].prompt,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Claude Model Tests', () => {
|
||||
it('should return correct prompt and completion rates for Claude 4 models', () => {
|
||||
expect(getMultiplier({ model: 'claude-sonnet-4', tokenType: 'prompt' })).toBe(
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@
|
|||
"@langchain/google-genai": "^0.2.13",
|
||||
"@langchain/google-vertexai": "^0.2.13",
|
||||
"@langchain/textsplitters": "^0.1.0",
|
||||
"@librechat/agents": "^2.4.82",
|
||||
"@librechat/agents": "^2.4.83",
|
||||
"@librechat/api": "*",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@microsoft/microsoft-graph-client": "^3.0.7",
|
||||
|
|
|
|||
|
|
@ -409,6 +409,64 @@ describe('getModelMaxTokens', () => {
|
|||
});
|
||||
});
|
||||
|
||||
test('should return correct tokens for GLM models', () => {
|
||||
expect(getModelMaxTokens('glm-4.6')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.6']);
|
||||
expect(getModelMaxTokens('glm-4.5v')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.5v']);
|
||||
expect(getModelMaxTokens('glm-4.5-air')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'],
|
||||
);
|
||||
expect(getModelMaxTokens('glm-4.5')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.5']);
|
||||
expect(getModelMaxTokens('glm-4-32b')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4-32b']);
|
||||
expect(getModelMaxTokens('glm-4')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4']);
|
||||
expect(getModelMaxTokens('glm4')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm4']);
|
||||
});
|
||||
|
||||
test('should return correct tokens for GLM models with provider prefixes', () => {
|
||||
expect(getModelMaxTokens('z-ai/glm-4.6')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.6']);
|
||||
expect(getModelMaxTokens('z-ai/glm-4.5')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.5']);
|
||||
expect(getModelMaxTokens('z-ai/glm-4.5-air')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'],
|
||||
);
|
||||
expect(getModelMaxTokens('z-ai/glm-4.5v')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.5v'],
|
||||
);
|
||||
expect(getModelMaxTokens('z-ai/glm-4-32b')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4-32b'],
|
||||
);
|
||||
|
||||
expect(getModelMaxTokens('zai/glm-4.6')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.6']);
|
||||
expect(getModelMaxTokens('zai/glm-4.5-air')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'],
|
||||
);
|
||||
expect(getModelMaxTokens('zai/glm-4.5v')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.5v']);
|
||||
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.6')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.6'],
|
||||
);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.5')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.5'],
|
||||
);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.5-Air')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'],
|
||||
);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.5V')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.5v'],
|
||||
);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4-32B-0414')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4-32b'],
|
||||
);
|
||||
});
|
||||
|
||||
test('should return correct tokens for GLM models with suffixes', () => {
|
||||
expect(getModelMaxTokens('glm-4.6-fp8')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.6']);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.6-FP8')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.6'],
|
||||
);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.5-Air-FP8')).toBe(
|
||||
maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'],
|
||||
);
|
||||
});
|
||||
|
||||
test('should return correct max output tokens for GPT-5 models', () => {
|
||||
const { getModelMaxOutputTokens } = require('@librechat/api');
|
||||
['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
|
||||
|
|
@ -865,3 +923,91 @@ describe('Kimi Model Tests', () => {
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('GLM Model Tests (Zhipu AI)', () => {
|
||||
describe('getModelMaxTokens', () => {
|
||||
test('should return correct tokens for GLM models', () => {
|
||||
expect(getModelMaxTokens('glm-4.6')).toBe(200000);
|
||||
expect(getModelMaxTokens('glm-4.5v')).toBe(66000);
|
||||
expect(getModelMaxTokens('glm-4.5-air')).toBe(131000);
|
||||
expect(getModelMaxTokens('glm-4.5')).toBe(131000);
|
||||
expect(getModelMaxTokens('glm-4-32b')).toBe(128000);
|
||||
expect(getModelMaxTokens('glm-4')).toBe(128000);
|
||||
expect(getModelMaxTokens('glm4')).toBe(128000);
|
||||
});
|
||||
|
||||
test('should handle partial matches for GLM models with provider prefixes', () => {
|
||||
expect(getModelMaxTokens('z-ai/glm-4.6')).toBe(200000);
|
||||
expect(getModelMaxTokens('z-ai/glm-4.5')).toBe(131000);
|
||||
expect(getModelMaxTokens('z-ai/glm-4.5-air')).toBe(131000);
|
||||
expect(getModelMaxTokens('z-ai/glm-4.5v')).toBe(66000);
|
||||
expect(getModelMaxTokens('z-ai/glm-4-32b')).toBe(128000);
|
||||
|
||||
expect(getModelMaxTokens('zai/glm-4.6')).toBe(200000);
|
||||
expect(getModelMaxTokens('zai/glm-4.5')).toBe(131000);
|
||||
expect(getModelMaxTokens('zai/glm-4.5-air')).toBe(131000);
|
||||
expect(getModelMaxTokens('zai/glm-4.5v')).toBe(66000);
|
||||
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.6')).toBe(200000);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.5')).toBe(131000);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.5-Air')).toBe(131000);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.5V')).toBe(66000);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4-32B-0414')).toBe(128000);
|
||||
});
|
||||
|
||||
test('should handle GLM model variations with suffixes', () => {
|
||||
expect(getModelMaxTokens('glm-4.6-fp8')).toBe(200000);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.6-FP8')).toBe(200000);
|
||||
expect(getModelMaxTokens('zai-org/GLM-4.5-Air-FP8')).toBe(131000);
|
||||
});
|
||||
|
||||
test('should prioritize more specific GLM patterns', () => {
|
||||
expect(getModelMaxTokens('glm-4.5-air-custom')).toBe(131000);
|
||||
expect(getModelMaxTokens('glm-4.5-custom')).toBe(131000);
|
||||
expect(getModelMaxTokens('glm-4.5v-custom')).toBe(66000);
|
||||
});
|
||||
});
|
||||
|
||||
describe('matchModelName', () => {
|
||||
test('should match exact GLM model names', () => {
|
||||
expect(matchModelName('glm-4.6')).toBe('glm-4.6');
|
||||
expect(matchModelName('glm-4.5v')).toBe('glm-4.5v');
|
||||
expect(matchModelName('glm-4.5-air')).toBe('glm-4.5-air');
|
||||
expect(matchModelName('glm-4.5')).toBe('glm-4.5');
|
||||
expect(matchModelName('glm-4-32b')).toBe('glm-4-32b');
|
||||
expect(matchModelName('glm-4')).toBe('glm-4');
|
||||
expect(matchModelName('glm4')).toBe('glm4');
|
||||
});
|
||||
|
||||
test('should match GLM model variations with provider prefixes', () => {
|
||||
expect(matchModelName('z-ai/glm-4.6')).toBe('glm-4.6');
|
||||
expect(matchModelName('z-ai/glm-4.5')).toBe('glm-4.5');
|
||||
expect(matchModelName('z-ai/glm-4.5-air')).toBe('glm-4.5-air');
|
||||
expect(matchModelName('z-ai/glm-4.5v')).toBe('glm-4.5v');
|
||||
expect(matchModelName('z-ai/glm-4-32b')).toBe('glm-4-32b');
|
||||
|
||||
expect(matchModelName('zai/glm-4.6')).toBe('glm-4.6');
|
||||
expect(matchModelName('zai/glm-4.5')).toBe('glm-4.5');
|
||||
expect(matchModelName('zai/glm-4.5-air')).toBe('glm-4.5-air');
|
||||
expect(matchModelName('zai/glm-4.5v')).toBe('glm-4.5v');
|
||||
|
||||
expect(matchModelName('zai-org/GLM-4.6')).toBe('glm-4.6');
|
||||
expect(matchModelName('zai-org/GLM-4.5')).toBe('glm-4.5');
|
||||
expect(matchModelName('zai-org/GLM-4.5-Air')).toBe('glm-4.5-air');
|
||||
expect(matchModelName('zai-org/GLM-4.5V')).toBe('glm-4.5v');
|
||||
expect(matchModelName('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b');
|
||||
});
|
||||
|
||||
test('should match GLM model variations with suffixes', () => {
|
||||
expect(matchModelName('glm-4.6-fp8')).toBe('glm-4.6');
|
||||
expect(matchModelName('zai-org/GLM-4.6-FP8')).toBe('glm-4.6');
|
||||
expect(matchModelName('zai-org/GLM-4.5-Air-FP8')).toBe('glm-4.5-air');
|
||||
});
|
||||
|
||||
test('should handle case-insensitive matching for GLM models', () => {
|
||||
expect(matchModelName('zai-org/GLM-4.6')).toBe('glm-4.6');
|
||||
expect(matchModelName('zai-org/GLM-4.5V')).toBe('glm-4.5v');
|
||||
expect(matchModelName('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
10
package-lock.json
generated
10
package-lock.json
generated
|
|
@ -64,7 +64,7 @@
|
|||
"@langchain/google-genai": "^0.2.13",
|
||||
"@langchain/google-vertexai": "^0.2.13",
|
||||
"@langchain/textsplitters": "^0.1.0",
|
||||
"@librechat/agents": "^2.4.82",
|
||||
"@librechat/agents": "^2.4.83",
|
||||
"@librechat/api": "*",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@microsoft/microsoft-graph-client": "^3.0.7",
|
||||
|
|
@ -21522,9 +21522,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@librechat/agents": {
|
||||
"version": "2.4.82",
|
||||
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.82.tgz",
|
||||
"integrity": "sha512-KNz8L1H/IXE3hnOU27ElsGy+oWpZ7oYnrLXIoJUyoy/qWlAUzKkzbOHp4hkLIK3xB21ncVuSqKS0542W6MQkKQ==",
|
||||
"version": "2.4.83",
|
||||
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.83.tgz",
|
||||
"integrity": "sha512-xOspD4jegd7wpjWQhOieOso2LrXsNRyHNYEIIuCuk2eUZkxJU+1Rny0XzukhenTOG8P4bpne9XoVxxxZ0W5duA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@langchain/anthropic": "^0.3.26",
|
||||
|
|
@ -51336,7 +51336,7 @@
|
|||
"@azure/storage-blob": "^12.27.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.62",
|
||||
"@librechat/agents": "^2.4.82",
|
||||
"@librechat/agents": "^2.4.83",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@modelcontextprotocol/sdk": "^1.17.1",
|
||||
"axios": "^1.12.1",
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@
|
|||
"@azure/storage-blob": "^12.27.0",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@langchain/core": "^0.3.62",
|
||||
"@librechat/agents": "^2.4.82",
|
||||
"@librechat/agents": "^2.4.83",
|
||||
"@librechat/data-schemas": "*",
|
||||
"@modelcontextprotocol/sdk": "^1.17.1",
|
||||
"axios": "^1.12.1",
|
||||
|
|
|
|||
|
|
@ -262,6 +262,14 @@ const aggregateModels = {
|
|||
'gpt-oss-20b': 131000,
|
||||
'gpt-oss:120b': 131000,
|
||||
'gpt-oss-120b': 131000,
|
||||
// GLM models (Zhipu AI)
|
||||
glm4: 128000,
|
||||
'glm-4': 128000,
|
||||
'glm-4-32b': 128000,
|
||||
'glm-4.5': 131000,
|
||||
'glm-4.5-air': 131000,
|
||||
'glm-4.5v': 66000,
|
||||
'glm-4.6': 200000,
|
||||
};
|
||||
|
||||
export const maxTokensMap = {
|
||||
|
|
@ -317,9 +325,10 @@ export function findMatchingPattern(
|
|||
tokensMap: Record<string, number> | EndpointTokenConfig,
|
||||
): string | null {
|
||||
const keys = Object.keys(tokensMap);
|
||||
const lowerModelName = modelName.toLowerCase();
|
||||
for (let i = keys.length - 1; i >= 0; i--) {
|
||||
const modelKey = keys[i];
|
||||
if (modelName.includes(modelKey)) {
|
||||
if (lowerModelName.includes(modelKey)) {
|
||||
return modelKey;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue