🤖 feat: Add Z.AI GLM Context Window & Pricing (#9979)
Some checks are pending
Docker Dev Images Build / build (Dockerfile, librechat-dev, node) (push) Waiting to run
Docker Dev Images Build / build (Dockerfile.multi, librechat-dev-api, api-build) (push) Waiting to run
Sync Locize Translations & Create Translation PR / Sync Translation Keys with Locize (push) Waiting to run
Sync Locize Translations & Create Translation PR / Create Translation PR on Version Published (push) Blocked by required conditions

* fix: update @librechat/agents to v2.4.83 to handle reasoning edge case encountered with GLM models

* feat: GLM Context Window & Pricing Support

* feat: Add support for glm4 model in token values and tests
This commit is contained in:
Danny Avila 2025-10-05 09:08:29 -04:00 committed by GitHub
parent 7288449011
commit c9103a1708
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 287 additions and 8 deletions

View file

@@ -157,6 +157,14 @@ const tokenValues = Object.assign(
'gpt-oss-20b': { prompt: 0.05, completion: 0.2 },
'gpt-oss:120b': { prompt: 0.15, completion: 0.6 },
'gpt-oss-120b': { prompt: 0.15, completion: 0.6 },
// GLM models (Zhipu AI)
glm4: { prompt: 0.1, completion: 0.1 },
'glm-4': { prompt: 0.1, completion: 0.1 },
'glm-4-32b': { prompt: 0.1, completion: 0.1 },
'glm-4.5': { prompt: 0.35, completion: 1.55 },
'glm-4.5v': { prompt: 0.6, completion: 1.8 },
'glm-4.5-air': { prompt: 0.14, completion: 0.86 },
'glm-4.6': { prompt: 0.5, completion: 1.75 },
},
bedrockValues,
);

View file

@@ -404,6 +404,18 @@ describe('getMultiplier', () => {
expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion);
});
});
it('should return correct multipliers for GLM models', () => {
  // Canonical GLM value keys present in tokenValues.
  const glmKeys = ['glm-4.6', 'glm-4.5v', 'glm-4.5-air', 'glm-4.5', 'glm-4-32b', 'glm-4', 'glm4'];
  for (const valueKey of glmKeys) {
    const { prompt, completion } = tokenValues[valueKey];
    // Direct valueKey lookup and model-name resolution must agree.
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(prompt);
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(completion);
    expect(getMultiplier({ model: valueKey, tokenType: 'prompt' })).toBe(prompt);
    expect(getMultiplier({ model: valueKey, tokenType: 'completion' })).toBe(completion);
  }
});
});
describe('AWS Bedrock Model Tests', () => {
@@ -782,6 +794,110 @@ describe('Grok Model Tests - Pricing', () => {
});
});
describe('GLM Model Tests', () => {
it('should return expected value keys for GLM models', () => {
expect(getValueKey('glm-4.6')).toBe('glm-4.6');
expect(getValueKey('glm-4.5')).toBe('glm-4.5');
expect(getValueKey('glm-4.5v')).toBe('glm-4.5v');
expect(getValueKey('glm-4.5-air')).toBe('glm-4.5-air');
expect(getValueKey('glm-4-32b')).toBe('glm-4-32b');
expect(getValueKey('glm-4')).toBe('glm-4');
expect(getValueKey('glm4')).toBe('glm4');
});
it('should match GLM model variations with provider prefixes', () => {
expect(getValueKey('z-ai/glm-4.6')).toBe('glm-4.6');
expect(getValueKey('z-ai/glm-4.5')).toBe('glm-4.5');
expect(getValueKey('z-ai/glm-4.5-air')).toBe('glm-4.5-air');
expect(getValueKey('z-ai/glm-4.5v')).toBe('glm-4.5v');
expect(getValueKey('z-ai/glm-4-32b')).toBe('glm-4-32b');
expect(getValueKey('zai/glm-4.6')).toBe('glm-4.6');
expect(getValueKey('zai/glm-4.5')).toBe('glm-4.5');
expect(getValueKey('zai/glm-4.5-air')).toBe('glm-4.5-air');
expect(getValueKey('zai/glm-4.5v')).toBe('glm-4.5v');
expect(getValueKey('zai-org/GLM-4.6')).toBe('glm-4.6');
expect(getValueKey('zai-org/GLM-4.5')).toBe('glm-4.5');
expect(getValueKey('zai-org/GLM-4.5-Air')).toBe('glm-4.5-air');
expect(getValueKey('zai-org/GLM-4.5V')).toBe('glm-4.5v');
expect(getValueKey('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b');
});
it('should match GLM model variations with suffixes', () => {
expect(getValueKey('glm-4.6-fp8')).toBe('glm-4.6');
expect(getValueKey('zai-org/GLM-4.6-FP8')).toBe('glm-4.6');
expect(getValueKey('zai-org/GLM-4.5-Air-FP8')).toBe('glm-4.5-air');
});
it('should prioritize more specific GLM model patterns', () => {
expect(getValueKey('glm-4.5-air-something')).toBe('glm-4.5-air');
expect(getValueKey('glm-4.5-something')).toBe('glm-4.5');
expect(getValueKey('glm-4.5v-something')).toBe('glm-4.5v');
});
it('should return correct multipliers for all GLM models', () => {
expect(getMultiplier({ model: 'glm-4.6', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.6'].prompt,
);
expect(getMultiplier({ model: 'glm-4.6', tokenType: 'completion' })).toBe(
tokenValues['glm-4.6'].completion,
);
expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.5v'].prompt,
);
expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'completion' })).toBe(
tokenValues['glm-4.5v'].completion,
);
expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.5-air'].prompt,
);
expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'completion' })).toBe(
tokenValues['glm-4.5-air'].completion,
);
expect(getMultiplier({ model: 'glm-4.5', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.5'].prompt,
);
expect(getMultiplier({ model: 'glm-4.5', tokenType: 'completion' })).toBe(
tokenValues['glm-4.5'].completion,
);
expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'prompt' })).toBe(
tokenValues['glm-4-32b'].prompt,
);
expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'completion' })).toBe(
tokenValues['glm-4-32b'].completion,
);
expect(getMultiplier({ model: 'glm-4', tokenType: 'prompt' })).toBe(
tokenValues['glm-4'].prompt,
);
expect(getMultiplier({ model: 'glm-4', tokenType: 'completion' })).toBe(
tokenValues['glm-4'].completion,
);
expect(getMultiplier({ model: 'glm4', tokenType: 'prompt' })).toBe(tokenValues['glm4'].prompt);
expect(getMultiplier({ model: 'glm4', tokenType: 'completion' })).toBe(
tokenValues['glm4'].completion,
);
});
it('should return correct multipliers for GLM models with provider prefixes', () => {
expect(getMultiplier({ model: 'z-ai/glm-4.6', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.6'].prompt,
);
expect(getMultiplier({ model: 'zai/glm-4.5-air', tokenType: 'completion' })).toBe(
tokenValues['glm-4.5-air'].completion,
);
expect(getMultiplier({ model: 'zai-org/GLM-4.5V', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.5v'].prompt,
);
});
});
describe('Claude Model Tests', () => {
it('should return correct prompt and completion rates for Claude 4 models', () => {
expect(getMultiplier({ model: 'claude-sonnet-4', tokenType: 'prompt' })).toBe(

View file

@@ -48,7 +48,7 @@
"@langchain/google-genai": "^0.2.13",
"@langchain/google-vertexai": "^0.2.13",
"@langchain/textsplitters": "^0.1.0",
"@librechat/agents": "^2.4.82",
"@librechat/agents": "^2.4.83",
"@librechat/api": "*",
"@librechat/data-schemas": "*",
"@microsoft/microsoft-graph-client": "^3.0.7",

View file

@@ -409,6 +409,64 @@ describe('getModelMaxTokens', () => {
});
});
test('should return correct tokens for GLM models', () => {
  // Each canonical GLM key should resolve against the openAI token map entry of the same name.
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];
  const glmKeys = ['glm-4.6', 'glm-4.5v', 'glm-4.5-air', 'glm-4.5', 'glm-4-32b', 'glm-4', 'glm4'];
  for (const key of glmKeys) {
    expect(getModelMaxTokens(key)).toBe(openAITokens[key]);
  }
});
test('should return correct tokens for GLM models with provider prefixes', () => {
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];
  // [provider-prefixed model name, canonical token-map key]
  const cases = [
    ['z-ai/glm-4.6', 'glm-4.6'],
    ['z-ai/glm-4.5', 'glm-4.5'],
    ['z-ai/glm-4.5-air', 'glm-4.5-air'],
    ['z-ai/glm-4.5v', 'glm-4.5v'],
    ['z-ai/glm-4-32b', 'glm-4-32b'],
    ['zai/glm-4.6', 'glm-4.6'],
    ['zai/glm-4.5-air', 'glm-4.5-air'],
    ['zai/glm-4.5v', 'glm-4.5v'],
    ['zai-org/GLM-4.6', 'glm-4.6'],
    ['zai-org/GLM-4.5', 'glm-4.5'],
    ['zai-org/GLM-4.5-Air', 'glm-4.5-air'],
    ['zai-org/GLM-4.5V', 'glm-4.5v'],
    ['zai-org/GLM-4-32B-0414', 'glm-4-32b'],
  ];
  for (const [model, key] of cases) {
    expect(getModelMaxTokens(model)).toBe(openAITokens[key]);
  }
});
test('should return correct tokens for GLM models with suffixes', () => {
  const openAITokens = maxTokensMap[EModelEndpoint.openAI];
  // Quantization/build suffixes (e.g. -FP8) should still resolve to the base key.
  const cases = [
    ['glm-4.6-fp8', 'glm-4.6'],
    ['zai-org/GLM-4.6-FP8', 'glm-4.6'],
    ['zai-org/GLM-4.5-Air-FP8', 'glm-4.5-air'],
  ];
  for (const [model, key] of cases) {
    expect(getModelMaxTokens(model)).toBe(openAITokens[key]);
  }
});
test('should return correct max output tokens for GPT-5 models', () => {
const { getModelMaxOutputTokens } = require('@librechat/api');
['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => {
@@ -865,3 +923,91 @@ describe('Kimi Model Tests', () => {
});
});
});
describe('GLM Model Tests (Zhipu AI)', () => {
  // Canonical GLM value key -> expected context window (max tokens).
  const glmMaxTokens = {
    'glm-4.6': 200000,
    'glm-4.5v': 66000,
    'glm-4.5-air': 131000,
    'glm-4.5': 131000,
    'glm-4-32b': 128000,
    'glm-4': 128000,
    glm4: 128000,
  };

  // Provider-prefixed variants and the canonical key each should resolve to.
  const prefixedVariants = [
    ['z-ai/glm-4.6', 'glm-4.6'],
    ['z-ai/glm-4.5', 'glm-4.5'],
    ['z-ai/glm-4.5-air', 'glm-4.5-air'],
    ['z-ai/glm-4.5v', 'glm-4.5v'],
    ['z-ai/glm-4-32b', 'glm-4-32b'],
    ['zai/glm-4.6', 'glm-4.6'],
    ['zai/glm-4.5', 'glm-4.5'],
    ['zai/glm-4.5-air', 'glm-4.5-air'],
    ['zai/glm-4.5v', 'glm-4.5v'],
    ['zai-org/GLM-4.6', 'glm-4.6'],
    ['zai-org/GLM-4.5', 'glm-4.5'],
    ['zai-org/GLM-4.5-Air', 'glm-4.5-air'],
    ['zai-org/GLM-4.5V', 'glm-4.5v'],
    ['zai-org/GLM-4-32B-0414', 'glm-4-32b'],
  ];

  // Suffixed variants (e.g. FP8 builds) and their canonical keys.
  const suffixedVariants = [
    ['glm-4.6-fp8', 'glm-4.6'],
    ['zai-org/GLM-4.6-FP8', 'glm-4.6'],
    ['zai-org/GLM-4.5-Air-FP8', 'glm-4.5-air'],
  ];

  describe('getModelMaxTokens', () => {
    test('should return correct tokens for GLM models', () => {
      for (const [key, tokens] of Object.entries(glmMaxTokens)) {
        expect(getModelMaxTokens(key)).toBe(tokens);
      }
    });

    test('should handle partial matches for GLM models with provider prefixes', () => {
      for (const [model, key] of prefixedVariants) {
        expect(getModelMaxTokens(model)).toBe(glmMaxTokens[key]);
      }
    });

    test('should handle GLM model variations with suffixes', () => {
      for (const [model, key] of suffixedVariants) {
        expect(getModelMaxTokens(model)).toBe(glmMaxTokens[key]);
      }
    });

    test('should prioritize more specific GLM patterns', () => {
      // glm-4.5-air / glm-4.5v must not be shadowed by the shorter glm-4.5 key.
      expect(getModelMaxTokens('glm-4.5-air-custom')).toBe(131000);
      expect(getModelMaxTokens('glm-4.5-custom')).toBe(131000);
      expect(getModelMaxTokens('glm-4.5v-custom')).toBe(66000);
    });
  });

  describe('matchModelName', () => {
    test('should match exact GLM model names', () => {
      for (const key of Object.keys(glmMaxTokens)) {
        expect(matchModelName(key)).toBe(key);
      }
    });

    test('should match GLM model variations with provider prefixes', () => {
      for (const [model, key] of prefixedVariants) {
        expect(matchModelName(model)).toBe(key);
      }
    });

    test('should match GLM model variations with suffixes', () => {
      for (const [model, key] of suffixedVariants) {
        expect(matchModelName(model)).toBe(key);
      }
    });

    test('should handle case-insensitive matching for GLM models', () => {
      expect(matchModelName('zai-org/GLM-4.6')).toBe('glm-4.6');
      expect(matchModelName('zai-org/GLM-4.5V')).toBe('glm-4.5v');
      expect(matchModelName('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b');
    });
  });
});

10
package-lock.json generated
View file

@@ -64,7 +64,7 @@
"@langchain/google-genai": "^0.2.13",
"@langchain/google-vertexai": "^0.2.13",
"@langchain/textsplitters": "^0.1.0",
"@librechat/agents": "^2.4.82",
"@librechat/agents": "^2.4.83",
"@librechat/api": "*",
"@librechat/data-schemas": "*",
"@microsoft/microsoft-graph-client": "^3.0.7",
@@ -21522,9 +21522,9 @@
}
},
"node_modules/@librechat/agents": {
"version": "2.4.82",
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.82.tgz",
"integrity": "sha512-KNz8L1H/IXE3hnOU27ElsGy+oWpZ7oYnrLXIoJUyoy/qWlAUzKkzbOHp4hkLIK3xB21ncVuSqKS0542W6MQkKQ==",
"version": "2.4.83",
"resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.83.tgz",
"integrity": "sha512-xOspD4jegd7wpjWQhOieOso2LrXsNRyHNYEIIuCuk2eUZkxJU+1Rny0XzukhenTOG8P4bpne9XoVxxxZ0W5duA==",
"license": "MIT",
"dependencies": {
"@langchain/anthropic": "^0.3.26",
@@ -51336,7 +51336,7 @@
"@azure/storage-blob": "^12.27.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.62",
"@librechat/agents": "^2.4.82",
"@librechat/agents": "^2.4.83",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.17.1",
"axios": "^1.12.1",

View file

@@ -80,7 +80,7 @@
"@azure/storage-blob": "^12.27.0",
"@keyv/redis": "^4.3.3",
"@langchain/core": "^0.3.62",
"@librechat/agents": "^2.4.82",
"@librechat/agents": "^2.4.83",
"@librechat/data-schemas": "*",
"@modelcontextprotocol/sdk": "^1.17.1",
"axios": "^1.12.1",

View file

@@ -262,6 +262,14 @@ const aggregateModels = {
'gpt-oss-20b': 131000,
'gpt-oss:120b': 131000,
'gpt-oss-120b': 131000,
// GLM models (Zhipu AI)
glm4: 128000,
'glm-4': 128000,
'glm-4-32b': 128000,
'glm-4.5': 131000,
'glm-4.5-air': 131000,
'glm-4.5v': 66000,
'glm-4.6': 200000,
};
export const maxTokensMap = {
@@ -317,9 +325,10 @@ export function findMatchingPattern(
tokensMap: Record<string, number> | EndpointTokenConfig,
): string | null {
const keys = Object.keys(tokensMap);
const lowerModelName = modelName.toLowerCase();
for (let i = keys.length - 1; i >= 0; i--) {
const modelKey = keys[i];
if (modelName.includes(modelKey)) {
if (lowerModelName.includes(modelKey)) {
return modelKey;
}
}