diff --git a/api/models/tx.js b/api/models/tx.js index 062ebecf26..282d58c8fc 100644 --- a/api/models/tx.js +++ b/api/models/tx.js @@ -157,6 +157,14 @@ const tokenValues = Object.assign( 'gpt-oss-20b': { prompt: 0.05, completion: 0.2 }, 'gpt-oss:120b': { prompt: 0.15, completion: 0.6 }, 'gpt-oss-120b': { prompt: 0.15, completion: 0.6 }, + // GLM models (Zhipu AI) + glm4: { prompt: 0.1, completion: 0.1 }, + 'glm-4': { prompt: 0.1, completion: 0.1 }, + 'glm-4-32b': { prompt: 0.1, completion: 0.1 }, + 'glm-4.5': { prompt: 0.35, completion: 1.55 }, + 'glm-4.5v': { prompt: 0.6, completion: 1.8 }, + 'glm-4.5-air': { prompt: 0.14, completion: 0.86 }, + 'glm-4.6': { prompt: 0.5, completion: 1.75 }, }, bedrockValues, ); diff --git a/api/models/tx.spec.js b/api/models/tx.spec.js index 7594ce3295..3cbce34295 100644 --- a/api/models/tx.spec.js +++ b/api/models/tx.spec.js @@ -404,6 +404,18 @@ describe('getMultiplier', () => { expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion); }); }); + + it('should return correct multipliers for GLM models', () => { + const models = ['glm-4.6', 'glm-4.5v', 'glm-4.5-air', 'glm-4.5', 'glm-4-32b', 'glm-4', 'glm4']; + models.forEach((key) => { + const expectedPrompt = tokenValues[key].prompt; + const expectedCompletion = tokenValues[key].completion; + expect(getMultiplier({ valueKey: key, tokenType: 'prompt' })).toBe(expectedPrompt); + expect(getMultiplier({ valueKey: key, tokenType: 'completion' })).toBe(expectedCompletion); + expect(getMultiplier({ model: key, tokenType: 'prompt' })).toBe(expectedPrompt); + expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion); + }); + }); }); describe('AWS Bedrock Model Tests', () => { @@ -782,6 +794,110 @@ describe('Grok Model Tests - Pricing', () => { }); }); +describe('GLM Model Tests', () => { + it('should return expected value keys for GLM models', () => { + expect(getValueKey('glm-4.6')).toBe('glm-4.6'); + expect(getValueKey('glm-4.5')).toBe('glm-4.5'); + expect(getValueKey('glm-4.5v')).toBe('glm-4.5v'); + expect(getValueKey('glm-4.5-air')).toBe('glm-4.5-air'); + expect(getValueKey('glm-4-32b')).toBe('glm-4-32b'); + expect(getValueKey('glm-4')).toBe('glm-4'); + expect(getValueKey('glm4')).toBe('glm4'); + }); + + it('should match GLM model variations with provider prefixes', () => { + expect(getValueKey('z-ai/glm-4.6')).toBe('glm-4.6'); + expect(getValueKey('z-ai/glm-4.5')).toBe('glm-4.5'); + expect(getValueKey('z-ai/glm-4.5-air')).toBe('glm-4.5-air'); + expect(getValueKey('z-ai/glm-4.5v')).toBe('glm-4.5v'); + expect(getValueKey('z-ai/glm-4-32b')).toBe('glm-4-32b'); + + expect(getValueKey('zai/glm-4.6')).toBe('glm-4.6'); + expect(getValueKey('zai/glm-4.5')).toBe('glm-4.5'); + expect(getValueKey('zai/glm-4.5-air')).toBe('glm-4.5-air'); + expect(getValueKey('zai/glm-4.5v')).toBe('glm-4.5v'); + + expect(getValueKey('zai-org/GLM-4.6')).toBe('glm-4.6'); + expect(getValueKey('zai-org/GLM-4.5')).toBe('glm-4.5'); + expect(getValueKey('zai-org/GLM-4.5-Air')).toBe('glm-4.5-air'); + expect(getValueKey('zai-org/GLM-4.5V')).toBe('glm-4.5v'); + expect(getValueKey('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b'); + }); + + it('should match GLM model variations with suffixes', () => { + expect(getValueKey('glm-4.6-fp8')).toBe('glm-4.6'); + expect(getValueKey('zai-org/GLM-4.6-FP8')).toBe('glm-4.6'); + expect(getValueKey('zai-org/GLM-4.5-Air-FP8')).toBe('glm-4.5-air'); + }); + + it('should prioritize more specific GLM model patterns', () => { + expect(getValueKey('glm-4.5-air-something')).toBe('glm-4.5-air'); + expect(getValueKey('glm-4.5-something')).toBe('glm-4.5'); + expect(getValueKey('glm-4.5v-something')).toBe('glm-4.5v'); + }); + + it('should return correct multipliers for all GLM models', () => { + expect(getMultiplier({ model: 'glm-4.6', tokenType: 'prompt' })).toBe( + tokenValues['glm-4.6'].prompt, + ); + expect(getMultiplier({ model: 'glm-4.6', tokenType: 'completion' })).toBe( + tokenValues['glm-4.6'].completion, + ); + + expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'prompt' })).toBe( + tokenValues['glm-4.5v'].prompt, + ); + expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'completion' })).toBe( + tokenValues['glm-4.5v'].completion, + ); + + expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'prompt' })).toBe( + tokenValues['glm-4.5-air'].prompt, + ); + expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'completion' })).toBe( + tokenValues['glm-4.5-air'].completion, + ); + + expect(getMultiplier({ model: 'glm-4.5', tokenType: 'prompt' })).toBe( + tokenValues['glm-4.5'].prompt, + ); + expect(getMultiplier({ model: 'glm-4.5', tokenType: 'completion' })).toBe( + tokenValues['glm-4.5'].completion, + ); + + expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'prompt' })).toBe( + tokenValues['glm-4-32b'].prompt, + ); + expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'completion' })).toBe( + tokenValues['glm-4-32b'].completion, + ); + + expect(getMultiplier({ model: 'glm-4', tokenType: 'prompt' })).toBe( + tokenValues['glm-4'].prompt, + ); + expect(getMultiplier({ model: 'glm-4', tokenType: 'completion' })).toBe( + tokenValues['glm-4'].completion, + ); + + expect(getMultiplier({ model: 'glm4', tokenType: 'prompt' })).toBe(tokenValues['glm4'].prompt); + expect(getMultiplier({ model: 'glm4', tokenType: 'completion' })).toBe( + tokenValues['glm4'].completion, + ); + }); + + it('should return correct multipliers for GLM models with provider prefixes', () => { + expect(getMultiplier({ model: 'z-ai/glm-4.6', tokenType: 'prompt' })).toBe( + tokenValues['glm-4.6'].prompt, + ); + expect(getMultiplier({ model: 'zai/glm-4.5-air', tokenType: 'completion' })).toBe( + tokenValues['glm-4.5-air'].completion, + ); + expect(getMultiplier({ model: 'zai-org/GLM-4.5V', tokenType: 'prompt' })).toBe( + tokenValues['glm-4.5v'].prompt, + ); + }); +}); + describe('Claude Model Tests', () => { it('should return correct prompt and completion rates for Claude 4 models', () => { expect(getMultiplier({ model: 'claude-sonnet-4', tokenType: 'prompt' })).toBe( diff --git a/api/package.json b/api/package.json index 98964f5cb0..b856fc0925 100644 --- a/api/package.json +++ b/api/package.json @@ -48,7 +48,7 @@ "@langchain/google-genai": "^0.2.13", "@langchain/google-vertexai": "^0.2.13", "@langchain/textsplitters": "^0.1.0", - "@librechat/agents": "^2.4.82", + "@librechat/agents": "^2.4.83", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", diff --git a/api/utils/tokens.spec.js b/api/utils/tokens.spec.js index 20dad79894..162827767f 100644 --- a/api/utils/tokens.spec.js +++ b/api/utils/tokens.spec.js @@ -409,6 +409,64 @@ describe('getModelMaxTokens', () => { }); }); + test('should return correct tokens for GLM models', () => { + expect(getModelMaxTokens('glm-4.6')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.6']); + expect(getModelMaxTokens('glm-4.5v')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.5v']); + expect(getModelMaxTokens('glm-4.5-air')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'], + ); + expect(getModelMaxTokens('glm-4.5')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.5']); + expect(getModelMaxTokens('glm-4-32b')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4-32b']); + expect(getModelMaxTokens('glm-4')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4']); + expect(getModelMaxTokens('glm4')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm4']); + }); + + test('should return correct tokens for GLM models with provider prefixes', () => { + expect(getModelMaxTokens('z-ai/glm-4.6')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.6']); + expect(getModelMaxTokens('z-ai/glm-4.5')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.5']); + expect(getModelMaxTokens('z-ai/glm-4.5-air')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'], + ); + expect(getModelMaxTokens('z-ai/glm-4.5v')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.5v'], + ); + expect(getModelMaxTokens('z-ai/glm-4-32b')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4-32b'], + ); + + expect(getModelMaxTokens('zai/glm-4.6')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.6']); + expect(getModelMaxTokens('zai/glm-4.5-air')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'], + ); + expect(getModelMaxTokens('zai/glm-4.5v')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.5v']); + + expect(getModelMaxTokens('zai-org/GLM-4.6')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.6'], + ); + expect(getModelMaxTokens('zai-org/GLM-4.5')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.5'], + ); + expect(getModelMaxTokens('zai-org/GLM-4.5-Air')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'], + ); + expect(getModelMaxTokens('zai-org/GLM-4.5V')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.5v'], + ); + expect(getModelMaxTokens('zai-org/GLM-4-32B-0414')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4-32b'], + ); + }); + + test('should return correct tokens for GLM models with suffixes', () => { + expect(getModelMaxTokens('glm-4.6-fp8')).toBe(maxTokensMap[EModelEndpoint.openAI]['glm-4.6']); + expect(getModelMaxTokens('zai-org/GLM-4.6-FP8')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.6'], + ); + expect(getModelMaxTokens('zai-org/GLM-4.5-Air-FP8')).toBe( + maxTokensMap[EModelEndpoint.openAI]['glm-4.5-air'], + ); + }); + test('should return correct max output tokens for GPT-5 models', () => { const { getModelMaxOutputTokens } = require('@librechat/api'); ['gpt-5', 'gpt-5-mini', 'gpt-5-nano'].forEach((model) => { @@ -865,3 +923,91 @@ describe('Kimi Model Tests', () => { }); }); }); + +describe('GLM Model Tests (Zhipu AI)', () => { + describe('getModelMaxTokens', () => { + test('should return correct tokens for GLM models', () => { + expect(getModelMaxTokens('glm-4.6')).toBe(200000); + expect(getModelMaxTokens('glm-4.5v')).toBe(66000); + expect(getModelMaxTokens('glm-4.5-air')).toBe(131000); + expect(getModelMaxTokens('glm-4.5')).toBe(131000); + expect(getModelMaxTokens('glm-4-32b')).toBe(128000); + expect(getModelMaxTokens('glm-4')).toBe(128000); + expect(getModelMaxTokens('glm4')).toBe(128000); + }); + + test('should handle partial matches for GLM models with provider prefixes', () => { + expect(getModelMaxTokens('z-ai/glm-4.6')).toBe(200000); + expect(getModelMaxTokens('z-ai/glm-4.5')).toBe(131000); + expect(getModelMaxTokens('z-ai/glm-4.5-air')).toBe(131000); + expect(getModelMaxTokens('z-ai/glm-4.5v')).toBe(66000); + expect(getModelMaxTokens('z-ai/glm-4-32b')).toBe(128000); + + expect(getModelMaxTokens('zai/glm-4.6')).toBe(200000); + expect(getModelMaxTokens('zai/glm-4.5')).toBe(131000); + expect(getModelMaxTokens('zai/glm-4.5-air')).toBe(131000); + expect(getModelMaxTokens('zai/glm-4.5v')).toBe(66000); + + expect(getModelMaxTokens('zai-org/GLM-4.6')).toBe(200000); + expect(getModelMaxTokens('zai-org/GLM-4.5')).toBe(131000); + expect(getModelMaxTokens('zai-org/GLM-4.5-Air')).toBe(131000); + expect(getModelMaxTokens('zai-org/GLM-4.5V')).toBe(66000); + expect(getModelMaxTokens('zai-org/GLM-4-32B-0414')).toBe(128000); + }); + + test('should handle GLM model variations with suffixes', () => { + expect(getModelMaxTokens('glm-4.6-fp8')).toBe(200000); + expect(getModelMaxTokens('zai-org/GLM-4.6-FP8')).toBe(200000); + expect(getModelMaxTokens('zai-org/GLM-4.5-Air-FP8')).toBe(131000); + }); + + test('should prioritize more specific GLM patterns', () => { + expect(getModelMaxTokens('glm-4.5-air-custom')).toBe(131000); + expect(getModelMaxTokens('glm-4.5-custom')).toBe(131000); + expect(getModelMaxTokens('glm-4.5v-custom')).toBe(66000); + }); + }); + + describe('matchModelName', () => { + test('should match exact GLM model names', () => { + expect(matchModelName('glm-4.6')).toBe('glm-4.6'); + expect(matchModelName('glm-4.5v')).toBe('glm-4.5v'); + expect(matchModelName('glm-4.5-air')).toBe('glm-4.5-air'); + expect(matchModelName('glm-4.5')).toBe('glm-4.5'); + expect(matchModelName('glm-4-32b')).toBe('glm-4-32b'); + expect(matchModelName('glm-4')).toBe('glm-4'); + expect(matchModelName('glm4')).toBe('glm4'); + }); + + test('should match GLM model variations with provider prefixes', () => { + expect(matchModelName('z-ai/glm-4.6')).toBe('glm-4.6'); + expect(matchModelName('z-ai/glm-4.5')).toBe('glm-4.5'); + expect(matchModelName('z-ai/glm-4.5-air')).toBe('glm-4.5-air'); + expect(matchModelName('z-ai/glm-4.5v')).toBe('glm-4.5v'); + expect(matchModelName('z-ai/glm-4-32b')).toBe('glm-4-32b'); + + expect(matchModelName('zai/glm-4.6')).toBe('glm-4.6'); + expect(matchModelName('zai/glm-4.5')).toBe('glm-4.5'); + expect(matchModelName('zai/glm-4.5-air')).toBe('glm-4.5-air'); + expect(matchModelName('zai/glm-4.5v')).toBe('glm-4.5v'); + + expect(matchModelName('zai-org/GLM-4.6')).toBe('glm-4.6'); + expect(matchModelName('zai-org/GLM-4.5')).toBe('glm-4.5'); + expect(matchModelName('zai-org/GLM-4.5-Air')).toBe('glm-4.5-air'); + expect(matchModelName('zai-org/GLM-4.5V')).toBe('glm-4.5v'); + expect(matchModelName('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b'); + }); + + test('should match GLM model variations with suffixes', () => { + expect(matchModelName('glm-4.6-fp8')).toBe('glm-4.6'); + expect(matchModelName('zai-org/GLM-4.6-FP8')).toBe('glm-4.6'); + expect(matchModelName('zai-org/GLM-4.5-Air-FP8')).toBe('glm-4.5-air'); + }); + + test('should handle case-insensitive matching for GLM models', () => { + expect(matchModelName('zai-org/GLM-4.6')).toBe('glm-4.6'); + expect(matchModelName('zai-org/GLM-4.5V')).toBe('glm-4.5v'); + expect(matchModelName('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b'); + }); + }); +}); diff --git a/package-lock.json b/package-lock.json index 0a3784fcd7..06ad223183 100644 --- a/package-lock.json +++ b/package-lock.json @@ -64,7 +64,7 @@ "@langchain/google-genai": "^0.2.13", "@langchain/google-vertexai": "^0.2.13", "@langchain/textsplitters": "^0.1.0", - "@librechat/agents": "^2.4.82", + "@librechat/agents": "^2.4.83", "@librechat/api": "*", "@librechat/data-schemas": "*", "@microsoft/microsoft-graph-client": "^3.0.7", @@ -21522,9 +21522,9 @@ } }, "node_modules/@librechat/agents": { - "version": "2.4.82", - "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.82.tgz", - "integrity": "sha512-KNz8L1H/IXE3hnOU27ElsGy+oWpZ7oYnrLXIoJUyoy/qWlAUzKkzbOHp4hkLIK3xB21ncVuSqKS0542W6MQkKQ==", + "version": "2.4.83", + "resolved": "https://registry.npmjs.org/@librechat/agents/-/agents-2.4.83.tgz", + "integrity": "sha512-xOspD4jegd7wpjWQhOieOso2LrXsNRyHNYEIIuCuk2eUZkxJU+1Rny0XzukhenTOG8P4bpne9XoVxxxZ0W5duA==", "license": "MIT", "dependencies": { "@langchain/anthropic": "^0.3.26", @@ -51336,7 +51336,7 @@ "@azure/storage-blob": "^12.27.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.62", - "@librechat/agents": "^2.4.82", + "@librechat/agents": "^2.4.83", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.17.1", "axios": "^1.12.1", diff --git a/packages/api/package.json b/packages/api/package.json index 2a38366b41..a983aca87a 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -80,7 +80,7 @@ "@azure/storage-blob": "^12.27.0", "@keyv/redis": "^4.3.3", "@langchain/core": "^0.3.62", - "@librechat/agents": "^2.4.82", + "@librechat/agents": "^2.4.83", "@librechat/data-schemas": "*", "@modelcontextprotocol/sdk": "^1.17.1", "axios": "^1.12.1", diff --git a/packages/api/src/utils/tokens.ts b/packages/api/src/utils/tokens.ts index f75a5b9de2..d527836642 100644 --- a/packages/api/src/utils/tokens.ts +++ b/packages/api/src/utils/tokens.ts @@ -262,6 +262,14 @@ const aggregateModels = { 'gpt-oss-20b': 131000, 'gpt-oss:120b': 131000, 'gpt-oss-120b': 131000, + // GLM models (Zhipu AI) + glm4: 128000, + 'glm-4': 128000, + 'glm-4-32b': 128000, + 'glm-4.5': 131000, + 'glm-4.5-air': 131000, + 'glm-4.5v': 66000, + 'glm-4.6': 200000, }; export const maxTokensMap = { @@ -317,9 +325,10 @@ export function findMatchingPattern( tokensMap: Record | EndpointTokenConfig, ): string | null { const keys = Object.keys(tokensMap); + const lowerModelName = modelName.toLowerCase(); for (let i = keys.length - 1; i >= 0; i--) { const modelKey = keys[i]; - if (modelName.includes(modelKey)) { + if (lowerModelName.includes(modelKey)) { return modelKey; } }