// LibreChat/packages/api/src/tools/classification.ts

/**
 * @fileoverview Utility functions for building tool registries from environment variables.
 * This is a temporary solution for tool classification until UI-based configuration is available.
 *
 * Environment Variables:
 * - TOOL_CLASSIFICATION_FROM_ENV: Must be exactly `true` for the TOOL_* lists below to classify tools
 *   when the agent has no tool_options of its own; otherwise only basic definitions are built in that case
 * - TOOL_PROGRAMMATIC_ONLY: Comma-separated tool names or server patterns (sys__all__sys_mcp_ServerName)
 * - TOOL_PROGRAMMATIC_ONLY_EXCLUDE: Comma-separated tool names to exclude from programmatic only
 * - TOOL_DUAL_CONTEXT: Comma-separated tool names or server patterns callable BOTH by LLM and PTC
 * - TOOL_DUAL_CONTEXT_EXCLUDE: Comma-separated tool names to exclude from dual context
 * - TOOL_DEFERRED: Comma-separated tool names or server patterns for deferred tools
 * - TOOL_DEFERRED_EXCLUDE: Comma-separated tool names to exclude from deferred
 * - TOOL_CLASSIFICATION_AGENT_IDS: Optional comma-separated agent IDs to restrict classification features
 *
 * Server patterns: Use `sys__all__sys_mcp_ServerName` to match all tools from an MCP server.
 * Example: `sys__all__sys_mcp_Google-Workspace` matches all Google Workspace tools.
 *
 * Agent restriction: If TOOL_CLASSIFICATION_AGENT_IDS is set, only those agents will get
 * PTC and tool search tools. If not set, all agents with matching tools get them.
 *
 * Smart enablement: PTC/tool search are only created if the agent has tools that actually
 * match the classification patterns. An agent with no programmatic/deferred tools won't
 * get PTC/tool search even if the env vars are set.
 *
 * @module packages/api/src/tools/classification
 */

import { logger } from '@librechat/data-schemas';
import { Constants } from 'librechat-data-provider';
import {
  EnvVar,
  createToolSearch,
  ToolSearchToolDefinition,
  createProgrammaticToolCallingTool,
  ProgrammaticToolCallingDefinition,
} from '@librechat/agents';
import type { AgentToolOptions } from 'librechat-data-provider';
import type {
  LCToolRegistry,
  JsonSchemaType,
  AllowedCaller,
  GenericTool,
  LCTool,
} from '@librechat/agents';

export type { LCTool, LCToolRegistry, AllowedCaller, JsonSchemaType };

/**
 * Pattern prefix for matching all tools from an MCP server.
 * Composed of `Constants.mcp_all` followed by `Constants.mcp_delimiter`; a server-wide
 * pattern is this prefix immediately followed by the server name.
 */
const MCP_ALL_PATTERN = `${Constants.mcp_all}${Constants.mcp_delimiter}`;

/** Minimal description of a tool, used as the input when building a tool registry. */
export interface ToolDefinition {
  /** Full tool name; used as the key of the resulting registry entry */
  name: string;
  /** Human-readable description; copied into the registry entry when present */
  description?: string;
  /** Parameter schema; copied into the registry entry when present */
  parameters?: JsonSchemaType;
  /** MCP server name extracted from tool name */
  serverName?: string;
}

/**
 * Converts a comma-separated environment variable value into a set of entries.
 * Each entry is trimmed; blank entries are discarded and duplicates collapse into one.
 * @param envValue - Raw environment variable value (may be undefined, empty, or whitespace)
 * @returns Set of tool names or server patterns; empty when no usable entry exists
 */
export function parseToolList(envValue: string | undefined): Set<string> {
  const entries = new Set<string>();
  if (!envValue) {
    return entries;
  }

  for (const rawEntry of envValue.split(',')) {
    const entry = rawEntry.trim();
    if (entry !== '') {
      entries.add(entry);
    }
  }

  return entries;
}

/**
 * Derives the MCP server name from a full tool name.
 * Tool names follow the pattern: toolName_mcp_ServerName. The name is split on
 * `Constants.mcp_delimiter` and the last segment is taken as the server name.
 * @param toolName - The full tool name
 * @returns The last delimiter-separated segment, or undefined when the delimiter is absent
 */
export function getServerNameFromTool(toolName: string): string | undefined {
  const segments = toolName.split(Constants.mcp_delimiter);
  /** A single segment means the delimiter never occurred, i.e. not an MCP tool name */
  return segments.length < 2 ? undefined : segments[segments.length - 1];
}

/**
 * Decides whether a tool is selected by a set of patterns.
 * A tool is selected when its exact name is listed, or when the server-wide pattern for
 * its MCP server (e.g. `sys__all__sys_mcp_ServerName`) is listed.
 * Exclusions are checked first and only by exact tool name.
 *
 * @param toolName - The tool name to check
 * @param patterns - Set of patterns (tool names or server-wide patterns)
 * @param excludes - Set of tool names to exclude (takes precedence over patterns)
 * @returns Whether the tool matches any pattern and is not excluded
 */
export function toolMatchesPatterns(
  toolName: string,
  patterns: Set<string>,
  excludes: Set<string>,
): boolean {
  if (excludes.has(toolName)) {
    return false;
  }
  if (patterns.has(toolName)) {
    return true;
  }

  /** No exact hit: fall back to the server-wide pattern, if the name carries a server */
  const owningServer = getServerNameFromTool(toolName);
  if (!owningServer) {
    return false;
  }
  return patterns.has(MCP_ALL_PATTERN + owningServer);
}

/**
 * Classifies the given tools using only the TOOL_* environment variables.
 * This is a temporary solution while UI-based configuration is being developed.
 *
 * Inclusion lists accept exact tool names and server-wide patterns
 * (`sys__all__sys_mcp_ServerName`); the matching *_EXCLUDE list always wins.
 * Programmatic-only is checked before dual-context, so a tool matching both is
 * treated as programmatic-only.
 *
 * Default behavior (if tool not listed in any env var):
 * - allowed_callers: ['direct']
 * - defer_loading: false
 *
 * @param tools - Array of tool definitions
 * @returns Map of tool name to tool definition with classification
 *
 * @example
 * // Environment for server-wide configuration:
 * // TOOL_PROGRAMMATIC_ONLY=sys__all__sys_mcp_Google-Workspace
 * // TOOL_DEFERRED=sys__all__sys_mcp_Google-Workspace
 * // TOOL_DEFERRED_EXCLUDE=list_spreadsheets_mcp_Google-Workspace,read_sheet_values_mcp_Google-Workspace
 *
 * @example
 * // Environment for individual tools:
 * // TOOL_PROGRAMMATIC_ONLY=get_expenses,get_team_members
 * // TOOL_DUAL_CONTEXT=get_weather
 * // TOOL_DEFERRED=generate_report
 */
export function buildToolRegistryFromEnv(tools: ToolDefinition[]): LCToolRegistry {
  const { env } = process;
  const programmaticInclude = parseToolList(env.TOOL_PROGRAMMATIC_ONLY);
  const programmaticExclude = parseToolList(env.TOOL_PROGRAMMATIC_ONLY_EXCLUDE);
  const dualInclude = parseToolList(env.TOOL_DUAL_CONTEXT);
  const dualExclude = parseToolList(env.TOOL_DUAL_CONTEXT_EXCLUDE);
  const deferredInclude = parseToolList(env.TOOL_DEFERRED);
  const deferredExclude = parseToolList(env.TOOL_DEFERRED_EXCLUDE);

  /** Picks who may call the tool; unlisted tools are callable by the LLM only (not by PTC) */
  const resolveCallers = (toolName: string): AllowedCaller[] => {
    if (toolMatchesPatterns(toolName, programmaticInclude, programmaticExclude)) {
      return ['code_execution'];
    }
    if (toolMatchesPatterns(toolName, dualInclude, dualExclude)) {
      return ['direct', 'code_execution'];
    }
    return ['direct'];
  };

  const registry: LCToolRegistry = new Map();

  for (const { name, description, parameters } of tools) {
    const entry: LCTool = {
      name,
      allowed_callers: resolveCallers(name),
      defer_loading: toolMatchesPatterns(name, deferredInclude, deferredExclude),
    };

    /** Description and parameters are needed for tool search and PTC stub generation */
    if (description) {
      entry.description = description;
    }
    if (parameters) {
      entry.parameters = parameters;
    }

    registry.set(name, entry);
  }

  return registry;
}

/**
 * Classifies the given tools using the agent's per-tool `tool_options`.
 *
 * - allowed_callers: a non-empty list in the agent options wins; otherwise the
 *   TOOL_PROGRAMMATIC_ONLY / TOOL_DUAL_CONTEXT env lists (with their excludes) are
 *   consulted; otherwise the tool is `['direct']`.
 * - defer_loading: true only when the agent options explicitly set it to `true`.
 *   There is no env-var fallback for this field in this function.
 *
 * @param tools - Array of tool definitions
 * @param agentToolOptions - Per-tool configuration from the agent
 * @returns Map of tool name to tool definition with classification
 */
export function buildToolRegistryFromAgentOptions(
  tools: ToolDefinition[],
  agentToolOptions: AgentToolOptions,
): LCToolRegistry {
  /** Env lists act as the fallback for tools whose callers are not configured on the agent */
  const envProgrammatic = parseToolList(process.env.TOOL_PROGRAMMATIC_ONLY);
  const envProgrammaticExclude = parseToolList(process.env.TOOL_PROGRAMMATIC_ONLY_EXCLUDE);
  const envDual = parseToolList(process.env.TOOL_DUAL_CONTEXT);
  const envDualExclude = parseToolList(process.env.TOOL_DUAL_CONTEXT_EXCLUDE);

  /** Env-based caller resolution, used only when the agent gives no callers for a tool */
  const callersFromEnv = (toolName: string): AllowedCaller[] => {
    if (toolMatchesPatterns(toolName, envProgrammatic, envProgrammaticExclude)) {
      return ['code_execution'];
    }
    if (toolMatchesPatterns(toolName, envDual, envDualExclude)) {
      return ['direct', 'code_execution'];
    }
    return ['direct'];
  };

  const registry: LCToolRegistry = new Map();

  for (const { name, description, parameters } of tools) {
    const perToolOptions = agentToolOptions[name];
    const configuredCallers = perToolOptions?.allowed_callers;

    let allowed_callers: AllowedCaller[];
    if (configuredCallers && configuredCallers.length > 0) {
      allowed_callers = configuredCallers;
    } else {
      allowed_callers = callersFromEnv(name);
    }

    const entry: LCTool = {
      name,
      allowed_callers,
      /** Anything other than an explicit `true` (missing, false) means not deferred */
      defer_loading: perToolOptions?.defer_loading === true,
    };

    if (description) {
      entry.description = description;
    }
    if (parameters) {
      entry.parameters = parameters;
    }

    registry.set(name, entry);
  }

  return registry;
}

/**
 * Reports whether the environment configures any tool for programmatic calling.
 * True when TOOL_PROGRAMMATIC_ONLY or TOOL_DUAL_CONTEXT contains at least one entry
 * (tool name or server pattern).
 * @returns Whether PTC should be enabled
 */
export function shouldEnablePTC(): boolean {
  const rawLists = [process.env.TOOL_PROGRAMMATIC_ONLY, process.env.TOOL_DUAL_CONTEXT];
  return rawLists.some((rawList) => parseToolList(rawList).size > 0);
}

/**
 * Reports whether the environment configures any deferred tool.
 * True when TOOL_DEFERRED contains at least one entry (tool name or server pattern).
 * @returns Whether tool search should be enabled
 */
export function shouldEnableToolSearch(): boolean {
  return parseToolList(process.env.TOOL_DEFERRED).size !== 0;
}

/** The subset of a loaded tool instance's properties that this module reads for MCP tools. */
interface MCPToolInstance {
  /** Full tool name; the MCP server name is derived from it */
  name: string;
  /** Tool description, if any */
  description?: string;
  /** Marker checked by `isMCPTool`; must be exactly `true` for the tool to count as MCP */
  mcp?: boolean;
  /** Original JSON schema attached at MCP tool creation time */
  mcpJsonSchema?: JsonSchemaType;
}

/**
 * Builds a `ToolDefinition` from a loaded MCP tool instance.
 * The parameter schema is taken from the `mcpJsonSchema` property attached to MCP tools,
 * and the server name is derived from the tool name (format: toolName_mcp_ServerName).
 * Optional fields are only present on the result when they have a truthy value.
 *
 * @param tool - The loaded tool instance
 * @returns Tool definition
 */
export function extractMCPToolDefinition(tool: MCPToolInstance): ToolDefinition {
  const { name, description, mcpJsonSchema } = tool;
  const serverName = getServerNameFromTool(name);

  return {
    name,
    ...(description ? { description } : {}),
    ...(mcpJsonSchema ? { parameters: mcpJsonSchema } : {}),
    ...(serverName ? { serverName } : {}),
  };
}

/**
 * Type guard for MCP tools: a non-null object whose `mcp` property is exactly `true`.
 * @param tool - The tool to check (can be any value)
 * @returns Whether the tool is an MCP tool
 */
export function isMCPTool(tool: unknown): tool is MCPToolInstance {
  if (tool === null || typeof tool !== 'object') {
    return false;
  }
  return (tool as MCPToolInstance).mcp === true;
}

/**
 * Removes the temporary `mcpJsonSchema` property from each tool, mutating the tools in place.
 * The property is only needed while the registry is being built.
 * Tools whose `mcpJsonSchema` is already undefined are left untouched.
 *
 * @param tools - Array of tools to clean up
 */
export function cleanupMCPToolSchemas(tools: MCPToolInstance[]): void {
  tools.forEach((candidate) => {
    if (candidate.mcpJsonSchema === undefined) {
      return;
    }
    delete candidate.mcpJsonSchema;
  });
}

/**
 * Selects the registry-building strategy for the given MCP tool definitions:
 * 1. Agent `tool_options` with at least one key -> agent-based classification.
 * 2. Otherwise, `TOOL_CLASSIFICATION_FROM_ENV` exactly `'true'` -> env-based classification.
 * 3. Otherwise, unclassified entries carrying name, description, parameters, serverName
 *    and `toolType: 'mcp'` (used for event-driven mode).
 */
function buildToolRegistry(
  mcpToolDefs: ToolDefinition[],
  agentToolOptions?: AgentToolOptions,
): LCToolRegistry {
  if (agentToolOptions && Object.keys(agentToolOptions).length !== 0) {
    return buildToolRegistryFromAgentOptions(mcpToolDefs, agentToolOptions);
  }

  const classifyFromEnv = process.env.TOOL_CLASSIFICATION_FROM_ENV === 'true';
  if (classifyFromEnv) {
    return buildToolRegistryFromEnv(mcpToolDefs);
  }

  /** No classification config - basic definitions only */
  return new Map<string, LCTool>(
    mcpToolDefs.map(({ name, description, parameters, serverName }): [string, LCTool] => [
      name,
      { name, description, parameters, serverName, toolType: 'mcp' },
    ]),
  );
}

/** Parameters for building tool classification and creating PTC/tool search tools */
export interface BuildToolClassificationParams {
  /** All loaded tools (will be filtered for MCP tools) */
  loadedTools: GenericTool[];
  /** User ID for auth lookup; passed to `loadAuthValues` when the PTC tool is created */
  userId: string;
  /** Agent ID (used to check if this agent should have classification features) */
  agentId?: string;
  /** Per-tool configuration from the agent (takes precedence over env vars) */
  agentToolOptions?: AgentToolOptions;
  /**
   * Whether the deferred_tools capability is enabled (from agent config).
   * Defaults to true when omitted; when false, `defer_loading` is cleared on every registry entry.
   */
  deferredToolsEnabled?: boolean;
  /** When true, skip creating tool instances (for event-driven mode). Defaults to false. */
  definitionsOnly?: boolean;
  /** Function to load auth values (dependency injection); asked for the CODE_API_KEY field */
  loadAuthValues: (params: {
    userId: string;
    authFields: string[];
  }) => Promise<Record<string, string>>;
}

/** Result from building tool classification */
export interface BuildToolClassificationResult {
  /** Tool registry built from MCP tools (undefined if no MCP tools) */
  toolRegistry?: LCToolRegistry;
  /**
   * Tool definitions array for event-driven execution (built from the registry values,
   * plus the tool search / PTC definitions when those are enabled)
   */
  toolDefinitions: LCTool[];
  /** Additional tools created (PTC and/or tool search) */
  additionalTools: GenericTool[];
  /**
   * Whether any tools have defer_loading enabled (precomputed for efficiency).
   * Always false when there are no MCP tools, when the deferred capability is disabled,
   * or when the agent is not allowed for classification.
   */
  hasDeferredTools: boolean;
}

/**
 * Decides whether an agent may use classification features, based on
 * TOOL_CLASSIFICATION_AGENT_IDS.
 * - Variable unset or empty: every agent is allowed, including calls without an agentId.
 * - Variable set: only agents whose ID is listed are allowed; a missing agentId is rejected.
 * @param agentId - The agent ID to check
 * @returns Whether the agent is allowed
 */
export function isAgentAllowedForClassification(agentId?: string): boolean {
  const allowList = parseToolList(process.env.TOOL_CLASSIFICATION_AGENT_IDS);
  if (allowList.size === 0) {
    return true;
  }
  return agentId ? allowList.has(agentId) : false;
}

/**
 * Reports whether any registry entry can be called programmatically, i.e. lists
 * `'code_execution'` in its `allowed_callers` (programmatic-only or dual-context tools).
 * @param toolRegistry - The tool registry to check
 * @returns Whether any tools are configured for programmatic calling
 */
export function agentHasProgrammaticTools(toolRegistry: LCToolRegistry): boolean {
  return Array.from(toolRegistry.values()).some(
    (entry) => entry.allowed_callers?.includes('code_execution') === true,
  );
}

/**
 * Reports whether any registry entry has `defer_loading` set to exactly `true`.
 * @param toolRegistry - The tool registry to check
 * @returns Whether any tools are configured as deferred
 */
export function agentHasDeferredTools(toolRegistry: LCToolRegistry): boolean {
  return Array.from(toolRegistry.values()).some((entry) => entry.defer_loading === true);
}

/**
 * Registers a built-in tool (tool search or PTC) for event-driven mode:
 * appends its full definition to `toolDefinitions` and adds a direct-call-only
 * entry under its name to `toolRegistry`.
 */
function addBuiltInToolDefinition(
  toolRegistry: LCToolRegistry,
  toolDefinitions: LCTool[],
  definition: { name: string; description?: string; schema: unknown },
): void {
  toolDefinitions.push({
    name: definition.name,
    description: definition.description,
    parameters: definition.schema as LCTool['parameters'],
  });
  toolRegistry.set(definition.name, {
    name: definition.name,
    allowed_callers: ['direct'],
  });
}

/**
 * Builds the tool registry from MCP tools and conditionally creates PTC and tool search tools.
 *
 * This function:
 * 1. Filters loaded tools for MCP tools; with none, returns an empty result (no registry)
 * 2. Checks if the agent is allowed for classification features (via TOOL_CLASSIFICATION_AGENT_IDS)
 * 3. Extracts tool definitions and builds the registry
 *    - Uses agent's tool_options if provided (UI-based configuration)
 *    - Otherwise uses env vars only when TOOL_CLASSIFICATION_FROM_ENV=true
 *    - Otherwise builds basic, unclassified definitions
 * 4. Cleans up temporary mcpJsonSchema properties
 * 5. Clears defer_loading on every entry when the deferred capability is disabled
 * 6. Creates tool search tool only if agent has deferred tools
 * 7. Creates PTC tool only if agent has tools configured for programmatic calling
 *    (and, outside definitions-only mode, a CODE_API_KEY is available)
 *
 * Errors thrown while creating the PTC tool are logged, not rethrown; the result is
 * then returned without the PTC tool.
 *
 * @param params - Parameters including loaded tools, userId, agentId, agentToolOptions, and dependencies
 * @returns Tool registry, tool definitions, any additional tools created, and the deferred flag
 */
export async function buildToolClassification(
  params: BuildToolClassificationParams,
): Promise<BuildToolClassificationResult> {
  const {
    userId,
    agentId,
    loadedTools,
    agentToolOptions,
    definitionsOnly = false,
    deferredToolsEnabled = true,
    loadAuthValues,
  } = params;
  const additionalTools: GenericTool[] = [];

  const mcpTools = loadedTools.filter(isMCPTool);
  if (mcpTools.length === 0) {
    return {
      additionalTools,
      toolDefinitions: [],
      toolRegistry: undefined,
      hasDeferredTools: false,
    };
  }

  /**
   * Check if this agent is allowed to have advanced classification features (PTC, deferred tools).
   * Even if not allowed, we still build basic tool definitions for event-driven execution.
   */
  const isAllowedForClassification = isAgentAllowedForClassification(agentId);

  const mcpToolDefs = mcpTools.map(extractMCPToolDefinition);

  /**
   * Build registry from agent's tool_options if provided (UI config).
   * Environment variable-based classification is only used as fallback
   * when TOOL_CLASSIFICATION_FROM_ENV=true is explicitly set.
   */
  const toolRegistry: LCToolRegistry = buildToolRegistry(mcpToolDefs, agentToolOptions);

  /** Clean up temporary mcpJsonSchema property from tools now that registry is populated */
  cleanupMCPToolSchemas(mcpTools);

  /**
   * Only enable PTC if the agent has programmatic tools.
   * Only enable tool search if the agent has deferred tools AND the capability is enabled.
   */
  const hasProgrammaticTools = agentHasProgrammaticTools(toolRegistry);
  const hasDeferredTools = deferredToolsEnabled && agentHasDeferredTools(toolRegistry);

  /** Clear defer_loading if capability disabled, so the registry agrees with hasDeferredTools */
  if (!deferredToolsEnabled) {
    for (const toolDef of toolRegistry.values()) {
      if (toolDef.defer_loading === true) {
        toolDef.defer_loading = false;
      }
    }
  }

  /** Build toolDefinitions array from registry (single pass, reused) */
  const toolDefinitions: LCTool[] = Array.from(toolRegistry.values());

  /**
   * Agent not allowed for classification - return the definitions without PTC/tool search.
   * NOTE(review): entries keep whatever allowed_callers/defer_loading the registry builder
   * assigned; only the returned hasDeferredTools flag is forced to false — confirm intended.
   */
  if (!isAllowedForClassification) {
    logger.debug(
      `[buildToolClassification] Agent ${agentId} not allowed for classification, returning basic definitions`,
    );
    return { toolRegistry, toolDefinitions, additionalTools, hasDeferredTools: false };
  }

  /** No programmatic or deferred tools - skip PTC/ToolSearch */
  if (!hasProgrammaticTools && !hasDeferredTools) {
    logger.debug(
      `[buildToolClassification] Agent ${agentId} has no programmatic or deferred tools, skipping PTC/ToolSearch`,
    );
    return { toolRegistry, toolDefinitions, additionalTools, hasDeferredTools: false };
  }

  /** Same object is returned from every remaining exit; the arrays/map are filled in place */
  const result: BuildToolClassificationResult = {
    toolRegistry,
    toolDefinitions,
    additionalTools,
    hasDeferredTools,
  };

  /** Tool search uses local mode (no API key needed) */
  if (hasDeferredTools) {
    if (!definitionsOnly) {
      /** Created before the tool search entry itself is added to the registry */
      additionalTools.push(createToolSearch({ mode: 'local', toolRegistry }));
    }

    /** Add ToolSearch definition for event-driven mode */
    addBuiltInToolDefinition(toolRegistry, toolDefinitions, ToolSearchToolDefinition);

    logger.debug(`[buildToolClassification] Tool Search enabled for agent ${agentId}`);
  }

  if (!hasProgrammaticTools) {
    return result;
  }

  /** In definitions-only mode, add PTC definition without creating the tool instance */
  if (definitionsOnly) {
    addBuiltInToolDefinition(toolRegistry, toolDefinitions, ProgrammaticToolCallingDefinition);
    logger.debug(
      `[buildToolClassification] PTC definition added for agent ${agentId} (definitions only)`,
    );
    return result;
  }

  /** PTC requires CODE_API_KEY for sandbox execution */
  try {
    const authValues = await loadAuthValues({
      userId,
      authFields: [EnvVar.CODE_API_KEY],
    });
    const codeApiKey = authValues[EnvVar.CODE_API_KEY];

    if (!codeApiKey) {
      logger.warn('[buildToolClassification] PTC configured but CODE_API_KEY not available');
      return result;
    }

    additionalTools.push(createProgrammaticToolCallingTool({ apiKey: codeApiKey }));

    /** Add PTC definition for event-driven mode */
    addBuiltInToolDefinition(toolRegistry, toolDefinitions, ProgrammaticToolCallingDefinition);

    logger.debug(`[buildToolClassification] PTC tool enabled for agent ${agentId}`);
  } catch (error) {
    /** Best effort: a PTC setup failure must not prevent the other tools from being returned */
    logger.error('[buildToolClassification] Error creating PTC tool:', error);
  }

  return result;
}