From 20a486a190a21b76485ec6d1661dced40c4211a7 Mon Sep 17 00:00:00 2001
From: Danny Avila <danny@librechat.ai>
Date: Wed, 20 Aug 2025 00:56:12 -0400
Subject: [PATCH] refactor: restructure toolkits to TS API

---
 .../tools/structured/OpenAIImageTools.js      | 155 +-----------
 api/app/clients/tools/structured/YouTube.js   | 222 ++++++------------
 packages/api/src/tools/index.ts               |   1 +
 packages/api/src/tools/toolkits/index.ts      |   2 +
 packages/api/src/tools/toolkits/oai.ts        | 153 ++++++++++++
 packages/api/src/tools/toolkits/yt.ts         |  61 +++++
 6 files changed, 298 insertions(+), 296 deletions(-)
 create mode 100644 packages/api/src/tools/toolkits/index.ts
 create mode 100644 packages/api/src/tools/toolkits/oai.ts
 create mode 100644 packages/api/src/tools/toolkits/yt.ts

diff --git a/api/app/clients/tools/structured/OpenAIImageTools.js b/api/app/clients/tools/structured/OpenAIImageTools.js
index 43bcbb794f..9a2a047bb1 100644
--- a/api/app/clients/tools/structured/OpenAIImageTools.js
+++ b/api/app/clients/tools/structured/OpenAIImageTools.js
@@ -1,69 +1,16 @@
-const { z } = require('zod');
 const axios = require('axios');
 const { v4 } = require('uuid');
 const OpenAI = require('openai');
 const FormData = require('form-data');
 const { ProxyAgent } = require('undici');
 const { tool } = require('@langchain/core/tools');
-const { logAxiosError } = require('@librechat/api');
 const { logger } = require('@librechat/data-schemas');
+const { logAxiosError, oaiToolkit } = require('@librechat/api');
 const { ContentTypes, EImageOutputType } = require('librechat-data-provider');
 const { getStrategyFunctions } = require('~/server/services/Files/strategies');
 const extractBaseURL = require('~/utils/extractBaseURL');
 const { getFiles } = require('~/models/File');
 
-/** Default descriptions for image generation tool  */
-const DEFAULT_IMAGE_GEN_DESCRIPTION = `
-Generates high-quality, original images based solely on text, not using any uploaded reference images.
-
-When to use \`image_gen_oai\`:
-- To create entirely new images from detailed text descriptions that do NOT reference any image files.
-
-When NOT to use \`image_gen_oai\`:
-- If the user has uploaded any images and requests modifications, enhancements, or remixing based on those uploads → use \`image_edit_oai\` instead.
-
-Generated image IDs will be returned in the response, so you can refer to them in future requests made to \`image_edit_oai\`.
-`.trim();
-
-/** Default description for image editing tool  */
-const DEFAULT_IMAGE_EDIT_DESCRIPTION =
-  `Generates high-quality, original images based on text and one or more uploaded/referenced images.
-
-When to use \`image_edit_oai\`:
-- The user wants to modify, extend, or remix one **or more** uploaded images, either:
-- Previously generated, or in the current request (both to be included in the \`image_ids\` array).
-- Always when the user refers to uploaded images for editing, enhancement, remixing, style transfer, or combining elements.
-- Any current or existing images are to be used as visual guides.
-- If there are any files in the current request, they are more likely than not expected as references for image edit requests.
-
-When NOT to use \`image_edit_oai\`:
-- Brand-new generations that do not rely on an existing image → use \`image_gen_oai\` instead.
-
-Both generated and referenced image IDs will be returned in the response, so you can refer to them in future requests made to \`image_edit_oai\`.
-`.trim();
-
-/** Default prompt descriptions  */
-const DEFAULT_IMAGE_GEN_PROMPT_DESCRIPTION = `Describe the image you want in detail. 
-      Be highly specific—break your idea into layers: 
-      (1) main concept and subject,
-      (2) composition and position,
-      (3) lighting and mood,
-      (4) style, medium, or camera details,
-      (5) important features (age, expression, clothing, etc.),
-      (6) background.
-      Use positive, descriptive language and specify what should be included, not what to avoid. 
-      List number and characteristics of people/objects, and mention style/technical requirements (e.g., "DSLR photo, 85mm lens, golden hour").
-      Do not reference any uploaded images—use for new image creation from text only.`;
-
-const DEFAULT_IMAGE_EDIT_PROMPT_DESCRIPTION = `Describe the changes, enhancements, or new ideas to apply to the uploaded image(s).
-      Be highly specific—break your request into layers: 
-      (1) main concept or transformation,
-      (2) specific edits/replacements or composition guidance,
-      (3) desired style, mood, or technique,
-      (4) features/items to keep, change, or add (such as objects, people, clothing, lighting, etc.).
-      Use positive, descriptive language and clarify what should be included or changed, not what to avoid.
-      Always base this prompt on the most recently uploaded reference images.`;
-
 const displayMessage =
   "The tool displayed an image. All generated images are already plainly visible, so don't repeat the descriptions in detail. Do not list download links as they are available in the UI already. The user may download the images by clicking on them, but do not mention anything about downloading to the user.";
 
@@ -91,22 +38,6 @@ function returnValue(value) {
   return value;
 }
 
-const getImageGenDescription = () => {
-  return process.env.IMAGE_GEN_OAI_DESCRIPTION || DEFAULT_IMAGE_GEN_DESCRIPTION;
-};
-
-const getImageEditDescription = () => {
-  return process.env.IMAGE_EDIT_OAI_DESCRIPTION || DEFAULT_IMAGE_EDIT_DESCRIPTION;
-};
-
-const getImageGenPromptDescription = () => {
-  return process.env.IMAGE_GEN_OAI_PROMPT_DESCRIPTION || DEFAULT_IMAGE_GEN_PROMPT_DESCRIPTION;
-};
-
-const getImageEditPromptDescription = () => {
-  return process.env.IMAGE_EDIT_OAI_PROMPT_DESCRIPTION || DEFAULT_IMAGE_EDIT_PROMPT_DESCRIPTION;
-};
-
 function createAbortHandler() {
   return function () {
     logger.debug('[ImageGenOAI] Image generation aborted');
@@ -287,46 +218,7 @@ Error Message: ${error.message}`);
       ];
       return [response, { content, file_ids }];
     },
-    {
-      name: 'image_gen_oai',
-      description: getImageGenDescription(),
-      schema: z.object({
-        prompt: z.string().max(32000).describe(getImageGenPromptDescription()),
-        background: z
-          .enum(['transparent', 'opaque', 'auto'])
-          .optional()
-          .describe(
-            'Sets transparency for the background. Must be one of transparent, opaque or auto (default). When transparent, the output format should be png or webp.',
-          ),
-        /*
-        n: z
-          .number()
-          .int()
-          .min(1)
-          .max(10)
-          .optional()
-          .describe('The number of images to generate. Must be between 1 and 10.'),
-        output_compression: z
-          .number()
-          .int()
-          .min(0)
-          .max(100)
-          .optional()
-          .describe('The compression level (0-100%) for webp or jpeg formats. Defaults to 100.'),
-           */
-        quality: z
-          .enum(['auto', 'high', 'medium', 'low'])
-          .optional()
-          .describe('The quality of the image. One of auto (default), high, medium, or low.'),
-        size: z
-          .enum(['auto', '1024x1024', '1536x1024', '1024x1536'])
-          .optional()
-          .describe(
-            'The size of the generated image. One of 1024x1024, 1536x1024 (landscape), 1024x1536 (portrait), or auto (default).',
-          ),
-      }),
-      responseFormat: 'content_and_artifact',
-    },
+    oaiToolkit.image_gen_oai,
   );
 
   /**
@@ -519,48 +411,7 @@ Error Message: ${error.message || 'Unknown error'}`);
         }
       }
     },
-    {
-      name: 'image_edit_oai',
-      description: getImageEditDescription(),
-      schema: z.object({
-        image_ids: z
-          .array(z.string())
-          .min(1)
-          .describe(
-            `
-IDs (image ID strings) of previously generated or uploaded images that should guide the edit.
-
-Guidelines:
-- If the user's request depends on any prior image(s), copy their image IDs into the \`image_ids\` array (in the same order the user refers to them).  
-- Never invent or hallucinate IDs; only use IDs that are still visible in the conversation context.
-- If no earlier image is relevant, omit the field entirely.
-`.trim(),
-          ),
-        prompt: z.string().max(32000).describe(getImageEditPromptDescription()),
-        /*
-        n: z
-          .number()
-          .int()
-          .min(1)
-          .max(10)
-          .optional()
-          .describe('The number of images to generate. Must be between 1 and 10. Defaults to 1.'),
-        */
-        quality: z
-          .enum(['auto', 'high', 'medium', 'low'])
-          .optional()
-          .describe(
-            'The quality of the image. One of auto (default), high, medium, or low. High/medium/low only supported for gpt-image-1.',
-          ),
-        size: z
-          .enum(['auto', '1024x1024', '1536x1024', '1024x1536', '256x256', '512x512'])
-          .optional()
-          .describe(
-            'The size of the generated images. For gpt-image-1: auto (default), 1024x1024, 1536x1024, 1024x1536. For dall-e-2: 256x256, 512x512, 1024x1024.',
-          ),
-      }),
-      responseFormat: 'content_and_artifact',
-    },
+    oaiToolkit.image_edit_oai,
   );
 
   return [imageGenTool, imageEditTool];
diff --git a/api/app/clients/tools/structured/YouTube.js b/api/app/clients/tools/structured/YouTube.js
index aa19fc211f..8d1c7b9ff9 100644
--- a/api/app/clients/tools/structured/YouTube.js
+++ b/api/app/clients/tools/structured/YouTube.js
@@ -1,9 +1,9 @@
-const { z } = require('zod');
+const { ytToolkit } = require('@librechat/api');
 const { tool } = require('@langchain/core/tools');
 const { youtube } = require('@googleapis/youtube');
+const { logger } = require('@librechat/data-schemas');
 const { YoutubeTranscript } = require('youtube-transcript');
 const { getApiKey } = require('./credentials');
-const { logger } = require('~/config');
 
 function extractVideoId(url) {
   const rawIdRegex = /^[a-zA-Z0-9_-]{11}$/;
@@ -29,7 +29,7 @@ function parseTranscript(transcriptResponse) {
     .map((entry) => entry.text.trim())
     .filter((text) => text)
     .join(' ')
-    .replaceAll('&amp;#39;', '\'');
+    .replaceAll('&amp;#39;', "'");
 }
 
 function createYouTubeTools(fields = {}) {
@@ -42,160 +42,94 @@ function createYouTubeTools(fields = {}) {
     auth: apiKey,
   });
 
-  const searchTool = tool(
-    async ({ query, maxResults = 5 }) => {
-      const response = await youtubeClient.search.list({
-        part: 'snippet',
-        q: query,
-        type: 'video',
-        maxResults: maxResults || 5,
-      });
-      const result = response.data.items.map((item) => ({
-        title: item.snippet.title,
-        description: item.snippet.description,
-        url: `https://www.youtube.com/watch?v=${item.id.videoId}`,
-      }));
-      return JSON.stringify(result, null, 2);
-    },
-    {
-      name: 'youtube_search',
-      description: `Search for YouTube videos by keyword or phrase.
-- Required: query (search terms to find videos)
-- Optional: maxResults (number of videos to return, 1-50, default: 5)
-- Returns: List of videos with titles, descriptions, and URLs
-- Use for: Finding specific videos, exploring content, research
-Example: query="cooking pasta tutorials" maxResults=3`,
-      schema: z.object({
-        query: z.string().describe('Search query terms'),
-        maxResults: z.number().int().min(1).max(50).optional().describe('Number of results (1-50)'),
-      }),
-    },
-  );
+  const searchTool = tool(async ({ query, maxResults = 5 }) => {
+    const response = await youtubeClient.search.list({
+      part: 'snippet',
+      q: query,
+      type: 'video',
+      maxResults: maxResults || 5,
+    });
+    const result = response.data.items.map((item) => ({
+      title: item.snippet.title,
+      description: item.snippet.description,
+      url: `https://www.youtube.com/watch?v=${item.id.videoId}`,
+    }));
+    return JSON.stringify(result, null, 2);
+  }, ytToolkit.youtube_search);
 
-  const infoTool = tool(
-    async ({ url }) => {
-      const videoId = extractVideoId(url);
-      if (!videoId) {
-        throw new Error('Invalid YouTube URL or video ID');
-      }
+  const infoTool = tool(async ({ url }) => {
+    const videoId = extractVideoId(url);
+    if (!videoId) {
+      throw new Error('Invalid YouTube URL or video ID');
+    }
 
-      const response = await youtubeClient.videos.list({
-        part: 'snippet,statistics',
-        id: videoId,
-      });
+    const response = await youtubeClient.videos.list({
+      part: 'snippet,statistics',
+      id: videoId,
+    });
 
-      if (!response.data.items?.length) {
-        throw new Error('Video not found');
-      }
-      const video = response.data.items[0];
+    if (!response.data.items?.length) {
+      throw new Error('Video not found');
+    }
+    const video = response.data.items[0];
 
-      const result = {
-        title: video.snippet.title,
-        description: video.snippet.description,
-        views: video.statistics.viewCount,
-        likes: video.statistics.likeCount,
-        comments: video.statistics.commentCount,
-      };
-      return JSON.stringify(result, null, 2);
-    },
-    {
-      name: 'youtube_info',
-      description: `Get detailed metadata and statistics for a specific YouTube video.
-- Required: url (full YouTube URL or video ID)
-- Returns: Video title, description, view count, like count, comment count
-- Use for: Getting video metrics and basic metadata
-- DO NOT USE FOR VIDEO SUMMARIES, USE TRANSCRIPTS FOR COMPREHENSIVE ANALYSIS
-- Accepts both full URLs and video IDs
-Example: url="https://youtube.com/watch?v=abc123" or url="abc123"`,
-      schema: z.object({
-        url: z.string().describe('YouTube video URL or ID'),
-      }),
-    },
-  );
+    const result = {
+      title: video.snippet.title,
+      description: video.snippet.description,
+      views: video.statistics.viewCount,
+      likes: video.statistics.likeCount,
+      comments: video.statistics.commentCount,
+    };
+    return JSON.stringify(result, null, 2);
+  }, ytToolkit.youtube_info);
 
-  const commentsTool = tool(
-    async ({ url, maxResults = 10 }) => {
-      const videoId = extractVideoId(url);
-      if (!videoId) {
-        throw new Error('Invalid YouTube URL or video ID');
-      }
+  const commentsTool = tool(async ({ url, maxResults = 10 }) => {
+    const videoId = extractVideoId(url);
+    if (!videoId) {
+      throw new Error('Invalid YouTube URL or video ID');
+    }
 
-      const response = await youtubeClient.commentThreads.list({
-        part: 'snippet',
-        videoId,
-        maxResults: maxResults || 10,
-      });
+    const response = await youtubeClient.commentThreads.list({
+      part: 'snippet',
+      videoId,
+      maxResults: maxResults || 10,
+    });
 
-      const result = response.data.items.map((item) => ({
-        author: item.snippet.topLevelComment.snippet.authorDisplayName,
-        text: item.snippet.topLevelComment.snippet.textDisplay,
-        likes: item.snippet.topLevelComment.snippet.likeCount,
-      }));
-      return JSON.stringify(result, null, 2);
-    },
-    {
-      name: 'youtube_comments',
-      description: `Retrieve top-level comments from a YouTube video.
-- Required: url (full YouTube URL or video ID)
-- Optional: maxResults (number of comments, 1-50, default: 10)
-- Returns: Comment text, author names, like counts
-- Use for: Sentiment analysis, audience feedback, engagement review
-Example: url="abc123" maxResults=20`,
-      schema: z.object({
-        url: z.string().describe('YouTube video URL or ID'),
-        maxResults: z
-          .number()
-          .int()
-          .min(1)
-          .max(50)
-          .optional()
-          .describe('Number of comments to retrieve'),
-      }),
-    },
-  );
+    const result = response.data.items.map((item) => ({
+      author: item.snippet.topLevelComment.snippet.authorDisplayName,
+      text: item.snippet.topLevelComment.snippet.textDisplay,
+      likes: item.snippet.topLevelComment.snippet.likeCount,
+    }));
+    return JSON.stringify(result, null, 2);
+  }, ytToolkit.youtube_comments);
 
-  const transcriptTool = tool(
-    async ({ url }) => {
-      const videoId = extractVideoId(url);
-      if (!videoId) {
-        throw new Error('Invalid YouTube URL or video ID');
+  const transcriptTool = tool(async ({ url }) => {
+    const videoId = extractVideoId(url);
+    if (!videoId) {
+      throw new Error('Invalid YouTube URL or video ID');
+    }
+
+    try {
+      try {
+        const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang: 'en' });
+        return parseTranscript(transcript);
+      } catch (e) {
+        logger.error(e);
       }
 
       try {
-        try {
-          const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang: 'en' });
-          return parseTranscript(transcript);
-        } catch (e) {
-          logger.error(e);
-        }
-
-        try {
-          const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang: 'de' });
-          return parseTranscript(transcript);
-        } catch (e) {
-          logger.error(e);
-        }
-
-        const transcript = await YoutubeTranscript.fetchTranscript(videoId);
+        const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang: 'de' });
         return parseTranscript(transcript);
-      } catch (error) {
-        throw new Error(`Failed to fetch transcript: ${error.message}`);
+      } catch (e) {
+        logger.error(e);
       }
-    },
-    {
-      name: 'youtube_transcript',
-      description: `Fetch and parse the transcript/captions of a YouTube video.
-- Required: url (full YouTube URL or video ID)
-- Returns: Full video transcript as plain text
-- Use for: Content analysis, summarization, translation reference
-- This is the "Go-to" tool for analyzing actual video content
-- Attempts to fetch English first, then German, then any available language
-Example: url="https://youtube.com/watch?v=abc123"`,
-      schema: z.object({
-        url: z.string().describe('YouTube video URL or ID'),
-      }),
-    },
-  );
+
+      const transcript = await YoutubeTranscript.fetchTranscript(videoId);
+      return parseTranscript(transcript);
+    } catch (error) {
+      throw new Error(`Failed to fetch transcript: ${error.message}`);
+    }
+  }, ytToolkit.youtube_transcript);
 
   return [searchTool, infoTool, commentsTool, transcriptTool];
 }
diff --git a/packages/api/src/tools/index.ts b/packages/api/src/tools/index.ts
index 16c5b2b508..eb375902f1 100644
--- a/packages/api/src/tools/index.ts
+++ b/packages/api/src/tools/index.ts
@@ -1 +1,2 @@
 export * from './format';
+export * from './toolkits';
diff --git a/packages/api/src/tools/toolkits/index.ts b/packages/api/src/tools/toolkits/index.ts
new file mode 100644
index 0000000000..33807c673b
--- /dev/null
+++ b/packages/api/src/tools/toolkits/index.ts
@@ -0,0 +1,2 @@
+export * from './oai';
+export * from './yt';
diff --git a/packages/api/src/tools/toolkits/oai.ts b/packages/api/src/tools/toolkits/oai.ts
new file mode 100644
index 0000000000..0881a0148a
--- /dev/null
+++ b/packages/api/src/tools/toolkits/oai.ts
@@ -0,0 +1,182 @@
+import { z } from 'zod';
+
+/**
+ * Default description for the image generation tool.
+ * Can be overridden with the `IMAGE_GEN_OAI_DESCRIPTION` environment variable.
+ */
+const DEFAULT_IMAGE_GEN_DESCRIPTION =
+  `Generates high-quality, original images based solely on text, not using any uploaded reference images.
+
+When to use \`image_gen_oai\`:
+- To create entirely new images from detailed text descriptions that do NOT reference any image files.
+
+When NOT to use \`image_gen_oai\`:
+- If the user has uploaded any images and requests modifications, enhancements, or remixing based on those uploads → use \`image_edit_oai\` instead.
+
+Generated image IDs will be returned in the response, so you can refer to them in future requests made to \`image_edit_oai\`.`;
+
+/** Returns the image generation tool description; a non-empty env override takes precedence. */
+const getImageGenDescription = () => {
+  return process.env.IMAGE_GEN_OAI_DESCRIPTION || DEFAULT_IMAGE_GEN_DESCRIPTION;
+};
+
+/**
+ * Default description for the `prompt` field of the image generation tool.
+ * Can be overridden with the `IMAGE_GEN_OAI_PROMPT_DESCRIPTION` environment variable.
+ */
+const DEFAULT_IMAGE_GEN_PROMPT_DESCRIPTION = `Describe the image you want in detail. 
+      Be highly specific—break your idea into layers: 
+      (1) main concept and subject,
+      (2) composition and position,
+      (3) lighting and mood,
+      (4) style, medium, or camera details,
+      (5) important features (age, expression, clothing, etc.),
+      (6) background.
+      Use positive, descriptive language and specify what should be included, not what to avoid. 
+      List number and characteristics of people/objects, and mention style/technical requirements (e.g., "DSLR photo, 85mm lens, golden hour").
+      Do not reference any uploaded images—use for new image creation from text only.`;
+
+/** Returns the generation `prompt` description; a non-empty env override takes precedence. */
+const getImageGenPromptDescription = () => {
+  return process.env.IMAGE_GEN_OAI_PROMPT_DESCRIPTION || DEFAULT_IMAGE_GEN_PROMPT_DESCRIPTION;
+};
+
+/**
+ * Default description for the image editing tool.
+ * Can be overridden with the `IMAGE_EDIT_OAI_DESCRIPTION` environment variable.
+ */
+const DEFAULT_IMAGE_EDIT_DESCRIPTION =
+  `Generates high-quality, original images based on text and one or more uploaded/referenced images.
+
+When to use \`image_edit_oai\`:
+- The user wants to modify, extend, or remix one **or more** uploaded images, either:
+- Previously generated, or in the current request (both to be included in the \`image_ids\` array).
+- Always when the user refers to uploaded images for editing, enhancement, remixing, style transfer, or combining elements.
+- Any current or existing images are to be used as visual guides.
+- If there are any files in the current request, they are more likely than not expected as references for image edit requests.
+
+When NOT to use \`image_edit_oai\`:
+- Brand-new generations that do not rely on an existing image → use \`image_gen_oai\` instead.
+
+Both generated and referenced image IDs will be returned in the response, so you can refer to them in future requests made to \`image_edit_oai\`.`;
+
+/** Returns the image editing tool description; a non-empty env override takes precedence. */
+const getImageEditDescription = () => {
+  return process.env.IMAGE_EDIT_OAI_DESCRIPTION || DEFAULT_IMAGE_EDIT_DESCRIPTION;
+};
+
+/**
+ * Default description for the `prompt` field of the image editing tool.
+ * Can be overridden with the `IMAGE_EDIT_OAI_PROMPT_DESCRIPTION` environment variable.
+ */
+const DEFAULT_IMAGE_EDIT_PROMPT_DESCRIPTION = `Describe the changes, enhancements, or new ideas to apply to the uploaded image(s).
+      Be highly specific—break your request into layers: 
+      (1) main concept or transformation,
+      (2) specific edits/replacements or composition guidance,
+      (3) desired style, mood, or technique,
+      (4) features/items to keep, change, or add (such as objects, people, clothing, lighting, etc.).
+      Use positive, descriptive language and clarify what should be included or changed, not what to avoid.
+      Always base this prompt on the most recently uploaded reference images.`;
+
+/** Returns the edit `prompt` description; a non-empty env override takes precedence. */
+const getImageEditPromptDescription = () => {
+  return process.env.IMAGE_EDIT_OAI_PROMPT_DESCRIPTION || DEFAULT_IMAGE_EDIT_PROMPT_DESCRIPTION;
+};
+
+/**
+ * Static definitions (name, description, zod schema, response format) for the OpenAI image tools.
+ *
+ * Each entry is meant to be passed as the second argument of LangChain's `tool()`;
+ * the handler function is supplied separately by the caller.
+ *
+ * NOTE: the descriptions are resolved while this object literal is evaluated, i.e. once at
+ * module load. The `IMAGE_*_OAI_*` environment variables therefore have to be set before this
+ * module is first imported; later changes to `process.env` are not picked up.
+ *
+ * The trailing `as const` applies recursively to the nested object and string literals,
+ * so the individual entries need no const assertion of their own.
+ */
+export const oaiToolkit = {
+  image_gen_oai: {
+    name: 'image_gen_oai',
+    description: getImageGenDescription(),
+    schema: z.object({
+      prompt: z.string().max(32000).describe(getImageGenPromptDescription()),
+      background: z
+        .enum(['transparent', 'opaque', 'auto'])
+        .optional()
+        .describe(
+          'Sets transparency for the background. Must be one of transparent, opaque or auto (default). When transparent, the output format should be png or webp.',
+        ),
+      /*
+        n: z
+          .number()
+          .int()
+          .min(1)
+          .max(10)
+          .optional()
+          .describe('The number of images to generate. Must be between 1 and 10.'),
+        output_compression: z
+          .number()
+          .int()
+          .min(0)
+          .max(100)
+          .optional()
+          .describe('The compression level (0-100%) for webp or jpeg formats. Defaults to 100.'),
+           */
+      quality: z
+        .enum(['auto', 'high', 'medium', 'low'])
+        .optional()
+        .describe('The quality of the image. One of auto (default), high, medium, or low.'),
+      size: z
+        .enum(['auto', '1024x1024', '1536x1024', '1024x1536'])
+        .optional()
+        .describe(
+          'The size of the generated image. One of 1024x1024, 1536x1024 (landscape), 1024x1536 (portrait), or auto (default).',
+        ),
+    }),
+    responseFormat: 'content_and_artifact',
+  },
+  image_edit_oai: {
+    name: 'image_edit_oai',
+    description: getImageEditDescription(),
+    schema: z.object({
+      image_ids: z
+        .array(z.string())
+        .min(1)
+        .describe(
+          `
+IDs (image ID strings) of previously generated or uploaded images that should guide the edit.
+
+Guidelines:
+- If the user's request depends on any prior image(s), copy their image IDs into the \`image_ids\` array (in the same order the user refers to them).  
+- Never invent or hallucinate IDs; only use IDs that are still visible in the conversation context.
+- If no earlier image is relevant, omit the field entirely.
+`.trim(),
+        ),
+      prompt: z.string().max(32000).describe(getImageEditPromptDescription()),
+      /*
+        n: z
+          .number()
+          .int()
+          .min(1)
+          .max(10)
+          .optional()
+          .describe('The number of images to generate. Must be between 1 and 10. Defaults to 1.'),
+        */
+      quality: z
+        .enum(['auto', 'high', 'medium', 'low'])
+        .optional()
+        .describe(
+          'The quality of the image. One of auto (default), high, medium, or low. High/medium/low only supported for gpt-image-1.',
+        ),
+      size: z
+        .enum(['auto', '1024x1024', '1536x1024', '1024x1536', '256x256', '512x512'])
+        .optional()
+        .describe(
+          'The size of the generated images. For gpt-image-1: auto (default), 1024x1024, 1536x1024, 1024x1536. For dall-e-2: 256x256, 512x512, 1024x1024.',
+        ),
+    }),
+    responseFormat: 'content_and_artifact',
+  },
+} as const;
diff --git a/packages/api/src/tools/toolkits/yt.ts b/packages/api/src/tools/toolkits/yt.ts
new file mode 100644
index 0000000000..7185a260d7
--- /dev/null
+++ b/packages/api/src/tools/toolkits/yt.ts
@@ -0,0 +1,71 @@
+import { z } from 'zod';
+
+/**
+ * Static definitions (name, description, zod input schema) for the YouTube tools.
+ *
+ * Each entry is meant to be passed as the second argument of LangChain's `tool()`;
+ * the handler function is supplied separately by the caller.
+ *
+ * The trailing `as const` applies recursively to the nested object and string literals,
+ * so the individual entries need no const assertion of their own.
+ */
+export const ytToolkit = {
+  youtube_search: {
+    name: 'youtube_search',
+    description: `Search for YouTube videos by keyword or phrase.
+- Required: query (search terms to find videos)
+- Optional: maxResults (number of videos to return, 1-50, default: 5)
+- Returns: List of videos with titles, descriptions, and URLs
+- Use for: Finding specific videos, exploring content, research
+Example: query="cooking pasta tutorials" maxResults=3`,
+    schema: z.object({
+      query: z.string().describe('Search query terms'),
+      maxResults: z.number().int().min(1).max(50).optional().describe('Number of results (1-50)'),
+    }),
+  },
+  youtube_info: {
+    name: 'youtube_info',
+    description: `Get detailed metadata and statistics for a specific YouTube video.
+- Required: url (full YouTube URL or video ID)
+- Returns: Video title, description, view count, like count, comment count
+- Use for: Getting video metrics and basic metadata
+- DO NOT USE FOR VIDEO SUMMARIES, USE TRANSCRIPTS FOR COMPREHENSIVE ANALYSIS
+- Accepts both full URLs and video IDs
+Example: url="https://youtube.com/watch?v=abc123" or url="abc123"`,
+    schema: z.object({
+      url: z.string().describe('YouTube video URL or ID'),
+    }),
+  },
+  youtube_comments: {
+    name: 'youtube_comments',
+    description: `Retrieve top-level comments from a YouTube video.
+- Required: url (full YouTube URL or video ID)
+- Optional: maxResults (number of comments, 1-50, default: 10)
+- Returns: Comment text, author names, like counts
+- Use for: Sentiment analysis, audience feedback, engagement review
+Example: url="abc123" maxResults=20`,
+    schema: z.object({
+      url: z.string().describe('YouTube video URL or ID'),
+      maxResults: z
+        .number()
+        .int()
+        .min(1)
+        .max(50)
+        .optional()
+        .describe('Number of comments to retrieve'),
+    }),
+  },
+  youtube_transcript: {
+    name: 'youtube_transcript',
+    description: `Fetch and parse the transcript/captions of a YouTube video.
+- Required: url (full YouTube URL or video ID)
+- Returns: Full video transcript as plain text
+- Use for: Content analysis, summarization, translation reference
+- This is the "Go-to" tool for analyzing actual video content
+- Attempts to fetch English first, then German, then any available language
+Example: url="https://youtube.com/watch?v=abc123"`,
+    schema: z.object({
+      url: z.string().describe('YouTube video URL or ID'),
+    }),
+  },
+} as const;