🌍 : Updated Translations & AI Generation Scripts (#2666)

* chore: bun scripts

* feat: comparisons

* refactor: move scripts to own folder

* feat: generated prompts script and Es output

* feat: generated prompts

* created prompts

* feat: Russian localization prompts

* translation setup

* additional ES translations

* additional ES translations

* translation services

* feat: additional translations

* fix regex for parseParamPrompt

* RU translations

* remove stores from git

* update gitignore

* update gitignore

* ZH translations

* move gen prompt output location

* ZH traditional translations

* AR translations

* chore: rename

* JP

* cleanup scripts

* add additional instruction prompts

* fix translation prompt and add DE

* FR translations (rate limited so not complete)

* chore: update translation comparisons

* chore: remove unused AnthropicClient changes

* refactor: use compositional styling for archive/delete buttons, fix manage archive table styling
This commit is contained in:
Danny Avila 2024-05-10 15:56:25 -04:00 committed by GitHub
parent 978009787c
commit 2ec821ea4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 51655 additions and 79 deletions

View file

@ -0,0 +1,76 @@
import Anthropic from '@anthropic-ai/sdk';
import type * as a from '@anthropic-ai/sdk';
import {
parseParamFromPrompt,
genTranslationPrompt,
} from '../../api/app/clients/prompts/titlePrompts';
/**
 * Builds a new Anthropic SDK client.
 *
 * The API key is read from the `ANTHROPIC_API_KEY` environment variable each time
 * the function is called; no client instance is cached.
 *
 * @returns A freshly constructed `Anthropic` client.
 */
export function getClient() {
  const apiKey = process.env.ANTHROPIC_API_KEY;
  return new Anthropic({ apiKey });
}
/**
 * Asks Claude to translate a single localization key.
 *
 * This function capitalizes on [Anthropic's function calling training](https://docs.anthropic.com/claude/docs/functions-external-tools):
 * the system prompt comes from `genTranslationPrompt(translationPrompt)` and the reply is
 * parsed with `parseParamFromPrompt(text, 'translation')`.
 *
 * @param params - The parameters for the generation.
 * @param params.key - The localization key being translated.
 * @param params.baselineTranslation - The baseline text for the key, quoted in the request.
 * @param params.translationPrompt - Instructions handed to `genTranslationPrompt`.
 * @param params.context - Similar keys and phrases; each entry's `pageContent` is listed in the request.
 *
 * @returns The parsed translation, or `undefined` when the request fails or the reply has
 * no leading text block. Failures are logged, never thrown.
 */
export async function translateKeyPhrase({
  key,
  baselineTranslation,
  translationPrompt,
  context,
}: {
  key: string;
  baselineTranslation: string;
  translationPrompt: string;
  context: Array<{ pageContent: string }>;
}): Promise<string | undefined> {
  const model = 'claude-3-sonnet-20240229';
  const system = genTranslationPrompt(translationPrompt);
  // The template is kept flush-left: its whitespace is part of the message sent to the model.
  const content = `Current key: \`${key}\`
Baseline translation: ${baselineTranslation}
Please generate a translation for the key in the target language as described by the function.
Similar key and phrases: ${context.map((c) => c.pageContent).join(', ')}
Remember to invoke the tool with proper tool invocation; e.g.:
<invoke>\n<tool_name>submit_translation</tool_name>\n<parameters>\n<translation>Your Translation Here</translation>\n</parameters>\n</invoke>`;
  const message: a.Anthropic.MessageParam = { role: 'user', content };
  const requestOptions: a.Anthropic.MessageCreateParamsNonStreaming = {
    model,
    temperature: 0.3,
    max_tokens: 1024,
    system,
    stop_sequences: ['\n\nHuman:', '\n\nAssistant', '</function_calls>'],
    messages: [message],
    stream: false,
  };
  try {
    const client = getClient();
    const response = await client.messages.create(requestOptions);
    // The reply may be empty or start with a non-text block; check before reading `.text`.
    const firstBlock = response.content[0];
    if (!firstBlock || firstBlock.type !== 'text') {
      console.error('[AnthropicClient] The response did not contain a text block for key', key);
      return undefined;
    }
    return parseParamFromPrompt(firstBlock.text, 'translation');
  } catch (e) {
    console.error('[AnthropicClient] There was an issue generating the translation', e);
    return undefined;
  }
}

View file

@ -0,0 +1,72 @@
import fs from 'fs';
import path from 'path';
import { exec } from 'child_process';
/** One comparison record: the base-language text next to the translated text. */
type ComparisonEntry = { english: unknown; translated: unknown };

/** Pairs every key that exists in both language objects with its base and translated value. */
function buildComparisons(
  baseLanguage: Record<string, unknown>,
  otherLanguage: Record<string, unknown>,
): Record<string, ComparisonEntry> {
  const comparisons: Record<string, ComparisonEntry> = {};
  for (const key of Object.keys(otherLanguage)) {
    // `.call` form works even if the imported object has no Object prototype.
    if (Object.prototype.hasOwnProperty.call(baseLanguage, key)) {
      comparisons[key] = {
        english: baseLanguage[key],
        translated: otherLanguage[key],
      };
    }
  }
  return comparisons;
}

/** Replaces an existing `export const comparisons = {...};` in the file text, or appends one. */
function upsertComparisonsExport(
  fileContent: string,
  comparisons: Record<string, ComparisonEntry>,
): string {
  const comparisonsObjRegex = /export const comparisons = {[\s\S]*?};/gm;
  const comparisonsExport = `\nexport const comparisons = ${JSON.stringify(comparisons, null, 2)};\n`;
  let hasComparisons = false;
  // A replacer function inserts the JSON verbatim. A replacement *string* would expand
  // `$&`, `$$`, `$'` etc. found inside translations and corrupt the written file.
  const replaced = fileContent.replace(comparisonsObjRegex, () => {
    hasComparisons = true;
    return comparisonsExport;
  });
  return hasComparisons ? replaced : fileContent.trim() + comparisonsExport;
}

/**
 * Writes an `export const comparisons` object into every language file.
 *
 * For each `.ts`/`.tsx` file in `languagesDir` other than the base file, the default
 * exports of the base file and that file are compared key by key, and the resulting
 * comparisons object is written back into the language file. ESLint `--fix` is then
 * run over the directory via `bunx`.
 *
 * @param baseFilePath - Path to the base language file.
 * @param languagesDir - Directory containing the language files.
 */
async function main(baseFilePath: string, languagesDir: string) {
  // Resolve once so the import and the "skip the base file" check agree on the same path.
  const resolvedBasePath = path.resolve(baseFilePath);
  const { default: baseLanguage } = await import(resolvedBasePath);
  const files = fs.readdirSync(languagesDir);
  for (const file of files) {
    const ext = path.extname(file);
    if (ext !== '.ts' && ext !== '.tsx') continue; // Only process TypeScript files
    const filePath = path.resolve(languagesDir, file);
    if (filePath === resolvedBasePath) continue; // Skip the base language file
    const { default: otherLanguage } = await import(filePath);
    const comparisons = buildComparisons(baseLanguage, otherLanguage);
    const fileContent = fs.readFileSync(filePath, 'utf8');
    fs.writeFileSync(filePath, upsertComparisonsExport(fileContent, comparisons)); // Write updated content back to file
  }
  // Execute ESLint with the --fix option on the entire directory
  exec(`bunx eslint "${languagesDir}" --fix`, (error, stdout, stderr) => {
    if (error) {
      console.error('Error executing ESLint:', error);
      return;
    }
    if (stderr) {
      console.error('ESLint stderr:', stderr);
      return;
    }
    console.log('ESLint stdout:', stdout);
  });
}
// Script entry point: run the comparison update for the localization directory,
// using Eng.ts as the base language file.
const languagesDir = './client/src/localization/languages';
const baseFilePath = path.resolve(languagesDir, 'Eng.ts');
main(baseFilePath, languagesDir).catch((error) => {
  console.error(error);
});
// const prompt = `
// Write a prompt that is mindful of the nuances in the language with respect to its English counterpart, which serves as the baseline for translations. Here are the comparisons between the language translations and their English counterparts:
// ${comparisons}
// Please consider the above comparisons to enhance understanding and guide improvements in translations. Provide insights or suggestions that could help refine the translation process, focusing on cultural and contextual relevance.
// Please craft a prompt that can be used to better inform future translations to this language. Write this prompt in the translated language, with all its nuances detected, not in the English.
// `;

View file

@ -0,0 +1,43 @@
import dotenv from 'dotenv';
// `path` must name the env file itself; pointing it at a directory makes dotenv fail
// to read anything and load no variables.
dotenv.config({
  path: './.env',
});
import { OpenAIEmbeddings } from "@langchain/openai";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import * as fs from "fs";
import * as path from "path";
/**
 * Builds and saves a vector store for the text of a module file.
 *
 * The file is split into chunks (`chunkSize: 600`), embedded with `OpenAIEmbeddings`,
 * and saved as an HNSWLib store under `./config/translations/stores/<basename of modulePath>`.
 * If that directory already exists, nothing is computed or written.
 *
 * Errors are logged and swallowed; the returned promise always resolves.
 *
 * @param modulePath - Path of the file whose text is embedded.
 */
export const storeEmbeddings = async (modulePath: string) => {
  try {
    const directory = `./config/translations/stores/${path.basename(modulePath)}`;
    // Check first: embedding calls the OpenAI API, so skip it when the result would be discarded.
    if (fs.existsSync(directory)) {
      console.log(`Directory already exists: ${directory}`);
      return;
    }
    const text = fs.readFileSync(modulePath, "utf8");
    const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 600 });
    const docs = await textSplitter.createDocuments([text]);
    const vectorStore = await HNSWLib.fromDocuments(docs, new OpenAIEmbeddings());
    // Create the directory only after embedding succeeded, so a failed API call
    // does not leave an empty directory that later runs would treat as a finished store.
    fs.mkdirSync(directory, { recursive: true });
    console.log(`Directory created: ${directory}`);
    await vectorStore.save(directory);
  } catch (error) {
    console.error(`Error storing embeddings`);
    console.error(error);
  }
};
/**
 * Loads the saved HNSWLib vector store for a module file.
 *
 * The store is read from `./config/translations/stores/<basename of modulePath>`.
 *
 * @param modulePath - Path of the module whose store should be loaded.
 * @returns The loaded vector store, or `undefined` if loading failed (the error is logged).
 */
export const loadEmbeddings = async (modulePath: string) => {
  try {
    const storeName = path.basename(modulePath);
    const storeDirectory = `./config/translations/stores/${storeName}`;
    return await HNSWLib.load(storeDirectory, new OpenAIEmbeddings());
  } catch (error) {
    console.error(`Error loading embeddings`);
    console.error(error);
  }
};

View file

@ -0,0 +1,21 @@
import path from 'path';
import main from './main';
/**
 * Runs the imported `main` routine for one language file.
 *
 * @param baseFilePath - Path to the base language file.
 * @param compareFilename - Path to the language file to process.
 */
async function processFile(baseFilePath: string, compareFilename: string): Promise<void> {
  return main(baseFilePath, compareFilename);
}
// argv[0] is the runtime and argv[1] the script path, so only entries from index 2 on
// are user arguments. Taking the last element of the whole argv would always yield one
// value (the script path when nothing was passed) and the usage check could never fire.
// The last user argument is used as the language file name.
const args = process.argv.slice(2).slice(-1);
if (args.length !== 1) {
  console.log(process.argv, args);
  console.error('Usage: bun file.ts <compareFilename>');
  process.exit(1);
}
const languagesDir = './client/src/localization/languages';
const baseFilePath = path.resolve(languagesDir, 'Eng.ts');
const compareFilename = path.resolve(languagesDir, `${args[0]}.ts`);
processFile(baseFilePath, compareFilename).catch(console.error);

View file

@ -0,0 +1,75 @@
import fs from 'fs';
import path from 'path';
// Directory that is scanned for language files.
const baseDirPath = './client/src/localization/languages';
// Directory the generated Markdown prompt files are written to.
const promptsDirPath = './client/src/localization/prompts/instructions';
/**
 * Makes sure `directory` exists.
 *
 * If the path is accessible nothing is created; otherwise it is created recursively.
 *
 * @param directory - Directory path to check or create.
 * @returns Whatever `fs.promises.access` or, on the fallback path, `fs.promises.mkdir` resolves with.
 */
async function ensureDirectoryExists(directory: string) {
  try {
    return await fs.promises.access(directory);
  } catch {
    return fs.promises.mkdir(directory, { recursive: true });
  }
}
/**
 * Renders a value as a Markdown bullet list, recursing into nested objects.
 *
 * Non-objects (and `null`) are returned as `String(obj)`. For objects, every entry
 * becomes a `- **key**: value` bullet; object-valued entries start a nested list one
 * depth level deeper, and an `english` entry is preceded by an extra newline.
 *
 * @param obj - Value to render.
 * @param depth - Current nesting level; it controls the bullet indentation.
 * @returns The Markdown text.
 */
function generateMarkdownFromObject(obj: any, depth: number = 0): string {
  const isContainer = obj !== null && typeof obj === 'object';
  if (!isContainer) {
    return String(obj);
  }
  const pad = ' '.repeat(depth * 2);
  const bullets: string[] = [];
  for (const [name, entry] of Object.entries(obj)) {
    if (typeof entry === 'object') {
      // Nested object: header bullet followed directly by the rendered children.
      bullets.push(`\n${pad}- **${name}**:${generateMarkdownFromObject(entry, depth + 1)}`);
      continue;
    }
    const lead = name === 'english' ? '\n' : '';
    bullets.push(`${lead}${pad}- **${name}**: ${entry}`);
  }
  return bullets.join('\n');
}
/**
 * Builds the Markdown prompt text for one language file.
 *
 * The file is imported dynamically; its `comparisons` export, when present and truthy,
 * is rendered with `generateMarkdownFromObject`, otherwise a placeholder sentence is used.
 *
 * @param filePath - Path of the language file to import.
 * @param fileName - Name of the file; not used by the function body.
 * @returns The prompt as a Markdown string.
 */
async function generatePromptForFile(filePath: string, fileName: string) {
  // Resolve to an absolute path before handing it to the dynamic import.
  const { comparisons } = await import(path.resolve(filePath));
  const comparisonsMarkdown = comparisons
    ? generateMarkdownFromObject(comparisons)
    : 'No comparisons object found.';
  // The template is kept flush-left: its whitespace is part of the generated file.
  return `# Instructions for Translation
Write a prompt that is mindful of the nuances in the language with respect to its English counterpart, which serves as the baseline for translations. Here are the comparisons between the language translations and their English counterparts:
${comparisonsMarkdown}
Please consider the above comparisons to enhance understanding and guide improvements in translations.
Provide insights or suggestions that could help refine the translation process, focusing on cultural and contextual relevance.
Please craft a prompt that can be used to better inform future translations to this language.
Write this prompt in the translated language, with all its nuances detected, not in the English.
`;
}
/**
 * Generates one Markdown prompt file per translated language file.
 *
 * Ensures the prompts directory exists, then for every TypeScript file in
 * `baseDirPath` (except the English base file) writes `<name>.md` into `promptsDirPath`
 * with the content produced by `generatePromptForFile`.
 */
async function createPromptsForTranslations() {
  await ensureDirectoryExists(promptsDirPath);
  const files = await fs.promises.readdir(baseDirPath);
  for (const file of files) {
    // Only language modules are processed. The directory can also hold other files
    // (e.g. generated `*_missing_keys.json` output) that must not be imported.
    const ext = path.extname(file);
    if (ext !== '.ts' && ext !== '.tsx') continue;
    if (file.includes('Eng.ts')) continue; // Ensure English or base file is excluded
    const filePath = path.join(baseDirPath, file);
    const promptContent = await generatePromptForFile(filePath, file);
    const outputFilePath = path.join(promptsDirPath, `${path.basename(file, '.ts')}.md`);
    await fs.promises.writeFile(outputFilePath, promptContent);
    console.log(`Prompt created for: ${file}`);
  }
}
// Script entry point. Failures are logged explicitly instead of surfacing as an
// unhandled rejection; the exit code still signals the failure.
createPromptsForTranslations()
  .then(() => console.log('Prompts generation completed.'))
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });

View file

@ -0,0 +1,42 @@
import fs from 'fs';
import path from 'path';
/**
 * Imports a language module and lists the keys of its default export.
 *
 * @param filePath - Path of the module to import.
 * @returns The own enumerable keys of the module's default export.
 */
async function readKeysFromFile(filePath: string): Promise<string[]> {
  const { default: translations } = await import(filePath);
  return Object.keys(translations);
}
/**
 * Lists the base keys that are absent from another file's keys.
 *
 * @param baseKeys - Keys of the base language file.
 * @param keysFromOtherFile - Keys of the file being compared.
 * @returns The entries of `baseKeys` not found in `keysFromOtherFile`, in their original order.
 */
async function compareKeys(baseKeys: string[], keysFromOtherFile: string[]): Promise<string[]> {
  // A Set gives constant-time membership checks instead of rescanning the array per key.
  const presentKeys = new Set(keysFromOtherFile);
  return baseKeys.filter((key) => !presentKeys.has(key));
}
/**
 * Reports, per language file, which keys of the base file are missing.
 *
 * Every `.ts`/`.tsx` file in `languagesDir` other than the base file is compared
 * against the base keys; files with missing keys are printed to the console.
 * A failure on one file is logged and does not stop the remaining files.
 *
 * @param baseFilePath - Path to the base language file.
 * @param languagesDir - Directory containing the language files.
 */
async function main(baseFilePath: string, languagesDir: string) {
  const baseKeys = await readKeysFromFile(baseFilePath);
  const languageFiles = fs
    .readdirSync(languagesDir)
    .filter((name) => ['.ts', '.tsx'].includes(path.extname(name))); // TypeScript files only
  for (const name of languageFiles) {
    const compareFilePath = path.resolve(languagesDir, name);
    if (compareFilePath === baseFilePath) {
      continue; // Skip the base file
    }
    try {
      const otherKeys = await readKeysFromFile(compareFilePath);
      const missingKeys = await compareKeys(baseKeys, otherKeys);
      if (missingKeys.length === 0) {
        continue;
      }
      console.log(`Missing Keys in ${name}:`, missingKeys);
    } catch (error) {
      console.error(`Error processing file ${name}:`, error);
    }
  }
}
// Script entry point: the language files live in `languagesDir`, and Eng.ts inside it
// is the base (English) file every other file is compared against.
const languagesDir = './client/src/localization/languages';
const baseFilePath = path.resolve(languagesDir, 'Eng.ts');
main(baseFilePath, languagesDir).catch((error) => {
  console.error(error);
});

View file

@ -0,0 +1,38 @@
import fs from 'fs';
import path from 'path';
import { processLanguageModule, processMissingKey } from './process';
/**
 * Translates the keys a language file is missing relative to the base file.
 *
 * The language module is first prepared via `processLanguageModule`; if that yields no
 * prompt the function logs an error and stops. Otherwise every key present in the base
 * module's default export but absent from the compared module is passed to
 * `processMissingKey`, and the results are written as JSON to
 * `<compared file name>_missing_keys.json` next to the compared file.
 * No file is written when no keys are missing.
 *
 * @param baseFilePath - Path to the base language file.
 * @param compareFilePath - Path to the language file being completed.
 */
export default async function main(baseFilePath: string, compareFilePath: string) {
  const moduleName = path.basename(compareFilePath);
  const prompt = await processLanguageModule(moduleName, compareFilePath);
  if (prompt === undefined) {
    console.error(`Prompt not found for module: ${moduleName}`);
    return;
  }

  const baseModule = await import(baseFilePath);
  const baseKeys = Object.keys(baseModule.default);
  const compareModule = await import(compareFilePath);
  const existingKeys = Object.keys(compareModule.default);
  const missingKeys = baseKeys.filter((key) => !existingKeys.includes(key));
  if (missingKeys.length === 0) {
    return;
  }

  const keyTranslations = {};
  for (const key of missingKeys) {
    keyTranslations[key] = await processMissingKey({
      key,
      baselineTranslation: baseModule.default[key] || "No baseline translation available",
      translationPrompt: prompt,
      moduleName,
    });
  }

  // The output file sits beside the compared file, named after it without its extension.
  const stem = path.basename(compareFilePath, path.extname(compareFilePath));
  const outputFilePath = path.join(path.dirname(compareFilePath), `${stem}_missing_keys.json`);
  fs.writeFileSync(outputFilePath, JSON.stringify(keyTranslations, null, 2));
}

View file

@ -0,0 +1,47 @@
import fs from 'fs';
import path from 'path';
import { storeEmbeddings, loadEmbeddings } from './embeddings';
import { translateKeyPhrase } from './anthropic';
// Per-module counter, keyed by module name: reset to 0 in processLanguageModule and
// incremented on every processMissingKey call.
const missingKeyMap = {};
// Per-module vector store, keyed by module name: filled in processLanguageModule and
// queried in processMissingKey.
const vectorStoreMap = {};
/**
 * Prepares a language module for translation and returns its prompt text.
 *
 * The prompt is read from `../prompts/<module name without extension>.md`, relative to
 * the module's directory. If that file is missing an error is logged and `undefined`
 * is returned. Otherwise embeddings are stored and loaded for the module, the loaded
 * store is registered under `moduleName`, the module's key count is logged, and the
 * module's missing-key counter is reset to 0.
 *
 * @param moduleName - Name used as the key for the per-module maps.
 * @param modulePath - Path to the language module file.
 * @returns The prompt file content, or `undefined` when no prompt file exists.
 */
export async function processLanguageModule(moduleName: string, modulePath: string) {
  const stem = path.basename(moduleName, path.extname(moduleName));
  const promptFilePath = path.join(path.dirname(modulePath), '../prompts', `${stem}.md`);
  console.log(promptFilePath);

  const promptExists = fs.existsSync(promptFilePath);
  if (!promptExists) {
    console.error(`Prompt file not found for module: ${moduleName}`);
    return undefined;
  }
  const prompt = fs.readFileSync(promptFilePath, 'utf-8');

  await storeEmbeddings(modulePath);
  vectorStoreMap[moduleName] = await loadEmbeddings(modulePath);

  const { default: translations } = await import(modulePath);
  console.log(`Keys in module: ${moduleName}:`, Object.keys(translations).length);
  missingKeyMap[moduleName] = 0;
  return prompt;
}
/**
 * Translates one missing key for a module that `processLanguageModule` has prepared.
 *
 * The module's vector store is searched for the 5 entries most similar to the key;
 * those are passed as context to `translateKeyPhrase`. The key and its translation are
 * logged, and the module's missing-key counter is incremented.
 *
 * @param params.key - The missing localization key.
 * @param params.baselineTranslation - Baseline text for the key.
 * @param params.moduleName - Name under which the module's vector store was registered.
 * @param params.translationPrompt - Prompt text forwarded to `translateKeyPhrase`.
 * @returns Whatever `translateKeyPhrase` resolves with.
 * @throws Error when no vector store is registered for `moduleName`.
 */
export async function processMissingKey({
  key,
  baselineTranslation,
  moduleName,
  translationPrompt,
}: {
  key: string;
  baselineTranslation: string;
  moduleName: string;
  translationPrompt: string;
}) {
  // Start from 0 when the module was never registered, instead of producing NaN.
  missingKeyMap[moduleName] = (missingKeyMap[moduleName] ?? 0) + 1;
  const vectorStore = vectorStoreMap[moduleName];
  if (!vectorStore) {
    // loadEmbeddings logs and returns undefined on failure; fail with a clear message
    // rather than a TypeError on `similaritySearch`.
    throw new Error(`No vector store loaded for module: ${moduleName}`);
  }
  const context = await vectorStore.similaritySearch(key, 5);
  const translation = await translateKeyPhrase({
    key,
    baselineTranslation,
    translationPrompt,
    context,
  });
  console.log(`"${key}": "${translation}",\n`);
  return translation;
}

View file

@ -0,0 +1,21 @@
import fs from 'fs';
import path from 'path';
import main from './main';
/**
 * Runs the imported `main` routine for every language file in a directory.
 *
 * Only `.ts`/`.tsx` files are considered and the base file itself is skipped.
 * Files are processed one after another.
 *
 * @param baseFilePath - Path to the base language file.
 * @param languagesDir - Directory containing the language files.
 */
async function scanDirectory(baseFilePath: string, languagesDir: string) {
  const typeScriptExtensions = ['.ts', '.tsx'];
  const candidatePaths = fs
    .readdirSync(languagesDir)
    .filter((name) => typeScriptExtensions.includes(path.extname(name)))
    .map((name) => path.resolve(languagesDir, name))
    .filter((candidate) => candidate !== baseFilePath);
  for (const compareFilePath of candidatePaths) {
    await main(baseFilePath, compareFilePath);
  }
}
// Script entry point: scan the localization directory, with Eng.ts as the base file.
const languagesDir = './client/src/localization/languages';
const baseFilePath = path.resolve(languagesDir, 'Eng.ts');
scanDirectory(baseFilePath, languagesDir).catch((error) => {
  console.error(error);
});