complete language detection for code blocks

This commit is contained in:
Danny Avila 2023-03-02 16:07:36 -05:00
parent c4d0787b49
commit d235bb7b1b
10 changed files with 133 additions and 37 deletions

View file

@ -41,6 +41,7 @@ Currently, this project is only functional with the `text-davinci-003` model.
- [x] Remember last selected model
- [x] Highlight.js for code blocks
- [x] Markdown handling
- [x] Language Detection for code blocks
- [ ] 'Copy to clipboard' button for code and messages
- [ ] Set user/model label and prompt prefix view option
- [ ] AI model change handling (whether to pseudo-persist convos or start new convos within existing convo)

53
app/detectCode.js Normal file
View file

@ -0,0 +1,53 @@
const { ModelOperations } = require('@vscode/vscode-languagedetection');
// Matches each fenced span, from an opening ``` to the nearest following ```, fences included.
// The body is matched lazily and may span newlines; the `g` flag finds every such span.
const codeRegex = /(```[\s\S]*?```)/g;
// Matches three backticks immediately followed by one or more word characters,
// capturing those characters (used below as the sign of an existing language tag).
const languageMatch = /```(\w+)/;
/**
 * Adds a detected language tag to the fenced code blocks of `text`.
 *
 * The text is returned unchanged when it has no fenced block, when any fence
 * is already followed by a language tag, or when anything throws along the way.
 *
 * @param {string} text - Message text that may contain ``` fenced code blocks.
 * @returns {Promise<string>} The text rebuilt from its segments, with each
 *   opening fence followed by the detected language id where one was found.
 */
const detectCode = async (text) => {
  try {
    // Nothing to do when there is no fenced block at all.
    if (!text.match(codeRegex)) {
      return text;
    }

    // A fence that already names a language disqualifies the whole text.
    if (text.match(languageMatch)) {
      return text;
    }

    const modelOperations = new ModelOperations();
    // regexSplit lives in an ES module, so this CommonJS file loads it with a dynamic import().
    const regexSplit = (await import('../src/utils/regexSplit.mjs')).default;
    const parts = regexSplit(text);

    const output = parts.map(async (part, i) => {
      if (!part.match(codeRegex)) {
        // Prose segments after the first are re-joined on a fresh line.
        return i > 0 ? `\n${part}` : part;
      }

      // Strip the opening and closing fences before handing the code to the model.
      const code = part.slice(3, -3);
      const results = await modelOperations.runModel(code);
      const bestGuess = results[0];

      // The model may return no candidates (e.g. for very short snippets).
      // Leave just this fence untagged instead of throwing and discarding
      // the tags detected for every other block in the message.
      if (!bestGuess) {
        return part;
      }

      return part.replace(/^```/, `\`\`\`${bestGuess.languageId}`);
    });

    return (await Promise.all(output)).join('');
  } catch (e) {
    // Best effort: on any failure, log and fall back to the untouched text.
    console.log('Error in detectCode function\n', e);
    return text;
  }
};
// Sample message text for manual testing: prose around one untagged fenced block
// (a Python REPL session). Only referenced by the commented-out test below.
const example3 = {
text: "By default, the function generates an 8-character password with uppercase and lowercase letters and digits, but no special characters.\n\nTo use this function, simply call it with the desired arguments. For example:\n\n```\n>>> generate_password()\n'wE5pUxV7'\n>>> generate_password(length=12, special_chars=True)\n'M4v&^gJ*8#qH'\n>>> generate_password(uppercase=False, digits=False)\n'zajyprxr'\n``` \n\nNote that the randomness is used to select characters from the available character sets, but the resulting password is always deterministic given the same inputs. This makes the function useful for generating secure passwords that meet specific requirements."
};
// Second sample: an untagged fenced block holding a full Python function definition.
// Not referenced anywhere in the visible part of this file.
const example4 = {
text: 'here\'s a cool function:\n```\nimport random\nimport string\n\ndef generate_password(length=8, uppercase=True, lowercase=True, digits=True, special_chars=False):\n """Generate a random password with specified requirements.\n\n Args:\n length (int): The length of the password. Default is 8.\n uppercase (bool): Whether to include uppercase letters. Default is True.\n lowercase (bool): Whether to include lowercase letters. Default is True.\n digits (bool): Whether to include digits. Default is True.\n special_chars (bool): Whether to include special characters. Default is False.\n\n Returns:\n str: A random password with the specified requirements.\n """\n # Define character sets to use in password generation\n chars = ""\n if uppercase:\n chars += string.ascii_uppercase\n if lowercase:\n chars += string.ascii_lowercase\n if digits:\n chars += string.digits\n if special_chars:\n chars += string.punctuation\n\n # Generate the password\n password = "".join(random.choice(chars) for _ in range(length))\n return password\n```\n\nThis function takes several arguments'
};
// Manual test harness, left disabled: uncomment the IIFE below to run detectCode
// against example3 and print the result.
// (async () => {
// const result = await detectCode(example3.text);
// console.log(result);
// })();
// detectCode is the module's only export.
module.exports = detectCode;

View file

@ -1,11 +1,13 @@
const titleConvo = require('./titleConvo');
const { askClient } = require('./chatgpt-client');
const { browserClient } = require('./chatgpt-browser');
const { askBing } = require('./bingai');
const titleConvo = require('./titleConvo');
const detectCode = require('./detectCode');
module.exports = {
titleConvo,
askClient,
askBing,
browserClient,
askBing,
titleConvo,
detectCode
};

View file

@ -1,27 +0,0 @@
// Manual smoke-test script: calls dotenv's config(), sends a single prompt through the
// `chatgpt` package's ChatGPTAPI client, and logs the response object.
require('dotenv').config();
// docs https://github.com/transitive-bullshit/chatgpt-api
(async () => {
// `chatgpt` is loaded with a dynamic import() inside this async IIFE.
const { ChatGPTAPI } = await import('chatgpt');
// The API key is read from the OPENAI_KEY environment variable.
const api = new ChatGPTAPI({ apiKey: process.env.OPENAI_KEY });
// send a message and wait for the response
let res = await api.sendMessage('What is OpenAI?');
console.log(res);
})();
// If you want to track the conversation, you'll need to pass the parentMessageid and conversationid:
// See example in models/Message.js
/*
// You can add streaming via the onProgress handler:
// timeout after 2 minutes (which will also abort the underlying HTTP request)
const res = await api.sendMessage('Write a 500 word essay on frogs.', {
// print the partial response as the AI is "typing"
onProgress: (partialResponse) => console.log(partialResponse.text)
})
// print the full text at the end
console.log(res.text)
*/

14
package-lock.json generated
View file

@ -13,6 +13,7 @@
"@radix-ui/react-dropdown-menu": "^2.0.2",
"@radix-ui/react-tabs": "^1.0.2",
"@reduxjs/toolkit": "^1.9.2",
"@vscode/vscode-languagedetection": "^1.0.22",
"@waylaidwanderer/chatgpt-api": "^1.15.1",
"chatgpt": "^4.2.0",
"class-variance-authority": "^0.4.0",
@ -4523,6 +4524,14 @@
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
}
},
"node_modules/@vscode/vscode-languagedetection": {
"version": "1.0.22",
"resolved": "https://registry.npmjs.org/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.22.tgz",
"integrity": "sha512-rQ/BgMyLuIXSmbA0MSkIPHtcOw14QkeDbAq19sjvaS9LTRr905yij0S8lsyqN5JgOsbtIx7pAcyOxFMzPmqhZQ==",
"bin": {
"vscode-languagedetection": "cli/index.js"
}
},
"node_modules/@waylaidwanderer/chatgpt-api": {
"version": "1.22.2",
"resolved": "https://registry.npmjs.org/@waylaidwanderer/chatgpt-api/-/chatgpt-api-1.22.2.tgz",
@ -18361,6 +18370,11 @@
}
}
},
"@vscode/vscode-languagedetection": {
"version": "1.0.22",
"resolved": "https://registry.npmjs.org/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.22.tgz",
"integrity": "sha512-rQ/BgMyLuIXSmbA0MSkIPHtcOw14QkeDbAq19sjvaS9LTRr905yij0S8lsyqN5JgOsbtIx7pAcyOxFMzPmqhZQ=="
},
"@waylaidwanderer/chatgpt-api": {
"version": "1.22.2",
"resolved": "https://registry.npmjs.org/@waylaidwanderer/chatgpt-api/-/chatgpt-api-1.22.2.tgz",

View file

@ -25,6 +25,7 @@
"@radix-ui/react-dropdown-menu": "^2.0.2",
"@radix-ui/react-tabs": "^1.0.2",
"@reduxjs/toolkit": "^1.9.2",
"@vscode/vscode-languagedetection": "^1.0.22",
"@waylaidwanderer/chatgpt-api": "^1.15.1",
"chatgpt": "^4.2.0",
"class-variance-authority": "^0.4.0",

View file

@ -2,7 +2,7 @@ const express = require('express');
const crypto = require('crypto');
const router = express.Router();
const askBing = require('./askBing');
const { titleConvo, askClient, browserClient } = require('../../app/');
const { titleConvo, askClient, browserClient, detectCode } = require('../../app/');
const { saveMessage, deleteMessages, saveConvo } = require('../../models');
const { handleError, sendMessage } = require('./handlers');
@ -49,7 +49,7 @@ router.post('/', async (req, res) => {
tokens = tokens.replace('[DONE]', '');
}
// tokens = appendCode(tokens);
// tokens = await detectCode(tokens);
sendMessage(res, { text: tokens, message: true, initial: i === 0 ? true : false });
i++;
}
@ -91,6 +91,7 @@ router.post('/', async (req, res) => {
}
gptResponse.sender = model;
gptResponse.final = true;
gptResponse.text = await detectCode(gptResponse.text);
await saveMessage(gptResponse);
await saveConvo(gptResponse);
sendMessage(res, gptResponse);

View file

@ -1,11 +1,11 @@
import React from 'react';
export default function Embed({ children, language = ''}) {
export default function Embed({ children, language = '', matched}) {
return (
<pre>
<div className="mb-4 rounded-md bg-black">
<div className="relative flex items-center bg-gray-800 px-4 py-2 font-sans text-xs text-gray-200 rounded-tl-md rounded-tr-md">
<span className="">{ language }</span>
<span className="">{ (language === 'javascript' && !matched ? '' : language) }</span>
<button className="ml-auto flex gap-2">
<svg
stroke="currentColor"

View file

@ -49,12 +49,16 @@ export default function TextWrapper({ text }) {
const codeParts = parts.map((part, i) => {
if (part.match(codeRegex)) {
let language = 'javascript';
let matched = false;
if (part.match(languageMatch)) {
language = part.match(languageMatch)[1].toLowerCase();
const validLanguage = languages.some((lang) => language === lang);
part = validLanguage ? part.replace(languageMatch, '```') : part;
language = validLanguage ? language : 'javascript';
part = part.replace(languageMatch, '```');
matched = true;
// highlight.js language validation
// const validLanguage = languages.some((lang) => language === lang);
// part = validLanguage ? part.replace(languageMatch, '```') : part;
// language = validLanguage ? language : 'javascript';
}
part = part.replace(newLineMatch, '```');
@ -63,6 +67,7 @@ export default function TextWrapper({ text }) {
<Embed
key={i}
language={language}
matched={matched}
>
<Highlight
code={part.slice(3, -3)}

46
src/utils/regexSplit.mjs Normal file
View file

@ -0,0 +1,46 @@
// Fenced block whose body sits on its own lines: ```lang\n<code>\n```
const primaryRegex = /```([^`\n]*?)\n([\s\S]*?)\n```/g;
// Looser fallback in which the newline after the opening fence and the one
// before the closing fence are both optional (e.g. ```code```).
const secondaryRegex = /```([^`\n]*?)\n?([\s\S]*?)\n?```/g;

/**
 * Normalizes a prose segment found outside the code fences.
 * Currently this only trims surrounding whitespace.
 *
 * @param {string} text - Text located between or after fenced blocks.
 * @returns {string} The trimmed text.
 */
const unenclosedCodeTest = (text) => text.trim();

/**
 * Splits a string into alternating prose and fenced-code segments.
 *
 * The first element is the untrimmed text before the first fence. Each code
 * block is re-emitted as "```<language>\n<code>\n```". Text between blocks is
 * trimmed and always emitted (possibly as an empty string); text after the
 * last block is trimmed and emitted only when there is any.
 *
 * @param {string} string - Text that may contain ``` fenced code blocks.
 * @returns {string[]} The ordered segments; `[string]` when no block is found.
 */
export default function regexSplit(string) {
  let matches = [...string.matchAll(primaryRegex)];
  if (matches.length === 0) {
    matches = [...string.matchAll(secondaryRegex)];
  }

  // No fenced block under either pattern: hand the input back as one segment
  // rather than dereferencing a missing first match.
  if (matches.length === 0) {
    return [string];
  }

  const output = [string.slice(0, matches[0].index)];

  matches.forEach((match, i) => {
    const [fullMatch, language, code] = match;
    output.push(`\`\`\`${language}\n${code}\n\`\`\``);

    const segmentStart = match.index + fullMatch.length;
    const nextMatch = matches[i + 1];

    if (nextMatch) {
      output.push(unenclosedCodeTest(string.slice(segmentStart, nextMatch.index)));
      return;
    }

    // Slice by position, not by searching for the block's text: an identical
    // block earlier in the string would otherwise yield the wrong trailing text.
    const rest = string.slice(segmentStart);
    if (rest) {
      output.push(unenclosedCodeTest(rest));
    }
  });

  return output;
}