From 7edf6f8d139d136be5d75b8635f016aaaa0e9711 Mon Sep 17 00:00:00 2001 From: zhangbk1 Date: Tue, 2 Apr 2024 17:39:23 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E9=80=89=E4=B8=AD=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=E4=B8=B2=E5=88=86=E8=AF=8D=20=E4=BE=8B=E5=A6=82=20Tom?= =?UTF-8?q?Likes=20eat=20iceCream.=20=E5=88=86=E8=AF=8D=E4=B8=BA=20tom|lik?= =?UTF-8?q?es|eat|ice|cream|.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/extension.ts | 6 +- src/main-code/text-conversion.ts | 8 ++ src/main-code/text-split.ts | 121 +++++++++++-------------------- src/test/test-case.ts | 57 ++++++++++++++- src/test/test-text-split.js | 120 ++++++++++++++++++++++++++++++ 5 files changed, 231 insertions(+), 81 deletions(-) create mode 100644 src/test/test-text-split.js diff --git a/src/extension.ts b/src/extension.ts index 3968e30..fea19bd 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -3,7 +3,7 @@ import * as vscode from 'vscode'; import * as TextConversion from './main-code/text-conversion'; -type ConvertFunction = (selectionText: string) => string | undefined; +type ConvertFunction = (selectionText: string) => string; // This method is called when your extension is activated // Your extension is activated the very first time the command is executed @@ -28,7 +28,7 @@ export function activate(context: vscode.ExtensionContext) { if (!editor) { return; } - + console.log('============ start convert ============'); let document = editor.document; let selection = editor.selection; @@ -46,7 +46,7 @@ export function activate(context: vscode.ExtensionContext) { return; } - // 当转换后文本与转换前相同时,跳过转换 + // 当转换后文本与转换前相同时,跳过转换,避免形成 Ctrl + Z 撤销历史记录 if (converted === text) { console.log('selection text is same to converted text, skip replace contents.'); return; diff --git a/src/main-code/text-conversion.ts b/src/main-code/text-conversion.ts index 2c403a2..32483ec 100644 --- a/src/main-code/text-conversion.ts +++ b/src/main-code/text-conversion.ts @@ -1,3 +1,5 @@ +import { transformText } from './text-split'; + /** * 转小驼峰 to Camel Case * @@ -6,6 +8,12 @@ * @since 2024-03-28 */ export function toCamelCase(str: string): string { + // 切割文本 + const result = transformText(str); + console.log('result', result); + + // TODO + return str.replace(/_([a-z])/g, (g) => g[1].toUpperCase()); } diff --git a/src/main-code/text-split.ts b/src/main-code/text-split.ts index 52e2297..53fd2cd 100644 --- a/src/main-code/text-split.ts +++ b/src/main-code/text-split.ts @@ -1,89 +1,56 @@ -const handlerList = []; -/** - * 小驼峰处理中间件 - * - * @param str - * @since 2024-03-29 - */ -const camelCaseHandler = (str: string) => { - // 是否是小驼峰 - const regexp = /^$/g; // need done - // if() -}; -handlerList.push(camelCaseHandler); - -/** - * 小驼峰处理中间件 - * - * @param str - * @since 2024-03-29 - */ -const pascalCaseHandler = (str: string) => { - // 是否是小驼峰 - const regexp = /^$/g; // need done -}; -handlerList.push(pascalCaseHandler); - -type SplitFailResult = { - success: false - errMsg: string -}; - -type SplitSuccessResult = { - success: true - result: Array -}; - -type SplitResult = SplitFailResult | SplitSuccessResult; +const logDebugInfo = false; /** * 分词 * * @param str - * @since 2024-03-29 + * @since 2024-04-02 */ -export function splitWord(str: string): SplitResult { - // check parameter type - if (typeof str !== 'string') { - return { success: false, errMsg: `str is not string, type: ${typeof str}` }; - } +export function transformText(input: string): string { + logDebugInfo && console.log('input ', '->' + input + '<-'); - // check parameter length - if (str.length === 0) { - return { success: false, errMsg: 'str is empty string.' }; - } - else if (str.length > 64) { - return { success: false, errMsg: 'str is too long, it does not appear to be an acceptable input.' }; - } + // 记录首尾空格 + const leadingSpaces = input.match(/^ +/); + const trailingSpaces = input.match(/ +$/); - // check whether the input matches the criteria - // 是否包含空格 - const isContainSpace = str.indexOf(' ') !== -1; - // 是否包含连字符 - const isContainHyphen = str.indexOf('-') !== -1; - // 是否包含下划线 - const isContainUnderline = str.indexOf('_') !== -1; - // 是否包含除空格外的其他连字符 (检查字符串是否包含 - 或 _ ,并且不包含空格) - const isContainSeparator = /^[^\s]*[-_]+[^\s]*$/.test(str); + // 去除首尾空格 + input = input.trim(); - // 是否是小驼峰命名法 - const isCamelCase = /^[a-z][a-zA-Z]*$/; - // 是否是大驼峰命名法 - const isPascalCase = /^[A-Z][a-zA-Z]*$/; - // 是否包含大写字母 - const isContainUpperCaseLetter = /[A-Z]/.test(str); - // 是否包含小写字母 - const isContainLowerCaseLetter = /[a-z]/.test(str); - // 是否包含字母 - const isContainLetter = /[a-zA-Z]/.test(str); + // 使用正则表达式匹配中英文字母、连字符、下划线和空格 + let result = input.replace(/([A-Za-z\-_ ]+)/g, (match: string) => { - return { success: true, result: [] }; + // 替换连字符为 '|' (如有多个则合并) + match = match.replace(/[-_ ]+/g, '|'); + + // 拆分连续的小写字母和大写字母为多个单词 + match = match.replace(/([a-z])([A-Z])/g, '$1|$2'); + + // 分割 + let words = match.split('|'); + + // 处理特殊情况,如 'ENFADADO' 不应该被拆分 + words = words.map(word => { + if (word.toUpperCase() === word && word.length > 1) { + return word.toLowerCase(); + } + return word.replace(/([A-Z])/g, '|$1').toLowerCase(); + }); + + // 重新组合单词 + return '|' + words.join('|') + '|'; + }); + + // 如果有多个 | 将其合并 + result = result.replace(/[\|]+/g, '|'); + + // 如果首尾有 | 将其替换掉 + result = result.replace(/(^[\|]+|[\|]+$)/g, ''); + + // 还原首尾空格 + // result = (leadingSpaces ? (leadingSpaces[0] + '|') : '') + result + (trailingSpaces ? ('|' + trailingSpaces[0]) : ''); + result = (leadingSpaces ? leadingSpaces[0] : '') + result + (trailingSpaces ? trailingSpaces[0] : ''); + + logDebugInfo && console.log('output ', '->' + result + '<-'); + return result; } - -const result = splitWord('hello world'); -if (result.success) { - console.log('success!', result.result); -} else { - console.log('skip!', result.errMsg); -} \ No newline at end of file diff --git a/src/test/test-case.ts b/src/test/test-case.ts index 8e89f33..1c9560c 100644 --- a/src/test/test-case.ts +++ b/src/test/test-case.ts @@ -49,7 +49,7 @@ const testCase: Array = [ }, { input: - `今天是星期日` + '今天是星期日' , isSkip: true, skipReason: SkipReason.NOT_CONTAIN_LETTERS @@ -125,4 +125,59 @@ const testCase: Array = [ pascalCase: ' A NiceDay', } }, + { + input: + ' Julius_Caesar, William_Shakespeare, Albert_Einstein, Marie_Curie, WolfgangAmadeusMozart, Vincent-van-Gogh. ' + , + isSkip: false, + splitResult: [], + output: { + camelCase: '', + pascalCase: '', + } + }, + { + input: + '🥰 a-cup/_of Coffee🍻,-_please!. ' + , + isSkip: false, + splitResult: [], + output: { + camelCase: '', + pascalCase: '', + } + }, + { + input: + ' NHDAs--+90-usz&* ' + , + isSkip: false, + splitResult: [], + output: { + camelCase: '', + pascalCase: '', + } + }, + { + input: + '--担心你鸿dAf_=coffee—_— ' + , + isSkip: false, + splitResult: [], + output: { + camelCase: '', + pascalCase: '', + } + }, + { + input: + 'fsdi_sdacsaf+desd' + , + isSkip: false, + splitResult: [], + output: { + camelCase: '', + pascalCase: '', + } + }, ]; diff --git a/src/test/test-text-split.js b/src/test/test-text-split.js new file mode 100644 index 0000000..b4c1ae5 --- /dev/null +++ b/src/test/test-text-split.js @@ -0,0 +1,120 @@ +// 请帮我写一个 js 函数,实现如下功能:建议使用正则实现,难以实现的可以使用代码逻辑配合操作 +// 统一将所有单词转为小写,连字符转换为|便于后续操作 +// 具体转换逻辑: +// - 如果有多个连字符,将其合并为1个连字符,例如 ice-_-cream -> ice|cream +// - 如果连续小写字母存在大写字母,将其拆分为多个单词,例如 TomLikes eat iceCream. -> tom|likes|eat|ice|cream|. +// - 对于1个或连续多个除了A-Z a-z - _ 空格等的特殊字符,或表情符号等,不做处理,将其视为独立单词,例如 +// takeARest😊haPPy,😢triSTE,ENFADADO, 驚きました,❤️, 笑, 😎COol, 😳-Embarrassed +// -> take|a|rest|😊|ha|p|py|,😢|tri|s|t|e|,|enfadado|,| |驚きました,❤️,| |笑|,| |😎|c|ool|,| |😳|embarrassed + +// 注意,你不可以直接用toLowerCase,因为TomLikes eat iceCream. 这种情况下iceCream变成icecream就无法分词了 +// 另外,通过检查每个字母,如果它是大写的,我们就在它前面添加一个分隔符,然后再将整个字符串转换为小写,这样也有点问题,比如ENFADADO就会被分开。 +// 需要实现 takeARest-> take|a|rest , triSTE -> tri|s|t|e , ENFADADO -> enfadado , COol -> c|ool 或许按照单词首尾字母大小写判断可以解决这个问题? + +// 参考思路:只操作修改其中符合我们替换条件的部分,主要是中英文字母-_和空格,可以用正则匹配出来逐一进行操作后再回填回去,对于其他字符部分不做操作? + + +// 请将如下代码改写成 TypeScript 的格式,并移除 logDebugInfo 参数,需要保留注释内容 + +const logDebugInfo = false; + +function transformText(input) { + console.log(); + console.log('input ', '->' + input + '<-'); + + // 记录首尾空格 + const leadingSpaces = input.match(/^ +/); + const trailingSpaces = input.match(/ +$/); + + // 去除首尾空格 + input = input.trim(); + logDebugInfo && console.log('Trimmed input', input); + + // 使用正则表达式匹配中英文字母、连字符、下划线和空格 + let result = input.replace(/([A-Za-z\-_ ]+)/g, (match) => { + logDebugInfo && console.log('callback', match); + + // 替换连字符为 '|' (如有多个则合并) + match = match.replace(/[-_ ]+/g, '|'); + logDebugInfo && console.log('match', match); + + // 拆分连续的小写字母和大写字母为多个单词 + match = match.replace(/([a-z])([A-Z])/g, '$1|$2'); + logDebugInfo && console.log('match', match); + + // 分割 + let words = match.split('|'); + logDebugInfo && console.log('words', words); + + // 处理特殊情况,如 'ENFADADO' 不应该被拆分 + words = words.map(word => { + // if (word.toUpperCase() === word) { + // return word.toLowerCase(); + // } + // return word/*.replace(/([A-Z])/g, '|$1')*/.toLowerCase(); + if (word.toUpperCase() === word && word.length > 1) { + return word.toLowerCase(); + } + return word.replace(/([A-Z])/g, '|$1').toLowerCase(); + }); + logDebugInfo && console.log('words', words); + + // 重新组合单词 + return '|' + words.join('|') + '|'; + }); + logDebugInfo && console.log('result', result); + + // 如果有多个 | 将其合并 + result = result.replace(/[\|]+/g, '|'); + logDebugInfo && console.log('result', result); + + // 如果首尾有 | 将其替换掉 + result = result.replace(/(^[\|]+|[\|]+$)/g, ''); + logDebugInfo && console.log('result', result); + + // 还原首尾空格 + // result = (leadingSpaces ? (leadingSpaces[0] + '|') : '') + result + (trailingSpaces ? ('|' + trailingSpaces[0]) : ''); + result = (leadingSpaces ? leadingSpaces[0] : '') + result + (trailingSpaces ? trailingSpaces[0] : ''); + logDebugInfo && console.log('Final result', result); + + console.log('output ', '->' + result + '<-'); + return result; +} + +// 示例用法 +transformText(' ANiceDay!'); +transformText(' A----NiCe_Day_-_-- \''); +transformText('TomLikes eat iceCream.'); +transformText('takeARest😊haPPy,😢triSTE,ENFADADO, 驚きました,❤️, 笑, 😎COol, 😳-Embarrassed'); +transformText(' Julius_Caesar, William_Shakespeare, Albert_Einstein, Marie_Curie, WolfgangAmadeusMozart, Vincent-van-Gogh. '); +transformText(' 🥰 a-cup/_of Coffee🍻,-_please!. '); +transformText('--担心你dAf_=coffee—爸妈不在家_— '); +transformText(' NHDAs--+90-usz&* '); + +/* 输出: + +input -> ANiceDay!<- +output -> a|nice|day|!<- + +input -> A----NiCe_Day_-_-- '<- +output -> a|ni|ce|day|'<- + +input ->TomLikes eat iceCream.<- +output ->tom|likes|eat|ice|cream|.<- + +input ->takeARest😊haPPy,😢triSTE,ENFADADO, 驚きました,❤️, 笑, 😎COol, 😳-Embarrassed<- +output ->take|a|rest|😊|ha|p|py|,😢|tri|ste|,|enfadado|,|驚きました,❤️,|笑,|😎|c|ool|,|😳|embarrassed<- + +input -> Julius_Caesar, William_Shakespeare, Albert_Einstein, Marie_Curie, WolfgangAmadeusMozart, Vincent-van-Gogh. <- +output -> julius|caesar|,|william|shakespeare|,|albert|einstein|,|marie|curie|,|wolfgang|amadeus|mozart|,|vincent|van|gogh|. <- + +input -> 🥰 a-cup/_of Coffee🍻,-_please!. <- +output -> 🥰|a|cup|/|of|coffee|🍻,|please|!. <- + +input ->--担心你dAf_=coffee—爸妈不在家_— <- +output ->担心你|d|af|=|coffee|—爸妈不在家|— <- + +input -> NHDAs--+90-usz&* <- +output -> n|h|d|as|+90|usz|&* <- + +*/ \ No newline at end of file