1
0
Fork 0
Chinese-characters/utils.js

104 lines
3.2 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const fs = require('fs');
const path = require('path');
// Compact JSON store of every character record, keyed by the character itself.
const DataFile = path.join(__dirname, './data.json')
// Same records as DataFile, written with 4-space indentation for human reading.
const DataFile2 = path.join(__dirname, './data-friendly.json')
// Plain-text file holding all recorded characters concatenated into one string.
const DataFile3 = path.join(__dirname, './data-single-word.txt')
// In-memory dictionary, read synchronously from DataFile when this module is loaded.
// Declared with `let` because saveData() replaces it with a key-sorted copy.
let data = JSON.parse(fs.readFileSync(DataFile, 'utf8'))
/**
 * Creates a blank character record with every field set to its default.
 *
 * A new object (with new arrays) is built on every call, so callers can
 * mutate the result freely.
 *
 * @returns {{word: string, pinyin: Array, radical: Array, stroke: number,
 *            type: number, from: Array, confirm: boolean}} Empty record.
 */
function structure() {
    const record = {};
    // The Chinese character itself
    record.word = "";
    // Pinyin readings
    record.pinyin = [];
    // Radicals
    record.radical = [];
    // Stroke count
    record.stroke = 0;
    // Category: 0 = unlabelled, 1 = common character, 2 = rare character
    record.type = 0;
    // Which sources inside the "original" folder this record was imported from
    record.from = [];
    // Whether the record has been manually verified
    record.confirm = false;
    return record;
}
/**
 * Writes the in-memory dictionary to disk in three formats.
 *
 * The module-level `data` object is first replaced by a copy whose keys were
 * inserted in `localeCompare` order, then written as:
 *   - DataFile:  compact JSON
 *   - DataFile2: JSON indented with 4 spaces
 *   - DataFile3: all keys concatenated into a single string
 *
 * All writes are synchronous.
 */
function saveData() {
    // Rebuild the dictionary so that keys are inserted in sorted order.
    const orderedKeys = Object.keys(data).sort((left, right) => left.localeCompare(right));
    const ordered = {};
    for (const key of orderedKeys) {
        ordered[key] = data[key];
    }
    data = ordered;

    const compactJson = JSON.stringify(data);
    fs.writeFileSync(DataFile, compactJson, 'utf8');

    const prettyJson = JSON.stringify(data, null, 4);
    fs.writeFileSync(DataFile2, prettyJson, 'utf8');

    // Keys are re-read from the rebuilt object rather than reusing orderedKeys.
    const allWords = Object.keys(data).join('');
    fs.writeFileSync(DataFile3, allWords, 'utf8');
}
/**
 * Appends every item of `additions` that `target` lacks, then sorts `target`
 * in place (default string ordering).
 *
 * @param {Array} target - List to extend; mutated.
 * @param {Array} [additions] - Candidate items; a missing list counts as empty.
 * @returns {boolean} True when at least one item was appended.
 */
function mergeUniqueSorted(target, additions) {
    let appended = false;
    for (const item of additions || []) {
        if (!target.includes(item)) {
            target.push(item);
            appended = true;
        }
    }
    target.sort();
    return appended;
}

/**
 * Adds a character record to the in-memory dictionary, or merges it into the
 * record already stored for the same character.
 *
 * Merge rules for an existing record:
 *   - pinyin / from: union of both lists, kept sorted;
 *   - stroke: a positive incoming value replaces the stored one (a conflict
 *     with a non-zero stored value is logged);
 *   - type: a positive incoming value fills an unlabelled (0) record; when the
 *     stored and incoming types disagree, the record is kept as 1 (common).
 * `radical` and `confirm` of an existing record are not touched.
 *
 * The second parameter used to be named `saveData`, which shadowed the
 * module-level saveData() function and made the default call `true()`.
 * It is positional, so the rename does not affect callers.
 *
 * @param {Object} wordStruct - Record shaped like the result of structure().
 * @param {boolean|Function} [shouldSave=true] - Falsy: never persist. Truthy:
 *     call saveData() when something changed. A function: call it instead of
 *     saveData() (keeps callers that passed a callback working).
 * @returns {void}
 */
function addData(wordStruct, shouldSave = true) {
    const persist = () => {
        if (typeof shouldSave === 'function') {
            shouldSave();
        } else if (shouldSave) {
            saveData();
        }
    };

    const word = wordStruct.word;

    // Own-property lookup instead of scanning Object.keys(data) on every call.
    if (!Object.prototype.hasOwnProperty.call(data, word)) {
        // Not recorded yet: store the incoming record as-is.
        data[word] = wordStruct;
        persist();
        return;
    }

    // Already recorded: merge into the stored record (mutated in place).
    const struct = data[word];
    let isNeedUpdate = false;

    // Merge pinyin
    if (mergeUniqueSorted(struct.pinyin, wordStruct.pinyin)) {
        isNeedUpdate = true;
    }

    // Loose comparisons below are kept on purpose so that a numeric string and
    // the equal number are still treated as the same value.

    // Merge stroke count
    if (wordStruct.stroke > 0 && wordStruct.stroke != struct.stroke) {
        if (struct.stroke != 0) {
            console.log(`${wordStruct.word} 字的笔画(stroke)出现歧义 [${struct.stroke}, ${wordStruct.stroke}],当前保存 ${wordStruct.stroke}`);
        }
        struct.stroke = wordStruct.stroke;
        isNeedUpdate = true;
    }

    // Merge type
    if (wordStruct.type > 0 && wordStruct.type != struct.type) {
        let mergedType = wordStruct.type;
        if (struct.type != 0) {
            // Listed as both rare and common: keep it as common (1).
            console.log(`${wordStruct.word} 字的类型(type)出现歧义 [${struct.type}, ${wordStruct.type}],当前保存 1`);
            mergedType = 1;
        }
        if (mergedType != struct.type) {
            struct.type = mergedType;
            isNeedUpdate = true;
        }
    }

    // Merge sources
    if (mergeUniqueSorted(struct.from, wordStruct.from)) {
        isNeedUpdate = true;
    }

    if (isNeedUpdate) {
        persist();
    }
}
// Public API of this module:
//   structure - creates a blank character record
//   addData   - adds a record to the in-memory dictionary or merges it into an existing one
//   saveData  - writes the in-memory dictionary to the three data files
module.exports = {
structure,
addData,
saveData
}