1
0
Code Issues Pull Requests Actions Packages Projects Releases Wiki Activity GitHub Gitee

53 lines
1.6 KiB
JavaScript
Raw Permalink Normal View History

2024-06-18 16:57:55 +08:00
/**
 * Scrapes the pretrained-model list out of the current page and prints an
 * INSERT statement for the `large_model_info` table.
 *
 * source page: https://www.datalearner.com/ai-models/pretrained-models
 *
 * Reads the global `main_div`, which is not defined in this file.
 * NOTE(review): presumably the page element with id "main_div" (exposed as a
 * global by the browser) or a variable prepared in the console - confirm.
 *
 * Logs the matched anchors, the extracted rows and the generated SQL.
 * @since 2024.06.18
 */

/**
 * Escapes text for use inside a single-quoted SQL string literal by doubling
 * every single quote. Backslashes are left untouched.
 * NOTE(review): if the target database treats backslash as an escape
 * character, backslashes need escaping as well - confirm the dialect.
 * @param {unknown} value - value to embed; converted with String()
 * @returns {string} the escaped text, without surrounding quotes
 */
function escapeSqlText(value) {
  return String(value).replace(/'/g, "''")
}

/**
 * Renders a scraped value as a quoted SQL literal.
 * @param {string|null|undefined} value - scraped text
 * @param {string} [suffix] - text appended inside the quotes (e.g. a unit)
 * @returns {string} `'value'`, or `NULL` when the value is missing or empty
 */
function toSqlLiteral(value, suffix = '') {
  // A missing heading must not end up as the literal text 'undefined'.
  if (value === undefined || value === null || value === '') {
    return 'NULL'
  }
  return `'${escapeSqlText(value)}${suffix}'`
}

/**
 * Normalizes placeholder dates: every `-00` month or day part becomes `-01`
 * (e.g. '2022-08-00' -> '2022-08-01', '2022-00-00' -> '2022-01-01').
 * @param {string|undefined} release_date - scraped date text
 * @returns {string|undefined} the normalized date; non-strings are returned as is
 */
function normalizeReleaseDate(release_date) {
  if (typeof release_date !== 'string') {
    return release_date
  }
  // The lookahead limits the match to whole "-00" parts, and the g flag fixes
  // both the month and the day when both are placeholders.
  return release_date.replace(/-00(?=-|$)/g, '-01')
}

/**
 * Builds the INSERT statement for the scraped rows.
 * Rows without a model code are skipped, and rows whose model code repeats an
 * earlier one (compared case-insensitively) are dropped. The input rows are
 * not modified.
 * @param {Array<{model_code?: string, model_parameter?: string, release_date?: string, license?: string}>} rows
 * @param {Date} [today] - date written into the leading SQL comment
 * @returns {string} a dated SQL comment followed by the INSERT statement, or
 *   by a comment saying there is nothing to insert when no row qualifies
 */
function buildInsertSql(rows, today = new Date()) {
  const seen_codes = new Set()
  const segments = []
  for (const row of rows) {
    const model_code = row.model_code
    if (!model_code) {
      continue
    }
    // Drop duplicated models (case-insensitive match on the model code).
    const dedupe_key = model_code.toLowerCase()
    if (seen_codes.has(dedupe_key)) {
      continue
    }
    seen_codes.add(dedupe_key)
    const values = [
      toSqlLiteral(`(待完善)/${model_code}`),
      toSqlLiteral(model_code),
      toSqlLiteral(row.model_parameter, '亿'),
      toSqlLiteral(normalizeReleaseDate(row.release_date)),
      toSqlLiteral(row.license),
    ]
    segments.push(`(${values.join(', ')})`)
  }
  const header = `-- ${today.toLocaleDateString()}\n`
  if (segments.length === 0) {
    // An INSERT without any value tuple is not valid SQL.
    return `${header}-- no rows scraped, nothing to insert`
  }
  const statement = 'INSERT INTO large_model_info (id, model_code, model_parameter, release_date, license) VALUES \n'
  return header + statement + segments.join(',\n')
}

// The list container sits at a fixed position below main_div; when the page
// layout differs, report it instead of failing with an opaque TypeError.
const content_container = typeof main_div === 'undefined'
  ? undefined
  : main_div?.childNodes[1]?.childNodes[3]
if (!content_container) {
  console.warn('content container not found at main_div.childNodes[1].childNodes[3]')
}

// Keep only the <a> elements among the container's child nodes.
const content_item_list = Array.from(content_container?.childNodes ?? [])
  .filter(node => node instanceof HTMLElement)
  .filter(node => node.nodeName.toLowerCase() === 'a')
console.log('content_item', content_item_list)

// The first four <h4> headings of each anchor are read, in order, as
// model code, parameter count, release date and license.
const result = content_item_list.map(content_item => {
  const headings = content_item.querySelectorAll('h4')
  return {
    model_code: headings[0]?.textContent?.trim(),
    model_parameter: headings[1]?.textContent?.trim(),
    release_date: headings[2]?.textContent?.trim(),
    license: headings[3]?.textContent?.trim(),
  }
})
console.log('result', result)

// Assemble the SQL.
const sql = buildInsertSql(result)
console.log('sql', sql)