77 lines
2.8 KiB
JavaScript
77 lines
2.8 KiB
JavaScript
// Periodically updates the wait tables.
|
||
|
||
const sleepUtils = require("../../utils/sleepUtils");
|
||
const { fill } = require("../../utils/stringUtils");
|
||
|
||
/**
 * Computes the array difference `a - b`: every element of `a` that does not
 * occur in `b`.
 *
 * The order and duplicates of `a` are preserved. Membership is checked against
 * a `Set` built once from `b`, so the cost is linear in `a.length + b.length`
 * instead of one linear scan of `b` per element of `a`. `Set` compares with
 * SameValueZero: no type coercion (`1` and `"1"` are different), and `NaN`
 * matches `NaN`.
 *
 * @example
 * getDiffSet([1, 2, 3], [4, 5, 6, 1]); // => [2, 3]
 *
 * @param {Array} a - Values to keep unless excluded.
 * @param {Array} b - Values to exclude.
 * @returns {Array} New array with the elements of `a` that are absent from `b`.
 */
function getDiffSet(a, b) {
    const excluded = new Set(b);
    return a.filter((item) => !excluded.has(item));
}
|
||
|
||
/**
 * Drains `wait_check_<tableName>` in batches and queues the ids that still
 * need to be fetched.
 *
 * For each batch of up to 5000 ids read from the check table:
 *   1. ids already present in `<tableName>` (matched on `fieldName`) are skipped;
 *   2. the remaining ids are inserted using `insertSql`, or by default
 *      `INSERT IGNORE INTO wait_fetch_<tableName> (id) VALUES ?`;
 *   3. the whole batch is deleted from the check table;
 *   4. a one-line progress summary is logged.
 * The loop ends when the check table returns no rows.
 *
 * NOTE(review): `dbUtils` is not required in the visible part of this file;
 * presumably it is provided elsewhere (e.g. as a global) - confirm.
 * NOTE(review): `tableName` and `fieldName` are interpolated directly into the
 * SQL text, so they must only ever come from trusted, hard-coded callers.
 *
 * @param {string} tableName - Suffix of the `wait_check_*` / `wait_fetch_*` tables and name of the data table.
 * @param {string} fieldName - Column of `<tableName>` that holds the id.
 * @param {?string} [insertSql=null] - Optional bulk-insert statement replacing the default one.
 * @returns {Promise<void>} Resolves once the check table is empty.
 */
async function migrateIdsFromCheckToFetch(tableName, fieldName, insertSql = null) {
    const stepLength = 5000;
    const insertStatement = insertSql || `INSERT IGNORE INTO wait_fetch_${tableName} (id) VALUES ?`;

    while (true) {
        // Read the next chunk of pending ids from the check table.
        const idsResult = await dbUtils.query(`SELECT id FROM wait_check_${tableName} LIMIT ${stepLength}`, []);
        const ids = idsResult.map((row) => row.id);
        if (ids.length === 0) {
            break;
        }

        // Ids that already exist in the data table do not need to be fetched again.
        const skipIdsResult = await dbUtils.query(`SELECT ${fieldName} as id FROM ${tableName} WHERE ${fieldName} IN ?`, [[ids]]);
        const skipIds = skipIdsResult.map((row) => row.id);

        // Remaining ids that still have to be fetched.
        const finalIds = getDiffSet(ids, skipIds);

        // Scoped to this batch on purpose: a function-level variable would keep
        // the previous batch's result and report stale `affectedRows` whenever
        // the current batch inserts nothing.
        let insertResult;
        if (finalIds.length > 0) {
            insertResult = await dbUtils.query(insertStatement, [finalIds.map((id) => [id])]);
        }

        // The batch is handled either way, so remove it from the check table.
        await dbUtils.query(`DELETE FROM wait_check_${tableName} WHERE id IN ?`, [[ids]]);

        // `affected` is `undefined` when no insert was issued for this batch.
        console.log(`table: ${tableName}\t| ${fill(ids[0], 10)} - ${fill(ids.slice(-1)[0], 10)} ${fill(`(${finalIds.length}/${ids.length})`, 10, ' ', true)}\t| affected: ${insertResult?.affectedRows}`);
    }
}
|
||
|
||
/**
 * Runs one check-to-fetch migration and never rejects.
 *
 * The migration is awaited inside the `try`, so a failure is actually caught:
 * it is logged, followed by a 10 second pause, and the returned promise then
 * resolves normally. Wrapping the call in `new Promise(async ...)` would let
 * the rejection escape the `try/catch` and leave the promise pending forever,
 * which would stall a surrounding `Promise.all`.
 *
 * @param {string} tableName - Forwarded to `migrateIdsFromCheckToFetch`.
 * @param {string} fieldName - Forwarded to `migrateIdsFromCheckToFetch`.
 * @param {string} [insertSql] - Optional insert statement, forwarded as-is.
 * @returns {Promise<void>} Resolves when the migration finished or its failure was handled.
 */
async function getPromise(tableName, fieldName, insertSql) {
    try {
        await migrateIdsFromCheckToFetch(tableName, fieldName, insertSql);
    } catch (e) {
        console.error(e);
        await sleepUtils.sleep(10 * 1000);
    }
}
|
||
/**
 * Starts the check-to-fetch migration for every tracked table at once and
 * waits until all of them have settled.
 *
 * Each job is the argument list handed to `getPromise`:
 * `[tableName, fieldName, optionalInsertSql]`.
 *
 * @returns {Promise<void>} Resolves after every migration promise has resolved.
 */
async function updateWaitTable() {
    console.log(`更新待爬取列表`);

    const jobs = [
        ["song", "song_id"],
        ["lyric", "song_id"],
        ["comment", "song_id", `INSERT IGNORE INTO comment_progress (song_id) VALUES ?`],
        ["album", "album_id"],
        ["artist", "artist_id"],
    ];

    // All jobs are kicked off before the first await, so they run concurrently.
    const pending = jobs.map((jobArgs) => getPromise(...jobArgs));
    await Promise.all(pending);

    console.log("All done.\n");
}
|
||
|
||
// Public API of this module: only the top-level refresh entry point is exported.
module.exports = { updateWaitTable };