1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

添加 assistant 助手;其他调整(大调整)

This commit is contained in:
2022-10-29 00:16:36 +08:00
parent ddde1b28f9
commit ce20720c60
21 changed files with 973 additions and 115 deletions

View File

@@ -0,0 +1,59 @@
// 定时更新 wait 表
/**
 * Computes the array difference `a - b`: the elements of `a` that do not occur in `b`.
 *
 * The order of `a` and any duplicates inside `a` are preserved.
 * Membership is tested through a Set, so each lookup is constant time instead of a
 * linear scan of `b` for every element of `a`. Set membership matches strict
 * equality for every value except NaN.
 *
 * @param {Array} a - Candidate values.
 * @param {Array} b - Values to exclude.
 * @returns {Array} A new array with the elements of `a` that are absent from `b`.
 */
function getDiffSet(a, b) {
    const excluded = new Set(b);
    return a.filter(item => !excluded.has(item));
}
/**
 * Drains `wait_check_<tableName>` in batches and queues the ids that have not been
 * stored yet.
 *
 * For every batch read from the check table:
 *   1. look up which of those ids already exist in `<tableName>` (column `fieldName`),
 *   2. insert the remaining ids with `insertSql` (default: into `wait_fetch_<tableName>`),
 *   3. delete the whole batch from the check table.
 * The loop ends when the check table returns no rows.
 *
 * @param {string} tableName - Suffix of the wait tables and name of the data table.
 * @param {string} fieldName - Id column of the data table.
 * @param {?string} insertSql - Optional replacement for the default enqueue statement;
 *   it receives the new ids as a list of one-element rows.
 * @returns {Promise<void>}
 */
async function migrateIdsFromCheckToFetch(tableName, fieldName, insertSql = null) {
    console.log(`更新待爬取列表: ${tableName}`);
    const batchSize = 1000;
    const enqueueSql = insertSql || `INSERT IGNORE INTO wait_fetch_${tableName} (id) VALUES ?`;

    for (;;) {
        // Next batch of ids waiting to be checked.
        const pendingRows = await dbUtils.query(`SELECT id FROM wait_check_${tableName} LIMIT ${batchSize}`, []);
        const batchIds = pendingRows.map(row => row.id);
        if (batchIds.length === 0) {
            return;
        }

        // Ids of this batch that are already present in the data table.
        const knownRows = await dbUtils.query(
            `SELECT ${fieldName} as id FROM ${tableName} WHERE ${fieldName} IN ?`,
            [[batchIds]]
        );
        const knownIds = knownRows.map(row => row.id);

        // Whatever is left still has to be fetched.
        const missingIds = getDiffSet(batchIds, knownIds);
        if (missingIds.length > 0) {
            const valueRows = missingIds.map(id => [id]);
            await dbUtils.query(enqueueSql, [valueRows]);
        }

        // The batch is handled; remove it so the next SELECT returns fresh rows.
        await dbUtils.query(`DELETE FROM wait_check_${tableName} WHERE id IN ?`, [[batchIds]]);
        const firstId = batchIds[0];
        const lastId = batchIds[batchIds.length - 1];
        console.log(`table: ${tableName} | ${firstId} - ${lastId}`)
    }
}
/**
 * Refreshes every wait table, one after another, by moving ids from the
 * `wait_check_*` tables to their fetch queues.
 *
 * @returns {Promise<void>}
 */
async function updateWaitTable() {
    // Each entry is the argument list for one migrateIdsFromCheckToFetch call.
    const migrations = [
        ["song", "song_id"],
        ["lyric", "song_id"],
        // Comment ids are moved into comment_progress instead of a wait_fetch table.
        ["comment", "song_id", `INSERT IGNORE INTO comment_progress (song_id) VALUES ?`],
        ["album", "album_id"],
        ["artist", "artist_id"],
    ];
    for (const migrationArgs of migrations) {
        await migrateIdsFromCheckToFetch(...migrationArgs);
    }
    console.log("done.\n");
}
// Public API of this module: only the top-level refresh routine is exported.
module.exports = {
updateWaitTable,
}

View File

@@ -5,7 +5,7 @@ module.exports = {
insertCollection: async (songInfoList) => {
if (songInfoList.length == 0) return;
// image 因为接口没有返回,所以不更新
return await dbUtils.query(`
let result = await dbUtils.query(`
INSERT INTO song (
song_id, title, type, alias, pop, fee, quality, cd,
no, dj_id, s_id, origin_cover_type, pub_time,
@@ -19,23 +19,27 @@ module.exports = {
songInfo.no, songInfo.djId, songInfo.sId, songInfo.originCoverType, songInfo.pubTime,
songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2
])]);
await dbUtils.query(`
DELETE FROM wait_fetch_song WHERE id IN ?
`, [[songInfoList.map(songInfo => songInfo.id)]])
return result;
},
getIdsToFetch: async (args) => {
let whereClause = [
args.min ? `song_id > ${args.min}` : '1=1',
args.max ? `song_id <= ${args.max}` : '1=1',
args.min ? `id > ${args.min}` : '1=1',
args.max ? `id <= ${args.max}` : '1=1',
].join(' AND ');
let sql = `
SELECT song_id FROM wait_fetch_song WHERE ${whereClause}
${args.order ? `ORDER BY song_id ${args.order}` : ''}
SELECT id FROM wait_fetch_song WHERE ${whereClause}
${args.order ? `ORDER BY id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
// // 更新现有数据
// 更新现有数据
// sql = `SELECT song_id FROM song WHERE data_version = 1`;
console.log(sql);
let songIds = await dbUtils.query(sql, []);
songIds = songIds.map(item => item.song_id);
songIds = songIds.map(item => item.id);
return songIds;
},
},
@@ -92,7 +96,7 @@ module.exports = {
let artistIds = await dbUtils.query(sql, []);
artistIds = artistIds.map(item => item.artist_id);
return artistIds;
}
},
},
@@ -100,6 +104,22 @@ module.exports = {
// Stores one lyric record. `lyricInfo` is passed as the value of the single `SET ?`
// placeholder, and the promise returned by dbUtils.query is awaited and returned.
// The statement is INSERT IGNORE, which in MySQL skips rows that collide with an
// existing unique key instead of raising an error.
insert: async (lyricInfo) => {
return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo);
},
getIdsToFetch: async (args) => {
let whereClause = [
args.min ? `song_id > ${args.min}` : '1=1',
args.max ? `song_id <= ${args.max}` : '1=1',
].join(' AND ');
var sql = `
SELECT song_id FROM wait_fetch_lyric WHERE ${whereClause}
${args.order ? `ORDER BY song_id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
console.log(sql);
let songIds = await dbUtils.query(sql, []);
songIds = songIds.map(song => song.song_id);
return songIds;
},
},
@@ -111,6 +131,22 @@ module.exports = {
ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
`, [commentInfoList]);
},
// Lists the song ids in comment_progress whose comments still have to be fetched.
// args.min / args.max (when truthy) bound song_id as min < song_id <= max; a falsy
// bound is replaced by the always-true '1=1'. args.order and args.limit are optional.
// NOTE(review): the args values are interpolated into the SQL text as-is — presumably
// they come from trusted CLI input; confirm.
getIdsToFetch: async (args) => {
let whereClause = [
args.min ? `song_id > ${args.min}` : '1=1',
args.max ? `song_id <= ${args.max}` : '1=1',
].join(' AND ');
// Rows with current_status = 2 are excluded (presumably 2 means "finished" — TODO confirm);
// the rest are sorted by current_status descending, then optionally by song_id.
var sql = `
SELECT song_id FROM comment_progress WHERE ${whereClause} AND current_status != 2
ORDER BY current_status DESC${args.order ? `, song_id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
// The final SQL text is logged before it is executed.
console.log(sql);
let songIds = await dbUtils.query(sql, []);
// Unwrap the result rows into a plain array of song_id values.
songIds = songIds.map(item => item.song_id);
return songIds;
},
},
@@ -175,8 +211,15 @@ module.exports = {
insert: async (type, ids) => {
// 过滤掉 id 为 0 的
ids = ids.filter(id => id < 0);
return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids]);
if (ids.length == 0) return;
return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids.map(id => [id])]);
},
},
wait_fetch: {
deleteCollection: async function (type, ids) {
if (ids.length > 0)
return await dbUtils.query(`DELETE FROM wait_fetch_${type} WHERE id IN ?`, [[ids]]);
}
}
};

View File

@@ -140,17 +140,26 @@ async function fetch({ albumId, debug = false, update = false }) {
};
// console.log("albumInfo", albumInfo);
// 插入待爬取表
await dataManager.wait_check.insert("song", songIds);
await dataManager.wait_check.insert("lyric", songIds);
await dataManager.wait_check.insert("comment", songIds);
// 插入关联关系
if (albumId > 0) {
let songAlbumRel = songIds.map(songId => [songId, albumId]);
await dataManager.song_album.insertCollection(songAlbumRel);
}
// 插入数据
if (update) {
await dataManager.album.update(albumId, albumInfo);
} else {
await dataManager.album.insert(albumInfo);
}
// 从待爬取表中删除记录
await dataManager.wait_fetch.deleteCollection("album", [albumId]);
}
module.exports = {

View File

@@ -96,13 +96,22 @@ async function fetch({ artistId, debug = false }) {
};
// console.log("artistInfo", artistInfo);
// 插入待爬取表
await dataManager.wait_check.insert("song", songIds);
await dataManager.wait_check.insert("lyric", songIds);
await dataManager.wait_check.insert("comment", songIds);
// 插入关联关系
if (artistId > 0) {
let songArtistRel = songIds.map(songId => [songId, artistId]);
await dataManager.song_artist.insertCollection(songArtistRel);
}
// 插入数据
await dataManager.artist.insert(artistInfo);
// 从待爬取表中删除记录
await dataManager.wait_fetch.deleteCollection("artist", [artistId]);
}
module.exports = {

View File

@@ -14,27 +14,12 @@ const { comment_music } = require('NeteaseCloudMusicApi');
async function fetchAll({ args = {} }) {
console.log("start fetching comment ...");
// 首先将需要爬取的song_id导入comment_progress表
await dbUtils.query(`
INSERT IGNORE INTO comment_progress ( song_id )
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
`, []);
let whereClause = [
args.min ? `song_id > ${args.min}` : '1=1',
args.max ? `song_id <= ${args.max}` : '1=1',
].join(' AND ');
var sql = `
SELECT song_id FROM comment_progress WHERE ${whereClause} AND current_status != 2
ORDER BY current_status DESC${args.order ? `, song_id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
console.log(sql);
// 首先查询有无正在爬取中的记录
var songIds = await dbUtils.query(sql, []);
songIds = songIds.map(item => item.song_id);
// // 首先将需要爬取的song_id导入comment_progress表
// await dbUtils.query(`
// INSERT IGNORE INTO comment_progress ( song_id )
// SELECT song_id FROM wait_fetch_comment WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
// `, []);
let songIds = await dataManager.comment.getIdsToFetch(args);
for (let i = 0; i < songIds.length; i++) {
await global.checkIsExit();
const songId = songIds[i];

View File

@@ -10,19 +10,7 @@ const dbUtils = global.dbUtils;
// 从数据库中查出还缺少的歌词,并进行爬取
async function fetchAll({ args = {} }) {
console.log("start fetching lyrics ...");
let whereClause = [
args.min ? `song_id > ${args.min}` : '1=1',
args.max ? `song_id <= ${args.max}` : '1=1',
].join(' AND ');
var sql = `
SELECT song_id FROM song WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM lyric )
${args.order ? `ORDER BY song_id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
console.log(sql);
var songIds = await dbUtils.query(sql, []);
songIds = songIds.map(song => song.song_id);
let songIds = await dataManager.lyric.getIdsToFetch(args);
for (let i = 0; i < songIds.length; i++) {
await global.checkIsExit();
const songId = songIds[i];
@@ -78,7 +66,12 @@ async function fetch({ songId, debug = false }) {
version: lyric.version,
};
// console.log("lyricInfo", lyricInfo);
// 插入数据
await dataManager.lyric.insert(lyricInfo);
// 从待爬取表中删除记录
await dataManager.wait_fetch.deleteCollection("lyric", [songId]);
}
module.exports = {

View File

@@ -85,11 +85,19 @@ async function fetch({ songIdArray, debug = false }) {
if (songInfoList.length == 0) return;
console.log("插入数据库");
// 插入待爬取表
await dataManager.wait_check.insert("album", albumIds);
await dataManager.wait_check.insert("artist", artistIds);
// 插入关联关系
await dataManager.song_album.insertCollection(songAlbumRel);
await dataManager.song_artist.insertCollection(songArtistRel);
// 插入数据
await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新
// 从待爬取表中删除记录
await dataManager.wait_fetch.deleteCollection("song", [songId]);
}
// 获取音乐详情

View File

@@ -6,7 +6,7 @@ const sleepUtils = require('../../utils/sleepUtils');
// 数据库连接池
dbUtils.create({
database: "neteaseMusic", // 指定数据库
database: global.database || "neteasemusic", // 指定数据库
connectionLimit: global.connectionLimit || 10, // 设置数据库连接池数量
});
global.dbUtils = dbUtils;
@@ -20,6 +20,8 @@ const lyricInfoUtils = require('./getInfo/lyricInfoUtils');
const commentUtils = require('./getInfo/commentUtils');
const playlistUtils = require('./getInfo/playlistUtils');
const assistantUtils = require('./assistantUtils');
/**
* 测试
*/
@@ -73,6 +75,10 @@ async function main(args) {
case 'playlist':
await playlistUtils.fetchAll({ args: args });
break;
case 'assistant':
await assistantUtils.updateWaitTable();
break;
default:
console.log("utils参数不匹配退出");
return;