添加 assistant 助手;其他调整(大调整)
This commit is contained in:
59
netease_music/src/assistantUtils.js
Normal file
59
netease_music/src/assistantUtils.js
Normal file
@@ -0,0 +1,59 @@
|
||||
// 定时更新 wait 表
|
||||
|
||||
/**
 * Compute the difference of two arrays (a - b).
 *
 * Example: getDiffSet([1, 2, 3], [4, 5, 6, 1]) returns [2, 3].
 *
 * @param {Array} a - Candidate values; their order and duplicates are preserved.
 * @param {Array} b - Values to exclude from `a`.
 * @returns {Array} A new array holding every element of `a` that does not occur in `b`.
 */
function getDiffSet(a, b) {
    // Build the lookup once so the pass costs O(a.length + b.length) instead of
    // rescanning `b` for every element of `a`.
    // Set membership uses SameValueZero: same as strict equality for ids and
    // strings; the only difference is that NaN matches NaN.
    const excluded = new Set(b);
    return a.filter((item) => !excluded.has(item));
}
|
||||
|
||||
/**
 * Drain `wait_check_<tableName>` in batches, queueing every id that is not yet
 * stored in `<tableName>` for fetching.
 *
 * Each pass reads up to 1000 ids from the check table, looks up which of them
 * already exist in `<tableName>.<fieldName>`, bulk-inserts the remainder into
 * the fetch queue, then deletes the whole batch from the check table. The loop
 * ends when the check table returns no rows.
 *
 * NOTE(review): `dbUtils` and `getDiffSet` are not declared inside this
 * function; `dbUtils` is presumably the instance published on `global` by the
 * entry script - confirm.
 *
 * @param {string} tableName - Entity table name; also the suffix of the
 *     `wait_check_*` / `wait_fetch_*` tables. Interpolated into SQL verbatim.
 * @param {string} fieldName - Column of `tableName` that holds the id.
 *     Interpolated into SQL verbatim.
 * @param {?string} [insertSql=null] - Optional bulk-insert statement with a
 *     single `VALUES ?` placeholder, used instead of the default insert into
 *     `wait_fetch_<tableName> (id)`.
 * @returns {Promise<void>} Resolves once the check table is empty.
 */
async function migrateIdsFromCheckToFetch(tableName, fieldName, insertSql = null) {
    console.log(`更新待爬取列表: ${tableName}`);

    const stepLength = 1000;
    const enqueueSql = insertSql ? insertSql : `INSERT IGNORE INTO wait_fetch_${tableName} (id) VALUES ?`;

    while (true) {
        // Read the next batch of ids waiting to be checked.
        const pendingRows = await dbUtils.query(`SELECT id FROM wait_check_${tableName} LIMIT ${stepLength}`, []);
        const ids = pendingRows.map((row) => row.id);
        if (ids.length === 0) {
            break;
        }

        // Ids that already have a row in the entity table need no fetch.
        const storedRows = await dbUtils.query(`SELECT ${fieldName} as id FROM ${tableName} WHERE ${fieldName} IN ?`, [[ids]]);
        const skipIds = storedRows.map((row) => row.id);

        // Queue whatever is left; each id becomes its own row tuple for the bulk insert.
        const finalIds = getDiffSet(ids, skipIds);
        if (finalIds.length > 0) {
            await dbUtils.query(enqueueSql, [finalIds.map((id) => [id])]);
        }

        // Remove the processed batch so the next SELECT makes progress.
        // `ids` is known to be non-empty here, so no extra guard is needed.
        await dbUtils.query(`DELETE FROM wait_check_${tableName} WHERE id IN ?`, [[ids]]);
        console.log(`table: ${tableName} | ${ids[0]} - ${ids.slice(-1)[0]}`);
    }
}
|
||||
|
||||
/**
 * Run the check-table to fetch-queue migration for every entity type, strictly
 * one after another, then log completion.
 *
 * @returns {Promise<void>} Resolves after the last migration has finished.
 */
async function updateWaitTable() {
    // Each entry is the argument list for one migrateIdsFromCheckToFetch call:
    // [tableName, fieldName] or [tableName, fieldName, insertSql].
    const migrationPlan = [
        ["song", "song_id"],
        ["lyric", "song_id"],
        // Comment ids are moved into comment_progress via a custom insert statement.
        ["comment", "song_id", `INSERT IGNORE INTO comment_progress (song_id) VALUES ?`],
        ["album", "album_id"],
        ["artist", "artist_id"],
    ];

    for (const migrationArgs of migrationPlan) {
        await migrateIdsFromCheckToFetch(...migrationArgs);
    }

    console.log("done.\n");
}
|
||||
|
||||
// Public surface of this module: only the top-level entry point is exported.
module.exports = { updateWaitTable };
|
@@ -5,7 +5,7 @@ module.exports = {
|
||||
insertCollection: async (songInfoList) => {
|
||||
if (songInfoList.length == 0) return;
|
||||
// image 因为接口没有返回,所以不更新
|
||||
return await dbUtils.query(`
|
||||
let result = await dbUtils.query(`
|
||||
INSERT INTO song (
|
||||
song_id, title, type, alias, pop, fee, quality, cd,
|
||||
no, dj_id, s_id, origin_cover_type, pub_time,
|
||||
@@ -19,23 +19,27 @@ module.exports = {
|
||||
songInfo.no, songInfo.djId, songInfo.sId, songInfo.originCoverType, songInfo.pubTime,
|
||||
songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2
|
||||
])]);
|
||||
await dbUtils.query(`
|
||||
DELETE FROM wait_fetch_song WHERE id IN ?
|
||||
`, [[songInfoList.map(songInfo => songInfo.id)]])
|
||||
return result;
|
||||
},
|
||||
|
||||
getIdsToFetch: async (args) => {
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
args.min ? `id > ${args.min}` : '1=1',
|
||||
args.max ? `id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
let sql = `
|
||||
SELECT song_id FROM wait_fetch_song WHERE ${whereClause}
|
||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
||||
SELECT id FROM wait_fetch_song WHERE ${whereClause}
|
||||
${args.order ? `ORDER BY id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
// // 更新现有数据
|
||||
// 更新现有数据
|
||||
// sql = `SELECT song_id FROM song WHERE data_version = 1`;
|
||||
console.log(sql);
|
||||
let songIds = await dbUtils.query(sql, []);
|
||||
songIds = songIds.map(item => item.song_id);
|
||||
songIds = songIds.map(item => item.id);
|
||||
return songIds;
|
||||
},
|
||||
},
|
||||
@@ -92,7 +96,7 @@ module.exports = {
|
||||
let artistIds = await dbUtils.query(sql, []);
|
||||
artistIds = artistIds.map(item => item.artist_id);
|
||||
return artistIds;
|
||||
}
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
@@ -100,6 +104,22 @@ module.exports = {
|
||||
insert: async (lyricInfo) => {
|
||||
return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo);
|
||||
},
|
||||
|
||||
getIdsToFetch: async (args) => {
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT song_id FROM wait_fetch_lyric WHERE ${whereClause}
|
||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
let songIds = await dbUtils.query(sql, []);
|
||||
songIds = songIds.map(song => song.song_id);
|
||||
return songIds;
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
@@ -111,6 +131,22 @@ module.exports = {
|
||||
ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
|
||||
`, [commentInfoList]);
|
||||
},
|
||||
|
||||
getIdsToFetch: async (args) => {
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT song_id FROM comment_progress WHERE ${whereClause} AND current_status != 2
|
||||
ORDER BY current_status DESC${args.order ? `, song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
let songIds = await dbUtils.query(sql, []);
|
||||
songIds = songIds.map(item => item.song_id);
|
||||
return songIds;
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
@@ -175,8 +211,15 @@ module.exports = {
|
||||
insert: async (type, ids) => {
|
||||
// 过滤掉 id 为 0 的
|
||||
ids = ids.filter(id => id < 0);
|
||||
return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids]);
|
||||
if (ids.length == 0) return;
|
||||
return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids.map(id => [id])]);
|
||||
},
|
||||
},
|
||||
|
||||
wait_fetch: {
|
||||
deleteCollection: async function (type, ids) {
|
||||
if (ids.length > 0)
|
||||
return await dbUtils.query(`DELETE FROM wait_fetch_${type} WHERE id IN ?`, [[ids]]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@@ -140,17 +140,26 @@ async function fetch({ albumId, debug = false, update = false }) {
|
||||
};
|
||||
// console.log("albumInfo", albumInfo);
|
||||
|
||||
// 插入待爬取表
|
||||
await dataManager.wait_check.insert("song", songIds);
|
||||
await dataManager.wait_check.insert("lyric", songIds);
|
||||
await dataManager.wait_check.insert("comment", songIds);
|
||||
|
||||
// 插入关联关系
|
||||
if (albumId > 0) {
|
||||
let songAlbumRel = songIds.map(songId => [songId, albumId]);
|
||||
await dataManager.song_album.insertCollection(songAlbumRel);
|
||||
}
|
||||
|
||||
// 插入数据
|
||||
if (update) {
|
||||
await dataManager.album.update(albumId, albumInfo);
|
||||
} else {
|
||||
await dataManager.album.insert(albumInfo);
|
||||
}
|
||||
|
||||
// 从待爬取表中删除记录
|
||||
await dataManager.wait_fetch.deleteCollection("album", [albumId]);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
@@ -96,13 +96,22 @@ async function fetch({ artistId, debug = false }) {
|
||||
};
|
||||
// console.log("artistInfo", artistInfo);
|
||||
|
||||
// 插入待爬取表
|
||||
await dataManager.wait_check.insert("song", songIds);
|
||||
await dataManager.wait_check.insert("lyric", songIds);
|
||||
await dataManager.wait_check.insert("comment", songIds);
|
||||
|
||||
// 插入关联关系
|
||||
if (artistId > 0) {
|
||||
let songArtistRel = songIds.map(songId => [songId, artistId]);
|
||||
await dataManager.song_artist.insertCollection(songArtistRel);
|
||||
}
|
||||
|
||||
// 插入数据
|
||||
await dataManager.artist.insert(artistInfo);
|
||||
|
||||
// 从待爬取表中删除记录
|
||||
await dataManager.wait_fetch.deleteCollection("artist", [artistId]);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
@@ -14,27 +14,12 @@ const { comment_music } = require('NeteaseCloudMusicApi');
|
||||
|
||||
async function fetchAll({ args = {} }) {
|
||||
console.log("start fetching comment ...");
|
||||
// 首先将需要爬取的song_id导入comment_progress表
|
||||
await dbUtils.query(`
|
||||
INSERT IGNORE INTO comment_progress ( song_id )
|
||||
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
|
||||
`, []);
|
||||
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT song_id FROM comment_progress WHERE ${whereClause} AND current_status != 2
|
||||
ORDER BY current_status DESC${args.order ? `, song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
|
||||
// 首先查询有无正在爬取中的记录
|
||||
var songIds = await dbUtils.query(sql, []);
|
||||
songIds = songIds.map(item => item.song_id);
|
||||
|
||||
// // 首先将需要爬取的song_id导入comment_progress表
|
||||
// await dbUtils.query(`
|
||||
// INSERT IGNORE INTO comment_progress ( song_id )
|
||||
// SELECT song_id FROM wait_fetch_comment WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
|
||||
// `, []);
|
||||
let songIds = await dataManager.comment.getIdsToFetch(args);
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const songId = songIds[i];
|
||||
|
@@ -10,19 +10,7 @@ const dbUtils = global.dbUtils;
|
||||
// 从数据库中查出还缺少的歌词,并进行爬取
|
||||
async function fetchAll({ args = {} }) {
|
||||
console.log("start fetching lyrics ...");
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT song_id FROM song WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM lyric )
|
||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
|
||||
var songIds = await dbUtils.query(sql, []);
|
||||
songIds = songIds.map(song => song.song_id);
|
||||
let songIds = await dataManager.lyric.getIdsToFetch(args);
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const songId = songIds[i];
|
||||
@@ -78,7 +66,12 @@ async function fetch({ songId, debug = false }) {
|
||||
version: lyric.version,
|
||||
};
|
||||
// console.log("lyricInfo", lyricInfo);
|
||||
|
||||
// 插入数据
|
||||
await dataManager.lyric.insert(lyricInfo);
|
||||
|
||||
// 从待爬取表中删除记录
|
||||
await dataManager.wait_fetch.deleteCollection("lyric", [songId]);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
@@ -85,11 +85,19 @@ async function fetch({ songIdArray, debug = false }) {
|
||||
if (songInfoList.length == 0) return;
|
||||
|
||||
console.log("插入数据库");
|
||||
// 插入待爬取表
|
||||
await dataManager.wait_check.insert("album", albumIds);
|
||||
await dataManager.wait_check.insert("artist", artistIds);
|
||||
|
||||
// 插入关联关系
|
||||
await dataManager.song_album.insertCollection(songAlbumRel);
|
||||
await dataManager.song_artist.insertCollection(songArtistRel);
|
||||
|
||||
// 插入数据
|
||||
await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新
|
||||
|
||||
// 从待爬取表中删除记录
|
||||
await dataManager.wait_fetch.deleteCollection("song", [songId]);
|
||||
}
|
||||
|
||||
// 获取音乐详情
|
||||
|
@@ -6,7 +6,7 @@ const sleepUtils = require('../../utils/sleepUtils');
|
||||
|
||||
// 数据库连接池
|
||||
dbUtils.create({
|
||||
database: "neteaseMusic", // 指定数据库
|
||||
database: global.database || "neteasemusic", // 指定数据库
|
||||
connectionLimit: global.connectionLimit || 10, // 设置数据库连接池数量
|
||||
});
|
||||
global.dbUtils = dbUtils;
|
||||
@@ -20,6 +20,8 @@ const lyricInfoUtils = require('./getInfo/lyricInfoUtils');
|
||||
const commentUtils = require('./getInfo/commentUtils');
|
||||
const playlistUtils = require('./getInfo/playlistUtils');
|
||||
|
||||
const assistantUtils = require('./assistantUtils');
|
||||
|
||||
/**
|
||||
* 测试
|
||||
*/
|
||||
@@ -73,6 +75,10 @@ async function main(args) {
|
||||
case 'playlist':
|
||||
await playlistUtils.fetchAll({ args: args });
|
||||
break;
|
||||
|
||||
case 'assistant':
|
||||
await assistantUtils.updateWaitTable();
|
||||
break;
|
||||
default:
|
||||
console.log("utils参数不匹配,退出");
|
||||
return;
|
||||
|
Reference in New Issue
Block a user