2022-10-01 22:00:29 +08:00
|
|
|
// 引入modules
|
2022-09-30 08:06:14 +08:00
|
|
|
const fs = require('fs');
|
|
|
|
const path = require('path');
|
2022-10-01 20:35:42 +08:00
|
|
|
const dbUtils = require(global.useMysqlPool ? '../utils/dbPoolUtils' : '../utils/dbUtils');
|
2022-09-30 08:06:14 +08:00
|
|
|
const sleepUtils = require('../utils/sleepUtils');
|
|
|
|
|
2022-10-01 22:00:29 +08:00
|
|
|
// Database connection pool: configure the shared dbUtils helper once, at module load.
const neteasePoolOptions = {
    // Database to connect to
    database: "neteaseMusic",
    // Number of pooled connections; 10 is used when global.connectionLimit is unset (or otherwise falsy)
    connectionLimit: global.connectionLimit || 10,
};
dbUtils.create(neteasePoolOptions);

// Publish the configured helper on `global`
// (presumably so other modules can reach it without requiring it again; verify against the consumers).
global.dbUtils = dbUtils;
console.log("global.useMysqlPool:", Boolean(global.useMysqlPool));

// Pause between two consecutive requests
// (unit is not visible in this file; presumably milliseconds, confirm where global.sleepTime is read).
global.sleepTime = 300;
|
2022-10-01 20:35:42 +08:00
|
|
|
|
2022-10-01 22:00:29 +08:00
|
|
|
// 引入utils
|
2022-10-01 11:41:20 +08:00
|
|
|
const songInfoUtils = require('./src/getInfo/songInfoUtils');
|
|
|
|
const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
|
|
|
|
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
|
2022-10-01 21:09:05 +08:00
|
|
|
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
|
2022-10-02 17:37:28 +08:00
|
|
|
const commentUtils = require('./src/getInfo/commentUtils');
|
2022-10-01 11:41:20 +08:00
|
|
|
|
2022-10-01 22:00:29 +08:00
|
|
|
/**
 * Manual smoke test / scratchpad.
 *
 * Enable exactly one of the sample calls below to inspect what a helper returns;
 * the value is printed with console.log. With every sample disabled the function
 * simply prints `undefined` instead of crashing on an undeclared variable.
 *
 * @returns {Promise<void>}
 */
async function test() {
    console.log("neteaseMusic test...");

    // Not every artist has a personal homepage, e.g. https://music.163.com/#/artist?id=1079075

    // Declared up front so the final console.log is valid even while all samples are commented out.
    let res;

    // Samples using the helpers' fetch():
    // res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
    // res = await artistInfoUtils.fetch({ artistId: "12023508" });
    // res = await songInfoUtils.fetch({ songId: "437608327" });

    // Samples using the helpers' getFromDatabase():
    // res = await albumInfoUtils.getFromDatabase({ albumId: "9156" });
    // res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
    // res = await songInfoUtils.getFromDatabase({ songId: "437608327" });

    console.log(res);
}
|
|
|
|
|
2022-10-01 22:00:29 +08:00
|
|
|
/**
 * Main entry point: runs the crawl stages forever.
 *
 * One round calls fetchAll() on the song, album, artist, lyric and comment helpers,
 * strictly in that order and one at a time, then sleeps via sleepUtils.sleep(2000)
 * before the next round starts. The function never returns on its own.
 *
 * @returns {Promise<never>}
 */
async function main() {
    console.log("neteaseMusic Start fetch ...");

    // Disabled dirty-data cleanup, kept for reference (it used to run at the start of every round):
    //   DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0
    //   DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0

    // Ordered list of stages; each one is awaited before the next begins.
    const stages = [
        () => songInfoUtils.fetchAll(),
        () => albumInfoUtils.fetchAll({}),
        () => artistInfoUtils.fetchAll(),
        () => lyricInfoUtils.fetchAll(),
        () => commentUtils.fetchAll(),
    ];

    for (;;) {
        for (const runStage of stages) {
            await runStage();
        }
        await sleepUtils.sleep(2000);
    }
}
|
|
|
|
|
2022-10-01 22:00:29 +08:00
|
|
|
/**
 * Data update (re-crawl) entry point.
 *
 * Repeats forever: calls albumInfoUtils.fetchAll with `isUpdate: true`, then
 * sleeps via sleepUtils.sleep(2000). Only the album helper is involved.
 * The function never returns on its own.
 *
 * @returns {Promise<never>}
 */
async function update() {
    console.log("neteaseMusic Start update ...");

    // A fresh options object is built for every round, exactly as before.
    const refreshAlbums = () => albumInfoUtils.fetchAll({ isUpdate: true });

    for (;;) {
        await refreshAlbums();
        await sleepUtils.sleep(2000);
    }
}
|
|
|
|
|
2022-10-01 22:00:29 +08:00
|
|
|
/**
 * Database statistics.
 *
 * watchParam is the snapshot taken by the previous watch() run. watch() prints the
 * difference between the fresh counts and this snapshot, then replaces it.
 * statisticTime starts at module load time and every counter starts at 0, so the
 * first report's "delta" line is measured against an empty baseline.
 */
let watchParam = {
    statisticTime: Date.now(),
    songCount: 0,
    albumCount: 0,
    artistCount: 0,
    lyricCount: 0,
    commentCount: 0,
    commentTotalCount: 0,
};

/**
 * Builds the statistics query: a cross join of single-row aggregate sub-selects,
 * so the result is one row holding every counter.
 *
 * "Waiting" means an id referenced by a relation table that has no row yet in the
 * corresponding entity table. Waiting songs are collected from both relation tables
 * through a UNION, so a song referenced by both is counted once (adding the two
 * per-table counts would count it twice).
 *
 * @returns {string} SQL text without placeholders.
 */
function buildWatchSql() {
    return `
        SELECT
            song_count,
            song_waiting,

            album_count,
            album_waiting,

            artist_count,
            artist_waiting,

            lyric_count,

            comment_count,
            comment_total_count,

            song_album_count,
            song_artist_count
        FROM
            ( SELECT count(*) AS song_count FROM song ) t_song,
            ( SELECT count(*) AS song_waiting FROM (
                SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
                UNION
                SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
            ) t_song_waiting_ids ) t_song_waiting,

            ( SELECT count(*) AS album_count FROM album ) t_album,
            ( SELECT count( DISTINCT album_id ) as album_waiting FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) ) as t_album_waiting_song_album,

            ( SELECT count(*) AS artist_count FROM artist ) t_artist,
            ( SELECT count( DISTINCT artist_id ) as artist_waiting FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) ) as t_album_waiting_song_artist,

            ( SELECT count(*) AS lyric_count FROM lyric ) t_lyric,

            ( SELECT count( DISTINCT song_id ) AS comment_count, count( comment_id ) AS comment_total_count FROM comment ) t_comment,

            ( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album,
            ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist
    `;
}

/**
 * Converts the single result row into camelCase numeric counters.
 * Number() keeps the later `+` arithmetic numeric even if the driver hands
 * the counts back as strings.
 *
 * @param {Object} row First row returned by the statistics query.
 * @returns {Object} Counters keyed by camelCase name.
 */
function readWatchCounts(row) {
    return {
        songCount: Number(row.song_count),
        songWaiting: Number(row.song_waiting),
        albumCount: Number(row.album_count),
        albumWaiting: Number(row.album_waiting),
        artistCount: Number(row.artist_count),
        artistWaiting: Number(row.artist_waiting),
        lyricCount: Number(row.lyric_count),
        commentCount: Number(row.comment_count),
        commentTotalCount: Number(row.comment_total_count),
        songAlbumCount: Number(row.song_album_count),
        songArtistCount: Number(row.song_artist_count),
    };
}

/**
 * Renders the multi-line statistics report.
 *
 * @param {Object} current Counters from readWatchCounts().
 * @param {Object} previous Snapshot of the previous run (same shape as watchParam).
 * @param {number} statisticTimeDelta Milliseconds since the previous snapshot was taken.
 * @param {number} timeSpent Milliseconds the query took.
 * @returns {string} Report text; lines are joined with "\n" and the last line is empty.
 */
function formatWatchReport(current, previous, statisticTimeDelta, timeSpent) {
    const {
        songCount, songWaiting,
        albumCount, albumWaiting,
        artistCount, artistWaiting,
        lyricCount,
        commentCount, commentTotalCount,
        songAlbumCount, songArtistCount,
    } = current;

    return [
        `[与上次运行统计时相比] deltaTime: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s)`,
        `song: ${songCount - previous.songCount}, album: ${albumCount - previous.albumCount}, artist: ${artistCount - previous.artistCount}, lyric: ${lyricCount - previous.lyricCount}, comment: ${commentCount - previous.commentCount}(song)/${commentTotalCount - previous.commentTotalCount}(comment)`,
        `[已爬取]`,
        `song: ${songCount}, album: ${albumCount}, artist: ${artistCount}, lyric: ${lyricCount}, comment: ${commentCount}(song)/${commentTotalCount}(comment)`,
        `[待爬取]`,
        // Lyrics and comments are tracked per song, so their backlog is "songs without one yet".
        `song: ${songWaiting}, album: ${albumWaiting}, artist: ${artistWaiting}, lyric: ${songCount - lyricCount}, comment: ${songCount - commentCount}`,
        `[总计] (已爬取 + 待爬取)`,
        `song: ${songCount + songWaiting}, album: ${albumCount + albumWaiting}, artist: ${artistCount + artistWaiting}, lyric: ${songCount}, comment: ${songCount}`,
        `[关联关系统计]`,
        `song-album: ${songAlbumCount}, song-artist: ${songArtistCount}`,
        `time spent: ${timeSpent}ms (${(timeSpent / 1000).toFixed(2)}s)`,
        ``
    ].join('\n');
}

/**
 * Queries the database once, prints a statistics report (crawled / waiting / total
 * counts plus the change since the previous run) and stores the fresh counts in
 * watchParam for the next call.
 *
 * Errors from dbUtils.query are not caught here and propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function watch() {
    const sql = buildWatchSql();

    console.log("开始统计 ...");
    const startTime = Date.now();
    const result = await dbUtils.query(sql, []);
    const timeSpent = Date.now() - startTime;

    const counts = readWatchCounts(result[0]);
    const statisticTimeDelta = Date.now() - watchParam.statisticTime;

    console.log(formatWatchReport(counts, watchParam, statisticTimeDelta, timeSpent));

    // Becomes the baseline for the next run's delta line.
    watchParam = {
        statisticTime: Date.now(),
        songCount: counts.songCount,
        albumCount: counts.albumCount,
        artistCount: counts.artistCount,
        lyricCount: counts.lyricCount,
        commentCount: counts.commentCount,
        commentTotalCount: counts.commentTotalCount,
    };
}
|
|
|
|
|
2022-10-01 22:00:29 +08:00
|
|
|
/**
 * Exit hook: shuts the process down when a stop request is found.
 *
 * Reads `stop.txt` (resolved against the current working directory). The request
 * is recognised when the file content, ignoring surrounding whitespace such as a
 * trailing newline, is exactly "1". A missing file means "no stop request".
 *
 * On a stop request: waits 500 units via sleepUtils.sleep, closes dbUtils, waits
 * another 100 and calls process.exit(0).
 *
 * @returns {Promise<void>} Resolves without doing anything when no stop is requested.
 * @throws Any file-system error other than ENOENT, and any error from dbUtils.close().
 */
global.checkIsExit = async function () {
    let stopFlag;
    try {
        stopFlag = fs.readFileSync('stop.txt', 'utf8').trim();
    } catch (err) {
        // No stop file at all: keep running instead of crashing the crawler.
        if (err.code === 'ENOENT') {
            return;
        }
        throw err;
    }

    if (stopFlag !== "1") {
        return;
    }

    console.log();
    console.log(`收到退出指令,准备退出...`);
    await sleepUtils.sleep(500);
    await dbUtils.close();
    console.log(`数据库连接池已关闭`);
    await sleepUtils.sleep(100);
    process.exit(0);
};
|
|
|
|
|
2022-09-30 08:06:14 +08:00
|
|
|
// Public entry points of this module.
module.exports = { main, update, watch, test };
|