// tools/netease_music/index.js
2022-09-30 08:06:14 +08:00
const fs = require('fs');
const path = require('path');
2022-10-01 20:35:42 +08:00
const dbUtils = require(global.useMysqlPool ? '../utils/dbPoolUtils' : '../utils/dbUtils');
2022-09-30 08:06:14 +08:00
const requestUtils = require('../utils/requestUtils');
const sleepUtils = require('../utils/sleepUtils');
2022-10-01 19:45:49 +08:00
// Configure the database helper selected above and publish it on `global`.
// NOTE(review): presumably the ./src/getInfo modules required further down read
// `global.dbUtils`, which is why this runs before those requires — confirm.
dbUtils.create({
database: "neteaseMusic", // database to use
connectionLimit: 8, // number of connections in the pool
});
global.dbUtils = dbUtils;
2022-10-01 20:35:42 +08:00
2022-10-01 11:41:20 +08:00
const songInfoUtils = require('./src/getInfo/songInfoUtils');
const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
2022-10-01 21:09:05 +08:00
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
2022-10-01 11:41:20 +08:00
2022-10-01 20:35:42 +08:00
// Log whether global.useMysqlPool is set (it decides which dbUtils module is required above).
console.log("global.useMysqlPool:", !!global.useMysqlPool);
2022-10-01 11:41:20 +08:00
2022-10-01 19:45:49 +08:00
/**
 * Exit check: shuts the process down when `stop.txt` requests it.
 *
 * Reads `stop.txt` (resolved against the current working directory). When its
 * trimmed content is exactly "1", the database helper is closed and the
 * process exits with code 0. In every other case the function just resolves.
 *
 * @returns {Promise<void>} Resolves when no exit was requested.
 * @throws {Error} Any read error other than a missing file (ENOENT).
 */
async function checkIsExit() {
    let flag;
    try {
        // Decode explicitly and trim so a trailing newline written by an
        // editor or `echo 1 > stop.txt` still counts as "1".
        flag = fs.readFileSync('stop.txt', 'utf8').trim();
    } catch (err) {
        // No flag file means nobody asked us to stop; keep running.
        if (err.code === 'ENOENT') {
            return;
        }
        throw err;
    }
    if (flag !== "1") {
        return;
    }

    console.log();
    console.log(`收到退出指令,准备退出...`);
    await sleepUtils.sleep(500);
    await dbUtils.close();
    console.log(`数据库连接池已关闭`);
    await sleepUtils.sleep(100);
    process.exit(0);
}
2022-10-01 21:09:05 +08:00
/**
 * Manual test entry point.
 *
 * Currently only prints a banner; the commented-out calls below are kept as
 * ready-to-use examples for ad-hoc experiments.
 *
 * @returns {Promise<void>}
 */
async function test() {
    console.log("neteaseMusic test...");
    // getMusicInfo({ songId: "1855221507" });
    // getArtistInfo({ artistId: "1079074" });
    // getAlbumInfo({ albumId: "74268047" });

    // Not every artist has a personal homepage, e.g. https://music.163.com/#/artist?id=1079075
    // getUserInfo({ userId: "37365202" });
    // let res = await albumInfoUtils.getFromDatabase({ albumId: "34943450" });
    // let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
    // let res = await songInfoUtils.getFromDatabase({ songId: "437608327" });
    // console.log(res);
}
2022-09-30 08:06:14 +08:00
/**
 * Crawler entry point: loops forever, cleaning up invalid relation rows and
 * then fetching whatever is still missing.
 *
 * Each round:
 *   1. polls the stop flag via `checkIsExit()`,
 *   2. deletes relation rows whose ids are 0,
 *   3. runs `startGet(100)`,
 *   4. sleeps 2000 ms.
 *
 * @returns {Promise<never>} Never resolves normally; ends via `checkIsExit()`
 *     exiting the process or by rejecting on an unhandled error.
 */
async function main() {
    console.log("neteaseMusic Starting...");
    console.log(`数据统计: ${await statistics()}`);

    while (true) {
        // startGet() only polls the stop flag once per fetched item, so when
        // nothing is pending the flag would never be looked at. Poll here too.
        await checkIsExit();

        // Remove dirty rows (relations pointing at id 0).
        const songArtistResult = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
        const songAlbumResult = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
        console.log(`删除脏数据 affectRows:`, songArtistResult.affectedRows, songAlbumResult.affectedRows);

        await startGet(100);
        await sleepUtils.sleep(2000);
    }
}
2022-10-01 18:53:19 +08:00
/**
 * Runs one query for missing ids and fetches each of them sequentially.
 *
 * For every id: polls the stop flag, logs progress, calls `fetchOne(id)`
 * (errors are logged and do not abort the batch), then sleeps `sleepTime` ms.
 *
 * @param {object} options
 * @param {string} options.label - Entity name used in the progress log line.
 * @param {string} options.sql - Query returning one row per missing id.
 * @param {string} options.idColumn - Column of each row that holds the id.
 * @param {(id: *) => Promise<*>} options.fetchOne - Fetches a single entity.
 * @param {number} options.sleepTime - Delay after each item, in milliseconds.
 * @returns {Promise<void>}
 */
async function fetchMissing({ label, sql, idColumn, fetchOne, sleepTime }) {
    const rows = await dbUtils.query(sql, []);
    const ids = rows.map((row) => row[idColumn]);
    for (let i = 0; i < ids.length; i++) {
        await checkIsExit();
        const id = ids[i];
        console.log(`${i}/${ids.length} | ${label}: ${id} | ${await statistics()}`);
        try {
            await fetchOne(id);
        } catch (err) {
            // Best effort: one failing item must not stop the rest of the batch.
            console.error(err);
        }
        await sleepUtils.sleep(sleepTime);
    }
}

/**
 * Fetches everything the database references but does not yet contain, in
 * this order: lyrics, songs, albums, artists.
 *
 * @param {number} sleepTime - Delay after each fetched item, in milliseconds.
 * @returns {Promise<void>}
 */
async function startGet(sleepTime) {
    // Songs that have no lyric row yet.
    console.log("start fetching lyrics ...");
    await fetchMissing({
        label: "lyric",
        sql: `
            SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM lyric )
        `,
        idColumn: "song_id",
        fetchOne: (songId) => lyricInfoUtils.fetch({ songId: songId }),
        sleepTime,
    });

    // Songs referenced by a relation table but missing from `song`.
    console.log("start fetching songs ...");
    await fetchMissing({
        label: "song",
        sql: `
            SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
            UNION
            SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
        `,
        idColumn: "song_id",
        fetchOne: (songId) => songInfoUtils.fetch({ songId: songId }),
        sleepTime,
    });

    // Albums referenced by song_album_relation but missing from `album`.
    console.log("start fetching albums ...");
    await fetchMissing({
        label: "album",
        sql: `
            SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
        `,
        idColumn: "album_id",
        fetchOne: (albumId) => albumInfoUtils.fetch({ albumId: albumId }),
        sleepTime,
    });

    // Artists referenced by song_artist_relation but missing from `artist`.
    console.log("start fetching artists ...");
    await fetchMissing({
        label: "artist",
        sql: `
            SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist )
        `,
        idColumn: "artist_id",
        fetchOne: (artistId) => artistInfoUtils.fetch({ artistId: artistId }),
        sleepTime,
    });
}
2022-10-01 18:53:19 +08:00
/**
 * Re-fetches albums that are already stored, to refresh their data.
 *
 * Selects every album whose `version` is 1 and calls
 * `albumInfoUtils.update` for each one sequentially, polling the stop flag
 * before every item and sleeping 100 ms after it. A failing album is logged
 * and skipped.
 *
 * @returns {Promise<void>}
 */
async function update() {
    console.log("neteaseMusic update ...");
    console.log(`数据统计: ${await statistics()}`);

    const sleepTime = 100;

    // Load the existing albums from the database and update them one by one.
    console.log("start fetching albums ...");
    const rows = await dbUtils.query(`
        SELECT DISTINCT album_id FROM album WHERE version = 1 -- and description like '%专辑《%》,简介:%'
    `, []);
    const albumIds = rows.map((row) => row.album_id);

    for (const [index, albumId] of albumIds.entries()) {
        await checkIsExit();
        console.log(`${index}/${albumIds.length} | album: ${albumId} | ${await statistics()}`);
        try {
            await albumInfoUtils.update({ albumId: albumId });
        } catch (err) {
            // Best effort: keep going with the remaining albums.
            console.error(err);
        }
        await sleepUtils.sleep(sleepTime);
    }
}
2022-09-30 21:33:46 +08:00
/**
 * Builds a one-line summary of the crawl progress.
 *
 * Runs a single query that returns the row counts of `song`, `album`,
 * `artist`, `song_album_relation` and `song_artist_relation`, plus the number
 * of distinct song ids referenced by either relation table that are not in
 * `song` yet.
 *
 * @returns {Promise<string>} e.g.
 *     "song: 10/15, album: 3, artist: 4, songAlbum: 6, songArtist: 7",
 *     where the song part is `stored/(stored + waiting)`.
 */
async function statistics() {
    // The waiting count is taken over the UNION of both relation tables so a
    // missing song referenced by both is counted once, the same set that
    // startGet() iterates over. Summing two separate counts double-counts it.
    const sql = `
        SELECT
            song_count,
            song_waiting,
            album_count,
            artist_count,
            song_album_count,
            song_artist_count
        FROM
            ( SELECT count(*) AS song_count FROM song ) t_song,
            ( SELECT count(*) AS album_count FROM album ) t_album,
            ( SELECT count(*) AS artist_count FROM artist ) t_artist,
            ( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album,
            ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist,
            ( SELECT count(*) AS song_waiting FROM (
                SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
                UNION
                SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
            ) t_missing_song ) t_song_waiting
    `;

    const result = await dbUtils.query(sql, []);
    const {
        song_count: songCount,
        song_waiting: songWaiting,
        album_count: albumCount,
        artist_count: artistCount,
        song_album_count: songAlbumCount,
        song_artist_count: songArtistCount,
    } = result[0];

    return [
        `song: ${songCount}/${songCount + songWaiting}`,
        `album: ${albumCount}`,
        `artist: ${artistCount}`,
        `songAlbum: ${songAlbumCount}`,
        `songArtist: ${songArtistCount}`
    ].join(', ');
}
2022-09-30 08:06:14 +08:00
module.exports = {
main: main,
2022-10-01 18:53:19 +08:00
update: update,
test: test,
2022-09-30 08:06:14 +08:00
}