const fs = require('fs'); const path = require('path'); const dbUtils = require(global.useMysqlPool ? '../utils/dbPoolUtils' : '../utils/dbUtils'); const requestUtils = require('../utils/requestUtils'); const sleepUtils = require('../utils/sleepUtils'); dbUtils.create({ database: "neteaseMusic", // 指定数据库 connectionLimit: 8, // 设置数据库连接池数量 }); global.dbUtils = dbUtils; const songInfoUtils = require('./src/getInfo/songInfoUtils'); const artistInfoUtils = require('./src/getInfo/artistInfoUtils'); const albumInfoUtils = require('./src/getInfo/albumInfoUtils'); const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils'); console.log("global.useMysqlPool:", !!global.useMysqlPool); // 退出检查 async function checkIsExit() { if (fs.readFileSync('stop.txt') != "1") return; console.log(); console.log(`收到退出指令,准备退出...`); await sleepUtils.sleep(500); await dbUtils.close(); console.log(`数据库连接池已关闭`); await sleepUtils.sleep(100); process.exit(0); } // 测试 async function test() { console.log("neteaseMusic test..."); // getMusicInfo({ songId: "1855221507" }); // getArtistInfo({ artistId: "1079074" }); // getAlbumInfo({ albumId: "74268047" }); // 不是所有歌手都有个人主页 例如 https://music.163.com/#/artist?id=1079075 // getUserInfo({ userId: "37365202" }); // let res = await albumInfoUtils.getFromDatabase({ albumId: "34943450" }); // let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" }); // let res = await songInfoUtils.getFromDatabase({ songId: "437608327" }); // console.log(res); } async function main() { console.log("neteaseMusic Starting..."); console.log(`数据统计: ${await statistics()}`); while (true) { // 删除脏数据 var affectRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []); var affectRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []); console.log(`删除脏数据 affectRows:`, affectRows1.affectedRows, affectRows2.affectedRows); await startGet(100); await sleepUtils.sleep(2000); } } async function startGet(sleepTime) { // 从数据库中查出还缺少的歌词,并进行爬取 console.log("start fetching lyrics ..."); var songIds = await dbUtils.query(` SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM lyric ) `, []); songIds = songIds.map(song => song.song_id); for (let i = 0; i < songIds.length; i++) { await checkIsExit(); const songId = songIds[i]; console.log(`${i}/${songIds.length} | lyric: ${songId} | ${await statistics()}`); try { await lyricInfoUtils.fetch({ songId: songId }); } catch (err) { console.error(err); } await sleepUtils.sleep(sleepTime); } // 从数据库中查出还缺少的歌曲,并进行爬取 console.log("start fetching songs ..."); var songIds = await dbUtils.query(` SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) UNION SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) `, []); songIds = songIds.map(item => item.song_id); for (let i = 0; i < songIds.length; i++) { await checkIsExit(); const songId = songIds[i]; console.log(`${i}/${songIds.length} | song: ${songId} | ${await statistics()}`); try { await songInfoUtils.fetch({ songId: songId }); } catch (err) { console.error(err); } await sleepUtils.sleep(sleepTime); } // 从数据库中查出还缺少的专辑,并进行爬取 console.log("start fetching albums ...") var albumIds = await dbUtils.query(` SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) `, []); albumIds = albumIds.map(item => item.album_id); for (let i = 0; i < albumIds.length; i++) { await checkIsExit(); const albumId = albumIds[i]; console.log(`${i}/${albumIds.length} | album: ${albumId} | ${await statistics()}`); try { await albumInfoUtils.fetch({ albumId: albumId }); } catch (err) { console.error(err); } await sleepUtils.sleep(sleepTime); } // 从数据库中查出还缺少的歌手,并进行爬取 console.log("start fetching artists ...") var artistIds = await dbUtils.query(` SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) `, []); artistIds = artistIds.map(item => item.artist_id); for (let i = 0; i < artistIds.length; i++) { await checkIsExit(); const artistId = artistIds[i]; console.log(`${i}/${artistIds.length} | artist: ${artistId} | ${await statistics()}`); try { await artistInfoUtils.fetch({ artistId: artistId }); } catch (err) { console.error(err); } await sleepUtils.sleep(sleepTime); } } async function update() { console.log("neteaseMusic update ..."); console.log(`数据统计: ${await statistics()}`); let sleepTime = 100; // 从数据库中查出现有专辑,并进行更新 console.log("start fetching albums ...") let albumIds = await dbUtils.query(` SELECT DISTINCT album_id FROM album WHERE version = 1 -- and description like '%专辑《%》,简介:%' `, []); albumIds = albumIds.map(item => item.album_id); for (let i = 0; i < albumIds.length; i++) { await checkIsExit(); const albumId = albumIds[i]; console.log(`${i}/${albumIds.length} | album: ${albumId} | ${await statistics()}`); try { await albumInfoUtils.update({ albumId: albumId }); } catch (err) { console.error(err); } await sleepUtils.sleep(sleepTime); } } async function statistics() { let sql = ` SELECT song_count, song_waiting_1 + song_waiting_2 as song_waiting, album_count, artist_count, song_album_count, song_artist_count FROM ( SELECT count(*) AS song_count FROM song ) t_song, ( SELECT count(*) AS album_count FROM album ) t_album, ( SELECT count(*) AS artist_count FROM artist ) t_artist, ( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album, ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist, ( SELECT count( DISTINCT song_id ) as song_waiting_1 FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_artist, ( SELECT count( DISTINCT song_id ) as song_waiting_2 FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_album `; let result = await dbUtils.query(sql, []); let songCount = result[0].song_count; let songWaiting = result[0].song_waiting; let albumCount = result[0].album_count; let artistCount = result[0].artist_count; let songAlbumCount = result[0].song_album_count; let songArtistCount = result[0].song_artist_count; return [ `song: ${songCount}/${songCount + songWaiting}`, `album: ${albumCount}`, `artist: ${artistCount}`, `songAlbum: ${songAlbumCount}`, `songArtist: ${songArtistCount}` ].join(', '); } module.exports = { main: main, update: update, test: test, }