diff --git a/netease_music/index.js b/netease_music/index.js index eba2841..78098a6 100644 --- a/netease_music/index.js +++ b/netease_music/index.js @@ -14,6 +14,7 @@ global.dbUtils = dbUtils; const songInfoUtils = require('./src/getInfo/songInfoUtils'); const artistInfoUtils = require('./src/getInfo/artistInfoUtils'); const albumInfoUtils = require('./src/getInfo/albumInfoUtils'); +const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils'); console.log("global.useMysqlPool:", !!global.useMysqlPool); @@ -30,27 +31,10 @@ async function checkIsExit() { process.exit(0); } +// 测试 async function test() { console.log("neteaseMusic test..."); - let songIds = (await dbUtils.query(` - SELECT DISTINCT song_id FROM song_lyric WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) - `, [])).map(song => song.song_id); - - for (let songId of songIds) { - var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`; - var json = await requestUtils.getApiResult(url); - var lyric = JSON.parse(json).lrc; - console.log(lyric); - } - // fs.writeFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), html); -} - - -async function main() { - console.log("neteaseMusic Starting..."); - console.log(`数据统计: ${await statistics()}`); - // getMusicInfo({ songId: "1855221507" }); // getArtistInfo({ artistId: "1079074" }); // getAlbumInfo({ albumId: "74268047" }); @@ -58,21 +42,51 @@ async function main() { // 不是所有歌手都有个人主页 例如 https://music.163.com/#/artist?id=1079075 // getUserInfo({ userId: "37365202" }); + // let res = await albumInfoUtils.getFromDatabase({ albumId: "34943450" }); + // let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" }); + // let res = await songInfoUtils.getFromDatabase({ songId: "437608327" }); + // console.log(res); +} + + +async function main() { + console.log("neteaseMusic Starting..."); + console.log(`数据统计: ${await statistics()}`); + while (true) { // 删除脏数据 var affectRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []); var affectRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []); console.log(`删除脏数据 affectRows:`, affectRows1.affectedRows, affectRows2.affectedRows); - await startGet(1); + await startGet(100); await sleepUtils.sleep(2000); } } async function startGet(sleepTime) { + + // 从数据库中查出还缺少的歌词,并进行爬取 + console.log("start fetching lyrics ..."); + var songIds = await dbUtils.query(` + SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM lyric ) + `, []); + songIds = songIds.map(song => song.song_id); + for (let i = 0; i < songIds.length; i++) { + await checkIsExit(); + const songId = songIds[i]; + console.log(`${i}/${songIds.length} | lyric: ${songId} | ${await statistics()}`); + try { + await lyricInfoUtils.fetch({ songId: songId }); + } catch (err) { + console.error(err); + } + await sleepUtils.sleep(sleepTime); + } + // 从数据库中查出还缺少的歌曲,并进行爬取 console.log("start fetching songs ..."); - let songIds = await dbUtils.query(` + var songIds = await dbUtils.query(` SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) UNION SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) @@ -92,7 +106,7 @@ async function startGet(sleepTime) { // 从数据库中查出还缺少的专辑,并进行爬取 console.log("start fetching albums ...") - let albumIds = await dbUtils.query(` + var albumIds = await dbUtils.query(` SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) `, []); albumIds = albumIds.map(item => item.album_id); @@ -110,7 +124,7 @@ async function startGet(sleepTime) { // 从数据库中查出还缺少的歌手,并进行爬取 console.log("start fetching artists ...") - let artistIds = await dbUtils.query(` + var artistIds = await dbUtils.query(` SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) `, []); artistIds = artistIds.map(item => item.artist_id); @@ -156,26 +170,34 @@ async function statistics() { let sql = ` SELECT song_count, + song_waiting_1 + song_waiting_2 as song_waiting, album_count, - album_v1_count, artist_count, song_album_count, song_artist_count FROM - ( SELECT count(*) AS song_count FROM song ) t1, - ( SELECT count(*) AS album_count FROM album ) t2, - ( SELECT count(*) AS album_v1_count FROM album WHERE version = 1 ) t3_1, - ( SELECT count(*) AS artist_count FROM artist ) t3, - ( SELECT count(*) AS song_album_count FROM song_album_relation ) t4, - ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t5`; + ( SELECT count(*) AS song_count FROM song ) t_song, + ( SELECT count(*) AS album_count FROM album ) t_album, + ( SELECT count(*) AS artist_count FROM artist ) t_artist, + ( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album, + ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist, + ( SELECT count( DISTINCT song_id ) as song_waiting_1 FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_artist, + ( SELECT count( DISTINCT song_id ) as song_waiting_2 FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_album + `; let result = await dbUtils.query(sql, []); let songCount = result[0].song_count; + let songWaiting = result[0].song_waiting; let albumCount = result[0].album_count; - let albumV1Count = result[0].album_v1_count; let artistCount = result[0].artist_count; let songAlbumCount = result[0].song_album_count; let songArtistCount = result[0].song_artist_count; - return `song: ${songCount}, album: ${albumCount}(v1: ${albumV1Count}), artist: ${artistCount} | songAlbum: ${songAlbumCount}, songArtist: ${songArtistCount}`; + return [ + `song: ${songCount}/${songCount + songWaiting}`, + `album: ${albumCount}`, + `artist: ${artistCount}`, + `songAlbum: ${songAlbumCount}`, + `songArtist: ${songArtistCount}` + ].join(', '); } module.exports = { diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql index 1781293..066b9b2 100644 --- a/netease_music/sql/structure.sql +++ b/netease_music/sql/structure.sql @@ -40,7 +40,7 @@ CREATE TABLE `song_album_relation` ( `album_id` int(10) unsigned NOT NULL COMMENT '专辑id', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - PRIMARY KEY `song_id` (`song_id`,`album_id`) + PRIMARY KEY (`song_id`,`album_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `song_artist_relation` ( @@ -53,11 +53,11 @@ CREATE TABLE `song_artist_relation` ( CREATE TABLE `lyric` ( `song_id` int(10) unsigned NOT NULL COMMENT '歌曲id', - `lyric` text NOT NULL COMMENT '歌词', `version` int(10) unsigned NOT NULL COMMENT '版本号', + `lyric` text NOT NULL COMMENT '歌词', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - PRIMARY KEY (`song_id`) + PRIMARY KEY (`song_id`,`version`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `log` ( diff --git a/netease_music/src/getInfo/lyricInfoUtils.js b/netease_music/src/getInfo/lyricInfoUtils.js new file mode 100644 index 0000000..6afad0e --- /dev/null +++ b/netease_music/src/getInfo/lyricInfoUtils.js @@ -0,0 +1,44 @@ +const fs = require('fs'); +const path = require('path'); + +const requestUtils = require('../../../utils/requestUtils'); + +const dbUtils = global.dbUtils; + +// 获取歌词详情 +async function fetch({ songId }) { + var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`; + + try { + // var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8'); + var json = await requestUtils.getApiResult(url); + // fs.writeFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), json); + } catch (errors) { + console.error(errors); + return; + } + + try { + var lyric = JSON.parse(json).lrc; // { version: xx, lyric: 'xxx' } + } catch (error) { + console.error(error); + return; + } + + let lyricInfo = { + songId: songId, + lyric: lyric.lyric, + version: lyric.version, + }; + // console.log("lyricInfo", lyricInfo); + dbUtils.query('INSERT IGNORE INTO lyric SET ?', { + song_id: lyricInfo.songId, + lyric: lyricInfo.lyric, + version: lyricInfo.version, + }); + return lyricInfo; +} + +module.exports = { + fetch: fetch, +} \ No newline at end of file