From b35918faef07294fb07075aecbd433a41e5a94d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E5=B0=8F=E5=A2=A8?= <2291200076@qq.com> Date: Wed, 5 Oct 2022 11:41:30 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=BA=93=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E7=B4=A2=E5=BC=95=EF=BC=9Bupdate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/launch.json | 9 + config.json | 24 +++ netease_music/index.js | 183 +++++++++++--------- netease_music/sql/structure.sql | 20 ++- netease_music/src/getInfo/lyricInfoUtils.js | 7 + netease_music/src/getInfo/songInfoUtils.js | 4 +- utils/dbPoolUtils.js | 2 +- utils/dbUtils.js | 2 +- watch.js | 3 +- 9 files changed, 159 insertions(+), 95 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 02d3d0b..d73930d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -30,6 +30,15 @@ "/**" ], "program": "${workspaceFolder}\\test.js" + }, + { + "type": "node", + "request": "launch", + "name": "node watch", + "skipFiles": [ + "/**" + ], + "program": "${workspaceFolder}\\watch.js" } ] } \ No newline at end of file diff --git a/config.json b/config.json index c8d4cb3..9f7a69f 100644 --- a/config.json +++ b/config.json @@ -1,10 +1,34 @@ { "mysql": { + "charset": "utf8mb4", + "host": "rm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com", + "user": "root", + "password": "Oj13EzoppxXvMmjPKh", + "port": 3306, + "database": "" + }, + "mysql_aliyun": { + "charset": "utf8mb4", + "host": "rm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com", + "user": "root", + "password": "Oj13EzoppxXvMmjPKh", + "port": 3306, + "database": "" + }, + "mysql_local": { "charset": "utf8mb4", "host": "localhost", "user": "root", "password": "root", "port": 3306, "database": "" + }, + "mysql_server": { + "charset": "utf8mb4", + "host": "39.99.244.156", + "user": "root", + "password": "Oj13EzoppxXvMmjPKh", + "port": 3306, + "database": "" } } \ No newline at end of file diff --git a/netease_music/index.js b/netease_music/index.js index 81f852c..a1717b2 100644 --- a/netease_music/index.js +++ b/netease_music/index.js @@ -75,101 +75,116 @@ async function update() { /** * 统计数据库中数据 */ -let watchParam = { - statisticTime: Date.now(), - songCount: 0, - albumCount: 0, - artistCount: 0, - lyricCount: 0, - commentCount: 0, - commentTotalCount: 0, -}; +let oldWatchParam = {}; async function watch() { - let sql = ` - SELECT - song_count, - song_waiting_1 + song_waiting_2 as song_waiting, + console.log(`开始统计 ... ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`); + let statisticTime = Date.now(); + let newWatchParam = {}; + let sqls = [ + // InnoDB count(*) 会扫描全表,粗略数据可以通过 show table status 查看 + { + name: "songCount", + sql: `SELECT count(*) AS count FROM song`, + }, { + name: "songWaiting", + sql: `SELECT count(DISTINCT song_id) AS count + FROM ( SELECT song_id FROM song_artist_relation UNION SELECT song_id FROM song_album_relation ) t_tmp + WHERE song_id NOT IN ( SELECT song_id FROM song )`, + }, { + name: "albumCount", + sql: `SELECT count(*) AS count FROM album`, + }, { + name: "albumWaiting", + sql: `SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )`, + }, { + name: "artistCount", + sql: `SELECT count(*) AS count FROM artist`, + }, { + name: "artistWaiting", + sql: `SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )`, + }, { + name: "lyricCount", + sql: `SELECT count(*) AS count FROM lyric`, + }, { + name: "commentCount", + sql: `SELECT count( DISTINCT song_id ) AS count FROM comment`, + }, { + name: "commentTotalCount", + sql: `SELECT count(*) AS count FROM comment`, + }, { + name: "songAlbumCount", + sql: `SELECT count(*) AS count FROM song_album_relation`, + }, { + name: "songArtistCount", + sql: `SELECT count(*) AS count FROM song_artist_relation`, + } + ]; + let sqlsTimeSpent = 0; + let promiseList = []; + for (let i = 0; i < sqls.length; i++) { + const sql = sqls[i]; + if (!sql.sql) continue; // 跳过注释掉SQL的项 + promiseList.push(new Promise(async (resolve, reject) => { + // console.log(`query ${sql.name} ...`); + let sqlStartTime = Date.now(); + let result = await dbUtils.query(sql.sql, []); + let sqlTimeSpent = Date.now() - sqlStartTime; + sqlsTimeSpent += sqlTimeSpent; + newWatchParam[sql.name] = result[0].count; + console.log(`query ${sql.name} finished.\tspend time: ${sqlTimeSpent}ms (${(sqlTimeSpent / 1000).toFixed(2)}s),\tcount: ${newWatchParam[sql.name]}`); + resolve(); + })); + } + await Promise.all(promiseList); - album_count, - album_waiting, - - artist_count, - artist_waiting, - - lyric_count, - - comment_count, - comment_total_count, - - song_album_count, - song_artist_count - FROM - ( SELECT count(*) AS song_count FROM song ) t_song, - ( SELECT count( DISTINCT song_id ) as song_waiting_1 FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_artist, - ( SELECT count( DISTINCT song_id ) as song_waiting_2 FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_album, - - ( SELECT count(*) AS album_count FROM album ) t_album, - ( SELECT count( DISTINCT album_id ) as album_waiting FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) ) as t_album_waiting_song_album, - - ( SELECT count(*) AS artist_count FROM artist ) t_artist, - ( SELECT count( DISTINCT artist_id ) as artist_waiting FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) ) as t_album_waiting_song_artist, - - ( SELECT count(*) AS lyric_count FROM lyric ) t_lyric, - - ( SELECT count( DISTINCT song_id ) AS comment_count, count( comment_id ) AS comment_total_count FROM comment ) t_comment, - - ( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album, - ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist - `; - console.log("开始统计 ..."); - let startTime = Date.now(); - let result = await dbUtils.query(sql, []); - let timeSpent = Date.now() - startTime; - - let songCount = result[0].song_count; - let songWaiting = result[0].song_waiting; - - let albumCount = result[0].album_count; - let albumWaiting = result[0].album_waiting; - - let artistCount = result[0].artist_count; - let artistWaiting = result[0].artist_waiting; - - let lyricCount = result[0].lyric_count; - - let commentCount = result[0].comment_count; - let commentTotalCount = result[0].comment_total_count; - - let songAlbumCount = result[0].song_album_count; - let songArtistCount = result[0].song_artist_count; - - let statisticTimeDelta = Date.now() - watchParam.statisticTime; + // let tableCountResult = await dbUtils.query("show table status"); + // let tableCount = {}; // 查询近似值代替精确查询 + // tableCountResult.forEach(rowData => tableCount[rowData.Name] = rowData.Rows); + // newWatchParam['commentTotalCount'] = tableCount['comment']; + let statisticTimeDelta = Date.now() - statisticTime; let statisticsString = [ - `${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`, - `[与上次运行统计时相比] deltaTime: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s)`, - `song: ${songCount - watchParam.songCount}, album: ${albumCount - watchParam.albumCount}, artist: ${artistCount - watchParam.artistCount}, lyric: ${lyricCount - watchParam.lyricCount}, comment: ${commentCount - watchParam.commentCount}(song)/${commentTotalCount - watchParam.commentTotalCount}(comment)`, + ``, + `统计完成 ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`, + `spend time: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s; ${(statisticTimeDelta / (60 * 1000)).toFixed(2)}min), sql query time (sum): ${sqlsTimeSpent}ms (${(sqlsTimeSpent / 1000).toFixed(2)}s; ${(sqlsTimeSpent / (60 * 1000)).toFixed(2)}min)`, + `[与上次运行统计时相比]`, + [ + `song: ${newWatchParam['songCount'] - oldWatchParam['songCount']}`, + `album: ${newWatchParam['albumCount'] - oldWatchParam['albumCount']}`, + `artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`, + `lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`, + `comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`, + ].join(', '), `[已爬取]`, - `song: ${songCount}, album: ${albumCount}, artist: ${artistCount}, lyric: ${lyricCount}, comment: ${commentCount}(song)/${commentTotalCount}(comment)`, + [ + `song: ${newWatchParam['songCount']}`, + `album: ${newWatchParam['albumCount']}`, + `artist: ${newWatchParam['artistCount']}`, + `lyric: ${newWatchParam['lyricCount']}`, + `comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`, + ].join(', '), `[待爬取]`, - `song: ${songWaiting}, album: ${albumWaiting}, artist: ${artistWaiting}, lyric: ${songCount - lyricCount}, comment: ${songCount - commentCount}`, + [ + `song: ${newWatchParam['songWaiting']}`, + `album: ${newWatchParam['albumWaiting']}`, + `artist: ${newWatchParam['artistWaiting']}`, + `lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`, + `comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`, + ].join(', '), `[总计] (已爬取 + 待爬取)`, - `song: ${songCount + songWaiting}, album: ${albumCount + albumWaiting}, artist: ${artistCount + artistWaiting}, lyric: ${songCount}, comment: ${songCount}`, + [ + `song: ${newWatchParam['songCount'] + newWatchParam['songWaiting']}`, + `album: ${newWatchParam['albumCount'] + newWatchParam['albumWaiting']}`, + `artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`, + `lyric: ${newWatchParam['songCount']}`, + `comment: ${newWatchParam['songCount']}`, + ].join(', '), `[关联关系统计]`, - `song-album: ${songAlbumCount}, song-artist: ${songArtistCount}`, - `sql query time: ${timeSpent}ms (${(timeSpent / 1000).toFixed(2)}s)`, + `song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`, `` ].join('\n'); console.log(statisticsString); - watchParam = { - statisticTime: Date.now(), - songCount: songCount, - albumCount: albumCount, - artistCount: artistCount, - lyricCount: lyricCount, - commentCount: commentCount, - commentTotalCount: commentTotalCount, - } + oldWatchParam = newWatchParam; } /** diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql index 12a7690..e5de3a8 100644 --- a/netease_music/sql/structure.sql +++ b/netease_music/sql/structure.sql @@ -18,7 +18,8 @@ CREATE TABLE `artist` ( `pub_date` varchar(100) NOT NULL COMMENT '发布日期', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - PRIMARY KEY (`artist_id`) + PRIMARY KEY (`artist_id`), + KEY `artist_id` (`artist_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `album` ( @@ -32,7 +33,8 @@ CREATE TABLE `album` ( `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', `version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)', - PRIMARY KEY (`album_id`) + PRIMARY KEY (`album_id`), + KEY `album_id` (`album_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `song_album_relation` ( @@ -40,7 +42,9 @@ CREATE TABLE `song_album_relation` ( `album_id` int(10) unsigned NOT NULL COMMENT '专辑id', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - PRIMARY KEY (`song_id`,`album_id`) + PRIMARY KEY (`song_id`,`album_id`), + KEY `song_id` (`song_id`), + KEY `album_id` (`album_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `song_artist_relation` ( @@ -48,7 +52,9 @@ CREATE TABLE `song_artist_relation` ( `artist_id` int(10) unsigned NOT NULL COMMENT '歌手id', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - PRIMARY KEY `song_id` (`song_id`,`artist_id`) + PRIMARY KEY `song_id` (`song_id`,`artist_id`), + KEY `song_id` (`song_id`), + KEY `artist_id` (`artist_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `lyric` ( @@ -57,7 +63,8 @@ CREATE TABLE `lyric` ( `lyric` text NOT NULL COMMENT '歌词', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - PRIMARY KEY (`song_id`,`version`) + PRIMARY KEY (`song_id`,`version`), + KEY `song_id` (`song_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `user` ( @@ -81,7 +88,8 @@ CREATE TABLE `comment` ( `comment_type` tinyint(4) unsigned NOT NULL COMMENT '评论类型 0-comments 1-hotComments 2-topComments', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - PRIMARY KEY (`comment_id`) + PRIMARY KEY (`comment_id`), + INDEX `song_id` (`song_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `comment_progress` ( diff --git a/netease_music/src/getInfo/lyricInfoUtils.js b/netease_music/src/getInfo/lyricInfoUtils.js index 66661f6..5826fed 100644 --- a/netease_music/src/getInfo/lyricInfoUtils.js +++ b/netease_music/src/getInfo/lyricInfoUtils.js @@ -28,6 +28,13 @@ async function fetchAll() { // 获取歌词详情 async function fetch({ songId, debug = false }) { + let result = await dbUtils.query('SELECT count(*) as count FROM lyric WHERE song_id = ?', [songId]); + if (result[0].count > 0 && !debug) { + // 这里暂时跳过,后期可能要考虑歌词version更新的问题 + console.log(`数据库中已有数据,跳过 songId: ${songId}`); + return; + } + var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`; // &kv=1&tv=-1 try { // var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8'); diff --git a/netease_music/src/getInfo/songInfoUtils.js b/netease_music/src/getInfo/songInfoUtils.js index a4e2be3..f614585 100644 --- a/netease_music/src/getInfo/songInfoUtils.js +++ b/netease_music/src/getInfo/songInfoUtils.js @@ -27,9 +27,9 @@ async function getFromDatabase({ songId }) { async function fetchAll() { console.log("start fetching songs ..."); var songIds = await dbUtils.query(` - SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) + SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ) UNION - SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) + SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song ) `, []); songIds = songIds.map(item => item.song_id); for (let i = 0; i < songIds.length; i++) { diff --git a/utils/dbPoolUtils.js b/utils/dbPoolUtils.js index 25330ec..d50a32e 100644 --- a/utils/dbPoolUtils.js +++ b/utils/dbPoolUtils.js @@ -8,7 +8,7 @@ let pool = null; function create({ database, connectionLimit = 10 }) { let config = { connectionLimit: connectionLimit, //连接数量,默认是10 - ...globalConfig.mysql, + ...globalConfig[global.dbConfig || 'mysql'], database: database, }; // console.log(config); diff --git a/utils/dbUtils.js b/utils/dbUtils.js index 6af48f5..747b884 100644 --- a/utils/dbUtils.js +++ b/utils/dbUtils.js @@ -12,7 +12,7 @@ function create({ database }) { async function query(sql, params) { let config = { - ...globalConfig.mysql, + ...globalConfig[global.dbConfig || 'mysql'], database: databaseName, }; // console.log(config); diff --git a/watch.js b/watch.js index 7fe34db..57a34e6 100644 --- a/watch.js +++ b/watch.js @@ -1,10 +1,11 @@ let keepWatching = true; if (keepWatching) { global.useMysqlPool = true; - global.connectionLimit = 1; + global.connectionLimit = 15; } else { global.useMysqlPool = false; } +global.dbConfig = 'mysql_local'; const neteaseMusic = require('./netease_music/index'); const sleepUtils = require('./utils/sleepUtils');