From 3660fefda49c4bc9c5d00a8f5d41f82515a91007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E5=B0=8F=E5=A2=A8?= <2291200076@qq.com> Date: Tue, 25 Oct 2022 19:36:05 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=92=E5=85=A5=E5=85=B3=E8=81=94=E8=A1=A8?= =?UTF-8?q?=E6=97=B6=E5=90=8C=E4=BA=8B=E6=8F=92=E5=85=A5wait=5Fcheck?= =?UTF-8?q?=E8=A1=A8=EF=BC=9B=E7=BB=9F=E4=B8=80=E6=9F=A5=E8=AF=A2=E5=B0=86?= =?UTF-8?q?=E8=A6=81=E7=88=AC=E5=8F=96=E7=9A=84id=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E5=88=B0dataManager.js?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- netease_music/index.js | 23 +++-- netease_music/sql/structure.sql | 49 +++++++--- netease_music/src/dataManager.js | 95 ++++++++++++++++++-- netease_music/src/getInfo/albumInfoUtils.js | 29 ++---- netease_music/src/getInfo/artistInfoUtils.js | 22 +---- netease_music/src/getInfo/playlistUtils.js | 1 + netease_music/src/getInfo/songInfoUtils.js | 40 +++------ netease_music/src/index.js | 40 +++++---- netease_music/todo.txt | 12 ++- 9 files changed, 192 insertions(+), 119 deletions(-) diff --git a/netease_music/index.js b/netease_music/index.js index b4e208c..2a959a1 100644 --- a/netease_music/index.js +++ b/netease_music/index.js @@ -3,18 +3,31 @@ if (process.argv.length <= 2) { "参数不够", "node index --utils [song|album|artist|lyric|comment] --min [number] --max [number] --order [false|ASC|DESC] --limit [number]", // "", - // "node index --utils song --min xxx --max xxx --order ASC --limit 2000", - // "node index --utils album --min xxx --max xxx --order ASC --limit 2000", - // "node index --utils artist --min xxx --max xxx --order ASC --limit 2000", - // "node index --utils lyric --min xxx --max xxx --order ASC --limit 2000", - // "node index --utils comment --min xxx --max xxx --order ASC --limit 2000", + // "node index --utils xxx --min xxx --max xxx --order ASC --limit 2000", ].join('\n'); console.log(output); return; } var args = 
require('minimist')(process.argv.slice(2)); +args = { + // 子模块 + utils: args.utils, + // id 范围 + min: Number(args.min) || undefined, + max: Number(args.max) || undefined, + // 顺序 + order: args.order, + // 数量 + limit: Number(args.limit) || undefined, + // 分区 + partition: Number(args.partition) || undefined, + + sleepTime: Number(args.sleepTime) || 100, +} + console.log("args:", args); +global.sleepTime = args.sleepTime; // 两次请求之间停顿时间 global.useMysqlPool = true; const neteaseMusic = require('./src/index'); neteaseMusic.main(args); \ No newline at end of file diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql index 59b7712..a114d43 100644 --- a/netease_music/sql/structure.sql +++ b/netease_music/sql/structure.sql @@ -212,26 +212,49 @@ CREATE TABLE `analysis` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -CREATE TABLE `wait_song` ( - `id` int(10) unsigned NOT NULL COMMENT 'id', - `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4', + +CREATE TABLE `wait_check_song` ( + `id` bigint(20) unsigned NOT NULL COMMENT 'id', PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -CREATE TABLE `wait_artist` ( - `id` int(10) unsigned NOT NULL COMMENT 'id', - `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4', +CREATE TABLE `wait_check_artist` ( + `id` bigint(20) unsigned NOT NULL COMMENT 'id', PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -CREATE TABLE `wait_album` ( - `id` int(10) unsigned NOT NULL COMMENT 'id', - `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4', +CREATE TABLE `wait_check_album` ( + `id` bigint(20) unsigned NOT NULL COMMENT 'id', PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -CREATE TABLE `wait_lyric` ( - `id` int(10) unsigned NOT NULL COMMENT 'id', - `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4', +CREATE TABLE `wait_check_lyric` ( + `id` bigint(20) unsigned NOT 
NULL COMMENT 'id', PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; \ No newline at end of file +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + + + +CREATE TABLE `wait_fetch_song` ( + `id` bigint(20) unsigned NOT NULL COMMENT 'id', + `partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + +CREATE TABLE `wait_fetch_artist` ( + `id` bigint(20) unsigned NOT NULL COMMENT 'id', + `partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + +CREATE TABLE `wait_fetch_album` ( + `id` bigint(20) unsigned NOT NULL COMMENT 'id', + `partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + +CREATE TABLE `wait_fetch_lyric` ( + `id` bigint(20) unsigned NOT NULL COMMENT 'id', + `partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; diff --git a/netease_music/src/dataManager.js b/netease_music/src/dataManager.js index 13d5dde..c6e8ef4 100644 --- a/netease_music/src/dataManager.js +++ b/netease_music/src/dataManager.js @@ -20,8 +20,27 @@ module.exports = { songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2 ])]); }, + + getIdsToFetch: async (args) => { + let whereClause = [ + args.min ? `song_id > ${args.min}` : '1=1', + args.max ? `song_id <= ${args.max}` : '1=1', + ].join(' AND '); + let sql = ` + SELECT song_id FROM wait_fetch_song WHERE ${whereClause} + ${args.order ? `ORDER BY song_id ${args.order}` : ''} + ${args.limit ? 
`LIMIT ${args.limit}` : ''} + `; + // // 更新现有数据 + // sql = `SELECT song_id FROM song WHERE data_version = 1`; + console.log(sql); + let songIds = await dbUtils.query(sql, []); + songIds = songIds.map(item => item.song_id); + return songIds; + }, }, + album: { insert: async (albumInfo) => { return await dbUtils.query('INSERT IGNORE INTO album SET ?', albumInfo); @@ -29,21 +48,61 @@ module.exports = { update: async (albumId, albumInfo) => { return await dbUtils.query(`UPDATE album SET ? WHERE album_id = ${albumId}`, albumInfo); - } + }, + + getIdsToFetch: async (args, isUpdate) => { + let sql = ""; + if (isUpdate) { + sql = `SELECT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`; + } else { + let whereClause = [ + args.min ? `album_id > ${args.min}` : '1=1', + args.max ? `album_id <= ${args.max}` : '1=1', + ].join(' AND '); + sql = ` + SELECT album_id FROM wait_fetch_album WHERE ${whereClause} + ${args.order ? `ORDER BY album_id ${args.order}` : ''} + ${args.limit ? `LIMIT ${args.limit}` : ''} + `; + } + console.log(sql); + let albumIds = await dbUtils.query(sql, []); + albumIds = albumIds.map(item => item.album_id); + return albumIds; + }, }, + artist: { insert: async (artistInfo) => { return await dbUtils.query('INSERT IGNORE INTO artist SET ?', artistInfo); }, + + getIdsToFetch: async (args) => { + let whereClause = [ + args.min ? `artist_id > ${args.min}` : '1=1', + args.max ? `artist_id <= ${args.max}` : '1=1', + ].join(' AND '); + let sql = ` + SELECT artist_id FROM wait_fetch_artist WHERE ${whereClause} + ${args.order ? `ORDER BY artist_id ${args.order}` : ''} + ${args.limit ? 
`LIMIT ${args.limit}` : ''} + `; + console.log(sql); + let artistIds = await dbUtils.query(sql, []); + artistIds = artistIds.map(item => item.artist_id); + return artistIds; + } }, + lyric: { insert: async (lyricInfo) => { return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo); - } + }, }, + comment: { insertCollection: async (commentInfoList) => { if (commentInfoList.length == 0) return; @@ -51,15 +110,17 @@ module.exports = { INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ? ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP `, [commentInfoList]); - } + }, }, + comment_progress: { update: async (commentProgressInfo, songId) => { return await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [commentProgressInfo, songId]); }, }, + playlist: { insertCollection: async (playlistInfo) => { if (playlistInfo.length == 0) return; @@ -67,9 +128,10 @@ module.exports = { INSERT INTO playlist ( ${Object.keys(playlistInfo).map(field => `\`${field}\``).join(",")} ) VALUES ? ON DUPLICATE KEY UPDATE ${Object.keys(playlistInfo).map(field => `${field}=VALUES(${field})`).join(", ")} `, [[Object.values(playlistInfo)]]); - } + }, }, + user: { insertCollection: async (userInfoList) => { if (userInfoList.length == 0) return; @@ -77,27 +139,44 @@ module.exports = { INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ? 
ON DUPLICATE KEY UPDATE user_type = VALUES(user_type), nickname = VALUES(nickname), avatar_url = VALUES(avatar_url), modify_time = CURRENT_TIMESTAMP `, [userInfoList]); - } + }, }, + song_album: { insertCollection: async (songAlbumRel) => { if (songAlbumRel.length == 0) return; return await dbUtils.query('INSERT IGNORE INTO song_album_relation (song_id, album_id) VALUES ?', [songAlbumRel]); - } + }, }, + song_artist: { insertCollection: async (songArtistRel) => { if (songArtistRel.length == 0) return; return await dbUtils.query('INSERT IGNORE INTO song_artist_relation (song_id, artist_id) VALUES ?', [songArtistRel]); - } + }, }, + song_playlist: { insertCollection: async (trackIds) => { if (trackIds.length == 0) return; return await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [trackIds]); - } + }, }, + + + /* ##################################################### */ + + + // Insert ids into the wait_check_<type> table + wait_check: { + insert: async (type, ids) => { + // Drop ids that are 0 (or otherwise non-positive); only positive ids are valid + ids = ids.filter(id => id > 0); + return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids]); + }, + }, + }; diff --git a/netease_music/src/getInfo/albumInfoUtils.js b/netease_music/src/getInfo/albumInfoUtils.js index 0d9eac9..9967926 100644 --- a/netease_music/src/getInfo/albumInfoUtils.js +++ b/netease_music/src/getInfo/albumInfoUtils.js @@ -29,28 +29,7 @@ SELECT * FROM album WHERE (full_description = '' or full_description is null) an async function fetchAll({ args = {}, isUpdate = false }) { console.log("start fetching albums ..."); - - if (isUpdate) { - var sql = ` - SELECT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$' - `; - } else { - let whereClause = [ - args.min ? `album_id > ${args.min}` : '1=1', - args.max ? 
`album_id <= ${args.max}` : '1=1', - ].join(' AND '); - var sql = ` - -- 查出来通过代码去重,提高速度 - SELECT album_id FROM song_album_relation WHERE ${whereClause} AND album_id NOT IN ( SELECT album_id FROM album ) - ${args.order ? `ORDER BY album_id ${args.order}` : ''} - ${args.limit ? `LIMIT ${args.limit}` : ''} - `; - console.log(sql); - } - - var albumIds = await dbUtils.query(sql, []); - albumIds = albumIds.map(item => item.album_id); - albumIds = Array.from(new Set(albumIds)); + let albumIds = await dataManager.album.getIdsToFetch(args, isUpdate); for (let i = 0; i < albumIds.length; i++) { await global.checkIsExit(); const albumId = albumIds[i]; @@ -147,7 +126,7 @@ async function fetch({ albumId, debug = false, update = false }) { let image = //.exec(html)[1]; let songListJSONString = /