From 3660fefda49c4bc9c5d00a8f5d41f82515a91007 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E5=B0=8F=E5=A2=A8?=
<2291200076@qq.com>
Date: Tue, 25 Oct 2022 19:36:05 +0800
Subject: [PATCH] =?UTF-8?q?=E6=8F=92=E5=85=A5=E5=85=B3=E8=81=94=E8=A1=A8?=
=?UTF-8?q?=E6=97=B6=E5=90=8C=E4=BA=8B=E6=8F=92=E5=85=A5wait=5Fcheck?=
=?UTF-8?q?=E8=A1=A8=EF=BC=9B=E7=BB=9F=E4=B8=80=E6=9F=A5=E8=AF=A2=E5=B0=86?=
=?UTF-8?q?=E8=A6=81=E7=88=AC=E5=8F=96=E7=9A=84id=E4=BB=A3=E7=A0=81?=
=?UTF-8?q?=E5=88=B0dataManager.js?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
netease_music/index.js | 23 +++--
netease_music/sql/structure.sql | 49 +++++++---
netease_music/src/dataManager.js | 95 ++++++++++++++++++--
netease_music/src/getInfo/albumInfoUtils.js | 29 ++----
netease_music/src/getInfo/artistInfoUtils.js | 22 +----
netease_music/src/getInfo/playlistUtils.js | 1 +
netease_music/src/getInfo/songInfoUtils.js | 40 +++------
netease_music/src/index.js | 40 +++++----
netease_music/todo.txt | 12 ++-
9 files changed, 192 insertions(+), 119 deletions(-)
diff --git a/netease_music/index.js b/netease_music/index.js
index b4e208c..2a959a1 100644
--- a/netease_music/index.js
+++ b/netease_music/index.js
@@ -3,18 +3,31 @@ if (process.argv.length <= 2) {
"参数不够",
"node index --utils [song|album|artist|lyric|comment] --min [number] --max [number] --order [false|ASC|DESC] --limit [number]",
// "",
- // "node index --utils song --min xxx --max xxx --order ASC --limit 2000",
- // "node index --utils album --min xxx --max xxx --order ASC --limit 2000",
- // "node index --utils artist --min xxx --max xxx --order ASC --limit 2000",
- // "node index --utils lyric --min xxx --max xxx --order ASC --limit 2000",
- // "node index --utils comment --min xxx --max xxx --order ASC --limit 2000",
+ // "node index --utils xxx --min xxx --max xxx --order ASC --limit 2000",
].join('\n');
console.log(output);
return;
}
var args = require('minimist')(process.argv.slice(2));
+args = {
+    // Sub-module to run
+    utils: args.utils,
+    // id range; 0 / NaN / missing all become undefined (= no bound)
+    min: Number(args.min) || undefined,
+    max: Number(args.max) || undefined,
+    // Sort direction; it is spliced raw into ORDER BY, so only ASC/DESC pass ("false", true, anything else => unsorted)
+    order: /^(ASC|DESC)$/i.test(args.order) ? String(args.order).toUpperCase() : undefined,
+    // Maximum number of ids to select
+    limit: Number(args.limit) || undefined,
+    // Partition
+    partition: Number(args.partition) || undefined,
+    // Pause between two requests (presumably milliseconds - confirm); falls back to 100
+    sleepTime: Number(args.sleepTime) || 100,
+}
+
console.log("args:", args);
+global.sleepTime = args.sleepTime; // pause between two requests
global.useMysqlPool = true;
const neteaseMusic = require('./src/index');
neteaseMusic.main(args);
\ No newline at end of file
diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql
index 59b7712..a114d43 100644
--- a/netease_music/sql/structure.sql
+++ b/netease_music/sql/structure.sql
@@ -212,26 +212,49 @@ CREATE TABLE `analysis` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
-CREATE TABLE `wait_song` (
- `id` int(10) unsigned NOT NULL COMMENT 'id',
- `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4',
+-- wait_check_*: ids waiting to be checked; rows are added by wait_check.insert in src/dataManager.js
+CREATE TABLE `wait_check_song` (
+ `id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
-CREATE TABLE `wait_artist` (
- `id` int(10) unsigned NOT NULL COMMENT 'id',
- `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4',
+CREATE TABLE `wait_check_artist` (
+ `id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
-CREATE TABLE `wait_album` (
- `id` int(10) unsigned NOT NULL COMMENT 'id',
- `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4',
+CREATE TABLE `wait_check_album` (
+ `id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
-CREATE TABLE `wait_lyric` (
- `id` int(10) unsigned NOT NULL COMMENT 'id',
- `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4',
+CREATE TABLE `wait_check_lyric` (
+ `id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
\ No newline at end of file
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
+
+
+-- wait_fetch_*: ids queued for fetching (song/album/artist are read by getIdsToFetch in src/dataManager.js); partition may be NULL
+CREATE TABLE `wait_fetch_song` (
+ `id` bigint(20) unsigned NOT NULL COMMENT 'id',
+ `partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
+ PRIMARY KEY (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
+
+CREATE TABLE `wait_fetch_artist` (
+ `id` bigint(20) unsigned NOT NULL COMMENT 'id',
+ `partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
+ PRIMARY KEY (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
+
+CREATE TABLE `wait_fetch_album` (
+ `id` bigint(20) unsigned NOT NULL COMMENT 'id',
+ `partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
+ PRIMARY KEY (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
+
+CREATE TABLE `wait_fetch_lyric` (
+ `id` bigint(20) unsigned NOT NULL COMMENT 'id',
+ `partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
+ PRIMARY KEY (`id`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
diff --git a/netease_music/src/dataManager.js b/netease_music/src/dataManager.js
index 13d5dde..c6e8ef4 100644
--- a/netease_music/src/dataManager.js
+++ b/netease_music/src/dataManager.js
@@ -20,8 +20,27 @@ module.exports = {
songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2
])]);
},
+
+        // Song ids queued in wait_fetch_song (key column is `id`, see sql/structure.sql), honouring args.min/max/order/limit.
+        getIdsToFetch: async (args) => {
+            const whereClause = [
+                args.min ? `id > ${args.min}` : '1=1',
+                args.max ? `id <= ${args.max}` : '1=1',
+            ].join(' AND ');
+            let sql = `
+                SELECT id AS song_id FROM wait_fetch_song WHERE ${whereClause}
+                ${args.order ? `ORDER BY id ${args.order}` : ''}
+                ${args.limit ? `LIMIT ${args.limit}` : ''}
+            `;
+            // // Refresh existing rows instead
+            // sql = `SELECT song_id FROM song WHERE data_version = 1`;
+            console.log(sql);
+            const rows = await dbUtils.query(sql, []);
+            return rows.map(item => item.song_id);
+        },
},
+
album: {
insert: async (albumInfo) => {
return await dbUtils.query('INSERT IGNORE INTO album SET ?', albumInfo);
@@ -29,21 +48,61 @@ module.exports = {
update: async (albumId, albumInfo) => {
return await dbUtils.query(`UPDATE album SET ? WHERE album_id = ${albumId}`, albumInfo);
- }
+ },
+
+        // Album ids to process: isUpdate re-selects rows from album; otherwise reads wait_fetch_album (key column `id`) with min/max/order/limit.
+        getIdsToFetch: async (args, isUpdate) => {
+            let sql = "";
+            if (isUpdate) {
+                sql = `SELECT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`;
+            } else {
+                const whereClause = [
+                    args.min ? `id > ${args.min}` : '1=1',
+                    args.max ? `id <= ${args.max}` : '1=1',
+                ].join(' AND ');
+                sql = `
+                    SELECT id AS album_id FROM wait_fetch_album WHERE ${whereClause}
+                    ${args.order ? `ORDER BY id ${args.order}` : ''}
+                    ${args.limit ? `LIMIT ${args.limit}` : ''}
+                `;
+            }
+            console.log(sql);
+            const rows = await dbUtils.query(sql, []);
+            return rows.map(item => item.album_id);
+        },
},
+
artist: {
insert: async (artistInfo) => {
return await dbUtils.query('INSERT IGNORE INTO artist SET ?', artistInfo);
},
+
+        // Artist ids queued in wait_fetch_artist (key column is `id`, see sql/structure.sql), honouring args.min/max/order/limit.
+        getIdsToFetch: async (args) => {
+            const whereClause = [
+                args.min ? `id > ${args.min}` : '1=1',
+                args.max ? `id <= ${args.max}` : '1=1',
+            ].join(' AND ');
+            const sql = `
+                SELECT id AS artist_id FROM wait_fetch_artist WHERE ${whereClause}
+                ${args.order ? `ORDER BY id ${args.order}` : ''}
+                ${args.limit ? `LIMIT ${args.limit}` : ''}
+            `;
+            console.log(sql);
+            const rows = await dbUtils.query(sql, []);
+            return rows.map(item => item.artist_id);
+        }
},
+
lyric: {
insert: async (lyricInfo) => {
return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo);
- }
+ },
},
+
comment: {
insertCollection: async (commentInfoList) => {
if (commentInfoList.length == 0) return;
@@ -51,15 +110,17 @@ module.exports = {
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
`, [commentInfoList]);
- }
+ },
},
+
comment_progress: {
update: async (commentProgressInfo, songId) => {
return await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [commentProgressInfo, songId]);
},
},
+
playlist: {
insertCollection: async (playlistInfo) => {
if (playlistInfo.length == 0) return;
@@ -67,9 +128,10 @@ module.exports = {
INSERT INTO playlist ( ${Object.keys(playlistInfo).map(field => `\`${field}\``).join(",")} ) VALUES ?
ON DUPLICATE KEY UPDATE ${Object.keys(playlistInfo).map(field => `${field}=VALUES(${field})`).join(", ")}
`, [[Object.values(playlistInfo)]]);
- }
+ },
},
+
user: {
insertCollection: async (userInfoList) => {
if (userInfoList.length == 0) return;
@@ -77,27 +139,44 @@ module.exports = {
INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ?
ON DUPLICATE KEY UPDATE user_type = VALUES(user_type), nickname = VALUES(nickname), avatar_url = VALUES(avatar_url), modify_time = CURRENT_TIMESTAMP
`, [userInfoList]);
- }
+ },
},
+
song_album: {
insertCollection: async (songAlbumRel) => {
if (songAlbumRel.length == 0) return;
return await dbUtils.query('INSERT IGNORE INTO song_album_relation (song_id, album_id) VALUES ?', [songAlbumRel]);
- }
+ },
},
+
song_artist: {
insertCollection: async (songArtistRel) => {
if (songArtistRel.length == 0) return;
return await dbUtils.query('INSERT IGNORE INTO song_artist_relation (song_id, artist_id) VALUES ?', [songArtistRel]);
- }
+ },
},
+
song_playlist: {
insertCollection: async (trackIds) => {
if (trackIds.length == 0) return;
return await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [trackIds]);
- }
+ },
},
+
+
+ /* ##################################################### */
+
+
+    // Queue ids in the wait_check_<type> table; ids that are 0 (or otherwise non-positive) are dropped first.
+    wait_check: {
+        insert: async (type, ids) => {
+            ids = ids.filter(id => id > 0);
+            if (ids.length == 0) return; // nothing left to insert; an empty VALUES list would be invalid SQL
+            return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids]);
+        },
+    },
+
};
diff --git a/netease_music/src/getInfo/albumInfoUtils.js b/netease_music/src/getInfo/albumInfoUtils.js
index 0d9eac9..9967926 100644
--- a/netease_music/src/getInfo/albumInfoUtils.js
+++ b/netease_music/src/getInfo/albumInfoUtils.js
@@ -29,28 +29,7 @@ SELECT * FROM album WHERE (full_description = '' or full_description is null) an
async function fetchAll({ args = {}, isUpdate = false }) {
console.log("start fetching albums ...");
-
- if (isUpdate) {
- var sql = `
- SELECT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'
- `;
- } else {
- let whereClause = [
- args.min ? `album_id > ${args.min}` : '1=1',
- args.max ? `album_id <= ${args.max}` : '1=1',
- ].join(' AND ');
- var sql = `
- -- 查出来通过代码去重,提高速度
- SELECT album_id FROM song_album_relation WHERE ${whereClause} AND album_id NOT IN ( SELECT album_id FROM album )
- ${args.order ? `ORDER BY album_id ${args.order}` : ''}
- ${args.limit ? `LIMIT ${args.limit}` : ''}
- `;
- console.log(sql);
- }
-
- var albumIds = await dbUtils.query(sql, []);
- albumIds = albumIds.map(item => item.album_id);
- albumIds = Array.from(new Set(albumIds));
+ let albumIds = await dataManager.album.getIdsToFetch(args, isUpdate);
for (let i = 0; i < albumIds.length; i++) {
await global.checkIsExit();
const albumId = albumIds[i];
@@ -147,7 +126,7 @@ async function fetch({ albumId, debug = false, update = false }) {
let image = //.exec(html)[1];
let songListJSONString = /