From 4753fd55aea38769ff32f0074cec3d7aa6f96548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E5=B0=8F=E5=A2=A8?= <2291200076@qq.com> Date: Tue, 25 Oct 2022 16:25:23 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=92=E5=85=A5=E6=95=B0=E6=8D=AE=E6=96=B9?= =?UTF-8?q?=E6=B3=95=E6=8A=BD=E7=A6=BB=E4=B8=BA=20dataManager.js?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- netease_music/sql/structure.sql | 25 +++++ netease_music/src/dataManager.js | 103 +++++++++++++++++++ netease_music/src/getInfo/albumInfoUtils.js | 33 +++--- netease_music/src/getInfo/artistInfoUtils.js | 24 ++--- netease_music/src/getInfo/commentUtils.js | 93 ++++------------- netease_music/src/getInfo/lyricInfoUtils.js | 14 +-- netease_music/src/getInfo/playlistUtils.js | 12 +-- netease_music/src/getInfo/songInfoUtils.js | 22 +--- 8 files changed, 185 insertions(+), 141 deletions(-) create mode 100644 netease_music/src/dataManager.js diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql index a84a7df..59b7712 100644 --- a/netease_music/sql/structure.sql +++ b/netease_music/sql/structure.sql @@ -210,3 +210,28 @@ CREATE TABLE `analysis` ( `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', UNIQUE KEY `key` (`key`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + + +CREATE TABLE `wait_song` ( + `id` int(10) unsigned NOT NULL COMMENT 'id', + `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + +CREATE TABLE `wait_artist` ( + `id` int(10) unsigned NOT NULL COMMENT 'id', + `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + +CREATE TABLE `wait_album` ( + `id` int(10) unsigned NOT NULL COMMENT 'id', + `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; + +CREATE TABLE `wait_lyric` ( + `id` int(10) unsigned NOT NULL COMMENT 'id', + `partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4', + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; \ No newline at end of file diff --git a/netease_music/src/dataManager.js b/netease_music/src/dataManager.js new file mode 100644 index 0000000..13d5dde --- /dev/null +++ b/netease_music/src/dataManager.js @@ -0,0 +1,103 @@ +const dbUtils = global.dbUtils; + +module.exports = { + song: { + insertCollection: async (songInfoList) => { + if (songInfoList.length == 0) return; + // image 因为接口没有返回,所以不更新 + return await dbUtils.query(` + INSERT INTO song ( + song_id, title, type, alias, pop, fee, quality, cd, + no, dj_id, s_id, origin_cover_type, pub_time, + no_copyright_rcmd, mv, single, version, data_version + ) VALUES ? ON DUPLICATE KEY UPDATE + title = VALUES(title), type = VALUES(type), alias = VALUES(alias), pop = VALUES(pop), fee = VALUES(fee), quality = VALUES(quality), cd = VALUES(cd), + no = VALUES(no), dj_id = VALUES(dj_id), s_id = VALUES(s_id), origin_cover_type = VALUES(origin_cover_type), pub_time = VALUES(pub_time), + no_copyright_rcmd = VALUES(no_copyright_rcmd), mv = VALUES(mv), single = VALUES(single), version = VALUES(version), data_version = VALUES(data_version) + `, [songInfoList.map(songInfo => [ + songInfo.id, songInfo.title, songInfo.type, songInfo.alias, songInfo.pop, songInfo.fee, songInfo.quality, songInfo.cd, + songInfo.no, songInfo.djId, songInfo.sId, songInfo.originCoverType, songInfo.pubTime, + songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2 + ])]); + }, + }, + + album: { + insert: async (albumInfo) => { + return await dbUtils.query('INSERT IGNORE INTO album SET ?', albumInfo); + }, + + update: async (albumId, albumInfo) => { + return await dbUtils.query(`UPDATE album SET ? WHERE album_id = ${albumId}`, albumInfo); + } + }, + + artist: { + insert: async (artistInfo) => { + return await dbUtils.query('INSERT IGNORE INTO artist SET ?', artistInfo); + }, + }, + + lyric: { + insert: async (lyricInfo) => { + return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo); + } + }, + + comment: { + insertCollection: async (commentInfoList) => { + if (commentInfoList.length == 0) return; + return await dbUtils.query(` + INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ? + ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP + `, [commentInfoList]); + } + }, + + comment_progress: { + update: async (commentProgressInfo, songId) => { + return await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [commentProgressInfo, songId]); + }, + }, + + playlist: { + insertCollection: async (playlistInfo) => { + if (playlistInfo.length == 0) return; + return await dbUtils.query(` + INSERT INTO playlist ( ${Object.keys(playlistInfo).map(field => `\`${field}\``).join(",")} ) VALUES ? + ON DUPLICATE KEY UPDATE ${Object.keys(playlistInfo).map(field => `${field}=VALUES(${field})`).join(", ")} + `, [[Object.values(playlistInfo)]]); + } + }, + + user: { + insertCollection: async (userInfoList) => { + if (userInfoList.length == 0) return; + return await dbUtils.query(` + INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ? + ON DUPLICATE KEY UPDATE user_type = VALUES(user_type), nickname = VALUES(nickname), avatar_url = VALUES(avatar_url), modify_time = CURRENT_TIMESTAMP + `, [userInfoList]); + } + }, + + song_album: { + insertCollection: async (songAlbumRel) => { + if (songAlbumRel.length == 0) return; + return await dbUtils.query('INSERT IGNORE INTO song_album_relation (song_id, album_id) VALUES ?', [songAlbumRel]); + } + }, + + song_artist: { + insertCollection: async (songArtistRel) => { + if (songArtistRel.length == 0) return; + return await dbUtils.query('INSERT IGNORE INTO song_artist_relation (song_id, artist_id) VALUES ?', [songArtistRel]); + } + }, + + song_playlist: { + insertCollection: async (trackIds) => { + if (trackIds.length == 0) return; + return await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [trackIds]); + } + }, +}; diff --git a/netease_music/src/getInfo/albumInfoUtils.js b/netease_music/src/getInfo/albumInfoUtils.js index 22ff399..0d9eac9 100644 --- a/netease_music/src/getInfo/albumInfoUtils.js +++ b/netease_music/src/getInfo/albumInfoUtils.js @@ -3,6 +3,7 @@ const path = require('path'); const requestUtils = require('../../../utils/requestUtils'); const sleepUtils = require('../../../utils/sleepUtils'); +const dataManager = require('../dataManager'); const dbUtils = global.dbUtils; @@ -149,32 +150,26 @@ async function fetch({ albumId, debug = false, update = false }) { let songIds = songList.map(song => song.id); let albumInfo = { - albumId: albumId, + album_id: albumId, title: albumInfoDict.title, image: image, description: albumInfoDict.description, - fullDescription: fullDescription, - pubDate: albumInfoDict.pubDate, + full_description: fullDescription, + pub_date: albumInfoDict.pubDate, company: company, - songIds: songIds, + version: 1 }; // console.log("albumInfo", albumInfo); - if (albumId > 0 && songIds.length > 0) { - await dbUtils.query(` - INSERT IGNORE INTO song_album_relation (song_id, album_id) VALUES ? - `, [songIds.map(songId => [Number(songId), albumId])]); + if (albumId > 0) { + let songAlbumRel = songIds.map(songId => [Number(songId), albumId]); + await dataManager.song_album.insertCollection(songAlbumRel); + } + + if (update) { + await dataManager.album.update(albumId, albumInfo); + } else { + await dataManager.album.insert(albumInfo); } - await dbUtils.query(update ? `UPDATE album SET ? WHERE album_id = ${albumId}` : 'INSERT IGNORE INTO album SET ?', { - album_id: albumInfo.albumId, - title: albumInfo.title, - description: albumInfo.description, - full_description: albumInfo.fullDescription, - image: albumInfo.image, - pub_date: albumInfo.pubDate, - company: albumInfo.company, - version: 1 - }); - return albumInfo; } module.exports = { diff --git a/netease_music/src/getInfo/artistInfoUtils.js b/netease_music/src/getInfo/artistInfoUtils.js index 928bdba..fab6d14 100644 --- a/netease_music/src/getInfo/artistInfoUtils.js +++ b/netease_music/src/getInfo/artistInfoUtils.js @@ -3,6 +3,7 @@ const path = require('path'); const requestUtils = require('../../../utils/requestUtils'); const sleepUtils = require('../../../utils/sleepUtils'); +const dataManager = require('../dataManager'); const dbUtils = global.dbUtils; @@ -102,27 +103,20 @@ async function fetch({ artistId, debug = false }) { } let artistInfo = { - artistId: artistId, + artist_id: artistId, title: artistInfoDict.title, image: image, description: artistInfoDict.description, - pubDate: artistInfoDict.pubDate, - songIds: songIds, + pub_date: artistInfoDict.pubDate }; // console.log("artistInfo", artistInfo); - if (artistId > 0 && songIds.length > 0) { - await dbUtils.query(` - INSERT IGNORE INTO song_artist_relation (song_id, artist_id) VALUES ? - `, [songIds.map(songId => [Number(songId), artistId])]); + + if (artistId > 0) { + let songArtistRel = songIds.map(songId => [Number(songId), artistId]); + await dataManager.song_artist.insertCollection(songArtistRel); } - dbUtils.query('INSERT IGNORE INTO artist SET ?', { - artist_id: artistInfo.artistId, - title: artistInfo.title, - description: artistInfo.description, - image: artistInfo.image, - pub_date: artistInfo.pubDate, - }); - return artistInfo; + + await dataManager.artist.insert(artistInfo); } module.exports = { diff --git a/netease_music/src/getInfo/commentUtils.js b/netease_music/src/getInfo/commentUtils.js index 23b8763..9850de9 100644 --- a/netease_music/src/getInfo/commentUtils.js +++ b/netease_music/src/getInfo/commentUtils.js @@ -3,6 +3,7 @@ const path = require('path'); const requestUtils = require('../../../utils/requestUtils'); const sleepUtils = require('../../../utils/sleepUtils'); +const dataManager = require('../dataManager'); const dbUtils = global.dbUtils; @@ -49,22 +50,6 @@ async function fetchAll({ args = {} }) { // 获取歌词详情 async function fetch({ songId, debug = false }) { - // // var url = `https://music.163.com/weapi/comment/resource/comments/get?csrf_token=`; - // var opts = { - // method: "POST", - // url: `https://music.163.com/api/v1/resource/comments/R_SO_4_${songId}`, - // headers: { - // 'content-type': 'application/x-www-form-urlencoded', - // 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53', - // }, - // form: encrypt.weapi({ - // rid: songId, - // limit: 20, - // offset: 20, // offset的取值为:(评论页数-1)*20 - // before: 1664655762881 - // }) - // }; - // 首先查询有无正在爬取中的记录 var commentProgress = await dbUtils.query(` SELECT * FROM comment_progress WHERE song_id = ? and current_status != 2 LIMIT 1 @@ -123,59 +108,25 @@ async function fetch({ songId, debug = false }) { // console.log(commentInfoList); // console.log(userInfoList); - let promiseList = []; - for (let commentInfo of commentInfoList) { - let promise = new Promise(async function (resolve, reject) { - let result = await dbUtils.query(` - INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ? - ON DUPLICATE KEY UPDATE content = ?, like_count = ?, comment_type = GREATEST(comment_type, ?), modify_time = CURRENT_TIMESTAMP - `, [ - [[ - commentInfo.comment_id, - commentInfo.parent_comment_id, - commentInfo.user_id, - commentInfo.song_id, - commentInfo.content, - commentInfo.time, - commentInfo.like_count, - commentInfo.comment_type - ]], - commentInfo.content, - commentInfo.like_count, - commentInfo.comment_type - ]); - // console.log(result); - // console.log("INSERT comment"); - resolve(); - }); - promiseList.push(promise); - } + await dataManager.comment.insertCollection(commentInfoList.map(commentInfo => [ + commentInfo.comment_id, + commentInfo.parent_comment_id, + commentInfo.user_id, + commentInfo.song_id, + commentInfo.content, + commentInfo.time, + commentInfo.like_count, + commentInfo.comment_type + ])); - for (let userInfo of userInfoList) { - let promise = new Promise(async function (resolve, reject) { - let result = await dbUtils.query(` - INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ? - ON DUPLICATE KEY UPDATE user_type = ?, nickname = ?, avatar_url = ?, modify_time = CURRENT_TIMESTAMP - `, [ - [[ - userInfo.user_id, - userInfo.user_type, - userInfo.nickname, - userInfo.avatar_url, - ]], - userInfo.user_type, - userInfo.nickname, - userInfo.avatar_url - ]); - // console.log(result); - // console.log("INSERT user"); - resolve(); - }); - promiseList.push(promise); - } + await dataManager.user.insertCollection(userInfoList.map(userInfo => [ + userInfo.user_id, + userInfo.user_type, + userInfo.nickname, + userInfo.avatar_url, + ])); - await Promise.all(promiseList); - // console.log("INSERT finished comment and user finished"); + // console.log("INSERT comment and user finished"); // console.log(commentResult.body.more, comments.length, commentInfoList.length); @@ -202,17 +153,17 @@ async function fetch({ songId, debug = false }) { } // progress更新到数据库中 - await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [{ + let commentProgressInfo = { max_time: progress.maxTime, min_time: progress.minTime, current_time: progress.currentTime, current_status: progress.currentStatus, total: progress.total, - }, songId]); + }; + await dataManager.comment_progress.update(commentProgressInfo, songId); // console.log("UPDATE comment_progress"); - // await sleepUtils.sleep(global.sleepTime); + await sleepUtils.sleep(global.sleepTime); } - // return commentInfo; } function getCommitInfoForInsert(songId, comment, commentType) { diff --git a/netease_music/src/getInfo/lyricInfoUtils.js b/netease_music/src/getInfo/lyricInfoUtils.js index 831bc11..524a39d 100644 --- a/netease_music/src/getInfo/lyricInfoUtils.js +++ b/netease_music/src/getInfo/lyricInfoUtils.js @@ -3,6 +3,7 @@ const path = require('path'); const requestUtils = require('../../../utils/requestUtils'); const sleepUtils = require('../../../utils/sleepUtils'); +const dataManager = require('../dataManager'); const dbUtils = global.dbUtils; @@ -63,8 +64,7 @@ async function fetch({ songId, debug = false }) { if (typeof lyric == "undefined") { // 这首歌爬song的时候还在,但是现在不在了 - // 这里数据丢了不要紧,所以不加await - dbUtils.query('INSERT IGNORE INTO lyric SET ?', { + await dataManager.lyric.insert({ song_id: songId, lyric: '', version: -1, @@ -73,18 +73,12 @@ async function fetch({ songId, debug = false }) { } let lyricInfo = { - songId: songId, + song_id: songId, lyric: lyric.lyric, version: lyric.version, }; // console.log("lyricInfo", lyricInfo); - // 这里数据丢了不要紧,所以不加await - dbUtils.query('INSERT IGNORE INTO lyric SET ?', { - song_id: lyricInfo.songId, - lyric: lyricInfo.lyric, - version: lyricInfo.version, - }); - return lyricInfo; + await dataManager.lyric.insert(lyricInfo); } module.exports = { diff --git a/netease_music/src/getInfo/playlistUtils.js b/netease_music/src/getInfo/playlistUtils.js index e7078bf..19ea188 100644 --- a/netease_music/src/getInfo/playlistUtils.js +++ b/netease_music/src/getInfo/playlistUtils.js @@ -10,6 +10,7 @@ const dbUtils = global.dbUtils; // https://neteasecloudmusicapi-docs.4everland.app/ // https://github.com/Binaryify/NeteaseCloudMusicApi const { playlist_catlist, playlist_hot, playlist_detail } = require('NeteaseCloudMusicApi'); +const dataManager = require('../dataManager'); async function fetchAll({ args }) { // 睡眠时间设置长一些,不然容易触发500错误 @@ -170,16 +171,11 @@ async function fetch({ playlistId, debug = false }) { if (playlist.bannedTrackIds) { console.log("bannedTrackIds", playlist.bannedTrackIds); process.exit(0); - } + let trackIds = playlist.trackIds.map(track => [track.id, playlist.id, track.alg, track.rcmdReason]); - if (trackIds.length > 0) - await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [trackIds]); - await dbUtils.query(` - INSERT INTO playlist ( ${Object.keys(playlistInfo).map(field => `\`${field}\``).join(",")} ) VALUES ? - ON DUPLICATE KEY UPDATE ${Object.keys(playlistInfo).map(field => `${field}=VALUES(${field})`).join(", ")} - `, [[Object.values(playlistInfo)]]); - return playlistInfo; + await dataManager.song_playlist.insertCollection(trackIds); + await dataManager.playlist.insertCollection(playlistInfo); } module.exports = { diff --git a/netease_music/src/getInfo/songInfoUtils.js b/netease_music/src/getInfo/songInfoUtils.js index de950d5..85b3796 100644 --- a/netease_music/src/getInfo/songInfoUtils.js +++ b/netease_music/src/getInfo/songInfoUtils.js @@ -2,6 +2,7 @@ const fs = require('fs'); const path = require('path'); const sleepUtils = require('../../../utils/sleepUtils'); +const dataManager = require('../dataManager'); const dbUtils = global.dbUtils; @@ -106,24 +107,9 @@ async function fetch({ songIdArray, debug = false }) { if (songInfoList.length == 0) return; console.log("插入数据库"); - if (songAlbumRel.length > 0) - await dbUtils.query('INSERT IGNORE INTO song_album_relation (song_id, album_id) VALUES ?', [songAlbumRel]); - await dbUtils.query('INSERT IGNORE INTO song_artist_relation (song_id, artist_id) VALUES ?', [songArtistRel]); - await dbUtils.query(` - INSERT INTO song ( - song_id, title, type, alias, pop, fee, quality, cd, - no, dj_id, s_id, origin_cover_type, pub_time, - no_copyright_rcmd, mv, single, version, data_version - ) VALUES ? ON DUPLICATE KEY UPDATE - title = VALUES(title), type = VALUES(type), alias = VALUES(alias), pop = VALUES(pop), fee = VALUES(fee), quality = VALUES(quality), cd = VALUES(cd), - no = VALUES(no), dj_id = VALUES(dj_id), s_id = VALUES(s_id), origin_cover_type = VALUES(origin_cover_type), pub_time = VALUES(pub_time), - no_copyright_rcmd = VALUES(no_copyright_rcmd), mv = VALUES(mv), single = VALUES(single), version = VALUES(version), data_version = VALUES(data_version) - `, [songInfoList.map(songInfo => [ - songInfo.id, songInfo.title, songInfo.type, songInfo.alias, songInfo.pop, songInfo.fee, songInfo.quality, songInfo.cd, - songInfo.no, songInfo.djId, songInfo.sId, songInfo.originCoverType, songInfo.pubTime, - songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2 - ])]); - // image 因为接口没有返回,所以不更新 + await dataManager.song_album.insertCollection(songAlbumRel); + await dataManager.song_artist.insertCollection(songArtistRel); + await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新 } module.exports = {