diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql index 8f6dcaf..12a7690 100644 --- a/netease_music/sql/structure.sql +++ b/netease_music/sql/structure.sql @@ -31,7 +31,7 @@ CREATE TABLE `album` ( `company` varchar(100) DEFAULT NULL COMMENT '发行公司', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - `version` tinyint(4) NOT NULL DEFAULT '1' COMMENT '数据记录版本(如果有字段调整则整体+1)', + `version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)', PRIMARY KEY (`album_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; @@ -93,7 +93,8 @@ CREATE TABLE `comment_progress` ( `total` int(10) NOT NULL DEFAULT 0 COMMENT '评论总数', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', - PRIMARY KEY (`song_id`) + PRIMARY KEY (`song_id`), + INDEX `current_status` (`current_status`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; CREATE TABLE `log` ( diff --git a/netease_music/src/getInfo/albumInfoUtils.js b/netease_music/src/getInfo/albumInfoUtils.js index dcf9d39..96535bc 100644 --- a/netease_music/src/getInfo/albumInfoUtils.js +++ b/netease_music/src/getInfo/albumInfoUtils.js @@ -82,7 +82,9 @@ async function fetch({ albumId, debug = false, update = false }) { let company = null; if (html.includes(`

发行公司:`)) { try { - company = /

发行公司:<\/b>\n(.*?)\n<\/p>/.exec(html)[1]; + // 注意 发行公司: 后面有可能只有一个换行 而没有内容 + company = /

发行公司:<\/b>\n(.*?)\n?<\/p>/.exec(html)[1]; + company = company.trim(); } catch (e) { // 解析出错 await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [albumId, 'album_fetch', `company 正则失败\n${e.message}`]); diff --git a/netease_music/src/getInfo/commentUtils.js b/netease_music/src/getInfo/commentUtils.js index 7a309ab..ce8d3b5 100644 --- a/netease_music/src/getInfo/commentUtils.js +++ b/netease_music/src/getInfo/commentUtils.js @@ -22,7 +22,8 @@ async function fetchAll() { // 首先查询有无正在爬取中的记录 var songIds = await dbUtils.query(` -- 本机 - SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id < 30000000 ORDER BY current_status DESC + SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id <= 30000000 LIMIT 1000 + -- SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id < 30000000 ORDER BY current_status DESC -- 服务器 -- SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id > 30000000 ORDER BY current_status DESC `, []); @@ -75,6 +76,7 @@ async function fetch({ songId, debug = false }) { currentStatus: item.current_status, total: item.total, }; + // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e6%9b%b2%e8%af%84%e8%ae%ba var queryParams = { id: songId, limit: 20, @@ -86,7 +88,7 @@ async function fetch({ songId, debug = false }) { let isFinish = false; let pageCount = 0; while (!isFinish) { await global.checkIsExit(); - console.log(`comment: ${songId}, 页数: ${++pageCount}`); + console.log(`comment: ${songId}, page: ${++pageCount}`); // 是否是第一页 let isFirstPage = progress.currentStatus === 0; @@ -116,51 +118,64 @@ async function fetch({ songId, debug = false }) { // console.log(commentInfoList); // console.log(userInfoList); + let promiseList = []; for (let commentInfo of commentInfoList) { - let result = await dbUtils.query(` - INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ? - ON DUPLICATE KEY UPDATE content = ?, like_count = ?, comment_type = GREATEST(comment_type, ?), modify_time = CURRENT_TIMESTAMP - `, [ - [[ - commentInfo.comment_id, - commentInfo.parent_comment_id, - commentInfo.user_id, - commentInfo.song_id, + let promise = new Promise(async function (resolve, reject) { + let result = await dbUtils.query(` + INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ? + ON DUPLICATE KEY UPDATE content = ?, like_count = ?, comment_type = GREATEST(comment_type, ?), modify_time = CURRENT_TIMESTAMP + `, [ + [[ + commentInfo.comment_id, + commentInfo.parent_comment_id, + commentInfo.user_id, + commentInfo.song_id, + commentInfo.content, + commentInfo.time, + commentInfo.like_count, + commentInfo.comment_type + ]], commentInfo.content, - commentInfo.time, commentInfo.like_count, commentInfo.comment_type - ]], - commentInfo.content, - commentInfo.like_count, - commentInfo.comment_type - ]); - // console.log(result); + ]); + // console.log(result); + // console.log("INSERT comment"); + resolve(); + }); + promiseList.push(promise); } for (let userInfo of userInfoList) { - let result = await dbUtils.query(` - INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ? - ON DUPLICATE KEY UPDATE user_type = ?, nickname = ?, avatar_url = ?, modify_time = CURRENT_TIMESTAMP - `, [ - [[ - userInfo.user_id, + let promise = new Promise(async function (resolve, reject) { + let result = await dbUtils.query(` + INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ? + ON DUPLICATE KEY UPDATE user_type = ?, nickname = ?, avatar_url = ?, modify_time = CURRENT_TIMESTAMP + `, [ + [[ + userInfo.user_id, + userInfo.user_type, + userInfo.nickname, + userInfo.avatar_url, + ]], userInfo.user_type, userInfo.nickname, - userInfo.avatar_url, - ]], - userInfo.user_type, - userInfo.nickname, - userInfo.avatar_url - ]); - // console.log(result); + userInfo.avatar_url + ]); + // console.log(result); + // console.log("INSERT user"); + resolve(); + }); + promiseList.push(promise); } + await Promise.all(promiseList); + // console.log("INSERT finished comment and user finished"); + // console.log(commentResult.body.more, comments.length, commentInfoList.length); // 判断是否还有下一页 if (commentResult.body.more && comments.length > 0) { - // console.log("还没结束"); // 更新 progress progress.currentTime = comments[comments.length - 1].time; if (progress.maxTime == progress.minTime) { // minTime = maxTime 代表这是本轮爬取的第一次 @@ -174,19 +189,22 @@ async function fetch({ songId, debug = false }) { isFinish = true; console.log(`comment: ${songId} 结束了`); progress.currentStatus = 2; // 0-等待爬取/增量爬取 1-爬取中 2-完成 + if (progress.maxTime == 0) { // 第一次爬取 且 没有分页的情况 + progress.maxTime = comments[0]?.time || 0; + } progress.minTime = progress.maxTime; // minTime = maxTime 代表这一轮爬取完成了 progress.currentTime = progress.maxTime; // 可有可无 } // progress更新到数据库中 - - await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1',[ { + await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [{ max_time: progress.maxTime, min_time: progress.minTime, current_time: progress.currentTime, current_status: progress.currentStatus, total: progress.total, }, songId]); + // console.log("UPDATE comment_progress"); // await sleepUtils.sleep(global.sleepTime); } // return commentInfo;