update
This commit is contained in:
182
netease_music/src/getInfo/commentUtils.js
Normal file
182
netease_music/src/getInfo/commentUtils.js
Normal file
@@ -0,0 +1,182 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const requestUtils = require('../../../utils/requestUtils');
|
||||
const sleepUtils = require('../../../utils/sleepUtils');
|
||||
|
||||
const dbUtils = global.dbUtils;
|
||||
|
||||
const { comment_music } = require('NeteaseCloudMusicApi');
|
||||
|
||||
/**
 * Crawl comments for the songs whose row in `comment_progress` is not yet
 * marked with `current_status = 2`.
 *
 * Steps:
 *   1. Seed `comment_progress` with every `song_id` of `song` that is missing.
 *   2. Select the pending song ids (the query is capped with `LIMIT 1`).
 *   3. For each id, call `fetch`, log (but do not propagate) any error, then
 *      pause for `global.sleepTime` before the next song.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
    console.log("start fetching comment ...")

    // Register the song ids that still have no progress row.
    await dbUtils.query(`
        INSERT INTO comment_progress ( song_id )
        SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
    `, []);

    // Pick up the songs whose crawl has not reached status 2 yet.
    const pendingRows = await dbUtils.query(`
        SELECT song_id FROM comment_progress WHERE current_status != 2 LIMIT 1
    `, []);
    const pendingSongIds = pendingRows.map(({ song_id }) => song_id);
    const total = pendingSongIds.length;

    for (const [index, songId] of pendingSongIds.entries()) {
        // Give the process-wide exit hook a chance to run before each song.
        await global.checkIsExit();
        console.log(`${index + 1}/${total} | comment: ${songId}`);

        try {
            await fetch({ songId });
        } catch (err) {
            // One failing song must not abort the whole crawl.
            console.error(err);
        }

        await sleepUtils.sleep(global.sleepTime);
    }
}
|
||||
|
||||
// Values written to `comment.comment_type`: 0 = comments, 1 = hotComments, 2 = topComments.
const COMMENT_TYPE_NORMAL = 0;
const COMMENT_TYPE_HOT = 1;
const COMMENT_TYPE_TOP = 2;

// Number of consecutive failed API calls tolerated for one page before giving up.
const MAX_COMMENT_FETCH_RETRIES = 5;

/**
 * Map one API comment object to the column values of the `comment` table.
 *
 * @param {object} comment - Comment object taken from the API response body.
 * @param {number|string} songId - Id of the song the comment belongs to.
 * @param {number} commentType - One of the COMMENT_TYPE_* values.
 * @returns {object} Row keyed by `comment` table column names.
 */
function buildCommentRow(comment, songId, commentType) {
    return {
        comment_id: comment.commentId,
        parent_comment_id: comment.parentCommentId,
        user_id: comment.user?.userId,
        song_id: songId,
        content: comment.content,
        time: comment.time,
        like_count: comment.likedCount,
        comment_type: commentType,
    };
}

/**
 * Map the author of one API comment to a user row.
 *
 * The avatar URL is shortened to the part after `http://pN.music.126.net/`
 * when it has that form; otherwise it is kept unchanged.
 *
 * @param {object} comment - Comment object whose `user` property is set.
 * @returns {object} Row with `user_id`, `user_type`, `nickname`, `avatar_url`.
 */
function buildUserRow(comment) {
    const user = comment.user;
    const avatarUrl = user.avatarUrl;
    // Guard against a missing/non-string avatar so one odd user cannot abort the page.
    const avatarMatch = typeof avatarUrl === 'string'
        ? avatarUrl.match(/^http:\/\/p\d+\.music\.126\.net\/(.*?)$/)
        : null;
    return {
        user_id: user.userId,
        user_type: user.userType,
        nickname: user.nickname,
        avatar_url: (avatarMatch && avatarMatch[1]) || avatarUrl,
    };
}

/**
 * Best-effort dump of the raw API response to `../../temp/comment-<songId>.json`.
 * A failure (e.g. missing directory) is logged and otherwise ignored so it can
 * neither abort nor endlessly retry the crawl.
 *
 * @param {number|string} songId - Id used in the dump file name.
 * @param {object} commentResult - Raw API response.
 */
function dumpCommentResponse(songId, commentResult) {
    try {
        fs.writeFileSync(path.join(__dirname, "../../temp", `comment-${songId}.json`), JSON.stringify(commentResult));
    } catch (err) {
        console.error(err);
    }
}

/**
 * Insert one comment row, or refresh content / like count / type when the
 * comment already exists.
 *
 * @param {object} commentInfo - Row produced by `buildCommentRow`.
 * @returns {Promise<*>} Whatever `dbUtils.query` resolves with.
 */
function upsertComment(commentInfo) {
    return dbUtils.query(`
        INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
        ON DUPLICATE KEY UPDATE content = ? , like_count = ? , comment_type = GREATEST(comment_type, ? ), modify_time = CURRENT_TIMESTAMP
    `, [
        [
            [
                commentInfo.comment_id,
                commentInfo.parent_comment_id,
                commentInfo.user_id,
                commentInfo.song_id,
                commentInfo.content,
                commentInfo.time,
                commentInfo.like_count,
                commentInfo.comment_type,
            ],
        ],
        commentInfo.content,
        commentInfo.like_count,
        commentInfo.comment_type,
    ]);
}

/**
 * Fetch the comments of one song and upsert them into the `comment` table.
 *
 * Looks up the song's row in `comment_progress` (rows with
 * `current_status = 2` are skipped), requests one page of 20 comments via
 * `comment_music`, and stores the top, hot and regular comments it contains.
 * All inserts are awaited before the returned promise resolves, so a database
 * error rejects the promise instead of surfacing as an unhandled rejection.
 *
 * NOTE(review): pagination is not implemented yet; exactly one page is
 * processed per call and `comment_progress` is not updated.
 *
 * @param {object} options
 * @param {number|string} options.songId - Id of the song to crawl.
 * @param {boolean} [options.debug=false] - Currently unused; kept for
 *   interface compatibility.
 * @returns {Promise<void>}
 * @throws Rethrows the last API error after MAX_COMMENT_FETCH_RETRIES
 *   consecutive failures, and any error raised by `dbUtils.query`.
 */
async function fetch({ songId, debug = false }) {
    // Only continue when the song has a progress row that is not at status 2.
    const progressRows = await dbUtils.query(`
        SELECT * FROM comment_progress WHERE song_id = ? and current_status != 2 LIMIT 1
    `, [songId]);
    if (progressRows.length === 0) {
        console.log('No commentProgress found, song_id:', songId);
        return;
    }

    const [progressRow] = progressRows;
    const progress = {
        maxTime: progressRow.max_time,
        minTime: progressRow.min_time,
        currentTime: progressRow.current_time,
        currentStatus: progressRow.current_status,
        total: progressRow.total,
    };
    const queryParams = {
        id: songId,
        limit: 20,
    };
    console.log(progress);

    let isFinish = false;
    let failedAttempts = 0;
    while (!isFinish) {
        let commentResult;
        try {
            commentResult = await comment_music(queryParams);
        } catch (err) {
            console.error(err);
            failedAttempts += 1;
            // Give up instead of spinning forever on a permanently failing request.
            if (failedAttempts >= MAX_COMMENT_FETCH_RETRIES) {
                throw err;
            }
            await sleepUtils.sleep(1000);
            continue;
        }
        failedAttempts = 0;

        // Kept outside the API try/catch: a failed dump must not trigger a re-fetch.
        dumpCommentResponse(songId, commentResult);

        const body = commentResult?.body ?? {};
        const topComments = body.topComments || [];
        const hotComments = body.hotComments || [];
        const comments = body.comments || [];

        const commentInfoList = [
            ...topComments.map((comment) => buildCommentRow(comment, songId, COMMENT_TYPE_TOP)),
            ...hotComments.map((comment) => buildCommentRow(comment, songId, COMMENT_TYPE_HOT)),
            ...comments.map((comment) => buildCommentRow(comment, songId, COMMENT_TYPE_NORMAL)),
        ];
        // TODO: userInfoList is built but not persisted anywhere yet.
        const userInfoList = [...topComments, ...hotComments, ...comments]
            .filter((comment) => comment.user)
            .map(buildUserRow);

        console.log(commentInfoList);

        // Sequential awaits: forEach(async ...) would not wait for the inserts.
        for (const commentInfo of commentInfoList) {
            const result = await upsertComment(commentInfo);
            console.log(result);
        }

        // TODO: implement pagination - decide whether more pages exist, advance
        // queryParams.before, update `progress` and write it back to comment_progress.
        // Until then a single page is processed per call.
        isFinish = true;
    }
}
|
||||
|
||||
// Public API of this module: crawl every pending song, or a single song.
module.exports = { fetchAll, fetch };
|
Reference in New Issue
Block a user