1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
This commit is contained in:
2022-10-02 17:37:28 +08:00
parent 93db6371d9
commit 1939398579
10 changed files with 3183 additions and 28 deletions

View File

@@ -0,0 +1,182 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
const { comment_music } = require('NeteaseCloudMusicApi');
/**
 * Crawl comments for the songs whose progress record is still open.
 *
 * First registers every `song.song_id` that is missing from
 * `comment_progress`, then loads the rows with `current_status != 2`
 * (the query carries `LIMIT 1`, so at most one song is handled per call)
 * and runs `fetch` for each of them. An error raised for one song is logged
 * and does not stop the loop; the loop pauses for `global.sleepTime` after
 * every song.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
    console.log("start fetching comment ...")

    // Seed comment_progress with the song ids that are not tracked yet.
    await dbUtils.query(`
INSERT INTO comment_progress ( song_id )
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
`, []);

    // Pick up the progress rows that are not in status 2.
    const pendingRows = await dbUtils.query(`
SELECT song_id FROM comment_progress WHERE current_status != 2 LIMIT 1
`, []);
    const pendingIds = pendingRows.map(({ song_id }) => song_id);
    const total = pendingIds.length;

    for (const [index, songId] of pendingIds.entries()) {
        // Gives the host process a hook to stop between songs.
        await global.checkIsExit();
        console.log(`${index + 1}/${total} | comment: ${songId}`);
        try {
            await fetch({ songId });
        } catch (err) {
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}
// Upper bound on consecutive failed comment API requests before giving up on a song.
const MAX_COMMENT_FETCH_RETRIES = 5;

// Captures the path part of an avatar URL hosted on p<N>.music.126.net (http only).
const AVATAR_URL_PATTERN = /^http:\/\/p\d+\.music\.126\.net\/(.*?)$/;

/**
 * Map one API comment object to a row for the `comment` table.
 *
 * @param {Object} comment - Comment object as returned by the API.
 * @param {number|string} songId - Song the comment belongs to.
 * @param {number} commentType - 0 = comments, 1 = hotComments, 2 = topComments.
 * @returns {Object} Row keyed by `comment` column names.
 */
function buildCommentRow(comment, songId, commentType) {
    return {
        comment_id: comment.commentId,
        parent_comment_id: comment.parentCommentId,
        user_id: comment.user?.userId,
        song_id: songId,
        content: comment.content,
        time: comment.time,
        like_count: comment.likedCount,
        comment_type: commentType,
    };
}

/**
 * Map the author of an API comment object to a user row.
 *
 * The avatar URL is shortened to its path when it matches AVATAR_URL_PATTERN;
 * otherwise (including a missing avatar) it is kept as-is.
 *
 * @param {Object} comment - Comment object with a non-null `user`.
 * @returns {Object} Row with user_id, user_type, nickname and avatar_url.
 */
function buildUserRow(comment) {
    const user = comment.user;
    const avatarUrl = user.avatarUrl;
    const matched = typeof avatarUrl === 'string' ? avatarUrl.match(AVATAR_URL_PATTERN) : null;
    return {
        user_id: user.userId,
        user_type: user.userType,
        nickname: user.nickname,
        avatar_url: (matched && matched[1]) || avatarUrl,
    };
}

/**
 * Fetch comments for one song and upsert them into the `comment` table.
 *
 * Returns early (after logging) when `comment_progress` has no row for the
 * song with `current_status != 2`. Otherwise requests a page of 20 comments
 * through `comment_music`, dumps the raw response to
 * `../../temp/comment-<songId>.json`, and upserts every top, hot and regular
 * comment. All inserts are awaited, so the returned promise settles only after
 * the writes are done and a database error rejects it.
 *
 * NOTE: pagination and progress persistence are not implemented yet; the loop
 * stops after the first successfully fetched page.
 *
 * @param {Object} options
 * @param {number|string} options.songId - Id of the song to crawl.
 * @param {boolean} [options.debug=false] - Accepted for compatibility; currently unused.
 * @returns {Promise<void>}
 * @throws {Error} When the comment API fails MAX_COMMENT_FETCH_RETRIES times in
 *   a row (the last failure is attached as `cause`), or when a query fails.
 */
async function fetch({ songId, debug = false }) {
    // Look for a progress record of this song that is not in status 2.
    const progressRows = await dbUtils.query(`
SELECT * FROM comment_progress WHERE song_id = ? and current_status != 2 LIMIT 1
`, [songId]);
    if (progressRows.length === 0) {
        console.log('No commentProgress found, song_id:', songId);
        return;
    }

    const [progressRow] = progressRows;
    const progress = {
        maxTime: progressRow.max_time,
        minTime: progressRow.min_time,
        currentTime: progressRow.current_time,
        currentStatus: progressRow.current_status,
        total: progressRow.total,
    };
    const queryParams = {
        id: songId,
        limit: 20,
    };
    console.log(progress);

    let isFinish = false;
    let failedAttempts = 0;
    while (!isFinish) {
        let commentResult;
        try {
            commentResult = await comment_music(queryParams);
        } catch (err) {
            console.error(err);
            failedAttempts += 1;
            // A permanently failing request must not stall the whole crawl.
            if (failedAttempts >= MAX_COMMENT_FETCH_RETRIES) {
                throw new Error(
                    `comment request failed ${failedAttempts} times, song_id: ${songId}`,
                    { cause: err },
                );
            }
            await sleepUtils.sleep(1000);
            continue;
        }
        failedAttempts = 0;

        // The raw-response dump is best-effort: a missing temp directory must
        // not be mistaken for a failed request and trigger a re-fetch.
        try {
            fs.writeFileSync(path.join(__dirname, "../../temp", `comment-${songId}.json`), JSON.stringify(commentResult));
        } catch (err) {
            console.error(err);
        }

        // Each list is read from its own response field.
        const topComments = commentResult.body.topComments || [];
        const hotComments = commentResult.body.hotComments || [];
        const comments = commentResult.body.comments || [];

        const commentInfoList = [
            ...topComments.map(comment => buildCommentRow(comment, songId, 2)),
            ...hotComments.map(comment => buildCommentRow(comment, songId, 1)),
            ...comments.map(comment => buildCommentRow(comment, songId, 0)),
        ];
        // TODO: userInfoList is built but not persisted yet.
        const userInfoList = [...topComments, ...hotComments, ...comments]
            .filter(comment => comment.user).map(buildUserRow);
        console.log(commentInfoList);

        // Sequential awaited upserts: the function finishes only once every row
        // is written, and a failing query rejects the returned promise.
        for (const commentInfo of commentInfoList) {
            const result = await dbUtils.query(`
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
ON DUPLICATE KEY UPDATE content = ? , like_count = ? , comment_type = GREATEST(comment_type, ? ), modify_time = CURRENT_TIMESTAMP
`, [
                [
                    [
                        commentInfo.comment_id,
                        commentInfo.parent_comment_id,
                        commentInfo.user_id,
                        commentInfo.song_id,
                        commentInfo.content,
                        commentInfo.time,
                        commentInfo.like_count,
                        commentInfo.comment_type,
                    ],
                ],
                commentInfo.content,
                commentInfo.like_count,
                commentInfo.comment_type,
            ]);
            console.log(result);
        }

        // TODO: replace with a real completion check once pagination exists.
        isFinish = true;

        // TODO: placeholder values; the next-page cursor and the progress
        // record still have to be derived from the fetched comments and
        // written back to comment_progress.
        queryParams.before = 1111;
        progress.maxTime = 1000;
        progress.currentTime = 1;
    }
}
// Public API of this module: the batch crawler and the single-song crawler.
module.exports = { fetchAll, fetch };