update
This commit is contained in:
parent
1939398579
commit
326201fb2f
@ -56,6 +56,7 @@ async function main() {
|
|||||||
await albumInfoUtils.fetchAll({});
|
await albumInfoUtils.fetchAll({});
|
||||||
await artistInfoUtils.fetchAll();
|
await artistInfoUtils.fetchAll();
|
||||||
await lyricInfoUtils.fetchAll();
|
await lyricInfoUtils.fetchAll();
|
||||||
|
await commentUtils.fetchAll();
|
||||||
await sleepUtils.sleep(2000);
|
await sleepUtils.sleep(2000);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -61,8 +61,8 @@ CREATE TABLE `lyric` (
|
|||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `user` (
|
CREATE TABLE `user` (
|
||||||
`user_id` int(10) unsigned NOT NULL COMMENT '用户id',
|
`user_id` bigint(20) unsigned NOT NULL COMMENT '用户id',
|
||||||
`user_type` tinyint(4) unsigned NOT NULL COMMENT '用户类型',
|
`user_type` varchar(50) NOT NULL COMMENT '用户类型',
|
||||||
`nickname` varchar(200) NOT NULL COMMENT '用户昵称',
|
`nickname` varchar(200) NOT NULL COMMENT '用户昵称',
|
||||||
`avatar_url` varchar(200) NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
|
`avatar_url` varchar(200) NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
|
||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
@ -72,8 +72,8 @@ CREATE TABLE `user` (
|
|||||||
|
|
||||||
CREATE TABLE `comment` (
|
CREATE TABLE `comment` (
|
||||||
`comment_id` bigint(20) unsigned NOT NULL COMMENT '评论id',
|
`comment_id` bigint(20) unsigned NOT NULL COMMENT '评论id',
|
||||||
`parent_comment_id` int(10) unsigned NOT NULL COMMENT '父评论id',
|
`parent_comment_id` bigint(20) unsigned NOT NULL COMMENT '父评论id',
|
||||||
`user_id` int(10) unsigned NOT NULL COMMENT '用户id',
|
`user_id` bigint(20) unsigned NOT NULL COMMENT '用户id',
|
||||||
`song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
|
`song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
|
||||||
`content` text NOT NULL COMMENT '评论内容',
|
`content` text NOT NULL COMMENT '评论内容',
|
||||||
`time` varchar(50) NOT NULL DEFAULT '' COMMENT '评论时间',
|
`time` varchar(50) NOT NULL DEFAULT '' COMMENT '评论时间',
|
||||||
@ -86,9 +86,9 @@ CREATE TABLE `comment` (
|
|||||||
|
|
||||||
CREATE TABLE `comment_progress` (
|
CREATE TABLE `comment_progress` (
|
||||||
`song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
|
`song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
|
||||||
`max_time` int(10) NOT NULL DEFAULT 0 COMMENT '开始爬取/开始增量爬取的时候 最新一条评论的时间',
|
`max_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '开始爬取/开始增量爬取的时候 最新一条评论的时间',
|
||||||
`min_time` int(10) NOT NULL DEFAULT 0 COMMENT '上一次爬取时最后一条评论的时间 第一次爬取时为0',
|
`min_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '上一次爬取时最后一条评论的时间 第一次爬取时为0',
|
||||||
`current_time` int(10) NOT NULL DEFAULT 0 COMMENT '本次爬取/增量时,最早的一条评论时间',
|
`current_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '本次爬取/增量时,最早的一条评论时间',
|
||||||
`current_status` tinyint(4) unsigned NOT NULL DEFAULT 0 COMMENT '爬取进度 0-等待爬取/增量爬取 1-爬取中 2-完成',
|
`current_status` tinyint(4) unsigned NOT NULL DEFAULT 0 COMMENT '爬取进度 0-等待爬取/增量爬取 1-爬取中 2-完成',
|
||||||
`total` int(10) NOT NULL DEFAULT 0 COMMENT '评论总数',
|
`total` int(10) NOT NULL DEFAULT 0 COMMENT '评论总数',
|
||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
|
@ -18,7 +18,7 @@ async function fetchAll() {
|
|||||||
|
|
||||||
// 首先查询有无正在爬取中的记录
|
// 首先查询有无正在爬取中的记录
|
||||||
var songIds = await dbUtils.query(`
|
var songIds = await dbUtils.query(`
|
||||||
SELECT song_id FROM comment_progress WHERE current_status != 2 LIMIT 1
|
SELECT song_id FROM comment_progress WHERE current_status != 2
|
||||||
`, []);
|
`, []);
|
||||||
songIds = songIds.map(item => item.song_id);
|
songIds = songIds.map(item => item.song_id);
|
||||||
|
|
||||||
@ -74,27 +74,119 @@ async function fetch({ songId, debug = false }) {
|
|||||||
limit: 20,
|
limit: 20,
|
||||||
// before: undefined,
|
// before: undefined,
|
||||||
};
|
};
|
||||||
console.log(progress);
|
if (progress.currentTime != 0)
|
||||||
|
queryParams.before = progress.currentTime;
|
||||||
|
|
||||||
let isFinish = false;
|
let isFinish = false; let pageCount = 0;
|
||||||
while (!isFinish) {
|
while (!isFinish) {
|
||||||
|
await global.checkIsExit();
|
||||||
|
console.log(`comment: ${songId}, 页数: ${++pageCount}`);
|
||||||
|
|
||||||
// 是否是第一页
|
// 是否是第一页
|
||||||
let isFirstPage = progress.currentStatus === 0;
|
let isFirstPage = progress.currentStatus === 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// console.log(progress, queryParams);
|
||||||
var commentResult = await comment_music(queryParams);
|
var commentResult = await comment_music(queryParams);
|
||||||
fs.writeFileSync(path.join(__dirname, "../../temp", `comment-${songId}.json`), JSON.stringify(commentResult));
|
// fs.writeFileSync(path.join(__dirname, "../../temp", `comment-${songId}-${pageCount}.json`), JSON.stringify(commentResult));
|
||||||
} catch (errors) {
|
} catch (errors) {
|
||||||
console.error(errors);
|
console.error(errors);
|
||||||
await sleepUtils.sleep(1000);
|
await sleepUtils.sleep(1000);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
var topComments = commentResult.body.hotComments || [];
|
var topComments = commentResult.body.topComments || [];
|
||||||
var hotComments = commentResult.body.hotComments || [];
|
var hotComments = commentResult.body.hotComments || [];
|
||||||
var comments = commentResult.body.hotComments || [];
|
var comments = commentResult.body.comments || [];
|
||||||
|
|
||||||
function getCommitInfoForInsert(comment, commentType) {
|
var commentInfoList = [
|
||||||
|
...topComments.map(comment => getCommitInfoForInsert(songId, comment, 2)),
|
||||||
|
...hotComments.map(comment => getCommitInfoForInsert(songId, comment, 1)),
|
||||||
|
...comments.map(comment => getCommitInfoForInsert(songId, comment, 0))
|
||||||
|
];
|
||||||
|
var userInfoList = [...topComments, ...hotComments, ...comments]
|
||||||
|
.map(comment => comment.user).filter(user => !!user).map(getUserInfoForInsert);
|
||||||
|
|
||||||
|
// console.log(commentInfoList);
|
||||||
|
// console.log(userInfoList);
|
||||||
|
|
||||||
|
for (let commentInfo of commentInfoList) {
|
||||||
|
let result = await dbUtils.query(`
|
||||||
|
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
|
||||||
|
ON DUPLICATE KEY UPDATE content = ?, like_count = ?, comment_type = GREATEST(comment_type, ?), modify_time = CURRENT_TIMESTAMP
|
||||||
|
`, [
|
||||||
|
[[
|
||||||
|
commentInfo.comment_id,
|
||||||
|
commentInfo.parent_comment_id,
|
||||||
|
commentInfo.user_id,
|
||||||
|
commentInfo.song_id,
|
||||||
|
commentInfo.content,
|
||||||
|
commentInfo.time,
|
||||||
|
commentInfo.like_count,
|
||||||
|
commentInfo.comment_type
|
||||||
|
]],
|
||||||
|
commentInfo.content,
|
||||||
|
commentInfo.like_count,
|
||||||
|
commentInfo.comment_type
|
||||||
|
]);
|
||||||
|
// console.log(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let userInfo of userInfoList) {
|
||||||
|
let result = await dbUtils.query(`
|
||||||
|
INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ?
|
||||||
|
ON DUPLICATE KEY UPDATE user_type = ?, nickname = ?, avatar_url = ?, modify_time = CURRENT_TIMESTAMP
|
||||||
|
`, [
|
||||||
|
[[
|
||||||
|
userInfo.user_id,
|
||||||
|
userInfo.user_type,
|
||||||
|
userInfo.nickname,
|
||||||
|
userInfo.avatar_url,
|
||||||
|
]],
|
||||||
|
userInfo.user_type,
|
||||||
|
userInfo.nickname,
|
||||||
|
userInfo.avatar_url
|
||||||
|
]);
|
||||||
|
// console.log(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// console.log(commentResult.body.more, comments.length, commentInfoList.length);
|
||||||
|
|
||||||
|
// 判断是否还有下一页
|
||||||
|
if (commentResult.body.more && comments.length > 0) {
|
||||||
|
// console.log("还没结束");
|
||||||
|
// 更新 progress
|
||||||
|
progress.currentTime = comments[comments.length - 1].time;
|
||||||
|
if (progress.maxTime == progress.minTime) { // minTime = maxTime 代表这是本轮爬取的第一次
|
||||||
|
progress.maxTime = comments[0].time;
|
||||||
|
}
|
||||||
|
progress.currentStatus = 1; // 0-等待爬取/增量爬取 1-爬取中 2-完成
|
||||||
|
// 更新 queryParams
|
||||||
|
queryParams.before = progress.currentTime;
|
||||||
|
progress.total = commentResult.body.total;
|
||||||
|
} else {
|
||||||
|
isFinish = true;
|
||||||
|
console.log(`comment: ${songId} 结束了`);
|
||||||
|
progress.currentStatus = 2; // 0-等待爬取/增量爬取 1-爬取中 2-完成
|
||||||
|
progress.minTime = progress.maxTime; // minTime = maxTime 代表这一轮爬取完成了
|
||||||
|
progress.currentTime = progress.maxTime; // 可有可无
|
||||||
|
}
|
||||||
|
|
||||||
|
// progress更新到数据库中
|
||||||
|
|
||||||
|
await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1',[ {
|
||||||
|
max_time: progress.maxTime,
|
||||||
|
min_time: progress.minTime,
|
||||||
|
current_time: progress.currentTime,
|
||||||
|
current_status: progress.currentStatus,
|
||||||
|
total: progress.total,
|
||||||
|
}, songId]);
|
||||||
|
await sleepUtils.sleep(global.sleepTime);
|
||||||
|
}
|
||||||
|
// return commentInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getCommitInfoForInsert(songId, comment, commentType) {
|
||||||
return {
|
return {
|
||||||
comment_id: comment.commentId,
|
comment_id: comment.commentId,
|
||||||
parent_comment_id: comment.parentCommentId,
|
parent_comment_id: comment.parentCommentId,
|
||||||
@ -106,8 +198,8 @@ async function fetch({ songId, debug = false }) {
|
|||||||
comment_type: commentType, // 评论类型 0-comments 1-hotComments 2-topComments
|
comment_type: commentType, // 评论类型 0-comments 1-hotComments 2-topComments
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
function getUserInfoForInsert(comment) {
|
|
||||||
const user = comment.user;
|
function getUserInfoForInsert(user) {
|
||||||
var shortAvatarUrlUrl = user.avatarUrl.match(/^http:\/\/p\d+\.music\.126\.net\/(.*?)$/);
|
var shortAvatarUrlUrl = user.avatarUrl.match(/^http:\/\/p\d+\.music\.126\.net\/(.*?)$/);
|
||||||
shortAvatarUrlUrl = shortAvatarUrlUrl ? shortAvatarUrlUrl[1] : user.avatarUrl;
|
shortAvatarUrlUrl = shortAvatarUrlUrl ? shortAvatarUrlUrl[1] : user.avatarUrl;
|
||||||
return {
|
return {
|
||||||
@ -117,64 +209,6 @@ async function fetch({ songId, debug = false }) {
|
|||||||
avatar_url: shortAvatarUrlUrl || user.avatarUrl,
|
avatar_url: shortAvatarUrlUrl || user.avatarUrl,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var commentInfoList = [
|
|
||||||
...topComments.map(comment => getCommitInfoForInsert(comment, 2)),
|
|
||||||
...hotComments.map(comment => getCommitInfoForInsert(comment, 1)),
|
|
||||||
...comments.map(comment => getCommitInfoForInsert(comment, 0))
|
|
||||||
];
|
|
||||||
var userInfoList = [...topComments, ...hotComments, ...comments]
|
|
||||||
.filter(comment => comment.user).map(getUserInfoForInsert);
|
|
||||||
|
|
||||||
console.log(commentInfoList);
|
|
||||||
// console.log(userInfoList);
|
|
||||||
|
|
||||||
commentInfoList.forEach(async function (commentInfo) {
|
|
||||||
let result = await dbUtils.query(`
|
|
||||||
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
|
|
||||||
ON DUPLICATE KEY UPDATE content = ? , like_count = ? , comment_type = GREATEST(comment_type, ? ), modify_time = CURRENT_TIMESTAMP
|
|
||||||
`, [
|
|
||||||
[
|
|
||||||
[
|
|
||||||
commentInfo.comment_id,
|
|
||||||
commentInfo.parent_comment_id,
|
|
||||||
commentInfo.user_id,
|
|
||||||
commentInfo.song_id,
|
|
||||||
commentInfo.content,
|
|
||||||
commentInfo.time,
|
|
||||||
commentInfo.like_count,
|
|
||||||
commentInfo.comment_type
|
|
||||||
]
|
|
||||||
],
|
|
||||||
commentInfo.content,
|
|
||||||
commentInfo.like_count,
|
|
||||||
commentInfo.comment_type
|
|
||||||
]);
|
|
||||||
console.log(result);
|
|
||||||
});
|
|
||||||
|
|
||||||
// process.exit(0);
|
|
||||||
|
|
||||||
// 判断是否完成
|
|
||||||
// if(){
|
|
||||||
isFinish = true;
|
|
||||||
// }
|
|
||||||
// 更新 queryParams
|
|
||||||
queryParams.before = 1111;
|
|
||||||
// 更新 progress
|
|
||||||
progress.maxTime = 1000;
|
|
||||||
progress.currentTime = 1;
|
|
||||||
|
|
||||||
// progress更新到数据库中
|
|
||||||
|
|
||||||
// // console.log("commentInfo", commentInfo);
|
|
||||||
// dbUtils.query('INSERT IGNORE INTO comment SET ?', {
|
|
||||||
// comment_id: commentInfo.commentId,
|
|
||||||
// comment: commentInfo.comment,
|
|
||||||
// version: commentInfo.version,
|
|
||||||
// });
|
|
||||||
}
|
|
||||||
// return commentInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
fetchAll: fetchAll,
|
fetchAll: fetchAll,
|
||||||
|
Loading…
Reference in New Issue
Block a user