update
This commit is contained in:
parent
b4d63489c3
commit
50d3555dd7
@ -31,7 +31,7 @@ CREATE TABLE `album` (
|
|||||||
`company` varchar(100) DEFAULT NULL COMMENT '发行公司',
|
`company` varchar(100) DEFAULT NULL COMMENT '发行公司',
|
||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
`version` tinyint(4) NOT NULL DEFAULT '1' COMMENT '数据记录版本(如果有字段调整则整体+1)',
|
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
|
||||||
PRIMARY KEY (`album_id`)
|
PRIMARY KEY (`album_id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
@ -93,7 +93,8 @@ CREATE TABLE `comment_progress` (
|
|||||||
`total` int(10) NOT NULL DEFAULT 0 COMMENT '评论总数',
|
`total` int(10) NOT NULL DEFAULT 0 COMMENT '评论总数',
|
||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
PRIMARY KEY (`song_id`)
|
PRIMARY KEY (`song_id`),
|
||||||
|
INDEX `current_status` (`current_status`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `log` (
|
CREATE TABLE `log` (
|
||||||
|
@ -82,7 +82,9 @@ async function fetch({ albumId, debug = false, update = false }) {
|
|||||||
let company = null;
|
let company = null;
|
||||||
if (html.includes(`<p class="intr"><b>发行公司:`)) {
|
if (html.includes(`<p class="intr"><b>发行公司:`)) {
|
||||||
try {
|
try {
|
||||||
company = /<p class="intr"><b>发行公司:<\/b>\n(.*?)\n<\/p>/.exec(html)[1];
|
// 注意 <b>发行公司:</b> 后面有可能只有一个换行 而没有内容
|
||||||
|
company = /<p class="intr"><b>发行公司:<\/b>\n(.*?)\n?<\/p>/.exec(html)[1];
|
||||||
|
company = company.trim();
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
// 解析出错
|
// 解析出错
|
||||||
await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [albumId, 'album_fetch', `company 正则失败\n${e.message}`]);
|
await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [albumId, 'album_fetch', `company 正则失败\n${e.message}`]);
|
||||||
|
@ -22,7 +22,8 @@ async function fetchAll() {
|
|||||||
// 首先查询有无正在爬取中的记录
|
// 首先查询有无正在爬取中的记录
|
||||||
var songIds = await dbUtils.query(`
|
var songIds = await dbUtils.query(`
|
||||||
-- 本机
|
-- 本机
|
||||||
SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id < 30000000 ORDER BY current_status DESC
|
SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id <= 30000000 LIMIT 1000
|
||||||
|
-- SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id < 30000000 ORDER BY current_status DESC
|
||||||
-- 服务器
|
-- 服务器
|
||||||
-- SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id > 30000000 ORDER BY current_status DESC
|
-- SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id > 30000000 ORDER BY current_status DESC
|
||||||
`, []);
|
`, []);
|
||||||
@ -75,6 +76,7 @@ async function fetch({ songId, debug = false }) {
|
|||||||
currentStatus: item.current_status,
|
currentStatus: item.current_status,
|
||||||
total: item.total,
|
total: item.total,
|
||||||
};
|
};
|
||||||
|
// https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e6%9b%b2%e8%af%84%e8%ae%ba
|
||||||
var queryParams = {
|
var queryParams = {
|
||||||
id: songId,
|
id: songId,
|
||||||
limit: 20,
|
limit: 20,
|
||||||
@ -86,7 +88,7 @@ async function fetch({ songId, debug = false }) {
|
|||||||
let isFinish = false; let pageCount = 0;
|
let isFinish = false; let pageCount = 0;
|
||||||
while (!isFinish) {
|
while (!isFinish) {
|
||||||
await global.checkIsExit();
|
await global.checkIsExit();
|
||||||
console.log(`comment: ${songId}, 页数: ${++pageCount}`);
|
console.log(`comment: ${songId}, page: ${++pageCount}`);
|
||||||
|
|
||||||
// 是否是第一页
|
// 是否是第一页
|
||||||
let isFirstPage = progress.currentStatus === 0;
|
let isFirstPage = progress.currentStatus === 0;
|
||||||
@ -116,51 +118,64 @@ async function fetch({ songId, debug = false }) {
|
|||||||
// console.log(commentInfoList);
|
// console.log(commentInfoList);
|
||||||
// console.log(userInfoList);
|
// console.log(userInfoList);
|
||||||
|
|
||||||
|
let promiseList = [];
|
||||||
for (let commentInfo of commentInfoList) {
|
for (let commentInfo of commentInfoList) {
|
||||||
let result = await dbUtils.query(`
|
let promise = new Promise(async function (resolve, reject) {
|
||||||
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
|
let result = await dbUtils.query(`
|
||||||
ON DUPLICATE KEY UPDATE content = ?, like_count = ?, comment_type = GREATEST(comment_type, ?), modify_time = CURRENT_TIMESTAMP
|
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
|
||||||
`, [
|
ON DUPLICATE KEY UPDATE content = ?, like_count = ?, comment_type = GREATEST(comment_type, ?), modify_time = CURRENT_TIMESTAMP
|
||||||
[[
|
`, [
|
||||||
commentInfo.comment_id,
|
[[
|
||||||
commentInfo.parent_comment_id,
|
commentInfo.comment_id,
|
||||||
commentInfo.user_id,
|
commentInfo.parent_comment_id,
|
||||||
commentInfo.song_id,
|
commentInfo.user_id,
|
||||||
|
commentInfo.song_id,
|
||||||
|
commentInfo.content,
|
||||||
|
commentInfo.time,
|
||||||
|
commentInfo.like_count,
|
||||||
|
commentInfo.comment_type
|
||||||
|
]],
|
||||||
commentInfo.content,
|
commentInfo.content,
|
||||||
commentInfo.time,
|
|
||||||
commentInfo.like_count,
|
commentInfo.like_count,
|
||||||
commentInfo.comment_type
|
commentInfo.comment_type
|
||||||
]],
|
]);
|
||||||
commentInfo.content,
|
// console.log(result);
|
||||||
commentInfo.like_count,
|
// console.log("INSERT comment");
|
||||||
commentInfo.comment_type
|
resolve();
|
||||||
]);
|
});
|
||||||
// console.log(result);
|
promiseList.push(promise);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let userInfo of userInfoList) {
|
for (let userInfo of userInfoList) {
|
||||||
let result = await dbUtils.query(`
|
let promise = new Promise(async function (resolve, reject) {
|
||||||
INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ?
|
let result = await dbUtils.query(`
|
||||||
ON DUPLICATE KEY UPDATE user_type = ?, nickname = ?, avatar_url = ?, modify_time = CURRENT_TIMESTAMP
|
INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ?
|
||||||
`, [
|
ON DUPLICATE KEY UPDATE user_type = ?, nickname = ?, avatar_url = ?, modify_time = CURRENT_TIMESTAMP
|
||||||
[[
|
`, [
|
||||||
userInfo.user_id,
|
[[
|
||||||
|
userInfo.user_id,
|
||||||
|
userInfo.user_type,
|
||||||
|
userInfo.nickname,
|
||||||
|
userInfo.avatar_url,
|
||||||
|
]],
|
||||||
userInfo.user_type,
|
userInfo.user_type,
|
||||||
userInfo.nickname,
|
userInfo.nickname,
|
||||||
userInfo.avatar_url,
|
userInfo.avatar_url
|
||||||
]],
|
]);
|
||||||
userInfo.user_type,
|
// console.log(result);
|
||||||
userInfo.nickname,
|
// console.log("INSERT user");
|
||||||
userInfo.avatar_url
|
resolve();
|
||||||
]);
|
});
|
||||||
// console.log(result);
|
promiseList.push(promise);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await Promise.all(promiseList);
|
||||||
|
// console.log("INSERT finished comment and user finished");
|
||||||
|
|
||||||
// console.log(commentResult.body.more, comments.length, commentInfoList.length);
|
// console.log(commentResult.body.more, comments.length, commentInfoList.length);
|
||||||
|
|
||||||
// 判断是否还有下一页
|
// 判断是否还有下一页
|
||||||
if (commentResult.body.more && comments.length > 0) {
|
if (commentResult.body.more && comments.length > 0) {
|
||||||
// console.log("还没结束");
|
|
||||||
// 更新 progress
|
// 更新 progress
|
||||||
progress.currentTime = comments[comments.length - 1].time;
|
progress.currentTime = comments[comments.length - 1].time;
|
||||||
if (progress.maxTime == progress.minTime) { // minTime = maxTime 代表这是本轮爬取的第一次
|
if (progress.maxTime == progress.minTime) { // minTime = maxTime 代表这是本轮爬取的第一次
|
||||||
@ -174,19 +189,22 @@ async function fetch({ songId, debug = false }) {
|
|||||||
isFinish = true;
|
isFinish = true;
|
||||||
console.log(`comment: ${songId} 结束了`);
|
console.log(`comment: ${songId} 结束了`);
|
||||||
progress.currentStatus = 2; // 0-等待爬取/增量爬取 1-爬取中 2-完成
|
progress.currentStatus = 2; // 0-等待爬取/增量爬取 1-爬取中 2-完成
|
||||||
|
if (progress.maxTime == 0) { // 第一次爬取 且 没有分页的情况
|
||||||
|
progress.maxTime = comments[0]?.time || 0;
|
||||||
|
}
|
||||||
progress.minTime = progress.maxTime; // minTime = maxTime 代表这一轮爬取完成了
|
progress.minTime = progress.maxTime; // minTime = maxTime 代表这一轮爬取完成了
|
||||||
progress.currentTime = progress.maxTime; // 可有可无
|
progress.currentTime = progress.maxTime; // 可有可无
|
||||||
}
|
}
|
||||||
|
|
||||||
// progress更新到数据库中
|
// progress更新到数据库中
|
||||||
|
await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [{
|
||||||
await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1',[ {
|
|
||||||
max_time: progress.maxTime,
|
max_time: progress.maxTime,
|
||||||
min_time: progress.minTime,
|
min_time: progress.minTime,
|
||||||
current_time: progress.currentTime,
|
current_time: progress.currentTime,
|
||||||
current_status: progress.currentStatus,
|
current_status: progress.currentStatus,
|
||||||
total: progress.total,
|
total: progress.total,
|
||||||
}, songId]);
|
}, songId]);
|
||||||
|
// console.log("UPDATE comment_progress");
|
||||||
// await sleepUtils.sleep(global.sleepTime);
|
// await sleepUtils.sleep(global.sleepTime);
|
||||||
}
|
}
|
||||||
// return commentInfo;
|
// return commentInfo;
|
||||||
|
Loading…
Reference in New Issue
Block a user