1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

请求失败重试1次,并且保存请求失败的内容便于后续分析

This commit is contained in:
程序员小墨 2022-07-24 14:30:08 +08:00
parent 70e39f735c
commit 93bfc6c2cf

View File

@ -74,178 +74,190 @@ async function main() {
let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString(); let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band"); let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
if (result.ok != 1) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,正在重试。");
result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
if (result.ok != 1) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,保存失败信息。");
// ok 不为 1,那么就直接保存,便于后续分析,不进行后续处理
saveJSON({
now: now,
fileNameSuffix: `origin-error`,
object: result,
compress: true,
uncompress: false
});
return;
}
}
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功");
// console.log("result", result); // console.log("result", result);
if (result.ok != 1) { /**
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败。"); * 保存原始数据
} else { */
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功"); saveJSON({
now: now,
fileNameSuffix: `origin`,
object: result,
compress: true,
uncompress: false
});
/** let data = JSON.parse(JSON.stringify(result.data));
* 保存原始数据
*/
saveJSON({
now: now,
fileNameSuffix: `origin`,
object: result,
compress: true,
uncompress: false
});
let data = JSON.parse(JSON.stringify(result.data));
/** /**
* 过滤掉不需要的数据 * 过滤掉不需要的数据
*/ */
// hotgov // hotgov
delete data.hotgov["mblog"]; delete data.hotgov["mblog"];
// 重复字段只保留一个 // 重复字段只保留一个
delete data.hotgov["note"]; // note word delete data.hotgov["note"]; // note word
delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc
delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color
// band_list // band_list
for (let i = 0; i < data.band_list.length; i++) { for (let i = 0; i < data.band_list.length; i++) {
const item = data.band_list[i]; const item = data.band_list[i];
// 过滤广告 // 过滤广告
if (item.is_ad) { if (item.is_ad) {
data.band_list.splice(i, 1); data.band_list.splice(i, 1);
i--; i--;
}
// 过滤空字段
delete item["ad_info"];
// 重复字段只保留一个
delete item["note"]; // note word
delete item["icon_desc"]; delete item["small_icon_desc"]; // label_name icon_desc small_icon_desc
delete item["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color
delete item["flag_desc"]; // flag_desc subject_label 这两个有值的时候相同,没有值的时候,前一个为 undefined后一个为 ""
} }
// 过滤空字段
delete item["ad_info"];
/** // 重复字段只保留一个
* 获取需要的数据进行转换 delete item["note"]; // note word
*/ delete item["icon_desc"]; delete item["small_icon_desc"]; // label_name icon_desc small_icon_desc
let convert = []; delete item["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color
data.band_list.forEach(item => { delete item["flag_desc"]; // flag_desc subject_label 这两个有值的时候相同,没有值的时候,前一个为 undefined后一个为 ""
let detail = "";
let pic_ids = [];
if (item.mblog) { // 有些热搜没有 mblog
var regex = /(<([^>]+)>)/ig
detail = item.mblog.text.replace(regex, "");
if (item.mblog.pics) {
pic_ids = item.mblog.pics.map(pic => `${pic}`);
}
}
convert.push({
// 热搜排行顺序
rank: item.rank,
realpos: item.realpos,
// 热搜信息
word: item.word, // 热搜标题
word_scheme: item.word_scheme, // 热搜话题 "#热搜标题#"
emoticon: item.emoticon, // 热搜小表情,如 "[泪]"
label_name: item.label_name, // 热搜标签,如 "爆" "热" "新" ""
onboard_time: item.onboard_time, // 热搜上线时间,秒级时间戳,如 1658565575
/**
* 热搜数据
*
* 大部分的 num raw_hot 是相同的页面上显示的是 num可能是人工调控的热搜
*
* 两者差值通过观测似乎最大是 1250000
* 例如 唐山打架事件8名违法嫌疑人已到案 这条热搜一开始 delta 首先不断增大最大达到 1250000
* 然后热搜数量增加到 12600000 左右的时候delta 逐渐减小到 1040000 左右
*/
num: item.num,
raw_hot: item.raw_hot,
detla: item.num - item.raw_hot, // 计算值
url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // 热搜话题链接
// 分类
category: item.category ? item.category.split(',') : "",
subject_label: item.subject_label,
// 其他
more: {
is_new: item.is_new,
subject_querys: item.subject_querys,
mid: item.mid,
icon_desc_color: item.icon_desc_color,
detail: detail,
},
});
});
saveJSON({
now: now,
fileNameSuffix: `final`,
object: convert,
compress: true,
// uncompress: true,
uncompress: false,
});
/**
* 只统计微博调控信息
*/
let convert2 = [];
let total = 0;
data.band_list.forEach(item => {
total += item.num;
total -= item.raw_hot;
if (item.num - item.raw_hot == 0) return;
convert2.push([
`[${item.realpos}] ${item.word}${item.label_name}`,
`原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
]);
});
saveJSON({
now: now,
fileNameSuffix: `regulation`,
object: {
total_delta: total, // 所有调控值之和
data: convert2
},
compress: false,
uncompress: true
});
/**
* 保存预处理后数据
*/
// 过滤掉不需要的数据
// band_list
data.band_list.forEach(function (item) {
delete item["mblog"];
});
saveJSON({
now: now,
fileNameSuffix: `simplify`,
object: data,
compress: true,
// uncompress: true,
// compress: false,
uncompress: false,
});
/**
* 更新最新的
*/
fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({
update_time: requestTimestamp,
update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
regulation: convert2,
data: convert
}));
} }
/**
* 获取需要的数据进行转换
*/
let convert = [];
data.band_list.forEach(item => {
let detail = "";
let pic_ids = [];
if (item.mblog) { // 有些热搜没有 mblog
var regex = /(<([^>]+)>)/ig
detail = item.mblog.text.replace(regex, "");
if (item.mblog.pics) {
pic_ids = item.mblog.pics.map(pic => `${pic}`);
}
}
convert.push({
// 热搜排行顺序
rank: item.rank,
realpos: item.realpos,
// 热搜信息
word: item.word, // 热搜标题
word_scheme: item.word_scheme, // 热搜话题 "#热搜标题#"
emoticon: item.emoticon, // 热搜小表情,如 "[泪]"
label_name: item.label_name, // 热搜标签,如 "爆" "热" "新" ""
onboard_time: item.onboard_time, // 热搜上线时间,秒级时间戳,如 1658565575
/**
* 热搜数据
*
* 大部分的 num raw_hot 是相同的页面上显示的是 num可能是人工调控的热搜
*
* 两者差值通过观测似乎最大是 1250000
* 例如 唐山打架事件8名违法嫌疑人已到案 这条热搜一开始 delta 首先不断增大最大达到 1250000
* 然后热搜数量增加到 12600000 左右的时候delta 逐渐减小到 1040000 左右
*/
num: item.num,
raw_hot: item.raw_hot,
detla: item.num - item.raw_hot, // 计算值
url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // 热搜话题链接
// 分类
category: item.category ? item.category.split(',') : "",
subject_label: item.subject_label,
// 其他
more: {
is_new: item.is_new,
subject_querys: item.subject_querys,
mid: item.mid,
icon_desc_color: item.icon_desc_color,
detail: detail,
},
});
});
saveJSON({
now: now,
fileNameSuffix: `final`,
object: convert,
compress: true,
// uncompress: true,
uncompress: false,
});
/**
* 只统计微博调控信息
*/
let convert2 = [];
let total = 0;
data.band_list.forEach(item => {
total += item.num;
total -= item.raw_hot;
if (item.num - item.raw_hot == 0) return;
convert2.push([
`[${item.realpos}] ${item.word}${item.label_name}`,
`原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
]);
});
saveJSON({
now: now,
fileNameSuffix: `regulation`,
object: {
total_delta: total, // 所有调控值之和
data: convert2
},
compress: false,
uncompress: true
});
/**
* 保存预处理后数据
*/
// 过滤掉不需要的数据
// band_list
data.band_list.forEach(function (item) {
delete item["mblog"];
});
saveJSON({
now: now,
fileNameSuffix: `simplify`,
object: data,
compress: true,
// uncompress: true,
// compress: false,
uncompress: false,
});
/**
* 更新最新的
*/
fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({
update_time: requestTimestamp,
update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
regulation: convert2,
data: convert
}));
} }
exports.main = main; exports.main = main;