From 93bfc6c2cf08fbdedce5d0aa15db4e8970cb550d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E5=B0=8F=E5=A2=A8?= <2291200076@qq.com> Date: Sun, 24 Jul 2022 14:30:08 +0800 Subject: [PATCH] =?UTF-8?q?=E8=AF=B7=E6=B1=82=E5=A4=B1=E8=B4=A5=E9=87=8D?= =?UTF-8?q?=E8=AF=951=E6=AC=A1=EF=BC=8C=E5=B9=B6=E4=B8=94=E4=BF=9D?= =?UTF-8?q?=E5=AD=98=E8=AF=B7=E6=B1=82=E5=A4=B1=E8=B4=A5=E7=9A=84=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E4=BE=BF=E4=BA=8E=E5=90=8E=E7=BB=AD=E5=88=86=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/get_hotband.js | 334 +++++++++++++++++++++++---------------------- 1 file changed, 173 insertions(+), 161 deletions(-) diff --git a/src/get_hotband.js b/src/get_hotband.js index db1d14d..f3caa97 100644 --- a/src/get_hotband.js +++ b/src/get_hotband.js @@ -74,178 +74,190 @@ async function main() { let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString(); let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band"); + if (result.ok != 1) { + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,正在重试。"); + result = await getApiResult("https://weibo.com/ajax/statuses/hot_band"); + if (result.ok != 1) { + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,保存失败信息。"); + // ok 不为 1,那么久直接保存便于后续分析,不进行后续处理 + saveJSON({ + now: now, + fileNameSuffix: `origin-error`, + object: result, + compress: true, + uncompress: false + }); + return; + } + } + + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功"); // console.log("result", result); - if (result.ok != 1) { - console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败。"); - } else { - console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功"); + /** + * 保存原始数据 + */ + saveJSON({ + now: now, + fileNameSuffix: `origin`, + object: result, + compress: true, + uncompress: false + }); - /** - * 保存原始数据 - */ - saveJSON({ - now: now, - fileNameSuffix: `origin`, - object: result, - compress: true, - uncompress: false - }); - - let data = JSON.parse(JSON.stringify(result.data)); + let data = JSON.parse(JSON.stringify(result.data)); - /** - * 过滤掉不需要的数据 - */ - // hotgov - delete data.hotgov["mblog"]; - // 重复字段只保留一个 - delete data.hotgov["note"]; // note word - delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc - delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color + /** + * 过滤掉不需要的数据 + */ + // hotgov + delete data.hotgov["mblog"]; + // 重复字段只保留一个 + delete data.hotgov["note"]; // note word + delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc + delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color - // band_list - for (let i = 0; i < data.band_list.length; i++) { - const item = data.band_list[i]; + // band_list + for (let i = 0; i < data.band_list.length; i++) { + const item = data.band_list[i]; - // 过滤广告 - if (item.is_ad) { - data.band_list.splice(i, 1); - i--; - } - - // 过滤空字段 - delete item["ad_info"]; - - // 重复字段只保留一个 - delete item["note"]; // note word - delete item["icon_desc"]; delete item["small_icon_desc"]; // label_name icon_desc small_icon_desc - delete item["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color - delete item["flag_desc"]; // flag_desc subject_label 这两个有值的时候相同,没有值的时候,前一个为 undefined,后一个为 "" + // 过滤广告 + if (item.is_ad) { + data.band_list.splice(i, 1); + i--; } + // 过滤空字段 + delete item["ad_info"]; - /** - * 获取需要的数据,进行转换 - */ - let convert = []; - data.band_list.forEach(item => { - let detail = ""; - let pic_ids = []; - if (item.mblog) { // 有些热搜没有 mblog - var regex = /(<([^>]+)>)/ig - detail = item.mblog.text.replace(regex, ""); - if (item.mblog.pics) { - pic_ids = item.mblog.pics.map(pic => `${pic}`); - } - } - convert.push({ - // 热搜排行顺序 - rank: item.rank, - realpos: item.realpos, - - // 热搜信息 - word: item.word, // 热搜标题 - word_scheme: item.word_scheme, // 热搜话题 "#热搜标题#" - emoticon: item.emoticon, // 热搜小表情,如 "[泪]" - label_name: item.label_name, // 热搜标签,如 "爆" "热" "新" "" - onboard_time: item.onboard_time, // 热搜上线时间,秒级时间戳,如 1658565575 - - /** - * 热搜数据 - * - * 大部分的 num 和 raw_hot 是相同的,页面上显示的是 num,可能是人工调控的热搜 - * - * 两者差值通过观测似乎最大是 1250000 - * 例如 【爆】唐山打架事件8名违法嫌疑人已到案 这条热搜一开始 delta 首先不断增大,最大达到 1250000 - * 然后热搜数量增加到 12600000 左右的时候,delta 逐渐减小到 1040000 左右 - */ - num: item.num, - raw_hot: item.raw_hot, - detla: item.num - item.raw_hot, // 计算值 - - url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // 热搜话题链接 - - // 分类 - category: item.category ? item.category.split(',') : "", - subject_label: item.subject_label, - - // 其他 - more: { - is_new: item.is_new, - subject_querys: item.subject_querys, - mid: item.mid, - icon_desc_color: item.icon_desc_color, - detail: detail, - }, - }); - }); - saveJSON({ - now: now, - fileNameSuffix: `final`, - object: convert, - compress: true, - // uncompress: true, - uncompress: false, - }); - - - /** - * 只统计微博调控信息 - */ - let convert2 = []; - let total = 0; - data.band_list.forEach(item => { - total += item.num; - total -= item.raw_hot; - if (item.num - item.raw_hot == 0) return; - convert2.push([ - `[${item.realpos}] ${item.word}【${item.label_name}】`, - `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}` - ]); - }); - saveJSON({ - now: now, - fileNameSuffix: `regulation`, - object: { - total_delta: total, // 所有调控值之和 - data: convert2 - }, - compress: false, - uncompress: true - }); - - - /** - * 保存预处理后数据 - */ - // 过滤掉不需要的数据 - // band_list - data.band_list.forEach(function (item) { - delete item["mblog"]; - }); - saveJSON({ - now: now, - fileNameSuffix: `simplify`, - object: data, - compress: true, - // uncompress: true, - // compress: false, - uncompress: false, - }); - - - /** - * 更新最新的 - */ - fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({ - update_time: requestTimestamp, - update_time_friendly: now.substring(0, 19).replace(/T/g, " "), - regulation: convert2, - data: convert - })); + // 重复字段只保留一个 + delete item["note"]; // note word + delete item["icon_desc"]; delete item["small_icon_desc"]; // label_name icon_desc small_icon_desc + delete item["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color + delete item["flag_desc"]; // flag_desc subject_label 这两个有值的时候相同,没有值的时候,前一个为 undefined,后一个为 "" } + + + /** + * 获取需要的数据,进行转换 + */ + let convert = []; + data.band_list.forEach(item => { + let detail = ""; + let pic_ids = []; + if (item.mblog) { // 有些热搜没有 mblog + var regex = /(<([^>]+)>)/ig + detail = item.mblog.text.replace(regex, ""); + if (item.mblog.pics) { + pic_ids = item.mblog.pics.map(pic => `${pic}`); + } + } + convert.push({ + // 热搜排行顺序 + rank: item.rank, + realpos: item.realpos, + + // 热搜信息 + word: item.word, // 热搜标题 + word_scheme: item.word_scheme, // 热搜话题 "#热搜标题#" + emoticon: item.emoticon, // 热搜小表情,如 "[泪]" + label_name: item.label_name, // 热搜标签,如 "爆" "热" "新" "" + onboard_time: item.onboard_time, // 热搜上线时间,秒级时间戳,如 1658565575 + + /** + * 热搜数据 + * + * 大部分的 num 和 raw_hot 是相同的,页面上显示的是 num,可能是人工调控的热搜 + * + * 两者差值通过观测似乎最大是 1250000 + * 例如 【爆】唐山打架事件8名违法嫌疑人已到案 这条热搜一开始 delta 首先不断增大,最大达到 1250000 + * 然后热搜数量增加到 12600000 左右的时候,delta 逐渐减小到 1040000 左右 + */ + num: item.num, + raw_hot: item.raw_hot, + detla: item.num - item.raw_hot, // 计算值 + + url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // 热搜话题链接 + + // 分类 + category: item.category ? item.category.split(',') : "", + subject_label: item.subject_label, + + // 其他 + more: { + is_new: item.is_new, + subject_querys: item.subject_querys, + mid: item.mid, + icon_desc_color: item.icon_desc_color, + detail: detail, + }, + }); + }); + saveJSON({ + now: now, + fileNameSuffix: `final`, + object: convert, + compress: true, + // uncompress: true, + uncompress: false, + }); + + + /** + * 只统计微博调控信息 + */ + let convert2 = []; + let total = 0; + data.band_list.forEach(item => { + total += item.num; + total -= item.raw_hot; + if (item.num - item.raw_hot == 0) return; + convert2.push([ + `[${item.realpos}] ${item.word}【${item.label_name}】`, + `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}` + ]); + }); + saveJSON({ + now: now, + fileNameSuffix: `regulation`, + object: { + total_delta: total, // 所有调控值之和 + data: convert2 + }, + compress: false, + uncompress: true + }); + + + /** + * 保存预处理后数据 + */ + // 过滤掉不需要的数据 + // band_list + data.band_list.forEach(function (item) { + delete item["mblog"]; + }); + saveJSON({ + now: now, + fileNameSuffix: `simplify`, + object: data, + compress: true, + // uncompress: true, + // compress: false, + uncompress: false, + }); + + + /** + * 更新最新的 + */ + fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({ + update_time: requestTimestamp, + update_time_friendly: now.substring(0, 19).replace(/T/g, " "), + regulation: convert2, + data: convert + })); } exports.main = main; \ No newline at end of file