'use strict';

const fs = require('fs');
const path = require('path');
const fileUtils = require('./utils/fileUtils');
const requestUtils = require('./utils/requestUtils');

const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', 'weibo');
console.log("DATA_FOLDER", DATA_FOLDER);
fileUtils.createFolder(DATA_FOLDER); // Make sure the data directory exists as soon as the program starts.

/** Endpoint that returns the Weibo hot-search ("hot band") list. */
const HOT_BAND_URL = "https://weibo.com/ajax/statuses/hot_band";

/** Offset added to epoch milliseconds so that `toISOString()` prints UTC+8 wall-clock digits. */
const UTC8_OFFSET_MS = 8 * 60 * 60 * 1000;

/** Matches HTML tags; used to strip markup from a post's text. */
const HTML_TAG_PATTERN = /(<([^>]+)>)/ig;

/** Fields removed from `data.hotgov` (each duplicates another field that is kept). */
const HOTGOV_DROPPED_KEYS = [
    "mblog",
    "note",                  // duplicates `word`
    "small_icon_desc",       // duplicates `icon_desc`
    "small_icon_desc_color", // duplicates `icon_desc_color`
];

/** Fields removed from every `data.band_list` entry (empty or duplicated information). */
const BAND_ITEM_DROPPED_KEYS = [
    "ad_info",               // empty field
    "note",                  // duplicates `word`
    "icon_desc",             // duplicates `label_name`
    "small_icon_desc",       // duplicates `label_name`
    "small_icon_desc_color", // duplicates `icon_desc_color`
    "flag_desc",             // equals `subject_label` when set; otherwise undefined vs ""
];

/**
 * Formats a timestamp shifted by eight hours as an ISO-8601 string.
 * The digits are UTC+8 wall-clock time even though the string still ends in "Z".
 *
 * @param {number} [timestamp=Date.now()] Epoch milliseconds.
 * @returns {string} ISO string of the shifted instant.
 */
function utc8IsoString(timestamp = Date.now()) {
    return new Date(timestamp + UTC8_OFFSET_MS).toISOString();
}

/**
 * Tells whether the API response reports success (`ok` equal to 1).
 *
 * @param {{ok: *}} result Parsed API response.
 * @returns {boolean} True when `ok` is numerically 1.
 */
function isSuccessful(result) {
    return Number(result.ok) === 1;
}

/**
 * Persists one snapshot through `fileUtils.saveJSON` into DATA_FOLDER.
 * NOTE(review): the return value of `saveJSON` is not awaited, as before;
 * confirm whether it is synchronous.
 *
 * @param {string} now Timestamp string passed through to `saveJSON`.
 * @param {string} fileNameSuffix Suffix identifying the kind of snapshot.
 * @param {*} object Value to serialise.
 * @param {{compress?: boolean, uncompress?: boolean}} [flags] Flags forwarded to `saveJSON`.
 */
function saveSnapshot(now, fileNameSuffix, object, { compress = true, uncompress = false } = {}) {
    fileUtils.saveJSON({
        saveFolder: DATA_FOLDER,
        now: now,
        fileNameSuffix: fileNameSuffix,
        object: object,
        compress: compress,
        uncompress: uncompress,
    });
}

/**
 * Removes unneeded fields from the response payload in place and drops ads.
 * A missing `band_list` still throws on purpose, so that a malformed response
 * never overwrites the previously saved results.
 *
 * @param {object} data Deep copy of `result.data`; mutated.
 */
function pruneHotBandData(data) {
    // `hotgov` may be absent from the response; deleting a property on undefined would throw.
    if (data.hotgov) {
        for (const key of HOTGOV_DROPPED_KEYS) {
            delete data.hotgov[key];
        }
    }

    // Drop ads first, then clean the remaining entries.
    data.band_list = data.band_list.filter((item) => !item.is_ad);
    for (const item of data.band_list) {
        for (const key of BAND_ITEM_DROPPED_KEYS) {
            delete item[key];
        }
    }
}

/**
 * Converts one hot-search entry into the flattened record stored in the `final` file.
 *
 * @param {object} item Entry of `data.band_list`.
 * @returns {object} Converted record.
 */
function convertBandItem(item) {
    // Some hot searches have no `mblog`, and its text is only usable when it is a string.
    const detail = typeof item.mblog?.text === "string"
        ? item.mblog.text.replace(HTML_TAG_PATTERN, "")
        : "";

    return {
        // Ranking position
        rank: item.rank,
        realpos: item.realpos,

        // Hot-search information
        word: item.word,                 // title
        word_scheme: item.word_scheme,   // topic, "#title#"
        emoticon: item.emoticon,         // small emoticon, e.g. "[泪]"
        label_name: item.label_name,     // label, e.g. "爆" "热" "新" ""
        onboard_time: item.onboard_time, // time it went on the board, epoch seconds, e.g. 1658565575

        /**
         * Heat values.
         *
         * `num` and `raw_hot` are equal for most entries; the page displays `num`,
         * so a difference may indicate manual adjustment.
         *
         * From observation the difference seems to peak at 1250000: for one
         * entry it first grew to 1250000, and once the heat reached roughly
         * 12600000 it gradually shrank to about 1040000.
         */
        num: item.num,
        raw_hot: item.raw_hot,
        // Computed value. The key is misspelled ("detla") but kept because it is part of the saved format.
        detla: item.num - item.raw_hot,
        url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // topic link

        // Classification
        category: item.category ? item.category.split(',') : "",
        subject_label: item.subject_label,

        // Everything else
        more: {
            is_new: item.is_new,
            subject_querys: item.subject_querys,
            mid: item.mid,
            icon_desc_color: item.icon_desc_color,
            detail: detail,
        },
    };
}

/**
 * Collects the entries whose displayed heat differs from the raw heat.
 *
 * @param {object[]} bandList Cleaned `data.band_list`.
 * @returns {{total_delta: number, data: string[][]}} Sum of all differences and one
 *     `[title, description]` pair per adjusted entry.
 */
function buildRegulation(bandList) {
    const rows = [];
    let total = 0;
    for (const item of bandList) {
        total = total + item.num - item.raw_hot;
        const delta = item.num - item.raw_hot;
        if (delta === 0) {
            continue;
        }
        rows.push([
            `[${item.realpos}] ${item.word}【${item.label_name}】`,
            `原始:${item.raw_hot} 显示:${item.num} 调控: ${delta}`
        ]);
    }
    return {
        total_delta: total, // sum of all adjustment values
        data: rows
    };
}

/**
 * Fetches the Weibo hot-search list, retrying once when the server reports failure,
 * and writes the `origin`, `final`, `regulation` and `simplify` snapshots plus
 * `latest.json` into DATA_FOLDER. When both attempts fail only an `origin-error`
 * snapshot is written.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const requestTimestamp = Date.now();
    const now = utc8IsoString(requestTimestamp);

    let result = await requestUtils.getApiResult(HOT_BAND_URL);
    if (!isSuccessful(result)) {
        console.log(utc8IsoString(), "weibo", "请求成功,但服务器处理失败,正在重试。");
        result = await requestUtils.getApiResult(HOT_BAND_URL);
        if (!isSuccessful(result)) {
            console.log(utc8IsoString(), "weibo", "请求成功,但服务器处理失败,保存失败信息。");
            // `ok` is still not 1: save the response for later analysis and skip all processing.
            saveSnapshot(now, `origin-error`, result);
            return;
        }
    }
    console.log(utc8IsoString(), "weibo", "请求成功");

    // Save the raw response.
    saveSnapshot(now, `origin`, result);

    // Work on a deep copy so the response object itself stays untouched.
    const data = JSON.parse(JSON.stringify(result.data));
    pruneHotBandData(data);

    // Extract and convert the data we need.
    const convert = data.band_list.map(convertBandItem);
    saveSnapshot(now, `final`, convert);

    // Statistics about heat adjustments only.
    const regulation = buildRegulation(data.band_list);
    saveSnapshot(now, `regulation`, regulation, { compress: false, uncompress: true });

    // Save the pre-processed data; `mblog` is dropped only now because the conversion above reads it.
    for (const item of data.band_list) {
        delete item["mblog"];
    }
    saveSnapshot(now, `simplify`, data);

    // Refresh the "latest" file.
    fs.writeFileSync(path.join(DATA_FOLDER, "latest.json"), JSON.stringify({
        update_time: requestTimestamp,
        update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
        regulation: regulation.data,
        data: convert
    }));
}

exports.main = main;