From 8067df5ae275b0e19862a076afb5d3fdb39099fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E5=B0=8F=E5=A2=A8?= <2291200076@qq.com> Date: Sat, 23 Jul 2022 23:19:13 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0node=E6=89=A7=E8=A1=8C?= =?UTF-8?q?=E5=91=BD=E4=BB=A4=E8=A1=8C=E7=9B=B8=E5=85=B3=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=EF=BC=9B=E4=BB=A3=E7=A0=81=E7=BB=93=E6=9E=84=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + index.js | 248 +---------------------------------------- src/execute_command.js | 20 ++++ src/get_hotband.js | 247 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 270 insertions(+), 246 deletions(-) create mode 100644 src/execute_command.js create mode 100644 src/get_hotband.js diff --git a/.gitignore b/.gitignore index e43125c..f72892b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ test.js node_modules +.VSCodeCounter data/* !data/.gitkeep \ No newline at end of file diff --git a/index.js b/index.js index 1ccb8a0..4dd6cf8 100644 --- a/index.js +++ b/index.js @@ -1,247 +1,3 @@ -'use strict'; +const get_hotband = require('./src/get_hotband'); -const request = require('request'); -const fs = require('fs'); - -console.log("Start running ..."); - -// 请求微博热搜 APi 接口 -async function getApiResult(url) { - var return_data = await new Promise((resolve) => { - request({ - method: 'GET', - url: url, - json: true, - }, (error, response, result) => { - if (!error && (response.statusCode == 200)) { - // 请求成功 - resolve(result); - } else { - // 请求失败 - console.log(`error is ${error}`); - resolve("error"); - } - }); - }); - // console.log(`return_data is ${JSON.stringify(return_data)}`); - return return_data; -} - -// 创建目录 -async function createFolder(folderToCreate) { - let currentFolder = folderToCreate.replace(/\\/g, '/'); - let parentFolder = currentFolder.substring(0, currentFolder.lastIndexOf('/')); - if (!fs.existsSync(currentFolder)) { - // 文件夹不存在,创建文件夹 - createFolder(parentFolder); // 保证父级文件夹存在 - fs.mkdirSync(currentFolder); // 创建当前级文件夹 - } else { - // 否则就什么也不做 - } -} - -// 保存 JSON -function saveJSON({ now, fileNameSuffix, object, compress = true, uncompress = true }) { - - let year = now.substring(0, 4); - let month = now.substring(5, 7); - let day = now.substring(8, 10); - let hour = now.substring(11, 13); - let minute = now.substring(14, 16); - // console.log(now); - // console.log( "year, month, day, hour, minute: " + year + ", " + month + ", " + day + ", " + hour + ", " + minute); - - // 创建当前文件夹 - let folder = `./data/${year}/${month}/${day}`; - createFolder(folder); - let fileName = `${folder}/${year}${month}${day}_${hour}${minute}_${fileNameSuffix}`; - - // 生成文件名 - // '2022-07-23T10:11:38.650Z' => '20220723_1011' - // let fileName = now.replace(/T/, '_').replace(/:\d{2}.\d{3}Z/, '').replace(/[-:]/g, ''); - // console.log(`fileName is ${fileName}`); - - if (compress) - fs.writeFileSync(`${fileName}.min.json`, JSON.stringify(object)); - if (uncompress) - fs.writeFileSync(`${fileName}.json`, JSON.stringify(object, "", "\t")); -} - -async function main() { - let requestTimestamp = Date.now(); - let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString(); - - let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band"); - // console.log("result", result); - - if (result.ok != 1) { - console.log("请求成功,但服务器处理失败。"); - } else { - console.log("请求成功。"); - - /** - * 保存原始数据 - */ - saveJSON({ - now: now, - fileNameSuffix: `origin`, - object: result, - compress: true, - uncompress: false - }); - - let data = JSON.parse(JSON.stringify(result.data)); - - - /** - * 过滤掉不需要的数据 - */ - // hotgov - delete data.hotgov["mblog"]; - // 重复字段只保留一个 - delete data.hotgov["note"]; // note word - delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc - delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color - - // band_list - for (let i = 0; i < data.band_list.length; i++) { - const item = data.band_list[i]; - - // 过滤广告 - if (item.is_ad) { - data.band_list.splice(i, 1); - i--; - } - - // 过滤空字段 - delete item["ad_info"]; - - // 重复字段只保留一个 - delete item["note"]; // note word - delete item["icon_desc"]; delete item["small_icon_desc"]; // label_name icon_desc small_icon_desc - delete item["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color - delete item["flag_desc"]; // flag_desc subject_label 这两个有值的时候相同,没有值的时候,前一个为 undefined,后一个为 "" - } - - - /** - * 获取需要的数据,进行转换 - */ - let convert = []; - data.band_list.forEach(item => { - let detail = ""; - let pic_ids = []; - if (item.mblog) { // 有些热搜没有 mblog - var regex = /(<([^>]+)>)/ig - detail = item.mblog.text.replace(regex, ""); - if (item.mblog.pics) { - pic_ids = item.mblog.pics.map(pic => `${pic}`); - } - } - convert.push({ - // 热搜排行顺序 - rank: item.rank, - realpos: item.realpos, - - // 热搜信息 - word: item.word, // 热搜标题 - word_scheme: item.word_scheme, // 热搜话题 "#热搜标题#" - emoticon: item.emoticon, // 热搜小表情,如 "[泪]" - label_name: item.label_name, // 热搜标签,如 "爆" "热" "新" "" - onboard_time: item.onboard_time, // 热搜上线时间,秒级时间戳,如 1658565575 - - /** - * 热搜数据 - * - * 大部分的 num 和 raw_hot 是相同的,页面上显示的是 num,可能是人工调控的热搜 - * - * 两者差值通过观测似乎最大是 1250000 - * 例如 【爆】唐山打架事件8名违法嫌疑人已到案 这条热搜一开始 delta 首先不断增大,最大达到 1250000 - * 然后热搜数量增加到 12600000 左右的时候,delta 逐渐减小到 1040000 左右 - * 所有热搜的 detla(带正负) 加起来就是基本上在100000-230000之间 - */ - num: item.num, - raw_hot: item.raw_hot, - detla: item.num - item.raw_hot, // 计算值 - - url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // 热搜话题链接 - - // 分类 - category: item.category ? item.category.split(',') : "", - subject_label: item.subject_label, - - // 其他 - more: { - is_new: item.is_new, - subject_querys: item.subject_querys, - mid: item.mid, - icon_desc_color: item.icon_desc_color, - detail: detail, - }, - }); - }); - saveJSON({ - now: now, - fileNameSuffix: `final`, - object: convert, - compress: true, - uncompress: true - }); - - - /** - * 只统计微博调控信息 - */ - let convert2 = []; - let total = 0; - data.band_list.forEach(item => { - total += item.num; - total -= item.raw_hot; - if (item.num - item.raw_hot == 0) return; - convert2.push([ - `[${item.realpos}] ${item.word}【${item.label_name}】`, - `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}` - ]); - }); - saveJSON({ - now: now, - fileNameSuffix: `regulation`, - object: { - total_delta: total, // 所有调控值之和 - data: convert2 - }, - compress: false, - uncompress: true - }); - - - /** - * 保存预处理后数据 - */ - // 过滤掉不需要的数据 - // band_list - data.band_list.forEach(function (item) { - delete item["mblog"]; - }); - saveJSON({ - now: now, - fileNameSuffix: `simplify`, - object: data, - compress: true, - uncompress: true - }); - - - /** - * 更新最新的 - */ - fs.writeFileSync(`./data/latest.json`, JSON.stringify({ - update_time: requestTimestamp, - update_time_friendly: now.substring(0, 19).replace(/T/g, " "), - regulation: convert2, - data: convert - })); - } -} - -main(); \ No newline at end of file +get_hotband.main(); \ No newline at end of file diff --git a/src/execute_command.js b/src/execute_command.js new file mode 100644 index 0000000..76372e3 --- /dev/null +++ b/src/execute_command.js @@ -0,0 +1,20 @@ +var child_process = require('child_process'); +var cmds = [ + 'git status', + 'dir', +]; + +cmds.forEach(async function (cmd) { + let result = await new Promise(function (resolve) { + child_process.exec(cmd, function (error, stdout, stderr) { + // 获取命令执行的输出 + resolve({ + cmd: cmd, + error: error, + stdout: stdout, + stderr: stderr, + }); + }); + }); + console.log(result); +}); \ No newline at end of file diff --git a/src/get_hotband.js b/src/get_hotband.js new file mode 100644 index 0000000..5159a32 --- /dev/null +++ b/src/get_hotband.js @@ -0,0 +1,247 @@ +'use strict'; + +const request = require('request'); +const fs = require('fs'); + +console.log("Start running ..."); + +// 请求微博热搜 APi 接口 +async function getApiResult(url) { + var return_data = await new Promise((resolve) => { + request({ + method: 'GET', + url: url, + json: true, + }, (error, response, result) => { + if (!error && (response.statusCode == 200)) { + // 请求成功 + resolve(result); + } else { + // 请求失败 + console.log(`error is ${error}`); + resolve("error"); + } + }); + }); + // console.log(`return_data is ${JSON.stringify(return_data)}`); + return return_data; +} + +// 创建目录 +async function createFolder(folderToCreate) { + let currentFolder = folderToCreate.replace(/\\/g, '/'); + let parentFolder = currentFolder.substring(0, currentFolder.lastIndexOf('/')); + if (!fs.existsSync(currentFolder)) { + // 文件夹不存在,创建文件夹 + createFolder(parentFolder); // 保证父级文件夹存在 + fs.mkdirSync(currentFolder); // 创建当前级文件夹 + } else { + // 否则就什么也不做 + } +} + +// 保存 JSON +function saveJSON({ now, fileNameSuffix, object, compress = true, uncompress = true }) { + + let year = now.substring(0, 4); + let month = now.substring(5, 7); + let day = now.substring(8, 10); + let hour = now.substring(11, 13); + let minute = now.substring(14, 16); + // console.log(now); + // console.log( "year, month, day, hour, minute: " + year + ", " + month + ", " + day + ", " + hour + ", " + minute); + + // 创建当前文件夹 + let folder = `./data/${year}/${month}/${day}`; + createFolder(folder); + let fileName = `${folder}/${year}${month}${day}_${hour}${minute}_${fileNameSuffix}`; + + // 生成文件名 + // '2022-07-23T10:11:38.650Z' => '20220723_1011' + // let fileName = now.replace(/T/, '_').replace(/:\d{2}.\d{3}Z/, '').replace(/[-:]/g, ''); + // console.log(`fileName is ${fileName}`); + + if (compress) + fs.writeFileSync(`${fileName}.min.json`, JSON.stringify(object)); + if (uncompress) + fs.writeFileSync(`${fileName}.json`, JSON.stringify(object, "", "\t")); +} + +async function main() { + let requestTimestamp = Date.now(); + let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString(); + + let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band"); + // console.log("result", result); + + if (result.ok != 1) { + console.log("请求成功,但服务器处理失败。"); + } else { + console.log("请求成功。"); + + /** + * 保存原始数据 + */ + saveJSON({ + now: now, + fileNameSuffix: `origin`, + object: result, + compress: true, + uncompress: false + }); + + let data = JSON.parse(JSON.stringify(result.data)); + + + /** + * 过滤掉不需要的数据 + */ + // hotgov + delete data.hotgov["mblog"]; + // 重复字段只保留一个 + delete data.hotgov["note"]; // note word + delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc + delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color + + // band_list + for (let i = 0; i < data.band_list.length; i++) { + const item = data.band_list[i]; + + // 过滤广告 + if (item.is_ad) { + data.band_list.splice(i, 1); + i--; + } + + // 过滤空字段 + delete item["ad_info"]; + + // 重复字段只保留一个 + delete item["note"]; // note word + delete item["icon_desc"]; delete item["small_icon_desc"]; // label_name icon_desc small_icon_desc + delete item["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color + delete item["flag_desc"]; // flag_desc subject_label 这两个有值的时候相同,没有值的时候,前一个为 undefined,后一个为 "" + } + + + /** + * 获取需要的数据,进行转换 + */ + let convert = []; + data.band_list.forEach(item => { + let detail = ""; + let pic_ids = []; + if (item.mblog) { // 有些热搜没有 mblog + var regex = /(<([^>]+)>)/ig + detail = item.mblog.text.replace(regex, ""); + if (item.mblog.pics) { + pic_ids = item.mblog.pics.map(pic => `${pic}`); + } + } + convert.push({ + // 热搜排行顺序 + rank: item.rank, + realpos: item.realpos, + + // 热搜信息 + word: item.word, // 热搜标题 + word_scheme: item.word_scheme, // 热搜话题 "#热搜标题#" + emoticon: item.emoticon, // 热搜小表情,如 "[泪]" + label_name: item.label_name, // 热搜标签,如 "爆" "热" "新" "" + onboard_time: item.onboard_time, // 热搜上线时间,秒级时间戳,如 1658565575 + + /** + * 热搜数据 + * + * 大部分的 num 和 raw_hot 是相同的,页面上显示的是 num,可能是人工调控的热搜 + * + * 两者差值通过观测似乎最大是 1250000 + * 例如 【爆】唐山打架事件8名违法嫌疑人已到案 这条热搜一开始 delta 首先不断增大,最大达到 1250000 + * 然后热搜数量增加到 12600000 左右的时候,delta 逐渐减小到 1040000 左右 + * 所有热搜的 detla(带正负) 加起来就是基本上在100000-230000之间 + */ + num: item.num, + raw_hot: item.raw_hot, + detla: item.num - item.raw_hot, // 计算值 + + url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // 热搜话题链接 + + // 分类 + category: item.category ? item.category.split(',') : "", + subject_label: item.subject_label, + + // 其他 + more: { + is_new: item.is_new, + subject_querys: item.subject_querys, + mid: item.mid, + icon_desc_color: item.icon_desc_color, + detail: detail, + }, + }); + }); + saveJSON({ + now: now, + fileNameSuffix: `final`, + object: convert, + compress: true, + uncompress: true + }); + + + /** + * 只统计微博调控信息 + */ + let convert2 = []; + let total = 0; + data.band_list.forEach(item => { + total += item.num; + total -= item.raw_hot; + if (item.num - item.raw_hot == 0) return; + convert2.push([ + `[${item.realpos}] ${item.word}【${item.label_name}】`, + `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}` + ]); + }); + saveJSON({ + now: now, + fileNameSuffix: `regulation`, + object: { + total_delta: total, // 所有调控值之和 + data: convert2 + }, + compress: false, + uncompress: true + }); + + + /** + * 保存预处理后数据 + */ + // 过滤掉不需要的数据 + // band_list + data.band_list.forEach(function (item) { + delete item["mblog"]; + }); + saveJSON({ + now: now, + fileNameSuffix: `simplify`, + object: data, + compress: true, + uncompress: true + }); + + + /** + * 更新最新的 + */ + fs.writeFileSync(`./data/latest.json`, JSON.stringify({ + update_time: requestTimestamp, + update_time_friendly: now.substring(0, 19).replace(/T/g, " "), + regulation: convert2, + data: convert + })); + } +} + +exports.main = main; \ No newline at end of file