diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8e766a2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 程序员小墨 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/hotband/.env.example b/hotband/.env.example new file mode 100644 index 0000000..4fb6064 --- /dev/null +++ b/hotband/.env.example @@ -0,0 +1,20 @@ +# 调试模式 +# 1为开启调试 +DEBUG_MODE=1 + +# 爬取数据保存的文件夹 +# 目录开头与结尾的 [./] [/] [\] [\\] 均可带可不带 +# 默认为 data 文件夹 +DATA_FOLDER=data + +# 是否在程序刚一启动时就抓取一次数据 +# 1为是 +EXECUTE_AT_STARTUP=1 + +# 数据是否推送到Git仓库 +# 1为是 +PUSH_TO_GIT=0 + +# 是否仅保存 latest.json 而不保存其他文件作为存档 +# 1为是 +LATEST_DATA_ONLY=0 \ No newline at end of file diff --git a/hotband/.gitignore b/hotband/.gitignore new file mode 100644 index 0000000..7467e49 --- /dev/null +++ b/hotband/.gitignore @@ -0,0 +1,8 @@ +.DS_Store + +data/* +.env +node_modules +.VSCodeCounter + +test.js diff --git a/hotband/README.md b/hotband/README.md new file mode 100644 index 0000000..28524b5 --- /dev/null +++ b/hotband/README.md @@ -0,0 +1,247 @@ +# 热搜数据爬取工具 + +> 本仓库中代码仅供学习研究使用,不得用于违法用途,学习使用完毕后请于24小时内删除。 +> +> 数据来自微博、B站(详见下方「数据来源」),本项目不对数据真实性做验证,使用数据时请遵守相关平台的相关限制要求。 + +## 简介 + +您可以将本项目代码部署在服务器上(在本地运行也可),程序会每隔一分钟拉取一次热搜数据,并保存为 `json` 格式文件。 + + + +## 数据预览 + +在部署并启动项目后,您可以在浏览器中打开 `html/index.html` 文件实时预览当前热搜。 + + + +## 数据来源 + +**微博热搜** + +页面:https://weibo.com/hot/search + +接口:https://weibo.com/ajax/statuses/hot_band + + + +**B站热搜** + +页面:https://www.bilibili.com/blackboard/activity-trending-topic.html + +接口:https://app.bilibili.com/x/v2/search/trending/ranking + + + +**B站排行榜** + +页面:https://www.bilibili.com/v/popular/rank/all + +接口:https://api.bilibili.com/x/web-interface/ranking/v2?type=all + +(切换到其他榜单再切换回来会调用此接口) + + + +## 运行环境 + +原理上来说 Windows 下和 Linux 都可运行,目前仅在 Windows 下测试过,暂未在 Linux 系统下测试。 + +项目使用 node 开发,以下部署流程默认您已安装了 `Git`、`Nodejs`。 + + + +## 部署 + +1. 克隆仓库(或直接下载压缩包) + +```bash +git clone https://git.only4.work/coder-xiaomo/weibo-hotband +``` + +2. 安装依赖 + +```bash +npm i +``` + +3. 修改配置文件 + +将项目目录下的 `.env.example` 文件复制一份,并改名为 `.env`,使用文本编辑器打开(例如:记事本、VS Code、vim等均可),根据其中的注释说明来进行配置即可。 + +> 如果不创建 .env 文件,项目启动时会报如下错误并退出。 +> +> ```bash +> [ERROR] .env file not found! +> ``` + +4. 
启动项目 + +```bash +# 直接运行 +# node index.js + +# 使用 pm2 +# pm2 start index.js --name weibo-hotband-bot +``` + +5. 停止项目 + +```bash +# 使用 node index.js 命令直接运行的项目可以通过 `Ctrl + C` 停止 + +# 使用 pm2 运行的可以使用以下两行命令来停止和从列表中删除项目 +# pm2 stop weibo-hotband-bot +# pm2 delete weibo-hotband-bot +``` + + + +## 说明 + +项目爬取的数据默认保存在项目目录下的 data 文件夹中,您也可以通过修改 `.env` 文件中的 `DATA_FOLDER` 参数值来自定义数据保存路径。 + +### 微博热搜榜 + +> 微博热搜在 `weibo-hotband` 子文件夹下 + +在程序运行后,该文件夹下会出现 `latest.json` 文件及其余几个文件夹,这些子文件夹中的文件按照以下格式保存:`年/月/日/年月日_时分.min.json`。 + +每次爬取后,`latest.json`中的数据都会被覆盖为最新的热搜数据。 + +`origin` 文件夹中的数据是通过Api接口获取到的原始数据,没有经过任何处理。 + + + +`final` 文件夹中的数据是从原始数据中抽离出的有用数据,并重新整理得到的。 + + + + + +### B站热搜榜 + +> B站热搜在 `bilibili-hotband` 子文件夹下 + +在程序运行后,该文件夹下会出现 `latest.json` 文件及其余几个文件夹,这些子文件夹中的文件按照以下格式保存:`年/月/日/年月日_时分.min.json`。 + +每次爬取后,`latest.json`中的数据都会被覆盖为最新的热搜数据。 + +`origin` 文件夹中的数据是通过Api接口获取到的原始数据,此处仅仅去除了 `trackid`。 + +`final` 文件夹中的数据是从原始数据中抽离出的有用数据,并重新整理得到的。 + + + +### B站排行榜 + +> B站排行榜在 `bilibili-rank` 子文件夹下 + +在程序运行后,该文件夹下会出现 `latest.json` 文件及其余几个文件夹,这些子文件夹中的文件按照以下格式保存:`年/月/日/年月日_时分.min.json`。 + +每次爬取后,`latest.json`中的数据都会被覆盖为最新的排行榜数据。 + +`origin` 文件夹中的数据是通过Api接口获取到的原始数据,没有经过任何处理。 + + + +## 目录结构 + +### 项目目录结构 + +```bash +hotband // 本项目 +├─ data // 爬取的数据(启动项目后自动创建) +├─ html // html 页面 +│ ├─ assets +│ │ ├─ css // CSS 样式 +│ │ │ └─ +│ │ ├─ image // 前端图片资源 +│ │ │ ├─ ... 
+│ │ └─ js +│ │ └─ isMobile.js +│ ├─ bilibili_hotband.html +│ ├─ bilibili_rank.html +│ └─ weibo_hotband.html +├─ src // 数据爬取核心代码 +│ ├─ utils // 工具类代码 +│ │ ├─ fileUtils.js +│ │ └─ requestUtils.js +│ ├─ execute_command.js // 执行命令行脚本(暂时没用到) +│ ├─ get_bilibili_hotband.js // 获取 B站热搜榜 代码 +│ ├─ get_bilibili_rank.js // 爬取 B站排行榜 代码 +│ └─ get_weibo_hotband.js // 爬取 微博热搜榜 代码 +├─ .env.example // 项目配置文件模板 +├─ .env // 项目配置文件(需要自行创建) +├─ index.html // html 页面打开文件 +├─ index.js // node 项目启动入口文件 +├─ nodemon.json +├─ package-lock.json +├─ package.json +├─ pm2 restart.bat +├─ pm2 restart.sh +├─ pm2 start.bat +├─ pm2 start.sh +├─ pm2 stop.bat +├─ pm2 stop.sh +└─ README.md // 项目自述文件 +``` + + + +### data 目录结构 + +data 文件夹下的目录结构如下 + +```bash +data +├─ bilibili-hotband +│ ├─ final / origin +│ │ └─ xxxx // 年 +│ │ └─ xx // 月 +│ │ └─ xx // 日 +│ │ ├─ xxxxxxxx_xxxx.min.json // 年月日_时分秒.min.json +│ └─ latest.json // 最新的json文件 +├─ bilibili-rank +│ ├─ origin +│ │ └─ xxxx // 年 +│ │ └─ xx // 月 +│ │ └─ xx // 日 +│ │ ├─ xxxxxxxx_xxxx.min.json // 年月日_时分秒.min.json +│ └─ latest.json // 最新的json文件 +└─ weibo-hotband + ├─ origin / final / simplify + │ └─ xxxx // 年 + │ └─ xx // 月 + │ └─ xx // 日 + │ ├─ xxxxxxxx_xxxx.min.json // 年月日_时分秒.min.json + ├─ regulation + │ └─ xxxx // 年 + │ └─ xx // 月 + │ └─ xx // 日 + │ ├─ xxxxxxxx_xxxx.json // 年月日_时分秒.json + └─ latest.json // 最新的json文件 +``` + + + +### 题外话:怎么生成目录结构? + +> 有很多小伙伴在问像上方的目录结构是如何生成的,这里跟大家说下: +> +> 1. Windows 下可以通过 `tree` 命令来生成,例如: +> +> ```bash +> tree /f > xxx.txt +> ``` +> +> 2. 使用 VS Code 插件 +> +> 我使用的是 [tree-generator](https://marketplace.visualstudio.com/items?itemName=xboxyan.tree-generator) 这个插件,安装之后直接在文件夹上右键即可生成 +> +> 3. 
另外还有一些其他方法也可以生成,大家可以自己探索。 diff --git a/hotband/html/assets/css/main.css b/hotband/html/assets/css/main.css new file mode 100644 index 0000000..9740b4b --- /dev/null +++ b/hotband/html/assets/css/main.css @@ -0,0 +1,34 @@ +#list { + width: 100%; + text-align: center; + border-spacing: 0; + border: 0.4px solid black; +} + +#list tr { + height: min(1.85rem, 50px); +} + +#list td { + margin: 0; + border: 0.4px solid black; +} + +/* 热搜的 label 样式 */ +.hotband-label { + color: white; + padding: 3px; + border-radius: 6px; + font-size: 10px; + display: inline-block; +} + +.bottom-placeholder { + height: 90px; + font-size:12px; + color: #999; + display: grid; + place-items: center; + text-align: center; + line-height: 1.7em; +} diff --git a/hotband/html/assets/image/bilibili.svg b/hotband/html/assets/image/bilibili.svg new file mode 100644 index 0000000..26c2423 --- /dev/null +++ b/hotband/html/assets/image/bilibili.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/hotband/html/assets/image/icon_rank.png b/hotband/html/assets/image/icon_rank.png new file mode 100644 index 0000000..b50baa0 Binary files /dev/null and b/hotband/html/assets/image/icon_rank.png differ diff --git a/hotband/html/assets/image/weibo.svg b/hotband/html/assets/image/weibo.svg new file mode 100644 index 0000000..a0d6440 --- /dev/null +++ b/hotband/html/assets/image/weibo.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/hotband/html/assets/js/isMobile.js b/hotband/html/assets/js/isMobile.js new file mode 100644 index 0000000..49fbeb1 --- /dev/null +++ b/hotband/html/assets/js/isMobile.js @@ -0,0 +1,24 @@ +function isMobile() { + var userAgentInfo = navigator.userAgent; + + var mobileAgents = ["Android", "iPhone", "SymbianOS", "Windows Phone", "iPad", "iPod"]; + + var mobile_flag = false; + + //根据userAgent判断是否是手机 + for (var v = 0; v < mobileAgents.length; v++) { + if (userAgentInfo.indexOf(mobileAgents[v]) > 0) { + mobile_flag = true; + break; + } + } + // var screen_width = 
window.screen.width; + // var screen_height = window.screen.height; + + // //根据屏幕分辨率判断是否是手机 + // if (screen_width > 325 && screen_height < 750) { + // mobile_flag = true; + // } + + return mobile_flag; +} \ No newline at end of file diff --git a/hotband/html/bilibili_hotband.html b/hotband/html/bilibili_hotband.html new file mode 100644 index 0000000..472bfe5 --- /dev/null +++ b/hotband/html/bilibili_hotband.html @@ -0,0 +1,310 @@ + + + + + + + + B站热搜 + + + + + +
+

B站热搜榜

+
+
+ 显示字段: + + + +
+ + + | + + +
+
+

+ + + + + + + + + +
+

+
+

+ — 到底啦 —
+ 数据来源: https://www.bilibili.com/blackboard/activity-trending-topic.html +

+
+ + + + + + + + \ No newline at end of file diff --git a/hotband/html/bilibili_rank.html b/hotband/html/bilibili_rank.html new file mode 100644 index 0000000..c5ade77 --- /dev/null +++ b/hotband/html/bilibili_rank.html @@ -0,0 +1,466 @@ + + + + + + + + + + B站排行 + + + + + +
+

B站排行榜

+

+
+
+ 显示字段: + + + + + + + +
+ + + | + + + +
+
+

+ + + + + + + + + +
+
+

+ — 到底啦 —
+ 数据来源: https://www.bilibili.com/v/popular/rank/all +

+
+ + + + + \ No newline at end of file diff --git a/hotband/html/weibo_hotband.html b/hotband/html/weibo_hotband.html new file mode 100644 index 0000000..d6a46f7 --- /dev/null +++ b/hotband/html/weibo_hotband.html @@ -0,0 +1,333 @@ + + + + + + + + 微博热搜 + + + + +
+

微博热搜榜

+
+
+ 显示字段: + + + + + + + +
+ + + | + + + +
+
+

+ + + + + + + + + +
+
+

+ — 到底啦 —
+ 数据来源: https://weibo.com/hot/search +

+
+ + + + + \ No newline at end of file diff --git a/hotband/index.html b/hotband/index.html new file mode 100644 index 0000000..ce27f68 --- /dev/null +++ b/hotband/index.html @@ -0,0 +1,191 @@ + + + + + + + + 热搜榜单 + + + + +
+
+ +
+
+ +
+
+ + + + + \ No newline at end of file diff --git a/hotband/index.js b/hotband/index.js new file mode 100644 index 0000000..3ba44d1 --- /dev/null +++ b/hotband/index.js @@ -0,0 +1,110 @@ +'use strict'; + +const dotenv = require('dotenv'); +const schedule = require('node-schedule'); +const path = require('path'); +const os = require('os'); +const fs = require('fs'); + +/** + * 环境变量 + */ +if (!fs.existsSync('.env')) { + // 如果没有 .env 文件,则报错并退出 + console.error('[ERROR] .env file not found!'); + return; +} +process.env = {}; // 清除系统自带的环境变量 +dotenv.config('./.env'); // 导入 .env 文件中的环境变量 +// console.log(process.env); + +const DEBUG_MODE = process.env.DEBUG_MODE == true; +const EXECUTE_AT_STARTUP = process.env.EXECUTE_AT_STARTUP == true; +const PUSH_TO_GIT = process.env.PUSH_TO_GIT == true; + +const ROOT_PATH = path.join(__dirname, process.env.DATA_FOLDER ?? 'data'); + + +/** + * 调试模式 + */ +if (DEBUG_MODE) { + console.log('DEBUG_MODE is on'); + console.log('Environment variables: ', process.env); +} + +/** + * 引入模块 + */ +const get_weibo_hotband = require('./src/get_weibo_hotband'); +const get_bilibili_hotband = require('./src/get_bilibili_hotband'); +const get_bilibili_rank = require('./src/get_bilibili_rank'); + +const execute_command = require('./src/execute_command'); + + +/** + * 开始运行 + */ +console.log("Start running ..."); + + +/** + * 程序主函数 + */ +async function start() { + // 爬取热搜数据 + await get_weibo_hotband.main(); + await get_bilibili_hotband.main(); + await get_bilibili_rank.main(); + + // 调试模式下 + if (DEBUG_MODE) { + // 推送到 Git 仓库 + await pushToGitRepo(); + } +} + +// 调试模式下,程序一启动就首先运行一次 +if (EXECUTE_AT_STARTUP) { + process.stdout.write("程序启动时,立即运行一次\t"); + start(); +} + +// 每分钟的第 5 秒执行一次 +// 这里指定第 5 秒是为了稍微与微博服务器热榜更新时间错开,避免因为微秒级误差造成拉取两次相同的热榜数据 +// refer: https://www.npmjs.com/package/node-schedule +const scheduleJob = schedule.scheduleJob('05 * * * * *', start); + + +/** + * 定时将热搜数据推送到 Git 仓库 + */ +async function pushToGitRepo() { + if (!PUSH_TO_GIT) return; + + let 
commands = [ + 'git status', + 'git pull', + 'git add .', + `git commit -m "${new Date(Date.now() + 8 * 3600 * 1000).toISOString().substring(0, 19).replace('T', ' ')} update"`, + `git push origin master`, + 'git status', + ]; + switch (os.type()) { + case 'Windows_NT': // Windows + commands.unshift('dir'); + break; + + case 'Darwin': // Mac OS X + case 'Linux': // Linux + default: + commands.unshift('pwd'); + break; + } + let outputs = await execute_command.execute(ROOT_PATH, commands); + console.log(commands, outputs); +} + +// Sync to the Git repository once every hour +schedule.scheduleJob('0 0 * * * *', pushToGitRepo); diff --git a/hotband/nodemon.json b/hotband/nodemon.json new file mode 100644 index 0000000..e4e9978 --- /dev/null +++ b/hotband/nodemon.json @@ -0,0 +1,8 @@ +{ + "ignore": [ + ".git", + ".svn", + "node_modules/**/node_modules" + ], + "ext": "js" +} \ No newline at end of file diff --git a/hotband/pack.bat b/hotband/pack.bat new file mode 100644 index 0000000..cd5913a --- /dev/null +++ b/hotband/pack.bat @@ -0,0 +1,40 @@ +set f_year=2022 +set f_month=10 + +ren data data_for_backup +cd ./data_for_backup + + +cd ./bilibili-hotband +del /f /s/q latest.json +cd ./final/%f_year%/%f_month% +for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip(绿色版)\7z.exe" a "%%~ni.zip" "%%i" -sdel ) +cd ../../../ + +cd ./origin/%f_year%/%f_month% +for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip(绿色版)\7z.exe" a "%%~ni.zip" "%%i" -sdel ) +cd ../../../ + + +cd ../ +cd ./bilibili-rank +del /f /s/q latest.json +cd ./origin/%f_year%/%f_month% +for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip(绿色版)\7z.exe" a "%%~ni.zip" "%%i" -sdel ) +cd ../../../ + + +cd ../ +cd ./weibo-hotband +del /f /s/q latest.json +cd ./final/%f_year%/%f_month% +for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip(绿色版)\7z.exe" a "%%~ni.zip" "%%i" -sdel ) +cd ../../../ + +cd ./origin/%f_year%/%f_month% +for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip(绿色版)\7z.exe" a "%%~ni.zip" "%%i" 
-sdel ) +cd ../../../ + + +cd ../../ +pause \ No newline at end of file diff --git a/hotband/pm2 restart.bat b/hotband/pm2 restart.bat new file mode 100644 index 0000000..9e9eb8b --- /dev/null +++ b/hotband/pm2 restart.bat @@ -0,0 +1 @@ +pm2 restart weibo-hotband-bot \ No newline at end of file diff --git a/hotband/pm2 restart.sh b/hotband/pm2 restart.sh new file mode 100644 index 0000000..9e9eb8b --- /dev/null +++ b/hotband/pm2 restart.sh @@ -0,0 +1 @@ +pm2 restart weibo-hotband-bot \ No newline at end of file diff --git a/hotband/pm2 start.bat b/hotband/pm2 start.bat new file mode 100644 index 0000000..bdbca0d --- /dev/null +++ b/hotband/pm2 start.bat @@ -0,0 +1 @@ +pm2 start index.js --name weibo-hotband-bot \ No newline at end of file diff --git a/hotband/pm2 start.sh b/hotband/pm2 start.sh new file mode 100644 index 0000000..bdbca0d --- /dev/null +++ b/hotband/pm2 start.sh @@ -0,0 +1 @@ +pm2 start index.js --name weibo-hotband-bot \ No newline at end of file diff --git a/hotband/pm2 stop.bat b/hotband/pm2 stop.bat new file mode 100644 index 0000000..b0189d9 --- /dev/null +++ b/hotband/pm2 stop.bat @@ -0,0 +1,2 @@ +pm2 stop weibo-hotband-bot +pm2 delete weibo-hotband-bot \ No newline at end of file diff --git a/hotband/pm2 stop.sh b/hotband/pm2 stop.sh new file mode 100644 index 0000000..b0189d9 --- /dev/null +++ b/hotband/pm2 stop.sh @@ -0,0 +1,2 @@ +pm2 stop weibo-hotband-bot +pm2 delete weibo-hotband-bot \ No newline at end of file diff --git a/hotband/src/execute_command.js b/hotband/src/execute_command.js new file mode 100644 index 0000000..d73a6bd --- /dev/null +++ b/hotband/src/execute_command.js @@ -0,0 +1,40 @@ +'use strict'; + +const child_process = require('child_process'); +const iconv = require("iconv-lite"); + +const encoding = "cp936"; +const bufferEncoding = "binary"; + +async function execute(rootPath, cmds) { + let outputs = []; + for (let cmd of cmds) { + let result = await new Promise(function (resolve) { + // refer: 
https://www.webhek.com/post/execute-a-command-line-binary-with-node-js/ + child_process.exec(cmd, { + cwd: rootPath, // 脚本执行目录 + encoding: bufferEncoding + }, function (err, stdout, stderr) { + if (err) { + resolve({ + cmd: cmd, + err: err, + // err_stack: iconv.decode(Buffer.from(err.stack, bufferEncoding), encoding), + // err_message: iconv.decode(Buffer.from(err.message, bufferEncoding), encoding), + }); + } else { + // 获取命令执行的输出 + resolve({ + cmd: cmd, + stdout: iconv.decode(Buffer.from(stdout, bufferEncoding), encoding), + stderr: iconv.decode(Buffer.from(stderr, bufferEncoding), encoding), + }); + } + }); + }); + outputs.push(result); + } + return outputs; +} + +exports.execute = execute; diff --git a/hotband/src/get_bilibili_hotband.js b/hotband/src/get_bilibili_hotband.js new file mode 100644 index 0000000..7da0f90 --- /dev/null +++ b/hotband/src/get_bilibili_hotband.js @@ -0,0 +1,99 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +const fileUtils = require('./utils/fileUtils'); +const requestUtils = require('./utils/requestUtils'); + +const API_URL = "https://app.bilibili.com/x/v2/search/trending/ranking"; +const SUB_FOLDER = "bilibili-hotband"; + +const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 
'data', SUB_FOLDER); +console.log("DATA_FOLDER", DATA_FOLDER); +fileUtils.createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在 + +async function main() { + let requestTimestamp = Date.now(); + let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString(); + + let result = await requestUtils.getApiResult(API_URL); + if (result.code != 0) { + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,等待3s后重试。"); + await new Promise((resolve) => { + setTimeout(resolve, 3000); // 等待3秒 + }); + result = await requestUtils.getApiResult(API_URL); + if (result.ok != 1) { + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。"); + // ok 不为 1,那么久直接保存便于后续分析,不进行后续处理 + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `origin-error`, + object: result, + compress: true, + uncompress: false + }); + return; + } + } + + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功"); + // console.log("result", result); + + let data = result.data; + + // 去除 trackid + delete data["trackid"]; + // console.log(data); + + /** + * 保存原始数据 + */ + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `origin`, + object: result, + compress: true, + uncompress: false + }); + + + /** + * 获取需要的数据,进行转换 + */ + let convert = []; + data.list.forEach(item => { + // { + // "position": 1, + // "keyword": "关键词", + // "show_name": "热搜名称", + // "word_type": 8, + // "icon": "热搜的图标,也可能没有", + // "hot_id": 7399 // 热搜id + // } + convert.push(item); + }); + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `final`, + object: convert, + compress: true, + uncompress: false, + }); + + /** + * 更新最新的 + */ + fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({ + update_time: requestTimestamp, + update_time_friendly: now.substring(0, 19).replace(/T/g, " "), + data: data.list, + exp_str: data.exp_str, + })); +} + 
+exports.main = main; diff --git a/hotband/src/get_bilibili_rank.js b/hotband/src/get_bilibili_rank.js new file mode 100644 index 0000000..01be364 --- /dev/null +++ b/hotband/src/get_bilibili_rank.js @@ -0,0 +1,90 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +const fileUtils = require('./utils/fileUtils'); +const requestUtils = require('./utils/requestUtils'); + +const API_URL = "https://api.bilibili.com/x/web-interface/ranking/v2?type=all"; +const SUB_FOLDER = "bilibili-rank"; + +const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', SUB_FOLDER); +console.log("DATA_FOLDER", DATA_FOLDER); +fileUtils.createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在 + +async function main() { + let requestTimestamp = Date.now(); + let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString(); + + let result = await requestUtils.getApiResult(API_URL); + if (result.code != 0) { + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,等待3s后重试。"); + await new Promise((resolve) => { + setTimeout(resolve, 3000); // 等待3秒 + }); + result = await requestUtils.getApiResult(API_URL); + if (result.ok != 1) { + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。"); + // ok 不为 1,那么久直接保存便于后续分析,不进行后续处理 + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `origin-error`, + object: result, + compress: true, + uncompress: false + }); + return; + } + } + + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功"); + // console.log("result", result); + + let data = result.data; + + // // 去除 trackid + // delete data["trackid"]; + // console.log(data); + + /** + * 保存原始数据 + */ + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `origin`, + object: result, + compress: true, + uncompress: false + }); + + // /** + // * 获取需要的数据,进行转换 + // */ + // let convert = 
[]; + // data.list.forEach(item => { + // convert.push(item); + // }); + // fileUtils.saveJSON({ + // saveFolder: DATA_FOLDER, + // now: now, + // fileNameSuffix: `final`, + // object: convert, + // compress: true, + // uncompress: false, + // }); + + /** + * 更新最新的 + */ + fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({ + update_time: requestTimestamp, + update_time_friendly: now.substring(0, 19).replace(/T/g, " "), + note: data.note, + data: data.list, + })); +} + +exports.main = main; diff --git a/hotband/src/get_weibo_hotband.js b/hotband/src/get_weibo_hotband.js new file mode 100644 index 0000000..cd0fcee --- /dev/null +++ b/hotband/src/get_weibo_hotband.js @@ -0,0 +1,228 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +const fileUtils = require('./utils/fileUtils'); +const requestUtils = require('./utils/requestUtils'); + +const API_URL = "https://weibo.com/ajax/statuses/hot_band"; +const SUB_FOLDER = "weibo-hotband"; + +const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 
'data', SUB_FOLDER); +console.log("DATA_FOLDER", DATA_FOLDER); +fileUtils.createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在 + +async function main() { + let requestTimestamp = Date.now(); + let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString(); + + let result = await requestUtils.getApiResult(API_URL); + if (result.ok != 1) { + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,等待3s后重试。"); + await new Promise((resolve) => { + setTimeout(resolve, 3000); // 等待3秒 + }); + result = await requestUtils.getApiResult(API_URL); + if (result.ok != 1) { + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。"); + // ok 不为 1,那么就直接保存便于后续分析,不进行后续处理 + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `origin-error`, + object: result, + compress: true, + uncompress: false + }); + return; + } + } + + console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功"); + // console.log("result", result); + + /** + * 保存原始数据 + */ + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `origin`, + object: result, + compress: true, + uncompress: false + }); + + let data = JSON.parse(JSON.stringify(result.data)); + + if (!data) { + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `origin-parse-error`, + object: result, + compress: true, + uncompress: false + }); + return; + } + + /** + * 过滤掉不需要的数据 + */ + // hotgov + if (data.hotgov) { + delete data.hotgov["mblog"]; + // 重复字段只保留一个 + delete data.hotgov["note"]; // note word + delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc + delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color + } + + // band_list + for (let i = 0; i < data.band_list.length; i++) { + const item = data.band_list[i]; + + // 过滤广告 + if (item.is_ad) { + data.band_list.splice(i, 1); + i--; + } + + // 过滤空字段 + delete 
item["ad_info"]; + + // 重复字段只保留一个 + delete item["note"]; // note word + delete item["icon_desc"]; delete item["small_icon_desc"]; // label_name icon_desc small_icon_desc + delete item["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color + delete item["flag_desc"]; // flag_desc subject_label 这两个有值的时候相同,没有值的时候,前一个为 undefined,后一个为 "" + } + + + /** + * 获取需要的数据,进行转换 + */ + let convert = []; + data.band_list.forEach(item => { + let detail = ""; + let pic_ids = []; + if (item.mblog) { // 有些热搜没有 mblog + var regex = /(<([^>]+)>)/ig + detail = item.mblog.text.replace(regex, ""); + if (item.mblog.pics) { + pic_ids = item.mblog.pics.map(pic => `${pic}`); + } + } + convert.push({ + // 热搜排行顺序 + rank: item.rank, + realpos: item.realpos, + + // 热搜信息 + word: item.word, // 热搜标题 + word_scheme: item.word_scheme, // 热搜话题 "#热搜标题#" + emoticon: item.emoticon, // 热搜小表情,如 "[泪]" + label_name: item.label_name, // 热搜标签,如 "爆" "热" "新" "" + onboard_time: item.onboard_time, // 热搜上线时间,秒级时间戳,如 1658565575 + + /** + * 热搜数据 + * + * 大部分的 num 和 raw_hot 是相同的,页面上显示的是 num,可能是人工调控的热搜 + * + * 两者差值通过观测似乎最大是 1250000 + * 例如 【爆】唐山打架事件8名违法嫌疑人已到案 这条热搜一开始 delta 首先不断增大,最大达到 1250000 + * 然后热搜数量增加到 12600000 左右的时候,delta 逐渐减小到 1040000 左右 + */ + num: item.num, + raw_hot: item.raw_hot, + detla: item.num - item.raw_hot, // 计算值 + + url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // 热搜话题链接 + + // 分类 + category: item.category ? 
item.category.split(',') : "", + subject_label: item.subject_label, + + // 其他 + more: { + is_new: item.is_new, + subject_querys: item.subject_querys, + mid: item.mid, + icon_desc_color: item.icon_desc_color, + detail: detail, + }, + }); + }); + fileUtils.saveJSON({ + saveFolder: DATA_FOLDER, + now: now, + fileNameSuffix: `final`, + object: convert, + compress: true, + // uncompress: true, + uncompress: false, + }); + + + // /** + // * 只统计微博调控信息 + // */ + // let convert2 = []; + // let total = 0; + // data.band_list.forEach(item => { + // total += item.num; + // total -= item.raw_hot; + // if (item.num - item.raw_hot == 0) return; + // convert2.push([ + // `[${item.realpos}] ${item.word}【${item.label_name}】`, + // `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}` + // ]); + // }); + // fileUtils.saveJSON({ + // saveFolder: DATA_FOLDER, + // now: now, + // fileNameSuffix: `regulation`, + // object: { + // total_delta: total, // 所有调控值之和 + // data: convert2 + // }, + // compress: false, + // uncompress: true + // }); + + + // /** + // * 保存预处理后数据 + // */ + // // 过滤掉不需要的数据 + // // band_list + // data.band_list.forEach(function (item) { + // delete item["mblog"]; + // }); + // fileUtils.saveJSON({ + // saveFolder: DATA_FOLDER, + // now: now, + // fileNameSuffix: `simplify`, + // object: data, + // compress: true, + // // uncompress: true, + // // compress: false, + // uncompress: false, + // }); + + + /** + * 更新最新的 + */ + fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({ + update_time: requestTimestamp, + update_time_friendly: now.substring(0, 19).replace(/T/g, " "), + // regulation: convert2, + data: convert + })); +} + +exports.main = main; diff --git a/hotband/src/utils/fileUtils.js b/hotband/src/utils/fileUtils.js new file mode 100644 index 0000000..b85863a --- /dev/null +++ b/hotband/src/utils/fileUtils.js @@ -0,0 +1,51 @@ +const fs = require('fs'); +const path = require('path'); + +const LATEST_DATA_ONLY = process.env.LATEST_DATA_ONLY 
== true; + +// 创建目录 +async function createFolder(folderToCreate) { + let currentFolder = path.join(folderToCreate); + let parentFolder = path.join(currentFolder, '../'); + // console.log({ currentFolder: currentFolder, parentFolder: parentFolder }); + if (!fs.existsSync(currentFolder)) { + // 文件夹不存在,创建文件夹 + createFolder(parentFolder); // 保证父级文件夹存在 + fs.mkdirSync(currentFolder); // 创建当前级文件夹 + } else { + // 否则就什么也不做 + } +} + +// 保存 JSON +function saveJSON({ saveFolder, now, fileNameSuffix, object, compress = true, uncompress = true }) { + if (LATEST_DATA_ONLY) return; + + let year = now.substring(0, 4); + let month = now.substring(5, 7); + let day = now.substring(8, 10); + let hour = now.substring(11, 13); + let minute = now.substring(14, 16); + // console.log(now); + // console.log( "year, month, day, hour, minute: " + year + ", " + month + ", " + day + ", " + hour + ", " + minute); + + // 创建当前文件夹 + let folder = `${saveFolder}/${fileNameSuffix}/${year}/${month}/${day}`; + createFolder(folder); + let fileName = `${folder}/${year}${month}${day}_${hour}${minute}`; + + // 生成文件名 + // '2022-07-23T10:11:38.650Z' => '20220723_1011' + // let fileName = now.replace(/T/, '_').replace(/:\d{2}.\d{3}Z/, '').replace(/[-:]/g, ''); + // console.log(`fileName is ${fileName}`); + + if (compress) + fs.writeFileSync(`${fileName}.min.json`, JSON.stringify(object)); + if (uncompress) + fs.writeFileSync(`${fileName}.json`, JSON.stringify(object, "", "\t")); +} + +module.exports = { + createFolder, + saveJSON, +} \ No newline at end of file diff --git a/hotband/src/utils/requestUtils.js b/hotband/src/utils/requestUtils.js new file mode 100644 index 0000000..d2547df --- /dev/null +++ b/hotband/src/utils/requestUtils.js @@ -0,0 +1,27 @@ +const request = require('request'); + +// 请求 APi 接口 +async function getApiResult(url) { + var return_data = await new Promise((resolve) => { + request({ + method: 'GET', + url: url, + json: true, + }, (error, response, result) => { + if (!error && 
(response.statusCode == 200)) { + // 请求成功 + resolve(result); + } else { + // 请求失败 + console.log(`error is ${error}`); + resolve({}); + } + }); + }); + // console.log(`return_data is ${JSON.stringify(return_data)}`); + return return_data; +} + +module.exports = { + getApiResult, +} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 4ec7166..d1f5880 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,9 @@ "dependencies": { "cheerio": "^1.0.0-rc.12", "crypto": "^1.0.1", + "dotenv": "^16.0.1", "fs": "^0.0.1-security", + "iconv-lite": "^0.6.3", "minimist": "^1.2.6", "mysql": "^2.18.1", "NeteaseCloudMusicApi": "^4.8.2", @@ -224,6 +226,17 @@ "npm": "1.2.8000 || >= 1.4.16" } }, + "node_modules/body-parser/node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/boolbase": { "version": "1.0.0", "resolved": "https://registry.npmmirror.com/boolbase/-/boolbase-1.0.0.tgz", @@ -568,6 +581,14 @@ "domhandler": "^5.0.1" } }, + "node_modules/dotenv": { + "version": "16.0.3", + "resolved": "https://registry.npmmirror.com/dotenv/-/dotenv-16.0.3.tgz", + "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==", + "engines": { + "node": ">=12" + } + }, "node_modules/ecc-jsbn": { "version": "0.1.2", "resolved": "https://registry.npmmirror.com/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", @@ -1143,11 +1164,11 @@ "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, "node_modules/iconv-lite": { - "version": "0.4.24", - "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz", - "integrity": 
"sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "version": "0.6.3", + "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", "dependencies": { - "safer-buffer": ">= 2.1.2 < 3" + "safer-buffer": ">= 2.1.2 < 3.0.0" }, "engines": { "node": ">=0.10.0" @@ -1833,6 +1854,17 @@ "node": ">= 0.8" } }, + "node_modules/raw-body/node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/readable-stream": { "version": "2.3.7", "resolved": "https://registry.npmmirror.com/readable-stream/-/readable-stream-2.3.7.tgz", @@ -2588,6 +2620,16 @@ "raw-body": "2.5.1", "type-is": "~1.6.18", "unpipe": "1.0.0" + }, + "dependencies": { + "iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } + } } }, "boolbase": { @@ -2865,6 +2907,11 @@ "domhandler": "^5.0.1" } }, + "dotenv": { + "version": "16.0.3", + "resolved": "https://registry.npmmirror.com/dotenv/-/dotenv-16.0.3.tgz", + "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==" + }, "ecc-jsbn": { "version": "0.1.2", "resolved": "https://registry.npmmirror.com/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", @@ -3313,11 +3360,11 @@ } }, "iconv-lite": { - "version": "0.4.24", - "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz", - "integrity": 
"sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "version": "0.6.3", + "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", "requires": { - "safer-buffer": ">= 2.1.2 < 3" + "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "ieee754": { @@ -3853,6 +3900,16 @@ "http-errors": "2.0.0", "iconv-lite": "0.4.24", "unpipe": "1.0.0" + }, + "dependencies": { + "iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } + } } }, "readable-stream": { diff --git a/package.json b/package.json index 87fad19..7d7ab3b 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,8 @@ "node-schedule": "^2.1.0", "path": "^0.12.7", "request": "^2.88.2", - "solarlunar": "^2.0.7" + "solarlunar": "^2.0.7", + "dotenv": "^16.0.1", + "iconv-lite": "^0.6.3" } }