first commit
This commit is contained in:
commit
40173ce54d
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
.DS_Store
|
||||
test.js
|
||||
|
||||
node_modules
|
||||
|
||||
data/*
|
||||
!data/.gitkeep
|
0
data/.gitkeep
Normal file
0
data/.gitkeep
Normal file
247
index.js
Normal file
247
index.js
Normal file
@ -0,0 +1,247 @@
|
||||
'use strict';
|
||||
|
||||
const request = require('request');
|
||||
const fs = require('fs');
|
||||
|
||||
console.log("Start running ...");
|
||||
|
||||
/**
 * Send a GET request to a JSON API endpoint (used here for the Weibo
 * hot-search API) and hand back the parsed response body.
 *
 * The returned promise never rejects: every failure is logged and reported
 * through the sentinel string "error", so callers must inspect the value.
 *
 * @param {string} url - Address of the endpoint to request.
 * @returns {Promise<*>} The parsed body when the server answers with HTTP 200,
 *     otherwise the string "error".
 */
async function getApiResult(url) {
    return new Promise((resolve) => {
        request({
            method: 'GET',
            url: url,
            json: true,
        }, (error, response, result) => {
            // Request succeeded.
            if (!error && response && response.statusCode === 200) {
                resolve(result);
                return;
            }

            // Request failed. Without a transport error the useful piece of
            // information is the HTTP status, so log that instead of "null".
            if (error) {
                console.log(`error is ${error}`);
            } else {
                const status = response ? response.statusCode : 'unknown';
                console.log(`error is unexpected HTTP status ${status}`);
            }
            resolve("error");
        });
    });
}
|
||||
|
||||
/**
 * Make sure a directory exists, creating any missing parent directories.
 *
 * Backslashes in the path are treated as separators and converted to "/".
 * Nothing happens when the path already exists. The work itself is done
 * synchronously; the function stays `async` so existing callers still get a
 * promise back.
 *
 * @param {string} folderToCreate - Directory path, relative or absolute.
 * @returns {Promise<void>} Settles once the directory exists; rejects with
 *     the underlying `fs` error when it cannot be created.
 */
async function createFolder(folderToCreate) {
    const targetFolder = folderToCreate.replace(/\\/g, '/');

    // Already there: nothing to do.
    if (fs.existsSync(targetFolder)) {
        return;
    }

    // Let Node create the whole chain. Walking up the path by hand never
    // terminates for a path without "/" because its parent is the empty string.
    fs.mkdirSync(targetFolder, { recursive: true });
}
|
||||
|
||||
/**
 * Write an object as JSON below `./data/<year>/<month>/<day>/`.
 *
 * The file name is `<year><month><day>_<hour><minute>_<fileNameSuffix>`, e.g.
 * '2022-07-23T10:11:38.650Z' with suffix 'final' gives
 * `./data/2022/07/23/20220723_1011_final`.
 *
 * @param {Object} options
 * @param {string} options.now - ISO-8601 style timestamp; the date and time
 *     parts are read from fixed character positions.
 * @param {string} options.fileNameSuffix - Last part of the file name.
 * @param {*} options.object - Value to serialise.
 * @param {boolean} [options.compress=true] - Write the minified `.min.json`.
 * @param {boolean} [options.uncompress=true] - Write the tab-indented `.json`.
 * @returns {void}
 */
function saveJSON({ now, fileNameSuffix, object, compress = true, uncompress = true }) {
    const year = now.substring(0, 4);
    const month = now.substring(5, 7);
    const day = now.substring(8, 10);
    const hour = now.substring(11, 13);
    const minute = now.substring(14, 16);

    // Create the target directory synchronously so that a failure surfaces
    // here instead of as an unhandled rejection followed by a confusing
    // ENOENT from the writes below.
    const folder = `./data/${year}/${month}/${day}`;
    fs.mkdirSync(folder, { recursive: true });

    const fileName = `${folder}/${year}${month}${day}_${hour}${minute}_${fileNameSuffix}`;

    if (compress) {
        fs.writeFileSync(`${fileName}.min.json`, JSON.stringify(object));
    }
    if (uncompress) {
        fs.writeFileSync(`${fileName}.json`, JSON.stringify(object, null, "\t"));
    }
}
|
||||
|
||||
/**
 * Remove HTML tags from the text of a post.
 *
 * @param {*} text - Raw post text; anything that is not a string yields "".
 * @returns {string} The text without tags.
 */
function stripHtmlTags(text) {
    return typeof text === 'string' ? text.replace(/(<([^>]+)>)/ig, "") : "";
}

/**
 * Drop the fields that are not needed from the API's `data` object, in place.
 *
 * @param {Object} data - Copy of the `data` member of the API response.
 * @returns {void}
 */
function pruneHotBandData(data) {
    // hotgov: guarded, because deleting from a missing object throws.
    if (data.hotgov) {
        delete data.hotgov["mblog"];
        // Of each group of duplicated fields only one is kept.
        delete data.hotgov["note"]; // note / word
        delete data.hotgov["small_icon_desc"]; // icon_desc / small_icon_desc
        delete data.hotgov["small_icon_desc_color"]; // icon_desc_color / small_icon_desc_color
    }

    // band_list: filter out advertisements first, then clean what is left.
    data.band_list = data.band_list.filter((item) => !item.is_ad);
    data.band_list.forEach((item) => {
        // Empty field.
        delete item["ad_info"];

        // Of each group of duplicated fields only one is kept.
        delete item["note"]; // note / word
        delete item["icon_desc"]; // label_name / icon_desc / small_icon_desc
        delete item["small_icon_desc"];
        delete item["small_icon_desc_color"]; // icon_desc_color / small_icon_desc_color
        // flag_desc / subject_label: equal when set; when unset the former is
        // undefined and the latter is "".
        delete item["flag_desc"];
    });
}

/**
 * Convert one band_list item into the record stored in the "final" output.
 *
 * @param {Object} item - A cleaned band_list item.
 * @returns {Object} The converted record.
 */
function toHotSearchEntry(item) {
    // Some hot searches have no mblog.
    const detail = item.mblog ? stripHtmlTags(item.mblog.text) : "";

    return {
        // Position in the ranking.
        rank: item.rank,
        realpos: item.realpos,

        // Hot-search information.
        word: item.word, // title
        word_scheme: item.word_scheme, // topic, "#title#"
        emoticon: item.emoticon, // small emoticon, e.g. "[泪]"
        label_name: item.label_name, // label, e.g. "爆" "热" "新" ""
        onboard_time: item.onboard_time, // time it went on the board, timestamp in seconds, e.g. 1658565575

        /**
         * Hot-search figures.
         *
         * Notes by the original author, based on observation:
         * num and raw_hot are equal for most entries; the page shows num, so
         * a difference may indicate a manually regulated entry.
         * The largest difference observed appears to be 1250000. For example,
         * for the entry "【爆】唐山打架事件8名违法嫌疑人已到案" the delta first
         * kept growing up to 1250000, and once the count reached roughly
         * 12600000 it gradually shrank to roughly 1040000.
         * The sum of all (signed) deltas is mostly between 100000 and 230000.
         */
        num: item.num,
        raw_hot: item.raw_hot,
        detla: item.num - item.raw_hot, // computed value; the key spelling is part of the output format

        url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // link to the topic

        // Classification.
        category: item.category ? item.category.split(',') : "",
        subject_label: item.subject_label,

        // Everything else.
        more: {
            is_new: item.is_new,
            subject_querys: item.subject_querys,
            mid: item.mid,
            icon_desc_color: item.icon_desc_color,
            detail: detail,
        },
    };
}

/**
 * Collect the entries whose displayed count differs from the raw count.
 *
 * @param {Object[]} bandList - Cleaned band_list items.
 * @returns {{total: number, rows: string[][]}} Sum of all deltas, and one
 *     pair of description strings per entry with a non-zero delta.
 */
function collectRegulation(bandList) {
    let total = 0;
    const rows = [];
    bandList.forEach((item) => {
        total += item.num;
        total -= item.raw_hot;
        if (item.num - item.raw_hot === 0) {
            return;
        }
        rows.push([
            `[${item.realpos}] ${item.word}【${item.label_name}】`,
            `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
        ]);
    });
    return { total, rows };
}

/**
 * Fetch the Weibo hot-search list once and store it in several forms:
 * the raw response ("origin"), the converted records ("final"), the
 * regulation summary ("regulation"), the cleaned data ("simplify") and
 * `./data/latest.json`.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const requestTimestamp = Date.now();
    // Shift by 8 hours so that the ISO string reads as UTC+8 wall-clock time.
    const now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();

    const result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");

    // getApiResult reports a failed request with the string "error".
    if (result === "error") {
        console.log("请求失败。");
        return;
    }
    if (result === null || result === undefined || Number(result.ok) !== 1) {
        console.log("请求成功,但服务器处理失败。");
        return;
    }
    console.log("请求成功。");

    // Save the raw data.
    saveJSON({
        now: now,
        fileNameSuffix: `origin`,
        object: result,
        compress: true,
        uncompress: false
    });

    // Work on a deep copy so the raw response stays untouched.
    const data = JSON.parse(JSON.stringify(result.data));

    // Filter out the data that is not needed.
    pruneHotBandData(data);

    // Pick the needed data and convert it.
    const convert = data.band_list.map(toHotSearchEntry);
    saveJSON({
        now: now,
        fileNameSuffix: `final`,
        object: convert,
        compress: true,
        uncompress: true
    });

    // Statistics about the regulated entries only.
    const regulation = collectRegulation(data.band_list);
    const convert2 = regulation.rows;
    saveJSON({
        now: now,
        fileNameSuffix: `regulation`,
        object: {
            total_delta: regulation.total, // sum of all regulation values
            data: convert2
        },
        compress: false,
        uncompress: true
    });

    // Save the pre-processed data, without the bulky mblog objects.
    data.band_list.forEach((item) => {
        delete item["mblog"];
    });
    saveJSON({
        now: now,
        fileNameSuffix: `simplify`,
        object: data,
        compress: true,
        uncompress: true
    });

    // Update the "latest" snapshot.
    fs.writeFileSync(`./data/latest.json`, JSON.stringify({
        update_time: requestTimestamp,
        update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
        regulation: convert2,
        data: convert
    }));
}
|
||||
|
||||
// Start the run. main() returns a promise, so a failure is reported here and
// turned into a non-zero exit code instead of being left as a floating promise.
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
|
1011
package-lock.json
generated
Normal file
1011
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
15
package.json
Normal file
15
package.json
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"name": "weibo-hot-band",
|
||||
"version": "0.0.0",
|
||||
"description": "weibo-hot-band",
|
||||
"main": "server.js",
|
||||
"author": {
|
||||
"name": ""
|
||||
},
|
||||
"dependencies": {
|
||||
"dotenv": "^16.0.1",
|
||||
"fs": "^0.0.1-security",
|
||||
"node-schedule": "^2.1.0",
|
||||
"request": "^2.88.2"
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user