1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

抽离出公共方法;添加B站热搜爬取代码

This commit is contained in:
2022-07-28 22:35:07 +08:00
parent a76c39b030
commit cf28f475e3
5 changed files with 184 additions and 55 deletions

View File

@@ -4,11 +4,11 @@ const request = require('request');
const fs = require('fs');
const path = require('path');
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data');
console.log("DATA_FOLDER", DATA_FOLDER);
createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在
const utils = require('./utils/utils');
const LATEST_DATA_ONLY = process.env.LATEST_DATA_ONLY == true;
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', 'weibo');
console.log("DATA_FOLDER", DATA_FOLDER);
utils.createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在
// Request the Weibo hot-search API endpoint
async function getApiResult(url) {
@@ -32,59 +32,19 @@ async function getApiResult(url) {
return return_data;
}
/**
 * Ensure that a directory, together with any missing ancestors, exists.
 *
 * Backslashes in the given path are converted to forward slashes before use.
 * If the resulting path already exists, nothing is done.
 *
 * The function is kept `async` so that it still returns a Promise, but all of
 * its work runs synchronously during the call; a filesystem failure therefore
 * surfaces as a rejected Promise rather than a thrown exception.
 *
 * @param {string} folderToCreate - Path of the directory to create.
 * @returns {Promise<void>} Resolves once the directory exists.
 */
async function createFolder(folderToCreate) {
  const targetFolder = folderToCreate.replace(/\\/g, '/');

  // Already present: nothing to create.
  if (fs.existsSync(targetFolder)) {
    return;
  }

  // Let Node create the whole chain of missing parents. The previous manual
  // parent walk recursed without bound for a non-existent path containing
  // no '/' (the computed parent was '' and never existed).
  fs.mkdirSync(targetFolder, { recursive: true });
}
/**
 * Write an object to timestamp-named JSON files below DATA_FOLDER.
 *
 * Files are placed in `<DATA_FOLDER>/<fileNameSuffix>/<year>/<month>/<day>/`
 * and named `<year><month><day>_<hour><minute>`, e.g. a `now` of
 * '2022-07-23T10:11:38.650Z' yields the base name '20220723_1011'.
 * Nothing is written when LATEST_DATA_ONLY is truthy.
 *
 * @param {object} options
 * @param {string} options.now - Timestamp string; the date and time parts are
 *   read from fixed character positions of an ISO-8601 style string.
 * @param {string} options.fileNameSuffix - Sub-folder name under DATA_FOLDER.
 * @param {*} options.object - Value to serialize.
 * @param {boolean} [options.compress=true] - Write the compact `.min.json` file.
 * @param {boolean} [options.uncompress=true] - Write the tab-indented `.json` file.
 */
function saveJSON({ now, fileNameSuffix, object, compress = true, uncompress = true }) {
  if (LATEST_DATA_ONLY) {
    return;
  }

  // Pull the calendar and clock fields out of their fixed positions.
  const yyyy = now.substring(0, 4);
  const mm = now.substring(5, 7);
  const dd = now.substring(8, 10);
  const hh = now.substring(11, 13);
  const min = now.substring(14, 16);

  // Make sure the per-day target folder exists before writing into it.
  const targetDir = `${DATA_FOLDER}/${fileNameSuffix}/${yyyy}/${mm}/${dd}`;
  createFolder(targetDir);

  const basePath = `${targetDir}/${yyyy}${mm}${dd}_${hh}${min}`;

  if (compress) {
    fs.writeFileSync(`${basePath}.min.json`, JSON.stringify(object));
  }
  if (uncompress) {
    fs.writeFileSync(`${basePath}.json`, JSON.stringify(object, null, "\t"));
  }
}
async function main() {
let requestTimestamp = Date.now();
let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
if (result.ok != 1) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,正在重试。");
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功,但服务器处理失败,正在重试。");
result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
if (result.ok != 1) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,保存失败信息。");
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功,但服务器处理失败,保存失败信息。");
// ok is not 1, so save the response as-is for later analysis and skip further processing
saveJSON({
utils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
fileNameSuffix: `origin-error`,
object: result,
@@ -95,13 +55,14 @@ async function main() {
}
}
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功");
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功");
// console.log("result", result);
/**
* 保存原始数据
*/
saveJSON({
utils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
fileNameSuffix: `origin`,
object: result,
@@ -198,7 +159,8 @@ async function main() {
},
});
});
saveJSON({
utils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
fileNameSuffix: `final`,
object: convert,
@@ -222,7 +184,8 @@ async function main() {
`原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
]);
});
saveJSON({
utils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
fileNameSuffix: `regulation`,
object: {
@@ -242,7 +205,8 @@ async function main() {
data.band_list.forEach(function (item) {
delete item["mblog"];
});
saveJSON({
utils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
fileNameSuffix: `simplify`,
object: data,
@@ -264,4 +228,4 @@ async function main() {
}));
}
exports.main = main;
exports.main = main;