抽离出公共方法;添加B站热搜爬取代码
This commit is contained in:
parent
a76c39b030
commit
cf28f475e3
@ -227,7 +227,7 @@
|
|||||||
|
|
||||||
function getData() {
|
function getData() {
|
||||||
var xhr = new XMLHttpRequest();
|
var xhr = new XMLHttpRequest();
|
||||||
xhr.open("GET", "../data/latest.json?t=" + Date.now(), true);
|
xhr.open("GET", "../data/weibo/latest.json?t=" + Date.now(), true);
|
||||||
xhr.send();
|
xhr.send();
|
||||||
xhr.onreadystatechange = function () {
|
xhr.onreadystatechange = function () {
|
||||||
if (xhr.readyState !== 4) return;
|
if (xhr.readyState !== 4) return;
|
||||||
|
2
index.js
2
index.js
@ -37,6 +37,7 @@ if (DEBUG_MODE) {
|
|||||||
* 引入模块
|
* 引入模块
|
||||||
*/
|
*/
|
||||||
const get_weibo_hotband = require('./src/get_weibo_hotband');
|
const get_weibo_hotband = require('./src/get_weibo_hotband');
|
||||||
|
const get_bilibili_hotband = require('./src/get_bilibili_hotband');
|
||||||
const execute_command = require('./src/execute_command');
|
const execute_command = require('./src/execute_command');
|
||||||
|
|
||||||
|
|
||||||
@ -52,6 +53,7 @@ console.log("Start running ...");
|
|||||||
async function start() {
|
async function start() {
|
||||||
// 爬取热搜数据
|
// 爬取热搜数据
|
||||||
await get_weibo_hotband.main();
|
await get_weibo_hotband.main();
|
||||||
|
await get_bilibili_hotband.main();
|
||||||
|
|
||||||
// 调试模式下
|
// 调试模式下
|
||||||
if (DEBUG_MODE) {
|
if (DEBUG_MODE) {
|
||||||
|
114
src/get_bilibili_hotband.js
Normal file
114
src/get_bilibili_hotband.js
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
const request = require('request');
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
const utils = require('./utils/utils');
|
||||||
|
|
||||||
|
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', 'bilibili');
|
||||||
|
console.log("DATA_FOLDER", DATA_FOLDER);
|
||||||
|
utils.createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在
|
||||||
|
|
||||||
|
/**
 * Send a GET request to a JSON API endpoint (in this file: the Bilibili
 * trending-search ranking API).
 *
 * The returned promise never rejects. Any failure is logged and reported
 * through the sentinel string "error"; main() recognises that case because
 * the sentinel carries none of the fields of a real response.
 *
 * @param {string} url - Endpoint to request.
 * @returns {Promise<*>} The parsed JSON body on HTTP 200, otherwise the string "error".
 */
async function getApiResult(url) {
    return new Promise((resolve) => {
        request({
            method: 'GET',
            url,
            json: true,
        }, (error, response, result) => {
            if (!error && response?.statusCode === 200) {
                // Request succeeded.
                resolve(result);
                return;
            }

            // Request failed. Distinguish transport errors from unexpected HTTP
            // statuses so the log never reads "error is null".
            if (error) {
                console.log(`error is ${error}`);
            } else {
                console.log(`error is unexpected status code ${response?.statusCode}`);
            }
            resolve("error");
        });
    });
}
|
||||||
|
|
||||||
|
/**
 * Fetch the Bilibili trending-search ranking (one retry on failure) and
 * persist it.
 *
 * On success three things are written under DATA_FOLDER:
 *   - an "origin" snapshot of the whole response,
 *   - a "final" snapshot containing only the ranking list,
 *   - latest.json with the update time and the response's `data` object.
 * If both attempts fail, the last response is saved as "origin-error" for
 * later analysis and nothing else is written.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const TRENDING_URL = "https://app.bilibili.com/x/v2/search/trending/ranking";

    // Log-line timestamp: current time shifted by +8h, rendered with toISOString().
    const logTime = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString();

    // A response is usable when the API reports code 0 and carries a ranking list.
    // The "error" sentinel from getApiResult has neither, so it fails this check.
    const isUsable = (payload) => payload?.code === 0 && Array.isArray(payload?.data?.list);

    const requestTimestamp = Date.now();
    const now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();

    let result = await getApiResult(TRENDING_URL);
    if (!isUsable(result)) {
        console.log(logTime(), "bilibili", "请求成功,但服务器处理失败,正在重试。");
        result = await getApiResult(TRENDING_URL);
        // The retry must be judged by the same Bilibili criterion as the first
        // attempt; checking a field Bilibili does not send would discard every
        // successful retry.
        if (!isUsable(result)) {
            console.log(logTime(), "bilibili", "请求成功,但服务器处理失败,保存失败信息。");
            // Still unusable: save the raw response for later analysis and stop here.
            utils.saveJSON({
                saveFolder: DATA_FOLDER,
                now,
                fileNameSuffix: `origin-error`,
                object: result,
                compress: true,
                uncompress: false,
            });
            return;
        }
    }

    console.log(logTime(), "bilibili", "请求成功");

    const data = result.data;

    // Remove trackid. `data` aliases `result.data`, so the "origin" snapshot
    // below is written without trackid as well.
    delete data["trackid"];

    /**
     * Save the original response.
     */
    utils.saveJSON({
        saveFolder: DATA_FOLDER,
        now,
        fileNameSuffix: `origin`,
        object: result,
        compress: true,
        uncompress: false,
    });

    /**
     * Extract the data we need. Each list item is kept as-is, e.g.:
     * {
     *     "position": 1,
     *     "keyword": "...",     // search keyword
     *     "show_name": "...",   // display name of the trending entry
     *     "word_type": 8,
     *     "icon": "...",        // icon URL, may be absent
     *     "hot_id": 7399        // trending entry id
     * }
     */
    const convert = [...data.list];
    utils.saveJSON({
        saveFolder: DATA_FOLDER,
        now,
        fileNameSuffix: `final`,
        object: convert,
        compress: true,
        uncompress: false,
    });

    /**
     * Update the "latest" file.
     */
    fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({
        update_time: requestTimestamp,
        update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
        data,
    }));
}
|
||||||
|
|
||||||
|
exports.main = main;
|
@ -4,11 +4,11 @@ const request = require('request');
|
|||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
|
|
||||||
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data');
|
const utils = require('./utils/utils');
|
||||||
console.log("DATA_FOLDER", DATA_FOLDER);
|
|
||||||
createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在
|
|
||||||
|
|
||||||
const LATEST_DATA_ONLY = process.env.LATEST_DATA_ONLY == true;
|
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', 'weibo');
|
||||||
|
console.log("DATA_FOLDER", DATA_FOLDER);
|
||||||
|
utils.createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在
|
||||||
|
|
||||||
// 请求微博热搜 APi 接口
|
// 请求微博热搜 APi 接口
|
||||||
async function getApiResult(url) {
|
async function getApiResult(url) {
|
||||||
@ -32,59 +32,19 @@ async function getApiResult(url) {
|
|||||||
return return_data;
|
return return_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 创建目录
|
|
||||||
async function createFolder(folderToCreate) {
|
|
||||||
let currentFolder = folderToCreate.replace(/\\/g, '/');
|
|
||||||
let parentFolder = currentFolder.substring(0, currentFolder.lastIndexOf('/'));
|
|
||||||
if (!fs.existsSync(currentFolder)) {
|
|
||||||
// 文件夹不存在,创建文件夹
|
|
||||||
createFolder(parentFolder); // 保证父级文件夹存在
|
|
||||||
fs.mkdirSync(currentFolder); // 创建当前级文件夹
|
|
||||||
} else {
|
|
||||||
// 否则就什么也不做
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 保存 JSON
|
|
||||||
function saveJSON({ now, fileNameSuffix, object, compress = true, uncompress = true }) {
|
|
||||||
if (LATEST_DATA_ONLY) return;
|
|
||||||
|
|
||||||
let year = now.substring(0, 4);
|
|
||||||
let month = now.substring(5, 7);
|
|
||||||
let day = now.substring(8, 10);
|
|
||||||
let hour = now.substring(11, 13);
|
|
||||||
let minute = now.substring(14, 16);
|
|
||||||
// console.log(now);
|
|
||||||
// console.log( "year, month, day, hour, minute: " + year + ", " + month + ", " + day + ", " + hour + ", " + minute);
|
|
||||||
|
|
||||||
// 创建当前文件夹
|
|
||||||
let folder = `${DATA_FOLDER}/${fileNameSuffix}/${year}/${month}/${day}`;
|
|
||||||
createFolder(folder);
|
|
||||||
let fileName = `${folder}/${year}${month}${day}_${hour}${minute}`;
|
|
||||||
|
|
||||||
// 生成文件名
|
|
||||||
// '2022-07-23T10:11:38.650Z' => '20220723_1011'
|
|
||||||
// let fileName = now.replace(/T/, '_').replace(/:\d{2}.\d{3}Z/, '').replace(/[-:]/g, '');
|
|
||||||
// console.log(`fileName is ${fileName}`);
|
|
||||||
|
|
||||||
if (compress)
|
|
||||||
fs.writeFileSync(`${fileName}.min.json`, JSON.stringify(object));
|
|
||||||
if (uncompress)
|
|
||||||
fs.writeFileSync(`${fileName}.json`, JSON.stringify(object, "", "\t"));
|
|
||||||
}
|
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
let requestTimestamp = Date.now();
|
let requestTimestamp = Date.now();
|
||||||
let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
|
let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
|
||||||
|
|
||||||
let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
|
let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
|
||||||
if (result.ok != 1) {
|
if (result.ok != 1) {
|
||||||
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,正在重试。");
|
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功,但服务器处理失败,正在重试。");
|
||||||
result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
|
result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
|
||||||
if (result.ok != 1) {
|
if (result.ok != 1) {
|
||||||
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,保存失败信息。");
|
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功,但服务器处理失败,保存失败信息。");
|
||||||
// ok 不为 1,那么久直接保存便于后续分析,不进行后续处理
|
// ok 不为 1,那么久直接保存便于后续分析,不进行后续处理
|
||||||
saveJSON({
|
utils.saveJSON({
|
||||||
|
saveFolder: DATA_FOLDER,
|
||||||
now: now,
|
now: now,
|
||||||
fileNameSuffix: `origin-error`,
|
fileNameSuffix: `origin-error`,
|
||||||
object: result,
|
object: result,
|
||||||
@ -95,13 +55,14 @@ async function main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功");
|
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功");
|
||||||
// console.log("result", result);
|
// console.log("result", result);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 保存原始数据
|
* 保存原始数据
|
||||||
*/
|
*/
|
||||||
saveJSON({
|
utils.saveJSON({
|
||||||
|
saveFolder: DATA_FOLDER,
|
||||||
now: now,
|
now: now,
|
||||||
fileNameSuffix: `origin`,
|
fileNameSuffix: `origin`,
|
||||||
object: result,
|
object: result,
|
||||||
@ -198,7 +159,8 @@ async function main() {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
saveJSON({
|
utils.saveJSON({
|
||||||
|
saveFolder: DATA_FOLDER,
|
||||||
now: now,
|
now: now,
|
||||||
fileNameSuffix: `final`,
|
fileNameSuffix: `final`,
|
||||||
object: convert,
|
object: convert,
|
||||||
@ -222,7 +184,8 @@ async function main() {
|
|||||||
`原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
|
`原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
saveJSON({
|
utils.saveJSON({
|
||||||
|
saveFolder: DATA_FOLDER,
|
||||||
now: now,
|
now: now,
|
||||||
fileNameSuffix: `regulation`,
|
fileNameSuffix: `regulation`,
|
||||||
object: {
|
object: {
|
||||||
@ -242,7 +205,8 @@ async function main() {
|
|||||||
data.band_list.forEach(function (item) {
|
data.band_list.forEach(function (item) {
|
||||||
delete item["mblog"];
|
delete item["mblog"];
|
||||||
});
|
});
|
||||||
saveJSON({
|
utils.saveJSON({
|
||||||
|
saveFolder: DATA_FOLDER,
|
||||||
now: now,
|
now: now,
|
||||||
fileNameSuffix: `simplify`,
|
fileNameSuffix: `simplify`,
|
||||||
object: data,
|
object: data,
|
||||||
|
49
src/utils/utils.js
Normal file
49
src/utils/utils.js
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
/**
 * Interpret an environment-variable value as an on/off flag.
 *
 * Environment variables are strings, so a loose comparison against the
 * boolean `true` only matches strings that convert to the number 1; the
 * literal text "true" would not enable the flag. This helper accepts "true"
 * (any case, surrounding whitespace ignored) in addition to every value the
 * loose comparison already accepted.
 *
 * @param {string|undefined} value - Raw environment value.
 * @returns {boolean} Whether the flag is switched on.
 */
function isEnvFlagEnabled(value) {
    if (value == null) return false;
    const normalized = String(value).trim().toLowerCase();
    return normalized === 'true' || Number(normalized) === 1;
}

// When enabled, saveJSON returns without writing any snapshot file.
const LATEST_DATA_ONLY = isEnvFlagEnabled(process.env.LATEST_DATA_ONLY);
|
||||||
|
|
||||||
|
/**
 * Ensure a directory exists, creating any missing parent directories.
 *
 * Declared `async` so callers keep receiving a Promise, but the directory is
 * created synchronously before the function returns; callers that do not
 * await the result still find the directory in place.
 *
 * @param {string} folderToCreate - Directory path; backslashes are treated as "/".
 * @returns {Promise<void>} Rejects if the directory cannot be created.
 */
async function createFolder(folderToCreate) {
    const normalized = folderToCreate.replace(/\\/g, '/');
    // `recursive: true` creates missing ancestors and is a no-op when the
    // directory already exists, so no manual parent walk or existence check
    // is needed (a manual walk never terminates for a path without "/").
    fs.mkdirSync(normalized, { recursive: true });
}
|
||||||
|
|
||||||
|
/**
 * Save an object as JSON under
 * `<saveFolder>/<fileNameSuffix>/<year>/<month>/<day>/<yyyymmdd>_<hhmm>`.
 *
 * Does nothing when LATEST_DATA_ONLY is enabled.
 *
 * @param {object} options
 * @param {string} options.saveFolder - Root folder for the snapshot tree.
 * @param {string} options.now - ISO-8601 timestamp, e.g. '2022-07-23T10:11:38.650Z';
 *     the date and time parts are read by fixed character position.
 * @param {string} options.fileNameSuffix - Sub-folder naming the kind of snapshot.
 * @param {*} options.object - Value to serialise.
 * @param {boolean} [options.compress=true] - Write the compact `.min.json` file.
 * @param {boolean} [options.uncompress=true] - Write the tab-indented `.json` file.
 * @returns {void}
 */
function saveJSON({ saveFolder, now, fileNameSuffix, object, compress = true, uncompress = true }) {
    if (LATEST_DATA_ONLY) return;

    // '2022-07-23T10:11:38.650Z' => '2022', '07', '23', '10', '11'
    const year = now.slice(0, 4);
    const month = now.slice(5, 7);
    const day = now.slice(8, 10);
    const hour = now.slice(11, 13);
    const minute = now.slice(14, 16);

    // Create the target folder synchronously so that a failure surfaces here
    // as a thrown error instead of as an un-awaited promise rejection followed
    // by a misleading ENOENT from the writes below.
    const folder = `${saveFolder}/${fileNameSuffix}/${year}/${month}/${day}`;
    fs.mkdirSync(folder.replace(/\\/g, '/'), { recursive: true });

    // File name stem, e.g. '20220723_1011'.
    const fileName = `${folder}/${year}${month}${day}_${hour}${minute}`;

    if (compress) {
        fs.writeFileSync(`${fileName}.min.json`, JSON.stringify(object));
    }
    if (uncompress) {
        fs.writeFileSync(`${fileName}.json`, JSON.stringify(object, null, "\t"));
    }
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
createFolder,
|
||||||
|
saveJSON,
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user