抽离出公共方法;添加B站热搜爬取代码
This commit is contained in:
		
							
								
								
									
										114
									
								
								src/get_bilibili_hotband.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								src/get_bilibili_hotband.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,114 @@
 | 
			
		||||
'use strict';
 | 
			
		||||
 | 
			
		||||
const request = require('request');
 | 
			
		||||
const fs = require('fs');
 | 
			
		||||
const path = require('path');
 | 
			
		||||
 | 
			
		||||
const utils = require('./utils/utils');
 | 
			
		||||
 | 
			
		||||
// Output root for the Bilibili data: <parent of this file's directory>/<DATA_FOLDER env var, default 'data'>/bilibili
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', 'bilibili');
console.log("DATA_FOLDER", DATA_FOLDER);
utils.createFolder(DATA_FOLDER); // Make sure the data folder exists as soon as this module is loaded
 | 
			
		||||
 | 
			
		||||
/**
 * Send a GET request to the given API endpoint and resolve with its body.
 *
 * The request is issued with `json: true`. This function never rejects:
 * on a transport error or any status code other than 200 it logs the
 * problem and resolves with the sentinel string "error", so callers must
 * inspect the resolved value (e.g. its `code` field) themselves.
 *
 * @param {string} url - Address of the API endpoint to request.
 * @returns {Promise<*>} The response body on HTTP 200, otherwise "error".
 */
async function getApiResult(url) {
    return new Promise((resolve) => {
        request({ method: 'GET', url, json: true }, (error, response, result) => {
            if (!error && response.statusCode === 200) {
                // Request succeeded
                resolve(result);
                return;
            }
            // Request failed: `error` is null for a non-200 response, so the
            // status code is logged too, otherwise the log would carry no information.
            console.log(`error is ${error}, status code is ${response?.statusCode}`);
            resolve("error");
        });
    });
}
 | 
			
		||||
 | 
			
		||||
/**
 * Fetch the Bilibili trending-search ranking once and persist it.
 *
 * Flow:
 *  1. Request the ranking API; when the payload's `code` is not 0, retry once.
 *  2. When the retry fails as well, save that payload under `origin-error`
 *     and stop.
 *  3. Otherwise remove `data.trackid`, save the payload (`origin`), save the
 *     ranking list (`final`) and overwrite `latest.json` inside DATA_FOLDER.
 *
 * Every timestamp is shifted by +8 hours before toISOString() is applied,
 * so the digits of the resulting string read as UTC+8 wall-clock time.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const API_URL = "https://app.bilibili.com/x/v2/search/trending/ranking";
    const UTC8_OFFSET_MS = 8 * 60 * 60 * 1000;
    // Timestamp used as the prefix of every log line
    const logTime = () => new Date(Date.now() + UTC8_OFFSET_MS).toISOString();

    const requestTimestamp = Date.now();
    const now = new Date(requestTimestamp + UTC8_OFFSET_MS).toISOString();

    let result = await getApiResult(API_URL);
    if (result.code !== 0) {
        console.log(logTime(), "bilibili", "请求成功,但服务器处理失败,正在重试。");
        result = await getApiResult(API_URL);
        // The retry must be judged by `code` exactly like the first attempt.
        // Testing `ok` here (a field this function never sees on a Bilibili
        // payload) treated every successful retry as a failure.
        if (result.code !== 0) {
            console.log(logTime(), "bilibili", "请求成功,但服务器处理失败,保存失败信息。");
            // code is still not 0: keep the payload for later analysis and skip all further processing
            utils.saveJSON({
                saveFolder: DATA_FOLDER,
                now: now,
                fileNameSuffix: `origin-error`,
                object: result,
                compress: true,
                uncompress: false
            });
            return;
        }
    }

    console.log(logTime(), "bilibili", "请求成功");

    const data = result.data;

    // Remove trackid. `data` is the same object as `result.data`, so the
    // "origin" file written below does not contain it either.
    delete data["trackid"];

    /**
     * Save the original payload
     */
    utils.saveJSON({
        saveFolder: DATA_FOLDER,
        now: now,
        fileNameSuffix: `origin`,
        object: result,
        compress: true,
        uncompress: false
    });

    /**
     * Pick the data that is needed. Items are copied unchanged; each one looks like:
     * {
     *     "position": 1,
     *     "keyword": "search keyword",
     *     "show_name": "displayed trending name",
     *     "word_type": 8,
     *     "icon": "icon of the entry, may be absent",
     *     "hot_id": 7399 // id of the trending entry
     * }
     */
    const convert = [...data.list];
    utils.saveJSON({
        saveFolder: DATA_FOLDER,
        now: now,
        fileNameSuffix: `final`,
        object: convert,
        compress: true,
        uncompress: false,
    });

    /**
     * Refresh the "latest" snapshot
     */
    fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({
        update_time: requestTimestamp,
        update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
        data: data
    }));
}
 | 
			
		||||
 | 
			
		||||
exports.main = main;
 | 
			
		||||
@@ -4,11 +4,11 @@ const request = require('request');
 | 
			
		||||
const fs = require('fs');
 | 
			
		||||
const path = require('path');
 | 
			
		||||
 | 
			
		||||
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data');
 | 
			
		||||
console.log("DATA_FOLDER", DATA_FOLDER);
 | 
			
		||||
createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在
 | 
			
		||||
const utils = require('./utils/utils');
 | 
			
		||||
 | 
			
		||||
const LATEST_DATA_ONLY = process.env.LATEST_DATA_ONLY == true;
 | 
			
		||||
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', 'weibo');
 | 
			
		||||
console.log("DATA_FOLDER", DATA_FOLDER);
 | 
			
		||||
utils.createFolder(DATA_FOLDER); // 程序运行就保证 data 目录存在
 | 
			
		||||
 | 
			
		||||
// 请求微博热搜 APi 接口
 | 
			
		||||
async function getApiResult(url) {
 | 
			
		||||
@@ -32,59 +32,19 @@ async function getApiResult(url) {
 | 
			
		||||
    return return_data;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Create a folder together with any missing parent folders.
 *
 * Backslashes in the path are converted to forward slashes first. When the
 * path already exists nothing is done. The function stays `async` so its
 * return type (a Promise) is unchanged, but it contains no `await`: the
 * folders exist by the time the call returns, and a failure surfaces as a
 * rejected promise.
 *
 * @param {string} folderToCreate - Path of the folder that must exist.
 * @returns {Promise<void>}
 */
async function createFolder(folderToCreate) {
    const normalized = folderToCreate.replace(/\\/g, '/');
    if (fs.existsSync(normalized)) {
        // Already there: nothing to do
        return;
    }
    // `recursive: true` creates the parents as well. The previous hand-written
    // recursion never terminated for a path without any '/' (its parent is '').
    fs.mkdirSync(normalized, { recursive: true });
}
 | 
			
		||||
 | 
			
		||||
/**
 * Save an object as JSON under
 * `${DATA_FOLDER}/${fileNameSuffix}/YYYY/MM/DD/YYYYMMDD_HHmm(.min).json`.
 *
 * Does nothing when LATEST_DATA_ONLY is truthy.
 *
 * @param {object} options
 * @param {string} options.now - ISO-8601 style timestamp, e.g. '2022-07-23T10:11:38.650Z';
 *     year, month, day, hour and minute are taken from fixed character positions.
 * @param {string} options.fileNameSuffix - Name of the sub-folder below DATA_FOLDER.
 * @param {*} options.object - Value to serialise.
 * @param {boolean} [options.compress=true] - Write the compact `.min.json` file.
 * @param {boolean} [options.uncompress=true] - Write the tab-indented `.json` file.
 * @returns {void}
 */
function saveJSON({ now, fileNameSuffix, object, compress = true, uncompress = true }) {
    if (LATEST_DATA_ONLY) return;

    // '2022-07-23T10:11:38.650Z' => '2022', '07', '23', '10', '11'
    const year = now.substring(0, 4);
    const month = now.substring(5, 7);
    const day = now.substring(8, 10);
    const hour = now.substring(11, 13);
    const minute = now.substring(14, 16);

    // Create the target folder synchronously on exactly the path that is
    // written to below, so a failure throws here instead of being lost in an
    // un-awaited promise.
    const folder = `${DATA_FOLDER}/${fileNameSuffix}/${year}/${month}/${day}`;
    fs.mkdirSync(folder, { recursive: true });

    // e.g. '<folder>/20220723_1011'
    const fileName = `${folder}/${year}${month}${day}_${hour}${minute}`;

    if (compress) {
        fs.writeFileSync(`${fileName}.min.json`, JSON.stringify(object));
    }
    if (uncompress) {
        fs.writeFileSync(`${fileName}.json`, JSON.stringify(object, null, "\t"));
    }
}
 | 
			
		||||
 | 
			
		||||
async function main() {
 | 
			
		||||
    let requestTimestamp = Date.now();
 | 
			
		||||
    let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
 | 
			
		||||
 | 
			
		||||
    let result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
 | 
			
		||||
    if (result.ok != 1) {
 | 
			
		||||
        console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,正在重试。");
 | 
			
		||||
        console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功,但服务器处理失败,正在重试。");
 | 
			
		||||
        result = await getApiResult("https://weibo.com/ajax/statuses/hot_band");
 | 
			
		||||
        if (result.ok != 1) {
 | 
			
		||||
            console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功,但服务器处理失败,保存失败信息。");
 | 
			
		||||
            console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功,但服务器处理失败,保存失败信息。");
 | 
			
		||||
            // ok 不为 1,那么久直接保存便于后续分析,不进行后续处理
 | 
			
		||||
            saveJSON({
 | 
			
		||||
            utils.saveJSON({
 | 
			
		||||
                saveFolder: DATA_FOLDER,
 | 
			
		||||
                now: now,
 | 
			
		||||
                fileNameSuffix: `origin-error`,
 | 
			
		||||
                object: result,
 | 
			
		||||
@@ -95,13 +55,14 @@ async function main() {
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "请求成功");
 | 
			
		||||
    console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), "weibo", "请求成功");
 | 
			
		||||
    // console.log("result", result);
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * 保存原始数据
 | 
			
		||||
     */
 | 
			
		||||
    saveJSON({
 | 
			
		||||
    utils.saveJSON({
 | 
			
		||||
        saveFolder: DATA_FOLDER,
 | 
			
		||||
        now: now,
 | 
			
		||||
        fileNameSuffix: `origin`,
 | 
			
		||||
        object: result,
 | 
			
		||||
@@ -198,7 +159,8 @@ async function main() {
 | 
			
		||||
            },
 | 
			
		||||
        });
 | 
			
		||||
    });
 | 
			
		||||
    saveJSON({
 | 
			
		||||
    utils.saveJSON({
 | 
			
		||||
        saveFolder: DATA_FOLDER,
 | 
			
		||||
        now: now,
 | 
			
		||||
        fileNameSuffix: `final`,
 | 
			
		||||
        object: convert,
 | 
			
		||||
@@ -222,7 +184,8 @@ async function main() {
 | 
			
		||||
            `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
 | 
			
		||||
        ]);
 | 
			
		||||
    });
 | 
			
		||||
    saveJSON({
 | 
			
		||||
    utils.saveJSON({
 | 
			
		||||
        saveFolder: DATA_FOLDER,
 | 
			
		||||
        now: now,
 | 
			
		||||
        fileNameSuffix: `regulation`,
 | 
			
		||||
        object: {
 | 
			
		||||
@@ -242,7 +205,8 @@ async function main() {
 | 
			
		||||
    data.band_list.forEach(function (item) {
 | 
			
		||||
        delete item["mblog"];
 | 
			
		||||
    });
 | 
			
		||||
    saveJSON({
 | 
			
		||||
    utils.saveJSON({
 | 
			
		||||
        saveFolder: DATA_FOLDER,
 | 
			
		||||
        now: now,
 | 
			
		||||
        fileNameSuffix: `simplify`,
 | 
			
		||||
        object: data,
 | 
			
		||||
@@ -264,4 +228,4 @@ async function main() {
 | 
			
		||||
    }));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
exports.main = main;
 | 
			
		||||
exports.main = main;
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										49
									
								
								src/utils/utils.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								src/utils/utils.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,49 @@
 | 
			
		||||
const fs = require('fs');
 | 
			
		||||
 | 
			
		||||
// When enabled, saveJSON() writes nothing.
// Environment variables are strings, and a string is loosely equal to `true`
// only when it converts to the number 1 (e.g. "1"); "true" converts to NaN.
// The loose comparison is kept so existing settings behave as before, and
// the literal "true" (any letter case) is accepted in addition.
const LATEST_DATA_ONLY = process.env.LATEST_DATA_ONLY == true
    || String(process.env.LATEST_DATA_ONLY).trim().toLowerCase() === 'true';
 | 
			
		||||
 | 
			
		||||
/**
 * Create a folder together with any missing parent folders.
 *
 * Backslashes in the path are converted to forward slashes first. When the
 * path already exists nothing is done. The function stays `async` so its
 * return type (a Promise) is unchanged, but it contains no `await`: the
 * folders exist by the time the call returns, and a failure surfaces as a
 * rejected promise.
 *
 * @param {string} folderToCreate - Path of the folder that must exist.
 * @returns {Promise<void>}
 */
async function createFolder(folderToCreate) {
    const normalized = folderToCreate.replace(/\\/g, '/');
    if (fs.existsSync(normalized)) {
        // Already there: nothing to do
        return;
    }
    // `recursive: true` creates the parents as well. The previous hand-written
    // recursion never terminated for a path without any '/' (its parent is '').
    fs.mkdirSync(normalized, { recursive: true });
}
 | 
			
		||||
 | 
			
		||||
/**
 * Save an object as JSON under
 * `${saveFolder}/${fileNameSuffix}/YYYY/MM/DD/YYYYMMDD_HHmm(.min).json`.
 *
 * Does nothing when LATEST_DATA_ONLY is truthy.
 *
 * @param {object} options
 * @param {string} options.saveFolder - Root folder the files are stored below.
 * @param {string} options.now - ISO-8601 style timestamp, e.g. '2022-07-23T10:11:38.650Z';
 *     year, month, day, hour and minute are taken from fixed character positions.
 * @param {string} options.fileNameSuffix - Name of the sub-folder below saveFolder.
 * @param {*} options.object - Value to serialise.
 * @param {boolean} [options.compress=true] - Write the compact `.min.json` file.
 * @param {boolean} [options.uncompress=true] - Write the tab-indented `.json` file.
 * @returns {void}
 */
function saveJSON({ saveFolder, now, fileNameSuffix, object, compress = true, uncompress = true }) {
    if (LATEST_DATA_ONLY) return;

    // '2022-07-23T10:11:38.650Z' => '2022', '07', '23', '10', '11'
    const year = now.substring(0, 4);
    const month = now.substring(5, 7);
    const day = now.substring(8, 10);
    const hour = now.substring(11, 13);
    const minute = now.substring(14, 16);

    // Create the target folder synchronously on exactly the path that is
    // written to below, so a failure throws here instead of being lost in an
    // un-awaited promise.
    const folder = `${saveFolder}/${fileNameSuffix}/${year}/${month}/${day}`;
    fs.mkdirSync(folder, { recursive: true });

    // e.g. '<folder>/20220723_1011'
    const fileName = `${folder}/${year}${month}${day}_${hour}${minute}`;

    if (compress) {
        fs.writeFileSync(`${fileName}.min.json`, JSON.stringify(object));
    }
    if (uncompress) {
        fs.writeFileSync(`${fileName}.json`, JSON.stringify(object, null, "\t"));
    }
}
 | 
			
		||||
 | 
			
		||||
// Helpers exported by this module: folder creation and JSON persistence.
module.exports = {
    createFolder,
    saveJSON,
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user