1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

合并仓库前的准备

This commit is contained in:
2022-10-17 13:09:59 +08:00
parent c11080a6f6
commit 04eb563794
29 changed files with 78 additions and 78 deletions

View File

@@ -0,0 +1,40 @@
'use strict';
const child_process = require('child_process');
const iconv = require("iconv-lite");
// Code page handed to iconv.decode for the captured command output.
// NOTE(review): cp936 is the GBK code page; presumably chosen because the
// commands run on a Chinese-locale Windows console — confirm.
const encoding = "cp936";
// Encoding requested from child_process.exec and used again with Buffer.from,
// so the output text can be turned back into bytes before being decoded.
const bufferEncoding = "binary";
/**
 * Runs the given shell commands one after another and collects one result
 * object per command, in input order.
 *
 * A command that fails does not abort the run: its entry is `{ cmd, err }`.
 * A command that succeeds yields `{ cmd, stdout, stderr }` with both streams
 * decoded from `bufferEncoding` bytes using `encoding`.
 *
 * @param {string} rootPath - Working directory the commands are executed in.
 * @param {Iterable<string>} cmds - Commands to execute sequentially.
 * @returns {Promise<Array<object>>} One result object per command.
 */
async function execute(rootPath, cmds) {
    // Adapts a single child_process.exec call to a promise that always resolves.
    // refer: https://www.webhek.com/post/execute-a-command-line-binary-with-node-js/
    const runOne = (command) => new Promise((done) => {
        const execOptions = {
            cwd: rootPath, // directory the command is executed in
            encoding: bufferEncoding,
        };
        child_process.exec(command, execOptions, (failure, rawOut, rawErr) => {
            if (!failure) {
                // Re-interpret the captured text as raw bytes, then decode them.
                const decodeOutput = (text) => iconv.decode(Buffer.from(text, bufferEncoding), encoding);
                done({
                    cmd: command,
                    stdout: decodeOutput(rawOut),
                    stderr: decodeOutput(rawErr),
                });
                return;
            }
            // Failure: report the error object as-is, without decoded output.
            done({
                cmd: command,
                err: failure,
            });
        });
    });

    const collected = [];
    for (const command of cmds) {
        // Await inside the loop on purpose: commands must run sequentially.
        collected.push(await runOne(command));
    }
    return collected;
}
exports.execute = execute;

View File

@@ -0,0 +1,99 @@
'use strict';
const fs = require('fs');
const path = require('path');
const fileUtils = require('./utils/fileUtils');
const requestUtils = require('./utils/requestUtils');
// Endpoint requested by main().
const API_URL = "https://app.bilibili.com/x/v2/search/trending/ranking";
// Name of this collector's sub-directory inside the data folder; also used as a tag in log lines.
const SUB_FOLDER = "bilibili-hotband";
// Output directory: <parent of this file's directory>/<DATA_FOLDER env var, default 'data'>/<SUB_FOLDER>.
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', SUB_FOLDER);
console.log("DATA_FOLDER", DATA_FOLDER);
fileUtils.createFolder(DATA_FOLDER); // Make sure the data directory exists as soon as the module is loaded.
/**
 * Fetches the bilibili trending-search ranking once and persists it.
 *
 * Flow: request API_URL; when the response's `code` is not 0, wait 3 seconds
 * and retry once; when the retry also fails, archive that response with the
 * `origin-error` suffix and stop. On success the raw response (`origin`) and
 * the ranking list (`final`) are archived and `latest.json` in DATA_FOLDER is
 * overwritten.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const requestTimestamp = Date.now();
    // Shifted by +8h so the digits of the ISO string read as UTC+8 wall-clock time.
    const now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
    // Log prefix: current time, shifted the same way.
    const logTime = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString();
    // Archives `object` through fileUtils.saveJSON, minified file only.
    const archive = (fileNameSuffix, object) => fileUtils.saveJSON({
        saveFolder: DATA_FOLDER,
        now: now,
        fileNameSuffix: fileNameSuffix,
        object: object,
        compress: true,
        uncompress: false,
    });

    let result = await requestUtils.getApiResult(API_URL);
    if (result.code != 0) {
        console.log(logTime(), SUB_FOLDER, "请求成功但服务器处理失败等待3s后重试。");
        await new Promise((resolve) => {
            setTimeout(resolve, 3000); // wait 3 seconds
        });
        result = await requestUtils.getApiResult(API_URL);
        // This API reports success through `code === 0`. The previous check
        // (`result.ok != 1`) was always true here, so even a successful retry
        // was stored as an error and discarded.
        if (result.code != 0) {
            console.log(logTime(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。");
            // Still failing: keep the response for later analysis and skip further processing.
            archive(`origin-error`, result);
            return;
        }
    }
    console.log(logTime(), SUB_FOLDER, "请求成功");

    const data = result.data;
    // A "successful" response without a ranking list cannot be processed;
    // archive it for inspection instead of crashing below.
    if (!data || !Array.isArray(data.list)) {
        archive(`origin-parse-error`, result);
        return;
    }
    // Remove trackid before anything is written.
    delete data.trackid;

    /**
     * Save the raw response.
     */
    archive(`origin`, result);

    /**
     * Pick the data we need. Entries are currently kept unchanged; each one looks like:
     * {
     *   "position": 1,
     *   "keyword": "the keyword",
     *   "show_name": "display name of the trending entry",
     *   "word_type": 8,
     *   "icon": "icon of the entry, may be absent",
     *   "hot_id": 7399 // id of the trending entry
     * }
     */
    const convert = [...data.list];
    archive(`final`, convert);

    /**
     * Refresh the "latest" snapshot.
     */
    fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({
        update_time: requestTimestamp,
        update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
        data: data.list,
        exp_str: data.exp_str,
    }));
}
exports.main = main;

View File

@@ -0,0 +1,90 @@
'use strict';
const fs = require('fs');
const path = require('path');
const fileUtils = require('./utils/fileUtils');
const requestUtils = require('./utils/requestUtils');
// Endpoint requested by main().
const API_URL = "https://api.bilibili.com/x/web-interface/ranking/v2?type=all";
// Name of this collector's sub-directory inside the data folder; also used as a tag in log lines.
const SUB_FOLDER = "bilibili-rank";
// Output directory: <parent of this file's directory>/<DATA_FOLDER env var, default 'data'>/<SUB_FOLDER>.
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', SUB_FOLDER);
console.log("DATA_FOLDER", DATA_FOLDER);
fileUtils.createFolder(DATA_FOLDER); // Make sure the data directory exists as soon as the module is loaded.
/**
 * Fetches the bilibili ranking once and persists it.
 *
 * Flow: request API_URL; when the response's `code` is not 0, wait 3 seconds
 * and retry once; when the retry also fails, archive that response with the
 * `origin-error` suffix and stop. On success the raw response is archived
 * (`origin`) and `latest.json` in DATA_FOLDER is overwritten.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const requestTimestamp = Date.now();
    // Shifted by +8h so the digits of the ISO string read as UTC+8 wall-clock time.
    const now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
    // Log prefix: current time, shifted the same way.
    const logTime = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString();
    // Archives `object` through fileUtils.saveJSON, minified file only.
    const archive = (fileNameSuffix, object) => fileUtils.saveJSON({
        saveFolder: DATA_FOLDER,
        now: now,
        fileNameSuffix: fileNameSuffix,
        object: object,
        compress: true,
        uncompress: false,
    });

    let result = await requestUtils.getApiResult(API_URL);
    if (result.code != 0) {
        console.log(logTime(), SUB_FOLDER, "请求成功但服务器处理失败等待3s后重试。");
        await new Promise((resolve) => {
            setTimeout(resolve, 3000); // wait 3 seconds
        });
        result = await requestUtils.getApiResult(API_URL);
        // This API reports success through `code === 0`. The previous check
        // (`result.ok != 1`) was always true here, so even a successful retry
        // was stored as an error and discarded.
        if (result.code != 0) {
            console.log(logTime(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。");
            // Still failing: keep the response for later analysis and skip further processing.
            archive(`origin-error`, result);
            return;
        }
    }
    console.log(logTime(), SUB_FOLDER, "请求成功");
    // console.log("result", result);
    const data = result.data;
    // // Remove trackid
    // delete data["trackid"];
    // console.log(data);

    /**
     * Save the raw response.
     */
    archive(`origin`, result);

    // Without a `data` object there is nothing to publish; archive the
    // response for inspection instead of crashing on `data.note` below.
    if (!data) {
        archive(`origin-parse-error`, result);
        return;
    }

    // /**
    //  * Pick the data we need and convert it
    //  */
    // let convert = [];
    // data.list.forEach(item => {
    //     convert.push(item);
    // });
    // fileUtils.saveJSON({
    //     saveFolder: DATA_FOLDER,
    //     now: now,
    //     fileNameSuffix: `final`,
    //     object: convert,
    //     compress: true,
    //     uncompress: false,
    // });

    /**
     * Refresh the "latest" snapshot.
     */
    fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({
        update_time: requestTimestamp,
        update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
        note: data.note,
        data: data.list,
    }));
}
exports.main = main;

View File

@@ -0,0 +1,228 @@
'use strict';
const fs = require('fs');
const path = require('path');
const fileUtils = require('./utils/fileUtils');
const requestUtils = require('./utils/requestUtils');
// Endpoint requested by main().
const API_URL = "https://weibo.com/ajax/statuses/hot_band";
// Name of this collector's sub-directory inside the data folder; also used as a tag in log lines.
const SUB_FOLDER = "weibo-hotband";
// Output directory: <parent of this file's directory>/<DATA_FOLDER env var, default 'data'>/<SUB_FOLDER>.
const DATA_FOLDER = path.join(path.dirname(__dirname), process.env.DATA_FOLDER ?? 'data', SUB_FOLDER);
console.log("DATA_FOLDER", DATA_FOLDER);
fileUtils.createFolder(DATA_FOLDER); // Make sure the data directory exists as soon as the module is loaded.
/**
 * Fetches the weibo hot-band list once and persists it.
 *
 * Flow: request API_URL; when the response's `ok` is not 1, wait 3 seconds
 * and retry once; when the retry also fails, archive that response with the
 * `origin-error` suffix and stop. On success the raw response is archived
 * (`origin`), ads and redundant fields are stripped from a deep copy, each
 * remaining entry is converted to a compact record (`final`), and
 * `latest.json` in DATA_FOLDER is overwritten with the converted list.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const requestTimestamp = Date.now();
    // Shifted by +8h so the digits of the ISO string read as UTC+8 wall-clock time.
    const now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
    // Log prefix: current time, shifted the same way.
    const logTime = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString();
    // Archives `object` through fileUtils.saveJSON, minified file only.
    const archive = (fileNameSuffix, object) => fileUtils.saveJSON({
        saveFolder: DATA_FOLDER,
        now: now,
        fileNameSuffix: fileNameSuffix,
        object: object,
        compress: true,
        // uncompress: true,
        uncompress: false,
    });

    let result = await requestUtils.getApiResult(API_URL);
    if (result.ok != 1) {
        console.log(logTime(), SUB_FOLDER, "请求成功但服务器处理失败等待3s后重试。");
        await new Promise((resolve) => {
            setTimeout(resolve, 3000); // wait 3 seconds
        });
        result = await requestUtils.getApiResult(API_URL);
        if (result.ok != 1) {
            console.log(logTime(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。");
            // `ok` is still not 1: keep the response for later analysis and skip further processing.
            archive(`origin-error`, result);
            return;
        }
    }
    console.log(logTime(), SUB_FOLDER, "请求成功");
    // console.log("result", result);

    /**
     * Save the raw response.
     */
    archive(`origin`, result);

    // Validate BEFORE cloning: JSON.stringify(undefined) yields undefined and
    // JSON.parse(undefined) throws, so a missing `data` used to crash instead
    // of reaching the parse-error branch. A missing `band_list` is treated
    // the same way because everything below iterates over it.
    if (!result.data || !Array.isArray(result.data.band_list)) {
        archive(`origin-parse-error`, result);
        return;
    }
    // Deep copy, so the clean-up below never touches the response object.
    const data = JSON.parse(JSON.stringify(result.data));

    /**
     * Strip the data we do not need.
     */
    // hotgov
    if (data.hotgov) {
        delete data.hotgov["mblog"];
        // Of duplicated fields keep only one
        delete data.hotgov["note"]; // note == word
        delete data.hotgov["small_icon_desc"]; // icon_desc == small_icon_desc
        delete data.hotgov["small_icon_desc_color"]; // icon_desc_color == small_icon_desc_color
    }
    // band_list: drop ads first ...
    data.band_list = data.band_list.filter((item) => !item.is_ad);
    // ... then strip fields from the entries that remain.
    for (const item of data.band_list) {
        // Empty field
        delete item["ad_info"];
        // Of duplicated fields keep only one
        delete item["note"]; // note == word
        delete item["icon_desc"]; // label_name == icon_desc == small_icon_desc
        delete item["small_icon_desc"];
        delete item["small_icon_desc_color"]; // icon_desc_color == small_icon_desc_color
        delete item["flag_desc"]; // flag_desc and subject_label are equal when set; when unset the former is undefined and the latter is ""
    }

    /**
     * Pick the data we need and convert it.
     */
    // Matches any HTML tag; used to reduce the post text to plain text.
    const htmlTagPattern = /(<([^>]+)>)/ig;
    const convert = data.band_list.map((item) => {
        // Some entries have no mblog (or no text in it); detail stays empty then.
        const rawText = item.mblog ? item.mblog.text : undefined;
        const detail = typeof rawText === "string" ? rawText.replace(htmlTagPattern, "") : "";
        return {
            // Position in the ranking
            rank: item.rank,
            realpos: item.realpos,
            // Entry information
            word: item.word, // title of the entry
            word_scheme: item.word_scheme, // topic of the entry, "#title#"
            emoticon: item.emoticon, // small emoticon of the entry, e.g. "[泪]"
            label_name: item.label_name, // label of the entry, e.g. "爆" "热" "新" ""
            onboard_time: item.onboard_time, // time the entry went on the board, timestamp in seconds, e.g. 1658565575
            /**
             * Heat figures
             *
             * For most entries num and raw_hot are equal; the page shows num.
             * A difference possibly indicates a manually adjusted entry.
             *
             * The largest difference observed so far appears to be 1250000:
             * for one "爆" entry the delta first kept growing up to 1250000,
             * and once the heat reached roughly 12600000 it gradually shrank
             * to about 1040000.
             */
            num: item.num,
            raw_hot: item.raw_hot,
            // Computed value. The key spelling "detla" (sic) is kept because
            // files already written, and whatever reads them, use this name.
            detla: item.num - item.raw_hot,
            url: `https://s.weibo.com/weibo?q=${encodeURIComponent(item.word_scheme)}`, // link to the topic
            // Classification
            category: item.category ? item.category.split(',') : "",
            subject_label: item.subject_label,
            // Everything else
            more: {
                is_new: item.is_new,
                subject_querys: item.subject_querys,
                mid: item.mid,
                icon_desc_color: item.icon_desc_color,
                detail: detail,
            },
        };
    });
    archive(`final`, convert);

    // /**
    //  * Collect only the adjustment ("regulation") information
    //  */
    // let convert2 = [];
    // let total = 0;
    // data.band_list.forEach(item => {
    //     total += item.num;
    //     total -= item.raw_hot;
    //     if (item.num - item.raw_hot == 0) return;
    //     convert2.push([
    //         `[${item.realpos}] ${item.word}【${item.label_name}】`,
    //         `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
    //     ]);
    // });
    // fileUtils.saveJSON({
    //     saveFolder: DATA_FOLDER,
    //     now: now,
    //     fileNameSuffix: `regulation`,
    //     object: {
    //         total_delta: total, // sum of all adjustment values
    //         data: convert2
    //     },
    //     compress: false,
    //     uncompress: true
    // });
    // /**
    //  * Save the pre-processed data
    //  */
    // // Strip the data we do not need
    // // band_list
    // data.band_list.forEach(function (item) {
    //     delete item["mblog"];
    // });
    // fileUtils.saveJSON({
    //     saveFolder: DATA_FOLDER,
    //     now: now,
    //     fileNameSuffix: `simplify`,
    //     object: data,
    //     compress: true,
    //     // uncompress: true,
    //     // compress: false,
    //     uncompress: false,
    // });

    /**
     * Refresh the "latest" snapshot.
     */
    fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({
        update_time: requestTimestamp,
        update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
        // regulation: convert2,
        data: convert
    }));
}
exports.main = main;

View File

@@ -0,0 +1,51 @@
const fs = require('fs');
const path = require('path');
/**
 * Interprets the text of an environment variable as an on/off flag.
 *
 * Environment variables are strings, so the former `value == true` test only
 * succeeded for text that converts to the number 1 (e.g. "1"); the natural
 * spelling "true" was silently treated as off. Both forms are accepted now,
 * and everything the old comparison accepted still is.
 *
 * @param {string|undefined} value - Raw environment variable value.
 * @returns {boolean} true when the value is "true" (any letter case,
 *   surrounding whitespace ignored) or converts to the number 1.
 */
function parseEnvFlag(value) {
    if (typeof value !== 'string') {
        return false;
    }
    return value.trim().toLowerCase() === 'true' || Number(value) === 1;
}
// When set, saveJSON writes no archive files.
const LATEST_DATA_ONLY = parseEnvFlag(process.env.LATEST_DATA_ONLY);
/**
 * Makes sure a directory exists, creating any missing parent directories.
 *
 * The work is done synchronously, so the directory exists by the time the
 * call returns. The function stays `async` (callers receive a Promise) to
 * keep its original signature. If the path already exists nothing is done.
 *
 * @param {string} folderToCreate - Directory path to create.
 * @returns {Promise<void>} Rejects if the directory cannot be created.
 */
async function createFolder(folderToCreate) {
    const targetFolder = path.join(folderToCreate);
    if (fs.existsSync(targetFolder)) {
        // Already there: nothing to do.
        return;
    }
    // `recursive: true` creates the whole chain in one call and does not fail
    // if another process creates the directory in the meantime. It replaces
    // the hand-written recursion, whose inner calls returned promises nobody
    // awaited (a parent failure surfaced as a stray unhandled rejection) and
    // which never terminated when the root of the path did not exist.
    fs.mkdirSync(targetFolder, { recursive: true });
}
/**
 * Writes `object` as JSON into a date-based folder tree.
 *
 * Target folder: `<saveFolder>/<fileNameSuffix>/<year>/<month>/<day>`; the
 * file base name is `<year><month><day>_<hour><minute>`. All date parts are
 * cut from fixed character positions of `now`. Nothing is written when
 * LATEST_DATA_ONLY is set.
 *
 * @param {object} options
 * @param {string} options.saveFolder - Root folder of the archive.
 * @param {string} options.now - Timestamp text, e.g. '2022-07-23T10:11:38.650Z' => '20220723_1011'.
 * @param {string} options.fileNameSuffix - Sub-folder name placed below saveFolder.
 * @param {*} options.object - Value to serialize.
 * @param {boolean} [options.compress=true] - Write the minified `.min.json` file.
 * @param {boolean} [options.uncompress=true] - Write the tab-indented `.json` file.
 * @returns {void}
 */
function saveJSON({ saveFolder, now, fileNameSuffix, object, compress = true, uncompress = true }) {
    if (LATEST_DATA_ONLY) {
        return;
    }

    // Character ranges of year, month, day, hour and minute inside `now`.
    const [year, month, day, hour, minute] = [[0, 4], [5, 7], [8, 10], [11, 13], [14, 16]]
        .map(([from, to]) => now.slice(from, to));

    // Make sure the folder for that day exists.
    const dayFolder = `${saveFolder}/${fileNameSuffix}/${year}/${month}/${day}`;
    createFolder(dayFolder);

    const basePath = `${dayFolder}/${year}${month}${day}_${hour}${minute}`;
    if (compress) {
        fs.writeFileSync(`${basePath}.min.json`, JSON.stringify(object));
    }
    if (uncompress) {
        fs.writeFileSync(`${basePath}.json`, JSON.stringify(object, null, "\t"));
    }
}
module.exports = {
createFolder,
saveJSON,
}

View File

@@ -0,0 +1,27 @@
const request = require('request');
/**
 * Sends a GET request to an API endpoint and resolves with the response body,
 * which the `request` library is asked to treat as JSON (`json: true`).
 *
 * Never rejects: on a transport error or a status code other than 200 the
 * error is logged and an empty object is returned instead.
 *
 * @param {string} url - Endpoint to request.
 * @returns {Promise<*>} The response body, or `{}` on failure.
 */
async function getApiResult(url) {
    const requestOptions = {
        method: 'GET',
        url: url,
        json: true,
    };
    const payload = await new Promise((settle) => {
        request(requestOptions, (error, response, result) => {
            const succeeded = !error && (response.statusCode == 200);
            if (succeeded) {
                // Request succeeded
                settle(result);
                return;
            }
            // Request failed
            console.log(`error is ${error}`);
            settle({});
        });
    });
    // console.log(`return_data is ${JSON.stringify(payload)}`);
    return payload;
}
module.exports = {
getApiResult,
}