1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

小报童-一人公司内容爬取

This commit is contained in:
2024-06-25 02:42:25 +08:00
parent ac429e4cbf
commit 12afb942a2
7 changed files with 5310 additions and 0 deletions

2
小报童/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
output/*.html
output/*.json

5068
小报童/assets/reader.css Normal file

File diff suppressed because it is too large Load Diff

52
小报童/index.js Normal file
View File

@@ -0,0 +1,52 @@
const send_request = require('./utils/send_request');
const fs = require('fs');
const path = require('path');
// 2024.06.25
/**
 * Downloads the post list of the "oneCompany" paper from the xiaobot API and
 * exports every post into the `output/` directory next to this script:
 *   - `<title>.json` – the raw post object as returned by the API
 *   - `<title>.html` – a standalone page styled by `../assets/reader.css`
 *
 * Links between posts (`https://xiaobot.net/post/<uuid>`) are rewritten to the
 * matching local `./<title>.html` file so the exported pages link to each other.
 *
 * Only a single page (offset 0, up to 200 posts) is requested; there is no pagination.
 */
function getList() {
    // https://api.xiaobot.net/paper/oneCompany/post?limit=20&offset=0&tag_name=&keyword=&order_by=created_at+undefined
    const url = 'https://api.xiaobot.net/paper/oneCompany/post';
    const params = {
        limit: 200,
        offset: 0,
        tag_name: '',
        keyword: '',
        // Evaluates to "created_at undefined", matching the query string in the comment above.
        order_by: 'created_at ' + undefined,
    };
    const outputDir = path.join(__dirname, 'output');

    send_request.call('GET', url, params, (e) => {
        const posts = e && e.data ? e.data.data : undefined;
        if (!Array.isArray(posts)) {
            // e.g. an error payload such as code 411 (not logged in) carries no post array.
            console.error('Unexpected response from the post list API:', e && e.data);
            return;
        }
        console.log('data', posts.length, posts.map(d => JSON.stringify(d).substring(0, 100)));

        // The directory may not exist yet on a fresh checkout.
        fs.mkdirSync(outputDir, { recursive: true });

        // uuid -> file name (without extension) of the exported page for that post.
        const uuidFileMap = {};
        for (const item of posts) {
            uuidFileMap[item.uuid] = toSafeFileName(item.title);
        }

        for (const item of posts) {
            const fileName = toSafeFileName(item.title);
            const contentHtml = localizePostLinks(item.content, uuidFileMap);
            const html = renderPostHtml(item.title, contentHtml);
            fs.writeFileSync(path.join(outputDir, fileName + '.json'), JSON.stringify(item, null, 4), 'utf-8');
            fs.writeFileSync(path.join(outputDir, fileName + '.html'), html, 'utf-8');
        }
    });
}

/**
 * Turns a post title into a usable file name by replacing path separators and
 * the characters `: * ? " < > |` with `_`. Other characters are kept as-is.
 *
 * @param {*} title - Post title; coerced to a string.
 * @returns {string} File name without extension.
 */
function toSafeFileName(title) {
    return String(title).replace(/[\\/:*?"<>|]/g, '_');
}

/**
 * Escapes `&`, `<` and `>` so plain text can be placed inside an HTML element.
 *
 * @param {*} text - Text to escape; coerced to a string.
 * @returns {string} Escaped text.
 */
function escapeHtmlText(text) {
    return String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
}

/**
 * Rewrites every occurrence of `https://xiaobot.net/post/<uuid>` to the local
 * `./<file>.html` page of that post.
 *
 * @param {*} contentHtml - Post body HTML; anything that is not a string is treated as empty.
 * @param {Object} uuidFileMap - Maps post uuid to exported file name (without extension).
 * @returns {string} HTML with localized links.
 */
function localizePostLinks(contentHtml, uuidFileMap) {
    let html = typeof contentHtml === 'string' ? contentHtml : '';
    for (const uuid of Object.keys(uuidFileMap)) {
        // https://xiaobot.net/post/83e3da77-0c09-4ecb-a8dd-3d7e5d191a6d
        // split/join replaces ALL occurrences and inserts the target literally
        // (a string-pattern replace() stops after the first match and expands "$&" etc.).
        html = html
            .split('https://xiaobot.net/post/' + uuid)
            .join('./' + uuidFileMap[uuid] + '.html');
    }
    return html;
}

/**
 * Wraps a post body into a minimal standalone HTML document.
 *
 * @param {*} title - Post title, HTML-escaped before being placed in `<title>`.
 * @param {string} contentHtml - Post body, inserted as-is (it is already HTML).
 * @returns {string} Complete HTML document.
 */
function renderPostHtml(title, contentHtml) {
    return `<!DOCTYPE html>
<html lang="zh-cn">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${escapeHtmlText(title)}</title>
<link rel="stylesheet" href="../assets/reader.css">
</head>
<body>
<div class="post" style="padding: 20px;">${contentHtml}</div>
</body>
</html>`;
}
// Kick off the export as soon as this script is executed.
getList()

View File

108
小报童/package-lock.json generated Normal file
View File

@@ -0,0 +1,108 @@
{
"name": "1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"axios": "^1.7.2",
"js-md5": "^0.8.3"
}
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmmirror.com/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
},
"node_modules/axios": {
"version": "1.7.2",
"resolved": "https://registry.npmmirror.com/axios/-/axios-1.7.2.tgz",
"integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==",
"dependencies": {
"follow-redirects": "^1.15.6",
"form-data": "^4.0.0",
"proxy-from-env": "^1.1.0"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmmirror.com/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmmirror.com/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/follow-redirects": {
"version": "1.15.6",
"resolved": "https://registry.npmmirror.com/follow-redirects/-/follow-redirects-1.15.6.tgz",
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
"funding": [
{
"type": "individual",
"url": "https://github.com/sponsors/RubenVerborgh"
}
],
"engines": {
"node": ">=4.0"
},
"peerDependenciesMeta": {
"debug": {
"optional": true
}
}
},
"node_modules/form-data": {
"version": "4.0.0",
"resolved": "https://registry.npmmirror.com/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/js-md5": {
"version": "0.8.3",
"resolved": "https://registry.npmmirror.com/js-md5/-/js-md5-0.8.3.tgz",
"integrity": "sha512-qR0HB5uP6wCuRMrWPTrkMaev7MJZwJuuw4fnwAzRgP4J4/F8RwtodOKpGp4XpqsLBFzzgqIO42efFAyz2Et6KQ=="
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmmirror.com/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmmirror.com/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmmirror.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
}
}
}

6
小报童/package.json Normal file
View File

@@ -0,0 +1,6 @@
{
"dependencies": {
"axios": "^1.7.2",
"js-md5": "^0.8.3"
}
}

View File

@@ -0,0 +1,74 @@
/**
* Xiaobot (小报童) request signing logic.
* Decoded from: https://static.xiaobot.net/resource/xiaobot-web/js/reader.e060c116.js
*
* @since 2024.06.25
*/
const md5 = require('js-md5');
const request = require('request');
const axios = require('axios');
// Extracted helper methods
/**
 * Supplies the token that is sent as `Authorization: Bearer <token>`.
 *
 * The returned text is a placeholder asking for the access_token obtained via the
 * browser's F12 developer tools; replace it with a real token before running.
 *
 * @returns {string} The access token (or the placeholder text).
 */
function getAccessToken() {
    const accessToken = '填写网页F12中取得的 access_token';
    return accessToken;
}
/**
 * Sends a signed request to the xiaobot API through axios.
 *
 * The payload is signed with `_getSign`; the signature, the timestamp it was
 * computed with and the bearer token from `getAccessToken()` go into the headers.
 *
 * @param {string} method - HTTP verb, matched case-insensitively.
 * @param {string} url - Request URL.
 * @param {Object} [data] - Payload. Sent as the request body for POST/PUT/PATCH and
 *     as query parameters for every other verb. A missing payload is treated as `{}`.
 * @param {Function} [callback] - Invoked with the axios response once it arrives.
 * @returns {Promise} Resolves with the axios response after `callback` has run;
 *     rejects when the request fails or `callback` throws, so callers can handle errors.
 */
function call(method, url, data, callback) {
    const verb = typeof method === 'string' ? method.toUpperCase() : method;
    const payload = data == null ? {} : data;
    const sendsBody = ["POST", "PUT", "PATCH"].includes(verb);
    // Seconds since the epoch (fractional); the same value is signed and sent as a header.
    const timestamp = Date.now() / 1000;
    const sign = _getSign(payload, timestamp);

    return axios.request({
        method: verb,
        url: url,
        params: sendsBody ? null : payload,
        data: sendsBody ? payload : null,
        headers: {
            timestamp: timestamp,
            "api-key": "xiaobot_web",
            "app-version": "0.1",
            sign: sign,
            Authorization: "Bearer " + getAccessToken() // o.a.state.user.user.access_token
        }
    }).then(response => {
        if (response.data && 411 === response.data.code) {
            // Log message reads "please log in first"; the response is still handed to the callback.
            console.log("请先登录")
            // o.a.commit("user/setUser", {})
        }
        if (typeof callback === 'function') {
            callback(response);
        }
        return response;
    });
}
/**
 * Computes the request signature sent in the `sign` header.
 *
 * The payload is passed through `_ksort`, every entry whose value is not
 * `undefined` is rendered as `key=value`, the pairs are joined with `&`, and the
 * fixed salt plus `.` plus the timestamp is appended before hashing with md5.
 *
 * `md5` is the js-md5 package (https://github.com/emn178/js-md5); in the site's
 * bundle it appears as `i()` inside
 * https://static.xiaobot.net/resource/xiaobot-web/js/chunk-core.25cf2aec.js
 * (js-md5 0.7.3, Chen, Yi-Cyuan, MIT).
 *
 * @param {Object} data - Request payload (query parameters or body fields).
 * @param {number} timestamp - Timestamp that is also sent in the `timestamp` header.
 * @returns {string} Whatever `md5` returns for the assembled string.
 */
function _getSign(data, timestamp) {
    const sortedData = _ksort(data);
    const pairs = [];
    Object.keys(sortedData).forEach((key) => {
        const value = sortedData[key];
        if (value === undefined) {
            return;
        }
        pairs.push(key + "=" + value);
    });
    const query = pairs.join("&");
    return md5(query + "dbbc1dd37360b4084c3a69346e0ce2b2." + timestamp);
}
/**
 * Returns a shallow copy of `data` whose own enumerable keys were inserted in
 * ascending `Array.prototype.sort()` (UTF-16 code unit) order.
 *
 * Note that JavaScript objects always enumerate integer-like keys first, in
 * numeric order, regardless of insertion order.
 *
 * @param {Object} data - Object to copy; it is not modified.
 * @returns {Object} New plain object with the same keys and values.
 */
function _ksort(data) {
    const sortedKeys = Object.keys(data).sort();
    return sortedKeys.reduce((ordered, key) => {
        ordered[key] = data[key];
        return ordered;
    }, {});
}
// const sign = _getSign({}, Date.now())
// console.log('sign', sign)
// Public API of this module: only `call` is exported; the other functions
// defined in this file are not part of the exports object.
module.exports = {
call,
}