小报童-一人公司内容爬取
This commit is contained in:
2
小报童/.gitignore
vendored
Normal file
2
小报童/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
output/*.html
|
||||
output/*.json
|
5068
小报童/assets/reader.css
Normal file
5068
小报童/assets/reader.css
Normal file
File diff suppressed because it is too large
Load Diff
52
小报童/index.js
Normal file
52
小报童/index.js
Normal file
@@ -0,0 +1,52 @@
|
||||
const send_request = require('./utils/send_request');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// 2024.06.25
|
||||
|
||||
/**
 * Turn a post title into a name that is safe to use as a file name.
 *
 * Path separators, characters that are reserved on common file systems and
 * control characters are replaced with `_`, so a title can neither break the
 * write nor point outside the output directory.
 *
 * @param {*} title - Post title as returned by the API.
 * @returns {string} Sanitized name without extension; `'untitled'` when nothing is left.
 */
function _toFileName(title) {
    const cleaned = String(title == null ? '' : title)
        .replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_')
        .trim();
    return cleaned || 'untitled';
}

/**
 * Escape text so it can be placed inside HTML element content or a
 * double-quoted attribute.
 *
 * @param {*} text - Raw text.
 * @returns {string} Text with `&`, `<`, `>` and `"` replaced by entities.
 */
function _escapeHtml(text) {
    return String(text == null ? '' : text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
}

/**
 * Fetch the post list of the "oneCompany" paper and write every post into
 * `<__dirname>/output` as `<title>.json` (the raw API item) and `<title>.html`
 * (a standalone page styled by `../assets/reader.css`).
 *
 * Links between posts (`https://xiaobot.net/post/<uuid>`) are rewritten to the
 * generated local HTML files. Only one page of up to 200 posts is requested.
 */
function getList() {
    // https://api.xiaobot.net/paper/oneCompany/post?limit=20&offset=0&tag_name=&keyword=&order_by=created_at+undefined
    const url = 'https://api.xiaobot.net/paper/oneCompany/post';
    const params = {
        limit: 200,
        offset: 0,
        tag_name: '',
        keyword: '',
        // Evaluates to "created_at undefined", the same value as in the sample URL
        // above. Presumably this mirrors the web client; these parameters are also
        // what the request signature is computed over, so keep it as is.
        order_by: "created_at " + undefined,
    };
    send_request.call('GET', url, params, (e) => {
        const payload = e && e.data;
        const posts = payload && payload.data;
        if (!Array.isArray(posts)) {
            // E.g. an error payload instead of a list: report it instead of crashing on `.length`.
            console.error('Unexpected post list response:', String(JSON.stringify(payload)).substring(0, 200));
            return;
        }
        console.log('data', posts.length, posts.map(d => JSON.stringify(d).substring(0, 100)));

        // Pick one file name per post; a repeated title gets a uuid suffix so that
        // posts sharing a title do not overwrite each other.
        const fileNameByUuid = {};
        const usedNames = new Set();
        for (const post of posts) {
            let name = _toFileName(post.title);
            if (usedNames.has(name)) {
                name = name + '-' + String(post.uuid).slice(0, 8);
            }
            usedNames.add(name);
            fileNameByUuid[post.uuid] = name;
        }

        const outputDir = path.join(__dirname, 'output');
        fs.mkdirSync(outputDir, { recursive: true });

        for (const post of posts) {
            // `content` is inserted as-is: it is already HTML. Anything that is not a
            // string is treated as an empty body.
            let contentHtml = typeof post.content === 'string' ? post.content : '';
            for (const uuid of Object.keys(fileNameByUuid)) {
                // https://xiaobot.net/post/83e3da77-0c09-4ecb-a8dd-3d7e5d191a6d
                // split/join rewrites every occurrence and, unlike String.replace,
                // gives `$` sequences in the target no special meaning.
                const localLink = './' + encodeURIComponent(fileNameByUuid[uuid]) + '.html';
                contentHtml = contentHtml.split('https://xiaobot.net/post/' + uuid).join(localLink);
            }
            const html = `<!DOCTYPE html>
<html lang="zh-cn">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${_escapeHtml(post.title)}</title>
<link rel="stylesheet" href="../assets/reader.css">
</head>
<body>
<div class="post" style="padding: 20px;">${contentHtml}</div>
</body>
</html>`;
            const baseName = fileNameByUuid[post.uuid];
            fs.writeFileSync(path.join(outputDir, baseName + '.json'), JSON.stringify(post, null, 4), 'utf-8');
            fs.writeFileSync(path.join(outputDir, baseName + '.html'), html, 'utf-8');
        }
    });
}
|
||||
|
||||
// Script entry point: the crawl starts as soon as this file is executed.
getList()
|
0
小报童/output/.gitkeep
Normal file
0
小报童/output/.gitkeep
Normal file
108
小报童/package-lock.json
generated
Normal file
108
小报童/package-lock.json
generated
Normal file
@@ -0,0 +1,108 @@
|
||||
{
|
||||
"name": "1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"axios": "^1.7.2",
|
||||
"js-md5": "^0.8.3"
|
||||
}
|
||||
},
|
||||
"node_modules/asynckit": {
|
||||
"version": "0.4.0",
|
||||
"resolved": "https://registry.npmmirror.com/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.7.2",
|
||||
"resolved": "https://registry.npmmirror.com/axios/-/axios-1.7.2.tgz",
|
||||
"integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.6",
|
||||
"form-data": "^4.0.0",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/combined-stream": {
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmmirror.com/combined-stream/-/combined-stream-1.0.8.tgz",
|
||||
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
|
||||
"dependencies": {
|
||||
"delayed-stream": "~1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/delayed-stream": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
|
||||
"engines": {
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
"version": "1.15.6",
|
||||
"resolved": "https://registry.npmmirror.com/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
||||
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "individual",
|
||||
"url": "https://github.com/sponsors/RubenVerborgh"
|
||||
}
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=4.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"debug": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/js-md5": {
|
||||
"version": "0.8.3",
|
||||
"resolved": "https://registry.npmmirror.com/js-md5/-/js-md5-0.8.3.tgz",
|
||||
"integrity": "sha512-qR0HB5uP6wCuRMrWPTrkMaev7MJZwJuuw4fnwAzRgP4J4/F8RwtodOKpGp4XpqsLBFzzgqIO42efFAyz2Et6KQ=="
|
||||
},
|
||||
"node_modules/mime-db": {
|
||||
"version": "1.52.0",
|
||||
"resolved": "https://registry.npmmirror.com/mime-db/-/mime-db-1.52.0.tgz",
|
||||
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/mime-types": {
|
||||
"version": "2.1.35",
|
||||
"resolved": "https://registry.npmmirror.com/mime-types/-/mime-types-2.1.35.tgz",
|
||||
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
|
||||
"dependencies": {
|
||||
"mime-db": "1.52.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||
}
|
||||
}
|
||||
}
|
6
小报童/package.json
Normal file
6
小报童/package.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"axios": "^1.7.2",
|
||||
"js-md5": "^0.8.3"
|
||||
}
|
||||
}
|
74
小报童/utils/send_request.js
Normal file
74
小报童/utils/send_request.js
Normal file
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
* 小报童请求加密逻辑
|
||||
* decode from: https://static.xiaobot.net/resource/xiaobot-web/js/reader.e060c116.js
|
||||
*
|
||||
* @since 2024.06.25
|
||||
*/
|
||||
const md5 = require('js-md5');
|
||||
const request = require('request');
|
||||
const axios = require('axios');
|
||||
|
||||
// 抽离出的方法
|
||||
/**
 * Return the access token that is sent in the `Authorization` header.
 *
 * The token is read from the `XIAOBOT_ACCESS_TOKEN` environment variable when
 * it is set to a non-empty value, so it does not have to be written into this
 * file. Otherwise the placeholder below is returned; it can still be replaced
 * by hand with the `access_token` taken from the browser's developer tools.
 *
 * @returns {string} The access token, or the placeholder text.
 */
function getAccessToken() {
    return process.env.XIAOBOT_ACCESS_TOKEN || '填写网页F12中取得的 access_token';
}
|
||||
|
||||
/**
 * Send a signed request to the xiaobot API and pass the response to `callback`.
 *
 * Every request carries the `timestamp`, `api-key`, `app-version`, `sign` and
 * `Authorization` headers. `data` is sent as the request body for
 * POST/PUT/PATCH and as query parameters for every other method.
 *
 * @param {string} method - HTTP method; compared case-insensitively.
 * @param {string} url - Request URL.
 * @param {Object} data - Request parameters; also the input of the signature.
 * @param {function(Object): void} callback - Invoked with the axios response when
 *   the request succeeds (also when the API answers with code 411).
 * @returns {Promise<Object|undefined>} Resolves with the axios response, or with
 *   `undefined` when the request failed (the failure is logged and `callback`
 *   is not invoked). An exception thrown by `callback` rejects this promise.
 */
function call(method, url, data, callback) {
    // Seconds since the epoch (fractional); the same value goes into the header and the signature.
    const timestamp = Date.now() / 1000;
    const sign = _getSign(data, timestamp);
    const sendsBody = ["POST", "PUT", "PATCH"].includes(String(method).toUpperCase());
    return axios.request({
        method: method,
        url: url,
        params: sendsBody ? null : data,
        data: sendsBody ? data : null,
        headers: {
            timestamp: timestamp,
            "api-key": "xiaobot_web",
            "app-version": "0.1",
            sign: sign,
            Authorization: "Bearer " + getAccessToken() // web client: o.a.state.user.user.access_token
        }
    }).then((response) => {
        if (response.data && response.data.code === 411) {
            // Code 411: the API asks the user to log in first. The web client also
            // runs o.a.commit("user/setUser", {}) at this point.
            console.log("请先登录");
        }
        callback(response);
        return response;
    }, (error) => {
        // Registered as the rejection handler of the request only, so exceptions
        // thrown by `callback` are not mislabeled as request failures.
        const status = error && error.response ? error.response.status : 'no response';
        console.error('xiaobot request failed:', method, url, status, error && error.message);
    });
}
|
||||
|
||||
/**
 * Compute the `sign` header for a request.
 *
 * The parameters are ordered with `_ksort`, every parameter whose value is not
 * `undefined` is rendered as `key=value`, the pairs are joined with `&`, a
 * fixed suffix and the timestamp are appended, and the result is hashed with
 * `md5`.
 *
 * `md5` is js-md5 (https://github.com/emn178/js-md5). According to the notes
 * left with the decoded code, the web client bundles js-md5 0.7.3 in
 * https://static.xiaobot.net/resource/xiaobot-web/js/chunk-core.25cf2aec.js.
 *
 * @param {Object} [data] - Request parameters; a missing value is signed as "no parameters".
 * @param {number|string} timestamp - The value that is sent in the `timestamp` header.
 * @returns {string} The value returned by `md5` for the signing string.
 */
function _getSign(data, timestamp) {
    // A request without parameters must still be signable instead of throwing.
    const sorted = _ksort(data || {});
    const query = Object.keys(sorted)
        .filter((key) => sorted[key] !== undefined)
        .map((key) => key + "=" + sorted[key])
        .join("&");
    // The suffix (including its trailing dot) is taken verbatim from the decoded
    // client code; presumably it is the web client's signing salt.
    return md5(query + "dbbc1dd37360b4084c3a69346e0ce2b2." + timestamp);
}
|
||||
|
||||
/**
 * Return a shallow copy of `data` whose keys were inserted in sorted order
 * (default `Array.prototype.sort` order, i.e. by UTF-16 code units).
 *
 * The input object is not modified.
 *
 * @param {Object} [data] - Object to copy; `null`/`undefined` yields an empty object.
 * @returns {Object} New object with the same own enumerable keys and values.
 */
function _ksort(data) {
    const sorted = {};
    if (data == null) {
        return sorted;
    }
    // for...of walks the sorted key list itself; for...in over an array would
    // also visit any enumerable property added to Array.prototype.
    for (const key of Object.keys(data).sort()) {
        sorted[key] = data[key];
    }
    return sorted;
}
|
||||
|
||||
// const sign = _getSign({}, Date.now())
|
||||
// console.log('sign', sign)
|
||||
|
||||
// Only `call` is exported; the other functions in this file are not part of the module's API.
module.exports = { call };
|
Reference in New Issue
Block a user