1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

财智Aisino微信小程序音频课程下载 nodejs脚本

This commit is contained in:
2024-02-01 22:12:09 +08:00
parent 75be10be20
commit 4f4917a940
8 changed files with 593 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
output/**/*.json
output/**/*.html
output/**/*.mp3
output/**/*.mp4
output/**/*.m4a
*.postman_collection.json
config.js

View File

@@ -0,0 +1,5 @@
## 财智Aisino微信小程序音频课程下载
相关接口通过 HTTPDebug 抓包得到
编写时间:2024.01.31

View File

@@ -0,0 +1,4 @@
// Template for the local credentials file; both values below are placeholders
// and must be replaced with the ones captured for your own account.
// Value sent as the `token` request header, in the form "bearer <token>".
const token = 'bearer xxxxxxxxxxxxxx'
// Value appended to API requests as the `userId` query parameter.
const userId = 'xxxxxxxxxxxxxxxxxxxxxxxxxx'
module.exports = { token, userId }

View File

@@ -0,0 +1,240 @@
var https = require('follow-redirects').https;
var fs = require('fs');
const path = require('path');
const base64Utils = require('../utils/base64Utils')
const { token, userId } = require('./config')
const outputDir = './output/' // root directory for everything the script writes; must end with /
const courseDetailDir = 'courseDetail/' // sub-directory of outputDir holding one detail JSON per course; must end with /
const downloadMediaDir = 'downloadMedia/' // sub-directory of outputDir holding downloaded media and HTML pages; must end with /
// Script entry point. The rejection handler makes a failure in any step end
// the run with a logged error and a non-zero exit code instead of leaving the
// promise returned by main() unhandled.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

/**
 * Run the whole download pipeline, one step after the other:
 *   1. fetch the course list and store it as `courseList.json`;
 *   2. fetch the detail JSON of every course in that list;
 *   3. parse the detail files into `parsedCourseDetails.json` and HTML pages;
 *   4. download the audio file of every parsed course.
 *
 * @returns {Promise<void>} resolves once all four steps have finished
 */
async function main() {
    // The later steps write into these directories with writeFileSync /
    // createWriteStream, which fail when the directory is missing.
    fs.mkdirSync(outputDir + courseDetailDir, { recursive: true });
    fs.mkdirSync(outputDir + downloadMediaDir, { recursive: true });

    // Step 1: course list (1594 is the id of the course package to download)
    await getCourseList(1594);

    // Step 2: course details, driven by the list saved in step 1
    const courseListJson = fs.readFileSync(outputDir + 'courseList.json', 'utf8');
    const courseList = JSON.parse(courseListJson).data.childs;
    await getCourseDetail(courseList);

    // Step 3: parse the downloaded detail files
    await parseCourseDetailJson();

    // Step 4: download the audio and keep the course pages
    await downloadMedia();

    console.log('完成');
}
/**
 * Request the product info of a course package (serviceType=3) and store the
 * raw response body in `<outputDir>courseList.json`.
 *
 * Note: `rejectUnauthorized: false` turns off TLS certificate verification
 * for this request.
 *
 * @param {number|string} courseId id of the course package
 * @returns {Promise<void>}
 */
async function getCourseList(courseId) {
    const targetFile = outputDir + 'courseList.json';
    const requestPath = `/czw-api/api-hypt-product/hypt/product/getProductInfo?id=${courseId}&serviceType=3&userId=${userId}`;
    const requestOptions = {
        method: 'POST',
        hostname: 'wx.hxdkfp.com',
        path: requestPath,
        headers: { token },
        rejectUnauthorized: false
    };
    await httpsRequest(requestOptions, targetFile);
}
/**
 * Download the detail JSON (serviceType=1) of every entry in `courseList`,
 * one request at a time, into `<outputDir><courseDetailDir><id>.json`.
 *
 * Only `id` and `serviceName` of each entry are read. A list entry looks like:
 *   { id: 2662, serviceType: '1', serviceName: '【第559期】…', labelName: '音频', … }
 *
 * A failure for one course is logged and the loop moves on to the next one.
 *
 * @param {Array<{id: number|string, serviceName: string}>} courseList entries of the course list
 * @returns {Promise<void>}
 */
async function getCourseDetail(courseList) {
    for (let index = 0; index < courseList.length; index += 1) {
        const { id: detailId, serviceName: detailName } = courseList[index];
        console.log(`${index}\t| ${detailId} | ${detailName}`);
        try {
            const requestOptions = {
                method: 'POST',
                hostname: 'wx.hxdkfp.com',
                path: `/czw-api/api-hypt-product/hypt/product/getProductInfo?id=${detailId}&serviceType=1&userId=${userId}`,
                headers: { token },
                rejectUnauthorized: false
            };
            const targetFile = outputDir + courseDetailDir + detailId + '.json';
            await httpsRequest(requestOptions, targetFile);
            console.log('\t完成');
            // Short pause between consecutive requests.
            await waitAMinute();
        } catch (err) {
            console.error(err);
        }
    }
}
/**
 * Parse every course detail JSON file in `<outputDir><courseDetailDir>`.
 *
 * For each course it writes the base64-decoded brief and detail pages to
 * `<outputDir><downloadMediaDir><name>-简介.html` / `<name>-详情.html`, and
 * finally writes the list of `{ name, audioUrl }` records to
 * `<outputDir>parsedCourseDetails.json`.
 *
 * Entries that are not `.json` files, cannot be parsed, or carry no `data`
 * object are reported and skipped so one bad file does not abort the run.
 *
 * @returns {Promise<void>}
 */
async function parseCourseDetailJson() {
    const dirPath = outputDir + courseDetailDir;
    const mediaDirPath = outputDir + downloadMediaDir;
    const infoDir = [];

    // Only the detail responses are of interest; ignore placeholder or
    // system files that may live in the same directory.
    const files = fs.readdirSync(dirPath)
        .filter((file) => path.extname(file).toLowerCase() === '.json');

    for (const file of files) {
        const filePath = path.join(dirPath, file);

        let data;
        try {
            data = JSON.parse(fs.readFileSync(filePath, 'utf8')).data;
        } catch (err) {
            // e.g. an error page that was saved in place of a JSON response
            console.error(`\t跳过 ${file}: ${err.message}`);
            continue;
        }
        if (!data) {
            console.error(`\t跳过 ${file}: 缺少 data 字段`);
            continue;
        }

        const brief = base64Utils.base64_decode(data.briefString);
        const detail = base64Utils.base64_decode(data.detailString);
        const filename = deleteFilenameUnsupportChar(data.serviceName);

        console.log(file);
        infoDir.push({
            name: filename,
            audioUrl: data.audioUrl
        });
        fs.writeFileSync(mediaDirPath + filename + '-简介.html', brief, 'utf8');
        fs.writeFileSync(mediaDirPath + filename + '-详情.html', detail, 'utf8');
    }

    fs.writeFileSync(outputDir + 'parsedCourseDetails.json', JSON.stringify(infoDir, null, 4), 'utf8');
}
/**
 * Build the local file name for a media URL: the course name followed by the
 * extension of the URL's path (query string and fragment are ignored).
 * Returns the bare name when the path has no extension.
 */
function buildMediaFileName(name, audioUrl) {
    let pathname;
    try {
        pathname = new URL(audioUrl).pathname;
    } catch (err) {
        // Not an absolute URL: strip query string / fragment by hand.
        pathname = audioUrl.split(/[?#]/)[0];
    }
    return name + path.posix.extname(pathname);
}

/**
 * Download the audio file of every record in
 * `<outputDir>parsedCourseDetails.json` into `<outputDir><downloadMediaDir>`,
 * one file at a time with a short pause after each download.
 *
 * Records without an `audioUrl` are reported and skipped; a failed download
 * is logged and the loop continues with the next record.
 *
 * @returns {Promise<void>}
 */
async function downloadMedia() {
    const parsedCourseDetails = JSON.parse(fs.readFileSync(outputDir + 'parsedCourseDetails.json', 'utf8'));
    for (let i = 0; i < parsedCourseDetails.length; i++) {
        const item = parsedCourseDetails[i];
        const audioUrl = item.audioUrl;
        const filename = item.name;

        if (typeof audioUrl !== 'string' || audioUrl === '') {
            // JSON.stringify drops undefined values, so a course without
            // audio arrives here with no audioUrl at all.
            console.error(`${i}\t| ${filename} | 没有 audioUrl,跳过`);
            continue;
        }

        const fileNameWithExt = buildMediaFileName(filename, audioUrl);
        console.log(`${i}\t| ${fileNameWithExt} | ${audioUrl}`);
        try {
            await downloadFile(audioUrl, outputDir + downloadMediaDir + fileNameWithExt);
        } catch (err) {
            console.error(err);
        }
        await waitAMinute();
    }
}
/**
 * Shared request helper: send an HTTPS request, collect the whole response
 * body, write it to `fileneme` as UTF-8 text and resolve with that text.
 *
 * The promise rejects when the request fails (e.g. connection error), when
 * the response stream errors, or when the body cannot be written to disk.
 *
 * @param {object} options options object passed to `https.request`
 * @param {string} fileneme path of the file that receives the response body
 * @returns {Promise<string>} the response body
 */
async function httpsRequest(options, fileneme) {
    return await new Promise((resolve, reject) => {
        const req = https.request(options, (res) => {
            const chunks = [];
            res.on('data', (chunk) => {
                chunks.push(chunk);
            });
            res.on('end', () => {
                // A throw inside an event callback would bypass the promise
                // and crash the process, so turn it into a rejection.
                try {
                    const result = Buffer.concat(chunks).toString();
                    fs.writeFileSync(fileneme, result, 'utf8');
                    resolve(result);
                } catch (err) {
                    reject(err);
                }
            });
            res.on('error', reject);
        });
        // Without this handler a connection failure is emitted as an
        // unhandled 'error' event and the promise never settles.
        req.on('error', reject);
        req.end();
    });
}
/**
 * Pause between two consecutive requests.
 *
 * Despite the name, the default pause is 100 milliseconds.
 *
 * @param {number} [ms=100] how long to wait, in milliseconds
 * @returns {Promise<void>} resolves after the delay has elapsed
 */
async function waitAMinute(ms = 100) {
    await new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
/**
 * Turn a course title into a string that is safe to use as a file name.
 *
 * - `?` and `:` are removed;
 * - a tab becomes a single space;
 * - a double quote becomes two single quotes;
 * - path separators and the other characters file systems reject
 *   (`/ \ * < > |`) become `_`, so a title can never point into another
 *   directory;
 * - remaining control characters (line breaks etc.) are removed.
 *
 * @param {string} name raw course title
 * @returns {string} sanitized file name (without extension)
 */
function deleteFilenameUnsupportChar(name) {
    return name
        .replace(/[?:]/g, '')
        .replace(/\t/g, ' ')
        .replace(/"/g, "''")
        .replace(/[\\/*<>|]/g, '_')
        .replace(/[\x00-\x1f\x7f]/g, '');
}
/**
 * Download `fileUrl` over HTTPS and save the response body to `downloadPath`.
 *
 * The promise resolves once the file has been completely written and closed.
 * It rejects with an Error when the request fails, the server answers with a
 * non-2xx status, the response stream breaks, or the file cannot be written;
 * in all of those cases the partially written file is removed first.
 *
 * @param {string} fileUrl URL of the file to download
 * @param {string} downloadPath path the file is saved to
 * @returns {Promise<void>}
 */
async function downloadFile(fileUrl, downloadPath) {
    return await new Promise((resolve, reject) => {
        const file = fs.createWriteStream(downloadPath);
        // Several error sources can fire for one failure; settle only once.
        let settled = false;

        const fail = (err) => {
            if (settled) {
                return;
            }
            settled = true;
            // Delete the partial file only after the stream has released it,
            // otherwise a pending open could re-create it after the unlink.
            file.once('close', () => {
                fs.unlink(downloadPath, () => {
                    console.error(`\t下载失败: ${err.message}`);
                    reject(err);
                });
            });
            file.destroy();
        };

        file.on('error', fail);
        file.on('finish', () => {
            if (settled) {
                return;
            }
            settled = true;
            file.close(() => {
                console.log("\t文件下载完毕");
                resolve();
            });
        });

        const request = https.request(fileUrl, (response) => {
            const status = response.statusCode;
            if (status < 200 || status >= 300) {
                // Do not store an error page under the media file name.
                response.resume();
                fail(new Error(`HTTP ${status}`));
                return;
            }
            response.on('error', fail);
            response.pipe(file);
        });
        request.on('error', fail);
        request.end();
    });
}

View File

@@ -0,0 +1,140 @@
{
"info": {
"_postman_id": "aca35348-a1a2-471c-8893-1264e3b641a1",
"name": "财智Aisino微信小程序 接口爬取",
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json",
"_exporter_id": "24657033"
},
"item": [
{
"name": "已购课程",
"request": {
"method": "GET",
"header": [
{
"key": "token",
"value": "bearer [替换为你自己的token]",
"type": "text"
}
],
"url": {
"raw": "https://wx.hxdkfp.com/czw-api/api-hypt-order/hypt/order/getPurchasedCourse/1/2?phoneNumber=[替换为你自己的phoneNumber]&userId=[替换为你自己的userId]",
"protocol": "https",
"host": [
"wx",
"hxdkfp",
"com"
],
"path": [
"czw-api",
"api-hypt-order",
"hypt",
"order",
"getPurchasedCourse",
"1",
"2"
],
"query": [
{
"key": "phoneNumber",
"value": "[替换为你自己的phoneNumber]"
},
{
"key": "userId",
"value": "[替换为你自己的userId]"
}
]
}
},
"response": []
},
{
"name": "课程列表",
"request": {
"method": "POST",
"header": [
{
"key": "token",
"value": "bearer [替换为你自己的token]",
"type": "text"
}
],
"url": {
"raw": "https://wx.hxdkfp.com/czw-api/api-hypt-product/hypt/product/getProductInfo?id=1594&serviceType=3&userId=[替换为你自己的userId]",
"protocol": "https",
"host": [
"wx",
"hxdkfp",
"com"
],
"path": [
"czw-api",
"api-hypt-product",
"hypt",
"product",
"getProductInfo"
],
"query": [
{
"key": "id",
"value": "1594"
},
{
"key": "serviceType",
"value": "3"
},
{
"key": "userId",
"value": "[替换为你自己的userId]"
}
]
}
},
"response": []
},
{
"name": "课程详情",
"request": {
"method": "POST",
"header": [
{
"key": "token",
"value": "bearer [替换为你自己的token]",
"type": "text"
}
],
"url": {
"raw": "https://wx.hxdkfp.com/czw-api/api-hypt-product/hypt/product/getProductInfo?id=3380&serviceType=1&userId=[替换为你自己的userId]",
"protocol": "https",
"host": [
"wx",
"hxdkfp",
"com"
],
"path": [
"czw-api",
"api-hypt-product",
"hypt",
"product",
"getProductInfo"
],
"query": [
{
"key": "id",
"value": "3380"
},
{
"key": "serviceType",
"value": "1"
},
{
"key": "userId",
"value": "[替换为你自己的userId]"
}
]
}
},
"response": []
}
]
}