1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

财智Aisino微信小程序音频课程下载 nodejs脚本

This commit is contained in:
2024-02-01 22:12:09 +08:00
parent 75be10be20
commit 4f4917a940
8 changed files with 593 additions and 0 deletions

196
utils/base64Utils.js Normal file
View File

@@ -0,0 +1,196 @@
// Module-wide mode switches, overwritten by base64_encode / base64_decode on
// every call:
//   hexIn  - truthy: encoder input is an array of numbers instead of a string.
//   hexOut - truthy: decoder output is an array of numbers instead of a string.
var hexIn = false;
var hexOut = false;

// The 64 base64 symbols; a symbol's position is its 6-bit value.
var base64EncodeChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
    "abcdefghijklmnopqrstuvwxyz" +
    "0123456789+/";

// Reverse lookup indexed by ASCII code (0..127): the 6-bit value of a base64
// symbol, or -1 for every code that is not a base64 symbol.
var base64DecodeChars = [
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
    /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    /* 0x40 */ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
];
/**
 * Base64-encode a sequence of bytes.
 *
 * Reads the module flag `hexIn`: when truthy, `str` is indexed like an array
 * of numbers; otherwise it is read with `charCodeAt`. Only the low 8 bits of
 * each value contribute to the output.
 *
 * @param {string|number[]} str - Byte string or array of byte values.
 * @returns {string} Base64 text, padded with "=" to a multiple of four.
 */
function base64encode(str) {
    const total = str.length;
    const alphabet = base64EncodeChars;
    const byteAt = (pos) => (hexIn ? str[pos] : str.charCodeAt(pos));
    const pieces = [];

    for (let pos = 0; pos < total; pos += 3) {
        const remaining = total - pos;
        const first = byteAt(pos) & 0xff;
        pieces.push(alphabet.charAt(first >> 2));

        if (remaining === 1) {
            // One trailing byte: two symbols plus two padding characters.
            pieces.push(alphabet.charAt((first & 0x3) << 4), "==");
            break;
        }

        const second = byteAt(pos + 1);
        pieces.push(alphabet.charAt(((first & 0x3) << 4) | ((second & 0xF0) >> 4)));

        if (remaining === 2) {
            // Two trailing bytes: three symbols plus one padding character.
            pieces.push(alphabet.charAt((second & 0xF) << 2), "=");
            break;
        }

        const third = byteAt(pos + 2);
        pieces.push(
            alphabet.charAt(((second & 0xF) << 2) | ((third & 0xC0) >> 6)),
            alphabet.charAt(third & 0x3F)
        );
    }

    return pieces.join("");
}
/**
 * Decode base64 text into bytes.
 *
 * Characters that are not base64 symbols (whitespace, line breaks, any
 * non-ASCII character) are skipped. Decoding stops at the first "=" found in
 * the third or fourth position of a group, or when the input runs out.
 *
 * Reads the module flag `hexOut`: when truthy the result is an array of byte
 * values, otherwise a string with one character per byte.
 *
 * @param {string} str - Base64 text.
 * @returns {string|number[]} Decoded bytes.
 */
function base64decode(str) {
    var PAD = 61; // "="
    var len = str.length;
    var i = 0;
    var out = hexOut ? [] : "";
    var c1, c2, c3, c4;

    // 6-bit value of a character code, or -1 when it is not a base64 symbol.
    // The lookup table only covers codes 0..127; anything else (including the
    // NaN returned by charCodeAt past the end of the string) is reported as
    // invalid instead of being masked into the table, which used to decode
    // such characters as a zero sextet or alias them onto a valid symbol.
    function sextetOf(code) {
        return code < 128 ? base64DecodeChars[code] : -1;
    }

    function emit(charCode) {
        if (hexOut) {
            out.push(charCode);
        } else {
            out += String.fromCharCode(charCode);
        }
    }

    while (i < len) {
        /* c1 */
        do {
            c1 = sextetOf(str.charCodeAt(i++));
        } while (i < len && c1 == -1);
        if (c1 == -1)
            break;

        /* c2 */
        do {
            c2 = sextetOf(str.charCodeAt(i++));
        } while (i < len && c2 == -1);
        if (c2 == -1)
            break;
        emit((c1 << 2) | ((c2 & 0x30) >> 4));

        /* c3 */
        do {
            c3 = str.charCodeAt(i++);
            if (c3 == PAD)
                return out;
            c3 = sextetOf(c3);
        } while (i < len && c3 == -1);
        if (c3 == -1)
            break;
        emit(((c2 & 0XF) << 4) | ((c3 & 0x3C) >> 2));

        /* c4 */
        do {
            c4 = str.charCodeAt(i++);
            if (c4 == PAD)
                return out;
            c4 = sextetOf(c4);
        } while (i < len && c4 == -1);
        if (c4 == -1)
            break;
        emit(((c3 & 0x03) << 6) | c4);
    }
    return out;
}
/**
 * Re-encode UTF-16 code units as UTF-8 style bytes.
 *
 * Each code unit is handled on its own:
 *   0x0001..0x007F -> copied through unchanged,
 *   above 0x07FF   -> three bytes,
 *   anything else  -> two bytes (this includes 0x0000).
 * Surrogate halves are therefore encoded individually as three bytes each.
 *
 * Reads the module flag `hexIn`: when truthy, `str` is indexed as an array of
 * numbers and an array is returned; otherwise a string is read and returned.
 *
 * @param {string|number[]} str - Code units to encode.
 * @returns {string|number[]} One element/character per produced byte.
 */
function utf16to8(str) {
    const arrayMode = hexIn;
    let encoded = arrayMode ? [] : "";

    const appendByte = (value) => {
        if (arrayMode) {
            encoded.push(value);
        } else {
            encoded += String.fromCharCode(value);
        }
    };

    for (let pos = 0, total = str.length; pos < total; pos++) {
        const unit = arrayMode ? str[pos] : str.charCodeAt(pos);

        if (unit >= 0x0001 && unit <= 0x007F) {
            // Single-byte range: pass the original element through untouched.
            if (arrayMode) {
                encoded.push(str[pos]);
            } else {
                encoded += str.charAt(pos);
            }
            continue;
        }

        if (unit > 0x07FF) {
            appendByte(0xE0 | ((unit >> 12) & 0x0F));
            appendByte(0x80 | ((unit >> 6) & 0x3F));
            appendByte(0x80 | ((unit >> 0) & 0x3F));
            continue;
        }

        appendByte(0xC0 | ((unit >> 6) & 0x1F));
        appendByte(0x80 | ((unit >> 0) & 0x3F));
    }

    return encoded;
}
/**
 * Decode UTF-8 bytes into UTF-16 code units.
 *
 * Handles 1-, 2-, 3- and 4-byte sequences. A 4-byte sequence (lead byte
 * 0xF0..0xF7) is emitted as a surrogate pair; previously such sequences were
 * dropped, so characters outside the Basic Multilingual Plane disappeared
 * from the decoded text. Stray continuation bytes and the invalid lead bytes
 * 0xF8..0xFF are still skipped. Continuation bytes are not validated.
 *
 * Reads the module flag `hexOut`: when truthy, `str` is indexed as an array
 * of byte values and an array of code units is returned; otherwise a byte
 * string is read and a string is returned.
 *
 * @param {string|number[]} str - UTF-8 bytes.
 * @returns {string|number[]} Decoded UTF-16 code units.
 */
function utf8to16(str) {
    var out = hexOut ? [] : "";
    var len = str.length;
    var i = 0;
    var c, char2, char3, char4, codePoint;

    function nextByte() {
        return hexOut ? str[i++] : str.charCodeAt(i++);
    }

    function emit(unit) {
        if (hexOut) {
            out.push(unit);
        } else {
            out += String.fromCharCode(unit);
        }
    }

    while (i < len) {
        c = nextByte();
        switch (c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                // 0xxxxxxx - pass the original element through untouched
                hexOut ? out.push(str[i - 1]) : out += str.charAt(i - 1);
                break;
            case 12: case 13:
                // 110x xxxx 10xx xxxx
                char2 = nextByte();
                emit(((c & 0x1F) << 6) | (char2 & 0x3F));
                break;
            case 14:
                // 1110 xxxx 10xx xxxx 10xx xxxx
                char2 = nextByte();
                char3 = nextByte();
                emit(((c & 0x0F) << 12) |
                    ((char2 & 0x3F) << 6) |
                    ((char3 & 0x3F) << 0));
                break;
            case 15:
                // 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
                if ((c & 0xF8) !== 0xF0) {
                    break; // 0xF8..0xFF never start a sequence: skip the byte
                }
                char2 = nextByte();
                char3 = nextByte();
                char4 = nextByte();
                codePoint = ((c & 0x07) << 18) |
                    ((char2 & 0x3F) << 12) |
                    ((char3 & 0x3F) << 6) |
                    (char4 & 0x3F);
                // Only U+10000..U+10FFFF can be written as a surrogate pair;
                // anything else is malformed and skipped.
                if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
                    codePoint -= 0x10000;
                    emit(0xD800 + (codePoint >> 10));
                    emit(0xDC00 + (codePoint & 0x3FF));
                }
                break;
        }
    }
    return out;
}
/**
 * Render every character of a string as a "\xNN " hex escape, for debugging.
 *
 * Codes are written in lowercase hex, zero-padded to at least two digits, and
 * a CRLF is appended after every eighth character.
 *
 * @param {string} str - Text to dump.
 * @returns {string} The hex dump.
 */
function CharToHex(str) {
    const total = str.length;
    const pieces = [];

    for (let pos = 0; pos < total; pos++) {
        const hex = str.charCodeAt(pos).toString(16).padStart(2, "0");
        pieces.push("\\x" + hex + " ");

        // Break the line after each group of eight characters.
        if ((pos + 1) % 8 === 0) {
            pieces.push("\r\n");
        }
    }

    return pieces.join("");
}
/**
 * Base64-encode text or raw bytes.
 *
 * Stores `hI` in the module flag `hexIn` (it stays set after the call).
 *
 * @param {string|number[]} src - Text, or an array of byte values when `hI`
 *     is truthy.
 * @param {boolean} [hI] - Truthy: `src` is already bytes and is encoded
 *     as-is. Falsy: `src` is first converted with utf16to8.
 * @returns {string} Base64 text.
 */
function base64_encode(src, hI) {
    hexIn = hI;

    if (hexIn) {
        return base64encode(src);
    }

    const utf8Bytes = utf16to8(src);
    return base64encode(utf8Bytes);
}
/**
 * Decode base64 into text or raw bytes.
 *
 * Stores `hO` in the module flag `hexOut` (it stays set after the call).
 * The decoded bytes are passed through utf8to16 when `hO` is falsy, or when
 * `out_de` equals 'u' or 'd'; otherwise they are returned unchanged.
 *
 * @param {string} src - Base64 text.
 * @param {boolean} [hO] - Truthy: produce an array of numbers, not a string.
 * @param {string} [out_de] - 'u' or 'd' forces the utf8to16 pass even when
 *     `hO` is truthy.
 * @returns {string|number[]} Decoded value.
 */
function base64_decode(src, hO, out_de) {
    hexOut = hO;

    const rawBytes = base64decode(src);
    const needsUtf8Pass = !hexOut || out_de == 'u' || out_de == 'd';

    return needsUtf8Pass ? utf8to16(rawBytes) : rawBytes;
}
// Public API of this module: only the two wrappers that set the hexIn/hexOut
// mode and apply the UTF-8 conversion are exported. The lower-level
// base64decode / base64encode are not exported (their entries below are
// commented out).
module.exports = {
// base64decode,
// base64encode,
base64_encode,
base64_decode,
}

View File

@@ -0,0 +1,8 @@
output/**/*.json
output/**/*.html
output/**/*.mp3
output/**/*.mp4
output/**/*.m4a
*.postman_collection.json
config.js

View File

@@ -0,0 +1,5 @@
## 财智Aisino微信小程序音频课程下载
相关接口通过 HTTPDebug 抓包得到
编写时间2024.01.31

View File

@@ -0,0 +1,4 @@
// Credential template with placeholder values; replace both with your own.
// The downloader script sends `token` as the `token` request header and puts
// `userId` into the request query string.
// NOTE(review): the downloader requires './config' and config.js is listed in
// the ignore file, so this presumably has to be copied to config.js - confirm.
const token = 'bearer xxxxxxxxxxxxxx'
const userId = 'xxxxxxxxxxxxxxxxxxxxxxxxxx'
module.exports = { token, userId }

View File

@@ -0,0 +1,240 @@
var https = require('follow-redirects').https;
var fs = require('fs');
const path = require('path');
const base64Utils = require('../utils/base64Utils')
const { token, userId } = require('./config')
// Output layout. Paths are built by plain string concatenation, so every
// directory constant must end with "/".
const outputDir = './output/'
const courseDetailDir = 'courseDetail/'
const downloadMediaDir = 'downloadMedia/'

// Start the pipeline. The promise returned by main() must not be left
// floating: report any failure and make the process exit with a failing code.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})
/**
 * Run the whole download pipeline in order:
 *   1. fetch the course list,
 *   2. fetch the detail of every course in that list,
 *   3. parse the saved details into brief/detail HTML plus an index file,
 *   4. download the audio files.
 *
 * The output directories are created first so that a fresh checkout (or a
 * cleaned output folder) does not fail with ENOENT on the first write.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the saved course list does not contain `data.childs`.
 */
async function main() {
    fs.mkdirSync(outputDir + courseDetailDir, { recursive: true })
    fs.mkdirSync(outputDir + downloadMediaDir, { recursive: true })

    // Step 1: course list
    await getCourseList(1594)

    // Step 2: course details
    const courseListJson = fs.readFileSync(outputDir + 'courseList.json', 'utf8')
    const courseListData = JSON.parse(courseListJson)
    const courseList = courseListData && courseListData.data && courseListData.data.childs
    if (!Array.isArray(courseList)) {
        // Typically an error payload (e.g. expired token) instead of a course list.
        throw new Error('courseList.json has no data.childs array: ' + courseListJson.slice(0, 200))
    }
    await getCourseDetail(courseList)

    // Step 3: parse the saved detail files
    await parseCourseDetailJson()

    // Step 4: download the audio and save the course material
    await downloadMedia()

    console.log('完成')
}
/**
 * Request the product info of a course collection (serviceType=3) and save
 * the raw response as `courseList.json` in the output directory.
 *
 * NOTE(review): `rejectUnauthorized: false` turns off TLS certificate
 * verification for this request while the token header is being sent -
 * confirm this is intentional.
 *
 * @param {number|string} courseId - Id of the course collection.
 * @returns {Promise<void>}
 */
async function getCourseList(courseId) {
    const endpoint = '/czw-api/api-hypt-product/hypt/product/getProductInfo'
    const query = '?id=' + courseId + '&serviceType=3&userId=' + userId
    const targetFile = outputDir + 'courseList.json'

    const requestOptions = {
        method: 'POST',
        hostname: 'wx.hxdkfp.com',
        path: endpoint + query,
        headers: { token: token },
        rejectUnauthorized: false,
    }

    await httpsRequest(requestOptions, targetFile)
}
/**
 * Fetch the detail (serviceType=1) of every course in the list, one request
 * at a time, saving each raw response as `<id>.json` in the course-detail
 * directory.
 *
 * Only `id` and `serviceName` of each entry are read. A failed request is
 * logged and the loop moves on to the next course; the short pause between
 * requests happens only after a successful one.
 *
 * @param {Array<{id: (number|string), serviceName: string}>} courseList -
 *     The `data.childs` array of the course-list response.
 * @returns {Promise<void>}
 */
async function getCourseDetail(courseList) {
    const endpoint = '/czw-api/api-hypt-product/hypt/product/getProductInfo'

    for (const [index, course] of courseList.entries()) {
        const { id: courseDetailId, serviceName: courseDetailName } = course
        console.log(`${index}\t| ${courseDetailId} | ${courseDetailName}`)

        try {
            const requestOptions = {
                method: 'POST',
                hostname: 'wx.hxdkfp.com',
                path: endpoint + '?id=' + courseDetailId + '&serviceType=1&userId=' + userId,
                headers: { token: token },
                rejectUnauthorized: false,
            }
            const targetFile = outputDir + courseDetailDir + courseDetailId + '.json'

            await httpsRequest(requestOptions, targetFile)
            console.log('\t完成')
            await waitAMinute()
        } catch (err) {
            console.error(err)
        }
    }
}
/**
 * Turn the saved course-detail responses into download inputs.
 *
 * For every `*.json` file in the course-detail directory this decodes the
 * base64 `briefString` / `detailString` fields, writes them as
 * `<name>-简介.html` and `<name>-详情.html` into the media directory, and
 * collects `{ name, audioUrl }`. The collected list is written to
 * `parsedCourseDetails.json` in the output directory.
 *
 * Entries that are not `.json` files, cannot be parsed, or carry no usable
 * `data` object (e.g. a saved error response) are reported and skipped
 * instead of aborting the whole run.
 *
 * @returns {Promise<void>}
 */
async function parseCourseDetailJson() {
    const dirPath = outputDir + courseDetailDir
    const infoDir = []

    // Only the saved responses are of interest; placeholder or stray files
    // in the directory must not be fed to JSON.parse.
    const files = fs.readdirSync(dirPath)
        .filter((file) => path.extname(file).toLowerCase() === '.json')

    for (const file of files) {
        const filePath = path.join(dirPath, file)

        let data
        try {
            data = JSON.parse(fs.readFileSync(filePath, 'utf8')).data
        } catch (err) {
            console.error(`${file}: skipped, not valid JSON (${err.message})`)
            continue
        }
        if (!data || typeof data.serviceName !== 'string') {
            console.error(`${file}: skipped, response has no course data`)
            continue
        }

        // A missing brief/detail decodes to an empty document.
        const brief = base64Utils.base64_decode(data.briefString || '')
        const detail = base64Utils.base64_decode(data.detailString || '')
        const filename = deleteFilenameUnsupportChar(data.serviceName)

        console.log(file)
        infoDir.push({
            name: filename,
            audioUrl: data.audioUrl,
        })
        fs.writeFileSync(outputDir + downloadMediaDir + filename + '-简介.html', brief, 'utf8')
        fs.writeFileSync(outputDir + downloadMediaDir + filename + '-详情.html', detail, 'utf8')
    }

    fs.writeFileSync(outputDir + 'parsedCourseDetails.json', JSON.stringify(infoDir, null, 4), 'utf8')
}
/**
 * Download the audio of every entry listed in `parsedCourseDetails.json`
 * into the media directory, one file at a time with a short pause after
 * each download attempt.
 *
 * The target file is named `<name><ext>`, where `<ext>` is the extension of
 * the URL's path (query string and fragment are ignored). Entries without an
 * `audioUrl` are reported and skipped. A failed download is logged and the
 * loop continues with the next entry.
 *
 * @returns {Promise<void>}
 */
async function downloadMedia() {
    const parsedCourseDetails = JSON.parse(fs.readFileSync(outputDir + 'parsedCourseDetails.json', 'utf8'))
    for (let i = 0; i < parsedCourseDetails.length; i++) {
        const item = parsedCourseDetails[i]
        const audioUrl = item.audioUrl
        const filename = item.name

        if (typeof audioUrl !== 'string' || audioUrl === '') {
            console.error(`${i}\t| ${filename} | skipped: no audioUrl`)
            continue
        }

        const fileNameWithExt = filename + getAudioExtension(audioUrl)
        console.log(`${i}\t| ${fileNameWithExt} | ${item.audioUrl}`)
        try {
            await downloadFile(audioUrl, outputDir + downloadMediaDir + fileNameWithExt)
        } catch (err) {
            console.error(err)
        }
        await waitAMinute()
    }
}

// Extension (including the dot) of the last path segment of a URL, or '' when
// there is none. Only the path is inspected, so a query string or a dot in
// the host name never ends up in the file name.
function getAudioExtension(url) {
    let pathname
    try {
        pathname = new URL(url).pathname
    } catch (err) {
        // Not an absolute URL: fall back to cutting off query/fragment by hand.
        pathname = url.split(/[?#]/)[0]
    }
    return path.posix.extname(pathname)
}
/**
 * Shared request helper: send an HTTPS request without a body, collect the
 * whole response, write it to a file and resolve with it.
 *
 * The promise rejects on a request error, a response stream error, or a
 * failure to write the file. Previously none of these settled the promise,
 * and a request error was an uncaught exception.
 *
 * @param {object} options - Options passed unchanged to `https.request`.
 * @param {string} filename - Path of the file that receives the response body.
 * @returns {Promise<string>} The response body decoded as a string.
 */
async function httpsRequest(options, filename) {
    return await new Promise((resolve, reject) => {
        const req = https.request(options, (res) => {
            const chunks = []
            res.on('data', (chunk) => {
                chunks.push(chunk)
            })
            res.on('end', () => {
                try {
                    const result = Buffer.concat(chunks).toString()
                    fs.writeFileSync(filename, result, 'utf8')
                    resolve(result)
                } catch (err) {
                    // A throw inside this callback would otherwise escape
                    // the promise entirely.
                    reject(err)
                }
            })
            res.on('error', reject)
        })
        req.on('error', reject)
        req.end()
    })
}
/**
 * Pause between requests. Despite the name, the delay is 100 milliseconds.
 *
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
async function waitAMinute() {
    const pauseMs = 100
    await new Promise((done) => {
        setTimeout(done, pauseMs)
    })
}
/**
 * Make a course title usable as a file name.
 *
 *   ? : * < > |   are removed,
 *   / and \       become "_" (otherwise they would be read as path separators
 *                 when the name is concatenated into a path),
 *   tab, CR, LF   become a space,
 *   "             becomes two single quotes.
 *
 * @param {string} name - Raw title.
 * @returns {string} Sanitized file name (without extension).
 */
function deleteFilenameUnsupportChar(name) {
    return name
        .replace(/[?:*<>|]/g, "")
        .replace(/[\/\\]/g, "_")
        .replace(/[\t\r\n]/g, " ")
        .replace(/"/g, "''")
}
/**
 * Download a URL to a local file.
 *
 * The promise resolves once the file has been completely written. It rejects
 * with the underlying Error, after closing the write stream and removing the
 * partial file, when:
 *   - the request fails or cannot be created,
 *   - the response status is not 2xx,
 *   - the response stream fails,
 *   - the file cannot be written.
 *
 * @param {string} fileUrl - URL of the file to download.
 * @param {string} downloadPath - Path of the file to save to.
 * @returns {Promise<void>}
 */
async function downloadFile(fileUrl, downloadPath) {
    return await new Promise((resolve, reject) => {
        const file = fs.createWriteStream(downloadPath)
        let request = null
        let settled = false

        // Single failure path: stop the transfer, close the stream, delete
        // whatever was written, then reject with the original error.
        const fail = (err) => {
            if (settled) return
            settled = true
            if (request) request.destroy()
            const removePartialFile = () => {
                fs.unlink(downloadPath, () => {
                    console.error(`\t下载失败: ${err.message}`)
                    reject(err)
                })
            }
            if (file.destroyed) {
                removePartialFile()
            } else {
                // Unlink only after the stream has released the file.
                file.once('close', removePartialFile)
                file.destroy()
            }
        }

        file.on('error', fail)
        // "finish" means everything piped in has been flushed to the file.
        file.on('finish', () => {
            if (settled) return
            settled = true
            file.close()
            console.log("\t文件下载完毕")
            resolve()
        })

        try {
            request = https.request(fileUrl, (response) => {
                const status = response.statusCode
                if (status < 200 || status >= 300) {
                    // Do not save an error page as a media file.
                    response.resume()
                    fail(new Error(`unexpected HTTP status ${status}`))
                    return
                }
                response.on('error', fail)
                response.pipe(file)
            })
            request.on('error', fail)
            request.end()
        } catch (err) {
            // e.g. an invalid URL makes https.request throw synchronously
            fail(err)
        }
    })
}

View File

@@ -0,0 +1,140 @@
{
"info": {
"_postman_id": "aca35348-a1a2-471c-8893-1264e3b641a1",
"name": "财智Aisino微信小程序 接口爬取",
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json",
"_exporter_id": "24657033"
},
"item": [
{
"name": "已购课程",
"request": {
"method": "GET",
"header": [
{
"key": "token",
"value": "bearer [替换为你自己的token]",
"type": "text"
}
],
"url": {
"raw": "https://wx.hxdkfp.com/czw-api/api-hypt-order/hypt/order/getPurchasedCourse/1/2?phoneNumber=[替换为你自己的phoneNumber]&userId=[替换为你自己的userId]",
"protocol": "https",
"host": [
"wx",
"hxdkfp",
"com"
],
"path": [
"czw-api",
"api-hypt-order",
"hypt",
"order",
"getPurchasedCourse",
"1",
"2"
],
"query": [
{
"key": "phoneNumber",
"value": "[替换为你自己的phoneNumber]"
},
{
"key": "userId",
"value": "[替换为你自己的userId]"
}
]
}
},
"response": []
},
{
"name": "课程列表",
"request": {
"method": "POST",
"header": [
{
"key": "token",
"value": "bearer [替换为你自己的token]",
"type": "text"
}
],
"url": {
"raw": "https://wx.hxdkfp.com/czw-api/api-hypt-product/hypt/product/getProductInfo?id=1594&serviceType=3&userId=[替换为你自己的userId]",
"protocol": "https",
"host": [
"wx",
"hxdkfp",
"com"
],
"path": [
"czw-api",
"api-hypt-product",
"hypt",
"product",
"getProductInfo"
],
"query": [
{
"key": "id",
"value": "1594"
},
{
"key": "serviceType",
"value": "3"
},
{
"key": "userId",
"value": "[替换为你自己的userId]"
}
]
}
},
"response": []
},
{
"name": "课程详情",
"request": {
"method": "POST",
"header": [
{
"key": "token",
"value": "bearer [替换为你自己的token]",
"type": "text"
}
],
"url": {
"raw": "https://wx.hxdkfp.com/czw-api/api-hypt-product/hypt/product/getProductInfo?id=3380&serviceType=1&userId=[替换为你自己的userId]",
"protocol": "https",
"host": [
"wx",
"hxdkfp",
"com"
],
"path": [
"czw-api",
"api-hypt-product",
"hypt",
"product",
"getProductInfo"
],
"query": [
{
"key": "id",
"value": "3380"
},
{
"key": "serviceType",
"value": "1"
},
{
"key": "userId",
"value": "[替换为你自己的userId]"
}
]
}
},
"response": []
}
]
}