diff --git a/utils/base64Utils.js b/utils/base64Utils.js
new file mode 100644
--- /dev/null
+++ b/utils/base64Utils.js
@@ -0,0 +1,253 @@
+/**
+ * Base64 / UTF-8 helpers.
+ *
+ * Exports `base64_encode` and `base64_decode`, which convert between
+ * JavaScript (UTF-16) strings or arrays of byte values and Base64 text.
+ * The helpers keep no module-level state, so calls cannot affect each other.
+ */
+
+const BASE64_ALPHABET =
+  'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
+const PAD_CODE = 61; // char code of '='
+
+// Reverse lookup: ASCII char code -> 6-bit value, or -1 when the character is
+// not part of the Base64 alphabet.
+const BASE64_DECODE_TABLE = new Array(128).fill(-1);
+for (let i = 0; i < BASE64_ALPHABET.length; i++) {
+  BASE64_DECODE_TABLE[BASE64_ALPHABET.charCodeAt(i)] = i;
+}
+
+/**
+ * Encodes a sequence of bytes as padded Base64 text.
+ *
+ * @param {ArrayLike<number>} bytes - byte values; only the low 8 bits of each
+ *   element are used.
+ * @returns {string} Base64 text, padded with '=' to a multiple of 4 chars.
+ */
+function base64encode(bytes) {
+  const len = bytes.length;
+  let out = '';
+  for (let i = 0; i < len; i += 3) {
+    const hasSecond = i + 1 < len;
+    const hasThird = i + 2 < len;
+    const b1 = bytes[i] & 0xff;
+    const b2 = hasSecond ? bytes[i + 1] & 0xff : 0;
+    const b3 = hasThird ? bytes[i + 2] & 0xff : 0;
+
+    out += BASE64_ALPHABET.charAt(b1 >> 2);
+    out += BASE64_ALPHABET.charAt(((b1 & 0x03) << 4) | (b2 >> 4));
+    out += hasSecond
+      ? BASE64_ALPHABET.charAt(((b2 & 0x0f) << 2) | (b3 >> 6))
+      : '=';
+    out += hasThird ? BASE64_ALPHABET.charAt(b3 & 0x3f) : '=';
+  }
+  return out;
+}
+
+/**
+ * Decodes Base64 text into byte values.
+ *
+ * Characters outside the Base64 alphabet (whitespace, line breaks, ...) are
+ * skipped. Decoding stops at the first '=' that follows at least two
+ * significant characters of the current 4-character group; missing padding
+ * is tolerated.
+ *
+ * @param {string} str - Base64 text.
+ * @returns {number[]} decoded bytes (0-255).
+ */
+function base64decode(str) {
+  const bytes = [];
+  const group = []; // 6-bit values collected for the current 4-char group
+  for (let i = 0; i < str.length; i++) {
+    const code = str.charCodeAt(i);
+    if (code === PAD_CODE && group.length >= 2) {
+      break;
+    }
+    // Only ASCII can be valid. Indexing the table with `code & 0xff` would
+    // make e.g. U+0141 alias 'A' and let 0x80-0xFF through as value 0.
+    const sextet = code < 128 ? BASE64_DECODE_TABLE[code] : -1;
+    if (sextet === -1) {
+      continue;
+    }
+    group.push(sextet);
+    if (group.length === 4) {
+      bytes.push(
+        (group[0] << 2) | (group[1] >> 4),
+        ((group[1] & 0x0f) << 4) | (group[2] >> 2),
+        ((group[2] & 0x03) << 6) | group[3]
+      );
+      group.length = 0;
+    }
+  }
+  // Flush a trailing partial group: 2 chars -> 1 byte, 3 chars -> 2 bytes.
+  if (group.length >= 2) {
+    bytes.push((group[0] << 2) | (group[1] >> 4));
+  }
+  if (group.length === 3) {
+    bytes.push(((group[1] & 0x0f) << 4) | (group[2] >> 2));
+  }
+  return bytes;
+}
+
+/**
+ * Encodes a JavaScript string as standard UTF-8.
+ *
+ * Surrogate pairs are combined into one code point and written as a 4-byte
+ * sequence, so characters outside the BMP (e.g. emoji) can be read back by
+ * any UTF-8 decoder.
+ *
+ * @param {string} str - text to encode.
+ * @returns {number[]} UTF-8 bytes.
+ */
+function utf16to8(str) {
+  const bytes = [];
+  // Iterating a string yields whole code points, not UTF-16 code units.
+  for (const ch of str) {
+    const cp = ch.codePointAt(0);
+    if (cp <= 0x7f) {
+      bytes.push(cp);
+    } else if (cp <= 0x7ff) {
+      bytes.push(0xc0 | (cp >> 6), 0x80 | (cp & 0x3f));
+    } else if (cp <= 0xffff) {
+      bytes.push(
+        0xe0 | (cp >> 12),
+        0x80 | ((cp >> 6) & 0x3f),
+        0x80 | (cp & 0x3f)
+      );
+    } else {
+      bytes.push(
+        0xf0 | (cp >> 18),
+        0x80 | ((cp >> 12) & 0x3f),
+        0x80 | ((cp >> 6) & 0x3f),
+        0x80 | (cp & 0x3f)
+      );
+    }
+  }
+  return bytes;
+}
+
+/**
+ * Builds a string from UTF-16 code units in chunks, so that very long inputs
+ * do not exceed the engine's argument-count limit for `apply`.
+ *
+ * @param {number[]} units - UTF-16 code units.
+ * @returns {string}
+ */
+function codeUnitsToString(units) {
+  const CHUNK_SIZE = 0x2000;
+  let out = '';
+  for (let i = 0; i < units.length; i += CHUNK_SIZE) {
+    out += String.fromCharCode.apply(null, units.slice(i, i + CHUNK_SIZE));
+  }
+  return out;
+}
+
+/**
+ * Decodes UTF-8 bytes into UTF-16.
+ *
+ * Decoding is lenient: stray continuation bytes and invalid lead bytes are
+ * skipped, and continuation bytes missing at the end of the input are read
+ * as zero. 4-byte sequences are decoded into surrogate pairs.
+ *
+ * @param {ArrayLike<number>} bytes - UTF-8 encoded bytes.
+ * @param {boolean} [asArray] - when truthy, return the UTF-16 code units as
+ *   an array of numbers instead of a string.
+ * @returns {string|number[]} decoded text, or its UTF-16 code units.
+ */
+function utf8to16(bytes, asArray) {
+  const units = [];
+  const len = bytes.length;
+  let i = 0;
+  // Low 6 bits of the next byte, or 0 when the input ends early.
+  const nextPayload = () => (i < len ? bytes[i++] & 0x3f : 0);
+
+  while (i < len) {
+    const lead = bytes[i++] & 0xff;
+    if (lead < 0x80) {
+      // 0xxxxxxx
+      units.push(lead);
+    } else if (lead >= 0xc0 && lead < 0xe0) {
+      // 110xxxxx 10xxxxxx
+      units.push(((lead & 0x1f) << 6) | nextPayload());
+    } else if (lead >= 0xe0 && lead < 0xf0) {
+      // 1110xxxx 10xxxxxx 10xxxxxx
+      const high = (lead & 0x0f) << 12;
+      const middle = nextPayload() << 6;
+      units.push(high | middle | nextPayload());
+    } else if (lead >= 0xf0 && lead < 0xf8) {
+      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+      let cp = (lead & 0x07) << 18;
+      cp |= nextPayload() << 12;
+      cp |= nextPayload() << 6;
+      cp |= nextPayload();
+      if (cp > 0x10ffff) {
+        units.push(0xfffd); // beyond Unicode: emit the replacement character
+      } else if (cp > 0xffff) {
+        const offset = cp - 0x10000;
+        units.push(0xd800 | (offset >> 10), 0xdc00 | (offset & 0x3ff));
+      } else {
+        units.push(cp);
+      }
+    }
+    // 0x80-0xBF (stray continuation) and 0xF8-0xFF (invalid) are skipped.
+  }
+  return asArray ? units : codeUnitsToString(units);
+}
+
+/**
+ * Debug helper: formats each UTF-16 code unit of `str` as a `\xNN` escape
+ * followed by a space, adding a CRLF after every 8th unit.
+ *
+ * @param {string} str - text to dump.
+ * @returns {string} the formatted dump.
+ */
+function CharToHex(str) {
+  let out = '';
+  for (let i = 0; i < str.length; i++) {
+    const hex = str.charCodeAt(i).toString(16).padStart(2, '0');
+    out += '\\x' + hex + ' ';
+    if ((i + 1) % 8 === 0) {
+      out += '\r\n';
+    }
+  }
+  return out;
+}
+
+/**
+ * Base64-encodes text or raw bytes.
+ *
+ * @param {string|ArrayLike<number>} src - text to encode as UTF-8 first, or
+ *   (when `hI` is truthy) byte values to encode as-is.
+ * @param {boolean} [hI] - truthy when `src` already holds byte values.
+ * @returns {string} Base64 text.
+ */
+function base64_encode(src, hI) {
+  return base64encode(hI ? src : utf16to8(src));
+}
+
+/**
+ * Decodes Base64 text.
+ *
+ * @param {string} src - Base64 text.
+ * @param {boolean} [hO] - when falsy the decoded bytes are interpreted as
+ *   UTF-8 and returned as a string; when truthy an array is returned.
+ * @param {string} [out_de] - only used when `hO` is truthy: 'u' or 'd' makes
+ *   the result the UTF-16 code units of the UTF-8 decoded text; any other
+ *   value returns the raw decoded bytes.
+ * @returns {string|number[]} decoded text, code units, or bytes.
+ */
+function base64_decode(src, hO, out_de) {
+  const bytes = base64decode(src);
+  if (!hO) {
+    return utf8to16(bytes, false);
+  }
+  if (out_de === 'u' || out_de === 'd') {
+    return utf8to16(bytes, true);
+  }
+  return bytes;
+}
+
+module.exports = {
+  base64_encode,
+  base64_decode,
+};
diff --git a/财智Aisino微信小程序音频课程下载/.gitignore b/财智Aisino微信小程序音频课程下载/.gitignore
new file mode 100644
index 0000000..adf7365
--- /dev/null
+++ b/财智Aisino微信小程序音频课程下载/.gitignore
@@ -0,0 +1,8 @@
+output/**/*.json
+output/**/*.html
+output/**/*.mp3
+output/**/*.mp4
+output/**/*.m4a
+
+*.postman_collection.json
+config.js
\ No newline at end of file
diff --git a/财智Aisino微信小程序音频课程下载/README.md b/财智Aisino微信小程序音频课程下载/README.md
new file mode 100644
index 0000000..a3d03a3
--- /dev/null
+++ b/财智Aisino微信小程序音频课程下载/README.md
@@ -0,0 +1,5 @@
+## 财智Aisino微信小程序音频课程下载
+
+相关接口通过 HTTPDebug 抓包得到
+
+编写时间:2024.01.31
\ No newline at end of file
diff --git a/财智Aisino微信小程序音频课程下载/config.template.js b/财智Aisino微信小程序音频课程下载/config.template.js
new file mode 100644
index 0000000..ab3d41c
--- /dev/null
+++ b/财智Aisino微信小程序音频课程下载/config.template.js
@@ -0,0 +1,4 @@
+const token = 'bearer xxxxxxxxxxxxxx'
+const userId = 'xxxxxxxxxxxxxxxxxxxxxxxxxx'
+
+module.exports = { token, userId }
diff --git a/财智Aisino微信小程序音频课程下载/index.js b/财智Aisino微信小程序音频课程下载/index.js
new file mode 100644
--- /dev/null
+++ b/财智Aisino微信小程序音频课程下载/index.js
@@ -0,0 +1,312 @@
+const https = require('follow-redirects').https;
+const fs = require('fs');
+const path = require('path');
+
+const base64Utils = require('../utils/base64Utils');
+
+const { token, userId } = require('./config');
+
+const API_HOSTNAME = 'wx.hxdkfp.com';
+const PRODUCT_INFO_PATH =
+  '/czw-api/api-hypt-product/hypt/product/getProductInfo';
+const ROOT_COURSE_ID = 1594; // id passed to the course list request
+
+const outputDir = './output/'; // must end with /
+const courseDetailDir = 'courseDetail/'; // must end with /
+const downloadMediaDir = 'downloadMedia/'; // must end with /
+
+main().catch((err) => {
+  console.error(err);
+  process.exitCode = 1;
+});
+
+/**
+ * Runs the whole pipeline: fetch the course list, fetch every course detail,
+ * extract names / audio URLs / HTML pages, then download the audio files.
+ */
+async function main() {
+  // Make sure the output folders exist before anything is written to them.
+  fs.mkdirSync(outputDir + courseDetailDir, { recursive: true });
+  fs.mkdirSync(outputDir + downloadMediaDir, { recursive: true });
+
+  // Step 1: course list
+  await getCourseList(ROOT_COURSE_ID);
+
+  // Step 2: course details
+  const courseListJson = fs.readFileSync(outputDir + 'courseList.json', 'utf8');
+  const courseListData = JSON.parse(courseListJson).data;
+  if (!courseListData || !Array.isArray(courseListData.childs)) {
+    throw new Error(
+      'courseList.json has no data.childs array; check token and userId in config.js'
+    );
+  }
+  await getCourseDetail(courseListData.childs);
+
+  // Step 3: parse the saved course details
+  await parseCourseDetailJson();
+
+  // Step 4: download the audio files
+  await downloadMedia();
+
+  console.log('完成');
+}
+
+/**
+ * Builds the request options for the getProductInfo endpoint.
+ *
+ * @param {number|string} id - product (course) id.
+ * @param {number|string} serviceType - value of the `serviceType` query
+ *   parameter (3 is used for the course list, 1 for a single course).
+ * @returns {object} options for `https.request`.
+ */
+function buildProductInfoOptions(id, serviceType) {
+  const query = [
+    'id=' + encodeURIComponent(id),
+    'serviceType=' + encodeURIComponent(serviceType),
+    'userId=' + encodeURIComponent(userId),
+  ].join('&');
+  return {
+    method: 'POST',
+    hostname: API_HOSTNAME,
+    path: PRODUCT_INFO_PATH + '?' + query,
+    headers: { token },
+    // NOTE(review): this turns off TLS certificate verification, so the
+    // token is sent to an unauthenticated peer. Remove it if the server
+    // presents a valid certificate chain.
+    rejectUnauthorized: false,
+  };
+}
+
+/**
+ * Step 1: fetches the course list and saves the response to
+ * `courseList.json` in the output directory.
+ *
+ * @param {number} courseId - id sent as the `id` query parameter.
+ */
+async function getCourseList(courseId) {
+  const options = buildProductInfoOptions(courseId, 3);
+  await httpsRequest(options, outputDir + 'courseList.json');
+}
+
+/**
+ * Step 2: fetches the detail of every course and saves each response as
+ * `<id>.json` in the course detail directory. A failed request is logged and
+ * the loop continues with the next course.
+ *
+ * @param {Array<{id: number, serviceName: string}>} courseList - entries of
+ *   `data.childs` from the course list response.
+ */
+async function getCourseDetail(courseList) {
+  for (let i = 0; i < courseList.length; i++) {
+    const { id, serviceName } = courseList[i];
+    console.log(`${i}\t| ${id} | ${serviceName}`);
+    try {
+      const options = buildProductInfoOptions(id, 1);
+      await httpsRequest(options, outputDir + courseDetailDir + id + '.json');
+      console.log('\t完成');
+
+      await waitAMinute();
+    } catch (err) {
+      console.error(err);
+    }
+  }
+}
+
+/**
+ * Step 3: reads every saved course detail, writes the Base64-decoded brief
+ * and detail HTML into the media directory, and saves the list of names and
+ * audio URLs to `parsedCourseDetails.json`.
+ */
+async function parseCourseDetailJson() {
+  const dirPath = outputDir + courseDetailDir;
+  const infoDir = [];
+  // Only pick up the saved responses: the folder also contains .gitkeep,
+  // which is not JSON and would make JSON.parse throw.
+  const files = fs
+    .readdirSync(dirPath)
+    .filter((file) => path.extname(file) === '.json');
+  for (const file of files) {
+    const filePath = path.join(dirPath, file);
+    let data;
+    try {
+      data = JSON.parse(fs.readFileSync(filePath, 'utf8')).data;
+    } catch (err) {
+      console.error(`${file}: ${err.message}`);
+      continue;
+    }
+    if (!data) {
+      console.error(`${file}: response has no data, skipped`);
+      continue;
+    }
+    const brief = base64Utils.base64_decode(data.briefString || '');
+    const detail = base64Utils.base64_decode(data.detailString || '');
+    const filename = deleteFilenameUnsupportChar(
+      String(data.serviceName || path.basename(file, '.json'))
+    );
+    console.log(file);
+    infoDir.push({
+      name: filename,
+      audioUrl: data.audioUrl,
+    });
+    const mediaPrefix = outputDir + downloadMediaDir + filename;
+    fs.writeFileSync(mediaPrefix + '-简介.html', brief, 'utf8');
+    fs.writeFileSync(mediaPrefix + '-详情.html', detail, 'utf8');
+  }
+  fs.writeFileSync(
+    outputDir + 'parsedCourseDetails.json',
+    JSON.stringify(infoDir, null, 4),
+    'utf8'
+  );
+}
+
+/**
+ * Returns the file extension (including the dot) of the path part of a URL,
+ * ignoring any query string or fragment; '' when there is none.
+ *
+ * @param {string} fileUrl
+ * @returns {string}
+ */
+function getUrlExtension(fileUrl) {
+  let pathname;
+  try {
+    pathname = new URL(fileUrl).pathname;
+  } catch (err) {
+    // Not an absolute URL: strip the query string / fragment by hand.
+    pathname = fileUrl.split(/[?#]/)[0];
+  }
+  return path.extname(pathname);
+}
+
+/**
+ * Step 4: downloads the audio of every entry in `parsedCourseDetails.json`
+ * into the media directory. Entries without an audio URL are skipped and a
+ * failed download does not stop the remaining ones.
+ */
+async function downloadMedia() {
+  const parsedCourseDetails = JSON.parse(
+    fs.readFileSync(outputDir + 'parsedCourseDetails.json', 'utf8')
+  );
+  for (let i = 0; i < parsedCourseDetails.length; i++) {
+    const { name, audioUrl } = parsedCourseDetails[i];
+    if (!audioUrl) {
+      console.error(`${i}\t| ${name} | no audioUrl, skipped`);
+      continue;
+    }
+    const fileNameWithExt = name + getUrlExtension(audioUrl);
+    console.log(`${i}\t| ${fileNameWithExt} | ${audioUrl}`);
+    try {
+      await downloadFile(audioUrl, outputDir + downloadMediaDir + fileNameWithExt);
+    } catch (err) {
+      console.error(`\t下载失败: ${err.message}`);
+    }
+    await waitAMinute();
+  }
+}
+
+/**
+ * Sends a request and saves the response body to a file.
+ *
+ * @param {object} options - options for `https.request`.
+ * @param {string} filename - path of the file the body is written to.
+ * @returns {Promise<string>} resolves with the response body; rejects on a
+ *   network error, a non-2xx status code, or a failed write.
+ */
+function httpsRequest(options, filename) {
+  return new Promise((resolve, reject) => {
+    const req = https.request(options, (res) => {
+      const chunks = [];
+      res.on('data', (chunk) => chunks.push(chunk));
+      res.on('error', reject);
+      res.on('end', () => {
+        if (res.statusCode < 200 || res.statusCode >= 300) {
+          reject(new Error(`unexpected HTTP status ${res.statusCode}`));
+          return;
+        }
+        const result = Buffer.concat(chunks).toString();
+        try {
+          fs.writeFileSync(filename, result, 'utf8');
+        } catch (err) {
+          reject(err);
+          return;
+        }
+        resolve(result);
+      });
+    });
+
+    // Without this handler a connection failure is an unhandled 'error'
+    // event instead of a rejection the caller can catch.
+    req.on('error', reject);
+    req.end();
+  });
+}
+
+/**
+ * Pauses for a short time; called between consecutive requests.
+ *
+ * @param {number} [ms=100] - delay in milliseconds (despite the name, the
+ *   default is 100 ms, not a minute).
+ * @returns {Promise<void>}
+ */
+function waitAMinute(ms = 100) {
+  return new Promise((resolve) => {
+    setTimeout(resolve, ms);
+  });
+}
+
+/**
+ * Replaces characters that are not allowed in file names.
+ *
+ * `?` and `:` become their full-width look-alikes, a tab becomes a space,
+ * `"` becomes two single quotes, and `\ / * < > |` plus control characters
+ * become `_`.
+ *
+ * @param {string} name - raw name.
+ * @returns {string} the name with those characters replaced.
+ */
+function deleteFilenameUnsupportChar(name) {
+  return name
+    .replace(/\?/g, '？')
+    .replace(/:/g, '：')
+    .replace(/\t/g, ' ')
+    .replace(/"/g, "''")
+    .replace(/[\\/*<>|\u0000-\u001f]/g, '_');
+}
+
+/**
+ * Downloads a file.
+ *
+ * @param {string} fileUrl - URL of the file to download.
+ * @param {string} downloadPath - path the file is saved to.
+ * @returns {Promise<void>} resolves once the file is fully written; rejects
+ *   with the error on a network failure, a non-2xx status code, or a write
+ *   error (a partially written file is removed).
+ */
+function downloadFile(fileUrl, downloadPath) {
+  return new Promise((resolve, reject) => {
+    const request = https.request(fileUrl, (response) => {
+      if (response.statusCode < 200 || response.statusCode >= 300) {
+        response.resume(); // discard the body so the socket is released
+        reject(new Error(`unexpected HTTP status ${response.statusCode}`));
+        return;
+      }
+
+      const file = fs.createWriteStream(downloadPath);
+      const fail = (err) => {
+        file.destroy();
+        // Remove the partial file; a failed unlink is irrelevant here.
+        fs.unlink(downloadPath, () => reject(err));
+      };
+      response.on('error', fail);
+      response.on('aborted', () => fail(new Error('response aborted')));
+      file.on('error', fail);
+      file.on('finish', () => {
+        console.log('\t文件下载完毕');
+        resolve();
+      });
+      response.pipe(file);
+    });
+
+    request.on('error', reject);
+    request.end();
+  });
+}
diff --git a/财智Aisino微信小程序音频课程下载/output/courseDetail/.gitkeep b/财智Aisino微信小程序音频课程下载/output/courseDetail/.gitkeep
new file mode 100644 index 0000000..e69de29 diff --git a/财智Aisino微信小程序音频课程下载/output/downloadMedia/.gitkeep b/财智Aisino微信小程序音频课程下载/output/downloadMedia/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/财智Aisino微信小程序音频课程下载/财智Aisino微信小程序 接口爬取.postman_collection.json b/财智Aisino微信小程序音频课程下载/财智Aisino微信小程序 接口爬取.postman_collection.json new file mode 100644 index 0000000..05aae64 --- /dev/null +++ b/财智Aisino微信小程序音频课程下载/财智Aisino微信小程序 接口爬取.postman_collection.json @@ -0,0 +1,140 @@ +{ + "info": { + "_postman_id": "aca35348-a1a2-471c-8893-1264e3b641a1", + "name": "财智Aisino微信小程序 接口爬取", + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", + "_exporter_id": "24657033" + }, + "item": [ + { + "name": "已购课程", + "request": { + "method": "GET", + "header": [ + { + "key": "token", + "value": "bearer [替换为你自己的token]", + "type": "text" + } + ], + "url": { + "raw": "https://wx.hxdkfp.com/czw-api/api-hypt-order/hypt/order/getPurchasedCourse/1/2?phoneNumber=[替换为你自己的phoneNumber]&userId=[替换为你自己的userId]", + "protocol": "https", + "host": [ + "wx", + "hxdkfp", + "com" + ], + "path": [ + "czw-api", + "api-hypt-order", + "hypt", + "order", + "getPurchasedCourse", + "1", + "2" + ], + "query": [ + { + "key": "phoneNumber", + "value": "[替换为你自己的phoneNumber]" + }, + { + "key": "userId", + "value": "[替换为你自己的userId]" + } + ] + } + }, + "response": [] + }, + { + "name": "课程列表", + "request": { + "method": "POST", + "header": [ + { + "key": "token", + "value": "bearer [替换为你自己的token]", + "type": "text" + } + ], + "url": { + "raw": "https://wx.hxdkfp.com/czw-api/api-hypt-product/hypt/product/getProductInfo?id=1594&serviceType=3&userId=[替换为你自己的userId]", + "protocol": "https", + "host": [ + "wx", + "hxdkfp", + "com" + ], + "path": [ + "czw-api", + "api-hypt-product", + "hypt", + "product", + "getProductInfo" + ], + "query": [ + { + "key": "id", + "value": "1594" + }, + { + "key": "serviceType", + "value": "3" + }, + { + "key": "userId", + "value": "[替换为你自己的userId]" + 
} + ] + } + }, + "response": [] + }, + { + "name": "课程详情", + "request": { + "method": "POST", + "header": [ + { + "key": "token", + "value": "bearer [替换为你自己的token]", + "type": "text" + } + ], + "url": { + "raw": "https://wx.hxdkfp.com/czw-api/api-hypt-product/hypt/product/getProductInfo?id=3380&serviceType=1&userId=[替换为你自己的userId]", + "protocol": "https", + "host": [ + "wx", + "hxdkfp", + "com" + ], + "path": [ + "czw-api", + "api-hypt-product", + "hypt", + "product", + "getProductInfo" + ], + "query": [ + { + "key": "id", + "value": "3380" + }, + { + "key": "serviceType", + "value": "1" + }, + { + "key": "userId", + "value": "[替换为你自己的userId]" + } + ] + } + }, + "response": [] + } + ] +} \ No newline at end of file