1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

229 lines
14 KiB
JavaScript
Raw Permalink Normal View History

2023-06-27 15:07:09 +08:00
const request = require('request');
const fs = require('fs');
const path = require('path');
// 2023.06.27
// https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/member/p_5857d53b3342a_Tm6TjjTD?type=3
/**
 * Fetches one page of the member column's item list and stores the `data`
 * field of the response as `courseList/<pageIndex>.json` next to this script.
 *
 * The request asks for 100 items per page in ascending order.
 *
 * NOTE(review): the session cookie below is hard-coded in source; it is
 * presumably expired by now and would be better supplied through
 * configuration or an environment variable.
 *
 * @param {number|string} pageIndex - Page number, sent as `bizData[page_index]`.
 * @returns {Promise<object|null>} Resolves with the stored `data` object, or
 *   with `null` when the API answers with a non-zero `code` (nothing is
 *   written in that case). Rejects on transport errors, unparsable response
 *   bodies and file-system errors. Callers that ignore the returned promise
 *   keep working, but should attach a handler to observe failures.
 */
function getResult(pageIndex) {
const options = {
method: 'POST',
url: 'https://appe0mes6qx8480.h5.xiaoeknow.com/xe.course.business.member.single_items.get/2.0.0',
headers: {
'authority': 'appe0mes6qx8480.h5.xiaoeknow.com',
'accept': 'application/json, text/plain, */*',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
'content-type': 'application/x-www-form-urlencoded',
'cookie': 'sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22%24device_id%22%3A%22188910ca4534a7-06c3769156869f-7e56547a-1638720-188910ca45483b%22%2C%22distinct_id%22%3A%22188faad51401330-0a45aeaa8b8e44-7e56547a-1638720-188faad5141188b%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%7D; Hm_lvt_32573db0e6d7780af79f38632658ed95=1687832846; Hm_lpvt_32573db0e6d7780af79f38632658ed95=1687832846; Qs_lvt_416447=1687832846; Qs_pv_416447=2667293149066936000; shop_version_type=4; ko_token=1242be138fd55c4ef4316cd54334b65c; xenbyfpfUnhLsdkZbX=0; dataUpJssdkCookie={"wxver":"","net":"","sid":""}; sajssdk_2015_new_user_appe0mes6qx8480_h5_xiaoeknow_com=1; sa_jssdk_2015_appe0mes6qx8480_h5_xiaoeknow_com=%7B%22distinct_id%22%3A%22u_6493fd71d1df1_FjNmN1cb6S%22%2C%22first_id%22%3A%22188faae9cb032a-042220d4e5b0b6-7e56547a-1638720-188faae9cb11002%22%2C%22props%22%3A%7B%7D%7D; logintime=1687833065; logintime=1687833334; shop_version_type=4',
'origin': 'https://appe0mes6qx8480.h5.xiaoeknow.com',
'referer': 'https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/member/p_5857d53b3342a_Tm6TjjTD?type=3',
'req-uuid': '20230627103104000260393',
'retry': '1',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
},
form: {
'bizData[column_id]': 'p_5857d53b3342a_Tm6TjjTD',
'bizData[page_index]': `${pageIndex}`,
'bizData[page_size]': '100',
'bizData[sort]': 'asc',
},
};
return new Promise((resolve, reject) => {
request(options, (error, response) => {
// Reject instead of throwing: an exception thrown from this asynchronous
// callback could not be caught by any caller.
if (error) {
reject(error);
return;
}
try {
console.log(response.body);
const result = JSON.parse(response.body);
if (result.code !== 0) {
console.error(`getResult(${pageIndex}): API returned code ${result.code}`);
resolve(null);
return;
}
// Make sure the target directory exists before the first page is written.
const outputDir = path.join(__dirname, 'courseList');
fs.mkdirSync(outputDir, { recursive: true });
fs.writeFileSync(path.join(outputDir, `${pageIndex}.json`), JSON.stringify(result.data), 'utf-8');
resolve(result.data);
} catch (parseOrWriteError) {
reject(parseOrWriteError);
}
});
});
}
// // step 1: get course list
// for (let pageIndex = 1; pageIndex <= 23; pageIndex++) {
// getResult(pageIndex)
// }
// // step 2: parse course list
// let total = []
// for (let pageIndex = 1; pageIndex <= 23; pageIndex++) {
// let jsonStr = fs.readFileSync(path.join(__dirname, 'courseList', `${pageIndex}.json`), 'utf-8')
// let json = JSON.parse(jsonStr)
// total.push(...json.list)
// }
// console.log(total.length)
// fs.writeFileSync(path.join(__dirname, 'courseList', `index.json`), JSON.stringify({
// "total": total.length,
// "list": total,
// }), 'utf-8')
// Step 3: fetch course details.
// Loads the merged index (`courseList/index.json`, produced by step 2) and
// hands its `list` array to the per-type detail crawlers via getDetails().
const jsonStr = fs.readFileSync(path.join(__dirname, 'courseList', `index.json`), 'utf-8');
const json = JSON.parse(jsonStr);
const courseList = json.list;
if (!Array.isArray(courseList)) {
// Fail early with a clear message instead of a TypeError inside getDetails().
throw new Error('courseList/index.json does not contain a "list" array');
}
// Attach a rejection handler so a failed crawl is reported and reflected in
// the exit code rather than surfacing as an unhandled promise rejection.
getDetails().catch((error) => {
console.error('getDetails failed:', error);
process.exitCode = 1;
});
/**
 * Dispatches the module-level `courseList` to the per-type detail crawlers:
 * `resource_type` 1 goes to getDetails1, 3 to getDetails3, and 2 or 4 to
 * getDetails24. Entries with any other `resource_type` are ignored.
 *
 * The crawlers are started together and awaited as a group, so the returned
 * promise settles only when all of them are done and rejects if any of them
 * fails (previously their promises were dropped).
 *
 * @returns {Promise<void>}
 */
async function getDetails() {
const articles = courseList.filter((course) => course.resource_type === 1);
const videos = courseList.filter((course) => course.resource_type === 3);
const audios = courseList.filter((course) => course.resource_type === 2 || course.resource_type === 4);
await Promise.all([
getDetails1(articles),
getDetails3(videos),
getDetails24(audios),
]);
}
/**
 * Detail crawler for `"resource_type": 1` items (image-and-text articles).
 *
 * Not implemented: the body is empty, so the returned promise resolves to
 * `undefined` without touching `courseList`.
 *
 * @param {Array<object>} courseList - Course entries with `resource_type === 1`.
 * @returns {Promise<void>}
 */
async function getDetails1(courseList) {
// Intentionally empty: "resource_type": 1 (image-and-text) is not handled.
}
/**
 * Detail crawler for `"resource_type": 3` items (video).
 *
 * Not implemented: the body holds only investigation notes, so the returned
 * promise resolves to `undefined` without touching `courseList`.
 *
 * @param {Array<object>} courseList - Course entries with `resource_type === 3`.
 * @returns {Promise<void>}
 */
async function getDetails3(courseList) {
// Investigation notes for "resource_type": 3 (video).
// Example item: "年终秀PPT动态视频" (year-end show, animated PPT video)
// jump_url: /content_page/eyJ0eXBlIjoyLCJyZXNvdXJjZV90eXBlIjozLCJyZXNvdXJjZV9pZCI6InZfNWZlY2VjMDhlNGIwYzRmMmJjNGYxYmJkIiwicHJvZHVjdF9pZCI6InBfNTg1N2Q1M2IzMzQyYV9UbTZUampURCIsImFwcF9pZCI6ImFwcGUwTUVzNnFYODQ4MCJ9
//
// Reading the page's HTML showed that adding the `showVconsole` parameter
// enables vConsole in the bottom-right corner of the page.
// Its "print media elements" action then logs a record whose <video> tag
// exposes a `currentSrc` property:
// document.querySelector('video').currentSrc
}
/**
 * Detail crawler for audio items (`"resource_type": 2` and `4`).
 *
 * For each course, in order:
 *   1. GET `jump_url` without following redirects and read the redirect target
 *      from the response's `<title>Redirecting to …</title>`, which carries
 *      the audio `resource_id` and the `product_id`.
 *   2. POST those ids to the audio-info endpoint.
 *   3. On `code === 0`, append a tab-separated row
 *      (index, resource_id, start_at, title, audio_url) to
 *      `courseDetail/24/index.csv`; when an `audio_url` is present, also append
 *      it to `courseDetail/24/url.txt`.
 * Any course that fails step 1 ("失败1") or does not yield an audio URL in
 * step 2 ("失败2") is logged and appended to `courseDetail/24/failed.txt`, and
 * the crawl continues with the next course. Transport errors and unparsable
 * responses are treated as failures of the corresponding step.
 *
 * Author's notes on the URLs involved:
 * - resource_type 2 example "万科的下场":
 *   jump_url /content_page/eyJ0eXBlIjoyLCJyZXNvdXJjZV90eXBlIjoyLCJyZXNvdXJjZV9pZCI6IjE3IiwicHJvZHVjdF9pZCI6InBfNTg1N2Q1M2IzMzQyYV9UbTZUampURCIsImFwcF9pZCI6ImFwcGUwTUVzNnFYODQ4MCJ9
 *   redirects (302) to
 *   https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/audio/17?product_id=p_5857d53b3342a_Tm6TjjTD
 * - resource_type 4 example "0627【观点】过去日夜喧嚣的“创业大街”如今怎样了":
 *   jump_url /content_page/eyJ0eXBlIjoyLCJyZXNvdXJjZV90eXBlIjoyLCJyZXNvdXJjZV9pZCI6ImFfNjQ5OWE5MjllNGIwYjJkMWM0MjkxOTI2IiwicHJvZHVjdF9pZCI6InBfNTg1N2Q1M2IzMzQyYV9UbTZUampURCIsImFwcF9pZCI6ImFwcGUwTUVzNnFYODQ4MCJ9
 *   redirects (302) to
 *   https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/audio/a_6499a929e4b0b2d1c4291926?product_id=p_5857d53b3342a_Tm6TjjTD
 * - The host to prepend can be https://h5.xiaoeknow.com or
 *   https://appe0mes6qx8480.h5.xiaoeknow.com; the latter is used because the
 *   former 302-redirects to it anyway.
 *
 * @param {Array<object>} courseList - Audio course entries; `jump_url` and
 *   `resource_id` are read from each.
 * @param {number} [startIndex=26] - Index of the first course to process.
 *   Defaults to 26, the resume position that used to be hard-coded in the
 *   loop; pass 0 to crawl the whole list.
 * @returns {Promise<void>} Resolves once every course from `startIndex` on has
 *   been processed.
 */
async function getDetails24(courseList, startIndex = 26) {
const outputDir = path.join(__dirname, 'courseDetail', '24');
// The append calls below fail with ENOENT when the directory is missing.
fs.mkdirSync(outputDir, { recursive: true });

for (let courseIndex = startIndex; courseIndex < courseList.length; courseIndex++) {
const course = courseList[courseIndex];

const ids = await resolveAudioIds(course);
if (!ids) {
recordAudioFailure(outputDir, '失败1', courseIndex, course);
continue;
}

const payload = await fetchAudioInfo(ids);
const audioInfo = payload?.data?.audio_info;
if (payload?.code === 0) {
fs.appendFileSync(path.join(outputDir, `index.csv`),
`${courseIndex}\t${ids.resource_id}\t${audioInfo?.start_at}\t${audioInfo?.title}\t${audioInfo?.audio_url}\n`, 'utf-8');
if (audioInfo?.audio_url) {
fs.appendFileSync(path.join(outputDir, `url.txt`),
`${audioInfo.audio_url}\n`, 'utf-8');
continue;
}
}
// Falling through to the next iteration keeps the crawl going; previously
// this path left its promise pending, which silently ended the loop.
recordAudioFailure(outputDir, '失败2', courseIndex, course);
}
}

/** Adapts the callback-style `request` client to a Promise that yields the response. */
function sendAudioRequest(options) {
return new Promise((resolve, reject) => {
request(options, (error, response) => {
// Reject rather than throw: a throw inside this asynchronous callback
// would be an uncaught exception instead of a failed await.
if (error) {
reject(error);
return;
}
resolve(response);
});
});
}

/**
 * Resolves a course's `jump_url` to `{ resource_id, product_id }` by parsing
 * the redirect page; returns `null` when the request fails or the expected
 * redirect target is not found.
 */
async function resolveAudioIds(course) {
const options = {
method: 'GET',
url: 'https://appe0mes6qx8480.h5.xiaoeknow.com' + course.jump_url,
headers: {
// Author's note: without the anony_token cookie the page redirects to
// itself. A token can be obtained by requesting once, reading the
// set-cookie header and then requesting again.
'Cookie': 'anony_token=9dcb10c78e3e237def2c69a3709d8d44',
},
// The redirect target is read from the response body, so do not follow it.
followRedirect: false,
};
let response;
try {
response = await sendAudioRequest(options);
} catch (error) {
console.error('redirect lookup failed:', error);
return null;
}
// The redirect page carries its target in "<title>Redirecting to URL</title>".
const redirectMatch = /<title>Redirecting to (.*?)<\/title>/.exec(response.body);
if (!redirectMatch) {
return null;
}
const redirectUrl = redirectMatch[1];
// e.g. https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/audio/17?product_id=p_5857d53b3342a_Tm6TjjTD
console.log(redirectUrl);
const idMatch = /^.*?\/course\/audio\/(.*?)\?product_id=(.*?)$/.exec(redirectUrl);
if (!idMatch) {
return null;
}
return { resource_id: idMatch[1], product_id: idMatch[2] };
}

/**
 * Requests the audio info for the given ids and returns the parsed JSON
 * response, or `null` when the request fails or the body is not valid JSON.
 *
 * Per the sample response recorded by the author, a successful answer looks
 * like `{ code: 0, msg: "success", data: { audio_info: { title, audio_url,
 * audio_size, audio_length, start_at, ... }, ... } }`.
 *
 * NOTE(review): the session cookie below is hard-coded in source; it is
 * presumably expired by now and would be better supplied through configuration.
 */
async function fetchAudioInfo(ids) {
const options = {
method: 'POST',
url: 'https://appe0mes6qx8480.h5.xiaoeknow.com/xe.course.business.audio.info.get/2.0.0',
headers: {
'authority': 'appe0mes6qx8480.h5.xiaoeknow.com',
'accept': 'application/json, text/plain, */*',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
'content-type': 'application/x-www-form-urlencoded',
'cookie': 'sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22%24device_id%22%3A%22188910ca4534a7-06c3769156869f-7e56547a-1638720-188910ca45483b%22%2C%22distinct_id%22%3A%22188faad51401330-0a45aeaa8b8e44-7e56547a-1638720-188faad5141188b%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%7D; Hm_lvt_32573db0e6d7780af79f38632658ed95=1687832846; Hm_lpvt_32573db0e6d7780af79f38632658ed95=1687832846; Qs_lvt_416447=1687832846; Qs_pv_416447=2667293149066936000; shop_version_type=4; ko_token=1242be138fd55c4ef4316cd54334b65c; xenbyfpfUnhLsdkZbX=0; dataUpJssdkCookie={"wxver":"","net":"","sid":""}; sajssdk_2015_new_user_appe0mes6qx8480_h5_xiaoeknow_com=1; sa_jssdk_2015_appe0mes6qx8480_h5_xiaoeknow_com=%7B%22distinct_id%22%3A%22u_6493fd71d1df1_FjNmN1cb6S%22%2C%22first_id%22%3A%22188faae9cb032a-042220d4e5b0b6-7e56547a-1638720-188faae9cb11002%22%2C%22props%22%3A%7B%7D%7D; logintime=1687834785; logintime=1687835002; shop_version_type=4',
'origin': 'https://appe0mes6qx8480.h5.xiaoeknow.com',
'referer': 'https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/audio/a_6499a929e4b0b2d1c4291926?product_id=p_5857d53b3342a_Tm6TjjTD',
'req-uuid': '20230627105945000455932',
'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Microsoft Edge";v="114"',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51',
},
form: {
'bizData[resource_id]': ids.resource_id, // e.g. 'a_6499a929e4b0b2d1c4291926'
// Taken from the parsed redirect; this used to read `request.product_id`,
// a property of the HTTP client function, which is always undefined.
'bizData[product_id]': ids.product_id, // e.g. 'p_5857d53b3342a_Tm6TjjTD'
'bizData[opr_sys]': 'Win32',
},
};
try {
const response = await sendAudioRequest(options);
return JSON.parse(response.body);
} catch (error) {
console.error('audio info request failed:', error);
return null;
}
}

/** Logs a failed course and appends it to `failed.txt` in `outputDir` for a later retry. */
function recordAudioFailure(outputDir, label, courseIndex, course) {
console.log(label, courseIndex, course.resource_id);
fs.appendFileSync(path.join(outputDir, `failed.txt`),
`${label}\t${courseIndex}\t${course.resource_id}\n`, 'utf-8');
}