1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
tools/xiaoeknow/每天听见吴晓波/index.js
2023-06-27 15:07:09 +08:00

229 lines
14 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const request = require('request');
const fs = require('fs');
const path = require('path');
// 2023.06.27
// https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/member/p_5857d53b3342a_Tm6TjjTD?type=3
/**
 * Downloads one page of the column's course list and stores the `data` field
 * of the response as `courseList/<pageIndex>.json` next to this script.
 *
 * The request is fire-and-forget: the function returns immediately and the
 * file is written from the HTTP callback. Nothing is written when the
 * response `code` is not 0; a diagnostic is printed instead.
 *
 * NOTE(review): the cookie header below is a captured browser session and is
 * hard-coded; it presumably expires — confirm before re-running.
 *
 * @param {number|string} pageIndex - Page number sent as `bizData[page_index]`
 *   and used as the output file name.
 * @throws {Error} From inside the request callback when the request fails or
 *   the body is not valid JSON (not catchable by the caller of getResult).
 */
function getResult(pageIndex) {
  const options = {
    'method': 'POST',
    'url': 'https://appe0mes6qx8480.h5.xiaoeknow.com/xe.course.business.member.single_items.get/2.0.0',
    'headers': {
      'authority': 'appe0mes6qx8480.h5.xiaoeknow.com',
      'accept': 'application/json, text/plain, */*',
      'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
      'content-type': 'application/x-www-form-urlencoded',
      'cookie': 'sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22%24device_id%22%3A%22188910ca4534a7-06c3769156869f-7e56547a-1638720-188910ca45483b%22%2C%22distinct_id%22%3A%22188faad51401330-0a45aeaa8b8e44-7e56547a-1638720-188faad5141188b%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%7D; Hm_lvt_32573db0e6d7780af79f38632658ed95=1687832846; Hm_lpvt_32573db0e6d7780af79f38632658ed95=1687832846; Qs_lvt_416447=1687832846; Qs_pv_416447=2667293149066936000; shop_version_type=4; ko_token=1242be138fd55c4ef4316cd54334b65c; xenbyfpfUnhLsdkZbX=0; dataUpJssdkCookie={"wxver":"","net":"","sid":""}; sajssdk_2015_new_user_appe0mes6qx8480_h5_xiaoeknow_com=1; sa_jssdk_2015_appe0mes6qx8480_h5_xiaoeknow_com=%7B%22distinct_id%22%3A%22u_6493fd71d1df1_FjNmN1cb6S%22%2C%22first_id%22%3A%22188faae9cb032a-042220d4e5b0b6-7e56547a-1638720-188faae9cb11002%22%2C%22props%22%3A%7B%7D%7D; logintime=1687833065; logintime=1687833334; shop_version_type=4',
      'origin': 'https://appe0mes6qx8480.h5.xiaoeknow.com',
      'referer': 'https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/member/p_5857d53b3342a_Tm6TjjTD?type=3',
      'req-uuid': '20230627103104000260393',
      'retry': '1',
      'sec-ch-ua-mobile': '?0',
      'sec-ch-ua-platform': '"Windows"',
      'sec-fetch-dest': 'empty',
      'sec-fetch-mode': 'cors',
      'sec-fetch-site': 'same-origin',
      'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51'
    },
    form: {
      'bizData[column_id]': 'p_5857d53b3342a_Tm6TjjTD',
      'bizData[page_index]': `${pageIndex}`,
      'bizData[page_size]': '100',
      'bizData[sort]': 'asc'
    }
  };
  request(options, function (error, response) {
    if (error) throw new Error(error);
    console.log(response.body);

    let result;
    try {
      result = JSON.parse(response.body);
    } catch (parseError) {
      // Name the page so a failed run can be traced back to the request.
      throw new Error(`course list page ${pageIndex}: response body is not valid JSON`, { cause: parseError });
    }

    if (result?.code !== 0) {
      // Previously this case was dropped silently; say why no file was written.
      console.error(`course list page ${pageIndex} not saved: code=${result?.code}, msg=${result?.msg}`);
      return;
    }

    // writeFileSync does not create missing parent directories.
    const outputDir = path.join(__dirname, 'courseList');
    fs.mkdirSync(outputDir, { recursive: true });
    fs.writeFileSync(path.join(outputDir, `${pageIndex}.json`), JSON.stringify(result.data), 'utf-8');
  });
}
// // step 1: get course list
// for (let pageIndex = 1; pageIndex <= 23; pageIndex++) {
// getResult(pageIndex)
// }
// // step 2: parse course list
// let total = []
// for (let pageIndex = 1; pageIndex <= 23; pageIndex++) {
// let jsonStr = fs.readFileSync(path.join(__dirname, 'courseList', `${pageIndex}.json`), 'utf-8')
// let json = JSON.parse(jsonStr)
// total.push(...json.list)
// }
// console.log(total.length)
// fs.writeFileSync(path.join(__dirname, 'courseList', `index.json`), JSON.stringify({
// "total": total.length,
// "list": total,
// }), 'utf-8')
// step 3: fetch course detail
// Load the merged course list (courseList/index.json, produced by step 2)
// and hand it to the per-resource-type detail fetchers.
const jsonStr = fs.readFileSync(path.join(__dirname, 'courseList', `index.json`), 'utf-8');
const json = JSON.parse(jsonStr);
const courseList = json.list;
// Do not leave the returned promise floating: report any failure and make the
// process exit with a non-zero status.
getDetails().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
/**
 * Splits the module-level `courseList` by `resource_type` and runs the
 * matching detail fetcher for each group:
 *   1 -> getDetails1, 3 -> getDetails3, 2 or 4 -> getDetails24.
 * Courses with any other resource_type are ignored.
 *
 * The three fetchers are started in that order and run concurrently; the
 * returned promise settles only once all of them have finished, and rejects
 * if any of them rejects, so the caller can observe completion and errors.
 *
 * @returns {Promise<void>}
 */
async function getDetails() {
  await Promise.all([
    getDetails1(courseList.filter((course) => course.resource_type === 1)),
    getDetails3(courseList.filter((course) => course.resource_type === 3)),
    getDetails24(courseList.filter((course) => course.resource_type === 2 || course.resource_type === 4)),
  ]);
}
/**
 * Placeholder for fetching details of courses with "resource_type": 1
 * (image-and-text articles). Not implemented yet: the body is empty, so the
 * returned promise resolves to undefined without doing any work.
 *
 * @param {Array<object>} courseList - Courses the caller has already filtered
 *   to resource_type === 1.
 */
async function getDetails1(courseList) {
// "resource_type": 1, image-and-text article
}
/**
 * Placeholder for fetching details of courses with "resource_type": 3
 * (video). Not implemented yet: the body only holds investigation notes, so
 * the returned promise resolves to undefined without doing any work.
 *
 * @param {Array<object>} courseList - Courses the caller has already filtered
 *   to resource_type === 3.
 */
async function getDetails3(courseList) {
// "resource_type": 3, video
// Example course title: 年终秀PPT动态视频 (year-end show, animated PPT video)
// jump_url: /content_page/eyJ0eXBlIjoyLCJyZXNvdXJjZV90eXBlIjozLCJyZXNvdXJjZV9pZCI6InZfNWZlY2VjMDhlNGIwYzRmMmJjNGYxYmJkIiwicHJvZHVjdF9pZCI6InBfNTg1N2Q1M2IzMzQyYV9UbTZUampURCIsImFwcF9pZCI6ImFwcGUwTUVzNnFYODQ4MCJ9
//
// Investigation notes: reading the page's HTML shows that adding the
// showVconsole parameter opens vConsole in the lower-right corner.
// The record printed by vConsole's "打印多媒体元素" (print media elements)
// action then shows that the <video> tag has a currentSrc property:
// document.querySelector('video').currentSrc
}
/**
 * Fetches audio details for courses with "resource_type" 2 or 4 (audio) and
 * appends the results to files under `courseDetail/24` next to this script:
 *   - index.csv  : one tab-separated row per response with code 0
 *                  (courseIndex, resource_id, start_at, title, audio_url)
 *   - url.txt    : one audio URL per line
 *   - failed.txt : one tab-separated row per failure; "失败1" means the
 *                  redirect could not be resolved, "失败2" means the audio
 *                  info response was unusable or had no audio_url
 *
 * Courses are processed one at a time, each in two steps:
 *   1. GET host + course.jump_url without following the redirect and read the
 *      target from the "Redirecting to ..." page title, e.g.
 *        resource_type 2, jump_url
 *        /content_page/eyJ0eXBlIjoyLCJyZXNvdXJjZV90eXBlIjoyLCJyZXNvdXJjZV9pZCI6IjE3IiwicHJvZHVjdF9pZCI6InBfNTg1N2Q1M2IzMzQyYV9UbTZUampURCIsImFwcF9pZCI6ImFwcGUwTUVzNnFYODQ4MCJ9
        redirects (302) to
 *        https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/audio/17?product_id=p_5857d53b3342a_Tm6TjjTD
 *        resource_type 4, jump_url
 *        /content_page/eyJ0eXBlIjoyLCJyZXNvdXJjZV90eXBlIjoyLCJyZXNvdXJjZV9pZCI6ImFfNjQ5OWE5MjllNGIwYjJkMWM0MjkxOTI2IiwicHJvZHVjdF9pZCI6InBfNTg1N2Q1M2IzMzQyYV9UbTZUampURCIsImFwcF9pZCI6ImFwcGUwTUVzNnFYODQ4MCJ9
 *        redirects (302) to
 *        https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/audio/a_6499a929e4b0b2d1c4291926?product_id=p_5857d53b3342a_Tm6TjjTD
 *      Either https://h5.xiaoeknow.com or the shop-specific host can be
 *      prefixed, but the former just 302-redirects to the latter, so the
 *      shop-specific host is used directly.
 *   2. POST the extracted resource_id / product_id to the audio info endpoint.
 *
 * @param {Array<object>} courseList - Courses to process; each needs
 *   `jump_url` and `resource_id`.
 * @param {number} [startIndex=26] - Index of the first course to process. The
 *   default keeps the resume offset that was previously hard-coded in the
 *   loop; pass 0 to process the whole list.
 * @returns {Promise<void>} Resolves when every course from startIndex on has
 *   been attempted; rejects if an HTTP request itself errors.
 */
async function getDetails24(courseList, startIndex = 26) {
  const outputDir = path.join(__dirname, 'courseDetail', '24');
  // appendFileSync does not create missing parent directories.
  fs.mkdirSync(outputDir, { recursive: true });

  for (let courseIndex = startIndex; courseIndex < courseList.length; courseIndex++) {
    const course = courseList[courseIndex];

    const ids = await resolveAudioIds24(course);
    if (!ids) {
      recordFailure24(outputDir, '失败1', courseIndex, course);
      continue;
    }

    const payload = await fetchAudioInfo24(ids);
    if (payload?.code === 0) {
      const audioInfo = payload.data?.audio_info;
      fs.appendFileSync(path.join(outputDir, `index.csv`),
        `${courseIndex}\t${ids.resource_id}\t${audioInfo?.start_at}\t${audioInfo?.title}\t${audioInfo?.audio_url}\n`, 'utf-8');
      if (audioInfo?.audio_url) {
        fs.appendFileSync(path.join(outputDir, `url.txt`),
          `${audioInfo.audio_url}\n`, 'utf-8');
        continue;
      }
    }
    // Non-zero code, unparsable body, or a response without an audio URL.
    recordFailure24(outputDir, '失败2', courseIndex, course);
  }
}

/**
 * Helper for getDetails24: resolves a course's jump_url to the ids embedded
 * in its redirect target.
 *
 * The redirect is not followed. The anony_token cookie is required, otherwise
 * the server redirects the page to itself; a token can be obtained by
 * visiting once, reading set-cookie, and requesting again.
 *
 * @param {{jump_url: string}} course
 * @returns {Promise<{resource_id: string, product_id: string}|null>} null when
 *   the body has no "Redirecting to" title or the target is not an audio URL.
 */
function resolveAudioIds24(course) {
  return new Promise((resolve, reject) => {
    const options = {
      'method': 'GET',
      'url': 'https://appe0mes6qx8480.h5.xiaoeknow.com' + course.jump_url,
      'headers': {
        'Cookie': 'anony_token=9dcb10c78e3e237def2c69a3709d8d44'
      },
      followRedirect: false,
    };
    request(options, (error, response) => {
      if (error) {
        // Reject instead of throwing: a throw here would escape the promise.
        reject(error);
        return;
      }
      const redirectMatch = /<title>Redirecting to (.*?)<\/title>/.exec(response.body);
      if (!redirectMatch) {
        resolve(null);
        return;
      }
      const redirectUrl = redirectMatch[1];
      console.log(redirectUrl);
      // e.g. https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/audio/17?product_id=p_5857d53b3342a_Tm6TjjTD
      const idMatch = /^.*?\/course\/audio\/(.*?)\?product_id=(.*?)$/.exec(redirectUrl);
      resolve(idMatch ? { resource_id: idMatch[1], product_id: idMatch[2] } : null);
    });
  });
}

/**
 * Helper for getDetails24: requests the audio info for one resource.
 *
 * A successful response looks like
 *   { "code": 0, "msg": "success",
 *     "data": { "audio_info": { "title": "...", "audio_url": "https://...mp3",
 *                               "start_at": "2023-06-27 07:00:02", ... }, ... } }
 *
 * NOTE(review): the cookie header is a captured browser session and is
 * hard-coded; it presumably expires — confirm before re-running.
 *
 * @param {{resource_id: string, product_id: string}} ids
 * @returns {Promise<object|null>} The parsed response body, or null when the
 *   body is not valid JSON. Always settles, so the caller's loop cannot stall.
 */
function fetchAudioInfo24(ids) {
  return new Promise((resolve, reject) => {
    const options = {
      'method': 'POST',
      'url': 'https://appe0mes6qx8480.h5.xiaoeknow.com/xe.course.business.audio.info.get/2.0.0',
      'headers': {
        'authority': 'appe0mes6qx8480.h5.xiaoeknow.com',
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'content-type': 'application/x-www-form-urlencoded',
        'cookie': 'sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22%24device_id%22%3A%22188910ca4534a7-06c3769156869f-7e56547a-1638720-188910ca45483b%22%2C%22distinct_id%22%3A%22188faad51401330-0a45aeaa8b8e44-7e56547a-1638720-188faad5141188b%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%7D; Hm_lvt_32573db0e6d7780af79f38632658ed95=1687832846; Hm_lpvt_32573db0e6d7780af79f38632658ed95=1687832846; Qs_lvt_416447=1687832846; Qs_pv_416447=2667293149066936000; shop_version_type=4; ko_token=1242be138fd55c4ef4316cd54334b65c; xenbyfpfUnhLsdkZbX=0; dataUpJssdkCookie={"wxver":"","net":"","sid":""}; sajssdk_2015_new_user_appe0mes6qx8480_h5_xiaoeknow_com=1; sa_jssdk_2015_appe0mes6qx8480_h5_xiaoeknow_com=%7B%22distinct_id%22%3A%22u_6493fd71d1df1_FjNmN1cb6S%22%2C%22first_id%22%3A%22188faae9cb032a-042220d4e5b0b6-7e56547a-1638720-188faae9cb11002%22%2C%22props%22%3A%7B%7D%7D; logintime=1687834785; logintime=1687835002; shop_version_type=4',
        'origin': 'https://appe0mes6qx8480.h5.xiaoeknow.com',
        'referer': 'https://appe0mes6qx8480.h5.xiaoeknow.com/p/course/audio/a_6499a929e4b0b2d1c4291926?product_id=p_5857d53b3342a_Tm6TjjTD',
        'req-uuid': '20230627105945000455932',
        'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Microsoft Edge";v="114"',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.51'
      },
      form: {
        'bizData[resource_id]': ids.resource_id, // e.g. 'a_6499a929e4b0b2d1c4291926'
        // Was `request.product_id` (a property of the HTTP library function,
        // i.e. undefined); the id comes from the resolved redirect.
        'bizData[product_id]': ids.product_id, // e.g. 'p_5857d53b3342a_Tm6TjjTD'
        'bizData[opr_sys]': 'Win32'
      }
    };
    request(options, (error, response) => {
      if (error) {
        reject(error);
        return;
      }
      try {
        resolve(JSON.parse(response.body));
      } catch (parseError) {
        // Treat a non-JSON body as a per-course failure rather than a crash.
        resolve(null);
      }
    });
  });
}

/**
 * Helper for getDetails24: logs one failed course and appends it to failed.txt.
 *
 * @param {string} outputDir - Directory that holds failed.txt.
 * @param {string} label - Failure stage marker written to the log and file.
 * @param {number} courseIndex - Index of the course in the processed list.
 * @param {{resource_id: *}} course - The course that failed.
 */
function recordFailure24(outputDir, label, courseIndex, course) {
  console.log(label, courseIndex, course.resource_id);
  fs.appendFileSync(path.join(outputDir, `failed.txt`),
    `${label}\t${courseIndex}\t${course.resource_id}\n`, 'utf-8');
}