1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

61 lines
1.8 KiB
JavaScript
Raw Normal View History

2022-10-19 17:22:49 +08:00
const fs = require('fs');
const path = require('path');
const requestUtils = require('../utils/requestUtils');
// Directory in which getData() writes its short-lived temp_module_<index>.js files.
const TEMP_MODULE_PATH = path.join(__dirname, 'temp');
// Directory in which main() stores each page's raw HTML and extracted JSON.
const DATA_PATH = path.join(__dirname, 'data');
/**
 * Download the HTML of a page.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<*>} Whatever `requestUtils.getApiResult` resolves with
 *   for this URL (requested with the `gzip` option enabled).
 */
async function getHtml(url) {
  const requestOptions = { gzip: true };
  return await requestUtils.getApiResult(url, requestOptions);
}
/**
 * Extract the song-list data embedded in a page's `window.__NUXT__` script.
 *
 * The value assigned to `window.__NUXT__` is a JavaScript expression (an
 * immediately invoked anonymous function), not JSON, so it cannot be parsed
 * with `JSON.parse`. It is written to a temporary CommonJS module under
 * TEMP_MODULE_PATH, evaluated by `require`-ing that module, and the temporary
 * file is removed again before returning.
 *
 * NOTE(security): this executes code taken from the downloaded page with the
 * full privileges of this process. Only feed it pages from a source you trust.
 *
 * @param {string} html - Page source containing the `window.__NUXT__` script.
 * @param {number|string} [index=0] - Suffix used in the temporary file name.
 * @returns {*} The `data[0].listData` member of the evaluated payload. The
 *   payload also carries category information; only the song-list part is returned.
 * @throws {Error} If the `window.__NUXT__` script is not found, or if
 *   evaluating the payload or reading `data[0].listData` from it throws
 *   (the underlying error is attached as `cause`).
 */
function getData(html, index = 0) {
  // Locate the expression assigned to window.__NUXT__.
  const match = /<script>window.__NUXT__=(.*?);<\/script>/.exec(html);
  if (match === null) {
    throw new Error('window.__NUXT__ script not found in html');
  }
  const script = match[1];

  // The temp directory may not exist yet on a fresh checkout.
  fs.mkdirSync(TEMP_MODULE_PATH, { recursive: true });
  const tempModule = `${TEMP_MODULE_PATH}/temp_module_${index}.js`;
  fs.writeFileSync(tempModule, `exports.data=${script};`);
  try {
    // require() caches modules by resolved file name; drop any previous entry
    // so that reusing an index evaluates the new payload instead of returning
    // the result of an earlier call.
    delete require.cache[require.resolve(tempModule)];
    const loaded = require(tempModule);
    return loaded.data.data[0].listData;
  } catch (err) {
    // Fail loudly here rather than handing `undefined` to the caller.
    throw new Error(
      `failed to evaluate window.__NUXT__ payload (index ${index}): ${err.message}`,
      { cause: err },
    );
  } finally {
    fs.rmSync(tempModule, { force: true });
  }
}
/**
 * Crawl the song-list pages one after another, starting at page 1.
 *
 * For every page the raw HTML is saved as `songlist-html-<page>.html` and the
 * extracted data as `songlist-data-<page>.json` inside DATA_PATH. Crawling
 * continues while the extracted data reports a truthy `haveMore`.
 *
 * @returns {Promise<void>} Resolves once a page without `haveMore` was stored.
 * @throws {Error} If no data could be extracted for a page; errors from
 *   fetching, parsing or writing files propagate unchanged.
 */
async function main() {
  // Create the output directory up front so the first write cannot hit ENOENT.
  fs.mkdirSync(DATA_PATH, { recursive: true });

  let currentPage = 1;
  let haveMore = true;
  while (haveMore) {
    const url = `https://music.91q.com/songlist?pageNo=${currentPage}`;
    console.log(url);
    const html = await getHtml(url);
    fs.writeFileSync(`${DATA_PATH}/songlist-html-${currentPage}.html`, html);
    console.log("get html done.");

    const data = getData(html, currentPage);
    if (data == null) {
      // Without this guard JSON.stringify(undefined) would be handed to
      // writeFileSync and the crawl would die with an unrelated TypeError.
      throw new Error(`no song list data parsed for page ${currentPage} (${url})`);
    }
    fs.writeFileSync(`${DATA_PATH}/songlist-data-${currentPage}.json`, JSON.stringify(data, null, 2));
    console.log("get data done.");

    haveMore = Boolean(data.haveMore);
    currentPage++;
  }
}
// Run the crawl. Any failure is logged and reported through a non-zero exit
// code instead of being left as an unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});