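// Downloads the paginated song-list pages from music.91q.com and stores each page's raw HTML and extracted list data in the `data` directory next to this script.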
const fs = require('fs');
const path = require('path');

const requestUtils = require('../utils/requestUtils');

// Scratch directory for the short-lived temp modules written by getData().
const TEMP_MODULE_PATH = path.join(__dirname, 'temp');
// Output directory for the HTML and JSON files written by main().
const DATA_PATH = path.join(__dirname, 'data');

/**
 * Fetch the HTML of a page.
 *
 * Delegates to `requestUtils.getApiResult`, passing the `gzip: true` option.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<*>} Whatever `requestUtils.getApiResult` resolves to
 *   (presumably the response body as text; confirm against requestUtils).
 */
async function getHtml(url) {
  const html = await requestUtils.getApiResult(url, {
    gzip: true,
  });
  return html;
}

/**
 * Extract the song-list data embedded in a page's `window.__NUXT__` script.
 *
 * The expression assigned to `window.__NUXT__` is written to a temporary
 * CommonJS module, loaded with `require` so that Node evaluates it, and the
 * temporary file is removed again afterwards.
 *
 * SECURITY: this executes script text taken from the downloaded page with the
 * full privileges of this process. Only run it against pages you trust.
 *
 * @param {string} html - Markup containing `<script>window.__NUXT__=...;</script>`.
 * @param {number|string} [index=0] - Suffix used in the temp module's file name.
 * @returns {*} `data[0].listData` of the evaluated payload, or `undefined`
 *   when loading the module or reading that path fails (the error is logged).
 * @throws {Error} If `html` contains no `window.__NUXT__` script.
 */
function getData(html, index = 0) {
  // Step 1: pull out the expression assigned to window.__NUXT__.
  // [\s\S] lets the payload span several lines; the dots are matched literally.
  const match = /<script>window\.__NUXT__=([\s\S]*?);<\/script>/.exec(html);
  if (match === null) {
    throw new Error('No window.__NUXT__ script found in the given HTML');
  }
  const script = match[1];

  // Step 2: expose that expression as a module export and load it to get the
  // evaluated object.
  fs.mkdirSync(TEMP_MODULE_PATH, { recursive: true });
  const tempModule = path.join(TEMP_MODULE_PATH, `temp_module_${index}.js`);
  fs.writeFileSync(tempModule, `exports.data=${script};`);
  try {
    const loaded = requireFresh(tempModule);
    // The payload holds both categories and song-list information; only the
    // song-list part is returned.
    return loaded.data.data[0].listData;
  } catch (err) {
    // Best effort: report the problem and let the caller see `undefined`.
    console.error(err);
    return undefined;
  } finally {
    fs.rmSync(tempModule, { force: true });
  }
}

/**
 * Load a module and immediately evict it from the `require` cache, so that a
 * later load of the same path re-reads the file instead of returning the
 * previously cached exports.
 *
 * @param {string} modulePath - Path of an existing module file.
 * @returns {*} The module's exports.
 */
function requireFresh(modulePath) {
  // Resolve first: the cache is keyed by the resolved file name.
  const cacheKey = require.resolve(modulePath);
  try {
    return require(modulePath);
  } finally {
    delete require.cache[cacheKey];
  }
}

/**
 * Download every song-list page, starting at page 1, and store the results.
 *
 * For each page the raw HTML is written to `songlist-html-<page>.html` and the
 * data returned by `getData` to `songlist-data-<page>.json`, both inside
 * `DATA_PATH`. Paging continues while the extracted data's `haveMore` field is
 * truthy.
 *
 * @returns {Promise<void>} Resolves once a page reports no further pages.
 * @throws {Error} If no data could be extracted from a downloaded page.
 */
async function main() {
  // The output directory may not exist on a fresh checkout.
  fs.mkdirSync(DATA_PATH, { recursive: true });

  let currentPage = 1;
  let haveMore = true;
  while (haveMore) {
    const url = `https://music.91q.com/songlist?pageNo=${currentPage}`;
    console.log(url);

    const html = await getHtml(url);
    fs.writeFileSync(path.join(DATA_PATH, `songlist-html-${currentPage}.html`), html);
    console.log("get html done.");

    const data = getData(html, currentPage);
    if (data == null) {
      // getData logs its own failure and returns undefined; stop with a clear
      // message instead of failing later on an unrelated TypeError.
      throw new Error(`No song-list data could be extracted from ${url}`);
    }
    fs.writeFileSync(
      path.join(DATA_PATH, `songlist-data-${currentPage}.json`),
      JSON.stringify(data, null, 2),
    );
    console.log("get data done.");

    haveMore = Boolean(data.haveMore);
    currentPage++;
  }
}

// Run the scraper; report any failure and signal it through the exit code
// instead of leaving the promise without a rejection handler.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});