爬取歌单 简单代码
This commit is contained in:
61
qianqian_music/index.js
Normal file
61
qianqian_music/index.js
Normal file
@@ -0,0 +1,61 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const requestUtils = require('../utils/requestUtils');
|
||||
|
||||
// Scratch directory used for the temporary modules written by getData.
const TEMP_MODULE_PATH = path.resolve(__dirname, 'temp');

// Directory that receives the per-page HTML and JSON output written by main.
const DATA_PATH = path.resolve(__dirname, 'data');
|
||||
|
||||
/**
 * Download a page.
 *
 * Delegates to `requestUtils.getApiResult` with gzip enabled.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<*>} Whatever `requestUtils.getApiResult` resolves to
 *   (presumably the page body as a string — confirm against requestUtils).
 */
async function getHtml(url) {
  const requestOptions = { gzip: true };
  const pageHtml = await requestUtils.getApiResult(url, requestOptions);
  return pageHtml;
}
|
||||
|
||||
/**
 * Extract the playlist data embedded in a page's `window.__NUXT__` script.
 *
 * The page assigns a JavaScript expression (not plain JSON) to
 * `window.__NUXT__`, so it cannot be passed to `JSON.parse`. Instead the
 * expression is written to a temporary CommonJS module, loaded with `require`,
 * and the temporary file is removed again.
 *
 * SECURITY NOTE(review): this executes script text taken from the downloaded
 * page with the full privileges of this process. Only run it against sources
 * you trust; evaluating the payload in a sandbox should be considered.
 *
 * @param {string} html - Page markup containing
 *   `<script>window.__NUXT__=...;</script>`.
 * @param {number|string} [index=0] - Suffix that keeps the temporary module
 *   file names distinct between calls.
 * @returns {*} The `data[0].listData` part of the evaluated payload (the
 *   payload also carries category information, which is ignored), or
 *   `undefined` when loading or reading the payload fails (the error is logged).
 * @throws {Error} When `html` contains no `window.__NUXT__` script.
 */
function getData(html, index = 0) {
  // First locate the expression assigned to window.__NUXT__.
  const match = /<script>window\.__NUXT__=(.*?);<\/script>/.exec(html);
  if (match === null) {
    throw new Error('window.__NUXT__ script not found in html');
  }
  const script = match[1];

  // Then load that expression as a module to obtain the evaluated object.
  // The scratch directory may not exist yet on a fresh checkout.
  fs.mkdirSync(TEMP_MODULE_PATH, { recursive: true });
  const tempModule = path.join(TEMP_MODULE_PATH, `temp_module_${index}.js`);
  fs.writeFileSync(tempModule, `exports.data=${script};`);

  let cacheKey;
  try {
    cacheKey = require.resolve(tempModule);
    const loaded = require(cacheKey);
    // The payload holds categories and playlist info; only the playlist part is returned.
    return loaded.data.data[0].listData;
  } catch (err) {
    console.error(err);
    return undefined;
  } finally {
    // Drop the cached module so a later call reusing the same index does not
    // get the previous page's data back from the require cache.
    if (cacheKey !== undefined) {
      delete require.cache[cacheKey];
    }
    // `force` keeps cleanup from throwing (and hiding the real error) if the
    // file is already gone.
    fs.rmSync(tempModule, { force: true });
  }
}
|
||||
|
||||
/**
 * Crawl the playlist pages one after another until a page reports that no
 * further page exists.
 *
 * For every page the downloaded HTML and the extracted playlist data are
 * written under `DATA_PATH` as `songlist-html-<page>.html` and
 * `songlist-data-<page>.json`.
 *
 * @returns {Promise<void>} Resolves once the last page has been stored.
 * @throws {Error} When no playlist data could be extracted from a page.
 */
async function main() {
  // The output directory is not created by the code visible here, so make
  // sure it exists before the first write.
  fs.mkdirSync(DATA_PATH, { recursive: true });

  let currentPage = 1;
  let haveMore = true;
  while (haveMore) {
    const url = `https://music.91q.com/songlist?pageNo=${currentPage}`;
    console.log(url);
    const html = await getHtml(url);
    fs.writeFileSync(`${DATA_PATH}/songlist-html-${currentPage}.html`, html);
    console.log("get html done.");

    const data = getData(html, currentPage);
    if (data == null) {
      // getData logs and returns undefined when it cannot load the payload;
      // fail with a clear message instead of an unrelated TypeError from
      // writing `undefined` or reading `haveMore` off it.
      throw new Error(`no playlist data extracted for page ${currentPage}`);
    }
    fs.writeFileSync(`${DATA_PATH}/songlist-data-${currentPage}.json`, JSON.stringify(data, null, 2));
    console.log("get data done.");

    currentPage++;
    haveMore = data.haveMore;
  }
}
|
||||
|
||||
// Report a failed crawl and exit non-zero instead of leaving the promise
// returned by main() without a rejection handler.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
Reference in New Issue
Block a user