const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');

const dbUtils = global.dbUtils;

/**
 * Load one artist from the database together with the ids of its songs.
 *
 * @param {object} params
 * @param {*} params.artistId - Value matched against `artist.artist_id`.
 * @returns {Promise<object>} `{}` when no artist row matches; otherwise a copy of
 *   the first matching `artist` row with an added `songIds` array holding the
 *   `song_id` of every `song_artist_relation` row for that artist.
 */
async function getFromDatabase({ artistId }) {
    // Look up the artist row itself
    const infoResultSet = await dbUtils.query('SELECT * FROM artist WHERE artist_id = ?', [artistId]);
    if (infoResultSet.length === 0) {
        return {};
    }

    // Look up the artist <-> song relations
    const relationResultSet = await dbUtils.query('SELECT * FROM song_artist_relation WHERE artist_id = ?', [artistId]);

    // Assemble the result on a JSON round-trip copy so the queried row object is
    // not mutated; the round-trip is kept as-is because callers may rely on the
    // way it serialises column values.
    const artistInfo = JSON.parse(JSON.stringify(infoResultSet[0]));
    artistInfo.songIds = relationResultSet.map((relation) => relation.song_id);
    return artistInfo;
}

/**
 * Find the artists that are referenced by `song_artist_relation` but have no
 * row in `artist` yet, and fetch them one at a time.
 *
 * For each artist id: `global.checkIsExit()` is awaited first, progress is
 * logged, this module's `fetch` is awaited, and then
 * `sleepUtils.sleep(global.sleepTime)` is awaited. An error thrown while
 * fetching one artist is logged with `console.error` and the loop continues
 * with the next id.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
    console.log("start fetching artists ...")
    const missingRows = await dbUtils.query(` SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) `, []);
    const artistIds = missingRows.map((row) => row.artist_id);

    for (const [i, artistId] of artistIds.entries()) {
        await global.checkIsExit();
        console.log(`${i}/${artistIds.length} | artist: ${artistId}`);
        try {
            await fetch({ artistId: artistId });
        } catch (err) {
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}

// NOTE(review): the start of fetch() below is kept exactly as found (still collapsed
// onto a single line). Its body runs on past this line, so it cannot be safely
// reformatted in isolation.
// 获取音乐人详情 async function fetch({ artistId, debug = false }) { let result = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]); if (result[0].count > 0 && !debug) { console.log(`数据库中已有数据,跳过 artistId: ${artistId}`); return; } let url = `https://music.163.com/artist?id=${artistId}`; try { // var html = fs.readFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), 'utf8'); var html = await requestUtils.getApiResult(url); // fs.writeFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), html); } catch (errors) { console.error(errors); return; } if (html.includes(`
很抱歉,你要查找的网页找不到
`)) { // TODO 最后统一来处理这里 demo: artistId == 30084536 return; } // 正则匹配 let regExResult = /\