110 lines
4.1 KiB
JavaScript
110 lines
4.1 KiB
JavaScript
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
const requestUtils = require('../../../utils/requestUtils');
|
|
const sleepUtils = require('../../../utils/sleepUtils');
|
|
|
|
const dbUtils = global.dbUtils;
|
|
|
|
// 从数据库中查询
|
|
/**
 * Loads one artist row from the database together with the ids of the songs
 * linked to that artist.
 *
 * @param {Object} params
 * @param {number|string} params.artistId - Value bound to `artist_id` in both queries.
 * @returns {Promise<Object>} A copy of the first matching `artist` row with an
 *   added `songIds` array, or an empty object when no `artist` row matches.
 */
async function getFromDatabase({ artistId }) {
  // Look up the artist row itself.
  const artistRows = await dbUtils.query('SELECT * FROM artist WHERE artist_id = ?', [artistId]);
  if (artistRows.length === 0) return {};

  // Look up which songs are linked to this artist.
  const relationRows = await dbUtils.query('SELECT * FROM song_artist_relation WHERE artist_id = ?', [artistId]);

  // The JSON round-trip yields a plain copy, so the result-set row is not mutated below.
  const artistInfo = JSON.parse(JSON.stringify(artistRows[0]));
  artistInfo.songIds = relationRows.map((relation) => relation.song_id);
  return artistInfo;
}
|
|
|
|
// 从数据库中查出还缺少的歌手,并进行爬取
|
|
/**
 * Crawls every artist that `song_artist_relation` references but that has no
 * row in `artist` yet.
 *
 * For each missing artist id, in query order, it awaits `global.checkIsExit()`,
 * logs a progress line that includes `global.statistics()`, calls `fetch`, and
 * then sleeps for `global.sleepTime`. An error thrown by `fetch` is logged and
 * the loop moves on to the next artist.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
  console.log("start fetching artists ...");

  const missingRows = await dbUtils.query(`
    SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist )
  `, []);
  const artistIds = missingRows.map((row) => row.artist_id);

  for (const [index, artistId] of artistIds.entries()) {
    await global.checkIsExit();
    console.log(`${index}/${artistIds.length} | artist: ${artistId} | ${await global.statistics()}`);

    try {
      await fetch({ artistId });
    } catch (err) {
      // A single failing artist must not abort the whole run.
      console.error(err);
    }

    // Pause between artists, whether or not the fetch succeeded.
    await sleepUtils.sleep(global.sleepTime);
  }
}
|
|
|
|
// 获取音乐人详情
|
|
/**
 * Downloads the music.163.com page of one artist, extracts the artist
 * metadata and song ids from the HTML, and stores them in the database.
 *
 * The artist row and the song/artist relation rows are written with
 * `INSERT IGNORE`. Every write is awaited, so the returned promise settles
 * only after the writes have settled, and a failing write rejects it.
 *
 * @param {Object} params
 * @param {number|string} params.artistId - Artist id used in the page URL and as `artist_id`.
 * @param {boolean} [params.debug=false] - When true, re-fetch even if the artist row already exists.
 * @returns {Promise<Object|undefined>} The scraped
 *   `{ artistId, title, image, description, pubDate, songIds }`, or `undefined`
 *   when the artist is already stored (and `debug` is false), when the HTTP
 *   request throws, or when the page is the site's "not found" page.
 * @throws {Error} When the ld+json script, the og:image meta tag or the
 *   song-list textarea is missing from the page.
 * @throws {SyntaxError} When one of the embedded JSON payloads cannot be parsed.
 */
async function fetch({ artistId, debug = false }) {
  const existing = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
  if (existing[0].count > 0 && !debug) {
    console.log(`数据库中已有数据,跳过 artistId: ${artistId}`);
    return;
  }

  const url = `https://music.163.com/artist?id=${artistId}`;

  let html;
  try {
    html = await requestUtils.getApiResult(url);
  } catch (errors) {
    // Best effort: a failed download is logged and this artist is skipped.
    console.error(errors);
    return;
  }

  if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
    // TODO: handle these "not found" pages in one pass later (example: artistId 30084536).
    return;
  }

  // Pull the three embedded payloads out of the HTML.
  const artistInfoJSONString = extractPagePart(
    /\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/,
    html,
    'ld+json metadata script',
    artistId,
  );
  const artistInfoDict = JSON.parse(artistInfoJSONString);

  const image = extractPagePart(
    /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/,
    html,
    'og:image meta tag',
    artistId,
  );

  const songListJSONString = extractPagePart(
    /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/,
    html,
    'song-list textarea',
    artistId,
  );
  const songIds = JSON.parse(songListJSONString).map((song) => song.id);

  const artistInfo = {
    artistId,
    title: artistInfoDict.title,
    image,
    description: artistInfoDict.description,
    pubDate: artistInfoDict.pubDate,
    songIds,
  };

  await dbUtils.query('INSERT IGNORE INTO artist SET ?', {
    artist_id: artistInfo.artistId,
    title: artistInfo.title,
    description: artistInfo.description,
    image: artistInfo.image,
    pub_date: artistInfo.pubDate,
  });

  // A relation row is only written when both ids convert to a non-zero number.
  const isUsableId = (value) => {
    const numeric = Number(value);
    return !Number.isNaN(numeric) && numeric !== 0;
  };
  if (isUsableId(artistId)) {
    await Promise.all(
      songIds.filter(isUsableId).map((songId) =>
        dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
          song_id: songId,
          artist_id: artistId,
        }),
      ),
    );
  }

  return artistInfo;
}

// Returns capture group 1 of `pattern` in `html`, or throws an Error naming the missing part.
function extractPagePart(pattern, html, label, artistId) {
  const match = pattern.exec(html);
  if (match === null) {
    throw new Error(`artist ${artistId}: ${label} not found in page`);
  }
  return match[1];
}
|
|
|
|
module.exports = {
|
|
getFromDatabase: getFromDatabase,
|
|
fetch: fetch,
|
|
fetchAll: fetchAll,
|
|
} |