115 lines
4.4 KiB
JavaScript
115 lines
4.4 KiB
JavaScript
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
const requestUtils = require('../../../utils/requestUtils');
|
||
const sleepUtils = require('../../../utils/sleepUtils');
|
||
|
||
const dbUtils = global.dbUtils;
|
||
|
||
/**
 * Load one artist and the ids of its related songs from the database.
 *
 * @param {Object} options
 * @param {number|string} options.artistId - Value matched against `artist.artist_id`.
 * @returns {Promise<Object>} An empty object when no artist row matches; otherwise a
 *   plain copy of the artist row with an extra `songIds` array taken from
 *   `song_artist_relation.song_id`.
 */
async function getFromDatabase({ artistId }) {
    // Look up the artist row itself.
    const artistRows = await dbUtils.query('SELECT * FROM artist WHERE artist_id = ?', [artistId]);
    if (artistRows.length === 0) {
        return {};
    }

    // Look up the song <-> artist relations for this artist.
    const relationRows = await dbUtils.query('SELECT * FROM song_artist_relation WHERE artist_id = ?', [artistId]);

    // The JSON round-trip yields a plain-object copy of the row; values that are
    // not JSON-native come out in their JSON form. Kept as-is because callers may
    // rely on that shape.
    const artistInfo = JSON.parse(JSON.stringify(artistRows[0]));
    artistInfo.songIds = relationRows.map((relation) => relation.song_id);
    return artistInfo;
}
|
||
|
||
/**
 * Crawl every artist that is referenced by `song_artist_relation` but has no
 * row in `artist` yet.
 *
 * Artists are processed one at a time. Before each one `global.checkIsExit()`
 * is awaited (NOTE(review): presumably a stop/pause hook — confirm), and after
 * each one the loop waits via `sleepUtils.sleep(global.sleepTime)`.
 * An error thrown while fetching a single artist is logged and does not stop
 * the loop.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
    console.log("start fetching artists ...");
    const missingRows = await dbUtils.query(`
        SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )
    `, []);
    const artistIds = missingRows.map((row) => row.artist_id);

    for (const [index, artistId] of artistIds.entries()) {
        await global.checkIsExit();
        console.log(`${index + 1}/${artistIds.length} | artist: ${artistId}`);
        try {
            await fetch({ artistId });
        } catch (err) {
            // One failing artist must not abort the whole crawl.
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}
|
||
|
||
/**
 * Fetch one artist's page from music.163.com, parse it and store the result.
 *
 * Flow:
 *  1. Skip when the artist already has a row in `artist` (unless `debug`).
 *  2. Download `https://music.163.com/artist?id=<artistId>`.
 *  3. If the page is the "not found" page, delete the artist's rows from
 *     `song_artist_relation` and stop.
 *  4. Parse the ld+json block, the og:image meta tag and the embedded song list.
 *  5. Insert the song/artist relations, then the artist row.
 *
 * All database writes are awaited, so the returned promise settles only after
 * they finished and a failing write rejects it. The artist row is written last
 * so that an artist whose relation inserts failed is not recorded as present.
 *
 * @param {Object} options
 * @param {number|string} options.artistId - Artist id used in the page URL and as `artist_id`.
 * @param {boolean} [options.debug=false] - When true, fetch even if the artist row already exists.
 * @returns {Promise<Object|undefined>} `{ artistId, title, image, description, pubDate, songIds }`,
 *   or `undefined` when the artist was skipped, the request failed, or the page does not exist.
 * @throws {Error} When the page has no ld+json block or no og:image meta tag. JSON
 *   parse errors of the ld+json block and database errors propagate as well.
 */
async function fetch({ artistId, debug = false }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
    if (existing[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 artistId: ${artistId}`);
        return undefined;
    }

    const url = `https://music.163.com/artist?id=${artistId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        // A failed request is logged and treated as "nothing fetched".
        console.error(errors);
        return undefined;
    }

    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        const deleteResult = await dbUtils.query('DELETE FROM song_artist_relation WHERE artist_id = ?', [artistId]);
        console.log(`artist: ${artistId} 不存在,从song_artist_relation表中删除. affectedRows: ${deleteResult.affectedRows}`);
        return undefined;
    }

    // Artist metadata lives in the page's ld+json <script> block.
    const ldJsonMatch = /\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/.exec(html);
    if (ldJsonMatch === null) {
        throw new Error(`artist ${artistId}: ld+json block not found in page`);
    }
    const artistInfoDict = JSON.parse(ldJsonMatch[1]);

    // Only the path part after the image host is kept.
    const imageMatch = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html);
    if (imageMatch === null) {
        throw new Error(`artist ${artistId}: og:image meta tag not found in page`);
    }
    const image = imageMatch[1];

    // The song list is optional: an artist may have no songs at all,
    // e.g. https://music.163.com/#/artist?id=30032762
    let songIds = [];
    const songListMatch = /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/.exec(html);
    if (songListMatch !== null) {
        try {
            songIds = JSON.parse(songListMatch[1]).map((song) => song.id);
        } catch (error) {
            // Best effort: keep going without songs, but do not hide the problem.
            console.warn(`artist ${artistId}: could not parse song list, continuing without songs`, error);
        }
    }

    const artistInfo = {
        artistId: artistId,
        title: artistInfoDict.title,
        image: image,
        description: artistInfoDict.description,
        pubDate: artistInfoDict.pubDate,
        songIds: songIds,
    };

    // An id is usable when it converts to a number other than NaN and 0.
    const isUsableId = (id) => {
        const numeric = Number(id);
        return !Number.isNaN(numeric) && numeric !== 0;
    };

    // Relations are only written when both ids are usable.
    const relationInserts = isUsableId(artistId)
        ? songIds.filter(isUsableId).map((songId) => dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
            song_id: songId,
            artist_id: artistId,
        }))
        : [];
    await Promise.all(relationInserts);

    await dbUtils.query('INSERT IGNORE INTO artist SET ?', {
        artist_id: artistInfo.artistId,
        title: artistInfo.title,
        description: artistInfo.description,
        image: artistInfo.image,
        pub_date: artistInfo.pubDate,
    });
    return artistInfo;
}
|
||
|
||
module.exports = {
|
||
getFromDatabase: getFromDatabase,
|
||
fetch: fetch,
|
||
fetchAll: fetchAll,
|
||
} |