1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
Files
tools/netease_music/src/getInfo/artistInfoUtils.js

115 lines
4.4 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
/**
 * Loads one artist from the database together with the ids of its songs.
 *
 * @param {object} options
 * @param {number|string} options.artistId - Value matched against `artist.artist_id`.
 * @returns {Promise<object>} A JSON-round-tripped copy of the first matching `artist` row with an
 *   added `songIds` array, or an empty object when no row matches.
 */
async function getFromDatabase({ artistId }) {
    // Look the artist up first; without a row there is nothing to assemble.
    const artistRows = await dbUtils.query('SELECT * FROM artist WHERE artist_id = ?', [artistId]);
    if (artistRows.length === 0) {
        return {};
    }

    // Collect the artist-to-song relations.
    const relationRows = await dbUtils.query('SELECT * FROM song_artist_relation WHERE artist_id = ?', [artistId]);

    // The JSON round trip yields a plain, detached copy of the row (undefined fields are dropped).
    const artistInfo = JSON.parse(JSON.stringify(artistRows[0]));
    artistInfo.songIds = relationRows.map(({ song_id }) => song_id);
    return artistInfo;
}
/**
 * Crawls every artist that is referenced in `song_artist_relation` but has no row in `artist`.
 *
 * Artists are processed one at a time. Before each one `global.checkIsExit()` is awaited
 * (defined outside this file; presumably a shutdown hook), and after each one
 * `sleepUtils.sleep(global.sleepTime)` is awaited. An error thrown by `fetch` is logged and
 * does not stop the loop.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
    console.log("start fetching artists ...")
    const missingRows = await dbUtils.query(`
SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )
`, []);
    const pendingIds = missingRows.map(({ artist_id }) => artist_id);
    const total = pendingIds.length;

    let position = 0;
    for (const artistId of pendingIds) {
        position += 1;
        await global.checkIsExit();
        console.log(`${position}/${total} | artist: ${artistId}`);
        try {
            await fetch({ artistId });
        } catch (err) {
            // Keep going: one failing artist must not abort the whole crawl.
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Fetches one artist page from music.163.com, parses it, and stores the artist together with
 * its song relations in the database.
 *
 * @param {object} options
 * @param {number|string} options.artistId - Artist id used in the page URL and as the database key.
 * @param {boolean} [options.debug=false] - When true, the page is fetched and parsed even if the
 *   artist already has a row in the `artist` table.
 * @returns {Promise<object|undefined>} The parsed artist info (`artistId`, `title`, `image`,
 *   `description`, `pubDate`, `songIds`), or `undefined` when the artist is skipped, the HTTP
 *   request fails, or the page reports that the artist does not exist.
 * @throws {Error} When the page lacks the expected ld+json block or og:image tag, when the
 *   ld+json payload is not valid JSON, or when a database query rejects.
 */
async function fetch({ artistId, debug = false }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
    if (existing[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 artistId: ${artistId}`);
        return;
    }

    const url = `https://music.163.com/artist?id=${artistId}`;
    let html;
    try {
        // Debug aid: swap the request for a cached copy when working offline.
        // html = fs.readFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), 'utf8');
        html = await requestUtils.getApiResult(url);
        // fs.writeFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), html);
    } catch (errors) {
        // A failed request is logged and treated as "nothing fetched".
        console.error(errors);
        return;
    }

    // The site answers with a "page not found" note for unknown artists; drop their relations
    // so that they are not picked up again.
    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        const deleteResult = await dbUtils.query('DELETE FROM song_artist_relation WHERE artist_id = ?', [artistId]);
        console.log(`artist: ${artistId} 不存在从song_artist_relation表中删除. affectedRows: ${deleteResult.affectedRows}`);
        return;
    }

    // Artist metadata is embedded as an ld+json script block.
    const ldJsonMatch = /<script type="application\/ld\+json">([\S\s]*?)<\/script>/.exec(html);
    if (ldJsonMatch === null) {
        throw new Error(`artist: ${artistId} page has no application/ld+json block`);
    }
    const artistInfoDict = JSON.parse(ldJsonMatch[1]);

    // Only the path after the image host is kept.
    const imageMatch = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html);
    if (imageMatch === null) {
        throw new Error(`artist: ${artistId} page has no og:image meta tag in the expected format`);
    }
    const image = imageMatch[1];

    let songIds = [];
    try {
        const songListJSONString = /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/.exec(html)[1];
        songIds = JSON.parse(songListJSONString).map((song) => song.id);
    } catch (error) {
        // Deliberately best-effort: an artist may have no songs at all
        // (e.g. https://music.163.com/#/artist?id=30032762), in which case the list stays empty.
    }

    const artistInfo = {
        artistId,
        title: artistInfoDict.title,
        image,
        description: artistInfoDict.description,
        pubDate: artistInfoDict.pubDate,
        songIds,
    };

    // Ids that are not numeric, or are zero, are never written to the relation table.
    const isUsableId = (value) => !Number.isNaN(Number(value)) && Number(value) !== 0;

    // Wait for the writes so the caller only continues once the data is stored and so that a
    // failing query rejects this call instead of becoming an unhandled rejection.
    if (isUsableId(artistId)) {
        await Promise.all(
            songIds.filter(isUsableId).map((songId) => dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
                song_id: songId,
                artist_id: artistId,
            }))
        );
    }

    // The artist row is written last: if a relation insert fails, the artist stays "missing"
    // and will be retried on a later crawl.
    await dbUtils.query('INSERT IGNORE INTO artist SET ?', {
        artist_id: artistInfo.artistId,
        title: artistInfo.title,
        description: artistInfo.description,
        image: artistInfo.image,
        pub_date: artistInfo.pubDate,
    });

    return artistInfo;
}
module.exports = {
getFromDatabase: getFromDatabase,
fetch: fetch,
fetchAll: fetchAll,
}