181 lines
8.3 KiB
JavaScript
181 lines
8.3 KiB
JavaScript
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
const requestUtils = require('../../../utils/requestUtils');
|
||
const sleepUtils = require('../../../utils/sleepUtils');
|
||
const dataManager = require('../dataManager');
|
||
|
||
const dbUtils = global.dbUtils;
|
||
|
||
const { song_detail } = require('NeteaseCloudMusicApi');
|
||
|
||
// Look up the songs that are still missing from the database and crawl them.
|
||
/**
 * Crawl details for every song id that `dataManager.song.getIdsToFetch`
 * returns.
 *
 * The ids are split into batches of up to 1000 and each batch is handed to
 * `fetch`. A batch that throws is logged and skipped so the remaining batches
 * still run. Before each batch `global.checkIsExit()` is awaited, and after
 * each batch the loop sleeps for `global.sleepTime`.
 *
 * @param {Object} [options] - May be omitted entirely.
 * @param {Object} [options.args={}] - Forwarded unchanged to `getIdsToFetch`;
 *   `args.min` / `args.max` are also echoed in the progress log.
 * @returns {Promise<void>}
 */
async function fetchAll({ args = {} } = {}) {
  console.log("start fetching songs ...");
  const songIds = await dataManager.song.getIdsToFetch(args);

  const step = 1000;
  const count = Math.ceil(songIds.length / step);

  // Print "?" only for a bound that is absent; 0 is a legitimate bound.
  const minLabel = args.min != null ? args.min : "?";
  const maxLabel = args.max != null ? args.max : "?";

  for (let i = 0; i < count; i++) {
    await global.checkIsExit();

    // Batch i covers indices [i * step, (i + 1) * step); the last one may be shorter.
    const batch = songIds.slice(i * step, (i + 1) * step);
    console.log(`${i + 1}/${count} | song: ${batch[0]}-${batch[batch.length - 1]} | ${minLabel}-${maxLabel}`);

    try {
      await fetch({ songIdArray: batch });
    } catch (err) {
      // Best effort: one failing batch must not abort the whole crawl.
      console.error(err);
    }

    await sleepUtils.sleep(global.sleepTime);
  }
}
|
||
|
||
// Fetch song details for a batch of song ids.
|
||
/**
 * Fetch the details of a batch of songs through `song_detail` and store them.
 *
 * For every returned song this collects the album id, the artist ids and the
 * song/album and song/artist pairs, then writes, in this order:
 * `wait_check.insert("album")`, `wait_check.insert("artist")`,
 * `song_album.insertCollection`, `song_artist.insertCollection`,
 * `song.insertCollection`.
 *
 * A failed request or a response without a song list is logged and the call
 * returns without writing anything.
 *
 * NOTE(review): album id 0 ("no album") is also passed to
 * `wait_check.insert("album", ...)` — confirm that is intended.
 *
 * @param {Object} options
 * @param {Array<number|string>} options.songIdArray - Song ids to request.
 * @param {boolean} [options.debug=false] - Accepted but not used by this function.
 * @returns {Promise<void>}
 */
async function fetch({ songIdArray, debug = false }) {
  // Nothing to request for a missing or empty batch.
  if (!Array.isArray(songIdArray) || songIdArray.length === 0) return;

  // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e8%8e%b7%e5%8f%96%e6%ad%8c%e6%9b%b2%e8%af%a6%e6%83%85
  let songResult;
  try {
    // A single request can return roughly 1000+ songs.
    songResult = await song_detail({ ids: songIdArray.join(',') });
  } catch (errors) {
    console.error(errors);
    return;
  }

  // An error-shaped response carries no `body.songs`; do not crash on it.
  const songs = songResult && songResult.body && songResult.body.songs;
  if (!Array.isArray(songs)) {
    console.error(`song_detail returned no song list for songs ${songIdArray[0]}-${songIdArray[songIdArray.length - 1]}`);
    return;
  }

  const albumIds = [];
  const artistIds = [];
  const songAlbumRel = [];
  const songArtistRel = [];
  const songInfoList = [];

  for (const song of songs) {
    // Tolerate a song without artist list or album object instead of failing the whole batch.
    const artists = Array.isArray(song.ar) ? song.ar : [];
    const albumId = (song.al && song.al.id) || 0;

    for (const artist of artists) {
      artistIds.push(artist.id);
      songArtistRel.push([song.id, artist.id]);
    }
    albumIds.push(albumId);
    songAlbumRel.push([song.id, albumId]);

    songInfoList.push({
      title: song.name, // song title
      id: song.id, // song id
      type: song.t, // 0: regular; 1: uploaded via cloud disk, no public counterpart; 2: uploaded via cloud disk, public counterpart exists
      alias: JSON.stringify(song.alia), // alias list; the first alias is shown as the subtitle
      pop: song.pop, // popularity, a decimal usually taking a few discrete values in [0.0, 100.0]
      fee: song.fee, // 0: free or no copyright; 1: VIP song; 4: album purchase; 8: low quality free, high quality/download for members
      duration: song.dt, // song duration
      quality: JSON.stringify({ h: song.h, m: song.m, l: song.l, sq: song.sq }), // high/medium/low/lossless file info
      version: song.version, // song version info
      cd: song.cd, // None or a string such as "04", "1/2", "3", "null": which CD of the album (audio file tag)
      no: song.no, // track number on the CD, 0 when the field is absent (audio file tag)
      djId: song.djId, // 0: not a DJ programme; otherwise the DJ id
      sId: song.s_id, // for t == 2 songs, id of the matched public version
      originCoverType: song.originCoverType, // 0: unknown, 1: original, 2: cover
      image: "", // the API response carries no image
      pubTime: song.publishTime, // Unix timestamp in milliseconds
      mv: song.mv, // non-zero: MV id
      single: song.single, // 0: has album info or is a DJ programme; 1: unknown album
      noCopyrightRcmd: song.noCopyrightRcmd ? JSON.stringify(song.noCopyrightRcmd) : "", // None: playable; non-empty: no copyright

      artist: artists.map((artist) => artist.id), // artist id list
      album: albumId, // album id; 0 for DJ programmes or songs without album info
    });
  }

  if (songInfoList.length === 0) return;

  console.log("插入数据库");
  await dataManager.wait_check.insert("album", albumIds);
  await dataManager.wait_check.insert("artist", artistIds);
  await dataManager.song_album.insertCollection(songAlbumRel);
  await dataManager.song_artist.insertCollection(songArtistRel);
  await dataManager.song.insertCollection(songInfoList);
}
|
||
|
||
// Fetch the details of a single song (legacy variant that scrapes the song's HTML page).
|
||
/**
 * Legacy single-song fetcher: downloads `https://music.163.com/song?id=<songId>`
 * and extracts the song metadata from the page's ld+json script and meta tags.
 *
 * Flow:
 *  - If the `song` table already has a row for `songId` (and `debug` is false),
 *    nothing is done.
 *  - If the page download fails, the error is logged and nothing is done.
 *  - If the page is the "not found" page, the song's rows are deleted from
 *    `song_album_relation` and `song_artist_relation`.
 *  - Otherwise the album relation (when the page names an album), the artist
 *    relations and the song row are written with `INSERT IGNORE`, and every
 *    write is awaited before returning.
 *
 * @param {Object} options
 * @param {number|string} options.songId - Id of the song to fetch.
 * @param {boolean} [options.debug=false] - When true, re-fetch even if the song is already stored.
 * @returns {Promise<Object|undefined>} The extracted song info, or undefined
 *   when the song was skipped, could not be downloaded, or does not exist.
 * @throws {Error} When the page lacks the ld+json script or one of the
 *   required meta tags (title, image, artist, duration).
 */
async function fetch_old({ songId, debug = false }) {
  const existing = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
  if (existing[0].count > 0 && !debug) {
    console.log(`数据库中已有数据,跳过 songId: ${songId}`);
    return;
  }

  const url = `https://music.163.com/song?id=${songId}`;
  let html;
  try {
    html = await requestUtils.getApiResult(url);
  } catch (errors) {
    console.error(errors);
    return;
  }

  if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
    const deleteResult1 = await dbUtils.query('DELETE FROM song_album_relation WHERE song_id = ?', [songId]);
    const deleteResult2 = await dbUtils.query('DELETE FROM song_artist_relation WHERE song_id = ?', [songId]);
    console.log(`song: ${songId} 不存在,从song_album_relation, song_artist_relation表中删除. affectedRows: ${deleteResult1.affectedRows}, ${deleteResult2.affectedRows}`);
    return;
  }

  // Capture group 1 of `pattern` in the page, or null when there is no match.
  const capture = (pattern) => {
    const found = pattern.exec(html);
    return found === null ? null : found[1];
  };
  // Like capture(), but a missing field is reported by name instead of
  // surfacing as an anonymous "cannot read property of null".
  const captureRequired = (label, pattern) => {
    const value = capture(pattern);
    if (value === null) {
      throw new Error(`song ${songId}: "${label}" not found in page ${url}`);
    }
    return value;
  };

  const songInfoDict = JSON.parse(
    captureRequired('ld+json', /<script type="application\/ld\+json">([\S\s]*?)<\/script>/)
  );

  // TODO: handle song aliases, e.g. https://music.163.com/#/song?id=26830207

  const title = captureRequired('og:title', /<meta property="og:title" content="(.*?)" \/>/);
  const image = captureRequired('og:image', /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/);
  const artist = captureRequired('og:music:artist', /<meta property="og:music:artist" content="(.*?)" \/>/);
  const duration = captureRequired('music:duration', /<meta property="music:duration" content="(.*?)"\/>/);

  // The album tags are optional: a song does not have to belong to an album.
  const album = capture(/<meta property="og:music:album" content="(.*?)"\/>/);
  const albumId = capture(/<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/);

  // A fresh /g literal per call, so lastIndex always starts at 0.
  const musicianPattern = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
  const artistIds = [];
  let matched = null;
  while ((matched = musicianPattern.exec(html)) !== null) {
    artistIds.push(matched[1]);
  }

  const songInfo = {
    songId: songId,
    title: title,
    image: image,
    pubDate: songInfoDict.pubDate,
    artist: artist,
    artistIds: artistIds,
    album: album || null,
    albumId: albumId || null,
    duration: duration,
  };

  // Await every write so the caller sees failures and the data is stored
  // by the time this function resolves.
  if (songInfo.albumId !== null) {
    await dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
      song_id: songInfo.songId,
      album_id: songInfo.albumId,
    });
  }
  for (const artistId of artistIds) {
    await dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
      song_id: songInfo.songId,
      artist_id: artistId,
    });
  }
  await dbUtils.query('INSERT IGNORE INTO song SET ?', {
    song_id: songInfo.songId,
    title: songInfo.title,
    image: songInfo.image,
    pub_date: songInfo.pubDate,
  });

  return songInfo;
}
|
||
|
||
module.exports = {
|
||
fetch: fetch,
|
||
fetchAll: fetchAll,
|
||
} |