1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
Files
tools/netease_music/src/getInfo/songInfoUtils.old.js

144 lines
5.7 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
/**
 * Looks up a song in the database and attaches the ids of its related albums and artists.
 *
 * @param {{ songId: (number|string) }} param0 - id of the song to look up.
 * @returns {Promise<Object>} a plain copy of the `song` row extended with `albumIds` and
 *   `artistIds` arrays, or an empty object when no `song` row has that id.
 */
async function getFromDatabase({ songId }) {
    // The song row itself; without it the relation tables are not consulted at all.
    const songRows = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
    if (songRows.length === 0) {
        return {};
    }

    // Relations from the song to its albums and to its artists.
    const albumRows = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
    const artistRows = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);

    // Assemble the result on a JSON round-trip copy so the row object handed back by
    // dbUtils is not modified.
    const record = JSON.parse(JSON.stringify(songRows[0]));
    record.albumIds = albumRows.map(({ album_id }) => album_id);
    record.artistIds = artistRows.map(({ artist_id }) => artist_id);
    return record;
}
/**
 * Converts an optional numeric option into an integer that is safe to embed in SQL text.
 * Falsy values (undefined, null, 0, '') mean "option not given", matching how the
 * options were treated before validation was added.
 *
 * @param {*} value - raw option value (number or numeric string).
 * @param {string} label - option name, used in the error message.
 * @returns {(number|null)} the parsed integer, or null when the option is absent.
 * @throws {TypeError} when the value is present but is not a non-negative integer.
 */
function parseOptionalSongInteger(value, label) {
    if (!value) return null;
    const parsed = Number(value);
    if (!Number.isSafeInteger(parsed) || parsed < 0) {
        throw new TypeError(`fetchAll: args.${label} must be a non-negative integer, got "${String(value)}"`);
    }
    return parsed;
}

/**
 * Validates the optional sort direction against the only two legal SQL keywords.
 *
 * @param {*} order - raw option value.
 * @returns {(string|null)} 'ASC' or 'DESC', or null when the option is absent.
 * @throws {TypeError} when the value is present but is neither ASC nor DESC.
 */
function parseOptionalSortDirection(order) {
    if (!order) return null;
    const direction = String(order).trim().toUpperCase();
    if (direction !== 'ASC' && direction !== 'DESC') {
        throw new TypeError(`fetchAll: args.order must be ASC or DESC, got "${String(order)}"`);
    }
    return direction;
}

/**
 * Finds song ids that appear in song_artist_relation or song_album_relation but have
 * no row in `song` yet, and fetches each of them one after another.
 *
 * Between songs it awaits `global.checkIsExit()` and sleeps for `global.sleepTime`.
 * A failure while fetching one song is logged and does not stop the loop.
 *
 * @param {Object} [param0]
 * @param {Object} [param0.args] - optional filters.
 * @param {(number|string)} [param0.args.min] - exclusive lower bound for song_id.
 * @param {(number|string)} [param0.args.max] - inclusive upper bound for song_id.
 * @param {string} [param0.args.order] - 'ASC' or 'DESC' (case-insensitive).
 * @param {(number|string)} [param0.args.limit] - maximum number of song ids to process.
 * @returns {Promise<void>}
 * @throws {TypeError} when an option is present but malformed. The options are embedded
 *   in the SQL text, so they are validated first instead of being interpolated raw.
 */
async function fetchAll({ args = {} } = {}) {
    console.log("start fetching songs ...");

    const min = parseOptionalSongInteger(args.min, 'min');
    const max = parseOptionalSongInteger(args.max, 'max');
    const limit = parseOptionalSongInteger(args.limit, 'limit');
    const direction = parseOptionalSortDirection(args.order);

    // '1=1' keeps the clause well-formed when a bound is not supplied.
    const whereClause = [
        min !== null ? `song_id > ${min}` : '1=1',
        max !== null ? `song_id <= ${max}` : '1=1',
    ].join(' AND ');
    const sql = `
        SELECT DISTINCT song_id FROM song_artist_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
        UNION
        SELECT DISTINCT song_id FROM song_album_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
        ${direction !== null ? `ORDER BY song_id ${direction}` : ''}
        ${limit !== null ? `LIMIT ${limit}` : ''}
    `;
    console.log(sql);

    const rows = await dbUtils.query(sql, []);
    const songIds = rows.map((row) => row.song_id);

    for (let i = 0; i < songIds.length; i++) {
        await global.checkIsExit();
        const songId = songIds[i];
        console.log(`${i + 1}/${songIds.length} | song: ${songId} | ${args.min || "?"}-${args.max || "?"}`);
        try {
            await fetch({ songId: songId });
        } catch (err) {
            // Best effort: one broken song page must not abort the whole run.
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Returns the first capture group of `pattern` in `text`.
 *
 * @param {RegExp} pattern - non-global regular expression with one capture group.
 * @param {string} text - text to search.
 * @returns {(string|null)} the captured text, or null when the pattern does not match.
 */
function firstCapture(pattern, text) {
    const found = pattern.exec(text);
    return found === null ? null : found[1];
}

/**
 * Same as firstCapture, but for fields the song page must contain.
 *
 * @param {RegExp} pattern - non-global regular expression with one capture group.
 * @param {string} text - text to search.
 * @param {string} fieldName - human-readable field name for the error message.
 * @param {(number|string)} songId - song id, for the error message.
 * @returns {string} the captured text.
 * @throws {Error} when the pattern does not match.
 */
function requireCapture(pattern, text, fieldName, songId) {
    const value = firstCapture(pattern, text);
    if (value === null) {
        throw new Error(`song ${songId}: cannot find ${fieldName} in the song page`);
    }
    return value;
}

/**
 * Fetches the detail page of one song, extracts its metadata and stores it.
 *
 * Behaviour, in order:
 *  - If the `song` table already has a row for this id, the song is skipped, unless
 *    `debug` is truthy.
 *  - If the request fails, the error is logged and the function returns undefined.
 *  - If the page is the "page not found" page, the song's rows are deleted from
 *    song_album_relation and song_artist_relation and the function returns undefined.
 *  - Otherwise the metadata is parsed, the album relation, artist relations and the
 *    song row are inserted with INSERT IGNORE, and the parsed info is returned once
 *    all inserts have finished.
 *
 * The two album tags are optional and are looked up independently of each other.
 *
 * @param {Object} param0
 * @param {(number|string)} param0.songId - id of the song to fetch.
 * @param {boolean} [param0.debug=false] - when truthy, fetch even if the song is stored.
 * @returns {Promise<(Object|undefined)>} `{ songId, title, image, pubDate, artist,
 *   artistIds, album, albumId, duration }`, or undefined when nothing was stored.
 * @throws {Error} when a required piece of metadata is missing from the page or the
 *   ld+json payload is not valid JSON; also rejects when a database query fails.
 */
async function fetch({ songId, debug = false }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
    if (existing[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 songId: ${songId}`);
        return;
    }

    const url = `https://music.163.com/song?id=${songId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        console.error(errors);
        return;
    }

    // The site answers with a "page not found" page for songs that do not exist;
    // drop the dangling relations so the song is not retried.
    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        const deleteResult1 = await dbUtils.query('DELETE FROM song_album_relation WHERE song_id = ?', [songId]);
        const deleteResult2 = await dbUtils.query('DELETE FROM song_artist_relation WHERE song_id = ?', [songId]);
        console.log(`song: ${songId} 不存在从song_album_relation, song_artist_relation表中删除. affectedRows: ${deleteResult1.affectedRows}, ${deleteResult2.affectedRows}`);
        return;
    }

    // The publication date comes from the embedded ld+json block.
    const songInfoJSONString = requireCapture(
        /\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/,
        html,
        'ld+json metadata',
        songId
    );
    let songInfoDict;
    try {
        songInfoDict = JSON.parse(songInfoJSONString);
    } catch (err) {
        throw new Error(`song ${songId}: invalid ld+json metadata: ${err.message}`);
    }

    // TODO: consider song aliases, e.g. https://music.163.com/#/song?id=26830207
    const title = requireCapture(/<meta property="og:title" content="(.*?)" \/>/, html, 'og:title', songId);
    const image = requireCapture(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/, html, 'og:image', songId);
    const artist = requireCapture(/<meta property="og:music:artist" content="(.*?)" \/>/, html, 'og:music:artist', songId);
    const duration = requireCapture(/<meta property="music:duration" content="(.*?)"\/>/, html, 'music:duration', songId);

    // A song is not necessarily part of an album, so these two tags are optional.
    const album = firstCapture(/<meta property="og:music:album" content="(.*?)"\/>/, html);
    const albumId = firstCapture(/<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/, html);

    // One music:musician tag per artist.
    const artistPattern = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
    const artistIds = [];
    let matched = null;
    while ((matched = artistPattern.exec(html)) !== null) {
        artistIds.push(matched[1]);
    }

    const songInfo = {
        songId: songId,
        title: title,
        image: image,
        pubDate: songInfoDict.pubDate,
        artist: artist,
        artistIds: artistIds,
        album: album || null,
        albumId: albumId || null,
        duration: duration,
    };

    // Issue the inserts together, but wait for all of them: the caller must not see
    // the song as done (or let the process exit) while writes are still in flight, and
    // a failed insert must surface as a rejection instead of an unhandled one.
    const writes = [];
    if (songInfo.albumId !== null) {
        writes.push(dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
            song_id: songInfo.songId,
            album_id: songInfo.albumId,
        }));
    }
    for (const artistId of artistIds) {
        writes.push(dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
            song_id: songInfo.songId,
            artist_id: artistId,
        }));
    }
    writes.push(dbUtils.query('INSERT IGNORE INTO song SET ?', {
        song_id: songInfo.songId,
        title: songInfo.title,
        image: songInfo.image,
        pub_date: songInfo.pubDate,
    }));
    await Promise.all(writes);

    return songInfo;
}
// Public API of this module.
module.exports = { getFromDatabase, fetch, fetchAll };