1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
tools/netease_music/index.js
2022-10-01 01:47:27 +08:00

362 lines
14 KiB
JavaScript

const fs = require('fs');
const path = require('path');
const dbUtils = require('../utils/dbUtils');
const requestUtils = require('../utils/requestUtils');
const sleepUtils = require('../utils/sleepUtils');
/**
 * Entry point of the NetEase Music crawler.
 *
 * Calls `dbUtils.create("neteaseMusic")` once, then loops forever: each pass
 * deletes relation rows that carry a zero id and runs one crawl round via
 * `startGetMusic(1)`. The loop only ends when one of the awaited calls
 * rejects; that rejection propagates to the caller.
 *
 * @returns {Promise<void>} Never resolves normally.
 */
async function main() {
    console.log("neteaseMusic Starting...");

    // Pick the database to work with (presumably sets up the connection — confirm in dbUtils).
    dbUtils.create("neteaseMusic");

    // Manual one-off calls, handy while debugging:
    // getMusicInfo({ songId: "1855221507" });
    // getMusicInfo({ songId: "1855221517" });
    // getMusicInfo({ songId: "1861632812" });
    // getArtistInfo({ artistId: "1079074" });
    // getArtistInfo({ artistId: "1079075" });
    // getAlbumInfo({ albumId: "74268047" });
    // getAlbumInfo({ albumId: "129327797" });
    // Not every artist has a personal homepage, e.g. https://music.163.com/#/artist?id=1079075
    // getUserInfo({ userId: "37365202" });
    // getUserInfo({ userId: "29879272" });

    for (;;) {
        // Purge dirty relation rows (any side of the relation equal to 0).
        const artistCleanup = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
        const albumCleanup = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
        console.log(artistCleanup.affectedRows, albumCleanup.affectedRows);

        await startGetMusic(1);
    }
}
/**
 * Reports whether the operator asked the crawler to stop.
 *
 * The flag lives in `stop.txt` (resolved against the current working
 * directory). Its content is trimmed before comparison so that a file written
 * with a trailing newline (e.g. `echo 1 > stop.txt`) is honoured. A missing
 * file means "keep running"; any other read error is rethrown.
 *
 * @returns {boolean} true when the file content is "1".
 */
function isStopRequested() {
    let flag;
    try {
        flag = fs.readFileSync('stop.txt', 'utf8');
    } catch (err) {
        if (err && err.code === 'ENOENT') {
            return false;
        }
        throw err;
    }
    return String(flag).trim() === "1";
}

/**
 * Crawls every id of one entity kind, one after another.
 *
 * For each id: logs progress plus the current table statistics, awaits the
 * fetcher, sleeps, then checks the stop flag.
 *
 * @param {string} kind - Label used in the progress log ("song", "album", "artist").
 * @param {Array} ids - Ids to crawl.
 * @param {function(*): Promise} fetchOne - Fetches and stores a single id.
 * @param {*} sleepTime - Passed through to `sleepUtils.sleep` after each item.
 * @throws {Error} "Stopped" when the stop flag is set.
 */
async function crawlMissing(kind, ids, fetchOne, sleepTime) {
    for (let i = 0; i < ids.length; i++) {
        const id = ids[i];
        console.log(`${i}/${ids.length} | ${kind}: ${id} | ${await statistics()}`);
        await fetchOne(id);
        await sleepUtils.sleep(sleepTime);
        if (isStopRequested()) {
            throw new Error(`Stopped`);
        }
    }
}

/**
 * Runs one crawl round: songs, then albums, then artists that are referenced
 * by the relation tables but have no row of their own yet.
 *
 * Each "missing ids" query is issued only after the previous phase finished,
 * because crawling one kind adds relation rows that reference the others.
 *
 * @param {*} sleepTime - Delay handed to `sleepUtils.sleep` after every item.
 * @returns {Promise<void>}
 * @throws {Error} "Stopped" when `stop.txt` contains "1".
 */
async function startGetMusic(sleepTime) {
    // Songs referenced by a relation row but absent from `song`.
    console.log("start fetching songs ...");
    const songRows = await dbUtils.query(`
        SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
        UNION
        SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
    `, []);
    await crawlMissing(
        "song",
        songRows.map(row => row.song_id),
        songId => getMusicInfo({ songId: songId }),
        sleepTime
    );

    // Albums referenced by a relation row but absent from `album`.
    console.log("start fetching albums ...");
    const albumRows = await dbUtils.query(`
        SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
    `, []);
    await crawlMissing(
        "album",
        albumRows.map(row => row.album_id),
        albumId => getAlbumInfo({ albumId: albumId }),
        sleepTime
    );

    // Artists referenced by a relation row but absent from `artist`.
    console.log("start fetching artists ...");
    const artistRows = await dbUtils.query(`
        SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist )
    `, []);
    await crawlMissing(
        "artist",
        artistRows.map(row => row.artist_id),
        artistId => getArtistInfo({ artistId: artistId }),
        sleepTime
    );
}
/**
 * Builds a one-line summary of how many rows each crawler table holds.
 *
 * Runs a single query that cross-joins five `count(*)` sub-selects (song,
 * album, artist, song_album_relation, song_artist_relation) and formats the
 * first result row.
 *
 * @returns {Promise<string>} e.g. "song: 1, album: 2, artist: 3 | songAlbum: 4, songArtist: 5"
 */
async function statistics() {
    const countsQuery = `
SELECT
song_count,
album_count,
artist_count,
song_album_count,
song_artist_count
FROM
( SELECT count(*) AS song_count FROM song ) t1,
( SELECT count(*) AS album_count FROM album ) t2,
( SELECT count(*) AS artist_count FROM artist ) t3,
( SELECT count(*) AS song_album_count FROM song_album_relation ) t4,
( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t5`;

    const rows = await dbUtils.query(countsQuery, []);
    const counts = rows[0];

    const entityPart = `song: ${counts.song_count}, album: ${counts.album_count}, artist: ${counts.artist_count}`;
    const relationPart = `songAlbum: ${counts.song_album_count}, songArtist: ${counts.song_artist_count}`;
    return `${entityPart} | ${relationPart}`;
}
/**
 * Fetches the details of one song and stores them.
 *
 * Downloads https://music.163.com/song?id=<songId>, extracts the metadata
 * from the ld+json block and the `<meta>` tags, then inserts the song row and
 * its album/artist relation rows.
 *
 * The function resolves to `undefined` without writing anything when
 *   - the song is already present in the `song` table,
 *   - the page request fails (the error is logged), or
 *   - the page lacks the expected metadata (logged), so one broken page
 *     cannot abort the whole crawl.
 *
 * Every INSERT is awaited, so the queries have settled — and any database
 * error has been propagated to the caller — before the promise resolves.
 *
 * @param {{ songId: (string|number) }} param0 - NetEase song id.
 * @returns {Promise<Object|undefined>} `{ songId, title, image, pubDate,
 *   artist, artistIds, album, albumId, duration }`, or undefined when skipped.
 */
async function getMusicInfo({ songId }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
    if (existing[0].count > 0) {
        console.log(`数据库中已有数据,跳过 songId: ${songId}`);
        return;
    }

    const url = `https://music.163.com/song?id=${songId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        console.error(errors);
        return;
    }

    // First capture group of `pattern` in the page, or null when it does not match.
    const capture = (pattern) => {
        const found = pattern.exec(html);
        return found === null ? null : found[1];
    };
    // Relation rows with a zero / non-numeric id are dirty data; never write them.
    const isUsableId = (id) => !isNaN(Number(id)) && Number(id) !== 0;

    // Structured metadata embedded in the page.
    const ldJsonText = capture(/<script type="application\/ld\+json">([\S\s]*?)<\/script>/);
    let songInfoDict = null;
    if (ldJsonText !== null) {
        try {
            songInfoDict = JSON.parse(ldJsonText);
        } catch (err) {
            console.error(err);
        }
    }

    const title = capture(/<meta property="og:title" content="(.*?)" \/>/);
    const image = capture(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/);
    const artist = capture(/<meta property="og:music:artist" content="(.*?)" \/>/);
    const duration = capture(/<meta property="music:duration" content="(.*?)"\/>/);
    if (!songInfoDict || title === null || image === null || artist === null || duration === null) {
        console.error(`Failed to parse song page, skipped songId: ${songId}`);
        return;
    }

    // Optional: a song does not have to belong to an album.
    const album = capture(/<meta property="og:music:album" content="(.*?)"\/>/);
    const albumId = capture(/<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/);

    // A song can list several musicians; collect every id.
    const musicianPattern = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
    const artistIds = [];
    let matched = null;
    while ((matched = musicianPattern.exec(html)) !== null) {
        artistIds.push(matched[1]);
    }

    const songInfo = {
        songId: songId,
        title: title,
        image: image,
        pubDate: songInfoDict.pubDate,
        artist: artist,
        artistIds: artistIds,
        album: album || null,
        albumId: albumId || null,
        duration: duration,
    };

    await dbUtils.query('INSERT IGNORE INTO song SET ?', {
        song_id: songInfo.songId,
        title: songInfo.title,
        image: songInfo.image,
        pub_date: songInfo.pubDate,
    });

    // Issue the relation inserts together, then wait for all of them.
    const relationInserts = [];
    if (isUsableId(songId)) {
        if (isUsableId(songInfo.albumId)) {
            relationInserts.push(dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
                song_id: songInfo.songId,
                album_id: songInfo.albumId,
            }));
        }
        artistIds.filter(isUsableId).forEach(function (artistId) {
            relationInserts.push(dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
                song_id: songInfo.songId,
                artist_id: artistId,
            }));
        });
    }
    await Promise.all(relationInserts);

    return songInfo;
}
/**
 * Fetches the details of one artist and stores them.
 *
 * Downloads https://music.163.com/artist?id=<artistId>, reads the ld+json
 * block, the og:image tag and the embedded `song-list-pre-data` JSON, then
 * inserts the artist row and one song/artist relation row per listed song.
 *
 * The function resolves to `undefined` without writing anything when
 *   - the artist is already present in the `artist` table,
 *   - the page request fails (the error is logged), or
 *   - the page lacks the expected markup (logged), so one broken page cannot
 *     abort the whole crawl.
 *
 * Every INSERT is awaited, so the queries have settled — and any database
 * error has been propagated to the caller — before the promise resolves.
 *
 * @param {{ artistId: (string|number) }} param0 - NetEase artist id.
 * @returns {Promise<Object|undefined>} `{ artistId, title, image, description,
 *   pubDate, songIds }`, or undefined when skipped.
 */
async function getArtistInfo({ artistId }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
    if (existing[0].count > 0) {
        console.log(`数据库中已有数据,跳过 artistId: ${artistId}`);
        return;
    }

    const url = `https://music.163.com/artist?id=${artistId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        console.error(errors);
        return;
    }

    // First capture group of `pattern` in the page, or null when it does not match.
    const capture = (pattern) => {
        const found = pattern.exec(html);
        return found === null ? null : found[1];
    };
    // Relation rows with a zero / non-numeric id are dirty data; never write them.
    const isUsableId = (id) => !isNaN(Number(id)) && Number(id) !== 0;

    const ldJsonText = capture(/<script type="application\/ld\+json">([\S\s]*?)<\/script>/);
    const image = capture(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/);
    const songListText = capture(/<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/);

    let artistInfoDict = null;
    let songList = null;
    if (ldJsonText !== null && songListText !== null) {
        try {
            artistInfoDict = JSON.parse(ldJsonText);
            songList = JSON.parse(songListText);
        } catch (err) {
            console.error(err);
        }
    }
    if (!artistInfoDict || !Array.isArray(songList) || image === null) {
        console.error(`Failed to parse artist page, skipped artistId: ${artistId}`);
        return;
    }

    const songIds = songList.map(song => song.id);
    const artistInfo = {
        artistId: artistId,
        title: artistInfoDict.title,
        image: image,
        description: artistInfoDict.description,
        pubDate: artistInfoDict.pubDate,
        songIds: songIds,
    };

    await dbUtils.query('INSERT IGNORE INTO artist SET ?', {
        artist_id: artistInfo.artistId,
        title: artistInfo.title,
        description: artistInfo.description,
        image: artistInfo.image,
        pub_date: artistInfo.pubDate,
    });

    // Issue the relation inserts together, then wait for all of them.
    const relationInserts = [];
    if (isUsableId(artistId)) {
        songIds.filter(isUsableId).forEach(function (songId) {
            relationInserts.push(dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
                song_id: songId,
                artist_id: artistId,
            }));
        });
    }
    await Promise.all(relationInserts);

    return artistInfo;
}
/**
 * Fetches the details of one album and stores them.
 *
 * Downloads https://music.163.com/album?id=<albumId>, reads the ld+json
 * block, the og:image tag, the optional publishing-company paragraph and the
 * embedded `song-list-pre-data` JSON, then inserts the album row and one
 * song/album relation row per listed song.
 *
 * The function resolves to `undefined` without writing anything when
 *   - the album is already present in the `album` table,
 *   - the page request fails (the error is logged), or
 *   - the page lacks the expected markup (logged), so one broken page cannot
 *     abort the whole crawl.
 *
 * Relation rows are written only when BOTH the song id and the album id are
 * non-zero numbers. Every INSERT is awaited, so the queries have settled —
 * and any database error has been propagated to the caller — before the
 * promise resolves.
 *
 * @param {{ albumId: (string|number) }} param0 - NetEase album id.
 * @returns {Promise<Object|undefined>} `{ albumId, title, image, description,
 *   pubDate, company, songIds }`, or undefined when skipped.
 */
async function getAlbumInfo({ albumId }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
    if (existing[0].count > 0) {
        console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
        return;
    }

    const url = `https://music.163.com/album?id=${albumId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        console.error(errors);
        return;
    }

    // First capture group of `pattern` in the page, or null when it does not match.
    const capture = (pattern) => {
        const found = pattern.exec(html);
        return found === null ? null : found[1];
    };
    // Relation rows with a zero / non-numeric id are dirty data; never write them.
    const isUsableId = (id) => !isNaN(Number(id)) && Number(id) !== 0;

    const ldJsonText = capture(/<script type="application\/ld\+json">([\S\s]*?)<\/script>/);
    const image = capture(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/);
    const songListText = capture(/<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/);
    // Optional: not every album page names a publishing company.
    const company = capture(/<p class="intr"><b>发行公司:<\/b>\n(.*?)\n<\/p>/);

    let albumInfoDict = null;
    let songList = null;
    if (ldJsonText !== null && songListText !== null) {
        try {
            albumInfoDict = JSON.parse(ldJsonText);
            songList = JSON.parse(songListText);
        } catch (err) {
            console.error(err);
        }
    }
    if (!albumInfoDict || !Array.isArray(songList) || image === null) {
        console.error(`Failed to parse album page, skipped albumId: ${albumId}`);
        return;
    }

    const songIds = songList.map(song => song.id);
    const albumInfo = {
        albumId: albumId,
        title: albumInfoDict.title,
        image: image,
        description: albumInfoDict.description,
        pubDate: albumInfoDict.pubDate,
        company: company,
        songIds: songIds,
    };

    await dbUtils.query('INSERT IGNORE INTO album SET ?', {
        album_id: albumInfo.albumId,
        title: albumInfo.title,
        description: albumInfo.description,
        image: albumInfo.image,
        pub_date: albumInfo.pubDate,
        company: albumInfo.company,
    });

    // Issue the relation inserts together, then wait for all of them.
    const relationInserts = [];
    if (isUsableId(albumId)) {
        songIds.filter(isUsableId).forEach(function (songId) {
            relationInserts.push(dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
                song_id: songId,
                album_id: albumId,
            }));
        });
    }
    await Promise.all(relationInserts);

    return albumInfo;
}
// // Fetch user profile details (disabled draft)
// async function getUserInfo({ userId }) {
// let url = `https://music.163.com/user/home?id=${userId}`;
// try {
// var html = fs.readFileSync(path.join(__dirname, "../temp", ` user-${userId}.html`), 'utf8');
// } catch (errors) {
// var html = await requestUtils.getApiResult(url);
// fs.writeFileSync(path.join(__dirname, "../temp", ` user-${userId}.html`), html);
// }
// // console.log(html);
// }
// Public surface of this module: only the crawler entry point is exported.
module.exports = { main };