update
This commit is contained in:
parent
e835580358
commit
b85653ec72
@ -25,59 +25,123 @@ async function main() {
|
||||
// getUserInfo({ userId: "37365202" });
|
||||
// getUserInfo({ userId: "29879272" });
|
||||
|
||||
await startGetMusic({ songId: "1966061035" });
|
||||
while (true) {
|
||||
await startGetMusic(100);
|
||||
}
|
||||
}
|
||||
|
||||
async function startGetMusic({ songId }) {
|
||||
var songInfo = await getMusicInfo({ songId: songId });
|
||||
/*
|
||||
DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0
|
||||
-- DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0
|
||||
*/
|
||||
|
||||
var albumInfo = await getAlbumInfo({ albumId: songInfo.albumId });
|
||||
if (albumInfo) {
|
||||
for (var songId of albumInfo.songIds) {
|
||||
await startGetMusic({ songId: songId });
|
||||
async function startGetMusic(sleepTime) {
|
||||
|
||||
// 从数据库中查出还缺少的歌曲,并进行爬取
|
||||
console.log("start fetching songs ...");
|
||||
let songIds = await dbUtils.query(`
|
||||
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
|
||||
UNION
|
||||
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
|
||||
`, []);
|
||||
songIds = songIds.map(item => item.song_id);
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
const songId = songIds[i];
|
||||
console.log(`${i}/${songIds.length} | song: ${songId} | ${await statistics()}`);
|
||||
await getMusicInfo({ songId: songId });
|
||||
await sleepUtils.sleep(sleepTime);
|
||||
if (fs.readFileSync('stop.txt') == "1") {
|
||||
throw new Error(`Stopped`);
|
||||
}
|
||||
}
|
||||
|
||||
for (var artistId of songInfo.artistIds) {
|
||||
var artistInfo = await getArtistInfo({ artistId: artistId });
|
||||
if (artistInfo) {
|
||||
for (var songId of artistInfo.songIds) {
|
||||
await startGetMusic({ songId: songId });
|
||||
}
|
||||
// 从数据库中查出还缺少的专辑,并进行爬取
|
||||
console.log("start fetching albums ...")
|
||||
let albumIds = await dbUtils.query(`
|
||||
SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
|
||||
`, []);
|
||||
albumIds = albumIds.map(item => item.album_id);
|
||||
for (let i = 0; i < albumIds.length; i++) {
|
||||
const albumId = albumIds[i];
|
||||
console.log(`${i}/${albumIds.length} | album: ${albumId} | ${await statistics()}`);
|
||||
await getAlbumInfo({ albumId: albumId });
|
||||
await sleepUtils.sleep(sleepTime);
|
||||
if (fs.readFileSync('stop.txt') == "1") {
|
||||
throw new Error(`Stopped`);
|
||||
}
|
||||
}
|
||||
|
||||
// 从数据库中查出还缺少的歌手,并进行爬取
|
||||
console.log("start fetching albums ...")
|
||||
let artistIds = await dbUtils.query(`
|
||||
SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist )
|
||||
`, []);
|
||||
artistIds = artistIds.map(item => item.artist_id);
|
||||
for (let i = 0; i < artistIds.length; i++) {
|
||||
const artistId = artistIds[i];
|
||||
console.log(`${i}/${artistIds.length} | artist: ${artistId} | ${await statistics()}`);
|
||||
await getArtistInfo({ artistId: artistId });
|
||||
await sleepUtils.sleep(sleepTime);
|
||||
if (fs.readFileSync('stop.txt') == "1") {
|
||||
throw new Error(`Stopped`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build a one-line progress summary with the current row counts of the
 * crawler's tables.
 *
 * Runs a single query through `dbUtils.query` that counts the rows of `song`,
 * `album`, `artist`, `song_album_relation` and `song_artist_relation`, then
 * formats the five counters into a string for logging.
 *
 * @returns {Promise<string>} Text of the form
 *   `song: N, album: N, artist: N | songAlbum: N, songArtist: N`.
 */
async function statistics() {
    // Every derived table is a bare count(*) and therefore yields exactly one
    // row, so the comma join produces a single row holding all five counters.
    const sql = `
        SELECT
            song_count,
            album_count,
            artist_count,
            song_album_count,
            song_artist_count
        FROM
            ( SELECT count(*) AS song_count FROM song ) t1,
            ( SELECT count(*) AS album_count FROM album ) t2,
            ( SELECT count(*) AS artist_count FROM artist ) t3,
            ( SELECT count(*) AS song_album_count FROM song_album_relation ) t4,
            ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t5`;

    const result = await dbUtils.query(sql, []);

    // Only the first row is read; its columns carry the aliases defined above.
    const {
        song_count: songCount,
        album_count: albumCount,
        artist_count: artistCount,
        song_album_count: songAlbumCount,
        song_artist_count: songArtistCount,
    } = result[0];

    return `song: ${songCount}, album: ${albumCount}, artist: ${artistCount} | songAlbum: ${songAlbumCount}, songArtist: ${songArtistCount}`;
}
|
||||
|
||||
// 获取音乐详情
|
||||
async function getMusicInfo({ songId }) {
|
||||
console.log(`开始处理 song: ${songId}`);
|
||||
let result = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
|
||||
if (result[0].count > 0) {
|
||||
console.log(`数据库中已有数据,跳过 songId: ${songId}`);
|
||||
let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
|
||||
songResult = JSON.parse(JSON.stringify(songResult));
|
||||
return;
|
||||
|
||||
let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);
|
||||
songArtistResult = JSON.parse(JSON.stringify(songArtistResult));
|
||||
songResult.artistIds = songArtistResult.map(song => song.artist_id);
|
||||
// let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
|
||||
// songResult = JSON.parse(JSON.stringify(songResult));
|
||||
|
||||
let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
|
||||
songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult));
|
||||
songResult.albumId = songAlbumResult.map(song => song.album_id)[0];
|
||||
// let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);
|
||||
// songArtistResult = JSON.parse(JSON.stringify(songArtistResult));
|
||||
// songResult.artistIds = songArtistResult.map(song => song.artist_id);
|
||||
|
||||
// console.log(songResult);
|
||||
return songResult;
|
||||
// let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
|
||||
// songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult));
|
||||
// songResult.albumId = songAlbumResult.map(song => song.album_id)[0];
|
||||
|
||||
// // console.log(songResult);
|
||||
// return songResult;
|
||||
}
|
||||
await sleepUtils.sleep(500);
|
||||
|
||||
let url = `https://music.163.com/song?id=${songId}`;
|
||||
|
||||
try {
|
||||
throw new Error(`Error`);
|
||||
var html = fs.readFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), 'utf8');
|
||||
} catch (errors) {
|
||||
// var html = fs.readFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), 'utf8');
|
||||
var html = await requestUtils.getApiResult(url);
|
||||
fs.writeFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), html);
|
||||
} catch (errors) {
|
||||
console.error(errors);
|
||||
return;
|
||||
}
|
||||
// console.log(html);
|
||||
|
||||
@ -90,9 +154,14 @@ async function getMusicInfo({ songId }) {
|
||||
let title = /<meta property="og:title" content="(.*?)" \/>/.exec(html)[1];
|
||||
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
|
||||
let artist = /<meta property="og:music:artist" content="(.*?)" \/>/.exec(html)[1];
|
||||
let album = /<meta property="og:music:album" content="(.*?)"\/>/.exec(html)[1];
|
||||
let albumId = /<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/.exec(html)[1];
|
||||
let duration = /<meta property="music:duration" content="(.*?)"\/>/.exec(html)[1];
|
||||
try {
|
||||
var album = /<meta property="og:music:album" content="(.*?)"\/>/.exec(html)[1];
|
||||
var albumId = /<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/.exec(html)[1];
|
||||
} catch (err) {
|
||||
// 歌曲不在专辑中
|
||||
}
|
||||
|
||||
|
||||
const reg = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
|
||||
let artistIds = [];
|
||||
@ -108,8 +177,8 @@ async function getMusicInfo({ songId }) {
|
||||
pubDate: songInfoDict.pubDate,
|
||||
artist: artist,
|
||||
artistIds: artistIds,
|
||||
album: album,
|
||||
albumId: albumId,
|
||||
album: album || null,
|
||||
albumId: albumId || null,
|
||||
duration: duration,
|
||||
};
|
||||
// console.log("songInfo", songInfo);
|
||||
@ -119,10 +188,11 @@ async function getMusicInfo({ songId }) {
|
||||
image: songInfo.image,
|
||||
pub_date: songInfo.pubDate,
|
||||
});
|
||||
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
|
||||
song_id: songInfo.songId,
|
||||
album_id: songInfo.albumId,
|
||||
});
|
||||
if (albumId != null)
|
||||
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
|
||||
song_id: songInfo.songId,
|
||||
album_id: songInfo.albumId,
|
||||
});
|
||||
artistIds.forEach(function (artistId) {
|
||||
dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
|
||||
song_id: songInfo.songId,
|
||||
@ -134,10 +204,11 @@ async function getMusicInfo({ songId }) {
|
||||
|
||||
// 获取音乐人详情
|
||||
async function getArtistInfo({ artistId }) {
|
||||
console.log(`开始处理 artist: ${artistId}`);
|
||||
let result = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
|
||||
if (result[0].count > 0) {
|
||||
console.log(`数据库中已有数据,跳过 artistId: ${artistId}`);
|
||||
return;
|
||||
|
||||
// // let artistResult = await dbUtils.query('SELECT * FROM artist LEFT JOIN song_artist_relation ON artist.artist_id = song_artist_relation.artist_id WHERE artist.artist_id = ?', [artistId]);
|
||||
// let artistResult = await dbUtils.query('SELECT * FROM artist WHERE artist_id = ?', [artistId]);
|
||||
// artistResult = JSON.parse(JSON.stringify(artistResult));
|
||||
@ -146,18 +217,17 @@ async function getArtistInfo({ artistId }) {
|
||||
// artistResult.songIds = songArtistResult.map(song => song.song_id);
|
||||
// // console.log(artistResult);
|
||||
// return artistResult;
|
||||
return null;
|
||||
}
|
||||
await sleepUtils.sleep(500);
|
||||
|
||||
let url = `https://music.163.com/artist?id=${artistId}`;
|
||||
|
||||
try {
|
||||
throw new Error(`Error`);
|
||||
var html = fs.readFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), 'utf8');
|
||||
} catch (errors) {
|
||||
// var html = fs.readFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), 'utf8');
|
||||
var html = await requestUtils.getApiResult(url);
|
||||
fs.writeFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), html);
|
||||
} catch (errors) {
|
||||
console.error(errors);
|
||||
return;
|
||||
}
|
||||
// console.log(html);
|
||||
|
||||
@ -189,6 +259,8 @@ async function getArtistInfo({ artistId }) {
|
||||
pub_date: artistInfo.pubDate,
|
||||
});
|
||||
songIds.forEach(function (songId) {
|
||||
if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(artistId)) || Number(artistId) === 0)
|
||||
return;
|
||||
dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
|
||||
song_id: songId,
|
||||
artist_id: artistId,
|
||||
@ -199,10 +271,11 @@ async function getArtistInfo({ artistId }) {
|
||||
|
||||
// 获取专辑详情
|
||||
async function getAlbumInfo({ albumId }) {
|
||||
console.log(`开始处理 album: ${albumId}`);
|
||||
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
|
||||
if (result[0].count > 0) {
|
||||
console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
|
||||
return;
|
||||
|
||||
// let albumResult = await dbUtils.query('SELECT * FROM album WHERE album_id = ?', [albumId]);
|
||||
// albumResult = JSON.parse(JSON.stringify(albumResult));
|
||||
// let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE album_id = ?', [albumId]);
|
||||
@ -210,18 +283,17 @@ async function getAlbumInfo({ albumId }) {
|
||||
// albumResult.songIds = songAlbumResult.map(song => song.song_id);
|
||||
// // console.log(albumResult);
|
||||
// return albumResult;
|
||||
return null;
|
||||
}
|
||||
await sleepUtils.sleep(500);
|
||||
|
||||
let url = `https://music.163.com/album?id=${albumId}`;
|
||||
|
||||
try {
|
||||
throw new Error(`Error`);
|
||||
var html = fs.readFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), 'utf8');
|
||||
} catch (errors) {
|
||||
// var html = fs.readFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), 'utf8');
|
||||
var html = await requestUtils.getApiResult(url);
|
||||
fs.writeFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), html);
|
||||
} catch (errors) {
|
||||
console.error(errors);
|
||||
return;
|
||||
}
|
||||
// console.log(html);
|
||||
|
||||
@ -261,6 +333,8 @@ async function getAlbumInfo({ albumId }) {
|
||||
company: albumInfo.company,
|
||||
});
|
||||
songIds.forEach(function (songId) {
|
||||
if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(albumId)) || Number(songId) === 0)
|
||||
return;
|
||||
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
|
||||
song_id: songId,
|
||||
album_id: albumId,
|
||||
|
Loading…
Reference in New Issue
Block a user