diff --git a/netease_music/index.js b/netease_music/index.js
index ea528d1..ced6442 100644
--- a/netease_music/index.js
+++ b/netease_music/index.js
@@ -25,59 +25,123 @@ async function main() {
// getUserInfo({ userId: "37365202" });
// getUserInfo({ userId: "29879272" });
- await startGetMusic({ songId: "1966061035" });
+ while (true) {
+ await startGetMusic(100);
+ }
}
-async function startGetMusic({ songId }) {
- var songInfo = await getMusicInfo({ songId: songId });
+/* Cleanup SQL: deletes relation rows that were stored with an id of 0.
+DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0
+-- DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0
+ */
- var albumInfo = await getAlbumInfo({ albumId: songInfo.albumId });
- if (albumInfo) {
- for (var songId of albumInfo.songIds) {
- await startGetMusic({ songId: songId });
+async function startGetMusic(sleepTime) {
+ // Crawls every song, album and artist id that a relation table references but whose entity table lacks it, sleeping sleepTime (presumably ms) after each fetch; throws Error("Stopped") once stop.txt contains "1".
+ // Phase 1: query the database for songs that are still missing and crawl them.
+ console.log("start fetching songs ...");
+ let songIds = await dbUtils.query(`
+ SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
+ UNION
+ SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
+ `, []);
+ songIds = songIds.map(item => item.song_id);
+ for (let i = 0; i < songIds.length; i++) {
+ const songId = songIds[i];
+ console.log(`${i}/${songIds.length} | song: ${songId} | ${await statistics()}`);
+ await getMusicInfo({ songId: songId });
+ await sleepUtils.sleep(sleepTime);
+ if (fs.readFileSync('stop.txt', 'utf8').trim() === "1") {
+ throw new Error(`Stopped`);
}
}
- for (var artistId of songInfo.artistIds) {
- var artistInfo = await getArtistInfo({ artistId: artistId });
- if (artistInfo) {
- for (var songId of artistInfo.songIds) {
- await startGetMusic({ songId: songId });
- }
+ // Phase 2: query the database for albums that are still missing and crawl them.
+ console.log("start fetching albums ...");
+ let albumIds = await dbUtils.query(`
+ SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
+ `, []);
+ albumIds = albumIds.map(item => item.album_id);
+ for (let i = 0; i < albumIds.length; i++) {
+ const albumId = albumIds[i];
+ console.log(`${i}/${albumIds.length} | album: ${albumId} | ${await statistics()}`);
+ await getAlbumInfo({ albumId: albumId });
+ await sleepUtils.sleep(sleepTime);
+ if (fs.readFileSync('stop.txt', 'utf8').trim() === "1") {
+ throw new Error(`Stopped`);
}
}
+
+ // Phase 3: query the database for artists that are still missing and crawl them.
+ console.log("start fetching artists ...");
+ let artistIds = await dbUtils.query(`
+ SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist )
+ `, []);
+ artistIds = artistIds.map(item => item.artist_id);
+ for (let i = 0; i < artistIds.length; i++) {
+ const artistId = artistIds[i];
+ console.log(`${i}/${artistIds.length} | artist: ${artistId} | ${await statistics()}`);
+ await getArtistInfo({ artistId: artistId });
+ await sleepUtils.sleep(sleepTime);
+ if (fs.readFileSync('stop.txt', 'utf8').trim() === "1") {
+ throw new Error(`Stopped`);
+ }
+ }
+}
+
+async function statistics() {
+ // Builds the one-line summary of table row counts that is appended
+ // to every progress log entry; resolves to that summary string.
+ const countSql = `
+ SELECT
+ song_count,
+ album_count,
+ artist_count,
+ song_album_count,
+ song_artist_count
+ FROM
+ ( SELECT count(*) AS song_count FROM song ) t1,
+ ( SELECT count(*) AS album_count FROM album ) t2,
+ ( SELECT count(*) AS artist_count FROM artist ) t3,
+ ( SELECT count(*) AS song_album_count FROM song_album_relation ) t4,
+ ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t5`;
+ // Each derived table yields a single count row, so only the first result row is read.
+ const row = (await dbUtils.query(countSql, []))[0];
+ const entityPart = `song: ${row.song_count}, album: ${row.album_count}, artist: ${row.artist_count}`;
+ const relationPart = `songAlbum: ${row.song_album_count}, songArtist: ${row.song_artist_count}`;
+ return `${entityPart} | ${relationPart}`;
}
// 获取音乐详情
async function getMusicInfo({ songId }) {
- console.log(`开始处理 song: ${songId}`);
let result = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
if (result[0].count > 0) {
console.log(`数据库中已有数据,跳过 songId: ${songId}`);
- let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
- songResult = JSON.parse(JSON.stringify(songResult));
+ return;
- let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);
- songArtistResult = JSON.parse(JSON.stringify(songArtistResult));
- songResult.artistIds = songArtistResult.map(song => song.artist_id);
+ // let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
+ // songResult = JSON.parse(JSON.stringify(songResult));
- let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
- songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult));
- songResult.albumId = songAlbumResult.map(song => song.album_id)[0];
+ // let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);
+ // songArtistResult = JSON.parse(JSON.stringify(songArtistResult));
+ // songResult.artistIds = songArtistResult.map(song => song.artist_id);
- // console.log(songResult);
- return songResult;
+ // let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
+ // songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult));
+ // songResult.albumId = songAlbumResult.map(song => song.album_id)[0];
+
+ // // console.log(songResult);
+ // return songResult;
}
- await sleepUtils.sleep(500);
let url = `https://music.163.com/song?id=${songId}`;
try {
- throw new Error(`Error`);
- var html = fs.readFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), 'utf8');
- } catch (errors) {
+ // var html = fs.readFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), 'utf8');
var html = await requestUtils.getApiResult(url);
fs.writeFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), html);
+ } catch (errors) {
+ console.error(errors);
+ return;
}
// console.log(html);
@@ -90,9 +154,14 @@ async function getMusicInfo({ songId }) {
let title = //.exec(html)[1];
let image = //.exec(html)[1];
let artist = //.exec(html)[1];
- let album = //.exec(html)[1];
- let albumId = //.exec(html)[1];
let duration = //.exec(html)[1];
+ try {
+ var album = //.exec(html)[1];
+ var albumId = //.exec(html)[1];
+ } catch (err) {
+ // 歌曲不在专辑中
+ }
+
const reg = //g;
let artistIds = [];
@@ -108,8 +177,8 @@ async function getMusicInfo({ songId }) {
pubDate: songInfoDict.pubDate,
artist: artist,
artistIds: artistIds,
- album: album,
- albumId: albumId,
+ album: album || null,
+ albumId: albumId || null,
duration: duration,
};
// console.log("songInfo", songInfo);
@@ -119,10 +188,11 @@ async function getMusicInfo({ songId }) {
image: songInfo.image,
pub_date: songInfo.pubDate,
});
- dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
- song_id: songInfo.songId,
- album_id: songInfo.albumId,
- });
+ if (albumId != null)
+ dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
+ song_id: songInfo.songId,
+ album_id: songInfo.albumId,
+ });
artistIds.forEach(function (artistId) {
dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
song_id: songInfo.songId,
@@ -134,10 +204,11 @@ async function getMusicInfo({ songId }) {
// 获取音乐人详情
async function getArtistInfo({ artistId }) {
- console.log(`开始处理 artist: ${artistId}`);
let result = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
if (result[0].count > 0) {
console.log(`数据库中已有数据,跳过 artistId: ${artistId}`);
+ return;
+
// // let artistResult = await dbUtils.query('SELECT * FROM artist LEFT JOIN song_artist_relation ON artist.artist_id = song_artist_relation.artist_id WHERE artist.artist_id = ?', [artistId]);
// let artistResult = await dbUtils.query('SELECT * FROM artist WHERE artist_id = ?', [artistId]);
// artistResult = JSON.parse(JSON.stringify(artistResult));
@@ -146,18 +217,17 @@ async function getArtistInfo({ artistId }) {
// artistResult.songIds = songArtistResult.map(song => song.song_id);
// // console.log(artistResult);
// return artistResult;
- return null;
}
- await sleepUtils.sleep(500);
let url = `https://music.163.com/artist?id=${artistId}`;
try {
- throw new Error(`Error`);
- var html = fs.readFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), 'utf8');
- } catch (errors) {
+ // var html = fs.readFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), 'utf8');
var html = await requestUtils.getApiResult(url);
fs.writeFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), html);
+ } catch (errors) {
+ console.error(errors);
+ return;
}
// console.log(html);
@@ -189,6 +259,8 @@ async function getArtistInfo({ artistId }) {
pub_date: artistInfo.pubDate,
});
songIds.forEach(function (songId) {
+ if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(artistId)) || Number(artistId) === 0)
+ return;
dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
song_id: songId,
artist_id: artistId,
@@ -199,10 +271,11 @@ async function getArtistInfo({ artistId }) {
// 获取专辑详情
async function getAlbumInfo({ albumId }) {
- console.log(`开始处理 album: ${albumId}`);
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
if (result[0].count > 0) {
console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
+ return;
+
// let albumResult = await dbUtils.query('SELECT * FROM album WHERE album_id = ?', [albumId]);
// albumResult = JSON.parse(JSON.stringify(albumResult));
// let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE album_id = ?', [albumId]);
@@ -210,18 +283,17 @@ async function getAlbumInfo({ albumId }) {
// albumResult.songIds = songAlbumResult.map(song => song.song_id);
// // console.log(albumResult);
// return albumResult;
- return null;
}
- await sleepUtils.sleep(500);
let url = `https://music.163.com/album?id=${albumId}`;
try {
- throw new Error(`Error`);
- var html = fs.readFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), 'utf8');
- } catch (errors) {
+ // var html = fs.readFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), 'utf8');
var html = await requestUtils.getApiResult(url);
fs.writeFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), html);
+ } catch (errors) {
+ console.error(errors);
+ return;
}
// console.log(html);
@@ -261,6 +333,8 @@ async function getAlbumInfo({ albumId }) {
company: albumInfo.company,
});
songIds.forEach(function (songId) {
+ if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(albumId)) || Number(albumId) === 0)
+ return; // never store a relation row whose song or album id is non-numeric or zero
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
song_id: songId,
album_id: albumId,
diff --git a/stop.txt b/stop.txt
new file mode 100644
index 0000000..c227083
--- /dev/null
+++ b/stop.txt
@@ -0,0 +1 @@
+0
\ No newline at end of file