diff --git a/netease_music/index.js b/netease_music/index.js index ea528d1..ced6442 100644 --- a/netease_music/index.js +++ b/netease_music/index.js @@ -25,59 +25,123 @@ async function main() { // getUserInfo({ userId: "37365202" }); // getUserInfo({ userId: "29879272" }); - await startGetMusic({ songId: "1966061035" }); + while (true) { + await startGetMusic(100); + } } -async function startGetMusic({ songId }) { - var songInfo = await getMusicInfo({ songId: songId }); +/* +DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0 +-- DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0 + */ - var albumInfo = await getAlbumInfo({ albumId: songInfo.albumId }); - if (albumInfo) { - for (var songId of albumInfo.songIds) { - await startGetMusic({ songId: songId }); +async function startGetMusic(sleepTime) { + + // 从数据库中查出还缺少的歌曲,并进行爬取 + console.log("start fetching songs ..."); + let songIds = await dbUtils.query(` + SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) + UNION + SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) + `, []); + songIds = songIds.map(item => item.song_id); + for (let i = 0; i < songIds.length; i++) { + const songId = songIds[i]; + console.log(`${i}/${songIds.length} | song: ${songId} | ${await statistics()}`); + await getMusicInfo({ songId: songId }); + await sleepUtils.sleep(sleepTime); + if (fs.readFileSync('stop.txt') == "1") { + throw new Error(`Stopped`); } } - for (var artistId of songInfo.artistIds) { - var artistInfo = await getArtistInfo({ artistId: artistId }); - if (artistInfo) { - for (var songId of artistInfo.songIds) { - await startGetMusic({ songId: songId }); - } + // 从数据库中查出还缺少的专辑,并进行爬取 + console.log("start fetching albums ...") + let albumIds = await dbUtils.query(` + SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) + `, []); + albumIds = albumIds.map(item => item.album_id); + for (let i = 0; i < albumIds.length; i++) { + const albumId = albumIds[i]; + console.log(`${i}/${albumIds.length} | album: ${albumId} | ${await statistics()}`); + await getAlbumInfo({ albumId: albumId }); + await sleepUtils.sleep(sleepTime); + if (fs.readFileSync('stop.txt') == "1") { + throw new Error(`Stopped`); } } + + // 从数据库中查出还缺少的歌手,并进行爬取 + console.log("start fetching albums ...") + let artistIds = await dbUtils.query(` + SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) + `, []); + artistIds = artistIds.map(item => item.artist_id); + for (let i = 0; i < artistIds.length; i++) { + const artistId = artistIds[i]; + console.log(`${i}/${artistIds.length} | artist: ${artistId} | ${await statistics()}`); + await getArtistInfo({ artistId: artistId }); + await sleepUtils.sleep(sleepTime); + if (fs.readFileSync('stop.txt') == "1") { + throw new Error(`Stopped`); + } + } +} + +async function statistics() { + let sql = ` + SELECT + song_count, + album_count, + artist_count, + song_album_count, + song_artist_count + FROM + ( SELECT count(*) AS song_count FROM song ) t1, + ( SELECT count(*) AS album_count FROM album ) t2, + ( SELECT count(*) AS artist_count FROM artist ) t3, + ( SELECT count(*) AS song_album_count FROM song_album_relation ) t4, + ( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t5`; + let result = await dbUtils.query(sql, []); + let songCount = result[0].song_count; + let albumCount = result[0].album_count; + let artistCount = result[0].artist_count; + let songAlbumCount = result[0].song_album_count; + let songArtistCount = result[0].song_artist_count; + return `song: ${songCount}, album: ${albumCount}, artist: ${artistCount} | songAlbum: ${songAlbumCount}, songArtist: ${songArtistCount}`; } // 获取音乐详情 async function getMusicInfo({ songId }) { - console.log(`开始处理 song: ${songId}`); let result = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]); if (result[0].count > 0) { console.log(`数据库中已有数据,跳过 songId: ${songId}`); - let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]); - songResult = JSON.parse(JSON.stringify(songResult)); + return; - let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]); - songArtistResult = JSON.parse(JSON.stringify(songArtistResult)); - songResult.artistIds = songArtistResult.map(song => song.artist_id); + // let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]); + // songResult = JSON.parse(JSON.stringify(songResult)); - let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]); - songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult)); - songResult.albumId = songAlbumResult.map(song => song.album_id)[0]; + // let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]); + // songArtistResult = JSON.parse(JSON.stringify(songArtistResult)); + // songResult.artistIds = songArtistResult.map(song => song.artist_id); - // console.log(songResult); - return songResult; + // let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]); + // songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult)); + // songResult.albumId = songAlbumResult.map(song => song.album_id)[0]; + + // // console.log(songResult); + // return songResult; } - await sleepUtils.sleep(500); let url = `https://music.163.com/song?id=${songId}`; try { - throw new Error(`Error`); - var html = fs.readFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), 'utf8'); - } catch (errors) { + // var html = fs.readFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), 'utf8'); var html = await requestUtils.getApiResult(url); fs.writeFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), html); + } catch (errors) { + console.error(errors); + return; } // console.log(html); @@ -90,9 +154,14 @@ async function getMusicInfo({ songId }) { let title = //.exec(html)[1]; let image = //.exec(html)[1]; let artist = //.exec(html)[1]; - let album = //.exec(html)[1]; - let albumId = //.exec(html)[1]; let duration = //.exec(html)[1]; + try { + var album = //.exec(html)[1]; + var albumId = //.exec(html)[1]; + } catch (err) { + // 歌曲不在专辑中 + } + const reg = //g; let artistIds = []; @@ -108,8 +177,8 @@ async function getMusicInfo({ songId }) { pubDate: songInfoDict.pubDate, artist: artist, artistIds: artistIds, - album: album, - albumId: albumId, + album: album || null, + albumId: albumId || null, duration: duration, }; // console.log("songInfo", songInfo); @@ -119,10 +188,11 @@ async function getMusicInfo({ songId }) { image: songInfo.image, pub_date: songInfo.pubDate, }); - dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', { - song_id: songInfo.songId, - album_id: songInfo.albumId, - }); + if (albumId != null) + dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', { + song_id: songInfo.songId, + album_id: songInfo.albumId, + }); artistIds.forEach(function (artistId) { dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', { song_id: songInfo.songId, @@ -134,10 +204,11 @@ async function getMusicInfo({ songId }) { // 获取音乐人详情 async function getArtistInfo({ artistId }) { - console.log(`开始处理 artist: ${artistId}`); let result = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]); if (result[0].count > 0) { console.log(`数据库中已有数据,跳过 artistId: ${artistId}`); + return; + // // let artistResult = await dbUtils.query('SELECT * FROM artist LEFT JOIN song_artist_relation ON artist.artist_id = song_artist_relation.artist_id WHERE artist.artist_id = ?', [artistId]); // let artistResult = await dbUtils.query('SELECT * FROM artist WHERE artist_id = ?', [artistId]); // artistResult = JSON.parse(JSON.stringify(artistResult)); @@ -146,18 +217,17 @@ async function getArtistInfo({ artistId }) { // artistResult.songIds = songArtistResult.map(song => song.song_id); // // console.log(artistResult); // return artistResult; - return null; } - await sleepUtils.sleep(500); let url = `https://music.163.com/artist?id=${artistId}`; try { - throw new Error(`Error`); - var html = fs.readFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), 'utf8'); - } catch (errors) { + // var html = fs.readFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), 'utf8'); var html = await requestUtils.getApiResult(url); fs.writeFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), html); + } catch (errors) { + console.error(errors); + return; } // console.log(html); @@ -189,6 +259,8 @@ async function getArtistInfo({ artistId }) { pub_date: artistInfo.pubDate, }); songIds.forEach(function (songId) { + if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(artistId)) || Number(artistId) === 0) + return; dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', { song_id: songId, artist_id: artistId, @@ -199,10 +271,11 @@ async function getArtistInfo({ artistId }) { // 获取专辑详情 async function getAlbumInfo({ albumId }) { - console.log(`开始处理 album: ${albumId}`); let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]); if (result[0].count > 0) { console.log(`数据库中已有数据,跳过 albumId: ${albumId}`); + return; + // let albumResult = await dbUtils.query('SELECT * FROM album WHERE album_id = ?', [albumId]); // albumResult = JSON.parse(JSON.stringify(albumResult)); // let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE album_id = ?', [albumId]); @@ -210,18 +283,17 @@ async function getAlbumInfo({ albumId }) { // albumResult.songIds = songAlbumResult.map(song => song.song_id); // // console.log(albumResult); // return albumResult; - return null; } - await sleepUtils.sleep(500); let url = `https://music.163.com/album?id=${albumId}`; try { - throw new Error(`Error`); - var html = fs.readFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), 'utf8'); - } catch (errors) { + // var html = fs.readFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), 'utf8'); var html = await requestUtils.getApiResult(url); fs.writeFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), html); + } catch (errors) { + console.error(errors); + return; } // console.log(html); @@ -261,6 +333,8 @@ async function getAlbumInfo({ albumId }) { company: albumInfo.company, }); songIds.forEach(function (songId) { + if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(albumId)) || Number(songId) === 0) + return; dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', { song_id: songId, album_id: albumId, diff --git a/stop.txt b/stop.txt new file mode 100644 index 0000000..c227083 --- /dev/null +++ b/stop.txt @@ -0,0 +1 @@ +0 \ No newline at end of file