diff --git a/netease_music/sql/export/export_sql_generator.js b/netease_music/sql/export/export_sql_generator.js index cff4d11..1d2b76c 100644 --- a/netease_music/sql/export/export_sql_generator.js +++ b/netease_music/sql/export/export_sql_generator.js @@ -76,20 +76,31 @@ function fill(num, fillers, length) { // ############################################# -// 使用 mysqldump 分块导出数据表 -let rangeTxtName = "song"; // 分布区间 "song" "album" "artist" "user" -const fieldName = `song_id`; -var a = fs.readFileSync(path.join(__dirname, `distribution_range/${rangeTxtName}.txt`), "utf-8").trim().split("\n").reverse().map(i => i.trim()); -// console.log(a); -let outputArr = [`@echo off`, `D:`, `cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin`]; +// // 使用 mysqldump 分块导出数据表 +// let rangeTxtName = "song"; // 分布区间 "song" "album" "artist" "user" +// const fieldName = `song_id`; +// var a = fs.readFileSync(path.join(__dirname, `distribution_range/${rangeTxtName}.txt`), "utf-8").trim().split("\n").reverse().map(i => i.trim()); +// // console.log(a); +// let outputArr = [`@echo off`, `D:`, `cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin`]; +// for (let i = 0; i < a.length; i++) { +// let where; +// if (a[i + 1]) { +// where = `${fieldName}>=${a[i]} and ${fieldName}<${a[i + 1]}`; +// } else { +// where = `${fieldName}>=${a[i]}`; +// } +// outputArr.push(`INSERT INTO song SELECT * FROM song_old WHERE ${where}; -- ${i}`); +// } +// outputArr.push("echo done."); +// console.log(outputArr.join('\n')); + +// ############################################# + +let outputArr = []; +var a = fs.readFileSync(path.join(__dirname, `distribution_range/user.txt`), "utf-8").trim().split("\n").reverse().map(i => i.trim()); +// a = a.filter((val, index) => index % 15 == 0); // 抽掉一些边界 不然SQL太多了 for (let i = 0; i < a.length; i++) { - let where; - if (a[i + 1]) { - where = `${fieldName}>=${a[i]} and ${fieldName}<${a[i + 1]}`; - } else { - where = `${fieldName}>=${a[i]}`; - } - outputArr.push(`INSERT INTO song SELECT * FROM song_old WHERE ${where}; -- ${i}`); + outputArr.push(`start cmd /k "node index --utils comment --min ${a[i]} --max ${a[i + 1]} --limit 10000";`); } outputArr.push("echo done."); console.log(outputArr.join('\n')); diff --git a/netease_music/src/getInfo/albumInfoUtils.js b/netease_music/src/getInfo/albumInfoUtils.js index fe2ce75..0a53138 100644 --- a/netease_music/src/getInfo/albumInfoUtils.js +++ b/netease_music/src/getInfo/albumInfoUtils.js @@ -30,6 +30,7 @@ SELECT * FROM album WHERE (full_description = '' or full_description is null) an async function fetchAll({ args = {}, isUpdate = false }) { console.log("start fetching albums ..."); let albumIds = await dataManager.album.getIdsToFetch(args, isUpdate); + console.log(`albumIds was fetched, count: ${albumIds.length}`); for (let i = 0; i < albumIds.length; i++) { await global.checkIsExit(); const albumId = albumIds[i]; diff --git a/netease_music/src/getInfo/artistInfoUtils.js b/netease_music/src/getInfo/artistInfoUtils.js index 43fb588..59f5d19 100644 --- a/netease_music/src/getInfo/artistInfoUtils.js +++ b/netease_music/src/getInfo/artistInfoUtils.js @@ -26,6 +26,7 @@ async function getFromDatabase({ artistId }) { async function fetchAll({ args = {} }) { console.log("start fetching artists ..."); let artistIds = await dataManager.artist.getIdsToFetch(args); + console.log(`artistIds was fetched, count: ${artistIds.length}`); for (let i = 0; i < artistIds.length; i++) { await global.checkIsExit(); const artistId = artistIds[i]; diff --git a/netease_music/src/getInfo/commentUtils.js b/netease_music/src/getInfo/commentUtils.js index 592c9f3..e219fb2 100644 --- a/netease_music/src/getInfo/commentUtils.js +++ b/netease_music/src/getInfo/commentUtils.js @@ -20,6 +20,7 @@ async function fetchAll({ args = {} }) { // SELECT song_id FROM wait_fetch_comment WHERE song_id NOT IN ( SELECT song_id FROM comment_progress ) // `, []); let songIds = await dataManager.comment.getIdsToFetch(args); + console.log(`songIds was fetched, count: ${songIds.length}`); for (let i = 0; i < songIds.length; i++) { await global.checkIsExit(); const songId = songIds[i]; diff --git a/netease_music/src/getInfo/lyricInfoUtils.js b/netease_music/src/getInfo/lyricInfoUtils.js index 3fb09e7..acafa7b 100644 --- a/netease_music/src/getInfo/lyricInfoUtils.js +++ b/netease_music/src/getInfo/lyricInfoUtils.js @@ -11,6 +11,7 @@ const dbUtils = global.dbUtils; async function fetchAll({ args = {} }) { console.log("start fetching lyrics ..."); let songIds = await dataManager.lyric.getIdsToFetch(args); + console.log(`songIds was fetched, count: ${songIds.length}`); for (let i = 0; i < songIds.length; i++) { await global.checkIsExit(); const songId = songIds[i]; diff --git a/netease_music/src/getInfo/songInfoUtils.js b/netease_music/src/getInfo/songInfoUtils.js index b0733f8..a3858b0 100644 --- a/netease_music/src/getInfo/songInfoUtils.js +++ b/netease_music/src/getInfo/songInfoUtils.js @@ -14,6 +14,7 @@ const { song_detail } = require('NeteaseCloudMusicApi'); async function fetchAll({ args = {} }) { console.log("start fetching songs ..."); let songIds = await dataManager.song.getIdsToFetch(args); + console.log(`songIds was fetched, count: ${songIds.length}`); // 0 - 100, 200 - 399, 400 - ..., ... - songIds.length-1 // 0 1 2 count-1 var step = 1000; @@ -23,6 +24,10 @@ async function fetchAll({ args = {} }) { await global.checkIsExit(); var subArray = songIds.slice(i * step, (i + 1) * step); console.log(`${i + 1}/${count} | song: ${fill(subArray[0], 10)}-${fill(subArray.slice(-1)[0], 10)} ${fill(`(${subArray.length})`, 6, ' ', true)} | ${args.min || "?"}-${args.max || "?"}`); + if (subArray.length < 800) { + console.log("小于800首歌,等待凑够800首歌下次一起爬取"); + return; + } try { await fetch({ songIdArray: subArray }); } catch (err) {