From ce20720c608c157d3a56f2fd2c031ab2ec0c58fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E5=B0=8F=E5=A2=A8?= <2291200076@qq.com> Date: Sat, 29 Oct 2022 00:16:36 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=20assistant=20=E5=8A=A9?= =?UTF-8?q?=E6=89=8B=EF=BC=9B=E5=85=B6=E4=BB=96=E8=B0=83=E6=95=B4=EF=BC=88?= =?UTF-8?q?=E5=A4=A7=E8=B0=83=E6=95=B4=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hifini_music/index.js | 14 +- netease_music/index.js | 10 +- netease_music/sql/import/album_bat.txt | 301 ++++++++++++++++++ netease_music/sql/import/artist_bat.txt | 22 ++ .../sql/import/import_sql_generator.js | 22 +- netease_music/sql/import/lyric_bat.txt | 76 +++++ .../sql/import/song_artist_relation_bat.txt | 22 ++ netease_music/sql/import/song_bat.txt | 285 +++++++++++++++++ netease_music/sql/statistic.sql | 13 +- netease_music/sql/structure.sql | 5 + netease_music/src/assistantUtils.js | 59 ++++ netease_music/src/dataManager.js | 61 +++- netease_music/src/getInfo/albumInfoUtils.js | 9 + netease_music/src/getInfo/artistInfoUtils.js | 9 + netease_music/src/getInfo/commentUtils.js | 27 +- netease_music/src/getInfo/lyricInfoUtils.js | 19 +- netease_music/src/getInfo/songInfoUtils.js | 8 + netease_music/src/index.js | 8 +- netease_music/todo.txt | 7 +- utils/dbPoolUtils.js | 109 +++---- utils/requestUtils.js | 2 +- 21 files changed, 973 insertions(+), 115 deletions(-) create mode 100644 netease_music/sql/import/album_bat.txt create mode 100644 netease_music/sql/import/artist_bat.txt create mode 100644 netease_music/sql/import/lyric_bat.txt create mode 100644 netease_music/sql/import/song_artist_relation_bat.txt create mode 100644 netease_music/sql/import/song_bat.txt create mode 100644 netease_music/src/assistantUtils.js diff --git a/hifini_music/index.js b/hifini_music/index.js index 3964319..50ea658 100644 --- a/hifini_music/index.js +++ b/hifini_music/index.js @@ -75,17 +75,21 @@ async function startFetchDetail() { const threadId = idsToFetch[i]; console.log(`getDetail\t| ${i + 1}/${idsToFetch.length} | threadId: ${threadId}`); await getDetail(threadId); - await sleepUtils.sleep(1000); } } async function getDetail(threadId) { let url = `https://hifini.com/thread-${threadId}.htm`; - - // let html = fs.readFileSync("./1.html", "utf8"); - let html = await getApiResult(url); - // fs.writeFileSync("./1.html", html); + let html; + try { + // html = fs.readFileSync("./1.html", "utf8"); + html = await getApiResult(url, { timeout: 3000 }); + // fs.writeFileSync("./1.html", html); + } catch (e) { + console.error("请求失败,可能是请求超时", e); + return; + } // 解析到音乐信息 var matcher = /var ap4 = new APlayer\(([\S\s]*?)\);/.exec(html); diff --git a/netease_music/index.js b/netease_music/index.js index 2a959a1..962e3f8 100644 --- a/netease_music/index.js +++ b/netease_music/index.js @@ -21,12 +21,20 @@ args = { limit: Number(args.limit) || undefined, // 分区 partition: Number(args.partition) || undefined, - + // ################################# + // 两次请求之间等待时间 sleepTime: Number(args.sleepTime) || 100, + // 数据库 + database: args.database || "neteasemusic", } console.log("args:", args); +// 指定数据库名 +if (args.database != "neteasemusic") + console.log(`注意,当前连接的数据库 [${args.database}] 非业务数据库`); +global.database = args.database; + global.sleepTime = args.sleepTime; // 两次请求之间停顿时间 global.useMysqlPool = true; const neteaseMusic = require('./src/index'); diff --git a/netease_music/sql/import/album_bat.txt b/netease_music/sql/import/album_bat.txt new file mode 100644 index 0000000..bb4b03c --- /dev/null +++ b/netease_music/sql/import/album_bat.txt @@ -0,0 +1,301 @@ +@echo off +D: +cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin +mysql -hlocalhost -uroot -proot neteasemusic +use neteasemusic; +source D:/sql_export/album/album_0.sql +rename table album to album_0000; +source D:/sql_export/album/album_1.sql +rename table album to album_0001; +source D:/sql_export/album/album_2.sql +rename table album to album_0002; +source D:/sql_export/album/album_3.sql +rename table album to album_0003; +source D:/sql_export/album/album_4.sql +rename table album to album_0004; +source D:/sql_export/album/album_5.sql +rename table album to album_0005; +source D:/sql_export/album/album_6.sql +rename table album to album_0006; +source D:/sql_export/album/album_7.sql +rename table album to album_0007; +source D:/sql_export/album/album_8.sql +rename table album to album_0008; +source D:/sql_export/album/album_9.sql +rename table album to album_0009; +source D:/sql_export/album/album_10.sql +rename table album to album_0010; +source D:/sql_export/album/album_11.sql +rename table album to album_0011; +source D:/sql_export/album/album_12.sql +rename table album to album_0012; +source D:/sql_export/album/album_13.sql +rename table album to album_0013; +source D:/sql_export/album/album_14.sql +rename table album to album_0014; +source D:/sql_export/album/album_15.sql +rename table album to album_0015; +source D:/sql_export/album/album_16.sql +rename table album to album_0016; +source D:/sql_export/album/album_17.sql +rename table album to album_0017; +source D:/sql_export/album/album_18.sql +rename table album to album_0018; +source D:/sql_export/album/album_19.sql +rename table album to album_0019; +source D:/sql_export/album/album_20.sql +rename table album to album_0020; +source D:/sql_export/album/album_21.sql +rename table album to album_0021; +source D:/sql_export/album/album_22.sql +rename table album to album_0022; +source D:/sql_export/album/album_23.sql +rename table album to album_0023; +source D:/sql_export/album/album_24.sql +rename table album to album_0024; +source D:/sql_export/album/album_25.sql +rename table album to album_0025; +source D:/sql_export/album/album_26.sql +rename table album to album_0026; +source D:/sql_export/album/album_27.sql +rename table album to album_0027; +source D:/sql_export/album/album_28.sql +rename table album to album_0028; +source D:/sql_export/album/album_29.sql +rename table album to album_0029; +source D:/sql_export/album/album_30.sql +rename table album to album_0030; +source D:/sql_export/album/album_31.sql +rename table album to album_0031; +source D:/sql_export/album/album_32.sql +rename table album to album_0032; +source D:/sql_export/album/album_33.sql +rename table album to album_0033; +source D:/sql_export/album/album_34.sql +rename table album to album_0034; +source D:/sql_export/album/album_35.sql +rename table album to album_0035; +source D:/sql_export/album/album_36.sql +rename table album to album_0036; +source D:/sql_export/album/album_37.sql +rename table album to album_0037; +source D:/sql_export/album/album_38.sql +rename table album to album_0038; +source D:/sql_export/album/album_39.sql +rename table album to album_0039; +source D:/sql_export/album/album_40.sql +rename table album to album_0040; +source D:/sql_export/album/album_41.sql +rename table album to album_0041; +source D:/sql_export/album/album_42.sql +rename table album to album_0042; +source D:/sql_export/album/album_43.sql +rename table album to album_0043; +source D:/sql_export/album/album_44.sql +rename table album to album_0044; +source D:/sql_export/album/album_45.sql +rename table album to album_0045; +source D:/sql_export/album/album_46.sql +rename table album to album_0046; +source D:/sql_export/album/album_47.sql +rename table album to album_0047; +source D:/sql_export/album/album_48.sql +rename table album to album_0048; +source D:/sql_export/album/album_49.sql +rename table album to album_0049; +source D:/sql_export/album/album_50.sql +rename table album to album_0050; +source D:/sql_export/album/album_51.sql +rename table album to album_0051; +source D:/sql_export/album/album_52.sql +rename table album to album_0052; +source D:/sql_export/album/album_53.sql +rename table album to album_0053; +source D:/sql_export/album/album_54.sql +rename table album to album_0054; +source D:/sql_export/album/album_55.sql +rename table album to album_0055; +source D:/sql_export/album/album_56.sql +rename table album to album_0056; +source D:/sql_export/album/album_57.sql +rename table album to album_0057; +source D:/sql_export/album/album_58.sql +rename table album to album_0058; +source D:/sql_export/album/album_59.sql +rename table album to album_0059; +source D:/sql_export/album/album_60.sql +rename table album to album_0060; +source D:/sql_export/album/album_61.sql +rename table album to album_0061; +source D:/sql_export/album/album_62.sql +rename table album to album_0062; +source D:/sql_export/album/album_63.sql +rename table album to album_0063; +source D:/sql_export/album/album_64.sql +rename table album to album_0064; +source D:/sql_export/album/album_65.sql +rename table album to album_0065; +source D:/sql_export/album/album_66.sql +rename table album to album_0066; +source D:/sql_export/album/album_67.sql +rename table album to album_0067; +source D:/sql_export/album/album_68.sql +rename table album to album_0068; +source D:/sql_export/album/album_69.sql +rename table album to album_0069; +source D:/sql_export/album/album_70.sql +rename table album to album_0070; +source D:/sql_export/album/album_71.sql +rename table album to album_0071; +source D:/sql_export/album/album_72.sql +rename table album to album_0072; +source D:/sql_export/album/album_73.sql +rename table album to album_0073; +rename table album_0000 to album; +INSERT IGNORE INTO album SELECT * FROM album_0001; +drop table album_0001; +INSERT IGNORE INTO album SELECT * FROM album_0002; +drop table album_0002; +INSERT IGNORE INTO album SELECT * FROM album_0003; +drop table album_0003; +INSERT IGNORE INTO album SELECT * FROM album_0004; +drop table album_0004; +INSERT IGNORE INTO album SELECT * FROM album_0005; +drop table album_0005; +INSERT IGNORE INTO album SELECT * FROM album_0006; +drop table album_0006; +INSERT IGNORE INTO album SELECT * FROM album_0007; +drop table album_0007; +INSERT IGNORE INTO album SELECT * FROM album_0008; +drop table album_0008; +INSERT IGNORE INTO album SELECT * FROM album_0009; +drop table album_0009; +INSERT IGNORE INTO album SELECT * FROM album_0010; +drop table album_0010; +INSERT IGNORE INTO album SELECT * FROM album_0011; +drop table album_0011; +INSERT IGNORE INTO album SELECT * FROM album_0012; +drop table album_0012; +INSERT IGNORE INTO album SELECT * FROM album_0013; +drop table album_0013; +INSERT IGNORE INTO album SELECT * FROM album_0014; +drop table album_0014; +INSERT IGNORE INTO album SELECT * FROM album_0015; +drop table album_0015; +INSERT IGNORE INTO album SELECT * FROM album_0016; +drop table album_0016; +INSERT IGNORE INTO album SELECT * FROM album_0017; +drop table album_0017; +INSERT IGNORE INTO album SELECT * FROM album_0018; +drop table album_0018; +INSERT IGNORE INTO album SELECT * FROM album_0019; +drop table album_0019; +INSERT IGNORE INTO album SELECT * FROM album_0020; +drop table album_0020; +INSERT IGNORE INTO album SELECT * FROM album_0021; +drop table album_0021; +INSERT IGNORE INTO album SELECT * FROM album_0022; +drop table album_0022; +INSERT IGNORE INTO album SELECT * FROM album_0023; +drop table album_0023; +INSERT IGNORE INTO album SELECT * FROM album_0024; +drop table album_0024; +INSERT IGNORE INTO album SELECT * FROM album_0025; +drop table album_0025; +INSERT IGNORE INTO album SELECT * FROM album_0026; +drop table album_0026; +INSERT IGNORE INTO album SELECT * FROM album_0027; +drop table album_0027; +INSERT IGNORE INTO album SELECT * FROM album_0028; +drop table album_0028; +INSERT IGNORE INTO album SELECT * FROM album_0029; +drop table album_0029; +INSERT IGNORE INTO album SELECT * FROM album_0030; +drop table album_0030; +INSERT IGNORE INTO album SELECT * FROM album_0031; +drop table album_0031; +INSERT IGNORE INTO album SELECT * FROM album_0032; +drop table album_0032; +INSERT IGNORE INTO album SELECT * FROM album_0033; +drop table album_0033; +INSERT IGNORE INTO album SELECT * FROM album_0034; +drop table album_0034; +INSERT IGNORE INTO album SELECT * FROM album_0035; +drop table album_0035; +INSERT IGNORE INTO album SELECT * FROM album_0036; +drop table album_0036; +INSERT IGNORE INTO album SELECT * FROM album_0037; +drop table album_0037; +INSERT IGNORE INTO album SELECT * FROM album_0038; +drop table album_0038; +INSERT IGNORE INTO album SELECT * FROM album_0039; +drop table album_0039; +INSERT IGNORE INTO album SELECT * FROM album_0040; +drop table album_0040; +INSERT IGNORE INTO album SELECT * FROM album_0041; +drop table album_0041; +INSERT IGNORE INTO album SELECT * FROM album_0042; +drop table album_0042; +INSERT IGNORE INTO album SELECT * FROM album_0043; +drop table album_0043; +INSERT IGNORE INTO album SELECT * FROM album_0044; +drop table album_0044; +INSERT IGNORE INTO album SELECT * FROM album_0045; +drop table album_0045; +INSERT IGNORE INTO album SELECT * FROM album_0046; +drop table album_0046; +INSERT IGNORE INTO album SELECT * FROM album_0047; +drop table album_0047; +INSERT IGNORE INTO album SELECT * FROM album_0048; +drop table album_0048; +INSERT IGNORE INTO album SELECT * FROM album_0049; +drop table album_0049; +INSERT IGNORE INTO album SELECT * FROM album_0050; +drop table album_0050; +INSERT IGNORE INTO album SELECT * FROM album_0051; +drop table album_0051; +INSERT IGNORE INTO album SELECT * FROM album_0052; +drop table album_0052; +INSERT IGNORE INTO album SELECT * FROM album_0053; +drop table album_0053; +INSERT IGNORE INTO album SELECT * FROM album_0054; +drop table album_0054; +INSERT IGNORE INTO album SELECT * FROM album_0055; +drop table album_0055; +INSERT IGNORE INTO album SELECT * FROM album_0056; +drop table album_0056; +INSERT IGNORE INTO album SELECT * FROM album_0057; +drop table album_0057; +INSERT IGNORE INTO album SELECT * FROM album_0058; +drop table album_0058; +INSERT IGNORE INTO album SELECT * FROM album_0059; +drop table album_0059; +INSERT IGNORE INTO album SELECT * FROM album_0060; +drop table album_0060; +INSERT IGNORE INTO album SELECT * FROM album_0061; +drop table album_0061; +INSERT IGNORE INTO album SELECT * FROM album_0062; +drop table album_0062; +INSERT IGNORE INTO album SELECT * FROM album_0063; +drop table album_0063; +INSERT IGNORE INTO album SELECT * FROM album_0064; +drop table album_0064; +INSERT IGNORE INTO album SELECT * FROM album_0065; +drop table album_0065; +INSERT IGNORE INTO album SELECT * FROM album_0066; +drop table album_0066; +INSERT IGNORE INTO album SELECT * FROM album_0067; +drop table album_0067; +INSERT IGNORE INTO album SELECT * FROM album_0068; +drop table album_0068; +INSERT IGNORE INTO album SELECT * FROM album_0069; +drop table album_0069; +INSERT IGNORE INTO album SELECT * FROM album_0070; +drop table album_0070; +INSERT IGNORE INTO album SELECT * FROM album_0071; +drop table album_0071; +INSERT IGNORE INTO album SELECT * FROM album_0072; +drop table album_0072; +INSERT IGNORE INTO album SELECT * FROM album_0073; +drop table album_0073; +echo done. \ No newline at end of file diff --git a/netease_music/sql/import/artist_bat.txt b/netease_music/sql/import/artist_bat.txt new file mode 100644 index 0000000..cc7763b --- /dev/null +++ b/netease_music/sql/import/artist_bat.txt @@ -0,0 +1,22 @@ +@echo off +D: +cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin +mysql -hlocalhost -uroot -proot neteasemusic +use neteasemusic; +source D:/sql_export/artist/artist_0000.sql +source D:/sql_export/artist/artist_0001.sql +source D:/sql_export/artist/artist_0002.sql +source D:/sql_export/artist/artist_0003.sql +source D:/sql_export/artist/artist_0004.sql +source D:/sql_export/artist/artist_0005.sql +source D:/sql_export/artist/artist_0006.sql +source D:/sql_export/artist/artist_0007.sql +source D:/sql_export/artist/artist_0008.sql +source D:/sql_export/artist/artist_0009.sql +source D:/sql_export/artist/artist_0010.sql +source D:/sql_export/artist/artist_0011.sql +source D:/sql_export/artist/artist_0012.sql +source D:/sql_export/artist/artist_0013.sql +source D:/sql_export/artist/artist_0014.sql +source D:/sql_export/artist/artist_0015.sql +echo done. \ No newline at end of file diff --git a/netease_music/sql/import/import_sql_generator.js b/netease_music/sql/import/import_sql_generator.js index 7585bf5..985c568 100644 --- a/netease_music/sql/import/import_sql_generator.js +++ b/netease_music/sql/import/import_sql_generator.js @@ -1,3 +1,6 @@ +const fs = require('fs'); +const path = require('path'); + const absPath = `D:/sql_export`; // 数字转成字符串,同时在前面填充 @@ -18,9 +21,24 @@ let outputArr = [ ]; let firstIndex = 0; -let lastIndex = 115; +let lastIndex = 15; +let tableName = "song_artist_relation"; +let isContainDropTable = false; // 如果 mysqldump的时候导出的文件包含了drop table if exists,那么就先分别导入不同表,然后再将数据合并到一张表中 +let fileNameSerialFillZero = true; for (let i = firstIndex; i <= lastIndex; i++) { - outputArr.push(`source ${absPath}/comment/comment_${fill(i, '0', 4)}.sql`); + outputArr.push(`source ${absPath}/${tableName}/${tableName}_${fileNameSerialFillZero ? fill(i, '0', 4) : i}.sql`); + + if (isContainDropTable) { + outputArr.push(`rename table ${tableName} to ${tableName}_${fill(i, '0', 4)};`); + } +} +if (isContainDropTable) { + outputArr.push(`rename table ${tableName}_${fill(firstIndex, '0', 4)} to ${tableName};`); + for (let i = firstIndex + 1; i <= lastIndex; i++) { + outputArr.push(`INSERT IGNORE INTO ${tableName} SELECT * FROM ${tableName}_${fill(i, '0', 4)};`); + outputArr.push(`drop table ${tableName}_${fill(i, '0', 4)};`); + } } outputArr.push("echo done."); console.log(outputArr.join('\n')); +fs.writeFileSync(path.join(__dirname, `${tableName}_bat.txt`), outputArr.join('\n')); \ No newline at end of file diff --git a/netease_music/sql/import/lyric_bat.txt b/netease_music/sql/import/lyric_bat.txt new file mode 100644 index 0000000..77f47b4 --- /dev/null +++ b/netease_music/sql/import/lyric_bat.txt @@ -0,0 +1,76 @@ +@echo off +D: +cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin +mysql -hlocalhost -uroot -proot neteasemusic +use neteasemusic; +source D:/sql_export/lyric/lyric_0000.sql +source D:/sql_export/lyric/lyric_0001.sql +source D:/sql_export/lyric/lyric_0002.sql +source D:/sql_export/lyric/lyric_0003.sql +source D:/sql_export/lyric/lyric_0004.sql +source D:/sql_export/lyric/lyric_0005.sql +source D:/sql_export/lyric/lyric_0006.sql +source D:/sql_export/lyric/lyric_0007.sql +source D:/sql_export/lyric/lyric_0008.sql +source D:/sql_export/lyric/lyric_0009.sql +source D:/sql_export/lyric/lyric_0010.sql +source D:/sql_export/lyric/lyric_0011.sql +source D:/sql_export/lyric/lyric_0012.sql +source D:/sql_export/lyric/lyric_0013.sql +source D:/sql_export/lyric/lyric_0014.sql +source D:/sql_export/lyric/lyric_0015.sql +source D:/sql_export/lyric/lyric_0016.sql +source D:/sql_export/lyric/lyric_0017.sql +source D:/sql_export/lyric/lyric_0018.sql +source D:/sql_export/lyric/lyric_0019.sql +source D:/sql_export/lyric/lyric_0020.sql +source D:/sql_export/lyric/lyric_0021.sql +source D:/sql_export/lyric/lyric_0022.sql +source D:/sql_export/lyric/lyric_0023.sql +source D:/sql_export/lyric/lyric_0024.sql +source D:/sql_export/lyric/lyric_0025.sql +source D:/sql_export/lyric/lyric_0026.sql +source D:/sql_export/lyric/lyric_0027.sql +source D:/sql_export/lyric/lyric_0028.sql +source D:/sql_export/lyric/lyric_0029.sql +source D:/sql_export/lyric/lyric_0030.sql +source D:/sql_export/lyric/lyric_0031.sql +source D:/sql_export/lyric/lyric_0032.sql +source D:/sql_export/lyric/lyric_0033.sql +source D:/sql_export/lyric/lyric_0034.sql +source D:/sql_export/lyric/lyric_0035.sql +source D:/sql_export/lyric/lyric_0036.sql +source D:/sql_export/lyric/lyric_0037.sql +source D:/sql_export/lyric/lyric_0038.sql +source D:/sql_export/lyric/lyric_0039.sql +source D:/sql_export/lyric/lyric_0040.sql +source D:/sql_export/lyric/lyric_0041.sql +source D:/sql_export/lyric/lyric_0042.sql +source D:/sql_export/lyric/lyric_0043.sql +source D:/sql_export/lyric/lyric_0044.sql +source D:/sql_export/lyric/lyric_0045.sql +source D:/sql_export/lyric/lyric_0046.sql +source D:/sql_export/lyric/lyric_0047.sql +source D:/sql_export/lyric/lyric_0048.sql +source D:/sql_export/lyric/lyric_0049.sql +source D:/sql_export/lyric/lyric_0050.sql +source D:/sql_export/lyric/lyric_0051.sql +source D:/sql_export/lyric/lyric_0052.sql +source D:/sql_export/lyric/lyric_0053.sql +source D:/sql_export/lyric/lyric_0054.sql +source D:/sql_export/lyric/lyric_0055.sql +source D:/sql_export/lyric/lyric_0056.sql +source D:/sql_export/lyric/lyric_0057.sql +source D:/sql_export/lyric/lyric_0058.sql +source D:/sql_export/lyric/lyric_0059.sql +source D:/sql_export/lyric/lyric_0060.sql +source D:/sql_export/lyric/lyric_0061.sql +source D:/sql_export/lyric/lyric_0062.sql +source D:/sql_export/lyric/lyric_0063.sql +source D:/sql_export/lyric/lyric_0064.sql +source D:/sql_export/lyric/lyric_0065.sql +source D:/sql_export/lyric/lyric_0066.sql +source D:/sql_export/lyric/lyric_0067.sql +source D:/sql_export/lyric/lyric_0068.sql +source D:/sql_export/lyric/lyric_0069.sql +echo done. \ No newline at end of file diff --git a/netease_music/sql/import/song_artist_relation_bat.txt b/netease_music/sql/import/song_artist_relation_bat.txt new file mode 100644 index 0000000..19b54ae --- /dev/null +++ b/netease_music/sql/import/song_artist_relation_bat.txt @@ -0,0 +1,22 @@ +@echo off +D: +cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin +mysql -hlocalhost -uroot -proot neteasemusic +use neteasemusic; +source D:/sql_export/song_artist_relation/song_artist_relation_0000.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0001.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0002.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0003.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0004.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0005.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0006.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0007.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0008.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0009.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0010.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0011.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0012.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0013.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0014.sql +source D:/sql_export/song_artist_relation/song_artist_relation_0015.sql +echo done. \ No newline at end of file diff --git a/netease_music/sql/import/song_bat.txt b/netease_music/sql/import/song_bat.txt new file mode 100644 index 0000000..cb170fe --- /dev/null +++ b/netease_music/sql/import/song_bat.txt @@ -0,0 +1,285 @@ +@echo off +D: +cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin +mysql -hlocalhost -uroot -proot neteasemusic +use neteasemusic; +source D:/sql_export/song/song_0.sql +rename table song to song_0000; +source D:/sql_export/song/song_1.sql +rename table song to song_0001; +source D:/sql_export/song/song_2.sql +rename table song to song_0002; +source D:/sql_export/song/song_3.sql +rename table song to song_0003; +source D:/sql_export/song/song_4.sql +rename table song to song_0004; +source D:/sql_export/song/song_5.sql +rename table song to song_0005; +source D:/sql_export/song/song_6.sql +rename table song to song_0006; +source D:/sql_export/song/song_7.sql +rename table song to song_0007; +source D:/sql_export/song/song_8.sql +rename table song to song_0008; +source D:/sql_export/song/song_9.sql +rename table song to song_0009; +source D:/sql_export/song/song_10.sql +rename table song to song_0010; +source D:/sql_export/song/song_11.sql +rename table song to song_0011; +source D:/sql_export/song/song_12.sql +rename table song to song_0012; +source D:/sql_export/song/song_13.sql +rename table song to song_0013; +source D:/sql_export/song/song_14.sql +rename table song to song_0014; +source D:/sql_export/song/song_15.sql +rename table song to song_0015; +source D:/sql_export/song/song_16.sql +rename table song to song_0016; +source D:/sql_export/song/song_17.sql +rename table song to song_0017; +source D:/sql_export/song/song_18.sql +rename table song to song_0018; +source D:/sql_export/song/song_19.sql +rename table song to song_0019; +source D:/sql_export/song/song_20.sql +rename table song to song_0020; +source D:/sql_export/song/song_21.sql +rename table song to song_0021; +source D:/sql_export/song/song_22.sql +rename table song to song_0022; +source D:/sql_export/song/song_23.sql +rename table song to song_0023; +source D:/sql_export/song/song_24.sql +rename table song to song_0024; +source D:/sql_export/song/song_25.sql +rename table song to song_0025; +source D:/sql_export/song/song_26.sql +rename table song to song_0026; +source D:/sql_export/song/song_27.sql +rename table song to song_0027; +source D:/sql_export/song/song_28.sql +rename table song to song_0028; +source D:/sql_export/song/song_29.sql +rename table song to song_0029; +source D:/sql_export/song/song_30.sql +rename table song to song_0030; +source D:/sql_export/song/song_31.sql +rename table song to song_0031; +source D:/sql_export/song/song_32.sql +rename table song to song_0032; +source D:/sql_export/song/song_33.sql +rename table song to song_0033; +source D:/sql_export/song/song_34.sql +rename table song to song_0034; +source D:/sql_export/song/song_35.sql +rename table song to song_0035; +source D:/sql_export/song/song_36.sql +rename table song to song_0036; +source D:/sql_export/song/song_37.sql +rename table song to song_0037; +source D:/sql_export/song/song_38.sql +rename table song to song_0038; +source D:/sql_export/song/song_39.sql +rename table song to song_0039; +source D:/sql_export/song/song_40.sql +rename table song to song_0040; +source D:/sql_export/song/song_41.sql +rename table song to song_0041; +source D:/sql_export/song/song_42.sql +rename table song to song_0042; +source D:/sql_export/song/song_43.sql +rename table song to song_0043; +source D:/sql_export/song/song_44.sql +rename table song to song_0044; +source D:/sql_export/song/song_45.sql +rename table song to song_0045; +source D:/sql_export/song/song_46.sql +rename table song to song_0046; +source D:/sql_export/song/song_47.sql +rename table song to song_0047; +source D:/sql_export/song/song_48.sql +rename table song to song_0048; +source D:/sql_export/song/song_49.sql +rename table song to song_0049; +source D:/sql_export/song/song_50.sql +rename table song to song_0050; +source D:/sql_export/song/song_51.sql +rename table song to song_0051; +source D:/sql_export/song/song_52.sql +rename table song to song_0052; +source D:/sql_export/song/song_53.sql +rename table song to song_0053; +source D:/sql_export/song/song_54.sql +rename table song to song_0054; +source D:/sql_export/song/song_55.sql +rename table song to song_0055; +source D:/sql_export/song/song_56.sql +rename table song to song_0056; +source D:/sql_export/song/song_57.sql +rename table song to song_0057; +source D:/sql_export/song/song_58.sql +rename table song to song_0058; +source D:/sql_export/song/song_59.sql +rename table song to song_0059; +source D:/sql_export/song/song_60.sql +rename table song to song_0060; +source D:/sql_export/song/song_61.sql +rename table song to song_0061; +source D:/sql_export/song/song_62.sql +rename table song to song_0062; +source D:/sql_export/song/song_63.sql +rename table song to song_0063; +source D:/sql_export/song/song_64.sql +rename table song to song_0064; +source D:/sql_export/song/song_65.sql +rename table song to song_0065; +source D:/sql_export/song/song_66.sql +rename table song to song_0066; +source D:/sql_export/song/song_67.sql +rename table song to song_0067; +source D:/sql_export/song/song_68.sql +rename table song to song_0068; +source D:/sql_export/song/song_69.sql +rename table song to song_0069; +rename table song_0000 to song; +INSERT IGNORE INTO song SELECT * FROM song_0001; +drop table song_0001; +INSERT IGNORE INTO song SELECT * FROM song_0002; +drop table song_0002; +INSERT IGNORE INTO song SELECT * FROM song_0003; +drop table song_0003; +INSERT IGNORE INTO song SELECT * FROM song_0004; +drop table song_0004; +INSERT IGNORE INTO song SELECT * FROM song_0005; +drop table song_0005; +INSERT IGNORE INTO song SELECT * FROM song_0006; +drop table song_0006; +INSERT IGNORE INTO song SELECT * FROM song_0007; +drop table song_0007; +INSERT IGNORE INTO song SELECT * FROM song_0008; +drop table song_0008; +INSERT IGNORE INTO song SELECT * FROM song_0009; +drop table song_0009; +INSERT IGNORE INTO song SELECT * FROM song_0010; +drop table song_0010; +INSERT IGNORE INTO song SELECT * FROM song_0011; +drop table song_0011; +INSERT IGNORE INTO song SELECT * FROM song_0012; +drop table song_0012; +INSERT IGNORE INTO song SELECT * FROM song_0013; +drop table song_0013; +INSERT IGNORE INTO song SELECT * FROM song_0014; +drop table song_0014; +INSERT IGNORE INTO song SELECT * FROM song_0015; +drop table song_0015; +INSERT IGNORE INTO song SELECT * FROM song_0016; +drop table song_0016; +INSERT IGNORE INTO song SELECT * FROM song_0017; +drop table song_0017; +INSERT IGNORE INTO song SELECT * FROM song_0018; +drop table song_0018; +INSERT IGNORE INTO song SELECT * FROM song_0019; +drop table song_0019; +INSERT IGNORE INTO song SELECT * FROM song_0020; +drop table song_0020; +INSERT IGNORE INTO song SELECT * FROM song_0021; +drop table song_0021; +INSERT IGNORE INTO song SELECT * FROM song_0022; +drop table song_0022; +INSERT IGNORE INTO song SELECT * FROM song_0023; +drop table song_0023; +INSERT IGNORE INTO song SELECT * FROM song_0024; +drop table song_0024; +INSERT IGNORE INTO song SELECT * FROM song_0025; +drop table song_0025; +INSERT IGNORE INTO song SELECT * FROM song_0026; +drop table song_0026; +INSERT IGNORE INTO song SELECT * FROM song_0027; +drop table song_0027; +INSERT IGNORE INTO song SELECT * FROM song_0028; +drop table song_0028; +INSERT IGNORE INTO song SELECT * FROM song_0029; +drop table song_0029; +INSERT IGNORE INTO song SELECT * FROM song_0030; +drop table song_0030; +INSERT IGNORE INTO song SELECT * FROM song_0031; +drop table song_0031; +INSERT IGNORE INTO song SELECT * FROM song_0032; +drop table song_0032; +INSERT IGNORE INTO song SELECT * FROM song_0033; +drop table song_0033; +INSERT IGNORE INTO song SELECT * FROM song_0034; +drop table song_0034; +INSERT IGNORE INTO song SELECT * FROM song_0035; +drop table song_0035; +INSERT IGNORE INTO song SELECT * FROM song_0036; +drop table song_0036; +INSERT IGNORE INTO song SELECT * FROM song_0037; +drop table song_0037; +INSERT IGNORE INTO song SELECT * FROM song_0038; +drop table song_0038; +INSERT IGNORE INTO song SELECT * FROM song_0039; +drop table song_0039; +INSERT IGNORE INTO song SELECT * FROM song_0040; +drop table song_0040; +INSERT IGNORE INTO song SELECT * FROM song_0041; +drop table song_0041; +INSERT IGNORE INTO song SELECT * FROM song_0042; +drop table song_0042; +INSERT IGNORE INTO song SELECT * FROM song_0043; +drop table song_0043; +INSERT IGNORE INTO song SELECT * FROM song_0044; +drop table song_0044; +INSERT IGNORE INTO song SELECT * FROM song_0045; +drop table song_0045; +INSERT IGNORE INTO song SELECT * FROM song_0046; +drop table song_0046; +INSERT IGNORE INTO song SELECT * FROM song_0047; +drop table song_0047; +INSERT IGNORE INTO song SELECT * FROM song_0048; +drop table song_0048; +INSERT IGNORE INTO song SELECT * FROM song_0049; +drop table song_0049; +INSERT IGNORE INTO song SELECT * FROM song_0050; +drop table song_0050; +INSERT IGNORE INTO song SELECT * FROM song_0051; +drop table song_0051; +INSERT IGNORE INTO song SELECT * FROM song_0052; +drop table song_0052; +INSERT IGNORE INTO song SELECT * FROM song_0053; +drop table song_0053; +INSERT IGNORE INTO song SELECT * FROM song_0054; +drop table song_0054; +INSERT IGNORE INTO song SELECT * FROM song_0055; +drop table song_0055; +INSERT IGNORE INTO song SELECT * FROM song_0056; +drop table song_0056; +INSERT IGNORE INTO song SELECT * FROM song_0057; +drop table song_0057; +INSERT IGNORE INTO song SELECT * FROM song_0058; +drop table song_0058; +INSERT IGNORE INTO song SELECT * FROM song_0059; +drop table song_0059; +INSERT IGNORE INTO song SELECT * FROM song_0060; +drop table song_0060; +INSERT IGNORE INTO song SELECT * FROM song_0061; +drop table song_0061; +INSERT IGNORE INTO song SELECT * FROM song_0062; +drop table song_0062; +INSERT IGNORE INTO song SELECT * FROM song_0063; +drop table song_0063; +INSERT IGNORE INTO song SELECT * FROM song_0064; +drop table song_0064; +INSERT IGNORE INTO song SELECT * FROM song_0065; +drop table song_0065; +INSERT IGNORE INTO song SELECT * FROM song_0066; +drop table song_0066; +INSERT IGNORE INTO song SELECT * FROM song_0067; +drop table song_0067; +INSERT IGNORE INTO song SELECT * FROM song_0068; +drop table song_0068; +INSERT IGNORE INTO song SELECT * FROM song_0069; +drop table song_0069; +echo done. \ No newline at end of file diff --git a/netease_music/sql/statistic.sql b/netease_music/sql/statistic.sql index 3b8dbc4..a84ad8a 100644 --- a/netease_music/sql/statistic.sql +++ b/netease_music/sql/statistic.sql @@ -19,12 +19,13 @@ INSERT INTO analysis (`key`, `value`) VALUES ('songArtistCount', (SELECT count(* -- 更新后初次全表扫描 -INSERT IGNORE INTO wait_song (song_id) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ) -INSERT IGNORE INTO wait_song (song_id) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song ) -INSERT IGNORE INTO wait_song (song_id) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ) - --- 后续只需要扫描 wait_check 表 -INSERT IGNORE INTO wait_song (song_id) SELECT song_id FROM wait_check_song WHERE song_id NOT IN ( SELECT song_id FROM song ) +INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ); +INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song ); +INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ); +INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM lyric ); +INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress ); +INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist ); +INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album ); diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql index a4d4f22..81ce140 100644 --- a/netease_music/sql/structure.sql +++ b/netease_music/sql/structure.sql @@ -233,6 +233,11 @@ CREATE TABLE `wait_check_lyric` ( PRIMARY KEY (`id`) ); +CREATE TABLE `wait_check_comment` ( + `id` bigint(20) unsigned NOT NULL COMMENT 'id', + PRIMARY KEY (`id`) +); + CREATE TABLE `wait_fetch_song` ( diff --git a/netease_music/src/assistantUtils.js b/netease_music/src/assistantUtils.js new file mode 100644 index 0000000..cd99cf7 --- /dev/null +++ b/netease_music/src/assistantUtils.js @@ -0,0 +1,59 @@ +// 定时更新 wait 表 + +// 计算数组差集 (a - b) +function getDiffSet(a, b) { + // let a = [1, 2, 3]; + // let b = [4, 5, 6, 1]; + // let c = a.filter(i => b.indexOf(i) == -1); + // console.log(c); + return a.filter(i => b.indexOf(i) == -1); +} + +async function migrateIdsFromCheckToFetch(tableName, fieldName, insertSql = null) { + console.log(`更新待爬取列表: ${tableName}`); + + let stepLength = 1000; + while (true) { + // 从 check 表中分块查出待处理数据 + let idsResult = await dbUtils.query(`SELECT id FROM wait_check_${tableName} LIMIT ${stepLength}`, []); + let ids = idsResult.map(row => row.id); + // console.log("ids", ids); + if (ids.length == 0) { + break; + }; + + // 查询出已处理的数据 + let skipIdsResult = await dbUtils.query(`SELECT ${fieldName} as id FROM ${tableName} WHERE ${fieldName} IN ?`, [[ids]]); + let skipIds = skipIdsResult.map(row => row.id); + // console.log("skipIds", skipIds); + + // 剩余要爬取的数据 + let finalIds = getDiffSet(ids, skipIds); + // console.log("finalIds", finalIds); + + // 插入待爬取列表 + if (finalIds.length > 0) { + await dbUtils.query(insertSql ? insertSql : `INSERT IGNORE INTO wait_fetch_${tableName} (id) VALUES ?`, [finalIds.map(id => [id])]); + } + + // 从待检查表中删除 + if (ids.length > 0) + await dbUtils.query(`DELETE FROM wait_check_${tableName} WHERE id IN ?`, [[ids]]); + console.log(`table: ${tableName} | ${ids[0]} - ${ids.slice(-1)[0]}`) + } +} + +async function updateWaitTable() { + await migrateIdsFromCheckToFetch("song", "song_id"); + await migrateIdsFromCheckToFetch("lyric", "song_id"); + await migrateIdsFromCheckToFetch("comment", "song_id", `INSERT IGNORE INTO comment_progress (song_id) VALUES ?`); + await migrateIdsFromCheckToFetch("album", "album_id"); + await migrateIdsFromCheckToFetch("artist", "artist_id"); + + // comment 搬到 comment_progress + console.log("done.\n"); +} + +module.exports = { + updateWaitTable, +} \ No newline at end of file diff --git a/netease_music/src/dataManager.js b/netease_music/src/dataManager.js index c6e8ef4..8aa3df3 100644 --- a/netease_music/src/dataManager.js +++ b/netease_music/src/dataManager.js @@ -5,7 +5,7 @@ module.exports = { insertCollection: async (songInfoList) => { if (songInfoList.length == 0) return; // image 因为接口没有返回,所以不更新 - return await dbUtils.query(` + let result = await dbUtils.query(` INSERT INTO song ( song_id, title, type, alias, pop, fee, quality, cd, no, dj_id, s_id, origin_cover_type, pub_time, @@ -19,23 +19,27 @@ module.exports = { songInfo.no, songInfo.djId, songInfo.sId, songInfo.originCoverType, songInfo.pubTime, songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2 ])]); + await dbUtils.query(` + DELETE FROM wait_fetch_song WHERE id IN ? + `, [[songInfoList.map(songInfo => songInfo.id)]]) + return result; }, getIdsToFetch: async (args) => { let whereClause = [ - args.min ? `song_id > ${args.min}` : '1=1', - args.max ? `song_id <= ${args.max}` : '1=1', + args.min ? `id > ${args.min}` : '1=1', + args.max ? `id <= ${args.max}` : '1=1', ].join(' AND '); let sql = ` - SELECT song_id FROM wait_fetch_song WHERE ${whereClause} - ${args.order ? `ORDER BY song_id ${args.order}` : ''} + SELECT id FROM wait_fetch_song WHERE ${whereClause} + ${args.order ? `ORDER BY id ${args.order}` : ''} ${args.limit ? `LIMIT ${args.limit}` : ''} `; - // // 更新现有数据 + // 更新现有数据 // sql = `SELECT song_id FROM song WHERE data_version = 1`; console.log(sql); let songIds = await dbUtils.query(sql, []); - songIds = songIds.map(item => item.song_id); + songIds = songIds.map(item => item.id); return songIds; }, }, @@ -92,7 +96,7 @@ module.exports = { let artistIds = await dbUtils.query(sql, []); artistIds = artistIds.map(item => item.artist_id); return artistIds; - } + }, }, @@ -100,6 +104,22 @@ module.exports = { insert: async (lyricInfo) => { return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo); }, + + getIdsToFetch: async (args) => { + let whereClause = [ + args.min ? `song_id > ${args.min}` : '1=1', + args.max ? `song_id <= ${args.max}` : '1=1', + ].join(' AND '); + var sql = ` + SELECT song_id FROM wait_fetch_lyric WHERE ${whereClause} + ${args.order ? `ORDER BY song_id ${args.order}` : ''} + ${args.limit ? `LIMIT ${args.limit}` : ''} + `; + console.log(sql); + let songIds = await dbUtils.query(sql, []); + songIds = songIds.map(song => song.song_id); + return songIds; + }, }, @@ -111,6 +131,22 @@ module.exports = { ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP `, [commentInfoList]); }, + + getIdsToFetch: async (args) => { + let whereClause = [ + args.min ? `song_id > ${args.min}` : '1=1', + args.max ? `song_id <= ${args.max}` : '1=1', + ].join(' AND '); + var sql = ` + SELECT song_id FROM comment_progress WHERE ${whereClause} AND current_status != 2 + ORDER BY current_status DESC${args.order ? `, song_id ${args.order}` : ''} + ${args.limit ? `LIMIT ${args.limit}` : ''} + `; + console.log(sql); + let songIds = await dbUtils.query(sql, []); + songIds = songIds.map(item => item.song_id); + return songIds; + }, }, @@ -175,8 +211,15 @@ module.exports = { insert: async (type, ids) => { // 过滤掉 id 为 0 的 ids = ids.filter(id => id < 0); - return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids]); + if (ids.length == 0) return; + return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids.map(id => [id])]); }, }, + wait_fetch: { + deleteCollection: async function (type, ids) { + if (ids.length > 0) + return await dbUtils.query(`DELETE FROM wait_fetch_${type} WHERE id IN ?`, [[ids]]); + } + } }; diff --git a/netease_music/src/getInfo/albumInfoUtils.js b/netease_music/src/getInfo/albumInfoUtils.js index 9967926..26aacc6 100644 --- a/netease_music/src/getInfo/albumInfoUtils.js +++ b/netease_music/src/getInfo/albumInfoUtils.js @@ -140,17 +140,26 @@ async function fetch({ albumId, debug = false, update = false }) { }; // console.log("albumInfo", albumInfo); + // 插入待爬取表 await dataManager.wait_check.insert("song", songIds); + await dataManager.wait_check.insert("lyric", songIds); + await dataManager.wait_check.insert("comment", songIds); + + // 插入关联关系 if (albumId > 0) { let songAlbumRel = songIds.map(songId => [songId, albumId]); await dataManager.song_album.insertCollection(songAlbumRel); } + // 插入数据 if (update) { await dataManager.album.update(albumId, albumInfo); } else { await dataManager.album.insert(albumInfo); } + + // 从待爬取表中删除记录 + await dataManager.wait_fetch.deleteCollection("album", [albumId]); } module.exports = { diff --git a/netease_music/src/getInfo/artistInfoUtils.js b/netease_music/src/getInfo/artistInfoUtils.js index 862ac74..10ed3c0 100644 --- a/netease_music/src/getInfo/artistInfoUtils.js +++ b/netease_music/src/getInfo/artistInfoUtils.js @@ -96,13 +96,22 @@ async function fetch({ artistId, debug = false }) { }; // console.log("artistInfo", artistInfo); + // 插入待爬取表 await dataManager.wait_check.insert("song", songIds); + await dataManager.wait_check.insert("lyric", songIds); + await dataManager.wait_check.insert("comment", songIds); + + // 插入关联关系 if (artistId > 0) { let songArtistRel = songIds.map(songId => [songId, artistId]); await dataManager.song_artist.insertCollection(songArtistRel); } + // 插入数据 await dataManager.artist.insert(artistInfo); + + // 从待爬取表中删除记录 + await dataManager.wait_fetch.deleteCollection("artist", [artistId]); } module.exports = { diff --git a/netease_music/src/getInfo/commentUtils.js b/netease_music/src/getInfo/commentUtils.js index 9850de9..592c9f3 100644 --- a/netease_music/src/getInfo/commentUtils.js +++ b/netease_music/src/getInfo/commentUtils.js @@ -14,27 +14,12 @@ const { comment_music } = require('NeteaseCloudMusicApi'); async function fetchAll({ args = {} }) { console.log("start fetching comment ..."); - // 首先将需要爬取的song_id导入comment_progress表 - await dbUtils.query(` - INSERT IGNORE INTO comment_progress ( song_id ) - SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress ) - `, []); - - let whereClause = [ - args.min ? `song_id > ${args.min}` : '1=1', - args.max ? `song_id <= ${args.max}` : '1=1', - ].join(' AND '); - var sql = ` - SELECT song_id FROM comment_progress WHERE ${whereClause} AND current_status != 2 - ORDER BY current_status DESC${args.order ? `, song_id ${args.order}` : ''} - ${args.limit ? `LIMIT ${args.limit}` : ''} - `; - console.log(sql); - - // 首先查询有无正在爬取中的记录 - var songIds = await dbUtils.query(sql, []); - songIds = songIds.map(item => item.song_id); - + // // 首先将需要爬取的song_id导入comment_progress表 + // await dbUtils.query(` + // INSERT IGNORE INTO comment_progress ( song_id ) + // SELECT song_id FROM wait_fetch_comment WHERE song_id NOT IN ( SELECT song_id FROM comment_progress ) + // `, []); + let songIds = await dataManager.comment.getIdsToFetch(args); for (let i = 0; i < songIds.length; i++) { await global.checkIsExit(); const songId = songIds[i]; diff --git a/netease_music/src/getInfo/lyricInfoUtils.js b/netease_music/src/getInfo/lyricInfoUtils.js index 524a39d..e8dd1d3 100644 --- a/netease_music/src/getInfo/lyricInfoUtils.js +++ b/netease_music/src/getInfo/lyricInfoUtils.js @@ -10,19 +10,7 @@ const dbUtils = global.dbUtils; // 从数据库中查出还缺少的歌词,并进行爬取 async function fetchAll({ args = {} }) { console.log("start fetching lyrics ..."); - let whereClause = [ - args.min ? `song_id > ${args.min}` : '1=1', - args.max ? `song_id <= ${args.max}` : '1=1', - ].join(' AND '); - var sql = ` - SELECT song_id FROM song WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM lyric ) - ${args.order ? `ORDER BY song_id ${args.order}` : ''} - ${args.limit ? `LIMIT ${args.limit}` : ''} - `; - console.log(sql); - - var songIds = await dbUtils.query(sql, []); - songIds = songIds.map(song => song.song_id); + let songIds = await dataManager.lyric.getIdsToFetch(args); for (let i = 0; i < songIds.length; i++) { await global.checkIsExit(); const songId = songIds[i]; @@ -78,7 +66,12 @@ async function fetch({ songId, debug = false }) { version: lyric.version, }; // console.log("lyricInfo", lyricInfo); + + // 插入数据 await dataManager.lyric.insert(lyricInfo); + + // 从待爬取表中删除记录 + await dataManager.wait_fetch.deleteCollection("lyric", [songId]); } module.exports = { diff --git a/netease_music/src/getInfo/songInfoUtils.js b/netease_music/src/getInfo/songInfoUtils.js index 829ebae..4efef36 100644 --- a/netease_music/src/getInfo/songInfoUtils.js +++ b/netease_music/src/getInfo/songInfoUtils.js @@ -85,11 +85,19 @@ async function fetch({ songIdArray, debug = false }) { if (songInfoList.length == 0) return; console.log("插入数据库"); + // 插入待爬取表 await dataManager.wait_check.insert("album", albumIds); await dataManager.wait_check.insert("artist", artistIds); + + // 插入关联关系 await dataManager.song_album.insertCollection(songAlbumRel); await dataManager.song_artist.insertCollection(songArtistRel); + + // 插入数据 await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新 + + // 从待爬取表中删除记录 + await dataManager.wait_fetch.deleteCollection("song", [songId]); } // 获取音乐详情 diff --git a/netease_music/src/index.js b/netease_music/src/index.js index 2624000..e0b9c91 100644 --- a/netease_music/src/index.js +++ b/netease_music/src/index.js @@ -6,7 +6,7 @@ const sleepUtils = require('../../utils/sleepUtils'); // 数据库连接池 dbUtils.create({ - database: "neteaseMusic", // 指定数据库 + database: global.database || "neteasemusic", // 指定数据库 connectionLimit: global.connectionLimit || 10, // 设置数据库连接池数量 }); global.dbUtils = dbUtils; @@ -20,6 +20,8 @@ const lyricInfoUtils = require('./getInfo/lyricInfoUtils'); const commentUtils = require('./getInfo/commentUtils'); const playlistUtils = require('./getInfo/playlistUtils'); +const assistantUtils = require('./assistantUtils'); + /** * 测试 */ @@ -73,6 +75,10 @@ async function main(args) { case 'playlist': await playlistUtils.fetchAll({ args: args }); break; + + case 'assistant': + await assistantUtils.updateWaitTable(); + break; default: console.log("utils参数不匹配,退出"); return; diff --git a/netease_music/todo.txt b/netease_music/todo.txt index 80399ca..6dd38e9 100644 --- a/netease_music/todo.txt +++ b/netease_music/todo.txt @@ -31,16 +31,19 @@ node index --utils playlist 后期: +爬取歌单playlist功能需要更新 + 删除song_playlist_relation表中rcmd_reason字段(全是空字符串) 歌单定时更新(rel表中添加一个del字段,先将歌单下面的全部置为删除状态,再插入的时候把已有歌曲的标记重新修改为正常状态) 评论的更新 -爬取歌单playlist功能需要更新 - 被删除的aritst和album回头再通过其他表中的数据反查回来 +歌曲目前爬取之后,会有一部分没有image封面,还是需要用旧方法爬取到 + + 说明: song表中data_version=1的音乐是第一次爬取的时候存在,但是后面再爬取时不存在的音乐 diff --git a/utils/dbPoolUtils.js b/utils/dbPoolUtils.js index 4aee356..0c8d383 100644 --- a/utils/dbPoolUtils.js +++ b/utils/dbPoolUtils.js @@ -65,62 +65,63 @@ async function query(sql, params) { }); } -// sqlParamsEntities = { sql: "", params: [], callback: function(可选) } -async function transaction(sqlParamsEntities) { - let connection; - try { - connection = await new Promise((resolve, reject) => { - pool.getConnection(function (err, connection) { - if (err) { - reject(err); - } - resolve(connection); - }); - }); - } catch (err) { - console.error("获取事务connection失败", err); - return; - } +// // sqlParamsEntities = { sql: "", params: [], callback: function(可选) } +// async function transaction(sqlParamsEntities) { +// let connection; +// try { +// connection = await new Promise((resolve, reject) => { +// pool.getConnection(function (err, connection) { +// if (err) { +// reject(err); +// } +// resolve(connection); +// }); +// }); +// } catch (err) { +// console.error("获取事务connection失败", err); +// return; +// } - try { - return await new Promise((resolve, reject) => { - // 开启事务 - connection.beginTransaction(function (err) { - if (err) { - reject(err); - } +// try { +// return await new Promise((resolve, reject) => { +// // 开启事务 +// connection.beginTransaction(function (err) { +// if (err) { +// reject(err); +// } - // 开始执行SQL语句 - console.log("开始执行transaction,共执行" + sqlParamsEntities.length + "条数据"); - sqlParamsEntities.forEach((entity) => { - connection.query(entity.sql, entity.param, function (tErr, data) { - if (tErr) { - reject(tErr); - } - if (typeof entity.callback === 'function') - return entity.callback(data); - }) - }); +// // 开始执行SQL语句 +// console.log("开始执行transaction,共执行" + sqlParamsEntities.length + "条数据"); +// sqlParamsEntities.forEach((entity) => { +// console.log(entity.sql); +// connection.query(entity.sql, entity.param, function (tErr, data) { +// if (tErr) { +// reject(tErr); +// } +// if (typeof entity.callback === 'function') +// return entity.callback(data); +// }) +// }); - // 执行完毕,提交事务 - connection.commit(function (tErr, info) { - console.log("transaction info: " + JSON.stringify(info)); - if (tErr) { - reject(tErr); - } - resolve(info); - }) - }); - }); - } catch (err) { - console.error("事务执行失败,开始回滚"); - connection.rollback(function () { - console.log("transaction error: " + err); - }); - } finally { - connection.release(); - } -} +// // 执行完毕,提交事务 +// connection.commit(function (tErr, info) { +// console.log("transaction info: " + JSON.stringify(info)); +// if (tErr) { +// reject(tErr); +// } +// resolve(info); +// }) +// }); +// }); +// } catch (err) { +// console.error("事务执行失败,开始回滚", err); +// connection.rollback(function () { +// console.log("transaction error: " + err); +// }); +// } finally { +// connection.release(); +// } +// } async function close() { await new Promise((resolve, reject) => { @@ -135,6 +136,6 @@ async function close() { module.exports = { create, query, - transaction, + // transaction, close, } \ No newline at end of file diff --git a/utils/requestUtils.js b/utils/requestUtils.js index c5d11bb..c1d154d 100644 --- a/utils/requestUtils.js +++ b/utils/requestUtils.js @@ -45,7 +45,7 @@ async function getRedirectUrl(url) { reject(err); } // console.log(res.headers.location); - resolve(res.headers.location); + resolve(res?.headers?.location); }); }); }