diff --git a/netease_music/sql/export/export_sql_generator.js b/netease_music/sql/export/export_sql_generator.js index ea1a8aa..cff4d11 100644 --- a/netease_music/sql/export/export_sql_generator.js +++ b/netease_music/sql/export/export_sql_generator.js @@ -55,9 +55,29 @@ function fill(num, fillers, length) { // ############################################# +// // 使用 mysqldump 分块导出数据表 +// let rangeTxtName = "song"; // 分布区间 "song" "album" "artist" "user" +// const dumpTable = "lyric"; // "comment_progress"; +// const fieldName = `song_id`; +// var a = fs.readFileSync(path.join(__dirname, `distribution_range/${rangeTxtName}.txt`), "utf-8").trim().split("\n").reverse().map(i => i.trim()); +// // console.log(a); +// let outputArr = [`@echo off`, `D:`, `cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin`]; +// for (let i = 0; i < a.length; i++) { +// let where; +// if (a[i + 1]) { +// where = `${fieldName}>=${a[i]} and ${fieldName}<${a[i + 1]}`; +// } else { +// where = `${fieldName}>=${a[i]}`; +// } +// outputArr.push(`mysqldump neteasemusic -hrm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com -uroot -pOj13EzoppxXvMmjPKh --tables ${dumpTable} --where="${where}" --skip-add-drop-table --set-gtid-purged=OFF > ${absPath}/${dumpTable}_${fill(i, '0', 4)}.sql`); +// } +// outputArr.push("echo done."); +// console.log(outputArr.join('\n\n')); + +// ############################################# + // 使用 mysqldump 分块导出数据表 let rangeTxtName = "song"; // 分布区间 "song" "album" "artist" "user" -const dumpTable = "lyric"; // "comment_progress"; const fieldName = `song_id`; var a = fs.readFileSync(path.join(__dirname, `distribution_range/${rangeTxtName}.txt`), "utf-8").trim().split("\n").reverse().map(i => i.trim()); // console.log(a); @@ -69,7 +89,7 @@ for (let i = 0; i < a.length; i++) { } else { where = `${fieldName}>=${a[i]}`; } - outputArr.push(`mysqldump neteasemusic -hrm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com -uroot -pOj13EzoppxXvMmjPKh --tables ${dumpTable} --where="${where}" --skip-add-drop-table --set-gtid-purged=OFF > ${absPath}/${dumpTable}_${fill(i, '0', 4)}.sql`); + outputArr.push(`INSERT INTO song SELECT * FROM song_old WHERE ${where}; -- ${i}`); } outputArr.push("echo done."); -console.log(outputArr.join('\n\n')); +console.log(outputArr.join('\n')); diff --git a/netease_music/sql/neteasemusic.sql b/netease_music/sql/neteasemusic.sql index 7b0308c..5ab957a 100644 --- a/netease_music/sql/neteasemusic.sql +++ b/netease_music/sql/neteasemusic.sql @@ -201,10 +201,10 @@ CREATE TABLE `song` ( `image` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分', `pub_date` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '发布日期(弃用)', `pub_time` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '发布日期 毫秒为单位的Unix时间戳', - `no_copyright_rcmd` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'None表示可以播,非空表示无版权', `mv` int(10) UNSIGNED NULL DEFAULT NULL COMMENT '非零表示有MV ID', `single` tinyint(4) NULL DEFAULT NULL COMMENT '0: 有专辑信息或者是DJ节目 1: 未知专辑', `version` int(11) NOT NULL DEFAULT 1 COMMENT '歌曲版本信息', + `no_copyright_rcmd` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'None表示可以播,非空表示无版权', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', `data_version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)', diff --git a/netease_music/sql/statistic.sql b/netease_music/sql/statistic.sql index 449c576..b7f7e4a 100644 --- a/netease_music/sql/statistic.sql +++ b/netease_music/sql/statistic.sql @@ -1,15 +1,16 @@ -- 更新统计数据 -- songCount 容易超时,有几张表查询时容易发生死锁,所以请在没有爬取时进行统计 +-- 4G: 4294967296 (4 * 1024 * 1024 * 1024) 64M: 67108864 +-- my.ini 配置文件中设置 innodb_buffer_pool_size=4G +show variables like "%innodb_buffer_pool_size%"; DELETE FROM analysis WHERE `key` LIKE '%_old'; UPDATE analysis SET `key`=concat(`key`,'_old'), modify_time=modify_time WHERE `key` NOT LIKE '%_old'; INSERT INTO analysis (`key`, `value`) VALUES ('songCount', (SELECT count(*) as count FROM song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); INSERT INTO analysis (`key`, `value`) VALUES ('songWaiting', (SELECT count(*) as count FROM wait_fetch_song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); INSERT INTO analysis (`key`, `value`) VALUES ('playlistCount', (SELECT count(*) AS count FROM playlist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); INSERT INTO analysis (`key`, `value`) VALUES ('albumCount', (SELECT count(*) as count FROM album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); --- INSERT INTO analysis (`key`, `value`) VALUES ('albumWaiting', (SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); INSERT INTO analysis (`key`, `value`) VALUES ('albumWaiting', (SELECT count(*) as count FROM wait_fetch_album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); INSERT INTO analysis (`key`, `value`) VALUES ('artistCount', (SELECT count(*) AS count FROM artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); --- INSERT INTO analysis (`key`, `value`) VALUES ('artistWaiting', (SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); INSERT INTO analysis (`key`, `value`) VALUES ('artistWaiting', (SELECT count(*) as count FROM wait_fetch_artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); INSERT INTO analysis (`key`, `value`) VALUES ('lyricCount', (SELECT count(*) AS count FROM lyric) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); INSERT INTO analysis (`key`, `value`) VALUES ('commentCount', (SELECT count( DISTINCT song_id ) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`); diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql index 2b19ddd..e338de4 100644 --- a/netease_music/sql/structure.sql +++ b/netease_music/sql/structure.sql @@ -16,10 +16,10 @@ CREATE TABLE `song` ( `image` varchar(200) DEFAULT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分', `pub_date` varchar(100) DEFAULT NULL COMMENT '发布日期(弃用)', `pub_time` varchar(100) DEFAULT NULL COMMENT '发布日期 毫秒为单位的Unix时间戳', - `no_copyright_rcmd` varchar(255) DEFAULT NULL COMMENT 'None表示可以播,非空表示无版权', `mv` int(10) unsigned DEFAULT NULL COMMENT '非零表示有MV ID', `single` tinyint(4) DEFAULT NULL COMMENT '0: 有专辑信息或者是DJ节目 1: 未知专辑', `version` int(11) NOT NULL DEFAULT '1' COMMENT '歌曲版本信息', + `no_copyright_rcmd` varchar(255) DEFAULT NULL COMMENT 'None表示可以播,非空表示无版权', `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间', `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间', `data_version` tinyint(4) NOT NULL DEFAULT '1' COMMENT '数据记录版本(如果有字段调整则整体+1)', diff --git a/netease_music/watch.js b/netease_music/watch.js index 0a26925..f3db298 100644 --- a/netease_music/watch.js +++ b/netease_music/watch.js @@ -13,7 +13,7 @@ const sleepUtils = require('../utils/sleepUtils'); async function main() { do { await neteaseMusic.watch(); - await sleepUtils.sleep(10 * 1000); + keepWatching && await sleepUtils.sleep(10 * 1000); } while (keepWatching) } main(); \ No newline at end of file