更新 netease_music 脚本
This commit is contained in:
@@ -1,10 +0,0 @@
|
|||||||
start cmd /k "node index --utils assistant"
|
|
||||||
|
|
||||||
start cmd /k "node index --utils song"
|
|
||||||
start cmd /k "node index --utils album --limit 10000"
|
|
||||||
start cmd /k "node index --utils artist --limit 10000"
|
|
||||||
start cmd /k "node index --utils comment --limit 10000"
|
|
||||||
start cmd /k "node index --utils lyric --limit 10000"
|
|
||||||
|
|
||||||
@REM start cmd /k "node index --utils playlist"
|
|
||||||
exit
|
|
1
netease_music/manual-script/.gitignore
vendored
Normal file
1
netease_music/manual-script/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
comment id segment.txt
|
@@ -1,9 +1,9 @@
|
|||||||
REPLACE INTO `comment_origin_` SELECT * FROM `comment`; -- 80.1G
|
REPLACE INTO `comment_origin_` SELECT * FROM `comment`; -- 80.1G
|
||||||
REPLACE INTO `user_origin_` SELECT * FROM `user`; -- 5.20G
|
REPLACE INTO `user_origin_` SELECT * FROM `user`; -- 5.20G
|
||||||
|
|
||||||
|
|
||||||
DELETE `comment`
|
DELETE `comment`
|
||||||
FROM `comment` INNER JOIN `comment_origin_` ON `comment`.comment_id = `comment_origin_`.comment_id;
|
FROM `comment` INNER JOIN `comment_origin_` ON `comment`.comment_id = `comment_origin_`.comment_id;
|
||||||
|
|
||||||
DELETE `user`
|
DELETE `user`
|
||||||
FROM `user` INNER JOIN `user_origin_` ON `user`.user_id = `user_origin_`.user_id;
|
FROM `user` INNER JOIN `user_origin_` ON `user`.user_id = `user_origin_`.user_id;
|
@@ -2,20 +2,16 @@
|
|||||||
# cd ./netease_music
|
# cd ./netease_music
|
||||||
cd tools/netease_music/
|
cd tools/netease_music/
|
||||||
|
|
||||||
|
# 【ING】
|
||||||
start cmd /k "node index --utils assistant"
|
start cmd /k "node index --utils assistant"
|
||||||
start cmd /k "node index --utils song"
|
start cmd /k "node index --utils song"
|
||||||
start cmd /k "node index --utils artist --limit 50000"
|
start cmd /k "node index --utils artist --limit 50000"
|
||||||
start cmd /k "node index --utils album --limit 50000"
|
start cmd /k "node index --utils album --limit 50000"
|
||||||
start cmd /k "node index --utils lyric"
|
start cmd /k "node index --utils lyric --limit 10000"
|
||||||
|
# start cmd /k "node index --utils comment --limit 10000"
|
||||||
|
|
||||||
start cmd /k "node index --utils playlist"
|
start cmd /k "node index --utils playlist"
|
||||||
|
|
||||||
exit
|
|
||||||
|
|
||||||
# 把增量数据带上来
|
|
||||||
|
|
||||||
# node index --utils lyric --limit 500 --order desc
|
|
||||||
# node index --utils lyric --limit 500
|
|
||||||
|
|
||||||
# lyric_5
|
# lyric_5
|
||||||
# 【ING】
|
# 【ING】
|
||||||
@@ -99,39 +95,54 @@ node index --utils comment --min 2000000 --max 2500000 --limit 10000
|
|||||||
node index --utils comment --min 2500000 --max 3000000 --limit 10000
|
node index --utils comment --min 2500000 --max 3000000 --limit 10000
|
||||||
node index --utils comment --min 3000000 --max 3500000 --limit 10000
|
node index --utils comment --min 3000000 --max 3500000 --limit 10000
|
||||||
|
|
||||||
# comment_3
|
# comment_3 配置待更新
|
||||||
# 【阿里云ing】
|
# 【阿里云ing】
|
||||||
node index --utils comment --min 3500000 --max 4000000 --limit 10000 &
|
node index --utils comment --min 3500000 --max 4000000 --limit 10000 &
|
||||||
node index --utils comment --min 4000000 --max 4500000 --limit 10000 &
|
node index --utils comment --min 4000000 --max 4500000 --limit 10000 &
|
||||||
node index --utils comment --min 4500000 --max 5000000 --limit 10000 &
|
node index --utils comment --min 4500000 --max 5000000 --limit 10000 &
|
||||||
node index --utils comment --min 5000000 --max 5500000 --limit 10000 &
|
node index --utils comment --min 5000000 --max 5500000 --limit 10000 &
|
||||||
node index --utils comment --min 5500000 --max 6000000 --limit 10000 &
|
# node index --utils comment --min 5500000 --max 6000000 --limit 10000
|
||||||
|
|
||||||
# comment_4
|
# comment_4 配置待更新
|
||||||
# 【阿里云ing】
|
|
||||||
node index --utils comment --min 6000000 --max 6500000 --limit 10000 &
|
|
||||||
node index --utils comment --min 6500000 --max 7000000 --limit 10000 &
|
|
||||||
node index --utils comment --min 7000000 --max 7500000 --limit 10000 &
|
|
||||||
node index --utils comment --min 7500000 --max 8000000 --limit 10000 &
|
|
||||||
|
|
||||||
# comment_5
|
# comment_5 配置待更新
|
||||||
node index --utils comment --min 8000000 --max 8500000 --limit 10000
|
# 【公司电脑ing】
|
||||||
node index --utils comment --min 8500000 --max 9000000 --limit 10000
|
# node index --utils comment --min 6000000 --max 9000000 --limit 10000
|
||||||
node index --utils comment --min 9000000 --max 9500000 --limit 10000
|
node index --utils comment --min 9000000 --max 9500000 --limit 10000
|
||||||
node index --utils comment --min 9500000 --max 10000000 --limit 10000
|
# node index --utils comment --min 9500000 --max 10000000 --limit 10000
|
||||||
|
|
||||||
# comment_n
|
# comment_n
|
||||||
|
# 【公司电脑ing】
|
||||||
node index --utils comment --min 10000000 --max 20000000 --limit 10000
|
node index --utils comment --min 10000000 --max 20000000 --limit 10000
|
||||||
node index --utils comment --min 20000000 --max 30000000 --limit 10000
|
node index --utils comment --min 20000000 --max 30000000 --limit 10000
|
||||||
node index --utils comment --min 30000000 --max 40000000 --limit 10000
|
node index --utils comment --min 30000000 --max 40000000 --limit 10000
|
||||||
# node index --utils comment --min 40000000 --max 50000000 --limit 10000
|
# node index --utils comment --min 40000000 --max 50000000 --limit 10000
|
||||||
node index --utils comment --min 50000000 --max 500000000 --limit 10000
|
node index --utils comment --min 50000000 --max 500000000 --limit 10000
|
||||||
|
|
||||||
# comment_2n
|
# comment_2n_1 配置待更新
|
||||||
node index --utils comment --min 1000000000 --max 1500000000 --limit 10000
|
# 【公司电脑ing】
|
||||||
node index --utils comment --min 1500000000 --max 2000000000 --limit 10000
|
# node index --utils comment --min 1000000000 --max 1100000000 --limit 10000
|
||||||
node index --utils comment --min 2000000000 --max 2500000000 --limit 10000
|
# node index --utils comment --min 1100000000 --max 1200000000 --limit 10000
|
||||||
node index --utils comment --min 2500000000 --limit 10000
|
node index --utils comment --min 1200000000 --max 1300000000 --limit 10000
|
||||||
|
node index --utils comment --min 1300000000 --max 1400000000 --limit 10000
|
||||||
|
node index --utils comment --min 1400000000 --max 1500000000 --limit 10000
|
||||||
|
|
||||||
|
# comment_2n_2 配置待更新
|
||||||
|
# 【手机ing】
|
||||||
|
node index --utils comment --min 1500000000 --max 1600000000 --limit 10000 &
|
||||||
|
node index --utils comment --min 1600000000 --max 1700000000 --limit 10000 &
|
||||||
|
node index --utils comment --min 1700000000 --max 1800000000 --limit 10000 &
|
||||||
|
node index --utils comment --min 1800000000 --max 1900000000 --limit 10000 &
|
||||||
|
node index --utils comment --min 1900000000 --max 2000000000 --limit 10000 &
|
||||||
|
|
||||||
|
# comment_2n_3 配置待更新
|
||||||
|
# 【阿里云ing】
|
||||||
|
node index --utils comment --min 2000000000 --max 2100000000 --limit 10000 &
|
||||||
|
node index --utils comment --min 2100000000 --max 2200000000 --limit 10000 &
|
||||||
|
# node index --utils comment --min 2200000000 --max 2300000000 --limit 10000
|
||||||
|
# node index --utils comment --min 2300000000 --max 2400000000 --limit 10000
|
||||||
|
# node index --utils comment --min 2400000000 --max 2500000000 --limit 10000
|
||||||
|
# node index --utils comment --min 2500000000 --limit 10000
|
||||||
|
|
||||||
# # 待整理 2000000 - 1999000000
|
# # 待整理 2000000 - 1999000000
|
||||||
# start cmd /k "node index --utils comment --limit 10000 --min --max " #
|
# start cmd /k "node index --utils comment --limit 10000 --min --max " #
|
@@ -1,80 +1,103 @@
|
|||||||
let a = `1990000000
|
// -- 查看需要爬取的 comment 的分布
|
||||||
1980000000
|
// SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
||||||
1970000000
|
// FROM comment_progress
|
||||||
1960000000
|
// WHERE current_status != 2
|
||||||
1950000000
|
// GROUP BY s
|
||||||
1940000000
|
// ORDER BY s DESC;
|
||||||
1930000000
|
|
||||||
1920000000
|
// 变量 a 为通过执行以上SQL获取的分段
|
||||||
1910000000
|
let a = `2110000000
|
||||||
1900000000
|
2100000000
|
||||||
1890000000
|
2090000000
|
||||||
1880000000
|
2080000000
|
||||||
1870000000
|
2070000000
|
||||||
1860000000
|
2060000000
|
||||||
1850000000
|
2050000000
|
||||||
1840000000
|
2040000000
|
||||||
1830000000
|
2030000000
|
||||||
1820000000
|
2020000000
|
||||||
1810000000
|
2010000000
|
||||||
1800000000
|
2000000000
|
||||||
1500000000
|
1990000000
|
||||||
1490000000
|
1980000000
|
||||||
1480000000
|
1970000000
|
||||||
1470000000
|
1960000000
|
||||||
1460000000
|
1950000000
|
||||||
1450000000
|
1940000000
|
||||||
1440000000
|
1930000000
|
||||||
1430000000
|
1920000000
|
||||||
1420000000
|
1910000000
|
||||||
1410000000
|
1900000000
|
||||||
1400000000
|
1890000000
|
||||||
1390000000
|
1880000000
|
||||||
1380000000
|
1870000000
|
||||||
1370000000
|
1860000000
|
||||||
1360000000
|
1850000000
|
||||||
1350000000
|
1840000000
|
||||||
1340000000
|
1830000000
|
||||||
1330000000
|
1820000000
|
||||||
1320000000
|
1810000000
|
||||||
1310000000
|
1800000000
|
||||||
1300000000
|
1500000000
|
||||||
1290000000
|
1490000000
|
||||||
860000000
|
1480000000
|
||||||
570000000
|
1470000000
|
||||||
560000000
|
1460000000
|
||||||
550000000
|
1450000000
|
||||||
540000000
|
1440000000
|
||||||
530000000
|
1430000000
|
||||||
520000000
|
1420000000
|
||||||
510000000
|
1410000000
|
||||||
500000000
|
1400000000
|
||||||
490000000
|
1390000000
|
||||||
480000000
|
1380000000
|
||||||
470000000
|
1370000000
|
||||||
460000000
|
1360000000
|
||||||
450000000
|
1350000000
|
||||||
440000000
|
1340000000
|
||||||
430000000
|
1330000000
|
||||||
420000000
|
1320000000
|
||||||
410000000
|
1310000000
|
||||||
400000000
|
1300000000
|
||||||
390000000
|
1290000000
|
||||||
40000000
|
860000000
|
||||||
30000000
|
570000000
|
||||||
20000000
|
560000000
|
||||||
10000000
|
550000000
|
||||||
0`
|
540000000
|
||||||
|
530000000
|
||||||
const splitCount = 1000
|
520000000
|
||||||
const step = 10000000 / splitCount
|
510000000
|
||||||
|
500000000
|
||||||
let b = []
|
490000000
|
||||||
a.split('\n')
|
480000000
|
||||||
.map(i => Number(i))
|
470000000
|
||||||
.forEach(n => {
|
460000000
|
||||||
for (let i = splitCount; i > 0; i--) {
|
450000000
|
||||||
b.push(Number(n) + (i - 1) * step)
|
440000000
|
||||||
}
|
430000000
|
||||||
});
|
420000000
|
||||||
console.log(b.join('\n'))
|
410000000
|
||||||
|
400000000
|
||||||
|
390000000
|
||||||
|
30000000
|
||||||
|
20000000
|
||||||
|
10000000
|
||||||
|
0`
|
||||||
|
|
||||||
|
const splitCount = 1
|
||||||
|
const step = 10000000 / splitCount
|
||||||
|
|
||||||
|
let b = []
|
||||||
|
a.split('\n')
|
||||||
|
.map(i => Number(i))
|
||||||
|
.forEach(n => {
|
||||||
|
for (let i = splitCount; i > 0; i--) {
|
||||||
|
b.push(Number(n) + (i - 1) * step)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
let content = b.join('\n')
|
||||||
|
// console.log(content)
|
||||||
|
|
||||||
|
const fs = require('fs')
|
||||||
|
fs.writeFileSync('comment id segment.txt', content, 'utf-8')
|
@@ -1,136 +1,147 @@
|
|||||||
-- 更新统计数据
|
-- 更新统计数据
|
||||||
-- songCount 容易超时,有几张表查询时容易发生死锁,所以请在没有爬取时进行统计
|
-- songCount 容易超时,有几张表查询时容易发生死锁,所以请在没有爬取时进行统计
|
||||||
-- 4G: 4294967296 (4 * 1024 * 1024 * 1024) 64M: 67108864
|
-- 4G: 4294967296 (4 * 1024 * 1024 * 1024) 64M: 67108864
|
||||||
-- my.ini 配置文件中设置 innodb_buffer_pool_size=4G
|
-- my.ini 配置文件中设置 innodb_buffer_pool_size=4G
|
||||||
show variables like "%innodb_buffer_pool_size%";
|
show variables like "%innodb_buffer_pool_size%";
|
||||||
DELETE FROM analysis WHERE `key` LIKE '%_old';
|
DELETE FROM analysis WHERE `key` LIKE '%_old';
|
||||||
UPDATE analysis SET `key`=concat(`key`,'_old'), modify_time=modify_time WHERE `key` NOT LIKE '%_old';
|
UPDATE analysis SET `key`=concat(`key`,'_old'), modify_time=modify_time WHERE `key` NOT LIKE '%_old';
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('songCount', (SELECT count(*) as count FROM song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('songCount', (SELECT count(*) as count FROM song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('songWaiting', (SELECT count(*) as count FROM wait_fetch_song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('songWaiting', (SELECT count(*) as count FROM wait_fetch_song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('playlistCount', (SELECT count(*) AS count FROM playlist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('playlistCount', (SELECT count(*) AS count FROM playlist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('albumCount', (SELECT count(*) as count FROM album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('albumCount', (SELECT count(*) as count FROM album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('albumWaiting', (SELECT count(*) as count FROM wait_fetch_album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('albumWaiting', (SELECT count(*) as count FROM wait_fetch_album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('artistCount', (SELECT count(*) AS count FROM artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('artistCount', (SELECT count(*) AS count FROM artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('artistWaiting', (SELECT count(*) as count FROM wait_fetch_artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('artistWaiting', (SELECT count(*) as count FROM wait_fetch_artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('lyricCount', (SELECT count(*) AS count FROM lyric) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('lyricCount', (SELECT count(*) AS count FROM lyric) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('commentCount', (SELECT count( DISTINCT song_id ) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('commentCount', (SELECT count( DISTINCT song_id ) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('commentTotalCount', (SELECT count(*) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('commentTotalCount', (SELECT count(*) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('userCount', (SELECT count(*) AS count FROM user) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('userCount', (SELECT count(*) AS count FROM user) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('songPlaylistCount', (SELECT count(*) AS count FROM song_playlist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('songPlaylistCount', (SELECT count(*) AS count FROM song_playlist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('songAlbumCount', (SELECT count(*) AS count FROM song_album_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('songAlbumCount', (SELECT count(*) AS count FROM song_album_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
INSERT INTO analysis (`key`, `value`) VALUES ('songArtistCount', (SELECT count(*) AS count FROM song_artist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
INSERT INTO analysis (`key`, `value`) VALUES ('songArtistCount', (SELECT count(*) AS count FROM song_artist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- 更新后初次全表扫描
|
-- 更新后初次全表扫描
|
||||||
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
|
|
||||||
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
|
||||||
|
|
||||||
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
|
||||||
|
|
||||||
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
|
|
||||||
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- 全量更新
|
-- 全量更新
|
||||||
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
||||||
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
||||||
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
||||||
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM lyric );
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM lyric );
|
||||||
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress );
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress );
|
||||||
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist );
|
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist );
|
||||||
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album );
|
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album );
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- 查看需要爬取的 song 的分布
|
-- 查看需要爬取的 song 的分布
|
||||||
SELECT cast( FLOOR( id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM wait_fetch_song
|
FROM wait_fetch_song
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
-- 查看需要爬取的 album 的分布
|
-- 查看需要爬取的 album 的分布
|
||||||
SELECT cast( FLOOR( id / 1000000 ) * 1000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( id / 1000000 ) * 1000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM wait_fetch_album
|
FROM wait_fetch_album
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
-- 查看需要爬取的 artist 的分布
|
-- 查看需要爬取的 artist 的分布
|
||||||
SELECT cast( FLOOR(id / 100000 ) * 100000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR(id / 100000 ) * 100000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM wait_fetch_artist
|
FROM wait_fetch_artist
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
-- 查看需要爬取的 comment 的分布
|
-- 查看需要爬取的 comment 的分布
|
||||||
SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM comment_progress
|
FROM comment_progress
|
||||||
WHERE current_status != 2
|
WHERE current_status != 2
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
-- 查看需要爬取的 lyric 的分布
|
-- 查看需要爬取的 lyric 的分布
|
||||||
SELECT cast( FLOOR( id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM wait_fetch_lyric
|
FROM wait_fetch_lyric
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- 查看本地已有 song 的分布
|
-- 查看本地已有 song 的分布
|
||||||
SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM song
|
FROM song
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
-- 查看本地已有 user 的分布
|
-- 查看本地已有 user 的分布
|
||||||
SELECT cast( FLOOR( user_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( user_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM user
|
FROM user
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
-- 查看本地已有 album 的分布
|
-- 查看本地已有 album 的分布
|
||||||
SELECT cast( FLOOR( album_id / 1000000 ) * 1000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( album_id / 1000000 ) * 1000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM album
|
FROM album
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
-- 查看本地已有 artist 的分布
|
-- 查看本地已有 artist 的分布
|
||||||
SELECT cast( FLOOR( artist_id / 2000000 ) * 2000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( artist_id / 2000000 ) * 2000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM artist
|
FROM artist
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
-- 查看本地已有 playlist 的分布
|
-- 查看本地已有 playlist 的分布
|
||||||
SELECT cast( FLOOR( playlist_id / 2000000 ) * 2000000 as UNSIGNED ) as s, count(*) as count
|
SELECT cast( FLOOR( playlist_id / 2000000 ) * 2000000 as UNSIGNED ) as s, count(*) as count
|
||||||
FROM playlist
|
FROM playlist
|
||||||
GROUP BY s
|
GROUP BY s
|
||||||
ORDER BY s DESC;
|
ORDER BY s DESC;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- 查询单个数据库里面各个表所占磁盘空间大小包括其索引的大小
|
-- 查询单个数据库里面各个表所占磁盘空间大小包括其索引的大小
|
||||||
SELECT
|
SELECT
|
||||||
table_schema AS '数据库',
|
table_schema AS '数据库',
|
||||||
table_name AS '表名',
|
table_name AS '表名',
|
||||||
table_rows AS '记录数',
|
table_rows AS '记录数',
|
||||||
TRUNCATE (data_length / 1024 / 1024, 2) AS '数据容量(MB)',
|
TRUNCATE (data_length / 1024 / 1024, 2) AS '数据容量(MB)',
|
||||||
TRUNCATE (index_length / 1024 / 1024, 2) AS '索引容量(MB)',
|
TRUNCATE (index_length / 1024 / 1024, 2) AS '索引容量(MB)',
|
||||||
TRUNCATE ((data_length + index_length) / 1024 / 1024 / 1024, 2) AS '总容量(GB)'
|
TRUNCATE ((data_length + index_length) / 1024 / 1024 / 1024, 2) AS '总容量(GB)'
|
||||||
FROM
|
FROM
|
||||||
information_schema.TABLES
|
information_schema.TABLES
|
||||||
WHERE
|
WHERE
|
||||||
table_schema = 'neteasemusic'
|
table_schema = 'neteasemusic'
|
||||||
ORDER BY
|
ORDER BY
|
||||||
table_rows DESC;
|
table_rows DESC;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- 统计等待爬取的数据条数 2023.12.25
|
||||||
|
SELECT 'comment' as wait_fetch, count(*) as `count` FROM `comment_progress` where current_status = 0
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'album', count(*) FROM `wait_fetch_album`
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'artist', count(*) FROM `wait_fetch_artist`
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'lyric', count(*) FROM `wait_fetch_lyric`
|
@@ -1,74 +1,79 @@
|
|||||||
windows服务器
|
windows服务器
|
||||||
cd C:\Users\Administrator\Desktop\tools\netease_music
|
cd C:\Users\Administrator\Desktop\tools\netease_music
|
||||||
|
|
||||||
linux服务器
|
linux服务器
|
||||||
cd /www/neteasemusic/tools
|
cd /www/neteasemusic/tools
|
||||||
|
|
||||||
|
手机 Termux
|
||||||
本地库测试
|
pkg update
|
||||||
node index --database neteasemusic_develop --utils song
|
pkg install git
|
||||||
node index --database neteasemusic_develop --utils album --min 10000000
|
pkg install nodejs
|
||||||
node index --database neteasemusic_develop --utils album --order desc
|
|
||||||
node index --database neteasemusic_develop --utils artist
|
|
||||||
node index --database neteasemusic_develop --utils playlist
|
本地库测试
|
||||||
node index --database neteasemusic_develop --utils comment --limit 10000
|
node index --database neteasemusic_develop --utils song
|
||||||
node index --database neteasemusic_develop --utils lyric
|
node index --database neteasemusic_develop --utils album --min 10000000
|
||||||
node index --database neteasemusic_develop --utils assistant
|
node index --database neteasemusic_develop --utils album --order desc
|
||||||
|
node index --database neteasemusic_develop --utils artist
|
||||||
|
node index --database neteasemusic_develop --utils playlist
|
||||||
|
node index --database neteasemusic_develop --utils comment --limit 10000
|
||||||
思路:
|
node index --database neteasemusic_develop --utils lyric
|
||||||
通过一首歌,查出对应的artist和album,然后顺藤摸瓜查出网易云的其他song, album, artist, lyric, comment等
|
node index --database neteasemusic_develop --utils assistant
|
||||||
|
|
||||||
插入rel表的时候同时插入 wait_check_xx 表,然后后续检查这个表,如果不存在,那么就插入对应的 wait_fetch_xxx 表
|
|
||||||
之后查出 wait_fetch_xxx 表,进行数据拉取,形成闭环
|
|
||||||
|
思路:
|
||||||
|
通过一首歌,查出对应的artist和album,然后顺藤摸瓜查出网易云的其他song, album, artist, lyric, comment等
|
||||||
|
|
||||||
后期:
|
插入rel表的时候同时插入 wait_check_xx 表,然后后续检查这个表,如果不存在,那么就插入对应的 wait_fetch_xxx 表
|
||||||
歌单定时更新(rel表中添加一个del字段,先将歌单下面的全部置为删除状态,再插入的时候把已有歌曲的标记重新修改为正常状态)
|
之后查出 wait_fetch_xxx 表,进行数据拉取,形成闭环
|
||||||
|
|
||||||
评论的更新
|
|
||||||
|
|
||||||
被删除的artist和album回头再通过其他表中的数据反查回来
|
后期:
|
||||||
|
歌单定时更新(rel表中添加一个del字段,先将歌单下面的全部置为删除状态,再插入的时候把已有歌曲的标记重新修改为正常状态)
|
||||||
歌曲目前爬取之后,会有一部分没有image封面,还是需要用旧方法爬取到
|
|
||||||
|
评论的更新
|
||||||
|
|
||||||
|
被删除的artist和album回头再通过其他表中的数据反查回来
|
||||||
说明:
|
|
||||||
song表中data_version=1的音乐是第一次爬取的时候存在,但是后面再爬取时不存在的音乐
|
歌曲目前爬取之后,会有一部分没有image封面,还是需要用旧方法爬取到
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
后续分区(不能在现有表上修改,只能重新查出数据到新表)
|
说明:
|
||||||
alter table song add partition (
|
song表中data_version=1的音乐是第一次爬取的时候存在,但是后面再爬取时不存在的音乐
|
||||||
PARTITION p1 VALUES LESS THAN ( 50000000),
|
|
||||||
PARTITION p2 VALUES LESS THAN (1000000000),
|
|
||||||
PARTITION p3 VALUES LESS THAN (1500000000),
|
|
||||||
PARTITION p4 VALUES LESS THAN (2000000000),
|
后续分区(不能在现有表上修改,只能重新查出数据到新表)
|
||||||
PARTITION p5 VALUES LESS THAN MAXVALUE
|
alter table song add partition (
|
||||||
);
|
PARTITION p1 VALUES LESS THAN ( 50000000),
|
||||||
|
PARTITION p2 VALUES LESS THAN (1000000000),
|
||||||
|
PARTITION p3 VALUES LESS THAN (1500000000),
|
||||||
|
PARTITION p4 VALUES LESS THAN (2000000000),
|
||||||
SQL文件说明
|
PARTITION p5 VALUES LESS THAN MAXVALUE
|
||||||
sql/structure.sql 中的SQL为最简,不包含字段的编码集
|
);
|
||||||
sql/neteasemusic.sql 中的SQL为数据库导出,包含字段的编码集
|
|
||||||
项目数据库 CHARACTER SET 统一使用 'utf8mb4',COLLATE 统一使用 'utf8mb4_general_ci'
|
|
||||||
|
|
||||||
|
SQL文件说明
|
||||||
|
sql/structure.sql 中的SQL为最简,不包含字段的编码集
|
||||||
|
sql/neteasemusic.sql 中的SQL为数据库导出,包含字段的编码集
|
||||||
|
项目数据库 CHARACTER SET 统一使用 'utf8mb4',COLLATE 统一使用 'utf8mb4_general_ci'
|
||||||
# # 查看列表
|
|
||||||
# screen -ls
|
|
||||||
|
|
||||||
# # 创建一个screen
|
|
||||||
# screen + <Enter>
|
|
||||||
|
# # 查看列表
|
||||||
# # 切换到指定屏幕
|
# screen -ls
|
||||||
# screen -r <screen_id>
|
|
||||||
|
# # 创建一个screen
|
||||||
# # 切出屏幕
|
# screen + <Enter>
|
||||||
|
|
||||||
|
# # 切换到指定屏幕
|
||||||
|
# screen -r <screen_id>
|
||||||
|
|
||||||
|
# # 切出屏幕
|
||||||
# Ctrl + A D
|
# Ctrl + A D
|
@@ -1,40 +1,40 @@
|
|||||||
|
|
||||||
// const mysql = require('mysql');
|
// const mysql = require('mysql');
|
||||||
// await new Promise(function (resolve, reject) {
|
// await new Promise(function (resolve, reject) {
|
||||||
// //通过MySQL中方法创建连接对象
|
// //通过MySQL中方法创建连接对象
|
||||||
// var connection = mysql.createConnection({
|
// var connection = mysql.createConnection({
|
||||||
// "charset": "utf8mb4",
|
// "charset": "utf8mb4",
|
||||||
// "host": "localhost",
|
// "host": "localhost",
|
||||||
// "user": "root",
|
// "user": "root",
|
||||||
// "password": "123456",
|
// "password": "123456",
|
||||||
// "port": 3306,
|
// "port": 3306,
|
||||||
// "database": ""
|
// "database": ""
|
||||||
// });
|
// });
|
||||||
// //开始连接
|
// //开始连接
|
||||||
// connection.connect();
|
// connection.connect();
|
||||||
// var sql = `
|
// var sql = `
|
||||||
// INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
|
// INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
|
||||||
// ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
|
// ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
|
||||||
// `;
|
// `;
|
||||||
// var params = commentInfoList.map(commentInfo => [
|
// var params = commentInfoList.map(commentInfo => [
|
||||||
// commentInfo.comment_id,
|
// commentInfo.comment_id,
|
||||||
// commentInfo.parent_comment_id,
|
// commentInfo.parent_comment_id,
|
||||||
// commentInfo.user_id,
|
// commentInfo.user_id,
|
||||||
// commentInfo.song_id,
|
// commentInfo.song_id,
|
||||||
// commentInfo.content,
|
// commentInfo.content,
|
||||||
// commentInfo.time,
|
// commentInfo.time,
|
||||||
// commentInfo.like_count,
|
// commentInfo.like_count,
|
||||||
// commentInfo.comment_type
|
// commentInfo.comment_type
|
||||||
// ]);
|
// ]);
|
||||||
// var formattedSql = connection.format(sql, [params]); // 返回一个格式化后的SQL字符串
|
// var formattedSql = connection.format(sql, [params]); // 返回一个格式化后的SQL字符串
|
||||||
// console.log(params); // 打印参数列表
|
// console.log(params); // 打印参数列表
|
||||||
// console.log(formattedSql); // 打印格式化后的SQL语句
|
// console.log(formattedSql); // 打印格式化后的SQL语句
|
||||||
// //最后需要关闭连接
|
// //最后需要关闭连接
|
||||||
// connection.end();
|
// connection.end();
|
||||||
// });
|
// });
|
||||||
// process.exit(0);
|
// process.exit(0);
|
||||||
|
|
||||||
|
|
||||||
// node index --utils comment --min 1935500000 --max 1935550000 --limit 10
|
// node index --utils comment --min 1935500000 --max 1935550000 --limit 10
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user