2023-12-25 16:46:39 +08:00
|
|
|
-- 更新统计数据
|
|
|
|
-- songCount 容易超时,有几张表查询时容易发生死锁,所以请在没有爬取时进行统计
|
|
|
|
-- 4G: 4294967296 (4 * 1024 * 1024 * 1024) 64M: 67108864
|
|
|
|
-- my.ini 配置文件中设置 innodb_buffer_pool_size=4G
|
|
|
|
show variables like "%innodb_buffer_pool_size%";
|
|
|
|
DELETE FROM analysis WHERE `key` LIKE '%_old';
|
|
|
|
UPDATE analysis SET `key`=concat(`key`,'_old'), modify_time=modify_time WHERE `key` NOT LIKE '%_old';
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('songCount', (SELECT count(*) as count FROM song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('songWaiting', (SELECT count(*) as count FROM wait_fetch_song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('playlistCount', (SELECT count(*) AS count FROM playlist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('albumCount', (SELECT count(*) as count FROM album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('albumWaiting', (SELECT count(*) as count FROM wait_fetch_album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('artistCount', (SELECT count(*) AS count FROM artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('artistWaiting', (SELECT count(*) as count FROM wait_fetch_artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('lyricCount', (SELECT count(*) AS count FROM lyric) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('commentCount', (SELECT count( DISTINCT song_id ) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('commentTotalCount', (SELECT count(*) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('userCount', (SELECT count(*) AS count FROM user) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('songPlaylistCount', (SELECT count(*) AS count FROM song_playlist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('songAlbumCount', (SELECT count(*) AS count FROM song_album_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
INSERT INTO analysis (`key`, `value`) VALUES ('songArtistCount', (SELECT count(*) AS count FROM song_artist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- 更新后初次全表扫描
|
|
|
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
|
|
|
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
|
|
|
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
|
|
|
|
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
|
|
|
|
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- 全量更新
|
|
|
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
|
|
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
|
|
|
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
|
|
|
|
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM lyric );
|
|
|
|
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress );
|
|
|
|
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist );
|
|
|
|
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- 查看需要爬取的 song 的分布
|
|
|
|
SELECT cast( FLOOR( id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM wait_fetch_song
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
-- 查看需要爬取的 album 的分布
|
|
|
|
SELECT cast( FLOOR( id / 1000000 ) * 1000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM wait_fetch_album
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
-- 查看需要爬取的 artist 的分布
|
|
|
|
SELECT cast( FLOOR(id / 100000 ) * 100000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM wait_fetch_artist
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
-- 查看需要爬取的 comment 的分布
|
|
|
|
SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM comment_progress
|
|
|
|
WHERE current_status != 2
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
-- 查看需要爬取的 lyric 的分布
|
|
|
|
SELECT cast( FLOOR( id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM wait_fetch_lyric
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- 查看本地已有 song 的分布
|
|
|
|
SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM song
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
-- 查看本地已有 user 的分布
|
|
|
|
SELECT cast( FLOOR( user_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM user
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
-- 查看本地已有 album 的分布
|
|
|
|
SELECT cast( FLOOR( album_id / 1000000 ) * 1000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM album
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
-- 查看本地已有 artist 的分布
|
|
|
|
SELECT cast( FLOOR( artist_id / 2000000 ) * 2000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM artist
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
-- 查看本地已有 playlist 的分布
|
|
|
|
SELECT cast( FLOOR( playlist_id / 2000000 ) * 2000000 as UNSIGNED ) as s, count(*) as count
|
|
|
|
FROM playlist
|
|
|
|
GROUP BY s
|
|
|
|
ORDER BY s DESC;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- 查询单个数据库里面各个表所占磁盘空间大小包括其索引的大小
|
|
|
|
SELECT
|
|
|
|
table_schema AS '数据库',
|
|
|
|
table_name AS '表名',
|
|
|
|
table_rows AS '记录数',
|
|
|
|
TRUNCATE (data_length / 1024 / 1024, 2) AS '数据容量(MB)',
|
|
|
|
TRUNCATE (index_length / 1024 / 1024, 2) AS '索引容量(MB)',
|
|
|
|
TRUNCATE ((data_length + index_length) / 1024 / 1024 / 1024, 2) AS '总容量(GB)'
|
|
|
|
FROM
|
|
|
|
information_schema.TABLES
|
|
|
|
WHERE
|
|
|
|
table_schema = 'neteasemusic'
|
|
|
|
ORDER BY
|
|
|
|
table_rows DESC;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-- 统计等待爬取的数据条数 2023.12.25
|
|
|
|
SELECT 'comment' as wait_fetch, count(*) as `count` FROM `comment_progress` where current_status = 0
|
|
|
|
UNION ALL
|
|
|
|
SELECT 'album', count(*) FROM `wait_fetch_album`
|
|
|
|
UNION ALL
|
|
|
|
SELECT 'artist', count(*) FROM `wait_fetch_artist`
|
|
|
|
UNION ALL
|
|
|
|
SELECT 'lyric', count(*) FROM `wait_fetch_lyric`
|