2022-10-26 15:33:07 +08:00
-- 更新统计数据
2022-10-31 16:47:47 +08:00
-- songCount 容易超时,有几张表查询时容易发生死锁,所以请在没有爬取时进行统计
DELETE FROM analysis WHERE ` key ` LIKE ' %_old ' ;
UPDATE analysis SET ` key ` = concat ( ` key ` , ' _old ' ) , modify_time = modify_time WHERE ` key ` NOT LIKE ' %_old ' ;
2022-10-18 14:42:39 +08:00
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' songCount ' , ( SELECT count ( * ) as count FROM song ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-31 16:47:47 +08:00
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' songWaiting ' , ( SELECT count ( * ) as count FROM wait_fetch_song ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-20 13:38:33 +08:00
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' playlistCount ' , ( SELECT count ( * ) AS count FROM playlist ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-18 14:42:39 +08:00
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' albumCount ' , ( SELECT count ( * ) as count FROM album ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-31 16:47:47 +08:00
-- INSERT INTO analysis (`key`, `value`) VALUES ('albumWaiting', (SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' albumWaiting ' , ( SELECT count ( * ) as count FROM wait_fetch_album ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-18 14:42:39 +08:00
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' artistCount ' , ( SELECT count ( * ) AS count FROM artist ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-31 16:47:47 +08:00
-- INSERT INTO analysis (`key`, `value`) VALUES ('artistWaiting', (SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' artistWaiting ' , ( SELECT count ( * ) as count FROM wait_fetch_artist ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-18 14:42:39 +08:00
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' lyricCount ' , ( SELECT count ( * ) AS count FROM lyric ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' commentCount ' , ( SELECT count ( DISTINCT song_id ) AS count FROM comment ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' commentTotalCount ' , ( SELECT count ( * ) AS count FROM comment ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' userCount ' , ( SELECT count ( * ) AS count FROM user ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-19 23:57:53 +08:00
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' songPlaylistCount ' , ( SELECT count ( * ) AS count FROM song_playlist_relation ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-18 14:42:39 +08:00
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' songAlbumCount ' , ( SELECT count ( * ) AS count FROM song_album_relation ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
INSERT INTO analysis ( ` key ` , ` value ` ) VALUES ( ' songArtistCount ' , ( SELECT count ( * ) AS count FROM song_artist_relation ) ) ON DUPLICATE KEY UPDATE ` value ` = VALUES ( ` value ` ) ;
2022-10-26 15:33:07 +08:00
-- 更新后初次全表扫描
2022-10-29 14:32:51 +08:00
INSERT IGNORE INTO wait_check_song ( id ) SELECT song_id FROM song_artist_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_song ( id ) SELECT song_id FROM song_album_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_song ( id ) SELECT song_id FROM song_playlist_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_lyric ( id ) SELECT song_id FROM song_artist_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_lyric ( id ) SELECT song_id FROM song_album_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_lyric ( id ) SELECT song_id FROM song_playlist_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_lyric ( id ) SELECT song_id FROM song WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_comment ( id ) SELECT song_id FROM song_artist_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_comment ( id ) SELECT song_id FROM song_album_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_comment ( id ) SELECT song_id FROM song_playlist_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_comment ( id ) SELECT song_id FROM song WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_artist ( id ) SELECT artist_id FROM song_artist_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
INSERT IGNORE INTO wait_check_album ( id ) SELECT album_id FROM song_album_relation WHERE create_time > ' 2022-10-28 00:00:00 ' ;
-- 全量更新
2022-10-29 00:16:36 +08:00
INSERT IGNORE INTO wait_check_song ( id ) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ) ;
INSERT IGNORE INTO wait_check_song ( id ) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song ) ;
INSERT IGNORE INTO wait_check_song ( id ) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ) ;
INSERT IGNORE INTO wait_check_lyric ( id ) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM lyric ) ;
INSERT IGNORE INTO wait_check_comment ( id ) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress ) ;
INSERT IGNORE INTO wait_check_artist ( id ) SELECT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist ) ;
INSERT IGNORE INTO wait_check_album ( id ) SELECT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album ) ;
2022-10-26 15:33:07 +08:00
2022-10-06 21:06:09 +08:00
-- 查看需要爬取的 song 的分布
2022-11-07 15:05:05 +08:00
SELECT cast ( format ( id / 10000000 , 0 ) * 10000000 as UNSIGNED ) as s , count ( * ) as count
FROM wait_fetch_song
2022-10-06 21:06:09 +08:00
GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 album 的分布
2022-11-07 15:05:05 +08:00
SELECT cast ( format ( id / 1000000 , 0 ) * 1000000 as UNSIGNED ) as s , count ( * ) as count
FROM wait_fetch_album
2022-10-06 21:06:09 +08:00
GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 artist 的分布
2022-11-07 15:35:22 +08:00
SELECT cast ( format ( id / 100000 , 0 ) * 100000 as UNSIGNED ) as s , count ( * ) as count
2022-11-07 15:05:05 +08:00
FROM wait_fetch_artist
2022-10-06 21:06:09 +08:00
GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 comment 的分布
SELECT cast ( format ( song_id / 10000000 , 0 ) * 10000000 as UNSIGNED ) as s , count ( * ) as count
FROM comment_progress
WHERE current_status ! = 2
GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 lyric 的分布
2022-11-07 15:05:05 +08:00
SELECT cast ( format ( id / 10000000 , 0 ) * 10000000 as UNSIGNED ) as s , count ( * ) as count
FROM wait_fetch_lyric
2022-10-06 21:06:09 +08:00
GROUP BY s
ORDER BY s DESC
2022-10-06 14:01:05 +08:00
2022-10-18 14:42:39 +08:00
2022-10-26 16:19:46 +08:00
-- 查看本地已有 song 的分布
SELECT cast ( format ( song_id / 10000000 , 0 ) * 10000000 as UNSIGNED ) as s , count ( * ) as count
FROM song
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 user 的分布
SELECT cast ( format ( user_id / 10000000 , 0 ) * 10000000 as UNSIGNED ) as s , count ( * ) as count
FROM user
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 album 的分布
SELECT cast ( format ( album_id / 1000000 , 0 ) * 1000000 as UNSIGNED ) as s , count ( * ) as count
FROM album
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 artist 的分布
SELECT cast ( format ( artist_id / 2000000 , 0 ) * 2000000 as UNSIGNED ) as s , count ( * ) as count
FROM artist
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 playlist 的分布
SELECT cast ( format ( playlist_id / 2000000 , 0 ) * 2000000 as UNSIGNED ) as s , count ( * ) as count
FROM playlist
GROUP BY s
ORDER BY s DESC
2022-10-06 22:45:10 +08:00
-- 查询单个数据库里面各个表所占磁盘空间大小包括其索引的大小
SELECT
table_schema AS ' 数据库 ' ,
table_name AS ' 表名 ' ,
table_rows AS ' 记录数 ' ,
TRUNCATE ( data_length / 1024 / 1024 , 2 ) AS ' 数据容量(MB) ' ,
TRUNCATE ( index_length / 1024 / 1024 , 2 ) AS ' 索引容量(MB) ' ,
TRUNCATE ( ( data_length + index_length ) / 1024 / 1024 / 1024 , 2 ) AS ' 总容量(GB) '
FROM
information_schema . TABLES
WHERE
table_schema = ' neteasemusic '
ORDER BY
2022-10-26 15:33:07 +08:00
table_rows DESC ;