-- Ad-hoc maintenance queries (MySQL dialect).
--
-- The histogram queries below bucket ids by rounding each id to the nearest
-- multiple of a fixed bucket width; `s` is that multiple and `count` is the
-- number of rows that fall into it. ROUND() is used for the bucketing instead
-- of FORMAT(): FORMAT() returns a locale-formatted string with thousands
-- separators, which is silently truncated at the first separator when it is
-- coerced back to a number.
--
-- NOT EXISTS is used instead of NOT IN so that a NULL id in the lookup table
-- cannot turn the whole predicate into UNKNOWN and empty the result.

-- Distribution of songs that still need to be crawled: song ids referenced by
-- either relation table that have no row in `song`. Bucket width: 10,000,000.
SELECT
    CAST(ROUND(pending.song_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM (
    -- UNION already removes duplicates, so no per-branch DISTINCT is needed.
    SELECT song_id FROM song_album_relation
    UNION
    SELECT song_id FROM song_artist_relation
) AS pending
WHERE NOT EXISTS (
    SELECT 1
    FROM song
    WHERE song.song_id = pending.song_id
)
GROUP BY s
ORDER BY s DESC;

-- Distribution of albums that still need to be crawled: rows of
-- `song_album_relation` whose album_id has no row in `album`.
-- Bucket width: 1,000,000.
-- NOTE(review): COUNT(*) counts relation rows, so an album_id is counted once
-- per relation row it appears in; switch to COUNT(DISTINCT rel.album_id) if
-- distinct albums are wanted.
SELECT
    CAST(ROUND(rel.album_id / 1000000) * 1000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM song_album_relation AS rel
WHERE NOT EXISTS (
    SELECT 1
    FROM album
    WHERE album.album_id = rel.album_id
)
GROUP BY s
ORDER BY s DESC;

-- Distribution of artists that still need to be crawled: rows of
-- `song_artist_relation` whose artist_id has no row in `artist`.
-- Bucket width: 2,000,000.
-- NOTE(review): as above, COUNT(*) counts relation rows, not distinct artists.
SELECT
    CAST(ROUND(rel.artist_id / 2000000) * 2000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM song_artist_relation AS rel
WHERE NOT EXISTS (
    SELECT 1
    FROM artist
    WHERE artist.artist_id = rel.artist_id
)
GROUP BY s
ORDER BY s DESC;

-- Distribution of songs whose comments still need to be crawled: rows of
-- `comment_progress` whose current_status is not 2. Bucket width: 10,000,000.
-- NOTE(review): presumably status 2 means "finished" -- confirm against the
-- crawler code.
SELECT
    CAST(ROUND(song_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM comment_progress
WHERE current_status <> 2
GROUP BY s
ORDER BY s DESC;

-- Distribution of songs whose lyrics still need to be crawled: rows of `song`
-- with no matching row in `lyric`. Bucket width: 10,000,000.
SELECT
    CAST(ROUND(song.song_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM song
WHERE NOT EXISTS (
    SELECT 1
    FROM lyric
    WHERE lyric.song_id = song.song_id
)
GROUP BY s
ORDER BY s DESC;

-- Run OPTIMIZE TABLE on every table used by the crawler.
OPTIMIZE TABLE album;
OPTIMIZE TABLE artist;
OPTIMIZE TABLE comment;
OPTIMIZE TABLE comment_progress;
OPTIMIZE TABLE log;
OPTIMIZE TABLE lyric;
OPTIMIZE TABLE song;
OPTIMIZE TABLE song_album_relation;
OPTIMIZE TABLE song_artist_relation;
OPTIMIZE TABLE user;

-- Disk space used by each table of the `neteasemusic` schema, including its
-- indexes, largest row count first. Data and index sizes are reported in MB
-- and the combined size in GB, each truncated (not rounded) to two decimals.
-- The quoted aliases are the column headers of the report.
SELECT
    table_schema AS '数据库',
    table_name AS '表名',
    table_rows AS '记录数',
    TRUNCATE(data_length / 1024 / 1024, 2) AS '数据容量(MB)',
    TRUNCATE(index_length / 1024 / 1024, 2) AS '索引容量(MB)',
    TRUNCATE((data_length + index_length) / 1024 / 1024 / 1024, 2) AS '总容量(GB)'
FROM information_schema.TABLES
WHERE table_schema = 'neteasemusic'
ORDER BY table_rows DESC;