1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
tools/netease_music/sql/statistic.sql

141 lines
8.0 KiB
SQL

-- 更新统计数据
INSERT INTO analysis (`key`, `value`) VALUES ('songCount', (SELECT count(*) as count FROM song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('songWaiting', (SELECT count(DISTINCT song_id) AS count
FROM ( SELECT song_id FROM song_artist_relation UNION SELECT song_id FROM song_album_relation ) t_tmp
WHERE song_id NOT IN ( SELECT song_id FROM song )) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('playlistCount', (SELECT count(*) AS count FROM playlist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('albumCount', (SELECT count(*) as count FROM album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('albumWaiting', (SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('artistCount', (SELECT count(*) AS count FROM artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('artistWaiting', (SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('lyricCount', (SELECT count(*) AS count FROM lyric) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('commentCount', (SELECT count( DISTINCT song_id ) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('commentTotalCount', (SELECT count(*) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('userCount', (SELECT count(*) AS count FROM user) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('songPlaylistCount', (SELECT count(*) AS count FROM song_playlist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('songAlbumCount', (SELECT count(*) AS count FROM song_album_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('songArtistCount', (SELECT count(*) AS count FROM song_artist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
-- 更新后初次全表扫描
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
-- 全量更新
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM lyric );
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress );
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist );
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album );
-- 查看需要爬取的 song 的分布
SELECT cast( format( t_tmp.song_id / 10000000, 0) * 10000000 as UNSIGNED ) as s, count(*) as count
FROM (
SELECT DISTINCT song_id FROM song_album_relation
UNION
SELECT DISTINCT song_id FROM song_artist_relation
) as t_tmp
WHERE song_id NOT IN ( SELECT song_id FROM song )
GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 album 的分布
SELECT cast( format( album_id / 1000000, 0) * 1000000 as UNSIGNED ) as s, count(*) as count
FROM song_album_relation
WHERE album_id NOT IN ( SELECT album_id FROM album )
GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 artist 的分布
SELECT cast( format( artist_id / 2000000, 0) * 2000000 as UNSIGNED ) as s, count(*) as count
FROM song_artist_relation
WHERE artist_id NOT IN ( SELECT artist_id FROM artist )
GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 comment 的分布
SELECT cast( format( song_id / 10000000, 0) * 10000000 as UNSIGNED ) as s, count(*) as count
FROM comment_progress
WHERE current_status != 2
GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 lyric 的分布
SELECT cast( format( song_id / 10000000, 0) * 10000000 as UNSIGNED ) as s, count(*) as count
FROM song
WHERE song_id NOT IN ( SELECT song_id FROM lyric )
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 song 的分布
SELECT cast( format( song_id / 10000000, 0) * 10000000 as UNSIGNED ) as s, count(*) as count
FROM song
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 user 的分布
SELECT cast( format( user_id / 10000000, 0) * 10000000 as UNSIGNED ) as s, count(*) as count
FROM user
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 album 的分布
SELECT cast( format( album_id / 1000000, 0) * 1000000 as UNSIGNED ) as s, count(*) as count
FROM album
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 artist 的分布
SELECT cast( format( artist_id / 2000000, 0) * 2000000 as UNSIGNED ) as s, count(*) as count
FROM artist
GROUP BY s
ORDER BY s DESC
-- 查看本地已有 playlist 的分布
SELECT cast( format( playlist_id / 2000000, 0) * 2000000 as UNSIGNED ) as s, count(*) as count
FROM playlist
GROUP BY s
ORDER BY s DESC
-- 查询单个数据库里面各个表所占磁盘空间大小包括其索引的大小
SELECT
table_schema AS '数据库',
table_name AS '表名',
table_rows AS '记录数',
TRUNCATE (data_length / 1024 / 1024, 2) AS '数据容量(MB)',
TRUNCATE (index_length / 1024 / 1024, 2) AS '索引容量(MB)',
TRUNCATE ((data_length + index_length) / 1024 / 1024 / 1024, 2) AS '总容量(GB)'
FROM
information_schema.TABLES
WHERE
table_schema = 'neteasemusic'
ORDER BY
table_rows DESC;