通过命令行指定爬取参数,不用再修改代码了
This commit is contained in:
@@ -1,13 +1,42 @@
|
||||
-- Distribution of the songs that still need to be crawled.
-- A song is pending when its id is referenced by song_album_relation or
-- song_artist_relation but has no row in song yet.
-- Each id is assigned to the nearest multiple of 10,000,000 (column s) and
-- the pending ids per bucket are counted (column count).
SELECT
    -- ROUND replaces FORMAT(..., 0): FORMAT returns a string with thousands
    -- separators (e.g. '1,235'), which is truncated at the comma when it is
    -- multiplied back into a number. ROUND yields the same nearest-multiple
    -- bucket without the string round-trip.
    CAST(ROUND(pending.song_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM (
    -- UNION already removes duplicates, so every song id appears once here.
    SELECT song_id FROM song_album_relation
    UNION
    SELECT song_id FROM song_artist_relation
) AS pending
-- NOT EXISTS stays correct even if song.song_id could contain NULL,
-- whereas NOT IN would then return no rows at all.
WHERE NOT EXISTS (
    SELECT 1
    FROM song AS crawled
    WHERE crawled.song_id = pending.song_id
)
GROUP BY s
ORDER BY s DESC;
|
||||
|
||||
-- Distribution of the albums that still need to be crawled.
-- An album is pending when song_album_relation references its id but the
-- album table has no row for it.
-- Each id is assigned to the nearest multiple of 1,000,000 (column s).
SELECT
    -- ROUND replaces FORMAT(..., 0): FORMAT returns a string with thousands
    -- separators, which is truncated at the comma when converted back to a
    -- number.
    CAST(ROUND(rel.album_id / 1000000) * 1000000 AS UNSIGNED) AS s,
    -- The relation table holds one row per (song, album) pair, so count each
    -- album id once instead of once per song.
    COUNT(DISTINCT rel.album_id) AS count
FROM song_album_relation AS rel
-- NOT EXISTS stays correct even if album.album_id could contain NULL,
-- whereas NOT IN would then return no rows at all.
WHERE NOT EXISTS (
    SELECT 1
    FROM album AS crawled
    WHERE crawled.album_id = rel.album_id
)
GROUP BY s
ORDER BY s DESC;
|
||||
|
||||
-- Distribution of the artists that still need to be crawled.
-- An artist is pending when song_artist_relation references its id but the
-- artist table has no row for it.
-- Each id is assigned to the nearest multiple of 2,000,000 (column s).
SELECT
    -- ROUND replaces FORMAT(..., 0): FORMAT returns a string with thousands
    -- separators, which is truncated at the comma when converted back to a
    -- number.
    CAST(ROUND(rel.artist_id / 2000000) * 2000000 AS UNSIGNED) AS s,
    -- The relation table holds one row per (song, artist) pair, so count each
    -- artist id once instead of once per song.
    COUNT(DISTINCT rel.artist_id) AS count
FROM song_artist_relation AS rel
-- NOT EXISTS stays correct even if artist.artist_id could contain NULL,
-- whereas NOT IN would then return no rows at all.
WHERE NOT EXISTS (
    SELECT 1
    FROM artist AS crawled
    WHERE crawled.artist_id = rel.artist_id
)
GROUP BY s
ORDER BY s DESC;
|
||||
|
||||
-- Distribution of the songs whose comments still need to be crawled.
-- Rows of comment_progress whose current_status is not 2 are counted,
-- grouped by song id to the nearest multiple of 10,000,000 (column s).
SELECT
    -- ROUND replaces FORMAT(..., 0): FORMAT returns a string with thousands
    -- separators, which is truncated at the comma when converted back to a
    -- number.
    CAST(ROUND(song_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM comment_progress
-- NOTE(review): 2 presumably marks a song whose comments are fully crawled;
-- confirm against the crawler's status constants.
WHERE current_status <> 2
GROUP BY s
ORDER BY s DESC;
|
||||
|
||||
-- Distribution of the songs whose lyrics still need to be crawled.
-- A song is pending when it exists in song but has no row in lyric.
-- Each id is assigned to the nearest multiple of 10,000,000 (column s).
SELECT
    -- ROUND replaces FORMAT(..., 0): FORMAT returns a string with thousands
    -- separators, which is truncated at the comma when converted back to a
    -- number.
    CAST(ROUND(song.song_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM song
-- NOT EXISTS stays correct even if lyric.song_id could contain NULL,
-- whereas NOT IN would then return no rows at all.
WHERE NOT EXISTS (
    SELECT 1
    FROM lyric AS crawled
    WHERE crawled.song_id = song.song_id
)
GROUP BY s
ORDER BY s DESC;
|
||||
|
||||
|
||||
-- Table maintenance: run OPTIMIZE TABLE on the album table.
OPTIMIZE TABLE album;
|
||||
|
Reference in New Issue
Block a user