diff --git a/hifini_music/sql/statistic.sql b/hifini_music/sql/statistic.sql new file mode 100644 index 0000000..2e3deed --- /dev/null +++ b/hifini_music/sql/statistic.sql @@ -0,0 +1,2 @@ +SELECT count(*) FROM hifini_thread WHERE music_title='' and music_pic='' and music_url=''; +SELECT count(*) FROM hifini_thread WHERE music_url like 'get_music.php?key=%' and music_real_url=''; diff --git a/netease_music/sql/neteasemusic.sql b/netease_music/sql/neteasemusic.sql index 1e660ed..f1da518 100644 --- a/netease_music/sql/neteasemusic.sql +++ b/netease_music/sql/neteasemusic.sql @@ -150,7 +150,7 @@ CREATE TABLE `playlist` ( `playlist_create_time` bigint(20) UNSIGNED NULL DEFAULT NULL COMMENT '创建日期', `playlist_update_time` bigint(20) UNSIGNED NULL DEFAULT NULL COMMENT '更新日期', `track_count` int(10) UNSIGNED NOT NULL COMMENT '歌单歌曲数', - `play_count` int(10) UNSIGNED NOT NULL COMMENT '歌单播放数', + `play_count` bigint(20) UNSIGNED NOT NULL COMMENT '歌单播放数', `subscribed_count` int(10) UNSIGNED NOT NULL COMMENT '歌单收藏数', `share_count` int(10) UNSIGNED NOT NULL COMMENT '歌单分享数', `comment_count` int(10) UNSIGNED NOT NULL COMMENT '歌单评论数', diff --git a/netease_music/sql/structure.sql b/netease_music/sql/structure.sql index 81ce140..050ee43 100644 --- a/netease_music/sql/structure.sql +++ b/netease_music/sql/structure.sql @@ -67,7 +67,7 @@ CREATE TABLE `playlist` ( `playlist_update_time` bigint(20) unsigned DEFAULT NULL COMMENT '更新日期', -- 数据 `track_count` int(10) unsigned NOT NULL COMMENT '歌单歌曲数', - `play_count` int(10) unsigned NOT NULL COMMENT '歌单播放数', + `play_count` bigint(20) unsigned NOT NULL COMMENT '歌单播放数', `subscribed_count` int(10) unsigned NOT NULL COMMENT '歌单收藏数', `share_count` int(10) unsigned NOT NULL COMMENT '歌单分享数', `comment_count` int(10) unsigned NOT NULL COMMENT '歌单评论数', diff --git a/netease_music/src/getInfo/playlistUtils.js b/netease_music/src/getInfo/playlistUtils.js index 4949910..26439f2 100644 --- a/netease_music/src/getInfo/playlistUtils.js +++ b/netease_music/src/getInfo/playlistUtils.js @@ -14,7 +14,7 @@ const dataManager = require('../dataManager'); async function fetchAll({ args }) { // 睡眠时间设置长一些,不然容易触发500错误 - global.sleepTime = Math.max(500, global.sleepTime); + global.sleepTime = Math.max(1000, global.sleepTime); console.log("global.sleepTime", global.sleepTime); console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)"); @@ -28,8 +28,8 @@ async function fetchAll({ args }) { for (let i = 0; i < cate.length; i++) { const categoryName = cate[i]; try { - console.log("开始爬取分类:", categoryName); - await fetchCategory({ categoryName: categoryName, progress: `${i + 1}/${cate.length}` }); + console.log(`开始爬取分类:${categoryName}(i=${i})`); + await fetchCategory({ categoryName: `${categoryName}`, progress: `${i + 1}/${cate.length}` }); } catch (err) { console.error(err); } @@ -94,7 +94,10 @@ async function fetchCategory({ categoryName, progress }) { } } else { console.log("失败url", url); + fs.writeFileSync(path.join(__dirname, "../../temp", `[error]discover-playlist.html`), html); await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [0, 'playlist_fetch', `失败 可能是爬太快了\n${url}`]); + await sleepUtils.sleep(40 * 1000); // 等待40s再继续爬取 + continue; } // 最有一页判断标识