1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

playlist修bug

This commit is contained in:
程序员小墨 2022-10-20 00:30:09 +08:00
parent 896e66de53
commit 0862db5db9
2 changed files with 38 additions and 21 deletions

View File

@ -12,8 +12,12 @@ const dbUtils = global.dbUtils;
const { playlist_catlist, playlist_hot, playlist_detail } = require('NeteaseCloudMusicApi');
async function fetchAll({ args }) {
// 睡眠时间设置长一些,不然容易触发 500 错误
global.sleepTime = Math.max(500, global.sleepTime);
console.log("global.sleepTime", global.sleepTime);
console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)");
console.log("start fetching playlists ...");
console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)")
// 从数据库中查出所有的网易云分类
let result = await dbUtils.query(`SELECT title FROM category WHERE netease_group_chinese IS NOT NULL`);
@ -23,10 +27,12 @@ async function fetchAll({ args }) {
for (let i = 0; i < cate.length; i++) {
const categoryName = cate[i];
try {
console.log("开始爬取分类:", categoryName);
await fetchCategory({ categoryName: categoryName, progress: `${i + 1}/${cate.length}` });
} catch (err) {
console.error(err);
}
await sleepUtils.sleep(global.sleepTime);
}
console.log("爬取完毕");
await sleepUtils.sleep(2000);
@ -58,35 +64,42 @@ async function fetchCategory({ categoryName, progress }) {
return;
}
// 从数据库查出已爬取的歌单ids并从 playlistIds 中排除这部分歌单
var exceptPlaylistIds = await dbUtils.query(`
SELECT playlist_id FROM playlist WHERE playlist_id IN ?
`, [[playlistIds]]);
exceptPlaylistIds = exceptPlaylistIds.map(playlist => playlist.playlist_id);
if (playlistIds.length > 0) {
// 从数据库查出已爬取的歌单ids并从 playlistIds 中排除这部分歌单
var exceptPlaylistIds = await dbUtils.query(`
SELECT playlist_id FROM playlist WHERE playlist_id IN ?
`, [[playlistIds]]);
exceptPlaylistIds = exceptPlaylistIds.map(playlist => playlist.playlist_id);
var finalPlaylistIds = playlistIds.filter(playlistId => exceptPlaylistIds.indexOf(playlistId) == -1);
var finalPlaylistIds = playlistIds.filter(playlistId => exceptPlaylistIds.indexOf(playlistId) == -1);
// console.log("playlistIds", playlistIds);
// console.log("exceptPlaylistIds", exceptPlaylistIds);
// console.log("finalPlaylistIds", finalPlaylistIds);
console.log("finalPlaylistIds.length", finalPlaylistIds.length);
// console.log("playlistIds", playlistIds);
// console.log("exceptPlaylistIds", exceptPlaylistIds);
// console.log("finalPlaylistIds", finalPlaylistIds);
console.log(categoryName, "offset", offset, "playlistIds.length", playlistIds.length, "finalPlaylistIds.length", finalPlaylistIds.length);
// console.log(url);
for (let i = 0; i < finalPlaylistIds.length; i++) {
await global.checkIsExit();
const playlistId = finalPlaylistIds[i];
// console.log(offset, i, finalPlaylistIds.length);
console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
try {
await fetch({ playlistId: playlistId });
} catch (err) {
console.error(err);
for (let i = 0; i < finalPlaylistIds.length; i++) {
await global.checkIsExit();
const playlistId = finalPlaylistIds[i];
// console.log(offset, i, finalPlaylistIds.length);
console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
try {
await fetch({ playlistId: playlistId });
} catch (err) {
console.error(err);
}
await sleepUtils.sleep(global.sleepTime);
}
await sleepUtils.sleep(global.sleepTime);
} else {
console.log("失败url", url);
await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [0, 'playlist_fetch', `失败 可能是爬太快了\n${url}`]);
}
// 最后一页判断标识
if (html.indexOf(`class="zbtn znxt js-disabled">下一页</a>`) > -1) haveNext = false;
offset += perPage;
await sleepUtils.sleep(global.sleepTime);
}
}

View File

@ -24,6 +24,10 @@ node index --utils playlist
后期:
批量查库修改为一条SQL搞定
歌单定时更新:rel 表中添加一个 del 字段,先将歌单下面的全部歌曲置为删除状态,再在插入的时候把已有歌曲的标记重新修改为正常状态
评论的更新
爬取歌单playlist