playlist修bug
This commit is contained in:
parent
896e66de53
commit
0862db5db9
@ -12,8 +12,12 @@ const dbUtils = global.dbUtils;
|
||||
const { playlist_catlist, playlist_hot, playlist_detail } = require('NeteaseCloudMusicApi');
|
||||
|
||||
async function fetchAll({ args }) {
|
||||
// 睡眠时间设置长一些,不然容易触发500错误
|
||||
global.sleepTime = Math.max(500, global.sleepTime);
|
||||
console.log("global.sleepTime", global.sleepTime);
|
||||
console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)");
|
||||
|
||||
console.log("start fetching playlists ...");
|
||||
console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)")
|
||||
|
||||
// 从数据库中查出所有的网易云分类
|
||||
let result = await dbUtils.query(`SELECT title FROM category WHERE netease_group_chinese IS NOT NULL`);
|
||||
@ -23,10 +27,12 @@ async function fetchAll({ args }) {
|
||||
for (let i = 0; i < cate.length; i++) {
|
||||
const categoryName = cate[i];
|
||||
try {
|
||||
console.log("开始爬取分类:", categoryName);
|
||||
await fetchCategory({ categoryName: categoryName, progress: `${i + 1}/${cate.length}` });
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
}
|
||||
await sleepUtils.sleep(global.sleepTime);
|
||||
}
|
||||
console.log("爬取完毕");
|
||||
await sleepUtils.sleep(2000);
|
||||
@ -58,35 +64,42 @@ async function fetchCategory({ categoryName, progress }) {
|
||||
return;
|
||||
}
|
||||
|
||||
// 从数据库查出已爬取的歌单ids,并从 playlistIds 中排除这部分歌单
|
||||
var exceptPlaylistIds = await dbUtils.query(`
|
||||
SELECT playlist_id FROM playlist WHERE playlist_id IN ?
|
||||
`, [[playlistIds]]);
|
||||
exceptPlaylistIds = exceptPlaylistIds.map(playlist => playlist.playlist_id);
|
||||
if (playlistIds.length > 0) {
|
||||
// 从数据库查出已爬取的歌单ids,并从 playlistIds 中排除这部分歌单
|
||||
var exceptPlaylistIds = await dbUtils.query(`
|
||||
SELECT playlist_id FROM playlist WHERE playlist_id IN ?
|
||||
`, [[playlistIds]]);
|
||||
exceptPlaylistIds = exceptPlaylistIds.map(playlist => playlist.playlist_id);
|
||||
|
||||
var finalPlaylistIds = playlistIds.filter(playlistId => exceptPlaylistIds.indexOf(playlistId) == -1);
|
||||
var finalPlaylistIds = playlistIds.filter(playlistId => exceptPlaylistIds.indexOf(playlistId) == -1);
|
||||
|
||||
// console.log("playlistIds", playlistIds);
|
||||
// console.log("exceptPlaylistIds", exceptPlaylistIds);
|
||||
// console.log("finalPlaylistIds", finalPlaylistIds);
|
||||
console.log("finalPlaylistIds.length", finalPlaylistIds.length);
|
||||
// console.log("playlistIds", playlistIds);
|
||||
// console.log("exceptPlaylistIds", exceptPlaylistIds);
|
||||
// console.log("finalPlaylistIds", finalPlaylistIds);
|
||||
console.log(categoryName, "offset", offset, "playlistIds.length", playlistIds.length, "finalPlaylistIds.length", finalPlaylistIds.length);
|
||||
// console.log(url);
|
||||
|
||||
for (let i = 0; i < finalPlaylistIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const playlistId = finalPlaylistIds[i];
|
||||
// console.log(offset, i, finalPlaylistIds.length);
|
||||
console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
|
||||
try {
|
||||
await fetch({ playlistId: playlistId });
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
for (let i = 0; i < finalPlaylistIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const playlistId = finalPlaylistIds[i];
|
||||
// console.log(offset, i, finalPlaylistIds.length);
|
||||
console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
|
||||
try {
|
||||
await fetch({ playlistId: playlistId });
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
}
|
||||
await sleepUtils.sleep(global.sleepTime);
|
||||
}
|
||||
await sleepUtils.sleep(global.sleepTime);
|
||||
} else {
|
||||
console.log("失败url", url);
|
||||
await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [0, 'playlist_fetch', `失败 可能是爬太快了\n${url}`]);
|
||||
}
|
||||
|
||||
// Last-page check: on the final page the "下一页" (next page) link is rendered with the js-disabled class
|
||||
if (html.indexOf(`class="zbtn znxt js-disabled">下一页</a>`) > -1) haveNext = false;
|
||||
offset += perPage;
|
||||
await sleepUtils.sleep(global.sleepTime);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,10 @@ node index --utils playlist
|
||||
|
||||
后期:
|
||||
|
||||
批量查库修改为一条SQL搞定
|
||||
|
||||
歌单定时更新(rel表中添加一个del字段:更新时先将该歌单下面的全部歌曲置为删除状态,再插入的时候把仍然存在的歌曲的标记重新修改为正常状态)
|
||||
|
||||
评论的更新
|
||||
|
||||
爬取歌单playlist
|
||||
|
Loading…
Reference in New Issue
Block a user