const fs = require('fs'); const path = require('path'); const requestUtils = require('../../../utils/requestUtils'); const sleepUtils = require('../../../utils/sleepUtils'); const dbUtils = global.dbUtils; // refer: // https://neteasecloudmusicapi-docs.4everland.app/ // https://github.com/Binaryify/NeteaseCloudMusicApi const { playlist_catlist, playlist_hot, playlist_detail } = require('NeteaseCloudMusicApi'); async function fetchAll({ args }) { console.log("start fetching playlists ..."); console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)") // 从数据库中查出所有的网易云分类 let result = await dbUtils.query(`SELECT title FROM category WHERE netease_group_chinese IS NOT NULL`); cate = result.map(cate => cate.title); cate.unshift('全部'); // 插入第一个 console.log(cate); for (let i = 0; i < cate.length; i++) { const categoryName = cate[i]; try { await fetchCategory({ categoryName: categoryName, progress: `${i + 1}/${cate.length}` }); } catch (err) { console.error(err); } } } async function fetchCategory({ categoryName, progress }) { // 首先去网易云音乐首页获得歌单 (每一首音乐右侧都会有几个包含该音乐的歌单) let haveNext = true; let perPage = 35; let offset = 0; while (haveNext) { let url = `https://music.163.com/discover/playlist?cat=${encodeURIComponent(categoryName)}&limit=${perPage}&offset=${offset}`; try { // var html = fs.readFileSync(path.join(__dirname, "../../temp", `discover-playlist.html`), 'utf8'); var html = await requestUtils.getApiResult(url); // fs.writeFileSync(path.join(__dirname, "../../temp", `discover-playlist.html`), html); var matcher = html.matchAll(/"\/playlist\?id=(\d{1,20})"/g); var m = matcher.next(); var a = new Set(); // 因为每个歌单id会出现两次,所以使用Set去重 while (!m.done) { a.add(Number(m.value[1])); m = matcher.next(); } var playlistIds = Array.from(a).sort(); } catch (errors) { console.error(errors); return; } // 从数据库查出已爬取的歌单ids,并从 playlistIds 中排除这部分歌单 var exceptPlaylistIds = await dbUtils.query(` SELECT playlist_id FROM playlist WHERE playlist_id IN ? `, [[playlistIds]]); exceptPlaylistIds = exceptPlaylistIds.map(playlist => playlist.playlist_id); var finalPlaylistIds = playlistIds.filter(playlistId => exceptPlaylistIds.indexOf(playlistId) == -1); // console.log("playlistIds", playlistIds); // console.log("exceptPlaylistIds", exceptPlaylistIds); // console.log("finalPlaylistIds", finalPlaylistIds); console.log("finalPlaylistIds.length", finalPlaylistIds.length); for (let i = 0; i < finalPlaylistIds.length; i++) { await global.checkIsExit(); const playlistId = finalPlaylistIds[i]; // console.log(offset, i, finalPlaylistIds.length); console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`); try { await fetch({ playlistId: playlistId }); } catch (err) { console.error(err); } await sleepUtils.sleep(global.sleepTime); } // 最有一页判断标识 if (html.indexOf(`class="zbtn znxt js-disabled">下一页`) > -1) haveNext = false; offset += perPage; } } // 获取歌词详情 async function fetch({ playlistId, debug = false }) { let result = await dbUtils.query('SELECT count(*) as count FROM playlist WHERE playlist_id = ?', [playlistId]); if (result[0].count > 0 && !debug) { console.log(`数据库中已有数据,跳过 playlistId: ${playlistId}`); return; } // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95%e5%88%86%e7%b1%bb try { // 获取歌单分类 // var playlistResult = await playlist_catlist({}); // var playlistResult = await playlist_hot({}); var playlistResult = await playlist_detail({ id: playlistId, }); // fs.writeFileSync(path.join(__dirname, "../../temp", `playlist-${playlistId}.json`), JSON.stringify(playlistResult)); } catch (errors) { console.error(errors); return; } let playlist = playlistResult.body.playlist; // console.log("playlist", playlist); let playlistInfo = { playlist_id: playlist.id, title: playlist.name, english_title: playlist.englishTitle, description: playlist.description, user_id: playlist.userId, tags: JSON.stringify(playlist.tags), alg_tags: JSON.stringify(playlist.algTags), playlist_create_time: playlist.createTime, playlist_update_time: playlist.updateTime, track_count: playlist.trackCount, play_count: playlist.playCount, subscribed_count: playlist.subscribedCount, share_count: playlist.shareCount, comment_count: playlist.commentCount, cover_image: playlist.coverImgUrl ? /^https?:\/\/p.\.music\.126\.net\/(.*?)$/.exec(playlist.coverImgUrl)[1] : '', title_image: playlist.titleImageUrl ? /^https?:\/\/p.\.music\.126\.net\/(.*?)$/.exec(playlist.titleImageUrl)[1] : '', background_cover: playlist.backgroundCoverUrl ? /^https?:\/\/p.\.music\.126\.net\/(.*?)$/.exec(playlist.backgroundCoverUrl)[1] : '', ordered: playlist.ordered, copied: playlist.copied, status: playlist.status, privacy: playlist.privacy, ad_type: playlist.adType, special_type: playlist.specialType, official_playlist_type: playlist.officialPlaylistType, op_recommend: playlist.opRecommend, high_quality: playlist.highQuality, new_imported: playlist.newImported, update_frequency: playlist.updateFrequency, grade_status: playlist.gradeStatus, score: playlist.score, creator: JSON.stringify(playlist.creator), video_ids: JSON.stringify(playlist.videoIds), videos: JSON.stringify(playlist.videos), banned_track_ids: JSON.stringify(playlist.bannedTrackIds), remix_video: JSON.stringify(playlist.remixVideo), }; // console.log("playlistInfo", playlistInfo); if (playlist.bannedTrackIds) { console.log("bannedTrackIds", playlist.bannedTrackIds); process.exit(0); } let trackIds = playlist.trackIds.map(track => [track.id, playlist.id, track.alg, track.rcmdReason]); if (trackIds.length > 0) await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [trackIds]); await dbUtils.query(` INSERT INTO playlist ( ${Object.keys(playlistInfo).map(field => `\`${field}\``).join(",")} ) VALUES ? ON DUPLICATE KEY UPDATE ${Object.keys(playlistInfo).map(field => `${field}=VALUES(${field})`).join(", ")} `, [[Object.values(playlistInfo)]]); return playlistInfo; } module.exports = { fetch: fetch, fetchAll: fetchAll, }