172 lines
7.5 KiB
JavaScript
172 lines
7.5 KiB
JavaScript
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
const requestUtils = require('../../../utils/requestUtils');
|
||
const sleepUtils = require('../../../utils/sleepUtils');
|
||
|
||
const dbUtils = global.dbUtils;
|
||
|
||
// refer:
|
||
// https://neteasecloudmusicapi-docs.4everland.app/
|
||
// https://github.com/Binaryify/NeteaseCloudMusicApi
|
||
const { playlist_catlist, playlist_hot, playlist_detail } = require('NeteaseCloudMusicApi');
|
||
|
||
/**
 * Crawl playlists for every category stored in the database.
 *
 * Reads the `title` of each row in the `category` table whose
 * `netease_group_chinese` column is not NULL, puts the catch-all category
 * '全部' in front of them, and crawls the categories one after another through
 * `fetchCategory`. An error thrown while crawling one category is logged and
 * the loop continues with the next category.
 *
 * @param {Object} [options] - Options bag. `options.args` is accepted for
 *   interface compatibility; this function does not read it.
 * @returns {Promise<void>} Resolves once every category has been attempted.
 */
async function fetchAll({ args } = {}) {
    console.log("start fetching playlists ...");
    console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)");

    // Load every NetEase category title from the database.
    const rows = await dbUtils.query(`SELECT title FROM category WHERE netease_group_chinese IS NOT NULL`);

    // Declared locally on purpose: assigning to an undeclared name would leak a
    // global (and is a ReferenceError in strict mode). '全部' goes first.
    const categories = ['全部', ...rows.map(row => row.title)];
    console.log(categories);

    for (const [index, categoryName] of categories.entries()) {
        try {
            await fetchCategory({ categoryName, progress: `${index + 1}/${categories.length}` });
        } catch (err) {
            // Best effort: one failing category must not abort the whole crawl.
            console.error(err);
        }
    }
}
|
||
|
||
/**
 * Crawl every playlist listed on the NetEase "discover playlist" pages of one
 * category.
 *
 * Pages are requested 35 entries at a time. For each page the playlist ids are
 * extracted from the HTML, ids already present in the `playlist` table are
 * dropped, and the remaining ones are fetched one by one through `fetch`,
 * sleeping `global.sleepTime` between requests. Paging stops when the page
 * contains the disabled "next page" button, when a page yields no playlist
 * ids, or when requesting/parsing a page fails (the error is logged).
 *
 * @param {Object} options
 * @param {string} options.categoryName - Category title sent as the `cat`
 *   query parameter.
 * @param {string} options.progress - Text describing the category progress;
 *   only used in log output.
 * @returns {Promise<void>}
 */
async function fetchCategory({ categoryName, progress }) {
    const perPage = 35;
    let offset = 0;
    let haveNext = true;

    while (haveNext) {
        const url = `https://music.163.com/discover/playlist?cat=${encodeURIComponent(categoryName)}&limit=${perPage}&offset=${offset}`;
        let html;
        let playlistIds;
        try {
            html = await requestUtils.getApiResult(url);

            // Every playlist id appears twice in the page, so de-duplicate with a Set.
            const uniqueIds = new Set();
            for (const match of html.matchAll(/"\/playlist\?id=(\d{1,20})"/g)) {
                uniqueIds.add(Number(match[1]));
            }
            // Numeric comparator: the default sort compares values as strings.
            playlistIds = Array.from(uniqueIds).sort((left, right) => left - right);
        } catch (errors) {
            console.error(errors);
            return;
        }

        // An empty page means there is nothing left to crawl. It also must not
        // reach the query below: an empty list would presumably be rendered as
        // `IN ()` by the driver, which is invalid SQL.
        if (playlistIds.length === 0) {
            console.log(`no playlists found, stop category: ${categoryName} | offset: ${offset}`);
            return;
        }

        // Look up the playlists that were already crawled and exclude them.
        const knownRows = await dbUtils.query(`
            SELECT playlist_id FROM playlist WHERE playlist_id IN ?
        `, [[playlistIds]]);
        // Number() is a no-op for numeric columns and keeps the comparison
        // working should the driver hand the ids back as strings.
        const knownIds = new Set(knownRows.map(row => Number(row.playlist_id)));
        const finalPlaylistIds = playlistIds.filter(playlistId => !knownIds.has(playlistId));

        console.log("finalPlaylistIds.length", finalPlaylistIds.length);

        for (const [index, playlistId] of finalPlaylistIds.entries()) {
            await global.checkIsExit();
            console.log(`分类: ${progress} | 歌单: ${offset + index + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
            try {
                await fetch({ playlistId });
            } catch (err) {
                // Best effort: a failing playlist must not abort the page.
                console.error(err);
            }
            await sleepUtils.sleep(global.sleepTime);
        }

        // The last page renders its "next page" button in the disabled state.
        if (html.includes(`class="zbtn znxt js-disabled">下一页</a>`)) haveNext = false;
        offset += perPage;
    }
}
|
||
|
||
/**
 * Return the path part of a NetEase image URL (everything after the
 * `p?.music.126.net/` host).
 *
 * @param {string|null|undefined} url - Image URL taken from the API response.
 * @returns {string} The path without the host; '' when `url` is empty or does
 *   not have the expected form (a warning is logged in the latter case).
 */
function extractImagePath(url) {
    if (!url) return '';
    const match = /^https?:\/\/p.\.music\.126\.net\/(.*?)$/.exec(url);
    if (!match) {
        // Do not crash the whole playlist because of one unexpected image host.
        console.warn(`unexpected image url, stored as empty: ${url}`);
        return '';
    }
    return match[1];
}

/**
 * Fetch the details of one playlist and store them in the database.
 *
 * Skips the playlist when a row with this id already exists in `playlist`,
 * unless `debug` is set. Otherwise the details are requested through
 * `playlist_detail`, the track ids are written to `song_playlist_relation`
 * (INSERT IGNORE) and the playlist row is upserted into `playlist`.
 *
 * API docs: https://neteasecloudmusicapi-docs.4everland.app/
 *
 * @param {Object} options
 * @param {number|string} options.playlistId - Id of the playlist to fetch.
 * @param {boolean} [options.debug=false] - When true, fetch even if the
 *   playlist is already stored.
 * @returns {Promise<Object|undefined>} The stored column/value object, or
 *   undefined when the playlist was skipped, the API request failed, or the
 *   response carried no playlist.
 */
async function fetch({ playlistId, debug = false }) {
    const countRows = await dbUtils.query('SELECT count(*) as count FROM playlist WHERE playlist_id = ?', [playlistId]);
    if (countRows[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 playlistId: ${playlistId}`);
        return;
    }

    let playlistResult;
    try {
        playlistResult = await playlist_detail({
            id: playlistId,
        });
    } catch (errors) {
        console.error(errors);
        return;
    }

    // The response may come back without a playlist object; treat that like a
    // failed request instead of crashing on a property access.
    const playlist = playlistResult && playlistResult.body && playlistResult.body.playlist;
    if (!playlist) {
        console.error(`playlist detail missing in API response, playlistId: ${playlistId}`);
        return;
    }

    const playlistInfo = {
        playlist_id: playlist.id,
        title: playlist.name,
        english_title: playlist.englishTitle,
        description: playlist.description,
        user_id: playlist.userId,
        tags: JSON.stringify(playlist.tags),
        alg_tags: JSON.stringify(playlist.algTags),
        playlist_create_time: playlist.createTime,
        playlist_update_time: playlist.updateTime,
        track_count: playlist.trackCount,
        play_count: playlist.playCount,
        subscribed_count: playlist.subscribedCount,
        share_count: playlist.shareCount,
        comment_count: playlist.commentCount,
        cover_image: extractImagePath(playlist.coverImgUrl),
        title_image: extractImagePath(playlist.titleImageUrl),
        background_cover: extractImagePath(playlist.backgroundCoverUrl),
        ordered: playlist.ordered,
        copied: playlist.copied,
        status: playlist.status,
        privacy: playlist.privacy,
        ad_type: playlist.adType,
        special_type: playlist.specialType,
        official_playlist_type: playlist.officialPlaylistType,
        op_recommend: playlist.opRecommend,
        high_quality: playlist.highQuality,
        new_imported: playlist.newImported,
        update_frequency: playlist.updateFrequency,
        grade_status: playlist.gradeStatus,
        score: playlist.score,
        creator: JSON.stringify(playlist.creator),
        video_ids: JSON.stringify(playlist.videoIds),
        videos: JSON.stringify(playlist.videos),
        banned_track_ids: JSON.stringify(playlist.bannedTrackIds),
        remix_video: JSON.stringify(playlist.remixVideo),
    };

    // NOTE(review): this stops the whole process (exit code 0) the first time a
    // playlist carries bannedTrackIds, before anything is written. It looks
    // like a debugging tripwire; confirm whether it should stay.
    if (playlist.bannedTrackIds) {
        console.log("bannedTrackIds", playlist.bannedTrackIds);
        process.exit(0);
    }

    // One relation row per track; a missing trackIds list is treated as empty.
    const trackRows = (playlist.trackIds || []).map(track => [track.id, playlist.id, track.alg, track.rcmdReason]);
    if (trackRows.length > 0) {
        await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [trackRows]);
    }

    // Upsert the playlist row. Column names are quoted in both clauses so that
    // a name colliding with an SQL keyword cannot break the statement.
    const fields = Object.keys(playlistInfo);
    const columnList = fields.map(field => `\`${field}\``).join(",");
    const updateList = fields.map(field => `\`${field}\`=VALUES(\`${field}\`)`).join(", ");
    await dbUtils.query(`
        INSERT INTO playlist ( ${columnList} ) VALUES ?
        ON DUPLICATE KEY UPDATE ${updateList}
    `, [[Object.values(playlistInfo)]]);

    return playlistInfo;
}
|
||
|
||
// Public API of this module: single-playlist fetch and the full crawl.
module.exports = { fetch, fetchAll };