1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
Files
tools/netease_music/src/getInfo/playlistUtils.js
2022-10-19 23:39:54 +08:00

172 lines
7.5 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
// refer:
// https://neteasecloudmusicapi-docs.4everland.app/
// https://github.com/Binaryify/NeteaseCloudMusicApi
const { playlist_catlist, playlist_hot, playlist_detail } = require('NeteaseCloudMusicApi');
/**
 * Crawl the playlists of every category, one category after another.
 *
 * The category titles are read from the `category` table (rows whose
 * `netease_group_chinese` is not NULL) and the catch-all category '全部' is
 * put in front of them. A failure while crawling one category is logged and
 * does not prevent the remaining categories from being crawled.
 *
 * @param {object} [options]
 * @param {*} [options.args] - Accepted for interface compatibility; not read here.
 * @returns {Promise<void>}
 */
async function fetchAll({ args } = {}) {
    console.log("start fetching playlists ...");
    console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)");
    // Load every NetEase category title stored in the database.
    const rows = await dbUtils.query(`SELECT title FROM category WHERE netease_group_chinese IS NOT NULL`);
    // Declared locally on purpose: a bare assignment here would create an
    // implicit global (and throws a ReferenceError in strict mode / ESM).
    const categoryNames = ['全部', ...rows.map(row => row.title)];
    console.log(categoryNames);
    for (let i = 0; i < categoryNames.length; i++) {
        const categoryName = categoryNames[i];
        try {
            await fetchCategory({ categoryName: categoryName, progress: `${i + 1}/${categoryNames.length}` });
        } catch (err) {
            // Best effort: keep going with the next category.
            console.error(err);
        }
    }
}
/**
 * Crawl all playlists listed under one category of the NetEase "discover
 * playlist" pages.
 *
 * The listing is walked page by page (35 entries per page). For each page the
 * playlist ids are extracted from the HTML, ids already present in the
 * `playlist` table are dropped, and the remaining ones are fetched one at a
 * time with a pause of `global.sleepTime` between requests.
 *
 * Paging stops when the page contains the disabled "next page" button, when a
 * page contains no playlist id at all, or when downloading/parsing a page fails.
 *
 * @param {object} options
 * @param {string} options.categoryName - Category title, sent as the `cat` query parameter.
 * @param {string} options.progress - Category progress label (e.g. "3/70"), only used for logging.
 * @returns {Promise<void>}
 */
async function fetchCategory({ categoryName, progress }) {
    const perPage = 35;
    let offset = 0;
    let haveNext = true;
    while (haveNext) {
        const url = `https://music.163.com/discover/playlist?cat=${encodeURIComponent(categoryName)}&limit=${perPage}&offset=${offset}`;
        let html;
        let playlistIds;
        try {
            html = await requestUtils.getApiResult(url);
            // Every playlist id shows up twice in the page, so collect into a Set.
            const uniqueIds = new Set();
            for (const match of html.matchAll(/"\/playlist\?id=(\d{1,20})"/g)) {
                uniqueIds.add(Number(match[1]));
            }
            // Numeric comparator: the default sort compares ids as strings.
            playlistIds = [...uniqueIds].sort((a, b) => a - b);
        } catch (errors) {
            console.error(errors);
            return;
        }
        if (playlistIds.length === 0) {
            // An empty id list would render as the invalid SQL `IN ()`, and a page
            // without ids and without the disabled "next" marker would never end
            // the loop, so stop paging here.
            console.log(`分类: ${progress} | no playlist found at offset ${offset}, stop paging`);
            return;
        }
        // Look up which of these playlists are already stored and skip them.
        const storedRows = await dbUtils.query(`
            SELECT playlist_id FROM playlist WHERE playlist_id IN ?
        `, [[playlistIds]]);
        const storedIds = new Set(storedRows.map(row => Number(row.playlist_id)));
        const finalPlaylistIds = playlistIds.filter(playlistId => !storedIds.has(playlistId));
        console.log("finalPlaylistIds.length", finalPlaylistIds.length);
        for (let i = 0; i < finalPlaylistIds.length; i++) {
            await global.checkIsExit();
            const playlistId = finalPlaylistIds[i];
            console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
            try {
                await fetch({ playlistId: playlistId });
            } catch (err) {
                // Best effort: one broken playlist must not abort the page.
                console.error(err);
            }
            await sleepUtils.sleep(global.sleepTime);
        }
        // The "next page" button is rendered disabled on the last page.
        if (html.includes(`class="zbtn znxt js-disabled">下一页</a>`)) haveNext = false;
        offset += perPage;
    }
}
/**
 * Reduce a NetEase image URL to the path after the `p?.music.126.net` host.
 *
 * @param {string|null|undefined} url
 * @returns {string} '' for an empty url, the path for a matching url, and the
 *     url unchanged when it is hosted somewhere else.
 */
function stripNeteaseImageHost(url) {
    if (!url) return '';
    const match = /^https?:\/\/p.\.music\.126\.net\/(.*?)$/.exec(url);
    // A non-matching url is kept as is rather than dereferencing a null match.
    return match ? match[1] : url;
}

/**
 * Fetch the details of one playlist and store them in the database.
 *
 * Writes the playlist's tracks to `song_playlist_relation` (INSERT IGNORE) and
 * upserts the playlist row into `playlist`. Nothing is fetched when the
 * playlist is already stored, unless `debug` is set.
 *
 * @param {object} options
 * @param {number|string} options.playlistId - Id of the playlist to fetch.
 * @param {boolean} [options.debug=false] - Fetch even when the playlist is already stored.
 * @returns {Promise<object|undefined>} The stored row, or undefined when the
 *     playlist was skipped, the request failed, or the response had no playlist.
 */
async function fetch({ playlistId, debug = false }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM playlist WHERE playlist_id = ?', [playlistId]);
    if (existing[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 playlistId: ${playlistId}`);
        return;
    }
    // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95%e5%88%86%e7%b1%bb
    let playlistResult;
    try {
        playlistResult = await playlist_detail({
            id: playlistId,
        });
    } catch (errors) {
        console.error(errors);
        return;
    }
    const playlist = playlistResult?.body?.playlist;
    if (!playlist) {
        // The response carried no playlist object; there is nothing to store.
        console.error(`playlist_detail returned no playlist for playlistId: ${playlistId}`);
        return;
    }
    const playlistInfo = {
        playlist_id: playlist.id,
        title: playlist.name,
        english_title: playlist.englishTitle,
        description: playlist.description,
        user_id: playlist.userId,
        tags: JSON.stringify(playlist.tags),
        alg_tags: JSON.stringify(playlist.algTags),
        playlist_create_time: playlist.createTime,
        playlist_update_time: playlist.updateTime,
        track_count: playlist.trackCount,
        play_count: playlist.playCount,
        subscribed_count: playlist.subscribedCount,
        share_count: playlist.shareCount,
        comment_count: playlist.commentCount,
        cover_image: stripNeteaseImageHost(playlist.coverImgUrl),
        title_image: stripNeteaseImageHost(playlist.titleImageUrl),
        background_cover: stripNeteaseImageHost(playlist.backgroundCoverUrl),
        ordered: playlist.ordered,
        copied: playlist.copied,
        status: playlist.status,
        privacy: playlist.privacy,
        ad_type: playlist.adType,
        special_type: playlist.specialType,
        official_playlist_type: playlist.officialPlaylistType,
        op_recommend: playlist.opRecommend,
        high_quality: playlist.highQuality,
        new_imported: playlist.newImported,
        update_frequency: playlist.updateFrequency,
        grade_status: playlist.gradeStatus,
        score: playlist.score,
        creator: JSON.stringify(playlist.creator),
        video_ids: JSON.stringify(playlist.videoIds),
        videos: JSON.stringify(playlist.videos),
        banned_track_ids: JSON.stringify(playlist.bannedTrackIds),
        remix_video: JSON.stringify(playlist.remixVideo),
    };
    // NOTE(review): this terminates the whole process (exit code 0) before
    // anything is stored as soon as a playlist carries bannedTrackIds. It looks
    // like a leftover trap for inspecting that field - confirm whether it
    // should stay.
    if (playlist.bannedTrackIds) {
        console.log("bannedTrackIds", playlist.bannedTrackIds);
        process.exit(0);
    }
    // One relation row per track; a missing trackIds list means no rows.
    const relationRows = (playlist.trackIds ?? []).map(track => [track.id, playlist.id, track.alg, track.rcmdReason]);
    if (relationRows.length > 0) {
        await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [relationRows]);
    }
    // Column names are back-quoted in both clauses of the upsert.
    const fields = Object.keys(playlistInfo);
    const columnList = fields.map(field => `\`${field}\``).join(",");
    const updateList = fields.map(field => `\`${field}\`=VALUES(\`${field}\`)`).join(", ");
    await dbUtils.query(`
        INSERT INTO playlist ( ${columnList} ) VALUES ?
        ON DUPLICATE KEY UPDATE ${updateList}
    `, [[Object.values(playlistInfo)]]);
    return playlistInfo;
}
// Public API: fetch a single playlist, or crawl every category.
module.exports = { fetch, fetchAll };