1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
This commit is contained in:
2022-10-19 22:58:56 +08:00
parent 8451e7a849
commit da900dc2b1
7 changed files with 287 additions and 57 deletions

View File

@@ -9,70 +9,164 @@ const dbUtils = global.dbUtils;
// refer:
// https://neteasecloudmusicapi-docs.4everland.app/
// https://github.com/Binaryify/NeteaseCloudMusicApi
const { playlist_catlist, playlist_hot, playlist_detail, playlist_track_all, song_detail } = require('NeteaseCloudMusicApi');
const { playlist_catlist, playlist_hot, playlist_detail } = require('NeteaseCloudMusicApi');
// // 从数据库中查出还缺少的歌词,并进行爬取
// async function fetchAll() {
// console.log("start fetching lyrics ...");
// var playlistIds = await dbUtils.query(`
// SELECT DISTINCT playlist_id FROM playlist WHERE playlist_id NOT IN ( SELECT playlist_id FROM lyric )
// `, []);
// playlistIds = playlistIds.map(playlist => playlist.playlist_id);
// for (let i = 0; i < playlistIds.length; i++) {
// await global.checkIsExit();
// const playlistId = playlistIds[i];
// console.log(`${i + 1}/${playlistIds.length} | lyric: ${playlistId}`);
// try {
// await fetch({ playlistId: playlistId });
// } catch (err) {
// console.error(err);
// }
// await sleepUtils.sleep(global.sleepTime);
// }
// }
/**
 * Crawl playlists for every category, one category after another.
 *
 * The category titles are read from the `category` table (rows whose
 * `netease_group_chinese` is not NULL) and the catch-all title '全部' is put
 * in front of them. Each title is handed to `fetchCategory`; an error thrown
 * for one category is logged and the loop continues with the next one.
 *
 * @param {object} options
 * @param {*} options.args - Accepted for interface compatibility; not read here.
 * @returns {Promise<void>}
 */
async function fetchAll({ args }) {
  console.log("start fetching playlists ...");
  console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)");

  // Load every category title that has a NetEase group assigned.
  const rows = await dbUtils.query(`SELECT title FROM category WHERE netease_group_chinese IS NOT NULL`);

  // Kept in a local binding: assigning to an undeclared name would create an
  // implicit global (or throw a ReferenceError in strict mode).
  const categoryNames = ['全部', ...rows.map(row => row.title)];
  console.log(categoryNames);

  for (let i = 0; i < categoryNames.length; i++) {
    try {
      await fetchCategory({
        categoryName: categoryNames[i],
        progress: `${i + 1}/${categoryNames.length}`,
      });
    } catch (err) {
      // Best effort: one failing category must not abort the remaining ones.
      console.error(err);
    }
  }
}
/**
 * Crawl all playlists listed under one category of the NetEase
 * "discover playlist" pages.
 *
 * The listing is requested page by page (35 entries per page). For each page
 * the playlist ids are extracted from the HTML, ids already present in the
 * `playlist` table are dropped, and the remaining ids are passed to `fetch`
 * one at a time with a pause of `global.sleepTime` between them.
 *
 * The function returns when
 *  - a page request or its parsing fails (the error is logged),
 *  - a page contains no playlist ids, or
 *  - the page shows the disabled "下一页" (next page) button.
 *
 * @param {object} options
 * @param {string} options.categoryName - Category title used as the `cat` query parameter.
 * @param {string} options.progress - Category progress label, only used in log output.
 * @returns {Promise<void>}
 */
async function fetchCategory({ categoryName, progress }) {
  const perPage = 35;
  let offset = 0;
  let haveNext = true;

  while (haveNext) {
    const url = `https://music.163.com/discover/playlist?cat=${encodeURIComponent(categoryName)}&limit=${perPage}&offset=${offset}`;

    let html;
    let playlistIds;
    try {
      html = await requestUtils.getApiResult(url);
      // Every playlist id shows up twice in the page markup, so collect into a Set.
      const uniqueIds = new Set();
      for (const match of html.matchAll(/"\/playlist\?id=(\d{1,20})"/g)) {
        uniqueIds.add(Number(match[1]));
      }
      // Numeric comparator: the default sort compares the ids as strings.
      playlistIds = Array.from(uniqueIds).sort((a, b) => a - b);
    } catch (errors) {
      console.error(errors);
      return;
    }

    // Nothing listed on this page. Stop here: the `IN ?` lookup below would be
    // given an empty list (presumably invalid SQL - depends on dbUtils), and a
    // page lacking both ids and the last-page marker gives no reason to go on.
    if (playlistIds.length === 0) return;

    // Look up which of these playlists are already stored and skip them.
    const storedRows = await dbUtils.query(`
      SELECT playlist_id FROM playlist WHERE playlist_id IN ?
    `, [[playlistIds]]);
    const storedIds = new Set(storedRows.map(row => Number(row.playlist_id)));
    const finalPlaylistIds = playlistIds.filter(playlistId => !storedIds.has(playlistId));
    console.log("finalPlaylistIds.length", finalPlaylistIds.length);

    for (let i = 0; i < finalPlaylistIds.length; i++) {
      await global.checkIsExit();
      const playlistId = finalPlaylistIds[i];
      console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
      try {
        await fetch({ playlistId: playlistId });
      } catch (err) {
        // Best effort: a single failing playlist must not stop the page.
        console.error(err);
      }
      await sleepUtils.sleep(global.sleepTime);
    }

    // The last page renders the "next page" button with the js-disabled class.
    if (html.includes(`class="zbtn znxt js-disabled">下一页</a>`)) haveNext = false;
    offset += perPage;
  }
}
/**
 * Fetch the detail of one playlist via `playlist_detail` and store it.
 *
 * Steps:
 *  1. Skip when the `playlist` table already has a row for `playlistId`
 *     (unless `debug` is true).
 *  2. Request the playlist detail; a failing request is logged and ends the call.
 *  3. Insert the playlist's track ids into `song_playlist_relation`
 *     (INSERT IGNORE) and upsert the playlist row itself.
 *
 * API reference: https://neteasecloudmusicapi-docs.4everland.app/
 *
 * @param {object} options
 * @param {number|string} options.playlistId - Id of the playlist to fetch.
 * @param {boolean} [options.debug=false] - When true, fetch even if the playlist is already stored.
 * @returns {Promise<object|undefined>} The column/value object written to the
 *   `playlist` table, or undefined when the playlist was skipped, the request
 *   failed, or the response carried no playlist.
 */
async function fetch({ playlistId, debug = false }) {
  const existing = await dbUtils.query('SELECT count(*) as count FROM playlist WHERE playlist_id = ?', [playlistId]);
  if (existing[0].count > 0 && !debug) {
    console.log(`数据库中已有数据,跳过 playlistId: ${playlistId}`);
    return;
  }

  let playlistResult;
  try {
    playlistResult = await playlist_detail({
      id: playlistId,
    });
  } catch (errors) {
    console.error(errors);
    return;
  }
  console.log(playlistResult);

  // A response without a playlist object cannot be stored; report it instead
  // of failing on the first property access.
  const playlist = playlistResult && playlistResult.body && playlistResult.body.playlist;
  if (!playlist) {
    console.error(`playlist_detail returned no playlist, playlistId: ${playlistId}`);
    return;
  }

  // Image URLs are stored without the "http(s)://p?.music.126.net/" prefix.
  // An empty/missing URL becomes ''; a URL on any other host is kept whole so
  // that an unexpected host does not throw and no information is lost.
  const imageHostPattern = /^https?:\/\/p.\.music\.126\.net\/(.*?)$/;
  const toImagePath = (imageUrl) => {
    if (!imageUrl) return '';
    const match = imageHostPattern.exec(imageUrl);
    return match ? match[1] : imageUrl;
  };

  // Keys are the column names of the `playlist` table.
  const playlistInfo = {
    playlist_id: playlist.id,
    title: playlist.name,
    english_title: playlist.englishTitle,
    description: playlist.description,
    user_id: playlist.userId,
    tags: JSON.stringify(playlist.tags),
    alg_tags: JSON.stringify(playlist.algTags),
    playlist_create_time: playlist.createTime,
    playlist_update_time: playlist.updateTime,
    track_count: playlist.trackCount,
    play_count: playlist.playCount,
    subscribed_count: playlist.subscribedCount,
    share_count: playlist.shareCount,
    comment_count: playlist.commentCount,
    cover_image: toImagePath(playlist.coverImgUrl),
    title_image: toImagePath(playlist.titleImageUrl),
    background_cover: toImagePath(playlist.backgroundCoverUrl),
    ordered: playlist.ordered,
    copied: playlist.copied,
    status: playlist.status,
    privacy: playlist.privacy,
    ad_type: playlist.adType,
    special_type: playlist.specialType,
    official_playlist_type: playlist.officialPlaylistType,
    op_recommend: playlist.opRecommend,
    high_quality: playlist.highQuality,
    new_imported: playlist.newImported,
    update_frequency: playlist.updateFrequency,
    grade_status: playlist.gradeStatus,
    score: playlist.score,
    creator: JSON.stringify(playlist.creator),
    video_ids: JSON.stringify(playlist.videoIds),
    videos: JSON.stringify(playlist.videos),
    banned_track_ids: JSON.stringify(playlist.bannedTrackIds),
    remix_video: JSON.stringify(playlist.remixVideo),
  };

  // NOTE(review): this terminates the whole process (exit code 0) as soon as a
  // playlist carries bannedTrackIds, before anything is written. It looks like
  // a deliberate stop to inspect such a payload - confirm it is still wanted.
  if (playlist.bannedTrackIds) {
    console.log("bannedTrackIds", playlist.bannedTrackIds);
    process.exit(0);
  }

  // One row per track: (song_id, playlist_id, alg, rcmd_reason).
  const relationRows = (playlist.trackIds || []).map(track => [track.id, playlist.id, track.alg, track.rcmdReason]);
  if (relationRows.length > 0) {
    await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [relationRows]);
  }

  // Upsert: insert the playlist row, or overwrite every column when it exists.
  const columns = Object.keys(playlistInfo);
  await dbUtils.query(`
    INSERT INTO playlist ( ${columns.map(field => `\`${field}\``).join(",")} ) VALUES ?
    ON DUPLICATE KEY UPDATE ${columns.map(field => `\`${field}\`=VALUES(\`${field}\`)`).join(", ")}
  `, [[Object.values(playlistInfo)]]);

  return playlistInfo;
}
module.exports = {
fetch: fetch,
// fetchAll: fetchAll,
fetchAll: fetchAll,
}