update
This commit is contained in:
@@ -9,70 +9,164 @@ const dbUtils = global.dbUtils;
|
||||
// refer:
|
||||
// https://neteasecloudmusicapi-docs.4everland.app/
|
||||
// https://github.com/Binaryify/NeteaseCloudMusicApi
|
||||
const { playlist_catlist, playlist_hot, playlist_detail, playlist_track_all, song_detail } = require('NeteaseCloudMusicApi');
|
||||
const { playlist_catlist, playlist_hot, playlist_detail } = require('NeteaseCloudMusicApi');
|
||||
|
||||
// // 从数据库中查出还缺少的歌词,并进行爬取
|
||||
// async function fetchAll() {
|
||||
// console.log("start fetching lyrics ...");
|
||||
// var playlistIds = await dbUtils.query(`
|
||||
// SELECT DISTINCT playlist_id FROM playlist WHERE playlist_id NOT IN ( SELECT playlist_id FROM lyric )
|
||||
// `, []);
|
||||
// playlistIds = playlistIds.map(playlist => playlist.playlist_id);
|
||||
// for (let i = 0; i < playlistIds.length; i++) {
|
||||
// await global.checkIsExit();
|
||||
// const playlistId = playlistIds[i];
|
||||
// console.log(`${i + 1}/${playlistIds.length} | lyric: ${playlistId}`);
|
||||
// try {
|
||||
// await fetch({ playlistId: playlistId });
|
||||
// } catch (err) {
|
||||
// console.error(err);
|
||||
// }
|
||||
// await sleepUtils.sleep(global.sleepTime);
|
||||
// }
|
||||
// }
|
||||
/**
 * Crawl playlists for every NetEase category stored in the database.
 *
 * Reads the `title` of every `category` row whose `netease_group_chinese`
 * is set, puts the catch-all category '全部' in front, and crawls the
 * categories one after another through `fetchCategory`. A failure in one
 * category is logged and the remaining categories are still processed.
 *
 * @param {object} [options] - Invocation options.
 * @param {*} [options.args] - Accepted for interface compatibility; not used here.
 * @returns {Promise<void>}
 */
async function fetchAll({ args } = {}) {
    console.log("start fetching playlists ...");
    console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)");

    // Load every category that is mapped to a NetEase category group.
    const rows = await dbUtils.query(`SELECT title FROM category WHERE netease_group_chinese IS NOT NULL`);

    // Keep the list local: assigning to an undeclared name would create an
    // implicit global and throws a ReferenceError in strict mode / ES modules.
    // '全部' ("all") is crawled first.
    const categoryNames = ['全部', ...rows.map(row => row.title)];
    console.log(categoryNames);

    for (const [index, categoryName] of categoryNames.entries()) {
        try {
            await fetchCategory({ categoryName, progress: `${index + 1}/${categoryNames.length}` });
        } catch (err) {
            // Best effort: one broken category must not abort the whole crawl.
            console.error(err);
        }
    }
}
|
||||
|
||||
/**
 * Crawl every playlist listed under one category of the NetEase
 * "discover playlist" pages.
 *
 * Pages through the listing 35 entries at a time, extracts the playlist ids
 * from the page HTML, drops the ids already present in the `playlist` table
 * and calls `fetch` for each remaining id, sleeping `global.sleepTime`
 * between playlists. Paging stops when the page shows a disabled
 * "next page" button, when a page contains no playlist ids, or when the
 * page request fails.
 *
 * @param {object} options
 * @param {string} options.categoryName - Category title, sent as the `cat` query parameter.
 * @param {string} options.progress - Category progress label (e.g. "3/70"), used only for logging.
 * @returns {Promise<void>}
 */
async function fetchCategory({ categoryName, progress }) {
    const perPage = 35;
    // Present in the page HTML when the "next page" button is disabled, i.e. on the last page.
    const lastPageMarker = `class="zbtn znxt js-disabled">下一页</a>`;
    let offset = 0;
    let haveNext = true;

    while (haveNext) {
        const url = `https://music.163.com/discover/playlist?cat=${encodeURIComponent(categoryName)}&limit=${perPage}&offset=${offset}`;
        let html;
        let playlistIds;
        try {
            html = await requestUtils.getApiResult(url);

            // Every playlist id shows up more than once in the page, so collect them in a Set.
            const uniqueIds = new Set();
            for (const match of html.matchAll(/"\/playlist\?id=(\d{1,20})"/g)) {
                uniqueIds.add(Number(match[1]));
            }
            // Numeric comparator: the default sort compares numbers as strings.
            playlistIds = [...uniqueIds].sort((a, b) => a - b);
        } catch (errors) {
            console.error(errors);
            return;
        }

        // A page without playlist links means we ran past the end of the listing
        // (or received an unexpected page). Stop here: an empty list would render
        // as `IN ()` — presumably invalid SQL for the driver behind dbUtils — and
        // without the last-page marker the loop would never terminate.
        if (playlistIds.length === 0) {
            console.log("finalPlaylistIds.length", 0);
            return;
        }

        // Look up which of these playlists are already stored and exclude them.
        const storedRows = await dbUtils.query(`
            SELECT playlist_id FROM playlist WHERE playlist_id IN ?
        `, [[playlistIds]]);
        const storedIds = new Set(storedRows.map(row => row.playlist_id));
        const finalPlaylistIds = playlistIds.filter(playlistId => !storedIds.has(playlistId));

        console.log("finalPlaylistIds.length", finalPlaylistIds.length);

        for (let i = 0; i < finalPlaylistIds.length; i++) {
            await global.checkIsExit();
            const playlistId = finalPlaylistIds[i];
            console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
            try {
                await fetch({ playlistId });
            } catch (err) {
                // Best effort: a single failing playlist must not abort the page.
                console.error(err);
            }
            await sleepUtils.sleep(global.sleepTime);
        }

        // Last-page check: the "next page" button is rendered disabled on the final page.
        haveNext = !html.includes(lastPageMarker);
        offset += perPage;
    }
}
|
||||
|
||||
/**
 * Fetch the detail of one playlist and store it.
 *
 * Skips playlists that already have a row in `playlist` (unless `debug` is
 * set), requests the detail through `playlist_detail`, inserts the
 * song/playlist pairs into `song_playlist_relation` and upserts the
 * playlist row.
 *
 * @param {object} options
 * @param {number|string} options.playlistId - Id of the playlist to fetch.
 * @param {boolean} [options.debug=false] - When true, re-fetch even if the playlist is already stored.
 * @returns {Promise<object|undefined>} The column/value record written to
 *   `playlist`, or `undefined` when the playlist was skipped, the request
 *   failed, or the response carried no playlist.
 */
async function fetch({ playlistId, debug = false }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM playlist WHERE playlist_id = ?', [playlistId]);
    if (existing[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 playlistId: ${playlistId}`);
        return;
    }

    // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95%e5%88%86%e7%b1%bb
    let playlistResult;
    try {
        playlistResult = await playlist_detail({
            id: playlistId,
        });
    } catch (errors) {
        console.error(errors);
        return;
    }

    // The response may come back without a playlist; report it instead of
    // failing with a TypeError further down.
    const playlist = playlistResult?.body?.playlist;
    if (!playlist) {
        console.error(`playlist detail missing in API response, playlistId: ${playlistId}`);
        return;
    }

    // Keys are the column names of the `playlist` table.
    const playlistInfo = {
        playlist_id: playlist.id,
        title: playlist.name,
        english_title: playlist.englishTitle,
        description: playlist.description,
        user_id: playlist.userId,
        tags: JSON.stringify(playlist.tags),
        alg_tags: JSON.stringify(playlist.algTags),
        playlist_create_time: playlist.createTime,
        playlist_update_time: playlist.updateTime,
        track_count: playlist.trackCount,
        play_count: playlist.playCount,
        subscribed_count: playlist.subscribedCount,
        share_count: playlist.shareCount,
        comment_count: playlist.commentCount,
        cover_image: extractImagePath(playlist.coverImgUrl),
        title_image: extractImagePath(playlist.titleImageUrl),
        background_cover: extractImagePath(playlist.backgroundCoverUrl),
        ordered: playlist.ordered,
        copied: playlist.copied,
        status: playlist.status,
        privacy: playlist.privacy,
        ad_type: playlist.adType,
        special_type: playlist.specialType,
        official_playlist_type: playlist.officialPlaylistType,
        op_recommend: playlist.opRecommend,
        high_quality: playlist.highQuality,
        new_imported: playlist.newImported,
        update_frequency: playlist.updateFrequency,
        grade_status: playlist.gradeStatus,
        score: playlist.score,
        creator: JSON.stringify(playlist.creator),
        video_ids: JSON.stringify(playlist.videoIds),
        videos: JSON.stringify(playlist.videos),
        banned_track_ids: JSON.stringify(playlist.bannedTrackIds),
        remix_video: JSON.stringify(playlist.remixVideo),
    };

    // Only report banned tracks. The value is persisted in `banned_track_ids`
    // below, so the crawl does not need to stop to keep it.
    if (playlist.bannedTrackIds) {
        console.log("bannedTrackIds", playlist.bannedTrackIds);
    }

    // One row per (song, playlist) pair; tolerate a response without trackIds.
    const relationRows = (playlist.trackIds ?? []).map(track => [track.id, playlist.id, track.alg, track.rcmdReason]);
    if (relationRows.length > 0) {
        await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [relationRows]);
    }

    // Upsert the playlist row; column names are quoted the same way in both clauses.
    const columns = Object.keys(playlistInfo);
    const columnList = columns.map(field => `\`${field}\``).join(",");
    const updateList = columns.map(field => `\`${field}\`=VALUES(\`${field}\`)`).join(", ");
    await dbUtils.query(`
        INSERT INTO playlist ( ${columnList} ) VALUES ?
        ON DUPLICATE KEY UPDATE ${updateList}
    `, [[Object.values(playlistInfo)]]);

    return playlistInfo;
}

/**
 * Reduce an image URL on a `p?.music.126.net` host to the path after the host.
 *
 * @param {string|null|undefined} url - Image URL taken from the playlist detail.
 * @returns {string} '' for a missing URL, the path after the host for a
 *   matching URL, and the URL unchanged when it is on any other host.
 */
function extractImagePath(url) {
    if (!url) return '';
    const match = /^https?:\/\/p.\.music\.126\.net\/(.*?)$/.exec(url);
    return match ? match[1] : url;
}
|
||||
|
||||
module.exports = {
|
||||
fetch: fetch,
|
||||
// fetchAll: fetchAll,
|
||||
fetchAll: fetchAll,
|
||||
}
|
Reference in New Issue
Block a user