1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

??改为|| 兼容node v12.13.1

This commit is contained in:
程序员小墨 2022-10-09 17:37:57 +08:00
parent c28fca34be
commit 7afc7f79e2
10 changed files with 215 additions and 87 deletions

View File

@ -21,7 +21,7 @@ const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
const commentUtils = require('./src/getInfo/commentUtils');
// const playlistUtils = require('./src/getInfo/playlistUtils');
const playlistUtils = require('./src/getInfo/playlistUtils');
/**
* 测试
@ -34,12 +34,14 @@ async function test() {
// let res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
// let res = await artistInfoUtils.fetch({ artistId: "12023508" });
// let res = await songInfoUtils.fetch({ songId: "437608327" });
// let res = await playlistUtils.fetch({ songId: "2320041657", debug: true });
// let res = await playlistUtils.fetch({ playlistId: "4980157066", debug: true });
// let res = await albumInfoUtils.getFromDatabase({ albumId: "9156" });
// let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
// let res = await songInfoUtils.getFromDatabase({ songId: "437608327" });
let res = await dbUtils.query('INSERT IGNORE INTO song (`song_id`, `title`, `image`, `pub_date`) VALUES ?',
[[[100, '4', '3', '4'], [200, '23', '4', '5']]]);
console.log(res);
}

View File

@ -49,7 +49,7 @@ async function fetchAll({ args = {}, isUpdate = false }) {
for (let i = 0; i < albumIds.length; i++) {
await global.checkIsExit();
const albumId = albumIds[i];
console.log(`${i + 1}/${albumIds.length} | album: ${albumId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
console.log(`${i + 1}/${albumIds.length} | album: ${albumId} | ${args.min || "?"}-${args.max || "?"}`);
try {
await fetch({ albumId: albumId, update: isUpdate });
} catch (err) {

View File

@ -40,7 +40,7 @@ async function fetchAll({ args = {} }) {
for (let i = 0; i < artistIds.length; i++) {
await global.checkIsExit();
const artistId = artistIds[i];
console.log(`${i + 1}/${artistIds.length} | artist: ${artistId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
console.log(`${i + 1}/${artistIds.length} | artist: ${artistId} | ${args.min || "?"}-${args.max || "?"}`);
try {
await fetch({ artistId: artistId });
} catch (err) {

View File

@ -37,7 +37,7 @@ async function fetchAll({ args = {} }) {
for (let i = 0; i < songIds.length; i++) {
await global.checkIsExit();
const songId = songIds[i];
console.log(`${i + 1}/${songIds.length} | comment: ${songId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
console.log(`${i + 1}/${songIds.length} | comment: ${songId} | ${args.min || "?"}-${args.max || "?"}`);
try {
await fetch({ songId: songId });
} catch (err) {

View File

@ -25,7 +25,7 @@ async function fetchAll({ args = {} }) {
for (let i = 0; i < songIds.length; i++) {
await global.checkIsExit();
const songId = songIds[i];
console.log(`${i + 1}/${songIds.length} | lyric: ${songId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
console.log(`${i + 1}/${songIds.length} | lyric: ${songId} | ${args.min || "?"}-${args.max || "?"}`);
try {
await fetch({ songId: songId });
} catch (err) {

View File

@ -9,7 +9,7 @@ const dbUtils = global.dbUtils;
// refer:
// https://neteasecloudmusicapi-docs.4everland.app/
// https://github.com/Binaryify/NeteaseCloudMusicApi
const { playlist_catlist, playlist_hot } = require('NeteaseCloudMusicApi');
const { playlist_catlist, playlist_hot, playlist_detail, playlist_track_all, song_detail } = require('NeteaseCloudMusicApi');
// // 从数据库中查出还缺少的歌词,并进行爬取
// async function fetchAll() {
@ -36,7 +36,20 @@ async function fetch({ playlistId, debug = false }) {
// https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95%e5%88%86%e7%b1%bb
var queryParams = {};
try {
var playlistResult = await playlist_catlist(queryParams);
// 获取歌单分类
// var playlistResult = await playlist_catlist(queryParams);
// var playlistResult = await playlist_hot(queryParams);
// var playlistResult = await playlist_detail({
// id: playlistId,
// });
var playlistResult = await song_detail({
// ids: ["536623501", "536623501"].join(','),
});
// var playlistResult = await playlist_track_all({
// id: playlistId,
// limit: 10,
// offset: 0,
// });
fs.writeFileSync(path.join(__dirname, "../../temp", `playlist-${playlistId}.json`), JSON.stringify(playlistResult));
} catch (errors) {
console.error(errors);

View File

@ -6,23 +6,6 @@ const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
// 从数据库中查询
async function getFromDatabase({ songId }) {
// 查询出专辑
let infoResultSet = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
if (infoResultSet.length == 0) return {};
// 查出专辑与歌曲对应关系
let albumRelationResultSet = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
let artistRelationResultSet = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);
// 拼装
let songInfo = JSON.parse(JSON.stringify(infoResultSet[0]));
songInfo.albumIds = albumRelationResultSet.map(album => album.album_id);
songInfo.artistIds = artistRelationResultSet.map(artist => artist.artist_id);
return songInfo;
}
// 从数据库中查出还缺少的歌曲,并进行爬取
async function fetchAll({ args = {} }) {
console.log("start fetching songs ...");
@ -41,12 +24,18 @@ async function fetchAll({ args = {} }) {
var songIds = await dbUtils.query(sql, []);
songIds = songIds.map(item => item.song_id);
for (let i = 0; i < songIds.length; i++) {
// 0 - 100, 200 - 399, 400 - ..., ... - songIds.length-1
// 0 1 2 count-1
var step = 270;
var total = songIds.length;
var count = Math.ceil(total / step);
for (let i = 0; i < count; i++) {
await global.checkIsExit();
const songId = songIds[i];
console.log(`${i + 1}/${songIds.length} | song: ${songId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
var subArray = songIds.slice(i * step, (i + 1) * step);
console.log(`${i + 1}/${count} | song: ${subArray[0]}-${subArray.slice(-1)[0]} | ${args.min || "?"}-${args.max || "?"}`);
try {
await fetch({ songId: songId });
await fetch({ songIdArray: subArray });
} catch (err) {
console.error(err);
}
@ -55,55 +44,26 @@ async function fetchAll({ args = {} }) {
}
// 获取音乐详情
async function fetch({ songId, debug = false }) {
async function fetch({ songIdArray, debug = false }) {
let result = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
if (result[0].count > 0 && !debug) {
console.log(`数据库中已有数据,跳过 songId: ${songId}`);
return;
}
let url = `https://music.163.com/song?id=${songId}`;
// https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e8%8e%b7%e5%8f%96%e6%ad%8c%e6%9b%b2%e8%af%a6%e6%83%85
try {
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `song-${songId}.html`), 'utf8');
var html = await requestUtils.getApiResult(url);
// fs.writeFileSync(path.join(__dirname, "../../temp", `song-${songId}.html`), html);
// 每一次大概可以取到270条以上
var songResult = await song_detail({
ids: ["64956", "64956"].join(','),
});
fs.writeFileSync(path.join(__dirname, "../../temp", `song-${playlistId}.json`), JSON.stringify(playlistResult));
} catch (errors) {
console.error(errors);
return;
}
if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
let deleteResult1 = await dbUtils.query('DELETE FROM song_album_relation WHERE song_id = ?', [songId]);
let deleteResult2 = await dbUtils.query('DELETE FROM song_artist_relation WHERE song_id = ?', [songId]);
console.log(`song: ${songId} 不存在从song_album_relation, song_artist_relation表中删除. affectedRows: ${deleteResult1.affectedRows}, ${deleteResult2.affectedRows}`);
return;
}
// 正则匹配
let regExResult = /\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/.exec(html);
let songInfoJSONString = regExResult[1];
let songInfoDict = JSON.parse(songInfoJSONString);
// console.log(songInfoDict);
// TODO 考虑歌曲别名 例如https://music.163.com/#/song?id=26830207
let title = /<meta property="og:title" content="(.*?)" \/>/.exec(html)[1];
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
let artist = /<meta property="og:music:artist" content="(.*?)" \/>/.exec(html)[1];
let duration = /<meta property="music:duration" content="(.*?)"\/>/.exec(html)[1];
try {
var album = /<meta property="og:music:album" content="(.*?)"\/>/.exec(html)[1];
var albumId = /<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/.exec(html)[1];
} catch (err) {
// 歌曲不在专辑中
}
const reg = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
let artistIds = [];
let matched = null;
while ((matched = reg.exec(html)) !== null) {
artistIds.push(matched[1]);
}
console.log(playlistResult);
let songInfo = {
songId: songId,
@ -138,7 +98,6 @@ async function fetch({ songId, debug = false }) {
}
module.exports = {
getFromDatabase: getFromDatabase,
fetch: fetch,
fetchAll: fetchAll,
}

View File

@ -0,0 +1,144 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
/**
 * Read one song from the database together with its album / artist links.
 *
 * @param {Object} options
 * @param {(string|number)} options.songId - id of the song to look up.
 * @returns {Promise<Object>} a plain-object copy of the `song` row extended with
 *     `albumIds` and `artistIds`, or an empty object when no `song` row matches.
 */
async function getFromDatabase({ songId }) {
    const songRows = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
    if (songRows.length == 0) {
        return {};
    }

    // Relation tables: which albums and which artists this song is linked to.
    const albumLinks = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
    const artistLinks = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);

    // The JSON round-trip produces a detached plain-object copy of the first row.
    const songCopy = JSON.parse(JSON.stringify(songRows[0]));
    return Object.assign(songCopy, {
        albumIds: albumLinks.map(({ album_id }) => album_id),
        artistIds: artistLinks.map(({ artist_id }) => artist_id),
    });
}
/**
 * Find every song id that appears in a relation table but has no row in
 * `song` yet, and fetch those songs one after another.
 *
 * @param {Object} options
 * @param {Object} [options.args={}] - crawl options:
 *     `min` (exclusive lower bound on song_id), `max` (inclusive upper bound),
 *     `order` (appended after `ORDER BY song_id`), `limit` (appended after `LIMIT`).
 *     A falsy value (including 0) leaves the corresponding clause out.
 * @returns {Promise<void>}
 */
async function fetchAll({ args = {} }) {
    console.log("start fetching songs ...");

    // NOTE(review): args.* are interpolated into the SQL text as-is, without escaping.
    const lowerBound = args.min ? `song_id > ${args.min}` : '1=1';
    const upperBound = args.max ? `song_id <= ${args.max}` : '1=1';
    const whereClause = `${lowerBound} AND ${upperBound}`;
    const missingFrom = (table) =>
        `SELECT DISTINCT song_id FROM ${table} WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )`;

    // Leading / trailing empty entries keep the newline at both ends of the statement.
    const sql = [
        '',
        missingFrom('song_artist_relation'),
        'UNION',
        missingFrom('song_album_relation'),
        args.order ? `ORDER BY song_id ${args.order}` : '',
        args.limit ? `LIMIT ${args.limit}` : '',
        '',
    ].join('\n');
    console.log(sql);

    const rows = await dbUtils.query(sql, []);
    const songIds = rows.map((row) => row.song_id);
    const total = songIds.length;

    for (const [index, songId] of songIds.entries()) {
        await global.checkIsExit();
        console.log(`${index + 1}/${total} | song: ${songId} | ${args.min || "?"}-${args.max || "?"}`);
        try {
            await fetch({ songId });
        } catch (err) {
            // A failing song is logged and must not stop the rest of the batch.
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Scrape the detail page of one song and persist the result.
 *
 * Flow: skip when the song already has a row in `song` (unless `debug`),
 * download https://music.163.com/song?id=<songId>, remove the song's relation
 * rows when the page reports "not found", otherwise parse the page metadata and
 * insert the song together with its album / artist relations.
 *
 * @param {Object} options
 * @param {(string|number)} options.songId - id of the song to fetch.
 * @param {boolean} [options.debug=false] - when true, fetch even if the song row already exists.
 * @returns {Promise<(Object|undefined)>} the parsed song info, or `undefined` when
 *     the song was skipped, the download failed, or the page does not exist.
 * @throws {Error} when a required piece of metadata is missing from the page,
 *     or when one of the INSERT statements fails.
 */
async function fetch({ songId, debug = false }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
    if (existing[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 songId: ${songId}`);
        return;
    }

    const url = `https://music.163.com/song?id=${songId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        // Download failures are logged and treated as "nothing fetched".
        console.error(errors);
        return;
    }

    // The site answers with a "page not found" note for songs that no longer exist:
    // drop the dangling relation rows so the song is not queued again.
    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        const deleteResult1 = await dbUtils.query('DELETE FROM song_album_relation WHERE song_id = ?', [songId]);
        const deleteResult2 = await dbUtils.query('DELETE FROM song_artist_relation WHERE song_id = ?', [songId]);
        console.log(`song: ${songId} 不存在从song_album_relation, song_artist_relation表中删除. affectedRows: ${deleteResult1.affectedRows}, ${deleteResult2.affectedRows}`);
        return;
    }

    // Returns capture group 1 of a mandatory pattern, failing with a message that
    // names the missing piece instead of an opaque "cannot read property of null".
    const capture = (pattern, label) => {
        const found = pattern.exec(html);
        if (found === null) {
            throw new Error(`song ${songId}: ${label} not found in page`);
        }
        return found[1];
    };

    const songInfoDict = JSON.parse(
        capture(/\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/, 'ld+json metadata'));

    // TODO: take song aliases into account, e.g. https://music.163.com/#/song?id=26830207
    const title = capture(/<meta property="og:title" content="(.*?)" \/>/, 'og:title');
    const image = capture(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/, 'og:image');
    const artist = capture(/<meta property="og:music:artist" content="(.*?)" \/>/, 'og:music:artist');
    const duration = capture(/<meta property="music:duration" content="(.*?)"\/>/, 'music:duration');

    // A song may not belong to any album, so both album tags are optional.
    // The album id is only looked up when the album name tag is present.
    const albumMatch = /<meta property="og:music:album" content="(.*?)"\/>/.exec(html);
    const albumIdMatch = albumMatch
        ? /<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/.exec(html)
        : null;

    const artistIdPattern = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
    const artistIds = [];
    let matched = null;
    while ((matched = artistIdPattern.exec(html)) !== null) {
        artistIds.push(matched[1]);
    }

    const songInfo = {
        songId: songId,
        title: title,
        image: image,
        pubDate: songInfoDict.pubDate,
        artist: artist,
        artistIds: artistIds,
        album: (albumMatch && albumMatch[1]) || null,
        albumId: (albumIdMatch && albumIdMatch[1]) || null,
        duration: duration,
    };

    // All inserts are issued in the same order as before, but are now awaited so
    // that a database error reaches the caller instead of becoming an unhandled
    // rejection, and so that the rows exist once this function resolves.
    const inserts = [];
    if (songInfo.albumId !== null) {
        inserts.push(dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
            song_id: songInfo.songId,
            album_id: songInfo.albumId,
        }));
    }
    for (const artistId of artistIds) {
        inserts.push(dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
            song_id: songInfo.songId,
            artist_id: artistId,
        }));
    }
    inserts.push(dbUtils.query('INSERT IGNORE INTO song SET ?', {
        song_id: songInfo.songId,
        title: songInfo.title,
        image: songInfo.image,
        pub_date: songInfo.pubDate,
    }));
    await Promise.all(inserts);

    return songInfo;
}
module.exports = {
getFromDatabase: getFromDatabase,
fetch: fetch,
fetchAll: fetchAll,
}

View File

@ -1,41 +1,41 @@
cd C:\Users\Coz\Desktop\tools\
cd /www/neteasemusic/tools
-- 本地
node index --utils song --min 1900000000 --max 2000000000 --order DESC # ing 在本地
node index --utils song --min 1900000000 --max 2000000000 --order ASC # ing 在本地
node index --utils song --min 1800000000 --max 1900000000 --order DESC # ing 在Windows服务器上
node index --utils song --min 1900000000 --max 2000000000 --order DESC #
node index --utils song --min 1900000000 --max 2000000000 --order ASC #
node index --utils song --min 1800000000 --max 1900000000 --order DESC #
-- Linux服务器
node index --utils song --min 1290000000 --max 1500000000 --order DESC # ing 在Linux服务器上
node index --utils song --min 1290000000 --max 1500000000 --order DESC #
-- Windows服务器
node index --utils song --min 400000000 --max 1000000000 --order ASC # ing 在Windows服务器上
node index --utils song --min 0 --max 400000000 --order ASC # ing 在Windows服务器上
node index --utils song --min 400000000 --max 1000000000 --order ASC #
node index --utils song --min 0 --max 400000000 --order ASC #
#############################################################################################
-- 完成: 36000000-38000000 72000000-96000000 139000000-151000000
-- Windows 服务器
node index --utils album --min 134000000 --max 160000000 # 完成
node index --utils album --min 134000000 --max 160000000 #
-- 本机
node index --utils album --min 0 --max 134000000 # ing 在本地 & 在Linux服务器上
node index --utils album --min 0 --max 134000000 #
#############################################################################################
-- 完成: 38000000-55000000
-- Windows服务器
node index --utils artist --min 0 --max 55000000 # ing 在本地
node index --utils artist --min 0 --max 55000000 #
#############################################################################################
-- 本机
node index --utils comment --min 1800000000 --max 2000000000 --order DESC --limit 2000 # ing 在本地
node index --utils comment --min 1800000000 --max 2000000000 --order DESC --limit 2000 #
-- Windows服务器
node index --utils comment --min 1290000000 --max 1500000000 --order DESC --limit 2000 # ing 在Windows服务器上
node index --utils comment --min 400000000 --max 1000000000 --order ASC --limit 2000 # ing 在Linux服务器上
node index --utils comment --min 1290000000 --max 1500000000 --order DESC --limit 2000 #
node index --utils comment --min 400000000 --max 1000000000 --order ASC --limit 2000 #
-- Linux服务器
node index --utils comment --min 0 --max 400000000 --order ASC --limit 2000 # ing
node index --utils comment --min 0 --max 400000000 --order ASC --limit 2000 #
#############################################################################################
-- 完成: 40000000-460000000 500000000-560000000
node index --utils lyric --min 0 --max 1950000000 # ing 在Linux服务器上
node index --utils lyric --min 0 --max 1950000000 #
-- Windows服务器
node index --utils lyric --min 1800000000 --max 1950000000 # ing 在Linux服务器上
node index --utils lyric --min 1950000000 --max 2000000000 # ing 在本地
node index --utils lyric --min 0 --max 400000000 # 完成
node index --utils lyric --min 1800000000 --max 1950000000 #
node index --utils lyric --min 1950000000 --max 2000000000 #
node index --utils lyric --min 0 --max 400000000 #
-- 本机
node index --utils lyric --min 400000000 --max 1000000000 # 完成
node index --utils lyric --min 400000000 --max 1000000000 #
后期:
@ -47,3 +47,13 @@ node index --utils lyric --min 400000000 --max 1000000000
爬取歌单playlist
被删除的artist和album回头再通过其他表中的数据反查回来
后续分区(不能在现有表上修改,只能重新查出数据到新表)
alter table song add partition (
PARTITION p1 VALUES LESS THAN ( 50000000),
PARTITION p2 VALUES LESS THAN (1000000000),
PARTITION p3 VALUES LESS THAN (1500000000),
PARTITION p4 VALUES LESS THAN (2000000000),
PARTITION p5 VALUES LESS THAN MAXVALUE
);

View File

@ -12,7 +12,7 @@ async function getApiResult(url, params = null) {
return await query({
method: 'GET',
url: url,
...params ?? {}
...params || {}
});
}