1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
This commit is contained in:
程序员小墨 2022-10-02 17:37:28 +08:00
parent 93db6371d9
commit 1939398579
10 changed files with 3183 additions and 28 deletions

View File

@ -20,6 +20,7 @@ const songInfoUtils = require('./src/getInfo/songInfoUtils');
const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
const commentUtils = require('./src/getInfo/commentUtils');
/**
* 测试
@ -46,10 +47,10 @@ async function test() {
async function main() {
console.log("neteaseMusic Start fetch ...");
while (true) {
// 删除脏数据
var affectedRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
var affectedRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
console.log(`删除脏数据 affectedRows:`, affectedRows1.affectedRows, affectedRows2.affectedRows);
// // 删除脏数据
// var affectedRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
// var affectedRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
// console.log(`删除脏数据 affectedRows:`, affectedRows1.affectedRows, affectedRows2.affectedRows);
await songInfoUtils.fetchAll();
await albumInfoUtils.fetchAll({});

View File

@ -60,6 +60,42 @@ CREATE TABLE `lyric` (
PRIMARY KEY (`song_id`,`version`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
-- User profile rows: id, type, nickname and avatar, plus crawl bookkeeping timestamps.
CREATE TABLE `user` (
  `user_id`     INT(10) UNSIGNED NOT NULL COMMENT '用户id',
  `user_type`   TINYINT(4) UNSIGNED NOT NULL COMMENT '用户类型',
  `nickname`    VARCHAR(200) NOT NULL COMMENT '用户昵称',
  -- Only the part of the avatar URL after the http://p1.music.126.net/ prefix is stored.
  `avatar_url`  VARCHAR(200) NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
  -- create_time is set once on insert; modify_time is refreshed by MySQL on every update.
  `create_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  `modify_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`user_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
-- One row per song comment, keyed by the comment id.
CREATE TABLE `comment` (
  `comment_id` bigint(20) unsigned NOT NULL COMMENT '评论id',
  -- A parent comment id is itself a comment id, so it must use the same width as
  -- `comment_id`; int(10) could not hold ids that need the bigint range.
  `parent_comment_id` bigint(20) unsigned NOT NULL COMMENT '父评论id',
  `user_id` int(10) unsigned NOT NULL COMMENT '用户id',
  `song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
  `content` text NOT NULL COMMENT '评论内容',
  -- The comment time is stored as text, exactly as written by the crawler.
  `time` varchar(50) NOT NULL DEFAULT '' COMMENT '评论时间',
  `like_count` int(10) unsigned NOT NULL COMMENT '点赞数',
  -- 0 = comments, 1 = hotComments, 2 = topComments.
  `comment_type` tinyint(4) unsigned NOT NULL COMMENT '评论类型 0-comments 1-hotComments 2-topComments',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`comment_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
-- Per-song crawl cursor for comments: one row per song, tracking the time window
-- that has been crawled and the current crawl status.
CREATE TABLE `comment_progress` (
  `song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
  -- The three *_time columns hold comment timestamps. They are bigint because the
  -- crawler pages with millisecond epoch cursors (e.g. 1664655762881), which do not
  -- fit in a signed 32-bit int.
  `max_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '开始爬取/开始增量爬取的时候 最新一条评论的时间',
  `min_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '上一次爬取时最后一条评论的时间 第一次爬取时为0',
  `current_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '本次爬取/增量时,最早的一条评论时间',
  -- 0 = waiting for (incremental) crawl, 1 = crawling, 2 = finished.
  `current_status` tinyint(4) unsigned NOT NULL DEFAULT 0 COMMENT '爬取进度 0-等待爬取/增量爬取 1-爬取中 2-完成',
  `total` int(10) NOT NULL DEFAULT 0 COMMENT '评论总数',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`song_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `log` (
`id` int(10) unsigned NOT NULL COMMENT 'id',
`name` varchar(200) NOT NULL COMMENT '方法/数据库',

View File

@ -0,0 +1,70 @@
/**
* 该文件来自https://github.com/Binaryify/NeteaseCloudMusicApi/blob/master/util/crypto.js
*/
const crypto = require('crypto')
const iv = Buffer.from('0102030405060708')
const presetKey = Buffer.from('0CoJUm6Qyw8W8jud')
const linuxapiKey = Buffer.from('rFgB&h#%2?^eDg:Q')
const base62 = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
const publicKey =
'-----BEGIN PUBLIC KEY-----\nMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDgtQn2JZ34ZC28NWYpAUd98iZ37BUrX/aKzmFbt7clFSs6sXqHauqKWqdtLkF2KexO40H1YTX8z2lSgBBOAxLsvaklV8k4cBFK9snQXE9/DDaFt6Rr7iVZMldczhC0JNgTz+SHXT6CBHuX3e9SdB1Ua44oncaTWz7OBGLbCiK45wIDAQAB\n-----END PUBLIC KEY-----'
const eapiKey = 'e82ckenh8dichen8'
/**
 * Encrypts a buffer with AES-128 in the requested block mode.
 *
 * @param {Buffer} buffer - Plaintext bytes.
 * @param {string} mode - Mode suffix appended to `aes-128-` (callers in this file pass 'cbc' or 'ecb').
 * @param {Buffer|string} key - Cipher key, handed to `crypto.createCipheriv` unchanged.
 * @param {Buffer|string} iv - Initialisation vector (callers pass '' for ECB).
 * @returns {Buffer} The ciphertext: the `update` output followed by the `final` block.
 */
const aesEncrypt = (buffer, mode, key, iv) => {
  const algorithm = 'aes-128-' + mode
  const encryptor = crypto.createCipheriv(algorithm, key, iv)
  const head = encryptor.update(buffer)
  const tail = encryptor.final()
  return Buffer.concat([head, tail])
}
/**
 * RSA-encrypts a buffer with no padding scheme.
 *
 * The input is left-padded with zero bytes to exactly 128 bytes before being
 * passed to `crypto.publicEncrypt` with `RSA_NO_PADDING`.
 *
 * @param {Buffer} buffer - Message bytes; `Buffer.alloc` throws if it is longer than 128 bytes.
 * @param {string|Buffer|object} key - Public key, handed to `crypto.publicEncrypt` as `key`.
 * @returns {Buffer} The RSA ciphertext.
 */
const rsaEncrypt = (buffer, key) => {
  const zeroPrefix = Buffer.alloc(128 - buffer.length)
  const block = Buffer.concat([zeroPrefix, buffer])
  const options = { key, padding: crypto.constants.RSA_NO_PADDING }
  return crypto.publicEncrypt(options, block)
}
/**
 * Builds the encrypted form fields for a "weapi" request.
 *
 * The JSON payload is AES-CBC encrypted with the preset key and base64-encoded,
 * then that base64 text is AES-CBC encrypted again with a random 16-character
 * key and base64-encoded. The random key is reversed and RSA-encrypted so it
 * can be sent alongside the payload.
 *
 * @param {*} object - Value serialised with `JSON.stringify`.
 * @returns {{params: string, encSecKey: string}} base64 `params` and hex `encSecKey`.
 */
const weapi = (object) => {
  const payload = JSON.stringify(object)
  // Map each random byte to the char code of a base62 character.
  const secretKey = crypto
    .randomBytes(16)
    .map((byte) => base62.charAt(byte % 62).charCodeAt())
  const firstPass = aesEncrypt(Buffer.from(payload), 'cbc', presetKey, iv)
  const firstPassBase64 = firstPass.toString('base64')
  const params = aesEncrypt(
    Buffer.from(firstPassBase64),
    'cbc',
    secretKey,
    iv,
  ).toString('base64')
  // reverse() mutates secretKey in place, so this must run only after `params` is computed.
  const encSecKey = rsaEncrypt(secretKey.reverse(), publicKey).toString('hex')
  return { params, encSecKey }
}
/**
 * Builds the encrypted form field for a "linuxapi" request.
 *
 * @param {*} object - Value serialised with `JSON.stringify`.
 * @returns {{eparams: string}} Upper-case hex of the AES-128-ECB ciphertext of the JSON text.
 */
const linuxapi = (object) => {
  const payload = Buffer.from(JSON.stringify(object))
  const encrypted = aesEncrypt(payload, 'ecb', linuxapiKey, '')
  const eparams = encrypted.toString('hex').toUpperCase()
  return { eparams }
}
/**
 * Builds the encrypted form field for an "eapi" request.
 *
 * An MD5 digest is computed over the url and the payload text wrapped in fixed
 * marker words; url, text and digest are then joined with the
 * `-36cd479b6b5-` separator and AES-128-ECB encrypted.
 *
 * @param {string} url - Request path included in the signed message.
 * @param {object|string} object - Payload; values with `typeof 'object'` are JSON-serialised, anything else is used as-is.
 * @returns {{params: string}} Upper-case hex of the ciphertext.
 */
const eapi = (url, object) => {
  let text = object
  if (typeof object === 'object') {
    text = JSON.stringify(object)
  }
  const signedMessage = `nobody${url}use${text}md5forencrypt`
  const md5 = crypto.createHash('md5')
  md5.update(signedMessage)
  const digest = md5.digest('hex')
  const plaintext = Buffer.from(`${url}-36cd479b6b5-${text}-36cd479b6b5-${digest}`)
  const encrypted = aesEncrypt(plaintext, 'ecb', eapiKey, '')
  return { params: encrypted.toString('hex').toUpperCase() }
}
/**
 * Decrypts an AES-128-ECB ciphertext with the eapi key.
 *
 * @param {Buffer} cipherBuffer - Ciphertext bytes.
 * @returns {Buffer} The decrypted bytes.
 */
const decrypt = (cipherBuffer) => {
  const decryptor = crypto.createDecipheriv('aes-128-ecb', eapiKey, '')
  const head = decryptor.update(cipherBuffer)
  const tail = decryptor.final()
  return Buffer.concat([head, tail])
}
module.exports = { weapi, linuxapi, eapi, decrypt }

View File

@ -130,6 +130,14 @@ async function fetch({ albumId, debug = false, update = false }) {
songIds: songIds,
};
// console.log("albumInfo", albumInfo);
songIds.forEach(function (songId) {
if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(albumId)) || Number(songId) === 0)
return;
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
song_id: songId,
album_id: albumId,
});
});
dbUtils.query(update ? `UPDATE album SET ? WHERE album_id = ${albumId}` : 'INSERT IGNORE INTO album SET ?', {
album_id: albumInfo.albumId,
title: albumInfo.title,
@ -140,14 +148,6 @@ async function fetch({ albumId, debug = false, update = false }) {
company: albumInfo.company,
version: 1
});
songIds.forEach(function (songId) {
if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(albumId)) || Number(songId) === 0)
return;
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
song_id: songId,
album_id: albumId,
});
});
return albumInfo;
}

View File

@ -90,13 +90,6 @@ async function fetch({ artistId, debug = false }) {
songIds: songIds,
};
// console.log("artistInfo", artistInfo);
dbUtils.query('INSERT IGNORE INTO artist SET ?', {
artist_id: artistInfo.artistId,
title: artistInfo.title,
description: artistInfo.description,
image: artistInfo.image,
pub_date: artistInfo.pubDate,
});
songIds.forEach(function (songId) {
if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(artistId)) || Number(artistId) === 0)
return;
@ -105,6 +98,13 @@ async function fetch({ artistId, debug = false }) {
artist_id: artistId,
});
});
dbUtils.query('INSERT IGNORE INTO artist SET ?', {
artist_id: artistInfo.artistId,
title: artistInfo.title,
description: artistInfo.description,
image: artistInfo.image,
pub_date: artistInfo.pubDate,
});
return artistInfo;
}

View File

@ -0,0 +1,182 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
const { comment_music } = require('NeteaseCloudMusicApi');
/**
 * Crawls comments for songs whose progress row is not yet finished.
 *
 * First registers every song_id from `song` that has no `comment_progress`
 * row, then selects unfinished rows (current_status != 2; the query is limited
 * to one row) and crawls each with `fetch`, sleeping `global.sleepTime` after
 * every song. Errors thrown by `fetch` are logged and do not stop the loop.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
    console.log("start fetching comment ...")

    // Seed comment_progress with the songs that are not tracked yet.
    await dbUtils.query(`
INSERT INTO comment_progress ( song_id )
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
`, []);

    // Pick up rows that are waiting or still in progress.
    const pendingRows = await dbUtils.query(`
SELECT song_id FROM comment_progress WHERE current_status != 2 LIMIT 1
`, []);
    const songIds = pendingRows.map(row => row.song_id);

    for (const [index, songId] of songIds.entries()) {
        await global.checkIsExit();
        console.log(`${index + 1}/${songIds.length} | comment: ${songId}`);
        try {
            await fetch({ songId: songId });
        } catch (err) {
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Fetches comments for one song and upserts them into the `comment` table.
 *
 * The song is only processed when it has a `comment_progress` row whose
 * current_status is not 2 (finished). Comments are requested through
 * `comment_music` from the NeteaseCloudMusicApi package (instead of calling
 * the weapi comment endpoint directly with hand-encrypted form data).
 *
 * NOTE: paging and progress persistence are still placeholders — exactly one
 * page is processed per call and `comment_progress` is not written back.
 *
 * @param {object} options
 * @param {number|string} options.songId - Id of the song whose comments are fetched.
 * @param {boolean} [options.debug=false] - Currently unused.
 * @returns {Promise<void>} Resolves once the fetched page has been written.
 *   Rejects if a database query fails.
 */
async function fetch({ songId, debug = false }) {
    // Look for a progress row of this song that is not finished yet.
    const commentProgress = await dbUtils.query(`
SELECT * FROM comment_progress WHERE song_id = ? and current_status != 2 LIMIT 1
`, [songId]);
    if (commentProgress.length === 0) {
        console.log('No commentProgress found, song_id:', songId);
        return;
    }

    const item = commentProgress[0];
    const progress = {
        maxTime: item.max_time,
        minTime: item.min_time,
        currentTime: item.current_time,
        currentStatus: item.current_status,
        total: item.total,
    };
    const queryParams = {
        id: songId,
        limit: 20,
        // before: undefined,
    };
    console.log(progress);

    // Maps an API comment to a row of the `comment` table.
    // commentType: 0-comments 1-hotComments 2-topComments
    const getCommitInfoForInsert = (comment, commentType) => ({
        comment_id: comment.commentId,
        parent_comment_id: comment.parentCommentId,
        user_id: comment.user?.userId,
        song_id: songId,
        content: comment.content,
        time: comment.time,
        like_count: comment.likedCount,
        comment_type: commentType,
    });

    // Maps the author of an API comment to a row of the `user` table.
    const getUserInfoForInsert = (comment) => {
        const user = comment.user;
        // A missing avatarUrl must not abort the whole page.
        const avatarUrl = user.avatarUrl ?? '';
        // Keep only the part after the CDN host; fall back to the full URL otherwise.
        const matched = avatarUrl.match(/^http:\/\/p\d+\.music\.126\.net\/(.*?)$/);
        return {
            user_id: user.userId,
            user_type: user.userType,
            nickname: user.nickname,
            avatar_url: (matched && matched[1]) || avatarUrl,
        };
    };

    let isFinish = false;
    while (!isFinish) {
        let commentResult;
        try {
            commentResult = await comment_music(queryParams);
        } catch (errors) {
            // Request failed: wait a moment and retry the same page.
            console.error(errors);
            await sleepUtils.sleep(1000);
            continue;
        }

        // Debug dump of the raw response. A failure here (e.g. missing temp
        // directory) must not trigger a refetch, so it is handled separately.
        try {
            fs.writeFileSync(path.join(__dirname, "../../temp", `comment-${songId}.json`), JSON.stringify(commentResult));
        } catch (dumpError) {
            console.error(dumpError);
        }

        // Each list comes from its own field of the response body.
        const body = commentResult.body || {};
        const topComments = body.topComments || [];
        const hotComments = body.hotComments || [];
        const comments = body.comments || [];

        const commentInfoList = [
            ...topComments.map(comment => getCommitInfoForInsert(comment, 2)),
            ...hotComments.map(comment => getCommitInfoForInsert(comment, 1)),
            ...comments.map(comment => getCommitInfoForInsert(comment, 0))
        ];
        // TODO: userInfoList is built but not yet persisted to the `user` table.
        const userInfoList = [...topComments, ...hotComments, ...comments]
            .filter(comment => comment.user).map(getUserInfoForInsert);
        console.log(commentInfoList);
        // console.log(userInfoList);

        // Upsert sequentially and await each query, so that failures reach the
        // caller instead of becoming unhandled rejections.
        for (const commentInfo of commentInfoList) {
            const result = await dbUtils.query(`
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
ON DUPLICATE KEY UPDATE content = ? , like_count = ? , comment_type = GREATEST(comment_type, ? ), modify_time = CURRENT_TIMESTAMP
`, [
                [
                    [
                        commentInfo.comment_id,
                        commentInfo.parent_comment_id,
                        commentInfo.user_id,
                        commentInfo.song_id,
                        commentInfo.content,
                        commentInfo.time,
                        commentInfo.like_count,
                        commentInfo.comment_type
                    ]
                ],
                commentInfo.content,
                commentInfo.like_count,
                commentInfo.comment_type
            ]);
            console.log(result);
        }

        // TODO: decide whether the crawl is complete; for now stop after one page.
        isFinish = true;

        // TODO: placeholder values — derive the next cursor and the progress
        // from the fetched comments, then persist `progress` to comment_progress.
        queryParams.before = 1111;
        progress.maxTime = 1000;
        progress.currentTime = 1;
    }
}
module.exports = {
fetchAll: fetchAll,
fetch: fetch,
}

View File

@ -106,12 +106,6 @@ async function fetch({ songId, debug = false }) {
duration: duration,
};
// console.log("songInfo", songInfo);
dbUtils.query('INSERT IGNORE INTO song SET ?', {
song_id: songInfo.songId,
title: songInfo.title,
image: songInfo.image,
pub_date: songInfo.pubDate,
});
if (albumId != null)
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
song_id: songInfo.songId,
@ -123,6 +117,12 @@ async function fetch({ songId, debug = false }) {
artist_id: artistId,
});
});
dbUtils.query('INSERT IGNORE INTO song SET ?', {
song_id: songInfo.songId,
title: songInfo.title,
image: songInfo.image,
pub_date: songInfo.pubDate,
});
return songInfo;
}

2858
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,10 @@
"license": "MIT",
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"crypto": "^1.0.1",
"fs": "^0.0.1-security",
"mysql": "^2.18.1",
"NeteaseCloudMusicApi": "^4.8.2",
"node-schedule": "^2.1.0",
"path": "^0.12.7",
"request": "^2.88.2",

View File

@ -1,5 +1,11 @@
global.useMysqlPool = true;
global.connectionLimit = 2;
const neteaseMusic = require('./netease_music/index');
let keepWatching = false;
if (keepWatching) {
global.useMysqlPool = true;
global.connectionLimit = 1;
setInterval(neteaseMusic.watch, 10 * 1000);
} else {
global.useMysqlPool = false;
}
neteaseMusic.watch();
setInterval(neteaseMusic.watch, 5000);