update
This commit is contained in:
parent
93db6371d9
commit
1939398579
@ -20,6 +20,7 @@ const songInfoUtils = require('./src/getInfo/songInfoUtils');
|
||||
const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
|
||||
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
|
||||
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
|
||||
const commentUtils = require('./src/getInfo/commentUtils');
|
||||
|
||||
/**
|
||||
* 测试
|
||||
@ -46,10 +47,10 @@ async function test() {
|
||||
async function main() {
|
||||
console.log("neteaseMusic Start fetch ...");
|
||||
while (true) {
|
||||
// 删除脏数据
|
||||
var affectedRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
|
||||
var affectedRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
|
||||
console.log(`删除脏数据 affectedRows:`, affectedRows1.affectedRows, affectedRows2.affectedRows);
|
||||
// // 删除脏数据
|
||||
// var affectedRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
|
||||
// var affectedRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
|
||||
// console.log(`删除脏数据 affectedRows:`, affectedRows1.affectedRows, affectedRows2.affectedRows);
|
||||
|
||||
await songInfoUtils.fetchAll();
|
||||
await albumInfoUtils.fetchAll({});
|
||||
|
@ -60,6 +60,42 @@ CREATE TABLE `lyric` (
|
||||
PRIMARY KEY (`song_id`,`version`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
-- Crawled user profiles, one row per `user_id`.
CREATE TABLE `user` (
  -- user id
  `user_id`     INT(10) UNSIGNED    NOT NULL COMMENT '用户id',
  -- user type
  `user_type`   TINYINT(4) UNSIGNED NOT NULL COMMENT '用户类型',
  -- user nickname
  `nickname`    VARCHAR(200)        NOT NULL COMMENT '用户昵称',
  -- avatar: the part of the URL that follows http://p1.music.126.net/
  `avatar_url`  VARCHAR(200)        NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
  -- time the row was crawled
  `create_time` DATETIME            NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  -- time of the last update (maintained by ON UPDATE)
  `modify_time` DATETIME            NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`user_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
-- Crawled song comments, one row per `comment_id`.
--
-- `parent_comment_id` refers to another comment's id, so it uses the same
-- BIGINT UNSIGNED type as `comment_id`; the previous INT(10) UNSIGNED could not
-- hold every value that `comment_id` can. For a table that was already created
-- with the old definition, apply:
--   ALTER TABLE `comment` MODIFY `parent_comment_id` bigint(20) unsigned NOT NULL COMMENT '父评论id';
CREATE TABLE `comment` (
  -- comment id
  `comment_id` bigint(20) unsigned NOT NULL COMMENT '评论id',
  -- parent comment id (same width as comment_id)
  `parent_comment_id` bigint(20) unsigned NOT NULL COMMENT '父评论id',
  -- id of the commenting user
  `user_id` int(10) unsigned NOT NULL COMMENT '用户id',
  -- id of the song the comment belongs to
  `song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
  -- comment text
  `content` text NOT NULL COMMENT '评论内容',
  -- comment time, stored as text
  `time` varchar(50) NOT NULL DEFAULT '' COMMENT '评论时间',
  -- number of likes
  `like_count` int(10) unsigned NOT NULL COMMENT '点赞数',
  -- comment type: 0-comments 1-hotComments 2-topComments
  `comment_type` tinyint(4) unsigned NOT NULL COMMENT '评论类型 0-comments 1-hotComments 2-topComments',
  -- time the row was crawled
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  -- time of the last update (maintained by ON UPDATE)
  `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`comment_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
-- Per-song bookkeeping for the comment crawler, one row per `song_id`.
-- NOTE(review): the three *_time columns are signed INT(10). If they are meant
-- to hold millisecond timestamps they will not fit; confirm the unit used by
-- the code that writes them.
CREATE TABLE `comment_progress` (
  -- song id
  `song_id`        INT(10) UNSIGNED    NOT NULL COMMENT '歌曲id',
  -- time of the newest comment when a (full or incremental) crawl starts
  `max_time`       INT(10)             NOT NULL DEFAULT 0 COMMENT '开始爬取/开始增量爬取的时候 最新一条评论的时间',
  -- time of the last comment reached by the previous crawl; 0 on the first crawl
  `min_time`       INT(10)             NOT NULL DEFAULT 0 COMMENT '上一次爬取时最后一条评论的时间 第一次爬取时为0',
  -- earliest comment time reached during the current crawl
  `current_time`   INT(10)             NOT NULL DEFAULT 0 COMMENT '本次爬取/增量时,最早的一条评论时间',
  -- crawl status: 0-waiting (full or incremental) 1-in progress 2-finished
  `current_status` TINYINT(4) UNSIGNED NOT NULL DEFAULT 0 COMMENT '爬取进度 0-等待爬取/增量爬取 1-爬取中 2-完成',
  -- total number of comments
  `total`          INT(10)             NOT NULL DEFAULT 0 COMMENT '评论总数',
  -- time the row was crawled
  `create_time`    DATETIME            NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  -- time of the last update (maintained by ON UPDATE)
  `modify_time`    DATETIME            NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`song_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `log` (
|
||||
`id` int(10) unsigned NOT NULL COMMENT 'id',
|
||||
`name` varchar(200) NOT NULL COMMENT '方法/数据库',
|
||||
|
70
netease_music/src/crypto.js
Normal file
70
netease_music/src/crypto.js
Normal file
@ -0,0 +1,70 @@
|
||||
/**
|
||||
* 该文件来自:https://github.com/Binaryify/NeteaseCloudMusicApi/blob/master/util/crypto.js
|
||||
*/
|
||||
const crypto = require('crypto')
|
||||
// Fixed key material shared by the request-encryption helpers in this module.

// IV passed to both AES-CBC passes in weapi().
const iv = Buffer.from('0102030405060708', 'utf8')
// Key of the first AES-CBC pass in weapi().
const presetKey = Buffer.from('0CoJUm6Qyw8W8jud', 'utf8')
// AES-ECB key used by linuxapi().
const linuxapiKey = Buffer.from('rFgB&h#%2?^eDg:Q', 'utf8')
// Alphabet from which weapi() draws the characters of its random secret key.
const base62 = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
// RSA public key (PEM) used by weapi() to encrypt the random secret key.
const publicKey = '-----BEGIN PUBLIC KEY-----\nMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDgtQn2JZ34ZC28NWYpAUd98iZ37BUrX/aKzmFbt7clFSs6sXqHauqKWqdtLkF2KexO40H1YTX8z2lSgBBOAxLsvaklV8k4cBFK9snQXE9/DDaFt6Rr7iVZMldczhC0JNgTz+SHXT6CBHuX3e9SdB1Ua44oncaTWz7OBGLbCiK45wIDAQAB\n-----END PUBLIC KEY-----'
// AES-ECB key used by eapi() and decrypt().
const eapiKey = 'e82ckenh8dichen8'
|
||||
|
||||
/**
 * Encrypts a buffer with AES-128 in the requested block mode.
 *
 * @param {Buffer} buffer - Plaintext to encrypt.
 * @param {string} mode - Mode suffix appended to `aes-128-` (this module passes 'cbc' and 'ecb').
 * @param {Buffer|string} key - Cipher key, handed to `crypto.createCipheriv` unchanged.
 * @param {Buffer|string} iv - Initialization vector; this module passes '' for ECB.
 * @returns {Buffer} The complete ciphertext, including the final block.
 */
const aesEncrypt = (buffer, mode, key, iv) => {
  const algorithm = 'aes-128-' + mode
  const cipher = crypto.createCipheriv(algorithm, key, iv)
  const head = cipher.update(buffer)
  const tail = cipher.final()
  return Buffer.concat([head, tail])
}
|
||||
|
||||
/**
 * RSA-encrypts a buffer without padding, after left-padding it with zero
 * bytes to a 128-byte block.
 *
 * The caller's buffer is not modified; a padded copy is encrypted.
 *
 * @param {Buffer} buffer - Data to encrypt; at most 128 bytes.
 * @param {string|Buffer|object} key - Public key passed to `crypto.publicEncrypt`.
 * @returns {Buffer} The ciphertext returned by `crypto.publicEncrypt`.
 * @throws {RangeError} If `buffer` is longer than the 128-byte block.
 */
const rsaEncrypt = (buffer, key) => {
  const blockSize = 128
  if (buffer.length > blockSize) {
    // Fail with a clear message instead of a generic Buffer.alloc size error.
    throw new RangeError(
      `rsaEncrypt: input is ${buffer.length} bytes, which exceeds the ${blockSize}-byte RSA block`,
    )
  }
  // RSA_NO_PADDING requires the input to fill the block exactly, hence the zero prefix.
  const padded = Buffer.concat([Buffer.alloc(blockSize - buffer.length), buffer])
  return crypto.publicEncrypt(
    { key: key, padding: crypto.constants.RSA_NO_PADDING },
    padded,
  )
}
|
||||
|
||||
/**
 * Builds the `params` / `encSecKey` pair for a "weapi" request.
 *
 * The JSON text is AES-CBC encrypted with the preset key, the base64 result is
 * AES-CBC encrypted again with a random 16-character secret key, and that
 * secret key (byte-reversed) is RSA-encrypted with the module's public key.
 *
 * @param {*} object - Payload; serialized with `JSON.stringify`.
 * @returns {{params: string, encSecKey: string}} base64 params and hex-encoded encrypted key.
 */
const weapi = (object) => {
  const text = JSON.stringify(object)
  // 16 random bytes, each mapped to the char code of a base62 character.
  const secretKey = crypto
    .randomBytes(16)
    .map((n) => base62.charAt(n % 62).charCodeAt())

  const firstPass = aesEncrypt(Buffer.from(text), 'cbc', presetKey, iv).toString('base64')
  const params = aesEncrypt(Buffer.from(firstPass), 'cbc', secretKey, iv).toString('base64')

  // reverse() mutates secretKey in place, so it must run after `params` is computed.
  const encSecKey = rsaEncrypt(secretKey.reverse(), publicKey).toString('hex')

  return { params: params, encSecKey: encSecKey }
}
|
||||
|
||||
/**
 * Builds the `eparams` field for a "linuxapi" request.
 *
 * @param {*} object - Payload; serialized with `JSON.stringify`.
 * @returns {{eparams: string}} Upper-case hex of the AES-128-ECB ciphertext.
 */
const linuxapi = (object) => {
  const plaintext = Buffer.from(JSON.stringify(object))
  const encrypted = aesEncrypt(plaintext, 'ecb', linuxapiKey, '')
  const eparams = encrypted.toString('hex').toUpperCase()
  return { eparams: eparams }
}
|
||||
|
||||
/**
 * Builds the `params` field for an "eapi" request.
 *
 * An MD5 digest is computed over the url and payload text, then url, text and
 * digest are joined with a fixed separator and AES-128-ECB encrypted.
 *
 * @param {string} url - Request path that is mixed into the digest and the payload.
 * @param {*} object - Payload; values whose `typeof` is 'object' are JSON-serialized, anything else is used as-is.
 * @returns {{params: string}} Upper-case hex of the ciphertext.
 */
const eapi = (url, object) => {
  let text = object
  if (typeof object === 'object') {
    text = JSON.stringify(object)
  }
  const message = `nobody${url}use${text}md5forencrypt`
  const hash = crypto.createHash('md5')
  hash.update(message)
  const digest = hash.digest('hex')
  const data = `${url}-36cd479b6b5-${text}-36cd479b6b5-${digest}`
  const encrypted = aesEncrypt(Buffer.from(data), 'ecb', eapiKey, '')
  return { params: encrypted.toString('hex').toUpperCase() }
}
|
||||
|
||||
/**
 * Decrypts an AES-128-ECB ciphertext with the eapi key.
 *
 * @param {Buffer} cipherBuffer - Ciphertext to decrypt.
 * @returns {Buffer} The decrypted bytes.
 */
const decrypt = (cipherBuffer) => {
  const decipher = crypto.createDecipheriv('aes-128-ecb', eapiKey, '')
  const chunks = [decipher.update(cipherBuffer)]
  chunks.push(decipher.final())
  return Buffer.concat(chunks)
}
|
||||
|
||||
module.exports = { weapi, linuxapi, eapi, decrypt }
|
@ -130,6 +130,14 @@ async function fetch({ albumId, debug = false, update = false }) {
|
||||
songIds: songIds,
|
||||
};
|
||||
// console.log("albumInfo", albumInfo);
|
||||
// Record a song <-> album relation row for every song id of this album.
songIds.forEach(function (songId) {
    // Skip pairs where either id is non-numeric or 0. The previous guard tested
    // `Number(songId) === 0` twice and therefore never rejected albumId === 0.
    if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(albumId)) || Number(albumId) === 0)
        return;
    dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
        song_id: songId,
        album_id: albumId,
    });
});
|
||||
dbUtils.query(update ? `UPDATE album SET ? WHERE album_id = ${albumId}` : 'INSERT IGNORE INTO album SET ?', {
|
||||
album_id: albumInfo.albumId,
|
||||
title: albumInfo.title,
|
||||
@ -140,14 +148,6 @@ async function fetch({ albumId, debug = false, update = false }) {
|
||||
company: albumInfo.company,
|
||||
version: 1
|
||||
});
|
||||
songIds.forEach(function (songId) {
|
||||
if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(albumId)) || Number(songId) === 0)
|
||||
return;
|
||||
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
|
||||
song_id: songId,
|
||||
album_id: albumId,
|
||||
});
|
||||
});
|
||||
return albumInfo;
|
||||
}
|
||||
|
||||
|
@ -90,13 +90,6 @@ async function fetch({ artistId, debug = false }) {
|
||||
songIds: songIds,
|
||||
};
|
||||
// console.log("artistInfo", artistInfo);
|
||||
dbUtils.query('INSERT IGNORE INTO artist SET ?', {
|
||||
artist_id: artistInfo.artistId,
|
||||
title: artistInfo.title,
|
||||
description: artistInfo.description,
|
||||
image: artistInfo.image,
|
||||
pub_date: artistInfo.pubDate,
|
||||
});
|
||||
songIds.forEach(function (songId) {
|
||||
if (isNaN(Number(songId)) || Number(songId) === 0 || isNaN(Number(artistId)) || Number(artistId) === 0)
|
||||
return;
|
||||
@ -105,6 +98,13 @@ async function fetch({ artistId, debug = false }) {
|
||||
artist_id: artistId,
|
||||
});
|
||||
});
|
||||
dbUtils.query('INSERT IGNORE INTO artist SET ?', {
|
||||
artist_id: artistInfo.artistId,
|
||||
title: artistInfo.title,
|
||||
description: artistInfo.description,
|
||||
image: artistInfo.image,
|
||||
pub_date: artistInfo.pubDate,
|
||||
});
|
||||
return artistInfo;
|
||||
}
|
||||
|
||||
|
182
netease_music/src/getInfo/commentUtils.js
Normal file
182
netease_music/src/getInfo/commentUtils.js
Normal file
@ -0,0 +1,182 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const requestUtils = require('../../../utils/requestUtils');
|
||||
const sleepUtils = require('../../../utils/sleepUtils');
|
||||
|
||||
const dbUtils = global.dbUtils;
|
||||
|
||||
const { comment_music } = require('NeteaseCloudMusicApi');
|
||||
|
||||
/**
 * Crawls comments for songs whose comment crawl has not finished yet.
 *
 * First adds a `comment_progress` row for every song that has none, then
 * selects songs whose `current_status` is not 2 (the query uses LIMIT 1, so at
 * most one song is handled per call) and runs `fetch` for each of them.
 * Errors thrown by `fetch` are logged and do not stop the loop.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
    console.log("start fetching comment ...")

    // Import the song_ids that still need crawling into comment_progress.
    await dbUtils.query(`
        INSERT INTO comment_progress ( song_id )
        SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
    `, []);

    // Look up songs whose crawl is waiting or still in progress.
    const pendingRows = await dbUtils.query(`
        SELECT song_id FROM comment_progress WHERE current_status != 2 LIMIT 1
    `, []);
    const songIds = pendingRows.map((row) => row.song_id);
    const total = songIds.length;

    let position = 0;
    for (const songId of songIds) {
        position += 1;
        await global.checkIsExit();
        console.log(`${position}/${total} | comment: ${songId}`);
        try {
            await fetch({ songId: songId });
        } catch (err) {
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}
|
||||
|
||||
/**
 * Maps one comment object from the API response to a row of the `comment` table.
 *
 * @param {object} comment - Comment object taken from the API response.
 * @param {number} commentType - 0-comments 1-hotComments 2-topComments.
 * @param {number} songId - Song the comment belongs to.
 * @returns {object} Column values keyed by column name.
 */
function buildCommentRow(comment, commentType, songId) {
    return {
        comment_id: comment.commentId,
        parent_comment_id: comment.parentCommentId,
        user_id: comment.user?.userId,
        song_id: songId,
        content: comment.content,
        time: comment.time,
        like_count: comment.likedCount,
        comment_type: commentType,
    };
}

/**
 * Maps the `user` of one comment to a row of the `user` table.
 * The avatar URL is shortened to the part after `http://pN.music.126.net/`
 * when it has that form; otherwise it is kept unchanged.
 *
 * @param {object} comment - Comment object whose `user` property is set.
 * @returns {object} Column values keyed by column name.
 */
function buildUserRow(comment) {
    const user = comment.user;
    // Optional chaining: a user without avatarUrl must not abort the whole page.
    const matched = user.avatarUrl?.match(/^http:\/\/p\d+\.music\.126\.net\/(.*?)$/);
    const shortAvatarUrl = matched ? matched[1] : user.avatarUrl;
    return {
        user_id: user.userId,
        user_type: user.userType,
        nickname: user.nickname,
        avatar_url: shortAvatarUrl || user.avatarUrl,
    };
}

/**
 * Fetches the comments of one song and upserts them into the `comment` table.
 *
 * Does nothing (apart from logging) when the song has no `comment_progress`
 * row with `current_status != 2`. Pagination and writing progress back to the
 * database are not implemented yet: the loop stops after the first page that
 * was fetched successfully and the progress values are placeholders.
 *
 * @param {object} options
 * @param {number} options.songId - Id of the song whose comments are fetched.
 * @param {boolean} [options.debug=false] - Currently unused.
 * @returns {Promise<void>}
 */
async function fetch({ songId, debug = false }) {
    // Sketch of calling the raw endpoint directly instead of using comment_music:
    // // var url = `https://music.163.com/weapi/comment/resource/comments/get?csrf_token=`;
    // var opts = {
    //     method: "POST",
    //     url: `https://music.163.com/api/v1/resource/comments/R_SO_4_${songId}`,
    //     headers: {
    //         'content-type': 'application/x-www-form-urlencoded',
    //         'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53',
    //     },
    //     form: encrypt.weapi({
    //         rid: songId,
    //         limit: 20,
    //         offset: 20, // offset is (page number - 1) * 20
    //         before: 1664655762881
    //     })
    // };

    // Look up the unfinished progress record of this song, if any.
    const commentProgress = await dbUtils.query(`
        SELECT * FROM comment_progress WHERE song_id = ? and current_status != 2 LIMIT 1
    `, [songId]);
    if (commentProgress.length === 0) {
        console.log('No commentProgress found, song_id:', songId);
        return;
    }

    const item = commentProgress[0];
    const progress = {
        maxTime: item.max_time,
        minTime: item.min_time,
        currentTime: item.current_time,
        currentStatus: item.current_status,
        total: item.total,
    };
    const queryParams = {
        id: songId,
        limit: 20,
        // before: undefined,
    };
    console.log(progress);

    let isFinish = false;
    while (!isFinish) {
        // Request one page; on failure wait a second and try the same page again.
        let commentResult;
        try {
            commentResult = await comment_music(queryParams);
        } catch (errors) {
            console.error(errors);
            await sleepUtils.sleep(1000);
            continue;
        }

        // Debug dump of the raw response. Kept outside the retry block above so
        // that a file-system error (e.g. missing temp directory) cannot cause
        // the request to be retried forever.
        try {
            fs.writeFileSync(path.join(__dirname, "../../temp", `comment-${songId}.json`), JSON.stringify(commentResult));
        } catch (dumpError) {
            console.error(dumpError);
        }

        // Each list comes from its own response field (previously all three
        // were read from `hotComments`).
        const topComments = commentResult.body.topComments || [];
        const hotComments = commentResult.body.hotComments || [];
        const comments = commentResult.body.comments || [];

        const commentInfoList = [
            ...topComments.map((comment) => buildCommentRow(comment, 2, songId)),
            ...hotComments.map((comment) => buildCommentRow(comment, 1, songId)),
            ...comments.map((comment) => buildCommentRow(comment, 0, songId)),
        ];
        // TODO: userInfoList is built but not written to the `user` table yet.
        const userInfoList = [...topComments, ...hotComments, ...comments]
            .filter((comment) => comment.user)
            .map(buildUserRow);

        console.log(commentInfoList);
        // console.log(userInfoList);

        // Upsert sequentially and await every statement so that a failing
        // insert rejects this function instead of becoming an unhandled
        // rejection. A comment present in several lists keeps the highest
        // comment_type because of GREATEST().
        for (const commentInfo of commentInfoList) {
            const result = await dbUtils.query(`
                INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
                ON DUPLICATE KEY UPDATE content = ? , like_count = ? , comment_type = GREATEST(comment_type, ? ), modify_time = CURRENT_TIMESTAMP
            `, [
                [
                    [
                        commentInfo.comment_id,
                        commentInfo.parent_comment_id,
                        commentInfo.user_id,
                        commentInfo.song_id,
                        commentInfo.content,
                        commentInfo.time,
                        commentInfo.like_count,
                        commentInfo.comment_type,
                    ],
                ],
                commentInfo.content,
                commentInfo.like_count,
                commentInfo.comment_type,
            ]);
            console.log(result);
        }

        // TODO: decide whether the crawl is finished; for now stop after one page.
        isFinish = true;

        // TODO: placeholder values - derive the next `before` cursor and the
        // progress fields from the fetched page.
        queryParams.before = 1111;
        progress.maxTime = 1000;
        progress.currentTime = 1;

        // TODO: write `progress` back to comment_progress.
    }
}
|
||||
|
||||
module.exports = {
|
||||
fetchAll: fetchAll,
|
||||
fetch: fetch,
|
||||
}
|
@ -106,12 +106,6 @@ async function fetch({ songId, debug = false }) {
|
||||
duration: duration,
|
||||
};
|
||||
// console.log("songInfo", songInfo);
|
||||
dbUtils.query('INSERT IGNORE INTO song SET ?', {
|
||||
song_id: songInfo.songId,
|
||||
title: songInfo.title,
|
||||
image: songInfo.image,
|
||||
pub_date: songInfo.pubDate,
|
||||
});
|
||||
if (albumId != null)
|
||||
dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
|
||||
song_id: songInfo.songId,
|
||||
@ -123,6 +117,12 @@ async function fetch({ songId, debug = false }) {
|
||||
artist_id: artistId,
|
||||
});
|
||||
});
|
||||
dbUtils.query('INSERT IGNORE INTO song SET ?', {
|
||||
song_id: songInfo.songId,
|
||||
title: songInfo.title,
|
||||
image: songInfo.image,
|
||||
pub_date: songInfo.pubDate,
|
||||
});
|
||||
return songInfo;
|
||||
}
|
||||
|
||||
|
2858
package-lock.json
generated
2858
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -10,8 +10,10 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
"crypto": "^1.0.1",
|
||||
"fs": "^0.0.1-security",
|
||||
"mysql": "^2.18.1",
|
||||
"NeteaseCloudMusicApi": "^4.8.2",
|
||||
"node-schedule": "^2.1.0",
|
||||
"path": "^0.12.7",
|
||||
"request": "^2.88.2",
|
||||
|
12
watch.js
12
watch.js
@ -1,5 +1,11 @@
|
||||
global.useMysqlPool = true;
|
||||
global.connectionLimit = 2;
|
||||
const neteaseMusic = require('./netease_music/index');
|
||||
|
||||
let keepWatching = false;
|
||||
if (keepWatching) {
|
||||
global.useMysqlPool = true;
|
||||
global.connectionLimit = 1;
|
||||
setInterval(neteaseMusic.watch, 10 * 1000);
|
||||
} else {
|
||||
global.useMysqlPool = false;
|
||||
}
|
||||
neteaseMusic.watch();
|
||||
setInterval(neteaseMusic.watch, 5000);
|
Loading…
Reference in New Issue
Block a user