1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
This commit is contained in:
程序员小墨 2022-10-06 14:01:05 +08:00
parent 6c3a6d9aaf
commit 9db9383934
7 changed files with 117 additions and 10 deletions

View File

@ -21,6 +21,7 @@ const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
const commentUtils = require('./src/getInfo/commentUtils');
// const playlistUtils = require('./src/getInfo/playlistUtils');
/**
* 测试
@ -33,6 +34,7 @@ async function test() {
// let res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
// let res = await artistInfoUtils.fetch({ artistId: "12023508" });
// let res = await songInfoUtils.fetch({ songId: "437608327" });
// let res = await playlistUtils.fetch({ songId: "2320041657", debug: true });
// let res = await albumInfoUtils.getFromDatabase({ albumId: "9156" });
// let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
@ -111,6 +113,9 @@ async function watch() {
}, {
name: "commentTotalCount",
sql: `SELECT count(*) AS count FROM comment`,
}, {
name: "userCount",
sql: `SELECT count(*) AS count FROM user`,
}, {
name: "songAlbumCount",
sql: `SELECT count(*) AS count FROM song_album_relation`,
@ -154,6 +159,7 @@ async function watch() {
`artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`,
`lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`,
`comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`,
`user: ${newWatchParam['userCount'] - oldWatchParam['userCount']}`,
].join(', '),
`[已爬取]`,
[
@ -162,6 +168,7 @@ async function watch() {
`artist: ${newWatchParam['artistCount']}`,
`lyric: ${newWatchParam['lyricCount']}`,
`comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
`user: ${newWatchParam['userCount']}`,
].join(', '),
`[待爬取]`,
[
@ -170,6 +177,7 @@ async function watch() {
`artist: ${newWatchParam['artistWaiting']}`,
`lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
`comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
`user: 未知`,
].join(', '),
`[总计] (已爬取 + 待爬取)`,
[
@ -178,6 +186,7 @@ async function watch() {
`artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
`lyric: ${newWatchParam['songCount']}`,
`comment: ${newWatchParam['songCount']}`,
`user: ${newWatchParam['userCount']}`,
].join(', '),
`[关联关系统计]`,
`song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,

View File

@ -0,0 +1,22 @@
-- Distribution of the songs that still need to be crawled: song ids that are
-- referenced by a relation table but have no row in `song` yet, grouped into
-- buckets of 10,000,000 ids (each id is rounded to the nearest multiple).
-- ROUND() is used instead of FORMAT(): FORMAT() returns a string with
-- thousands separators, which CAST(... AS UNSIGNED) would truncate at the
-- first comma once the quotient reaches 1000.
SELECT CAST( ROUND( t_tmp.song_id / 10000000 ) * 10000000 AS UNSIGNED ) AS s, COUNT(*) AS count
FROM (
    SELECT DISTINCT song_id FROM song_album_relation
    UNION
    SELECT DISTINCT song_id FROM song_artist_relation
) AS t_tmp
WHERE song_id NOT IN ( SELECT song_id FROM song )
GROUP BY s
-- The terminating semicolon keeps this statement separate from whatever
-- follows when the file is executed as a script.
ORDER BY s DESC;
-- Run OPTIMIZE TABLE on each of the crawler's tables, one statement per table.
OPTIMIZE TABLE `album`;
OPTIMIZE TABLE `artist`;
OPTIMIZE TABLE `comment`;
OPTIMIZE TABLE `comment_progress`;
OPTIMIZE TABLE `log`;
OPTIMIZE TABLE `lyric`;
OPTIMIZE TABLE `song`;
OPTIMIZE TABLE `song_album_relation`;
OPTIMIZE TABLE `song_artist_relation`;
OPTIMIZE TABLE `user`;

View File

@ -19,7 +19,7 @@ CREATE TABLE `artist` (
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`artist_id`),
KEY `artist_id` (`artist_id`)
INDEX `artist_id` (`artist_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `album` (
@ -34,7 +34,7 @@ CREATE TABLE `album` (
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
PRIMARY KEY (`album_id`),
KEY `album_id` (`album_id`)
INDEX `album_id` (`album_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `song_album_relation` (
@ -43,8 +43,8 @@ CREATE TABLE `song_album_relation` (
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`,`album_id`),
KEY `song_id` (`song_id`),
KEY `album_id` (`album_id`)
INDEX `song_id` (`song_id`),
INDEX `album_id` (`album_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `song_artist_relation` (
@ -53,8 +53,8 @@ CREATE TABLE `song_artist_relation` (
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY `song_id` (`song_id`,`artist_id`),
KEY `song_id` (`song_id`),
KEY `artist_id` (`artist_id`)
INDEX `song_id` (`song_id`),
INDEX `artist_id` (`artist_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `lyric` (
@ -64,7 +64,7 @@ CREATE TABLE `lyric` (
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`,`version`),
KEY `song_id` (`song_id`)
INDEX `song_id` (`song_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `user` (
@ -74,7 +74,8 @@ CREATE TABLE `user` (
`avatar_url` varchar(200) NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`user_id`)
PRIMARY KEY (`user_id`),
INDEX `user_id` (`user_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `comment` (

View File

@ -0,0 +1,65 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
// refer:
// https://neteasecloudmusicapi-docs.4everland.app/
// https://github.com/Binaryify/NeteaseCloudMusicApi
const { playlist_catlist, playlist_hot } = require('NeteaseCloudMusicApi');
// // 从数据库中查出还缺少的歌词,并进行爬取
// async function fetchAll() {
// console.log("start fetching lyrics ...");
// var playlistIds = await dbUtils.query(`
// SELECT DISTINCT playlist_id FROM playlist WHERE playlist_id NOT IN ( SELECT playlist_id FROM lyric )
// `, []);
// playlistIds = playlistIds.map(playlist => playlist.playlist_id);
// for (let i = 0; i < playlistIds.length; i++) {
// await global.checkIsExit();
// const playlistId = playlistIds[i];
// console.log(`${i + 1}/${playlistIds.length} | lyric: ${playlistId}`);
// try {
// await fetch({ playlistId: playlistId });
// } catch (err) {
// console.error(err);
// }
// await sleepUtils.sleep(global.sleepTime);
// }
// }
/**
 * Request the playlist category list and dump the raw response to a temp file.
 *
 * Calls `playlist_catlist` with an empty parameter object, writes the
 * JSON-serialised response to `../../temp/playlist-<playlistId>.json`
 * (relative to this module's directory) and then prints the response.
 * If the request or the file write throws, the error is printed and the
 * function resolves to `undefined` without printing the response.
 *
 * @param {object} options
 * @param {*} options.playlistId - Only used to build the dump file name; it is
 *   not passed to the API call.
 * @param {boolean} [options.debug=false] - Accepted but not read by this function.
 * @returns {Promise<void>}
 */
async function fetch({ playlistId, debug = false }) {
    // API docs, playlist categories section:
    // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95%e5%88%86%e7%b1%bb
    const requestOptions = {};
    let categoryResponse;

    try {
        categoryResponse = await playlist_catlist(requestOptions);

        // Keep a copy of the raw response on disk.
        const dumpFile = path.join(__dirname, "../../temp", `playlist-${playlistId}.json`);
        fs.writeFileSync(dumpFile, JSON.stringify(categoryResponse));
    } catch (errors) {
        console.error(errors);
        return;
    }

    console.log(categoryResponse);

    // NOTE(review): the disabled draft below still refers to lyric fields and the
    // `lyric` table — presumably a placeholder to be adapted for playlists; confirm.
    // let lyricInfo = {
    // playlistId: playlistId,
    // lyric: lyric.lyric,
    // version: lyric.version,
    // };
    // // console.log("lyricInfo", lyricInfo);
    // dbUtils.query('INSERT IGNORE INTO lyric SET ?', {
    // playlist_id: lyricInfo.playlistId,
    // lyric: lyricInfo.lyric,
    // version: lyricInfo.version,
    // });
    // return lyricInfo;
}
module.exports = {
fetch: fetch,
// fetchAll: fetchAll,
}

View File

@ -76,6 +76,8 @@ async function fetch({ songId, debug = false }) {
let songInfoDict = JSON.parse(songInfoJSONString);
// console.log(songInfoDict);
// TODO 考虑歌曲别名 例如https://music.163.com/#/song?id=26830207
let title = /<meta property="og:title" content="(.*?)" \/>/.exec(html)[1];
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
let artist = /<meta property="og:music:artist" content="(.*?)" \/>/.exec(html)[1];

View File

@ -7,9 +7,8 @@ const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
// 获取用户详情
async function fetch({ userId }) {
async function fetch({ userId, debug = false }) {
let url = `https://music.163.com/user/home?id=${userId}`;
try {
var html = fs.readFileSync(path.join(__dirname, "../../temp", ` user-${userId}.html`), 'utf8');
} catch (errors) {

9
todo.txt Normal file
View File

@ -0,0 +1,9 @@
后期:
考虑歌曲别名 例如https://music.163.com/#/song?id=26830207
评论的更新
爬取歌单playlist
被删除的 artist 和 album 回头再通过其他表中的数据反查回来