update
This commit is contained in:
parent
6c3a6d9aaf
commit
9db9383934
@ -21,6 +21,7 @@ const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
|
||||
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
|
||||
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
|
||||
const commentUtils = require('./src/getInfo/commentUtils');
|
||||
// const playlistUtils = require('./src/getInfo/playlistUtils');
|
||||
|
||||
/**
|
||||
* 测试
|
||||
@ -33,6 +34,7 @@ async function test() {
|
||||
// let res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
|
||||
// let res = await artistInfoUtils.fetch({ artistId: "12023508" });
|
||||
// let res = await songInfoUtils.fetch({ songId: "437608327" });
|
||||
// let res = await playlistUtils.fetch({ songId: "2320041657", debug: true });
|
||||
|
||||
// let res = await albumInfoUtils.getFromDatabase({ albumId: "9156" });
|
||||
// let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
|
||||
@ -111,6 +113,9 @@ async function watch() {
|
||||
}, {
|
||||
name: "commentTotalCount",
|
||||
sql: `SELECT count(*) AS count FROM comment`,
|
||||
}, {
|
||||
name: "userCount",
|
||||
sql: `SELECT count(*) AS count FROM user`,
|
||||
}, {
|
||||
name: "songAlbumCount",
|
||||
sql: `SELECT count(*) AS count FROM song_album_relation`,
|
||||
@ -154,6 +159,7 @@ async function watch() {
|
||||
`artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`,
|
||||
`lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`,
|
||||
`comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`,
|
||||
`user: ${newWatchParam['userCount'] - oldWatchParam['userCount']}`,
|
||||
].join(', '),
|
||||
`[已爬取]`,
|
||||
[
|
||||
@ -162,6 +168,7 @@ async function watch() {
|
||||
`artist: ${newWatchParam['artistCount']}`,
|
||||
`lyric: ${newWatchParam['lyricCount']}`,
|
||||
`comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
|
||||
`user: ${newWatchParam['userCount']}`,
|
||||
].join(', '),
|
||||
`[待爬取]`,
|
||||
[
|
||||
@ -170,6 +177,7 @@ async function watch() {
|
||||
`artist: ${newWatchParam['artistWaiting']}`,
|
||||
`lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
|
||||
`comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
|
||||
`user: 未知`,
|
||||
].join(', '),
|
||||
`[总计] (已爬取 + 待爬取)`,
|
||||
[
|
||||
@ -178,6 +186,7 @@ async function watch() {
|
||||
`artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
|
||||
`lyric: ${newWatchParam['songCount']}`,
|
||||
`comment: ${newWatchParam['songCount']}`,
|
||||
`user: ${newWatchParam['userCount']}`,
|
||||
].join(', '),
|
||||
`[关联关系统计]`,
|
||||
`song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,
|
||||
|
22
netease_music/sql/statistic.sql
Normal file
22
netease_music/sql/statistic.sql
Normal file
@ -0,0 +1,22 @@
|
||||
-- Distribution of the songs that still need to be crawled, bucketed by song_id
-- in steps of 10,000,000. A song is counted here when its id appears in one of
-- the relation tables but has no row in `song` yet.
-- FLOOR is used so that `s` is the lower bound of each bucket; FORMAT() is a
-- display function that rounds to nearest and returns a string containing
-- thousands separators, which is not safe to cast back to a number.
SELECT CAST( FLOOR( t_tmp.song_id / 10000000 ) * 10000000 AS UNSIGNED ) AS s, count(*) AS count
FROM (
    SELECT DISTINCT song_id FROM song_album_relation
    UNION
    SELECT DISTINCT song_id FROM song_artist_relation
) AS t_tmp
WHERE song_id NOT IN ( SELECT song_id FROM song )
GROUP BY s
ORDER BY s DESC;

-- Run OPTIMIZE TABLE on each of the tables listed below.
optimize table album;
optimize table artist;
optimize table comment;
optimize table comment_progress;
optimize table log;
optimize table lyric;
optimize table song;
optimize table song_album_relation;
optimize table song_artist_relation;
optimize table user;
|
@ -19,7 +19,7 @@ CREATE TABLE `artist` (
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`artist_id`),
|
||||
KEY `artist_id` (`artist_id`)
|
||||
INDEX `artist_id` (`artist_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `album` (
|
||||
@ -34,7 +34,7 @@ CREATE TABLE `album` (
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
|
||||
PRIMARY KEY (`album_id`),
|
||||
KEY `album_id` (`album_id`)
|
||||
INDEX `album_id` (`album_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `song_album_relation` (
|
||||
@ -43,8 +43,8 @@ CREATE TABLE `song_album_relation` (
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`song_id`,`album_id`),
|
||||
KEY `song_id` (`song_id`),
|
||||
KEY `album_id` (`album_id`)
|
||||
INDEX `song_id` (`song_id`),
|
||||
INDEX `album_id` (`album_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `song_artist_relation` (
|
||||
@ -53,8 +53,8 @@ CREATE TABLE `song_artist_relation` (
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY `song_id` (`song_id`,`artist_id`),
|
||||
KEY `song_id` (`song_id`),
|
||||
KEY `artist_id` (`artist_id`)
|
||||
INDEX `song_id` (`song_id`),
|
||||
INDEX `artist_id` (`artist_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `lyric` (
|
||||
@ -64,7 +64,7 @@ CREATE TABLE `lyric` (
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`song_id`,`version`),
|
||||
KEY `song_id` (`song_id`)
|
||||
INDEX `song_id` (`song_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `user` (
|
||||
@ -74,7 +74,8 @@ CREATE TABLE `user` (
|
||||
`avatar_url` varchar(200) NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`user_id`)
|
||||
PRIMARY KEY (`user_id`),
|
||||
INDEX `user_id` (`user_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `comment` (
|
||||
|
65
netease_music/src/getInfo/playlistUtils.js
Normal file
65
netease_music/src/getInfo/playlistUtils.js
Normal file
@ -0,0 +1,65 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const requestUtils = require('../../../utils/requestUtils');
|
||||
const sleepUtils = require('../../../utils/sleepUtils');
|
||||
|
||||
const dbUtils = global.dbUtils;
|
||||
|
||||
// refer:
|
||||
// https://neteasecloudmusicapi-docs.4everland.app/
|
||||
// https://github.com/Binaryify/NeteaseCloudMusicApi
|
||||
const { playlist_catlist, playlist_hot } = require('NeteaseCloudMusicApi');
|
||||
|
||||
// // 从数据库中查出还缺少的歌词,并进行爬取
|
||||
// async function fetchAll() {
|
||||
// console.log("start fetching lyrics ...");
|
||||
// var playlistIds = await dbUtils.query(`
|
||||
// SELECT DISTINCT playlist_id FROM playlist WHERE playlist_id NOT IN ( SELECT playlist_id FROM lyric )
|
||||
// `, []);
|
||||
// playlistIds = playlistIds.map(playlist => playlist.playlist_id);
|
||||
// for (let i = 0; i < playlistIds.length; i++) {
|
||||
// await global.checkIsExit();
|
||||
// const playlistId = playlistIds[i];
|
||||
// console.log(`${i + 1}/${playlistIds.length} | lyric: ${playlistId}`);
|
||||
// try {
|
||||
// await fetch({ playlistId: playlistId });
|
||||
// } catch (err) {
|
||||
// console.error(err);
|
||||
// }
|
||||
// await sleepUtils.sleep(global.sleepTime);
|
||||
// }
|
||||
// }
|
||||
|
||||
/**
 * Calls `playlist_catlist` with an empty query, writes the raw response as JSON
 * to `../../temp/playlist-<playlistId>.json` (relative to this file) and prints
 * it to the console.
 *
 * If the API call or the file write throws, the error is logged with
 * `console.error` and the function returns early without printing the result.
 *
 * @param {Object} options
 * @param {*} options.playlistId - Only used to build the dump file name.
 * @param {boolean} [options.debug=false] - Accepted but not read by this function.
 * @returns {Promise<undefined>} Nothing is returned on either path.
 */
async function fetch({ playlistId, debug = false }) {
    // API doc: https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95%e5%88%86%e7%b1%bb
    const queryParams = {};
    let playlistResult;

    try {
        playlistResult = await playlist_catlist(queryParams);

        const dumpFile = path.join(__dirname, "../../temp", `playlist-${playlistId}.json`);
        fs.writeFileSync(dumpFile, JSON.stringify(playlistResult));
    } catch (errors) {
        console.error(errors);
        return;
    }

    console.log(playlistResult);

    // TODO: parse the response and persist it through dbUtils; nothing is
    // stored in the database or returned to the caller yet.
}
|
||||
|
||||
module.exports = {
|
||||
fetch: fetch,
|
||||
// fetchAll: fetchAll,
|
||||
}
|
@ -76,6 +76,8 @@ async function fetch({ songId, debug = false }) {
|
||||
let songInfoDict = JSON.parse(songInfoJSONString);
|
||||
// console.log(songInfoDict);
|
||||
|
||||
// TODO 考虑歌曲别名 例如:https://music.163.com/#/song?id=26830207
|
||||
|
||||
let title = /<meta property="og:title" content="(.*?)" \/>/.exec(html)[1];
|
||||
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
|
||||
let artist = /<meta property="og:music:artist" content="(.*?)" \/>/.exec(html)[1];
|
||||
|
@ -7,9 +7,8 @@ const sleepUtils = require('../../../utils/sleepUtils');
|
||||
const dbUtils = global.dbUtils;
|
||||
|
||||
// 获取用户详情
|
||||
async function fetch({ userId }) {
|
||||
async function fetch({ userId, debug = false }) {
|
||||
let url = `https://music.163.com/user/home?id=${userId}`;
|
||||
|
||||
try {
|
||||
var html = fs.readFileSync(path.join(__dirname, "../../temp", ` user-${userId}.html`), 'utf8');
|
||||
} catch (errors) {
|
||||
|
Loading…
Reference in New Issue
Block a user