update
This commit is contained in:
@@ -21,6 +21,7 @@ const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
|
|||||||
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
|
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
|
||||||
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
|
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
|
||||||
const commentUtils = require('./src/getInfo/commentUtils');
|
const commentUtils = require('./src/getInfo/commentUtils');
|
||||||
|
// const playlistUtils = require('./src/getInfo/playlistUtils');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 测试
|
* 测试
|
||||||
@@ -33,6 +34,7 @@ async function test() {
|
|||||||
// let res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
|
// let res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
|
||||||
// let res = await artistInfoUtils.fetch({ artistId: "12023508" });
|
// let res = await artistInfoUtils.fetch({ artistId: "12023508" });
|
||||||
// let res = await songInfoUtils.fetch({ songId: "437608327" });
|
// let res = await songInfoUtils.fetch({ songId: "437608327" });
|
||||||
|
// let res = await playlistUtils.fetch({ songId: "2320041657", debug: true });
|
||||||
|
|
||||||
// let res = await albumInfoUtils.getFromDatabase({ albumId: "9156" });
|
// let res = await albumInfoUtils.getFromDatabase({ albumId: "9156" });
|
||||||
// let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
|
// let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
|
||||||
@@ -111,6 +113,9 @@ async function watch() {
|
|||||||
}, {
|
}, {
|
||||||
name: "commentTotalCount",
|
name: "commentTotalCount",
|
||||||
sql: `SELECT count(*) AS count FROM comment`,
|
sql: `SELECT count(*) AS count FROM comment`,
|
||||||
|
}, {
|
||||||
|
name: "userCount",
|
||||||
|
sql: `SELECT count(*) AS count FROM user`,
|
||||||
}, {
|
}, {
|
||||||
name: "songAlbumCount",
|
name: "songAlbumCount",
|
||||||
sql: `SELECT count(*) AS count FROM song_album_relation`,
|
sql: `SELECT count(*) AS count FROM song_album_relation`,
|
||||||
@@ -154,6 +159,7 @@ async function watch() {
|
|||||||
`artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`,
|
`artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`,
|
||||||
`lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`,
|
`lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`,
|
||||||
`comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`,
|
`comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`,
|
||||||
|
`user: ${newWatchParam['userCount'] - oldWatchParam['userCount']}`,
|
||||||
].join(', '),
|
].join(', '),
|
||||||
`[已爬取]`,
|
`[已爬取]`,
|
||||||
[
|
[
|
||||||
@@ -162,6 +168,7 @@ async function watch() {
|
|||||||
`artist: ${newWatchParam['artistCount']}`,
|
`artist: ${newWatchParam['artistCount']}`,
|
||||||
`lyric: ${newWatchParam['lyricCount']}`,
|
`lyric: ${newWatchParam['lyricCount']}`,
|
||||||
`comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
|
`comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
|
||||||
|
`user: ${newWatchParam['userCount']}`,
|
||||||
].join(', '),
|
].join(', '),
|
||||||
`[待爬取]`,
|
`[待爬取]`,
|
||||||
[
|
[
|
||||||
@@ -170,6 +177,7 @@ async function watch() {
|
|||||||
`artist: ${newWatchParam['artistWaiting']}`,
|
`artist: ${newWatchParam['artistWaiting']}`,
|
||||||
`lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
|
`lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
|
||||||
`comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
|
`comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
|
||||||
|
`user: 未知`,
|
||||||
].join(', '),
|
].join(', '),
|
||||||
`[总计] (已爬取 + 待爬取)`,
|
`[总计] (已爬取 + 待爬取)`,
|
||||||
[
|
[
|
||||||
@@ -178,6 +186,7 @@ async function watch() {
|
|||||||
`artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
|
`artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
|
||||||
`lyric: ${newWatchParam['songCount']}`,
|
`lyric: ${newWatchParam['songCount']}`,
|
||||||
`comment: ${newWatchParam['songCount']}`,
|
`comment: ${newWatchParam['songCount']}`,
|
||||||
|
`user: ${newWatchParam['userCount']}`,
|
||||||
].join(', '),
|
].join(', '),
|
||||||
`[关联关系统计]`,
|
`[关联关系统计]`,
|
||||||
`song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,
|
`song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,
|
||||||
|
22
netease_music/sql/statistic.sql
Normal file
22
netease_music/sql/statistic.sql
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
-- Distribution of the songs that still need to be crawled: song ids that are
-- referenced by a relation table but are not present in `song` yet, grouped
-- into buckets rounded to the nearest 10,000,000.
-- ROUND() is used instead of FORMAT(): FORMAT() returns a locale-formatted
-- string with thousands separators, which is truncated at the first separator
-- when it is implicitly converted back to a number.
-- UNION already removes duplicates, so no DISTINCT is needed in the branches.
SELECT CAST( ROUND( t_tmp.song_id / 10000000 ) * 10000000 AS UNSIGNED ) AS s, COUNT(*) AS count
FROM (
    SELECT song_id FROM song_album_relation
    UNION
    SELECT song_id FROM song_artist_relation
) AS t_tmp
WHERE t_tmp.song_id NOT IN ( SELECT song_id FROM song )
GROUP BY s
ORDER BY s DESC;
|
||||||
|
|
||||||
|
-- Maintenance: run OPTIMIZE TABLE on every table used by the crawler.
OPTIMIZE TABLE `album`;
OPTIMIZE TABLE `artist`;
OPTIMIZE TABLE `comment`;
OPTIMIZE TABLE `comment_progress`;
OPTIMIZE TABLE `log`;
OPTIMIZE TABLE `lyric`;
OPTIMIZE TABLE `song`;
OPTIMIZE TABLE `song_album_relation`;
OPTIMIZE TABLE `song_artist_relation`;
OPTIMIZE TABLE `user`;
|
@@ -19,7 +19,7 @@ CREATE TABLE `artist` (
|
|||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
PRIMARY KEY (`artist_id`),
|
PRIMARY KEY (`artist_id`),
|
||||||
KEY `artist_id` (`artist_id`)
|
INDEX `artist_id` (`artist_id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `album` (
|
CREATE TABLE `album` (
|
||||||
@@ -34,7 +34,7 @@ CREATE TABLE `album` (
|
|||||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
|
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
|
||||||
PRIMARY KEY (`album_id`),
|
PRIMARY KEY (`album_id`),
|
||||||
KEY `album_id` (`album_id`)
|
INDEX `album_id` (`album_id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `song_album_relation` (
|
CREATE TABLE `song_album_relation` (
|
||||||
@@ -43,8 +43,8 @@ CREATE TABLE `song_album_relation` (
|
|||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
PRIMARY KEY (`song_id`,`album_id`),
|
PRIMARY KEY (`song_id`,`album_id`),
|
||||||
KEY `song_id` (`song_id`),
|
INDEX `song_id` (`song_id`),
|
||||||
KEY `album_id` (`album_id`)
|
INDEX `album_id` (`album_id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `song_artist_relation` (
|
CREATE TABLE `song_artist_relation` (
|
||||||
@@ -53,8 +53,8 @@ CREATE TABLE `song_artist_relation` (
|
|||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
PRIMARY KEY `song_id` (`song_id`,`artist_id`),
|
PRIMARY KEY `song_id` (`song_id`,`artist_id`),
|
||||||
KEY `song_id` (`song_id`),
|
INDEX `song_id` (`song_id`),
|
||||||
KEY `artist_id` (`artist_id`)
|
INDEX `artist_id` (`artist_id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `lyric` (
|
CREATE TABLE `lyric` (
|
||||||
@@ -64,7 +64,7 @@ CREATE TABLE `lyric` (
|
|||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
PRIMARY KEY (`song_id`,`version`),
|
PRIMARY KEY (`song_id`,`version`),
|
||||||
KEY `song_id` (`song_id`)
|
INDEX `song_id` (`song_id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `user` (
|
CREATE TABLE `user` (
|
||||||
@@ -74,7 +74,8 @@ CREATE TABLE `user` (
|
|||||||
`avatar_url` varchar(200) NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
|
`avatar_url` varchar(200) NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
|
||||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
PRIMARY KEY (`user_id`)
|
PRIMARY KEY (`user_id`),
|
||||||
|
INDEX `user_id` (`user_id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `comment` (
|
CREATE TABLE `comment` (
|
||||||
|
65
netease_music/src/getInfo/playlistUtils.js
Normal file
65
netease_music/src/getInfo/playlistUtils.js
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
const requestUtils = require('../../../utils/requestUtils');
|
||||||
|
const sleepUtils = require('../../../utils/sleepUtils');
|
||||||
|
|
||||||
|
const dbUtils = global.dbUtils;
|
||||||
|
|
||||||
|
// refer:
|
||||||
|
// https://neteasecloudmusicapi-docs.4everland.app/
|
||||||
|
// https://github.com/Binaryify/NeteaseCloudMusicApi
|
||||||
|
const { playlist_catlist, playlist_hot } = require('NeteaseCloudMusicApi');
|
||||||
|
|
||||||
|
// // 从数据库中查出还缺少的歌词,并进行爬取
|
||||||
|
// async function fetchAll() {
|
||||||
|
// console.log("start fetching lyrics ...");
|
||||||
|
// var playlistIds = await dbUtils.query(`
|
||||||
|
// SELECT DISTINCT playlist_id FROM playlist WHERE playlist_id NOT IN ( SELECT playlist_id FROM lyric )
|
||||||
|
// `, []);
|
||||||
|
// playlistIds = playlistIds.map(playlist => playlist.playlist_id);
|
||||||
|
// for (let i = 0; i < playlistIds.length; i++) {
|
||||||
|
// await global.checkIsExit();
|
||||||
|
// const playlistId = playlistIds[i];
|
||||||
|
// console.log(`${i + 1}/${playlistIds.length} | lyric: ${playlistId}`);
|
||||||
|
// try {
|
||||||
|
// await fetch({ playlistId: playlistId });
|
||||||
|
// } catch (err) {
|
||||||
|
// console.error(err);
|
||||||
|
// }
|
||||||
|
// await sleepUtils.sleep(global.sleepTime);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
/**
 * Fetch the playlist category list and dump the raw response for inspection.
 *
 * Calls `playlist_catlist` with an empty parameter object, writes the JSON
 * response to `../../temp/playlist-<playlistId>.json` (relative to this file)
 * and prints it to the console. On any error the error is logged and the
 * function returns early.
 *
 * @param {Object} options
 * @param {string} options.playlistId - Only used to build the dump file name.
 * @param {boolean} [options.debug=false] - Currently unused.
 * @returns {Promise<undefined>} Nothing is returned yet; persistence is still a draft.
 */
async function fetch({ playlistId, debug = false }) {
    // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95%e5%88%86%e7%b1%bb
    let catlistResponse;
    try {
        catlistResponse = await playlist_catlist({});
        const dumpFile = path.join(__dirname, "../../temp", `playlist-${playlistId}.json`);
        fs.writeFileSync(dumpFile, JSON.stringify(catlistResponse));
    } catch (err) {
        console.error(err);
        return;
    }

    console.log(catlistResponse);

    // Draft persistence code, not yet adapted for playlists (it still targets the `lyric` table):
    // let lyricInfo = {
    //     playlistId: playlistId,
    //     lyric: lyric.lyric,
    //     version: lyric.version,
    // };
    // // console.log("lyricInfo", lyricInfo);
    // dbUtils.query('INSERT IGNORE INTO lyric SET ?', {
    //     playlist_id: lyricInfo.playlistId,
    //     lyric: lyricInfo.lyric,
    //     version: lyricInfo.version,
    // });
    // return lyricInfo;
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
fetch: fetch,
|
||||||
|
// fetchAll: fetchAll,
|
||||||
|
}
|
@@ -76,6 +76,8 @@ async function fetch({ songId, debug = false }) {
|
|||||||
let songInfoDict = JSON.parse(songInfoJSONString);
|
let songInfoDict = JSON.parse(songInfoJSONString);
|
||||||
// console.log(songInfoDict);
|
// console.log(songInfoDict);
|
||||||
|
|
||||||
|
// TODO 考虑歌曲别名 例如:https://music.163.com/#/song?id=26830207
|
||||||
|
|
||||||
let title = /<meta property="og:title" content="(.*?)" \/>/.exec(html)[1];
|
let title = /<meta property="og:title" content="(.*?)" \/>/.exec(html)[1];
|
||||||
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
|
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
|
||||||
let artist = /<meta property="og:music:artist" content="(.*?)" \/>/.exec(html)[1];
|
let artist = /<meta property="og:music:artist" content="(.*?)" \/>/.exec(html)[1];
|
||||||
|
@@ -7,9 +7,8 @@ const sleepUtils = require('../../../utils/sleepUtils');
|
|||||||
const dbUtils = global.dbUtils;
|
const dbUtils = global.dbUtils;
|
||||||
|
|
||||||
// 获取用户详情
|
// 获取用户详情
|
||||||
async function fetch({ userId }) {
|
async function fetch({ userId, debug = false }) {
|
||||||
let url = `https://music.163.com/user/home?id=${userId}`;
|
let url = `https://music.163.com/user/home?id=${userId}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
var html = fs.readFileSync(path.join(__dirname, "../../temp", ` user-${userId}.html`), 'utf8');
|
var html = fs.readFileSync(path.join(__dirname, "../../temp", ` user-${userId}.html`), 'utf8');
|
||||||
} catch (errors) {
|
} catch (errors) {
|
||||||
|
Reference in New Issue
Block a user