数据库添加索引;update
This commit is contained in:
parent
50d3555dd7
commit
b35918faef
9
.vscode/launch.json
vendored
9
.vscode/launch.json
vendored
@ -30,6 +30,15 @@
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"program": "${workspaceFolder}\\test.js"
|
||||
},
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"name": "node watch",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"program": "${workspaceFolder}\\watch.js"
|
||||
}
|
||||
]
|
||||
}
|
24
config.json
24
config.json
@ -1,10 +1,34 @@
|
||||
{
|
||||
"mysql": {
|
||||
"charset": "utf8mb4",
|
||||
"host": "rm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com",
|
||||
"user": "root",
|
||||
"password": "Oj13EzoppxXvMmjPKh",
|
||||
"port": 3306,
|
||||
"database": ""
|
||||
},
|
||||
"mysql_aliyun": {
|
||||
"charset": "utf8mb4",
|
||||
"host": "rm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com",
|
||||
"user": "root",
|
||||
"password": "Oj13EzoppxXvMmjPKh",
|
||||
"port": 3306,
|
||||
"database": ""
|
||||
},
|
||||
"mysql_local": {
|
||||
"charset": "utf8mb4",
|
||||
"host": "localhost",
|
||||
"user": "root",
|
||||
"password": "root",
|
||||
"port": 3306,
|
||||
"database": ""
|
||||
},
|
||||
"mysql_server": {
|
||||
"charset": "utf8mb4",
|
||||
"host": "39.99.244.156",
|
||||
"user": "root",
|
||||
"password": "Oj13EzoppxXvMmjPKh",
|
||||
"port": 3306,
|
||||
"database": ""
|
||||
}
|
||||
}
|
@ -75,101 +75,116 @@ async function update() {
|
||||
/**
|
||||
* 统计数据库中数据
|
||||
*/
|
||||
let watchParam = {
|
||||
statisticTime: Date.now(),
|
||||
songCount: 0,
|
||||
albumCount: 0,
|
||||
artistCount: 0,
|
||||
lyricCount: 0,
|
||||
commentCount: 0,
|
||||
commentTotalCount: 0,
|
||||
};
|
||||
let oldWatchParam = {};
|
||||
async function watch() {
|
||||
let sql = `
|
||||
SELECT
|
||||
song_count,
|
||||
song_waiting_1 + song_waiting_2 as song_waiting,
|
||||
console.log(`开始统计 ... ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`);
|
||||
let statisticTime = Date.now();
|
||||
let newWatchParam = {};
|
||||
let sqls = [
|
||||
// InnoDB count(*) 会扫描全表,粗略数据可以通过 show table status 查看
|
||||
{
|
||||
name: "songCount",
|
||||
sql: `SELECT count(*) AS count FROM song`,
|
||||
}, {
|
||||
name: "songWaiting",
|
||||
sql: `SELECT count(DISTINCT song_id) AS count
|
||||
FROM ( SELECT song_id FROM song_artist_relation UNION SELECT song_id FROM song_album_relation ) t_tmp
|
||||
WHERE song_id NOT IN ( SELECT song_id FROM song )`,
|
||||
}, {
|
||||
name: "albumCount",
|
||||
sql: `SELECT count(*) AS count FROM album`,
|
||||
}, {
|
||||
name: "albumWaiting",
|
||||
sql: `SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )`,
|
||||
}, {
|
||||
name: "artistCount",
|
||||
sql: `SELECT count(*) AS count FROM artist`,
|
||||
}, {
|
||||
name: "artistWaiting",
|
||||
sql: `SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )`,
|
||||
}, {
|
||||
name: "lyricCount",
|
||||
sql: `SELECT count(*) AS count FROM lyric`,
|
||||
}, {
|
||||
name: "commentCount",
|
||||
sql: `SELECT count( DISTINCT song_id ) AS count FROM comment`,
|
||||
}, {
|
||||
name: "commentTotalCount",
|
||||
sql: `SELECT count(*) AS count FROM comment`,
|
||||
}, {
|
||||
name: "songAlbumCount",
|
||||
sql: `SELECT count(*) AS count FROM song_album_relation`,
|
||||
}, {
|
||||
name: "songArtistCount",
|
||||
sql: `SELECT count(*) AS count FROM song_artist_relation`,
|
||||
}
|
||||
];
|
||||
let sqlsTimeSpent = 0;
|
||||
let promiseList = [];
|
||||
for (let i = 0; i < sqls.length; i++) {
|
||||
const sql = sqls[i];
|
||||
if (!sql.sql) continue; // 跳过注释掉SQL的项
|
||||
promiseList.push(new Promise(async (resolve, reject) => {
|
||||
// console.log(`query ${sql.name} ...`);
|
||||
let sqlStartTime = Date.now();
|
||||
let result = await dbUtils.query(sql.sql, []);
|
||||
let sqlTimeSpent = Date.now() - sqlStartTime;
|
||||
sqlsTimeSpent += sqlTimeSpent;
|
||||
newWatchParam[sql.name] = result[0].count;
|
||||
console.log(`query ${sql.name} finished.\tspend time: ${sqlTimeSpent}ms (${(sqlTimeSpent / 1000).toFixed(2)}s),\tcount: ${newWatchParam[sql.name]}`);
|
||||
resolve();
|
||||
}));
|
||||
}
|
||||
await Promise.all(promiseList);
|
||||
|
||||
album_count,
|
||||
album_waiting,
|
||||
|
||||
artist_count,
|
||||
artist_waiting,
|
||||
|
||||
lyric_count,
|
||||
|
||||
comment_count,
|
||||
comment_total_count,
|
||||
|
||||
song_album_count,
|
||||
song_artist_count
|
||||
FROM
|
||||
( SELECT count(*) AS song_count FROM song ) t_song,
|
||||
( SELECT count( DISTINCT song_id ) as song_waiting_1 FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_artist,
|
||||
( SELECT count( DISTINCT song_id ) as song_waiting_2 FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_album,
|
||||
|
||||
( SELECT count(*) AS album_count FROM album ) t_album,
|
||||
( SELECT count( DISTINCT album_id ) as album_waiting FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) ) as t_album_waiting_song_album,
|
||||
|
||||
( SELECT count(*) AS artist_count FROM artist ) t_artist,
|
||||
( SELECT count( DISTINCT artist_id ) as artist_waiting FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) ) as t_album_waiting_song_artist,
|
||||
|
||||
( SELECT count(*) AS lyric_count FROM lyric ) t_lyric,
|
||||
|
||||
( SELECT count( DISTINCT song_id ) AS comment_count, count( comment_id ) AS comment_total_count FROM comment ) t_comment,
|
||||
|
||||
( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album,
|
||||
( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist
|
||||
`;
|
||||
console.log("开始统计 ...");
|
||||
let startTime = Date.now();
|
||||
let result = await dbUtils.query(sql, []);
|
||||
let timeSpent = Date.now() - startTime;
|
||||
|
||||
let songCount = result[0].song_count;
|
||||
let songWaiting = result[0].song_waiting;
|
||||
|
||||
let albumCount = result[0].album_count;
|
||||
let albumWaiting = result[0].album_waiting;
|
||||
|
||||
let artistCount = result[0].artist_count;
|
||||
let artistWaiting = result[0].artist_waiting;
|
||||
|
||||
let lyricCount = result[0].lyric_count;
|
||||
|
||||
let commentCount = result[0].comment_count;
|
||||
let commentTotalCount = result[0].comment_total_count;
|
||||
|
||||
let songAlbumCount = result[0].song_album_count;
|
||||
let songArtistCount = result[0].song_artist_count;
|
||||
|
||||
let statisticTimeDelta = Date.now() - watchParam.statisticTime;
|
||||
// let tableCountResult = await dbUtils.query("show table status");
|
||||
// let tableCount = {}; // 查询近似值代替精确查询
|
||||
// tableCountResult.forEach(rowData => tableCount[rowData.Name] = rowData.Rows);
|
||||
// newWatchParam['commentTotalCount'] = tableCount['comment'];
|
||||
|
||||
let statisticTimeDelta = Date.now() - statisticTime;
|
||||
let statisticsString = [
|
||||
`${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`,
|
||||
`[与上次运行统计时相比] deltaTime: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s)`,
|
||||
`song: ${songCount - watchParam.songCount}, album: ${albumCount - watchParam.albumCount}, artist: ${artistCount - watchParam.artistCount}, lyric: ${lyricCount - watchParam.lyricCount}, comment: ${commentCount - watchParam.commentCount}(song)/${commentTotalCount - watchParam.commentTotalCount}(comment)`,
|
||||
``,
|
||||
`统计完成 ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`,
|
||||
`spend time: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s; ${(statisticTimeDelta / (60 * 1000)).toFixed(2)}min), sql query time (sum): ${sqlsTimeSpent}ms (${(sqlsTimeSpent / 1000).toFixed(2)}s; ${(sqlsTimeSpent / (60 * 1000)).toFixed(2)}min)`,
|
||||
`[与上次运行统计时相比]`,
|
||||
[
|
||||
`song: ${newWatchParam['songCount'] - oldWatchParam['songCount']}`,
|
||||
`album: ${newWatchParam['albumCount'] - oldWatchParam['albumCount']}`,
|
||||
`artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`,
|
||||
`lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`,
|
||||
`comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`,
|
||||
].join(', '),
|
||||
`[已爬取]`,
|
||||
`song: ${songCount}, album: ${albumCount}, artist: ${artistCount}, lyric: ${lyricCount}, comment: ${commentCount}(song)/${commentTotalCount}(comment)`,
|
||||
[
|
||||
`song: ${newWatchParam['songCount']}`,
|
||||
`album: ${newWatchParam['albumCount']}`,
|
||||
`artist: ${newWatchParam['artistCount']}`,
|
||||
`lyric: ${newWatchParam['lyricCount']}`,
|
||||
`comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
|
||||
].join(', '),
|
||||
`[待爬取]`,
|
||||
`song: ${songWaiting}, album: ${albumWaiting}, artist: ${artistWaiting}, lyric: ${songCount - lyricCount}, comment: ${songCount - commentCount}`,
|
||||
[
|
||||
`song: ${newWatchParam['songWaiting']}`,
|
||||
`album: ${newWatchParam['albumWaiting']}`,
|
||||
`artist: ${newWatchParam['artistWaiting']}`,
|
||||
`lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
|
||||
`comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
|
||||
].join(', '),
|
||||
`[总计] (已爬取 + 待爬取)`,
|
||||
`song: ${songCount + songWaiting}, album: ${albumCount + albumWaiting}, artist: ${artistCount + artistWaiting}, lyric: ${songCount}, comment: ${songCount}`,
|
||||
[
|
||||
`song: ${newWatchParam['songCount'] + newWatchParam['songWaiting']}`,
|
||||
`album: ${newWatchParam['albumCount'] + newWatchParam['albumWaiting']}`,
|
||||
`artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
|
||||
`lyric: ${newWatchParam['songCount']}`,
|
||||
`comment: ${newWatchParam['songCount']}`,
|
||||
].join(', '),
|
||||
`[关联关系统计]`,
|
||||
`song-album: ${songAlbumCount}, song-artist: ${songArtistCount}`,
|
||||
`sql query time: ${timeSpent}ms (${(timeSpent / 1000).toFixed(2)}s)`,
|
||||
`song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,
|
||||
``
|
||||
].join('\n');
|
||||
console.log(statisticsString);
|
||||
watchParam = {
|
||||
statisticTime: Date.now(),
|
||||
songCount: songCount,
|
||||
albumCount: albumCount,
|
||||
artistCount: artistCount,
|
||||
lyricCount: lyricCount,
|
||||
commentCount: commentCount,
|
||||
commentTotalCount: commentTotalCount,
|
||||
}
|
||||
oldWatchParam = newWatchParam;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -18,7 +18,8 @@ CREATE TABLE `artist` (
|
||||
`pub_date` varchar(100) NOT NULL COMMENT '发布日期',
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`artist_id`)
|
||||
PRIMARY KEY (`artist_id`),
|
||||
KEY `artist_id` (`artist_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `album` (
|
||||
@ -32,7 +33,8 @@ CREATE TABLE `album` (
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
|
||||
PRIMARY KEY (`album_id`)
|
||||
PRIMARY KEY (`album_id`),
|
||||
KEY `album_id` (`album_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `song_album_relation` (
|
||||
@ -40,7 +42,9 @@ CREATE TABLE `song_album_relation` (
|
||||
`album_id` int(10) unsigned NOT NULL COMMENT '专辑id',
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`song_id`,`album_id`)
|
||||
PRIMARY KEY (`song_id`,`album_id`),
|
||||
KEY `song_id` (`song_id`),
|
||||
KEY `album_id` (`album_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `song_artist_relation` (
|
||||
@ -48,7 +52,9 @@ CREATE TABLE `song_artist_relation` (
|
||||
`artist_id` int(10) unsigned NOT NULL COMMENT '歌手id',
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY `song_id` (`song_id`,`artist_id`)
|
||||
PRIMARY KEY `song_id` (`song_id`,`artist_id`),
|
||||
KEY `song_id` (`song_id`),
|
||||
KEY `artist_id` (`artist_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `lyric` (
|
||||
@ -57,7 +63,8 @@ CREATE TABLE `lyric` (
|
||||
`lyric` text NOT NULL COMMENT '歌词',
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`song_id`,`version`)
|
||||
PRIMARY KEY (`song_id`,`version`),
|
||||
KEY `song_id` (`song_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `user` (
|
||||
@ -81,7 +88,8 @@ CREATE TABLE `comment` (
|
||||
`comment_type` tinyint(4) unsigned NOT NULL COMMENT '评论类型 0-comments 1-hotComments 2-topComments',
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`comment_id`)
|
||||
PRIMARY KEY (`comment_id`),
|
||||
INDEX `song_id` (`song_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `comment_progress` (
|
||||
|
@ -28,6 +28,13 @@ async function fetchAll() {
|
||||
|
||||
// 获取歌词详情
|
||||
async function fetch({ songId, debug = false }) {
|
||||
let result = await dbUtils.query('SELECT count(*) as count FROM lyric WHERE song_id = ?', [songId]);
|
||||
if (result[0].count > 0 && !debug) {
|
||||
// 这里暂时跳过,后期可能要考虑歌词version更新的问题
|
||||
console.log(`数据库中已有数据,跳过 songId: ${songId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`; // &kv=1&tv=-1
|
||||
try {
|
||||
// var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8');
|
||||
|
@ -27,9 +27,9 @@ async function getFromDatabase({ songId }) {
|
||||
async function fetchAll() {
|
||||
console.log("start fetching songs ...");
|
||||
var songIds = await dbUtils.query(`
|
||||
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
|
||||
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song )
|
||||
UNION
|
||||
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
|
||||
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song )
|
||||
`, []);
|
||||
songIds = songIds.map(item => item.song_id);
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
|
@ -8,7 +8,7 @@ let pool = null;
|
||||
function create({ database, connectionLimit = 10 }) {
|
||||
let config = {
|
||||
connectionLimit: connectionLimit, //连接数量,默认是10
|
||||
...globalConfig.mysql,
|
||||
...globalConfig[global.dbConfig || 'mysql'],
|
||||
database: database,
|
||||
};
|
||||
// console.log(config);
|
||||
|
@ -12,7 +12,7 @@ function create({ database }) {
|
||||
|
||||
async function query(sql, params) {
|
||||
let config = {
|
||||
...globalConfig.mysql,
|
||||
...globalConfig[global.dbConfig || 'mysql'],
|
||||
database: databaseName,
|
||||
};
|
||||
// console.log(config);
|
||||
|
3
watch.js
3
watch.js
@ -1,10 +1,11 @@
|
||||
let keepWatching = true;
|
||||
if (keepWatching) {
|
||||
global.useMysqlPool = true;
|
||||
global.connectionLimit = 1;
|
||||
global.connectionLimit = 15;
|
||||
} else {
|
||||
global.useMysqlPool = false;
|
||||
}
|
||||
global.dbConfig = 'mysql_local';
|
||||
|
||||
const neteaseMusic = require('./netease_music/index');
|
||||
const sleepUtils = require('./utils/sleepUtils');
|
||||
|
Loading…
Reference in New Issue
Block a user