1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

数据库添加索引;update

This commit is contained in:
程序员小墨 2022-10-05 11:41:30 +08:00
parent 50d3555dd7
commit b35918faef
9 changed files with 159 additions and 95 deletions

9
.vscode/launch.json vendored
View File

@ -30,6 +30,15 @@
"<node_internals>/**"
],
"program": "${workspaceFolder}\\test.js"
},
{
"type": "node",
"request": "launch",
"name": "node watch",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\watch.js"
}
]
}

View File

@ -1,10 +1,34 @@
{
"mysql": {
"charset": "utf8mb4",
"host": "rm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com",
"user": "root",
"password": "Oj13EzoppxXvMmjPKh",
"port": 3306,
"database": ""
},
"mysql_aliyun": {
"charset": "utf8mb4",
"host": "rm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com",
"user": "root",
"password": "Oj13EzoppxXvMmjPKh",
"port": 3306,
"database": ""
},
"mysql_local": {
"charset": "utf8mb4",
"host": "localhost",
"user": "root",
"password": "root",
"port": 3306,
"database": ""
},
"mysql_server": {
"charset": "utf8mb4",
"host": "39.99.244.156",
"user": "root",
"password": "Oj13EzoppxXvMmjPKh",
"port": 3306,
"database": ""
}
}

View File

@ -75,101 +75,116 @@ async function update() {
/**
* 统计数据库中数据
*/
let watchParam = {
statisticTime: Date.now(),
songCount: 0,
albumCount: 0,
artistCount: 0,
lyricCount: 0,
commentCount: 0,
commentTotalCount: 0,
};
let oldWatchParam = {};
/**
 * Gathers row-count statistics through dbUtils.query and prints a multi-line
 * report with console.log. Takes no parameters and has no return statement.
 *
 * NOTE(review): this span is a rendered diff hunk. Removed and added lines are
 * interleaved without +/- markers, so the text below is not one runnable
 * function body. Two implementations are visible:
 *   - one that sends a single combined SELECT, copies the columns of
 *     result[0] into local variables and compares them with watchParam;
 *   - one that builds a list of { name, sql } entries, runs every entry
 *     through dbUtils.query concurrently (Promise.all), stores
 *     result[0].count under newWatchParam[name] and compares the values
 *     with oldWatchParam.
 * Presumably the list-based variant is the post-commit code; confirm against
 * the repository before relying on this.
 *
 * NOTE(review): in the list-based variant each query is wrapped in
 * new Promise(async (resolve, reject) => ...) without try/catch and reject is
 * never called. If dbUtils.query rejects, that wrapper promise never settles
 * and the awaited Promise.all stays pending.
 *
 * NOTE(review): oldWatchParam is initialised to {} just above this function,
 * so on the first call every "compared with last run" delta is
 * number - undefined and prints NaN.
 *
 * NOTE(review): sqlsTimeSpent sums the individual query durations; because
 * the queries run concurrently it can exceed the wall-clock time reported as
 * statisticTimeDelta.
 */
async function watch() {
let sql = `
SELECT
song_count,
song_waiting_1 + song_waiting_2 as song_waiting,
console.log(`开始统计 ... ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`);
let statisticTime = Date.now();
let newWatchParam = {};
let sqls = [
// InnoDB count(*) 会扫描全表,粗略数据可以通过 show table status 查看
{
name: "songCount",
sql: `SELECT count(*) AS count FROM song`,
}, {
name: "songWaiting",
sql: `SELECT count(DISTINCT song_id) AS count
FROM ( SELECT song_id FROM song_artist_relation UNION SELECT song_id FROM song_album_relation ) t_tmp
WHERE song_id NOT IN ( SELECT song_id FROM song )`,
}, {
name: "albumCount",
sql: `SELECT count(*) AS count FROM album`,
}, {
name: "albumWaiting",
sql: `SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )`,
}, {
name: "artistCount",
sql: `SELECT count(*) AS count FROM artist`,
}, {
name: "artistWaiting",
sql: `SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )`,
}, {
name: "lyricCount",
sql: `SELECT count(*) AS count FROM lyric`,
}, {
name: "commentCount",
sql: `SELECT count( DISTINCT song_id ) AS count FROM comment`,
}, {
name: "commentTotalCount",
sql: `SELECT count(*) AS count FROM comment`,
}, {
name: "songAlbumCount",
sql: `SELECT count(*) AS count FROM song_album_relation`,
}, {
name: "songArtistCount",
sql: `SELECT count(*) AS count FROM song_artist_relation`,
}
];
let sqlsTimeSpent = 0;
let promiseList = [];
for (let i = 0; i < sqls.length; i++) {
const sql = sqls[i];
if (!sql.sql) continue; // 跳过注释掉SQL的项
promiseList.push(new Promise(async (resolve, reject) => {
// console.log(`query ${sql.name} ...`);
let sqlStartTime = Date.now();
let result = await dbUtils.query(sql.sql, []);
let sqlTimeSpent = Date.now() - sqlStartTime;
sqlsTimeSpent += sqlTimeSpent;
newWatchParam[sql.name] = result[0].count;
console.log(`query ${sql.name} finished.\tspend time: ${sqlTimeSpent}ms (${(sqlTimeSpent / 1000).toFixed(2)}s),\tcount: ${newWatchParam[sql.name]}`);
resolve();
}));
}
await Promise.all(promiseList);
album_count,
album_waiting,
artist_count,
artist_waiting,
lyric_count,
comment_count,
comment_total_count,
song_album_count,
song_artist_count
FROM
( SELECT count(*) AS song_count FROM song ) t_song,
( SELECT count( DISTINCT song_id ) as song_waiting_1 FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_artist,
( SELECT count( DISTINCT song_id ) as song_waiting_2 FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_album,
( SELECT count(*) AS album_count FROM album ) t_album,
( SELECT count( DISTINCT album_id ) as album_waiting FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) ) as t_album_waiting_song_album,
( SELECT count(*) AS artist_count FROM artist ) t_artist,
( SELECT count( DISTINCT artist_id ) as artist_waiting FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) ) as t_album_waiting_song_artist,
( SELECT count(*) AS lyric_count FROM lyric ) t_lyric,
( SELECT count( DISTINCT song_id ) AS comment_count, count( comment_id ) AS comment_total_count FROM comment ) t_comment,
( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album,
( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist
`;
console.log("开始统计 ...");
let startTime = Date.now();
let result = await dbUtils.query(sql, []);
let timeSpent = Date.now() - startTime;
let songCount = result[0].song_count;
let songWaiting = result[0].song_waiting;
let albumCount = result[0].album_count;
let albumWaiting = result[0].album_waiting;
let artistCount = result[0].artist_count;
let artistWaiting = result[0].artist_waiting;
let lyricCount = result[0].lyric_count;
let commentCount = result[0].comment_count;
let commentTotalCount = result[0].comment_total_count;
let songAlbumCount = result[0].song_album_count;
let songArtistCount = result[0].song_artist_count;
let statisticTimeDelta = Date.now() - watchParam.statisticTime;
// let tableCountResult = await dbUtils.query("show table status");
// let tableCount = {}; // 查询近似值代替精确查询
// tableCountResult.forEach(rowData => tableCount[rowData.Name] = rowData.Rows);
// newWatchParam['commentTotalCount'] = tableCount['comment'];
let statisticTimeDelta = Date.now() - statisticTime;
let statisticsString = [
`${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`,
`[与上次运行统计时相比] deltaTime: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s)`,
`song: ${songCount - watchParam.songCount}, album: ${albumCount - watchParam.albumCount}, artist: ${artistCount - watchParam.artistCount}, lyric: ${lyricCount - watchParam.lyricCount}, comment: ${commentCount - watchParam.commentCount}(song)/${commentTotalCount - watchParam.commentTotalCount}(comment)`,
``,
`统计完成 ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`,
`spend time: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s; ${(statisticTimeDelta / (60 * 1000)).toFixed(2)}min), sql query time (sum): ${sqlsTimeSpent}ms (${(sqlsTimeSpent / 1000).toFixed(2)}s; ${(sqlsTimeSpent / (60 * 1000)).toFixed(2)}min)`,
`[与上次运行统计时相比]`,
[
`song: ${newWatchParam['songCount'] - oldWatchParam['songCount']}`,
`album: ${newWatchParam['albumCount'] - oldWatchParam['albumCount']}`,
`artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`,
`lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`,
`comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`,
].join(', '),
`[已爬取]`,
`song: ${songCount}, album: ${albumCount}, artist: ${artistCount}, lyric: ${lyricCount}, comment: ${commentCount}(song)/${commentTotalCount}(comment)`,
[
`song: ${newWatchParam['songCount']}`,
`album: ${newWatchParam['albumCount']}`,
`artist: ${newWatchParam['artistCount']}`,
`lyric: ${newWatchParam['lyricCount']}`,
`comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
].join(', '),
`[待爬取]`,
`song: ${songWaiting}, album: ${albumWaiting}, artist: ${artistWaiting}, lyric: ${songCount - lyricCount}, comment: ${songCount - commentCount}`,
[
`song: ${newWatchParam['songWaiting']}`,
`album: ${newWatchParam['albumWaiting']}`,
`artist: ${newWatchParam['artistWaiting']}`,
`lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
`comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
].join(', '),
`[总计] (已爬取 + 待爬取)`,
`song: ${songCount + songWaiting}, album: ${albumCount + albumWaiting}, artist: ${artistCount + artistWaiting}, lyric: ${songCount}, comment: ${songCount}`,
[
`song: ${newWatchParam['songCount'] + newWatchParam['songWaiting']}`,
`album: ${newWatchParam['albumCount'] + newWatchParam['albumWaiting']}`,
`artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
`lyric: ${newWatchParam['songCount']}`,
`comment: ${newWatchParam['songCount']}`,
].join(', '),
`[关联关系统计]`,
`song-album: ${songAlbumCount}, song-artist: ${songArtistCount}`,
`sql query time: ${timeSpent}ms (${(timeSpent / 1000).toFixed(2)}s)`,
`song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,
``
].join('\n');
console.log(statisticsString);
watchParam = {
statisticTime: Date.now(),
songCount: songCount,
albumCount: albumCount,
artistCount: artistCount,
lyricCount: lyricCount,
commentCount: commentCount,
commentTotalCount: commentTotalCount,
}
oldWatchParam = newWatchParam;
}
/**

View File

@ -18,7 +18,8 @@ CREATE TABLE `artist` (
`pub_date` varchar(100) NOT NULL COMMENT '发布日期',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`artist_id`)
PRIMARY KEY (`artist_id`),
KEY `artist_id` (`artist_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `album` (
@ -32,7 +33,8 @@ CREATE TABLE `album` (
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
PRIMARY KEY (`album_id`)
PRIMARY KEY (`album_id`),
KEY `album_id` (`album_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `song_album_relation` (
@ -40,7 +42,9 @@ CREATE TABLE `song_album_relation` (
`album_id` int(10) unsigned NOT NULL COMMENT '专辑id',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`,`album_id`)
PRIMARY KEY (`song_id`,`album_id`),
KEY `song_id` (`song_id`),
KEY `album_id` (`album_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `song_artist_relation` (
@ -48,7 +52,9 @@ CREATE TABLE `song_artist_relation` (
`artist_id` int(10) unsigned NOT NULL COMMENT '歌手id',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY `song_id` (`song_id`,`artist_id`)
PRIMARY KEY `song_id` (`song_id`,`artist_id`),
KEY `song_id` (`song_id`),
KEY `artist_id` (`artist_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `lyric` (
@ -57,7 +63,8 @@ CREATE TABLE `lyric` (
`lyric` text NOT NULL COMMENT '歌词',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`,`version`)
PRIMARY KEY (`song_id`,`version`),
KEY `song_id` (`song_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `user` (
@ -81,7 +88,8 @@ CREATE TABLE `comment` (
`comment_type` tinyint(4) unsigned NOT NULL COMMENT '评论类型 0-comments 1-hotComments 2-topComments',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`comment_id`)
PRIMARY KEY (`comment_id`),
INDEX `song_id` (`song_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE `comment_progress` (

View File

@ -28,6 +28,13 @@ async function fetchAll() {
// 获取歌词详情
async function fetch({ songId, debug = false }) {
let result = await dbUtils.query('SELECT count(*) as count FROM lyric WHERE song_id = ?', [songId]);
if (result[0].count > 0 && !debug) {
// 这里暂时跳过;后期可能要考虑歌词 version 更新的问题
console.log(`数据库中已有数据,跳过 songId: ${songId}`);
return;
}
var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`; // &kv=1&tv=-1
try {
// var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8');

View File

@ -27,9 +27,9 @@ async function getFromDatabase({ songId }) {
async function fetchAll() {
console.log("start fetching songs ...");
var songIds = await dbUtils.query(`
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song )
UNION
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song )
`, []);
songIds = songIds.map(item => item.song_id);
for (let i = 0; i < songIds.length; i++) {

View File

@ -8,7 +8,7 @@ let pool = null;
function create({ database, connectionLimit = 10 }) {
let config = {
connectionLimit: connectionLimit, //连接数量默认是10
...globalConfig.mysql,
...globalConfig[global.dbConfig || 'mysql'],
database: database,
};
// console.log(config);

View File

@ -12,7 +12,7 @@ function create({ database }) {
async function query(sql, params) {
let config = {
...globalConfig.mysql,
...globalConfig[global.dbConfig || 'mysql'],
database: databaseName,
};
// console.log(config);

View File

@ -1,10 +1,11 @@
// Startup switches written onto the Node global object before the project
// modules below are required.
// keepWatching decides the value stored in global.useMysqlPool; how that flag
// is consumed is not visible here (presumably by the database helpers; confirm).
let keepWatching = true;
if (keepWatching) {
global.useMysqlPool = true;
// NOTE(review): this page is a rendered diff, so the previous value (1) and
// the new value (15) both appear. Read as plain statements, the second
// assignment overwrites the first.
global.connectionLimit = 1;
global.connectionLimit = 15;
} else {
global.useMysqlPool = false;
}
// Name of the connection profile to use. The database helpers shown elsewhere
// on this page read globalConfig[global.dbConfig || 'mysql'], and
// 'mysql_local' is one of the keys in the config JSON.
global.dbConfig = 'mysql_local';
const neteaseMusic = require('./netease_music/index');
const sleepUtils = require('./utils/sleepUtils');