1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

合并仓库前的准备

This commit is contained in:
程序员小墨 2022-10-17 13:11:15 +08:00
parent 2ccb0e3c1f
commit 8dbc539ced
9 changed files with 256 additions and 256 deletions

View File

@ -1,20 +0,0 @@
if (process.argv.length <= 2) {
let output = [
"参数不够",
"node index --utils [song|album|artist|lyric|comment] --min [number] --max [number] --order [false|ASC|DESC] --limit [number]",
// "",
// "node index --utils song --min xxx --max xxx --order ASC --limit 2000",
// "node index --utils album --min xxx --max xxx --order ASC --limit 2000",
// "node index --utils artist --min xxx --max xxx --order ASC --limit 2000",
// "node index --utils lyric --min xxx --max xxx --order ASC --limit 2000",
// "node index --utils comment --min xxx --max xxx --order ASC --limit 2000",
].join('\n');
console.log(output);
return;
}
var args = require('minimist')(process.argv.slice(2));
console.log("args:", args);
global.useMysqlPool = true;
const neteaseMusic = require('./netease_music/index');
neteaseMusic.main(args);

View File

@ -1,230 +1,20 @@
// Load modules
const fs = require('fs');
const path = require('path');
// Pick the pooled or the plain DB helper. global.useMysqlPool is read once, at load time,
// so entry scripts assign it before requiring this module.
const dbUtils = require(global.useMysqlPool ? '../utils/dbPoolUtils' : '../utils/dbUtils');
const sleepUtils = require('../utils/sleepUtils');
// Database connection pool
dbUtils.create({
database: "neteaseMusic", // target database
connectionLimit: global.connectionLimit || 10, // pool size; 10 when global.connectionLimit is unset or falsy
});
// Publish the helper on `global` (presumably for the getInfo modules; confirm against their code).
global.dbUtils = dbUtils;
console.log("global.useMysqlPool:", !!global.useMysqlPool);
// Pause between two consecutive requests (unit not visible here, presumably milliseconds; TODO confirm)
global.sleepTime = 10;
// Load the crawler utilities
const songInfoUtils = require('./src/getInfo/songInfoUtils');
const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
const commentUtils = require('./src/getInfo/commentUtils');
const playlistUtils = require('./src/getInfo/playlistUtils');
/**
 * Manual smoke test: bulk-inserts two sample rows into `song` and prints
 * whatever the database helper returns.
 *
 * Earlier manual checks, kept for reference:
 *   albumInfoUtils.fetch({ albumId: "9156", debug: true })
 *   artistInfoUtils.fetch({ artistId: "12023508" })
 *   songInfoUtils.fetch({ songId: "437608327" })
 *   playlistUtils.fetch({ playlistId: "4980157066", debug: true })
 *   albumInfoUtils.getFromDatabase({ albumId: "9156" })
 *   artistInfoUtils.getFromDatabase({ artistId: "12023508" })
 *   songInfoUtils.getFromDatabase({ songId: "437608327" })
 * Note: not every artist has a personal homepage,
 * e.g. https://music.163.com/#/artist?id=1079075
 *
 * @returns {Promise<void>}
 */
async function test() {
    console.log("neteaseMusic test...");
    const insertSql = 'INSERT IGNORE INTO song (`song_id`, `title`, `image`, `pub_date`) VALUES ?';
    const sampleRows = [
        [100, '4', '3', '4'],
        [200, '23', '4', '5'],
    ];
    // The single `?` placeholder receives the whole list of rows.
    const insertResult = await dbUtils.query(insertSql, [sampleRows]);
    console.log(insertResult);
}
/**
 * Crawl loop: repeatedly runs the crawler selected by `args.utils`,
 * pausing 2000 (passed to sleepUtils.sleep) between rounds.
 *
 * @param {object} args - Parsed CLI options; `args.utils` picks the crawler
 *   (song | album | artist | lyric | comment). The whole object is forwarded
 *   to the crawler's `fetchAll` as `{ args }`.
 * @returns {Promise<void>} Resolves only when `args.utils` matches no crawler.
 */
async function main(args) {
    console.log("neteaseMusic Start fetch ...");
    const crawlers = [
        ["song", songInfoUtils],
        ["album", albumInfoUtils],
        ["artist", artistInfoUtils],
        ["lyric", lyricInfoUtils],
        ["comment", commentUtils],
    ];
    for (;;) {
        // Disabled dirty-data cleanup, kept for reference:
        //   DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0
        //   DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0
        // Looked up on every round, with loose equality, exactly like the original chain.
        const selected = crawlers.find(([utilName]) => args.utils == utilName);
        if (selected === undefined) {
            console.log("utils参数不匹配退出");
            return;
        }
        const crawler = selected[1];
        await crawler.fetchAll({ args: args });
        await sleepUtils.sleep(2000);
    }
}
/**
 * Data refresh (re-crawl): endlessly re-runs the album crawler in update
 * mode, pausing 2000 (passed to sleepUtils.sleep) after every round.
 *
 * @returns {Promise<never>} Never resolves on its own; it only ends by rejecting.
 */
async function update() {
    console.log("neteaseMusic Start update ...");
    for (;;) {
        const refreshOptions = { isUpdate: true };
        await albumInfoUtils.fetchAll(refreshOptions);
        await sleepUtils.sleep(2000);
    }
}
/**
* 统计数据库中数据
*/
let oldWatchParam = {};
async function watch() {
console.log(`开始统计 ... ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`);
let statisticTime = Date.now();
let newWatchParam = {};
let sqls = [
// InnoDB count(*) 会扫描全表,粗略数据可以通过 show table status 查看
{
name: "songCount",
sql: `SELECT count(*) AS count FROM song`,
}, {
name: "songWaiting",
sql: `SELECT count(DISTINCT song_id) AS count
FROM ( SELECT song_id FROM song_artist_relation UNION SELECT song_id FROM song_album_relation ) t_tmp
WHERE song_id NOT IN ( SELECT song_id FROM song )`,
}, {
name: "albumCount",
sql: `SELECT count(*) AS count FROM album`,
}, {
name: "albumWaiting",
sql: `SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )`,
}, {
name: "artistCount",
sql: `SELECT count(*) AS count FROM artist`,
}, {
name: "artistWaiting",
sql: `SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )`,
}, {
name: "lyricCount",
sql: `SELECT count(*) AS count FROM lyric`,
}, {
name: "commentCount",
sql: `SELECT count( DISTINCT song_id ) AS count FROM comment`,
}, {
name: "commentTotalCount",
sql: `SELECT count(*) AS count FROM comment`,
}, {
name: "userCount",
sql: `SELECT count(*) AS count FROM user`,
}, {
name: "songAlbumCount",
sql: `SELECT count(*) AS count FROM song_album_relation`,
}, {
name: "songArtistCount",
sql: `SELECT count(*) AS count FROM song_artist_relation`,
}
];
let sqlsTimeSpent = 0;
let promiseList = [];
for (let i = 0; i < sqls.length; i++) {
const sql = sqls[i];
if (!sql.sql) continue; // 跳过注释掉SQL的项
promiseList.push(new Promise(async (resolve, reject) => {
// console.log(`query ${sql.name} ...`);
let sqlStartTime = Date.now();
let result = await dbUtils.query(sql.sql, []);
let sqlTimeSpent = Date.now() - sqlStartTime;
sqlsTimeSpent += sqlTimeSpent;
newWatchParam[sql.name] = result[0].count;
console.log(`query ${sql.name} finished.\tspend time: ${sqlTimeSpent}ms (${(sqlTimeSpent / 1000).toFixed(2)}s),\tcount: ${newWatchParam[sql.name]}`);
resolve();
}));
}
await Promise.all(promiseList);
// let tableCountResult = await dbUtils.query("show table status");
// let tableCount = {}; // 查询近似值代替精确查询
// tableCountResult.forEach(rowData => tableCount[rowData.Name] = rowData.Rows);
// newWatchParam['commentTotalCount'] = tableCount['comment'];
let statisticTimeDelta = Date.now() - statisticTime;
let statisticsString = [
``,
`统计完成 ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`,
`spend time: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s; ${(statisticTimeDelta / (60 * 1000)).toFixed(2)}min), sql query time (sum): ${sqlsTimeSpent}ms (${(sqlsTimeSpent / 1000).toFixed(2)}s; ${(sqlsTimeSpent / (60 * 1000)).toFixed(2)}min)`,
`[与上次运行统计时相比]`,
[
`song: ${newWatchParam['songCount'] - oldWatchParam['songCount']}`,
`album: ${newWatchParam['albumCount'] - oldWatchParam['albumCount']}`,
`artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`,
`lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`,
`comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`,
`user: ${newWatchParam['userCount'] - oldWatchParam['userCount']}`,
].join(', '),
`[已爬取]`,
[
`song: ${newWatchParam['songCount']}`,
`album: ${newWatchParam['albumCount']}`,
`artist: ${newWatchParam['artistCount']}`,
`lyric: ${newWatchParam['lyricCount']}`,
`comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
`user: ${newWatchParam['userCount']}`,
].join(', '),
`[待爬取]`,
[
`song: ${newWatchParam['songWaiting']}`,
`album: ${newWatchParam['albumWaiting']}`,
`artist: ${newWatchParam['artistWaiting']}`,
`lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
`comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
`user: 未知`,
].join(', '),
`[总计] (已爬取 + 待爬取)`,
[
`song: ${newWatchParam['songCount'] + newWatchParam['songWaiting']}`,
`album: ${newWatchParam['albumCount'] + newWatchParam['albumWaiting']}`,
`artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
`lyric: ${newWatchParam['songCount']}`,
`comment: ${newWatchParam['songCount']}`,
`user: ${newWatchParam['userCount']}`,
].join(', '),
`[关联关系统计]`,
`song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,
``
if (process.argv.length <= 2) {
let output = [
"参数不够",
"node index --utils [song|album|artist|lyric|comment] --min [number] --max [number] --order [false|ASC|DESC] --limit [number]",
// "",
// "node index --utils song --min xxx --max xxx --order ASC --limit 2000",
// "node index --utils album --min xxx --max xxx --order ASC --limit 2000",
// "node index --utils artist --min xxx --max xxx --order ASC --limit 2000",
// "node index --utils lyric --min xxx --max xxx --order ASC --limit 2000",
// "node index --utils comment --min xxx --max xxx --order ASC --limit 2000",
].join('\n');
console.log(statisticsString);
oldWatchParam = newWatchParam;
}
/**
* 退出程序
*/
global.checkIsExit = async function () {
if (fs.readFileSync('stop.txt') != "1")
console.log(output);
return;
console.log();
console.log(`收到退出指令,准备退出...`);
await sleepUtils.sleep(500);
await dbUtils.close();
console.log(`数据库连接池已关闭`);
await sleepUtils.sleep(100);
process.exit(0);
}
var args = require('minimist')(process.argv.slice(2));
console.log("args:", args);
module.exports = {
main: main,
update: update,
watch: watch,
test: test,
}
global.useMysqlPool = true;
const neteaseMusic = require('./src/index');
neteaseMusic.main(args);

View File

@ -63,7 +63,7 @@ async function fetch({ songIdArray, debug = false }) {
return;
}
console.log(playlistResult);
// console.log(playlistResult);
let songInfo = {
songId: songId,

230
netease_music/src/index.js Normal file
View File

@ -0,0 +1,230 @@
// Load modules
const fs = require('fs');
const path = require('path');
// Pick the pooled or the plain DB helper. global.useMysqlPool is read once, at load time,
// so entry scripts assign it before requiring this module.
const dbUtils = require(`../../utils/${global.useMysqlPool ? 'dbPoolUtils' : 'dbUtils'}`);
const sleepUtils = require('../../utils/sleepUtils');
// Database connection pool
dbUtils.create({
database: "neteaseMusic", // target database
connectionLimit: global.connectionLimit || 10, // pool size; 10 when global.connectionLimit is unset or falsy
});
// Publish the helper on `global` (presumably for the getInfo modules; confirm against their code).
global.dbUtils = dbUtils;
console.log("global.useMysqlPool:", !!global.useMysqlPool);
// Pause between two consecutive requests (unit not visible here, presumably milliseconds; TODO confirm)
global.sleepTime = 10;
// Load the crawler utilities
const songInfoUtils = require('./getInfo/songInfoUtils');
const artistInfoUtils = require('./getInfo/artistInfoUtils');
const albumInfoUtils = require('./getInfo/albumInfoUtils');
const lyricInfoUtils = require('./getInfo/lyricInfoUtils');
const commentUtils = require('./getInfo/commentUtils');
const playlistUtils = require('./getInfo/playlistUtils');
/**
 * Manual smoke test: bulk-inserts two sample rows into `song` and prints
 * whatever the database helper returns.
 *
 * Earlier manual checks, kept for reference:
 *   albumInfoUtils.fetch({ albumId: "9156", debug: true })
 *   artistInfoUtils.fetch({ artistId: "12023508" })
 *   songInfoUtils.fetch({ songId: "437608327" })
 *   playlistUtils.fetch({ playlistId: "4980157066", debug: true })
 *   albumInfoUtils.getFromDatabase({ albumId: "9156" })
 *   artistInfoUtils.getFromDatabase({ artistId: "12023508" })
 *   songInfoUtils.getFromDatabase({ songId: "437608327" })
 * Note: not every artist has a personal homepage,
 * e.g. https://music.163.com/#/artist?id=1079075
 *
 * @returns {Promise<void>}
 */
async function test() {
    console.log("neteaseMusic test...");
    const insertSql = 'INSERT IGNORE INTO song (`song_id`, `title`, `image`, `pub_date`) VALUES ?';
    const sampleRows = [
        [100, '4', '3', '4'],
        [200, '23', '4', '5'],
    ];
    // The single `?` placeholder receives the whole list of rows.
    const insertResult = await dbUtils.query(insertSql, [sampleRows]);
    console.log(insertResult);
}
/**
 * Crawl loop: repeatedly runs the crawler selected by `args.utils`,
 * pausing 2000 (passed to sleepUtils.sleep) between rounds.
 *
 * @param {object} args - Parsed CLI options; `args.utils` picks the crawler
 *   (song | album | artist | lyric | comment). The whole object is forwarded
 *   to the crawler's `fetchAll` as `{ args }`.
 * @returns {Promise<void>} Resolves only when `args.utils` matches no crawler.
 */
async function main(args) {
    console.log("neteaseMusic Start fetch ...");
    const crawlers = [
        ["song", songInfoUtils],
        ["album", albumInfoUtils],
        ["artist", artistInfoUtils],
        ["lyric", lyricInfoUtils],
        ["comment", commentUtils],
    ];
    for (;;) {
        // Disabled dirty-data cleanup, kept for reference:
        //   DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0
        //   DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0
        // Looked up on every round, with loose equality, exactly like the original chain.
        const selected = crawlers.find(([utilName]) => args.utils == utilName);
        if (selected === undefined) {
            console.log("utils参数不匹配退出");
            return;
        }
        const crawler = selected[1];
        await crawler.fetchAll({ args: args });
        await sleepUtils.sleep(2000);
    }
}
/**
 * Data refresh (re-crawl): endlessly re-runs the album crawler in update
 * mode, pausing 2000 (passed to sleepUtils.sleep) after every round.
 *
 * @returns {Promise<never>} Never resolves on its own; it only ends by rejecting.
 */
async function update() {
    console.log("neteaseMusic Start update ...");
    for (;;) {
        const refreshOptions = { isUpdate: true };
        await albumInfoUtils.fetchAll(refreshOptions);
        await sleepUtils.sleep(2000);
    }
}
// Counts recorded by the previous watch() call; used to report growth between runs.
let oldWatchParam = {};

/**
 * Collect statistics about the crawled data and print a report.
 *
 * Every counting query is started at once and awaited together. The report
 * lists the growth since the previous call, the crawled totals, the items
 * still waiting to be crawled, the grand totals and the relation-table sizes.
 * On the first call there is no earlier snapshot, so growth is shown as "N/A".
 *
 * @returns {Promise<void>} Resolves once the report has been printed; rejects
 *   with the first query error if any query fails.
 */
async function watch() {
    // The 8-hour shift makes the ISO string show UTC+8 wall-clock time (it still ends in "Z").
    console.log(`开始统计 ... ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`);
    const statisticTime = Date.now();
    const newWatchParam = {};
    const sqls = [
        // InnoDB count(*) scans the whole table; rough figures are available from `show table status`.
        {
            name: "songCount",
            sql: `SELECT count(*) AS count FROM song`,
        }, {
            name: "songWaiting",
            sql: `SELECT count(DISTINCT song_id) AS count
FROM ( SELECT song_id FROM song_artist_relation UNION SELECT song_id FROM song_album_relation ) t_tmp
WHERE song_id NOT IN ( SELECT song_id FROM song )`,
        }, {
            name: "albumCount",
            sql: `SELECT count(*) AS count FROM album`,
        }, {
            name: "albumWaiting",
            sql: `SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )`,
        }, {
            name: "artistCount",
            sql: `SELECT count(*) AS count FROM artist`,
        }, {
            name: "artistWaiting",
            sql: `SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )`,
        }, {
            name: "lyricCount",
            sql: `SELECT count(*) AS count FROM lyric`,
        }, {
            name: "commentCount",
            sql: `SELECT count( DISTINCT song_id ) AS count FROM comment`,
        }, {
            name: "commentTotalCount",
            sql: `SELECT count(*) AS count FROM comment`,
        }, {
            name: "userCount",
            sql: `SELECT count(*) AS count FROM user`,
        }, {
            name: "songAlbumCount",
            sql: `SELECT count(*) AS count FROM song_album_relation`,
        }, {
            name: "songArtistCount",
            sql: `SELECT count(*) AS count FROM song_artist_relation`,
        }
    ];
    let sqlsTimeSpent = 0;
    // Plain async callbacks instead of `new Promise(async ...)`: a failing query now
    // rejects Promise.all instead of leaving it pending forever.
    const pendingQueries = sqls
        .filter((item) => item.sql) // skip entries whose SQL has been commented out
        .map(async (item) => {
            const sqlStartTime = Date.now();
            const result = await dbUtils.query(item.sql, []);
            const sqlTimeSpent = Date.now() - sqlStartTime;
            sqlsTimeSpent += sqlTimeSpent;
            newWatchParam[item.name] = result[0].count;
            console.log(`query ${item.name} finished.\tspend time: ${sqlTimeSpent}ms (${(sqlTimeSpent / 1000).toFixed(2)}s),\tcount: ${newWatchParam[item.name]}`);
        });
    await Promise.all(pendingQueries);
    // Disabled alternative: take approximate row counts from `show table status`
    // (the Rows column, keyed by Name) instead of the exact commentTotalCount query.
    const statisticTimeDelta = Date.now() - statisticTime;
    // Growth since the previous run; "N/A" when there is no earlier value to compare with.
    const growth = (name) => {
        const previous = oldWatchParam[name];
        return previous === undefined ? 'N/A' : newWatchParam[name] - previous;
    };
    const statisticsString = [
        ``,
        `统计完成 ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`,
        `spend time: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s; ${(statisticTimeDelta / (60 * 1000)).toFixed(2)}min), sql query time (sum): ${sqlsTimeSpent}ms (${(sqlsTimeSpent / 1000).toFixed(2)}s; ${(sqlsTimeSpent / (60 * 1000)).toFixed(2)}min)`,
        `[与上次运行统计时相比]`,
        [
            `song: ${growth('songCount')}`,
            `album: ${growth('albumCount')}`,
            `artist: ${growth('artistCount')}`,
            `lyric: ${growth('lyricCount')}`,
            `comment: ${growth('commentCount')}(song)/${growth('commentTotalCount')}(comment)`,
            `user: ${growth('userCount')}`,
        ].join(', '),
        `[已爬取]`,
        [
            `song: ${newWatchParam['songCount']}`,
            `album: ${newWatchParam['albumCount']}`,
            `artist: ${newWatchParam['artistCount']}`,
            `lyric: ${newWatchParam['lyricCount']}`,
            `comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
            `user: ${newWatchParam['userCount']}`,
        ].join(', '),
        `[待爬取]`,
        [
            `song: ${newWatchParam['songWaiting']}`,
            `album: ${newWatchParam['albumWaiting']}`,
            `artist: ${newWatchParam['artistWaiting']}`,
            // Lyrics and comments are counted per song, so the backlog is "songs without one".
            `lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
            `comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
            `user: 未知`,
        ].join(', '),
        `[总计] (已爬取 + 待爬取)`,
        [
            `song: ${newWatchParam['songCount'] + newWatchParam['songWaiting']}`,
            `album: ${newWatchParam['albumCount'] + newWatchParam['albumWaiting']}`,
            `artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
            `lyric: ${newWatchParam['songCount']}`,
            `comment: ${newWatchParam['songCount']}`,
            `user: ${newWatchParam['userCount']}`,
        ].join(', '),
        `[关联关系统计]`,
        `song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,
        ``
    ].join('\n');
    console.log(statisticsString);
    // Keep this snapshot as the baseline for the next call.
    oldWatchParam = newWatchParam;
}
/**
 * Exit the program gracefully when the stop flag file asks for it.
 *
 * Reads `stop.txt` (relative to the current working directory). Unless its
 * content, ignoring surrounding whitespace, is exactly "1", the function
 * returns without doing anything. Otherwise it closes the database helper and
 * ends the process with exit code 0.
 *
 * @returns {Promise<void>} Resolves when no stop was requested.
 * @throws {Error} Rejects if `stop.txt` cannot be read (for example when it is missing).
 */
global.checkIsExit = async function () {
    // Decode explicitly and trim, so a flag saved as "1" plus a trailing newline still counts.
    const stopFlag = fs.readFileSync('stop.txt', 'utf8').trim();
    if (stopFlag !== "1")
        return;
    console.log();
    console.log(`收到退出指令,准备退出...`);
    // NOTE(review): the short pauses presumably let in-flight work and log output settle; confirm.
    await sleepUtils.sleep(500);
    await dbUtils.close();
    console.log(`数据库连接池已关闭`);
    await sleepUtils.sleep(100);
    process.exit(0);
};
// Public entry points of the crawler module.
module.exports = { main, update, watch, test };

3
netease_music/test.js Normal file
View File

@ -0,0 +1,3 @@
// Entry script for the manual test. The flag is set before loading ./src/index,
// which reads global.useMysqlPool at load time to choose its DB helper (here: the non-pooled one).
global.useMysqlPool = false;
const neteaseMusic = require('./src/index');
neteaseMusic.test();

3
netease_music/update.js Normal file
View File

@ -0,0 +1,3 @@
// Entry script for the re-crawl loop. The flag is set before loading ./src/index,
// which reads global.useMysqlPool at load time to choose its DB helper (here: the pooled one).
global.useMysqlPool = true;
const neteaseMusic = require('./src/index');
neteaseMusic.update();

View File

@ -7,8 +7,8 @@ if (keepWatching) {
}
// global.dbConfig = 'mysql_local';
const neteaseMusic = require('./netease_music/index');
const sleepUtils = require('./utils/sleepUtils');
const neteaseMusic = require('./src/index');
const sleepUtils = require('../utils/sleepUtils');
async function main() {
do {

View File

@ -1,3 +0,0 @@
// Entry script for the manual test. The flag is set before loading ./netease_music/index,
// which reads global.useMysqlPool at load time to choose its DB helper (here: the non-pooled one).
global.useMysqlPool = false;
const neteaseMusic = require('./netease_music/index');
neteaseMusic.test();

View File

@ -1,3 +0,0 @@
// Entry script for the re-crawl loop. The flag is set before loading ./netease_music/index,
// which reads global.useMysqlPool at load time to choose its DB helper (here: the pooled one).
global.useMysqlPool = true;
const neteaseMusic = require('./netease_music/index');
neteaseMusic.update();