1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

fetchAll从index.js提到各个utils中

This commit is contained in:
程序员小墨 2022-10-01 22:00:29 +08:00
parent 5d2bfccb4b
commit 3dcb71b5a3
6 changed files with 196 additions and 110 deletions

26
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,26 @@
{
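// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387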
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "node index",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\index.js"
},
{
"type": "node",
"request": "launch",
"name": "node test",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\test.js"
}
]
}

View File

@ -1,53 +1,50 @@
// 引入modules
const fs = require('fs');
const path = require('path');
const dbUtils = require(global.useMysqlPool ? '../utils/dbPoolUtils' : '../utils/dbUtils');
const requestUtils = require('../utils/requestUtils');
const sleepUtils = require('../utils/sleepUtils');
// Database connection pool setup.
dbUtils.create({
database: "neteaseMusic", // database to use
// NOTE(review): connectionLimit appears twice in this literal; in JavaScript the later key (10) wins — confirm which value is intended.
connectionLimit: 8, // size of the database connection pool
connectionLimit: 10, // size of the database connection pool
});
// Publish the db helper on `global` so the info utils required below can pick it up.
global.dbUtils = dbUtils;
console.log("global.useMysqlPool:", !!global.useMysqlPool);
// Pause between two consecutive requests; handed to sleepUtils.sleep (presumably milliseconds — TODO confirm).
global.sleepTime = 10;
// Load the info utils (required after global.dbUtils / global.sleepTime have been assigned).
const songInfoUtils = require('./src/getInfo/songInfoUtils');
const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
console.log("global.useMysqlPool:", !!global.useMysqlPool);
/**
 * Exit check: shuts the process down when the flag file `stop.txt`
 * (resolved against the current working directory) contains "1".
 *
 * The flag is read as UTF-8 text and trimmed, so a trailing newline written
 * by an editor or `echo` still counts as a stop request. A missing flag file
 * means "keep running" instead of crashing the caller with ENOENT; any other
 * read error is rethrown.
 *
 * On a stop request the database helper is closed and the process exits
 * with code 0.
 *
 * @returns {Promise<void>} resolves without side effects when no stop was requested
 */
async function checkIsExit() {
    let flag;
    try {
        flag = fs.readFileSync('stop.txt', 'utf8').trim();
    } catch (err) {
        // No flag file: nobody asked us to stop.
        if (err.code === 'ENOENT') {
            return;
        }
        throw err;
    }
    if (flag !== "1") {
        return;
    }
    console.log();
    console.log(`收到退出指令,准备退出...`);
    // Short pauses around the shutdown, kept from the original sequence.
    await sleepUtils.sleep(500);
    await dbUtils.close();
    console.log(`数据库连接池已关闭`);
    await sleepUtils.sleep(100);
    process.exit(0);
}
/**
 * Manual test entry point: runs one of the calls below and prints the result.
 * The commented-out lines are alternative calls kept for ad-hoc debugging.
 */
async function test() {
console.log("neteaseMusic test...");
// Not every artist has a personal homepage, e.g. https://music.163.com/#/artist?id=1079075
// NOTE(review): `res` is declared with `let` twice in this function (albumInfoUtils.fetch and global.statistics) — looks like merged old/new lines; confirm which one is current.
let res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
// let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
// let res = await songInfoUtils.getFromDatabase({ songId: "437608327" });
let res = await global.statistics();
// let res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
// let res = await artistInfoUtils.fetch({ artistId: "12023508" });
// let res = await songInfoUtils.fetch({ songId: "437608327" });
// let res = await albumInfoUtils.getFromDatabase({ albumId: "9156" });
// let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
// let res = await songInfoUtils.getFromDatabase({ songId: "437608327" });
console.log(res);
}
/**
* 主函数
*/
async function main() {
console.log("neteaseMusic Starting...");
console.log(`数据统计: ${await statistics()}`);
@ -58,88 +55,17 @@ async function main() {
var affectRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
console.log(`删除脏数据 affectRows:`, affectRows1.affectedRows, affectRows2.affectedRows);
await startGet(100);
await songInfoUtils.fetchAll();
await albumInfoUtils.fetchAll();
await artistInfoUtils.fetchAll();
await lyricInfoUtils.fetchAll();
await sleepUtils.sleep(2000);
}
}
/**
 * Crawl everything the database is still missing, in four sequential passes:
 * lyrics, songs, albums, artists. Each pass queries the ids that are
 * referenced but not stored yet and fetches them one by one.
 *
 * @param sleepTime value handed to sleepUtils.sleep after every fetched item
 */
async function startGet(sleepTime) {
    // One pass: announce it, look up the missing ids, then fetch each id in turn.
    const crawlMissing = async ({ banner, sql, column, label, fetchOne }) => {
        console.log(banner);
        const rows = await dbUtils.query(sql, []);
        const ids = rows.map(row => row[column]);
        for (const [position, id] of ids.entries()) {
            // The stop flag is checked before every item.
            await checkIsExit();
            console.log(`${position}/${ids.length} | ${label}: ${id} | ${await statistics()}`);
            try {
                await fetchOne(id);
            } catch (err) {
                // A failed item is logged and skipped; the pass continues.
                console.error(err);
            }
            await sleepUtils.sleep(sleepTime);
        }
    };

    // Songs that have no lyric row yet.
    await crawlMissing({
        banner: "start fetching lyrics ...",
        sql: `
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM lyric )
`,
        column: 'song_id',
        label: 'lyric',
        fetchOne: songId => lyricInfoUtils.fetch({ songId }),
    });

    // Songs referenced by a relation table but absent from the song table.
    await crawlMissing({
        banner: "start fetching songs ...",
        sql: `
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
UNION
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
`,
        column: 'song_id',
        label: 'song',
        fetchOne: songId => songInfoUtils.fetch({ songId }),
    });

    // Albums referenced by song_album_relation but absent from the album table.
    await crawlMissing({
        banner: "start fetching albums ...",
        sql: `
SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
`,
        column: 'album_id',
        label: 'album',
        fetchOne: albumId => albumInfoUtils.fetch({ albumId }),
    });

    // Artists referenced by song_artist_relation but absent from the artist table.
    await crawlMissing({
        banner: "start fetching artists ...",
        sql: `
SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist )
`,
        column: 'artist_id',
        label: 'artist',
        fetchOne: artistId => artistInfoUtils.fetch({ artistId }),
    });
}
/**
* 数据更新 (重新爬取)
*/
async function update() {
console.log("neteaseMusic update ...");
console.log(`数据统计: ${await statistics()}`);
@ -165,40 +91,84 @@ async function update() {
}
}
async function statistics() {
/**
 * Database statistics, published on `global` so the info utils can call it.
 *
 * Runs a single SQL statement through dbUtils.query that counts the stored
 * songs, albums, artists, lyrics and relation rows, plus the ids that are
 * referenced but not stored yet (the "waiting" counts), and formats the first
 * result row as one comma-separated summary string.
 *
 * NOTE(review): as shown here the FROM list repeats several derived tables and
 * the returned array repeats the album/artist entries — this looks like merged
 * old/new lines; confirm against the actual file.
 *
 * @returns {Promise<string>} the parts of the summary joined with ', '
 */
global.statistics = async function () {
let sql = `
SELECT
song_count,
song_waiting_1 + song_waiting_2 as song_waiting,
album_count,
album_waiting,
artist_count,
artist_waiting,
lyric_count,
lyric_waiting,
song_album_count,
song_artist_count
FROM
( SELECT count(*) AS song_count FROM song ) t_song,
( SELECT count(*) AS album_count FROM album ) t_album,
( SELECT count(*) AS artist_count FROM artist ) t_artist,
( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album,
( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist,
( SELECT count( DISTINCT song_id ) as song_waiting_1 FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_artist,
( SELECT count( DISTINCT song_id ) as song_waiting_2 FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_album
( SELECT count( DISTINCT song_id ) as song_waiting_2 FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_album,
( SELECT count(*) AS album_count FROM album ) t_album,
( SELECT count( DISTINCT album_id ) as album_waiting FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) ) as t_album_waiting_song_album,
( SELECT count(*) AS artist_count FROM artist ) t_artist,
( SELECT count( DISTINCT artist_id ) as artist_waiting FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist ) ) as t_album_waiting_song_artist,
( SELECT count(*) AS lyric_count FROM lyric ) t_lyric,
( SELECT count( DISTINCT song_id ) as lyric_waiting FROM song WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM lyric ) ) as t_lyric_waiting_song,
( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album,
( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist
`;
let result = await dbUtils.query(sql, []);
// Only the first row of the result is read.
let songCount = result[0].song_count;
let songWaiting = result[0].song_waiting;
let albumCount = result[0].album_count;
let albumWaiting = result[0].album_waiting;
let artistCount = result[0].artist_count;
let artistWaiting = result[0].artist_waiting;
let lyricCount = result[0].lyric_count;
let lyricWaiting = result[0].lyric_waiting;
let songAlbumCount = result[0].song_album_count;
let songArtistCount = result[0].song_artist_count;
// Entries of the form "x/y" show stored / (stored + waiting).
return [
`song: ${songCount}/${songCount + songWaiting}`,
`album: ${albumCount}`,
`artist: ${artistCount}`,
`album: ${albumCount}/${albumCount + albumWaiting}`,
`artist: ${artistCount}/${artistCount + artistWaiting}`,
`lyric: ${lyricCount}/${lyricCount + lyricWaiting}`,
`songAlbum: ${songAlbumCount}`,
`songArtist: ${songArtistCount}`
].join(', ');
}
/**
 * Exit check, published on `global` so the info utils can call it between
 * items: shuts the process down when the flag file `stop.txt` (resolved
 * against the current working directory) contains "1".
 *
 * The flag is read as UTF-8 text and trimmed, so a trailing newline written
 * by an editor or `echo` still counts as a stop request. A missing flag file
 * means "keep running" instead of rejecting with ENOENT (which would abort
 * the calling crawl loop); any other read error is rethrown.
 *
 * On a stop request the database helper is closed and the process exits
 * with code 0.
 *
 * @returns {Promise<void>} resolves without side effects when no stop was requested
 */
global.checkIsExit = async function () {
    let flag;
    try {
        flag = fs.readFileSync('stop.txt', 'utf8').trim();
    } catch (err) {
        // No flag file: nobody asked us to stop.
        if (err.code === 'ENOENT') {
            return;
        }
        throw err;
    }
    if (flag !== "1") {
        return;
    }
    console.log();
    console.log(`收到退出指令,准备退出...`);
    // Short pauses around the shutdown, kept from the original sequence.
    await sleepUtils.sleep(500);
    await dbUtils.close();
    console.log(`数据库连接池已关闭`);
    await sleepUtils.sleep(100);
    process.exit(0);
};
module.exports = {
main: main,
update: update,

View File

@ -2,6 +2,7 @@ const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
@ -20,6 +21,26 @@ async function getFromDatabase({ albumId }) {
return albumInfo;
}
/**
 * Crawl every album that song_album_relation references but the album table
 * does not contain yet. Albums are processed one at a time; a failing album
 * is logged and skipped.
 */
async function fetchAll() {
    console.log("start fetching albums ...");
    const missingRows = await dbUtils.query(`
SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
`, []);
    const pendingIds = missingRows.map(({ album_id }) => album_id);
    const total = pendingIds.length;
    for (const [position, albumId] of pendingIds.entries()) {
        // The stop flag is checked before every album.
        await global.checkIsExit();
        console.log(`${position}/${total} | album: ${albumId} | ${await global.statistics()}`);
        try {
            await fetch({ albumId });
        } catch (err) {
            console.error(err);
        }
        // Pause between requests, using the globally configured delay.
        await sleepUtils.sleep(global.sleepTime);
    }
}
// 获取专辑详情
async function fetch({ albumId, debug = false }) {
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
@ -186,5 +207,6 @@ async function update({ albumId }) {
module.exports = {
getFromDatabase: getFromDatabase,
fetch: fetch,
fetchAll: fetchAll,
update: update,
}

View File

@ -2,6 +2,7 @@ const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
@ -20,6 +21,26 @@ async function getFromDatabase({ artistId }) {
return artistInfo;
}
/**
 * Crawl every artist that song_artist_relation references but the artist
 * table does not contain yet. Artists are processed one at a time; a failing
 * artist is logged and skipped.
 */
async function fetchAll() {
    console.log("start fetching artists ...");
    const missingRows = await dbUtils.query(`
SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist )
`, []);
    const pendingIds = missingRows.map(({ artist_id }) => artist_id);
    const total = pendingIds.length;
    for (const [position, artistId] of pendingIds.entries()) {
        // The stop flag is checked before every artist.
        await global.checkIsExit();
        console.log(`${position}/${total} | artist: ${artistId} | ${await global.statistics()}`);
        try {
            await fetch({ artistId });
        } catch (err) {
            console.error(err);
        }
        // Pause between requests, using the globally configured delay.
        await sleepUtils.sleep(global.sleepTime);
    }
}
// 获取音乐人详情
async function fetch({ artistId, debug = false }) {
let result = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
@ -85,4 +106,5 @@ async function fetch({ artistId, debug = false }) {
module.exports = {
getFromDatabase: getFromDatabase,
fetch: fetch,
fetchAll: fetchAll,
}

View File

@ -2,11 +2,32 @@ const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
/**
 * Crawl the lyric of every song in the song table that has no row in the
 * lyric table yet. Songs are processed one at a time; a failing song is
 * logged and skipped.
 */
async function fetchAll() {
    console.log("start fetching lyrics ...");
    const missingRows = await dbUtils.query(`
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM lyric )
`, []);
    const pendingIds = missingRows.map(({ song_id }) => song_id);
    const total = pendingIds.length;
    for (const [position, songId] of pendingIds.entries()) {
        // The stop flag is checked before every song.
        await global.checkIsExit();
        console.log(`${position}/${total} | lyric: ${songId} | ${await global.statistics()}`);
        try {
            await fetch({ songId });
        } catch (err) {
            console.error(err);
        }
        // Pause between requests, using the globally configured delay.
        await sleepUtils.sleep(global.sleepTime);
    }
}
// 获取歌词详情
async function fetch({ songId }) {
async function fetch({ songId, debug = false }) {
var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`;
try {
@ -41,4 +62,5 @@ async function fetch({ songId }) {
module.exports = {
fetch: fetch,
fetchAll: fetchAll,
}

View File

@ -2,6 +2,7 @@ const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
@ -22,6 +23,28 @@ async function getFromDatabase({ songId }) {
return songInfo;
}
/**
 * Crawl every song that song_artist_relation or song_album_relation
 * references but the song table does not contain yet. Songs are processed one
 * at a time; a failing song is logged and skipped.
 */
async function fetchAll() {
    console.log("start fetching songs ...");
    const missingRows = await dbUtils.query(`
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
UNION
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
`, []);
    const pendingIds = missingRows.map(({ song_id }) => song_id);
    const total = pendingIds.length;
    for (const [position, songId] of pendingIds.entries()) {
        // The stop flag is checked before every song.
        await global.checkIsExit();
        console.log(`${position}/${total} | song: ${songId} | ${await global.statistics()}`);
        try {
            await fetch({ songId });
        } catch (err) {
            console.error(err);
        }
        // Pause between requests, using the globally configured delay.
        await sleepUtils.sleep(global.sleepTime);
    }
}
// 获取音乐详情
async function fetch({ songId, debug = false }) {
let result = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
@ -119,4 +142,5 @@ async function fetch({ songId, debug = false }) {
module.exports = {
getFromDatabase: getFromDatabase,
fetch: fetch,
fetchAll: fetchAll,
}