update
This commit is contained in:
parent
ba395bac47
commit
c98d453e14
@ -14,6 +14,7 @@ global.dbUtils = dbUtils;
|
||||
const songInfoUtils = require('./src/getInfo/songInfoUtils');
|
||||
const artistInfoUtils = require('./src/getInfo/artistInfoUtils');
|
||||
const albumInfoUtils = require('./src/getInfo/albumInfoUtils');
|
||||
const lyricInfoUtils = require('./src/getInfo/lyricInfoUtils');
|
||||
|
||||
console.log("global.useMysqlPool:", !!global.useMysqlPool);
|
||||
|
||||
@ -30,27 +31,10 @@ async function checkIsExit() {
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
/**
 * Scratch/debug routine.
 *
 * Queries the song ids that appear in `song_lyric` but not in `song`, then
 * requests the lyric API for each id in turn and prints the `lrc` member of
 * every parsed response.
 */
async function test() {
    console.log("neteaseMusic test...");

    const rows = await dbUtils.query(`
        SELECT DISTINCT song_id FROM song_lyric WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
    `, []);
    const pendingIds = rows.map((row) => row.song_id);

    // Requests are issued strictly one after another.
    for (const songId of pendingIds) {
        const body = await requestUtils.getApiResult(`https://music.163.com/api/song/lyric?id=${songId}&lv=1`);
        const parsed = JSON.parse(body);
        console.log(parsed.lrc);
    }
}
|
||||
|
||||
|
||||
async function main() {
|
||||
console.log("neteaseMusic Starting...");
|
||||
console.log(`数据统计: ${await statistics()}`);
|
||||
|
||||
// getMusicInfo({ songId: "1855221507" });
|
||||
// getArtistInfo({ artistId: "1079074" });
|
||||
// getAlbumInfo({ albumId: "74268047" });
|
||||
@ -58,21 +42,51 @@ async function main() {
|
||||
// 不是所有歌手都有个人主页 例如 https://music.163.com/#/artist?id=1079075
|
||||
// getUserInfo({ userId: "37365202" });
|
||||
|
||||
// let res = await albumInfoUtils.getFromDatabase({ albumId: "34943450" });
|
||||
// let res = await artistInfoUtils.getFromDatabase({ artistId: "12023508" });
|
||||
// let res = await songInfoUtils.getFromDatabase({ songId: "437608327" });
|
||||
// console.log(res);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Entry point of the crawler: logs the current statistics once, then loops
 * forever. Each round deletes relation rows whose ids are 0, runs
 * `startGet` twice (with sleep times 1 and 100), and sleeps 2000 before the
 * next round.
 */
async function main() {
    console.log("neteaseMusic Starting...");
    const summary = await statistics();
    console.log(`数据统计: ${summary}`);

    for (;;) {
        // Remove dirty rows (relations pointing at id 0) before each crawl round.
        const artistCleanup = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
        const albumCleanup = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
        console.log(`删除脏数据 affectRows:`, artistCleanup.affectedRows, albumCleanup.affectedRows);

        // Two passes, run sequentially: first with sleep time 1, then with 100.
        for (const sleepTime of [1, 100]) {
            await startGet(sleepTime);
        }

        await sleepUtils.sleep(2000);
    }
}
|
||||
|
||||
async function startGet(sleepTime) {
|
||||
|
||||
// 从数据库中查出还缺少的歌词,并进行爬取
|
||||
console.log("start fetching lyrics ...");
|
||||
var songIds = await dbUtils.query(`
|
||||
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM lyric )
|
||||
`, []);
|
||||
songIds = songIds.map(song => song.song_id);
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
await checkIsExit();
|
||||
const songId = songIds[i];
|
||||
console.log(`${i}/${songIds.length} | lyric: ${songId} | ${await statistics()}`);
|
||||
try {
|
||||
await lyricInfoUtils.fetch({ songId: songId });
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
}
|
||||
await sleepUtils.sleep(sleepTime);
|
||||
}
|
||||
|
||||
// 从数据库中查出还缺少的歌曲,并进行爬取
|
||||
console.log("start fetching songs ...");
|
||||
let songIds = await dbUtils.query(`
|
||||
var songIds = await dbUtils.query(`
|
||||
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
|
||||
UNION
|
||||
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song )
|
||||
@ -92,7 +106,7 @@ async function startGet(sleepTime) {
|
||||
|
||||
// 从数据库中查出还缺少的专辑,并进行爬取
|
||||
console.log("start fetching albums ...")
|
||||
let albumIds = await dbUtils.query(`
|
||||
var albumIds = await dbUtils.query(`
|
||||
SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
|
||||
`, []);
|
||||
albumIds = albumIds.map(item => item.album_id);
|
||||
@ -110,7 +124,7 @@ async function startGet(sleepTime) {
|
||||
|
||||
// 从数据库中查出还缺少的歌手,并进行爬取
|
||||
console.log("start fetching artists ...")
|
||||
let artistIds = await dbUtils.query(`
|
||||
var artistIds = await dbUtils.query(`
|
||||
SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT DISTINCT artist_id FROM artist )
|
||||
`, []);
|
||||
artistIds = artistIds.map(item => item.artist_id);
|
||||
@ -156,26 +170,34 @@ async function statistics() {
|
||||
let sql = `
|
||||
SELECT
|
||||
song_count,
|
||||
song_waiting_1 + song_waiting_2 as song_waiting,
|
||||
album_count,
|
||||
album_v1_count,
|
||||
artist_count,
|
||||
song_album_count,
|
||||
song_artist_count
|
||||
FROM
|
||||
( SELECT count(*) AS song_count FROM song ) t1,
|
||||
( SELECT count(*) AS album_count FROM album ) t2,
|
||||
( SELECT count(*) AS album_v1_count FROM album WHERE version = 1 ) t3_1,
|
||||
( SELECT count(*) AS artist_count FROM artist ) t3,
|
||||
( SELECT count(*) AS song_album_count FROM song_album_relation ) t4,
|
||||
( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t5`;
|
||||
( SELECT count(*) AS song_count FROM song ) t_song,
|
||||
( SELECT count(*) AS album_count FROM album ) t_album,
|
||||
( SELECT count(*) AS artist_count FROM artist ) t_artist,
|
||||
( SELECT count(*) AS song_album_count FROM song_album_relation ) t_song_album,
|
||||
( SELECT count(*) AS song_artist_count FROM song_artist_relation ) t_song_artist,
|
||||
( SELECT count( DISTINCT song_id ) as song_waiting_1 FROM song_artist_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_artist,
|
||||
( SELECT count( DISTINCT song_id ) as song_waiting_2 FROM song_album_relation WHERE song_id NOT IN ( SELECT DISTINCT song_id FROM song ) ) t_song_waiting_song_album
|
||||
`;
|
||||
let result = await dbUtils.query(sql, []);
|
||||
let songCount = result[0].song_count;
|
||||
let songWaiting = result[0].song_waiting;
|
||||
let albumCount = result[0].album_count;
|
||||
let albumV1Count = result[0].album_v1_count;
|
||||
let artistCount = result[0].artist_count;
|
||||
let songAlbumCount = result[0].song_album_count;
|
||||
let songArtistCount = result[0].song_artist_count;
|
||||
return `song: ${songCount}, album: ${albumCount}(v1: ${albumV1Count}), artist: ${artistCount} | songAlbum: ${songAlbumCount}, songArtist: ${songArtistCount}`;
|
||||
return [
|
||||
`song: ${songCount}/${songCount + songWaiting}`,
|
||||
`album: ${albumCount}`,
|
||||
`artist: ${artistCount}`,
|
||||
`songAlbum: ${songAlbumCount}`,
|
||||
`songArtist: ${songArtistCount}`
|
||||
].join(', ');
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
@ -40,7 +40,7 @@ CREATE TABLE `song_album_relation` (
|
||||
`album_id` int(10) unsigned NOT NULL COMMENT '专辑id',
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY `song_id` (`song_id`,`album_id`)
|
||||
PRIMARY KEY (`song_id`,`album_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `song_artist_relation` (
|
||||
@ -53,11 +53,11 @@ CREATE TABLE `song_artist_relation` (
|
||||
|
||||
CREATE TABLE `lyric` (
|
||||
`song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
|
||||
`lyric` text NOT NULL COMMENT '歌词',
|
||||
`version` int(10) unsigned NOT NULL COMMENT '版本号',
|
||||
`lyric` text NOT NULL COMMENT '歌词',
|
||||
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||
PRIMARY KEY (`song_id`)
|
||||
PRIMARY KEY (`song_id`,`version`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||
|
||||
CREATE TABLE `log` (
|
||||
|
44
netease_music/src/getInfo/lyricInfoUtils.js
Normal file
44
netease_music/src/getInfo/lyricInfoUtils.js
Normal file
@ -0,0 +1,44 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const requestUtils = require('../../../utils/requestUtils');
|
||||
|
||||
const dbUtils = global.dbUtils;
|
||||
|
||||
/**
 * Fetch the lyric of one song from the NetEase lyric API and store it in the
 * `lyric` table.
 *
 * @param {{ songId: (number|string) }} params - `songId` is interpolated into
 *     the API URL and written to the `song_id` column.
 * @returns {Promise<{songId: (number|string), lyric: *, version: *}|undefined>}
 *     The lyric info that was written, or `undefined` when the request fails,
 *     the response is not valid JSON, or the response has no `lrc` object
 *     (each of those cases is logged with `console.error`).
 * @throws Rejects with the database error when the INSERT fails, so the
 *     caller's own try/catch can handle it.
 */
async function fetch({ songId }) {
    const url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`;

    let json;
    try {
        json = await requestUtils.getApiResult(url);
    } catch (errors) {
        console.error(errors);
        return;
    }

    let lyric;
    try {
        lyric = JSON.parse(json).lrc; // { version: xx, lyric: 'xxx' }
    } catch (error) {
        console.error(error);
        return;
    }

    // A response without an `lrc` object would otherwise fail below with an
    // opaque TypeError; report it like the other failure paths instead.
    if (lyric == null) {
        console.error(`lyric response has no lrc field, songId: ${songId}`);
        return;
    }

    const lyricInfo = {
        songId: songId,
        lyric: lyric.lyric,
        version: lyric.version,
    };

    // Await the insert: a floating promise would turn a database error into an
    // unhandled rejection and let the caller continue before the row exists.
    await dbUtils.query('INSERT IGNORE INTO lyric SET ?', {
        song_id: lyricInfo.songId,
        lyric: lyricInfo.lyric,
        version: lyricInfo.version,
    });

    return lyricInfo;
}
|
||||
|
||||
module.exports = {
|
||||
fetch: fetch,
|
||||
}
|
Loading…
Reference in New Issue
Block a user