1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
This commit is contained in:
程序员小墨 2022-10-02 01:20:48 +08:00
parent 3e1ef431a7
commit 2ff2758fc8
7 changed files with 43 additions and 119 deletions

9
.vscode/launch.json vendored
View File

@ -13,6 +13,15 @@
], ],
"program": "${workspaceFolder}\\index.js" "program": "${workspaceFolder}\\index.js"
}, },
{
"type": "node",
"request": "launch",
"name": "node update",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\update.js"
},
{ {
"type": "node", "type": "node",
"request": "launch", "request": "launch",

View File

@ -44,8 +44,7 @@ async function test() {
* 主函数 * 主函数
*/ */
async function main() { async function main() {
console.log("neteaseMusic Starting..."); console.log("neteaseMusic Start fetch ...");
while (true) { while (true) {
// 删除脏数据 // 删除脏数据
var affectRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []); var affectRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
@ -64,35 +63,17 @@ async function main() {
* 数据更新 (重新爬取) * 数据更新 (重新爬取)
*/ */
async function update() { async function update() {
console.log("neteaseMusic update ..."); console.log("neteaseMusic Start update ...");
while (true) {
let sleepTime = 100; await albumInfoUtils.fetchAll({ isUpdate: true });
await sleepUtils.sleep(2000);
// 从数据库中查出现有专辑,并进行更新
console.log("start fetching albums ...")
let albumIds = await dbUtils.query(`
SELECT DISTINCT album_id FROM album WHERE version = 1 -- and description like '%专辑《%》,简介:%'
`, []);
albumIds = albumIds.map(item => item.album_id);
for (let i = 0; i < albumIds.length; i++) {
await checkIsExit();
const albumId = albumIds[i];
console.log(`${i}/${albumIds.length} | album: ${albumId} | ${await statistics()}`);
try {
await albumInfoUtils.update({ albumId: albumId });
} catch (err) {
console.error(err);
}
await sleepUtils.sleep(sleepTime);
} }
} }
/** /**
* 统计数据库中数据 * 统计数据库中数据
*/ */
updateStatisticsResult(); async function watch() {
setInterval(updateStatisticsResult, 1000);
async function updateStatisticsResult() {
let sql = ` let sql = `
SELECT SELECT
song_count, song_count,
@ -142,7 +123,7 @@ async function updateStatisticsResult() {
let songAlbumCount = result[0].song_album_count; let songAlbumCount = result[0].song_album_count;
let songArtistCount = result[0].song_artist_count; let songArtistCount = result[0].song_artist_count;
global.statistics = [ let statisticsString = [
`song: ${songCount}/${songCount + songWaiting}`, `song: ${songCount}/${songCount + songWaiting}`,
`album: ${albumCount}/${albumCount + albumWaiting}`, `album: ${albumCount}/${albumCount + albumWaiting}`,
`artist: ${artistCount}/${artistCount + artistWaiting}`, `artist: ${artistCount}/${artistCount + artistWaiting}`,
@ -150,6 +131,7 @@ async function updateStatisticsResult() {
`songAlbum: ${songAlbumCount}`, `songAlbum: ${songAlbumCount}`,
`songArtist: ${songArtistCount}` `songArtist: ${songArtistCount}`
].join(', '); ].join(', ');
console.log(statisticsString);
} }
/** /**
@ -170,5 +152,6 @@ global.checkIsExit = async function () {
module.exports = { module.exports = {
main: main, main: main,
update: update, update: update,
watch: watch,
test: test, test: test,
} }

View File

@ -21,19 +21,23 @@ async function getFromDatabase({ albumId }) {
return albumInfo; return albumInfo;
} }
// 从数据库中查出还缺少的专辑,并进行爬取 // 正常应该查不出记录才对
async function fetchAll() { /*
SELECT * FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'
*/
async function fetchAll({ isUpdate = false }) {
console.log("start fetching albums ...") console.log("start fetching albums ...")
var albumIds = await dbUtils.query(` var albumIds = await dbUtils.query(isUpdate
SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album ) ? `SELECT DISTINCT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`
`, []); : `SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )`, []);
albumIds = albumIds.map(item => item.album_id); albumIds = albumIds.map(item => item.album_id);
for (let i = 0; i < albumIds.length; i++) { for (let i = 0; i < albumIds.length; i++) {
await global.checkIsExit(); await global.checkIsExit();
const albumId = albumIds[i]; const albumId = albumIds[i];
console.log(`${i}/${albumIds.length} | album: ${albumId} | ${global.statistics}`); console.log(`${i}/${albumIds.length} | album: ${albumId}`);
try { try {
await fetch({ albumId: albumId }); await fetch({ albumId: albumId, update: isUpdate });
} catch (err) { } catch (err) {
console.error(err); console.error(err);
} }
@ -42,15 +46,17 @@ async function fetchAll() {
} }
// 获取专辑详情 // 获取专辑详情
async function fetch({ albumId, debug = false }) { async function fetch({ albumId, debug = false, update = false }) {
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]); let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
if (result[0].count > 0 && !debug) { if (!debug && !update && result[0].count > 0) {
console.log(`数据库中已有数据,跳过 albumId: ${albumId}`); console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
return; return;
} else if (update && result[0].count == 0) {
console.log(`数据库中沒有数据,跳过 albumId: ${albumId}`);
return;
} }
let url = `https://music.163.com/album?id=${albumId}`; let url = `https://music.163.com/album?id=${albumId}`;
try { try {
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), 'utf8'); // var html = fs.readFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), 'utf8');
var html = await requestUtils.getApiResult(url); var html = await requestUtils.getApiResult(url);
@ -117,13 +123,13 @@ async function fetch({ albumId, debug = false }) {
title: albumInfoDict.title, title: albumInfoDict.title,
image: image, image: image,
description: albumInfoDict.description, description: albumInfoDict.description,
full_description: fullDescription, fullDescription: fullDescription,
pubDate: albumInfoDict.pubDate, pubDate: albumInfoDict.pubDate,
company: company, company: company,
songIds: songIds, songIds: songIds,
}; };
// console.log("albumInfo", albumInfo); // console.log("albumInfo", albumInfo);
dbUtils.query('INSERT IGNORE INTO album SET ?', { dbUtils.query(update ? `UPDATE album SET ? WHERE album_id = ${albumId}` : 'INSERT IGNORE INTO album SET ?', {
album_id: albumInfo.albumId, album_id: albumInfo.albumId,
title: albumInfo.title, title: albumInfo.title,
description: albumInfo.description, description: albumInfo.description,
@ -144,69 +150,8 @@ async function fetch({ albumId, debug = false }) {
return albumInfo; return albumInfo;
} }
/*
v1 to v3
升级v3完毕后应该查不出记录才对
SELECT
*
FROM
album
WHERE
full_description is null and description like '%专辑《%》,简介:%'
*/
async function update({ albumId }) {
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
if (result[0].count == 0) {
console.log(`数据库中没有数据,跳过 albumId: ${albumId}`);
return;
}
let url = `https://music.163.com/album?id=${albumId}`;
try {
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), 'utf8');
var html = await requestUtils.getApiResult(url);
// fs.writeFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), html);
} catch (errors) {
console.error(errors);
return;
}
if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
return;
}
// 专辑详细简介
let fullDescription = null;
if (html.includes(`<div id="album-desc-more" class="f-hide">`)) {
try {
fullDescription = /<div id="album-desc-more" class="f-hide">([\S\s]*?)<\/div>/.exec(html)[1];
fullDescription = fullDescription.replace(/<p class="f-brk">\n/g, '').replace(/<\/p>\n/g, '').trim();
} catch (e) {
// 解析出错
await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [albumId, 'album_fetch', `fullDescription 3 正则失败\n${e.message}`]);
return;
}
} else if (html.includes(`<div id="album-desc-dot" class="f-brk">`)) {
try {
fullDescription = /<div id="album-desc-dot" class="f-brk">([\S\s]*?)<\/div>/.exec(html)[1];
fullDescription = fullDescription.replace(/<p>/g, '').replace(/<\/p>/g, '').trim();
} catch (e) {
// 解析出错
await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [albumId, 'album_fetch', `fullDescription 4 正则失败\n${e.message}`]);
return;
}
}
await dbUtils.query('UPDATE album SET full_description = ?, version = 3 WHERE album_id = ?', [fullDescription, albumId]);
return;
}
module.exports = { module.exports = {
getFromDatabase: getFromDatabase, getFromDatabase: getFromDatabase,
fetch: fetch, fetch: fetch,
fetchAll: fetchAll, fetchAll: fetchAll,
update: update,
} }

View File

@ -31,7 +31,7 @@ async function fetchAll() {
for (let i = 0; i < artistIds.length; i++) { for (let i = 0; i < artistIds.length; i++) {
await global.checkIsExit(); await global.checkIsExit();
const artistId = artistIds[i]; const artistId = artistIds[i];
console.log(`${i}/${artistIds.length} | artist: ${artistId} | ${global.statistics}`); console.log(`${i}/${artistIds.length} | artist: ${artistId}`);
try { try {
await fetch({ artistId: artistId }); await fetch({ artistId: artistId });
} catch (err) { } catch (err) {
@ -50,7 +50,6 @@ async function fetch({ artistId, debug = false }) {
} }
let url = `https://music.163.com/artist?id=${artistId}`; let url = `https://music.163.com/artist?id=${artistId}`;
try { try {
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), 'utf8'); // var html = fs.readFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), 'utf8');
var html = await requestUtils.getApiResult(url); var html = await requestUtils.getApiResult(url);

View File

@ -16,7 +16,7 @@ async function fetchAll() {
for (let i = 0; i < songIds.length; i++) { for (let i = 0; i < songIds.length; i++) {
await global.checkIsExit(); await global.checkIsExit();
const songId = songIds[i]; const songId = songIds[i];
console.log(`${i}/${songIds.length} | lyric: ${songId} | ${global.statistics}`); console.log(`${i}/${songIds.length} | lyric: ${songId}`);
try { try {
await fetch({ songId: songId }); await fetch({ songId: songId });
} catch (err) { } catch (err) {
@ -29,7 +29,6 @@ async function fetchAll() {
// 获取歌词详情 // 获取歌词详情
async function fetch({ songId, debug = false }) { async function fetch({ songId, debug = false }) {
var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`; var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`;
try { try {
// var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8'); // var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8');
var json = await requestUtils.getApiResult(url); var json = await requestUtils.getApiResult(url);

View File

@ -35,7 +35,7 @@ async function fetchAll() {
for (let i = 0; i < songIds.length; i++) { for (let i = 0; i < songIds.length; i++) {
await global.checkIsExit(); await global.checkIsExit();
const songId = songIds[i]; const songId = songIds[i];
console.log(`${i}/${songIds.length} | song: ${songId} | ${global.statistics}`); console.log(`${i}/${songIds.length} | song: ${songId}`);
try { try {
await fetch({ songId: songId }); await fetch({ songId: songId });
} catch (err) { } catch (err) {
@ -51,24 +51,9 @@ async function fetch({ songId, debug = false }) {
if (result[0].count > 0 && !debug) { if (result[0].count > 0 && !debug) {
console.log(`数据库中已有数据,跳过 songId: ${songId}`); console.log(`数据库中已有数据,跳过 songId: ${songId}`);
return; return;
// let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
// songResult = JSON.parse(JSON.stringify(songResult));
// let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);
// songArtistResult = JSON.parse(JSON.stringify(songArtistResult));
// songResult.artistIds = songArtistResult.map(song => song.artist_id);
// let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
// songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult));
// songResult.albumId = songAlbumResult.map(song => song.album_id)[0];
// // console.log(songResult);
// return songResult;
} }
let url = `https://music.163.com/song?id=${songId}`; let url = `https://music.163.com/song?id=${songId}`;
try { try {
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `song-${songId}.html`), 'utf8'); // var html = fs.readFileSync(path.join(__dirname, "../../temp", `song-${songId}.html`), 'utf8');
var html = await requestUtils.getApiResult(url); var html = await requestUtils.getApiResult(url);

4
watch.js Normal file
View File

@ -0,0 +1,4 @@
// Entry script: prints the database statistics once immediately and then
// again every 5 seconds.
// The flag is set before the module is required, preserving the original order.
global.useMysqlPool = false;
const neteaseMusic = require('./netease_music/index');

/**
 * Run one statistics pass.
 * `watch` is async, so a failure inside it (e.g. a failed query) would surface
 * as an unhandled promise rejection and stop the watcher; log it instead so
 * the next tick can try again.
 */
async function runWatch() {
    try {
        await neteaseMusic.watch();
    } catch (err) {
        console.error(err);
    }
}

runWatch();
setInterval(runWatch, 5000);