update
This commit is contained in:
parent
3e1ef431a7
commit
2ff2758fc8
9
.vscode/launch.json
vendored
9
.vscode/launch.json
vendored
@ -13,6 +13,15 @@
|
||||
],
|
||||
"program": "${workspaceFolder}\\index.js"
|
||||
},
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"name": "node update",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
],
|
||||
"program": "${workspaceFolder}\\update.js"
|
||||
},
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
|
@ -44,8 +44,7 @@ async function test() {
|
||||
* 主函数
|
||||
*/
|
||||
async function main() {
|
||||
console.log("neteaseMusic Starting...");
|
||||
|
||||
console.log("neteaseMusic Start fetch ...");
|
||||
while (true) {
|
||||
// 删除脏数据
|
||||
var affectRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
|
||||
@ -64,35 +63,17 @@ async function main() {
|
||||
* 数据更新 (重新爬取)
|
||||
*/
|
||||
async function update() {
|
||||
console.log("neteaseMusic update ...");
|
||||
|
||||
let sleepTime = 100;
|
||||
|
||||
// 从数据库中查出现有专辑,并进行更新
|
||||
console.log("start fetching albums ...")
|
||||
let albumIds = await dbUtils.query(`
|
||||
SELECT DISTINCT album_id FROM album WHERE version = 1 -- and description like '%专辑《%》,简介:%'
|
||||
`, []);
|
||||
albumIds = albumIds.map(item => item.album_id);
|
||||
for (let i = 0; i < albumIds.length; i++) {
|
||||
await checkIsExit();
|
||||
const albumId = albumIds[i];
|
||||
console.log(`${i}/${albumIds.length} | album: ${albumId} | ${await statistics()}`);
|
||||
try {
|
||||
await albumInfoUtils.update({ albumId: albumId });
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
}
|
||||
await sleepUtils.sleep(sleepTime);
|
||||
console.log("neteaseMusic Start update ...");
|
||||
while (true) {
|
||||
await albumInfoUtils.fetchAll({ isUpdate: true });
|
||||
await sleepUtils.sleep(2000);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 统计数据库中数据
|
||||
*/
|
||||
updateStatisticsResult();
|
||||
setInterval(updateStatisticsResult, 1000);
|
||||
async function updateStatisticsResult() {
|
||||
async function watch() {
|
||||
let sql = `
|
||||
SELECT
|
||||
song_count,
|
||||
@ -142,7 +123,7 @@ async function updateStatisticsResult() {
|
||||
|
||||
let songAlbumCount = result[0].song_album_count;
|
||||
let songArtistCount = result[0].song_artist_count;
|
||||
global.statistics = [
|
||||
let statisticsString = [
|
||||
`song: ${songCount}/${songCount + songWaiting}`,
|
||||
`album: ${albumCount}/${albumCount + albumWaiting}`,
|
||||
`artist: ${artistCount}/${artistCount + artistWaiting}`,
|
||||
@ -150,6 +131,7 @@ async function updateStatisticsResult() {
|
||||
`songAlbum: ${songAlbumCount}`,
|
||||
`songArtist: ${songArtistCount}`
|
||||
].join(', ');
|
||||
console.log(statisticsString);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -170,5 +152,6 @@ global.checkIsExit = async function () {
|
||||
module.exports = {
|
||||
main: main,
|
||||
update: update,
|
||||
watch: watch,
|
||||
test: test,
|
||||
}
|
@ -21,19 +21,23 @@ async function getFromDatabase({ albumId }) {
|
||||
return albumInfo;
|
||||
}
|
||||
|
||||
// 从数据库中查出还缺少的专辑,并进行爬取
|
||||
async function fetchAll() {
|
||||
// 正常应该查不出记录才对
|
||||
/*
|
||||
SELECT * FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'
|
||||
*/
|
||||
|
||||
async function fetchAll({ isUpdate = false }) {
|
||||
console.log("start fetching albums ...")
|
||||
var albumIds = await dbUtils.query(`
|
||||
SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
|
||||
`, []);
|
||||
var albumIds = await dbUtils.query(isUpdate
|
||||
? `SELECT DISTINCT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`
|
||||
: `SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )`, []);
|
||||
albumIds = albumIds.map(item => item.album_id);
|
||||
for (let i = 0; i < albumIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const albumId = albumIds[i];
|
||||
console.log(`${i}/${albumIds.length} | album: ${albumId} | ${global.statistics}`);
|
||||
console.log(`${i}/${albumIds.length} | album: ${albumId}`);
|
||||
try {
|
||||
await fetch({ albumId: albumId });
|
||||
await fetch({ albumId: albumId, update: isUpdate });
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
}
|
||||
@ -42,15 +46,17 @@ async function fetchAll() {
|
||||
}
|
||||
|
||||
// 获取专辑详情
|
||||
async function fetch({ albumId, debug = false }) {
|
||||
async function fetch({ albumId, debug = false, update = false }) {
|
||||
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
|
||||
if (result[0].count > 0 && !debug) {
|
||||
if (!debug && !update && result[0].count > 0) {
|
||||
console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
|
||||
return;
|
||||
} else if (update && result[0].count == 0) {
|
||||
console.log(`数据库中沒有数据,跳过 albumId: ${albumId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
let url = `https://music.163.com/album?id=${albumId}`;
|
||||
|
||||
try {
|
||||
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), 'utf8');
|
||||
var html = await requestUtils.getApiResult(url);
|
||||
@ -117,13 +123,13 @@ async function fetch({ albumId, debug = false }) {
|
||||
title: albumInfoDict.title,
|
||||
image: image,
|
||||
description: albumInfoDict.description,
|
||||
full_description: fullDescription,
|
||||
fullDescription: fullDescription,
|
||||
pubDate: albumInfoDict.pubDate,
|
||||
company: company,
|
||||
songIds: songIds,
|
||||
};
|
||||
// console.log("albumInfo", albumInfo);
|
||||
dbUtils.query('INSERT IGNORE INTO album SET ?', {
|
||||
dbUtils.query(update ? `UPDATE album SET ? WHERE album_id = ${albumId}` : 'INSERT IGNORE INTO album SET ?', {
|
||||
album_id: albumInfo.albumId,
|
||||
title: albumInfo.title,
|
||||
description: albumInfo.description,
|
||||
@ -144,69 +150,8 @@ async function fetch({ albumId, debug = false }) {
|
||||
return albumInfo;
|
||||
}
|
||||
|
||||
/*
|
||||
v1 to v3
|
||||
|
||||
升级v3完毕后应该查不出记录才对
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
album
|
||||
WHERE
|
||||
full_description is null and description like '%专辑《%》,简介:%'
|
||||
*/
|
||||
/**
 * Pick the parsing rule for whichever description container an album page carries.
 *
 * The "more" container is checked first, then the "dot" container, which is the
 * same precedence the caller has always used.
 *
 * @param {string} html - Album page markup.
 * @returns {{step: number, marker: string, pattern: RegExp, clean: function(string): string}|null}
 *     The matching rule, or null when the page has neither container.
 */
function findAlbumDescriptionLayout(html) {
    const layouts = [
        {
            // `step` is only used to label the log entry written on a parse failure.
            step: 3,
            marker: `<div id="album-desc-more" class="f-hide">`,
            pattern: /<div id="album-desc-more" class="f-hide">([\S\s]*?)<\/div>/,
            clean: (text) => text.replace(/<p class="f-brk">\n/g, '').replace(/<\/p>\n/g, ''),
        },
        {
            step: 4,
            marker: `<div id="album-desc-dot" class="f-brk">`,
            pattern: /<div id="album-desc-dot" class="f-brk">([\S\s]*?)<\/div>/,
            clean: (text) => text.replace(/<p>/g, '').replace(/<\/p>/g, ''),
        },
    ];

    for (const layout of layouts) {
        if (html.includes(layout.marker)) {
            return layout;
        }
    }
    return null;
}

/**
 * Re-fetch one album page and store its full description, marking the row as version 3.
 *
 * Does nothing when the album is not in the `album` table, when the page request
 * fails, when the response is not text, or when the site answers with its
 * "page not found" note. When a description container is present but cannot be
 * parsed, a row is written to the `log` table and the album row is left untouched.
 * When the page has no description container at all, `full_description` is stored as NULL.
 *
 * @param {Object} options
 * @param {number|string} options.albumId - Album id used in the page URL and in the SQL parameters.
 * @returns {Promise<void>}
 */
async function update({ albumId }) {
    const result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
    // Number() keeps the check correct if the driver hands back the count as a string or BigInt.
    if (Number(result[0].count) === 0) {
        console.log(`数据库中没有数据,跳过 albumId: ${albumId}`);
        return;
    }

    const url = `https://music.163.com/album?id=${albumId}`;

    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        console.error(errors);
        return;
    }

    // Guard: the string checks below would throw a TypeError on a non-string response.
    if (typeof html !== 'string') {
        console.error(`album page is not text, skipping albumId: ${albumId}`);
        return;
    }

    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        return;
    }

    // Full album description; stays null when the page has no description container.
    let fullDescription = null;
    const layout = findAlbumDescriptionLayout(html);
    if (layout !== null) {
        try {
            // exec() returns null when the container is never closed; the resulting
            // TypeError is handled below as a parse failure.
            fullDescription = layout.clean(layout.pattern.exec(html)[1]).trim();
        } catch (e) {
            // Parse failed: record it and leave the album row as it was.
            await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [albumId, 'album_fetch', `fullDescription ${layout.step} 正则失败\n${e.message}`]);
            return;
        }
    }

    await dbUtils.query('UPDATE album SET full_description = ?, version = 3 WHERE album_id = ?', [fullDescription, albumId]);
    return;
}
|
||||
|
||||
|
||||
module.exports = {
|
||||
getFromDatabase: getFromDatabase,
|
||||
fetch: fetch,
|
||||
fetchAll: fetchAll,
|
||||
update: update,
|
||||
}
|
@ -31,7 +31,7 @@ async function fetchAll() {
|
||||
for (let i = 0; i < artistIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const artistId = artistIds[i];
|
||||
console.log(`${i}/${artistIds.length} | artist: ${artistId} | ${global.statistics}`);
|
||||
console.log(`${i}/${artistIds.length} | artist: ${artistId}`);
|
||||
try {
|
||||
await fetch({ artistId: artistId });
|
||||
} catch (err) {
|
||||
@ -50,7 +50,6 @@ async function fetch({ artistId, debug = false }) {
|
||||
}
|
||||
|
||||
let url = `https://music.163.com/artist?id=${artistId}`;
|
||||
|
||||
try {
|
||||
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), 'utf8');
|
||||
var html = await requestUtils.getApiResult(url);
|
||||
|
@ -16,7 +16,7 @@ async function fetchAll() {
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const songId = songIds[i];
|
||||
console.log(`${i}/${songIds.length} | lyric: ${songId} | ${global.statistics}`);
|
||||
console.log(`${i}/${songIds.length} | lyric: ${songId}`);
|
||||
try {
|
||||
await fetch({ songId: songId });
|
||||
} catch (err) {
|
||||
@ -29,7 +29,6 @@ async function fetchAll() {
|
||||
// 获取歌词详情
|
||||
async function fetch({ songId, debug = false }) {
|
||||
var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`;
|
||||
|
||||
try {
|
||||
// var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8');
|
||||
var json = await requestUtils.getApiResult(url);
|
||||
|
@ -35,7 +35,7 @@ async function fetchAll() {
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const songId = songIds[i];
|
||||
console.log(`${i}/${songIds.length} | song: ${songId} | ${global.statistics}`);
|
||||
console.log(`${i}/${songIds.length} | song: ${songId}`);
|
||||
try {
|
||||
await fetch({ songId: songId });
|
||||
} catch (err) {
|
||||
@ -51,24 +51,9 @@ async function fetch({ songId, debug = false }) {
|
||||
if (result[0].count > 0 && !debug) {
|
||||
console.log(`数据库中已有数据,跳过 songId: ${songId}`);
|
||||
return;
|
||||
|
||||
// let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
|
||||
// songResult = JSON.parse(JSON.stringify(songResult));
|
||||
|
||||
// let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);
|
||||
// songArtistResult = JSON.parse(JSON.stringify(songArtistResult));
|
||||
// songResult.artistIds = songArtistResult.map(song => song.artist_id);
|
||||
|
||||
// let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
|
||||
// songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult));
|
||||
// songResult.albumId = songAlbumResult.map(song => song.album_id)[0];
|
||||
|
||||
// // console.log(songResult);
|
||||
// return songResult;
|
||||
}
|
||||
|
||||
let url = `https://music.163.com/song?id=${songId}`;
|
||||
|
||||
try {
|
||||
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `song-${songId}.html`), 'utf8');
|
||||
var html = await requestUtils.getApiResult(url);
|
||||
|
Loading…
Reference in New Issue
Block a user