update
This commit is contained in:
parent
3e1ef431a7
commit
2ff2758fc8
9
.vscode/launch.json
vendored
9
.vscode/launch.json
vendored
@ -13,6 +13,15 @@
|
|||||||
],
|
],
|
||||||
"program": "${workspaceFolder}\\index.js"
|
"program": "${workspaceFolder}\\index.js"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "node",
|
||||||
|
"request": "launch",
|
||||||
|
"name": "node update",
|
||||||
|
"skipFiles": [
|
||||||
|
"<node_internals>/**"
|
||||||
|
],
|
||||||
|
"program": "${workspaceFolder}\\update.js"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "node",
|
"type": "node",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
|
@ -44,8 +44,7 @@ async function test() {
|
|||||||
* 主函数
|
* 主函数
|
||||||
*/
|
*/
|
||||||
async function main() {
|
async function main() {
|
||||||
console.log("neteaseMusic Starting...");
|
console.log("neteaseMusic Start fetch ...");
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
// 删除脏数据
|
// 删除脏数据
|
||||||
var affectRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
|
var affectRows1 = await dbUtils.query(`DELETE FROM song_artist_relation WHERE song_id = 0 OR artist_id = 0`, []);
|
||||||
@ -64,35 +63,17 @@ async function main() {
|
|||||||
* 数据更新 (重新爬取)
|
* 数据更新 (重新爬取)
|
||||||
*/
|
*/
|
||||||
async function update() {
|
async function update() {
|
||||||
console.log("neteaseMusic update ...");
|
console.log("neteaseMusic Start update ...");
|
||||||
|
while (true) {
|
||||||
let sleepTime = 100;
|
await albumInfoUtils.fetchAll({ isUpdate: true });
|
||||||
|
await sleepUtils.sleep(2000);
|
||||||
// 从数据库中查出现有专辑,并进行更新
|
|
||||||
console.log("start fetching albums ...")
|
|
||||||
let albumIds = await dbUtils.query(`
|
|
||||||
SELECT DISTINCT album_id FROM album WHERE version = 1 -- and description like '%专辑《%》,简介:%'
|
|
||||||
`, []);
|
|
||||||
albumIds = albumIds.map(item => item.album_id);
|
|
||||||
for (let i = 0; i < albumIds.length; i++) {
|
|
||||||
await checkIsExit();
|
|
||||||
const albumId = albumIds[i];
|
|
||||||
console.log(`${i}/${albumIds.length} | album: ${albumId} | ${await statistics()}`);
|
|
||||||
try {
|
|
||||||
await albumInfoUtils.update({ albumId: albumId });
|
|
||||||
} catch (err) {
|
|
||||||
console.error(err);
|
|
||||||
}
|
|
||||||
await sleepUtils.sleep(sleepTime);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 统计数据库中数据
|
* 统计数据库中数据
|
||||||
*/
|
*/
|
||||||
updateStatisticsResult();
|
async function watch() {
|
||||||
setInterval(updateStatisticsResult, 1000);
|
|
||||||
async function updateStatisticsResult() {
|
|
||||||
let sql = `
|
let sql = `
|
||||||
SELECT
|
SELECT
|
||||||
song_count,
|
song_count,
|
||||||
@ -142,7 +123,7 @@ async function updateStatisticsResult() {
|
|||||||
|
|
||||||
let songAlbumCount = result[0].song_album_count;
|
let songAlbumCount = result[0].song_album_count;
|
||||||
let songArtistCount = result[0].song_artist_count;
|
let songArtistCount = result[0].song_artist_count;
|
||||||
global.statistics = [
|
let statisticsString = [
|
||||||
`song: ${songCount}/${songCount + songWaiting}`,
|
`song: ${songCount}/${songCount + songWaiting}`,
|
||||||
`album: ${albumCount}/${albumCount + albumWaiting}`,
|
`album: ${albumCount}/${albumCount + albumWaiting}`,
|
||||||
`artist: ${artistCount}/${artistCount + artistWaiting}`,
|
`artist: ${artistCount}/${artistCount + artistWaiting}`,
|
||||||
@ -150,6 +131,7 @@ async function updateStatisticsResult() {
|
|||||||
`songAlbum: ${songAlbumCount}`,
|
`songAlbum: ${songAlbumCount}`,
|
||||||
`songArtist: ${songArtistCount}`
|
`songArtist: ${songArtistCount}`
|
||||||
].join(', ');
|
].join(', ');
|
||||||
|
console.log(statisticsString);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -170,5 +152,6 @@ global.checkIsExit = async function () {
|
|||||||
module.exports = {
|
module.exports = {
|
||||||
main: main,
|
main: main,
|
||||||
update: update,
|
update: update,
|
||||||
|
watch: watch,
|
||||||
test: test,
|
test: test,
|
||||||
}
|
}
|
@ -21,19 +21,23 @@ async function getFromDatabase({ albumId }) {
|
|||||||
return albumInfo;
|
return albumInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 从数据库中查出还缺少的专辑,并进行爬取
|
// 正常应该查不出记录才对
|
||||||
async function fetchAll() {
|
/*
|
||||||
|
SELECT * FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'
|
||||||
|
*/
|
||||||
|
|
||||||
|
async function fetchAll({ isUpdate = false }) {
|
||||||
console.log("start fetching albums ...")
|
console.log("start fetching albums ...")
|
||||||
var albumIds = await dbUtils.query(`
|
var albumIds = await dbUtils.query(isUpdate
|
||||||
SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )
|
? `SELECT DISTINCT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`
|
||||||
`, []);
|
: `SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT DISTINCT album_id FROM album )`, []);
|
||||||
albumIds = albumIds.map(item => item.album_id);
|
albumIds = albumIds.map(item => item.album_id);
|
||||||
for (let i = 0; i < albumIds.length; i++) {
|
for (let i = 0; i < albumIds.length; i++) {
|
||||||
await global.checkIsExit();
|
await global.checkIsExit();
|
||||||
const albumId = albumIds[i];
|
const albumId = albumIds[i];
|
||||||
console.log(`${i}/${albumIds.length} | album: ${albumId} | ${global.statistics}`);
|
console.log(`${i}/${albumIds.length} | album: ${albumId}`);
|
||||||
try {
|
try {
|
||||||
await fetch({ albumId: albumId });
|
await fetch({ albumId: albumId, update: isUpdate });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(err);
|
console.error(err);
|
||||||
}
|
}
|
||||||
@ -42,15 +46,17 @@ async function fetchAll() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 获取专辑详情
|
// 获取专辑详情
|
||||||
async function fetch({ albumId, debug = false }) {
|
async function fetch({ albumId, debug = false, update = false }) {
|
||||||
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
|
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
|
||||||
if (result[0].count > 0 && !debug) {
|
if (!debug && !update && result[0].count > 0) {
|
||||||
console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
|
console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
|
||||||
return;
|
return;
|
||||||
|
} else if (update && result[0].count == 0) {
|
||||||
|
console.log(`数据库中沒有数据,跳过 albumId: ${albumId}`);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let url = `https://music.163.com/album?id=${albumId}`;
|
let url = `https://music.163.com/album?id=${albumId}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), 'utf8');
|
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), 'utf8');
|
||||||
var html = await requestUtils.getApiResult(url);
|
var html = await requestUtils.getApiResult(url);
|
||||||
@ -117,13 +123,13 @@ async function fetch({ albumId, debug = false }) {
|
|||||||
title: albumInfoDict.title,
|
title: albumInfoDict.title,
|
||||||
image: image,
|
image: image,
|
||||||
description: albumInfoDict.description,
|
description: albumInfoDict.description,
|
||||||
full_description: fullDescription,
|
fullDescription: fullDescription,
|
||||||
pubDate: albumInfoDict.pubDate,
|
pubDate: albumInfoDict.pubDate,
|
||||||
company: company,
|
company: company,
|
||||||
songIds: songIds,
|
songIds: songIds,
|
||||||
};
|
};
|
||||||
// console.log("albumInfo", albumInfo);
|
// console.log("albumInfo", albumInfo);
|
||||||
dbUtils.query('INSERT IGNORE INTO album SET ?', {
|
dbUtils.query(update ? `UPDATE album SET ? WHERE album_id = ${albumId}` : 'INSERT IGNORE INTO album SET ?', {
|
||||||
album_id: albumInfo.albumId,
|
album_id: albumInfo.albumId,
|
||||||
title: albumInfo.title,
|
title: albumInfo.title,
|
||||||
description: albumInfo.description,
|
description: albumInfo.description,
|
||||||
@ -144,69 +150,8 @@ async function fetch({ albumId, debug = false }) {
|
|||||||
return albumInfo;
|
return albumInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
v1 to v3
|
|
||||||
|
|
||||||
升级v3完毕后应该查不出记录才对
|
|
||||||
SELECT
|
|
||||||
*
|
|
||||||
FROM
|
|
||||||
album
|
|
||||||
WHERE
|
|
||||||
full_description is null and description like '%专辑《%》,简介:%'
|
|
||||||
*/
|
|
||||||
async function update({ albumId }) {
|
|
||||||
let result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
|
|
||||||
if (result[0].count == 0) {
|
|
||||||
console.log(`数据库中没有数据,跳过 albumId: ${albumId}`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let url = `https://music.163.com/album?id=${albumId}`;
|
|
||||||
|
|
||||||
try {
|
|
||||||
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), 'utf8');
|
|
||||||
var html = await requestUtils.getApiResult(url);
|
|
||||||
// fs.writeFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), html);
|
|
||||||
} catch (errors) {
|
|
||||||
console.error(errors);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 专辑详细简介
|
|
||||||
let fullDescription = null;
|
|
||||||
if (html.includes(`<div id="album-desc-more" class="f-hide">`)) {
|
|
||||||
try {
|
|
||||||
fullDescription = /<div id="album-desc-more" class="f-hide">([\S\s]*?)<\/div>/.exec(html)[1];
|
|
||||||
fullDescription = fullDescription.replace(/<p class="f-brk">\n/g, '').replace(/<\/p>\n/g, '').trim();
|
|
||||||
} catch (e) {
|
|
||||||
// 解析出错
|
|
||||||
await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [albumId, 'album_fetch', `fullDescription 3 正则失败\n${e.message}`]);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else if (html.includes(`<div id="album-desc-dot" class="f-brk">`)) {
|
|
||||||
try {
|
|
||||||
fullDescription = /<div id="album-desc-dot" class="f-brk">([\S\s]*?)<\/div>/.exec(html)[1];
|
|
||||||
fullDescription = fullDescription.replace(/<p>/g, '').replace(/<\/p>/g, '').trim();
|
|
||||||
} catch (e) {
|
|
||||||
// 解析出错
|
|
||||||
await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [albumId, 'album_fetch', `fullDescription 4 正则失败\n${e.message}`]);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
await dbUtils.query('UPDATE album SET full_description = ?, version = 3 WHERE album_id = ?', [fullDescription, albumId]);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
getFromDatabase: getFromDatabase,
|
getFromDatabase: getFromDatabase,
|
||||||
fetch: fetch,
|
fetch: fetch,
|
||||||
fetchAll: fetchAll,
|
fetchAll: fetchAll,
|
||||||
update: update,
|
|
||||||
}
|
}
|
@ -31,7 +31,7 @@ async function fetchAll() {
|
|||||||
for (let i = 0; i < artistIds.length; i++) {
|
for (let i = 0; i < artistIds.length; i++) {
|
||||||
await global.checkIsExit();
|
await global.checkIsExit();
|
||||||
const artistId = artistIds[i];
|
const artistId = artistIds[i];
|
||||||
console.log(`${i}/${artistIds.length} | artist: ${artistId} | ${global.statistics}`);
|
console.log(`${i}/${artistIds.length} | artist: ${artistId}`);
|
||||||
try {
|
try {
|
||||||
await fetch({ artistId: artistId });
|
await fetch({ artistId: artistId });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@ -50,7 +50,6 @@ async function fetch({ artistId, debug = false }) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let url = `https://music.163.com/artist?id=${artistId}`;
|
let url = `https://music.163.com/artist?id=${artistId}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), 'utf8');
|
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), 'utf8');
|
||||||
var html = await requestUtils.getApiResult(url);
|
var html = await requestUtils.getApiResult(url);
|
||||||
|
@ -16,7 +16,7 @@ async function fetchAll() {
|
|||||||
for (let i = 0; i < songIds.length; i++) {
|
for (let i = 0; i < songIds.length; i++) {
|
||||||
await global.checkIsExit();
|
await global.checkIsExit();
|
||||||
const songId = songIds[i];
|
const songId = songIds[i];
|
||||||
console.log(`${i}/${songIds.length} | lyric: ${songId} | ${global.statistics}`);
|
console.log(`${i}/${songIds.length} | lyric: ${songId}`);
|
||||||
try {
|
try {
|
||||||
await fetch({ songId: songId });
|
await fetch({ songId: songId });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@ -29,7 +29,6 @@ async function fetchAll() {
|
|||||||
// 获取歌词详情
|
// 获取歌词详情
|
||||||
async function fetch({ songId, debug = false }) {
|
async function fetch({ songId, debug = false }) {
|
||||||
var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`;
|
var url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8');
|
// var json = fs.readFileSync(path.join(__dirname, "../../temp", `lyric-${songId}.json`), 'utf8');
|
||||||
var json = await requestUtils.getApiResult(url);
|
var json = await requestUtils.getApiResult(url);
|
||||||
|
@ -35,7 +35,7 @@ async function fetchAll() {
|
|||||||
for (let i = 0; i < songIds.length; i++) {
|
for (let i = 0; i < songIds.length; i++) {
|
||||||
await global.checkIsExit();
|
await global.checkIsExit();
|
||||||
const songId = songIds[i];
|
const songId = songIds[i];
|
||||||
console.log(`${i}/${songIds.length} | song: ${songId} | ${global.statistics}`);
|
console.log(`${i}/${songIds.length} | song: ${songId}`);
|
||||||
try {
|
try {
|
||||||
await fetch({ songId: songId });
|
await fetch({ songId: songId });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@ -51,24 +51,9 @@ async function fetch({ songId, debug = false }) {
|
|||||||
if (result[0].count > 0 && !debug) {
|
if (result[0].count > 0 && !debug) {
|
||||||
console.log(`数据库中已有数据,跳过 songId: ${songId}`);
|
console.log(`数据库中已有数据,跳过 songId: ${songId}`);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// let songResult = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
|
|
||||||
// songResult = JSON.parse(JSON.stringify(songResult));
|
|
||||||
|
|
||||||
// let songArtistResult = await dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]);
|
|
||||||
// songArtistResult = JSON.parse(JSON.stringify(songArtistResult));
|
|
||||||
// songResult.artistIds = songArtistResult.map(song => song.artist_id);
|
|
||||||
|
|
||||||
// let songAlbumResult = await dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]);
|
|
||||||
// songAlbumResult = JSON.parse(JSON.stringify(songAlbumResult));
|
|
||||||
// songResult.albumId = songAlbumResult.map(song => song.album_id)[0];
|
|
||||||
|
|
||||||
// // console.log(songResult);
|
|
||||||
// return songResult;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let url = `https://music.163.com/song?id=${songId}`;
|
let url = `https://music.163.com/song?id=${songId}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `song-${songId}.html`), 'utf8');
|
// var html = fs.readFileSync(path.join(__dirname, "../../temp", `song-${songId}.html`), 'utf8');
|
||||||
var html = await requestUtils.getApiResult(url);
|
var html = await requestUtils.getApiResult(url);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user