插入关联表时同时插入wait_check表;统一查询将要爬取的id代码到dataManager.js
This commit is contained in:
@@ -3,18 +3,31 @@ if (process.argv.length <= 2) {
|
|||||||
"参数不够",
|
"参数不够",
|
||||||
"node index --utils [song|album|artist|lyric|comment] --min [number] --max [number] --order [false|ASC|DESC] --limit [number]",
|
"node index --utils [song|album|artist|lyric|comment] --min [number] --max [number] --order [false|ASC|DESC] --limit [number]",
|
||||||
// "",
|
// "",
|
||||||
// "node index --utils song --min xxx --max xxx --order ASC --limit 2000",
|
// "node index --utils xxx --min xxx --max xxx --order ASC --limit 2000",
|
||||||
// "node index --utils album --min xxx --max xxx --order ASC --limit 2000",
|
|
||||||
// "node index --utils artist --min xxx --max xxx --order ASC --limit 2000",
|
|
||||||
// "node index --utils lyric --min xxx --max xxx --order ASC --limit 2000",
|
|
||||||
// "node index --utils comment --min xxx --max xxx --order ASC --limit 2000",
|
|
||||||
].join('\n');
|
].join('\n');
|
||||||
console.log(output);
|
console.log(output);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
var args = require('minimist')(process.argv.slice(2));
|
var args = require('minimist')(process.argv.slice(2));
|
||||||
|
args = {
|
||||||
|
// 子模块
|
||||||
|
utils: args.utils,
|
||||||
|
// id 范围
|
||||||
|
min: Number(args.min) || undefined,
|
||||||
|
max: Number(args.max) || undefined,
|
||||||
|
// 顺序
|
||||||
|
order: args.order,
|
||||||
|
// 数量
|
||||||
|
limit: Number(args.limit) || undefined,
|
||||||
|
// 分区
|
||||||
|
partition: Number(args.partition) || undefined,
|
||||||
|
|
||||||
|
sleepTime: Number(args.sleepTime) || 100,
|
||||||
|
}
|
||||||
|
|
||||||
console.log("args:", args);
|
console.log("args:", args);
|
||||||
|
|
||||||
|
global.sleepTime = args.sleepTime; // 两次请求之间停顿时间
|
||||||
global.useMysqlPool = true;
|
global.useMysqlPool = true;
|
||||||
const neteaseMusic = require('./src/index');
|
const neteaseMusic = require('./src/index');
|
||||||
neteaseMusic.main(args);
|
neteaseMusic.main(args);
|
@@ -212,26 +212,49 @@ CREATE TABLE `analysis` (
|
|||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
|
|
||||||
CREATE TABLE `wait_song` (
|
|
||||||
`id` int(10) unsigned NOT NULL COMMENT 'id',
|
CREATE TABLE `wait_check_song` (
|
||||||
`partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4',
|
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
|
||||||
PRIMARY KEY (`id`)
|
PRIMARY KEY (`id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `wait_artist` (
|
CREATE TABLE `wait_check_artist` (
|
||||||
`id` int(10) unsigned NOT NULL COMMENT 'id',
|
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
|
||||||
`partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4',
|
|
||||||
PRIMARY KEY (`id`)
|
PRIMARY KEY (`id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `wait_album` (
|
CREATE TABLE `wait_check_album` (
|
||||||
`id` int(10) unsigned NOT NULL COMMENT 'id',
|
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
|
||||||
`partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4',
|
|
||||||
PRIMARY KEY (`id`)
|
PRIMARY KEY (`id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
CREATE TABLE `wait_lyric` (
|
CREATE TABLE `wait_check_lyric` (
|
||||||
`id` int(10) unsigned NOT NULL COMMENT 'id',
|
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
|
||||||
`partition` tinyint(4) unsigned NOT NULL COMMENT '分区 0-4',
|
|
||||||
PRIMARY KEY (`id`)
|
PRIMARY KEY (`id`)
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE TABLE `wait_fetch_song` (
|
||||||
|
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
|
||||||
|
`partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
|
||||||
|
PRIMARY KEY (`id`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
|
CREATE TABLE `wait_fetch_artist` (
|
||||||
|
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
|
||||||
|
`partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
|
||||||
|
PRIMARY KEY (`id`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
|
CREATE TABLE `wait_fetch_album` (
|
||||||
|
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
|
||||||
|
`partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
|
||||||
|
PRIMARY KEY (`id`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
|
||||||
|
CREATE TABLE `wait_fetch_lyric` (
|
||||||
|
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
|
||||||
|
`partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
|
||||||
|
PRIMARY KEY (`id`)
|
||||||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
||||||
|
@@ -20,8 +20,27 @@ module.exports = {
|
|||||||
songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2
|
songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2
|
||||||
])]);
|
])]);
|
||||||
},
|
},
|
||||||
|
|
||||||
|
getIdsToFetch: async (args) => {
|
||||||
|
let whereClause = [
|
||||||
|
args.min ? `song_id > ${args.min}` : '1=1',
|
||||||
|
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||||
|
].join(' AND ');
|
||||||
|
let sql = `
|
||||||
|
SELECT song_id FROM wait_fetch_song WHERE ${whereClause}
|
||||||
|
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
||||||
|
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||||
|
`;
|
||||||
|
// // 更新现有数据
|
||||||
|
// sql = `SELECT song_id FROM song WHERE data_version = 1`;
|
||||||
|
console.log(sql);
|
||||||
|
let songIds = await dbUtils.query(sql, []);
|
||||||
|
songIds = songIds.map(item => item.song_id);
|
||||||
|
return songIds;
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
album: {
|
album: {
|
||||||
insert: async (albumInfo) => {
|
insert: async (albumInfo) => {
|
||||||
return await dbUtils.query('INSERT IGNORE INTO album SET ?', albumInfo);
|
return await dbUtils.query('INSERT IGNORE INTO album SET ?', albumInfo);
|
||||||
@@ -29,21 +48,61 @@ module.exports = {
|
|||||||
|
|
||||||
update: async (albumId, albumInfo) => {
|
update: async (albumId, albumInfo) => {
|
||||||
return await dbUtils.query(`UPDATE album SET ? WHERE album_id = ${albumId}`, albumInfo);
|
return await dbUtils.query(`UPDATE album SET ? WHERE album_id = ${albumId}`, albumInfo);
|
||||||
}
|
},
|
||||||
|
|
||||||
|
getIdsToFetch: async (args, isUpdate) => {
|
||||||
|
let sql = "";
|
||||||
|
if (isUpdate) {
|
||||||
|
sql = `SELECT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`;
|
||||||
|
} else {
|
||||||
|
let whereClause = [
|
||||||
|
args.min ? `album_id > ${args.min}` : '1=1',
|
||||||
|
args.max ? `album_id <= ${args.max}` : '1=1',
|
||||||
|
].join(' AND ');
|
||||||
|
sql = `
|
||||||
|
SELECT album_id FROM wait_fetch_album WHERE ${whereClause}
|
||||||
|
${args.order ? `ORDER BY album_id ${args.order}` : ''}
|
||||||
|
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
console.log(sql);
|
||||||
|
let albumIds = await dbUtils.query(sql, []);
|
||||||
|
albumIds = albumIds.map(item => item.album_id);
|
||||||
|
return albumIds;
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
artist: {
|
artist: {
|
||||||
insert: async (artistInfo) => {
|
insert: async (artistInfo) => {
|
||||||
return await dbUtils.query('INSERT IGNORE INTO artist SET ?', artistInfo);
|
return await dbUtils.query('INSERT IGNORE INTO artist SET ?', artistInfo);
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
 * Load the ids of artists that are queued for fetching.
 *
 * @param {Object} args - Command-line options.
 * @param {number} [args.min] - Exclusive lower bound on the id; ignored when falsy.
 * @param {number} [args.max] - Inclusive upper bound on the id; ignored when falsy.
 * @param {string} [args.order] - Sort direction appended after ORDER BY; no ordering when falsy.
 * @param {number} [args.limit] - Maximum number of rows; unlimited when falsy.
 * @returns {Promise<number[]>} Artist ids read from `wait_fetch_artist`.
 */
getIdsToFetch: async (args) => {
    // wait_fetch_artist only has the columns `id` and `partition`, so filter and
    // sort on `id` and alias it to `artist_id` for the mapping below.
    const whereClause = [
        args.min ? `id > ${args.min}` : '1=1',
        args.max ? `id <= ${args.max}` : '1=1',
    ].join(' AND ');
    // NOTE(review): the values are interpolated into the SQL text as-is; this
    // assumes the caller has already coerced/validated them — confirm for args.order.
    const sql = `
        SELECT id AS artist_id FROM wait_fetch_artist WHERE ${whereClause}
        ${args.order ? `ORDER BY id ${args.order}` : ''}
        ${args.limit ? `LIMIT ${args.limit}` : ''}
    `;
    console.log(sql);
    const rows = await dbUtils.query(sql, []);
    return rows.map(row => row.artist_id);
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
lyric: {
|
lyric: {
|
||||||
insert: async (lyricInfo) => {
|
insert: async (lyricInfo) => {
|
||||||
return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo);
|
return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo);
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
comment: {
|
comment: {
|
||||||
insertCollection: async (commentInfoList) => {
|
insertCollection: async (commentInfoList) => {
|
||||||
if (commentInfoList.length == 0) return;
|
if (commentInfoList.length == 0) return;
|
||||||
@@ -51,15 +110,17 @@ module.exports = {
|
|||||||
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
|
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
|
||||||
ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
|
ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
|
||||||
`, [commentInfoList]);
|
`, [commentInfoList]);
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
comment_progress: {
|
comment_progress: {
|
||||||
update: async (commentProgressInfo, songId) => {
|
update: async (commentProgressInfo, songId) => {
|
||||||
return await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [commentProgressInfo, songId]);
|
return await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [commentProgressInfo, songId]);
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
playlist: {
|
playlist: {
|
||||||
insertCollection: async (playlistInfo) => {
|
insertCollection: async (playlistInfo) => {
|
||||||
if (playlistInfo.length == 0) return;
|
if (playlistInfo.length == 0) return;
|
||||||
@@ -67,9 +128,10 @@ module.exports = {
|
|||||||
INSERT INTO playlist ( ${Object.keys(playlistInfo).map(field => `\`${field}\``).join(",")} ) VALUES ?
|
INSERT INTO playlist ( ${Object.keys(playlistInfo).map(field => `\`${field}\``).join(",")} ) VALUES ?
|
||||||
ON DUPLICATE KEY UPDATE ${Object.keys(playlistInfo).map(field => `${field}=VALUES(${field})`).join(", ")}
|
ON DUPLICATE KEY UPDATE ${Object.keys(playlistInfo).map(field => `${field}=VALUES(${field})`).join(", ")}
|
||||||
`, [[Object.values(playlistInfo)]]);
|
`, [[Object.values(playlistInfo)]]);
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
user: {
|
user: {
|
||||||
insertCollection: async (userInfoList) => {
|
insertCollection: async (userInfoList) => {
|
||||||
if (userInfoList.length == 0) return;
|
if (userInfoList.length == 0) return;
|
||||||
@@ -77,27 +139,44 @@ module.exports = {
|
|||||||
INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ?
|
INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ?
|
||||||
ON DUPLICATE KEY UPDATE user_type = VALUES(user_type), nickname = VALUES(nickname), avatar_url = VALUES(avatar_url), modify_time = CURRENT_TIMESTAMP
|
ON DUPLICATE KEY UPDATE user_type = VALUES(user_type), nickname = VALUES(nickname), avatar_url = VALUES(avatar_url), modify_time = CURRENT_TIMESTAMP
|
||||||
`, [userInfoList]);
|
`, [userInfoList]);
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
song_album: {
|
song_album: {
|
||||||
insertCollection: async (songAlbumRel) => {
|
insertCollection: async (songAlbumRel) => {
|
||||||
if (songAlbumRel.length == 0) return;
|
if (songAlbumRel.length == 0) return;
|
||||||
return await dbUtils.query('INSERT IGNORE INTO song_album_relation (song_id, album_id) VALUES ?', [songAlbumRel]);
|
return await dbUtils.query('INSERT IGNORE INTO song_album_relation (song_id, album_id) VALUES ?', [songAlbumRel]);
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
song_artist: {
|
song_artist: {
|
||||||
insertCollection: async (songArtistRel) => {
|
insertCollection: async (songArtistRel) => {
|
||||||
if (songArtistRel.length == 0) return;
|
if (songArtistRel.length == 0) return;
|
||||||
return await dbUtils.query('INSERT IGNORE INTO song_artist_relation (song_id, artist_id) VALUES ?', [songArtistRel]);
|
return await dbUtils.query('INSERT IGNORE INTO song_artist_relation (song_id, artist_id) VALUES ?', [songArtistRel]);
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
song_playlist: {
|
song_playlist: {
|
||||||
insertCollection: async (trackIds) => {
|
insertCollection: async (trackIds) => {
|
||||||
if (trackIds.length == 0) return;
|
if (trackIds.length == 0) return;
|
||||||
return await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [trackIds]);
|
return await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg, rcmd_reason) VALUES ?', [trackIds]);
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
|
/* ##################################################### */
|
||||||
|
|
||||||
|
|
||||||
|
// 将 id 插入待检查表
|
||||||
|
wait_check: {
|
||||||
|
insert: async (type, ids) => {
|
||||||
|
// 过滤掉 id 为 0 的
|
||||||
|
ids = ids.filter(id => id < 0);
|
||||||
|
return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids]);
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@@ -29,28 +29,7 @@ SELECT * FROM album WHERE (full_description = '' or full_description is null) an
|
|||||||
|
|
||||||
async function fetchAll({ args = {}, isUpdate = false }) {
|
async function fetchAll({ args = {}, isUpdate = false }) {
|
||||||
console.log("start fetching albums ...");
|
console.log("start fetching albums ...");
|
||||||
|
let albumIds = await dataManager.album.getIdsToFetch(args, isUpdate);
|
||||||
if (isUpdate) {
|
|
||||||
var sql = `
|
|
||||||
SELECT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'
|
|
||||||
`;
|
|
||||||
} else {
|
|
||||||
let whereClause = [
|
|
||||||
args.min ? `album_id > ${args.min}` : '1=1',
|
|
||||||
args.max ? `album_id <= ${args.max}` : '1=1',
|
|
||||||
].join(' AND ');
|
|
||||||
var sql = `
|
|
||||||
-- 查出来通过代码去重,提高速度
|
|
||||||
SELECT album_id FROM song_album_relation WHERE ${whereClause} AND album_id NOT IN ( SELECT album_id FROM album )
|
|
||||||
${args.order ? `ORDER BY album_id ${args.order}` : ''}
|
|
||||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
|
||||||
`;
|
|
||||||
console.log(sql);
|
|
||||||
}
|
|
||||||
|
|
||||||
var albumIds = await dbUtils.query(sql, []);
|
|
||||||
albumIds = albumIds.map(item => item.album_id);
|
|
||||||
albumIds = Array.from(new Set(albumIds));
|
|
||||||
for (let i = 0; i < albumIds.length; i++) {
|
for (let i = 0; i < albumIds.length; i++) {
|
||||||
await global.checkIsExit();
|
await global.checkIsExit();
|
||||||
const albumId = albumIds[i];
|
const albumId = albumIds[i];
|
||||||
@@ -147,7 +126,7 @@ async function fetch({ albumId, debug = false, update = false }) {
|
|||||||
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
|
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
|
||||||
let songListJSONString = /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/.exec(html)[1];
|
let songListJSONString = /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/.exec(html)[1];
|
||||||
let songList = JSON.parse(songListJSONString);
|
let songList = JSON.parse(songListJSONString);
|
||||||
let songIds = songList.map(song => song.id);
|
let songIds = songList.map(song => Number(song.id));
|
||||||
|
|
||||||
let albumInfo = {
|
let albumInfo = {
|
||||||
album_id: albumId,
|
album_id: albumId,
|
||||||
@@ -160,8 +139,10 @@ async function fetch({ albumId, debug = false, update = false }) {
|
|||||||
version: 1
|
version: 1
|
||||||
};
|
};
|
||||||
// console.log("albumInfo", albumInfo);
|
// console.log("albumInfo", albumInfo);
|
||||||
|
|
||||||
|
await dataManager.wait_check.insert("song", songIds);
|
||||||
if (albumId > 0) {
|
if (albumId > 0) {
|
||||||
let songAlbumRel = songIds.map(songId => [Number(songId), albumId]);
|
let songAlbumRel = songIds.map(songId => [songId, albumId]);
|
||||||
await dataManager.song_album.insertCollection(songAlbumRel);
|
await dataManager.song_album.insertCollection(songAlbumRel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -25,22 +25,7 @@ async function getFromDatabase({ artistId }) {
|
|||||||
// 从数据库中查出还缺少的歌手,并进行爬取
|
// 从数据库中查出还缺少的歌手,并进行爬取
|
||||||
async function fetchAll({ args = {} }) {
|
async function fetchAll({ args = {} }) {
|
||||||
console.log("start fetching artists ...");
|
console.log("start fetching artists ...");
|
||||||
let whereClause = [
|
let artistIds = await dataManager.artist.getIdsToFetch(args);
|
||||||
args.min ? `artist_id > ${args.min}` : '1=1',
|
|
||||||
args.max ? `artist_id <= ${args.max}` : '1=1',
|
|
||||||
].join(' AND ');
|
|
||||||
var sql = `
|
|
||||||
-- 查出来通过代码去重,提高速度
|
|
||||||
-- SELECT DISTINCT artist_id FROM song_artist_relation WHERE ${whereClause} AND artist_id NOT IN ( SELECT artist_id FROM artist )
|
|
||||||
SELECT artist_id FROM song_artist_relation WHERE ${whereClause} AND artist_id NOT IN ( SELECT artist_id FROM artist )
|
|
||||||
${args.order ? `ORDER BY artist_id ${args.order}` : ''}
|
|
||||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
|
||||||
`;
|
|
||||||
console.log(sql);
|
|
||||||
|
|
||||||
var artistIds = await dbUtils.query(sql, []);
|
|
||||||
artistIds = artistIds.map(item => item.artist_id);
|
|
||||||
artistIds = Array.from(new Set(artistIds));
|
|
||||||
for (let i = 0; i < artistIds.length; i++) {
|
for (let i = 0; i < artistIds.length; i++) {
|
||||||
await global.checkIsExit();
|
await global.checkIsExit();
|
||||||
const artistId = artistIds[i];
|
const artistId = artistIds[i];
|
||||||
@@ -97,7 +82,7 @@ async function fetch({ artistId, debug = false }) {
|
|||||||
try {
|
try {
|
||||||
let songListJSONString = /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/.exec(html)[1];
|
let songListJSONString = /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/.exec(html)[1];
|
||||||
let songList = JSON.parse(songListJSONString);
|
let songList = JSON.parse(songListJSONString);
|
||||||
songIds = songList.map(song => song.id);
|
songIds = songList.map(song => Number(song.id));
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// 可能是歌手下面没有音乐 例如:https://music.163.com/#/artist?id=30032762
|
// 可能是歌手下面没有音乐 例如:https://music.163.com/#/artist?id=30032762
|
||||||
}
|
}
|
||||||
@@ -111,8 +96,9 @@ async function fetch({ artistId, debug = false }) {
|
|||||||
};
|
};
|
||||||
// console.log("artistInfo", artistInfo);
|
// console.log("artistInfo", artistInfo);
|
||||||
|
|
||||||
|
await dataManager.wait_check.insert("song", songIds);
|
||||||
if (artistId > 0) {
|
if (artistId > 0) {
|
||||||
let songArtistRel = songIds.map(songId => [Number(songId), artistId]);
|
let songArtistRel = songIds.map(songId => [songId, artistId]);
|
||||||
await dataManager.song_artist.insertCollection(songArtistRel);
|
await dataManager.song_artist.insertCollection(songArtistRel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -173,6 +173,7 @@ async function fetch({ playlistId, debug = false }) {
|
|||||||
process.exit(0);
|
process.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await dataManager.wait_check.insert("song", playlist.trackIds.map(track => track.id));
|
||||||
let trackIds = playlist.trackIds.map(track => [track.id, playlist.id, track.alg, track.rcmdReason]);
|
let trackIds = playlist.trackIds.map(track => [track.id, playlist.id, track.alg, track.rcmdReason]);
|
||||||
await dataManager.song_playlist.insertCollection(trackIds);
|
await dataManager.song_playlist.insertCollection(trackIds);
|
||||||
await dataManager.playlist.insertCollection(playlistInfo);
|
await dataManager.playlist.insertCollection(playlistInfo);
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
|
|
||||||
|
const requestUtils = require('../../../utils/requestUtils');
|
||||||
const sleepUtils = require('../../../utils/sleepUtils');
|
const sleepUtils = require('../../../utils/sleepUtils');
|
||||||
const dataManager = require('../dataManager');
|
const dataManager = require('../dataManager');
|
||||||
|
|
||||||
@@ -11,35 +12,7 @@ const { song_detail } = require('NeteaseCloudMusicApi');
|
|||||||
// 从数据库中查出还缺少的歌曲,并进行爬取
|
// 从数据库中查出还缺少的歌曲,并进行爬取
|
||||||
async function fetchAll({ args = {} }) {
|
async function fetchAll({ args = {} }) {
|
||||||
console.log("start fetching songs ...");
|
console.log("start fetching songs ...");
|
||||||
let whereClause = [
|
let songIds = await dataManager.song.getIdsToFetch(args);
|
||||||
args.min ? `song_id > ${args.min}` : '1=1',
|
|
||||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
|
||||||
].join(' AND ');
|
|
||||||
var sql1 = `
|
|
||||||
SELECT song_id FROM song_artist_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
|
|
||||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
|
||||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
|
||||||
`;
|
|
||||||
var sql2 = `
|
|
||||||
SELECT song_id FROM song_album_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
|
|
||||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
|
||||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
|
||||||
`;
|
|
||||||
// // 更新现有数据
|
|
||||||
// sql = `SELECT song_id FROM song WHERE data_version = 1`;
|
|
||||||
// 测试用
|
|
||||||
// sql = `SELECT song_id FROM song_artist_relation group by song_id limit 10`;
|
|
||||||
console.log(sql1);
|
|
||||||
var songIds1 = await dbUtils.query(sql1, []);
|
|
||||||
songIds1 = songIds1.map(item => item.song_id);
|
|
||||||
|
|
||||||
console.log(sql2);
|
|
||||||
var songIds2 = await dbUtils.query(sql2, []);
|
|
||||||
songIds2 = songIds2.map(item => item.song_id);
|
|
||||||
|
|
||||||
var songIds = songIds1.concat(songIds2);
|
|
||||||
songIds = Array.from(new Set(songIds)); // 去重
|
|
||||||
|
|
||||||
// 0 - 100, 200 - 399, 400 - ..., ... - songIds.length-1
|
// 0 - 100, 200 - 399, 400 - ..., ... - songIds.length-1
|
||||||
// 0 1 2 count-1
|
// 0 1 2 count-1
|
||||||
var step = 1000;
|
var step = 1000;
|
||||||
@@ -71,9 +44,14 @@ async function fetch({ songIdArray, debug = false }) {
|
|||||||
}
|
}
|
||||||
// console.log(songResult.body.songs.map(item => JSON.stringify(item)));
|
// console.log(songResult.body.songs.map(item => JSON.stringify(item)));
|
||||||
|
|
||||||
|
let albumIds = [], artistIds = [];
|
||||||
let songAlbumRel = [], songArtistRel = [];
|
let songAlbumRel = [], songArtistRel = [];
|
||||||
let songInfoList = songResult.body.songs.map(song => {
|
let songInfoList = songResult.body.songs.map(song => {
|
||||||
song.ar.forEach(item => songArtistRel.push([song.id, item.id]));
|
song.ar.forEach(item => {
|
||||||
|
artistIds.push(item.id);
|
||||||
|
songArtistRel.push([song.id, item.id])
|
||||||
|
});
|
||||||
|
albumIds.push(song.al.id || 0);
|
||||||
songAlbumRel.push([song.id, song.al.id || 0])
|
songAlbumRel.push([song.id, song.al.id || 0])
|
||||||
return {
|
return {
|
||||||
title: song.name, // 歌曲标题
|
title: song.name, // 歌曲标题
|
||||||
@@ -107,6 +85,8 @@ async function fetch({ songIdArray, debug = false }) {
|
|||||||
if (songInfoList.length == 0) return;
|
if (songInfoList.length == 0) return;
|
||||||
|
|
||||||
console.log("插入数据库");
|
console.log("插入数据库");
|
||||||
|
await dataManager.wait_check.insert("album", albumIds);
|
||||||
|
await dataManager.wait_check.insert("artist", artistIds);
|
||||||
await dataManager.song_album.insertCollection(songAlbumRel);
|
await dataManager.song_album.insertCollection(songAlbumRel);
|
||||||
await dataManager.song_artist.insertCollection(songArtistRel);
|
await dataManager.song_artist.insertCollection(songArtistRel);
|
||||||
await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新
|
await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新
|
||||||
|
@@ -12,9 +12,6 @@ dbUtils.create({
|
|||||||
global.dbUtils = dbUtils;
|
global.dbUtils = dbUtils;
|
||||||
console.log("global.useMysqlPool:", !!global.useMysqlPool);
|
console.log("global.useMysqlPool:", !!global.useMysqlPool);
|
||||||
|
|
||||||
// 两次请求之间停顿时间
|
|
||||||
global.sleepTime = 10;
|
|
||||||
|
|
||||||
// 引入utils
|
// 引入utils
|
||||||
const songInfoUtils = require('./getInfo/songInfoUtils');
|
const songInfoUtils = require('./getInfo/songInfoUtils');
|
||||||
const artistInfoUtils = require('./getInfo/artistInfoUtils');
|
const artistInfoUtils = require('./getInfo/artistInfoUtils');
|
||||||
@@ -57,21 +54,28 @@ async function main(args) {
|
|||||||
// var affectedRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
|
// var affectedRows2 = await dbUtils.query(`DELETE FROM song_album_relation WHERE song_id = 0 OR album_id = 0`, []);
|
||||||
// console.log(`删除脏数据 affectedRows:`, affectedRows1.affectedRows, affectedRows2.affectedRows);
|
// console.log(`删除脏数据 affectedRows:`, affectedRows1.affectedRows, affectedRows2.affectedRows);
|
||||||
|
|
||||||
if (args.utils == "song")
|
switch (args.utils) {
|
||||||
await songInfoUtils.fetchAll({ args: args });
|
case 'song':
|
||||||
else if (args.utils == "album")
|
await songInfoUtils.fetchAll({ args: args });
|
||||||
await albumInfoUtils.fetchAll({ args: args });
|
break;
|
||||||
else if (args.utils == "artist")
|
case 'album':
|
||||||
await artistInfoUtils.fetchAll({ args: args });
|
await albumInfoUtils.fetchAll({ args: args });
|
||||||
else if (args.utils == "lyric")
|
break;
|
||||||
await lyricInfoUtils.fetchAll({ args: args });
|
case 'artist':
|
||||||
else if (args.utils == "comment")
|
await artistInfoUtils.fetchAll({ args: args });
|
||||||
await commentUtils.fetchAll({ args: args });
|
break;
|
||||||
else if (args.utils == "playlist")
|
case 'lyric':
|
||||||
await playlistUtils.fetchAll({ args: args });
|
await lyricInfoUtils.fetchAll({ args: args });
|
||||||
else {
|
break;
|
||||||
console.log("utils参数不匹配,退出");
|
case 'comment':
|
||||||
return;
|
await commentUtils.fetchAll({ args: args });
|
||||||
|
break;
|
||||||
|
case 'playlist':
|
||||||
|
await playlistUtils.fetchAll({ args: args });
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
console.log("utils参数不匹配,退出");
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
await sleepUtils.sleep(2000);
|
await sleepUtils.sleep(2000);
|
||||||
}
|
}
|
||||||
|
@@ -22,15 +22,21 @@ node index --utils lyric --min 0 --max 400000000
|
|||||||
node index --utils playlist #
|
node index --utils playlist #
|
||||||
|
|
||||||
|
|
||||||
后期:
|
思路:
|
||||||
|
通过一首歌,查出对应的artist和album,然后顺藤摸瓜查出网易云的其他song, album, artist, lyric, comment等
|
||||||
|
|
||||||
批量查库修改为一条SQL搞定
|
插入rel表的时候同时插入 wait_check_xx 表,然后后续检查这个表,如果不存在,那么就插入对应的 wait_fetch_xxx 表
|
||||||
|
之后查出 wait_fetch_xxx 表,进行数据拉取,形成闭环
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
后期:
|
||||||
|
|
||||||
歌单定时更新(rel表中添加一个del字段,先将歌单下面的全部置为删除状态,再插入的时候把已有歌曲的标记重新修改为正常状态)
|
歌单定时更新(rel表中添加一个del字段,先将歌单下面的全部置为删除状态,再插入的时候把已有歌曲的标记重新修改为正常状态)
|
||||||
|
|
||||||
评论的更新
|
评论的更新
|
||||||
|
|
||||||
爬取歌单playlist
|
爬取歌单playlist功能需要更新
|
||||||
|
|
||||||
被删除的artist和album回头再通过其他表中的数据反查回来
|
被删除的artist和album回头再通过其他表中的数据反查回来
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user