通过命令行指定爬取参数,不用再修改代码了
This commit is contained in:
@@ -23,19 +23,33 @@ async function getFromDatabase({ albumId }) {
|
||||
|
||||
// 正常应该查不出记录才对
|
||||
/*
|
||||
SELECT * FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'
|
||||
SELECT * FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:([:space:]*?|[ ]*?)。,更多.*$'
|
||||
*/
|
||||
|
||||
async function fetchAll({ isUpdate = false }) {
|
||||
console.log("start fetching albums ...")
|
||||
var albumIds = await dbUtils.query(isUpdate
|
||||
? `SELECT DISTINCT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`
|
||||
: `SELECT DISTINCT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )`, []);
|
||||
async function fetchAll({ args = {}, isUpdate = false }) {
|
||||
console.log("start fetching albums ...");
|
||||
|
||||
if (isUpdate) {
|
||||
var sql = `SELECT DISTINCT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`;
|
||||
} else {
|
||||
let whereClause = [
|
||||
args.min ? `album_id > ${args.min}` : '1=1',
|
||||
args.max ? `album_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT DISTINCT album_id FROM song_album_relation WHERE ${whereClause} AND album_id NOT IN ( SELECT album_id FROM album )
|
||||
${args.order ? `ORDER BY album_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
}
|
||||
|
||||
var albumIds = await dbUtils.query(sql, []);
|
||||
albumIds = albumIds.map(item => item.album_id);
|
||||
for (let i = 0; i < albumIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const albumId = albumIds[i];
|
||||
console.log(`${i + 1}/${albumIds.length} | album: ${albumId}`);
|
||||
console.log(`${i + 1}/${albumIds.length} | album: ${albumId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
|
||||
try {
|
||||
await fetch({ albumId: albumId, update: isUpdate });
|
||||
} catch (err) {
|
||||
|
@@ -22,16 +22,25 @@ async function getFromDatabase({ artistId }) {
|
||||
}
|
||||
|
||||
// 从数据库中查出还缺少的歌手,并进行爬取
|
||||
async function fetchAll() {
|
||||
console.log("start fetching artists ...")
|
||||
var artistIds = await dbUtils.query(`
|
||||
SELECT DISTINCT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )
|
||||
`, []);
|
||||
async function fetchAll({ args = {} }) {
|
||||
console.log("start fetching artists ...");
|
||||
let whereClause = [
|
||||
args.min ? `artist_id > ${args.min}` : '1=1',
|
||||
args.max ? `artist_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT DISTINCT artist_id FROM song_artist_relation WHERE ${whereClause} AND artist_id NOT IN ( SELECT artist_id FROM artist )
|
||||
${args.order ? `ORDER BY artist_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
|
||||
var artistIds = await dbUtils.query(sql, []);
|
||||
artistIds = artistIds.map(item => item.artist_id);
|
||||
for (let i = 0; i < artistIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const artistId = artistIds[i];
|
||||
console.log(`${i + 1}/${artistIds.length} | artist: ${artistId}`);
|
||||
console.log(`${i + 1}/${artistIds.length} | artist: ${artistId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
|
||||
try {
|
||||
await fetch({ artistId: artistId });
|
||||
} catch (err) {
|
||||
|
@@ -11,28 +11,33 @@ const dbUtils = global.dbUtils;
|
||||
// https://github.com/Binaryify/NeteaseCloudMusicApi
|
||||
const { comment_music } = require('NeteaseCloudMusicApi');
|
||||
|
||||
async function fetchAll() {
|
||||
console.log("start fetching comment ...")
|
||||
async function fetchAll({ args = {} }) {
|
||||
console.log("start fetching comment ...");
|
||||
// 首先将需要爬取的song_id导入comment_progress表
|
||||
await dbUtils.query(`
|
||||
INSERT INTO comment_progress ( song_id )
|
||||
INSERT IGNORE INTO comment_progress ( song_id )
|
||||
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress )
|
||||
`, []);
|
||||
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT song_id FROM comment_progress WHERE ${whereClause} AND current_status != 2
|
||||
ORDER BY current_status DESC${args.order ? `, song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
|
||||
// 首先查询有无正在爬取中的记录
|
||||
var songIds = await dbUtils.query(`
|
||||
-- 本机
|
||||
SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id <= 30000000 LIMIT 1000
|
||||
-- SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id < 30000000 ORDER BY current_status DESC
|
||||
-- 服务器
|
||||
-- SELECT song_id FROM comment_progress WHERE current_status != 2 AND song_id > 30000000 ORDER BY current_status DESC
|
||||
`, []);
|
||||
var songIds = await dbUtils.query(sql, []);
|
||||
songIds = songIds.map(item => item.song_id);
|
||||
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const songId = songIds[i];
|
||||
console.log(`${i + 1}/${songIds.length} | comment: ${songId}`);
|
||||
console.log(`${i + 1}/${songIds.length} | comment: ${songId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
|
||||
try {
|
||||
await fetch({ songId: songId });
|
||||
} catch (err) {
|
||||
|
@@ -7,11 +7,20 @@ const sleepUtils = require('../../../utils/sleepUtils');
|
||||
const dbUtils = global.dbUtils;
|
||||
|
||||
// 从数据库中查出还缺少的歌词,并进行爬取
|
||||
async function fetchAll() {
|
||||
async function fetchAll({ args = {} }) {
|
||||
console.log("start fetching lyrics ...");
|
||||
var songIds = await dbUtils.query(`
|
||||
SELECT DISTINCT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM lyric )
|
||||
`, []);
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT DISTINCT song_id FROM song WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM lyric )
|
||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
|
||||
var songIds = await dbUtils.query(sql, []);
|
||||
songIds = songIds.map(song => song.song_id);
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
|
@@ -24,18 +24,27 @@ async function getFromDatabase({ songId }) {
|
||||
}
|
||||
|
||||
// 从数据库中查出还缺少的歌曲,并进行爬取
|
||||
async function fetchAll() {
|
||||
async function fetchAll({ args = {} }) {
|
||||
console.log("start fetching songs ...");
|
||||
var songIds = await dbUtils.query(`
|
||||
SELECT DISTINCT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song )
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql = `
|
||||
SELECT DISTINCT song_id FROM song_artist_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
|
||||
UNION
|
||||
SELECT DISTINCT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song )
|
||||
`, []);
|
||||
SELECT DISTINCT song_id FROM song_album_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
|
||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
console.log(sql);
|
||||
|
||||
var songIds = await dbUtils.query(sql, []);
|
||||
songIds = songIds.map(item => item.song_id);
|
||||
for (let i = 0; i < songIds.length; i++) {
|
||||
await global.checkIsExit();
|
||||
const songId = songIds[i];
|
||||
console.log(`${i + 1}/${songIds.length} | song: ${songId}`);
|
||||
console.log(`${i + 1}/${songIds.length} | song: ${songId} | ${args.min ?? "?"}-${args.max ?? "?"}`);
|
||||
try {
|
||||
await fetch({ songId: songId });
|
||||
} catch (err) {
|
||||
|
Reference in New Issue
Block a user