插入关联表时同时插入wait_check表;将查询待爬取id的代码统一到dataManager.js
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const requestUtils = require('../../../utils/requestUtils');
|
||||
const sleepUtils = require('../../../utils/sleepUtils');
|
||||
const dataManager = require('../dataManager');
|
||||
|
||||
@@ -11,35 +12,7 @@ const { song_detail } = require('NeteaseCloudMusicApi');
|
||||
// 从数据库中查出还缺少的歌曲,并进行爬取
|
||||
async function fetchAll({ args = {} }) {
|
||||
console.log("start fetching songs ...");
|
||||
let whereClause = [
|
||||
args.min ? `song_id > ${args.min}` : '1=1',
|
||||
args.max ? `song_id <= ${args.max}` : '1=1',
|
||||
].join(' AND ');
|
||||
var sql1 = `
|
||||
SELECT song_id FROM song_artist_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
|
||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
var sql2 = `
|
||||
SELECT song_id FROM song_album_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
|
||||
${args.order ? `ORDER BY song_id ${args.order}` : ''}
|
||||
${args.limit ? `LIMIT ${args.limit}` : ''}
|
||||
`;
|
||||
// // 更新现有数据
|
||||
// sql = `SELECT song_id FROM song WHERE data_version = 1`;
|
||||
// 测试用
|
||||
// sql = `SELECT song_id FROM song_artist_relation group by song_id limit 10`;
|
||||
console.log(sql1);
|
||||
var songIds1 = await dbUtils.query(sql1, []);
|
||||
songIds1 = songIds1.map(item => item.song_id);
|
||||
|
||||
console.log(sql2);
|
||||
var songIds2 = await dbUtils.query(sql2, []);
|
||||
songIds2 = songIds2.map(item => item.song_id);
|
||||
|
||||
var songIds = songIds1.concat(songIds2);
|
||||
songIds = Array.from(new Set(songIds)); // 去重
|
||||
|
||||
let songIds = await dataManager.song.getIdsToFetch(args);
|
||||
// 0 - 100, 200 - 399, 400 - ..., ... - songIds.length-1
|
||||
// 0 1 2 count-1
|
||||
var step = 1000;
|
||||
@@ -71,9 +44,14 @@ async function fetch({ songIdArray, debug = false }) {
|
||||
}
|
||||
// console.log(songResult.body.songs.map(item => JSON.stringify(item)));
|
||||
|
||||
let albumIds = [], artistIds = [];
|
||||
let songAlbumRel = [], songArtistRel = [];
|
||||
let songInfoList = songResult.body.songs.map(song => {
|
||||
song.ar.forEach(item => songArtistRel.push([song.id, item.id]));
|
||||
song.ar.forEach(item => {
|
||||
artistIds.push(item.id);
|
||||
songArtistRel.push([song.id, item.id])
|
||||
});
|
||||
albumIds.push(song.al.id || 0);
|
||||
songAlbumRel.push([song.id, song.al.id || 0])
|
||||
return {
|
||||
title: song.name, // 歌曲标题
|
||||
@@ -107,6 +85,8 @@ async function fetch({ songIdArray, debug = false }) {
|
||||
if (songInfoList.length == 0) return;
|
||||
|
||||
console.log("插入数据库");
|
||||
await dataManager.wait_check.insert("album", albumIds);
|
||||
await dataManager.wait_check.insert("artist", artistIds);
|
||||
await dataManager.song_album.insertCollection(songAlbumRel);
|
||||
await dataManager.song_artist.insertCollection(songArtistRel);
|
||||
await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新
|
||||
|
Reference in New Issue
Block a user