1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

插入关联表时同时插入wait_check表;将查询待爬取id的代码统一到dataManager.js

This commit is contained in:
2022-10-25 19:36:05 +08:00
parent 4753fd55ae
commit 3660fefda4
9 changed files with 192 additions and 119 deletions

View File

@@ -1,6 +1,7 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dataManager = require('../dataManager');
@@ -11,35 +12,7 @@ const { song_detail } = require('NeteaseCloudMusicApi');
// 从数据库中查出还缺少的歌曲,并进行爬取
async function fetchAll({ args = {} }) {
console.log("start fetching songs ...");
let whereClause = [
args.min ? `song_id > ${args.min}` : '1=1',
args.max ? `song_id <= ${args.max}` : '1=1',
].join(' AND ');
var sql1 = `
SELECT song_id FROM song_artist_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
${args.order ? `ORDER BY song_id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
var sql2 = `
SELECT song_id FROM song_album_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
${args.order ? `ORDER BY song_id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
// // 更新现有数据
// sql = `SELECT song_id FROM song WHERE data_version = 1`;
// 测试用
// sql = `SELECT song_id FROM song_artist_relation group by song_id limit 10`;
console.log(sql1);
var songIds1 = await dbUtils.query(sql1, []);
songIds1 = songIds1.map(item => item.song_id);
console.log(sql2);
var songIds2 = await dbUtils.query(sql2, []);
songIds2 = songIds2.map(item => item.song_id);
var songIds = songIds1.concat(songIds2);
songIds = Array.from(new Set(songIds)); // 去重
let songIds = await dataManager.song.getIdsToFetch(args);
// 0 - 100, 200 - 399, 400 - ..., ... - songIds.length-1
// 0 1 2 count-1
var step = 1000;
@@ -71,9 +44,14 @@ async function fetch({ songIdArray, debug = false }) {
}
// console.log(songResult.body.songs.map(item => JSON.stringify(item)));
let albumIds = [], artistIds = [];
let songAlbumRel = [], songArtistRel = [];
let songInfoList = songResult.body.songs.map(song => {
song.ar.forEach(item => songArtistRel.push([song.id, item.id]));
song.ar.forEach(item => {
artistIds.push(item.id);
songArtistRel.push([song.id, item.id])
});
albumIds.push(song.al.id || 0);
songAlbumRel.push([song.id, song.al.id || 0])
return {
title: song.name, // 歌曲标题
@@ -107,6 +85,8 @@ async function fetch({ songIdArray, debug = false }) {
if (songInfoList.length == 0) return;
console.log("插入数据库");
await dataManager.wait_check.insert("album", albumIds);
await dataManager.wait_check.insert("artist", artistIds);
await dataManager.song_album.insertCollection(songAlbumRel);
await dataManager.song_artist.insertCollection(songArtistRel);
await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新