删除 songInfoUtils.old.js
This commit is contained in:
		@@ -92,6 +92,89 @@ async function fetch({ songIdArray, debug = false }) {
 | 
			
		||||
    await dataManager.song.insertCollection(songInfoList); // image 因为接口没有返回,所以不更新
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 获取音乐详情
 | 
			
		||||
/**
 * Scrape the details of one song from its music.163.com page and persist them.
 *
 * Steps: skip when the song already has a row in `song` (unless `debug` is set),
 * download the song page, remove the relation rows when the page reports that
 * the song does not exist, otherwise parse the metadata and write it to the
 * `song_album_relation`, `song_artist_relation` and `song` tables.
 *
 * @param {Object} options
 * @param {number|string} options.songId - Song id; used in the page URL and as `song_id`.
 * @param {boolean} [options.debug=false] - When true, fetch even if the song is already stored.
 * @returns {Promise<Object|undefined>} The parsed song info, or `undefined` when the song was
 *     skipped, the page request failed, or the page reports the song as missing.
 * @throws {Error} When the page has no ld+json block or lacks a required meta tag.
 * @throws {SyntaxError} When the ld+json block is not valid JSON.
 */
async function fetch_old({ songId, debug = false }) {
    const existing = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
    if (existing[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 songId: ${songId}`);
        return;
    }

    const url = `https://music.163.com/song?id=${songId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        // Best effort: a failed download is logged and the song is left for a later run.
        console.error(errors);
        return;
    }

    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        const deleteResult1 = await dbUtils.query('DELETE FROM song_album_relation WHERE song_id = ?', [songId]);
        const deleteResult2 = await dbUtils.query('DELETE FROM song_artist_relation WHERE song_id = ?', [songId]);
        console.log(`song: ${songId} 不存在,从song_album_relation, song_artist_relation表中删除. affectedRows: ${deleteResult1.affectedRows}, ${deleteResult2.affectedRows}`);
        return;
    }

    // First capture group of `pattern` in the page, or null when the pattern does not match.
    const optionalGroup = function (pattern) {
        const found = pattern.exec(html);
        return found === null ? null : found[1];
    };
    // Same as optionalGroup, but a missing match is reported with a descriptive error
    // instead of a bare "cannot read property of null".
    const requiredGroup = function (pattern, label) {
        const value = optionalGroup(pattern);
        if (value === null) {
            throw new Error(`song ${songId}: ${label} not found in page`);
        }
        return value;
    };

    const songInfoJSONString = requiredGroup(/\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/, 'ld+json block');
    const songInfoDict = JSON.parse(songInfoJSONString);

    // TODO: take song aliases into account, e.g. https://music.163.com/#/song?id=26830207

    const title = requiredGroup(/<meta property="og:title" content="(.*?)" \/>/, 'og:title');
    const image = requiredGroup(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/, 'og:image');
    const artist = requiredGroup(/<meta property="og:music:artist" content="(.*?)" \/>/, 'og:music:artist');
    const duration = requiredGroup(/<meta property="music:duration" content="(.*?)"\/>/, 'music:duration');

    // A song may not belong to an album, so both album tags are optional and looked up independently.
    const album = optionalGroup(/<meta property="og:music:album" content="(.*?)"\/>/);
    const albumId = optionalGroup(/<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/);

    // Collect every musician id; the /g flag makes exec() advance through the page.
    const reg = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
    const artistIds = [];
    let matched = null;
    while ((matched = reg.exec(html)) !== null) {
        artistIds.push(matched[1]);
    }

    const songInfo = {
        songId: songId,
        title: title,
        image: image,
        pubDate: songInfoDict.pubDate,
        artist: artist,
        artistIds: artistIds,
        album: album || null,
        albumId: albumId || null,
        duration: duration,
    };

    // Every write is awaited so that a failure rejects this call (and reaches the caller's
    // error handling) and so that the data is stored by the time the function returns.
    if (songInfo.albumId !== null) {
        await dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
            song_id: songInfo.songId,
            album_id: songInfo.albumId,
        });
    }
    for (const artistId of artistIds) {
        await dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
            song_id: songInfo.songId,
            artist_id: artistId,
        });
    }
    // The song row is written last: if a relation insert fails, no `song` row is created.
    await dbUtils.query('INSERT IGNORE INTO song SET ?', {
        song_id: songInfo.songId,
        title: songInfo.title,
        image: songInfo.image,
        pub_date: songInfo.pubDate,
    });
    return songInfo;
}
 | 
			
		||||
 | 
			
		||||
module.exports = {
 | 
			
		||||
    fetch: fetch,
 | 
			
		||||
    fetchAll: fetchAll,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,144 +0,0 @@
 | 
			
		||||
const fs = require('fs');
 | 
			
		||||
const path = require('path');
 | 
			
		||||
 | 
			
		||||
const requestUtils = require('../../../utils/requestUtils');
 | 
			
		||||
const sleepUtils = require('../../../utils/sleepUtils');
 | 
			
		||||
 | 
			
		||||
const dbUtils = global.dbUtils;
 | 
			
		||||
 | 
			
		||||
// 从数据库中查询
 | 
			
		||||
/**
 * Load one song from the database together with the ids of its albums and artists.
 *
 * @param {Object} options
 * @param {number|string} options.songId - Value matched against `song_id` in all three tables.
 * @returns {Promise<Object>} A copy of the `song` row extended with `albumIds` and `artistIds`
 *     arrays, or an empty object when the song has no row in `song`.
 */
async function getFromDatabase({ songId }) {
    const songRows = await dbUtils.query('SELECT * FROM song WHERE song_id = ?', [songId]);
    if (songRows.length === 0) return {};

    // The two relation lookups do not depend on each other, so they are issued together.
    const [albumRows, artistRows] = await Promise.all([
        dbUtils.query('SELECT * FROM song_album_relation WHERE song_id = ?', [songId]),
        dbUtils.query('SELECT * FROM song_artist_relation WHERE song_id = ?', [songId]),
    ]);

    // The JSON round-trip produces a plain, detached copy of the row; it is kept as-is
    // because it also defines how non-JSON values in the row are serialized for callers.
    const songInfo = JSON.parse(JSON.stringify(songRows[0]));
    songInfo.albumIds = albumRows.map(row => row.album_id);
    songInfo.artistIds = artistRows.map(row => row.artist_id);
    return songInfo;
}
 | 
			
		||||
 | 
			
		||||
// 从数据库中查出还缺少的歌曲,并进行爬取
 | 
			
		||||
/**
 * Find the songs that appear in the relation tables but have no row in `song`,
 * and fetch each of them in turn, pausing between requests.
 *
 * @param {Object} [options]
 * @param {Object} [options.args={}] - Optional filters for the lookup query.
 * @param {number|string} [options.args.min] - Exclusive lower bound for `song_id`.
 * @param {number|string} [options.args.max] - Inclusive upper bound for `song_id`.
 * @param {string} [options.args.order] - `ASC` or `DESC` (case-insensitive) ordering by `song_id`.
 * @param {number|string} [options.args.limit] - Maximum number of songs to process.
 * @returns {Promise<void>}
 * @throws {TypeError} When a filter is not a non-negative integer / a valid sort direction.
 */
async function fetchAll({ args = {} } = {}) {
    // The filters below are interpolated into the SQL text, so they are validated first:
    // anything that is not a plain integer or a sort keyword is rejected instead of
    // being sent to the database.
    const assertUnsignedInt = (value, name) => {
        if (!/^\d+$/.test(String(value))) {
            throw new TypeError(`fetchAll: args.${name} must be a non-negative integer, got ${value}`);
        }
    };
    if (args.min) assertUnsignedInt(args.min, 'min');
    if (args.max) assertUnsignedInt(args.max, 'max');
    if (args.limit) assertUnsignedInt(args.limit, 'limit');
    if (args.order && !/^(asc|desc)$/i.test(String(args.order))) {
        throw new TypeError(`fetchAll: args.order must be ASC or DESC, got ${args.order}`);
    }

    console.log("start fetching songs ...");
    const whereClause = [
        args.min ? `song_id >  ${args.min}` : '1=1',
        args.max ? `song_id <= ${args.max}` : '1=1',
    ].join(' AND ');
    const sql = `
        SELECT DISTINCT song_id FROM song_artist_relation WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
        UNION
        SELECT DISTINCT song_id FROM song_album_relation  WHERE ${whereClause} AND song_id NOT IN ( SELECT song_id FROM song )
        ${args.order ? `ORDER BY song_id ${args.order}` : ''}
        ${args.limit ? `LIMIT ${args.limit}` : ''}
    `;
    console.log(sql);

    const rows = await dbUtils.query(sql, []);
    const songIds = rows.map(item => item.song_id);
    for (let i = 0; i < songIds.length; i++) {
        await global.checkIsExit();
        const songId = songIds[i];
        console.log(`${i + 1}/${songIds.length} | song: ${songId} | ${args.min || "?"}-${args.max || "?"}`);
        try {
            await fetch({ songId: songId });
        } catch (err) {
            // One failing song must not stop the batch; log it and continue.
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }
}
 | 
			
		||||
 | 
			
		||||
// 获取音乐详情
 | 
			
		||||
/**
 * Scrape the details of one song from its music.163.com page and persist them.
 *
 * Steps: skip when the song already has a row in `song` (unless `debug` is set),
 * download the song page, remove the relation rows when the page reports that
 * the song does not exist, otherwise parse the metadata and write it to the
 * `song_album_relation`, `song_artist_relation` and `song` tables.
 *
 * @param {Object} options
 * @param {number|string} options.songId - Song id; used in the page URL and as `song_id`.
 * @param {boolean} [options.debug=false] - When true, fetch even if the song is already stored.
 * @returns {Promise<Object|undefined>} The parsed song info, or `undefined` when the song was
 *     skipped, the page request failed, or the page reports the song as missing.
 * @throws {Error} When the page has no ld+json block or lacks a required meta tag.
 * @throws {SyntaxError} When the ld+json block is not valid JSON.
 */
async function fetch({ songId, debug = false }) {
    const countRows = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
    if (countRows[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 songId: ${songId}`);
        return;
    }

    const url = `https://music.163.com/song?id=${songId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url);
    } catch (errors) {
        // Best effort: a failed download is logged and the song is left for a later run.
        console.error(errors);
        return;
    }

    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        const deleteResult1 = await dbUtils.query('DELETE FROM song_album_relation WHERE song_id = ?', [songId]);
        const deleteResult2 = await dbUtils.query('DELETE FROM song_artist_relation WHERE song_id = ?', [songId]);
        console.log(`song: ${songId} 不存在,从song_album_relation, song_artist_relation表中删除. affectedRows: ${deleteResult1.affectedRows}, ${deleteResult2.affectedRows}`);
        return;
    }

    // First capture group of `pattern` in the page, or null when the pattern does not match.
    const captureOrNull = function (pattern) {
        const hit = pattern.exec(html);
        return hit === null ? null : hit[1];
    };
    // Same as captureOrNull, but a missing match is reported with a descriptive error
    // instead of a bare "cannot read property of null".
    const captureOrThrow = function (pattern, label) {
        const value = captureOrNull(pattern);
        if (value === null) {
            throw new Error(`song ${songId}: ${label} not found in page`);
        }
        return value;
    };

    const songInfoJSONString = captureOrThrow(/\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/, 'ld+json block');
    const songInfoDict = JSON.parse(songInfoJSONString);

    // TODO: take song aliases into account, e.g. https://music.163.com/#/song?id=26830207

    const title = captureOrThrow(/<meta property="og:title" content="(.*?)" \/>/, 'og:title');
    const image = captureOrThrow(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/, 'og:image');
    const artist = captureOrThrow(/<meta property="og:music:artist" content="(.*?)" \/>/, 'og:music:artist');
    const duration = captureOrThrow(/<meta property="music:duration" content="(.*?)"\/>/, 'music:duration');

    // A song may not belong to an album, so both album tags are optional and looked up independently.
    const album = captureOrNull(/<meta property="og:music:album" content="(.*?)"\/>/);
    const albumId = captureOrNull(/<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/);

    // Collect every musician id; the /g flag makes exec() advance through the page.
    const reg = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
    const artistIds = [];
    let matched = null;
    while ((matched = reg.exec(html)) !== null) {
        artistIds.push(matched[1]);
    }

    const songInfo = {
        songId: songId,
        title: title,
        image: image,
        pubDate: songInfoDict.pubDate,
        artist: artist,
        artistIds: artistIds,
        album: album || null,
        albumId: albumId || null,
        duration: duration,
    };

    // Every write is awaited so that a failure rejects this call (and reaches the caller's
    // error handling) and so that the data is stored by the time the function returns.
    if (songInfo.albumId !== null) {
        await dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
            song_id: songInfo.songId,
            album_id: songInfo.albumId,
        });
    }
    for (const artistId of artistIds) {
        await dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
            song_id: songInfo.songId,
            artist_id: artistId,
        });
    }
    // The song row is written last: if a relation insert fails, no `song` row is created.
    await dbUtils.query('INSERT IGNORE INTO song SET ?', {
        song_id: songInfo.songId,
        title: songInfo.title,
        image: songInfo.image,
        pub_date: songInfo.pubDate,
    });
    return songInfo;
}
 | 
			
		||||
 | 
			
		||||
// Public API of this module: database lookup, single-song fetch, and batch fetch.
module.exports = { getFromDatabase, fetch, fetchAll };
 | 
			
		||||
		Reference in New Issue
	
	Block a user