diff --git a/hifini_music/index.js b/hifini_music/index.js index 50ea658..4efa4e1 100644 --- a/hifini_music/index.js +++ b/hifini_music/index.js @@ -75,6 +75,7 @@ async function startFetchDetail() { const threadId = idsToFetch[i]; console.log(`getDetail\t| ${i + 1}/${idsToFetch.length} | threadId: ${threadId}`); await getDetail(threadId); + // await sleepUtils.sleep(100); } } @@ -158,7 +159,7 @@ async function startFetchRealUrl() { const urlToFetch = urlsToFetch[i]; console.log(`getRealUrl\t| ${i + 1}/${urlsToFetch.length} | threadId: ${urlToFetch.threadId}`); await getRealUrl(urlToFetch); - await sleepUtils.sleep(1000); + // await sleepUtils.sleep(100); } } diff --git a/netease_music/sql/generator.js b/netease_music/sql/generator.js new file mode 100644 index 0000000..878ced2 --- /dev/null +++ b/netease_music/sql/generator.js @@ -0,0 +1,44 @@ +var table = [ + "song", + "album", + "artist", + "comment", + "lyric", + "user", + "category", + "playlist", + + "comment_progress", + "song_album_relation", + "song_artist_relation", + "song_playlist_relation", + + "wait_check_album", + "wait_check_artist", + "wait_check_comment", + "wait_check_lyric", + "wait_check_song", + + "wait_fetch_album", + "wait_fetch_artist", + "wait_fetch_lyric", + "wait_fetch_song", + + "analysis", + "log", + + "hifini_forum", + "hifini_tag", + "hifini_thread", + "hifini_thread_tag_relation", +]; + +let sqlList = []; + +// OPTIMIZE TABLE +table.forEach((tableName) => sqlList.push(`OPTIMIZE TABLE ${tableName};`)); + +// RENAME TABLE 移动数据库 +// table.forEach((tableName) => sqlList.push(`RENAME TABLE neteasemusic.${tableName} TO neteasemusic_develop.${tableName};`)); + +console.log(sqlList.join('\n')); \ No newline at end of file diff --git a/netease_music/sql/statistic.sql b/netease_music/sql/statistic.sql index a84ad8a..f04e964 100644 --- a/netease_music/sql/statistic.sql +++ b/netease_music/sql/statistic.sql @@ -19,6 +19,27 @@ INSERT INTO analysis (`key`, `value`) VALUES ('songArtistCount', (SELECT count(* -- 更新后初次全表扫描 +INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00'; +INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00'; +INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00'; + +INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00'; +INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00'; +INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00'; +INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00'; + +INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00'; +INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00'; +INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00'; +INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00'; + +INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00'; + +INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00'; + + + +-- 全量更新 INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ); INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song ); INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song ); @@ -103,59 +124,6 @@ ORDER BY s DESC --- optimize table -optimize table analysis; -optimize table album; -optimize table artist; -optimize table category; -optimize table comment; -optimize table comment_progress; -optimize table log; -optimize table lyric; -optimize table playlist; -optimize table song; -optimize table song_album_relation; -optimize table song_artist_relation; -optimize table song_playlist_relation; -optimize table user; -optimize table wait_check_album; -optimize table wait_check_artist; -optimize table wait_check_lyric; -optimize table wait_check_song; -optimize table wait_fetch_album; -optimize table wait_fetch_artist; -optimize table wait_fetch_lyric; -optimize table wait_fetch_song; - - - --- 移动数据库 -RENAME TABLE neteasemusic.analysis TO neteasemusic_develop.analysis; -RENAME TABLE neteasemusic.album TO neteasemusic_develop.album; -RENAME TABLE neteasemusic.artist TO neteasemusic_develop.artist; -RENAME TABLE neteasemusic.category TO neteasemusic_develop.category; -RENAME TABLE neteasemusic.comment TO neteasemusic_develop.comment; -RENAME TABLE neteasemusic.comment_progress TO neteasemusic_develop.comment_progress; -RENAME TABLE neteasemusic.log TO neteasemusic_develop.log; -RENAME TABLE neteasemusic.lyric TO neteasemusic_develop.lyric; -RENAME TABLE neteasemusic.playlist TO neteasemusic_develop.playlist; -RENAME TABLE neteasemusic.song TO neteasemusic_develop.song; -RENAME TABLE neteasemusic.song_album_relation TO neteasemusic_develop.song_album_relation; -RENAME TABLE neteasemusic.song_artist_relation TO neteasemusic_develop.song_artist_relation; -RENAME TABLE neteasemusic.song_playlist_relation TO neteasemusic_develop.song_playlist_relation; -RENAME TABLE neteasemusic.user TO neteasemusic_develop.user; -RENAME TABLE neteasemusic.wait_check_album TO neteasemusic_develop.wait_check_album; -RENAME TABLE neteasemusic.wait_check_artist TO neteasemusic_develop.wait_check_artist; -RENAME TABLE neteasemusic.wait_check_lyric TO neteasemusic_develop.wait_check_lyric; -RENAME TABLE neteasemusic.wait_check_song TO neteasemusic_develop.wait_check_song; -RENAME TABLE neteasemusic.wait_fetch_album TO neteasemusic_develop.wait_fetch_album; -RENAME TABLE neteasemusic.wait_fetch_artist TO neteasemusic_develop.wait_fetch_artist; -RENAME TABLE neteasemusic.wait_fetch_lyric TO neteasemusic_develop.wait_fetch_lyric; -RENAME TABLE neteasemusic.wait_fetch_song TO neteasemusic_develop.wait_fetch_song; - - - - -- 查询单个数据库里面各个表所占磁盘空间大小包括其索引的大小 SELECT table_schema AS '数据库', @@ -170,28 +138,3 @@ WHERE table_schema = 'neteasemusic' ORDER BY table_rows DESC; - - - --- analysis --- album --- artist --- category --- comment --- comment_progress --- log --- lyric --- playlist --- song --- song_album_relation --- song_artist_relation --- song_playlist_relation --- user --- wait_check_album --- wait_check_artist --- wait_check_lyric --- wait_check_song --- wait_fetch_album --- wait_fetch_artist --- wait_fetch_lyric --- wait_fetch_song diff --git a/netease_music/src/assistantUtils.js b/netease_music/src/assistantUtils.js index 68c303f..2899471 100644 --- a/netease_music/src/assistantUtils.js +++ b/netease_music/src/assistantUtils.js @@ -34,13 +34,14 @@ async function migrateIdsFromCheckToFetch(tableName, fieldName, insertSql = null // 插入待爬取列表 if (finalIds.length > 0) { - await dbUtils.query(insertSql ? insertSql : `INSERT IGNORE INTO wait_fetch_${tableName} (id) VALUES ?`, [finalIds.map(id => [id])]); + var result = await dbUtils.query(insertSql ? insertSql : `INSERT IGNORE INTO wait_fetch_${tableName} (id) VALUES ?`, [finalIds.map(id => [id])]); + // console.log(result); } // 从待检查表中删除 if (ids.length > 0) await dbUtils.query(`DELETE FROM wait_check_${tableName} WHERE id IN ?`, [[ids]]); - console.log(`table: ${tableName} | ${ids[0]} - ${ids.slice(-1)[0]}`); + console.log(`table: ${tableName} | ${ids[0]} - ${ids.slice(-1)[0]} (${result?.affectedRows}/${finalIds.length}/${ids.length})`); } } diff --git a/netease_music/src/getInfo/albumInfoUtils.js b/netease_music/src/getInfo/albumInfoUtils.js index 5016f57..32b1d2e 100644 --- a/netease_music/src/getInfo/albumInfoUtils.js +++ b/netease_music/src/getInfo/albumInfoUtils.js @@ -59,7 +59,7 @@ async function fetch({ albumId, debug = false, update = false }) { let url = `https://music.163.com/album?id=${albumId}`; try { // var html = fs.readFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), 'utf8'); - var html = await requestUtils.getApiResult(url); + var html = await requestUtils.getApiResult(url, { timeout: 3000 }); // fs.writeFileSync(path.join(__dirname, "../../temp", `album-${albumId}.html`), html); } catch (errors) { console.error(errors); diff --git a/netease_music/src/getInfo/artistInfoUtils.js b/netease_music/src/getInfo/artistInfoUtils.js index a4c0da0..feee740 100644 --- a/netease_music/src/getInfo/artistInfoUtils.js +++ b/netease_music/src/getInfo/artistInfoUtils.js @@ -52,7 +52,7 @@ async function fetch({ artistId, debug = false }) { let url = `https://music.163.com/artist?id=${artistId}`; try { // var html = fs.readFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), 'utf8'); - var html = await requestUtils.getApiResult(url); + var html = await requestUtils.getApiResult(url, { timeout: 3000 }); // fs.writeFileSync(path.join(__dirname, "../../temp", `artist-${artistId}.html`), html); } catch (errors) { console.error(errors); diff --git a/netease_music/src/getInfo/songInfoUtils.js b/netease_music/src/getInfo/songInfoUtils.js index db373f3..d671de9 100644 --- a/netease_music/src/getInfo/songInfoUtils.js +++ b/netease_music/src/getInfo/songInfoUtils.js @@ -21,7 +21,7 @@ async function fetchAll({ args = {} }) { for (let i = 0; i < count; i++) { await global.checkIsExit(); var subArray = songIds.slice(i * step, (i + 1) * step); - console.log(`${i + 1}/${count} | song: ${subArray[0]}-${subArray.slice(-1)[0]} | ${args.min || "?"}-${args.max || "?"}`); + console.log(`${i + 1}/${count} | song: ${subArray[0]}-${subArray.slice(-1)[0]} (${subArray.length}) | ${args.min || "?"}-${args.max || "?"}`); try { await fetch({ songIdArray: subArray }); } catch (err) { diff --git a/netease_music/src/index.js b/netease_music/src/index.js index e0b9c91..10592f3 100644 --- a/netease_music/src/index.js +++ b/netease_music/src/index.js @@ -59,31 +59,37 @@ async function main(args) { switch (args.utils) { case 'song': await songInfoUtils.fetchAll({ args: args }); + await sleepUtils.sleep(60 * 1000); break; case 'album': await albumInfoUtils.fetchAll({ args: args }); + await sleepUtils.sleep(30 * 1000); break; case 'artist': await artistInfoUtils.fetchAll({ args: args }); + await sleepUtils.sleep(30 * 1000); break; case 'lyric': await lyricInfoUtils.fetchAll({ args: args }); + await sleepUtils.sleep(30 * 1000); break; case 'comment': await commentUtils.fetchAll({ args: args }); + await sleepUtils.sleep(30 * 1000); break; case 'playlist': await playlistUtils.fetchAll({ args: args }); + process.exit(0); break; case 'assistant': await assistantUtils.updateWaitTable(); + await sleepUtils.sleep(5000); break; default: console.log("utils参数不匹配,退出"); return; } - await sleepUtils.sleep(2000); } }