// Source listing metadata: JavaScript, 180 lines, 5.9 KiB
const fs = require('fs');
const { getApiResult } = require('../utils/requestUtils');
const dbUtils = require("../utils/dbPoolUtils");
const sleepUtils = require("../utils/sleepUtils");

// Database connection pool.
dbUtils.create({
    database: "hifinimusic", // database to use
    connectionLimit: 10, // number of connections in the pool
});
// The pool is published globally before ./src/dataManager is loaded below.
// NOTE(review): presumably dataManager reads global.dbUtils — confirm before reordering.
global.dbUtils = dbUtils;

const dataManager = require('./src/dataManager');
const requestUtils = require('../utils/requestUtils');

async function main() {
|
||
var args = require('minimist')(process.argv.slice(2));
|
||
global.args = {
|
||
"order": args.order,
|
||
"limit": args.limit,
|
||
}
|
||
// async function timeout1() {
|
||
// await getList();
|
||
// setTimeout(() => console.log("getList已完成"), 2000);
|
||
// }
|
||
// timeout1();
|
||
|
||
async function timeout2() {
|
||
await startFetchDetail();
|
||
setTimeout(timeout2, 10 * 1000);
|
||
}
|
||
timeout2();
|
||
|
||
async function timeout3() {
|
||
await startFetchRealUrl();
|
||
setTimeout(timeout3, 10 * 1000);
|
||
}
|
||
timeout3();
|
||
}
|
||
|
||
/**
 * Crawls forum list pages and stores every thread link found on them.
 *
 * For each page from `beginPage` to `endPage` (inclusive) the page is fetched,
 * all `<a href="thread-<id>.htm">title</a>` links are collected and passed to
 * `dataManager.thread.insertCollection`, followed by a one second pause.
 *
 * @param {number} [forumId=1] forum (category) id used in the list URL
 * @param {number} [beginPage=1] first page to crawl
 * @param {number} [endPage=23] last page to crawl
 * @returns {Promise<void>}
 */
async function getList(forumId = 1, beginPage = 1, endPage = 23) {
    const threadLinkPattern = /<a href="thread-(\d{1,15}).htm">(.*?)<\/a>/g;

    for (let page = beginPage; page <= endPage; page++) {
        const url = `https://hifini.com/forum-${forumId}-${page}.htm?orderby=tid`; // ordered by post time
        console.log(`getList \t| ${beginPage}/${page}/${endPage} | forumId: ${forumId} | ${url}`);

        const html = await getApiResult(url);

        // matchAll works on a copy of the regex, so reusing the /g pattern is safe.
        const threadList = Array.from(html.matchAll(threadLinkPattern), ([, threadId, title]) => ({
            forum_id: forumId,
            thread_id: Number(threadId),
            title,
        }));

        await dataManager.thread.insertCollection(threadList);
        await sleepUtils.sleep(1000);
    }
}

/**
 * Fetches the detail page of every thread returned by
 * `dataManager.thread.getIdsToFetch()`.
 *
 * Threads are processed one at a time, in the order returned. A failure on one
 * thread is logged and does not prevent the remaining threads from being
 * processed.
 *
 * @returns {Promise<void>}
 */
async function startFetchDetail() {
    const pendingRows = await dataManager.thread.getIdsToFetch();
    const threadIds = pendingRows.map((row) => row.thread_id);

    for (const [index, threadId] of threadIds.entries()) {
        console.log(`getDetail\t| ${index + 1}/${threadIds.length} | threadId: ${threadId}`);
        try {
            await getDetail(threadId);
        } catch (err) {
            console.error(`getDetail failed | threadId: ${threadId}`, err);
        }
    }
}

/**
 * Fetches one thread page, extracts its APlayer track list and its tags, and
 * stores them through dataManager.
 *
 * Outcomes:
 * - the request fails: the error is logged and nothing is written;
 * - the page has no `var ap4 = new APlayer(...)` call: row 0 of the thread is
 *   updated with music_title "未解析到音乐";
 * - the player config cannot be evaluated or has no `music` entry: the error
 *   is logged and nothing is written;
 * - otherwise the tags, the thread/tag links and one row per track are written.
 *
 * @param {number} threadId id used in the `thread-<id>.htm` URL
 * @returns {Promise<void>}
 */
async function getDetail(threadId) {
    const url = `https://hifini.com/thread-${threadId}.htm`;

    let html;
    try {
        html = await getApiResult(url, { timeout: 3000 });
    } catch (e) {
        console.error("请求失败,可能是请求超时", e);
        return;
    }

    // Locate the inline player setup; the capture is the constructor argument text.
    const playerMatch = /var ap4 = new APlayer\(([\S\s]*?)\);/.exec(html);
    if (!playerMatch) {
        await dataManager.thread.update(threadId, 0, { music_title: "未解析到音乐" });
        console.log("未解析到音乐,跳过");
        return;
    }

    let musicList;
    try {
        musicList = evaluatePlayerMusic(playerMatch[1]);
    } catch (e) {
        console.error("解析失败", e);
        return;
    }

    const tagList = extractTagList(html);
    await dataManager.tag.insertCollection(tagList);
    await dataManager.thread_tag.insertCollection(
        tagList.map((tag) => ({ thread_id: threadId, tag_id: tag.tag_id })),
    );

    // A thread with several tracks needs one row per track before the updates below.
    if (musicList.length > 1) {
        console.log("典型:thread_id:", threadId);
        await dataManager.thread.insertCollection(
            musicList.map((_, musicIndex) => ({ thread_id: threadId, music_index: musicIndex })),
        );
    }

    for (const [musicIndex, music] of musicList.entries()) {
        await dataManager.thread.update(threadId, musicIndex, {
            music_title: music.title,
            music_author: music.author || "",
            music_url: music.url,
            music_pic: music.pic || "",
        });
    }
}

/**
 * Evaluates the argument text of `new APlayer(...)` and returns its tracks.
 *
 * SECURITY: this executes script text taken from a remote page. The Function
 * constructor keeps that text away from local variables (unlike a direct
 * `eval`), but it is NOT a sandbox; replace it with a real parser if the site
 * cannot be trusted.
 *
 * @param {string} configSource JavaScript expression producing the player options
 * @returns {Array<object>} the `music` option, always as an array
 * @throws {Error} when the text cannot be evaluated or has no `music` entry
 */
function evaluatePlayerMusic(configSource) {
    // The config references document.getElementById(...); give it a harmless stand-in.
    const documentStub = { getElementById: () => {} };
    // Newlines around the source keep a trailing line comment from swallowing the ")".
    const readConfig = new Function("document", `return (\n${configSource}\n);`);
    const { music } = readConfig(documentStub);
    if (music == null) {
        throw new TypeError("APlayer config has no music entry");
    }
    // A single track may be given as a bare object instead of a one-element array.
    return Array.isArray(music) ? music : [music];
}

/**
 * Collects the `<a href='tag-<id>.htm'>` tag links of a thread page.
 *
 * @param {string} html thread page markup
 * @returns {Array<{tag_id: number, tag_name: string}>}
 */
function extractTagList(html) {
    const tagLinkPattern = /<a href='tag-(\d{1,15}).htm'><i class="icon-tag"><\/i>(.*?)<\/a>/g;
    return Array.from(html.matchAll(tagLinkPattern), ([, tagId, tagName]) => ({
        tag_id: Number(tagId),
        tag_name: tagName,
    }));
}

/**
 * Resolves the real URL of every track returned by
 * `dataManager.thread.getIdsToFetchRealUrl()`.
 *
 * Rows are processed one at a time, in the order returned. A failure on one
 * row is logged and does not prevent the remaining rows from being processed.
 *
 * @returns {Promise<void>}
 */
async function startFetchRealUrl() {
    const pendingRows = await dataManager.thread.getIdsToFetchRealUrl();

    for (const [index, row] of pendingRows.entries()) {
        const { thread_id: threadId, music_index: musicIndex, music_url: fakeUrl } = row;
        console.log(`getRealUrl\t| ${index + 1}/${pendingRows.length} | threadId: ${threadId} | music_index: ${musicIndex}`);
        try {
            await getRealUrl(threadId, musicIndex, fakeUrl);
        } catch (err) {
            console.error(`getRealUrl failed | threadId: ${threadId} | music_index: ${musicIndex}`, err);
        }
    }
}

/**
 * Follows the site's redirect for one track and stores the resulting URL.
 *
 * When the redirect cannot be resolved, the placeholder "原地址已失效" is
 * stored instead, so the row is still written.
 *
 * @param {number} threadId thread the track belongs to
 * @param {number} musicIndex position of the track within the thread
 * @param {string} fakeUrl site-relative URL, appended to https://hifini.com/
 * @returns {Promise<void>}
 */
async function getRealUrl(threadId, musicIndex, fakeUrl) {
    let realUrl = "原地址已失效";
    try {
        realUrl = await requestUtils.getRedirectUrl(`https://hifini.com/${fakeUrl}`);
    } catch (err) {
        console.log("重定向地址获取失败", err);
    }
    // The update result is not needed; the old code assigned it to an
    // undeclared `result`, which created an implicit global.
    await dataManager.thread.update(threadId, musicIndex, { music_real_url: realUrl });
}

// Start the crawler; report a startup failure instead of leaving the promise unhandled.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});