1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee
tools/hifini_music/index.js

180 lines
5.9 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const fs = require('fs');
const { getApiResult } = require('../utils/requestUtils');
const dbUtils = require("../utils/dbPoolUtils");
const sleepUtils = require("../utils/sleepUtils");
// Database connection pool: configured once at startup, before any data access.
dbUtils.create({
database: "hifinimusic", // database (schema) to connect to
connectionLimit: 10, // number of connections kept in the pool
});
// Publish the configured pool on `global`. This happens before
// './src/dataManager' is required on the next line — presumably that module
// reads `global.dbUtils`; confirm before reordering these statements.
global.dbUtils = dbUtils;
const dataManager = require('./src/dataManager');
const requestUtils = require('../utils/requestUtils');
async function main() {
var args = require('minimist')(process.argv.slice(2));
global.args = {
"order": args.order,
"limit": args.limit,
}
// async function timeout1() {
// await getList();
// setTimeout(() => console.log("getList已完成"), 2000);
// }
// timeout1();
async function timeout2() {
await startFetchDetail();
setTimeout(timeout2, 10 * 1000);
}
timeout2();
async function timeout3() {
await startFetchRealUrl();
setTimeout(timeout3, 10 * 1000);
}
timeout3();
}
/**
 * Crawls forum list pages and stores every thread link found on them.
 *
 * For each page in [beginPage, endPage] the list page (ordered by thread id)
 * is downloaded, all `thread-<id>.htm` anchors are extracted, and the
 * resulting rows are handed to dataManager.thread.insertCollection. The
 * function sleeps one second after each page.
 *
 * @param {number} [forumId=1] Forum (category) id to crawl.
 * @param {number} [beginPage=1] First page to crawl (inclusive).
 * @param {number} [endPage=23] Last page to crawl (inclusive).
 * @returns {Promise<void>}
 */
async function getList(forumId = 1, beginPage = 1, endPage = 23) {
  for (let page = beginPage; page <= endPage; page++) {
    // orderby=tid: list threads by thread id, i.e. by posting time.
    const url = `https://hifini.com/forum-${forumId}-${page}.htm?orderby=tid`;
    console.log(`getList \t| ${beginPage}/${page}/${endPage} | forumId: ${forumId} | ${url}`);
    const html = await getApiResult(url);

    // Capture group 1 is the thread id, group 2 the anchor text (title).
    const threadLinks = html.matchAll(/<a href="thread-(\d{1,15}).htm">(.*?)<\/a>/g);
    const threadList = Array.from(threadLinks, ([, threadId, title]) => ({
      forum_id: forumId,
      thread_id: Number(threadId),
      title: title,
    }));

    await dataManager.thread.insertCollection(threadList);
    // Pause between pages.
    await sleepUtils.sleep(1000);
  }
}
/**
 * Runs one pass of detail fetching: asks the data layer which threads are
 * pending and processes them one after another via getDetail().
 *
 * @returns {Promise<void>}
 */
async function startFetchDetail() {
  const pendingRows = await dataManager.thread.getIdsToFetch();
  const threadIds = pendingRows.map((row) => row.thread_id);
  const total = threadIds.length;

  // Strictly sequential: each thread is fully handled before the next starts.
  for (const [index, threadId] of threadIds.entries()) {
    console.log(`getDetail\t| ${index + 1}/${total} | threadId: ${threadId}`);
    await getDetail(threadId);
  }
}
/**
 * Downloads one thread page, extracts its tags and the tracks configured for
 * the embedded APlayer instance, and stores both through dataManager.
 *
 * Outcomes:
 *  - download fails: the error is logged and nothing is stored;
 *  - no `new APlayer(...)` call on the page: track 0 of the thread gets the
 *    title "未解析到音乐";
 *  - player options cannot be evaluated, or contain no `music` entry: the
 *    error is logged and nothing is stored;
 *  - otherwise tags, thread/tag links and one record per track are written.
 *
 * @param {number} threadId Id of the thread to fetch.
 * @returns {Promise<void>}
 */
async function getDetail(threadId) {
  const url = `https://hifini.com/thread-${threadId}.htm`;
  let html;
  try {
    html = await getApiResult(url, { timeout: 3000 });
  } catch (e) {
    console.error("请求失败,可能是请求超时", e);
    return;
  }

  // The page embeds its playlist as the argument of `new APlayer(...)`.
  const playerMatch = /var ap4 = new APlayer\(([\S\s]*?)\);/.exec(html);
  if (!playerMatch) {
    await dataManager.thread.update(threadId, 0, { music_title: "未解析到音乐" });
    console.log("未解析到音乐,跳过");
    return;
  }

  let musicArr;
  try {
    // SECURITY: this executes JavaScript taken from a remote page. The
    // options are a JS object literal (not JSON), so they are evaluated
    // rather than parsed. The Function constructor at least keeps this
    // function's local variables out of reach, but it is NOT a sandbox —
    // only run this crawler against a site you trust.
    // `document` is stubbed because the options reference
    // document.getElementById(...). The newline before the closing paren
    // keeps a trailing line comment in the options from swallowing it.
    const readOptions = new Function("document", `return (${playerMatch[1]}\n);`);
    const playerOptions = readOptions({ getElementById: () => {} });
    const music = playerOptions.music;
    if (music == null) {
      throw new TypeError("APlayer options contain no music entry");
    }
    // APlayer accepts either one track object or an array of tracks.
    musicArr = Array.isArray(music) ? music : [music];
  } catch (e) {
    console.error("解析失败", e);
    return;
  }

  // Tags: capture group 1 is the tag id, group 2 the tag name.
  const tagPattern = /<a href='tag-(\d{1,15}).htm'><i class="icon-tag"><\/i>(.*?)<\/a>/g;
  const tagList = Array.from(html.matchAll(tagPattern), ([, tagId, tagName]) => ({
    tag_id: Number(tagId),
    tag_name: tagName,
  }));
  await dataManager.tag.insertCollection(tagList);
  await dataManager.thread_tag.insertCollection(
    tagList.map((tag) => ({ thread_id: threadId, tag_id: tag.tag_id }))
  );

  // A thread with several tracks needs one row per track index before the
  // per-track updates below can address them.
  if (musicArr.length > 1) {
    console.log("典型thread_id:", threadId);
    await dataManager.thread.insertCollection(
      musicArr.map((_, i) => ({ thread_id: threadId, music_index: i }))
    );
  }

  for (const [i, music] of musicArr.entries()) {
    await dataManager.thread.update(threadId, i, {
      music_title: music.title,
      music_author: music.author || "",
      music_url: music.url,
      music_pic: music.pic || "",
    });
  }
}
/**
 * Runs one pass of real-URL resolution: asks the data layer which tracks are
 * pending and resolves them one after another via getRealUrl().
 *
 * @returns {Promise<void>}
 */
async function startFetchRealUrl() {
  const pendingTracks = await dataManager.thread.getIdsToFetchRealUrl();
  const total = pendingTracks.length;

  // Strictly sequential: one redirect lookup at a time.
  for (const [index, track] of pendingTracks.entries()) {
    console.log(`getRealUrl\t| ${index + 1}/${total} | threadId: ${track.thread_id} | music_index: ${track.music_index}`);
    await getRealUrl(track.thread_id, track.music_index, track.music_url);
  }
}
/**
 * Resolves the redirect target of a track's stored URL and saves it as the
 * track's `music_real_url`.
 *
 * If the lookup fails for any reason, the placeholder "原地址已失效" is stored
 * instead, so the track is still written exactly once per call.
 *
 * @param {number} threadId Thread the track belongs to.
 * @param {number} musicIndex Index of the track within the thread.
 * @param {string} fakeUrl URL taken from the player options; either relative
 *   to https://hifini.com/ or already absolute.
 * @returns {Promise<void>}
 */
async function getRealUrl(threadId, musicIndex, fakeUrl) {
  let realUrl = "原地址已失效";
  try {
    // Resolve against the site origin: relative paths get the origin
    // prepended, absolute URLs are used unchanged.
    const requestUrl = new URL(fakeUrl, "https://hifini.com/").href;
    realUrl = await requestUtils.getRedirectUrl(requestUrl);
  } catch (e) {
    // Best effort: keep the placeholder and record why the lookup failed.
    console.log("重定向地址获取失败", e);
  }
  await dataManager.thread.update(threadId, musicIndex, { music_real_url: realUrl });
}
// Start the crawler. main() returns a promise, so a startup failure is
// reported here and reflected in the exit code instead of being left as an
// unhandled rejection.
main().catch((e) => {
  console.error("main failed", e);
  process.exitCode = 1;
});