添加SQL建表语句;考虑音乐页面包含多首歌的情况;Bugfix
This commit is contained in:
parent
6ce6b0cd46
commit
ddde1b28f9
@ -14,11 +14,11 @@ const dataManager = require('./src/dataManager');
|
|||||||
const requestUtils = require('../utils/requestUtils');
|
const requestUtils = require('../utils/requestUtils');
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
async function timeout1() {
|
// async function timeout1() {
|
||||||
await getList();
|
// await getList();
|
||||||
setTimeout(timeout1, 2000);
|
// setTimeout(() => console.log("getList已完成"), 2000);
|
||||||
}
|
// }
|
||||||
timeout1();
|
// timeout1();
|
||||||
|
|
||||||
async function timeout2() {
|
async function timeout2() {
|
||||||
await startFetchDetail();
|
await startFetchDetail();
|
||||||
@ -36,9 +36,9 @@ async function main() {
|
|||||||
// 爬取列表页,获得歌曲详情页
|
// 爬取列表页,获得歌曲详情页
|
||||||
async function getList() {
|
async function getList() {
|
||||||
|
|
||||||
let forumId = 12; // 分类id
|
let forumId = 1; // 分类id
|
||||||
let beginPage = 125; // 起始页
|
let beginPage = 1; // 起始页
|
||||||
let endPage = 165; // 结束页
|
let endPage = 23; // 结束页
|
||||||
for (let page = beginPage; page <= endPage; page++) {
|
for (let page = beginPage; page <= endPage; page++) {
|
||||||
let url = `https://hifini.com/forum-${forumId}-${page}.htm?orderby=tid`; // 按照发帖时间排序
|
let url = `https://hifini.com/forum-${forumId}-${page}.htm?orderby=tid`; // 按照发帖时间排序
|
||||||
console.log(`getList \t| ${beginPage}/${page}/${endPage} | forumId: ${forumId} | ${url}`);
|
console.log(`getList \t| ${beginPage}/${page}/${endPage} | forumId: ${forumId} | ${url}`);
|
||||||
@ -51,15 +51,15 @@ async function getList() {
|
|||||||
var m = matcher.next();
|
var m = matcher.next();
|
||||||
var threadList = [];
|
var threadList = [];
|
||||||
while (!m.done) {
|
while (!m.done) {
|
||||||
if (!/^.*?\[[-\/\.A-Za-z0-9]+?\]$/.exec(m.value[2])) {
|
// if (!/^.*?\[[-\/\.A-Za-z0-9]+?\]$/.exec(m.value[2])) {
|
||||||
console.log(`跳过 ${m.value[2]}`);
|
// console.log(`跳过 ${m.value[2]}`);
|
||||||
} else {
|
// } else {
|
||||||
threadList.push({
|
threadList.push({
|
||||||
forum_id: forumId,
|
forum_id: forumId,
|
||||||
thread_id: Number(m.value[1]),
|
thread_id: Number(m.value[1]),
|
||||||
title: m.value[2]
|
title: m.value[2]
|
||||||
});
|
});
|
||||||
}
|
// }
|
||||||
m = matcher.next();
|
m = matcher.next();
|
||||||
}
|
}
|
||||||
await dataManager.thread.insertCollection(threadList);
|
await dataManager.thread.insertCollection(threadList);
|
||||||
@ -90,7 +90,7 @@ async function getDetail(threadId) {
|
|||||||
// 解析到音乐信息
|
// 解析到音乐信息
|
||||||
var matcher = /var ap4 = new APlayer\(([\S\s]*?)\);/.exec(html);
|
var matcher = /var ap4 = new APlayer\(([\S\s]*?)\);/.exec(html);
|
||||||
if (!matcher) {
|
if (!matcher) {
|
||||||
await dataManager.thread.update(threadId, { music_title: "未解析到音乐" });
|
await dataManager.thread.update(threadId, 0, { music_title: "未解析到音乐" });
|
||||||
console.log("未解析到音乐,跳过");
|
console.log("未解析到音乐,跳过");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -98,8 +98,8 @@ async function getDetail(threadId) {
|
|||||||
let arrStr = matcher[1];
|
let arrStr = matcher[1];
|
||||||
// console.log(arrStr);
|
// console.log(arrStr);
|
||||||
eval(`let document = { getElementById: () => {} }; var arr = ${arrStr};`);
|
eval(`let document = { getElementById: () => {} }; var arr = ${arrStr};`);
|
||||||
var music = arr.music[0];
|
var musicArr = arr.music;
|
||||||
// console.log(music);
|
// console.log(musicArr);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error("解析失败", e);
|
console.error("解析失败", e);
|
||||||
return;
|
return;
|
||||||
@ -125,12 +125,24 @@ async function getDetail(threadId) {
|
|||||||
};
|
};
|
||||||
}));
|
}));
|
||||||
|
|
||||||
await dataManager.thread.update(threadId, {
|
if (musicArr.length > 1) {
|
||||||
|
console.log("典型:thread_id:", threadId);
|
||||||
|
await dataManager.thread.insertCollection(musicArr.map((music, i) => {
|
||||||
|
return {
|
||||||
|
thread_id: threadId,
|
||||||
|
music_index: i
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
for (let i = 0; i < musicArr.length; i++) {
|
||||||
|
const music = musicArr[i];
|
||||||
|
await dataManager.thread.update(threadId, i, {
|
||||||
music_title: music.title,
|
music_title: music.title,
|
||||||
music_author: music.author || "",
|
music_author: music.author || "",
|
||||||
music_url: music.url,
|
music_url: music.url,
|
||||||
music_pic: music.pic || ""
|
music_pic: music.pic || ""
|
||||||
});
|
});
|
||||||
|
}
|
||||||
// console.log("done");
|
// console.log("done");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,7 +152,7 @@ async function startFetchRealUrl() {
|
|||||||
urlsToFetch = urlsToFetch.map(item => { return { threadId: item.thread_id, fakeUrl: item.music_url } });
|
urlsToFetch = urlsToFetch.map(item => { return { threadId: item.thread_id, fakeUrl: item.music_url } });
|
||||||
for (let i = 0; i < urlsToFetch.length; i++) {
|
for (let i = 0; i < urlsToFetch.length; i++) {
|
||||||
const urlToFetch = urlsToFetch[i];
|
const urlToFetch = urlsToFetch[i];
|
||||||
console.log(`getRealUrl\t| ${i + 1}/${urlsToFetch.length} | threadId: ${urlToFetch.threadId} | ${urlToFetch.fakeUrl}`);
|
console.log(`getRealUrl\t| ${i + 1}/${urlsToFetch.length} | threadId: ${urlToFetch.threadId}`);
|
||||||
await getRealUrl(urlToFetch);
|
await getRealUrl(urlToFetch);
|
||||||
await sleepUtils.sleep(1000);
|
await sleepUtils.sleep(1000);
|
||||||
}
|
}
|
||||||
@ -148,12 +160,13 @@ async function startFetchRealUrl() {
|
|||||||
|
|
||||||
/**
 * Resolve the redirect target of one song's placeholder URL and store it in
 * the `music_real_url` column of the matching thread row.
 *
 * The database write always happens: when the redirect lookup throws, the
 * placeholder text below is stored instead of a URL.
 *
 * @param {object} urlToFetch
 * @param {number} urlToFetch.threadId - thread_id of the row to update.
 * @param {string} urlToFetch.fakeUrl - site-relative URL appended to https://hifini.com/.
 * @param {number} [urlToFetch.musicIndex=0] - music_index of the row to update;
 *   defaults to 0 so callers that only pass threadId/fakeUrl keep updating the first song.
 * @returns {Promise<void>}
 */
async function getRealUrl(urlToFetch) {
    const { threadId, fakeUrl, musicIndex = 0 } = urlToFetch;

    // Fallback value written to the row when the redirect cannot be resolved.
    let url = "原地址已失效";
    try {
        url = await requestUtils.getRedirectUrl(`https://hifini.com/${fakeUrl}`);
    } catch (e) {
        // Best effort: keep going with the fallback value, but do not hide the cause.
        console.log("重定向地址获取失败", e);
    }

    // No assignment to an undeclared `result` here: that created an implicit global.
    await dataManager.thread.update(threadId, musicIndex, { music_real_url: url });
}
|
||||||
|
|
||||||
main();
|
main();
|
54
hifini_music/sql/create_table_sql.sql
Normal file
54
hifini_music/sql/create_table_sql.sql
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
SET NAMES utf8mb4;
|
||||||
|
SET FOREIGN_KEY_CHECKS = 0;
|
||||||
|
|
||||||
|
-- ----------------------------
|
||||||
|
-- Table structure for hifini_forum
|
||||||
|
-- ----------------------------
|
||||||
|
DROP TABLE IF EXISTS `hifini_forum`;
|
||||||
|
CREATE TABLE `hifini_forum` (
|
||||||
|
`forum_id` int(10) UNSIGNED NOT NULL COMMENT 'id',
|
||||||
|
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '名称',
|
||||||
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
|
PRIMARY KEY (`forum_id`) USING BTREE
|
||||||
|
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||||
|
|
||||||
|
-- ----------------------------
|
||||||
|
-- Table structure for hifini_tag
|
||||||
|
-- ----------------------------
|
||||||
|
DROP TABLE IF EXISTS `hifini_tag`;
|
||||||
|
CREATE TABLE `hifini_tag` (
|
||||||
|
`tag_id` int(10) UNSIGNED NOT NULL,
|
||||||
|
`tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
|
||||||
|
PRIMARY KEY (`tag_id`) USING BTREE
|
||||||
|
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||||
|
|
||||||
|
-- ----------------------------
|
||||||
|
-- Table structure for hifini_thread
|
||||||
|
-- ----------------------------
|
||||||
|
DROP TABLE IF EXISTS `hifini_thread`;
|
||||||
|
CREATE TABLE `hifini_thread` (
|
||||||
|
`thread_id` int(10) UNSIGNED NOT NULL COMMENT 'id',
|
||||||
|
`music_index` int(10) UNSIGNED NOT NULL DEFAULT 0 COMMENT '与id组成联合主键(考虑一个页面包含多首歌的情况)',
|
||||||
|
`forum_id` int(10) UNSIGNED NOT NULL COMMENT '分类id',
|
||||||
|
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '名称',
|
||||||
|
`music_title` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '',
|
||||||
|
`music_author` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '',
|
||||||
|
`music_url` varchar(1000) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '音乐网址',
|
||||||
|
`music_real_url` varchar(1000) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '音乐真实地址',
|
||||||
|
`music_pic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '音乐封面图地址',
|
||||||
|
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
|
||||||
|
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
|
||||||
|
PRIMARY KEY (`thread_id`, `music_index`) USING BTREE,
|
||||||
|
INDEX `forum_id`(`forum_id`) USING BTREE
|
||||||
|
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
||||||
|
|
||||||
|
-- ----------------------------
|
||||||
|
-- Table structure for hifini_thread_tag_relation
|
||||||
|
-- ----------------------------
|
||||||
|
DROP TABLE IF EXISTS `hifini_thread_tag_relation`;
-- Relation table pairing a thread id with a tag id; the pair is the primary key,
-- so each (thread_id, tag_id) combination can be stored only once.
-- Presumably references hifini_thread.thread_id and hifini_tag.tag_id (no
-- foreign keys are declared) -- TODO confirm.
CREATE TABLE `hifini_thread_tag_relation`  (
  `thread_id` int(10) UNSIGNED NOT NULL,
  -- UNSIGNED so the column type matches hifini_tag.tag_id exactly.
  `tag_id` int(10) UNSIGNED NOT NULL,
  PRIMARY KEY (`thread_id`, `tag_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
|
@ -15,8 +15,8 @@ module.exports = {
|
|||||||
return await insertCollectionTemplate("hifini_thread", threadList);
|
return await insertCollectionTemplate("hifini_thread", threadList);
|
||||||
},
|
},
|
||||||
|
|
||||||
update: async (threadId, threadInfo) => {
|
update: async (threadId, musicIndex, threadInfo) => {
|
||||||
return await dbUtils.query(`UPDATE hifini_thread SET ? WHERE thread_id = ${threadId}`, threadInfo);
|
return await dbUtils.query(`UPDATE hifini_thread SET ? WHERE thread_id = ${threadId} and music_index = ${musicIndex}`, threadInfo);
|
||||||
},
|
},
|
||||||
|
|
||||||
getIdsToFetch: async () => {
|
getIdsToFetch: async () => {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user