1
0
Code Pull Requests Projects Releases Activity GitHub Gitee
nowcoder-bible/main.js
2022-08-17 13:42:07 +08:00

126 lines
4.0 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

const request = require('request');
const fs = require('fs');
const wkhtmltopdf = require('wkhtmltopdf');
/**
 * Issue an HTTP GET request and resolve with the response body.
 *
 * The returned promise never rejects: on a transport error, or on any status
 * code other than 200, it resolves with the sentinel string "error". Callers
 * must check for that value before treating the result as a payload.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} The body handed to the `request` callback, or the
 *   string "error" when the request did not succeed.
 */
async function getApiResult(url) {
  return new Promise((resolve) => {
    request({ method: 'GET', url }, (error, response, result) => {
      if (!error && response.statusCode === 200) {
        resolve(result);
        return;
      }
      // A non-200 reply arrives with a null `error`, so the status code is
      // reported as well; otherwise the log would only say "error is null".
      const status = response ? response.statusCode : 'no response';
      console.log(`error is ${error} (status: ${status})`);
      resolve("error");
    });
  });
}
// Tutorial page: https://www.nowcoder.com/issue/tutorial?tutorialId=94
// Catalog API: https://www.nowcoder.com/content/tutorial/catalog/94
// Article API: https://www.nowcoder.com/content/tutorial/detail/94/ea1986fcff294f6292385703e94689e8
/**
 * Crawl the tutorial: fetch the catalog, then hand every listed section to
 * `getDetail`, one section at a time.
 *
 * @returns {Promise<void>} Settles once every section has been processed.
 * @throws {Error} If the catalog request fails.
 */
async function main() {
  const urlContent = "https://www.nowcoder.com/content/tutorial/catalog/94";
  const result = await getApiResult(urlContent);
  // getApiResult signals failure with the string "error"; feeding that to
  // JSON.parse would only raise an unrelated SyntaxError.
  if (result === "error") {
    throw new Error(`Failed to fetch catalog from ${urlContent}`);
  }
  const data = JSON.parse(result);
  console.log(data);
  const catalog = data.data.catalog;
  console.log("catalog", catalog);
  // The catalog is indexed as chapters, each of which is a list of sections.
  for (let chapter = 0; chapter < catalog.length; chapter++) {
    const sectionList = catalog[chapter];
    for (let section = 0; section < sectionList.length; section++) {
      const sectionInfo = sectionList[section];
      // Awaited inside the loop so sections are handled strictly in order.
      await getDetail(sectionInfo.uuid);
    }
  }
  console.log("完成");
}
/**
 * Download one tutorial section, save its HTML content under ./output/html
 * and convert it to a PDF under ./output/pdf.
 *
 * Both files are named "<chapterId>.<sectionId> <title>".
 *
 * @param {string} uuid - Section identifier appended to the detail URL.
 * @returns {Promise<void>} Settles once `transferToPDF` has settled.
 * @throws {Error} If the section request fails.
 */
async function getDetail(uuid) {
  const urlDetail = `https://www.nowcoder.com/content/tutorial/detail/94/${uuid}`;
  const result = await getApiResult(urlDetail);
  // getApiResult signals failure with the string "error"; feeding that to
  // JSON.parse would only raise an unrelated SyntaxError.
  if (result === "error") {
    throw new Error(`Failed to fetch section ${uuid} from ${urlDetail}`);
  }
  const { section } = JSON.parse(result).data;

  // The title comes from the remote service. A path separator in it would
  // make the file name point into another (possibly missing) directory.
  const safeTitle = String(section.title).replace(/[\\/]/g, "_");
  const baseName = `${section.chapterId}.${section.sectionId} ${safeTitle}`;
  const htmlPath = `./output/html/${baseName}.html`;
  const pdfPath = `./output/pdf/${baseName}.pdf`;

  // Writing fails with ENOENT when the target directories are absent.
  fs.mkdirSync("./output/html", { recursive: true });
  fs.mkdirSync("./output/pdf", { recursive: true });

  fs.writeFileSync(htmlPath, section.content);
  await transferToPDF(htmlPath, pdfPath);
}
// https://wkhtmltopdf.org/
/**
 * Wrap a saved HTML fragment in a minimal page and render it to a PDF file
 * through wkhtmltopdf.
 *
 * The returned promise settles only after the PDF has been completely written,
 * so awaiting callers convert one file at a time and see failures.
 *
 * @param {string} htmlFilePath - Path of the HTML fragment to read.
 * @param {string} pdfFilePath - Path of the PDF file to write.
 * @returns {Promise<void>} Resolves when the output stream has finished.
 *   Rejects if either the converter stream or the file stream emits an error.
 */
async function transferToPDF(htmlFilePath, pdfFilePath) {
  console.log(`开始转换 ${pdfFilePath}`);
  // The wrapper declares UTF-8 and keeps images within the page width.
  const html = `
<html>
<head>
<meta charset="utf-8">
<style>
* {
/*
font-size: 30px;
font-size: large !important;
*/
}
img {
max-width: 100%;
}
</style>
</head>
<body>
${fs.readFileSync(htmlFilePath)}
</body>
</html>`;

  await new Promise((resolve, reject) => {
    const output = fs.createWriteStream(pdfFilePath);
    output.on("finish", resolve);
    output.on("error", reject);
    wkhtmltopdf(html, { pageSize: "A4", minimumFontSize: 10, disableSmartShrinking: true })
      .on("error", (err) => {
        // pipe() does not forward source errors, so release the file here.
        output.destroy();
        reject(err);
      })
      .pipe(output);
  });
}
// ========================================================
/**
 * Convert every .html file found in ./output/html to a PDF of the same base
 * name in ./output/pdf, one file after another.
 *
 * @returns {Promise<void>} Settles once every HTML file has been handed to
 *   `transferToPDF` and that call has settled.
 */
async function transferHTMLToPDF() {
  const htmlExtension = ".html";
  const files = fs.readdirSync("./output/html");
  // Writing fails with ENOENT when the target directory is absent.
  fs.mkdirSync("./output/pdf", { recursive: true });
  for (const fileName of files) {
    // Skip anything that is not an HTML file.
    if (!fileName.endsWith(htmlExtension)) continue;
    // Swap only the trailing extension. A plain replace() would rewrite the
    // first ".html" found anywhere in the name.
    const pdfFileName = `${fileName.slice(0, -htmlExtension.length)}.pdf`;
    await transferToPDF(`./output/html/${fileName}`, `./output/pdf/${pdfFileName}`);
  }
  console.log("完成");
}
// Crawl, then convert to PDF.
main().catch((err) => {
  // Report the failure and signal it through the exit status instead of
  // leaving the rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
// Already crawled; only the PDF conversion is needed.
// transferHTMLToPDF();
// For testing and debugging.
// transferToPDF(`./output/html/8.3 操作系统(三).html`, `./output/test.pdf`)