128 lines
4.1 KiB
JavaScript
128 lines
4.1 KiB
JavaScript
const request = require('request');
|
||
const fs = require('fs');
|
||
const wkhtmltopdf = require('wkhtmltopdf');
|
||
|
||
/**
 * Perform an HTTP GET request and hand back the response body.
 *
 * The returned promise never rejects. On a transport error or any status
 * other than 200 the failure is logged and the promise resolves with the
 * sentinel string "error", so callers must check for that value before
 * parsing the result.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} The body passed to the `request` callback, or the
 *     string "error" when the request failed.
 */
async function getApiResult(url) {
    return new Promise((resolve) => {
        request({ method: 'GET', url: url }, (error, response, body) => {
            if (!error && response && response.statusCode === 200) {
                resolve(body);
                return;
            }

            // `error` is null for HTTP-level failures (e.g. 404), so report the
            // status code in that case instead of printing "error is null".
            const reason = error
                ? error
                : `unexpected status ${response ? response.statusCode : 'unknown'}`;
            console.log(`error is ${reason}`);
            resolve("error");
        });
    });
}
|
||
|
||
|
||
// https://www.nowcoder.com/issue/tutorial?tutorialId=94
// Catalog: https://www.nowcoder.com/content/tutorial/catalog/94
// Article: https://www.nowcoder.com/content/tutorial/detail/94/ea1986fcff294f6292385703e94689e8
/**
 * Crawl the tutorial catalog and process every section it lists.
 *
 * Fetches the catalog JSON, reads `data.catalog` (a list of chapters, each a
 * list of section descriptors) and calls `getDetail` for each section's
 * `uuid`, one at a time and in catalog order.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the catalog request fails.
 * @throws {SyntaxError} When the catalog response is not valid JSON.
 */
async function main() {
    const urlContent = "https://www.nowcoder.com/content/tutorial/catalog/94";
    console.log(`获取目录 ${urlContent}`);

    const result = await getApiResult(urlContent);
    // getApiResult signals failure with the sentinel string "error"; fail with
    // a clear message instead of a confusing JSON.parse SyntaxError.
    if (result === "error") {
        throw new Error(`Failed to fetch catalog: ${urlContent}`);
    }

    const catalog = JSON.parse(result).data.catalog;
    for (const sectionList of catalog) {
        for (const sectionInfo of sectionList) {
            // Sequential on purpose: one download/conversion at a time.
            await getDetail(sectionInfo.uuid);
        }
    }
    console.log("完成");
}
|
||
|
||
/**
 * Download one tutorial section, save its HTML and convert it to PDF.
 *
 * Reads `data.section` from the detail response and uses its `chapterId`,
 * `sectionId` and `title` to build the file name
 * `<chapterId>.<sectionId> <title>`. The section's `content` is written to
 * `./output/html/<name>.html` and then converted to `./output/pdf/<name>.pdf`
 * via `transferToPDF`.
 *
 * @param {string} uuid - Section identifier taken from the catalog.
 * @returns {Promise<void>}
 * @throws {Error} When the detail request fails.
 * @throws {SyntaxError} When the detail response is not valid JSON.
 */
async function getDetail(uuid) {
    const urlDetail = `https://www.nowcoder.com/content/tutorial/detail/94/${uuid}`;
    console.log(`开始获取 ${urlDetail}`);

    const result = await getApiResult(urlDetail);
    // getApiResult signals failure with the sentinel string "error".
    if (result === "error") {
        throw new Error(`Failed to fetch section detail: ${urlDetail}`);
    }

    const { section } = JSON.parse(result).data;

    // The title comes from the remote server: replace characters that are path
    // separators or illegal in file names so it cannot escape the output folder.
    const baseName = `${section.chapterId}.${section.sectionId} ${section.title}`
        .replace(/[\\\/:*?"<>|]/g, "_");
    const htmlPath = `./output/html/${baseName}.html`;
    const pdfPath = `./output/pdf/${baseName}.pdf`;

    // Create the output folders on first use instead of failing with ENOENT.
    fs.mkdirSync("./output/html", { recursive: true });
    fs.mkdirSync("./output/pdf", { recursive: true });

    fs.writeFileSync(htmlPath, section.content);
    await transferToPDF(htmlPath, pdfPath);
}
|
||
|
||
// https://wkhtmltopdf.org/
/**
 * Render a saved HTML fragment to a PDF file with wkhtmltopdf.
 *
 * The fragment read from `htmlFilePath` is wrapped in a minimal HTML document
 * (UTF-8 charset, images limited to the page width) and piped through
 * wkhtmltopdf into `pdfFilePath`. The returned promise settles only after the
 * PDF has been completely written, so callers that `await` it really do wait
 * for the conversion.
 *
 * @param {string} htmlFilePath - Path of the HTML fragment to convert.
 * @param {string} pdfFilePath - Path of the PDF file to create.
 * @returns {Promise<void>} Resolves once the PDF write stream has finished.
 * @throws {Error} When the HTML file cannot be read, or (as a rejection) when
 *     the converter stream or the output file stream reports an error.
 */
async function transferToPDF(htmlFilePath, pdfFilePath) {
    console.log(`开始转换 ${pdfFilePath}`);

    const fragment = fs.readFileSync(htmlFilePath, "utf8");
    const html = `
<html>
<head>
<meta charset="utf-8">
<style>
* {
/*
font-size: 30px;
font-size: large !important;
*/
}

img {
max-width: 100%;
}
</style>
</head>
<body>
${fragment}
</body>
</html>`;

    await new Promise((resolve, reject) => {
        const rendered = wkhtmltopdf(html, { pageSize: "A4", minimumFontSize: 10, disableSmartShrinking: true });
        const output = fs.createWriteStream(pdfFilePath);

        // pipe() does not close the destination when the source fails, so
        // release the file handle explicitly before reporting the error.
        rendered.on("error", (err) => {
            output.destroy();
            reject(err);
        });
        output.on("error", reject);
        // "finish" fires after all data has been flushed to the file.
        output.on("finish", resolve);

        rendered.pipe(output);
    });
}
|
||
|
||
// ========================================================
|
||
|
||
/**
 * Convert every `.html` file in `./output/html` to a PDF in `./output/pdf`.
 *
 * Files without the `.html` suffix are skipped. Each PDF keeps the name of its
 * HTML file with only the trailing `.html` replaced by `.pdf`. Conversions run
 * one after another through `transferToPDF`.
 *
 * @returns {Promise<void>}
 */
async function transferHTMLToPDF() {
    const htmlDir = "./output/html";
    const pdfDir = "./output/pdf";
    const htmlSuffix = ".html";

    // Make sure the destination exists before any conversion tries to write.
    fs.mkdirSync(pdfDir, { recursive: true });

    // Keep only the HTML files; everything else in the folder is ignored.
    const htmlFiles = fs.readdirSync(htmlDir).filter((name) => name.endsWith(htmlSuffix));

    for (const fileName of htmlFiles) {
        // Swap the suffix only: String.replace would rewrite the FIRST ".html"
        // and mangle names such as "a.html.bak.html".
        const pdfFileName = `${fileName.slice(0, -htmlSuffix.length)}.pdf`;
        await transferToPDF(`${htmlDir}/${fileName}`, `${pdfDir}/${pdfFileName}`);
    }
    console.log("完成");
}
|
||
|
||
// Crawl every section and convert each one to PDF.
// A failure is reported and turned into a non-zero exit code instead of
// surfacing as an unhandled promise rejection.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

// Already crawled; only convert the saved HTML files to PDF:
// transferHTMLToPDF();

// For testing / debugging a single file:
// transferToPDF(`./output/html/8.3 操作系统(三).html`, `./output/test.pdf`)
|