1
0
Code Pull Requests Projects Releases Activity GitHub Gitee
nowcoder-bible/main.js

128 lines
4.1 KiB
JavaScript
Raw Permalink Normal View History

2022-08-16 14:15:24 +08:00
const request = require('request');
const fs = require('fs');
2022-08-16 14:47:29 +08:00
const wkhtmltopdf = require('wkhtmltopdf');
2022-08-16 14:15:24 +08:00
/**
 * Issue an HTTP GET request and resolve with the raw response body.
 *
 * The returned promise never rejects: on a transport error or any non-200
 * status the failure is logged and the promise resolves with the sentinel
 * string "error", which callers must check before parsing the result.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<string>} The response body, or "error" on failure.
 */
async function getApiResult(url) {
    return new Promise((resolve) => {
        request({ method: 'GET', url }, (error, response, body) => {
            if (!error && response && response.statusCode === 200) {
                // Request succeeded.
                resolve(body);
                return;
            }
            // Request failed: report the transport error, or the HTTP status
            // when the server answered with something other than 200.
            const reason = error || `unexpected HTTP status ${response && response.statusCode}`;
            console.log(`error is ${reason}`);
            resolve("error");
        });
    });
}
// https://www.nowcoder.com/issue/tutorial?tutorialId=94
// Catalog: https://www.nowcoder.com/content/tutorial/catalog/94
// Article: https://www.nowcoder.com/content/tutorial/detail/94/ea1986fcff294f6292385703e94689e8
/**
 * Crawl the tutorial: fetch the catalog, then download and convert every
 * section it lists, one at a time and in catalog order.
 *
 * The catalog is read as an array of chapters, each chapter being an array
 * of section descriptors that carry a `uuid`.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the catalog request fails.
 */
async function main() {
    const urlContent = "https://www.nowcoder.com/content/tutorial/catalog/94";
    console.log(`获取目录 ${urlContent}`);

    const result = await getApiResult(urlContent);
    if (result === "error") {
        // getApiResult signals failure with a sentinel string; fail with a
        // clear message instead of an opaque JSON parse error.
        throw new Error(`Failed to fetch catalog from ${urlContent}`);
    }

    const catalog = JSON.parse(result).data.catalog;
    for (let chapter = 0; chapter < catalog.length; chapter++) {
        const sectionList = catalog[chapter];
        for (let section = 0; section < sectionList.length; section++) {
            // Sequential on purpose: each section is fetched and converted
            // before the next request is sent.
            await getDetail(sectionList[section].uuid);
        }
    }
    console.log("完成");
}
/**
 * Download one tutorial section, save its HTML content under ./output/html
 * and convert it to a PDF under ./output/pdf.
 *
 * Files are named "<chapterId>.<sectionId> <title>" with the matching
 * extension. Path separators in the title are replaced with "_" so the title
 * cannot redirect the output into another directory.
 *
 * @param {string} uuid - Section identifier taken from the catalog.
 * @returns {Promise<void>}
 * @throws {Error} When the section request fails.
 */
async function getDetail(uuid) {
    const urlDetail = `https://www.nowcoder.com/content/tutorial/detail/94/${uuid}`;
    console.log(`获取文章 ${urlDetail}`);

    const result = await getApiResult(urlDetail);
    if (result === "error") {
        // getApiResult signals failure with a sentinel string.
        throw new Error(`Failed to fetch section ${uuid} from ${urlDetail}`);
    }

    const section = JSON.parse(result).data.section;
    const baseName = `${section.chapterId}.${section.sectionId} ${section.title}`
        .replace(/[\\/]/g, "_");
    const htmlFilePath = `./output/html/${baseName}.html`;
    const pdfFilePath = `./output/pdf/${baseName}.pdf`;

    // Make sure both output directories exist before writing into them.
    fs.mkdirSync("./output/html", { recursive: true });
    fs.mkdirSync("./output/pdf", { recursive: true });

    fs.writeFileSync(htmlFilePath, section.content);
    await transferToPDF(htmlFilePath, pdfFilePath);
}
2022-08-16 14:47:29 +08:00
// https://wkhtmltopdf.org/
/**
 * Wrap a saved HTML fragment in a minimal UTF-8 page and render it to a PDF
 * file with wkhtmltopdf.
 *
 * The returned promise settles only once the PDF has been completely written
 * (or the conversion / file write has failed), so callers that `await` this
 * function really do process one document at a time.
 *
 * @param {string} htmlFilePath - Path of the HTML fragment to convert.
 * @param {string} pdfFilePath - Path of the PDF file to create.
 * @returns {Promise<void>}
 */
async function transferToPDF(htmlFilePath, pdfFilePath) {
    console.log(`开始转换 ${pdfFilePath}`);
    const html = `
<html>
<head>
<meta charset="utf-8">
<style>
* {
/*
font-size: 30px;
font-size: large !important;
*/
}
img {
max-width: 100%;
}
</style>
</head>
<body>
${fs.readFileSync(htmlFilePath, "utf8")}
</body>
</html>`;
    // fs.writeFileSync(`./output/test.html`, html);

    await new Promise((resolve, reject) => {
        const output = fs.createWriteStream(pdfFilePath);
        // "finish" fires after all data has been flushed to the file.
        output.on("finish", resolve);
        output.on("error", reject);

        const pdfStream = wkhtmltopdf(html, {
            pageSize: "A4",
            minimumFontSize: 10,
            disableSmartShrinking: true,
        });
        // Surface converter failures instead of leaving the promise pending.
        pdfStream.on("error", reject);
        pdfStream.pipe(output);
    });
}
// ========================================================
/**
 * Convert every `.html` file in ./output/html to a PDF with the same base
 * name in ./output/pdf, one file at a time.
 *
 * @returns {Promise<void>}
 */
async function transferHTMLToPDF() {
    const htmlExtension = ".html";
    const files = fs.readdirSync("./output/html");

    // Make sure the destination directory exists before converting.
    fs.mkdirSync("./output/pdf", { recursive: true });

    for (const fileName of files) {
        // Skip anything that is not an HTML file.
        if (!fileName.endsWith(htmlExtension)) continue;
        // Swap only the trailing extension; ".html" may also occur earlier
        // in the name and must be left alone there.
        const pdfFileName = `${fileName.slice(0, -htmlExtension.length)}.pdf`;
        await transferToPDF(`./output/html/${fileName}`, `./output/pdf/${pdfFileName}`);
    }
    console.log("完成");
}
// Crawl + convert to PDF.
main().catch((err) => {
    // Report the failure and exit non-zero instead of leaving the rejection unhandled.
    console.error(err);
    process.exitCode = 1;
});
// Already crawled; only the HTML -> PDF conversion is needed.
// transferHTMLToPDF();
// For testing / debugging.
// transferToPDF(`./output/html/8.3 操作系统(三).html`, `./output/test.pdf`)