1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

Compare commits

156 Commits
test ... master

Author SHA1 Message Date
92798436b0 批量下载文件脚本 2025-02-06 03:51:30 +08:00
8778ad53ab hotband 请求失败不保存 origin-error 目录下 .json 文件 2025-01-02 21:03:18 +08:00
12afb942a2 小报童-一人公司内容爬取 2024-06-25 02:42:25 +08:00
ac429e4cbf tools 添加 websocket server (nodejs & python) 2024-04-05 14:51:20 +08:00
4f4917a940 财智Aisino微信小程序音频课程下载 nodejs脚本 2024-02-01 22:16:12 +08:00
75be10be20 来此加密 SSL证书半自动申请脚本 2024-01-30 15:28:50 +08:00
a4b148ed49 netease_music 临时脚本更新 2024-01-23 23:17:26 +08:00
54817268d5 Merge branch 'master' of git.only4.work:coder-xiaomo/tools 2023-12-30 01:10:42 +08:00
f72dad74c3 netease_music 临时脚本暂存 2023-12-30 01:10:07 +08:00
e2a092be3c 更新爬取脚本 2023-12-29 23:25:27 +08:00
zhangbk1
175088644a 股票数据爬取 添加README.md 2023-12-27 17:32:52 +08:00
fc46e76d74 更新 netease_music 脚本 2023-12-25 16:48:57 +08:00
e3ea397f03 更新 netease_music 处理脚本 2023-12-25 10:57:46 +08:00
78642222cc 一些更新 2023-12-24 03:18:01 +08:00
zhangbk1
928dad71b9 add TCP Server - with retry (node demo) 2023-09-04 11:35:07 +08:00
87e38f5e26 添加numToChinese 数字转中文数字函数 2023-08-27 22:07:59 +08:00
e188b52a62 add "HTTP Server with POST body" 2023-08-01 22:14:52 +08:00
cc3a74468a server & client 新增 & 调整 2023-07-29 22:28:21 +08:00
7683831186 新增 websocket client(browser) 2023-07-29 22:01:50 +08:00
9a317f93e9 新增 websocket client(node) 2023-07-29 22:00:28 +08:00
155206f77a 新增 tcp、http server 2023-07-29 21:55:41 +08:00
7d42c5c928 Merge remote-tracking branch 'temp/main' 2023-07-16 11:52:38 +08:00
c7c5e7d29a 为仓库合并做准备 2023-07-16 11:51:42 +08:00
66d800f195 添加知乎书本下载 浏览器书签脚本 2023-07-13 10:25:00 +08:00
740e8332cb update 2023-06-27 15:07:09 +08:00
1293b1af33 添加微信公众号 推文信息 F12 提取 2023-06-04 17:05:43 +08:00
2b571c1e17 bugfix 2023-06-04 14:24:21 +08:00
ff4532f18b 取消不必要打印;执行完就退出 2023-06-04 11:19:04 +08:00
4ab2725343 netease_music 修改表结构;代码调整 2023-06-04 09:33:37 +08:00
70e15f7e49 netease_music 的一些改动 2023-06-03 00:10:47 +08:00
a29dc77417 netease_music 的一些改动 2023-06-02 18:03:41 +08:00
59336c15b6 添加 ParseHAR 2023-05-31 13:13:57 +08:00
78ec159bc4 B站热搜图标更新 2023-05-25 17:51:16 +08:00
8f1f348843 first commit 2023-03-15 15:58:01 +08:00
fa2810eaa6 Merge branch 'linux-command' 2023-01-30 15:21:19 +08:00
1eefb07eba 添加CSDN猿如意linux-command接口爬取 2023-01-30 15:20:57 +08:00
49f8afb973 fixed npm dependeny warning: 1 1 high severity vulnerability 2023-01-29 16:28:29 +08:00
412f39f78e 小改动 2022-12-17 16:26:54 +08:00
99a53fc759 处理 album 极少数情况下 image 正则匹配失败问题 2022-11-16 17:46:55 +08:00
67af823cde 添加字幕滚动代码(lyric) 2022-11-15 14:03:53 +08:00
08c7313ce0 解决偶现的数据库连接失败问题:Error: Handshake inactivity timeout
refer: https://www.it1352.com/1661615.html
2022-11-10 17:47:17 +08:00
72427338f4 暂存 2022-11-10 15:56:27 +08:00
5877674187 兼容 node 12 2022-11-10 15:52:47 +08:00
de815fed87 所有song_id都改为bigint(20) 2022-11-10 14:31:21 +08:00
3ec2ac1796 hifinimusic迁移到新数据库 2022-11-09 17:19:25 +08:00
8efbbec9c0 song_playlist_relation添加is_del字段 2022-11-09 16:50:30 +08:00
b7fc10de63 重写playlist爬取方法-01(调接口,而不是通过网易云音乐首页获取) 2022-11-09 16:15:48 +08:00
6582bf8d40 修改SQL的一些无关紧要的小问题 2022-11-09 16:14:10 +08:00
f4cdef2935 comment表添加索引 2022-11-09 14:12:31 +08:00
f22a9c5228 comment 打印 current_time 2022-11-09 13:32:49 +08:00
8e0406a485 从数据库中取得id之后打印出来 2022-11-09 11:18:54 +08:00
e48305ffaa SQL 小改动 2022-11-07 23:21:00 +08:00
b4af23b381 删除song_playlist_relation表中rcmd_reason字段 2022-11-07 17:56:49 +08:00
d17f4282e4 artist考虑极少数正则匹配内容不存在问题 2022-11-07 15:35:22 +08:00
e638b09313 assistant 不同时操作多张表,减少死锁概率 2022-11-07 15:16:05 +08:00
0b887580fc 修改统计语句 2022-11-07 15:05:05 +08:00
2878dbf111 添加 牛客网 面试题库 爬取脚本(存为JSON) 2022-11-06 17:34:16 +08:00
f46977d1b7 更新统计SQL 2022-10-31 16:47:47 +08:00
6b560ef47a bugfix 解决极少数album没有标题,正则匹配失败问题 2022-10-31 15:54:48 +08:00
6f359ae080 hifini爬取完成 2022-10-29 23:46:34 +08:00
5015c6007b hifini 增大等待时长,降低数据库查询次数 2022-10-29 17:56:01 +08:00
d332563905 输出对齐(填充空格) 2022-10-29 17:40:25 +08:00
c02dcdf814 assistant添加异常捕获,避免数据库死锁异常造成assistant中断 2022-10-29 17:18:24 +08:00
fae4db04a3 bugfix hifini爬取时,一个thread中包含多首音乐时,获取real链接时重复获取问题 2022-10-29 16:40:41 +08:00
8a5cd1225a update 2022-10-29 16:09:01 +08:00
b93e1598e6 hifini 添加 order limit 参数 2022-10-29 14:40:58 +08:00
71eb3b7ce5 album,artist请求增加超时时间;小调整 2022-10-29 14:32:51 +08:00
bf74284ab2 bugfix 2022-10-29 11:53:14 +08:00
d3245038d7 bugfix;添加auto.bat 2022-10-29 01:21:48 +08:00
ce20720c60 添加 assistant 助手;其他调整(大调整) 2022-10-29 00:16:36 +08:00
ddde1b28f9 添加SQL建表语句;考虑音乐页面多收首歌的情况;Bugfix 2022-10-28 01:14:44 +08:00
6ce6b0cd46 添加hifini_music爬虫代码 2022-10-28 00:23:50 +08:00
a39fa9dc18 hotband 修复一处拼写错误 2022-10-26 20:47:15 +08:00
18c149bbc0 Merge branch 'master' of git.only4.work:coder-xiaomo/tools 2022-10-26 20:44:30 +08:00
19e69f9bfd 添加sql导入脚本生成工具 2022-10-26 20:44:07 +08:00
d1c620e942 修hotband/改pack.bat为ASCII编码 2022-10-26 20:41:56 +08:00
66d5f89b02 更新SQL导出支持 2022-10-26 16:19:46 +08:00
aa5e239155 更新SQL 2022-10-26 15:33:07 +08:00
63c102ef7b optimize table 2022-10-26 01:13:41 +08:00
e79c70595b dbPoolUtils添加事务支持 2022-10-25 20:19:03 +08:00
af677e3d0e 删除 songInfoUtils.old.js 2022-10-25 20:18:37 +08:00
3660fefda4 插入关联表时同事插入wait_check表;统一查询将要爬取的id代码到dataManager.js 2022-10-25 19:36:05 +08:00
4753fd55ae 插入数据方法抽离为 dataManager.js 2022-10-25 16:25:23 +08:00
c068085385 update 2022-10-25 15:16:59 +08:00
9a8565a1e3 SQL ?传递array要考虑数组为空情况 2022-10-20 22:31:13 +08:00
4f3aa180a0 bugfix 2022-10-20 13:38:33 +08:00
7ec08d1e55 解决 order by 问题 2022-10-20 01:17:03 +08:00
dc80d8d527 去除多余DISTICT 2022-10-20 01:12:49 +08:00
5c2ea41a14 一个拼写错误 2022-10-20 00:56:06 +08:00
4a245ceba4 UNION查出来后使用代码拼接 2022-10-20 00:53:34 +08:00
e521c139e2 避免重复查库;DISTICT查出来后通过代码去重 2022-10-20 00:48:31 +08:00
0862db5db9 playlist修bug 2022-10-20 00:30:09 +08:00
896e66de53 node watch添加playlist相关统计 2022-10-19 23:57:53 +08:00
102cf25060 爬完提示 2022-10-19 23:43:46 +08:00
903d64b85b Merge branch 'qianqian' 2022-10-19 23:40:22 +08:00
da900dc2b1 update 2022-10-19 23:39:54 +08:00
d864d2ab5f 爬取歌单 简单代码 2022-10-19 17:22:49 +08:00
8451e7a849 update 2022-10-19 13:37:06 +08:00
c17c3ae5a1 update node watch 2022-10-18 14:42:39 +08:00
b2d32c5844 不删除空数据 2022-10-18 01:10:49 +08:00
dc1e3e4bde 爬取新数据 2022-10-18 00:58:53 +08:00
1917b17975 分析进度慢的原因 2022-10-18 00:50:45 +08:00
35d911a1f9 update 2022-10-18 00:37:48 +08:00
95065be3d7 small update 2022-10-17 22:28:33 +08:00
ade6e4f9e9 完成songInfo的更新 2022-10-17 21:47:37 +08:00
7b866cdaf9 小改动 2022-10-17 13:58:10 +08:00
02ac6091f4 weibo-hotband-bot 改为 hotband-bot 2022-10-17 13:48:59 +08:00
4bf451e337 小改动 2022-10-17 13:40:41 +08:00
62d0eb8cea Merge brepo 'hot-band' 2022-10-17 13:19:35 +08:00
8dbc539ced 合并仓库前的准备 2022-10-17 13:11:15 +08:00
04eb563794 合并仓库前的准备 2022-10-17 13:09:59 +08:00
c11080a6f6 add pack.bat 2022-10-15 21:20:07 +08:00
2ccb0e3c1f ?.兼容node v12.13.1 2022-10-09 17:44:37 +08:00
7afc7f79e2 ??改为|| 兼容node v12.13.1 2022-10-09 17:37:57 +08:00
c28fca34be 添加索引 2022-10-08 21:57:22 +08:00
kevinbzhang
ba4ab91bb8 update package-lock.json 2022-10-08 11:54:37 +08:00
140c8bd61e update 2022-10-08 07:49:03 +08:00
88bb8a4b29 update 2022-10-07 13:09:27 +08:00
00fa195c70 update 2022-10-06 22:45:10 +08:00
321b207d27 更新todo 2022-10-06 21:25:42 +08:00
be2658375c 通过命令行指定爬取参数,不用再修改代码了 2022-10-06 21:06:09 +08:00
9db9383934 update 2022-10-06 14:01:05 +08:00
6c3a6d9aaf 去掉 NOT IN 之后子查询的 DISTINCT 2022-10-05 12:06:44 +08:00
b35918faef 数据库添加索引;update 2022-10-05 11:41:30 +08:00
50d3555dd7 update 2022-10-04 14:12:33 +08:00
b4d63489c3 update 2022-10-03 14:32:04 +08:00
e778b455ed update 2022-10-03 01:01:25 +08:00
dfab62b437 watch update 2022-10-02 23:48:07 +08:00
326201fb2f update 2022-10-02 19:16:41 +08:00
1939398579 update 2022-10-02 17:37:28 +08:00
93db6371d9 update 2022-10-02 12:03:36 +08:00
7ced382c9f update 2022-10-02 11:45:53 +08:00
d6c78a28bd update 2022-10-02 01:53:00 +08:00
2ff2758fc8 update 2022-10-02 01:20:48 +08:00
3e1ef431a7 update 2022-10-01 23:27:52 +08:00
3dcb71b5a3 fetchAll从index.js提到各个utils中 2022-10-01 22:00:29 +08:00
5d2bfccb4b update 2022-10-01 21:25:37 +08:00
c98d453e14 update 2022-10-01 21:09:05 +08:00
ba395bac47 update 2022-10-01 20:35:42 +08:00
cf4449604d 使用连接池;退出检查优化 2022-10-01 19:45:49 +08:00
074cbc124f update 2022-10-01 18:53:19 +08:00
64834ae7f9 update 2022-10-01 16:16:39 +08:00
37fe49c53c 代码拆分出Utils 2022-10-01 11:41:20 +08:00
7004fe8858 update 2022-10-01 01:47:27 +08:00
b85653ec72 update 2022-09-30 21:33:46 +08:00
e835580358 1 2022-09-30 08:20:55 +08:00
2a72b59dce add netease music 2022-09-30 08:06:14 +08:00
08f303de8f bugfix 2022-09-30 00:52:56 +08:00
5b31b4bf98 add utils 2022-09-30 00:51:08 +08:00
06eeb65eac init 2022-09-29 22:33:16 +08:00
ea533eeb29 删掉一个多余的 - 2022-08-14 21:14:44 +08:00
45cb904d65 解决 Cannot read properties of null (reading 'hotgov') 问题 2022-08-05 22:42:57 +08:00
c8958a9de7 add LICENSE. 2022-07-31 13:55:43 +00:00
51f6c2de1f 微博热搜不保存 simplify 和 regulation 数据 2022-07-30 22:10:32 +08:00
590e209b2c fileUtils 创建目录方法更新,解决创建当前文件夹上一级文件夹时报错问题 2022-07-30 14:49:13 +08:00
6bd81a210f 请求失败返回 {} 而不是 null,否则后面 result.ok 会报错 2022-07-30 14:29:34 +08:00
164 changed files with 17871 additions and 213 deletions

14
.gitignore vendored
View File

@@ -1,8 +1,6 @@
.DS_Store
data/*
.env
node_modules
.VSCodeCounter
test.js
node_modules
config.json
.VSCodeCounter
.DS_Store

53
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,53 @@
{
// 使用 IntelliSense 了解相关属性。
// 悬停以查看现有属性的描述。
// 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "hifini_music node index",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\hifini_music\\index.js"
},
{
"type": "node",
"request": "launch",
"name": "netease_music node index",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\netease_music\\index.js"
},
{
"type": "node",
"request": "launch",
"name": "netease_music node update",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\netease_music\\update.js"
},
{
"type": "node",
"request": "launch",
"name": "netease_music node test",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\netease_music\\test.js"
},
{
"type": "node",
"request": "launch",
"name": "netease_music node watch",
"skipFiles": [
"<node_internals>/**"
],
"program": "${workspaceFolder}\\netease_music\\watch.js"
}
]
}

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 程序员小墨
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

1
batch-download/README.md Normal file
View File

@@ -0,0 +1 @@
# 文件批量下载工具

53
batch-download/index.js Normal file
View File

@@ -0,0 +1,53 @@
const axios = require('axios');
const fs = require('fs');
const path = require('path');
// 示例图片 URL 数组
const imageUrls = [
// 'https://example.com/image1.jpg',
// 'https://example.com/image2.jpg',
// 'https://example.com/image3.jpg',
];
for (let i = 1; i <= 100; i++) {
imageUrls.push(`https://example.com/image${i}.jpg`)
}
// 执行下载
downloadImages(imageUrls);
/**
 * Download every URL in `urls`, one after another, into the `downloads`
 * folder located next to this script.
 *
 * Files are named after the 1-based position of the URL in `urls`
 * (1.jpg, 2.jpg, ...). A failure on one URL is logged and does not stop the
 * remaining downloads.
 *
 * @param {Iterable<string>} urls - Image URLs to download.
 * @returns {Promise<void>} Resolves once every download has either been fully
 *   written to disk or has failed and been logged.
 */
async function downloadImages(urls) {
    const downloadDir = path.join(__dirname, 'downloads');
    let position = 0;

    for (const url of urls) {
        // Name by position in the list actually being processed. Looking the URL
        // up with indexOf() on the global array was O(n) per item and made
        // duplicate URLs overwrite the same file.
        position += 1;
        const imageName = `${position}.jpg`;
        const imagePath = path.join(downloadDir, imageName);

        let response;
        try {
            response = await axios({
                method: 'get',
                url: url,
                responseType: 'stream', // ask axios for the body as a readable stream
            });
        } catch (err) {
            console.error(`下载图片出错:${url}`, err);
            continue;
        }

        try {
            // recursive mkdir is a no-op when the directory already exists
            fs.mkdirSync(downloadDir, { recursive: true });

            // Wait until the file is completely flushed, so that the returned
            // promise only settles after the data is really on disk.
            await new Promise((resolve, reject) => {
                const writer = fs.createWriteStream(imagePath);
                writer.on('finish', resolve);
                writer.on('error', reject);
                // pipe() does not forward source errors; handle them explicitly
                response.data.on('error', (err) => {
                    writer.destroy();
                    reject(err);
                });
                response.data.pipe(writer);
            });
            console.log(`图片下载完成:${imageName}`);
        } catch (err) {
            console.error(`下载图片失败:${imageName}`, err);
        }
    }
}

13
config.example.json Normal file
View File

@@ -0,0 +1,13 @@
{
"mysql": {
"charset": "utf8mb4",
"host": "localhost",
"user": "root",
"password": "root",
"port": 3306,
"database": "",
"connectTimeout": 3600000,
"acquireTimeout": 3600000,
"timeout": 3600000
}
}

View File

@@ -0,0 +1,2 @@
start cmd /k "node index --order DESC --limit 200"
exit

2
hifini_music/auto.bat Normal file
View File

@@ -0,0 +1,2 @@
start cmd /k "node index --order ASC --limit 200"
exit

180
hifini_music/index.js Normal file
View File

@@ -0,0 +1,180 @@
const fs = require('fs');
const { getApiResult } = require('../utils/requestUtils');
const dbUtils = require("../utils/dbPoolUtils");
const sleepUtils = require("../utils/sleepUtils");
// 数据库连接池
dbUtils.create({
database: "hifinimusic", // 指定数据库
connectionLimit: 10, // 设置数据库连接池数量
});
global.dbUtils = dbUtils;
const dataManager = require('./src/dataManager');
const requestUtils = require('../utils/requestUtils');
/**
 * Entry point: reads the `--order` / `--limit` command line options into
 * `global.args` and starts two independent polling loops, one that fetches
 * thread details and one that resolves real music URLs.
 *
 * Each loop re-runs 10 seconds after the previous round finished. List
 * crawling (`getList`) is not scheduled here.
 */
async function main() {
    var args = require('minimist')(process.argv.slice(2));
    global.args = {
        "order": args.order,
        "limit": args.limit,
    }

    const POLL_INTERVAL_MS = 10 * 1000;

    // Run `task`, then schedule the next round. A failing round (for example a
    // database or network error) is logged instead of silently ending the loop
    // with an unhandled rejection.
    const poll = async (name, task) => {
        try {
            await task();
        } catch (err) {
            console.error(`${name} failed, retrying in ${POLL_INTERVAL_MS / 1000}s`, err);
        }
        setTimeout(() => { void poll(name, task); }, POLL_INTERVAL_MS);
    };

    // Intentionally not awaited: both loops run concurrently and never finish.
    void poll("startFetchDetail", startFetchDetail);
    void poll("startFetchRealUrl", startFetchRealUrl);
}
/**
 * Crawl the forum list pages and store every thread link found on them.
 *
 * Pages 1..23 of forum 1 are requested in order (sorted by thread id); the
 * thread id and title of each `thread-<id>.htm` anchor are upserted through
 * `dataManager.thread.insertCollection`, followed by a one second pause.
 */
async function getList() {
    const forumId = 1;    // forum (category) id
    const beginPage = 1;  // first page to crawl
    const endPage = 23;   // last page to crawl

    let page = beginPage;
    while (page <= endPage) {
        // orderby=tid sorts the listing by thread id
        const url = `https://hifini.com/forum-${forumId}-${page}.htm?orderby=tid`;
        console.log(`getList \t| ${beginPage}/${page}/${endPage} | forumId: ${forumId} | ${url}`);

        const html = await getApiResult(url);

        // One record per thread anchor found in the page
        const threadList = [];
        for (const found of html.matchAll(/<a href="thread-(\d{1,15}).htm">(.*?)<\/a>/g)) {
            const [, idText, titleText] = found;
            threadList.push({
                forum_id: forumId,
                thread_id: Number(idText),
                title: titleText
            });
        }

        await dataManager.thread.insertCollection(threadList);
        await sleepUtils.sleep(1000);
        page += 1;
    }
}
/**
 * Fetch the detail page of every thread that `dataManager.thread.getIdsToFetch`
 * reports, one at a time, logging the progress before each request.
 */
async function startFetchDetail() {
    const pendingRows = await dataManager.thread.getIdsToFetch();
    const threadIds = pendingRows.map(row => row.thread_id);
    const total = threadIds.length;

    let position = 0;
    for (const threadId of threadIds) {
        position += 1;
        console.log(`getDetail\t| ${position}/${total} | threadId: ${threadId}`);
        await getDetail(threadId);
    }
}
/**
 * Fetch one thread page, extract the APlayer music list and the thread tags,
 * and store them through `dataManager`.
 *
 * Outcomes:
 *  - request failure: logged, nothing is written;
 *  - no `var ap4 = new APlayer(...)` call in the page: the row with
 *    music_index 0 is marked with music_title "未解析到音乐";
 *  - player config cannot be evaluated or has no `music` array: logged,
 *    nothing is written;
 *  - otherwise: tags and tag relations are upserted, one row per extra song
 *    is created when the thread has several songs, and every song row is
 *    updated.
 *
 * @param {number} threadId - Id of the thread to fetch.
 * @returns {Promise<void>}
 */
async function getDetail(threadId) {
    let url = `https://hifini.com/thread-${threadId}.htm`;

    let html;
    try {
        html = await getApiResult(url, { timeout: 3000 });
    } catch (e) {
        console.error("请求失败,可能是请求超时", e);
        return;
    }

    // Locate the argument of the APlayer constructor call embedded in the page
    var playerMatch = /var ap4 = new APlayer\(([\S\s]*?)\);/.exec(html);
    if (!playerMatch) {
        await dataManager.thread.update(threadId, 0, { music_title: "未解析到音乐" });
        console.log("未解析到音乐,跳过");
        return;
    }

    var arr; // assigned by the eval below (sloppy-mode direct eval shares this scope)
    var musicArr;
    try {
        let arrStr = playerMatch[1];
        // SECURITY: this evaluates JavaScript taken from a remote page inside the
        // crawler process. `document` is stubbed because the config references
        // document.getElementById(...). Only run against a site you trust;
        // consider a sandbox (e.g. the `vm` module) or a real parser instead.
        eval(`let document = { getElementById: () => {} }; var arr = ${arrStr};`);
        musicArr = arr.music;
    } catch (e) {
        console.error("解析失败", e);
        return;
    }

    // Without this guard a config lacking a `music` array would throw an
    // uncaught TypeError further down, after the tags had been written.
    if (!Array.isArray(musicArr)) {
        console.error("解析失败", new TypeError("APlayer config does not contain a music array"));
        return;
    }

    // Collect the tags shown on the thread page
    var tagList = [];
    for (const tagMatch of html.matchAll(/<a href='tag-(\d{1,15}).htm'><i class="icon-tag"><\/i>(.*?)<\/a>/g)) {
        tagList.push({
            tag_id: Number(tagMatch[1]),
            tag_name: tagMatch[2]
        });
    }
    await dataManager.tag.insertCollection(tagList);
    await dataManager.thread_tag.insertCollection(tagList.map(tag => {
        return {
            thread_id: threadId,
            tag_id: tag.tag_id
        };
    }));

    // A thread with several songs needs one row per (thread_id, music_index)
    if (musicArr.length > 1) {
        console.log("典型thread_id:", threadId);
        await dataManager.thread.insertCollection(musicArr.map((music, i) => {
            return {
                thread_id: threadId,
                music_index: i
            }
        }));
    }

    for (let i = 0; i < musicArr.length; i++) {
        const music = musicArr[i];
        await dataManager.thread.update(threadId, i, {
            music_title: music.title,
            music_author: music.author || "",
            music_url: music.url,
            music_pic: music.pic || ""
        });
    }
}
/**
 * Resolve the real download address for every row returned by
 * `dataManager.thread.getIdsToFetchRealUrl`, one at a time, logging the
 * progress before each request.
 */
async function startFetchRealUrl() {
    const pendingRows = await dataManager.thread.getIdsToFetchRealUrl();
    const total = pendingRows.length;

    let position = 0;
    for (const row of pendingRows) {
        position += 1;
        console.log(`getRealUrl\t| ${position}/${total} | threadId: ${row.thread_id} | music_index: ${row.music_index}`);
        await getRealUrl(row.thread_id, row.music_index, row.music_url);
    }
}
/**
 * Follow the redirect behind a song's site-relative URL and store the
 * resulting address in `music_real_url`.
 *
 * When the redirect cannot be resolved, the placeholder "原地址已失效" is
 * stored instead.
 *
 * @param {number} threadId - Thread the song belongs to.
 * @param {number} musicIndex - Index of the song inside the thread.
 * @param {string} fakeUrl - Site-relative URL that redirects to the real file.
 * @returns {Promise<void>}
 */
async function getRealUrl(threadId, musicIndex, fakeUrl) {
    let url = "原地址已失效";
    try {
        url = await requestUtils.getRedirectUrl(`https://hifini.com/${fakeUrl}`);
    } catch (e) {
        // Keep the placeholder, but do not hide why the lookup failed
        console.log("重定向地址获取失败", e);
    }
    // The query result is not needed; assigning it to an undeclared `result`
    // created an implicit global (and throws in strict mode).
    await dataManager.thread.update(threadId, musicIndex, { music_real_url: url });
}
main();

View File

@@ -0,0 +1,53 @@
SET NAMES utf8mb4;
-- ----------------------------
-- Table structure for hifini_forum
-- ----------------------------
DROP TABLE IF EXISTS `hifini_forum`;
CREATE TABLE `hifini_forum` (
`forum_id` int(10) UNSIGNED NOT NULL COMMENT 'id',
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '名称',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`forum_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for hifini_tag
-- ----------------------------
DROP TABLE IF EXISTS `hifini_tag`;
CREATE TABLE `hifini_tag` (
`tag_id` int(10) UNSIGNED NOT NULL,
`tag_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
PRIMARY KEY (`tag_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for hifini_thread
-- ----------------------------
DROP TABLE IF EXISTS `hifini_thread`;
CREATE TABLE `hifini_thread` (
`thread_id` int(10) UNSIGNED NOT NULL COMMENT 'id',
`music_index` int(10) UNSIGNED NOT NULL DEFAULT 0 COMMENT '与id组成联合主键考虑一个页面包含多首歌的情况',
`forum_id` int(10) UNSIGNED NOT NULL COMMENT '分类id',
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '名称',
`music_title` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '',
`music_author` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '',
`music_url` varchar(1000) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '音乐网址',
`music_real_url` varchar(1000) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '音乐真实地址',
`music_pic` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '音乐封面图地址',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`thread_id`, `music_index`) USING BTREE,
INDEX `forum_id`(`forum_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for hifini_thread_tag_relation
-- ----------------------------
DROP TABLE IF EXISTS `hifini_thread_tag_relation`;
-- Link table between hifini_thread.thread_id and hifini_tag.tag_id.
-- tag_id is UNSIGNED so that its type matches hifini_tag.tag_id.
CREATE TABLE `hifini_thread_tag_relation` (
`thread_id` int(10) UNSIGNED NOT NULL,
`tag_id` int(10) UNSIGNED NOT NULL,
PRIMARY KEY (`thread_id`, `tag_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;

View File

@@ -0,0 +1,2 @@
SELECT count(*) FROM hifini_thread WHERE music_title='' and music_pic='' and music_url='';
SELECT count(*) FROM hifini_thread WHERE music_url like 'get_music.php?key=%' and music_real_url='';

View File

@@ -0,0 +1,55 @@
const dbUtils = global.dbUtils;
/**
 * Bulk upsert: insert all rows of `dataList` into `tableName`, overwriting the
 * listed columns when a row with the same key already exists.
 *
 * The column list is taken from the keys of the first row; every row's values
 * are read in that same column order, so rows whose keys were created in a
 * different order still line up with the right columns.
 *
 * @param {string} tableName - Target table (trusted, not user input).
 * @param {Object[]} dataList - Rows to upsert; nothing is done when empty.
 * @returns {Promise<*>} Result of `dbUtils.query`, or undefined when there is nothing to insert.
 */
let insertCollectionTemplate = async (tableName, dataList) => {
    if (!Array.isArray(dataList) || dataList.length === 0) return;

    const fields = Object.keys(dataList[0]);
    // Quote identifiers in both the column list and the update clause
    const quotedFields = fields.map(field => `\`${field}\``);
    const updateClause = quotedFields.map(field => `${field}=VALUES(${field})`).join(", ");
    const rows = dataList.map(item => fields.map(field => item[field]));

    return await dbUtils.query(`
        INSERT INTO ${tableName} ( ${quotedFields.join(",")} ) VALUES ?
        ON DUPLICATE KEY UPDATE ${updateClause}
    `, [rows]);
}
module.exports = {
thread: {
insertCollection: async (threadList) => {
return await insertCollectionTemplate("hifini_thread", threadList);
},
update: async (threadId, musicIndex, threadInfo) => {
return await dbUtils.query(`UPDATE hifini_thread SET ? WHERE thread_id = ${threadId} and music_index = ${musicIndex}`, threadInfo);
},
getIdsToFetch: async () => {
let sql = `
SELECT thread_id FROM hifini_thread WHERE music_title='' and music_pic='' and music_url=''
${global.args?.order ? `ORDER BY thread_id ${global.args.order}` : ""}
${global.args?.limit ? `LIMIT ${global.args.limit}` : ""}
`;
console.log(sql);
return await dbUtils.query(sql);
},
getIdsToFetchRealUrl: async () => {
let sql = `
SELECT thread_id,music_index,music_url FROM hifini_thread WHERE music_url like 'get_music.php?key=%' and music_real_url=''
${global.args?.order ? `ORDER BY thread_id ${global.args.order}` : ""}
${global.args?.limit ? `LIMIT ${global.args.limit}` : ""}
`;
console.log(sql);
return await dbUtils.query(sql);
}
},
tag: {
insertCollection: async (tagList) => {
return await insertCollectionTemplate("hifini_tag", tagList);
},
},
thread_tag: {
insertCollection: async (tagList) => {
return await insertCollectionTemplate("hifini_thread_tag_relation", tagList);
},
},
};

View File

@@ -1,20 +1,20 @@
# 调试模式
# 1为开启调试
DEBUG_MODE=1
# 爬取数据保存的文件夹
# 目录开头与结尾的 [./] [/] [\] [\\] 均可带可不带
# 默认为 data 文件夹
DATA_FOLDER=data
# 是否在程序刚一启动时就抓取一次数据
# 1为是
EXECUTE_AT_STARTUP=1
# 数据是否推送到Git仓库
# 1为是
PUSH_TO_GIT=0
# 是否仅保存 latest.json 而不保存其他文件作为存档
# 1为是
# 调试模式
# 1为开启调试
DEBUG_MODE=1
# 爬取数据保存的文件夹
# 目录开头与结尾的 [./] [/] [\] [\\] 均可带可不带
# 默认为 data 文件夹
DATA_FOLDER=data
# 是否在程序刚一启动时就抓取一次数据
# 1为是
EXECUTE_AT_STARTUP=1
# 数据是否推送到Git仓库
# 1为是
PUSH_TO_GIT=0
# 是否仅保存 latest.json 而不保存其他文件作为存档
# 1为是
LATEST_DATA_ONLY=0

4
hotband/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
data/*
.env
test.js

View File

@@ -83,7 +83,7 @@ npm i
# node index.js
# 使用 pm2
# pm2 start index.js --name weibo-hotband-bot
# pm2 start index.js --name hotband-bot
```
5. 停止项目
@@ -92,8 +92,8 @@ npm i
# 使用 node index.js 命令直接运行的项目可以通过 `Ctrl + C` 停止
# 使用 pm2 运行的可以使用以下两行命令来停止和从列表中删除项目
# pm2 stop weibo-hotband-bot
# pm2 delete weibo-hotband-bot
# pm2 stop hotband-bot
# pm2 delete hotband-bot
```
@@ -112,11 +112,15 @@ npm i
`origin` 文件夹中的数据是通过Api接口获取到的原始数据没有经过任何处理。
<!--
`simplify` 文件夹中的数据是在原始数据的基础上,去除了部分冗余数据。
-->
`final` 文件夹中的数据是从原始数据中抽离出的有用数据,并重新整理得到的。
<!--
`regulation` 文件夹中的数据主要用于观测原始值与显示值不同的热搜,这部分热搜猜测可能是经过微博平台调控的。(这部分数据没有太大意义,可以忽略)
-->
@@ -154,20 +158,20 @@ npm i
hotband // 本项目
├─ data // 爬取的数据(启动项目后自动创建)
├─ html // html 页面
│ ├─ assets
│ ├─ assets
│ │ ├─ css // CSS 样式
│ │ │ └─
│ │ │ └─
│ │ ├─ image // 前端图片资源
│ │ │ ├─ ...
│ │ └─ js
│ │ └─ isMobile.js
│ ├─ bilibili_hotband.html
│ ├─ bilibili_rank.html
│ └─ weibo_hotband.html
│ │ │ ├─ ...
│ │ └─ js
│ │ └─ isMobile.js
│ ├─ bilibili_hotband.html
│ ├─ bilibili_rank.html
│ └─ weibo_hotband.html
├─ src // 数据爬取核心代码
│ ├─ utils // 工具类代码
│ │ ├─ fileUtils.js
│ │ └─ requestUtils.js
│ │ ├─ fileUtils.js
│ │ └─ requestUtils.js
│ ├─ execute_command.js // 执行命令行脚本(暂时没用到)
│ ├─ get_bilibili_hotband.js // 获取 B站热搜榜 代码
│ ├─ get_bilibili_rank.js // 爬取 B站排行榜 代码
@@ -176,15 +180,15 @@ hotband // 本项目
├─ .env // 项目配置文件(需要自行创建)
├─ index.html // html 页面打开文件
├─ index.js // node 项目启动入口文件
├─ nodemon.json
├─ package-lock.json
├─ package.json
├─ pm2 restart.bat
├─ pm2 restart.sh
├─ pm2 start.bat
├─ pm2 start.sh
├─ pm2 stop.bat
├─ pm2 stop.sh
├─ nodemon.json
├─ package-lock.json
├─ package.json
├─ pm2 restart.bat
├─ pm2 restart.sh
├─ pm2 start.bat
├─ pm2 start.sh
├─ pm2 stop.bat
├─ pm2 stop.sh
└─ README.md // 项目自述文件
```
@@ -195,28 +199,28 @@ hotband // 本项目
data 文件夹下的目录结构如下
```bash
data
├─ bilibili-hotband
│ ├─ final / origin
data
├─ bilibili-hotband
│ ├─ final / origin
│ │ └─ xxxx // 年
│ │ └─ xx // 月
│ │ └─ xx // 日
│ │ ├─ xxxxxxxx_xxxx.min.json // 年月日_时分秒.min.json
│ └─ latest.json // 最新的json文件
├─ bilibili-rank
│ ├─ origin
├─ bilibili-rank
│ ├─ origin
│ │ └─ xxxx // 年
│ │ └─ xx // 月
│ │ └─ xx // 日
│ │ ├─ xxxxxxxx_xxxx.min.json // 年月日_时分秒.min.json
│ └─ latest.json // 最新的json文件
└─ weibo-hotband
├─ origin / final / simplify
└─ weibo-hotband
├─ origin / final / simplify
│ └─ xxxx // 年
│ └─ xx // 月
│ └─ xx // 日
│ ├─ xxxxxxxx_xxxx.min.json // 年月日_时分秒.min.json
├─ regulation
├─ regulation
│ └─ xxxx // 年
│ └─ xx // 月
│ └─ xx // 日

View File

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

View File

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

View File

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 2.0 KiB

View File

@@ -74,6 +74,12 @@
</script>
<script>
let iconMapper = {
// 2023.05.25 更新
"http://i0.hdslb.com/bfs/activity-plat/static/20221118/eaf2dd702d7cc14d8d9511190245d057/UF7B1wVKT2.png": ["新", "#FFB027"],
"http://i0.hdslb.com/bfs/activity-plat/static/20221213/eaf2dd702d7cc14d8d9511190245d057/lrx9rnKo24.png": ["热", "#F85A54"],
"http://i0.hdslb.com/bfs/activity-plat/static/20221117/eaf2dd702d7cc14d8d9511190245d057/nhoSO8rRli.png": ["话题", "#FF6699"],
"http://i0.hdslb.com/bfs/activity-plat/static/20221117/eaf2dd702d7cc14d8d9511190245d057/EeuqbMwao9.png": ["梗", "#FF6699"],
"http://i0.hdslb.com/bfs/feed-admin/e9e7a2d8497d4063421b685e72680bf1cfb99a0d.png": ["热", "#FF895C"],
"http://i0.hdslb.com/bfs/feed-admin/4d579fb61f9655316582db193118bba3a721eec0.png": ["新", "#F87399"],
}

View File

@@ -1,8 +1,8 @@
{
"ignore": [
".git",
".svn",
"node_modules/**/node_modules"
],
"ext": "js"
{
"ignore": [
".git",
".svn",
"node_modules/**/node_modules"
],
"ext": "js"
}

40
hotband/pack.bat Normal file
View File

@@ -0,0 +1,40 @@
set f_year=2022
set f_month=10
ren data data_for_backup
cd ./data_for_backup
cd ./bilibili-hotband
del /f /s/q latest.json
cd ./final/%f_year%/%f_month%
for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip<69><70><EFBFBD><EFBFBD>ɫ<EFBFBD>棩\7z.exe" a "%%~ni.zip" "%%i" -sdel )
cd ../../../
cd ./origin/%f_year%/%f_month%
for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip<69><70><EFBFBD><EFBFBD>ɫ<EFBFBD>棩\7z.exe" a "%%~ni.zip" "%%i" -sdel )
cd ../../../
cd ../
cd ./bilibili-rank
del /f /s/q latest.json
cd ./origin/%f_year%/%f_month%
for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip<69><70><EFBFBD><EFBFBD>ɫ<EFBFBD>棩\7z.exe" a "%%~ni.zip" "%%i" -sdel )
cd ../../../
cd ../
cd ./weibo-hotband
del /f /s/q latest.json
cd ./final/%f_year%/%f_month%
for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip<69><70><EFBFBD><EFBFBD>ɫ<EFBFBD>棩\7z.exe" a "%%~ni.zip" "%%i" -sdel )
cd ../../../
cd ./origin/%f_year%/%f_month%
for /d %%i in (*) do ( "C:\Users\Administrator\Desktop\7-Zip<69><70><EFBFBD><EFBFBD>ɫ<EFBFBD>棩\7z.exe" a "%%~ni.zip" "%%i" -sdel )
cd ../../../
cd ../../
pause

1
hotband/pm2 restart.bat Normal file
View File

@@ -0,0 +1 @@
pm2 restart hotband-bot

1
hotband/pm2 restart.sh Normal file
View File

@@ -0,0 +1 @@
pm2 restart hotband-bot

1
hotband/pm2 start.bat Normal file
View File

@@ -0,0 +1 @@
pm2 start index.js --name hotband-bot

1
hotband/pm2 start.sh Normal file
View File

@@ -0,0 +1 @@
pm2 start index.js --name hotband-bot

2
hotband/pm2 stop.bat Normal file
View File

@@ -0,0 +1,2 @@
pm2 stop hotband-bot
pm2 delete hotband-bot

2
hotband/pm2 stop.sh Normal file
View File

@@ -0,0 +1,2 @@
pm2 stop hotband-bot
pm2 delete hotband-bot

View File

@@ -1,40 +1,40 @@
'use strict';
const child_process = require('child_process');
const iconv = require("iconv-lite");
const encoding = "cp936";
const bufferEncoding = "binary";
async function execute(rootPath, cmds) {
let outputs = [];
for (let cmd of cmds) {
let result = await new Promise(function (resolve) {
// refer: https://www.webhek.com/post/execute-a-command-line-binary-with-node-js/
child_process.exec(cmd, {
cwd: rootPath, // 脚本执行目录
encoding: bufferEncoding
}, function (err, stdout, stderr) {
if (err) {
resolve({
cmd: cmd,
err: err,
// err_stack: iconv.decode(Buffer.from(err.stack, bufferEncoding), encoding),
// err_message: iconv.decode(Buffer.from(err.message, bufferEncoding), encoding),
});
} else {
// 获取命令执行的输出
resolve({
cmd: cmd,
stdout: iconv.decode(Buffer.from(stdout, bufferEncoding), encoding),
stderr: iconv.decode(Buffer.from(stderr, bufferEncoding), encoding),
});
}
});
});
outputs.push(result);
}
return outputs;
}
exports.execute = execute;
'use strict';
const child_process = require('child_process');
const iconv = require("iconv-lite");
const encoding = "cp936";
const bufferEncoding = "binary";
/**
 * Run shell commands one after another inside `rootPath` and collect one
 * result object per command.
 *
 * A command that fails yields `{ cmd, err }`; a command that succeeds yields
 * `{ cmd, stdout, stderr }` with both outputs decoded using the module-level
 * `encoding`. The returned promise never rejects because of a failing command.
 *
 * @param {string} rootPath - Working directory for every command.
 * @param {string[]} cmds - Commands to run, in order.
 * @returns {Promise<Object[]>} Results in the same order as `cmds`.
 */
async function execute(rootPath, cmds) {
    // exec hands back raw `bufferEncoding` strings; turn them into readable text
    const decodeOutput = (raw) => iconv.decode(Buffer.from(raw, bufferEncoding), encoding);

    // Wrap a single exec call in a promise that always resolves
    const runSingle = (cmd) => new Promise((resolve) => {
        const options = {
            cwd: rootPath, // directory the command runs in
            encoding: bufferEncoding
        };
        child_process.exec(cmd, options, (err, stdout, stderr) => {
            if (!err) {
                // success: return the decoded command output
                resolve({
                    cmd: cmd,
                    stdout: decodeOutput(stdout),
                    stderr: decodeOutput(stderr),
                });
                return;
            }
            // failure: pass the error object through untouched
            resolve({
                cmd: cmd,
                err: err,
            });
        });
    });

    const outputs = [];
    for (const cmd of cmds) {
        outputs.push(await runSingle(cmd));
    }
    return outputs;
}
exports.execute = execute;

View File

@@ -18,12 +18,20 @@ async function main() {
let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
let result = await requestUtils.getApiResult(API_URL);
if (result === undefined) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求失败");
return;
}
if (result.code != 0) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功但服务器处理失败等待3s后重试。");
await new Promise((resolve) => {
setTimeout(resolve, 3000); // 等待3秒
});
result = await requestUtils.getApiResult(API_URL);
if (result === undefined) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "重试请求失败");
return;
}
if (result.ok != 1) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。");
// ok 不为 1那么久直接保存便于后续分析不进行后续处理

View File

@@ -18,12 +18,20 @@ async function main() {
let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
let result = await requestUtils.getApiResult(API_URL);
if (result === undefined) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求失败");
return;
}
if (result.code != 0) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功但服务器处理失败等待3s后重试。");
await new Promise((resolve) => {
setTimeout(resolve, 3000); // 等待3秒
});
result = await requestUtils.getApiResult(API_URL);
if (result === undefined) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "重试请求失败");
return;
}
if (result.ok != 1) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。");
// ok 不为 1那么久直接保存便于后续分析不进行后续处理

View File

@@ -18,15 +18,23 @@ async function main() {
let now = new Date(requestTimestamp + 8 * 3600 * 1000).toISOString();
let result = await requestUtils.getApiResult(API_URL);
if (result === undefined) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求失败");
return;
}
if (result.ok != 1) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功但服务器处理失败等待3s后重试。");
await new Promise((resolve) => {
setTimeout(resolve, 3000); // 等待3秒
});
result = await requestUtils.getApiResult(API_URL);
if (result === undefined) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "重试请求失败");
return;
}
if (result.ok != 1) {
console.log(new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString(), SUB_FOLDER, "请求成功,但服务器处理失败,保存失败信息。");
// ok 不为 1那么直接保存便于后续分析,不进行后续处理
// ok 不为 1那么直接保存便于后续分析,不进行后续处理
fileUtils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
@@ -56,16 +64,29 @@ async function main() {
let data = JSON.parse(JSON.stringify(result.data));
if (!data) {
fileUtils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
fileNameSuffix: `origin-parse-error`,
object: result,
compress: true,
uncompress: false
});
return;
}
/**
* 过滤掉不需要的数据
*/
// hotgov
delete data.hotgov["mblog"];
// 重复字段只保留一个
delete data.hotgov["note"]; // note word
delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc
delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color
if (data.hotgov) {
delete data.hotgov["mblog"];
// 重复字段只保留一个
delete data.hotgov["note"]; // note word
delete data.hotgov["small_icon_desc"]; // icon_desc small_icon_desc
delete data.hotgov["small_icon_desc_color"]; // icon_desc_color small_icon_desc_color
}
// band_list
for (let i = 0; i < data.band_list.length; i++) {
@@ -154,51 +175,51 @@ async function main() {
});
/**
* 只统计微博调控信息
*/
let convert2 = [];
let total = 0;
data.band_list.forEach(item => {
total += item.num;
total -= item.raw_hot;
if (item.num - item.raw_hot == 0) return;
convert2.push([
`[${item.realpos}] ${item.word}${item.label_name}`,
`原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
]);
});
fileUtils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
fileNameSuffix: `regulation`,
object: {
total_delta: total, // 所有调控值之和
data: convert2
},
compress: false,
uncompress: true
});
// /**
// * 只统计微博调控信息
// */
// let convert2 = [];
// let total = 0;
// data.band_list.forEach(item => {
// total += item.num;
// total -= item.raw_hot;
// if (item.num - item.raw_hot == 0) return;
// convert2.push([
// `[${item.realpos}] ${item.word}【${item.label_name}】`,
// `原始:${item.raw_hot} 显示:${item.num} 调控: ${item.num - item.raw_hot}`
// ]);
// });
// fileUtils.saveJSON({
// saveFolder: DATA_FOLDER,
// now: now,
// fileNameSuffix: `regulation`,
// object: {
// total_delta: total, // 所有调控值之和
// data: convert2
// },
// compress: false,
// uncompress: true
// });
/**
* 保存预处理后数据
*/
// 过滤掉不需要的数据
// band_list
data.band_list.forEach(function (item) {
delete item["mblog"];
});
fileUtils.saveJSON({
saveFolder: DATA_FOLDER,
now: now,
fileNameSuffix: `simplify`,
object: data,
compress: true,
// uncompress: true,
// compress: false,
uncompress: false,
});
// /**
// * 保存预处理后数据
// */
// // 过滤掉不需要的数据
// // band_list
// data.band_list.forEach(function (item) {
// delete item["mblog"];
// });
// fileUtils.saveJSON({
// saveFolder: DATA_FOLDER,
// now: now,
// fileNameSuffix: `simplify`,
// object: data,
// compress: true,
// // uncompress: true,
// // compress: false,
// uncompress: false,
// });
/**
@@ -207,7 +228,7 @@ async function main() {
fs.writeFileSync(`${DATA_FOLDER}/latest.json`, JSON.stringify({
update_time: requestTimestamp,
update_time_friendly: now.substring(0, 19).replace(/T/g, " "),
regulation: convert2,
// regulation: convert2,
data: convert
}));
}

View File

@@ -1,11 +1,13 @@
const fs = require('fs');
const path = require('path');
const LATEST_DATA_ONLY = process.env.LATEST_DATA_ONLY == true;
// 创建目录
async function createFolder(folderToCreate) {
let currentFolder = folderToCreate.replace(/\\/g, '/');
let parentFolder = currentFolder.substring(0, currentFolder.lastIndexOf('/'));
let currentFolder = path.join(folderToCreate);
let parentFolder = path.join(currentFolder, '../');
// console.log({ currentFolder: currentFolder, parentFolder: parentFolder });
if (!fs.existsSync(currentFolder)) {
// 文件夹不存在,创建文件夹
createFolder(parentFolder); // 保证父级文件夹存在

View File

@@ -14,7 +14,7 @@ async function getApiResult(url) {
} else {
// 请求失败
console.log(`error is ${error}`);
resolve(null);
resolve(undefined);
}
});
});

1
letsencrypt-autorenew/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
config.js

View File

@@ -0,0 +1,6 @@
// Credentials for the osfipin Let's Encrypt API; both values are left empty here
// and are expected to be filled in by whoever runs the script.
// NOTE(review): presumably this is the template for the git-ignored config.js — confirm.
// Web console: https://letsencrypt.osfipin.com/user-0408/my/api
// API docs: https://letsencrypt.apifox.cn/
const APIToken = ''
const User = ''
module.exports = { APIToken, User }

View File

@@ -0,0 +1,112 @@
const { APIToken, User } = require('./config')
/**
* 来此加密 SSL证书半自动申请
*
* 功能:将账号下的证书逐个提交重申
* 用法:运行脚本挂着,然后就可以去做别的,脚本会逐一将账号下证书提交重申,等待一段时间后即可去后台下载证书
*
* 注意:需要验证状态的证书会阻塞
*
* https://letsencrypt.osfipin.com/user-0408/order/list
*
* @author coder-xiaomo
* @since 2024.01.30
*/
// Request settings shared by every API call in this script: the bearer credential
// built from the config values, plus a fixed User-Agent.
var myHeaders = new Headers([
    ["Authorization", `Bearer ${APIToken}:${User}`],
    ["User-Agent", "Apifox/1.0.0 (https://apifox.com)"],
]);
var requestOptions = { method: 'GET', headers: myHeaders, redirect: 'follow' };
/**
 * Submit a renewal ("re-apply") request for a single certificate.
 *
 * @param {string|number} certId - Certificate id, sent as the `id` query parameter.
 * @returns {Promise<object>} The parsed JSON body of the response.
 * @throws {Error} When the HTTP request fails or the body is not valid JSON;
 *                 the underlying error is available as `error.cause`.
 */
async function renew(certId) {
    // Encode the id so unusual characters cannot alter the query string.
    const endpoint = "https://api.osfipin.com/letsencrypt/api/order/renew?id=" + encodeURIComponent(certId)

    let body
    try {
        const response = await fetch(endpoint, requestOptions)
        body = await response.text()
    } catch (error) {
        console.log('error', error)
        // Fail with a meaningful error instead of letting JSON.parse(undefined) blow up later.
        throw new Error(`renew request failed for certificate ${certId}`, { cause: error })
    }

    let result
    try {
        result = JSON.parse(body)
    } catch (error) {
        throw new Error(`renew response for certificate ${certId} is not valid JSON`, { cause: error })
    }
    console.log('result', result)

    // Sample responses seen from the API:
    // {"c":40,"m":"\u65e0\u6cd5\u91cd\u7533,\u8fc7\u671f\u524d14\u5929\u5185\u53ef\u91cd\u65b0\u7533\u8bf7","v":""}
    //   (decoded message: cannot re-apply; re-application is only possible within 14 days before expiry)
    // { c: 40, m: 'param id error', v: '' }
    // { c: 40, m: '请先完成正在处理的证书', v: '' }
    //   (message: finish the certificate that is currently being processed first)
    return result
}
/**
 * Fetch every page of the account's certificate order list.
 *
 * Pages are requested one after another; the total page count is taken from
 * `v.mpage` of each response.
 *
 * @returns {Promise<object[]>} All entries of `v.list` from every page, in page order.
 * @throws {Error} When a request fails, the body is not valid JSON, or the
 *                 response does not carry a `v.list` array (e.g. an API error payload).
 */
async function getList() {
    let totalPage = 1, currentPage = 1
    const list = []
    while (currentPage <= totalPage) {
        const endpoint = "https://api.osfipin.com/letsencrypt/api/order/list?page=" + currentPage

        let result
        try {
            const response = await fetch(endpoint, requestOptions)
            result = JSON.parse(await response.text())
        } catch (error) {
            console.log('error', error)
            throw new Error(`failed to load certificate list page ${currentPage}`, { cause: error })
        }

        // Error payloads carry `v: ''`; report the server message instead of
        // crashing on the spread below.
        if (!Array.isArray(result?.v?.list)) {
            throw new Error(`unexpected certificate list response on page ${currentPage}: ${result?.m ?? JSON.stringify(result)}`)
        }

        totalPage = result.v.mpage
        currentPage++
        list.push(...result.v.list)
    }
    return list
}
let startTimestamp = 0
let forTime = 0
let needRenewList = []
/**
 * One polling round: load the certificate list, pick the certificates that are
 * due for renewal and submit renewals until the API accepts one or reports that
 * another certificate is still being processed.
 *
 * Stops the polling interval once nothing is left to renew. Errors are logged
 * and swallowed so that a transient failure does not terminate the process;
 * the next interval tick simply tries again.
 *
 * @returns {Promise<void>}
 */
async function _timer() {
    try {
        const list = await getList()
        // console.log('list', list)
        const pending = list
            // skip certificates whose status is '验证中' (validation in progress)
            .filter(i => i.status !== '验证中')
            // keep only certificates expiring within the next 14 days
            .filter(i => new Date(i.time_end).getTime() - Date.now() <= 14 * 24 * 3600 * 1000)
        // console.log('pending', pending)

        if (pending.length === 0) {
            clearInterval(timerInstanse)
            console.log('完成!')
            // Nothing left to do: do not print a progress line for an empty list.
            return
        }

        const displaySpendTime = ((Date.now() - startTimestamp) / (1000 * 60)).toFixed(3)
        console.log(`${++forTime}次尝试,已耗时${displaySpendTime}min剩余待申请证书:`, pending.map(cert => `${cert.id}(${cert.status})`).join('、'))

        for (const cert of pending) {
            const result = await renew(cert.id)
            if (result.c == 20) {
                console.log('申请证书返回成功')
                // A renewal is now in flight; the remaining ones wait for the next round.
                break
            } else if (result.c == 40) {
                if (result.m === '请先完成正在处理的证书') {
                    console.log('有证书正在申请中,跳过')
                    break
                }
            }
        }
    } catch (error) {
        // Keep the poller alive; the next tick retries from scratch.
        console.log('error', error)
    }
}
// Handle of the polling interval; `_timer` clears it once nothing is left to renew.
// Declared explicitly so the assignment in main() no longer creates an implicit
// global (which would be a ReferenceError in strict mode / ES modules).
let timerInstanse

/**
 * Entry point: record the start time, schedule `_timer` every 10 seconds and
 * run the first round immediately instead of waiting for the first tick.
 */
async function main() {
    startTimestamp = Date.now()
    timerInstanse = setInterval(_timer, 10 * 1000)
    _timer()
    // renew('1')
}
main()

2
linux-command/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
commands/*
list.json

1
linux-command/README.md Normal file
View File

@@ -0,0 +1 @@
Api是从 CSDN猿如意 的插件中扒出来的

55
linux-command/index.js Normal file
View File

@@ -0,0 +1,55 @@
const fs = require('fs');
const path = require('path');
const os = require('os');
const { getApiResult } = require('../utils/requestUtils');
// Endpoints used by this script: `list` is the command index (JSON),
// `content(command)` builds the URL of one command's markdown page.
const url = {
    list: 'https://cdn-static-devbit.csdn.net/devbit-static/plugin/linux-command/dist/data.min.json',
    content(command) {
        return `https://cdn-static-devbit.csdn.net/devbit-static/plugin/linux-command/command/${command}.md`
    },
}
/**
 * Return the command index, using ./list.json (relative to the current working
 * directory) as a cache: read it when present, otherwise download the index,
 * pretty-print it into the cache file and return it.
 *
 * NOTE(review): assumes getApiResult resolves to the raw JSON text — confirm.
 *
 * @returns {Promise<object>} The parsed command index.
 */
async function get_list() {
    const cachePath = './list.json'
    const isCached = fs.existsSync(cachePath) && fs.statSync(cachePath).isFile()

    if (!isCached) {
        console.log('File not exists')
        const downloaded = JSON.parse(await getApiResult(url.list))
        fs.writeFileSync(cachePath, JSON.stringify(downloaded, null, 4))
        return downloaded
    }

    console.log('File exists')
    return JSON.parse(fs.readFileSync(cachePath, 'utf8'))
}
/**
 * Download the markdown page of one command into ./commands/<command>.md,
 * unless that file already exists.
 *
 * A download that yields no content (undefined/null) is logged and skipped
 * instead of being passed to fs.writeFileSync, which would throw.
 * NOTE(review): presumably getApiResult resolves to undefined when the request
 * fails — confirm against utils/requestUtils.
 *
 * @param {string} command - Command name, used both in the URL and the file name.
 * @returns {Promise<void>}
 */
async function get_content_by_command(command) {
    const saveFilePath = `./commands/${command}.md`
    if (fs.existsSync(saveFilePath) && fs.statSync(saveFilePath).isFile()) {
        console.log(`[${command}] File exists, skip ...`)
        return
    }

    console.log(`[${command}] File not exists, get content ...`)
    const content = await getApiResult(url.content(command))
    if (content == null) {
        // Leave no file behind so a later run retries this command.
        console.log(`[${command}] Download failed, skip ...`)
        return
    }
    fs.writeFileSync(saveFilePath, content)
}
/**
 * Entry point: load the command index, make sure ./commands exists, then
 * download the markdown page of every command listed in the index.
 *
 * Downloads are still started together, but they are now awaited so failures
 * are reported per command instead of surfacing as unhandled rejections.
 *
 * @returns {Promise<void>} Resolves once every download has settled.
 */
async function main() {
    const list = await get_list()
    const commands = Object.keys(list)
    // console.log(commands.join(', '))

    const folderPath = './commands'
    if (!fs.existsSync(folderPath) || !fs.statSync(folderPath).isDirectory()) {
        fs.mkdirSync(folderPath, { recursive: true })
    }

    const outcomes = await Promise.allSettled(
        commands.map((command) => get_content_by_command(command))
    )
    outcomes.forEach((outcome, position) => {
        if (outcome.status === 'rejected') {
            console.log(`[${commands[position]}] Failed:`, outcome.reason)
        }
    })
}
main().catch((error) => {
    // Report start-up failures (e.g. the index could not be loaded) and signal them via the exit code.
    console.log('error', error)
    process.exitCode = 1
})

4
lyric/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
*.lrc
*.m4a
*.mp3
*.flac

64
lyric/index.js Normal file
View File

@@ -0,0 +1,64 @@
const fs = require('fs');
const path = require('path');
const iconv = require('iconv-lite');//安装第三方库转换编码格式
const readline = require('readline');
const chalk = require('chalk')
// Startup sequence: open the lyric file as a decoded text stream, clear the
// terminal, start audio playback, then begin scheduling lyric lines after a
// fixed delay.
// var filename = path.join(__dirname, './../lyrics/晴天.lrc');
var filename = path.join(__dirname, './lyric.lrc');
// The file is decoded as UTF-8; the trailing note presumably marks 'gbk' as the
// alternative encoding for GBK-encoded lyric files.
var streamReader = fs.createReadStream(filename).pipe(iconv.decodeStream('utf8')) // gbk
console.clear();
playMusic();
setTimeout(main, 2600); // allowance for the time the audio takes to load
/**
 * Read the lyric stream line by line and hand every line to `task`, together
 * with the timestamp (ms since epoch) taken when reading started.
 */
function main() {
    // readline splits the decoded stream into lines
    const lineReader = readline.createInterface({ input: streamReader });
    const startedAt = Date.now();
    lineReader.on('line', function (text) {
        task(text, startedAt);
    });
}
var displayLyric = [], index = 0;
var regex = /\[(\d{2})\:(\d{2})\.(\d{2,3})\][ ]*(.+)[ ]*/;
/**
 * Parse one line of the lyric file and, when it is a timed lyric line,
 * schedule it to be shown at its timestamp.
 *
 * The fractional part of an LRC time tag is a decimal fraction of a second:
 * two digits are hundredths ("[01:23.45]" = 450 ms), three digits are
 * milliseconds. It is normalised to milliseconds before scheduling.
 *
 * @param {string} line  - Raw line read from the lyric file.
 * @param {number} begin - Timestamp (ms since epoch) at which reading started;
 *                         the time already elapsed since then is subtracted
 *                         from the scheduled delay.
 */
function task(line, begin) {
    var matches = regex.exec(line);
    if (!matches) {
        // Not a timed lyric line; ignore it.
        // console.log("err", line);
        return;
    }

    var minutes = parseInt(matches[1], 10);
    var seconds = parseInt(matches[2], 10);
    // Right-pad to three digits so "45" becomes 450 ms instead of 45 ms.
    var millis = parseInt(matches[3].padEnd(3, '0'), 10);
    var lyric = matches[4];
    displayLyric.push(lyric);

    var offset = new Date().getTime() - begin;
    setTimeout(() => {
        // Redraw a five-line window centred on the current line.
        console.clear();
        console.log(chalk.dim(displayLyric[index - 2] || ""));
        console.log(chalk.hex('#b3b3b3').visible(displayLyric[index - 1] || ""));
        console.log(chalk.hex('#FFFFFF').bold(displayLyric[index]));
        console.log(chalk.hex('#b3b3b3').visible(displayLyric[index + 1] || ""));
        console.log(chalk.dim(displayLyric[index + 2] || ""));
        index++;
        // console.log(lyric);
    }, minutes * 60 * 1000 + seconds * 1000 + millis - offset);
}
/**
 * Start playing ./music.m4a through the `play-sound` package.
 *
 * The callback throws when the player reports an error; otherwise it prints
 * the track title.
 */
function playMusic() {
    // Pass the (empty) options object directly; the former `opts = {}` argument
    // assigned to an undeclared variable and so created an implicit global.
    var player = require('play-sound')({})
    player.play('./music.m4a', function (err) {
        if (err) throw err;
        console.log("消愁 - 毛不易");
    });
}

41
netease_music/index.js Normal file
View File

@@ -0,0 +1,41 @@
// Command-line entry point: validate that arguments were given, normalise them,
// publish the shared settings as globals and hand over to ./src/index.

// No arguments at all: print the usage text and stop.
if (process.argv.length <= 2) {
let output = [
"参数不够",
"node index --utils [song|album|artist|lyric|comment|playlist|assistant] --min [number] --max [number] --order [false|ASC|DESC] --limit [number]",
// "",
// "node index --utils xxx --min xxx --max xxx --order ASC --limit 2000",
].join('\n');
console.log(output);
return;
}
var args = require('minimist')(process.argv.slice(2));
// Rebuild `args` with only the known options. `Number(x) || undefined` turns a
// missing, non-numeric or zero value into undefined.
args = {
// sub-module to run
utils: args.utils,
// id range
min: Number(args.min) || undefined,
max: Number(args.max) || undefined,
// sort order
order: args.order,
// number of items
limit: Number(args.limit) || undefined,
// partition
partition: Number(args.partition) || undefined,
// #################################
// wait time between two requests (defaults to 100)
sleepTime: Number(args.sleepTime) || 100,
// database name (defaults to "neteasemusic")
database: args.database || "neteasemusic",
}
console.log("args:", args);
// Warn when a database other than the default one is selected.
if (args.database != "neteasemusic")
console.log(`注意,当前连接的数据库 [${args.database}] 非业务数据库`);
// These globals are assigned before ./src/index is required.
// NOTE(review): presumably ./src/index reads them at load time — confirm.
global.database = args.database;
global.sleepTime = args.sleepTime; // pause between two requests
global.useMysqlPool = true;
const neteaseMusic = require('./src/index');
neteaseMusic.main(args);

View File

@@ -0,0 +1,37 @@
-- Count the rows still waiting to be fetched, one result row per kind (2023.12.25)
SELECT 'comment' as wait_fetch, count(*) as `count` FROM `comment_progress` where current_status = 0
UNION ALL
SELECT 'album', count(*) FROM `wait_fetch_album`
UNION ALL
SELECT 'artist', count(*) FROM `wait_fetch_artist`
UNION ALL
SELECT 'lyric', count(*) FROM `wait_fetch_lyric`;
-- The statement above is now terminated so the script can be run as a whole.

-- Distribution of comment progress rows with current_status != 2, in buckets of 10,000,000 song ids
SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
FROM comment_progress
WHERE current_status != 2
GROUP BY s
ORDER BY s DESC;

-- Distribution of lyrics waiting to be fetched, in buckets of 10,000,000 ids
SELECT cast( FLOOR( id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
FROM wait_fetch_lyric
GROUP BY s
ORDER BY s DESC;

-- Distribution of albums waiting to be fetched, in buckets of 1,000,000 ids
SELECT cast( FLOOR( id / 1000000 ) * 1000000 as UNSIGNED ) as s, count(*) as count
FROM wait_fetch_album
GROUP BY s
ORDER BY s DESC;

-- Distribution of artists waiting to be fetched, in buckets of 1,000,000 ids
SELECT cast( FLOOR(id / 1000000 ) * 1000000 as UNSIGNED ) as s, count(*) as count
FROM wait_fetch_artist
GROUP BY s
ORDER BY s DESC;

View File

@@ -0,0 +1,20 @@
# 阿里云Windows服务器(临时)
# 阿里云1(临时)
# 阿里云2(临时)
# 家笔记本(临时)
# 家台式机(临时)
# 旧安卓手机(临时)
cd tools/netease_music/
# 手机(临时)
cd tools/netease_music/
# 腾讯云linux服务器(临时)
cd /www/wwwserv/tools/netease_music/
# 服务器能力有限,不分配

View File

@@ -0,0 +1 @@
comment id segment.txt

View File

@@ -0,0 +1 @@
start cmd

View File

@@ -0,0 +1,9 @@
-- Step 1: copy every row of `comment` / `user` into the matching `*_origin_` table.
-- REPLACE overwrites rows that collide on a primary or unique key.
REPLACE INTO `comment_origin_` SELECT * FROM `comment`; -- 80.1G
REPLACE INTO `user_origin_` SELECT * FROM `user`; -- 5.20G
-- Step 2: delete from the source tables every row whose id now exists in the
-- `*_origin_` table. NOTE(review): destructive; run only after step 1 has succeeded.
DELETE `comment`
FROM `comment` INNER JOIN `comment_origin_` ON `comment`.comment_id = `comment_origin_`.comment_id;
DELETE `user`
FROM `user` INNER JOIN `user_origin_` ON `user`.user_id = `user_origin_`.user_id;

View File

@@ -0,0 +1,13 @@
# Setup commands for a fresh cloud shell session (https://shell.aliyun.com/):
# clone the repository, write config.json, switch the npm registry, install dependencies.
# https://shell.aliyun.com/
git clone https://git.only4.work/coder-xiaomo/tools
cd tools
# NOTE(review): the next line commits a database host, port, user and password in
# plaintext. Rotate these credentials and load them from a private source instead.
echo '{"mysql":{"charset":"utf8mb4","host":"124.220.172.110","user":"root","password":"123456","port":5204,"database":"","connectTimeout": 3600000,"acquireTimeout": 3600000,"timeout": 3600000}}' > config.json
npm config set registry https://registry.npmmirror.com
# Print both settings back for a visual check.
cat config.json
npm config get registry
npm i
# cd netease_music
cd ~/tools/netease_music/

View File

@@ -0,0 +1,277 @@
# cd ./netease_music
# cd tools/netease_music/
start cmd /k "node index --utils assistant"
start cmd /k "node index --utils song"
start cmd /k "node index --utils artist --limit 10000"
start cmd /k "node index --utils album --limit 10000"
start cmd /k "node index --utils lyric --limit 10000"
start cmd /k "node index --utils comment --limit 10000"
# start cmd /k "node index --utils playlist"
# lyric
# node index --utils lyric --min 2100000000 --limit 10000
# node index --utils lyric --min 2090000000 --max 2100000000 --limit 10000
# node index --utils lyric --min 2080000000 --max 2090000000 --limit 10000
# node index --utils lyric --min 2070000000 --max 2080000000 --limit 10000
# node index --utils lyric --min 2060000000 --max 2070000000 --limit 10000
# node index --utils lyric --min 2050000000 --max 2060000000 --limit 10000
# node index --utils lyric --min 2000000000 --max 2050000000 --limit 10000
# node index --utils lyric --min 1990000000 --max 2000000000 --limit 10000
# node index --utils lyric --min 1980000000 --max 1990000000 --limit 10000
# node index --utils lyric --min 1970000000 --max 1980000000 --limit 10000
# node index --utils lyric --min 1960000000 --max 1970000000 --limit 10000
# node index --utils lyric --min 1950000000 --max 1960000000 --limit 10000
# node index --utils lyric --min 1940000000 --max 1950000000 --limit 10000
# node index --utils lyric --min 1930000000 --max 1940000000 --limit 10000
# node index --utils lyric --min 1920000000 --max 1930000000 --limit 10000
# node index --utils lyric --min 1910000000 --max 1920000000 --limit 10000
# node index --utils lyric --min 1900000000 --max 1910000000 --limit 10000
# node index --utils lyric --min 1890000000 --max 1900000000 --limit 10000
# node index --utils lyric --min 1880000000 --max 1890000000 --limit 10000
# node index --utils lyric --min 1870000000 --max 1880000000 --limit 10000
# node index --utils lyric --min 1860000000 --max 1870000000 --limit 10000
# node index --utils lyric --min 1850000000 --max 1860000000 --limit 10000
# node index --utils lyric --min 1840000000 --max 1850000000 --limit 10000
# node index --utils lyric --min 1830000000 --max 1840000000 --limit 10000
# node index --utils lyric --min 1820000000 --max 1830000000 --limit 10000
# node index --utils lyric --min 1810000000 --max 1820000000 --limit 10000
# node index --utils lyric --min 1800000000 --max 1810000000 --limit 10000
# node index --utils lyric --min 1500000000 --max 1800000000 --limit 10000
# node index --utils lyric --min 1490000000 --max 1500000000 --limit 10000
# node index --utils lyric --min 1480000000 --max 1490000000 --limit 10000
# node index --utils lyric --min 1470000000 --max 1480000000 --limit 10000
# node index --utils lyric --min 1460000000 --max 1470000000 --limit 10000
# node index --utils lyric --min 1450000000 --max 1460000000 --limit 10000
# node index --utils lyric --min 1440000000 --max 1450000000 --limit 10000
# node index --utils lyric --min 1430000000 --max 1440000000 --limit 10000
# node index --utils lyric --min 1420000000 --max 1430000000 --limit 10000
# node index --utils lyric --min 1410000000 --max 1420000000 --limit 10000
# node index --utils lyric --min 1400000000 --max 1410000000 --limit 10000
# node index --utils lyric --min 1390000000 --max 1400000000 --limit 10000
# node index --utils lyric --min 1380000000 --max 1390000000 --limit 10000
# node index --utils lyric --min 1370000000 --max 1380000000 --limit 10000
# node index --utils lyric --min 1360000000 --max 1370000000 --limit 10000
# node index --utils lyric --min 1350000000 --max 1360000000 --limit 10000
# node index --utils lyric --min 1340000000 --max 1350000000 --limit 10000
# node index --utils lyric --min 1330000000 --max 1340000000 --limit 10000
# node index --utils lyric --min 1320000000 --max 1330000000 --limit 10000
# node index --utils lyric --min 1310000000 --max 1320000000 --limit 10000
# node index --utils lyric --min 1300000000 --max 1310000000 --limit 10000
# node index --utils lyric --min 570000000 --max 1300000000 --limit 10000
# node index --utils lyric --min 560000000 --max 570000000 --limit 10000
# node index --utils lyric --min 550000000 --max 560000000 --limit 10000
# node index --utils lyric --min 540000000 --max 550000000 --limit 10000
# node index --utils lyric --min 530000000 --max 540000000 --limit 10000
# node index --utils lyric --max 530000000 --limit 10000
# ###################################################################################
# comment
# node index --utils comment --min 0 --max 100000 --limit 10000
# node index --utils comment --min 100000 --max 200000 --limit 10000
# node index --utils comment --min 200000 --max 400000 --limit 10000
# node index --utils comment --min 400000 --max 1000000 --limit 10000
# node index --utils comment --min 1000000 --max 2000000 --limit 10000
# node index --utils comment --min 2000000 --max 3000000 --limit 10000
# node index --utils comment --min 3000000 --max 4000000 --limit 10000
# node index --utils comment --min 4000000 --max 5000000 --limit 10000
# node index --utils comment --min 5000000 --max 10000000 --limit 10000
# node index --utils comment --min 10000000 --max 15000000 --limit 10000
# node index --utils comment --min 15000000 --max 20000000 --limit 10000
# node index --utils comment --min 20000000 --max 25000000 --limit 10000
# node index --utils comment --min 25000000 --max 30000000 --limit 10000
# node index --utils comment --min 30000000 --max 35000000 --limit 10000
# node index --utils comment --min 35000000 --max 40000000 --limit 10000
# node index --utils comment --min 40000000 --max 50000000 --limit 10000
# node index --utils comment --min 50000000 --max 100000000 --limit 10000
# node index --utils comment --min 100000000 --max 200000000 --limit 10000
# node index --utils comment --min 200000000 --max 300000000 --limit 10000
# node index --utils comment --min 300000000 --max 400000000 --limit 10000
# node index --utils comment --min 400000000 --max 500000000 --limit 10000
# node index --utils comment --min 500000000 --max 600000000 --limit 10000
# node index --utils comment --min 600000000 --max 700000000 --limit 10000
# node index --utils comment --min 700000000 --max 800000000 --limit 10000
# node index --utils comment --min 800000000 --max 900000000 --limit 10000
# node index --utils comment --min 900000000 --max 1000000000 --limit 10000
# node index --utils comment --min 1000000000 --max 1100000000 --limit 10000
# node index --utils comment --min 1100000000 --max 1200000000 --limit 10000
# node index --utils comment --min 1200000000 --max 1280000000 --limit 10000
# node index --utils comment --min 1280000000 --max 1290000000 --limit 10000
# node index --utils comment --min 1290000000 --max 1300000000 --limit 10000
# node index --utils comment --min 1300000000 --max 1310000000 --limit 10000
# node index --utils comment --min 1310000000 --max 1320000000 --limit 10000
# node index --utils comment --min 1320000000 --max 1330000000 --limit 10000
# node index --utils comment --min 1330000000 --max 1340000000 --limit 10000
# node index --utils comment --min 1340000000 --max 1350000000 --limit 10000
# node index --utils comment --min 1350000000 --max 1360000000 --limit 10000
# node index --utils comment --min 1360000000 --max 1370000000 --limit 10000
# node index --utils comment --min 1370000000 --max 1380000000 --limit 10000
# node index --utils comment --min 1380000000 --max 1390000000 --limit 10000
# node index --utils comment --min 1390000000 --max 1400000000 --limit 10000
# node index --utils comment --min 1400000000 --max 1410000000 --limit 10000
# node index --utils comment --min 1410000000 --max 1420000000 --limit 10000
# node index --utils comment --min 1420000000 --max 1430000000 --limit 10000
# node index --utils comment --min 1430000000 --max 1440000000 --limit 10000
# node index --utils comment --min 1440000000 --max 1450000000 --limit 10000
# node index --utils comment --min 1450000000 --max 1460000000 --limit 10000
# node index --utils comment --min 1460000000 --max 1470000000 --limit 10000
# node index --utils comment --min 1470000000 --max 1480000000 --limit 10000
# node index --utils comment --min 1480000000 --max 1490000000 --limit 10000
# node index --utils comment --min 1490000000 --max 1500000000 --limit 10000
# node index --utils comment --min 1500000000 --max 1600000000 --limit 10000
# node index --utils comment --min 1600000000 --max 1700000000 --limit 10000
# node index --utils comment --min 1700000000 --max 1800000000 --limit 10000
# node index --utils comment --min 1800000000 --max 1805000000 --limit 10000
# node index --utils comment --min 1805000000 --max 1810000000 --limit 10000
# node index --utils comment --min 1810000000 --max 1815000000 --limit 10000
# node index --utils comment --min 1815000000 --max 1820000000 --limit 10000
# node index --utils comment --min 1820000000 --max 1825000000 --limit 10000
# node index --utils comment --min 1825000000 --max 1830000000 --limit 10000
# node index --utils comment --min 1830000000 --max 1835000000 --limit 10000
# node index --utils comment --min 1835000000 --max 1840000000 --limit 10000
# node index --utils comment --min 1840000000 --max 1845000000 --limit 10000
# node index --utils comment --min 1845000000 --max 1850000000 --limit 10000
# node index --utils comment --min 1850000000 --max 1855000000 --limit 10000
# node index --utils comment --min 1855000000 --max 1860000000 --limit 10000
# node index --utils comment --min 1860000000 --max 1865000000 --limit 10000
# node index --utils comment --min 1865000000 --max 1870000000 --limit 10000
# node index --utils comment --min 1870000000 --max 1875000000 --limit 10000
# node index --utils comment --min 1875000000 --max 1880000000 --limit 10000
# node index --utils comment --min 1880000000 --max 1885000000 --limit 10000
# node index --utils comment --min 1885000000 --max 1890000000 --limit 10000
# node index --utils comment --min 1890000000 --max 1895000000 --limit 10000
# node index --utils comment --min 1895000000 --max 1900000000 --limit 10000
# node index --utils comment --min 1900000000 --max 1905000000 --limit 10000
# node index --utils comment --min 1905000000 --max 1910000000 --limit 10000
# node index --utils comment --min 1910000000 --max 1915000000 --limit 10000
# node index --utils comment --min 1915000000 --max 1920000000 --limit 10000
# node index --utils comment --min 1920000000 --max 1925000000 --limit 10000
# node index --utils comment --min 1925000000 --max 1930000000 --limit 10000
# node index --utils comment --min 1930000000 --max 1935000000 --limit 10000
# node index --utils comment --min 1935000000 --max 1940000000 --limit 10000
# node index --utils comment --min 1940000000 --max 1945000000 --limit 10000
# node index --utils comment --min 1945000000 --max 1950000000 --limit 10000
# node index --utils comment --min 1950000000 --max 1955000000 --limit 10000
# node index --utils comment --min 1955000000 --max 1960000000 --limit 10000
# node index --utils comment --min 1960000000 --max 1965000000 --limit 10000
# node index --utils comment --min 1965000000 --max 1970000000 --limit 10000
# node index --utils comment --min 1970000000 --max 1975000000 --limit 10000
# node index --utils comment --min 1975000000 --max 1980000000 --limit 10000
# node index --utils comment --min 1980000000 --max 1985000000 --limit 10000
# node index --utils comment --min 1985000000 --max 1990000000 --limit 10000
# node index --utils comment --min 1990000000 --max 1995000000 --limit 10000
# node index --utils comment --min 1995000000 --max 2000000000 --limit 10000
# node index --utils comment --min 2000000000 --max 2005000000 --limit 10000
# node index --utils comment --min 2005000000 --max 2010000000 --limit 10000
# node index --utils comment --min 2010000000 --max 2015000000 --limit 10000
# node index --utils comment --min 2015000000 --max 2020000000 --limit 10000
# node index --utils comment --min 2020000000 --max 2025000000 --limit 10000
# node index --utils comment --min 2025000000 --max 2030000000 --limit 10000
# node index --utils comment --min 2030000000 --max 2035000000 --limit 10000
# node index --utils comment --min 2035000000 --max 2040000000 --limit 10000
# node index --utils comment --min 2040000000 --max 2045000000 --limit 10000
# node index --utils comment --min 2045000000 --max 2050000000 --limit 10000
# node index --utils comment --min 2050000000 --max 2055000000 --limit 10000
# node index --utils comment --min 2055000000 --max 2060000000 --limit 10000
# node index --utils comment --min 2060000000 --max 2065000000 --limit 10000
# node index --utils comment --min 2065000000 --max 2070000000 --limit 10000
# node index --utils comment --min 2070000000 --max 2075000000 --limit 10000
# node index --utils comment --min 2075000000 --max 2080000000 --limit 10000
# node index --utils comment --min 2080000000 --max 2085000000 --limit 10000
# node index --utils comment --min 2085000000 --max 2090000000 --limit 10000
# node index --utils comment --min 2090000000 --max 2095000000 --limit 10000
# node index --utils comment --min 2095000000 --max 2100000000 --limit 10000
# node index --utils comment --min 2100000000 --max 2110000000 --limit 10000
# node index --utils comment --min 2110000000 --max 2120000000 --limit 10000
# node index --utils comment --min 2120000000 --max 2500000000 --limit 10000
# node index --utils comment --min 2500000000 --limit 10000
# ###################################################################################
# album
# node index --utils album --min 185000000 --limit 10000
# node index --utils album --min 181000000 --max 185000000 --limit 10000
# node index --utils album --min 180000000 --max 181000000 --limit 10000
# node index --utils album --min 179000000 --max 180000000 --limit 10000
# node index --utils album --min 178000000 --max 179000000 --limit 10000
# node index --utils album --min 177000000 --max 178000000 --limit 10000
# node index --utils album --min 175000000 --max 177000000 --limit 10000
# node index --utils album --min 170000000 --max 175000000 --limit 10000
# node index --utils album --min 169000000 --max 170000000 --limit 10000
# node index --utils album --min 168000000 --max 169000000 --limit 10000
# node index --utils album --min 167000000 --max 168000000 --limit 10000
# node index --utils album --min 166000000 --max 167000000 --limit 10000
# node index --utils album --min 165000000 --max 166000000 --limit 10000
# node index --utils album --min 164000000 --max 165000000 --limit 10000
# node index --utils album --min 163000000 --max 164000000 --limit 10000
# node index --utils album --min 162000000 --max 163000000 --limit 10000
# node index --utils album --min 161000000 --max 162000000 --limit 10000
# node index --utils album --min 160000000 --max 161000000 --limit 10000
# node index --utils album --min 159000000 --max 160000000 --limit 10000
# node index --utils album --min 158000000 --max 159000000 --limit 10000
# node index --utils album --min 157000000 --max 158000000 --limit 10000
# node index --utils album --min 156000000 --max 157000000 --limit 10000
# node index --utils album --min 155000000 --max 156000000 --limit 10000
# node index --utils album --min 154000000 --max 155000000 --limit 10000
# node index --utils album --min 153000000 --max 154000000 --limit 10000
# node index --utils album --min 152000000 --max 153000000 --limit 10000
# node index --utils album --min 151000000 --max 152000000 --limit 10000
# node index --utils album --min 150000000 --max 151000000 --limit 10000
# node index --utils album --min 149000000 --max 150000000 --limit 10000
# node index --utils album --min 148000000 --max 149000000 --limit 10000
# node index --utils album --min 147000000 --max 148000000 --limit 10000
# node index --utils album --min 146000000 --max 147000000 --limit 10000
# node index --utils album --min 145000000 --max 146000000 --limit 10000
# node index --utils album --min 144000000 --max 145000000 --limit 10000
# node index --utils album --min 143000000 --max 144000000 --limit 10000
# node index --utils album --min 142000000 --max 143000000 --limit 10000
# node index --utils album --min 141000000 --max 142000000 --limit 10000
# node index --utils album --min 140000000 --max 141000000 --limit 10000
# node index --utils album --min 139000000 --max 140000000 --limit 10000
# node index --utils album --min 138000000 --max 139000000 --limit 10000
# node index --utils album --min 137000000 --max 138000000 --limit 10000
# node index --utils album --min 136000000 --max 137000000 --limit 10000
# node index --utils album --min 135000000 --max 136000000 --limit 10000
# node index --utils album --min 134000000 --max 135000000 --limit 10000
# node index --utils album --min 133000000 --max 134000000 --limit 10000
# node index --utils album --min 132000000 --max 133000000 --limit 10000
# node index --utils album --min 131000000 --max 132000000 --limit 10000
# node index --utils album --min 130000000 --max 131000000 --limit 10000
# node index --utils album --min 129000000 --max 130000000 --limit 10000
# node index --utils album --min 128000000 --max 129000000 --limit 10000
# node index --utils album --min 127000000 --max 128000000 --limit 10000
# node index --utils album --min 126000000 --max 127000000 --limit 10000
# node index --utils album --min 125000000 --max 126000000 --limit 10000
# node index --utils album --min 124000000 --max 125000000 --limit 10000
# node index --utils album --min 123000000 --max 124000000 --limit 10000
# node index --utils album --min 122000000 --max 123000000 --limit 10000
# node index --utils album --min 121000000 --max 122000000 --limit 10000
# node index --utils album --min 120000000 --max 121000000 --limit 10000
# node index --utils album --min 99000000 --max 120000000 --limit 10000
# node index --utils album --min 98000000 --max 99000000 --limit 10000
# node index --utils album --min 97000000 --max 98000000 --limit 10000
# node index --utils album --min 96000000 --max 97000000 --limit 10000
# node index --utils album --min 95000000 --max 96000000 --limit 10000
# node index --utils album --min 94000000 --max 95000000 --limit 10000
# node index --utils album --min 93000000 --max 94000000 --limit 10000
# node index --utils album --min 92000000 --max 93000000 --limit 10000
# node index --utils album --min 91000000 --max 92000000 --limit 10000
# node index --utils album --max 91000000 --limit 10000
# ###################################################################################
# artist
# node index --utils artist --min 49000000 --limit 10000
# node index --utils artist --min 48000000 --max 49000000 --limit 10000
# node index --utils artist --min 47000000 --max 48000000 --limit 10000
# node index --utils artist --min 46000000 --max 47000000 --limit 10000
# node index --utils artist --min 37000000 --max 46000000 --limit 10000
# node index --utils artist --min 36000000 --max 37000000 --limit 10000
# node index --utils artist --min 35000000 --max 36000000 --limit 10000
# node index --utils artist --min 34000000 --max 35000000 --limit 10000
# node index --utils artist --min 33000000 --max 34000000 --limit 10000
# node index --utils artist --max 33000000 --limit 10000

View File

@@ -0,0 +1,103 @@
// -- Distribution of the comments that still need to be crawled
// SELECT cast( FLOOR( song_id / 10000000 ) * 10000000 as UNSIGNED ) as s, count(*) as count
// FROM comment_progress
// WHERE current_status != 2
// GROUP BY s
// ORDER BY s DESC;
// `segmentText` holds the segment lower bounds returned by the SQL above, one per line.
const segmentText = `2110000000
2100000000
2090000000
2080000000
2070000000
2060000000
2050000000
2040000000
2030000000
2020000000
2010000000
2000000000
1990000000
1980000000
1970000000
1960000000
1950000000
1940000000
1930000000
1920000000
1910000000
1900000000
1890000000
1880000000
1870000000
1860000000
1850000000
1840000000
1830000000
1820000000
1810000000
1800000000
1500000000
1490000000
1480000000
1470000000
1460000000
1450000000
1440000000
1430000000
1420000000
1410000000
1400000000
1390000000
1380000000
1370000000
1360000000
1350000000
1340000000
1330000000
1320000000
1310000000
1300000000
1290000000
860000000
570000000
560000000
550000000
540000000
530000000
520000000
510000000
500000000
490000000
480000000
470000000
460000000
450000000
440000000
430000000
420000000
410000000
400000000
390000000
30000000
20000000
10000000
0`
const fs = require('fs')

// Number of slices each 10,000,000-wide segment is divided into, and the width of one slice.
const splitCount = 1
const step = 10000000 / splitCount

// Expand every segment lower bound into `splitCount` slice lower bounds,
// highest slice first so the overall list stays in descending order.
const boundaries = []
for (const line of segmentText.split('\n')) {
    const base = Number(line)
    for (let slice = splitCount - 1; slice >= 0; slice--) {
        boundaries.push(base + slice * step)
    }
}

// One boundary per line, written to a file in the current working directory.
const output = boundaries.join('\n')
// console.log(output)
fs.writeFileSync('comment id segment.txt', output, 'utf-8')

View File

@@ -0,0 +1,147 @@
-- Refresh the statistics stored in the `analysis` table
-- songCount tends to time out, and several of these tables are prone to deadlocks while being queried, so run the statistics only when no crawler is running
-- 4G: 4294967296 (4 * 1024 * 1024 * 1024) 64M: 67108864
-- Set innodb_buffer_pool_size=4G in the my.ini configuration file
show variables like "%innodb_buffer_pool_size%";
-- Drop the previous snapshot (keys ending in `_old`) ...
DELETE FROM analysis WHERE `key` LIKE '%_old';
-- ... then turn the current values into the new `_old` snapshot.
-- NOTE(review): `modify_time=modify_time` presumably keeps an auto-updating timestamp column from being touched by the rename; confirm against the table definition.
UPDATE analysis SET `key`=concat(`key`,'_old'), modify_time=modify_time WHERE `key` NOT LIKE '%_old';
-- Recompute every counter; an existing key has its value overwritten instead of raising a duplicate-key error.
INSERT INTO analysis (`key`, `value`) VALUES ('songCount', (SELECT count(*) as count FROM song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('songWaiting', (SELECT count(*) as count FROM wait_fetch_song) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('playlistCount', (SELECT count(*) AS count FROM playlist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('albumCount', (SELECT count(*) as count FROM album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('albumWaiting', (SELECT count(*) as count FROM wait_fetch_album) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('artistCount', (SELECT count(*) AS count FROM artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('artistWaiting', (SELECT count(*) as count FROM wait_fetch_artist) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('lyricCount', (SELECT count(*) AS count FROM lyric) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
-- commentCount is the number of distinct songs that have comments; commentTotalCount is the number of comment rows.
INSERT INTO analysis (`key`, `value`) VALUES ('commentCount', (SELECT count( DISTINCT song_id ) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('commentTotalCount', (SELECT count(*) AS count FROM comment) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('userCount', (SELECT count(*) AS count FROM user) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('songPlaylistCount', (SELECT count(*) AS count FROM song_playlist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('songAlbumCount', (SELECT count(*) AS count FROM song_album_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
INSERT INTO analysis (`key`, `value`) VALUES ('songArtistCount', (SELECT count(*) AS count FROM song_artist_relation) ) ON DUPLICATE KEY UPDATE `value` = VALUES(`value`);
-- First full-table scan after the update:
-- queue every id referenced by a row created after 2022-10-28 into the matching wait_check_* table.
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song_playlist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE create_time > '2022-10-28 00:00:00';
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE create_time > '2022-10-28 00:00:00';
-- Full update:
-- queue every referenced id that has no row yet in its target table (song, lyric, comment_progress, artist, album).
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_artist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_album_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
INSERT IGNORE INTO wait_check_song (id) SELECT song_id FROM song_playlist_relation WHERE song_id NOT IN ( SELECT song_id FROM song );
INSERT IGNORE INTO wait_check_lyric (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM lyric );
INSERT IGNORE INTO wait_check_comment (id) SELECT song_id FROM song WHERE song_id NOT IN ( SELECT song_id FROM comment_progress );
INSERT IGNORE INTO wait_check_artist (id) SELECT artist_id FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist );
INSERT IGNORE INTO wait_check_album (id) SELECT album_id FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album );
-- Distribution of the songs waiting to be crawled (buckets of 10,000,000 ids)
SELECT
    CAST(FLOOR(id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM wait_fetch_song
GROUP BY s
ORDER BY s DESC;

-- Distribution of the albums waiting to be crawled (buckets of 1,000,000 ids)
SELECT
    CAST(FLOOR(id / 1000000) * 1000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM wait_fetch_album
GROUP BY s
ORDER BY s DESC;

-- Distribution of the artists waiting to be crawled (buckets of 100,000 ids)
SELECT
    CAST(FLOOR(id / 100000) * 100000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM wait_fetch_artist
GROUP BY s
ORDER BY s DESC;

-- Distribution of the comments waiting to be crawled (every progress row whose status is not 2)
SELECT
    CAST(FLOOR(song_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM comment_progress
WHERE current_status != 2
GROUP BY s
ORDER BY s DESC;

-- Distribution of the lyrics waiting to be crawled
SELECT
    CAST(FLOOR(id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM wait_fetch_lyric
GROUP BY s
ORDER BY s DESC;

-- Distribution of the songs already stored locally
SELECT
    CAST(FLOOR(song_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM song
GROUP BY s
ORDER BY s DESC;

-- Distribution of the users already stored locally
SELECT
    CAST(FLOOR(user_id / 10000000) * 10000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM user
GROUP BY s
ORDER BY s DESC;

-- Distribution of the albums already stored locally
SELECT
    CAST(FLOOR(album_id / 1000000) * 1000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM album
GROUP BY s
ORDER BY s DESC;

-- Distribution of the artists already stored locally (buckets of 2,000,000 ids)
SELECT
    CAST(FLOOR(artist_id / 2000000) * 2000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM artist
GROUP BY s
ORDER BY s DESC;

-- Distribution of the playlists already stored locally (buckets of 2,000,000 ids)
SELECT
    CAST(FLOOR(playlist_id / 2000000) * 2000000 AS UNSIGNED) AS s,
    COUNT(*) AS count
FROM playlist
GROUP BY s
ORDER BY s DESC;

-- Disk space used by every table of one database, data and indexes listed separately
SELECT
    table_schema AS '数据库',
    table_name AS '表名',
    table_rows AS '记录数',
    TRUNCATE(data_length / 1024 / 1024, 2) AS '数据容量(MB)',
    TRUNCATE(index_length / 1024 / 1024, 2) AS '索引容量(MB)',
    TRUNCATE((data_length + index_length) / 1024 / 1024 / 1024, 2) AS '总容量(GB)'
FROM information_schema.TABLES
WHERE table_schema = 'neteasemusic'
ORDER BY table_rows DESC;

-- Number of rows waiting to be crawled, per kind (2023-12-25)
SELECT 'comment' AS wait_fetch, COUNT(*) AS `count` FROM `comment_progress` WHERE current_status = 0
UNION ALL
SELECT 'album', COUNT(*) FROM `wait_fetch_album`
UNION ALL
SELECT 'artist', COUNT(*) FROM `wait_fetch_artist`
UNION ALL
SELECT 'lyric', COUNT(*) FROM `wait_fetch_lyric`

View File

@@ -0,0 +1,79 @@
windows服务器
cd C:\Users\Administrator\Desktop\tools\netease_music
linux服务器
cd /www/neteasemusic/tools
手机 Termux
pkg update
pkg install git
pkg install nodejs
本地库测试
node index --database neteasemusic_develop --utils song
node index --database neteasemusic_develop --utils album --min 10000000
node index --database neteasemusic_develop --utils album --order desc
node index --database neteasemusic_develop --utils artist
node index --database neteasemusic_develop --utils playlist
node index --database neteasemusic_develop --utils comment --limit 10000
node index --database neteasemusic_develop --utils lyric
node index --database neteasemusic_develop --utils assistant
思路:
通过一首歌查出对应的artist和album然后顺藤摸瓜查出网易云的其他song, album, artist, lyric, comment等
插入rel表的时候同时插入 wait_check_xx 表,然后后续检查这个表,如果不存在,那么就插入对应的 wait_fetch_xxx 表
之后查出 wait_fetch_xxx 表,进行数据拉取,形成闭环
后期:
歌单定时更新rel表中添加一个del字段先将歌单下面的全部置为删除状态再插入的时候把已有歌曲的标记重新修改为正常状态
评论的更新
被删除的artist和album,回头再通过其他表中的数据反查回来
歌曲目前爬取之后会有一部分没有image封面还是需要用旧方法爬取到
说明:
song表中data_version=1的音乐是第一次爬取的时候存在但是后面再爬取时不存在的音乐
后续分区(不能在现有表上修改,只能重新查出数据到新表)
alter table song add partition (
PARTITION p1 VALUES LESS THAN ( 50000000),
PARTITION p2 VALUES LESS THAN (1000000000),
PARTITION p3 VALUES LESS THAN (1500000000),
PARTITION p4 VALUES LESS THAN (2000000000),
PARTITION p5 VALUES LESS THAN MAXVALUE
);
SQL文件说明
sql/structure.sql 中的SQL为最简不包含字段的编码集
sql/neteasemusic.sql 中的SQL为数据库导出包含字段的编码集
项目数据库 CHARACTER SET 统一使用 'utf8mb4'COLLATE 统一使用 'utf8mb4_general_ci'
# # 查看列表
# screen -ls
# # 创建一个screen
# screen + <Enter>
# # 切换到指定屏幕
# screen -r <screen_id>
# # 切出屏幕
# Ctrl + A D

View File

@@ -0,0 +1,40 @@
// const mysql = require('mysql');
// await new Promise(function (resolve, reject) {
// //通过MySQL中方法创建连接对象
// var connection = mysql.createConnection({
// "charset": "utf8mb4",
// "host": "localhost",
// "user": "root",
// "password": "123456",
// "port": 3306,
// "database": ""
// });
// //开始连接
// connection.connect();
// var sql = `
// INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
// ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
// `;
// var params = commentInfoList.map(commentInfo => [
// commentInfo.comment_id,
// commentInfo.parent_comment_id,
// commentInfo.user_id,
// commentInfo.song_id,
// commentInfo.content,
// commentInfo.time,
// commentInfo.like_count,
// commentInfo.comment_type
// ]);
// var formattedSql = connection.format(sql, [params]); // 返回一个格式化后的SQL字符串
// console.log(params); // 打印待插入的参数列表
// console.log(formattedSql); // 打印格式化后的SQL语句
// //最后需要关闭连接
// connection.end();
// });
// process.exit(0);
// node index --utils comment --min 1935500000 --max 1935550000 --limit 10

View File

@@ -0,0 +1,49 @@
# 家笔记本Win11中配置
[mysql]
default-character-set=utf8mb4
[mysqld]
port=3306
default_authentication_plugin=mysql_native_password
basedir=D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/
datadir=D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/data/
character-set-server=utf8mb4
default-storage-engine=InnoDB
max_connections=1000
collation-server=utf8mb4_unicode_ci
init_connect='SET NAMES utf8mb4'
innodb_buffer_pool_size=64M
# 64M 1G 4G 5G
innodb_flush_log_at_trx_commit=1
innodb_lock_wait_timeout=120
innodb_log_buffer_size=4M
innodb_log_file_size=256M
interactive_timeout=120
join_buffer_size=2M
key_buffer_size=32M
log_error_verbosity=1
max_allowed_packet=16M
max_heap_table_size=64M
myisam_max_sort_file_size=64G
myisam_sort_buffer_size=32M
read_buffer_size=512kb
read_rnd_buffer_size=4M
skip-external-locking=on
sort_buffer_size=256kb
table_open_cache=256
thread_cache_size=16
tmp_table_size=64M
wait_timeout=120
skip-log-bin
server_id=100
gtid_mode=off_permissive
enforce_gtid_consistency=on
replicate_do_db=neteasemusic
replicate_ignore_db=mysql
slave_skip_errors=all
[client]
port=3306
default-character-set=utf8mb4

View File

@@ -0,0 +1,65 @@
# 当前配置文件
# 针对网易云音乐爬虫 + 家台式机做了特别调整
[mysql]
default-character-set=utf8mb4
[mysqld]
port=3306
default_authentication_plugin=mysql_native_password
# basedir=D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/
# datadir=D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/data/
basedir=D:/Program/Develop/Environment/phpstudy_pro/Extensions/MySQL8.0.12/
datadir=D:/Program/Develop/Environment/phpstudy_pro/Extensions/MySQL8.0.12/data/
character-set-server=utf8mb4
default-storage-engine=InnoDB
max_connections=1000
collation-server=utf8mb4_unicode_ci
init_connect='SET NAMES utf8mb4'
# 这个参数决定了InnoDB存储引擎的缓冲池大小缓冲池用于缓存数据和索引提高查询和写入的性能。一般建议将这个参数设置为物理内存的50%~80%
# 默认 64M
# 64M 1G 4G 5G
innodb_buffer_pool_size=32G
# 这个参数决定了事务提交时日志刷新到磁盘的频率。如果设置为1默认值则每次事务提交时都会刷新日志这样可以保证数据的一致性和恢复能力但会降低写入性能。如果设置为0或2则每秒刷新一次日志这样可以提高写入性能但会增加数据丢失的风险。可以根据您的业务需求和容忍度选择合适的值
# 默认 1
innodb_flush_log_at_trx_commit=0
innodb_lock_wait_timeout=120
innodb_log_buffer_size=4M
# 这个参数决定了重做日志文件的大小重做日志文件用于记录数据的变化以便在崩溃恢复时重放。这个参数的大小影响着数据库的性能和恢复时间。一般建议将这个参数设置为1~2倍的缓冲池大小但不要超过4G。您的配置文件中将这个参数设置为256M这可能太小了您可以根据您的缓冲池大小适当增大这个值
# 默认 256M
innodb_log_file_size=1G
interactive_timeout=120
join_buffer_size=2M
key_buffer_size=32M
log_error_verbosity=1
max_allowed_packet=16M
max_heap_table_size=64M
myisam_max_sort_file_size=64G
myisam_sort_buffer_size=32M
read_buffer_size=512kb
read_rnd_buffer_size=4M
skip-external-locking=on
sort_buffer_size=256kb
table_open_cache=256
thread_cache_size=16
tmp_table_size=64M
wait_timeout=120
# 这个参数表示关闭二进制日志功能二进制日志用于记录数据的变化以便进行复制或点恢复。如果您不需要这些功能您可以关闭二进制日志这样可以节省磁盘空间和I/O开销提高写入性能。但是如果您需要进行复制或点恢复您必须开启二进制日志并且选择合适的格式和过期时间
skip-log-bin
# replicate_do_db, replicate_ignore_db参数指定复制的数据库。但是如果您关闭了二进制日志功能这些参数就没有意义了因为复制依赖于二进制日志。您可以删除这些参数或者根据您的复制需求重新开启二进制日志
# gtid_mode, enforce_gtid_consistency参数用于开启全局事务标识GTID模式GTID模式可以简化复制的管理和故障恢复。但是如果您关闭了二进制日志功能这些参数也没有意义了因为GTID模式依赖于二进制日志。您可以删除这些参数或者根据您的复制需求重新开启二进制日志
server_id=100
# gtid_mode=off_permissive
# enforce_gtid_consistency=on
# replicate_do_db=neteasemusic
# replicate_ignore_db=mysql
# slave_skip_errors=all
# Forcing InnoDB Recovery
# https://dev.mysql.com/doc/refman/8.0/en/forcing-innodb-recovery.html
# innodb_force_recovery = 6
[client]
port=3306
default-character-set=utf8mb4

View File

@@ -0,0 +1,8 @@
{
"ignore": [
".git",
".svn",
"node_modules/**/node_modules"
],
"ext": "js"
}

View File

@@ -0,0 +1,74 @@
154000000
153000000
152000000
151000000
150000000
149000000
148000000
147000000
146000000
145000000
144000000
143000000
142000000
141000000
140000000
139000000
138000000
137000000
136000000
135000000
134000000
133000000
132000000
131000000
130000000
129000000
128000000
127000000
126000000
125000000
124000000
123000000
122000000
121000000
120000000
99000000
98000000
97000000
96000000
95000000
94000000
93000000
92000000
91000000
90000000
89000000
88000000
87000000
86000000
85000000
84000000
83000000
82000000
81000000
80000000
79000000
78000000
77000000
76000000
75000000
74000000
73000000
72000000
40000000
39000000
38000000
37000000
36000000
35000000
34000000
3000000
2000000
1000000
0

View File

@@ -0,0 +1,16 @@
54000000
52000000
50000000
48000000
46000000
38000000
36000000
34000000
32000000
30000000
28000000
16000000
14000000
12000000
2000000
0

View File

@@ -0,0 +1,70 @@
2000000000
1990000000
1980000000
1970000000
1960000000
1950000000
1940000000
1930000000
1920000000
1910000000
1900000000
1890000000
1880000000
1870000000
1860000000
1850000000
1840000000
1830000000
1820000000
1810000000
1800000000
1500000000
1490000000
1480000000
1470000000
1460000000
1450000000
1440000000
1430000000
1420000000
1410000000
1400000000
1390000000
1380000000
1370000000
1360000000
1350000000
1340000000
1330000000
1320000000
1310000000
1300000000
1290000000
870000000
860000000
580000000
570000000
560000000
550000000
540000000
530000000
520000000
510000000
500000000
490000000
480000000
470000000
460000000
450000000
440000000
430000000
420000000
410000000
400000000
40000000
30000000
20000000
10000000
5000000
0

View File

@@ -0,0 +1,317 @@
8080000000
8070000000
8060000000
8050000000
8040000000
8030000000
8020000000
8010000000
8000000000
7990000000
7980000000
7970000000
7960000000
7950000000
7940000000
7930000000
7920000000
7910000000
7900000000
7890000000
7880000000
7870000000
7860000000
7850000000
7840000000
7830000000
7820000000
7810000000
7800000000
7790000000
6490000000
6480000000
6470000000
6460000000
6450000000
6440000000
6430000000
6420000000
6410000000
6400000000
6390000000
6380000000
6370000000
6360000000
6350000000
6340000000
6330000000
6320000000
6310000000
6300000000
6290000000
6280000000
6270000000
5890000000
5220000000
5210000000
5200000000
5190000000
5180000000
5170000000
5160000000
5150000000
5140000000
5130000000
5120000000
5110000000
5100000000
5090000000
5080000000
5070000000
5060000000
5050000000
5040000000
5030000000
5020000000
5010000000
5000000000
4990000000
4980000000
4970000000
4960000000
4950000000
4940000000
4930000000
4920000000
4910000000
4900000000
4890000000
4880000000
4870000000
4060000000
4050000000
4040000000
4030000000
4020000000
4010000000
4000000000
3990000000
3980000000
3970000000
3960000000
3950000000
3940000000
3930000000
3920000000
3910000000
3900000000
3890000000
3880000000
3870000000
3860000000
3850000000
3840000000
3830000000
3820000000
3810000000
3800000000
3790000000
3780000000
3770000000
3760000000
3750000000
3740000000
3730000000
3720000000
3710000000
3700000000
3690000000
3680000000
3670000000
3660000000
3650000000
3640000000
3630000000
3620000000
3610000000
3600000000
3590000000
3580000000
3570000000
3560000000
3550000000
3540000000
3530000000
3520000000
3510000000
3500000000
3490000000
3480000000
3470000000
3460000000
3450000000
3440000000
3430000000
3420000000
3410000000
3400000000
3390000000
3380000000
3370000000
3360000000
3350000000
3340000000
3330000000
3320000000
3310000000
3300000000
3290000000
3280000000
3270000000
3260000000
3250000000
3240000000
3230000000
3220000000
2140000000
2130000000
2120000000
2110000000
2100000000
2090000000
2080000000
2070000000
2060000000
2050000000
2040000000
2030000000
2020000000
2010000000
2000000000
1990000000
1980000000
1970000000
1960000000
1950000000
1940000000
1930000000
1920000000
1910000000
1900000000
1890000000
1880000000
1870000000
1860000000
1850000000
1840000000
1830000000
1820000000
1810000000
1800000000
1790000000
1780000000
1770000000
1760000000
1750000000
1740000000
1730000000
1720000000
1710000000
1700000000
1690000000
1680000000
1670000000
1660000000
1650000000
1640000000
1630000000
1620000000
1610000000
1600000000
1590000000
1580000000
1570000000
1560000000
1550000000
1540000000
1530000000
1520000000
1510000000
1500000000
1490000000
1480000000
1470000000
1460000000
1450000000
1440000000
1430000000
1420000000
1410000000
1400000000
1390000000
1380000000
1370000000
1360000000
1350000000
1340000000
1330000000
1320000000
1310000000
1300000000
1290000000
650000000
640000000
630000000
620000000
610000000
600000000
590000000
580000000
570000000
560000000
550000000
540000000
530000000
520000000
510000000
500000000
490000000
480000000
470000000
460000000
450000000
440000000
430000000
420000000
410000000
400000000
390000000
380000000
370000000
360000000
350000000
340000000
330000000
320000000
310000000
300000000
290000000
280000000
270000000
260000000
250000000
140000000
130000000
120000000
110000000
100000000
90000000
80000000
70000000
60000000
50000000
40000000
30000000
20000000
10000000
0

View File

@@ -0,0 +1,106 @@
const fs = require('fs');
const path = require('path');
const absPath = `D:/sql_export`;
/**
 * Converts a number to a string and left-pads it up to a minimum length.
 *
 * @param {number} num - Value to convert.
 * @param {string} fillers - Text repeated once for every missing character.
 * @param {number} length - Minimum length of the result.
 * @returns {string} The padded text, or the plain text when it is already long enough.
 */
function fill(num, fillers, length) {
    const text = `${num}`;
    if (text.length < length) {
        // Joining (missing + 1) empty slots yields `fillers` repeated `missing` times.
        const padding = new Array(length - text.length + 1).join(fillers);
        return padding + text;
    }
    return text;
}
// #############################################
// const export_gap = 50000000;
// const partition_gap = 50000000;
// const table = "comment_1024";
// const index = "comment_id";
// const exportTablePrefix = "comment_export_";
// let sqlArr1 = [];
// let sqlArr2 = [];
// let sqlArr3 = [];
// for (let i = 200; i < 300; i++) {
// // for (let i = 100; i < 200; i++) {
// let where = `${index} >= ${fill(i * export_gap, ' ', 12)} and ${index} < ${fill((i + 1) * export_gap, ' ', 12)}`;
// let sql_create_table = `create table ${exportTablePrefix}${fill(i, '0', 4)} select * from ${table} where ${where};`;
// sqlArr1.push(sql_create_table);
// let sql_delete_rows = `DELETE FROM ${table} WHERE ${where};`;
// sqlArr2.push(sql_delete_rows);
// let sql_partition = ` PARTITION p${fill(i, '0', 4)} VALUES LESS THAN (${fill((i + 1) * partition_gap, ' ', 12)})`;
// sqlArr3.push(sql_partition);
// }
// sqlArr3.push(` PARTITION p_max VALUES LESS THAN MAXVALUE`);
// 建表
// console.log(sqlArr1.join('\n'));
// 删除原表数据
// console.log(sqlArr2.join('\n'));
// 新创建表的分区
// console.log(`partition (\n${sqlArr3.join(',\n')}\n)`);
// #############################################
// 删除表
// for (let i = 107; i < 200; i++) {
// console.log(`DROP TABLE IF EXISTS comment_export_${fill(i, '0', 4)};`);
// }
// #############################################
// // 使用 mysqldump 分块导出数据表
// let rangeTxtName = "song"; // 分布区间 "song" "album" "artist" "user"
// const dumpTable = "lyric"; // "comment_progress";
// const fieldName = `song_id`;
// var a = fs.readFileSync(path.join(__dirname, `distribution_range/${rangeTxtName}.txt`), "utf-8").trim().split("\n").reverse().map(i => i.trim());
// // console.log(a);
// let outputArr = [`@echo off`, `D:`, `cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin`];
// for (let i = 0; i < a.length; i++) {
// let where;
// if (a[i + 1]) {
// where = `${fieldName}>=${a[i]} and ${fieldName}<${a[i + 1]}`;
// } else {
// where = `${fieldName}>=${a[i]}`;
// }
// outputArr.push(`mysqldump neteasemusic -hrm-bp18qrc78dj7vd3newo.rwlb.rds.aliyuncs.com -uroot -pOj13EzoppxXvMmjPKh --tables ${dumpTable} --where="${where}" --skip-add-drop-table --set-gtid-purged=OFF > ${absPath}/${dumpTable}_${fill(i, '0', 4)}.sql`);
// }
// outputArr.push("echo done.");
// console.log(outputArr.join('\n\n'));
// #############################################
// // 使用 mysqldump 分块导出数据表
// let rangeTxtName = "song"; // 分布区间 "song" "album" "artist" "user"
// const fieldName = `song_id`;
// var a = fs.readFileSync(path.join(__dirname, `distribution_range/${rangeTxtName}.txt`), "utf-8").trim().split("\n").reverse().map(i => i.trim());
// // console.log(a);
// let outputArr = [`@echo off`, `D:`, `cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin`];
// for (let i = 0; i < a.length; i++) {
// let where;
// if (a[i + 1]) {
// where = `${fieldName}>=${a[i]} and ${fieldName}<${a[i + 1]}`;
// } else {
// where = `${fieldName}>=${a[i]}`;
// }
// outputArr.push(`INSERT INTO song SELECT * FROM song_old WHERE ${where}; -- ${i}`);
// }
// outputArr.push("echo done.");
// console.log(outputArr.join('\n'));
// #############################################
// Print one `start cmd` line per id range so that every range is crawled in its own console window.
// The boundaries come from distribution_range/user.txt (one lower bound per line, highest first);
// the list is reversed so that the ranges are emitted in ascending order.
// NOTE(review): the commands run the `comment` util but take their ranges from user.txt — confirm this is the intended range file.
const outputArr = [];
const a = fs.readFileSync(path.join(__dirname, `distribution_range/user.txt`), "utf-8")
    .trim()
    .split("\n")
    .reverse()
    .map(i => i.trim())
    .filter(i => i !== ""); // ignore blank lines so that no command gets an empty --min
// a = a.filter((val, index) => index % 15 == 0); // drop some boundaries, otherwise there are too many commands
for (let i = 0; i < a.length; i++) {
    // The last range has no upper bound: omit --max instead of emitting "--max undefined".
    const maxArg = a[i + 1] ? ` --max ${a[i + 1]}` : "";
    outputArr.push(`start cmd /k "node index --utils comment --min ${a[i]}${maxArg} --limit 10000"`);
}
outputArr.push("echo done.");
console.log(outputArr.join('\n'));

View File

@@ -0,0 +1,44 @@
// Names of the tables the maintenance statements are generated for.
const table = [
    "song",
    "album",
    "artist",
    "comment",
    "lyric",
    "user",
    "category",
    "playlist",
    "comment_progress",
    "song_album_relation",
    "song_artist_relation",
    "song_playlist_relation",
    "wait_check_album",
    "wait_check_artist",
    "wait_check_comment",
    "wait_check_lyric",
    "wait_check_song",
    "wait_fetch_album",
    "wait_fetch_artist",
    "wait_fetch_lyric",
    "wait_fetch_song",
    "analysis",
    "log",
    // "hifini_forum",
    // "hifini_tag",
    // "hifini_thread",
    // "hifini_thread_tag_relation",
];

// One statement per table; uncomment a group below to generate it as well.
const sqlList = [
    // OPTIMIZE TABLE
    ...table.map((tableName) => `OPTIMIZE TABLE ${tableName};`),
    // RENAME TABLE: move the tables to another database
    // ...table.map((tableName) => `RENAME TABLE neteasemusic.${tableName} TO neteasemusic_develop.${tableName};`),
];

// Print the statements so they can be pasted into a MySQL client.
console.log(sqlList.join('\n'));

View File

@@ -0,0 +1,301 @@
@echo off
D:
cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin
mysql -hlocalhost -uroot -proot neteasemusic
use neteasemusic;
source D:/sql_export/album/album_0.sql
rename table album to album_0000;
source D:/sql_export/album/album_1.sql
rename table album to album_0001;
source D:/sql_export/album/album_2.sql
rename table album to album_0002;
source D:/sql_export/album/album_3.sql
rename table album to album_0003;
source D:/sql_export/album/album_4.sql
rename table album to album_0004;
source D:/sql_export/album/album_5.sql
rename table album to album_0005;
source D:/sql_export/album/album_6.sql
rename table album to album_0006;
source D:/sql_export/album/album_7.sql
rename table album to album_0007;
source D:/sql_export/album/album_8.sql
rename table album to album_0008;
source D:/sql_export/album/album_9.sql
rename table album to album_0009;
source D:/sql_export/album/album_10.sql
rename table album to album_0010;
source D:/sql_export/album/album_11.sql
rename table album to album_0011;
source D:/sql_export/album/album_12.sql
rename table album to album_0012;
source D:/sql_export/album/album_13.sql
rename table album to album_0013;
source D:/sql_export/album/album_14.sql
rename table album to album_0014;
source D:/sql_export/album/album_15.sql
rename table album to album_0015;
source D:/sql_export/album/album_16.sql
rename table album to album_0016;
source D:/sql_export/album/album_17.sql
rename table album to album_0017;
source D:/sql_export/album/album_18.sql
rename table album to album_0018;
source D:/sql_export/album/album_19.sql
rename table album to album_0019;
source D:/sql_export/album/album_20.sql
rename table album to album_0020;
source D:/sql_export/album/album_21.sql
rename table album to album_0021;
source D:/sql_export/album/album_22.sql
rename table album to album_0022;
source D:/sql_export/album/album_23.sql
rename table album to album_0023;
source D:/sql_export/album/album_24.sql
rename table album to album_0024;
source D:/sql_export/album/album_25.sql
rename table album to album_0025;
source D:/sql_export/album/album_26.sql
rename table album to album_0026;
source D:/sql_export/album/album_27.sql
rename table album to album_0027;
source D:/sql_export/album/album_28.sql
rename table album to album_0028;
source D:/sql_export/album/album_29.sql
rename table album to album_0029;
source D:/sql_export/album/album_30.sql
rename table album to album_0030;
source D:/sql_export/album/album_31.sql
rename table album to album_0031;
source D:/sql_export/album/album_32.sql
rename table album to album_0032;
source D:/sql_export/album/album_33.sql
rename table album to album_0033;
source D:/sql_export/album/album_34.sql
rename table album to album_0034;
source D:/sql_export/album/album_35.sql
rename table album to album_0035;
source D:/sql_export/album/album_36.sql
rename table album to album_0036;
source D:/sql_export/album/album_37.sql
rename table album to album_0037;
source D:/sql_export/album/album_38.sql
rename table album to album_0038;
source D:/sql_export/album/album_39.sql
rename table album to album_0039;
source D:/sql_export/album/album_40.sql
rename table album to album_0040;
source D:/sql_export/album/album_41.sql
rename table album to album_0041;
source D:/sql_export/album/album_42.sql
rename table album to album_0042;
source D:/sql_export/album/album_43.sql
rename table album to album_0043;
source D:/sql_export/album/album_44.sql
rename table album to album_0044;
source D:/sql_export/album/album_45.sql
rename table album to album_0045;
source D:/sql_export/album/album_46.sql
rename table album to album_0046;
source D:/sql_export/album/album_47.sql
rename table album to album_0047;
source D:/sql_export/album/album_48.sql
rename table album to album_0048;
source D:/sql_export/album/album_49.sql
rename table album to album_0049;
source D:/sql_export/album/album_50.sql
rename table album to album_0050;
source D:/sql_export/album/album_51.sql
rename table album to album_0051;
source D:/sql_export/album/album_52.sql
rename table album to album_0052;
source D:/sql_export/album/album_53.sql
rename table album to album_0053;
source D:/sql_export/album/album_54.sql
rename table album to album_0054;
source D:/sql_export/album/album_55.sql
rename table album to album_0055;
source D:/sql_export/album/album_56.sql
rename table album to album_0056;
source D:/sql_export/album/album_57.sql
rename table album to album_0057;
source D:/sql_export/album/album_58.sql
rename table album to album_0058;
source D:/sql_export/album/album_59.sql
rename table album to album_0059;
source D:/sql_export/album/album_60.sql
rename table album to album_0060;
source D:/sql_export/album/album_61.sql
rename table album to album_0061;
source D:/sql_export/album/album_62.sql
rename table album to album_0062;
source D:/sql_export/album/album_63.sql
rename table album to album_0063;
source D:/sql_export/album/album_64.sql
rename table album to album_0064;
source D:/sql_export/album/album_65.sql
rename table album to album_0065;
source D:/sql_export/album/album_66.sql
rename table album to album_0066;
source D:/sql_export/album/album_67.sql
rename table album to album_0067;
source D:/sql_export/album/album_68.sql
rename table album to album_0068;
source D:/sql_export/album/album_69.sql
rename table album to album_0069;
source D:/sql_export/album/album_70.sql
rename table album to album_0070;
source D:/sql_export/album/album_71.sql
rename table album to album_0071;
source D:/sql_export/album/album_72.sql
rename table album to album_0072;
source D:/sql_export/album/album_73.sql
rename table album to album_0073;
rename table album_0000 to album;
INSERT IGNORE INTO album SELECT * FROM album_0001;
drop table album_0001;
INSERT IGNORE INTO album SELECT * FROM album_0002;
drop table album_0002;
INSERT IGNORE INTO album SELECT * FROM album_0003;
drop table album_0003;
INSERT IGNORE INTO album SELECT * FROM album_0004;
drop table album_0004;
INSERT IGNORE INTO album SELECT * FROM album_0005;
drop table album_0005;
INSERT IGNORE INTO album SELECT * FROM album_0006;
drop table album_0006;
INSERT IGNORE INTO album SELECT * FROM album_0007;
drop table album_0007;
INSERT IGNORE INTO album SELECT * FROM album_0008;
drop table album_0008;
INSERT IGNORE INTO album SELECT * FROM album_0009;
drop table album_0009;
INSERT IGNORE INTO album SELECT * FROM album_0010;
drop table album_0010;
INSERT IGNORE INTO album SELECT * FROM album_0011;
drop table album_0011;
INSERT IGNORE INTO album SELECT * FROM album_0012;
drop table album_0012;
INSERT IGNORE INTO album SELECT * FROM album_0013;
drop table album_0013;
INSERT IGNORE INTO album SELECT * FROM album_0014;
drop table album_0014;
INSERT IGNORE INTO album SELECT * FROM album_0015;
drop table album_0015;
INSERT IGNORE INTO album SELECT * FROM album_0016;
drop table album_0016;
INSERT IGNORE INTO album SELECT * FROM album_0017;
drop table album_0017;
INSERT IGNORE INTO album SELECT * FROM album_0018;
drop table album_0018;
INSERT IGNORE INTO album SELECT * FROM album_0019;
drop table album_0019;
INSERT IGNORE INTO album SELECT * FROM album_0020;
drop table album_0020;
INSERT IGNORE INTO album SELECT * FROM album_0021;
drop table album_0021;
INSERT IGNORE INTO album SELECT * FROM album_0022;
drop table album_0022;
INSERT IGNORE INTO album SELECT * FROM album_0023;
drop table album_0023;
INSERT IGNORE INTO album SELECT * FROM album_0024;
drop table album_0024;
INSERT IGNORE INTO album SELECT * FROM album_0025;
drop table album_0025;
INSERT IGNORE INTO album SELECT * FROM album_0026;
drop table album_0026;
INSERT IGNORE INTO album SELECT * FROM album_0027;
drop table album_0027;
INSERT IGNORE INTO album SELECT * FROM album_0028;
drop table album_0028;
INSERT IGNORE INTO album SELECT * FROM album_0029;
drop table album_0029;
INSERT IGNORE INTO album SELECT * FROM album_0030;
drop table album_0030;
INSERT IGNORE INTO album SELECT * FROM album_0031;
drop table album_0031;
INSERT IGNORE INTO album SELECT * FROM album_0032;
drop table album_0032;
INSERT IGNORE INTO album SELECT * FROM album_0033;
drop table album_0033;
INSERT IGNORE INTO album SELECT * FROM album_0034;
drop table album_0034;
INSERT IGNORE INTO album SELECT * FROM album_0035;
drop table album_0035;
INSERT IGNORE INTO album SELECT * FROM album_0036;
drop table album_0036;
INSERT IGNORE INTO album SELECT * FROM album_0037;
drop table album_0037;
INSERT IGNORE INTO album SELECT * FROM album_0038;
drop table album_0038;
INSERT IGNORE INTO album SELECT * FROM album_0039;
drop table album_0039;
INSERT IGNORE INTO album SELECT * FROM album_0040;
drop table album_0040;
INSERT IGNORE INTO album SELECT * FROM album_0041;
drop table album_0041;
INSERT IGNORE INTO album SELECT * FROM album_0042;
drop table album_0042;
INSERT IGNORE INTO album SELECT * FROM album_0043;
drop table album_0043;
INSERT IGNORE INTO album SELECT * FROM album_0044;
drop table album_0044;
INSERT IGNORE INTO album SELECT * FROM album_0045;
drop table album_0045;
INSERT IGNORE INTO album SELECT * FROM album_0046;
drop table album_0046;
INSERT IGNORE INTO album SELECT * FROM album_0047;
drop table album_0047;
INSERT IGNORE INTO album SELECT * FROM album_0048;
drop table album_0048;
INSERT IGNORE INTO album SELECT * FROM album_0049;
drop table album_0049;
INSERT IGNORE INTO album SELECT * FROM album_0050;
drop table album_0050;
INSERT IGNORE INTO album SELECT * FROM album_0051;
drop table album_0051;
INSERT IGNORE INTO album SELECT * FROM album_0052;
drop table album_0052;
INSERT IGNORE INTO album SELECT * FROM album_0053;
drop table album_0053;
INSERT IGNORE INTO album SELECT * FROM album_0054;
drop table album_0054;
INSERT IGNORE INTO album SELECT * FROM album_0055;
drop table album_0055;
INSERT IGNORE INTO album SELECT * FROM album_0056;
drop table album_0056;
INSERT IGNORE INTO album SELECT * FROM album_0057;
drop table album_0057;
INSERT IGNORE INTO album SELECT * FROM album_0058;
drop table album_0058;
INSERT IGNORE INTO album SELECT * FROM album_0059;
drop table album_0059;
INSERT IGNORE INTO album SELECT * FROM album_0060;
drop table album_0060;
INSERT IGNORE INTO album SELECT * FROM album_0061;
drop table album_0061;
INSERT IGNORE INTO album SELECT * FROM album_0062;
drop table album_0062;
INSERT IGNORE INTO album SELECT * FROM album_0063;
drop table album_0063;
INSERT IGNORE INTO album SELECT * FROM album_0064;
drop table album_0064;
INSERT IGNORE INTO album SELECT * FROM album_0065;
drop table album_0065;
INSERT IGNORE INTO album SELECT * FROM album_0066;
drop table album_0066;
INSERT IGNORE INTO album SELECT * FROM album_0067;
drop table album_0067;
INSERT IGNORE INTO album SELECT * FROM album_0068;
drop table album_0068;
INSERT IGNORE INTO album SELECT * FROM album_0069;
drop table album_0069;
INSERT IGNORE INTO album SELECT * FROM album_0070;
drop table album_0070;
INSERT IGNORE INTO album SELECT * FROM album_0071;
drop table album_0071;
INSERT IGNORE INTO album SELECT * FROM album_0072;
drop table album_0072;
INSERT IGNORE INTO album SELECT * FROM album_0073;
drop table album_0073;
echo done.

View File

@@ -0,0 +1,22 @@
@echo off
D:
cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin
mysql -hlocalhost -uroot -proot neteasemusic
use neteasemusic;
source D:/sql_export/artist/artist_0000.sql
source D:/sql_export/artist/artist_0001.sql
source D:/sql_export/artist/artist_0002.sql
source D:/sql_export/artist/artist_0003.sql
source D:/sql_export/artist/artist_0004.sql
source D:/sql_export/artist/artist_0005.sql
source D:/sql_export/artist/artist_0006.sql
source D:/sql_export/artist/artist_0007.sql
source D:/sql_export/artist/artist_0008.sql
source D:/sql_export/artist/artist_0009.sql
source D:/sql_export/artist/artist_0010.sql
source D:/sql_export/artist/artist_0011.sql
source D:/sql_export/artist/artist_0012.sql
source D:/sql_export/artist/artist_0013.sql
source D:/sql_export/artist/artist_0014.sql
source D:/sql_export/artist/artist_0015.sql
echo done.

View File

@@ -0,0 +1,44 @@
const fs = require('fs');
const path = require('path');
const absPath = `D:/sql_export`;
/**
 * Convert a number to a string and left-pad it with a filler.
 *
 * @param {number} num - value to render.
 * @param {string} fillers - text prepended once per missing character.
 * @param {number} length - minimum length of the result.
 * @returns {string} the rendered number; returned unpadded when it is
 *   already at least `length` characters long.
 */
function fill(num, fillers, length) {
    const digits = `${num}`;
    // Nothing to pad (this also covers a non-comparable `length`).
    if (!(digits.length < length)) {
        return digits;
    }
    const missing = length - digits.length;
    // Joining (missing + 1) empty slots yields `fillers` repeated `missing` times.
    const padding = new Array(missing + 1).join(fillers);
    return padding + digits;
}
// Build the list of commands used to import the exported table files with the mysql client.

// ---- settings -------------------------------------------------------------
const firstIndex = 0;
const lastIndex = 15;
const tableName = "song_artist_relation";
// If the files exported by mysqldump contain `drop table if exists`, import each
// file into its own numbered table first, then merge all the data into one table.
const isContainDropTable = false;
// Whether the serial number in the exported file names is zero-padded to 4 digits.
const fileNameSerialFillZero = true;

// Name of the temporary per-file table, e.g. `<tableName>_0003`.
const numberedTable = (index) => `${tableName}_${fill(index, '0', 4)}`;

// ---- command list ---------------------------------------------------------
const shellPrelude = [
    `@echo off`,
    `D:`,
    `cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin`,
    `mysql -hlocalhost -uroot -proot neteasemusic`
].join('\n');
const commandLines = [shellPrelude, `use neteasemusic;`];

// One `source` per exported file, optionally parking the imported table under a numbered name.
for (let index = firstIndex; index <= lastIndex; index += 1) {
    const fileSerial = fileNameSerialFillZero ? fill(index, '0', 4) : index;
    commandLines.push(`source ${absPath}/${tableName}/${tableName}_${fileSerial}.sql`);
    if (isContainDropTable) {
        commandLines.push(`rename table ${tableName} to ${numberedTable(index)};`);
    }
}

// Merge step: the first numbered table becomes the final table, the rest are copied in and dropped.
if (isContainDropTable) {
    commandLines.push(`rename table ${numberedTable(firstIndex)} to ${tableName};`);
    for (let index = firstIndex + 1; index <= lastIndex; index += 1) {
        commandLines.push(
            `INSERT IGNORE INTO ${tableName} SELECT * FROM ${numberedTable(index)};`,
            `drop table ${numberedTable(index)};`
        );
    }
}
commandLines.push("echo done.");

// Print the result and save it next to this script.
const outputText = commandLines.join('\n');
console.log(outputText);
fs.writeFileSync(path.join(__dirname, `${tableName}_bat.txt`), outputText);

View File

@@ -0,0 +1,76 @@
@echo off
D:
cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin
mysql -hlocalhost -uroot -proot neteasemusic
use neteasemusic;
source D:/sql_export/lyric/lyric_0000.sql
source D:/sql_export/lyric/lyric_0001.sql
source D:/sql_export/lyric/lyric_0002.sql
source D:/sql_export/lyric/lyric_0003.sql
source D:/sql_export/lyric/lyric_0004.sql
source D:/sql_export/lyric/lyric_0005.sql
source D:/sql_export/lyric/lyric_0006.sql
source D:/sql_export/lyric/lyric_0007.sql
source D:/sql_export/lyric/lyric_0008.sql
source D:/sql_export/lyric/lyric_0009.sql
source D:/sql_export/lyric/lyric_0010.sql
source D:/sql_export/lyric/lyric_0011.sql
source D:/sql_export/lyric/lyric_0012.sql
source D:/sql_export/lyric/lyric_0013.sql
source D:/sql_export/lyric/lyric_0014.sql
source D:/sql_export/lyric/lyric_0015.sql
source D:/sql_export/lyric/lyric_0016.sql
source D:/sql_export/lyric/lyric_0017.sql
source D:/sql_export/lyric/lyric_0018.sql
source D:/sql_export/lyric/lyric_0019.sql
source D:/sql_export/lyric/lyric_0020.sql
source D:/sql_export/lyric/lyric_0021.sql
source D:/sql_export/lyric/lyric_0022.sql
source D:/sql_export/lyric/lyric_0023.sql
source D:/sql_export/lyric/lyric_0024.sql
source D:/sql_export/lyric/lyric_0025.sql
source D:/sql_export/lyric/lyric_0026.sql
source D:/sql_export/lyric/lyric_0027.sql
source D:/sql_export/lyric/lyric_0028.sql
source D:/sql_export/lyric/lyric_0029.sql
source D:/sql_export/lyric/lyric_0030.sql
source D:/sql_export/lyric/lyric_0031.sql
source D:/sql_export/lyric/lyric_0032.sql
source D:/sql_export/lyric/lyric_0033.sql
source D:/sql_export/lyric/lyric_0034.sql
source D:/sql_export/lyric/lyric_0035.sql
source D:/sql_export/lyric/lyric_0036.sql
source D:/sql_export/lyric/lyric_0037.sql
source D:/sql_export/lyric/lyric_0038.sql
source D:/sql_export/lyric/lyric_0039.sql
source D:/sql_export/lyric/lyric_0040.sql
source D:/sql_export/lyric/lyric_0041.sql
source D:/sql_export/lyric/lyric_0042.sql
source D:/sql_export/lyric/lyric_0043.sql
source D:/sql_export/lyric/lyric_0044.sql
source D:/sql_export/lyric/lyric_0045.sql
source D:/sql_export/lyric/lyric_0046.sql
source D:/sql_export/lyric/lyric_0047.sql
source D:/sql_export/lyric/lyric_0048.sql
source D:/sql_export/lyric/lyric_0049.sql
source D:/sql_export/lyric/lyric_0050.sql
source D:/sql_export/lyric/lyric_0051.sql
source D:/sql_export/lyric/lyric_0052.sql
source D:/sql_export/lyric/lyric_0053.sql
source D:/sql_export/lyric/lyric_0054.sql
source D:/sql_export/lyric/lyric_0055.sql
source D:/sql_export/lyric/lyric_0056.sql
source D:/sql_export/lyric/lyric_0057.sql
source D:/sql_export/lyric/lyric_0058.sql
source D:/sql_export/lyric/lyric_0059.sql
source D:/sql_export/lyric/lyric_0060.sql
source D:/sql_export/lyric/lyric_0061.sql
source D:/sql_export/lyric/lyric_0062.sql
source D:/sql_export/lyric/lyric_0063.sql
source D:/sql_export/lyric/lyric_0064.sql
source D:/sql_export/lyric/lyric_0065.sql
source D:/sql_export/lyric/lyric_0066.sql
source D:/sql_export/lyric/lyric_0067.sql
source D:/sql_export/lyric/lyric_0068.sql
source D:/sql_export/lyric/lyric_0069.sql
echo done.

View File

@@ -0,0 +1,22 @@
@echo off
D:
cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin
mysql -hlocalhost -uroot -proot neteasemusic
use neteasemusic;
source D:/sql_export/song_artist_relation/song_artist_relation_0000.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0001.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0002.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0003.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0004.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0005.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0006.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0007.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0008.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0009.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0010.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0011.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0012.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0013.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0014.sql
source D:/sql_export/song_artist_relation/song_artist_relation_0015.sql
echo done.

View File

@@ -0,0 +1,285 @@
@echo off
D:
cd D:/Program/Development/Environment/phpstudy_pro/Extensions/MySQL8.0.12/bin
mysql -hlocalhost -uroot -proot neteasemusic
use neteasemusic;
source D:/sql_export/song/song_0.sql
rename table song to song_0000;
source D:/sql_export/song/song_1.sql
rename table song to song_0001;
source D:/sql_export/song/song_2.sql
rename table song to song_0002;
source D:/sql_export/song/song_3.sql
rename table song to song_0003;
source D:/sql_export/song/song_4.sql
rename table song to song_0004;
source D:/sql_export/song/song_5.sql
rename table song to song_0005;
source D:/sql_export/song/song_6.sql
rename table song to song_0006;
source D:/sql_export/song/song_7.sql
rename table song to song_0007;
source D:/sql_export/song/song_8.sql
rename table song to song_0008;
source D:/sql_export/song/song_9.sql
rename table song to song_0009;
source D:/sql_export/song/song_10.sql
rename table song to song_0010;
source D:/sql_export/song/song_11.sql
rename table song to song_0011;
source D:/sql_export/song/song_12.sql
rename table song to song_0012;
source D:/sql_export/song/song_13.sql
rename table song to song_0013;
source D:/sql_export/song/song_14.sql
rename table song to song_0014;
source D:/sql_export/song/song_15.sql
rename table song to song_0015;
source D:/sql_export/song/song_16.sql
rename table song to song_0016;
source D:/sql_export/song/song_17.sql
rename table song to song_0017;
source D:/sql_export/song/song_18.sql
rename table song to song_0018;
source D:/sql_export/song/song_19.sql
rename table song to song_0019;
source D:/sql_export/song/song_20.sql
rename table song to song_0020;
source D:/sql_export/song/song_21.sql
rename table song to song_0021;
source D:/sql_export/song/song_22.sql
rename table song to song_0022;
source D:/sql_export/song/song_23.sql
rename table song to song_0023;
source D:/sql_export/song/song_24.sql
rename table song to song_0024;
source D:/sql_export/song/song_25.sql
rename table song to song_0025;
source D:/sql_export/song/song_26.sql
rename table song to song_0026;
source D:/sql_export/song/song_27.sql
rename table song to song_0027;
source D:/sql_export/song/song_28.sql
rename table song to song_0028;
source D:/sql_export/song/song_29.sql
rename table song to song_0029;
source D:/sql_export/song/song_30.sql
rename table song to song_0030;
source D:/sql_export/song/song_31.sql
rename table song to song_0031;
source D:/sql_export/song/song_32.sql
rename table song to song_0032;
source D:/sql_export/song/song_33.sql
rename table song to song_0033;
source D:/sql_export/song/song_34.sql
rename table song to song_0034;
source D:/sql_export/song/song_35.sql
rename table song to song_0035;
source D:/sql_export/song/song_36.sql
rename table song to song_0036;
source D:/sql_export/song/song_37.sql
rename table song to song_0037;
source D:/sql_export/song/song_38.sql
rename table song to song_0038;
source D:/sql_export/song/song_39.sql
rename table song to song_0039;
source D:/sql_export/song/song_40.sql
rename table song to song_0040;
source D:/sql_export/song/song_41.sql
rename table song to song_0041;
source D:/sql_export/song/song_42.sql
rename table song to song_0042;
source D:/sql_export/song/song_43.sql
rename table song to song_0043;
source D:/sql_export/song/song_44.sql
rename table song to song_0044;
source D:/sql_export/song/song_45.sql
rename table song to song_0045;
source D:/sql_export/song/song_46.sql
rename table song to song_0046;
source D:/sql_export/song/song_47.sql
rename table song to song_0047;
source D:/sql_export/song/song_48.sql
rename table song to song_0048;
source D:/sql_export/song/song_49.sql
rename table song to song_0049;
source D:/sql_export/song/song_50.sql
rename table song to song_0050;
source D:/sql_export/song/song_51.sql
rename table song to song_0051;
source D:/sql_export/song/song_52.sql
rename table song to song_0052;
source D:/sql_export/song/song_53.sql
rename table song to song_0053;
source D:/sql_export/song/song_54.sql
rename table song to song_0054;
source D:/sql_export/song/song_55.sql
rename table song to song_0055;
source D:/sql_export/song/song_56.sql
rename table song to song_0056;
source D:/sql_export/song/song_57.sql
rename table song to song_0057;
source D:/sql_export/song/song_58.sql
rename table song to song_0058;
source D:/sql_export/song/song_59.sql
rename table song to song_0059;
source D:/sql_export/song/song_60.sql
rename table song to song_0060;
source D:/sql_export/song/song_61.sql
rename table song to song_0061;
source D:/sql_export/song/song_62.sql
rename table song to song_0062;
source D:/sql_export/song/song_63.sql
rename table song to song_0063;
source D:/sql_export/song/song_64.sql
rename table song to song_0064;
source D:/sql_export/song/song_65.sql
rename table song to song_0065;
source D:/sql_export/song/song_66.sql
rename table song to song_0066;
source D:/sql_export/song/song_67.sql
rename table song to song_0067;
source D:/sql_export/song/song_68.sql
rename table song to song_0068;
source D:/sql_export/song/song_69.sql
rename table song to song_0069;
rename table song_0000 to song;
INSERT IGNORE INTO song SELECT * FROM song_0001;
drop table song_0001;
INSERT IGNORE INTO song SELECT * FROM song_0002;
drop table song_0002;
INSERT IGNORE INTO song SELECT * FROM song_0003;
drop table song_0003;
INSERT IGNORE INTO song SELECT * FROM song_0004;
drop table song_0004;
INSERT IGNORE INTO song SELECT * FROM song_0005;
drop table song_0005;
INSERT IGNORE INTO song SELECT * FROM song_0006;
drop table song_0006;
INSERT IGNORE INTO song SELECT * FROM song_0007;
drop table song_0007;
INSERT IGNORE INTO song SELECT * FROM song_0008;
drop table song_0008;
INSERT IGNORE INTO song SELECT * FROM song_0009;
drop table song_0009;
INSERT IGNORE INTO song SELECT * FROM song_0010;
drop table song_0010;
INSERT IGNORE INTO song SELECT * FROM song_0011;
drop table song_0011;
INSERT IGNORE INTO song SELECT * FROM song_0012;
drop table song_0012;
INSERT IGNORE INTO song SELECT * FROM song_0013;
drop table song_0013;
INSERT IGNORE INTO song SELECT * FROM song_0014;
drop table song_0014;
INSERT IGNORE INTO song SELECT * FROM song_0015;
drop table song_0015;
INSERT IGNORE INTO song SELECT * FROM song_0016;
drop table song_0016;
INSERT IGNORE INTO song SELECT * FROM song_0017;
drop table song_0017;
INSERT IGNORE INTO song SELECT * FROM song_0018;
drop table song_0018;
INSERT IGNORE INTO song SELECT * FROM song_0019;
drop table song_0019;
INSERT IGNORE INTO song SELECT * FROM song_0020;
drop table song_0020;
INSERT IGNORE INTO song SELECT * FROM song_0021;
drop table song_0021;
INSERT IGNORE INTO song SELECT * FROM song_0022;
drop table song_0022;
INSERT IGNORE INTO song SELECT * FROM song_0023;
drop table song_0023;
INSERT IGNORE INTO song SELECT * FROM song_0024;
drop table song_0024;
INSERT IGNORE INTO song SELECT * FROM song_0025;
drop table song_0025;
INSERT IGNORE INTO song SELECT * FROM song_0026;
drop table song_0026;
INSERT IGNORE INTO song SELECT * FROM song_0027;
drop table song_0027;
INSERT IGNORE INTO song SELECT * FROM song_0028;
drop table song_0028;
INSERT IGNORE INTO song SELECT * FROM song_0029;
drop table song_0029;
INSERT IGNORE INTO song SELECT * FROM song_0030;
drop table song_0030;
INSERT IGNORE INTO song SELECT * FROM song_0031;
drop table song_0031;
INSERT IGNORE INTO song SELECT * FROM song_0032;
drop table song_0032;
INSERT IGNORE INTO song SELECT * FROM song_0033;
drop table song_0033;
INSERT IGNORE INTO song SELECT * FROM song_0034;
drop table song_0034;
INSERT IGNORE INTO song SELECT * FROM song_0035;
drop table song_0035;
INSERT IGNORE INTO song SELECT * FROM song_0036;
drop table song_0036;
INSERT IGNORE INTO song SELECT * FROM song_0037;
drop table song_0037;
INSERT IGNORE INTO song SELECT * FROM song_0038;
drop table song_0038;
INSERT IGNORE INTO song SELECT * FROM song_0039;
drop table song_0039;
INSERT IGNORE INTO song SELECT * FROM song_0040;
drop table song_0040;
INSERT IGNORE INTO song SELECT * FROM song_0041;
drop table song_0041;
INSERT IGNORE INTO song SELECT * FROM song_0042;
drop table song_0042;
INSERT IGNORE INTO song SELECT * FROM song_0043;
drop table song_0043;
INSERT IGNORE INTO song SELECT * FROM song_0044;
drop table song_0044;
INSERT IGNORE INTO song SELECT * FROM song_0045;
drop table song_0045;
INSERT IGNORE INTO song SELECT * FROM song_0046;
drop table song_0046;
INSERT IGNORE INTO song SELECT * FROM song_0047;
drop table song_0047;
INSERT IGNORE INTO song SELECT * FROM song_0048;
drop table song_0048;
INSERT IGNORE INTO song SELECT * FROM song_0049;
drop table song_0049;
INSERT IGNORE INTO song SELECT * FROM song_0050;
drop table song_0050;
INSERT IGNORE INTO song SELECT * FROM song_0051;
drop table song_0051;
INSERT IGNORE INTO song SELECT * FROM song_0052;
drop table song_0052;
INSERT IGNORE INTO song SELECT * FROM song_0053;
drop table song_0053;
INSERT IGNORE INTO song SELECT * FROM song_0054;
drop table song_0054;
INSERT IGNORE INTO song SELECT * FROM song_0055;
drop table song_0055;
INSERT IGNORE INTO song SELECT * FROM song_0056;
drop table song_0056;
INSERT IGNORE INTO song SELECT * FROM song_0057;
drop table song_0057;
INSERT IGNORE INTO song SELECT * FROM song_0058;
drop table song_0058;
INSERT IGNORE INTO song SELECT * FROM song_0059;
drop table song_0059;
INSERT IGNORE INTO song SELECT * FROM song_0060;
drop table song_0060;
INSERT IGNORE INTO song SELECT * FROM song_0061;
drop table song_0061;
INSERT IGNORE INTO song SELECT * FROM song_0062;
drop table song_0062;
INSERT IGNORE INTO song SELECT * FROM song_0063;
drop table song_0063;
INSERT IGNORE INTO song SELECT * FROM song_0064;
drop table song_0064;
INSERT IGNORE INTO song SELECT * FROM song_0065;
drop table song_0065;
INSERT IGNORE INTO song SELECT * FROM song_0066;
drop table song_0066;
INSERT IGNORE INTO song SELECT * FROM song_0067;
drop table song_0067;
INSERT IGNORE INTO song SELECT * FROM song_0068;
drop table song_0068;
INSERT IGNORE INTO song SELECT * FROM song_0069;
drop table song_0069;
echo done.

View File

@@ -0,0 +1,375 @@
/*
Navicat Premium Data Transfer
Source Server : localhost MySQL 8.0
Source Server Type : MySQL
Source Server Version : 80012
Source Host : localhost:3306
Source Schema : neteasemusic
Target Server Type : MySQL
Target Server Version : 80012
File Encoding : 65001
Date: 24/12/2023 02:53:47
*/
/*
CREATE DATABASE `neteasemusic` CHARACTER SET 'utf8mb4' COLLATE 'utf8mb4_0900_ai_ci';
USE `neteasemusic`;
*/
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for album
-- ----------------------------
-- Album metadata, one row per album_id.
-- `image` stores only the part of the cover URL after http://p1.music.126.net/.
-- `create_time` is the crawl time; `modify_time` is refreshed automatically on update.
-- The former secondary index `album_id`(`album_id`) was removed: it indexed exactly
-- the primary-key column, so it added write and storage cost without serving any
-- lookup the primary key does not already serve.
DROP TABLE IF EXISTS `album`;
CREATE TABLE `album` (
`album_id` bigint(20) UNSIGNED NOT NULL COMMENT '专辑id',
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '专辑名',
`description` varchar(1500) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '专辑简介',
`full_description` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL COMMENT '专辑简介(全)',
`image` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`pub_date` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '发布日期',
`company` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '发行公司',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
PRIMARY KEY (`album_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for analysis
-- ----------------------------
-- Key/value store of named parameters; `key` is unique and `value` is optional.
-- `modify_time` is refreshed automatically whenever a row is updated.
-- The table declares no PRIMARY KEY; uniqueness is enforced only by the index on `key`.
DROP TABLE IF EXISTS `analysis`;
CREATE TABLE `analysis` (
`key` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '参数名',
`value` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '参数值',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
UNIQUE INDEX `key`(`key`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for artist
-- ----------------------------
-- Artist metadata, one row per artist_id.
-- `image` stores only the part of the cover URL after http://p1.music.126.net/.
-- `create_time` is the crawl time; `modify_time` is refreshed automatically on update.
-- The former secondary index `artist_id`(`artist_id`) was removed: it indexed exactly
-- the primary-key column and was therefore redundant.
DROP TABLE IF EXISTS `artist`;
CREATE TABLE `artist` (
`artist_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌手id',
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '歌手名',
`description` varchar(1500) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '歌手简介',
`image` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`pub_date` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '发布日期',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`artist_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for category
-- ----------------------------
-- Music categories with an auto-increment `id` and a unique `title`.
-- Each row can carry the matching category id on NetEase Music (`netease_id`) and on
-- Qianqian Music (`qianqian_id`), an alias, and the group the category belongs to on each site.
DROP TABLE IF EXISTS `category`;
CREATE TABLE `category` (
`id` int(11) NOT NULL AUTO_INCREMENT COMMENT '分类id',
`title` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '分类名称',
`netease_id` int(11) NULL DEFAULT NULL COMMENT '网易音乐id',
`qianqian_id` int(11) NULL DEFAULT NULL COMMENT '千千音乐id',
`alias` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '分类别名',
`qianqian_group` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '千千音乐 分类所属分组',
`qianqian_group_chinese` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '千千音乐 分类所属分组(中文)',
`netease_group_chinese` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '网易音乐 分类所属分组(中文)',
PRIMARY KEY (`id`) USING BTREE,
UNIQUE INDEX `title`(`title`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for comment
-- ----------------------------
-- Song comments, one row per comment_id, with secondary indexes for lookups by song and by user.
-- `comment_type`: 0 = comments, 1 = hotComments, 2 = topComments.
-- `time` (comment time) is stored as a string, not as a datetime or numeric timestamp.
-- `create_time` is the crawl time; `modify_time` is refreshed automatically on update.
DROP TABLE IF EXISTS `comment`;
CREATE TABLE `comment` (
`comment_id` bigint(20) UNSIGNED NOT NULL COMMENT '评论id',
`parent_comment_id` bigint(20) UNSIGNED NOT NULL COMMENT '父评论id',
`user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户id',
`song_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌曲id',
`content` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '评论内容',
`time` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL DEFAULT '' COMMENT '评论时间',
`like_count` int(11) NOT NULL COMMENT '点赞数',
`comment_type` tinyint(4) UNSIGNED NOT NULL COMMENT '评论类型 0-comments 1-hotComments 2-topComments',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`comment_id`) USING BTREE,
INDEX `song_id`(`song_id`) USING BTREE,
INDEX `user_id`(`user_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for comment_progress
-- ----------------------------
-- Per-song progress of comment crawling, one row per song_id.
-- `max_time`: time of the newest comment when a (full or incremental) crawl starts.
-- `min_time`: time of the last comment reached by the previous crawl; 0 on the first crawl.
-- `current_time`: earliest comment time reached by the crawl in progress.
-- `current_status`: 0 = waiting for a crawl / incremental crawl, 1 = crawling, 2 = finished.
-- The index on `current_status` is kept. The former secondary index `song_id`(`song_id`)
-- was removed: it indexed exactly the primary-key column and was therefore redundant.
DROP TABLE IF EXISTS `comment_progress`;
CREATE TABLE `comment_progress` (
`song_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌曲id',
`max_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '开始爬取/开始增量爬取的时候 最新一条评论的时间',
`min_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '上一次爬取时最后一条评论的时间 第一次爬取时为0',
`current_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '本次爬取/增量时,最早的一条评论时间',
`current_status` tinyint(4) UNSIGNED NOT NULL DEFAULT 0 COMMENT '爬取进度 0-等待爬取/增量爬取 1-爬取中 2-完成',
`total` int(10) NOT NULL DEFAULT 0 COMMENT '评论总数',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`) USING BTREE,
INDEX `current_status`(`current_status`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for log
-- ----------------------------
-- Error log: `name` is the method/database involved, `msg` is the error message
-- (at most 200 characters), `create_time` defaults to the insertion time.
-- NOTE(review): `id` is NOT NULL with only a non-unique index; the table declares no
-- PRIMARY KEY and no AUTO_INCREMENT, so every insert must supply an id explicitly and
-- duplicates are allowed. Confirm this is intended.
DROP TABLE IF EXISTS `log`;
CREATE TABLE `log` (
`id` int(10) UNSIGNED NOT NULL COMMENT 'id',
`name` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '方法/数据库',
`msg` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '出错信息',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
INDEX `id`(`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for lyric
-- ----------------------------
-- Song lyrics, one row per (song_id, version).
-- `create_time` is the crawl time; `modify_time` is refreshed automatically on update.
-- The former secondary index `song_id`(`song_id`) was removed: `song_id` is the leftmost
-- column of the primary key, which already serves lookups by song_id.
-- NOTE(review): the column comment on `version` says -1 means "no data", but the column
-- is UNSIGNED and cannot store -1. Confirm which sentinel the crawler actually writes.
DROP TABLE IF EXISTS `lyric`;
CREATE TABLE `lyric` (
`song_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌曲id',
`version` int(10) UNSIGNED NOT NULL COMMENT '歌词版本 -1代表没有数据',
`lyric` text CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '歌词',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`, `version`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for playlist
-- ----------------------------
-- Playlist metadata, one row per playlist_id.
-- Counters (`track_count`, `play_count`, `subscribed_count`, `share_count`, `comment_count`)
-- are mandatory; tag lists and several reserved fields are stored as JSON.
-- Image columns store only the part of the URL after http://p1.music.126.net/.
-- Flag columns documented as "0-false 1-true" are tinyint and nullable.
-- `create_time` is the crawl time; `modify_time` is refreshed automatically on update.
-- The former secondary index `playlist_id`(`playlist_id`) was removed: it indexed exactly
-- the primary-key column and was therefore redundant.
DROP TABLE IF EXISTS `playlist`;
CREATE TABLE `playlist` (
`playlist_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌单id',
`title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '歌单名',
`english_title` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '歌单名(英文)',
`description` varchar(1500) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '歌单简介',
`user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户id',
`tags` json NULL COMMENT '歌单标签JSON格式数组',
`alg_tags` json NULL COMMENT '歌单标签JSON格式数组',
`playlist_create_time` bigint(20) UNSIGNED NULL DEFAULT NULL COMMENT '创建日期',
`playlist_update_time` bigint(20) UNSIGNED NULL DEFAULT NULL COMMENT '更新日期',
`track_count` int(10) UNSIGNED NOT NULL COMMENT '歌单歌曲数',
`play_count` bigint(20) UNSIGNED NOT NULL COMMENT '歌单播放数',
`subscribed_count` int(10) UNSIGNED NOT NULL COMMENT '歌单收藏数',
`share_count` int(10) UNSIGNED NOT NULL COMMENT '歌单分享数',
`comment_count` int(10) UNSIGNED NOT NULL COMMENT '歌单评论数',
`cover_image` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`title_image` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`background_cover` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`ordered` tinyint(4) NULL DEFAULT NULL COMMENT '排序 0-false 1-true',
`copied` tinyint(4) NULL DEFAULT NULL COMMENT '是否复制 0-false 1-true',
`status` tinyint(4) NULL DEFAULT NULL COMMENT '保留状态码',
`privacy` tinyint(4) NULL DEFAULT NULL COMMENT '保留状态码',
`ad_type` tinyint(4) NULL DEFAULT NULL COMMENT '保留状态码',
`special_type` int(11) NULL DEFAULT NULL COMMENT '保留状态码',
`official_playlist_type` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '保留状态码',
`op_recommend` tinyint(4) NULL DEFAULT NULL COMMENT '保留状态码 0-false 1-true',
`high_quality` tinyint(4) NULL DEFAULT NULL COMMENT '保留状态码 0-false 1-true',
`new_imported` tinyint(4) NULL DEFAULT NULL COMMENT '保留状态码 0-false 1-true',
`update_frequency` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '保留字段',
`grade_status` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '保留字段',
`score` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '保留字段',
`creator` json NULL COMMENT '保留字段JSON格式数组',
`video_ids` json NULL COMMENT '保留字段JSON格式数组',
`videos` json NULL COMMENT '保留字段JSON格式数组',
`banned_track_ids` json NULL COMMENT '保留字段JSON格式数组',
`remix_video` json NULL COMMENT '保留字段JSON格式数组',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
`related_playlist` json NULL COMMENT '是否获取了相关歌单',
PRIMARY KEY (`playlist_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for song
-- ----------------------------
DROP TABLE IF EXISTS `song`;
CREATE TABLE `song` (
`song_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌曲id',
`title` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '歌曲名',
`type` tinyint(4) NOT NULL COMMENT ' 0: 一般类型 1: 通过云盘上传的音乐,网易云不存在公开对应 2: 通过云盘上传的音乐,网易云存在公开对应',
`alias` json NULL COMMENT '歌曲别名JSON格式数组',
`pop` float NULL DEFAULT NULL COMMENT '歌曲热度',
`fee` tinyint(4) NULL DEFAULT NULL COMMENT '版权 0: 免费或无版权 1: VIP 歌曲 4: 购买专辑 8: 非会员可免费播放低音质,会员可播放高音质及下载 fee 为 1 或 8 的歌曲均可单独购买 2 元单曲',
`quality` varchar(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '高/中/低/无损质量文件信息',
`cd` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT 'None或如\"04\", \"1/2\", \"3\", \"null\"的字符串表示歌曲属于专辑中第几张CD对应音频文件的Tag',
`no` int(11) NULL DEFAULT NULL COMMENT '表示歌曲属于CD中第几曲0表示没有这个字段对应音频文件的Tag',
`dj_id` int(10) UNSIGNED NULL DEFAULT NULL COMMENT '0: 不是DJ节目 其他是DJ节目表示DJ ID',
`s_id` int(10) UNSIGNED NULL DEFAULT NULL COMMENT '对于t == 2的歌曲表示匹配到的公开版本歌曲ID',
`origin_cover_type` tinyint(4) NOT NULL DEFAULT 0 COMMENT '0: 未知 1: 原曲 2: 翻唱',
`image` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`pub_date` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '发布日期(弃用)',
`pub_time` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '发布日期 毫秒为单位的Unix时间戳',
`mv` int(10) UNSIGNED NULL DEFAULT NULL COMMENT '非零表示有MV ID',
`single` tinyint(4) NULL DEFAULT NULL COMMENT '0: 有专辑信息或者是DJ节目 1: 未知专辑',
`version` int(11) NOT NULL DEFAULT 1 COMMENT '歌曲版本信息',
`no_copyright_rcmd` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT 'None表示可以播非空表示无版权',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
`data_version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
PRIMARY KEY (`song_id`) USING BTREE,
INDEX `song_id`(`song_id`) USING BTREE,
INDEX `data_version`(`data_version`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for song_album_relation
-- ----------------------------
DROP TABLE IF EXISTS `song_album_relation`;
CREATE TABLE `song_album_relation` (
`song_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌曲id',
`album_id` bigint(20) UNSIGNED NOT NULL COMMENT '专辑id',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`, `album_id`) USING BTREE,
INDEX `song_id`(`song_id`) USING BTREE,
INDEX `album_id`(`album_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for song_artist_relation
-- ----------------------------
DROP TABLE IF EXISTS `song_artist_relation`;
CREATE TABLE `song_artist_relation` (
`song_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌曲id',
`artist_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌手id',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`, `artist_id`) USING BTREE,
INDEX `song_id`(`song_id`) USING BTREE,
INDEX `artist_id`(`artist_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for song_playlist_relation
-- ----------------------------
DROP TABLE IF EXISTS `song_playlist_relation`;
CREATE TABLE `song_playlist_relation` (
`song_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌曲id',
`playlist_id` bigint(20) UNSIGNED NOT NULL COMMENT '歌单id',
`is_del` tinyint(4) NOT NULL DEFAULT 0 COMMENT '歌曲是否从歌单中删除 0-未删除 1-删除',
`alg` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '保留字段',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`, `playlist_id`) USING BTREE,
INDEX `song_id`(`song_id`) USING BTREE,
INDEX `playlist_id`(`playlist_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for user
-- ----------------------------
DROP TABLE IF EXISTS `user`;
CREATE TABLE `user` (
`user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户id',
`user_type` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '用户类型',
`nickname` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '用户昵称',
`avatar_url` varchar(200) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`user_id`) USING BTREE,
INDEX `user_id`(`user_id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_check_album
-- ----------------------------
DROP TABLE IF EXISTS `wait_check_album`;
CREATE TABLE `wait_check_album` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_check_artist
-- ----------------------------
DROP TABLE IF EXISTS `wait_check_artist`;
CREATE TABLE `wait_check_artist` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_check_comment
-- ----------------------------
DROP TABLE IF EXISTS `wait_check_comment`;
CREATE TABLE `wait_check_comment` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_check_lyric
-- ----------------------------
DROP TABLE IF EXISTS `wait_check_lyric`;
CREATE TABLE `wait_check_lyric` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_check_song
-- ----------------------------
DROP TABLE IF EXISTS `wait_check_song`;
CREATE TABLE `wait_check_song` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_fetch_album
-- ----------------------------
DROP TABLE IF EXISTS `wait_fetch_album`;
CREATE TABLE `wait_fetch_album` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
`partition` tinyint(4) UNSIGNED NULL DEFAULT NULL COMMENT '分区 0-4',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_fetch_artist
-- ----------------------------
DROP TABLE IF EXISTS `wait_fetch_artist`;
CREATE TABLE `wait_fetch_artist` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
`partition` tinyint(4) UNSIGNED NULL DEFAULT NULL COMMENT '分区 0-4',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_fetch_lyric
-- ----------------------------
DROP TABLE IF EXISTS `wait_fetch_lyric`;
CREATE TABLE `wait_fetch_lyric` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
`partition` tinyint(4) UNSIGNED NULL DEFAULT NULL COMMENT '分区 0-4',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Table structure for wait_fetch_song
-- ----------------------------
DROP TABLE IF EXISTS `wait_fetch_song`;
CREATE TABLE `wait_fetch_song` (
`id` bigint(20) UNSIGNED NOT NULL COMMENT 'id',
`partition` tinyint(4) UNSIGNED NULL DEFAULT NULL COMMENT '分区 0-4',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -0,0 +1,268 @@
CREATE DATABASE `neteaseMusic`;
USE `neteaseMusic`;
CREATE TABLE `song` (
`song_id` bigint(20) unsigned NOT NULL COMMENT '歌曲id',
`title` varchar(500) NOT NULL COMMENT '歌曲名',
`type` tinyint(4) NOT NULL COMMENT ' 0: 一般类型 1: 通过云盘上传的音乐,网易云不存在公开对应 2: 通过云盘上传的音乐,网易云存在公开对应',
`alias` json DEFAULT NULL COMMENT '歌曲别名JSON格式数组',
`pop` float DEFAULT NULL COMMENT '歌曲热度',
`fee` tinyint(4) DEFAULT NULL COMMENT '版权 0: 免费或无版权 1: VIP 歌曲 4: 购买专辑 8: 非会员可免费播放低音质,会员可播放高音质及下载 fee 为 1 或 8 的歌曲均可单独购买 2 元单曲',
`quality` varchar(500) NOT NULL COMMENT '高/中/低/无损质量文件信息',
`cd` varchar(255) NOT NULL COMMENT 'None或如"04", "1/2", "3", "null"的字符串表示歌曲属于专辑中第几张CD对应音频文件的Tag',
`no` int(11) DEFAULT NULL COMMENT '表示歌曲属于CD中第几曲0表示没有这个字段对应音频文件的Tag',
`dj_id` int(10) unsigned DEFAULT NULL COMMENT '0: 不是DJ节目 其他是DJ节目表示DJ ID',
`s_id` int(10) unsigned DEFAULT NULL COMMENT '对于t == 2的歌曲表示匹配到的公开版本歌曲ID',
`origin_cover_type` tinyint(4) NOT NULL DEFAULT '0' COMMENT '0: 未知 1: 原曲 2: 翻唱',
`image` varchar(200) DEFAULT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`pub_date` varchar(100) DEFAULT NULL COMMENT '发布日期(弃用)',
`pub_time` varchar(100) DEFAULT NULL COMMENT '发布日期 毫秒为单位的Unix时间戳',
`mv` int(10) unsigned DEFAULT NULL COMMENT '非零表示有MV ID',
`single` tinyint(4) DEFAULT NULL COMMENT '0: 有专辑信息或者是DJ节目 1: 未知专辑',
`version` int(11) NOT NULL DEFAULT '1' COMMENT '歌曲版本信息',
`no_copyright_rcmd` varchar(255) DEFAULT NULL COMMENT 'None表示可以播非空表示无版权',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
`data_version` tinyint(4) NOT NULL DEFAULT '1' COMMENT '数据记录版本(如果有字段调整则整体+1)',
PRIMARY KEY (`song_id`),
KEY `song_id` (`song_id`),
KEY `data_version` (`data_version`)
);
CREATE TABLE `artist` (
`artist_id` bigint(20) unsigned NOT NULL COMMENT '歌手id',
`title` varchar(200) NOT NULL COMMENT '歌手名',
`description` varchar(1500) NOT NULL COMMENT '歌手简介',
`image` varchar(200) NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`pub_date` varchar(100) NOT NULL COMMENT '发布日期',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`artist_id`),
KEY `artist_id` (`artist_id`)
);
CREATE TABLE `album` (
`album_id` bigint(20) unsigned NOT NULL COMMENT '专辑id',
`title` varchar(200) NOT NULL COMMENT '专辑名',
`description` varchar(1500) NOT NULL COMMENT '专辑简介',
`full_description` text COMMENT '专辑简介(全)',
`image` varchar(200) NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`pub_date` varchar(100) NOT NULL COMMENT '发布日期',
`company` varchar(100) DEFAULT NULL COMMENT '发行公司',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
`version` tinyint(4) NOT NULL DEFAULT 1 COMMENT '数据记录版本(如果有字段调整则整体+1)',
PRIMARY KEY (`album_id`),
KEY `album_id` (`album_id`)
);
CREATE TABLE `playlist` (
`playlist_id` bigint(20) unsigned NOT NULL COMMENT '歌单id',
`title` varchar(200) NOT NULL COMMENT '歌单名',
`english_title` varchar(200) DEFAULT NULL COMMENT '歌单名(英文)',
`description` varchar(1500) NOT NULL COMMENT '歌单简介',
`user_id` bigint(20) unsigned NOT NULL COMMENT '用户id',
`tags` json DEFAULT NULL COMMENT '歌单标签JSON格式数组',
`alg_tags` json DEFAULT NULL COMMENT '歌单标签JSON格式数组',
`playlist_create_time` bigint(20) unsigned DEFAULT NULL COMMENT '创建日期',
`playlist_update_time` bigint(20) unsigned DEFAULT NULL COMMENT '更新日期',
-- 数据
`track_count` int(10) unsigned NOT NULL COMMENT '歌单歌曲数',
`play_count` bigint(20) unsigned NOT NULL COMMENT '歌单播放数',
`subscribed_count` int(10) unsigned NOT NULL COMMENT '歌单收藏数',
`share_count` int(10) unsigned NOT NULL COMMENT '歌单分享数',
`comment_count` int(10) unsigned NOT NULL COMMENT '歌单评论数',
-- 封面图
`cover_image` varchar(200) NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`title_image` varchar(200) DEFAULT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
`background_cover` varchar(200) DEFAULT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
-- 状态码
`ordered` tinyint(4) NULL COMMENT '排序 0-false 1-true',
`copied` tinyint(4) NULL COMMENT '是否复制 0-false 1-true',
`status` tinyint(4) DEFAULT NULL COMMENT '保留状态码',
`privacy` tinyint(4) DEFAULT NULL COMMENT '保留状态码',
`ad_type` tinyint(4) DEFAULT NULL COMMENT '保留状态码',
`special_type` int(11) DEFAULT NULL COMMENT '保留状态码',
`official_playlist_type` varchar(20) DEFAULT NULL COMMENT '保留状态码',
`op_recommend` tinyint(4) DEFAULT NULL COMMENT '保留状态码 0-false 1-true',
`high_quality` tinyint(4) DEFAULT NULL COMMENT '保留状态码 0-false 1-true',
`new_imported` tinyint(4) DEFAULT NULL COMMENT '保留状态码 0-false 1-true',
`update_frequency` varchar(100) DEFAULT NULL COMMENT '保留字段',
`grade_status` varchar(20) DEFAULT NULL COMMENT '保留字段',
`score` varchar(20) DEFAULT NULL COMMENT '保留字段',
-- 后期调整字段
`creator` json DEFAULT NULL COMMENT '保留字段JSON格式数组',
`video_ids` json DEFAULT NULL COMMENT '保留字段JSON格式数组',
`videos` json DEFAULT NULL COMMENT '保留字段JSON格式数组',
`banned_track_ids` json DEFAULT NULL COMMENT '保留字段JSON格式数组',
`remix_video` json DEFAULT NULL COMMENT '保留字段JSON格式数组',
-- 数据信息
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
`related_playlist` json NULL COMMENT '是否获取了相关歌单',
PRIMARY KEY (`playlist_id`),
KEY `playlist_id` (`playlist_id`)
);
CREATE TABLE `song_playlist_relation` (
`song_id` bigint(20) unsigned NOT NULL COMMENT '歌曲id',
`playlist_id` bigint(20) unsigned NOT NULL COMMENT '歌单id',
`is_del` tinyint(4) NOT NULL DEFAULT 0 COMMENT '歌曲是否从歌单中删除 0-未删除 1-删除',
`alg` varchar(20) DEFAULT NULL COMMENT '保留字段',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`, `playlist_id`),
KEY `song_id` (`song_id`),
KEY `playlist_id` (`playlist_id`)
);
CREATE TABLE `song_album_relation` (
`song_id` bigint(20) unsigned NOT NULL COMMENT '歌曲id',
`album_id` bigint(20) unsigned NOT NULL COMMENT '专辑id',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`,`album_id`),
KEY `song_id` (`song_id`),
KEY `album_id` (`album_id`)
);
-- Relation table pairing a song with one of its artists; the composite
-- primary key keeps each (song_id, artist_id) pair unique.
-- The primary key is declared without an index name, matching the other
-- tables in this schema (MySQL always names the primary key PRIMARY).
CREATE TABLE `song_artist_relation` (
`song_id` bigint(20) unsigned NOT NULL COMMENT '歌曲id',
`artist_id` bigint(20) unsigned NOT NULL COMMENT '歌手id',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`,`artist_id`),
KEY `song_id` (`song_id`),
KEY `artist_id` (`artist_id`)
);
CREATE TABLE `lyric` (
`song_id` bigint(20) unsigned NOT NULL COMMENT '歌曲id',
`version` int(10) unsigned NOT NULL COMMENT '歌词版本 -1代表没有数据',
`lyric` text NOT NULL COMMENT '歌词',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`,`version`),
KEY `song_id` (`song_id`)
);
CREATE TABLE `user` (
`user_id` bigint(20) unsigned NOT NULL COMMENT '用户id',
`user_type` varchar(50) NOT NULL COMMENT '用户类型',
`nickname` varchar(200) NOT NULL COMMENT '用户昵称',
`avatar_url` varchar(200) NOT NULL COMMENT '用户头像 http://p1.music.126.net/ 后面的部分',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`user_id`),
KEY `user_id` (`user_id`)
);
CREATE TABLE `comment` (
`comment_id` bigint(20) unsigned NOT NULL COMMENT '评论id',
`parent_comment_id` bigint(20) unsigned NOT NULL COMMENT '父评论id',
`user_id` bigint(20) unsigned NOT NULL COMMENT '用户id',
`song_id` bigint(20) unsigned NOT NULL COMMENT '歌曲id',
`content` text NOT NULL COMMENT '评论内容',
`time` varchar(50) NOT NULL DEFAULT '' COMMENT '评论时间',
`like_count` int(11) NOT NULL COMMENT '点赞数',
`comment_type` tinyint(4) unsigned NOT NULL COMMENT '评论类型 0-comments 1-hotComments 2-topComments',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`comment_id`),
KEY `song_id` (`song_id`),
KEY `user_id`(`user_id`)
);
CREATE TABLE `comment_progress` (
`song_id` bigint(20) unsigned NOT NULL COMMENT '歌曲id',
`max_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '开始爬取/开始增量爬取的时候 最新一条评论的时间',
`min_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '上一次爬取时最后一条评论的时间 第一次爬取时为0',
`current_time` bigint(20) NOT NULL DEFAULT 0 COMMENT '本次爬取/增量时,最早的一条评论时间',
`current_status` tinyint(4) unsigned NOT NULL DEFAULT 0 COMMENT '爬取进度 0-等待爬取/增量爬取 1-爬取中 2-完成',
`total` int(10) NOT NULL DEFAULT 0 COMMENT '评论总数',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
PRIMARY KEY (`song_id`),
INDEX `current_status` (`current_status`),
INDEX `song_id` (`song_id`)
);
CREATE TABLE `category` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '分类id',
`title` varchar(255) NOT NULL COMMENT '分类名称',
`netease_id` int DEFAULT NULL COMMENT '网易音乐id',
`qianqian_id` int DEFAULT NULL COMMENT '千千音乐id',
`alias` varchar(255) DEFAULT NULL COMMENT '分类别名',
`qianqian_group` varchar(255) DEFAULT NULL COMMENT '千千音乐 分类所属分组',
`qianqian_group_chinese` varchar(255) DEFAULT NULL COMMENT '千千音乐 分类所属分组(中文)',
`netease_group_chinese` varchar(255) DEFAULT NULL COMMENT '网易音乐 分类所属分组(中文)',
PRIMARY KEY (`id`),
UNIQUE KEY `title` (`title`)
);
CREATE TABLE `log` (
`id` int(10) unsigned NOT NULL COMMENT 'id',
`name` varchar(200) NOT NULL COMMENT '方法/数据库',
`msg` varchar(200) NOT NULL COMMENT '出错信息',
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
KEY `id` (`id`)
);
CREATE TABLE `analysis` (
`key` varchar(255) NOT NULL COMMENT '参数名',
`value` varchar(255) DEFAULT NULL COMMENT '参数值',
`modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
UNIQUE KEY `key` (`key`)
);
CREATE TABLE `wait_check_song` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
);
CREATE TABLE `wait_check_artist` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
);
CREATE TABLE `wait_check_album` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
);
CREATE TABLE `wait_check_lyric` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
);
CREATE TABLE `wait_check_comment` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
PRIMARY KEY (`id`)
);
CREATE TABLE `wait_fetch_song` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
`partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
PRIMARY KEY (`id`)
);
CREATE TABLE `wait_fetch_artist` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
`partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
PRIMARY KEY (`id`)
);
CREATE TABLE `wait_fetch_album` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
`partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
PRIMARY KEY (`id`)
);
CREATE TABLE `wait_fetch_lyric` (
`id` bigint(20) unsigned NOT NULL COMMENT 'id',
`partition` tinyint(4) unsigned DEFAULT NULL COMMENT '分区 0-4',
PRIMARY KEY (`id`)
);

View File

@@ -0,0 +1,83 @@
// 定时更新 wait 表
const sleepUtils = require("../../utils/sleepUtils");
const { fill } = require("../../utils/stringUtils");
/**
 * Compute the array difference `a - b`.
 *
 * Example: getDiffSet([1, 2, 3], [4, 5, 6, 1]) returns [2, 3].
 *
 * The membership test uses a Set, so the cost is linear in the combined
 * length of both arrays instead of quadratic. Order and duplicates of `a`
 * are preserved. Values are compared the way Set compares them, which
 * matches `===` for everything except NaN.
 *
 * @param {Array} a - Values to keep unless they also occur in `b`.
 * @param {Array} b - Values to exclude.
 * @returns {Array} A new array with the elements of `a` that are not in `b`.
 */
function getDiffSet(a, b) {
    const excluded = new Set(b);
    return a.filter((item) => !excluded.has(item));
}
/**
 * Drain `wait_check_<tableName>` in batches and queue the ids that are not
 * stored yet.
 *
 * For every batch of up to 5000 ids:
 *   1. read the ids from `wait_check_<tableName>`;
 *   2. look up which of them already exist in `<tableName>` (by `fieldName`);
 *   3. insert the remaining ids with `insertSql`, or into
 *      `wait_fetch_<tableName>` when no custom statement is given;
 *   4. delete the whole batch from `wait_check_<tableName>`;
 *   5. log one progress line.
 * The loop ends when the check table returns no rows.
 *
 * Any error is logged with `console.error` and ends the migration for this
 * table; it is not rethrown.
 *
 * NOTE(review): `dbUtils` is not declared in the visible part of this file;
 * presumably it is provided as a global - confirm.
 *
 * @param {string} tableName - Suffix of the wait tables and name of the data table.
 * @param {string} fieldName - Id column of the data table.
 * @param {?string} [insertSql=null] - Optional bulk-insert statement with a `VALUES ?` placeholder.
 * @returns {Promise<void>}
 */
async function migrateIdsFromCheckToFetch(tableName, fieldName, insertSql = null) {
    try {
        const stepLength = 5000;
        while (true) {
            // Read the next chunk of ids waiting to be checked.
            const idsResult = await dbUtils.query(`SELECT id FROM wait_check_${tableName} LIMIT ${stepLength}`, []);
            const ids = idsResult.map(row => row.id);
            if (ids.length === 0) {
                break;
            }

            // Ids that are already stored do not need to be fetched again.
            const skipIdsResult = await dbUtils.query(`SELECT ${fieldName} as id FROM ${tableName} WHERE ${fieldName} IN ?`, [[ids]]);
            const skipIds = skipIdsResult.map(row => row.id);
            const finalIds = getDiffSet(ids, skipIds);

            // Reset per batch so the log line below never reports the
            // affected-row count of an earlier batch.
            let result = null;
            if (finalIds.length > 0) {
                result = await dbUtils.query(insertSql ? insertSql : `INSERT IGNORE INTO wait_fetch_${tableName} (id) VALUES ?`, [finalIds.map(id => [id])]);
            }

            // The batch has been handled either way; remove it from the check table.
            await dbUtils.query(`DELETE FROM wait_check_${tableName} WHERE id IN ?`, [[ids]]);
            console.log(`table: ${tableName}\t| ${fill(ids[0], 10)} - ${fill(ids.slice(-1)[0], 10)} ${fill(`(${finalIds.length}/${ids.length})`, 10, ' ', true)}\t| affected: ${result ? result.affectedRows : ""}`);
        }
    } catch (e) {
        console.error(e);
    }
}
/**
 * Promise-returning wrapper around `migrateIdsFromCheckToFetch`, intended
 * for running several migrations through `Promise.all`.
 *
 * The migration is awaited directly rather than inside a `new Promise`
 * executor, so a rejection propagates to the caller instead of leaving the
 * returned promise pending forever.
 *
 * @param {string} tableName - Forwarded to `migrateIdsFromCheckToFetch`.
 * @param {string} fieldName - Forwarded to `migrateIdsFromCheckToFetch`.
 * @param {?string} [insertSql] - Forwarded to `migrateIdsFromCheckToFetch`.
 * @returns {Promise<void>} Resolves once the migration has finished.
 */
async function getPromise(tableName, fieldName, insertSql) {
    await migrateIdsFromCheckToFetch(tableName, fieldName, insertSql);
}
/**
 * Run the check-to-fetch migration for every entity type, one after another.
 *
 * The tables are processed sequentially on purpose: touching several tables
 * at the same time raises the chance of database deadlocks.
 *
 * @returns {Promise<void>}
 */
async function updateWaitTable() {
    console.log(`更新待爬取列表`);

    // One entry per migration: [tableName, fieldName, optional custom insert SQL].
    const migrations = [
        ["song", "song_id"],
        ["lyric", "song_id"],
        ["comment", "song_id", `INSERT IGNORE INTO comment_progress (song_id) VALUES ?`],
        ["album", "album_id"],
        ["artist", "artist_id"],
    ];
    for (const migrationArgs of migrations) {
        await migrateIdsFromCheckToFetch(...migrationArgs);
    }

    console.log("All done.\n");
}
// Public API of this module: only the sequential wait-table updater is exported.
module.exports = {
updateWaitTable,
}

View File

@@ -0,0 +1,70 @@
/**
* 该文件来自https://github.com/Binaryify/NeteaseCloudMusicApi/blob/master/util/crypto.js
*/
const crypto = require('crypto')
// Initialisation vector passed to both AES-CBC rounds in weapi().
const iv = Buffer.from('0102030405060708')
// Fixed AES key for the first (inner) encryption round in weapi().
const presetKey = Buffer.from('0CoJUm6Qyw8W8jud')
// Fixed AES key used by linuxapi().
const linuxapiKey = Buffer.from('rFgB&h#%2?^eDg:Q')
// Alphabet the random per-request key in weapi() is drawn from.
const base62 = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
// RSA public key (PEM) used by weapi() to encrypt the random per-request key.
const publicKey =
'-----BEGIN PUBLIC KEY-----\nMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDgtQn2JZ34ZC28NWYpAUd98iZ37BUrX/aKzmFbt7clFSs6sXqHauqKWqdtLkF2KexO40H1YTX8z2lSgBBOAxLsvaklV8k4cBFK9snQXE9/DDaFt6Rr7iVZMldczhC0JNgTz+SHXT6CBHuX3e9SdB1Ua44oncaTWz7OBGLbCiK45wIDAQAB\n-----END PUBLIC KEY-----'
// Fixed AES key shared by eapi() (encrypt) and decrypt().
const eapiKey = 'e82ckenh8dichen8'
/**
 * Encrypt a buffer with AES-128 in the given block mode.
 *
 * @param {Buffer} buffer - Plaintext.
 * @param {string} mode - Mode suffix appended to "aes-128-" (this file uses 'cbc' and 'ecb').
 * @param {Buffer|string} key - AES key.
 * @param {Buffer|string} iv - Initialisation vector; callers pass '' for ECB.
 * @returns {Buffer} The ciphertext, including the final block.
 */
const aesEncrypt = (buffer, mode, key, iv) => {
    const algorithm = `aes-128-${mode}`
    const cipher = crypto.createCipheriv(algorithm, key, iv)
    const head = cipher.update(buffer)
    const tail = cipher.final()
    return Buffer.concat([head, tail])
}
/**
 * RSA-encrypt a buffer without padding.
 *
 * The input is left-padded with zero bytes to 128 bytes before encryption,
 * because RSA_NO_PADDING is used and no padding scheme fills the block.
 *
 * @param {Buffer} buffer - Data to encrypt; at most 128 bytes.
 * @param {string} key - Public key handed to crypto.publicEncrypt.
 * @returns {Buffer} The RSA ciphertext.
 */
const rsaEncrypt = (buffer, key) => {
    const zeros = Buffer.alloc(128 - buffer.length)
    const block = Buffer.concat([zeros, buffer])
    const options = { key: key, padding: crypto.constants.RSA_NO_PADDING }
    return crypto.publicEncrypt(options, block)
}
/**
 * Build the request body for the "weapi" encryption scheme.
 *
 * The JSON payload is AES-CBC encrypted twice: first with the preset key,
 * then (after base64 encoding) with a random 16-character key drawn from
 * the base62 alphabet. The random key is reversed and RSA-encrypted so it
 * can be sent along as `encSecKey`.
 *
 * @param {*} object - Payload; serialised with JSON.stringify.
 * @returns {{params: string, encSecKey: string}} base64 params and hex-encoded encrypted key.
 */
const weapi = (object) => {
    const text = JSON.stringify(object)
    // Map every random byte onto the character code of a base62 character.
    const toBase62Code = (n) => base62.charCodeAt(n % 62)
    const secretKey = crypto.randomBytes(16).map(toBase62Code)

    const firstRound = aesEncrypt(Buffer.from(text), 'cbc', presetKey, iv).toString('base64')
    const params = aesEncrypt(Buffer.from(firstRound), 'cbc', secretKey, iv).toString('base64')

    // reverse() mutates secretKey in place, so it must stay after `params` is computed.
    const encSecKey = rsaEncrypt(secretKey.reverse(), publicKey).toString('hex')

    return { params, encSecKey }
}
/**
 * Build the request body for the "linuxapi" encryption scheme: the JSON
 * payload is AES-128-ECB encrypted with the linuxapi key and emitted as
 * upper-case hex.
 *
 * @param {*} object - Payload; serialised with JSON.stringify.
 * @returns {{eparams: string}} Upper-case hex ciphertext.
 */
const linuxapi = (object) => {
    const text = JSON.stringify(object)
    const encrypted = aesEncrypt(Buffer.from(text), 'ecb', linuxapiKey, '')
    const eparams = encrypted.toString('hex').toUpperCase()
    return { eparams }
}
/**
 * Build the request body for the "eapi" encryption scheme.
 *
 * An MD5 digest over url and payload is appended to the message, the parts
 * are joined with the "-36cd479b6b5-" separator, and the result is
 * AES-128-ECB encrypted with the eapi key and emitted as upper-case hex.
 *
 * @param {string} url - API path that is signed together with the payload.
 * @param {*} object - Payload; values whose typeof is 'object' are JSON-serialised, anything else is used as-is.
 * @returns {{params: string}} Upper-case hex ciphertext.
 */
const eapi = (url, object) => {
    let text = object
    if (typeof object === 'object') {
        text = JSON.stringify(object)
    }
    const message = `nobody${url}use${text}md5forencrypt`
    const md5 = crypto.createHash('md5')
    md5.update(message)
    const digest = md5.digest('hex')
    const data = `${url}-36cd479b6b5-${text}-36cd479b6b5-${digest}`
    const encrypted = aesEncrypt(Buffer.from(data), 'ecb', eapiKey, '')
    return { params: encrypted.toString('hex').toUpperCase() }
}
/**
 * Decrypt a buffer that was AES-128-ECB encrypted with the eapi key.
 *
 * @param {Buffer} cipherBuffer - Raw ciphertext bytes.
 * @returns {Buffer} The decrypted plaintext.
 */
const decrypt = (cipherBuffer) => {
    const decipher = crypto.createDecipheriv('aes-128-ecb', eapiKey, '')
    const head = decipher.update(cipherBuffer)
    const tail = decipher.final()
    return Buffer.concat([head, tail])
}
// Public API: the three request-encryption schemes plus the eapi decryptor.
module.exports = { weapi, linuxapi, eapi, decrypt }

View File

@@ -0,0 +1,264 @@
const dbUtils = global.dbUtils;
module.exports = {
song: {
insertCollection: async (songInfoList) => {
if (songInfoList.length == 0) return;
// image 因为接口没有返回,所以不更新
let result = await dbUtils.query(`
INSERT INTO song (
song_id, title, type, alias, pop, fee, quality, cd,
no, dj_id, s_id, origin_cover_type, pub_time,
no_copyright_rcmd, mv, single, version, data_version
) VALUES ? ON DUPLICATE KEY UPDATE
title = VALUES(title), type = VALUES(type), alias = VALUES(alias), pop = VALUES(pop), fee = VALUES(fee), quality = VALUES(quality), cd = VALUES(cd),
no = VALUES(no), dj_id = VALUES(dj_id), s_id = VALUES(s_id), origin_cover_type = VALUES(origin_cover_type), pub_time = VALUES(pub_time),
no_copyright_rcmd = VALUES(no_copyright_rcmd), mv = VALUES(mv), single = VALUES(single), version = VALUES(version), data_version = VALUES(data_version)
`, [songInfoList.map(songInfo => [
songInfo.id, songInfo.title, songInfo.type, songInfo.alias, songInfo.pop, songInfo.fee, songInfo.quality, songInfo.cd,
songInfo.no, songInfo.djId, songInfo.sId, songInfo.originCoverType, songInfo.pubTime,
songInfo.noCopyrightRcmd, songInfo.mv, songInfo.single, songInfo.version, 2
])]);
await dbUtils.query(`
DELETE FROM wait_fetch_song WHERE id IN ?
`, [[songInfoList.map(songInfo => songInfo.id)]])
return result;
},
getIdsToFetch: async (args) => {
let whereClause = [
args.min ? `id > ${args.min}` : '1=1',
args.max ? `id <= ${args.max}` : '1=1',
].join(' AND ');
let sql = `
SELECT id FROM wait_fetch_song WHERE ${whereClause}
${args.order ? `ORDER BY id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
// 更新现有数据
// sql = `SELECT song_id FROM song WHERE data_version = 1`;
console.log(sql);
let songIds = await dbUtils.query(sql, []);
songIds = songIds.map(item => item.id);
return songIds;
},
},
album: {
insert: async (albumInfo) => {
return await dbUtils.query('INSERT IGNORE INTO album SET ?', albumInfo);
},
update: async (albumId, albumInfo) => {
return await dbUtils.query(`UPDATE album SET ? WHERE album_id = ${albumId}`, albumInfo);
},
getIdsToFetch: async (args, isUpdate) => {
let sql = "";
if (isUpdate) {
sql = `SELECT album_id FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:[:space:]*?。,更多.*$'`;
} else {
let whereClause = [
args.min ? `id > ${args.min}` : '1=1',
args.max ? `id <= ${args.max}` : '1=1',
].join(' AND ');
sql = `
SELECT id FROM wait_fetch_album WHERE ${whereClause}
${args.order ? `ORDER BY id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
}
console.log(sql);
let albumIds = await dbUtils.query(sql, []);
albumIds = albumIds.map(item => item.id);
return albumIds;
},
getInfoById: async (albumId, { getRelation = true }) => {
if (!albumId) return {}
// 查询出专辑
const sql = 'SELECT * FROM album WHERE album_id = ?'
let infoResultSet = await dbUtils.query(sql, [albumId]);
if (infoResultSet.length == 0) return {};
let albumInfo = JSON.parse(JSON.stringify(infoResultSet[0]));
if (getRelation) {
// 查出专辑与歌曲对应关系
const sql2 = 'SELECT * FROM song_album_relation WHERE album_id = ?'
let relationResultSet = await dbUtils.query(sql2, [albumId]);
// 拼装
albumInfo.songIds = relationResultSet.map(song => song.song_id);
} else {
albumInfo.songIds = null;
}
return albumInfo;
},
},
artist: {
insert: async (artistInfo) => {
return await dbUtils.query('INSERT IGNORE INTO artist SET ?', artistInfo);
},
getIdsToFetch: async (args) => {
let whereClause = [
args.min ? `id > ${args.min}` : '1=1',
args.max ? `id <= ${args.max}` : '1=1',
].join(' AND ');
let sql = `
SELECT id FROM wait_fetch_artist WHERE ${whereClause}
${args.order ? `ORDER BY id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
console.log(sql);
let artistIds = await dbUtils.query(sql, []);
artistIds = artistIds.map(item => item.id);
return artistIds;
},
getInfoById: async (artistId, { getRelation = true }) => {
if (!artistId) return {}
// 查询出歌手
const sql = 'SELECT * FROM artist WHERE artist_id = ?'
let infoResultSet = await dbUtils.query(sql, [artistId]);
if (infoResultSet.length == 0) return {};
let artistInfo = JSON.parse(JSON.stringify(infoResultSet[0]));
if (getRelation) {
// 查出歌手与歌曲对应关系
const sql2 = 'SELECT * FROM song_artist_relation WHERE artist_id = ?'
let relationResultSet = await dbUtils.query(sql2, [artistId]);
// 拼装
artistInfo.songIds = relationResultSet.map(song => song.song_id);
} else {
artistInfo.songIds = null;
}
return artistInfo;
},
},
lyric: {
insert: async (lyricInfo) => {
return await dbUtils.query('INSERT IGNORE INTO lyric SET ?', lyricInfo);
},
getIdsToFetch: async (args) => {
let whereClause = [
args.min ? `id > ${args.min}` : '1=1',
args.max ? `id <= ${args.max}` : '1=1',
].join(' AND ');
var sql = `
SELECT id FROM wait_fetch_lyric WHERE ${whereClause}
${args.order ? `ORDER BY id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
console.log(sql);
let songIds = await dbUtils.query(sql, []);
songIds = songIds.map(item => item.id);
return songIds;
},
},
comment: {
insertCollection: async (commentInfoList) => {
if (commentInfoList.length == 0) return;
return await dbUtils.query(`
INSERT INTO comment ( comment_id, parent_comment_id, user_id, song_id, content, time, like_count, comment_type ) VALUES ?
ON DUPLICATE KEY UPDATE content = VALUES(content), like_count = VALUES(like_count), comment_type = GREATEST(comment_type, VALUES(comment_type)), modify_time = CURRENT_TIMESTAMP
`, [commentInfoList]);
},
getIdsToFetch: async (args) => {
let whereClause = [
args.min ? `song_id > ${args.min}` : '1=1',
args.max ? `song_id <= ${args.max}` : '1=1',
].join(' AND ');
var sql = `
SELECT song_id FROM comment_progress WHERE ${whereClause} AND current_status != 2
ORDER BY current_status DESC${args.order ? `, song_id ${args.order}` : ''}
${args.limit ? `LIMIT ${args.limit}` : ''}
`;
console.log(sql);
let songIds = await dbUtils.query(sql, []);
songIds = songIds.map(item => item.song_id);
return songIds;
},
},
comment_progress: {
update: async (commentProgressInfo, songId) => {
return await dbUtils.query('UPDATE comment_progress SET ? WHERE song_id = ? LIMIT 1', [commentProgressInfo, songId]);
},
},
playlist: {
insertCollection: async (playlistInfo) => {
if (playlistInfo.length == 0) return;
return await dbUtils.query(`
INSERT INTO playlist ( ${Object.keys(playlistInfo).map(field => `\`${field}\``).join(",")} ) VALUES ?
ON DUPLICATE KEY UPDATE ${Object.keys(playlistInfo).map(field => `${field}=VALUES(${field})`).join(", ")}
`, [[Object.values(playlistInfo)]]);
},
},
user: {
insertCollection: async (userInfoList) => {
if (userInfoList.length == 0) return;
return await dbUtils.query(`
INSERT INTO user ( user_id, user_type, nickname, avatar_url ) VALUES ?
ON DUPLICATE KEY UPDATE user_type = VALUES(user_type), nickname = VALUES(nickname), avatar_url = VALUES(avatar_url), modify_time = CURRENT_TIMESTAMP
`, [userInfoList]);
},
},
song_album: {
insertCollection: async (songAlbumRel) => {
if (songAlbumRel.length == 0) return;
return await dbUtils.query('INSERT IGNORE INTO song_album_relation (song_id, album_id) VALUES ?', [songAlbumRel]);
},
},
song_artist: {
insertCollection: async (songArtistRel) => {
if (songArtistRel.length == 0) return;
return await dbUtils.query('INSERT IGNORE INTO song_artist_relation (song_id, artist_id) VALUES ?', [songArtistRel]);
},
},
song_playlist: {
insertCollection: async (trackIds) => {
if (trackIds.length == 0) return;
return await dbUtils.query('INSERT IGNORE INTO song_playlist_relation (song_id, playlist_id, alg) VALUES ?', [trackIds]);
},
},
/* ##################################################### */
// 将 id 插入待检查表
wait_check: {
insert: async (type, ids) => {
// 过滤掉 id 为 0 的
ids = ids.filter(id => id > 0);
if (ids.length == 0) return;
return await dbUtils.query(`INSERT IGNORE INTO wait_check_${type} (id) VALUES ?`, [ids.map(id => [id])]);
},
},
wait_fetch: {
deleteCollection: async function (type, ids) {
// console.log("wait_fetch.deleteCollection", type, ids);
if (ids.length > 0)
return await dbUtils.query(`DELETE FROM wait_fetch_${type} WHERE id IN ?`, [[ids]]);
}
}
};

View File

@@ -0,0 +1,174 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dataManager = require('../dataManager');
const dbUtils = global.dbUtils;
// 正常应该查不出记录才对
/*
SELECT * FROM album WHERE (full_description = '' or full_description is null) and description like '%专辑《%》,简介:%' and description not regexp '^.*?专辑《.*?》,简介:([:space:]*?|[ ]*?)。,更多.*$'
*/
/**
 * Crawl every album returned by dataManager.album.getIdsToFetch, one at a time.
 * A failure on one album is logged and does not stop the loop.
 *
 * @param {{args?: object, isUpdate?: boolean}} options - `args` is forwarded to the id query
 *     (its min/max are also shown in the progress line); `isUpdate` is forwarded to the
 *     id query and to fetch() as `update`.
 * @returns {Promise<void>}
 */
async function fetchAll({ args = {}, isUpdate = false }) {
    console.log("start fetching albums ...");
    const pendingIds = await dataManager.album.getIdsToFetch(args, isUpdate);
    const total = pendingIds.length;
    console.log(`albumIds was fetched, count: ${total}`);

    let position = 0;
    for (const albumId of pendingIds) {
        position += 1;
        // Give the host process a chance to stop between albums.
        await global.checkIsExit();
        console.log(`${position}/${total} | album: ${albumId} | ${args.min || "?"}-${args.max || "?"}`);
        await fetch({ albumId: albumId, update: isUpdate }).catch((err) => {
            console.error(err);
        });
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Crawl one album page from music.163.com, parse it and store the result.
 *
 * Steps: skip if the album is already stored (or, in update mode, not stored yet);
 * download the page; store an empty placeholder row when the page reports "not found";
 * otherwise parse title/description/date (ld+json), company, full description, cover
 * image and song list, queue the songs, store the song/album relations and the album
 * row, and finally remove the album from the wait_fetch queue.
 *
 * @param {object} options
 * @param {number} options.albumId - Album id used in the page URL and as album_id.
 * @param {boolean} [options.debug=false] - When true, an already stored album is crawled again.
 * @param {boolean} [options.update=false] - When true, only existing albums are processed
 *     and the row is updated instead of inserted.
 * @returns {Promise<void>} Resolves without a value on every path, including the logged
 *     early exits (download failure, parse failure).
 * @throws Database errors and malformed ld+json / song-list JSON propagate to the caller.
 */
async function fetch({ albumId, debug = false, update = false }) {
    const result = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
    if (!debug && !update && result[0].count > 0) {
        console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
        // Already stored: only remove it from the fetch queue.
        await dataManager.wait_fetch.deleteCollection("album", [albumId]);
        return;
    } else if (update && result[0].count == 0) {
        console.log(`数据库中沒有数据,跳过 albumId: ${albumId}`);
        return;
    }

    const url = `https://music.163.com/album?id=${albumId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url, { timeout: 3000 });
    } catch (errors) {
        console.error(errors);
        return;
    }

    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        console.log(`album: ${albumId} 不存在,插入空数据`);
        // Awaited so that a failing insert reaches the caller instead of becoming an
        // unhandled rejection.
        await dbUtils.query('INSERT IGNORE INTO album SET ?', {
            album_id: albumId,
            title: '',
            description: '',
            image: '',
            pub_date: '',
            version: 1
        });
        return;
    }

    // Records a parse failure in the log table; `label` names the field that failed.
    const logParseFailure = (label, e) => dbUtils.query(
        'INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)',
        [albumId, 'album_fetch', `${label} 正则失败\n${e.message}`]
    );

    // Structured metadata embedded in the page.
    let albumInfoDict;
    const ldJsonMatch = /<script type="application\/ld\+json">([\S\s]*?)<\/script>/.exec(html);
    if (ldJsonMatch) {
        albumInfoDict = JSON.parse(ldJsonMatch[1]);
    } else {
        // A few albums have no ld+json block at all; they carry no title either,
        // so these three fields simply stay empty.
        albumInfoDict = {
            "title": "",
            "description": "",
            "pubDate": ""
        };
    }

    // Publishing company.
    let company = null;
    if (html.includes(`<p class="intr"><b>发行公司:`)) {
        try {
            // The label may be followed by nothing but a line break.
            company = /<p class="intr"><b>发行公司:<\/b>\n(.*?)\n?<\/p>/.exec(html)[1];
            company = company.trim();
        } catch (e) {
            await logParseFailure('company', e);
            return;
        }
    }

    // Full album description.
    let fullDescription = null;
    if (html.includes(`<div id="album-desc-more" class="f-hide">`)) {
        // Long description: the page has an "expand" section.
        try {
            fullDescription = /<div id="album-desc-more" class="f-hide">([\S\s]*?)<\/div>/.exec(html)[1];
            fullDescription = fullDescription.replace(/<p class="f-brk">\n/g, '').replace(/<\/p>\n/g, '').trim();
        } catch (e) {
            await logParseFailure('fullDescription 1', e);
            return;
        }
    } else if (html.includes(`<div id="album-desc-dot" class="f-brk">`)) {
        // Short description: no "expand" section.
        try {
            fullDescription = /<div id="album-desc-dot" class="f-brk">([\S\s]*?)<\/div>/.exec(html)[1];
            fullDescription = fullDescription.replace(/<p>/g, '').replace(/<\/p>/g, '').trim();
        } catch (e) {
            await logParseFailure('fullDescription 2', e);
            return;
        }
    }

    // A handful of albums have no og:image tag; the image then stays empty.
    const imageMatch = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html);
    const image = imageMatch ? imageMatch[1] : "";

    // Song list embedded as JSON. A page without the textarea yields an empty list
    // (previously JSON.parse("") threw here); malformed JSON still throws.
    const songListMatch = /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/.exec(html);
    const songList = songListMatch ? JSON.parse(songListMatch[1]) : [];
    const songIds = songList.map(song => Number(song.id));

    const albumInfo = {
        album_id: albumId,
        title: albumInfoDict.title,
        image: image,
        description: albumInfoDict.description,
        full_description: fullDescription,
        pub_date: albumInfoDict.pubDate,
        company: company,
        version: 1
    };

    // Queue the songs for the follow-up crawlers.
    await dataManager.wait_check.insert("song", songIds);
    await dataManager.wait_check.insert("lyric", songIds);
    await dataManager.wait_check.insert("comment", songIds);

    // Store the song <-> album relations.
    if (albumId > 0) {
        const songAlbumRel = songIds.map(songId => [songId, albumId]);
        await dataManager.song_album.insertCollection(songAlbumRel);
    }

    // Store the album itself.
    if (update) {
        await dataManager.album.update(albumId, albumInfo);
    } else {
        await dataManager.album.insert(albumInfo);
    }

    // Done: remove it from the fetch queue.
    await dataManager.wait_fetch.deleteCollection("album", [albumId]);
}
// Public API of the album crawler.
module.exports = { fetch, fetchAll };

View File

@@ -0,0 +1,119 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dataManager = require('../dataManager');
const dbUtils = global.dbUtils;
/**
 * Crawl every artist returned by dataManager.artist.getIdsToFetch, one at a time.
 * A failure on one artist is logged and does not stop the loop.
 *
 * @param {{args?: object}} options - `args` is forwarded to the id query; its min/max
 *     are also shown in the progress line.
 * @returns {Promise<void>}
 */
async function fetchAll({ args = {} }) {
    console.log("start fetching artists ...");
    const pendingIds = await dataManager.artist.getIdsToFetch(args);
    const total = pendingIds.length;
    console.log(`artistIds was fetched, count: ${total}`);

    let position = 0;
    for (const artistId of pendingIds) {
        position += 1;
        // Give the host process a chance to stop between artists.
        await global.checkIsExit();
        console.log(`${position}/${total} | artist: ${artistId} | ${args.min || "?"}-${args.max || "?"}`);
        await fetch({ artistId: artistId }).catch((err) => {
            console.error(err);
        });
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Crawl one artist page from music.163.com, parse it and store the result.
 *
 * Steps: skip if the artist is already stored; download the page; store an empty
 * placeholder row when the page reports "not found"; otherwise parse the ld+json
 * metadata, the image and the song list, queue the songs, store the song/artist
 * relations and the artist row, and remove the artist from the wait_fetch queue.
 *
 * @param {object} options
 * @param {number} options.artistId - Artist id used in the page URL and as artist_id.
 * @param {boolean} [options.debug=false] - When true, an already stored artist is crawled again.
 * @returns {Promise<void>} Resolves without a value on every path, including a logged download failure.
 * @throws Database errors and malformed ld+json propagate to the caller.
 */
async function fetch({ artistId, debug = false }) {
    const result = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
    if (result[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 artistId: ${artistId}`);
        // Already stored: only remove it from the fetch queue.
        await dataManager.wait_fetch.deleteCollection("artist", [artistId]);
        return;
    }

    const url = `https://music.163.com/artist?id=${artistId}`;
    let html;
    try {
        html = await requestUtils.getApiResult(url, { timeout: 3000 });
    } catch (errors) {
        console.error(errors);
        return;
    }

    if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
        console.log(`artist: ${artistId} 不存在,插入空数据`);
        // Awaited so that a failing insert reaches the caller instead of becoming an
        // unhandled rejection.
        await dbUtils.query('INSERT IGNORE INTO artist SET ?', {
            artist_id: artistId,
            title: '',
            description: '',
            image: '',
            pub_date: '',
        });
        return;
    }

    // Structured metadata embedded in the page.
    let artistInfoDict;
    const ldJsonMatch = /<script type="application\/ld\+json">([\S\s]*?)<\/script>/.exec(html);
    if (ldJsonMatch) {
        artistInfoDict = JSON.parse(ldJsonMatch[1]);
    } else {
        // A few artists have no ld+json block at all; they carry no title either,
        // so these three fields simply stay empty.
        artistInfoDict = {
            "title": "",
            "description": "",
            "pubDate": ""
        };
    }

    // Pages without an og:image tag get an empty image instead of aborting the crawl
    // (same handling as the album crawler).
    const imageMatch = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html);
    const image = imageMatch ? imageMatch[1] : "";

    let songIds = [];
    try {
        const songListJSONString = /<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/.exec(html)[1];
        const songList = JSON.parse(songListJSONString);
        songIds = songList.map(song => Number(song.id));
    } catch (error) {
        // Deliberately ignored: the artist may simply have no songs,
        // e.g. https://music.163.com/#/artist?id=30032762
    }

    const artistInfo = {
        artist_id: artistId,
        title: artistInfoDict.title,
        image: image,
        description: artistInfoDict.description,
        pub_date: artistInfoDict.pubDate
    };

    // Queue the songs for the follow-up crawlers.
    await dataManager.wait_check.insert("song", songIds);
    await dataManager.wait_check.insert("lyric", songIds);
    await dataManager.wait_check.insert("comment", songIds);

    // Store the song <-> artist relations.
    if (artistId > 0) {
        const songArtistRel = songIds.map(songId => [songId, artistId]);
        await dataManager.song_artist.insertCollection(songArtistRel);
    }

    // Store the artist itself.
    await dataManager.artist.insert(artistInfo);

    // Done: remove it from the fetch queue.
    await dataManager.wait_fetch.deleteCollection("artist", [artistId]);
}
// Public API of the artist crawler.
module.exports = { fetch, fetchAll };

View File

@@ -0,0 +1,191 @@
const fs = require('fs');
const path = require('path');
const sleepUtils = require('../../../utils/sleepUtils');
const dataManager = require('../dataManager');
const dbUtils = global.dbUtils;
// refer:
// https://neteasecloudmusicapi-docs.4everland.app/
// https://github.com/Binaryify/NeteaseCloudMusicApi
const { comment_music } = require('NeteaseCloudMusicApi');
/**
 * Crawl the comments of every song returned by dataManager.comment.getIdsToFetch.
 * Exits the process with code 0 when there is nothing to crawl. A failure on one
 * song is logged and does not stop the loop.
 *
 * A former preparation step (seeding comment_progress from wait_fetch_comment with
 * INSERT IGNORE ... SELECT) is disabled and no longer part of this function.
 *
 * @param {{args?: object}} options - `args` is forwarded to the id query; its min/max
 *     are also shown in the progress line.
 * @returns {Promise<void>}
 */
async function fetchAll({ args = {} }) {
    console.log("start fetching comment ...");
    const pendingIds = await dataManager.comment.getIdsToFetch(args);
    const total = pendingIds.length;
    console.log(`songIds was fetched, count: ${total}`);
    if (total == 0) {
        console.log("完成!");
        process.exit(0);
    }

    let position = 0;
    for (const songId of pendingIds) {
        position += 1;
        // Give the host process a chance to stop between songs.
        await global.checkIsExit();
        console.log(`${position}/${total} | comment: ${songId} | ${args.min || "?"}-${args.max || "?"}`);
        await fetch({ songId: songId }).catch((err) => {
            console.error(err);
        });
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Crawl the comments of one song page by page and persist them.
 *
 * Reads the unfinished comment_progress row of the song (current_status != 2), then
 * repeatedly calls comment_music with `limit: 20` and, once known, `before` set to the
 * time of the last comment seen. Every page's comments and their users are upserted,
 * and the progress row is written back after each page. An API error is logged and
 * the same page is retried after 10 seconds.
 *
 * Status values used below: 0 - waiting / incremental crawl, 1 - in progress, 2 - finished.
 *
 * @param {object} options
 * @param {number} options.songId - Song whose comments are crawled.
 * @returns {Promise<void>} Returns early (after logging) when no unfinished progress row exists.
 */
async function fetch({ songId }) {
// First look for a progress row of this song that is not finished yet.
var commentProgress = await dbUtils.query(`
SELECT * FROM comment_progress WHERE song_id = ? and current_status != 2 LIMIT 1
`, [songId]);
if (commentProgress.length == 0) {
console.log('No commentProgress found, song_id:', songId);
return;
}
var item = commentProgress[0];
// In-memory copy of the progress row; it is mutated per page and written back below.
var progress = {
maxTime: item.max_time,
minTime: item.min_time,
currentTime: item.current_time,
currentStatus: item.current_status,
total: item.total,
};
// https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e6%9b%b2%e8%af%84%e8%ae%ba
var queryParams = {
id: songId,
limit: 20,
// before: undefined,
};
// Resume from the stored position when there is one.
if (progress.currentTime != 0)
queryParams.before = progress.currentTime;
let isFinish = false; let pageCount = 0;
while (!isFinish) {
await global.checkIsExit();
// The timestamp is shifted by 8 hours before formatting — presumably to show UTC+8 local time; confirm.
console.log(`comment: ${songId}, page: ${++pageCount}, currentTime: ${progress.currentTime} | ${new Date(progress.currentTime + 8 * 3600 * 1000).toISOString().replace("T", " ").replace("Z", "")}`);
// Whether this is the first page (NOTE(review): computed but not used below).
let isFirstPage = progress.currentStatus === 0;
try {
// console.log(progress, queryParams);
// console.log("await comment_music")
var commentResult = await comment_music(queryParams);
// console.log("finish await comment_music")
// fs.writeFileSync(path.join(__dirname, "../../temp", `comment-${songId}-${pageCount}.json`), JSON.stringify(commentResult));
} catch (errors) {
// Request failed: wait 10 seconds and retry the same page (pageCount still advances).
console.error("error", errors);
await sleepUtils.sleep(10 * 1000);
continue;
}
var topComments = commentResult.body.topComments || [];
var hotComments = commentResult.body.hotComments || [];
var comments = commentResult.body.comments || [];
// Flatten the three lists into rows, tagging each with its comment type (2 top, 1 hot, 0 normal).
var commentInfoList = [
...topComments.map(comment => getCommitInfoForInsert(songId, comment, 2)),
...hotComments.map(comment => getCommitInfoForInsert(songId, comment, 1)),
...comments.map(comment => getCommitInfoForInsert(songId, comment, 0))
];
// Users of all comments on this page; comments without a user object are skipped.
var userInfoList = [...topComments, ...hotComments, ...comments]
.map(comment => comment.user).filter(user => !!user).map(getUserInfoForInsert);
// console.log(commentInfoList);
// console.log(userInfoList);
console.log("dataManager.comment.insertCollection & dataManager.user.insertCollection")
// Both upserts are started together and awaited as a pair.
let p1 = dataManager.comment.insertCollection(commentInfoList.map(commentInfo => [
commentInfo.comment_id,
commentInfo.parent_comment_id,
commentInfo.user_id,
commentInfo.song_id,
commentInfo.content,
commentInfo.time,
commentInfo.like_count,
commentInfo.comment_type
]));
let p2 = dataManager.user.insertCollection(userInfoList.map(userInfo => [
userInfo.user_id,
userInfo.user_type,
userInfo.nickname,
userInfo.avatar_url,
]));
await Promise.all([p1, p2])
// console.log("INSERT comment and user finished");
// console.log(commentResult.body.more, comments.length, commentInfoList.length);
// Decide whether there is another page.
if (commentResult.body.more && comments.length > 0) {
// Advance the progress to the last comment of this page.
progress.currentTime = comments[comments.length - 1].time;
if (progress.maxTime == progress.minTime) { // minTime == maxTime means this is the first page of the current round
progress.maxTime = comments[0].time;
}
progress.currentStatus = 1; // 0 - waiting / incremental crawl, 1 - in progress, 2 - finished
// Next request continues before the last comment seen.
queryParams.before = progress.currentTime;
progress.total = commentResult.body.total;
} else {
isFinish = true;
console.log(`comment: ${songId} done\n`);
progress.currentStatus = 2; // 0 - waiting / incremental crawl, 1 - in progress, 2 - finished
if (progress.maxTime == 0) { // first crawl of this song and everything fit on one page
progress.maxTime = comments[0] ? (comments[0].time || 0) : 0;
}
progress.minTime = progress.maxTime; // minTime == maxTime marks the round as complete
progress.currentTime = progress.maxTime; // optional
}
// Write the progress back to the database.
let commentProgressInfo = {
max_time: progress.maxTime,
min_time: progress.minTime,
current_time: progress.currentTime,
current_status: progress.currentStatus,
total: progress.total,
};
console.log("dataManager.comment_progress.update")
let p3 = dataManager.comment_progress.update(commentProgressInfo, songId);
await p3
// console.log("UPDATE comment_progress");
// console.log("sleepUtils.sleep")
// await sleepUtils.sleep(global.sleepTime);
}
}
/**
 * Map one API comment object to the row shape used for the comment table.
 *
 * @param {number} songId - Song the comment belongs to.
 * @param {object} comment - Comment object as returned by the API.
 * @param {number} commentType - 0 for comments, 1 for hotComments, 2 for topComments.
 * @returns {object} Row with snake_case keys; `content` falls back to "" and
 *     `user_id` to null when the comment has no content / user.
 */
function getCommitInfoForInsert(songId, comment, commentType) {
    const { commentId, parentCommentId, user, content, time, likedCount } = comment;
    const row = {};
    row.comment_id = commentId;
    row.parent_comment_id = parentCommentId;
    row.user_id = user ? user.userId : null;
    row.song_id = songId;
    // Some comments come with a null content.
    row.content = content || "";
    row.time = time;
    row.like_count = likedCount;
    row.comment_type = commentType;
    return row;
}
/**
 * Map one API user object to the row shape used for the user table.
 *
 * The avatar URL is shortened to the part after `http://p<N>.music.126.net/`;
 * any other URL is stored unchanged. A missing (null/undefined) avatar URL is stored
 * as an empty string instead of throwing.
 *
 * @param {object} user - User object as returned by the API.
 * @returns {{user_id: *, user_type: *, nickname: *, avatar_url: string}} Row for insertion.
 */
function getUserInfoForInsert(user) {
    const avatarUrl = user.avatarUrl || '';
    const hostMatch = avatarUrl.match(/^http:\/\/p\d+\.music\.126\.net\/(.*?)$/);
    const shortAvatarUrl = hostMatch ? hostMatch[1] : avatarUrl;
    return {
        user_id: user.userId,
        user_type: user.userType,
        nickname: user.nickname,
        // An empty remainder (URL consisting of the host only) falls back to the full URL.
        avatar_url: shortAvatarUrl || avatarUrl,
    };
}
// Public API of the comment crawler.
module.exports = { fetchAll, fetch };

View File

@@ -0,0 +1,87 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dataManager = require('../dataManager');
const dbUtils = global.dbUtils;
/**
 * Crawl the lyrics of every song returned by dataManager.lyric.getIdsToFetch.
 * Exits the process with code 0 when there is nothing to crawl. A failure on one
 * song is logged and does not stop the loop.
 *
 * @param {{args?: object}} options - `args` is forwarded to the id query; its min/max
 *     are also shown in the progress line.
 * @returns {Promise<void>}
 */
async function fetchAll({ args = {} }) {
    console.log("start fetching lyrics ...");
    const pendingIds = await dataManager.lyric.getIdsToFetch(args);
    const total = pendingIds.length;
    console.log(`songIds was fetched, count: ${total}`);
    if (total == 0) {
        console.log("完成!");
        process.exit(0);
    }

    let position = 0;
    for (const songId of pendingIds) {
        position += 1;
        // Give the host process a chance to stop between songs.
        await global.checkIsExit();
        console.log(`${position}/${total} | lyric: ${songId} | ${args.min || "?"}-${args.max || "?"}`);
        await fetch({ songId: songId }).catch((err) => {
            console.error(err);
        });
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Download the lyric of one song and store it.
 *
 * Skips songs whose lyric is already stored (unless `debug` is set) and removes them
 * from the wait_fetch queue. When the response has no `lrc` entry an empty lyric with
 * version -1 is stored and the queue entry is left in place. Download and JSON errors
 * are logged and end the call without storing anything.
 *
 * @param {object} options
 * @param {number} options.songId - Song whose lyric is fetched.
 * @param {boolean} [options.debug=false] - When true, an already stored lyric is fetched again.
 * @returns {Promise<void>}
 */
async function fetch({ songId, debug = false }) {
    const countRows = await dbUtils.query('SELECT count(*) as count FROM lyric WHERE song_id = ?', [songId]);
    const alreadyStored = countRows[0].count > 0;
    if (alreadyStored && !debug) {
        // Skipped for now; lyric version updates may need handling later.
        console.log(`数据库中已有数据,跳过 songId: ${songId}`);
        await dataManager.wait_fetch.deleteCollection("lyric", [songId]);
        return;
    }

    const url = `https://music.163.com/api/song/lyric?id=${songId}&lv=1`; // &kv=1&tv=-1
    let lyric; // expected shape: { version: xx, lyric: 'xxx' }
    try {
        const json = await requestUtils.getApiResult(url);
        lyric = JSON.parse(json).lrc;
    } catch (error) {
        console.error(error);
        return;
    }

    if (lyric === undefined) {
        // The song existed when it was crawled but is gone by now.
        await dataManager.lyric.insert({
            song_id: songId,
            lyric: '',
            version: -1,
        });
        return;
    }

    await dataManager.lyric.insert({
        song_id: songId,
        lyric: lyric.lyric,
        version: lyric.version,
    });
    // Done: remove it from the fetch queue.
    await dataManager.wait_fetch.deleteCollection("lyric", [songId]);
}
// Public API of the lyric crawler.
module.exports = { fetch, fetchAll };

View File

@@ -0,0 +1,73 @@
const fs = require('fs');
const path = require('path');
const sleepUtils = require('../../../utils/sleepUtils');
const dataManager = require('../dataManager');
const dbUtils = global.dbUtils;
const oldPlaylistUtils = require('./playlistUtils_old');
// refer:
// https://neteasecloudmusicapi-docs.4everland.app/
// https://github.com/Binaryify/NeteaseCloudMusicApi
const { top_playlist, top_playlist_highquality, related_playlist } = require('NeteaseCloudMusicApi');
/**
 * Experimental playlist discovery.
 *
 * Intended sources of playlist ids (only ids are kept):
 *   - top_playlist
 *   - top_playlist_highquality (currently disabled)
 *   - playlists already queued in the database
 *
 * As written, each pass calls top_playlist and then related_playlist for a fixed
 * playlist id; only the related_playlist response is used. Every playlist in that
 * response is handed to oldPlaylistUtils.fetch. When either request fails, the pass
 * is retried after 10 seconds.
 *
 * NOTE(review): `hasMore` is read from `result.more`, which the visible code never
 * sets, so the loop presumably ends after the first successful pass — confirm before
 * adding real paging.
 *
 * @param {{args?: object}} options - Currently unused.
 * @returns {Promise<void>}
 */
async function fetchTop({ args = {} }) {
    console.log("start fetching playlist ...");
    const limit = 50; // only shown in the progress line
    const page = 0;
    let hasMore = true;
    while (hasMore) {
        let result;
        try {
            // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95-%e7%bd%91%e5%8f%8b%e7%b2%be%e9%80%89%e7%a2%9f-
            result = await top_playlist({ id: 2 });
            // Related playlists of a given playlist id:
            // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e7%9b%b8%e5%85%b3%e6%ad%8c%e5%8d%95%e6%8e%a8%e8%8d%90
            result = await related_playlist({ id: 809945533 });
        } catch (errors) {
            console.error("error", errors);
            await sleepUtils.sleep(10 * 1000);
            // Retry instead of falling through with a missing or stale result.
            continue;
        }

        hasMore = result.more;
        const playlists = result.body.playlists || [];
        for (const [index, playlist] of playlists.entries()) {
            await global.checkIsExit();
            const playlistId = playlist.id;
            console.log(`${index + 1}/${playlists.length} | playlist: ${playlistId} | limit: ${limit}, page: ${page}`);
            try {
                await oldPlaylistUtils.fetch({ playlistId: playlist.id });
            } catch (err) {
                console.error(err);
            }
        }
        // TODO: update the is_fetched_related_playlist column.
    }
}
// Public API of the playlist discovery module.
module.exports = { fetchTop };

View File

@@ -0,0 +1,188 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
// refer:
// https://neteasecloudmusicapi-docs.4everland.app/
// https://github.com/Binaryify/NeteaseCloudMusicApi
const { playlist_detail } = require('NeteaseCloudMusicApi');
const dataManager = require('../dataManager');
/**
 * Crawl the playlist listing of every category, then exit the process.
 *
 * Raises global.sleepTime to at least 1000 ms, loads the category titles from the
 * category table (rows with a non-null netease_group_chinese), puts '全部' in front and
 * calls fetchCategory for each one. A failure in one category is logged and the
 * loop continues. The process exits with code 0 two seconds after the last category.
 *
 * @returns {Promise<void>}
 */
async function fetchAll() {
    // Use a longer pause between requests; shorter ones tend to trigger HTTP 500 errors.
    global.sleepTime = Math.max(1000, global.sleepTime);
    console.log("global.sleepTime", global.sleepTime);
    console.log("playlist 需要一口气爬完,中途不能停止,否则下次又要重头爬(歌单不会重复爬取,但是分页列表会)");
    console.log("start fetching playlists ...");

    // Load all NetEase categories from the database.
    const result = await dbUtils.query(`SELECT title FROM category WHERE netease_group_chinese IS NOT NULL`);
    // Declared locally: the bare assignment used before created an implicit global
    // and is a ReferenceError in strict mode.
    const cate = ['全部', ...result.map(row => row.title)];
    console.log(cate);

    for (let i = 0; i < cate.length; i++) {
        const categoryName = cate[i];
        try {
            console.log(`开始爬取分类:${categoryName}(i=${i})`);
            await fetchCategory({ categoryName: `${categoryName}`, progress: `${i + 1}/${cate.length}` });
        } catch (err) {
            console.error(err);
        }
        await sleepUtils.sleep(global.sleepTime);
    }

    console.log("爬取完毕");
    await sleepUtils.sleep(2000);
    process.exit(0);
}
/**
 * Walk the paginated playlist listing of one category and crawl every playlist
 * that is not stored yet.
 *
 * Each page (35 entries) is downloaded from the discover/playlist listing, the
 * playlist ids are extracted and de-duplicated, ids already present in the playlist
 * table are dropped and the rest are passed to fetch() one by one. A page that
 * yields no ids is treated as throttling: the HTML is dumped to the temp folder,
 * a row is written to the log table and the same page is retried after 40 seconds.
 * A download error ends the category.
 *
 * @param {object} options
 * @param {string} options.categoryName - Category title used as the `cat` query parameter.
 * @param {string} options.progress - Label such as "3/70", only used in log output.
 * @returns {Promise<void>}
 */
async function fetchCategory({ categoryName, progress }) {
    const perPage = 35;
    let haveNext = true;
    let offset = 0;
    while (haveNext) {
        const url = `https://music.163.com/discover/playlist?cat=${encodeURIComponent(categoryName)}&limit=${perPage}&offset=${offset}`;
        let html;
        let playlistIds;
        try {
            html = await requestUtils.getApiResult(url);
            // Every playlist id shows up twice in the page, hence the Set.
            const uniqueIds = new Set();
            for (const match of html.matchAll(/"\/playlist\?id=(\d{1,20})"/g)) {
                uniqueIds.add(Number(match[1]));
            }
            // Numeric comparator: the default sort would order the ids as strings.
            playlistIds = [...uniqueIds].sort((a, b) => a - b);
        } catch (errors) {
            console.error(errors);
            return;
        }

        if (playlistIds.length === 0) {
            console.log("失败url", url);
            fs.writeFileSync(path.join(__dirname, "../../temp", `[error]discover-playlist.html`), html);
            await dbUtils.query('INSERT INTO log (`id`, `name`, `msg`) VALUES (?, ?, ?)', [0, 'playlist_fetch', `失败 可能是爬太快了\n${url}`]);
            await sleepUtils.sleep(40 * 1000); // wait 40s before trying the same page again
            continue;
        }

        // Drop the playlists that are already stored.
        const storedRows = await dbUtils.query('SELECT playlist_id FROM playlist WHERE playlist_id IN ?', [[playlistIds]]);
        const storedIds = new Set(storedRows.map(row => row.playlist_id));
        const finalPlaylistIds = playlistIds.filter(playlistId => !storedIds.has(playlistId));
        console.log(categoryName, "offset", offset, "playlistIds.length", playlistIds.length, "finalPlaylistIds.length", finalPlaylistIds.length);

        for (let i = 0; i < finalPlaylistIds.length; i++) {
            await global.checkIsExit();
            const playlistId = finalPlaylistIds[i];
            console.log(`分类: ${progress} | 歌单: ${offset + i + 1}/${offset + finalPlaylistIds.length} | playlist: ${playlistId}`);
            try {
                await fetch({ playlistId: playlistId });
            } catch (err) {
                console.error(err);
            }
            await sleepUtils.sleep(global.sleepTime);
        }

        // A disabled "next page" button marks the last page.
        if (html.includes(`class="zbtn znxt js-disabled">下一页</a>`)) haveNext = false;
        offset += perPage;
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Load the detail of one playlist through playlist_detail and store it.
 *
 * Skips playlists that are already stored (unless `debug` is set). Queues the
 * playlist's tracks in wait_check_song, stores the song/playlist relations and then
 * the playlist row. A request error is logged and ends the call.
 *
 * NOTE: when the response carries a truthy `bannedTrackIds`, it is printed and the
 * process exits with code 0 before anything is stored.
 *
 * @param {object} options
 * @param {number} options.playlistId - Playlist to load.
 * @param {boolean} [options.debug=false] - When true, an already stored playlist is loaded again.
 * @returns {Promise<void>}
 */
async function fetch({ playlistId, debug = false }) {
    const countRows = await dbUtils.query('SELECT count(*) as count FROM playlist WHERE playlist_id = ?', [playlistId]);
    if (countRows[0].count > 0 && !debug) {
        console.log(`数据库中已有数据,跳过 playlistId: ${playlistId}`);
        return;
    }

    // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e6%ad%8c%e5%8d%95%e5%88%86%e7%b1%bb
    let playlistResult;
    try {
        playlistResult = await playlist_detail({
            id: playlistId,
        });
    } catch (errors) {
        console.error(errors);
        return;
    }

    const playlist = playlistResult.body.playlist;

    // Keeps only the path after the image host; a missing URL becomes ''.
    const stripImageHost = (imageUrl) => {
        if (!imageUrl) return '';
        return /^https?:\/\/p.\.music\.126\.net\/(.*?)$/.exec(imageUrl)[1];
    };
    const toJson = (value) => JSON.stringify(value);

    const playlistInfo = {
        playlist_id: playlist.id,
        title: playlist.name,
        english_title: playlist.englishTitle,
        description: playlist.description,
        user_id: playlist.userId,
        tags: toJson(playlist.tags),
        alg_tags: toJson(playlist.algTags),
        playlist_create_time: playlist.createTime,
        playlist_update_time: playlist.updateTime,
        track_count: playlist.trackCount,
        play_count: playlist.playCount,
        subscribed_count: playlist.subscribedCount,
        share_count: playlist.shareCount,
        comment_count: playlist.commentCount,
        cover_image: stripImageHost(playlist.coverImgUrl),
        title_image: stripImageHost(playlist.titleImageUrl),
        background_cover: stripImageHost(playlist.backgroundCoverUrl),
        ordered: playlist.ordered,
        copied: playlist.copied,
        status: playlist.status,
        privacy: playlist.privacy,
        ad_type: playlist.adType,
        special_type: playlist.specialType,
        official_playlist_type: playlist.officialPlaylistType,
        op_recommend: playlist.opRecommend,
        high_quality: playlist.highQuality,
        new_imported: playlist.newImported,
        update_frequency: playlist.updateFrequency,
        grade_status: playlist.gradeStatus,
        score: playlist.score,
        creator: toJson(playlist.creator),
        video_ids: toJson(playlist.videoIds),
        videos: toJson(playlist.videos),
        banned_track_ids: toJson(playlist.bannedTrackIds),
        remix_video: toJson(playlist.remixVideo),
    };

    if (playlist.bannedTrackIds) {
        console.log("bannedTrackIds", playlist.bannedTrackIds);
        process.exit(0);
    }

    const songIds = playlist.trackIds.map(track => track.id);
    await dataManager.wait_check.insert("song", songIds);
    const relationRows = playlist.trackIds.map(track => [track.id, playlist.id, track.alg]);
    await dataManager.song_playlist.insertCollection(relationRows);
    await dataManager.playlist.insertCollection(playlistInfo);
}
// Public API of the legacy playlist crawler.
module.exports = { fetch, fetchAll };

View File

@@ -0,0 +1,195 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const { fill } = require('../../../utils/stringUtils');
const dataManager = require('../dataManager');
const dbUtils = global.dbUtils;
const { song_detail } = require('NeteaseCloudMusicApi');
/**
 * Crawl every song returned by dataManager.song.getIdsToFetch in batches.
 *
 * The ids are split into consecutive slices of 1000 (the last one may be shorter)
 * and each slice is passed to fetch() in a single call. A failing batch is logged
 * and the loop moves on to the next one.
 *
 * @param {{args?: object}} options - `args` is forwarded to the id query; its min/max
 *     are also shown in the progress line.
 * @returns {Promise<void>}
 */
async function fetchAll({ args = {} }) {
    console.log("start fetching songs ...");
    const songIds = await dataManager.song.getIdsToFetch(args);
    console.log(`songIds was fetched, count: ${songIds.length}`);

    const batchSize = 1000;
    const batchCount = Math.ceil(songIds.length / batchSize);
    for (let batchIndex = 0; batchIndex < batchCount; batchIndex++) {
        await global.checkIsExit();
        const start = batchIndex * batchSize;
        const batch = songIds.slice(start, start + batchSize);
        // Progress line: first id, last id and batch size, each formatted by fill().
        const firstLabel = fill(batch[0], 10);
        const lastLabel = fill(batch.slice(-1)[0], 10);
        const sizeLabel = fill(`(${batch.length})`, 6, ' ', true);
        console.log(`${batchIndex + 1}/${batchCount} | song: ${firstLabel}-${lastLabel} ${sizeLabel} | ${args.min || "?"}-${args.max || "?"}`);
        await fetch({ songIdArray: batch }).catch((err) => {
            console.error(err);
        });
        await sleepUtils.sleep(global.sleepTime);
    }
}
/**
 * Load the details of a batch of songs through song_detail and store them.
 *
 * For every returned song the artist and album ids are queued in the wait_check
 * tables, the song/album and song/artist relations are stored, the song rows are
 * inserted and finally the requested ids are removed from the wait_fetch queue.
 * A request error is logged and ends the call; an empty response ends it silently.
 *
 * @param {object} options
 * @param {Array<number>} options.songIdArray - Ids requested in one API call.
 * @param {boolean} [options.debug=false] - Accepted but not used.
 * @returns {Promise<void>}
 */
async function fetch({ songIdArray, debug = false }) {
    // https://neteasecloudmusicapi-docs.4everland.app/#/?id=%e8%8e%b7%e5%8f%96%e6%ad%8c%e6%9b%b2%e8%af%a6%e6%83%85
    let songResult;
    try {
        // One call can return more than a thousand songs.
        songResult = await song_detail({ ids: songIdArray.join(',') });
    } catch (errors) {
        console.error(errors);
        return;
    }

    const albumIds = [];
    const artistIds = [];
    const songAlbumRel = [];
    const songArtistRel = [];
    const songInfoList = [];
    for (const song of songResult.body.songs) {
        for (const artist of song.ar) {
            artistIds.push(artist.id);
            songArtistRel.push([song.id, artist.id]);
        }
        albumIds.push(song.al.id || 0);
        songAlbumRel.push([song.id, song.al.id || 0]);

        songInfoList.push({
            title: song.name || "", // song title
            id: song.id, // song id
            type: song.t, // 0: regular, 1: cloud-disk upload without public match, 2: cloud-disk upload with public match
            alias: JSON.stringify(song.alia), // alias list; the first alias is shown as subtitle
            pop: song.pop, // popularity, usually one of a few discrete values in [0.0, 100.0]
            fee: song.fee, // 0: free or no copyright, 1: VIP song, 4: album purchase, 8: low quality free / high quality for members
            duration: song.dt, // song length
            quality: JSON.stringify({ h: song.h, m: song.m, l: song.l, sq: song.sq }), // high/medium/low/lossless file info
            version: song.version, // song version
            cd: song.cd || "", // CD the song belongs to within the album, e.g. "04", "1/2", "3"
            no: song.no, // track number on the CD, 0 when absent
            djId: song.djId, // 0: not a DJ programme, otherwise the DJ id
            sId: song.s_id, // for type 2 songs: id of the matched public song
            originCoverType: song.originCoverType, // 0: unknown, 1: original, 2: cover
            image: "",
            pubTime: song.publishTime, // Unix timestamp in milliseconds
            mv: song.mv, // non-zero: MV id
            single: song.single, // 0: has album info or is a DJ programme, 1: unknown album
            noCopyrightRcmd: song.noCopyrightRcmd ? JSON.stringify(song.noCopyrightRcmd) : "", // empty: playable, otherwise no copyright
            artist: song.ar.map(artist => artist.id), // artist id list
            album: song.al.id || 0, // album id; 0 for DJ programmes or songs without album info
        });
    }

    if (songInfoList.length == 0) return;
    console.log("插入数据库");

    // Queue the referenced albums and artists.
    await dataManager.wait_check.insert("album", albumIds);
    await dataManager.wait_check.insert("artist", artistIds);
    // Store the relations.
    await dataManager.song_album.insertCollection(songAlbumRel);
    await dataManager.song_artist.insertCollection(songArtistRel);
    // Store the songs; `image` is not returned by the API and therefore left empty.
    await dataManager.song.insertCollection(songInfoList);
    // Done: remove the batch from the fetch queue.
    await dataManager.wait_fetch.deleteCollection("song", songIdArray);
}
/**
 * Fetch song details by scraping the public song page (legacy HTML-based implementation).
 *
 * Skips songs that already have a row in `song` (unless `debug` is set), removes the
 * relation rows of songs whose page reports "not found", and otherwise stores the song,
 * its album relation and its artist relations.
 *
 * @param {{ songId: (string|number), debug?: boolean }} options
 *   songId - id used in the page URL and in every table row;
 *   debug  - when true, re-fetch even if the song is already stored.
 * @returns {Promise<object|undefined>} the extracted song info, or undefined when the song
 *   was skipped, the request failed, the page does not exist, or the page could not be parsed.
 */
async function fetch_old({ songId, debug = false }) {
  const result = await dbUtils.query('SELECT count(*) as count FROM song WHERE song_id = ?', [songId]);
  if (result[0].count > 0 && !debug) {
    console.log(`数据库中已有数据,跳过 songId: ${songId}`);
    return;
  }

  const url = `https://music.163.com/song?id=${songId}`;
  let html;
  try {
    html = await requestUtils.getApiResult(url);
  } catch (errors) {
    console.error(errors);
    return;
  }

  if (html.includes(`<p class="note s-fc3">很抱歉,你要查找的网页找不到</p>`)) {
    const deleteResult1 = await dbUtils.query('DELETE FROM song_album_relation WHERE song_id = ?', [songId]);
    const deleteResult2 = await dbUtils.query('DELETE FROM song_artist_relation WHERE song_id = ?', [songId]);
    console.log(`song: ${songId} 不存在从song_album_relation, song_artist_relation表中删除. affectedRows: ${deleteResult1.affectedRows}, ${deleteResult2.affectedRows}`);
    return;
  }

  // Returns the first capture group of `pattern` in the page, or null when it does not match.
  const firstCapture = (pattern) => {
    const match = pattern.exec(html);
    return match === null ? null : match[1];
  };

  // Mandatory fields: without any of these the page layout is not the one this scraper expects.
  const songInfoJSONString = firstCapture(/\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/);
  // TODO: handle song aliases, e.g. https://music.163.com/#/song?id=26830207
  const title = firstCapture(/<meta property="og:title" content="(.*?)" \/>/);
  const image = firstCapture(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/);
  const artist = firstCapture(/<meta property="og:music:artist" content="(.*?)" \/>/);
  const duration = firstCapture(/<meta property="music:duration" content="(.*?)"\/>/);
  if (songInfoJSONString === null || title === null || image === null || artist === null || duration === null) {
    console.error(`song: ${songId} 页面解析失败,缺少必要字段,跳过`);
    return;
  }

  let songInfoDict;
  try {
    songInfoDict = JSON.parse(songInfoJSONString);
  } catch (err) {
    console.error(`song: ${songId} ld+json 解析失败,跳过`, err);
    return;
  }

  // Album tags are optional: a song may not belong to any album.
  // As before, the album id is only looked up when the album name tag is present.
  const album = firstCapture(/<meta property="og:music:album" content="(.*?)"\/>/);
  const albumId = album === null
    ? null
    : firstCapture(/<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/);

  const musicianPattern = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
  const artistIds = Array.from(html.matchAll(musicianPattern), (match) => match[1]);

  const songInfo = {
    songId: songId,
    title: title,
    image: image,
    pubDate: songInfoDict.pubDate,
    artist: artist,
    artistIds: artistIds,
    album: album || null,
    albumId: albumId || null,
    duration: duration,
  };

  // Every write is awaited so failures surface here and the caller never
  // continues (or closes the connection) while inserts are still in flight.
  if (songInfo.albumId !== null) {
    await dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
      song_id: songInfo.songId,
      album_id: songInfo.albumId,
    });
  }
  for (const artistId of artistIds) {
    await dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
      song_id: songInfo.songId,
      artist_id: artistId,
    });
  }
  await dbUtils.query('INSERT IGNORE INTO song SET ?', {
    song_id: songInfo.songId,
    title: songInfo.title,
    image: songInfo.image,
    pub_date: songInfo.pubDate,
  });
  return songInfo;
}
// Public API of this module: only fetch and fetchAll are exported; fetch_old is not.
// NOTE(review): fetch and fetchAll are presumably defined earlier in this file — confirm.
module.exports = {
fetch: fetch,
fetchAll: fetchAll,
}

View File

@@ -0,0 +1,29 @@
const fs = require('fs');
const path = require('path');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
// refer:
// https://neteasecloudmusicapi-docs.4everland.app/
// https://github.com/Binaryify/NeteaseCloudMusicApi
const { top_playlist, top_playlist_highquality, related_playlist } = require('NeteaseCloudMusicApi');
/**
 * Scratch test: request the playlists related to playlist id 1, dump the response
 * body to temp/test.json and print the whole response.
 * On any failure the error is logged and the function pauses for 10 seconds
 * before printing whatever response was obtained (possibly undefined).
 */
async function fetch() {
  let response;
  try {
    response = await related_playlist({ id: 1 });
    const dumpFile = path.join(__dirname, "../../temp", `test.json`);
    fs.writeFileSync(dumpFile, JSON.stringify(response.body));
  } catch (errors) {
    console.error("error", errors);
    await sleepUtils.sleep(10 * 1000);
  }
  console.log(response);
}
// Public API of this module: fetch is the only exported function.
module.exports = {
fetch: fetch,
}

View File

@@ -0,0 +1,24 @@
const fs = require('fs');
const path = require('path');
const requestUtils = require('../../../utils/requestUtils');
const sleepUtils = require('../../../utils/sleepUtils');
const dbUtils = global.dbUtils;
/**
 * Load a user's home page, using a file in the temp directory as a cache.
 * The cached copy is read first; if reading fails for any reason, the page is
 * downloaded and written to the cache file. Nothing is returned.
 *
 * @param {{ userId: (string|number), debug?: boolean }} options
 *   userId - id of the user page; debug is accepted but not used.
 */
async function fetch({ userId, debug = false }) {
  const pageUrl = `https://music.163.com/user/home?id=${userId}`;
  // Evaluated lazily at each use, exactly where the path is needed.
  const cacheFile = () => path.join(__dirname, "../../temp", ` user-${userId}.html`);
  let html;
  try {
    html = fs.readFileSync(cacheFile(), 'utf8');
  } catch (errors) {
    html = await requestUtils.getApiResult(pageUrl);
    fs.writeFileSync(cacheFile(), html);
  }
}
// Public API of this module: fetch is the only exported function.
module.exports = {
fetch: fetch,
}

266
netease_music/src/index.js Normal file
View File

@@ -0,0 +1,266 @@
// Load modules
const fs = require('fs');
const path = require('path');
// global.useMysqlPool selects the pooled or the single-connection database helper
const dbUtils = require(`../../utils/${global.useMysqlPool ? 'dbPoolUtils' : 'dbUtils'}`);
const sleepUtils = require('../../utils/sleepUtils');
// Create the database connection / connection pool
dbUtils.create({
database: global.database || "neteasemusic", // database name
connectionLimit: global.connectionLimit || 10, // size of the connection pool
});
// Publish the helper globally before the utils below are required
// NOTE(review): the getInfo modules appear to read global.dbUtils at load time — confirm
global.dbUtils = dbUtils;
console.log("global.useMysqlPool:", !!global.useMysqlPool);
// Load the crawler utils
const songInfoUtils = require('./getInfo/songInfoUtils');
const artistInfoUtils = require('./getInfo/artistInfoUtils');
const albumInfoUtils = require('./getInfo/albumInfoUtils');
const lyricInfoUtils = require('./getInfo/lyricInfoUtils');
const commentUtils = require('./getInfo/commentUtils');
const playlistUtilsOld = require('./getInfo/playlistUtils_old');
const playlistUtils = require('./getInfo/playlistUtils');
const assistantUtils = require('./assistantUtils');
const testUtils = require('./getInfo/testUtils');
/**
 * Manual test entry point.
 * Only prints a banner; the commented-out lines are sample calls to enable by hand
 * when a single crawler needs to be tried out.
 */
async function test() {
console.log("neteaseMusic test...");
// Not every artist has a profile page, e.g. https://music.163.com/#/artist?id=1079075
// let res = await playlistUtils.fetchTop();
// let res = await songInfoUtils.fetchAll({ args: {} });
// let res = await playlistUtilsOld.fetchAll();
// let res = await playlistUtils.fetchAll({ args: {} });
// let res = await albumInfoUtils.fetch({ albumId: "9156", debug: true });
// let res = await artistInfoUtils.fetch({ artistId: "12023508" });
// let res = await songInfoUtils.fetch({ songId: "437608327" });
// let res = await playlistUtilsOld.fetch({ playlistId: "4980157066", debug: true });
// let res = await commentUtils.fetch({ songId: "4980157066" });
// let res = await lyricInfoUtils.fetch({ songId: "569200213" });
// let res = await testUtils.fetch();
// console.log(res);
}
/**
 * Main crawl loop.
 *
 * Looks at `args.utils` on every round and runs the matching crawler, followed by the
 * crawler-specific pause. The loop only ends when `args.utils` names no known crawler
 * (a message is logged and the function returns) or when the playlist crawler finishes
 * (the process exits).
 *
 * NOTE(review): lyric and comment were commented as "exit when done" in the original,
 * but nothing here leaves the loop for them — presumably their fetchAll ends the
 * process itself; confirm.
 *
 * @param {{ utils: string }} args command line options, passed through to the crawlers
 */
async function main(args) {
  console.log("neteaseMusic Start fetch ...");

  // One round of work per crawler name.
  const rounds = new Map([
    ['song', async () => {
      await songInfoUtils.fetchAll({ args: args });
      await sleepUtils.sleep(60 * 1000);
    }],
    ['album', async () => {
      await albumInfoUtils.fetchAll({ args: args });
      await sleepUtils.sleep(30 * 1000);
    }],
    ['artist', async () => {
      await artistInfoUtils.fetchAll({ args: args });
      await sleepUtils.sleep(30 * 1000);
    }],
    ['lyric', async () => {
      await lyricInfoUtils.fetchAll({ args: args });
    }],
    ['comment', async () => {
      await commentUtils.fetchAll({ args: args });
    }],
    ['playlist', async () => {
      // Runs once, then terminates the process.
      await playlistUtils.fetchTop({ args: args });
      process.exit(0);
    }],
    ['assistant', async () => {
      await assistantUtils.updateWaitTable();
      await sleepUtils.sleep(20 * 1000);
    }],
  ]);

  for (;;) {
    const runRound = rounds.get(args.utils);
    if (runRound === undefined) {
      console.log("utils参数不匹配退出");
      return;
    }
    await runRound();
  }
}
/**
 * Data refresh (re-crawl).
 * Endlessly re-runs the album crawler in update mode, pausing 2 seconds between rounds.
 * Never returns normally; it only stops if one of the awaited calls rejects.
 */
async function update() {
  console.log("neteaseMusic Start update ...");
  for (;;) {
    await albumInfoUtils.fetchAll({ isUpdate: true });
    await sleepUtils.sleep(2000);
  }
}
// Statistics of the previous watch() run, used to print the delta between two runs.
let oldWatchParam = {};

/**
 * Print statistics about the crawled data.
 *
 * For every metric listed in `sqls` the current value is read from the `analysis`
 * table (row whose `key` equals the metric name). The `sql` text of each entry
 * documents how the metric is defined; it is not executed here, an entry is only
 * skipped when its `sql` is empty.
 * NOTE(review): the `analysis` table is presumably refreshed by another job — confirm.
 *
 * The report contains the delta against the previous run (NaN on the first run, when
 * there is no previous value), the crawled totals, the waiting totals and the relation
 * counts.
 * NOTE(review): the sums assume `value` comes back as a number; if the column is
 * textual, `+` would concatenate — confirm the column type.
 *
 * @returns {Promise<void>} rejects if any of the queries fails.
 */
async function watch() {
  console.log(`开始统计 ... ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`);
  const statisticTime = Date.now();
  const newWatchParam = {};
  const sqls = [
    // InnoDB count(*) scans the whole table; rough numbers are available via "show table status"
    {
      name: "songCount",
      sql: `SELECT count(*) AS count FROM song`,
    }, {
      name: "songWaiting",
      sql: `SELECT count(DISTINCT song_id) AS count
FROM ( SELECT song_id FROM song_artist_relation UNION SELECT song_id FROM song_album_relation ) t_tmp
WHERE song_id NOT IN ( SELECT song_id FROM song )`,
    }, {
      name: "playlistCount",
      sql: `SELECT count(*) AS count FROM playlist`,
    }, {
      name: "albumCount",
      sql: `SELECT count(*) AS count FROM album`,
    }, {
      name: "albumWaiting",
      sql: `SELECT count( DISTINCT album_id ) as count FROM song_album_relation WHERE album_id NOT IN ( SELECT album_id FROM album )`,
    }, {
      name: "artistCount",
      sql: `SELECT count(*) AS count FROM artist`,
    }, {
      name: "artistWaiting",
      sql: `SELECT count( DISTINCT artist_id ) as count FROM song_artist_relation WHERE artist_id NOT IN ( SELECT artist_id FROM artist )`,
    }, {
      name: "lyricCount",
      sql: `SELECT count(*) AS count FROM lyric`,
    }, {
      name: "commentCount",
      sql: `SELECT count( DISTINCT song_id ) AS count FROM comment`,
    }, {
      name: "commentTotalCount",
      sql: `SELECT count(*) AS count FROM comment`,
    }, {
      name: "userCount",
      sql: `SELECT count(*) AS count FROM user`,
    }, {
      name: "songPlaylistCount",
      sql: `SELECT count(*) AS count FROM song_playlist_relation`,
    }, {
      name: "songAlbumCount",
      sql: `SELECT count(*) AS count FROM song_album_relation`,
    }, {
      name: "songArtistCount",
      sql: `SELECT count(*) AS count FROM song_artist_relation`,
    }
  ];

  let sqlsTimeSpent = 0;
  // Plain async callbacks (instead of `new Promise(async ...)`) so that a failing query
  // rejects Promise.all rather than leaving a promise that never settles.
  const pendingQueries = sqls
    .filter((item) => item.sql) // skip entries whose SQL has been commented out
    .map(async (item) => {
      const sqlStartTime = Date.now();
      const result = await dbUtils.query('SELECT `value` as count FROM analysis WHERE `key` = ?', [item.name]);
      const sqlTimeSpent = Date.now() - sqlStartTime;
      sqlsTimeSpent += sqlTimeSpent;
      // A metric without a row in `analysis` is reported as undefined instead of crashing.
      newWatchParam[item.name] = (result && result.length > 0) ? result[0].count : undefined;
      console.log(`query ${item.name} finished.\tspend time: ${sqlTimeSpent}ms (${(sqlTimeSpent / 1000).toFixed(2)}s),\tcount: ${newWatchParam[item.name]}`);
    });
  await Promise.all(pendingQueries);

  const statisticTimeDelta = Date.now() - statisticTime;
  const statisticsString = [
    ``,
    `统计完成 ${new Date(Date.now() + 8 * 3600 * 1000).toISOString()}`,
    `spend time: ${statisticTimeDelta}ms (${(statisticTimeDelta / 1000).toFixed(2)}s; ${(statisticTimeDelta / (60 * 1000)).toFixed(2)}min), sql query time (sum): ${sqlsTimeSpent}ms (${(sqlsTimeSpent / 1000).toFixed(2)}s; ${(sqlsTimeSpent / (60 * 1000)).toFixed(2)}min)`,
    `[与上次运行统计时相比]`,
    [
      `song: ${newWatchParam['songCount'] - oldWatchParam['songCount']}`,
      `playlist: ${newWatchParam['playlistCount'] - oldWatchParam['playlistCount']}`,
      `album: ${newWatchParam['albumCount'] - oldWatchParam['albumCount']}`,
      `artist: ${newWatchParam['artistCount'] - oldWatchParam['artistCount']}`,
      `lyric: ${newWatchParam['lyricCount'] - oldWatchParam['lyricCount']}`,
      `comment: ${newWatchParam['commentCount'] - oldWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount'] - oldWatchParam['commentTotalCount']}(comment)`,
      `user: ${newWatchParam['userCount'] - oldWatchParam['userCount']}`,
    ].join(', '),
    `[已爬取]`,
    [
      `song: ${newWatchParam['songCount']}`,
      `playlist: ${newWatchParam['playlistCount']}`,
      `album: ${newWatchParam['albumCount']}`,
      `artist: ${newWatchParam['artistCount']}`,
      `lyric: ${newWatchParam['lyricCount']}`,
      `comment: ${newWatchParam['commentCount']}(song)/${newWatchParam['commentTotalCount']}(comment)`,
      `user: ${newWatchParam['userCount']}`,
    ].join(', '),
    `[待爬取]`,
    [
      `song: ${newWatchParam['songWaiting']}`,
      `playlist: 未知`,
      `album: ${newWatchParam['albumWaiting']}`,
      `artist: ${newWatchParam['artistWaiting']}`,
      `lyric: ${newWatchParam['songCount'] - newWatchParam['lyricCount']}`,
      `comment: ${newWatchParam['songCount'] - newWatchParam['commentCount']}`,
      `user: 未知`,
    ].join(', '),
    `[总计] (已爬取 + 待爬取)`,
    [
      `song: ${newWatchParam['songCount'] + newWatchParam['songWaiting']}`,
      `playlist: ${newWatchParam['playlistCount']}`,
      `album: ${newWatchParam['albumCount'] + newWatchParam['albumWaiting']}`,
      `artist: ${newWatchParam['artistCount'] + newWatchParam['artistWaiting']}`,
      `lyric: ${newWatchParam['songCount']}`,
      `comment: ${newWatchParam['songCount']}`,
      `user: ${newWatchParam['userCount']}`,
    ].join(', '),
    `[关联关系统计]`,
    `song-playlist: ${newWatchParam['songPlaylistCount']}, song-album: ${newWatchParam['songAlbumCount']}, song-artist: ${newWatchParam['songArtistCount']}`,
    ``
  ].join('\n');
  console.log(statisticsString);
  oldWatchParam = newWatchParam;
}
/**
 * Graceful shutdown hook, published as global.checkIsExit.
 *
 * Reads `stop.txt` from the current working directory. When its content (ignoring
 * surrounding whitespace such as a trailing newline) is "1", the database connection
 * is closed and the process exits with code 0. A missing file, or any other content,
 * means "keep running" and the function simply returns.
 *
 * @returns {Promise<void>}
 * @throws rethrows any read error other than "file not found".
 */
global.checkIsExit = async function () {
  let stopFlag;
  try {
    stopFlag = fs.readFileSync('stop.txt', 'utf8').trim();
  } catch (err) {
    // No stop file simply means that nobody asked us to stop.
    if (err.code === 'ENOENT') return;
    throw err;
  }
  if (stopFlag !== "1")
    return;
  console.log();
  console.log(`收到退出指令,准备退出...`);
  await sleepUtils.sleep(500);
  await dbUtils.close();
  console.log(`数据库连接池已关闭`);
  await sleepUtils.sleep(100);
  process.exit(0);
};
// Public API of this module: the crawl loop, the re-crawl loop, the statistics report and the manual test hook.
module.exports = {
main: main,
update: update,
watch: watch,
test: test,
}

View File

@@ -0,0 +1,28 @@
const fs = require('fs');
const path = require('path');
// Generates the INSERT statement for the `category` table from a saved copy of the
// playlist category panel (get_cate_html.html next to this script).
const html = fs.readFileSync(path.join(__dirname, 'get_cate_html.html'), 'utf8');

// Wrap a value as a single-quoted MySQL string literal (backslashes and quotes escaped),
// so that a category containing ' or \ cannot break the generated statement.
const sqlQuote = (text) => `'${text.replace(/\\/g, '\\\\').replace(/'/g, "''")}'`;

const rows = [];
// Every <dl class="f-cb"> holds one category group; the chunk in front of the first
// one is the "all styles" header, which has no group title and is skipped.
for (const group of html.split('<dl class="f-cb">')) {
  const titleMatch = group.match(/<\/i>(.*?)<\/dt>/);
  if (!titleMatch) continue;
  const title = titleMatch[1];
  const cate = [];
  for (const match of group.matchAll(/data-cat="(.*?)"/g)) {
    // The attribute is HTML-escaped, e.g. "R&amp;B/Soul".
    const category = match[1].replace(/&amp;/g, "&");
    cate.push(category);
    rows.push(`(${sqlQuote(category)}, ${sqlQuote(title)})`);
  }
  console.log({ title, cate });
}

if (rows.length > 0) {
  console.log(`
INSERT INTO category (title, netease_group_chinese) VALUES ${rows.join(',')} ON DUPLICATE KEY UPDATE netease_group_chinese=VALUES(netease_group_chinese)
`);
} else {
  // An INSERT with an empty VALUES list is not valid SQL, so print nothing usable.
  console.error('No categories found in get_cate_html.html; no SQL generated.');
}

View File

@@ -0,0 +1,96 @@
<h3><a href="/discover/playlist/" class="j-flag u-btn u-btn-g s-fc1" data-cat="全部"><em>全部风格</em></a></h3>
<dl class="f-cb">
<dt><i class="u-icn u-icn-71"></i>语种</dt>
<dd >
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%8D%8E%E8%AF%AD" data-cat="华语">华语</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%AC%A7%E7%BE%8E" data-cat="欧美">欧美</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%97%A5%E8%AF%AD" data-cat="日语">日语</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E9%9F%A9%E8%AF%AD" data-cat="韩语">韩语</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E7%B2%A4%E8%AF%AD" data-cat="粤语">粤语</a><span class="line">|</span>
</dd>
</dl>
<dl class="f-cb">
<dt><i class="u-icn u-icn-6"></i>风格</dt>
<dd >
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%B5%81%E8%A1%8C" data-cat="流行">流行</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%91%87%E6%BB%9A" data-cat="摇滚">摇滚</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%B0%91%E8%B0%A3" data-cat="民谣">民谣</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E7%94%B5%E5%AD%90" data-cat="电子">电子</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E8%88%9E%E6%9B%B2" data-cat="舞曲">舞曲</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E8%AF%B4%E5%94%B1" data-cat="说唱">说唱</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E8%BD%BB%E9%9F%B3%E4%B9%90" data-cat="轻音乐">轻音乐</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E7%88%B5%E5%A3%AB" data-cat="爵士">爵士</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E4%B9%A1%E6%9D%91" data-cat="乡村">乡村</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=R%26B%2FSoul" data-cat="R&amp;B/Soul">R&amp;B/Soul</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%8F%A4%E5%85%B8" data-cat="古典">古典</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%B0%91%E6%97%8F" data-cat="民族">民族</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E8%8B%B1%E4%BC%A6" data-cat="英伦">英伦</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E9%87%91%E5%B1%9E" data-cat="金属">金属</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%9C%8B%E5%85%8B" data-cat="朋克">朋克</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E8%93%9D%E8%B0%83" data-cat="蓝调">蓝调</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E9%9B%B7%E9%AC%BC" data-cat="雷鬼">雷鬼</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E4%B8%96%E7%95%8C%E9%9F%B3%E4%B9%90" data-cat="世界音乐">世界音乐</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%8B%89%E4%B8%81" data-cat="拉丁">拉丁</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=New%20Age" data-cat="New Age">New Age</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%8F%A4%E9%A3%8E" data-cat="古风">古风</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%90%8E%E6%91%87" data-cat="后摇">后摇</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=Bossa%20Nova" data-cat="Bossa Nova">Bossa Nova</a><span class="line">|</span>
</dd>
</dl>
<dl class="f-cb">
<dt><i class="u-icn u-icn-7"></i>场景</dt>
<dd >
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%B8%85%E6%99%A8" data-cat="清晨">清晨</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%A4%9C%E6%99%9A" data-cat="夜晚">夜晚</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%AD%A6%E4%B9%A0" data-cat="学习">学习</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%B7%A5%E4%BD%9C" data-cat="工作">工作</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%8D%88%E4%BC%91" data-cat="午休">午休</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E4%B8%8B%E5%8D%88%E8%8C%B6" data-cat="下午茶">下午茶</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%9C%B0%E9%93%81" data-cat="地铁">地铁</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E9%A9%BE%E8%BD%A6" data-cat="驾车">驾车</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E8%BF%90%E5%8A%A8" data-cat="运动">运动</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%97%85%E8%A1%8C" data-cat="旅行">旅行</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%95%A3%E6%AD%A5" data-cat="散步">散步</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E9%85%92%E5%90%A7" data-cat="酒吧">酒吧</a><span class="line">|</span>
</dd>
</dl>
<dl class="f-cb">
<dt><i class="u-icn u-icn-8"></i>情感</dt>
<dd >
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%80%80%E6%97%A7" data-cat="怀旧">怀旧</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%B8%85%E6%96%B0" data-cat="清新">清新</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%B5%AA%E6%BC%AB" data-cat="浪漫">浪漫</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E4%BC%A4%E6%84%9F" data-cat="伤感">伤感</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%B2%BB%E6%84%88" data-cat="治愈">治愈</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%94%BE%E6%9D%BE" data-cat="放松">放松</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%AD%A4%E7%8B%AC" data-cat="孤独">孤独</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%84%9F%E5%8A%A8" data-cat="感动">感动</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%85%B4%E5%A5%8B" data-cat="兴奋">兴奋</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%BF%AB%E4%B9%90" data-cat="快乐">快乐</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%AE%89%E9%9D%99" data-cat="安静">安静</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%80%9D%E5%BF%B5" data-cat="思念">思念</a><span class="line">|</span>
</dd>
</dl>
<dl class="f-cb">
<dt><i class="u-icn u-icn-9"></i>主题</dt>
<dd class="last">
<a class="s-fc1 " href="/discover/playlist/?cat=%E7%BB%BC%E8%89%BA" data-cat="综艺">综艺</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%BD%B1%E8%A7%86%E5%8E%9F%E5%A3%B0" data-cat="影视原声">影视原声</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=ACG" data-cat="ACG">ACG</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%84%BF%E7%AB%A5" data-cat="儿童">儿童</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%A0%A1%E5%9B%AD" data-cat="校园">校园</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%B8%B8%E6%88%8F" data-cat="游戏">游戏</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=70%E5%90%8E" data-cat="70后">70后</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=80%E5%90%8E" data-cat="80后">80后</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=90%E5%90%8E" data-cat="90后">90后</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E7%BD%91%E7%BB%9C%E6%AD%8C%E6%9B%B2" data-cat="网络歌曲">网络歌曲</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=KTV" data-cat="KTV">KTV</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E7%BB%8F%E5%85%B8" data-cat="经典">经典</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E7%BF%BB%E5%94%B1" data-cat="翻唱">翻唱</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%90%89%E4%BB%96" data-cat="吉他">吉他</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E9%92%A2%E7%90%B4" data-cat="钢琴">钢琴</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E5%99%A8%E4%B9%90" data-cat="器乐">器乐</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=%E6%A6%9C%E5%8D%95" data-cat="榜单">榜单</a><span class="line">|</span>
<a class="s-fc1 " href="/discover/playlist/?cat=00%E5%90%8E" data-cat="00后">00后</a><span class="line">|</span>
</dd>
</dl>

View File

@@ -0,0 +1,27 @@
const fs = require('fs');
const absPath = `D:/sql_export`;
/**
 * Convert a number to a string and left-pad it with `fillers`.
 *
 * @param {*} num value to convert
 * @param {string} fillers text inserted once per missing character
 * @param {number} length minimum length of the result
 * @returns {string} the padded string; values that are already long enough are returned unpadded
 */
function fill(num, fillers, length) {
  const text = `${num}`;
  // Joining k + 1 empty slots with the filler produces the filler k times.
  return text.length < length
    ? new Array(length - text.length + 1).join(fillers) + text
    : text;
}
// Rewrites exported dump files so that their INSERT statements target the `comment`
// table instead of the numbered `comment_export_NNNN` table.
// Processes file numbers from `begin` (inclusive) to `end` (exclusive).
const begin = 115;
const end = 116;
for (let fileNo = begin; fileNo < end; fileNo++) {
  console.log(`读取文件 comment_export_${fill(fileNo, '0', 4)}.sql`);
  const exported = fs.readFileSync(`${absPath}/comment_export_${fill(fileNo, '0', 4)}.sql`, "utf-8");

  console.log(`处理文件`);
  // The input is split on CRLF; only the first INSERT prefix of each line is rewritten.
  const sourceTable = `INSERT INTO \`comment_export_${fill(fileNo, '0', 4)}\` VALUES`;
  const statements = exported
    .split("\r\n")
    .map((statement) => statement.replace(sourceTable, "INSERT INTO `comment` VALUES"));

  console.log(`拼接文件`);
  const merged = statements.join('\n');

  console.log(`写入文件`);
  fs.writeFileSync(`${absPath}/output/comment_${fill(fileNo, '0', 4)}.sql`, merged, "utf-8");
  console.log(`完成`);
}

1
netease_music/stop.txt Normal file
View File

@@ -0,0 +1 @@
0

2
netease_music/temp/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
*
!.gitignore

3
netease_music/test.js Normal file
View File

@@ -0,0 +1,3 @@
// Entry script for manual tests.
// The flag must be set before ./src/index is required, because that module reads it while loading.
global.useMysqlPool = false;
const neteaseMusic = require('./src/index');
neteaseMusic.test();

3
netease_music/update.js Normal file
View File

@@ -0,0 +1,3 @@
// Entry script for the re-crawl (update) loop.
// The flag must be set before ./src/index is required, because that module reads it while loading.
global.useMysqlPool = true;
const neteaseMusic = require('./src/index');
neteaseMusic.update();

19
netease_music/watch.js Normal file
View File

@@ -0,0 +1,19 @@
// Entry script for the statistics report: prints it once, or every 10 seconds
// when keepWatching is switched on.
const keepWatching = false;

// These globals are read by ./src/index while it loads, so they are set first.
// Repeated reporting uses the connection pool; a single report uses a plain connection.
global.useMysqlPool = keepWatching;
if (keepWatching) {
  global.connectionLimit = 15;
}
// global.dbConfig = 'mysql_local';
const neteaseMusic = require('./src/index');
const sleepUtils = require('../utils/sleepUtils');

// Print the report at least once; keep repeating only in keepWatching mode.
async function main() {
  for (;;) {
    await neteaseMusic.watch();
    if (!keepWatching) break;
    await sleepUtils.sleep(10 * 1000);
  }
}
main();

1
nowcoder/exam_interview/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
temp/*.json

View File

@@ -0,0 +1,82 @@
const fs = require("fs");
const requestUtils = require("../../utils/requestUtils");
// Crawl target: https://www.nowcoder.com/exam/interview
// When true, get() writes the `data` payload of every successful response into ./temp
const saveTempFile = true;
// main is a function declaration (hoisted), so it can be started before its definition below
main();
/**
 * Crawl every question of one job: job -> question classifications -> question list
 * -> question detail. The responses themselves are persisted by get() (temp files);
 * nothing is returned.
 *
 * The request helpers return null when a request fails, so a failed classification
 * request ends the crawl and a failed list request skips that classification.
 */
async function main() {
  // Request parameter ids
  const questionInfo = {
    questionJobId: 156,
    questionClassifyId: null,
    questionId: null,
  };

  // Question classifications of the job
  const classifyList = await getQuestionClassify(questionInfo.questionJobId);
  if (!Array.isArray(classifyList)) {
    console.error("获取题目分类失败", questionInfo);
    return;
  }

  for (const classify of classifyList) {
    questionInfo.questionClassifyId = classify.questionClassifyId;

    // Questions of the classification
    const listResult = await jobQuestionList(questionInfo.questionJobId, questionInfo.questionClassifyId);
    if (!listResult || !Array.isArray(listResult.subjectList)) {
      console.error("获取题目列表失败", questionInfo);
      continue;
    }

    for (const question of listResult.subjectList) {
      questionInfo.questionId = question.questionId;
      console.log(questionInfo);
      // Detail of the question
      await jobQuestionDetail(questionInfo.questionJobId, questionInfo.questionClassifyId, questionInfo.questionId);
    }
  }
}
/**
 * Request a nowcoder API url and return the `data` field of its JSON response.
 *
 * @param {string} url address to request
 * @param {string} functionName label used in the name of the temp file
 * @returns {Promise<*>} `response.data` when the response code is 0; null when the
 *   request fails, the body is not valid JSON, or the API reports a non-zero code.
 *
 * When saveTempFile is on, the data is also written to ./temp/<functionName>-data.json.
 * A failure to write that file is logged but does not discard the data.
 */
async function get(url, functionName) {
  let json;
  try {
    json = await requestUtils.getApiResult(url);
  } catch (err) {
    console.error("请求失败", err);
    return null;
  }

  let response;
  try {
    response = JSON.parse(json);
  } catch (err) {
    console.error("请求解析失败", err);
    return null;
  }

  if (!response || response.code !== 0) {
    console.error("请求失败", json);
    return null;
  }

  if (saveTempFile) {
    try {
      fs.writeFileSync(`./temp/${functionName}-data.json`, JSON.stringify(response.data, null, 2));
    } catch (err) {
      // e.g. the ./temp directory does not exist
      console.error("保存临时文件失败", err);
    }
  }
  return response.data;
}
/**
 * Get the question classifications of a job, e.g. "all questions" under "front-end engineer".
 *
 * @param {number} questionJobId id of the job
 * @returns {Promise<*>} whatever get() returns for this request
 */
async function getQuestionClassify(questionJobId) {
  // `_` carries the current timestamp, so every request url is unique
  const query = [
    `questionJobId=${questionJobId}`,
    `_=${Date.now()}`,
  ].join('&');
  const label = `1 getQuestionClassify-${questionJobId}`;
  return await get(`https://www.nowcoder.com/api/questiontraining/interview/getQuestionClassify?${query}`, label);
}
/**
 * Get the filter options shown above the question list of a classification.
 *
 * @param {number} questionJobId id of the job
 * @param {number} questionClassifyId id of the classification
 * @returns {Promise<*>} whatever get() returns for this request
 */
async function jobQuestionFilter(questionJobId, questionClassifyId) {
  // `_` carries the current timestamp, so every request url is unique
  const query = [
    `questionJobId=${questionJobId}`,
    `questionClassifyId=${questionClassifyId}`,
    `_=${Date.now()}`,
  ].join('&');
  const label = `2 jobQuestionFilter-${questionJobId}-${questionClassifyId}`;
  return await get(`https://www.nowcoder.com/api/questiontraining/interview/jobQuestionFilter?${query}`, label);
}
/**
 * Get the question list of a classification (first page, page size 50000).
 *
 * @param {number} questionJobId id of the job
 * @param {number} questionClassifyId id of the classification
 * @returns {Promise<*>} whatever get() returns for this request
 */
async function jobQuestionList(questionJobId, questionClassifyId) {
  // `_` carries the current timestamp, so every request url is unique
  const query = [
    `questionJobId=${questionJobId}`,
    `questionClassifyId=${questionClassifyId}`,
    `page=1`,
    `size=50000`,
    `_=${Date.now()}`,
  ].join('&');
  const label = `3 jobQuestionList-${questionJobId}-${questionClassifyId}`;
  return await get(`https://www.nowcoder.com/api/questiontraining/interview/jobQuestionList?${query}`, label);
}
/**
 * Get the detail of one question; the request url is printed before it is sent.
 *
 * @param {number} questionJobId id of the job
 * @param {number} questionClassifyId id of the classification
 * @param {number} questionId id of the question
 * @returns {Promise<*>} whatever get() returns for this request
 */
async function jobQuestionDetail(questionJobId, questionClassifyId, questionId) {
  // `_` carries the current timestamp, so every request url is unique
  const query = [
    `questionId=${questionId}`,
    `questionJobId=${questionJobId}`,
    `questionClassifyId=${questionClassifyId}`,
    `_=${Date.now()}`,
  ].join('&');
  const detailUrl = `https://www.nowcoder.com/api/questiontraining/interview/jobQuestionDetail?${query}`;
  console.log(detailUrl);
  const label = `4 jobQuestionDetail-${questionJobId}-${questionClassifyId}-${questionId}`;
  return await get(detailUrl, label);
}

View File

Some files were not shown because too many files have changed in this diff Show More