add netease music
This commit is contained in:
parent
08f303de8f
commit
2a72b59dce
7
index.js
7
index.js
@ -1,2 +1,5 @@
|
|||||||
const dbUtils = require('./utils/dbUtils');
|
// const dbUtils = require('./utils/dbUtils');
|
||||||
const dbPoolUtils = require('./utils/dbPoolUtils');
|
// const dbPoolUtils = require('./utils/dbPoolUtils');
|
||||||
|
|
||||||
|
const neteaseMusic = require('./netease_music/index');
|
||||||
|
neteaseMusic.main();
|
260
netease_music/index.js
Normal file
260
netease_music/index.js
Normal file
@ -0,0 +1,260 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
const dbUtils = require('../utils/dbUtils');
|
||||||
|
|
||||||
|
const requestUtils = require('../utils/requestUtils');
|
||||||
|
const sleepUtils = require('../utils/sleepUtils');
|
||||||
|
|
||||||
|
/**
 * Entry point of the NetEase Cloud Music crawler.
 *
 * Selects the `neteaseMusic` database and crawls outwards from one seed song.
 * The caller in index.js invokes `main()` without awaiting it, so a failure is
 * handled here: it is logged and turned into a non-zero exit code instead of
 * surfacing as an unhandled promise rejection.
 *
 * @returns {Promise<void>} Resolves when the crawl has finished or failed.
 */
async function main() {
    console.log("neteaseMusic Starting...");

    // Select the database used by every later dbUtils.query() call.
    dbUtils.create("neteaseMusic");

    // Single-entity fetches that are handy for manual testing:
    //   getMusicInfo({ songId: "1855221507" });
    //   getArtistInfo({ artistId: "1079074" });
    //   getAlbumInfo({ albumId: "74268047" });
    // Not every artist has a personal homepage, for example
    // https://music.163.com/#/artist?id=1079075

    try {
        await startGetMusic({ songId: "1966061035" });
    } catch (error) {
        console.error("neteaseMusic crawl failed:", error);
        process.exitCode = 1;
    }
}
|
||||||
|
|
||||||
|
/**
 * Crawl a song, then recursively crawl every song of its album and of each of
 * its artists.
 *
 * Album and artist song lists contain the song that led to them, so the ids
 * already handled during this crawl are remembered in `visited` and skipped;
 * without that, the same song page is downloaded again on every revisit.
 *
 * @param {{ songId: (string|number) }} param0 - Song to start from.
 * @param {Set<string>} [visited] - Song ids already handled in this crawl.
 *     Callers normally omit it; the recursion passes the same set along.
 * @returns {Promise<void>}
 */
async function startGetMusic({ songId }, visited = new Set()) {
    // Ids are normalised to strings because the seed id is a string, while ids
    // parsed out of the page JSON are presumably numbers.
    const songKey = String(songId);
    if (visited.has(songKey)) {
        return;
    }
    visited.add(songKey);

    // Throttle requests to the site.
    await sleepUtils.sleep(500);

    const songInfo = await getMusicInfo({ songId: songId });
    if (!songInfo) {
        return;
    }

    // getAlbumInfo / getArtistInfo return nothing when the entity is already
    // stored, which prunes those branches of the crawl.
    const albumInfo = await getAlbumInfo({ albumId: songInfo.albumId });
    if (albumInfo) {
        for (const albumSongId of albumInfo.songIds) {
            await startGetMusic({ songId: albumSongId }, visited);
        }
    }

    for (const artistId of songInfo.artistIds) {
        const artistInfo = await getArtistInfo({ artistId: artistId });
        if (artistInfo) {
            for (const artistSongId of artistInfo.songIds) {
                await startGetMusic({ songId: artistSongId }, visited);
            }
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Download a song page from music.163.com, extract its metadata and store it.
 *
 * Writes one row to `song`, one to `song_album_relation` and one row per
 * artist to `song_artist_relation` (all with INSERT IGNORE), and waits for
 * those writes before returning. There is no "already stored" shortcut here;
 * startGetMusic reads the returned album and artist ids to continue the crawl.
 *
 * @param {{ songId: (string|number) }} param0 - Id of the song to fetch.
 * @returns {Promise<{songId: (string|number), title: string, image: string,
 *     pubDate: *, artist: string, artistIds: string[], album: string,
 *     albumId: string, duration: string}>} The extracted song data.
 * @throws {Error} If the page lacks one of the expected fields.
 */
async function getMusicInfo({ songId }) {
    console.log(`开始处理 song: ${songId}`);

    const url = `https://music.163.com/song?id=${songId}`;

    // The page is always downloaded; the copy under ../temp is only kept so
    // the raw HTML can be inspected afterwards.
    const html = await requestUtils.getApiResult(url);
    fs.writeFileSync(path.join(__dirname, "../temp", `song-${songId}.html`), html);

    // Return capture group 1 of `pattern`, or fail with a message naming the
    // missing field instead of a bare "cannot read properties of null".
    const extract = (pattern, label) => {
        const match = pattern.exec(html);
        if (match === null) {
            throw new Error(`song ${songId}: cannot find ${label} in ${url}`);
        }
        return match[1];
    };

    const songInfoDict = JSON.parse(
        extract(/<script type="application\/ld\+json">([\S\s]*?)<\/script>/, "ld+json metadata")
    );

    const title = extract(/<meta property="og:title" content="(.*?)" \/>/, "og:title");
    // Only the part after the http://pN.music.126.net/ host is kept.
    const image = extract(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/, "og:image");
    const artist = extract(/<meta property="og:music:artist" content="(.*?)" \/>/, "og:music:artist");
    const album = extract(/<meta property="og:music:album" content="(.*?)"\/>/, "og:music:album");
    const albumId = extract(/<meta property="music:album" content="https:\/\/music\.163\.com\/album\?id=(.*?)"\/>/, "music:album");
    const duration = extract(/<meta property="music:duration" content="(.*?)"\/>/, "music:duration");

    // A song can list several musicians; collect every id.
    const musicianPattern = /<meta property="music:musician" content="https:\/\/music\.163\.com\/artist\?id=(.*?)"\/>/g;
    const artistIds = Array.from(html.matchAll(musicianPattern), (match) => match[1]);

    const songInfo = {
        songId: songId,
        title: title,
        image: image,
        pubDate: songInfoDict.pubDate,
        artist: artist,
        artistIds: artistIds,
        album: album,
        albumId: albumId,
        duration: duration,
    };

    // Await every write so that failures reach the caller and later
    // "already stored?" checks see the data. Writes run one after another.
    await dbUtils.query('INSERT IGNORE INTO song SET ?', {
        song_id: songInfo.songId,
        title: songInfo.title,
        image: songInfo.image,
        pub_date: songInfo.pubDate,
    });
    await dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
        song_id: songInfo.songId,
        album_id: songInfo.albumId,
    });
    for (const artistId of artistIds) {
        await dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
            song_id: songInfo.songId,
            artist_id: artistId,
        });
    }

    return songInfo;
}
|
||||||
|
|
||||||
|
/**
 * Download an artist page from music.163.com, extract its metadata and the
 * ids of the songs listed on it, and store them.
 *
 * Artists that already have a row in `artist` are skipped. Otherwise one row
 * is written to `artist` and one row per listed song to
 * `song_artist_relation` (INSERT IGNORE); the writes are awaited.
 *
 * @param {{ artistId: (string|number) }} param0 - Id of the artist to fetch.
 * @returns {Promise<({artistId: (string|number), title: *, image: string,
 *     description: *, pubDate: *, songIds: Array}|undefined)>} The extracted
 *     artist data, or `undefined` when the artist is already stored.
 * @throws {Error} If the page lacks one of the expected fields.
 */
async function getArtistInfo({ artistId }) {
    console.log(`开始处理 artist: ${artistId}`);

    const rows = await dbUtils.query('SELECT count(*) as count FROM artist WHERE artist_id = ?', [artistId]);
    if (rows[0].count > 0) {
        console.log(`数据库中已有数据,跳过 artistId: ${artistId}`);
        return undefined;
    }

    const url = `https://music.163.com/artist?id=${artistId}`;

    // The page is always downloaded; the copy under ../temp is only kept so
    // the raw HTML can be inspected afterwards.
    const html = await requestUtils.getApiResult(url);
    fs.writeFileSync(path.join(__dirname, "../temp", `artist-${artistId}.html`), html);

    // Return capture group 1 of `pattern`, or fail with a message naming the
    // missing field instead of a bare "cannot read properties of null".
    const extract = (pattern, label) => {
        const match = pattern.exec(html);
        if (match === null) {
            throw new Error(`artist ${artistId}: cannot find ${label} in ${url}`);
        }
        return match[1];
    };

    const artistInfoDict = JSON.parse(
        extract(/<script type="application\/ld\+json">([\S\s]*?)<\/script>/, "ld+json metadata")
    );

    // Only the part after the http://pN.music.126.net/ host is kept.
    const image = extract(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/, "og:image");

    // The page embeds its song list as JSON inside a hidden textarea.
    const songList = JSON.parse(
        extract(/<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/, "song list")
    );
    const songIds = songList.map((song) => song.id);

    const artistInfo = {
        artistId: artistId,
        title: artistInfoDict.title,
        image: image,
        description: artistInfoDict.description,
        pubDate: artistInfoDict.pubDate,
        songIds: songIds,
    };

    // Await every write so that failures reach the caller and the
    // "already stored?" check above sees this artist on the next visit.
    await dbUtils.query('INSERT IGNORE INTO artist SET ?', {
        artist_id: artistInfo.artistId,
        title: artistInfo.title,
        description: artistInfo.description,
        image: artistInfo.image,
        pub_date: artistInfo.pubDate,
    });
    for (const songId of songIds) {
        await dbUtils.query('INSERT IGNORE INTO song_artist_relation SET ?', {
            song_id: songId,
            artist_id: artistId,
        });
    }

    return artistInfo;
}
|
||||||
|
|
||||||
|
/**
 * Download an album page from music.163.com, extract its metadata and the
 * ids of its songs, and store them.
 *
 * Albums that already have a row in `album` are skipped. Otherwise one row is
 * written to `album` and one row per song to `song_album_relation`
 * (INSERT IGNORE); the writes are awaited.
 *
 * @param {{ albumId: (string|number) }} param0 - Id of the album to fetch.
 * @returns {Promise<({albumId: (string|number), title: *, image: string,
 *     description: *, pubDate: *, company: (string|null), songIds: Array}
 *     |undefined)>} The extracted album data, or `undefined` when the album
 *     is already stored.
 * @throws {Error} If the page lacks one of the required fields.
 */
async function getAlbumInfo({ albumId }) {
    console.log(`开始处理 album: ${albumId}`);

    const rows = await dbUtils.query('SELECT count(*) as count FROM album WHERE album_id = ?', [albumId]);
    if (rows[0].count > 0) {
        console.log(`数据库中已有数据,跳过 albumId: ${albumId}`);
        return undefined;
    }

    const url = `https://music.163.com/album?id=${albumId}`;

    // The page is always downloaded; the copy under ../temp is only kept so
    // the raw HTML can be inspected afterwards.
    const html = await requestUtils.getApiResult(url);
    fs.writeFileSync(path.join(__dirname, "../temp", `album-${albumId}.html`), html);

    // Return capture group 1 of `pattern`, or fail with a message naming the
    // missing field instead of a bare "cannot read properties of null".
    const extract = (pattern, label) => {
        const match = pattern.exec(html);
        if (match === null) {
            throw new Error(`album ${albumId}: cannot find ${label} in ${url}`);
        }
        return match[1];
    };

    const albumInfoDict = JSON.parse(
        extract(/<script type="application\/ld\+json">([\S\s]*?)<\/script>/, "ld+json metadata")
    );

    // The publishing company is optional: pages without it yield null.
    const companyMatch = /<p class="intr"><b>发行公司:<\/b>\n(.*?)\n<\/p>/.exec(html);
    const company = companyMatch === null ? null : companyMatch[1];

    // Only the part after the http://pN.music.126.net/ host is kept.
    const image = extract(/<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/, "og:image");

    // The page embeds its song list as JSON inside a hidden textarea.
    const songList = JSON.parse(
        extract(/<textarea id="song-list-pre-data" style="display:none;">(.*?)<\/textarea>/, "song list")
    );
    const songIds = songList.map((song) => song.id);

    const albumInfo = {
        albumId: albumId,
        title: albumInfoDict.title,
        image: image,
        description: albumInfoDict.description,
        pubDate: albumInfoDict.pubDate,
        company: company,
        songIds: songIds,
    };

    // Await every write so that failures reach the caller and the
    // "already stored?" check above sees this album on the next visit.
    await dbUtils.query('INSERT IGNORE INTO album SET ?', {
        album_id: albumInfo.albumId,
        title: albumInfo.title,
        description: albumInfo.description,
        image: albumInfo.image,
        pub_date: albumInfo.pubDate,
        company: albumInfo.company,
    });
    for (const songId of songIds) {
        await dbUtils.query('INSERT IGNORE INTO song_album_relation SET ?', {
            song_id: songId,
            album_id: albumId,
        });
    }

    return albumInfo;
}
|
||||||
|
|
||||||
|
// // Fetch user profile details (disabled, not implemented yet)
|
||||||
|
// async function getUserInfo({ userId }) {
|
||||||
|
// let url = `https://music.163.com/user/home?id=${userId}`;
|
||||||
|
|
||||||
|
// try {
|
||||||
|
// var html = fs.readFileSync(path.join(__dirname, "../temp", ` user-${userId}.html`), 'utf8');
|
||||||
|
// } catch (errors) {
|
||||||
|
// var html = await requestUtils.getApiResult(url);
|
||||||
|
// fs.writeFileSync(path.join(__dirname, "../temp", ` user-${userId}.html`), html);
|
||||||
|
// }
|
||||||
|
// // console.log(html);
|
||||||
|
|
||||||
|
|
||||||
|
// }
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
main: main,
|
||||||
|
}
|
50
netease_music/sql/structure.sql
Normal file
50
netease_music/sql/structure.sql
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
-- Schema for the NetEase Cloud Music crawler (MySQL).
-- Every CREATE uses IF NOT EXISTS so the script can be re-run without errors.
CREATE DATABASE IF NOT EXISTS `neteaseMusic` CHARACTER SET 'utf8mb4' COLLATE 'utf8mb4_general_ci';
USE `neteaseMusic`;

-- One row per crawled song page.
CREATE TABLE IF NOT EXISTS `song` (
  `song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
  `title` varchar(200) COLLATE utf8mb4_general_ci NOT NULL COMMENT '歌曲名',
  `image` varchar(200) COLLATE utf8mb4_general_ci NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
  `pub_date` varchar(100) COLLATE utf8mb4_general_ci NOT NULL COMMENT '发布日期',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`song_id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

-- One row per crawled artist page.
CREATE TABLE IF NOT EXISTS `artist` (
  `artist_id` int(10) unsigned NOT NULL COMMENT '歌手id',
  `title` varchar(200) COLLATE utf8mb4_general_ci NOT NULL COMMENT '歌手名',
  `description` varchar(1500) COLLATE utf8mb4_general_ci NOT NULL COMMENT '歌手简介',
  `image` varchar(200) COLLATE utf8mb4_general_ci NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
  `pub_date` varchar(100) COLLATE utf8mb4_general_ci NOT NULL COMMENT '发布日期',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`artist_id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

-- One row per crawled album page; `company` is nullable because the crawler
-- stores NULL when the page shows no publishing company.
CREATE TABLE IF NOT EXISTS `album` (
  `album_id` int(10) unsigned NOT NULL COMMENT '专辑id',
  `title` varchar(200) COLLATE utf8mb4_general_ci NOT NULL COMMENT '专辑名',
  `description` varchar(1500) COLLATE utf8mb4_general_ci NOT NULL COMMENT '专辑简介',
  `image` varchar(200) COLLATE utf8mb4_general_ci NOT NULL COMMENT '封面图 http://p1.music.126.net/ 后面的部分',
  `pub_date` varchar(100) COLLATE utf8mb4_general_ci NOT NULL COMMENT '发布日期',
  `company` varchar(100) COLLATE utf8mb4_general_ci NULL COMMENT '发行公司',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`album_id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

-- Song <-> album links. The composite primary key is what lets the crawler's
-- INSERT IGNORE statements drop duplicate pairs.
CREATE TABLE IF NOT EXISTS `song_album_relation` (
  `song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
  `album_id` int(10) unsigned NOT NULL COMMENT '专辑id',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`song_id`,`album_id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

-- Song <-> artist links, deduplicated the same way.
CREATE TABLE IF NOT EXISTS `song_artist_relation` (
  `song_id` int(10) unsigned NOT NULL COMMENT '歌曲id',
  `artist_id` int(10) unsigned NOT NULL COMMENT '歌手id',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '爬取时间',
  `modify_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最后更新时间',
  PRIMARY KEY (`song_id`,`artist_id`)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
|
2
temp/.gitignore
vendored
Normal file
2
temp/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
*
|
||||||
|
!.gitignore
|
@ -11,7 +11,7 @@ function createPool(database) {
|
|||||||
...globalConfig.mysql,
|
...globalConfig.mysql,
|
||||||
database: database,
|
database: database,
|
||||||
};
|
};
|
||||||
console.log(config);
|
// console.log(config);
|
||||||
|
|
||||||
//创建数据库连接池
|
//创建数据库连接池
|
||||||
pool = mysql.createPool(config);
|
pool = mysql.createPool(config);
|
||||||
@ -37,14 +37,14 @@ function createPool(database) {
|
|||||||
// });
|
// });
|
||||||
}
|
}
|
||||||
|
|
||||||
async function query(sql) {
|
async function query(sql, params) {
|
||||||
if (!pool) {
|
if (!pool) {
|
||||||
console.error('Database connection pool is not initialized yet.');
|
console.error('Database connection pool is not initialized yet.');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
return await new Promise(function (resolve, reject) {
|
return await new Promise(function (resolve, reject) {
|
||||||
//pool.query()方法可以自动的帮我们在连接池中获取可用连接
|
//pool.query()方法可以自动的帮我们在连接池中获取可用连接
|
||||||
pool.query(sql, function (err, data) {
|
pool.query(sql, params, function (err, data) {
|
||||||
if (err) reject(err);
|
if (err) reject(err);
|
||||||
// console.log(data);
|
// console.log(data);
|
||||||
resolve(data);
|
resolve(data);
|
||||||
|
@ -4,29 +4,36 @@ const fs = require('fs');
|
|||||||
const path = require('path');
|
const path = require('path');
|
||||||
|
|
||||||
let globalConfig = JSON.parse(fs.readFileSync(path.join(__dirname, '../config.json'), 'utf8'));
|
let globalConfig = JSON.parse(fs.readFileSync(path.join(__dirname, '../config.json'), 'utf8'));
|
||||||
|
let database = null;
|
||||||
|
|
||||||
async function query(sql) {
|
function create(databaseName) {
|
||||||
|
database = databaseName;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function query(sql, params) {
|
||||||
let config = {
|
let config = {
|
||||||
...globalConfig.mysql,
|
...globalConfig.mysql,
|
||||||
|
database: database,
|
||||||
};
|
};
|
||||||
console.log(config);
|
// console.log(config);
|
||||||
|
|
||||||
//通过MySQL中方法创建连接对象
|
return await new Promise(function (resolve, reject) {
|
||||||
var connection = mysql.createConnection(config);
|
//通过MySQL中方法创建连接对象
|
||||||
//开始连接
|
var connection = mysql.createConnection(config);
|
||||||
connection.connect();
|
//开始连接
|
||||||
//执行SQL语句 (添加、删除、更新、查询)
|
connection.connect();
|
||||||
connection.query(sql, (err, result) => {
|
//执行SQL语句 (添加、删除、更新、查询)
|
||||||
if (err) {
|
connection.query(sql, params, (err, data) => {
|
||||||
console.error('err', err);
|
if (err) reject(err);
|
||||||
return;
|
// console.log(data);
|
||||||
}
|
resolve(data);
|
||||||
console.log('result', result);
|
})
|
||||||
})
|
//最后需要关闭连接
|
||||||
//最后需要关闭连接
|
connection.end();
|
||||||
connection.end();
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
create: create,
|
||||||
query: query,
|
query: query,
|
||||||
}
|
}
|
@ -4,4 +4,6 @@ async function sleep(ms) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = sleep;
|
module.exports = {
|
||||||
|
sleep: sleep,
|
||||||
|
};
|
Loading…
Reference in New Issue
Block a user