1
0
Code Issues Pull Requests Projects Releases Wiki Activity GitHub Gitee

artist:处理极少数页面中正则匹配内容(ld+json 脚本块)不存在的情况

This commit is contained in:
程序员小墨 2022-11-07 15:35:22 +08:00
parent e638b09313
commit d17f4282e4
2 changed files with 14 additions and 3 deletions

View File

@@ -66,7 +66,7 @@ GROUP BY s
ORDER BY s DESC
-- 查看需要爬取的 artist 的分布
SELECT cast( format( id / 1000000, 0) * 1000000 as UNSIGNED ) as s, count(*) as count
SELECT cast( format( id / 100000, 0) * 100000 as UNSIGNED ) as s, count(*) as count
FROM wait_fetch_artist
GROUP BY s
ORDER BY s DESC

View File

@@ -74,9 +74,20 @@ async function fetch({ artistId, debug = false }) {
}
// 正则匹配
let artistInfoDict;
let regExResult = /\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/.exec(html);
let artistInfoJSONString = regExResult[1];
let artistInfoDict = JSON.parse(artistInfoJSONString);
if (regExResult) {
let artistInfoJSONString = regExResult[1];
artistInfoDict = JSON.parse(artistInfoJSONString);
} else {
// 极少数artist没有 <script type="application/ld+json">......</script> 这块内容
// 例如2948847329717445。这些artist都没有标题所以下面这三项就直接保留空字符串
artistInfoDict = {
"title": "",
"description": "",
"pubDate": ""
}
}
// console.log(artistInfoDict);
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];