artist:处理极少数情况下正则匹配内容不存在的问题
This commit is contained in:
parent
e638b09313
commit
d17f4282e4
@ -66,7 +66,7 @@ GROUP BY s
|
||||
ORDER BY s DESC
|
||||
|
||||
-- 查看需要爬取的 artist 的分布
|
||||
SELECT cast( format( id / 1000000, 0) * 1000000 as UNSIGNED ) as s, count(*) as count
|
||||
SELECT cast( format( id / 100000, 0) * 100000 as UNSIGNED ) as s, count(*) as count
|
||||
FROM wait_fetch_artist
|
||||
GROUP BY s
|
||||
ORDER BY s DESC
|
||||
|
@ -74,9 +74,20 @@ async function fetch({ artistId, debug = false }) {
|
||||
}
|
||||
|
||||
// 正则匹配
|
||||
let artistInfoDict;
|
||||
let regExResult = /\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/.exec(html);
|
||||
let artistInfoJSONString = regExResult[1];
|
||||
let artistInfoDict = JSON.parse(artistInfoJSONString);
|
||||
if (regExResult) {
|
||||
let artistInfoJSONString = regExResult[1];
|
||||
artistInfoDict = JSON.parse(artistInfoJSONString);
|
||||
} else {
|
||||
// 极少数artist没有 <script type="application/ld+json">......</script> 这块内容
|
||||
// 例如:29488473,29717445。这些artist都没有标题,所以下面这三项就直接保留空字符串
|
||||
artistInfoDict = {
|
||||
"title": "",
|
||||
"description": "",
|
||||
"pubDate": ""
|
||||
}
|
||||
}
|
||||
// console.log(artistInfoDict);
|
||||
|
||||
let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
|
||||
|
Loading…
Reference in New Issue
Block a user