artist: 考虑极少数正则匹配内容不存在的问题
This commit is contained in:
		@@ -66,7 +66,7 @@ GROUP BY s
 | 
			
		||||
ORDER BY s DESC
 | 
			
		||||
 | 
			
		||||
-- 查看需要爬取的 artist 的分布
 | 
			
		||||
SELECT cast( format( id / 1000000, 0) * 1000000 as UNSIGNED ) as s, count(*) as count
 | 
			
		||||
SELECT cast( format( id / 100000, 0) * 100000 as UNSIGNED ) as s, count(*) as count
 | 
			
		||||
FROM wait_fetch_artist
 | 
			
		||||
GROUP BY s
 | 
			
		||||
ORDER BY s DESC
 | 
			
		||||
 
 | 
			
		||||
@@ -74,9 +74,20 @@ async function fetch({ artistId, debug = false }) {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // 正则匹配
 | 
			
		||||
    let artistInfoDict;
 | 
			
		||||
    let regExResult = /\<script type\=\"application\/ld\+json\"\>([\S\s]*?)\<\/script\>/.exec(html);
 | 
			
		||||
    let artistInfoJSONString = regExResult[1];
 | 
			
		||||
    let artistInfoDict = JSON.parse(artistInfoJSONString);
 | 
			
		||||
    if (regExResult) {
 | 
			
		||||
        let artistInfoJSONString = regExResult[1];
 | 
			
		||||
        artistInfoDict = JSON.parse(artistInfoJSONString);
 | 
			
		||||
    } else {
 | 
			
		||||
        // 极少数artist没有 <script type="application/ld+json">......</script> 这块内容
 | 
			
		||||
        // 例如:29488473,29717445。这些artist都没有标题,所以下面这三项就直接保留空字符串
 | 
			
		||||
        artistInfoDict = {
 | 
			
		||||
            "title": "",
 | 
			
		||||
            "description": "",
 | 
			
		||||
            "pubDate": ""
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    // console.log(artistInfoDict);
 | 
			
		||||
 | 
			
		||||
    let image = /<meta property="og:image" content="http:\/\/p.\.music\.126\.net\/(.*?)" \/>/.exec(html)[1];
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user