忽略 HTTPS 证书校验警告（InsecureRequestWarning）；修复国外版力扣题库拉取失败的问题；修复其他小问题

2025-10-25 06:48:57 +08:00 · 2024-07-16 16:02:46 +08:00
parent b1fc2c627d
commit 847e599aec
3 changed files with 80 additions and 58 deletions
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@
 **所有版权都为 LeetCode (及力扣中国) 官方所有，此处仅供学习使用，不要他用。也请大家不要滥用，不要侵犯力扣平台的合法权益。**
-**感谢 LeetCode 平台为我们提供大量的算法题目进行练习与提升。如果大家经济条件允许，请大家多多支持力扣，例如冲冲会员等。**
+**感谢 LeetCode 平台为我们提供大量的算法题目进行练习与提升。如果大家经济条件允许，请大家多多支持力扣，例如充充会员等。**
 力扣题库的权益归属力扣，使用力扣题库，需要遵循力扣使用条例，若您不同意此条例，请立即关闭当前网页，不要继续使用本题库。
--- a/leetcode-cn.py
+++ b/leetcode-cn.py
@@ -8,9 +8,15 @@ import requests
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 def get_proble_set(url):
    try:
-        response = requests.get(url)
+        # response = requests.get(url)
        response = requests.get(url, headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
        }, verify=False)
        if response.status_code == 200:
            return response.text
        return None
@@ -118,7 +124,11 @@ def saveJSON(data, filename):
 def main():
    url = "https://leetcode.cn/api/problems/all/"
-    html = json.loads(get_proble_set(url))
+    jsonContent = get_proble_set(url)
    if jsonContent == None:
        print('列表请求失败！')
        return
    html = json.loads(jsonContent)
    saveJSON(html, "origin-data.json")
    # html = json.load(open("origin-data.json", 'r', encoding='utf-8'))
--- a/leetcode.py
+++ b/leetcode.py
@@ -8,10 +8,13 @@ import requests
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 def get_proble_set(url):
    try:
        response = requests.get(url, headers = {
-            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32"
+            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
        }, verify=False)
        if response.status_code == 200:
            return response.text
@@ -28,6 +31,11 @@ def parse_proble_set(problemSet):
            print(i, "has been parsed.")
            # print("The question has been parsed: {}".format(title))
            continue
        elif "paid_only" in problemSet[i]:
            paid_only = problemSet[i]["paid_only"]
            if paid_only:
                print(i, '付费题目，跳过')
                continue
        #construct_url(title)
        # time.sleep(0.5)
        time.sleep(1)
@@ -49,8 +57,8 @@ def save_problem(title,content):
 def get_proble_content(problemUrl,title):
    response = requests.get(problemUrl, headers = {
-        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36"
+        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
-    })
+    }, verify=False)
    setCookie = response.headers["Set-Cookie"]
    '''
    print(setCookie)
@@ -58,7 +66,7 @@ def get_proble_content(problemUrl,title):
    print(type(setCookie))
    '''
    try:
-        pattern = re.compile("csrftoken=(.*?);.*?",re.S)
+        pattern = re.compile("__cf_bm=(.*?);.*?",re.S)
        csrftoken = re.search(pattern, setCookie)
        url = "https://leetcode.com/graphql"
        data = {
@@ -105,7 +113,11 @@ def saveJSON(data, filename):
 def main():
    url = "https://leetcode.com/api/problems/all/"
-    html = json.loads(get_proble_set(url))
+    jsonContent = get_proble_set(url)
    if jsonContent == None:
        print('列表请求失败！')
        return
    html = json.loads(jsonContent)
    saveJSON(html, "origin-data.json")
    # html = json.load(open("origin-data.json", 'r', encoding='utf-8'))