From 847e599aec044db945149cf152af6b24e69c2932 Mon Sep 17 00:00:00 2001
From: zhangbk1 <v-zhangbk@chinatelecom.cn>
Date: Tue, 16 Jul 2024 16:02:46 +0800
Subject: [PATCH] =?UTF-8?q?=E5=BF=BD=E7=95=A5https=E8=AD=A6=E5=91=8A?=
 =?UTF-8?q?=EF=BC=9B=E5=9B=BD=E5=A4=96=E7=89=88=E5=8A=9B=E6=89=A3=E9=A2=98?=
 =?UTF-8?q?=E5=BA=93=E6=8B=89=E5=8F=96=E5=A4=B1=E8=B4=A5=E9=97=AE=E9=A2=98?=
 =?UTF-8?q?=E4=BF=AE=E5=A4=8D=EF=BC=9B=E5=85=B6=E4=BB=96=E5=B0=8F=E9=97=AE?=
 =?UTF-8?q?=E9=A2=98=E4=BF=AE=E5=A4=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md      | 92 +++++++++++++++++++++++++-------------------------
 leetcode-cn.py | 18 +++++++---
 leetcode.py    | 28 ++++++++++-----
 3 files changed, 80 insertions(+), 58 deletions(-)

diff --git a/README.md b/README.md
index bd55ae46..0bed937b 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1,46 @@
-# 力扣题库（完整版）
-
-> 最后更新日期： **2024.06.26**
->
-> 使用脚本前请务必仔细完整阅读本 `README.md` 文件
-
-### 仓库介绍
-
-使用 Python 脚本分批将力扣（`leetcode.com` 和 `leetcode-cn.com`）上面的题目保存下来，方便没有网的时候进行学习。（仅包含可以在网页上直接打开的公开题目，不包含 VIP 题目）
-
-
-
-### 仓库目录结构
-
-国外版力扣题库，在仓库 `leetcode/problem` 文件夹下；国内版力扣题库，在仓库  `leetcode-cn/problem (Chinese)` 和 `leetcode-cn/problem (English)` 文件夹下。（部分题目只有中文版，无对应英文版）。
-
-
-
-### 特别注意！
-
-#### 版权相关
-
-**所有版权都为 LeetCode (及力扣中国) 官方所有，此处仅供学习使用，不要他用。也请大家不要滥用，不要侵犯力扣平台的合法权益。**
-
-**感谢 LeetCode 平台为我们提供大量的算法题目进行练习与提升。如果大家经济条件允许，请大家多多支持力扣，例如冲冲会员等。**
-
-力扣题库的权益归属力扣，使用力扣题库，需要遵循力扣使用条例，若您不同意此条例，请立即关闭当前网页，不要继续使用本题库。
-
-力扣（LeetCode）• 使用条例： https://leetcode-cn.com/terms-c/
-
-LeetCode Terms of Service： https://leetcode.com/terms/
-
-
-
-#### 脚本原作者
-
-Python脚本是在网上教程的基础上进行二改得到的，原版地址：https://blog.csdn.net/weixin_37267014/article/details/81429057
-
-
-
-#### 其他
-
-由于脚本运行时会向力扣网站发出大量请求，所以请大家不要随便尝试此脚本，以免影响力扣网站正常运行。
-
-因为使用此脚本所造成的一系列问题，责任由您自己承担，作者不承担相应责任。
-
+# 力扣题库（完整版）
+
+> 最后更新日期： **2024.06.26**
+>
+> 使用脚本前请务必仔细完整阅读本 `README.md` 文件
+
+### 仓库介绍
+
+使用 Python 脚本分批将力扣（`leetcode.com` 和 `leetcode.cn`）上面的题目保存下来，方便没有网的时候进行学习。（仅包含可以在网页上直接打开的公开题目，不包含 VIP 题目）
+
+
+
+### 仓库目录结构
+
+国外版力扣题库，在仓库 `leetcode/problem` 文件夹下；国内版力扣题库，在仓库 `leetcode-cn/problem (Chinese)` 和 `leetcode-cn/problem (English)` 文件夹下（部分题目只有中文版，无对应英文版）。
+
+
+
+### 特别注意！
+
+#### 版权相关
+
+**所有版权都为 LeetCode (及力扣中国) 官方所有，此处仅供学习使用，不要他用。也请大家不要滥用，不要侵犯力扣平台的合法权益。**
+
+**感谢 LeetCode 平台为我们提供大量的算法题目进行练习与提升。如果大家经济条件允许，请大家多多支持力扣，例如充充会员等。**
+
+力扣题库的权益归属力扣，使用力扣题库，需要遵循力扣使用条例，若您不同意此条例，请立即关闭当前网页，不要继续使用本题库。
+
+力扣（LeetCode）• 使用条例： https://leetcode-cn.com/terms-c/
+
+LeetCode Terms of Service： https://leetcode.com/terms/
+
+
+
+#### 脚本原作者
+
+Python 脚本是在网上教程的基础上进行二次修改得到的，原版地址：https://blog.csdn.net/weixin_37267014/article/details/81429057
+
+
+
+#### 其他
+
+由于脚本运行时会向力扣网站发出大量请求，所以请大家不要随便尝试此脚本，以免影响力扣网站正常运行。
+
+因为使用此脚本所造成的一系列问题，责任由您自己承担，作者不承担相应责任。
+
diff --git a/leetcode-cn.py b/leetcode-cn.py
index 4e4968bb..8fe10a43 100644
--- a/leetcode-cn.py
+++ b/leetcode-cn.py
@@ -8,9 +8,15 @@ import requests
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
 
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
 def get_proble_set(url):
     try:
-        response = requests.get(url)
+        # response = requests.get(url)
+        response = requests.get(url, headers = {
+            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
+        }, verify=False)
         if response.status_code == 200:
             return response.text
         return None
@@ -34,9 +40,9 @@ def parse_proble_set(problemSet):
         continue
 
 def construct_url(problemTitle):
-    url = "https://leetcode.cn/problems/"+ problemTitle + "/"
+    url = "https://leetcode.cn/problems/" + problemTitle + "/"
     # print(url)
-    get_proble_content(url,problemTitle)
+    get_proble_content(url, problemTitle)
 
 def save_problem(title,content, editorType = ""):
     #content = bytes(content,encoding = 'utf8')
@@ -118,7 +124,11 @@ def saveJSON(data, filename):
 
 def main():
     url = "https://leetcode.cn/api/problems/all/"
-    html = json.loads(get_proble_set(url))
+    jsonContent = get_proble_set(url)
+    if jsonContent == None:
+        print('列表请求失败！')
+        return
+    html = json.loads(jsonContent)
     saveJSON(html, "origin-data.json")
 
     # html = json.load(open("origin-data.json", 'r', encoding='utf-8'))
diff --git a/leetcode.py b/leetcode.py
index 78469546..3039d3c8 100644
--- a/leetcode.py
+++ b/leetcode.py
@@ -8,10 +8,13 @@ import requests
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
 
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
 def get_proble_set(url):
     try:
         response = requests.get(url, headers = {
-            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32"
+            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
         }, verify=False)
         if response.status_code == 200:
             return response.text
@@ -28,18 +31,23 @@ def parse_proble_set(problemSet):
             print(i, "has been parsed.")
             # print("The question has been parsed: {}".format(title))
             continue
+        elif "paid_only" in problemSet[i]:
+            paid_only = problemSet[i]["paid_only"]
+            if paid_only:
+                print(i, '付费题目，跳过')
+                continue
         #construct_url(title)
         # time.sleep(0.5)
         time.sleep(1)
-        t =threading.Thread(target=construct_url,args=(title,))
+        t = threading.Thread(target=construct_url, args=(title,))
         t.start()
         print(i, "is done.")
         continue
 
 def construct_url(problemTitle):
-    url = "https://leetcode.com/problems/"+ problemTitle + "/description/"
+    url = "https://leetcode.com/problems/" + problemTitle + "/description/"
     # print(url)
-    get_proble_content(url,problemTitle)
+    get_proble_content(url, problemTitle)
 
 def save_problem(title,content):
     #content = bytes(content,encoding = 'utf8')
@@ -49,8 +57,8 @@ def save_problem(title,content):
 
 def get_proble_content(problemUrl,title):
     response = requests.get(problemUrl, headers = {
-        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36"
-    })
+        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
+    }, verify=False)
     setCookie = response.headers["Set-Cookie"]
     '''
     print(setCookie)
@@ -58,7 +66,7 @@ def get_proble_content(problemUrl,title):
     print(type(setCookie))
     '''
     try:
-        pattern = re.compile("csrftoken=(.*?);.*?",re.S)
+        pattern = re.compile("__cf_bm=(.*?);.*?",re.S)
         csrftoken = re.search(pattern, setCookie)
         url = "https://leetcode.com/graphql"
         data = {
@@ -105,7 +113,11 @@ def saveJSON(data, filename):
 
 def main():
     url = "https://leetcode.com/api/problems/all/"
-    html = json.loads(get_proble_set(url))
+    jsonContent = get_proble_set(url)
+    if jsonContent == None:
+        print('列表请求失败！')
+        return
+    html = json.loads(jsonContent)
     saveJSON(html, "origin-data.json")
 
     # html = json.load(open("origin-data.json", 'r', encoding='utf-8'))