1
0
mirror of https://gitee.com/coder-xiaomo/leetcode-problemset synced 2025-01-10 18:48:13 +08:00
Code Issues Projects Releases Wiki Activity GitHub Gitee

忽略https警告;国外版力扣题库拉取失败问题修复;其他小问题修复

This commit is contained in:
zhangbk1 2024-07-16 16:02:46 +08:00
parent b1fc2c627d
commit 847e599aec
3 changed files with 80 additions and 58 deletions

View File

@@ -22,7 +22,7 @@
**所有版权都为 LeetCode (及力扣中国) 官方所有,此处仅供学习使用,不要他用。也请大家不要滥用,不要侵犯力扣平台的合法权益。** **所有版权都为 LeetCode (及力扣中国) 官方所有,此处仅供学习使用,不要他用。也请大家不要滥用,不要侵犯力扣平台的合法权益。**
**感谢 LeetCode 平台为我们提供大量的算法题目进行练习与提升。如果大家经济条件允许,请大家多多支持力扣,例如冲冲会员等。** **感谢 LeetCode 平台为我们提供大量的算法题目进行练习与提升。如果大家经济条件允许,请大家多多支持力扣,例如充充会员等。**
力扣题库的权益归属力扣,使用力扣题库,需要遵循力扣使用条例,若您不同意此条例,请立即关闭当前网页,不要继续使用本题库。 力扣题库的权益归属力扣,使用力扣题库,需要遵循力扣使用条例,若您不同意此条例,请立即关闭当前网页,不要继续使用本题库。

View File

@@ -8,9 +8,15 @@ import requests
from requests.exceptions import RequestException from requests.exceptions import RequestException
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def get_proble_set(url): def get_proble_set(url):
try: try:
response = requests.get(url) # response = requests.get(url)
response = requests.get(url, headers = {
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
}, verify=False)
if response.status_code == 200: if response.status_code == 200:
return response.text return response.text
return None return None
@@ -118,7 +124,11 @@ def saveJSON(data, filename):
def main(): def main():
url = "https://leetcode.cn/api/problems/all/" url = "https://leetcode.cn/api/problems/all/"
html = json.loads(get_proble_set(url)) jsonContent = get_proble_set(url)
if jsonContent == None:
print('列表请求失败!')
return
html = json.loads(jsonContent)
saveJSON(html, "origin-data.json") saveJSON(html, "origin-data.json")
# html = json.load(open("origin-data.json", 'r', encoding='utf-8')) # html = json.load(open("origin-data.json", 'r', encoding='utf-8'))

View File

@@ -8,10 +8,13 @@ import requests
from requests.exceptions import RequestException from requests.exceptions import RequestException
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def get_proble_set(url): def get_proble_set(url):
try: try:
response = requests.get(url, headers = { response = requests.get(url, headers = {
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32" 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
}, verify=False) }, verify=False)
if response.status_code == 200: if response.status_code == 200:
return response.text return response.text
@@ -28,6 +31,11 @@ def parse_proble_set(problemSet):
print(i, "has been parsed.") print(i, "has been parsed.")
# print("The question has been parsed: {}".format(title)) # print("The question has been parsed: {}".format(title))
continue continue
elif "paid_only" in problemSet[i]:
paid_only = problemSet[i]["paid_only"]
if paid_only:
print(i, '付费题目,跳过')
continue
#construct_url(title) #construct_url(title)
# time.sleep(0.5) # time.sleep(0.5)
time.sleep(1) time.sleep(1)
@@ -49,8 +57,8 @@ def save_problem(title,content):
def get_proble_content(problemUrl,title): def get_proble_content(problemUrl,title):
response = requests.get(problemUrl, headers = { response = requests.get(problemUrl, headers = {
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
}) }, verify=False)
setCookie = response.headers["Set-Cookie"] setCookie = response.headers["Set-Cookie"]
''' '''
print(setCookie) print(setCookie)
@@ -58,7 +66,7 @@ def get_proble_content(problemUrl,title):
print(type(setCookie)) print(type(setCookie))
''' '''
try: try:
pattern = re.compile("csrftoken=(.*?);.*?",re.S) pattern = re.compile("__cf_bm=(.*?);.*?",re.S)
csrftoken = re.search(pattern, setCookie) csrftoken = re.search(pattern, setCookie)
url = "https://leetcode.com/graphql" url = "https://leetcode.com/graphql"
data = { data = {
@@ -105,7 +113,11 @@ def saveJSON(data, filename):
def main(): def main():
url = "https://leetcode.com/api/problems/all/" url = "https://leetcode.com/api/problems/all/"
html = json.loads(get_proble_set(url)) jsonContent = get_proble_set(url)
if jsonContent == None:
print('列表请求失败!')
return
html = json.loads(jsonContent)
saveJSON(html, "origin-data.json") saveJSON(html, "origin-data.json")
# html = json.load(open("origin-data.json", 'r', encoding='utf-8')) # html = json.load(open("origin-data.json", 'r', encoding='utf-8'))