Add time.sleep delays of at least 10 seconds
This commit is contained in:
parent
59bfefff0e
commit
7285475eb5
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import requests
|
import requests
|
||||||
from typing import Dict, Any, List, Optional, Tuple
|
from typing import Dict, Any, List, Optional, Tuple
|
||||||
@ -22,19 +22,22 @@ logger.add("logs/log_{time:YYYY-MM-DD}.log", level="INFO", rotation="00:00", ret
|
|||||||
|
|
||||||
def sleep_random_between() -> float:
|
def sleep_random_between() -> float:
|
||||||
"""
|
"""
|
||||||
执行统一的随机延时
|
执行统一的随机延时(至少10秒以上,减少风控触发)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: 实际休眠的秒数
|
float: 实际休眠的秒数
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
min_seconds = float(os.getenv('SLEEP_MIN_SECONDS', '1'))
|
min_seconds = float(os.getenv('SLEEP_MIN_SECONDS', '10'))
|
||||||
max_seconds = float(os.getenv('SLEEP_MAX_SECONDS', '10'))
|
max_seconds = float(os.getenv('SLEEP_MAX_SECONDS', '20'))
|
||||||
|
# 确保最小值至少为10秒
|
||||||
|
if min_seconds < 10:
|
||||||
|
min_seconds = 10
|
||||||
if max_seconds < min_seconds:
|
if max_seconds < min_seconds:
|
||||||
max_seconds = min_seconds
|
max_seconds = min_seconds + 10
|
||||||
wait_time = random.uniform(min_seconds, max_seconds)
|
wait_time = random.uniform(min_seconds, max_seconds)
|
||||||
except Exception:
|
except Exception:
|
||||||
wait_time = 1.0
|
wait_time = 10.0
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
return wait_time
|
return wait_time
|
||||||
|
|
||||||
@ -429,8 +432,8 @@ class BossZhipinAPI:
|
|||||||
for retry_count in range(max_retries):
|
for retry_count in range(max_retries):
|
||||||
print(f"⏳ 第 {retry_count + 1}/{max_retries} 次重试...")
|
print(f"⏳ 第 {retry_count + 1}/{max_retries} 次重试...")
|
||||||
|
|
||||||
# 等待时间递增:5秒、10秒、15秒
|
# 等待时间递增:至少10秒,然后15秒、20秒
|
||||||
wait_time = (retry_count + 1) * 5
|
wait_time = max(10, (retry_count + 1) * 5 + 5)
|
||||||
print(f"⏰ 等待 {wait_time} 秒后重试(让隧道代理切换IP)...")
|
print(f"⏰ 等待 {wait_time} 秒后重试(让隧道代理切换IP)...")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
|
|
||||||
@ -531,7 +534,8 @@ class BossZhipinAPI:
|
|||||||
max_retries = 3
|
max_retries = 3
|
||||||
|
|
||||||
for retry_count in range(1, max_retries + 1):
|
for retry_count in range(1, max_retries + 1):
|
||||||
wait_time = retry_count * 5 # 5秒、10秒、15秒递增等待
|
# 等待时间递增:至少10秒,然后15秒、20秒
|
||||||
|
wait_time = max(10, retry_count * 5 + 5)
|
||||||
print(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...")
|
print(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
|
|
||||||
@ -574,7 +578,8 @@ class BossZhipinAPI:
|
|||||||
max_retries = 3
|
max_retries = 3
|
||||||
|
|
||||||
for retry_count in range(1, max_retries + 1):
|
for retry_count in range(1, max_retries + 1):
|
||||||
wait_time = retry_count * 5 # 5秒、10秒、15秒递增等待
|
# 等待时间递增:至少10秒,然后15秒、20秒
|
||||||
|
wait_time = max(10, retry_count * 5 + 5)
|
||||||
logger.info(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...")
|
logger.info(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
|
|
||||||
@ -944,9 +949,8 @@ class BossZhipinAPI:
|
|||||||
self.reinit_session(cfg)
|
self.reinit_session(cfg)
|
||||||
self.init_cookies()
|
self.init_cookies()
|
||||||
logger.info("IP_SWITCH mode={} cfg={}", mode, cfg)
|
logger.info("IP_SWITCH mode={} cfg={}", mode, cfg)
|
||||||
wait_time = 5
|
wait_time = sleep_random_between()
|
||||||
logger.info(f"⏳ IP异常,等待 {wait_time} 秒后重试 (批量GET)")
|
logger.info(f"⏳ IP异常,等待 {int(wait_time)} 秒后重试 (批量GET)")
|
||||||
time.sleep(wait_time)
|
|
||||||
headers = self.build_request_headers({
|
headers = self.build_request_headers({
|
||||||
"Referer": "https://www.zhipin.com/web/geek/job",
|
"Referer": "https://www.zhipin.com/web/geek/job",
|
||||||
"User-Agent": self.get_random_user_agent()
|
"User-Agent": self.get_random_user_agent()
|
||||||
@ -963,9 +967,8 @@ class BossZhipinAPI:
|
|||||||
return None
|
return None
|
||||||
result = response.json()
|
result = response.json()
|
||||||
if self.handle_ip_abnormal_response(result):
|
if self.handle_ip_abnormal_response(result):
|
||||||
wait_time = 5
|
wait_time = sleep_random_between()
|
||||||
logger.info(f"⏳ IP异常,等待 {wait_time} 秒后重试 (批量GET)")
|
logger.info(f"⏳ IP异常,等待 {int(wait_time)} 秒后重试 (批量GET)")
|
||||||
time.sleep(wait_time)
|
|
||||||
headers = self.build_request_headers({
|
headers = self.build_request_headers({
|
||||||
"Referer": "https://www.zhipin.com/web/geek/job",
|
"Referer": "https://www.zhipin.com/web/geek/job",
|
||||||
"User-Agent": self.get_random_user_agent()
|
"User-Agent": self.get_random_user_agent()
|
||||||
@ -986,6 +989,9 @@ class BossZhipinAPI:
|
|||||||
if hasattr(self, 'local_mode') and self.local_mode:
|
if hasattr(self, 'local_mode') and self.local_mode:
|
||||||
self.local_success_count += 1
|
self.local_success_count += 1
|
||||||
self.try_restore_proxy()
|
self.try_restore_proxy()
|
||||||
|
# 请求成功后也添加短暂休眠,进一步减少风控
|
||||||
|
post_wait_time = random.uniform(2, 5)
|
||||||
|
time.sleep(post_wait_time)
|
||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
print(f"❌ 批量请求失败: {response.status_code}")
|
print(f"❌ 批量请求失败: {response.status_code}")
|
||||||
@ -1028,8 +1034,9 @@ class BossZhipinAPI:
|
|||||||
print("❌ 会话初始化失败")
|
print("❌ 会话初始化失败")
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
print("⏱️ 连续请求,等待2秒...")
|
print("⏱️ 连续请求,等待至少10秒...")
|
||||||
time.sleep(2)
|
wait_time = sleep_random_between()
|
||||||
|
print(f"⏰ 已等待 {int(wait_time)} 秒")
|
||||||
|
|
||||||
search_params = {
|
search_params = {
|
||||||
'pageSize': params.get('pageSize', 15),
|
'pageSize': params.get('pageSize', 15),
|
||||||
@ -1183,8 +1190,9 @@ class BossZhipinAPI:
|
|||||||
print(f"⏹ 接口返回 hasMore = False,在第 {current_page} 页停止翻页")
|
print(f"⏹ 接口返回 hasMore = False,在第 {current_page} 页停止翻页")
|
||||||
break
|
break
|
||||||
|
|
||||||
print("⏱️ 等待3秒后获取下一页...")
|
print("⏱️ 等待至少10秒后获取下一页...")
|
||||||
time.sleep(3)
|
wait_time = sleep_random_between()
|
||||||
|
print(f"⏰ 已等待 {int(wait_time)} 秒")
|
||||||
current_page += 1
|
current_page += 1
|
||||||
|
|
||||||
print(f"\n🎉 处理完成,总共处理 {processed_count} 条职位数据")
|
print(f"\n🎉 处理完成,总共处理 {processed_count} 条职位数据")
|
||||||
@ -1386,9 +1394,8 @@ class BossZhipinAPI:
|
|||||||
data = response.json()
|
data = response.json()
|
||||||
logger.info("RAW_RESPONSE method={} url={} status={} resp_size={}", "POST", "https://www.zhipin.com/wapi/batch/requests", response.status_code, len(response.content))
|
logger.info("RAW_RESPONSE method={} url={} status={} resp_size={}", "POST", "https://www.zhipin.com/wapi/batch/requests", response.status_code, len(response.content))
|
||||||
if self.handle_ip_abnormal_response(data):
|
if self.handle_ip_abnormal_response(data):
|
||||||
wait_time = 5
|
wait_time = sleep_random_between()
|
||||||
logger.info(f"⏳ IP异常,等待 {wait_time} 秒后重试 (批量POST)")
|
logger.info(f"⏳ IP异常,等待 {int(wait_time)} 秒后重试 (批量POST)")
|
||||||
time.sleep(wait_time)
|
|
||||||
self.init_cookies()
|
self.init_cookies()
|
||||||
headers = self.build_request_headers({
|
headers = self.build_request_headers({
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
@ -1403,6 +1410,9 @@ class BossZhipinAPI:
|
|||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
# 请求成功后也添加短暂休眠,进一步减少风控
|
||||||
|
post_wait_time = random.uniform(2, 5)
|
||||||
|
time.sleep(post_wait_time)
|
||||||
return data
|
return data
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
print(f"请求失败: {str(e)}")
|
print(f"请求失败: {str(e)}")
|
||||||
@ -1528,8 +1538,8 @@ class BossZhipinAPI:
|
|||||||
|
|
||||||
# 检查是否需要处理反爬虫
|
# 检查是否需要处理反爬虫
|
||||||
if self.handle_anti_bot_response(result):
|
if self.handle_anti_bot_response(result):
|
||||||
print("🔄 批量请求检测到安全验证,等待3秒后重试...")
|
wait_time = sleep_random_between()
|
||||||
time.sleep(3)
|
print(f"🔄 批量请求检测到安全验证,等待 {int(wait_time)} 秒后重试...")
|
||||||
|
|
||||||
# 重新构建请求头
|
# 重新构建请求头
|
||||||
headers = self.build_request_headers({
|
headers = self.build_request_headers({
|
||||||
@ -1560,6 +1570,9 @@ class BossZhipinAPI:
|
|||||||
mapped_results[request_key] = response_data
|
mapped_results[request_key] = response_data
|
||||||
|
|
||||||
self.ip_manager.mark_success()
|
self.ip_manager.mark_success()
|
||||||
|
# 请求成功后也添加短暂休眠,进一步减少风控
|
||||||
|
post_wait_time = random.uniform(2, 5)
|
||||||
|
time.sleep(post_wait_time)
|
||||||
return mapped_results
|
return mapped_results
|
||||||
else:
|
else:
|
||||||
print(f"批量请求失败: {result.get('message', '未知错误')}")
|
print(f"批量请求失败: {result.get('message', '未知错误')}")
|
||||||
@ -1698,8 +1711,8 @@ class BossZhipinAPI:
|
|||||||
|
|
||||||
# 检查是否需要处理反爬虫
|
# 检查是否需要处理反爬虫
|
||||||
if self.handle_anti_bot_response(result):
|
if self.handle_anti_bot_response(result):
|
||||||
print(f"🔄 检测到安全验证,等待3秒后重试... (尝试 {attempt + 1}/{max_retries})")
|
wait_time = sleep_random_between()
|
||||||
time.sleep(3)
|
print(f"🔄 检测到安全验证,等待 {int(wait_time)} 秒后重试... (尝试 {attempt + 1}/{max_retries})")
|
||||||
# 在重试前更新一些请求头
|
# 在重试前更新一些请求头
|
||||||
default_custom_headers.update({
|
default_custom_headers.update({
|
||||||
'User-Agent': self.get_random_user_agent(),
|
'User-Agent': self.get_random_user_agent(),
|
||||||
@ -1732,7 +1745,8 @@ class BossZhipinAPI:
|
|||||||
self.reinit_session()
|
self.reinit_session()
|
||||||
self.init_cookies()
|
self.init_cookies()
|
||||||
logger.info("IP_SWITCH mode={} cfg={}", "local", None)
|
logger.info("IP_SWITCH mode={} cfg={}", "local", None)
|
||||||
time.sleep(1)
|
wait_time = sleep_random_between()
|
||||||
|
logger.info(f"⏳ IP切换后等待 {int(wait_time)} 秒")
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
self.ip_manager.mark_failure(reason)
|
self.ip_manager.mark_failure(reason)
|
||||||
@ -1752,6 +1766,9 @@ class BossZhipinAPI:
|
|||||||
self.local_success_count += 1
|
self.local_success_count += 1
|
||||||
self.local_fail_count = 0
|
self.local_fail_count = 0
|
||||||
self.try_restore_proxy()
|
self.try_restore_proxy()
|
||||||
|
# 请求成功后也添加短暂休眠,进一步减少风控
|
||||||
|
post_wait_time = random.uniform(2, 5)
|
||||||
|
time.sleep(post_wait_time)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
@ -1774,7 +1791,8 @@ class BossZhipinAPI:
|
|||||||
logger.info("IP_SWITCH mode={} cfg={}", mode, cfg)
|
logger.info("IP_SWITCH mode={} cfg={}", mode, cfg)
|
||||||
|
|
||||||
if attempt < max_retries - 1:
|
if attempt < max_retries - 1:
|
||||||
wait_time = min(2 ** attempt, 10) # 最大等待10秒
|
# 确保至少等待10秒,重试时递增等待时间
|
||||||
|
wait_time = max(10, min(2 ** attempt * 5, 30)) # 至少10秒,最大30秒
|
||||||
print(f"⏳ 等待{wait_time}秒后重试...")
|
print(f"⏳ 等待{wait_time}秒后重试...")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
else:
|
else:
|
||||||
@ -2175,9 +2193,8 @@ if __name__ == "__main__":
|
|||||||
self_reinit_ok = True
|
self_reinit_ok = True
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
wait_time = 5
|
wait_time = sleep_random_between()
|
||||||
print(f" ⏳ 等待 {wait_time} 秒后继续...{'(已重建会话)' if self_reinit_ok else ''}")
|
print(f" ⏳ 等待 {int(wait_time)} 秒后继续...{'(已重建会话)' if self_reinit_ok else ''}")
|
||||||
time.sleep(wait_time)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" ⚠️ 处理职位详情异常: {e}")
|
print(f" ⚠️ 处理职位详情异常: {e}")
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user