Raise time.sleep delays between requests and retries to at least 10 seconds

This commit is contained in:
duxin 2026-01-20 15:42:47 +08:00
parent 59bfefff0e
commit 7285475eb5

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import requests import requests
from typing import Dict, Any, List, Optional, Tuple from typing import Dict, Any, List, Optional, Tuple
@ -22,19 +22,22 @@ logger.add("logs/log_{time:YYYY-MM-DD}.log", level="INFO", rotation="00:00", ret
def sleep_random_between() -> float: def sleep_random_between() -> float:
""" """
执行统一的随机延时 执行统一的随机延时,至少10秒以上,减少风控触发
Returns: Returns:
float: 实际休眠的秒数 float: 实际休眠的秒数
""" """
try: try:
min_seconds = float(os.getenv('SLEEP_MIN_SECONDS', '1')) min_seconds = float(os.getenv('SLEEP_MIN_SECONDS', '10'))
max_seconds = float(os.getenv('SLEEP_MAX_SECONDS', '10')) max_seconds = float(os.getenv('SLEEP_MAX_SECONDS', '20'))
# 确保最小值至少为10秒
if min_seconds < 10:
min_seconds = 10
if max_seconds < min_seconds: if max_seconds < min_seconds:
max_seconds = min_seconds max_seconds = min_seconds + 10
wait_time = random.uniform(min_seconds, max_seconds) wait_time = random.uniform(min_seconds, max_seconds)
except Exception: except Exception:
wait_time = 1.0 wait_time = 10.0
time.sleep(wait_time) time.sleep(wait_time)
return wait_time return wait_time
@ -429,8 +432,8 @@ class BossZhipinAPI:
for retry_count in range(max_retries): for retry_count in range(max_retries):
print(f"⏳ 第 {retry_count + 1}/{max_retries} 次重试...") print(f"⏳ 第 {retry_count + 1}/{max_retries} 次重试...")
# 等待时间递增:5秒、10秒、15秒 # 等待时间递增:至少10秒,然后15秒、20秒
wait_time = (retry_count + 1) * 5 wait_time = max(10, (retry_count + 1) * 5 + 5)
print(f"⏰ 等待 {wait_time} 秒后重试让隧道代理切换IP...") print(f"⏰ 等待 {wait_time} 秒后重试让隧道代理切换IP...")
time.sleep(wait_time) time.sleep(wait_time)
@ -531,7 +534,8 @@ class BossZhipinAPI:
max_retries = 3 max_retries = 3
for retry_count in range(1, max_retries + 1): for retry_count in range(1, max_retries + 1):
wait_time = retry_count * 5 # 5秒、10秒、15秒递增等待 # 等待时间递增至少10秒然后15秒、20秒
wait_time = max(10, retry_count * 5 + 5)
print(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...") print(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...")
time.sleep(wait_time) time.sleep(wait_time)
@ -574,7 +578,8 @@ class BossZhipinAPI:
max_retries = 3 max_retries = 3
for retry_count in range(1, max_retries + 1): for retry_count in range(1, max_retries + 1):
wait_time = retry_count * 5 # 5秒、10秒、15秒递增等待 # 等待时间递增至少10秒然后15秒、20秒
wait_time = max(10, retry_count * 5 + 5)
logger.info(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...") logger.info(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...")
time.sleep(wait_time) time.sleep(wait_time)
@ -944,9 +949,8 @@ class BossZhipinAPI:
self.reinit_session(cfg) self.reinit_session(cfg)
self.init_cookies() self.init_cookies()
logger.info("IP_SWITCH mode={} cfg={}", mode, cfg) logger.info("IP_SWITCH mode={} cfg={}", mode, cfg)
wait_time = 5 wait_time = sleep_random_between()
logger.info(f"⏳ IP异常等待 {wait_time} 秒后重试 (批量GET)") logger.info(f"⏳ IP异常等待 {int(wait_time)} 秒后重试 (批量GET)")
time.sleep(wait_time)
headers = self.build_request_headers({ headers = self.build_request_headers({
"Referer": "https://www.zhipin.com/web/geek/job", "Referer": "https://www.zhipin.com/web/geek/job",
"User-Agent": self.get_random_user_agent() "User-Agent": self.get_random_user_agent()
@ -963,9 +967,8 @@ class BossZhipinAPI:
return None return None
result = response.json() result = response.json()
if self.handle_ip_abnormal_response(result): if self.handle_ip_abnormal_response(result):
wait_time = 5 wait_time = sleep_random_between()
logger.info(f"⏳ IP异常等待 {wait_time} 秒后重试 (批量GET)") logger.info(f"⏳ IP异常等待 {int(wait_time)} 秒后重试 (批量GET)")
time.sleep(wait_time)
headers = self.build_request_headers({ headers = self.build_request_headers({
"Referer": "https://www.zhipin.com/web/geek/job", "Referer": "https://www.zhipin.com/web/geek/job",
"User-Agent": self.get_random_user_agent() "User-Agent": self.get_random_user_agent()
@ -986,6 +989,9 @@ class BossZhipinAPI:
if hasattr(self, 'local_mode') and self.local_mode: if hasattr(self, 'local_mode') and self.local_mode:
self.local_success_count += 1 self.local_success_count += 1
self.try_restore_proxy() self.try_restore_proxy()
# 请求成功后也添加短暂休眠,进一步减少风控
post_wait_time = random.uniform(2, 5)
time.sleep(post_wait_time)
return result return result
else: else:
print(f"❌ 批量请求失败: {response.status_code}") print(f"❌ 批量请求失败: {response.status_code}")
@ -1028,8 +1034,9 @@ class BossZhipinAPI:
print("❌ 会话初始化失败") print("❌ 会话初始化失败")
return None return None
else: else:
print("⏱️ 连续请求等待2秒...") print("⏱️ 连续请求等待至少10秒...")
time.sleep(2) wait_time = sleep_random_between()
print(f"⏰ 已等待 {int(wait_time)}")
search_params = { search_params = {
'pageSize': params.get('pageSize', 15), 'pageSize': params.get('pageSize', 15),
@ -1183,8 +1190,9 @@ class BossZhipinAPI:
print(f"⏹ 接口返回 hasMore = False在第 {current_page} 页停止翻页") print(f"⏹ 接口返回 hasMore = False在第 {current_page} 页停止翻页")
break break
print("⏱️ 等待3秒后获取下一页...") print("⏱️ 等待至少10秒后获取下一页...")
time.sleep(3) wait_time = sleep_random_between()
print(f"⏰ 已等待 {int(wait_time)}")
current_page += 1 current_page += 1
print(f"\n🎉 处理完成,总共处理 {processed_count} 条职位数据") print(f"\n🎉 处理完成,总共处理 {processed_count} 条职位数据")
@ -1386,9 +1394,8 @@ class BossZhipinAPI:
data = response.json() data = response.json()
logger.info("RAW_RESPONSE method={} url={} status={} resp_size={}", "POST", "https://www.zhipin.com/wapi/batch/requests", response.status_code, len(response.content)) logger.info("RAW_RESPONSE method={} url={} status={} resp_size={}", "POST", "https://www.zhipin.com/wapi/batch/requests", response.status_code, len(response.content))
if self.handle_ip_abnormal_response(data): if self.handle_ip_abnormal_response(data):
wait_time = 5 wait_time = sleep_random_between()
logger.info(f"⏳ IP异常等待 {wait_time} 秒后重试 (批量POST)") logger.info(f"⏳ IP异常等待 {int(wait_time)} 秒后重试 (批量POST)")
time.sleep(wait_time)
self.init_cookies() self.init_cookies()
headers = self.build_request_headers({ headers = self.build_request_headers({
"Content-Type": "application/json", "Content-Type": "application/json",
@ -1403,6 +1410,9 @@ class BossZhipinAPI:
) )
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
# 请求成功后也添加短暂休眠,进一步减少风控
post_wait_time = random.uniform(2, 5)
time.sleep(post_wait_time)
return data return data
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
print(f"请求失败: {str(e)}") print(f"请求失败: {str(e)}")
@ -1528,8 +1538,8 @@ class BossZhipinAPI:
# 检查是否需要处理反爬虫 # 检查是否需要处理反爬虫
if self.handle_anti_bot_response(result): if self.handle_anti_bot_response(result):
print("🔄 批量请求检测到安全验证等待3秒后重试...") wait_time = sleep_random_between()
time.sleep(3) print(f"🔄 批量请求检测到安全验证,等待 {int(wait_time)} 秒后重试...")
# 重新构建请求头 # 重新构建请求头
headers = self.build_request_headers({ headers = self.build_request_headers({
@ -1560,6 +1570,9 @@ class BossZhipinAPI:
mapped_results[request_key] = response_data mapped_results[request_key] = response_data
self.ip_manager.mark_success() self.ip_manager.mark_success()
# 请求成功后也添加短暂休眠,进一步减少风控
post_wait_time = random.uniform(2, 5)
time.sleep(post_wait_time)
return mapped_results return mapped_results
else: else:
print(f"批量请求失败: {result.get('message', '未知错误')}") print(f"批量请求失败: {result.get('message', '未知错误')}")
@ -1698,8 +1711,8 @@ class BossZhipinAPI:
# 检查是否需要处理反爬虫 # 检查是否需要处理反爬虫
if self.handle_anti_bot_response(result): if self.handle_anti_bot_response(result):
print(f"🔄 检测到安全验证等待3秒后重试... (尝试 {attempt + 1}/{max_retries})") wait_time = sleep_random_between()
time.sleep(3) print(f"🔄 检测到安全验证,等待 {int(wait_time)} 秒后重试... (尝试 {attempt + 1}/{max_retries})")
# 在重试前更新一些请求头 # 在重试前更新一些请求头
default_custom_headers.update({ default_custom_headers.update({
'User-Agent': self.get_random_user_agent(), 'User-Agent': self.get_random_user_agent(),
@ -1732,7 +1745,8 @@ class BossZhipinAPI:
self.reinit_session() self.reinit_session()
self.init_cookies() self.init_cookies()
logger.info("IP_SWITCH mode={} cfg={}", "local", None) logger.info("IP_SWITCH mode={} cfg={}", "local", None)
time.sleep(1) wait_time = sleep_random_between()
logger.info(f"⏳ IP切换后等待 {int(wait_time)}")
continue continue
else: else:
self.ip_manager.mark_failure(reason) self.ip_manager.mark_failure(reason)
@ -1752,6 +1766,9 @@ class BossZhipinAPI:
self.local_success_count += 1 self.local_success_count += 1
self.local_fail_count = 0 self.local_fail_count = 0
self.try_restore_proxy() self.try_restore_proxy()
# 请求成功后也添加短暂休眠,进一步减少风控
post_wait_time = random.uniform(2, 5)
time.sleep(post_wait_time)
return result return result
except requests.RequestException as e: except requests.RequestException as e:
@ -1774,7 +1791,8 @@ class BossZhipinAPI:
logger.info("IP_SWITCH mode={} cfg={}", mode, cfg) logger.info("IP_SWITCH mode={} cfg={}", mode, cfg)
if attempt < max_retries - 1: if attempt < max_retries - 1:
wait_time = min(2 ** attempt, 10) # 最大等待10秒 # 确保至少等待10秒重试时递增等待时间
wait_time = max(10, min(2 ** attempt * 5, 30)) # 至少10秒最大30秒
print(f"⏳ 等待{wait_time}秒后重试...") print(f"⏳ 等待{wait_time}秒后重试...")
time.sleep(wait_time) time.sleep(wait_time)
else: else:
@ -2175,9 +2193,8 @@ if __name__ == "__main__":
self_reinit_ok = True self_reinit_ok = True
except Exception: except Exception:
pass pass
wait_time = 5 wait_time = sleep_random_between()
print(f" ⏳ 等待 {wait_time} 秒后继续...{'(已重建会话)' if self_reinit_ok else ''}") print(f" ⏳ 等待 {int(wait_time)} 秒后继续...{'(已重建会话)' if self_reinit_ok else ''}")
time.sleep(wait_time)
except Exception as e: except Exception as e:
print(f" ⚠️ 处理职位详情异常: {e}") print(f" ⚠️ 处理职位详情异常: {e}")