From 7285475eb5d2bc7df414a61522c677ef60a1808f Mon Sep 17 00:00:00 2001 From: duxin Date: Tue, 20 Jan 2026 15:42:47 +0800 Subject: [PATCH] add time.sleep > 10 --- jobs_spider/boss/boos_api.py | 81 ++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/jobs_spider/boss/boos_api.py b/jobs_spider/boss/boos_api.py index 7da547c..fac08eb 100644 --- a/jobs_spider/boss/boos_api.py +++ b/jobs_spider/boss/boos_api.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import requests from typing import Dict, Any, List, Optional, Tuple @@ -22,19 +22,22 @@ logger.add("logs/log_{time:YYYY-MM-DD}.log", level="INFO", rotation="00:00", ret def sleep_random_between() -> float: """ - 执行统一的随机延时 + 执行统一的随机延时(至少10秒以上,减少风控触发) Returns: float: 实际休眠的秒数 """ try: - min_seconds = float(os.getenv('SLEEP_MIN_SECONDS', '1')) - max_seconds = float(os.getenv('SLEEP_MAX_SECONDS', '10')) + min_seconds = float(os.getenv('SLEEP_MIN_SECONDS', '10')) + max_seconds = float(os.getenv('SLEEP_MAX_SECONDS', '20')) + # 确保最小值至少为10秒 + if min_seconds < 10: + min_seconds = 10 if max_seconds < min_seconds: - max_seconds = min_seconds + max_seconds = min_seconds + 10 wait_time = random.uniform(min_seconds, max_seconds) except Exception: - wait_time = 1.0 + wait_time = 10.0 time.sleep(wait_time) return wait_time @@ -429,8 +432,8 @@ class BossZhipinAPI: for retry_count in range(max_retries): print(f"⏳ 第 {retry_count + 1}/{max_retries} 次重试...") - # 等待时间递增:5秒、10秒、15秒 - wait_time = (retry_count + 1) * 5 + # 等待时间递增:至少10秒,然后15秒、20秒 + wait_time = max(10, (retry_count + 1) * 5 + 5) print(f"⏰ 等待 {wait_time} 秒后重试(让隧道代理切换IP)...") time.sleep(wait_time) @@ -531,7 +534,8 @@ class BossZhipinAPI: max_retries = 3 for retry_count in range(1, max_retries + 1): - wait_time = retry_count * 5 # 5秒、10秒、15秒递增等待 + # 等待时间递增:至少10秒,然后15秒、20秒 + wait_time = max(10, retry_count * 5 + 5) print(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...") time.sleep(wait_time) @@ 
-574,7 +578,8 @@ class BossZhipinAPI: max_retries = 3 for retry_count in range(1, max_retries + 1): - wait_time = retry_count * 5 # 5秒、10秒、15秒递增等待 + # 等待时间递增:至少10秒,然后15秒、20秒 + wait_time = max(10, retry_count * 5 + 5) logger.info(f"⏳ 第 {retry_count} 次重试,等待 {wait_time} 秒...") time.sleep(wait_time) @@ -944,9 +949,8 @@ class BossZhipinAPI: self.reinit_session(cfg) self.init_cookies() logger.info("IP_SWITCH mode={} cfg={}", mode, cfg) - wait_time = 5 - logger.info(f"⏳ IP异常,等待 {wait_time} 秒后重试 (批量GET)") - time.sleep(wait_time) + wait_time = sleep_random_between() + logger.info(f"⏳ IP异常,等待 {int(wait_time)} 秒后重试 (批量GET)") headers = self.build_request_headers({ "Referer": "https://www.zhipin.com/web/geek/job", "User-Agent": self.get_random_user_agent() @@ -963,9 +967,8 @@ class BossZhipinAPI: return None result = response.json() if self.handle_ip_abnormal_response(result): - wait_time = 5 - logger.info(f"⏳ IP异常,等待 {wait_time} 秒后重试 (批量GET)") - time.sleep(wait_time) + wait_time = sleep_random_between() + logger.info(f"⏳ IP异常,等待 {int(wait_time)} 秒后重试 (批量GET)") headers = self.build_request_headers({ "Referer": "https://www.zhipin.com/web/geek/job", "User-Agent": self.get_random_user_agent() @@ -986,6 +989,9 @@ class BossZhipinAPI: if hasattr(self, 'local_mode') and self.local_mode: self.local_success_count += 1 self.try_restore_proxy() + # 请求成功后也添加短暂休眠,进一步减少风控 + post_wait_time = random.uniform(2, 5) + time.sleep(post_wait_time) return result else: print(f"❌ 批量请求失败: {response.status_code}") @@ -1028,8 +1034,9 @@ class BossZhipinAPI: print("❌ 会话初始化失败") return None else: - print("⏱️ 连续请求,等待2秒...") - time.sleep(2) + print("⏱️ 连续请求,等待至少10秒...") + wait_time = sleep_random_between() + print(f"⏰ 已等待 {int(wait_time)} 秒") search_params = { 'pageSize': params.get('pageSize', 15), @@ -1183,8 +1190,9 @@ class BossZhipinAPI: print(f"⏹ 接口返回 hasMore = False,在第 {current_page} 页停止翻页") break - print("⏱️ 等待3秒后获取下一页...") - time.sleep(3) + print("⏱️ 等待至少10秒后获取下一页...") + wait_time = 
sleep_random_between() + print(f"⏰ 已等待 {int(wait_time)} 秒") current_page += 1 print(f"\n🎉 处理完成,总共处理 {processed_count} 条职位数据") @@ -1386,9 +1394,8 @@ class BossZhipinAPI: data = response.json() logger.info("RAW_RESPONSE method={} url={} status={} resp_size={}", "POST", "https://www.zhipin.com/wapi/batch/requests", response.status_code, len(response.content)) if self.handle_ip_abnormal_response(data): - wait_time = 5 - logger.info(f"⏳ IP异常,等待 {wait_time} 秒后重试 (批量POST)") - time.sleep(wait_time) + wait_time = sleep_random_between() + logger.info(f"⏳ IP异常,等待 {int(wait_time)} 秒后重试 (批量POST)") self.init_cookies() headers = self.build_request_headers({ "Content-Type": "application/json", @@ -1403,6 +1410,9 @@ class BossZhipinAPI: ) response.raise_for_status() data = response.json() + # 请求成功后也添加短暂休眠,进一步减少风控 + post_wait_time = random.uniform(2, 5) + time.sleep(post_wait_time) return data except requests.exceptions.RequestException as e: print(f"请求失败: {str(e)}") @@ -1528,8 +1538,8 @@ class BossZhipinAPI: # 检查是否需要处理反爬虫 if self.handle_anti_bot_response(result): - print("🔄 批量请求检测到安全验证,等待3秒后重试...") - time.sleep(3) + wait_time = sleep_random_between() + print(f"🔄 批量请求检测到安全验证,等待 {int(wait_time)} 秒后重试...") # 重新构建请求头 headers = self.build_request_headers({ @@ -1560,6 +1570,9 @@ class BossZhipinAPI: mapped_results[request_key] = response_data self.ip_manager.mark_success() + # 请求成功后也添加短暂休眠,进一步减少风控 + post_wait_time = random.uniform(2, 5) + time.sleep(post_wait_time) return mapped_results else: print(f"批量请求失败: {result.get('message', '未知错误')}") @@ -1698,8 +1711,8 @@ class BossZhipinAPI: # 检查是否需要处理反爬虫 if self.handle_anti_bot_response(result): - print(f"🔄 检测到安全验证,等待3秒后重试... (尝试 {attempt + 1}/{max_retries})") - time.sleep(3) + wait_time = sleep_random_between() + print(f"🔄 检测到安全验证,等待 {int(wait_time)} 秒后重试... 
(尝试 {attempt + 1}/{max_retries})") # 在重试前更新一些请求头 default_custom_headers.update({ 'User-Agent': self.get_random_user_agent(), @@ -1732,7 +1745,8 @@ class BossZhipinAPI: self.reinit_session() self.init_cookies() logger.info("IP_SWITCH mode={} cfg={}", "local", None) - time.sleep(1) + wait_time = sleep_random_between() + logger.info(f"⏳ IP切换后等待 {int(wait_time)} 秒") continue else: self.ip_manager.mark_failure(reason) @@ -1752,6 +1766,9 @@ class BossZhipinAPI: self.local_success_count += 1 self.local_fail_count = 0 self.try_restore_proxy() + # 请求成功后也添加短暂休眠,进一步减少风控 + post_wait_time = random.uniform(2, 5) + time.sleep(post_wait_time) return result except requests.RequestException as e: @@ -1774,7 +1791,8 @@ class BossZhipinAPI: logger.info("IP_SWITCH mode={} cfg={}", mode, cfg) if attempt < max_retries - 1: - wait_time = min(2 ** attempt, 10) # 最大等待10秒 + # 确保至少等待10秒,重试时递增等待时间 + wait_time = max(10, min(2 ** attempt * 5, 30)) # 至少10秒,最大30秒 print(f"⏳ 等待{wait_time}秒后重试...") time.sleep(wait_time) else: @@ -2175,9 +2193,8 @@ if __name__ == "__main__": self_reinit_ok = True except Exception: pass - wait_time = 5 - print(f" ⏳ 等待 {wait_time} 秒后继续...{'(已重建会话)' if self_reinit_ok else ''}") - time.sleep(wait_time) + wait_time = sleep_random_between() + print(f" ⏳ 等待 {int(wait_time)} 秒后继续...{'(已重建会话)' if self_reinit_ok else ''}") except Exception as e: print(f" ⚠️ 处理职位详情异常: {e}")