# ⚠️ DEPRECATED — 2026-03-21
# This file is an internal, hand-copied file. It is deprecated and is no
# longer referenced by any facade.
# Use spiderJobs.platforms.* or the corresponding module in crawler_core
# instead.
# It will be removed in the next milestone.
#
"""
Generic HTTP client.

Built on requests-go, with Chrome TLS fingerprint impersonation built in.
Supports a single proxy IP / a tunnel proxy / proxy-pool rotation.
Independent of any recruitment platform; it is purely responsible for
sending requests.

Copied from spiderJobs/core/http_client.py — do not import spiderJobs
directly, to avoid a cross-module dependency.
"""

from __future__ import annotations

import random
from typing import Any, Optional

import requests_go as requests
from requests_go.tls_config import TLS_CHROME_LATEST


class HTTPClient:
    """
    Generic HTTP client.

    Proxy priority: tunnel_proxy > proxy_pool > proxy

    * ``tunnel_proxy`` set: every request goes through a brand-new session
      that is closed right after the response body has been parsed.
    * ``proxy_pool`` set (and no tunnel): requests share one long-lived
      session and a proxy is picked at random from the pool per request.
    * only ``proxy`` set: the long-lived session is configured with that
      proxy once, at construction time.

    The client can be used as a context manager; leaving the ``with`` block
    closes the long-lived session.
    """

    def __init__(
        self,
        base_url: str,
        default_headers: Optional[dict] = None,
        proxy: Optional[str] = None,
        tunnel_proxy: Optional[str] = None,
        proxy_pool: Optional[list[str]] = None,
        timeout: int = 10,
    ):
        """
        Create a client bound to ``base_url``.

        Args:
            base_url: Prefix that is concatenated (no separator is added)
                with the ``path`` given to :meth:`get` / :meth:`post`.
            default_headers: Headers sent with every request; ``None`` means
                no default headers.
            proxy: Fixed proxy URL, used for both http and https. Only
                applied when neither ``proxy_pool`` nor ``tunnel_proxy`` is
                given.
            tunnel_proxy: Tunnel proxy URL; takes priority over the other
                proxy options.
            proxy_pool: Proxy URLs to choose from at random per request.
            timeout: Timeout value passed to every request.
        """
        self.base_url = base_url
        self.default_headers = default_headers or {}
        self.timeout = timeout

        self._proxy = proxy
        self._tunnel_proxy = tunnel_proxy
        self._proxy_pool = proxy_pool

        # Long-lived session used by the non-tunnel code paths.
        self._session = self._new_session()

        # A fixed proxy is installed on the session only when it is the sole
        # proxy option; pool and tunnel proxies are supplied per request.
        if proxy and not proxy_pool and not tunnel_proxy:
            self._session.proxies = {"http": proxy, "https": proxy}

    def __enter__(self) -> HTTPClient:
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the long-lived session; exceptions are not suppressed."""
        self.close()

    def close(self) -> None:
        """Close the long-lived session held by this client."""
        self._session.close()

    def _new_session(self) -> requests.Session:
        """Build a session configured with the Chrome TLS profile."""
        session = requests.Session()
        session.tls_config = TLS_CHROME_LATEST
        # TLS_CHROME_LATEST is a module-level object imported from
        # requests_go, so this flag is set on that shared object rather than
        # on a per-session copy.
        TLS_CHROME_LATEST.random_ja3 = True
        return session

    def _get_proxies(self) -> Optional[dict]:
        """
        Pick a per-request proxy mapping from the pool.

        Returns:
            ``{"http": url, "https": url}`` when a non-empty pool is
            configured, otherwise ``None``.
        """
        if self._proxy_pool:
            chosen = random.choice(self._proxy_pool)
            # A random 6-digit "#fragment" is appended to the chosen URL.
            # NOTE(review): presumably this makes every proxy URL distinct so
            # a previously pooled connection is not reused — confirm.
            unique = f"{chosen}#{random.randint(100000, 999999)}"
            return {"http": unique, "https": unique}
        return None

    def _merge_headers(self, extra: Optional[dict] = None) -> dict:
        """
        Combine the default headers with per-request ones.

        Args:
            extra: Per-request headers; they override defaults on key
                clashes.

        Returns:
            A new dict; ``self.default_headers`` is not modified.
        """
        headers = {**self.default_headers}
        if extra:
            headers.update(extra)
        return headers

    def _request(
        self,
        verb: str,
        path: str,
        headers: Optional[dict],
        **payload: Any,
    ) -> tuple[int, Any]:
        """
        Send one request and return ``(status_code, parsed_json_body)``.

        Shared implementation behind :meth:`get` and :meth:`post`.

        Args:
            verb: Name of the session method to call (``"get"`` / ``"post"``).
            path: Appended to ``base_url`` to form the URL.
            headers: Per-request headers merged over the defaults.
            **payload: Verb-specific keyword arguments (``json=...`` or
                ``params=...``) forwarded unchanged to the session call.

        Raises:
            Nothing is caught here: errors raised by the session call or by
            ``resp.json()`` propagate to the caller.
        """
        kwargs: dict[str, Any] = {
            **payload,
            "headers": self._merge_headers(headers),
            "timeout": self.timeout,
        }
        url = f"{self.base_url}{path}"

        if self._tunnel_proxy:
            # Tunnel mode: one throw-away session per request, always closed.
            session = self._new_session()
            try:
                # The verb method itself (session.get / session.post) is
                # called, exactly as the public methods did before the
                # refactor.
                resp = getattr(session, verb)(
                    url,
                    proxies={
                        "http": self._tunnel_proxy,
                        "https": self._tunnel_proxy,
                    },
                    **kwargs,
                )
                return resp.status_code, resp.json()
            finally:
                session.close()

        # Pool mode adds a per-request proxy; otherwise the session's own
        # configuration (fixed proxy or none) applies.
        proxies = self._get_proxies()
        if proxies:
            kwargs["proxies"] = proxies

        resp = getattr(self._session, verb)(url, **kwargs)
        return resp.status_code, resp.json()

    def post(self, path: str, body: dict, headers: Optional[dict] = None) -> tuple[int, Any]:
        """
        POST ``body`` as JSON to ``base_url + path``.

        Args:
            path: Appended to ``base_url`` to form the URL.
            body: Sent via the ``json=`` argument of the session call.
            headers: Extra headers merged over the default headers.

        Returns:
            ``(status_code, parsed_json_body)``.
        """
        return self._request("post", path, headers, json=body)

    def get(self, path: str, params: Optional[dict] = None, headers: Optional[dict] = None) -> tuple[int, Any]:
        """
        GET ``base_url + path`` with optional query parameters.

        Args:
            path: Appended to ``base_url`` to form the URL.
            params: Sent via the ``params=`` argument of the session call.
            headers: Extra headers merged over the default headers.

        Returns:
            ``(status_code, parsed_json_body)``.
        """
        return self._request("get", path, headers, params=params)