Plan 01 - facade migration (ARCH-06/07):
- boss.py: import from spiderJobs.platforms.boss.{api,client,sign}
- qcwy.py: import from spiderJobs.platforms.job51.{api,client}
- zhilian.py: import from spiderJobs.platforms.zhilian.{api,client,sign}
- All 3 Service classes: +4 async_* methods via asyncio.to_thread()
Plan 02 - deprecation + cleanup (ARCH-08):
- 11 private copy files (_base, _http_client, _boss/job51/zhilian *): DEPRECATED header
- jobs_spider/ directory: fully deleted (user request)
Full regression: 106 passed in 0.61s
138 lines
4.6 KiB
Python
138 lines
4.6 KiB
Python
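# ⚠️ DEPRECATED — 2026-03-21
# This file is an internal hand-made copy. It is deprecated and is no longer
# referenced by any facade.
# Use the corresponding module in spiderJobs.platforms.* or crawler_core instead.
# It will be removed in the next milestone.
#
"""
51Job (前程无忧) HTTP client.

Copied from spiderJobs/platforms/job51/client.py — imports changed to local references.
"""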
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any, Optional
|
|
from urllib.parse import quote
|
|
|
|
from app.services.crawler._http_client import HTTPClient
|
|
from app.services.crawler._job51_sign import Job51Sign
|
|
|
|
# Root URL that every request path is appended to.
BASE_URL = "https://cupid.51job.com"

# User-agent tokens, joined with single spaces below. Kept as separate entries
# so an individual product token can be changed without touching one very
# long literal.
_JOB51_USER_AGENT = " ".join(
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
        "(KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
        "MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI",
        "MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.7(0x13080712)",
        "UnifiedPCMacWechat(0xf2641702) XWEB/18788",
    )
)

# Default headers handed to the base HTTP client for every request.
# NOTE(review): the values look like those of a WeChat mini-program web view
# (MicroMessenger user-agent, servicewechat.com referer) — confirm against the
# upstream client before changing any of them.
JOB51_HEADERS = {
    "user-agent": _JOB51_USER_AGENT,
    "xweb_xhr": "1",
    "from-domain": "51job_weixin_wxapp",
    "sec-fetch-site": "cross-site",
    "sec-fetch-mode": "cors",
    "sec-fetch-dest": "empty",
    "referer": "https://servicewechat.com/wx1131e5c71e668b5d/426/page-frame.html",
    "accept-language": "zh-CN,zh;q=0.9",
    "priority": "u=1, i",
}
|
|
|
|
|
|
class Job51Client(HTTPClient):
    """HTTP client for 51Job that signs every request.

    Each call asks ``signer.build_sign_path`` for a signed path and a
    signature, then sends the request with the 51Job-specific headers built
    by ``_job51_headers`` layered on top of ``JOB51_HEADERS``.
    """

    def __init__(
        self,
        signer: Optional[Job51Sign] = None,
        tunnel_proxy: Optional[str] = None,
        proxy: Optional[str] = None,
        proxy_pool: Optional[list[str]] = None,
        timeout: int = 10,
    ):
        """Initialise the base client and the signing state.

        Args:
            signer: Object used to sign request paths. A new ``Job51Sign`` is
                created when none (or a falsy value) is supplied.
            tunnel_proxy: Forwarded unchanged to ``HTTPClient.__init__``.
            proxy: Forwarded unchanged to ``HTTPClient.__init__``.
            proxy_pool: Forwarded unchanged to ``HTTPClient.__init__``.
            timeout: Forwarded unchanged to ``HTTPClient.__init__``.
        """
        super().__init__(
            base_url=BASE_URL,
            default_headers=JOB51_HEADERS,
            tunnel_proxy=tunnel_proxy,
            proxy=proxy,
            proxy_pool=proxy_pool,
            timeout=timeout,
        )
        self.signer = signer if signer else Job51Sign()
        # Generated once per client; reused as the "uuid" header and as the
        # "distinct_id" inside the "property" header.
        self._uuid = Job51Sign.generate_uuid()

    def _job51_headers(self, sign: str) -> dict:
        """Return the per-request 51Job headers carrying ``sign``.

        The "property" header is a compact JSON object (no spaces after
        separators, non-ASCII kept as-is) that is then fully percent-encoded.
        """
        tracking = {
            "frompageUrl": "",
            "pageUrl": "pages/index/index",
            "isLogin": "否",
            "accountid": "",
            "resumeId": "",
            "firstFrompageUrl": "",
            "distinct_id": self._uuid,
        }
        tracking_json = json.dumps(tracking, ensure_ascii=False, separators=(",", ":"))
        return {
            "sign": sign,
            "partner": "",
            "property": quote(tracking_json, safe=""),
            "uuid": self._uuid,
            "user-token": "",
            "account-id": "",
        }

    def post(self, path: str, body: dict, headers: Optional[dict] = None) -> tuple[int, Any]:
        """Sign and send a JSON POST request.

        Args:
            path: Request path handed to the signer.
            body: Payload; passed to the signer and sent as compact JSON.
            headers: Optional extra headers; they override the generated ones.

        Returns:
            ``(status_code, decoded_json_body)`` as produced by ``_post_raw``.
        """
        signed_path, signature = self.signer.build_sign_path(path, "POST", body=body)

        request_headers = self._job51_headers(signature)
        request_headers["Content-Type"] = "application/json"
        request_headers.update(headers or {})

        # Serialised here rather than by the HTTP library so the exact bytes
        # on the wire are under this client's control.
        # NOTE(review): presumably this must match what the signer hashed — confirm.
        payload = json.dumps(body, ensure_ascii=False, separators=(",", ":"))
        return self._post_raw(signed_path, payload, request_headers)

    def _post_raw(self, path: str, raw_body: str, headers: dict) -> tuple[int, Any]:
        """POST an already-serialised body and return ``(status, json)``.

        Without a tunnel proxy the shared ``self._session`` is used, with
        proxies from ``_get_proxies()`` when it returns a truthy value. With a
        tunnel proxy, a session from ``_new_session()`` serves this single
        call and is always closed afterwards.
        """
        merged_headers = self._merge_headers(headers)
        url = f"{self.base_url}{path}"

        if not self._tunnel_proxy:
            proxies = self._get_proxies()
            request_kwargs: dict[str, Any] = {
                "data": raw_body.encode("utf-8"),
                "headers": merged_headers,
                "timeout": self.timeout,
            }
            if proxies:
                request_kwargs["proxies"] = proxies
            response = self._session.post(url, **request_kwargs)
            return response.status_code, response.json()

        session = self._new_session()
        try:
            response = session.post(
                url,
                data=raw_body.encode("utf-8"),
                headers=merged_headers,
                proxies={"http": self._tunnel_proxy, "https": self._tunnel_proxy},
                timeout=self.timeout,
            )
            return response.status_code, response.json()
        finally:
            session.close()

    def get(self, path: str, params: Optional[dict] = None, headers: Optional[dict] = None) -> tuple[int, Any]:
        """Sign and send a GET request through the base client.

        Args:
            path: Request path handed to the signer.
            params: Query parameters; passed to the signer only.
            headers: Optional extra headers; they override the generated ones.

        Returns:
            Whatever ``HTTPClient.get`` returns for the signed path.
        """
        signed_path, signature = self.signer.build_sign_path(path, "GET", params=params)

        request_headers = self._job51_headers(signature)
        request_headers["content-type"] = "application/x-www-form-urlencoded"
        request_headers.update(headers or {})

        # params is deliberately None here.
        # NOTE(review): presumably the signer already embedded the query in
        # signed_path — confirm against Job51Sign.build_sign_path.
        return super().get(signed_path, params=None, headers=request_headers)
|
|
|
|
|
|
def create_client(
    signer: Optional[Job51Sign] = None,
    tunnel_proxy: Optional[str] = None,
    proxy: Optional[str] = None,
    proxy_pool: Optional[list[str]] = None,
    timeout: int = 10,
) -> Job51Client:
    """Build a ``Job51Client`` from the given options.

    Args:
        signer: Optional signer forwarded to ``Job51Client``.
        tunnel_proxy: Forwarded to ``Job51Client``.
        proxy: Forwarded to ``Job51Client``.
        proxy_pool: Forwarded to ``Job51Client``.
        timeout: Forwarded to ``Job51Client``. The default equals the
            constructor's own default, so callers that omit it get the same
            client as before this parameter existed.

    Returns:
        The newly constructed client.
    """
    return Job51Client(
        signer=signer,
        tunnel_proxy=tunnel_proxy,
        proxy=proxy,
        proxy_pool=proxy_pool,
        timeout=timeout,
    )
|