387 lines
16 KiB
Python

import json
import os
import random
import time
from typing import Dict, Any, List, Optional
from urllib.parse import urlencode

import requests
from loguru import logger
class ZhilianService:
    """HTTP client for a handful of Zhilian (zhaopin.com) JSON endpoints.

    All requests go through one shared ``requests.Session`` (optionally
    proxied).  Every request/response pair is logged via ``loguru`` with
    credential-bearing headers and query parameters masked.

    Environment variables read by the mini-app style endpoints:
    ``ZP_MINIAPP_UA``, ``ZP_MINIAPP_VERSION``, ``ZP_MINIAPP_CAPI_VERSION``,
    ``ZP_MINIAPP_DEVICE_ID``, ``ZP_MINIAPP_AT``, ``ZP_MINIAPP_RT`` and
    ``ZP_MINIAPP_RESUME_NUMBER``.
    """

    # Header names whose values must never reach the logs.  ``x-zp-at`` and
    # ``x-zp-rt`` carry the ZP_MINIAPP_AT / ZP_MINIAPP_RT token values.
    _SENSITIVE_HEADERS = frozenset(
        {"authorization", "cookie", "set-cookie", "x-zp-at", "x-zp-rt"}
    )
    # Query parameter names that carry the same tokens (see
    # ``get_company_jobs_by_id``).
    _SENSITIVE_PARAMS = frozenset({"at", "rt"})

    _JOB_DETAIL_URL = "https://fe-api.zhaopin.com/c/i/jobs/position-detail-new"
    _PC_USER_AGENT = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    )
    _MINIAPP_DEFAULT_UA = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 "
        "MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Mac "
        "MacWechat/WMPF MacWechat/3.8.7(0x13080712) UnifiedPCMacWechat(0xf26414f0) XWEB/16962"
    )

    def __init__(self, proxy_url: Optional[str] = None):
        """Create the shared session and apply ``proxy_url`` if one is given."""
        self.session = requests.Session()
        # Route through set_proxy so both entry points clean the URL the same way.
        self.set_proxy(proxy_url)

    def set_proxy(self, proxy_url: Optional[str]) -> None:
        """Set (or clear) the HTTP/HTTPS proxy used by the session.

        Surrounding whitespace and back-ticks are stripped.  ``None``, an
        empty string, or a value that is empty after stripping clears the
        proxy configuration.
        """
        cleaned = proxy_url.strip().strip("`").strip() if proxy_url else ""
        if not cleaned:
            self.session.proxies = {}
            return
        self.session.proxies = {"http": cleaned, "https": cleaned}

    def _sanitize_headers(self, headers: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``headers`` with credential values replaced by ``***``.

        Header names are compared case-insensitively; original key casing is kept.
        """
        return {
            k: ("***" if str(k).lower() in self._SENSITIVE_HEADERS else v)
            for k, v in headers.items()
        }

    def _sanitize_params(self, params: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Return a copy of ``params`` with token values replaced by ``***``.

        Non-dict input (including ``None``) is returned unchanged.
        """
        if not isinstance(params, dict):
            return params
        return {
            k: ("***" if str(k).lower() in self._SENSITIVE_PARAMS else v)
            for k, v in params.items()
        }

    @staticmethod
    def _build_curl_command(
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Render a request as a single-line ``curl`` command for debugging.

        Callers are expected to pass already-sanitized headers/params; this
        helper only formats.  Every argument is wrapped in single quotes with
        embedded single quotes escaped as ``'"'"'``.
        """

        def sh_quote(text: str) -> str:
            return "'" + text.replace("'", "'\"'\"'") + "'"

        curl_url = url
        if params and isinstance(params, dict):
            query_string = urlencode(params)
            if query_string:
                separator = "&" if "?" in curl_url else "?"
                curl_url = f"{curl_url}{separator}{query_string}"

        header_parts = ["-H " + sh_quote(f"{k}: {v}") for k, v in headers.items()]

        data_part = ""
        if json_body is not None:
            data_part = " --data " + sh_quote(json.dumps(json_body, ensure_ascii=False))

        return (
            f"curl -X {method.upper()} {sh_quote(curl_url)} "
            + " ".join(header_parts)
            + data_part
        )

    def _log_request_response(
        self,
        label: str,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        # Quoted so the annotation is not evaluated when the class is defined.
        response: Optional["requests.Response"] = None,
    ) -> None:
        """Log a request, an equivalent curl command, and (if given) the response.

        Sensitive headers and query parameters are masked in every log line.
        The response body is truncated to its first 1000 characters.
        """
        safe_headers = self._sanitize_headers(headers)
        safe_params = self._sanitize_params(params)
        logger.info(
            f"[Zhilian-{label}] request method={method} url={url} headers={safe_headers} "
            f"params={safe_params} json={json_body}"
        )

        # The curl line is a debugging aid only; never let it break the request flow.
        try:
            curl_cmd = self._build_curl_command(method, url, safe_headers, safe_params, json_body)
            logger.info(f"[Zhilian-{label}] curl_debug {curl_cmd}")
        except Exception as e:
            logger.debug(f"[Zhilian-{label}] build curl error: {e}")

        if response is None:
            return

        try:
            text_sample = (response.text or "")[:1000]
        except Exception:
            text_sample = "<unreadable>"
        logger.info(
            f"[Zhilian-{label}] response status={response.status_code} "
            f"headers={self._sanitize_headers(dict(response.headers))} "
            f"body_sample={text_sample}"
        )

    def _gen_client_id(self) -> str:
        """Generate a random id shaped like ``xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx``.

        Each ``x`` becomes a random hex digit and ``y`` one of ``8``/``9``/``a``/``b``.
        """
        t = int(time.time() * 1000) + int(time.perf_counter() * 1000)

        def repl(c: str) -> str:
            n = int((t + random.random() * 16) % 16)
            if c == 'x':
                return hex(n)[2:]
            return hex((n & 0x3) | 0x8)[2:]

        tpl = "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
        return ''.join(repl(c) if c in 'xy' else c for c in tpl)

    def _gen_v(self) -> float:
        """Return a random float in [0, 1) rounded to 8 decimals (the ``_v`` param)."""
        return round(random.random(), 8)

    def _gen_page_request_id(self) -> str:
        """Return ``<fixed-prefix>-<epoch millis>-<random 0..999999>``."""
        return f"cf1e3b3e655b4eb5a306110a83c77c29-{int(time.time()*1000)}-{random.randint(0,999999)}"

    def _build_headers_pc(self) -> Dict[str, str]:
        """Return the base header set shared by the ``fe-api.zhaopin.com`` calls."""
        return {
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "content-type": "application/json;charset=UTF-8",
            "origin": "https://www.zhaopin.com",
            "priority": "u=1, i",
            "referer": "https://www.zhaopin.com/",
            "sec-ch-ua-mobile": "?0",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "x-zp-page-code": "0",
        }

    @classmethod
    def _miniapp_user_agent(cls) -> str:
        """Return ``ZP_MINIAPP_UA`` from the environment, or the built-in default."""
        return os.getenv("ZP_MINIAPP_UA", cls._MINIAPP_DEFAULT_UA)

    @staticmethod
    def _miniapp_tokens() -> "tuple":
        """Return ``(ZP_MINIAPP_AT, ZP_MINIAPP_RT)`` stripped; empty string when unset."""
        return (
            os.getenv("ZP_MINIAPP_AT", "").strip(),
            os.getenv("ZP_MINIAPP_RT", "").strip(),
        )

    def _request_json(
        self,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        timeout: int = 30,
        *,
        label: str = "request",
        error_prefix: str = "Request failed",
    ) -> Optional[Dict[str, Any]]:
        """Send a request through the session and return the decoded JSON body.

        The exchange is logged (under ``label``) before the status is checked,
        so failed responses are still visible in the logs.

        Returns:
            The decoded JSON, or ``None`` on any failure (network error,
            non-2xx status, invalid JSON).  Failures are logged as
            ``"{error_prefix}: {exception}"`` and never raised.
        """
        verb = method.upper()
        try:
            resp = self.session.request(
                verb, url, headers=headers, params=params, json=json_body, timeout=timeout
            )
            self._log_request_response(
                label,
                verb,
                url,
                headers,
                params=params,
                json_body=json_body,
                response=resp,
            )
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            # Boundary handler: callers rely on None rather than exceptions.
            logger.error(f"{error_prefix}: {e}")
            return None

    def fetch_company_desc_by_job(self, number: str) -> Optional[str]:
        """Return ``detailedCompany.companyDescription`` for a job number.

        Returns ``None`` when the request fails, the expected keys are
        missing or of the wrong type, or the description is empty.
        """
        detail = self.get_job_detail(number)
        if not isinstance(detail, dict):
            return None
        company = detail.get("detailedCompany")
        if not isinstance(company, dict):
            return None
        desc = company.get("companyDescription")
        if isinstance(desc, str) and desc:
            return desc
        return None

    def search_jobs(
        self,
        city_id: int = 801,
        page_size: int = 15,
        page_index: int = 1,
        job_level3_code: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Search positions and attach a ``companyDesc`` string to each result.

        For every returned job that has a ``number``, one extra detail request
        is made via ``fetch_company_desc_by_job``; a missing description is
        stored as ``""``.

        Args:
            city_id: Currently NOT sent to the API (see note below).
            page_size: Value for the ``pageSize`` payload field.
            page_index: Value for the ``pageIndex`` payload field.
            job_level3_code: When truthy, sent as ``S_SOU_JD_JOB_LEVEL3``.

        Returns:
            The ``data.list`` array of the response, or ``[]`` when the request
            fails, ``code`` is not 200, or the list is missing/malformed.
        """
        headers = self._build_headers_pc()
        base_url = "https://fe-api.zhaopin.com/c/i/search/positions"
        params = {
            "_v": self._gen_v(),
            "x-zp-page-request-id": self._gen_page_request_id(),
            "x-zp-client-id": self._gen_client_id(),
        }
        payload = {
            # NOTE(review): city_id is accepted but the city filter is always
            # sent empty. Left as-is to keep existing results stable — confirm
            # whether this should be str(city_id).
            "S_SOU_WORK_CITY": "",
            "order": 4,
            "pageSize": page_size,
            "pageIndex": page_index,
            "eventScenario": "pcSearchedSouSearch",
            "anonymous": 1,
            "platform": 13,
            "version": "0.0.0",
        }
        if job_level3_code:
            payload["S_SOU_JD_JOB_LEVEL3"] = job_level3_code

        data = self._request_json("POST", base_url, headers, params=params, json_body=payload)
        if not isinstance(data, dict) or data.get("code") != 200:
            return []

        # "data" or "list" may be null/missing in the response; treat as empty.
        container = data.get("data")
        jobs = container.get("list") if isinstance(container, dict) else None
        if not isinstance(jobs, list):
            return []

        for job in jobs:
            if not isinstance(job, dict):
                continue
            num = job.get("number")
            if num:
                job["companyDesc"] = self.fetch_company_desc_by_job(str(num)) or ""
        return jobs

    def search_company_jobs_by_name(
        self,
        company_name: str,
        city_id: Optional[int] = None,
        page_size: int = 15,
        page_index: int = 1,
    ) -> Optional[Dict[str, Any]]:
        """Search positions by company name via the ``cgate.zhaopin.com`` endpoint.

        ``ZP_MINIAPP_AT`` / ``ZP_MINIAPP_RT`` are sent as ``x-zp-at`` /
        ``x-zp-rt`` headers when set; ``ZP_MINIAPP_RESUME_NUMBER`` is added to
        the body as ``resumeNumber`` when set.

        Args:
            company_name: Sent as ``S_SOU_FULL_INDEX``.
            city_id: When not ``None``, sent as ``S_SOU_WORK_CITY``.
            page_size: Value for ``pageSize``.
            page_index: Value for ``pageIndex``.

        Returns:
            The full decoded JSON response, or ``None`` on failure.
        """
        url = "https://cgate.zhaopin.com/positionbusiness/searchrecommend/searchPositions"
        headers: Dict[str, Any] = {
            "User-Agent": self._miniapp_user_agent(),
            "Content-Type": "application/json",
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "x-zp-action-id": "",
            "xweb_xhr": "1",
            "x-zp-page-code": "7019",
            "x-zp-version": os.getenv("ZP_MINIAPP_VERSION", "4.1.224"),
            "x-zp-platform": "12",
            "x-zp-device-id": os.getenv("ZP_MINIAPP_DEVICE_ID", "A774EA47-0AB5-4608-B51D-84BF51CC0786"),
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/617/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        at_token, rt_token = self._miniapp_tokens()
        if at_token:
            headers["x-zp-at"] = at_token
        if rt_token:
            headers["x-zp-rt"] = rt_token

        body: Dict[str, Any] = {
            "eventScenario": "wxmpZhaopinSearchV2",
            "pageIndex": page_index,
            "pageSize": page_size,
            "filterMinSalary": 1,
            "S_SOU_EXPAND": "SOU_COMPANY_ID",
            "S_SOU_FULL_INDEX": company_name,
            "sortType": "DEFAULT",
            "version": "8.11.22",
            "identity": "2",
            "anonymous": 0,
        }
        if city_id is not None:
            body["S_SOU_WORK_CITY"] = city_id
        resume_number = os.getenv("ZP_MINIAPP_RESUME_NUMBER", "").strip()
        if resume_number:
            body["resumeNumber"] = resume_number

        return self._request_json(
            "POST",
            url,
            headers,
            json_body=body,
            label="search-company-jobs",
            error_prefix="Zhilian search_company_jobs_by_name failed",
        )

    def get_job_detail(self, job_number: str) -> Optional[Dict[str, Any]]:
        """Fetch the position detail for ``job_number``.

        A fresh client id is generated per call and sent both as the
        ``x-zp-client-id`` query parameter and as a cookie of the same name.

        Returns:
            The ``data`` member of the response, or ``None`` when the request
            fails or the response is not a non-empty JSON object.
        """
        client_id = self._gen_client_id()
        params_pc = {
            "number": job_number,
            "_v": self._gen_v(),
            "x-zp-page-request-id": self._gen_page_request_id(),
            "x-zp-client-id": client_id,
        }
        headers_pc = {
            "User-Agent": self._PC_USER_AGENT,
            "Cookie": f"x-zp-client-id={client_id}",
        }
        # Merge the shared base headers on top.
        headers_pc.update(self._build_headers_pc())

        data_pc = self._request_json("GET", self._JOB_DETAIL_URL, headers_pc, params=params_pc)
        if data_pc and isinstance(data_pc, dict):
            return data_pc.get("data")
        return None

    def get_company_detail(self, company_number: str) -> Optional[Dict[str, Any]]:
        """Fetch company details for ``company_number``.

        ``ZP_MINIAPP_AT`` / ``ZP_MINIAPP_RT`` are sent as ``x-zp-at`` /
        ``x-zp-rt`` headers when set.

        Returns:
            The ``data`` member of the response, or ``None`` on failure.
        """
        url = "https://cgate.zhaopin.com/positionbusiness/exposure/companyDetail"
        params = {
            "number": company_number,
            "platform": "12",
            "version": "0.0.0",
        }
        headers = {
            "User-Agent": self._miniapp_user_agent(),
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "xweb_xhr": "1",
            "x-zp-page-code": "0",
            "x-zp-version": os.getenv("ZP_MINIAPP_VERSION", "4.1.224"),
            "x-zp-platform": "12",
            "x-zp-device-id": os.getenv("ZP_MINIAPP_DEVICE_ID", "A774EA47-0AB5-4608-B51D-84BF51CC0786"),
            "content-type": "application/json",
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/617/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        at_token, rt_token = self._miniapp_tokens()
        if at_token:
            headers["x-zp-at"] = at_token
        if rt_token:
            headers["x-zp-rt"] = rt_token

        data = self._request_json("GET", url, headers, params=params)
        if data and isinstance(data, dict):
            return data.get("data")
        return None

    def get_company_jobs_by_id(
        self,
        company_number: str,
        page_index: int = 1,
        page_size: int = 30,
        work_city: Optional[int] = None,
    ) -> Optional[Dict[str, Any]]:
        """List a company's positions via the ``capi.zhaopin.com`` endpoint.

        Unlike the other mini-app calls, ``ZP_MINIAPP_AT`` / ``ZP_MINIAPP_RT``
        are sent as ``at`` / ``rt`` query parameters here, and the version is
        read from ``ZP_MINIAPP_CAPI_VERSION``.

        Args:
            company_number: Sent as ``S_SOU_COMPANY_ID``.
            page_index: Value for ``pageIndex``.
            page_size: Value for ``pageSize``.
            work_city: Sent as ``S_SOU_WORK_CITY``; an empty string is sent
                when ``None``.

        Returns:
            The full decoded JSON response, or ``None`` on failure.
        """
        url = "https://capi.zhaopin.com/capi/searchrecommend/searchPositionsCompany"
        version = os.getenv("ZP_MINIAPP_CAPI_VERSION", "4.1.230")
        device_id = os.getenv("ZP_MINIAPP_DEVICE_ID", "CFD341F3-29D6-4C46-81BF-F6C705407F2E")
        headers: Dict[str, Any] = {
            "User-Agent": self._miniapp_user_agent(),
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "x-zp-action-id": "",
            "xweb_xhr": "1",
            "x-zp-page-code": "0",
            "x-zp-version": version,
            "x-zp-platform": "12",
            "x-zp-device-id": device_id,
            "content-type": "application/json",
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/619/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        params: Dict[str, Any] = {
            "channel": "wxxiaochengxu",
            "platform": "12",
            "version": version,
            "d": device_id,
            "S_SOU_COMPANY_ID": company_number,
            "S_SOU_POSITION_SOURCE_TYPE": 1,
            "eventScenario": "wxmpZhaopinSearchPositionsCompany",
            "pageCode": "wxmpZhaopinCompanyDetailPage",
            "pageIndex": page_index,
            "pageSize": page_size,
            "S_SOU_JD_JOB_LEVEL": "",
        }
        at_token, rt_token = self._miniapp_tokens()
        if at_token:
            params["at"] = at_token
        if rt_token:
            params["rt"] = rt_token
        params["S_SOU_WORK_CITY"] = work_city if work_city is not None else ""

        return self._request_json("GET", url, headers, params=params)