# Listing metadata from the code viewer: 387 lines, 16 KiB, Python
import json
import os
import random
import time
from typing import Dict, Any, List, Optional
from urllib.parse import urlencode

import requests
from loguru import logger


class ZhilianService:
    """HTTP client for Zhilian (zhaopin.com) PC-web and WeChat mini-program endpoints.

    Every call goes through one ``requests.Session`` so that proxy settings
    apply uniformly.  Network and parsing failures are logged and reported to
    the caller as ``None`` (or ``[]`` for ``search_jobs``) instead of raising.
    """

    # Header names whose values must never be written to the logs.
    _SENSITIVE_HEADERS = frozenset(
        {"authorization", "cookie", "set-cookie", "x-zp-at", "x-zp-rt"}
    )
    # Query parameters that carry the same access/refresh tokens.
    _SENSITIVE_PARAMS = frozenset({"at", "rt"})

    _POSITION_DETAIL_URL = "https://fe-api.zhaopin.com/c/i/jobs/position-detail-new"
    _PC_USER_AGENT = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    )
    _MINIAPP_DEFAULT_UA = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 "
        "MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Mac "
        "MacWechat/WMPF MacWechat/3.8.7(0x13080712) UnifiedPCMacWechat(0xf26414f0) XWEB/16962"
    )

    def __init__(self, proxy_url: Optional[str] = None):
        """Create the session and optionally route it through ``proxy_url``."""
        self.session = requests.Session()
        # Reuse set_proxy so constructor input gets the same clean-up.
        self.set_proxy(proxy_url)

    def set_proxy(self, proxy_url: Optional[str]) -> None:
        """Set (or clear) the HTTP/HTTPS proxy used by the session.

        Surrounding whitespace and stray backticks are removed.  A value that
        is empty after clean-up clears the proxy configuration.
        """
        cleaned = proxy_url.strip().strip("`").strip() if proxy_url else ""
        if not cleaned:
            self.session.proxies = {}
            return
        self.session.proxies = {"http": cleaned, "https": cleaned}

    # ------------------------------------------------------------------
    # Logging helpers
    # ------------------------------------------------------------------

    def _sanitize_headers(self, headers: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``headers`` with credential-bearing values replaced by ``***``."""
        return {
            name: "***" if str(name).lower() in self._SENSITIVE_HEADERS else value
            for name, value in headers.items()
        }

    def _sanitize_params(self, params: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Return a copy of ``params`` with token parameters masked; non-dicts pass through."""
        if not isinstance(params, dict):
            return params
        return {
            name: "***" if name in self._SENSITIVE_PARAMS else value
            for name, value in params.items()
        }

    @staticmethod
    def _shell_single_quote(text: str) -> str:
        """Wrap ``text`` in single quotes, escaping embedded single quotes for a POSIX shell."""
        return "'" + text.replace("'", "'\"'\"'") + "'"

    def _build_curl(
        self,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Render the request as a ``curl`` command line for debugging.

        Callers are expected to pass already-sanitized headers and params.
        """
        curl_url = url
        if params and isinstance(params, dict):
            query_string = urlencode(params)
            if query_string:
                separator = "&" if "?" in curl_url else "?"
                curl_url = f"{curl_url}{separator}{query_string}"

        header_parts = [
            "-H " + self._shell_single_quote(f"{name}: {value}")
            for name, value in headers.items()
        ]
        data_part = ""
        if json_body is not None:
            body_str = json.dumps(json_body, ensure_ascii=False)
            data_part = " --data " + self._shell_single_quote(body_str)
        return (
            f"curl -X {method.upper()} {self._shell_single_quote(curl_url)} "
            + " ".join(header_parts)
            + data_part
        )

    def _log_request_response(
        self,
        label: str,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        response: Optional["requests.Response"] = None,
    ) -> None:
        """Log the outgoing request, an equivalent curl command, and the response.

        Credentials in headers and query parameters are masked.  At most the
        first 1000 characters of the response body are logged.
        """
        safe_headers = self._sanitize_headers(headers)
        safe_params = self._sanitize_params(params)
        logger.info(
            f"[Zhilian-{label}] request method={method} url={url} headers={safe_headers} "
            f"params={safe_params} json={json_body}"
        )
        try:
            curl_cmd = self._build_curl(method, url, safe_headers, safe_params, json_body)
            logger.info(f"[Zhilian-{label}] curl_debug {curl_cmd}")
        except Exception as e:
            # Deliberately broad: debug logging must never break the request path.
            logger.debug(f"[Zhilian-{label}] build curl error: {e}")

        if response is None:
            return
        try:
            text_sample = (response.text or "")[:1000]
        except Exception:
            text_sample = "<unreadable>"
        logger.info(
            f"[Zhilian-{label}] response status={response.status_code} "
            f"headers={self._sanitize_headers(dict(response.headers))} "
            f"body_sample={text_sample}"
        )

    # ------------------------------------------------------------------
    # Request-parameter generators
    # ------------------------------------------------------------------

    def _gen_client_id(self) -> str:
        """Generate a random UUID-v4-shaped identifier for ``x-zp-client-id``."""
        seed = int(time.time() * 1000) + int(time.perf_counter() * 1000)

        def repl(c: str) -> str:
            n = int((seed + random.random() * 16) % 16)
            if c == 'x':
                return hex(n)[2:]
            # 'y' position: force the top two bits to binary 10 (one of 8, 9, a, b).
            return hex((n & 0x3) | 0x8)[2:]

        tpl = "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
        return ''.join(repl(c) if c in 'xy' else c for c in tpl)

    def _gen_v(self) -> float:
        """Return a random float rounded to 8 decimals, sent as the ``_v`` parameter."""
        return round(random.random(), 8)

    def _gen_page_request_id(self) -> str:
        """Return ``<fixed-hex>-<epoch-ms>-<random 0..999999>`` for ``x-zp-page-request-id``."""
        return f"cf1e3b3e655b4eb5a306110a83c77c29-{int(time.time()*1000)}-{random.randint(0,999999)}"

    def _build_headers_pc(self) -> Dict[str, str]:
        """Return the base browser-like headers used for the PC web API."""
        return {
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "content-type": "application/json;charset=UTF-8",
            "origin": "https://www.zhaopin.com",
            "priority": "u=1, i",
            "referer": "https://www.zhaopin.com/",
            "sec-ch-ua-mobile": "?0",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "x-zp-page-code": "0",
        }

    def _miniapp_user_agent(self) -> str:
        """Return the mini-program User-Agent (``ZP_MINIAPP_UA`` or the built-in default)."""
        return os.getenv("ZP_MINIAPP_UA", self._MINIAPP_DEFAULT_UA)

    @staticmethod
    def _miniapp_tokens() -> Dict[str, str]:
        """Return the non-empty tokens from ``ZP_MINIAPP_AT`` / ``ZP_MINIAPP_RT`` keyed ``at`` / ``rt``."""
        tokens: Dict[str, str] = {}
        at_token = os.getenv("ZP_MINIAPP_AT", "").strip()
        rt_token = os.getenv("ZP_MINIAPP_RT", "").strip()
        if at_token:
            tokens["at"] = at_token
        if rt_token:
            tokens["rt"] = rt_token
        return tokens

    def _request_json(self, method: str, url: str, headers: Dict[str, str], params: Optional[Dict[str, Any]] = None,
                      json_body: Optional[Dict[str, Any]] = None, timeout: int = 30) -> Optional[Dict[str, Any]]:
        """Send a request, log it, and return the decoded JSON body.

        Returns ``None`` (after logging the error) on any transport error,
        non-2xx status, or undecodable body.
        """
        try:
            resp = self.session.request(method.upper(), url, headers=headers, params=params, json=json_body, timeout=timeout)
            self._log_request_response(
                "request",
                method.upper(),
                url,
                headers,
                params=params,
                json_body=json_body,
                response=resp,
            )
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            logger.error(f"Request failed: {e}")
            return None

    # ------------------------------------------------------------------
    # PC web API
    # ------------------------------------------------------------------

    def get_job_detail(self, job_number: str) -> Optional[Dict[str, Any]]:
        """Fetch the position-detail payload for ``job_number``.

        Returns the ``data`` member of the response, or ``None`` when the
        request fails or the response is empty / not a JSON object.
        """
        client_id = self._gen_client_id()
        params_pc = {
            "number": job_number,
            "_v": self._gen_v(),
            "x-zp-page-request-id": self._gen_page_request_id(),
            "x-zp-client-id": client_id,
        }
        headers_pc = {
            "User-Agent": self._PC_USER_AGENT,
            # The same client id is sent both as a query parameter and as a cookie.
            "Cookie": f"x-zp-client-id={client_id}",
        }
        headers_pc.update(self._build_headers_pc())

        data_pc = self._request_json("GET", self._POSITION_DETAIL_URL, headers_pc, params=params_pc)
        if data_pc and isinstance(data_pc, dict):
            return data_pc.get("data")
        return None

    def fetch_company_desc_by_job(self, number: str) -> Optional[str]:
        """Return the company description attached to job ``number``.

        Reads ``data.detailedCompany.companyDescription`` from the position
        detail response.  Returns ``None`` when the request fails, the path is
        missing or has an unexpected type, or the description is empty.
        """
        detail = self.get_job_detail(number)
        if not isinstance(detail, dict):
            return None
        company = detail.get("detailedCompany")
        if not isinstance(company, dict):
            return None
        description = company.get("companyDescription")
        if isinstance(description, str) and description:
            return description
        return None

    def search_jobs(self, city_id: int = 801, page_size: int = 15, page_index: int = 1, job_level3_code: Optional[str] = None) -> List[Dict[str, Any]]:
        """Search positions and attach each job's company description.

        Every returned job that has a ``number`` gets a ``companyDesc`` key
        (empty string when the description cannot be fetched); this costs one
        extra detail request per job.  Returns ``[]`` on failure, on a
        response code other than 200, or when the payload has no job list.

        NOTE(review): ``city_id`` is accepted but not sent; the request always
        uses an empty ``S_SOU_WORK_CITY``.  Kept as-is to avoid changing
        results for existing callers -- confirm whether this is intended.
        """
        headers = self._build_headers_pc()
        base_url = "https://fe-api.zhaopin.com/c/i/search/positions"

        params = {
            "_v": self._gen_v(),
            "x-zp-page-request-id": self._gen_page_request_id(),
            "x-zp-client-id": self._gen_client_id(),
        }
        payload = {
            "S_SOU_WORK_CITY": "",
            "order": 4,
            "pageSize": page_size,
            "pageIndex": page_index,
            "eventScenario": "pcSearchedSouSearch",
            "anonymous": 1,
            "platform": 13,
            "version": "0.0.0",
        }
        if job_level3_code:
            payload["S_SOU_JD_JOB_LEVEL3"] = job_level3_code

        data = self._request_json("POST", base_url, headers, params=params, json_body=payload)
        if not isinstance(data, dict) or data.get("code") != 200:
            return []

        # "data" or "list" may be null / missing in the response; treat that as no results.
        container = data.get("data")
        jobs = container.get("list") if isinstance(container, dict) else None
        if not isinstance(jobs, list):
            return []

        for job in jobs:
            if not isinstance(job, dict):
                continue
            num = job.get("number")
            if num:
                job["companyDesc"] = self.fetch_company_desc_by_job(str(num)) or ""
        return jobs

    # ------------------------------------------------------------------
    # WeChat mini-program API
    # ------------------------------------------------------------------

    def search_company_jobs_by_name(self, company_name: str, city_id: Optional[int] = None, page_size: int = 15, page_index: int = 1) -> Optional[Dict[str, Any]]:
        """Search positions by company name through the mini-program endpoint.

        Optional settings are read from the environment: ``ZP_MINIAPP_UA``,
        ``ZP_MINIAPP_VERSION``, ``ZP_MINIAPP_DEVICE_ID``, ``ZP_MINIAPP_AT``,
        ``ZP_MINIAPP_RT`` and ``ZP_MINIAPP_RESUME_NUMBER``.

        Returns the decoded JSON response unchanged, or ``None`` on failure.
        """
        url = "https://cgate.zhaopin.com/positionbusiness/searchrecommend/searchPositions"
        headers: Dict[str, Any] = {
            "User-Agent": self._miniapp_user_agent(),
            "Content-Type": "application/json",
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "x-zp-action-id": "",
            "xweb_xhr": "1",
            "x-zp-page-code": "7019",
            "x-zp-version": os.getenv("ZP_MINIAPP_VERSION", "4.1.224"),
            "x-zp-platform": "12",
            "x-zp-device-id": os.getenv("ZP_MINIAPP_DEVICE_ID", "A774EA47-0AB5-4608-B51D-84BF51CC0786"),
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/617/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        for name, token in self._miniapp_tokens().items():
            headers[f"x-zp-{name}"] = token

        body: Dict[str, Any] = {
            "eventScenario": "wxmpZhaopinSearchV2",
            "pageIndex": page_index,
            "pageSize": page_size,
            "filterMinSalary": 1,
            "S_SOU_EXPAND": "SOU_COMPANY_ID",
            "S_SOU_FULL_INDEX": company_name,
            "sortType": "DEFAULT",
            "version": "8.11.22",
            "identity": "2",
            "anonymous": 0,
        }
        if city_id is not None:
            body["S_SOU_WORK_CITY"] = city_id
        resume_number = os.getenv("ZP_MINIAPP_RESUME_NUMBER", "").strip()
        if resume_number:
            body["resumeNumber"] = resume_number

        try:
            resp = self.session.post(url, headers=headers, json=body, timeout=30)
            self._log_request_response(
                "search-company-jobs",
                "POST",
                url,
                headers,
                params=None,
                json_body=body,
                response=resp,
            )
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            logger.error(f"Zhilian search_company_jobs_by_name failed: {e}")
            return None

    def get_company_detail(self, company_number: str) -> Optional[Dict[str, Any]]:
        """Fetch company details.

        Returns the ``data`` member of the response, or ``None`` when the
        request fails or the response is empty / not a JSON object.
        """
        url = "https://cgate.zhaopin.com/positionbusiness/exposure/companyDetail"
        params = {
            "number": company_number,
            "platform": "12",
            "version": "0.0.0",
        }
        headers = {
            "User-Agent": self._miniapp_user_agent(),
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "xweb_xhr": "1",
            "x-zp-page-code": "0",
            "x-zp-version": os.getenv("ZP_MINIAPP_VERSION", "4.1.224"),
            "x-zp-platform": "12",
            "x-zp-device-id": os.getenv("ZP_MINIAPP_DEVICE_ID", "A774EA47-0AB5-4608-B51D-84BF51CC0786"),
            "content-type": "application/json",
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/617/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        for name, token in self._miniapp_tokens().items():
            headers[f"x-zp-{name}"] = token

        data = self._request_json("GET", url, headers, params=params)
        if data and isinstance(data, dict):
            return data.get("data")
        return None

    def get_company_jobs_by_id(
        self,
        company_number: str,
        page_index: int = 1,
        page_size: int = 30,
        work_city: Optional[int] = None,
    ) -> Optional[Dict[str, Any]]:
        """List a company's positions by company number.

        Tokens from ``ZP_MINIAPP_AT`` / ``ZP_MINIAPP_RT`` are sent as the
        ``at`` / ``rt`` query parameters when set.  ``work_city`` is sent as
        ``S_SOU_WORK_CITY`` (empty string when ``None``).

        Returns the decoded JSON response unchanged, or ``None`` on failure.
        """
        url = "https://capi.zhaopin.com/capi/searchrecommend/searchPositionsCompany"
        version = os.getenv("ZP_MINIAPP_CAPI_VERSION", "4.1.230")
        device_id = os.getenv("ZP_MINIAPP_DEVICE_ID", "CFD341F3-29D6-4C46-81BF-F6C705407F2E")
        headers: Dict[str, Any] = {
            "User-Agent": self._miniapp_user_agent(),
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "x-zp-action-id": "",
            "xweb_xhr": "1",
            "x-zp-page-code": "0",
            "x-zp-version": version,
            "x-zp-platform": "12",
            "x-zp-device-id": device_id,
            "content-type": "application/json",
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/619/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        params: Dict[str, Any] = {
            "channel": "wxxiaochengxu",
            "platform": "12",
            "version": version,
            "d": device_id,
            "S_SOU_COMPANY_ID": company_number,
            "S_SOU_POSITION_SOURCE_TYPE": 1,
            "eventScenario": "wxmpZhaopinSearchPositionsCompany",
            "pageCode": "wxmpZhaopinCompanyDetailPage",
            "pageIndex": page_index,
            "pageSize": page_size,
            "S_SOU_JD_JOB_LEVEL": "",
        }
        # This endpoint takes the tokens as query parameters rather than headers.
        params.update(self._miniapp_tokens())
        params["S_SOU_WORK_CITY"] = work_city if work_city is not None else ""
        return self._request_json("GET", url, headers, params=params)