import json
import os
import random
import time
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlencode

import requests
from loguru import logger


class ZhilianService:
    """HTTP client for a handful of zhaopin.com (Zhilian) endpoints.

    All requests go through one shared ``requests.Session`` so that proxy
    settings apply to every call. Each request/response pair is logged
    through loguru, with credential-bearing headers and query parameters
    masked.

    Environment variables read by the mini-program style calls:
    ``ZP_MINIAPP_UA``, ``ZP_MINIAPP_VERSION``, ``ZP_MINIAPP_CAPI_VERSION``,
    ``ZP_MINIAPP_DEVICE_ID``, ``ZP_MINIAPP_AT``, ``ZP_MINIAPP_RT`` and
    ``ZP_MINIAPP_RESUME_NUMBER``.
    """

    # Header names (lower-cased) whose values are replaced by "***" in logs.
    # x-zp-at / x-zp-rt carry the tokens read from ZP_MINIAPP_AT / ZP_MINIAPP_RT.
    _SENSITIVE_HEADERS = frozenset(
        {"authorization", "cookie", "set-cookie", "x-zp-at", "x-zp-rt"}
    )
    # Query-parameter names (lower-cased) whose values are masked in logs;
    # get_company_jobs_by_id sends the same tokens as "at" / "rt".
    _SENSITIVE_PARAMS = frozenset({"at", "rt"})

    # User agent sent with the fe-api.zhaopin.com (PC web) requests.
    _PC_USER_AGENT = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
    )
    # Fallback user agent for the mini-program requests when ZP_MINIAPP_UA is unset.
    _MINIAPP_DEFAULT_UA = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 "
        "MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Mac "
        "MacWechat/WMPF MacWechat/3.8.7(0x13080712) UnifiedPCMacWechat(0xf26414f0) XWEB/16962"
    )

    def __init__(self, proxy_url: Optional[str] = None):
        """Create the shared session, optionally routing it through a proxy.

        Args:
            proxy_url: Proxy URL used for both http and https, or ``None``
                for a direct connection.
        """
        self.session = requests.Session()
        # Delegate so the constructor applies the same cleanup as set_proxy.
        self.set_proxy(proxy_url)

    def set_proxy(self, proxy_url: Optional[str]) -> None:
        """Replace the session's proxy configuration.

        Args:
            proxy_url: Proxy URL for both http and https. A falsy value
                clears the proxies. Surrounding whitespace and backticks
                are stripped before use.
        """
        if not proxy_url:
            self.session.proxies = {}
            return
        proxy_url = proxy_url.strip().strip("`")
        self.session.proxies = {"http": proxy_url, "https": proxy_url}

    def _sanitize_headers(self, headers: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *headers* with credential values replaced by "***".

        Matching is case-insensitive on the header name.
        """
        return {
            k: "***" if str(k).lower() in self._SENSITIVE_HEADERS else v
            for k, v in headers.items()
        }

    def _sanitize_params(self, params: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Return a copy of *params* with token values replaced by "***".

        Anything that is not a dict (including ``None``) is returned unchanged.
        """
        if not isinstance(params, dict):
            return params
        return {
            k: "***" if str(k).lower() in self._SENSITIVE_PARAMS else v
            for k, v in params.items()
        }

    def _build_curl_command(
        self,
        method: str,
        url: str,
        safe_headers: Dict[str, Any],
        safe_params: Optional[Dict[str, Any]],
        json_body: Optional[Dict[str, Any]],
    ) -> str:
        """Render an already-sanitized request as a single-line curl command."""
        curl_url = url
        if safe_params and isinstance(safe_params, dict):
            query_string = urlencode(safe_params)
            if query_string:
                separator = "&" if "?" in curl_url else "?"
                curl_url = f"{curl_url}{separator}{query_string}"

        header_parts = []
        for k, v in safe_headers.items():
            # Close the single-quoted shell string, emit a quoted ', reopen it.
            v_str = str(v).replace("'", "'\"'\"'")
            header_parts.append(f"-H '{k}: {v_str}'")

        data_part = ""
        if json_body is not None:
            body_str = json.dumps(json_body, ensure_ascii=False)
            body_str = body_str.replace("'", "'\"'\"'")
            data_part = f" --data '{body_str}'"

        return f"curl -X {method.upper()} '{curl_url}' " + " ".join(header_parts) + data_part

    def _log_request_response(
        self,
        label: str,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        response: Optional[requests.Response] = None,
    ) -> None:
        """Log a request, an equivalent curl command and (if given) the response.

        Sensitive headers and query parameters are masked in every log line.
        Failure to build the curl line is logged at debug level and never
        propagates. Only the first 1000 characters of the response body are
        logged.

        Args:
            label: Tag embedded in the ``[Zhilian-<label>]`` log prefix.
            method: HTTP method.
            url: Request URL without the query parameters from *params*.
            headers: Request headers as sent.
            params: Query parameters as sent, if any.
            json_body: JSON request body, if any.
            response: The response object, or ``None`` to log the request only.
        """
        safe_headers = self._sanitize_headers(headers)
        safe_params = self._sanitize_params(params)
        logger.info(
            f"[Zhilian-{label}] request method={method} url={url} headers={safe_headers} "
            f"params={safe_params} json={json_body}"
        )

        # Best effort: a failure to render the debug command must not break the request flow.
        try:
            curl_cmd = self._build_curl_command(method, url, safe_headers, safe_params, json_body)
            logger.info(f"[Zhilian-{label}] curl_debug {curl_cmd}")
        except Exception as e:
            logger.debug(f"[Zhilian-{label}] build curl error: {e}")

        if response is None:
            return

        try:
            text_sample = (response.text or "")[:1000]
        except Exception:
            # Decoding the body failed; log an empty sample instead.
            text_sample = ""
        logger.info(
            f"[Zhilian-{label}] response status={response.status_code} "
            f"headers={self._sanitize_headers(dict(response.headers))} "
            f"body_sample={text_sample}"
        )

    def _gen_client_id(self) -> str:
        """Return a random UUID-v4-shaped hex string (8-4-4-4-12, version nibble 4)."""
        t = int(time.time() * 1000) + int(time.perf_counter() * 1000)

        def repl(c: str) -> str:
            n = int((t + random.random() * 16) % 16)
            if c == "x":
                return hex(n)[2:]
            # 'y' position: force the two high bits to 10 (one of 8, 9, a, b).
            return hex((n & 0x3) | 0x8)[2:]

        tpl = "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
        return "".join(repl(c) if c in "xy" else c for c in tpl)

    def _gen_v(self) -> float:
        """Return a random float in [0, 1) rounded to 8 decimals, sent as ``_v``."""
        return round(random.random(), 8)

    def _gen_page_request_id(self) -> str:
        """Return ``<fixed hex prefix>-<epoch millis>-<random int 0..999999>``."""
        return f"cf1e3b3e655b4eb5a306110a83c77c29-{int(time.time()*1000)}-{random.randint(0,999999)}"

    def _build_headers_pc(self) -> Dict[str, str]:
        """Return the base header set shared by the fe-api.zhaopin.com requests."""
        return {
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "content-type": "application/json;charset=UTF-8",
            "origin": "https://www.zhaopin.com",
            "priority": "u=1, i",
            "referer": "https://www.zhaopin.com/",
            "sec-ch-ua-mobile": "?0",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "x-zp-page-code": "0",
        }

    def _miniapp_user_agent(self) -> str:
        """Return ``ZP_MINIAPP_UA`` from the environment, or the built-in default."""
        return os.getenv("ZP_MINIAPP_UA", self._MINIAPP_DEFAULT_UA)

    def _miniapp_tokens(self) -> Tuple[str, str]:
        """Return the stripped ``(ZP_MINIAPP_AT, ZP_MINIAPP_RT)`` values ("" when unset)."""
        return (
            os.getenv("ZP_MINIAPP_AT", "").strip(),
            os.getenv("ZP_MINIAPP_RT", "").strip(),
        )

    def _request_json(
        self,
        method: str,
        url: str,
        headers: Dict[str, str],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        timeout: int = 30,
    ) -> Optional[Dict[str, Any]]:
        """Send a request through the session and return the decoded JSON body.

        The exchange is logged before the status check, so error responses
        are logged too.

        Returns:
            The parsed JSON, or ``None`` if the request, the status check or
            the JSON decoding failed (the error is logged, not raised).
        """
        try:
            resp = self.session.request(
                method.upper(),
                url,
                headers=headers,
                params=params,
                json=json_body,
                timeout=timeout,
            )
            self._log_request_response(
                "request",
                method.upper(),
                url,
                headers,
                params=params,
                json_body=json_body,
                response=resp,
            )
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            # Deliberate boundary: callers treat None as "no data".
            logger.error(f"Request failed: {e}")
            return None

    def fetch_company_desc_by_job(self, number: str) -> Optional[str]:
        """Return the company description attached to a job posting.

        Args:
            number: Job number passed to the position-detail endpoint.

        Returns:
            ``data.detailedCompany.companyDescription`` from the response
            when it is a non-empty string, otherwise ``None``.
        """
        detail = self.get_job_detail(number)
        if not isinstance(detail, dict):
            return None
        company = detail.get("detailedCompany")
        if not isinstance(company, dict):
            return None
        description = company.get("companyDescription")
        if isinstance(description, str) and description:
            return description
        return None

    def search_jobs(
        self,
        city_id: int = 801,
        page_size: int = 15,
        page_index: int = 1,
        job_level3_code: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Search positions and attach each job's company description.

        For every returned job that has a ``number``, one extra detail
        request is made and its result is stored under ``companyDesc``
        ("" when unavailable).

        Args:
            city_id: Accepted for interface compatibility.
                NOTE(review): this value is currently not sent; the payload
                always carries an empty ``S_SOU_WORK_CITY``. Confirm whether
                that is intended before wiring it in.
            page_size: Value sent as ``pageSize``.
            page_index: Value sent as ``pageIndex``.
            job_level3_code: Optional value sent as ``S_SOU_JD_JOB_LEVEL3``.

        Returns:
            The ``data.list`` entries of the response, or ``[]`` when the
            request failed, ``code`` is not 200, or the payload has an
            unexpected shape.
        """
        headers = self._build_headers_pc()
        base_url = "https://fe-api.zhaopin.com/c/i/search/positions"
        params = {
            "_v": self._gen_v(),
            "x-zp-page-request-id": self._gen_page_request_id(),
            "x-zp-client-id": self._gen_client_id(),
        }
        payload = {
            "S_SOU_WORK_CITY": "",
            "order": 4,
            "pageSize": page_size,
            "pageIndex": page_index,
            "eventScenario": "pcSearchedSouSearch",
            "anonymous": 1,
            "platform": 13,
            "version": "0.0.0",
        }
        if job_level3_code:
            payload["S_SOU_JD_JOB_LEVEL3"] = job_level3_code

        data = self._request_json("POST", base_url, headers, params=params, json_body=payload)
        if not isinstance(data, dict) or data.get("code") != 200:
            return []

        # "data" or "list" may be missing or null; treat both as "no results".
        inner = data.get("data")
        jobs = inner.get("list") if isinstance(inner, dict) else None
        if not isinstance(jobs, list):
            return []

        for job in jobs:
            if not isinstance(job, dict):
                continue
            num = job.get("number")
            if num:
                job["companyDesc"] = self.fetch_company_desc_by_job(str(num)) or ""
        return jobs

    def search_company_jobs_by_name(
        self,
        company_name: str,
        city_id: Optional[int] = None,
        page_size: int = 15,
        page_index: int = 1,
    ) -> Optional[Dict[str, Any]]:
        """Search positions by company name via the cgate searchPositions endpoint.

        Args:
            company_name: Value sent as ``S_SOU_FULL_INDEX``.
            city_id: Optional value sent as ``S_SOU_WORK_CITY``.
            page_size: Value sent as ``pageSize``.
            page_index: Value sent as ``pageIndex``.

        Returns:
            The decoded JSON response, or ``None`` on any failure (logged).
        """
        url = "https://cgate.zhaopin.com/positionbusiness/searchrecommend/searchPositions"
        headers: Dict[str, Any] = {
            "User-Agent": self._miniapp_user_agent(),
            "Content-Type": "application/json",
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "x-zp-action-id": "",
            "xweb_xhr": "1",
            "x-zp-page-code": "7019",
            "x-zp-version": os.getenv("ZP_MINIAPP_VERSION", "4.1.224"),
            "x-zp-platform": "12",
            "x-zp-device-id": os.getenv("ZP_MINIAPP_DEVICE_ID", "A774EA47-0AB5-4608-B51D-84BF51CC0786"),
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/617/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        at_token, rt_token = self._miniapp_tokens()
        if at_token:
            headers["x-zp-at"] = at_token
        if rt_token:
            headers["x-zp-rt"] = rt_token

        body: Dict[str, Any] = {
            "eventScenario": "wxmpZhaopinSearchV2",
            "pageIndex": page_index,
            "pageSize": page_size,
            "filterMinSalary": 1,
            "S_SOU_EXPAND": "SOU_COMPANY_ID",
            "S_SOU_FULL_INDEX": company_name,
            "sortType": "DEFAULT",
            "version": "8.11.22",
            "identity": "2",
            "anonymous": 0,
        }
        if city_id is not None:
            body["S_SOU_WORK_CITY"] = city_id
        resume_number = os.getenv("ZP_MINIAPP_RESUME_NUMBER", "").strip()
        if resume_number:
            body["resumeNumber"] = resume_number

        try:
            resp = self.session.post(url, headers=headers, json=body, timeout=30)
            self._log_request_response(
                "search-company-jobs",
                "POST",
                url,
                headers,
                params=None,
                json_body=body,
                response=resp,
            )
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            # Deliberate boundary: callers treat None as "no data".
            logger.error(f"Zhilian search_company_jobs_by_name failed: {e}")
            return None

    def get_job_detail(self, job_number: str) -> Optional[Dict[str, Any]]:
        """Fetch the full detail record of a job posting.

        Args:
            job_number: Job number passed to the position-detail endpoint.

        Returns:
            The ``data`` field of the response, or ``None`` when the request
            failed or the response is not a JSON object.
        """
        # The same generated client id is sent both as a query parameter and as a cookie.
        client_id = self._gen_client_id()
        url_pc = "https://fe-api.zhaopin.com/c/i/jobs/position-detail-new"
        params_pc = {
            "number": job_number,
            "_v": self._gen_v(),
            "x-zp-page-request-id": self._gen_page_request_id(),
            "x-zp-client-id": client_id,
        }
        headers_pc = {
            "User-Agent": self._PC_USER_AGENT,
            "Cookie": f"x-zp-client-id={client_id}",
        }
        headers_pc.update(self._build_headers_pc())

        data_pc = self._request_json("GET", url_pc, headers_pc, params=params_pc)
        if data_pc and isinstance(data_pc, dict):
            return data_pc.get("data")
        return None

    def get_company_detail(self, company_number: str) -> Optional[Dict[str, Any]]:
        """Fetch company details.

        Args:
            company_number: Value sent as the ``number`` query parameter.

        Returns:
            The ``data`` field of the response, or ``None`` when the request
            failed or the response is not a JSON object.
        """
        url = "https://cgate.zhaopin.com/positionbusiness/exposure/companyDetail"
        params = {
            "number": company_number,
            "platform": "12",
            "version": "0.0.0",
        }
        headers = {
            "User-Agent": self._miniapp_user_agent(),
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "xweb_xhr": "1",
            "x-zp-page-code": "0",
            "x-zp-version": os.getenv("ZP_MINIAPP_VERSION", "4.1.224"),
            "x-zp-platform": "12",
            "x-zp-device-id": os.getenv("ZP_MINIAPP_DEVICE_ID", "A774EA47-0AB5-4608-B51D-84BF51CC0786"),
            "content-type": "application/json",
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/617/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        at_token, rt_token = self._miniapp_tokens()
        if at_token:
            headers["x-zp-at"] = at_token
        if rt_token:
            headers["x-zp-rt"] = rt_token

        data = self._request_json("GET", url, headers, params=params)
        if data and isinstance(data, dict):
            return data.get("data")
        return None

    def get_company_jobs_by_id(
        self,
        company_number: str,
        page_index: int = 1,
        page_size: int = 30,
        work_city: Optional[int] = None,
    ) -> Optional[Dict[str, Any]]:
        """List a company's positions via the capi searchPositionsCompany endpoint.

        Unlike the other mini-program calls, the tokens from ``ZP_MINIAPP_AT``
        / ``ZP_MINIAPP_RT`` are sent as ``at`` / ``rt`` query parameters
        rather than as headers.

        Args:
            company_number: Value sent as ``S_SOU_COMPANY_ID``.
            page_index: Value sent as ``pageIndex``.
            page_size: Value sent as ``pageSize``.
            work_city: Value sent as ``S_SOU_WORK_CITY``; an empty string is
                sent when ``None``.

        Returns:
            The whole decoded JSON response (not just its ``data`` field),
            or ``None`` on failure.
        """
        url = "https://capi.zhaopin.com/capi/searchrecommend/searchPositionsCompany"
        version = os.getenv("ZP_MINIAPP_CAPI_VERSION", "4.1.230")
        device_id = os.getenv("ZP_MINIAPP_DEVICE_ID", "CFD341F3-29D6-4C46-81BF-F6C705407F2E")
        headers: Dict[str, Any] = {
            "User-Agent": self._miniapp_user_agent(),
            "x-zp-channel": "wxxiaochengxu",
            "x-zp-business-system": "73",
            "x-zp-action-id": "",
            "xweb_xhr": "1",
            "x-zp-page-code": "0",
            "x-zp-version": version,
            "x-zp-platform": "12",
            "x-zp-device-id": device_id,
            "content-type": "application/json",
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
            "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/619/page-frame.html",
            "accept-language": "zh-CN,zh;q=0.9",
            "priority": "u=1, i",
        }
        at_token, rt_token = self._miniapp_tokens()

        params: Dict[str, Any] = {
            "channel": "wxxiaochengxu",
            "platform": "12",
            "version": version,
            "d": device_id,
            "S_SOU_COMPANY_ID": company_number,
            "S_SOU_POSITION_SOURCE_TYPE": 1,
            "eventScenario": "wxmpZhaopinSearchPositionsCompany",
            "pageCode": "wxmpZhaopinCompanyDetailPage",
            "pageIndex": page_index,
            "pageSize": page_size,
            "S_SOU_JD_JOB_LEVEL": "",
        }
        if at_token:
            params["at"] = at_token
        if rt_token:
            params["rt"] = rt_token
        params["S_SOU_WORK_CITY"] = work_city if work_city is not None else ""

        return self._request_json("GET", url, headers, params=params)