import json
import os
import shlex
import time
import uuid
from typing import Any, Dict, List, Optional
from urllib.parse import urlencode

import requests
from loguru import logger

from app.core.algorithms.antispider import (
    IPAnomalyDetector,
    IPStrategyConfig,
    SmartIPManager,
    generate_boss_trace_id,
    generate_token,
)


class BossService:
    """HTTP client for the zhipin.com ``/wapi/zpgeek/miniapp/...`` endpoints.

    One ``requests.Session`` is shared by all calls. It carries the cookies
    seeded by :meth:`init_cookies`, the login tokens set through
    :meth:`set_login_data`, and the proxy route selected either by the
    ``SmartIPManager`` or explicitly through :meth:`set_proxy`.

    Every public ``get_*`` / ``search_*`` method returns the decoded JSON
    response on success and ``None`` on any failure (the error is logged).
    """

    # Timeout, in seconds, applied to every outgoing HTTP request.
    REQUEST_TIMEOUT = 30

    # Path of the job-detail sub-request; also the key under which its result
    # is looked up inside the batch response's ``zpData``.
    _JOB_DETAIL_PATH = "/wapi/zpgeek/miniapp/job/detail.json"

    # Header names (lower-case) whose values must never reach the logs.
    _SENSITIVE_HEADERS = frozenset(
        {
            "mpt",
            "wt2",
            "cookie",
            "set-cookie",
            "authorization",
            "proxy-authorization",
        }
    )

    def __init__(self, proxy_pool: Optional[List[Dict[str, str]]] = None):
        """Create the session, pick the initial route and seed cookies.

        Args:
            proxy_pool: Optional list of proxy mappings handed to
                ``SmartIPManager``; ``None`` passes no pool.
        """
        self.app_id = 10002
        self.zp_product_id = 10002
        self.serve_domain = "https://www.zhipin.com"
        self.api_domain = "https://wxapp.zhipin.com"

        self.session = requests.Session()
        # Do not pick up proxy settings from the process environment; the
        # route is controlled explicitly below and via set_proxy().
        self.session.trust_env = False
        # NOTE(review): this adds an HTTP request header literally named
        # `no_proxy` to the session; it does not configure proxy bypass.
        # Kept unchanged so the outgoing request stays identical - confirm
        # whether it is intended.
        self.session.headers.update({'no_proxy': '10.0.0.0/16,example.com,.example.com'})

        # Initialize IP Strategy
        self.ip_cfg = IPStrategyConfig()
        self.ip_detector = IPAnomalyDetector(self.ip_cfg)
        self.ip_manager = SmartIPManager(proxy_pool, self.ip_cfg)

        # Initial route
        route_mode, route_cfg = self.ip_manager.current_route()
        if route_mode == 'proxy' and route_cfg:
            self.session.proxies = route_cfg

        self.device_id = str(uuid.uuid4())
        self.wx_version = "8.0.43"
        self.mini_version = "1.0.0"
        self.scene = 1001

        self.default_headers = {
            "Accept": "*/*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "www.zhipin.com",
            "Referer": "https://servicewechat.com/wxa8da525af05281f3/571/page-frame.html",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.10(0x13080a10) XWEB/1227",
            "X-Requested-With": "XMLHttpRequest",
            "platform": "zhipin/mac",
            "zp_app_id": str(self.app_id),
            "ver": "100.0000",
            "mini_ver": "100.0000",
            "ua": json.dumps({"model": "Mac16,8", "platform": "mac"}),
            "zp_product_id": str(self.zp_product_id),
            "scene": "1006",
            "xweb_xhr": "1",
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty"
        }

        self.login_data = {
            "mpt": "",  # Needs to be filled via login/token logic if required
            "wt2": "",
            "openId": "",
            "traceid": "F-77d05bnXuMVrHIB3"
        }
        self.current_token_id: Optional[int] = None

        self.init_cookies()

    def init_cookies(self):
        """Seed the session cookie jar for the ``.zhipin.com`` domain."""
        now = time.time()
        now_seconds = str(int(now))
        cookies = {
            '__zp_stoken__': generate_token(),
            'Hm_lvt_194df3105ad7148dcf2b98a91b5e727a': now_seconds,
            'Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a': now_seconds,
            '__c': self.device_id[:8],
            '__g': '-',
            '__l': 'l=%2Fwww.zhipin.com%2F&r=&friend_source=0&s=3&friend_source=0',
            'lastCity': '101010100',
            'cityName': '%E5%8C%97%E4%BA%AC',
            '__zp_sseed__': 'btHZ0bjBq8m//WNwlVrPUnVcIvini5J5P5LQUbflM24=',
            '__zp_sname__': '3998243a',
            '__zp_sts__': str(int(now * 1000))
        }
        for name, value in cookies.items():
            self.session.cookies.set(name, value, domain='.zhipin.com')

    def set_login_data(self, mpt: str, wt2: str, open_id: str = "") -> None:
        """Store the login tokens and mirror non-empty ones into cookies.

        Args:
            mpt: Value for the ``mpt`` login field / cookie.
            wt2: Value for the ``wt2`` login field / cookie.
            open_id: Value stored under ``openId`` in ``login_data``.
        """
        self.login_data.update(
            {
                "mpt": mpt,
                "wt2": wt2,
                "openId": open_id,
            }
        )
        if wt2:
            self.session.cookies.set("wt2", wt2, domain=".zhipin.com")
        if mpt:
            self.session.cookies.set("mpt", mpt, domain=".zhipin.com")

    def set_proxy(self, proxy: Optional[str]) -> None:
        """Route the session through ``proxy`` or fall back to the default.

        Args:
            proxy: Proxy URL used for both ``http`` and ``https``. ``None``,
                an empty string, or a value that is only whitespace and/or
                backticks resets the session to the route currently reported
                by the IP manager.
        """
        # Normalise before testing for emptiness, so "  " or "``" cannot end
        # up as an empty proxy URL in session.proxies.
        cleaned = proxy.strip().strip("`").strip() if proxy else ""
        if not cleaned:
            self.session.proxies = {}
            route_mode, route_cfg = self.ip_manager.current_route()
            if route_mode == "proxy" and route_cfg:
                self.session.proxies = route_cfg
            logger.info("BossService proxy reset to default route")
            return

        proxies = {"http": cleaned, "https": cleaned}
        self.session.proxies = proxies
        logger.info(f"BossService using user proxy: {proxies}")

    def build_request_headers(self, custom_headers: Optional[Dict] = None) -> Dict[str, str]:
        """Return the headers for one request.

        Starts from a copy of ``default_headers``, adds the login tokens, a
        fresh trace id and a millisecond timestamp, then applies
        ``custom_headers`` last so the caller's values win.
        """
        headers = self.default_headers.copy()
        headers.update({
            "mpt": self.login_data.get("mpt", ""),
            "scene": "1006",
            "wt2": self.login_data.get("wt2", ""),
            "Traceid": generate_boss_trace_id()
        })
        headers["timestamp"] = str(int(time.time() * 1000))
        if custom_headers:
            headers.update(custom_headers)
        return headers

    def _sanitize_headers(self, headers: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``headers`` that is safe to write to the log.

        Non-empty values of the headers listed in ``_SENSITIVE_HEADERS``
        (matched case-insensitively) are replaced by ``"***"``. Empty values
        are left as they are so the log still shows that a token is unset.
        The input mapping is not modified.
        """
        sanitized: Dict[str, Any] = {}
        for name, value in (headers or {}).items():
            is_sensitive = str(name).lower() in self._SENSITIVE_HEADERS
            sanitized[name] = "***" if is_sensitive and value else value
        return sanitized

    def _build_curl_command(
        self,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Render the request as a shell-safe ``curl`` command line."""
        curl_url = url
        if params and isinstance(params, dict):
            query_string = urlencode(params)
            if query_string:
                separator = "&" if "?" in curl_url else "?"
                curl_url = f"{curl_url}{separator}{query_string}"

        parts = ["curl", "-X", shlex.quote(str(method)), shlex.quote(curl_url)]
        for key, value in headers.items():
            parts.extend(["-H", shlex.quote(f"{key}: {value}")])
        if json_body is not None:
            body = json.dumps(json_body, ensure_ascii=False)
            parts.extend(["--data", shlex.quote(body)])
        return " ".join(parts)

    def _log_request_response(
        self,
        label: str,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        response: Optional[requests.Response] = None,
    ) -> None:
        """Log one request, an equivalent curl command, and its response.

        Headers are passed through :meth:`_sanitize_headers` before being
        logged. A failure while building the curl line is logged at debug
        level and never propagates. At most the first 1000 characters of the
        response body are logged.
        """
        safe_headers = self._sanitize_headers(headers)
        current_proxies = getattr(self.session, "proxies", None)
        proxy_info = current_proxies if current_proxies else {}
        login_flags = {
            "mpt_set": bool(self.login_data.get("mpt")),
            "wt2_set": bool(self.login_data.get("wt2")),
        }
        logger.info(
            f"[Boss-{label}] request method={method} url={url} headers={safe_headers} "
            f"params={params} json={json_body} proxies={proxy_info} login={login_flags}"
        )

        try:
            curl_cmd = self._build_curl_command(
                method, url, safe_headers, params=params, json_body=json_body
            )
            logger.info(f"[Boss-{label}] curl_debug {curl_cmd}")
        except Exception as e:
            # Debug aid only: never let it break the actual request flow.
            logger.debug(f"[Boss-{label}] build curl error: {e}")

        if response is None:
            return

        try:
            text_sample = (response.text or "")[:1000]
        except Exception:
            text_sample = ""
        logger.info(
            f"[Boss-{label}] response status={response.status_code} "
            f"headers={self._sanitize_headers(dict(response.headers))} "
            f"body_sample={text_sample}"
        )

    def build_request_data(self, data: Optional[Dict] = None) -> Dict[str, Any]:
        """Return the common request fields merged with ``data``.

        The common fields are ``appId``, ``scene`` and a millisecond
        ``timestamp``; keys in ``data`` override them.
        """
        request_data = {
            "appId": self.app_id,
            "scene": self.scene,
            "timestamp": int(time.time() * 1000)
        }
        if data:
            request_data.update(data)
        return request_data

    def _get_json(
        self,
        label: str,
        path: str,
        params: Dict[str, Any],
        referer: str,
        failure_message: str,
    ) -> Optional[Dict]:
        """GET ``serve_domain + path`` and return the decoded JSON body.

        Args:
            label: Tag used in the ``[Boss-<label>]`` log lines.
            path: URL path appended to ``serve_domain``.
            params: Endpoint-specific query parameters, merged over the
                common ones by :meth:`build_request_data`.
            referer: Value for the ``Referer`` header.
            failure_message: Prefix of the error log line on failure.

        Returns:
            The decoded JSON response, or ``None`` if building, sending or
            decoding the request raised or the status was an HTTP error.
        """
        url = f"{self.serve_domain}{path}"
        try:
            headers = self.build_request_headers({"Referer": referer})
            request_data = self.build_request_data(params)
            response = self.session.get(
                url,
                headers=headers,
                params=request_data,
                timeout=self.REQUEST_TIMEOUT,
            )
            self._log_request_response(
                label,
                "GET",
                url,
                headers,
                params=request_data,
                json_body=None,
                response=response,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Service boundary: callers rely on None instead of an exception.
            logger.error(f"{failure_message}: {e}")
            return None

    def get_job_detail_by_id(self, job_id: str, lid: str = "", security_id: str = "") -> Optional[Dict]:
        """Get job posting details by job ID.

        Sends one batch POST containing the job-detail and the
        jobdetail-improvement sub-requests.

        Args:
            job_id: Sent as ``jobId`` in both sub-requests.
            lid: Sent as ``lid`` in both sub-requests.
            security_id: Sent as ``securityId`` in both sub-requests.

        Returns:
            The job-detail entry of ``zpData`` when the response has
            ``code == 0`` and contains it; otherwise the whole decoded
            response. ``None`` if the request or decoding failed.
        """
        logger.info(f"🔍 获取招聘详情: {job_id}")

        # Batch request simulation
        sub_reqs = [
            {
                "path": self._JOB_DETAIL_PATH,
                "method": "GET",
                "query": urlencode({
                    "securityId": security_id,
                    "jobId": job_id,
                    "lid": lid,
                    "source": "10"
                })
            },
            {
                "path": "/wapi/zpgeek/miniapp/jobdetail/improvement/query.json",
                "method": "GET",
                "query": urlencode({
                    "securityId": security_id,
                    "jobId": job_id,
                    "lid": lid
                })
            }
        ]
        post_data = {
            "subReqs": sub_reqs,
            "appId": self.app_id
        }
        headers = self.build_request_headers({
            "Content-Type": "application/json",
            "Referer": "https://servicewechat.com/wxa8da525af05281f3/585/page-frame.html"
        })
        url = f"{self.serve_domain}/wapi/batch/requests"

        try:
            response = self.session.post(
                url,
                json=post_data,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            self._log_request_response(
                "job-detail",
                "POST",
                url,
                headers,
                params=None,
                json_body=post_data,
                response=response,
            )
            response.raise_for_status()
            data = response.json()

            # Prefer just the job-detail part of the batch response; fall
            # back to the whole payload when it is missing.
            zp_data = data.get("zpData")
            if data.get("code") == 0 and zp_data and self._JOB_DETAIL_PATH in zp_data:
                return zp_data[self._JOB_DETAIL_PATH]
            return data
        except Exception as e:
            logger.error(f"Failed to fetch job detail: {e}")
            return None

    def get_company_detail_by_id(self, company_id: str) -> Optional[Dict]:
        """Get company details by company ID.

        Args:
            company_id: Sent as ``brandId``.

        Returns:
            The decoded JSON response, or ``None`` on failure.
        """
        logger.info(f"🏢 获取公司详情: {company_id}")
        params = {
            "brandId": company_id,
            "appId": str(self.app_id)
        }
        return self._get_json(
            "company-detail",
            "/wapi/zpgeek/miniapp/brand/detail.json",
            params,
            "https://servicewechat.com/wxa8da525af05281f3/574/page-frame.html",
            "Failed to fetch company detail",
        )

    def get_company_jobs_by_id(self, company_id: str, page: int = 1) -> Optional[Dict]:
        """Get the list of job postings of a company by company ID.

        Args:
            company_id: Sent as ``brandId``.
            page: Sent as ``page``.

        Returns:
            The decoded JSON response, or ``None`` on failure.
        """
        logger.info(f"📄 获取公司职位列表: {company_id}, page={page}")
        params = {
            "brandId": company_id,
            "query": "",
            "page": page,
            "hasMore": "true",
            "positionLv1": 0,
            "city": "",
            "experience": "",
            "salary": "",
            "appId": str(self.app_id),
        }
        return self._get_json(
            "company-joblist",
            "/wapi/zpgeek/miniapp/brand/joblist.json",
            params,
            "https://servicewechat.com/wxa8da525af05281f3/587/page-frame.html",
            "Failed to fetch company job list",
        )

    def search_jobs(self, keyword: str, city_code: str = "101010100", page: int = 1) -> Optional[Dict]:
        """Search job postings.

        Args:
            keyword: Sent as ``query``.
            city_code: Sent as ``city``.
            page: Sent as ``page``; ``pageSize`` is fixed at 15.

        Returns:
            The decoded JSON response, or ``None`` on failure.
        """
        params = {
            'pageSize': 15,
            'query': keyword,
            'city': city_code,
            'page': page,
            'appId': str(self.app_id)
        }
        return self._get_json(
            "search-jobs",
            "/wapi/zpgeek/miniapp/search/joblist.json",
            params,
            "https://www.zhipin.com/web/geek/job",
            "Search failed",
        )