import json
import os
import random
import time
from typing import Dict, Any, Optional, List
from urllib.parse import quote

import httpx
from loguru import logger

from app.core.algorithms.signature import SignatureGenerator
from jobs_spider.qcwy import search_company_jobs as qcwy_spider


class QcwyService:
    """HTTP client for the 51job (Qcwy) WeChat mini-program API.

    Requests go to ``https://cupid.51job.com`` and carry a ``sign`` header
    produced by ``SignatureGenerator`` plus a URL-encoded ``property`` header.
    Company-level lookups are delegated to the ``jobs_spider.qcwy`` spider.

    Optional credentials are read from the ``QCWY_ACCOUNT_ID`` and
    ``QCWY_USER_TOKEN`` environment variables at construction time.
    """

    # Header names (lower-case) whose values must never appear in logs.
    # "user-token" is the credential header this service itself sends.
    _SENSITIVE_HEADERS = frozenset(
        {"authorization", "proxy-authorization", "cookie", "set-cookie", "user-token"}
    )

    # Values of the "status" field that the API responses are checked against.
    _SUCCESS_STATUSES = ('1', 1)

    def __init__(self, proxy_url: Optional[str] = None):
        """Set up the signer, default headers and the underlying HTTP client.

        Args:
            proxy_url: Optional proxy URL handed to ``httpx.Client``; a falsy
                value means no proxy is configured.
        """
        self.signature_generator = SignatureGenerator("abfc8f9dcf8c3f3d8aa294ac5f2cf2cc7767e5592590f39c3f503271dd68562b")
        self.base_url = "https://cupid.51job.com"
        self.api_key = "51job"
        # NOTE(review): "uuid" and "timestamp" are computed once here and then
        # reused for every request made by this instance - confirm the server
        # does not require a fresh header timestamp per request.
        self.base_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.10(0x13080a10) XWEB/1227",
            "Connection": "keep-alive",
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br",
            "Content-Type": "application/json",
            "account-id": "",
            "From-Domain": "51job_weixin_wxapp",
            "xweb_xhr": "1",
            "user-token": "",
            "uuid": str(int(time.time() * 1000)) + str(random.randint(10000000, 99999999)),
            "partner": "",
            "timestamp": str(int(time.time() * 1000)),
            "Sec-Fetch-Site": "cross-site",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Referer": "https://servicewechat.com/wx1131e5c71e668b5d/391/page-frame.html",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }

        # Environment-provided credentials override the empty defaults above.
        env_account_id = os.getenv("QCWY_ACCOUNT_ID", "").strip()
        env_user_token = os.getenv("QCWY_USER_TOKEN", "").strip()
        if env_account_id:
            self.base_headers["account-id"] = env_account_id
        if env_user_token:
            self.base_headers["user-token"] = env_user_token

        self.client = self._build_client(proxy_url)

    @staticmethod
    def _build_client(proxy_url: Optional[str]) -> httpx.Client:
        """Create an ``httpx.Client`` with the service's fixed settings."""
        client_kwargs: Dict[str, Any] = {
            "timeout": 30.0,
            "verify": True,
            # Ignore proxy/CA settings from the process environment; only the
            # explicitly supplied proxy_url is honoured.
            "trust_env": False,
        }
        if proxy_url:
            client_kwargs["proxy"] = proxy_url
        return httpx.Client(**client_kwargs)

    def set_proxy(self, proxy_url: Optional[str]) -> None:
        """Swap the HTTP client for one using ``proxy_url`` (or no proxy).

        The new client is installed before the previous one is closed, so
        ``self.client`` is always usable. Closing the old client is
        best-effort: failures are logged at debug level and not raised.
        """
        old_client = getattr(self, "client", None)
        self.client = self._build_client(proxy_url)
        if old_client is not None:
            try:
                old_client.close()
            except Exception as e:
                logger.debug(f"[Qcwy] failed to close previous HTTP client: {e}")

    def _sanitize_headers(self, headers: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``headers`` with credential values replaced by ``***``.

        Header names are compared case-insensitively against
        ``_SENSITIVE_HEADERS``; all other entries are copied unchanged.
        """
        return {
            k: "***" if str(k).lower() in self._SENSITIVE_HEADERS else v
            for k, v in headers.items()
        }

    def _log_request_response(
        self,
        label: str,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        response: Optional[httpx.Response] = None,
    ) -> None:
        """Log one request and, when given, a summary of its response.

        Headers are passed through ``_sanitize_headers`` before logging and
        the response body is truncated to its first 1000 characters.
        """
        safe_headers = self._sanitize_headers(headers)
        logger.info(
            f"[Qcwy-{label}] request method={method} url={url} headers={safe_headers} "
            f"params={params} json={json_body}"
        )
        if response is None:
            return

        try:
            text_sample = (response.text or "")[:1000]
        except Exception:
            # Reading/decoding the body must never break the calling request.
            text_sample = ""
        logger.info(
            f"[Qcwy-{label}] response status={response.status_code} "
            f"headers={self._sanitize_headers(dict(response.headers))} "
            f"body_sample={text_sample}"
        )

    def build_property(self, page_code: str = "home|hotjob|jobfxlist") -> str:
        """Build the URL-encoded JSON value used for the ``property`` header.

        Args:
            page_code: Value stored under both ``pageCode`` and
                ``shortPageCode``.

        Returns:
            The compact JSON document, percent-encoded with
            ``urllib.parse.quote``.
        """
        # Millisecond timestamp followed by six random digits.
        distinct_id = str(int(time.time() * 1000)) + str(random.randint(100000, 999999))
        property_data = {
            "frompageUrl": "",
            "pageUrl": "pages/index/index",
            "isLogin": "否",
            "accountid": "",
            "resumeId": "",
            "firstFrompageUrl": "",
            "distinct_id": distinct_id,
            "pageCode": page_code,
            "shortPageCode": page_code,
            "policyType": "推荐",
        }
        return quote(json.dumps(property_data, ensure_ascii=False, separators=(',', ':')))

    def _build_url_path(self, api_path: str) -> str:
        """Return ``/<api_path>`` with the ``api_key`` and ``timestamp`` query.

        The timestamp is the current Unix time in whole seconds. The returned
        path is both appended to ``base_url`` and passed to the signer.
        """
        timestamp = int(time.time())
        return f"/{api_path}?api_key={self.api_key}&timestamp={timestamp}"

    def _is_success(self, response: Any) -> bool:
        """Tell whether ``response`` is a dict whose ``status`` signals success."""
        return isinstance(response, dict) and response.get("status") in self._SUCCESS_STATUSES

    def _make_request(
        self,
        url: str,
        data: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        method: str = "POST",
    ) -> Optional[Dict[str, Any]]:
        """Send one request and return the decoded JSON body.

        Args:
            url: Absolute URL to call.
            data: JSON body for non-GET requests; ignored for GET.
            headers: Request headers; ``None`` means no extra headers.
            method: ``"GET"`` (case-insensitive) issues a GET; any other value
                issues a POST.

        Returns:
            The parsed JSON on HTTP 200, otherwise ``None``. Any exception
            (transport error, invalid JSON, ...) is logged and also yields
            ``None``.
        """
        verb = method.upper()
        try:
            local_headers: Dict[str, str] = headers or {}
            if verb == "GET":
                response = self.client.get(url, headers=local_headers)
            else:
                response = self.client.post(url, headers=local_headers, json=data)

            self._log_request_response(
                "request",
                verb,
                url,
                local_headers,
                params=None,
                json_body=data if verb != "GET" else None,
                response=response,
            )

            if response.status_code == 200:
                return response.json()
            logger.warning(f"Request failed: {response.status_code} - {response.text}")
            return None
        except Exception as e:
            logger.error(f"Request exception: {e}")
            return None

    def get_job_detail(self, job_id: str) -> Dict[str, Any]:
        """Fetch the base detail record of a single job.

        Args:
            job_id: Job identifier inserted into the API path.

        Returns:
            The ``resultbody`` of a successful response, or ``{}`` when the
            request fails, the status is not a success value, or the body is
            missing/null.
        """
        url_path = self._build_url_path(f"open/noauth/jobs/detail/base/{job_id}")
        full_url = f"{self.base_url}{url_path}"

        headers = self.base_headers.copy()
        headers["sign"] = self.signature_generator.generate_signature(url_path)
        headers["property"] = self.build_property(page_code="pages/jobs/jobdetail/jobdetail")
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        response = self._make_request(full_url, None, headers, method="GET")
        if self._is_success(response):
            return response.get('resultbody') or {}
        return {}

    def get_company_info(self, company_id: str) -> Dict[str, Any]:
        """Return company information via the spider, or ``{}`` on any error."""
        try:
            return qcwy_spider.get_company_info(company_id)
        except Exception as e:
            logger.error(f"Qcwy get_company_info failed: {e}")
            return {}

    def search_jobs(self, keyword: str, job_area: str = "020000", page: int = 1) -> List[Dict[str, Any]]:
        """Fetch one page of jobs from the recommend endpoint.

        This calls ``open/noauth/recommend/job-tab-dynamic-wx-mini`` with
        ``type="recommend"``; a dedicated keyword-search endpoint is not used,
        so ``keyword`` is only passed along in the request body.

        Args:
            keyword: Search keyword sent as ``keyword``.
            job_area: Area code sent as ``jobArea``.
            page: 1-based page number sent as ``pageNo`` (page size is 20).

        Returns:
            ``resultbody.jobList.items`` of a successful response, or ``[]``
            when the request fails or any of those levels is missing/null.
        """
        data = {
            "pageNo": page,
            "pageSize": 20,
            "keyword": keyword,
            "jobArea": job_area,
            "type": "recommend",  # recommend feed, not a real keyword search
            "isTouristMode": True,
            "specialPageCode": True,
        }

        url_path = self._build_url_path("open/noauth/recommend/job-tab-dynamic-wx-mini")
        full_url = f"{self.base_url}{url_path}"

        # The signature is computed over the body as built above, i.e. before
        # booleans are turned into strings - this order is kept deliberately.
        signature = self.signature_generator.generate_signature(url_path, data)

        headers = self.base_headers.copy()
        headers["sign"] = signature
        headers["property"] = self.build_property()

        # The body is sent with booleans encoded as the strings "true"/"false".
        payload = {
            key: ("true" if value else "false") if isinstance(value, bool) else value
            for key, value in data.items()
        }

        response = self._make_request(full_url, payload, headers, method="POST")
        if not self._is_success(response):
            return []

        # Each level may be absent or JSON null; fall back to empty containers.
        result_body = response.get("resultbody") or {}
        job_list = result_body.get("jobList") or {}
        return job_list.get("items") or []

    def get_company_jobs_by_id(
        self,
        company_id: str,
        page: int = 1,
        page_size: int = 30,
        job_area: str = "",
        function: str = "",
        salary_type: str = "",
    ) -> Dict[str, Any]:
        """Return one page of a company's jobs via the spider.

        Args:
            company_id: Company identifier, forwarded as ``co_id``.
            page: Page number forwarded to the spider.
            page_size: Page size, forwarded as ``size``.
            job_area: Accepted for interface compatibility; not forwarded.
            function: Accepted for interface compatibility; not forwarded.
            salary_type: Accepted for interface compatibility; not forwarded.

        Returns:
            Whatever the spider returns, or ``{}`` if it raises.
        """
        try:
            return qcwy_spider.company_jobs_by_id(
                co_id=company_id,
                page=page,
                size=page_size,
            )
        except Exception as e:
            logger.error(f"Qcwy get_company_jobs_by_id failed: {e}")
            return {}