""" 智联招聘 - 所有 API 接口 每个类只负责参数构建,HTTP 和算法由 client / core 层处理 """ from __future__ import annotations from typing import Any, Optional from crawler_core.base import BaseFetcher, BaseSearcher, parse_response, Result # ───────────────────────────────────────────── # 智联响应解析(覆写默认算法) # ───────────────────────────────────────────── def _parse_zhilian_response(http_code: int, raw: Any) -> Result: """ 智联专用响应解析 智联响应格式(cgate / capi 接口): {"data": {...}} 或 {"data": {"list": [...]}} HTTP 200 且无 statusCode 字段时视为成功 """ if http_code != 200: return Result(success=False, status_code=http_code, error=f"HTTP 请求失败: {http_code}") if not isinstance(raw, dict): return Result(success=False, status_code=http_code, error="响应格式异常") payload = raw.get("data") or {} # 列表型响应 if isinstance(payload, dict) and "list" in payload: items = payload.get("list", []) num_found = raw.get("pageInfo", {}).get("numFound", 0) or payload.get("numFound", len(items)) return Result( success=True, status_code=200, data=payload, list=items, count=num_found, is_end_page=len(items) == 0, ) return Result(success=True, status_code=200, data=payload) from spiderJobs.platforms.zhilian.client import ZhilianClient, create_cgate_client, create_capi_client # ───────────────────────────────────────────── # 1. 
# Position search (POST cgate)
# ─────────────────────────────────────────────

# Base request body; SearchPositions._build_params copies it into every
# search request before adding paging and search criteria.
_SEARCH_BODY = {
    "eventScenario": "wxmpZhaopinSearchV2",
    "filterMinSalary": 1,
    "S_SOU_EXPAND": "SOU_COMPANY_ID",
    "sortType": "DEFAULT",
    "resumeNumber": "",
    "version": "8.11.22",
    "identity": 0,
    "anonymous": 1,
}

# Only these keys are forwarded from the caller-supplied ``filters`` dict.
_FILTER_KEYS = [
    "S_SOU_SALARY",
    "S_SOU_EDUCATION_LOWESTLEVEL",
    "S_SOU_REFRESH_DATE",
    "S_SOU_WORK_EXPERIENCE",
    "S_SOU_POSITION_TYPE",
    "S_SOU_COMPANY_TYPE",
    "S_SOU_COMPANY_SCALE",
    "welfareLabels",
    "S_SOU_JD_INDUSTRY_LEVEL",
]


class SearchPositions(BaseSearcher):
    """Position search.

    Usage::

        api = SearchPositions(keyword="Python", city_code=538)
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "/positionbusiness/searchrecommend/searchPositions"

    def __init__(
        self,
        *,
        keyword: str = "",
        city_code: int | str = "",
        collected_purpose: Optional[dict] = None,
        filters: Optional[dict] = None,
        page_size: int = 15,
        client: Optional[ZhilianClient] = None,
    ):
        """Store the search criteria.

        Args:
            keyword: Free-text search keyword.
            city_code: Work-city code.
            collected_purpose: Optional dict of job preferences that is
                translated into search parameters by ``_purpose_params``.
            filters: Optional dict of extra filters; only keys listed in
                ``_FILTER_KEYS`` with truthy values are sent.
            page_size: Number of results requested per page.
            client: HTTP client; a cgate client is created when omitted.
        """
        super().__init__(page_size=page_size, http_client=client or create_cgate_client())
        self.keyword = keyword
        self.city_code = city_code
        self.collected_purpose = collected_purpose
        self.filters = filters or {}

    def _build_params(self, page_index: int) -> dict:
        """Build the request body for one result page.

        Precedence: values derived from ``collected_purpose`` are applied
        first. ``keyword`` is used only when the purpose did not set
        ``S_SOU_JD_JOB_LEVEL3`` (it then replaces any purpose-derived
        ``S_SOU_FULL_INDEX``). ``city_code`` is used only when the purpose
        did not set a city. Known filters are applied last.
        """
        body = {**_SEARCH_BODY, "pageIndex": page_index, "pageSize": self.page_size}

        if self.collected_purpose:
            body.update(self._purpose_params(self.collected_purpose, page_index))

        if self.keyword and "S_SOU_JD_JOB_LEVEL3" not in body:
            body["S_SOU_FULL_INDEX"] = self.keyword
        if self.city_code and "S_SOU_WORK_CITY" not in body:
            body["S_SOU_WORK_CITY"] = self.city_code

        body.update({k: self.filters[k] for k in _FILTER_KEYS if self.filters.get(k)})
        return body

    def _request(self, params: dict):
        """Send the search request; Zhilian position search uses POST."""
        return self.http_client.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)

    @staticmethod
    def _purpose_params(purpose: dict, page_index: int) -> dict:
        """Translate a job-preference dict into search parameters.

        Keys read from ``purpose``: ``pnew_preferred_job_type``,
        ``job_type_name``, ``city_id``, ``preferred_location``,
        ``preferred_salary_min`` and ``preferred_salary_max``.

        Args:
            purpose: Job-preference dict.
            page_index: Page index to include in the result.

        Returns:
            A dict with ``pageIndex`` plus whichever search keys could be
            derived from ``purpose``.
        """
        params: dict = {"pageIndex": page_index}

        pnew = purpose.get("pnew_preferred_job_type", "")
        name = purpose.get("job_type_name", "")
        # The job-type code wins over the job-type name.
        if pnew:
            params["S_SOU_JD_JOB_LEVEL3"] = pnew
        elif name:
            params["S_SOU_FULL_INDEX"] = name

        city = purpose.get("city_id", "") or purpose.get("preferred_location", "")
        if city:
            params["S_SOU_WORK_CITY"] = city

        sal_min = purpose.get("preferred_salary_min", "")
        sal_max = purpose.get("preferred_salary_max", "")
        # A key that is present but None counts as "not set"; otherwise the
        # literal text "None" would be sent as part of the salary range.
        if sal_min is None:
            sal_min = ""
        if sal_max is None:
            sal_max = ""
        # str() so that a minimum of -1 is treated like "-1" (same as empty)
        # whether it arrives as an int or as a string.
        if str(sal_min) not in ("", "-1") or sal_max != "":
            params["S_SOU_SALARY"] = f"{sal_min},{sal_max}"

        return params


# ─────────────────────────────────────────────
# 2. Position detail (GET cgate)
# ─────────────────────────────────────────────

class GetPositionDetail(BaseFetcher):
    """Position detail.

    Usage::

        detail = GetPositionDetail(number="CC462451910J40881838003").fetch()
    """

    ENDPOINT = "/positionbusiness/position/getPositionModule"

    def __init__(self, *, number: str, identity: int = 0, client: Optional[ZhilianClient] = None):
        """Store the position number and identity flag.

        Args:
            number: Position number to look up.
            identity: Value sent as the ``identity`` request parameter.
            client: HTTP client; a cgate client is created when omitted.
        """
        super().__init__(http_client=client or create_cgate_client())
        self.number = number
        self.identity = identity

    def _build_params(self) -> dict:
        """Return the query parameters for the detail request."""
        return {"number": self.number, "identity": self.identity, "resumeNumber": ""}

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)


# ─────────────────────────────────────────────
# 3.
# Qichacha (business registration) info (GET cgate)
# ─────────────────────────────────────────────

class GetCompanyExtDetail(BaseFetcher):
    """Qichacha (business registration) information for a company.

    Usage::

        detail = GetCompanyExtDetail(company_name="上海有大信息科技", company_number="CZ462451910").fetch()
    """

    ENDPOINT = "/riskstorm/company/getCompanyExtDetail"

    def __init__(self, *, company_name: str, company_number: str, client: Optional[ZhilianClient] = None):
        """Store the company identifiers.

        Args:
            company_name: Sent as the ``companyName`` parameter.
            company_number: Sent as the ``companyNumber`` parameter.
            client: HTTP client; a cgate client is created when omitted.
        """
        http = client if client else create_cgate_client()
        super().__init__(http_client=http)
        self.company_name, self.company_number = company_name, company_number

    def _build_params(self) -> dict:
        """Return the query parameters for the request."""
        query = {}
        query["companyName"] = self.company_name
        query["companyNumber"] = self.company_number
        return query

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        result = _parse_zhilian_response(http_code, raw)
        return result


# ─────────────────────────────────────────────
# 4. Company detail (GET cgate)
# ─────────────────────────────────────────────

class GetCompanyDetail(BaseFetcher):
    """Detailed company information.

    Usage::

        detail = GetCompanyDetail(number="CZ462451910").fetch()
    """

    ENDPOINT = "/positionbusiness/exposure/companyDetail"

    def __init__(self, *, number: str, client: Optional[ZhilianClient] = None):
        """Store the company number.

        Args:
            number: Sent as the ``number`` parameter.
            client: HTTP client; a cgate client is created when omitted.
        """
        http = client if client else create_cgate_client()
        super().__init__(http_client=http)
        self.number = number

    def _build_params(self) -> dict:
        """Return the query parameters for the request."""
        query = {}
        query["number"] = self.number
        return query

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        result = _parse_zhilian_response(http_code, raw)
        return result


# ─────────────────────────────────────────────
# 5.
# Company open-positions list (GET capi)
# ─────────────────────────────────────────────

class SearchCompanyPositions(BaseSearcher):
    """List of positions a company is hiring for.

    Usage::

        api = SearchCompanyPositions(company_id="CZ462451910")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "/capi/searchrecommend/searchPositionsCompany"

    def __init__(
        self,
        *,
        company_id: str,
        job_level: str = "",
        city_code: str = "",
        page_size: int = 30,
        client: Optional[ZhilianClient] = None,
    ):
        """Store the query criteria.

        Args:
            company_id: Sent as ``S_SOU_COMPANY_ID``.
            job_level: Optional; sent as ``S_SOU_JD_JOB_LEVEL`` when truthy.
            city_code: Optional; sent as ``S_SOU_WORK_CITY`` when truthy.
            page_size: Number of results requested per page.
            client: HTTP client; a capi client is created when omitted.
        """
        capi = client or create_capi_client()
        # Kept separately because _build_params needs the client's signer.
        self._client = capi
        super().__init__(page_size=page_size, http_client=capi)
        self.company_id = company_id
        self.job_level = job_level
        self.city_code = city_code

    def _build_params(self, page_index: int) -> dict:
        """Build the signed query parameters for one result page.

        The signer's parameters come first; the fixed and paging fields
        below override any signer field with the same name.
        """
        query = {
            **self._client.signer.sign_params(),
            "S_SOU_COMPANY_ID": self.company_id,
            "S_SOU_POSITION_SOURCE_TYPE": "1",
            "eventScenario": "wxmpZhaopinSearchPositionsCompany",
            "pageCode": "wxmpZhaopinCompanyDetailPage",
            "pageIndex": page_index,
            "pageSize": self.page_size,
        }

        optional = (
            ("S_SOU_JD_JOB_LEVEL", self.job_level),
            ("S_SOU_WORK_CITY", self.city_code),
        )
        for key, value in optional:
            if value:
                query[key] = value
        return query

    def _request(self, params: dict) -> tuple[int, Any]:
        """Send the request with GET."""
        response = self.http_client.get(self.ENDPOINT, params)
        return response

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        result = _parse_zhilian_response(http_code, raw)
        return result


# ─────────────────────────────────────────────
# Usage examples
# ─────────────────────────────────────────────

if __name__ == "__main__":
    # NOTE(review): json is imported but not used by the demo below.
    import json

    print("=== 1. 职位搜索 ===")
    r = SearchPositions(keyword="Python", city_code=538).search()
    print(f"共 {r.count} 条,本页 {len(r.list)} 条")

    print("\n=== 2. 职位详情 ===")
    r = GetPositionDetail(number="CC462451910J40881838003").fetch()
    print(f"成功: {r.success}")

    print("\n=== 3. 企查查信息 ===")
    r = GetCompanyExtDetail(company_name="上海有大信息科技", company_number="CZ462451910").fetch()
    print(f"成功: {r.success}")

    print("\n=== 4. 公司详情 ===")
    r = GetCompanyDetail(number="CZ462451910").fetch()
    print(f"成功: {r.success}")

    print("\n=== 5. 公司招聘列表 ===")
    r = SearchCompanyPositions(company_id="CZ462451910").search()
    print(f"共 {r.count} 个职位,本页 {len(r.list)} 条")