"""
51Job (Qianchengwuyou) - all API endpoints.

Each class is only responsible for building request parameters; HTTP
transport and algorithms are handled by the client / core layers.

Response format adaptation:
    51job uses status/data or returns the data directly;
    status=1 or HTTP 200 indicates success.
"""

from __future__ import annotations

from typing import Any, Optional

from crawler_core.base import BaseFetcher, BaseSearcher, Result
from spiderJobs.platforms.job51.client import Job51Client, create_client


# ─────────────────────────────────────────────
# 51job response parsing (overrides the default algorithm)
# ─────────────────────────────────────────────

def _list_result(payload: Any, items: Any, total: Any = None) -> Result:
    """
    Build a successful list-type Result.

    Args:
        payload: The business payload, stored unchanged in ``data``.
        items: The rows of the current page. A JSON ``null`` (``None``) or
            any other falsy value is normalised to an empty list so that
            ``len()`` can never fail on it.
        total: Optional total reported by the server. When ``None`` the
            number of rows on this page is used as ``count``.

    Returns:
        A Result with ``success=True`` and ``status_code=200``;
        ``is_end_page`` is true when the page holds no rows.
    """
    rows = items or []
    return Result(
        success=True,
        status_code=200,
        data=payload,
        list=rows,
        count=len(rows) if total is None else total,
        is_end_page=not rows,
    )


def _parse_job51_response(http_code: int, raw: Any) -> Result:
    """
    Parse a 51job response into a Result.

    51job response format (cupid endpoints):
        {"status": 1, "message": "成功", "resultbody": {...}}
    ``status`` equal to 1 or "1" means success; ``resultbody`` (or ``data``)
    carries the actual business data.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body; expected to be a dict.

    Returns:
        A failed Result when the HTTP code is not 200, when ``raw`` is not
        a dict, or when a ``status`` is present and is not 1. Otherwise a
        successful Result; list-shaped payloads (``jobList.items``,
        ``jobList`` as a list, ``items``, ``list``) additionally populate
        ``list``, ``count`` and ``is_end_page``.
    """
    if http_code != 200:
        return Result(
            success=False,
            status_code=http_code,
            error=f"HTTP 请求失败: {http_code}",
        )

    if not isinstance(raw, dict):
        return Result(success=False, status_code=http_code, error="响应格式异常")

    # Business status check (status may be the int 1 or the str "1").
    biz_status = raw.get("status")
    if biz_status is not None and str(biz_status) != "1":
        status_text = str(biz_status)
        return Result(
            success=False,
            # isdecimal() only accepts characters int() can parse; isdigit()
            # also accepts e.g. superscript digits, which make int() raise.
            status_code=int(status_text) if status_text.isdecimal() else -1,
            error=raw.get("message") or f"业务错误: {biz_status}",
        )

    payload = raw.get("resultbody") or raw.get("data") or {}
    if not isinstance(payload, dict):
        return Result(success=True, status_code=200, data=payload)

    # List response: recommended jobs, resultbody.jobList.items[]
    job_list = payload.get("jobList")
    if isinstance(job_list, dict) and "items" in job_list:
        return _list_result(payload, job_list.get("items"))
    # jobList itself is the list.
    if isinstance(job_list, list):
        return _list_result(payload, job_list)

    # List response: company jobs, resultbody.items[] (+ optional totalCount)
    if "items" in payload:
        return _list_result(
            payload, payload.get("items"), payload.get("totalCount")
        )

    # List response: generic "list" field
    if "list" in payload:
        return _list_result(payload, payload.get("list"))

    return Result(success=True, status_code=200, data=payload)


# ─────────────────────────────────────────────
# 1. Home-page recommended job search (POST)
# ─────────────────────────────────────────────

class SearchRecommendJobs(BaseSearcher):
    """
    Home-page recommended / searched job list (no login required).

        api = SearchRecommendJobs(job_area="020000", function_type="A0N7")
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "open/noauth/recommend/job-tab-dynamic-wx-mini"

    def __init__(
        self,
        *,
        job_area: str = "020000",
        function_type: str = "",
        job_type: str = "recommend",
        page_size: int = 10,
        client: Optional[Job51Client] = None,
    ):
        """
        Store the search filters.

        Args:
            job_area: Sent as ``jobArea`` in the request body.
            function_type: Sent as ``functionType``; omitted when empty.
            job_type: Sent as ``type`` in the request body.
            page_size: Number of rows requested per page.
            client: HTTP client to use; a new one is created via
                ``create_client()`` when not given.
        """
        super().__init__(page_size=page_size, http_client=client or create_client())
        self.job_area = job_area
        self.function_type = function_type
        self.job_type = job_type

    def _build_params(self, page_index: int) -> dict:
        """Return the POST body for the given page number."""
        body = {
            "pageNo": page_index,
            "pageSize": self.page_size,
            "specialPageCode": True,
            "isTouristMode": True,
            "type": self.job_type,
            "jobArea": self.job_area,
            "personAsLabel": "1",
        }
        # functionType is only sent when a filter value was supplied.
        if self.function_type:
            body["functionType"] = self.function_type
        return body

    def _request(self, params: dict):
        """Send the request; the 51job recommend search uses POST."""
        return self.http_client.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the 51job-specific response parser."""
        return _parse_job51_response(http_code, raw)
# ─────────────────────────────────────────────
# 2. Job detail (GET)
# ─────────────────────────────────────────────

class GetJobDetail(BaseFetcher):
    """
    Job detail (no login required).

        detail = GetJobDetail(job_id="170651439").fetch()
    """

    ENDPOINT = "open/noauth/jobs/detail/base"

    def __init__(self, *, job_id: str, client: Optional[Job51Client] = None):
        """
        Remember which job to fetch.

        Args:
            job_id: Identifier appended to the endpoint path by ``fetch``.
            client: HTTP client to use; a new one is created via
                ``create_client()`` when not given.
        """
        super().__init__(http_client=client or create_client())
        self.job_id = job_id

    def _build_params(self) -> dict:
        """Return no query parameters; the job id travels in the path."""
        return {}

    def fetch(self) -> Result:
        """
        Override of fetch that puts ``job_id`` into the request path.

        Returns:
            The parsed Result, or a failed Result with ``status_code=-1``
            carrying the exception text when the request raises.
        """
        detail_path = f"{self.ENDPOINT}/{self.job_id}"
        try:
            status, body = self.http_client.get(detail_path)
        except Exception as exc:
            return Result(success=False, status_code=-1, error=str(exc))
        else:
            return self._parse(status, body)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the 51job-specific response parser."""
        return _parse_job51_response(http_code, raw)


# ─────────────────────────────────────────────
# 3. Company detail (GET)
# ─────────────────────────────────────────────

class GetCompanyInfo(BaseFetcher):
    """
    Detailed company information (no login required).

        detail = GetCompanyInfo(company_id="9825088").fetch()
    """

    ENDPOINT = "open/noauth/company-info/info-data"

    def __init__(
        self,
        *,
        company_id: str,
        color_one: str = "#ffffff",
        color_two: str = "#ffffffcc",
        client: Optional[Job51Client] = None,
    ):
        """
        Remember which company to fetch.

        Args:
            company_id: Sent as the ``companyId`` query parameter.
            color_one: Sent as the ``colorOne`` query parameter.
            color_two: Sent as the ``colorTwo`` query parameter.
            client: HTTP client to use; a new one is created via
                ``create_client()`` when not given.
        """
        super().__init__(http_client=client or create_client())
        self.company_id = company_id
        self.color_one = color_one
        self.color_two = color_two

    def _build_params(self) -> dict:
        """Return the query parameters for the company-info request."""
        return dict(
            companyId=self.company_id,
            colorOne=self.color_one,
            colorTwo=self.color_two,
        )

    def fetch(self) -> Result:
        """
        Override of fetch that passes the query parameters to GET.

        Returns:
            The parsed Result, or a failed Result with ``status_code=-1``
            carrying the exception text when the request raises.
        """
        try:
            # Parameters are built inside the try so that a failure while
            # building them is reported the same way as a request failure.
            status, body = self.http_client.get(
                self.ENDPOINT, self._build_params()
            )
        except Exception as exc:
            return Result(success=False, status_code=-1, error=str(exc))
        else:
            return self._parse(status, body)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the 51job-specific response parser."""
        return _parse_job51_response(http_code, raw)
# ─────────────────────────────────────────────
# 4. Company job list (POST)
# ─────────────────────────────────────────────

class SearchCompanyJobs(BaseSearcher):
    """
    Job openings of one company (no login required).

        api = SearchCompanyJobs(company_id="9825088")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "open/noauth/jobs/company"

    def __init__(
        self,
        *,
        company_id: str,
        job_area: str = "",
        function: str = "",
        salary_type: str = "",
        page_size: int = 10,
        client: Optional[Job51Client] = None,
    ):
        """
        Store the company id and the optional filters.

        Args:
            company_id: Sent as ``coId`` in the request body.
            job_area: Sent as ``jobArea``.
            function: Sent as ``function``.
            salary_type: Sent as ``salaryType``.
            page_size: Number of rows requested per page.
            client: HTTP client to use; a new one is created via
                ``create_client()`` when not given.
        """
        super().__init__(page_size=page_size, http_client=client or create_client())
        self.company_id = company_id
        self.job_area = job_area
        self.function = function
        self.salary_type = salary_type

    def _build_params(self, page_index: int) -> dict:
        """Return the POST body for the given page number."""
        body = {
            "pageNum": page_index,
            "pageSize": self.page_size,
            "coId": self.company_id,
            "jobArea": self.job_area,
            "function": self.function,
            "salaryType": self.salary_type,
            "scene": 14,
            "requestId": "",
        }
        return body

    def _request(self, params: dict):
        """Send the request; the 51job company search uses POST."""
        return self.http_client.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the 51job-specific response parser."""
        return _parse_job51_response(http_code, raw)


# ─────────────────────────────────────────────
# Usage example
# ─────────────────────────────────────────────

if __name__ == "__main__":
    import json

    def _preview(obj):
        """Return the first 300 characters of obj pretty-printed as JSON."""
        return json.dumps(obj, ensure_ascii=False, indent=2)[:300]

    print("=== 1. 首页推荐职位 ===")
    recommend = SearchRecommendJobs(job_area="020000").search()
    print(
        f"成功: {recommend.success}, 本页 {len(recommend.list)} 条, "
        f"is_end_page: {recommend.is_end_page}"
    )
    if recommend.list:
        print(f"第一条: {_preview(recommend.list[0])}...")

    print("\n=== 2. 公司详情 ===")
    company = GetCompanyInfo(company_id="9825088").fetch()
    print(f"成功: {company.success}")
    if company.data:
        print(f"数据: {_preview(company.data)}...")

    print("\n=== 3. 公司职位列表 ===")
    company_jobs = SearchCompanyJobs(company_id="9825088").search()
    print(f"成功: {company_jobs.success}, 本页 {len(company_jobs.list)} 条")

    # Job detail: take the jobId from a fresh search result.
    print("\n=== 4. 职位详情 ===")
    search_r = SearchRecommendJobs(job_area="020000").search()
    if not search_r.list:
        print("搜索结果为空,跳过")
    else:
        first_job = search_r.list[0]
        # Fall back to "id" when "jobId" is missing or empty.
        job_id = str(first_job.get("jobId", "") or first_job.get("id", ""))
        if not job_id:
            print("搜索结果中未找到 jobId 字段")
        else:
            detail = GetJobDetail(job_id=job_id).fetch()
            print(f"成功: {detail.success}")
            if detail.data:
                print(f"数据: {_preview(detail.data)}...")