# ⚠️ DEPRECATED — 2026-03-21
# This file is an internal, hand-copied file. It is deprecated and is no
# longer referenced by any facade.
# Use spiderJobs.platforms.* or the corresponding module in crawler_core
# instead.
# It will be removed in the next milestone.
#
"""Common base classes and data structures.

Copied from spiderJobs/core/base.py — imports changed to local references.
"""
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Callable, Optional

from app.services.crawler._http_client import HTTPClient


@dataclass
class ApiResult:
    """Normalized outcome of a single API call."""

    # True when both the HTTP status and the business status were 200.
    success: bool
    # Business status code when the response carried one, otherwise the HTTP
    # status; -1 when the request itself raised an exception.
    status_code: int
    # The response's "data" payload ({} when it was absent or empty).
    data: Any = None
    # Items from the payload's "list" key. The field name shadows the builtin
    # but is part of the public interface, so it is kept as-is.
    list: list[dict] = field(default_factory=list)
    # Value of the payload's "count" key.
    count: int = 0
    # Value of the payload's "isEndPage" key; True when the key is missing.
    is_end_page: bool = True
    # Error message for failed calls, None on success.
    error: Optional[str] = None


def _exception_result(exc: Exception) -> ApiResult:
    """Build the failed ``ApiResult`` reported when issuing a request raised."""
    # Exceptions raised without a message stringify to ""; fall back to the
    # class name so a failed result never carries an empty error.
    return ApiResult(
        success=False,
        status_code=-1,
        error=str(exc) or type(exc).__name__,
    )


def parse_response(http_code: int, raw: Any) -> ApiResult:
    """Translate an HTTP status and decoded body into an ``ApiResult``.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body. When it is a dict, its ``statusCode``,
            ``statusDescription``, ``message`` and ``data`` keys are read.

    Returns:
        A failed result unless both ``http_code`` and the business code are
        200. For a dict body the business code is ``raw["statusCode"]``
        (a missing key therefore counts as a failure); for any other body it
        is ``http_code`` itself. On success, a dict payload containing a
        ``list`` key is unpacked into ``list`` / ``count`` / ``is_end_page``;
        any other payload is returned in ``data`` only.
    """
    is_mapping = isinstance(raw, dict)
    biz_code = raw.get("statusCode") if is_mapping else http_code

    if http_code != 200 or biz_code != 200:
        if is_mapping:
            message = (
                raw.get("statusDescription")
                or raw.get("message")
                or f"请求失败: {biz_code}"
            )
        else:
            message = f"请求失败: {http_code}"
        return ApiResult(
            success=False,
            status_code=biz_code or http_code,
            error=message,
        )

    payload = (raw.get("data") or {}) if is_mapping else {}
    if isinstance(payload, dict) and "list" in payload:
        # The key may be present but null (or otherwise not a list); never
        # hand a non-list to consumers that extend/iterate ``ApiResult.list``.
        items = payload.get("list")
        return ApiResult(
            success=True,
            status_code=200,
            data=payload,
            list=items if isinstance(items, list) else [],
            count=payload.get("count") or 0,
            is_end_page=payload.get("isEndPage", True),
        )
    return ApiResult(success=True, status_code=200, data=payload)


class BaseFetcher:
    """Base class for a single GET request against ``ENDPOINT``.

    Subclasses set ``ENDPOINT`` and implement ``_build_params``.
    """

    ENDPOINT: str = ""

    def __init__(self, http_client: HTTPClient):
        self._http = http_client

    def _build_params(self) -> dict:
        """Return the request parameters; must be overridden."""
        raise NotImplementedError

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Parse the response; override for endpoint-specific formats."""
        return parse_response(http_code, raw)

    def fetch(self) -> ApiResult:
        """Issue the GET request and return the parsed result.

        Any exception raised while building the parameters or performing the
        request is converted into a failed ``ApiResult`` with
        ``status_code=-1`` instead of propagating.
        """
        try:
            http_code, data = self._http.get(self.ENDPOINT, self._build_params())
        except Exception as e:
            return _exception_result(e)
        return self._parse(http_code, data)


class BaseSearcher:
    """Base class for paginated POST searches against ``ENDPOINT``.

    Subclasses set ``ENDPOINT`` and implement ``_build_params``.
    """

    ENDPOINT: str = ""

    def __init__(self, page_size: int = 15, http_client: Optional[HTTPClient] = None):
        self.page_size = page_size
        self._http = http_client

    def _build_params(self, page_index: int) -> dict:
        """Return the request parameters for ``page_index``; must be overridden."""
        raise NotImplementedError

    def _request(self, params: dict) -> tuple[int, Any]:
        """POST ``params`` to ``ENDPOINT`` and return ``(http_code, body)``."""
        return self._http.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Parse the response; override for endpoint-specific formats."""
        return parse_response(http_code, raw)

    def search(self, page_index: int = 1) -> ApiResult:
        """Request one page and return the parsed result.

        Exceptions raised by ``_request`` are converted into a failed
        ``ApiResult`` with ``status_code=-1``. Exceptions raised by
        ``_build_params`` are not caught and propagate to the caller.
        """
        params = self._build_params(page_index)
        try:
            http_code, data = self._request(params)
        except Exception as e:
            return _exception_result(e)
        return self._parse(http_code, data)

    def load_all(
        self,
        max_pages: int = 10,
        on_page: Optional[Callable[[ApiResult, int], None]] = None,
    ) -> list[dict]:
        """Collect items from consecutive pages, starting at page 1.

        Paging stops at the first failed page, at the first page flagged
        ``is_end_page``, or after ``max_pages`` pages. Items gathered before
        a failure are still returned; the failure itself is not reported.

        Args:
            max_pages: Upper bound on the number of pages requested.
            on_page: Optional callback invoked as ``on_page(result,
                page_index)`` after each successful page.

        Returns:
            The concatenated ``list`` items of all successful pages.
        """
        collected: list[dict] = []
        for page_index in range(1, max_pages + 1):
            result = self.search(page_index=page_index)
            if not result.success:
                break
            # A custom ``_parse`` may leave ``list`` as None; treat as empty.
            collected.extend(result.list or [])
            if on_page is not None:
                on_page(result, page_index)
            if result.is_end_page:
                break
        return collected