Architecture clarification from user: spiderJobs/ is standalone execution,
NOT meant to be imported by app/. Correct dependency graph:
          crawler_core          ← shared base library
           ↑        ↑
    spiderJobs      app/services/crawler/
   (standalone)     (FastAPI backend, private layer)
Changes:
- boss.py/qcwy.py/zhilian.py: revert import back to private _boss_api etc.
- _boss/job51/zhilian_api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher
+ fix self._http → self.http_client
- _boss/job51/zhilian_client.py: use crawler_core.http_client.HTTPClient
+ _boss_client uses crawler_core.boss.sign.BossSign directly
- _boss/job51/zhilian_sign.py: backward-compat stubs → crawler_core.*.sign
Full regression: 106 passed in 0.68s
172 lines
6.3 KiB
Python
172 lines
6.3 KiB
Python
"""
Zhilian Zhaopin service — a wrapper built on the new algorithm files.

Keeps the public interface unchanged (cleaning.py / company_cleaner.py depend on it).
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
from loguru import logger
|
||
|
||
from app.services.crawler._zhilian_api import (
|
||
GetCompanyDetail,
|
||
GetPositionDetail,
|
||
SearchCompanyPositions,
|
||
SearchPositions,
|
||
)
|
||
from app.services.crawler._zhilian_client import (
|
||
ZhilianClient,
|
||
create_capi_client,
|
||
create_cgate_client,
|
||
)
|
||
from app.services.crawler._zhilian_sign import ZhilianSign
|
||
|
||
|
||
class ZhilianService:
    """Facade over the Zhilian fetcher/searcher wrappers.

    The service owns one ``ZhilianSign`` signer and two clients built from
    it: ``_cgate`` (used for position detail, company detail and position
    search) and ``_capi`` (used for a company's position listing).

    Every synchronous method is best-effort: it logs the call, catches any
    exception raised by the underlying wrapper, and returns an "empty"
    value (``None`` or ``[]``) instead of raising.
    """

    def __init__(self, proxy_url: Optional[str] = None):
        """Create the signer and both clients.

        Args:
            proxy_url: Optional proxy URL. It is normalized the same way as
                in :meth:`set_proxy`; blank values mean "no proxy".
        """
        self._signer = ZhilianSign()
        proxy = self._normalize_proxy(proxy_url)
        self._cgate = create_cgate_client(signer=self._signer, proxy=proxy)
        self._capi = create_capi_client(signer=self._signer, proxy=proxy)

    @staticmethod
    def _normalize_proxy(proxy_url: Optional[str]) -> Optional[str]:
        """Return a cleaned proxy URL, or ``None`` when nothing usable is left.

        Surrounding whitespace and backticks are removed. An input that is
        ``None``, empty, or empty after stripping yields ``None`` so that the
        clients are never handed an empty-string proxy.
        """
        if not proxy_url:
            return None
        cleaned = proxy_url.strip().strip("`").strip()
        return cleaned or None

    def set_proxy(self, proxy_url: Optional[str]) -> None:
        """Rebuild both clients so they use ``proxy_url``.

        The existing signer is reused. A blank/``None`` value switches the
        clients to a direct connection (``proxy=None``).
        """
        proxy = self._normalize_proxy(proxy_url)
        self._cgate = create_cgate_client(signer=self._signer, proxy=proxy)
        self._capi = create_capi_client(signer=self._signer, proxy=proxy)
        logger.info(f"ZhilianService proxy set to: {proxy or 'direct'}")

    def get_job_detail(self, job_number: str) -> Optional[Dict[str, Any]]:
        """Fetch the detail of one position.

        Returns:
            ``result.data`` when the fetch reports success, otherwise
            ``None`` (also on any exception).
        """
        logger.info(f"Zhilian get_job_detail: {job_number}")
        try:
            fetcher = GetPositionDetail(number=job_number, client=self._cgate)
            result = fetcher.fetch()
            if result.success:
                return result.data
            logger.warning(f"Zhilian get_job_detail failed: {result.error}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: callers get None, not a raise.
            logger.error(f"Zhilian get_job_detail exception: {e}")
            return None

    def get_company_detail(self, company_number: str) -> Optional[Dict[str, Any]]:
        """Fetch the detail of one company.

        Returns:
            ``result.data`` when the fetch reports success, otherwise
            ``None`` (also on any exception).
        """
        logger.info(f"Zhilian get_company_detail: {company_number}")
        try:
            fetcher = GetCompanyDetail(number=company_number, client=self._cgate)
            result = fetcher.fetch()
            if result.success:
                return result.data
            logger.warning(f"Zhilian get_company_detail failed: {result.error}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: callers get None, not a raise.
            logger.error(f"Zhilian get_company_detail exception: {e}")
            return None

    def get_company_jobs_by_id(
        self,
        company_number: str,
        page_index: int = 1,
        page_size: int = 30,
        work_city: Optional[int] = None,
    ) -> Optional[Dict[str, Any]]:
        """Fetch one page of a company's position list.

        Args:
            company_number: Company identifier passed as ``company_id``.
            page_index: Page to request.
            page_size: Number of items per page.
            work_city: Optional city filter; sent as its string form, or as
                an empty string when omitted.

        Returns:
            ``result.data`` when the search reports success, otherwise
            ``None`` (also on any exception).
        """
        logger.info(f"Zhilian get_company_jobs: company={company_number}, page={page_index}")
        try:
            searcher = SearchCompanyPositions(
                company_id=company_number,
                city_code=str(work_city) if work_city is not None else "",
                page_size=page_size,
                client=self._capi,
            )
            result = searcher.search(page_index=page_index)
            if result.success:
                return result.data
            logger.warning(f"Zhilian get_company_jobs failed: {result.error}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: callers get None, not a raise.
            logger.error(f"Zhilian get_company_jobs exception: {e}")
            return None

    def search_company_jobs_by_name(
        self,
        company_name: str,
        city_id: Optional[int] = None,
        page_size: int = 15,
        page_index: int = 1,
    ) -> Optional[Dict[str, Any]]:
        """Search positions using the company name as the keyword.

        Args:
            company_name: Text passed as the search ``keyword``.
            city_id: Optional city filter; an empty string is sent when
                omitted.
            page_size: Number of items per page.
            page_index: Page to request.

        Returns:
            ``result.data`` when the search reports success, otherwise
            ``None`` (also on any exception).
        """
        logger.info(f"Zhilian search_company_jobs_by_name: {company_name}")
        try:
            searcher = SearchPositions(
                keyword=company_name,
                # NOTE(review): city_code is an int here but a str in
                # get_company_jobs_by_id — confirm which type the searcher
                # classes expect before unifying.
                city_code=city_id if city_id is not None else "",
                page_size=page_size,
                client=self._cgate,
            )
            result = searcher.search(page_index=page_index)
            if result.success:
                return result.data
            logger.warning(f"Zhilian search_company_jobs failed: {result.error}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: callers get None, not a raise.
            logger.error(f"Zhilian search_company_jobs exception: {e}")
            return None

    def search_jobs(
        self,
        city_id: int = 801,
        page_size: int = 15,
        page_index: int = 1,
        job_level3_code: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Search positions in a city and return them as a list.

        Args:
            city_id: City code passed to the searcher.
            page_size: Number of items per page.
            page_index: Page to request.
            job_level3_code: When given, sent as the
                ``S_SOU_POSITION_TYPE`` filter.

        Returns:
            ``result.list`` (or ``[]`` if it is falsy) on success; ``[]`` on
            a reported failure or any exception.
        """
        logger.info(f"Zhilian search_jobs: city={city_id}, page={page_index}")
        try:
            filters: Dict[str, Any] = {}
            if job_level3_code:
                filters["S_SOU_POSITION_TYPE"] = job_level3_code
            searcher = SearchPositions(
                city_code=city_id,
                filters=filters,
                page_size=page_size,
                client=self._cgate,
            )
            result = searcher.search(page_index=page_index)
            if result.success:
                return result.list or []
            logger.warning(f"Zhilian search_jobs failed: {result.error}")
            return []
        except Exception as e:
            # Deliberate best-effort boundary: callers get [], not a raise.
            logger.error(f"Zhilian search_jobs exception: {e}")
            return []

    # ── asyncio.to_thread bridges (ARCH-06) ─────────────────────
    # Each coroutine runs the matching synchronous method in a worker
    # thread via asyncio.to_thread and returns its result unchanged.

    async def async_get_job_detail(self, job_number: str) -> Optional[Dict[str, Any]]:
        """Await :meth:`get_job_detail` executed in a worker thread."""
        import asyncio

        return await asyncio.to_thread(self.get_job_detail, job_number)

    async def async_get_company_detail(self, company_number: str) -> Optional[Dict[str, Any]]:
        """Await :meth:`get_company_detail` executed in a worker thread."""
        import asyncio

        return await asyncio.to_thread(self.get_company_detail, company_number)

    async def async_get_company_jobs(
        self,
        company_number: str,
        page_index: int = 1,
        page_size: int = 30,
        work_city: Optional[int] = None,
    ) -> Optional[Dict[str, Any]]:
        """Await :meth:`get_company_jobs_by_id` executed in a worker thread."""
        import asyncio

        return await asyncio.to_thread(
            self.get_company_jobs_by_id, company_number, page_index, page_size, work_city
        )

    async def async_search_jobs(
        self,
        city_id: int = 801,
        page_size: int = 15,
        page_index: int = 1,
        job_level3_code: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Await :meth:`search_jobs` executed in a worker thread."""
        import asyncio

        return await asyncio.to_thread(
            self.search_jobs, city_id, page_size, page_index, job_level3_code
        )
|
||
|