Plan 01 - DATA-01: 30-day window dedup fix:
- dedup.py: both single-field and double-field SQL queries now include `AND created_at > now() - INTERVAL 30 DAY`
- tests/ingest/test_dedup.py: 6 mock tests validating the 30-day window

Plan 02 - DATA-04: company vs. search job channel separation:
- schemas/ingest.py: ChannelType.COMPANY = 'company'
- configs/boss.py: register channel='company' config
- configs/qcwy.py: register channel='company' config
- configs/zhilian.py: register channel='company' config
- company_jobs_sync.py: store_batch(..., 'mini', ...) → store_batch(..., 'company', ...)

DATA-02: confirmed already complete (job.py has /data/batch-async endpoint)
DATA-03: confirmed already complete (company_cleaner.py full pipeline)

Full regression: 112 passed (106 existing + 6 new)
44 lines
1.3 KiB
Python
44 lines
1.3 KiB
Python
from enum import Enum
|
|
from typing import Dict, Any, List, Optional
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
# Closed set of platform identifiers accepted in the `platform` field of the
# ingest request models. The `str` mixin makes every member compare equal to
# (and serialize as) its raw string value.
class PlatformType(str, Enum):
    BOSS = "boss"
    QCWY = "qcwy"
    ZHILIAN = "zhilian"
|
|
|
|
|
|
# Closed set of channel identifiers accepted in the `channel` field of the
# ingest request models (which default to MINI). The `str` mixin makes every
# member compare equal to its raw string value.
class ChannelType(str, Enum):
    MINI = "mini"
    WEB = "web"
    APP = "app"
    # Jobs associated with a company; kept distinct from the search-job
    # channel `mini`.
    COMPANY = "company"
|
|
|
|
|
|
# Kinds of record an ingest request can carry, selected through the
# `data_type` field of the request models. The `str` mixin makes every member
# compare equal to its raw string value.
class DataType(str, Enum):
    JOB = "job"
    COMPANY = "company"
|
|
|
|
|
|
# Request body for ingesting one record.
# `data`, `data_type` and `platform` are required (Ellipsis default);
# `channel` falls back to ChannelType.MINI and `check_duplicate` to True.
class IngestSingleRequest(BaseModel):
    # The record to store, as a free-form mapping.
    data: Dict[str, Any] = Field(
        ...,
        description="要存储的数据",
    )
    # Which kind of record `data` holds.
    data_type: DataType = Field(
        ...,
        description="数据类型",
    )
    # Platform the record belongs to.
    platform: PlatformType = Field(
        ...,
        description="平台类型",
    )
    # Channel the record belongs to.
    channel: ChannelType = Field(
        default=ChannelType.MINI,
        description="渠道类型",
    )
    # Whether to check for duplicates.
    check_duplicate: bool = Field(
        default=True,
        description="是否检查重复",
    )
|
|
|
|
|
|
# Request body for ingesting several records in one call.
# `data_list`, `data_type` and `platform` are required (Ellipsis default);
# `channel` falls back to ChannelType.MINI and `check_duplicate` to True.
# The type, platform, channel and duplicate-check settings are declared once
# per request, not per list item.
class IngestBatchRequest(BaseModel):
    # The records to store, each a free-form mapping.
    data_list: List[Dict[str, Any]] = Field(
        ...,
        description="要存储的数据列表",
    )
    # Which kind of record the list holds.
    data_type: DataType = Field(
        ...,
        description="数据类型",
    )
    # Platform the records belong to.
    platform: PlatformType = Field(
        ...,
        description="平台类型",
    )
    # Channel the records belong to.
    channel: ChannelType = Field(
        default=ChannelType.MINI,
        description="渠道类型",
    )
    # Whether to check for duplicates.
    check_duplicate: bool = Field(
        default=True,
        description="是否检查重复",
    )
|
|
|
|
|
|
# Response envelope for the ingest models in this module.
# Every field has a default, so a bare IngestResponse() yields
# code=200, message="ok", data=None.
class IngestResponse(BaseModel):
    # Numeric status code; 200 unless the caller overrides it.
    code: int = 200
    # Human-readable status text.
    message: str = "ok"
    # Optional free-form payload; omitted (None) by default.
    data: Optional[Dict[str, Any]] = None
|