69 lines
2.1 KiB
Python
69 lines
2.1 KiB
Python
"""
|
|
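Standalone entry point for the Boss Zhipin company crawler.

Fetches pending Boss companies from the pending_company queue, then calls
GetBrandDetail for each one to retrieve its details and upload them.

Run:
    python -m spiderJobs.platforms.boss.company_main

Environment variables:
    API_BASE_URL        Backend address (default http://124.222.106.226:9999)
    COMPANY_BATCH_SIZE  Number of companies fetched per batch (default 10)
    SLEEP_MIN_SECONDS   Minimum delay in seconds (default 10)
    SLEEP_MAX_SECONDS   Maximum delay in seconds (default 20)
    BOSS_MPT            Boss token (mpt)
    BOSS_WT2            Boss token (wt2)
    PROXY_TUNNEL        Proxy endpoint; no proxy is configured when empty
    PROXY_SCHEME        Proxy URL scheme (default http)
    PROXY_USERNAME      Proxy username (used only when the password is also set)
    PROXY_PASSWORD      Proxy password (used only when the username is also set)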
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
|
|
# Put the directory three levels above this file on sys.path so that the
# absolute ``spiderJobs.*`` imports below resolve even when this file is
# executed directly rather than through ``python -m``.
_project_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)
)
if _project_root not in sys.path:
    # Front of the list: this checkout wins over any other copy on the path.
    sys.path.insert(0, _project_root)
|
|
|
|
from spiderJobs.core.base import BaseFetcher
|
|
from spiderJobs.platforms.boss.api import GetBrandDetail
|
|
from spiderJobs.platforms.boss.client import BossClient, create_client
|
|
from spiderJobs.platforms.boss.sign import BossSign
|
|
from spiderJobs.runner.company_loop import run_company_loop
|
|
|
|
|
|
def create_company_fetcher(company_id: str, http_client: BossClient) -> BaseFetcher:
    """Build the fetcher that retrieves the detail of one Boss company.

    Args:
        company_id: Company identifier, passed to ``GetBrandDetail`` as
            ``brand_id``.
        http_client: Client passed to ``GetBrandDetail`` as ``client``.

    Returns:
        The ``GetBrandDetail`` instance for this company.
    """
    fetcher = GetBrandDetail(brand_id=company_id, client=http_client)
    return fetcher
|
|
|
|
|
|
def main() -> None:
    """Build the Boss client options from the environment and run the company loop.

    Environment variables read here:
        BOSS_MPT, BOSS_WT2: Tokens handed to ``BossSign``. A signer is only
            created when at least one of them is non-empty.
        PROXY_TUNNEL: Proxy endpoint placed after ``scheme://`` in the proxy
            URL. No proxy is configured when it is empty.
        PROXY_SCHEME: Proxy URL scheme, defaults to ``http``.
        PROXY_USERNAME, PROXY_PASSWORD: Raw (not percent-encoded) proxy
            credentials. They are used only when both are non-empty.

    The collected options are forwarded to ``run_company_loop`` as
    ``client_kwargs`` together with the client and fetcher factories.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    client_kwargs = {}

    mpt = os.environ.get("BOSS_MPT", "")
    wt2 = os.environ.get("BOSS_WT2", "")
    if mpt or wt2:
        client_kwargs["signer"] = BossSign(mpt=mpt, wt2=wt2)

    tunnel = os.environ.get("PROXY_TUNNEL", "")
    if tunnel:
        scheme = os.environ.get("PROXY_SCHEME", "http")
        username = os.environ.get("PROXY_USERNAME", "")
        password = os.environ.get("PROXY_PASSWORD", "")
        if username and password:
            # Percent-encode the credentials: a raw '@', ':', '/', '#' or '?'
            # in either value would otherwise change how the URL is split
            # into userinfo / host / path.
            userinfo = f"{quote(username, safe='')}:{quote(password, safe='')}@"
        else:
            userinfo = ""
        client_kwargs["tunnel_proxy"] = f"{scheme}://{userinfo}{tunnel}"

    run_company_loop(
        platform="boss",
        create_company_fetcher=create_company_fetcher,
        create_client_fn=create_client,
        client_kwargs=client_kwargs,
    )
|
|
|
|
|
|
# Run the crawler only when this file is the program entry point
# (``python -m spiderJobs.platforms.boss.company_main`` or direct execution).
if __name__ == "__main__":
    main()
|