import math
import re
from collections.abc import Generator
from datetime import datetime
from typing import Any, Dict, List, Optional

from clickhouse_connect.driver import AsyncClient
from clickhouse_connect.driver.query import QueryResult

# Plain or dotted column identifiers only (e.g. ``source`` or ``job.source``).
# Anything else is rejected before it can be interpolated into SQL text.
_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)*")

# Maps the public ``interval`` argument to the ClickHouse bucketing function.
_INTERVAL_FUNCTIONS = {
    "day": "toStartOfDay",
    "hour": "toStartOfHour",
    "week": "toStartOfWeek",
    "month": "toStartOfMonth",
}
_DEFAULT_INTERVAL_FUNCTION = "toStartOfDay"


def _validate_identifier(name: str) -> str:
    """Return *name* unchanged if it is a safe SQL identifier.

    Identifiers cannot be bound as query parameters, so they end up inside
    the SQL text itself; this check keeps arbitrary SQL from being injected
    through them.

    Raises:
        ValueError: If *name* is not a string made of letters, digits and
            underscores (optionally dot-separated), starting with a letter
            or underscore.
    """
    if not isinstance(name, str) or _IDENTIFIER_RE.fullmatch(name) is None:
        raise ValueError(f"Invalid SQL identifier: {name!r}")
    return name


class ClickHouseBaseRepo:
    """Base repository holding a ClickHouse client and a target table name."""

    def __init__(self, clickhouse_client: AsyncClient, table_name: str):
        self._clickhouse_client = clickhouse_client
        self._table_name = table_name

    async def execute_query(
        self,
        query: str,
        parameters: Optional[Dict[str, Any]] = None,
    ) -> QueryResult:
        """Run *query* through the client, passing *parameters* for binding."""
        return await self._clickhouse_client.query(query, parameters=parameters)

    async def execute_insert(self, data: List[Dict[str, Any]]) -> None:
        """Insert *data* into this repository's table in one batch.

        The column list is taken from the keys of the first row, and every
        row is read using those keys, so a row missing one of them raises
        ``KeyError``. An empty *data* list is a no-op.
        """
        if not data:
            return

        columns = list(data[0].keys())
        values = [[row[col] for col in columns] for row in data]

        await self._clickhouse_client.insert(
            table=self._table_name,
            data=values,
            column_names=columns,
        )

    def _build_where_statements(
        self,
        filters: Optional[Dict[str, Any]] = None,
        from_dt: Optional[datetime] = None,
        to_dt: Optional[datetime] = None,
    ) -> tuple[List[str], Dict[str, Any]]:
        """Build WHERE conditions and the parameters they reference.

        Args:
            filters: Column name -> value equality filters. Entries whose
                value is ``None`` are skipped.
            from_dt: Inclusive lower bound on ``created_at``.
            to_dt: Inclusive upper bound on ``created_at``.

        Returns:
            A ``(statements, params)`` pair: the individual conditions (to be
            joined with ``AND``) and the values bound to their placeholders.

        Raises:
            ValueError: If a filter key is not a valid column identifier.
        """
        where_statements: List[str] = []
        params: Dict[str, Any] = {}

        for key, value in (filters or {}).items():
            if value is None:
                continue
            # The key becomes part of the SQL text, so it must be validated;
            # the value is bound as a parameter and needs no escaping here.
            column = _validate_identifier(key)
            where_statements.append(f"{column} = %({column})s")
            params[column] = value

        if from_dt is not None:
            where_statements.append("created_at >= %(from_dt)s")
            params['from_dt'] = from_dt

        if to_dt is not None:
            where_statements.append("created_at <= %(to_dt)s")
            params['to_dt'] = to_dt

        return where_statements, params

    def _build_where_clause(
        self,
        filters: Optional[Dict[str, Any]] = None,
        from_dt: Optional[datetime] = None,
        to_dt: Optional[datetime] = None,
    ) -> tuple[str, Dict[str, Any]]:
        """Return a ready-to-use WHERE clause body and its parameters.

        Falls back to ``1=1`` when no condition applies, so callers can
        always write ``WHERE {clause}``.
        """
        where_statements, params = self._build_where_statements(
            filters=filters,
            from_dt=from_dt,
            to_dt=to_dt,
        )
        where_clause = " AND ".join(where_statements) if where_statements else "1=1"
        return where_clause, params


class JobAnalyticsRepo(ClickHouseBaseRepo):
    """Analytics queries over the ``job_analytics`` table."""

    def __init__(self, clickhouse_client: AsyncClient):
        super().__init__(clickhouse_client, "job_analytics")

    async def get_job_count(
        self,
        filters: Optional[Dict[str, Any]] = None,
        from_dt: Optional[datetime] = None,
        to_dt: Optional[datetime] = None,
    ) -> int:
        """Return the number of rows matching the filters and time range."""
        where_clause, params = self._build_where_clause(filters, from_dt, to_dt)

        query = f"""
            SELECT COUNT(*)
            FROM {self._table_name}
            WHERE {where_clause}
        """

        result = await self.execute_query(query, parameters=params)
        return int(result.result_rows[0][0])

    async def group_jobs_by_column(
        self,
        group_by_column: str,
        filters: Optional[Dict[str, Any]] = None,
        from_dt: Optional[datetime] = None,
        to_dt: Optional[datetime] = None,
        limit: int = 10
    ) -> List[Dict[str, Any]]:
        """Count rows per distinct value of *group_by_column*.

        Args:
            group_by_column: Column to group on; must be a plain identifier.
            filters: Optional equality filters.
            from_dt: Inclusive lower bound on ``created_at``.
            to_dt: Inclusive upper bound on ``created_at``.
            limit: Maximum number of groups returned, largest first.

        Returns:
            A list of ``{"category": ..., "job_count": ...}`` dicts ordered
            by descending count.

        Raises:
            ValueError: If *group_by_column* is not a valid identifier or
                *limit* cannot be converted to an integer.
        """
        # Both values are placed in the SQL text, so sanitise them first.
        column = _validate_identifier(group_by_column)
        row_limit = int(limit)

        where_clause, params = self._build_where_clause(filters, from_dt, to_dt)

        query = f"""
            SELECT
                {column} AS category,
                COUNT(*) AS job_count
            FROM {self._table_name}
            WHERE {where_clause}
            GROUP BY {column}
            ORDER BY job_count DESC
            LIMIT {row_limit}
        """

        result = await self.execute_query(query, parameters=params)
        return [
            {"category": row[0], "job_count": int(row[1])}
            for row in result.result_rows
        ]

    async def get_volume_trend(
        self,
        interval: str = "day",  # day, hour, week or month
        filters: Optional[Dict[str, Any]] = None,
        from_dt: Optional[datetime] = None,
        to_dt: Optional[datetime] = None,
    ) -> List[Dict[str, Any]]:
        """Return row counts per time bucket and source.

        Args:
            interval: Bucket size: ``"day"``, ``"hour"``, ``"week"`` or
                ``"month"``. Any other value falls back to daily buckets.
            filters: Optional equality filters.
            from_dt: Inclusive lower bound on ``created_at``.
            to_dt: Inclusive upper bound on ``created_at``.

        Returns:
            A list of ``{"time": <ISO string>, "source": ..., "count": ...}``
            dicts ordered by ascending bucket time.
        """
        where_clause, params = self._build_where_clause(filters, from_dt, to_dt)

        time_func = _INTERVAL_FUNCTIONS.get(interval, _DEFAULT_INTERVAL_FUNCTION)

        # toTimeZone makes the buckets follow Beijing time (Asia/Shanghai).
        query = f"""
            SELECT
                {time_func}(toTimeZone(created_at, 'Asia/Shanghai')) AS time_bucket,
                source,
                COUNT(*) AS count
            FROM {self._table_name}
            WHERE {where_clause}
            GROUP BY time_bucket, source
            ORDER BY time_bucket ASC
        """

        result = await self.execute_query(query, parameters=params)
        return [
            {
                "time": row[0].isoformat(),
                "source": row[1],
                "count": int(row[2])
            }
            for row in result.result_rows
        ]

    async def get_source_distribution(
        self,
        filters: Optional[Dict[str, Any]] = None,
        from_dt: Optional[datetime] = None,
        to_dt: Optional[datetime] = None,
    ) -> List[Dict[str, Any]]:
        """Return row counts grouped by the ``source`` column."""
        return await self.group_jobs_by_column("source", filters, from_dt, to_dt)