Merge pull request #2777 from lyen1688/feat/content-moderation-risk-threshold

feat: 支持内容审计风险阈值配置
2026-05-26 14:12:54 +08:00 · 2026-05-26 14:12:54 +08:00 · 83248478e2
commit 83248478e2
parent a28e8e3d44 23f3d426c6
8 changed files with 216 additions and 1 deletion
--- a/backend/internal/handler/admin/content_moderation_handler.go
+++ b/backend/internal/handler/admin/content_moderation_handler.go
@ -34,6 +34,7 @@ type contentModerationConfigRequest struct {
 	AllGroups            *bool                                 `json:"all_groups"`
 	GroupIDs             *[]int64                              `json:"group_ids"`
 	RecordNonHits        *bool                                 `json:"record_non_hits"`
+	Thresholds           *map[string]float64                   `json:"thresholds"`
 	WorkerCount          *int                                  `json:"worker_count"`
 	QueueSize            *int                                  `json:"queue_size"`
 	BlockStatus          *int                                  `json:"block_status"`
@ -94,6 +95,7 @@ func (h *ContentModerationHandler) UpdateConfig(c *gin.Context) {
 		AllGroups:            req.AllGroups,
 		GroupIDs:             req.GroupIDs,
 		RecordNonHits:        req.RecordNonHits,
+		Thresholds:           req.Thresholds,
 		WorkerCount:          req.WorkerCount,
 		QueueSize:            req.QueueSize,
 		BlockStatus:          req.BlockStatus,
--- a/backend/internal/service/content_moderation.go
+++ b/backend/internal/service/content_moderation.go
@ -177,6 +177,7 @@ type ContentModerationConfigView struct {
 	AllGroups            bool                            `json:"all_groups"`
 	GroupIDs             []int64                         `json:"group_ids"`
 	RecordNonHits        bool                            `json:"record_non_hits"`
+	Thresholds           map[string]float64              `json:"thresholds"`
 	WorkerCount          int                             `json:"worker_count"`
 	QueueSize            int                             `json:"queue_size"`
 	BlockStatus          int                             `json:"block_status"`
@ -249,6 +250,7 @@ type UpdateContentModerationConfigInput struct {
 	AllGroups            *bool                         `json:"all_groups"`
 	GroupIDs             *[]int64                      `json:"group_ids"`
 	RecordNonHits        *bool                         `json:"record_non_hits"`
+	Thresholds           *map[string]float64           `json:"thresholds"`
 	WorkerCount          *int                          `json:"worker_count"`
 	QueueSize            *int                          `json:"queue_size"`
 	BlockStatus          *int                          `json:"block_status"`
@ -607,6 +609,9 @@ func (s *ContentModerationService) UpdateConfig(ctx context.Context, input Updat
 	if input.RecordNonHits != nil {
 		cfg.RecordNonHits = *input.RecordNonHits
 	}
+	if input.Thresholds != nil {
+		cfg.Thresholds = mergeContentModerationThresholds(ContentModerationDefaultThresholds(), *input.Thresholds)
+	}
 	if input.ClearAPIKey {
 		cfg.APIKey = ""
 		cfg.APIKeys = []string{}
@ -1894,6 +1899,7 @@ func (s *ContentModerationService) configView(cfg *ContentModerationConfig) *Con
 		AllGroups:            cfg.AllGroups,
 		GroupIDs:             append([]int64(nil), cfg.GroupIDs...),
 		RecordNonHits:        cfg.RecordNonHits,
+		Thresholds:           cloneFloatMap(cfg.Thresholds),
 		WorkerCount:          cfg.WorkerCount,
 		QueueSize:            cfg.QueueSize,
 		BlockStatus:          cfg.BlockStatus,
--- a/backend/internal/service/content_moderation_test.go
+++ b/backend/internal/service/content_moderation_test.go
@ -726,6 +726,37 @@ func TestContentModerationUpdateConfig_ReplacesAPIKeysWhenRequested(t *testing.T
 	require.Equal(t, []string{"sk-new-only"}, saved.apiKeys())
 }

+func TestContentModerationUpdateConfig_SavesCustomThresholds(t *testing.T) { // UpdateConfig with custom thresholds: checks both the returned view and the persisted setting
+	cfg := defaultContentModerationConfig()
+	rawCfg, err := json.Marshal(cfg)
+	require.NoError(t, err)
+
+	repo := &contentModerationTestSettingRepo{values: map[string]string{ // seed the repo with the default config as the stored setting
+		SettingKeyContentModerationConfig: string(rawCfg),
+	}}
+	svc := NewContentModerationService(repo, nil, nil, nil, nil, nil, nil)
+	thresholds := map[string]float64{
+		"sexual":     0.72, // in range: expected to be kept as-is
+		"harassment": 1.25, // above 1: expected to come back as 1.0
+		"unknown":    0.01, // not a known category: expected to be dropped
+	}
+
+	view, err := svc.UpdateConfig(context.Background(), UpdateContentModerationConfigInput{
+		Thresholds: &thresholds,
+	})
+
+	require.NoError(t, err)
+	require.Equal(t, 0.72, view.Thresholds["sexual"])
+	require.Equal(t, 1.0, view.Thresholds["harassment"])
+	require.NotContains(t, view.Thresholds, "unknown")
+
+	var saved ContentModerationConfig // re-read what was written to the repo to verify persistence, not just the view
+	require.NoError(t, json.Unmarshal([]byte(repo.values[SettingKeyContentModerationConfig]), &saved))
+	require.Equal(t, 0.72, saved.Thresholds["sexual"])
+	require.Equal(t, 1.0, saved.Thresholds["harassment"])
+	require.NotContains(t, saved.Thresholds, "unknown")
+}
+
 func TestExtractContentModerationInput_AnthropicImageSourceOnlyParticipatesInMemory(t *testing.T) {
 	body := []byte(`{
 		"messages": [
--- a/frontend/src/api/admin/riskControl.ts
+++ b/frontend/src/api/admin/riskControl.ts
@ -24,6 +24,7 @@ export interface ContentModerationConfig {
  all_groups: boolean
  group_ids: number[]
  record_non_hits: boolean
+  thresholds: Record<string, number>
  worker_count: number
  queue_size: number
  block_status: number
@ -98,6 +99,7 @@ export interface UpdateContentModerationConfig {
  all_groups?: boolean
  group_ids?: number[]
  record_non_hits?: boolean
+  thresholds?: Record<string, number>
  worker_count?: number
  queue_size?: number
  block_status?: number
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@ -2564,11 +2564,17 @@ export default {
      lastCleanup: 'Last cleanup: {time}',
      cleanupStats: 'Last cleanup deleted {hit} hits and {nonHit} non-hits',
      riskSwitchOff: 'System switch off',
+      riskThresholds: 'Risk Thresholds',
+      riskThresholdsHint: 'Adjust hit thresholds by OpenAI Moderations category. Scores greater than or equal to the threshold count as hits.',
+      riskThresholdDefault: 'Default {value}',
+      riskThresholdReset: 'Restore defaults',
+      riskThresholdPercent: 'Threshold percentage',
      tabs: {
        basic: 'Basic',
        scope: 'Scope',
        runtime: 'Runtime',
        response: 'Hit Notice',
+        riskThresholds: 'Risk Thresholds',
        keywords: 'Keyword Block',
        retention: 'Retention',
      },
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@ -2641,11 +2641,17 @@ export default {
      lastCleanup: '上次清理：{time}',
      cleanupStats: '上次清理删除命中 {hit} 条，未命中 {nonHit} 条',
      riskSwitchOff: '系统开关关闭',
+      riskThresholds: '风险阈值',
+      riskThresholdsHint: '按 OpenAI Moderations 分类调整命中阈值，分数达到或超过阈值即视为命中。',
+      riskThresholdDefault: '默认 {value}',
+      riskThresholdReset: '恢复默认阈值',
+      riskThresholdPercent: '阈值百分比',
      tabs: {
        basic: '基础',
        scope: '审计范围',
        runtime: '运行队列',
        response: '命中通知',
+        riskThresholds: '风险阈值',
        keywords: '关键词拦截',
        retention: '日志保留',
      },
--- a/frontend/src/views/admin/RiskControlView.vue
+++ b/frontend/src/views/admin/RiskControlView.vue
@ -794,6 +794,63 @@
            </div>
          </div>

+          <div v-else-if="activeSettingsTab === 'riskThresholds'" class="space-y-5"> <!-- Risk-threshold settings tab: heading, restore-defaults button, and one percent input per category -->
+            <div class="flex flex-col gap-3 lg:flex-row lg:items-center lg:justify-between">
+              <div>
+                <h3 class="text-base font-semibold text-gray-900 dark:text-white">{{ t('admin.riskControl.riskThresholds') }}</h3>
+                <p class="mt-1 text-sm text-gray-500 dark:text-gray-400">{{ t('admin.riskControl.riskThresholdsHint') }}</p>
+              </div>
+              <button
+                type="button"
+                class="btn btn-secondary inline-flex items-center justify-center gap-2"
+                @click="resetRiskThresholds"
+              >
+                <Icon name="refresh" size="sm" />
+                {{ t('admin.riskControl.riskThresholdReset') }}
+              </button>
+            </div>
+
+            <div class="grid grid-cols-1 gap-3 md:grid-cols-2 xl:grid-cols-3"> <!-- One card per entry of riskThresholdRows -->
+              <div
+                v-for="row in riskThresholdRows"
+                :key="row.category"
+                class="rounded-lg border border-gray-100 bg-gray-50 p-4 dark:border-dark-700 dark:bg-dark-900/30"
+              >
+                <div class="flex items-start justify-between gap-3">
+                  <div class="min-w-0">
+                    <label class="block truncate text-sm font-semibold text-gray-900 dark:text-white" :for="`risk-threshold-${row.category}`">
+                      {{ row.category }}
+                    </label>
+                    <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
+                      {{ t('admin.riskControl.riskThresholdDefault', { value: formatThresholdPercent(row.defaultValue) }) }}
+                    </p>
+                  </div>
+                  <span class="inline-flex shrink-0 rounded-md bg-white px-2 py-1 font-mono text-xs font-medium text-gray-600 shadow-sm dark:bg-dark-800 dark:text-gray-300">
+                    {{ formatThresholdPercent(row.value) }}
+                  </span> <!-- Read-out of the current value, formatted through formatThresholdPercent (clamped to 0 to 100, one decimal) -->
+                </div>
+                <div class="mt-3">
+                  <label class="sr-only" :for="`risk-threshold-${row.category}`">
+                    {{ t('admin.riskControl.riskThresholdPercent') }}
+                  </label>
+                  <div class="relative">
+                    <input
+                      :id="`risk-threshold-${row.category}`"
+                      v-model.number="configForm.thresholds[row.category]"
+                      :data-test="`risk-threshold-${row.category}`"
+                      type="number"
+                      min="0"
+                      max="100"
+                      step="0.1"
+                      class="input pr-8 font-mono"
+                    />
+                    <span class="pointer-events-none absolute right-3 top-1/2 -translate-y-1/2 text-gray-400">%</span>
+                  </div>
+                </div>
+              </div>
+            </div>
+          </div>
+
          <div v-else-if="activeSettingsTab === 'keywords'" class="space-y-5">
            <div
              class="flex items-start gap-3 rounded-lg border p-4"
@ -972,7 +1029,7 @@ import { useAppStore } from '@/stores/app'
 import { extractApiErrorMessage } from '@/utils/apiError'
 import { formatDateTime as formatDateTimeValue } from '@/utils/format'

-type SettingsTab = 'basic' | 'scope' | 'runtime' | 'response' | 'retention' | 'keywords'
+type SettingsTab = 'basic' | 'scope' | 'runtime' | 'response' | 'riskThresholds' | 'retention' | 'keywords' // ids of the settings tabs; used as the `id` type in settingsTabs and compared against activeSettingsTab in the template
 type WorkerSlotState = 'active' | 'idle' | 'disabled'
 type APIKeysWriteMode = 'append' | 'replace'
 type OverviewIcon = 'shield' | 'key' | 'users' | 'document'
@ -992,11 +1049,32 @@ type ModerationScoreRow = {
  threshold: number
  hit: boolean
 }
+type RiskThresholdRow = { // view model for one card in the risk-threshold grid
+  category: string // moderation category key, e.g. 'sexual/minors'
+  value: number // current form value, as a percentage
+  defaultValue: number // built-in default for the category, as a percentage
+}

 const maxModerationTestImages = 1
 const maxModerationTestImageSize = 8 * 1024 * 1024
 const maxVisibleApiKeyRows: number = 3
 const blockedKeywordMax = 10000
+const riskThresholdDefaults: Record<string, number> = { // default hit threshold per moderation category, as a percentage (0–100), not a 0–1 fraction
+  harassment: 98,
+  'harassment/threatening': 90,
+  hate: 65,
+  'hate/threatening': 65,
+  illicit: 95,
+  'illicit/violent': 95,
+  'self-harm': 65,
+  'self-harm/intent': 85,
+  'self-harm/instructions': 65,
+  sexual: 65,
+  'sexual/minors': 65,
+  violence: 95,
+  'violence/graphic': 95,
+} // NOTE(review): presumably mirrors the backend's ContentModerationDefaultThresholds() (scaled by 100) — confirm the two stay in sync
+const riskThresholdCategories = Object.keys(riskThresholdDefaults) // known categories, in the key order of the defaults object

 const { t } = useI18n()
 const appStore = useAppStore()
@ -1054,6 +1132,7 @@ const configForm = reactive({
  hit_retention_days: 180,
  non_hit_retention_days: 3,
  pre_hash_check_enabled: false,
+  thresholds: { ...riskThresholdDefaults } as Record<string, number>,
  blocked_keywords_text: '',
  keyword_blocking_mode: 'keyword_and_api' as KeywordBlockingMode,
  model_filter_type: 'all' as ContentModerationModelFilterType,
@ -1081,6 +1160,7 @@ const settingsTabs = computed<Array<{ id: SettingsTab; label: string }>>(() => [
  { id: 'scope', label: t('admin.riskControl.tabs.scope') },
  { id: 'runtime', label: t('admin.riskControl.tabs.runtime') },
  { id: 'response', label: t('admin.riskControl.tabs.response') },
+  { id: 'riskThresholds', label: t('admin.riskControl.tabs.riskThresholds') },
  { id: 'keywords', label: t('admin.riskControl.tabs.keywords') },
  { id: 'retention', label: t('admin.riskControl.tabs.retention') },
 ])
@ -1373,6 +1453,14 @@ const moderationScoreRows = computed<ModerationScoreRow[]>(() => {
    .sort((a, b) => b.score - a.score)
 })

+const riskThresholdRows = computed<RiskThresholdRow[]>(() => ( // rows backing the threshold grid: one per known category, in riskThresholdCategories order
+  riskThresholdCategories.map((category) => ({
+    category,
+    value: configForm.thresholds[category] ?? riskThresholdDefaults[category], // null/undefined form entries fall back to the default
+    defaultValue: riskThresholdDefaults[category],
+  }))
+))
+
 const inputDetailText = computed(() => {
  if (!inputDetailRow.value) return '-'
  return inputDetailRow.value.input_excerpt || inputDetailRow.value.error || '-'
@ -1445,6 +1533,7 @@ function applyConfig(config: ContentModerationConfig) {
  configForm.hit_retention_days = config.hit_retention_days || 180
  configForm.non_hit_retention_days = Math.min(Math.max(config.non_hit_retention_days || 3, 1), 3)
  configForm.pre_hash_check_enabled = config.pre_hash_check_enabled ?? false
+  configForm.thresholds = riskThresholdsFromConfig(config.thresholds)
  configForm.blocked_keywords_text = Array.isArray(config.blocked_keywords) ? config.blocked_keywords.join('\n') : ''
  configForm.keyword_blocking_mode = normalizeKeywordBlockingMode(config.keyword_blocking_mode)
  const modelFilter = normalizeModelFilter(config.model_filter)
@ -1524,6 +1613,7 @@ async function saveConfig() {
      hit_retention_days: Number(configForm.hit_retention_days) || 180,
      non_hit_retention_days: Math.min(Math.max(Number(configForm.non_hit_retention_days) || 3, 1), 3),
      pre_hash_check_enabled: configForm.pre_hash_check_enabled,
+      thresholds: buildRiskThresholdPayload(),
      blocked_keywords: blockedKeywordList.value,
      keyword_blocking_mode: configForm.keyword_blocking_mode,
      model_filter: modelFilterPayload,
@ -1988,6 +2078,41 @@ function buildModelFilterPayload(): ContentModerationModelFilter {
  }
 }

+/**
+ * Converts the thresholds of a loaded config into form state.
+ *
+ * The config stores thresholds as fractions; the form edits them as
+ * percentages (0–100). Only known categories are copied; a category that is
+ * missing or non-finite in the config keeps its default.
+ *
+ * @param thresholds - Thresholds from the loaded config, possibly null/undefined.
+ * @returns A new object with one percentage per known category.
+ */
+function riskThresholdsFromConfig(thresholds: Record<string, number> | null | undefined): Record<string, number> {
+  const out: Record<string, number> = { ...riskThresholdDefaults }
+  for (const category of riskThresholdCategories) {
+    const value = thresholds?.[category]
+    if (Number.isFinite(value)) {
+      // Multiplying a binary fraction by 100 is not exact (0.07 * 100 === 7.000000000000001),
+      // and the raw product would show up verbatim in the number input. Two decimals of a
+      // percentage is the same precision the save path keeps (4 decimals of the fraction).
+      out[category] = Number(clampPercent(Number(value) * 100).toFixed(2))
+    }
+  }
+  return out
+}
+
+/**
+ * Builds the `thresholds` field of the save request from the form state.
+ *
+ * Form values are percentages (0–100); the payload carries fractions rounded
+ * to 4 decimals, one entry per known category.
+ *
+ * @returns Threshold fractions keyed by category.
+ */
+function buildRiskThresholdPayload(): Record<string, number> {
+  const payload: Record<string, number> = {}
+  for (const category of riskThresholdCategories) {
+    // A cleared `v-model.number` input leaves '' in the model, and Number('') is 0.
+    // A threshold of 0 would make every score count as a hit for that category, so
+    // blank or non-numeric entries fall back to the category default instead.
+    const raw: unknown = configForm.thresholds[category]
+    const blank = raw == null || (typeof raw === 'string' && raw.trim() === '')
+    const percent = blank || !Number.isFinite(Number(raw)) ? riskThresholdDefaults[category] : Number(raw)
+    payload[category] = Number((clampPercent(percent) / 100).toFixed(4))
+  }
+  return payload
+}
+
+function resetRiskThresholds() { // click handler of the restore-defaults button; only touches form state
+  configForm.thresholds = { ...riskThresholdDefaults } // fresh copy, so later edits never mutate the defaults object
+}
+
+function clampPercent(value: unknown): number { // coerce any input to a number within [0, 100]
+  const numeric = Number(value)
+  if (!Number.isFinite(numeric)) { // NaN or ±Infinity, e.g. from undefined or a non-numeric string
+    return 0
+  }
+  return Math.min(100, Math.max(0, numeric)) // note: '' and null are coerced to 0 by Number(), so they also yield 0
+}
+
+function formatThresholdPercent(value: number): string { // display form of a percentage, e.g. 72 -> '72.0%'
+  return `${clampPercent(value).toFixed(1)}%` // clamped to 0–100 first, then fixed to one decimal
+}
+
 function parseBlockedKeywords(value: string): string[] {
  const seen = new Set<string>()
  const out: string[] = []
--- a/frontend/src/views/admin/tests/RiskControlView.spec.ts
+++ b/frontend/src/views/admin/tests/RiskControlView.spec.ts
@ -93,6 +93,10 @@ const baseConfig = (): ContentModerationConfig => ({
  pre_hash_check_enabled: false,
  blocked_keywords: [],
  keyword_blocking_mode: 'keyword_and_api',
+  thresholds: {
+    harassment: 0.98,
+    sexual: 0.65,
+  },
  model_filter: {
    type: 'all',
    models: [],
@ -224,4 +228,37 @@ describe('admin RiskControlView', () => {
    }))
    expect(showError).not.toHaveBeenCalled()
  })
+
+  it('submits edited risk control thresholds when saving moderation config', async () => { // edits two percent inputs and checks the save payload carries them as fractions
+    const wrapper = mount(RiskControlView, {
+      global: {
+        stubs: {
+          AppLayout: AppLayoutStub,
+          BaseDialog: BaseDialogStub,
+          Icon: true,
+          Select: true,
+          Toggle: true,
+          Pagination: true,
+          ModelWhitelistSelector: ModelWhitelistSelectorStub,
+        },
+      },
+    })
+
+    await flushPromises()
+
+    await findButtonByText(wrapper, 'admin.riskControl.openSettings').trigger('click')
+    await findButtonByText(wrapper, 'admin.riskControl.tabs.riskThresholds').trigger('click')
+    await wrapper.get('[data-test="risk-threshold-sexual"]').setValue('72') // inputs take percentages
+    await wrapper.get('[data-test="risk-threshold-harassment"]').setValue('99')
+    await findButtonByText(wrapper, 'admin.riskControl.saveConfig').trigger('click')
+    await flushPromises()
+
+    expect(updateConfig).toHaveBeenCalledWith(expect.objectContaining({
+      thresholds: expect.objectContaining({ // payload is expected in fractions: 72 -> 0.72, 99 -> 0.99
+        sexual: 0.72,
+        harassment: 0.99,
+      }),
+    }))
+    expect(showError).not.toHaveBeenCalled()
+  })
 })