Merge pull request #2777 from lyen1688/feat/content-moderation-risk-threshold

feat: 支持内容审计风险阈值配置
This commit is contained in:
Wesley Liddick 2026-05-26 14:12:54 +08:00 committed by GitHub
commit 83248478e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 216 additions and 1 deletions

View File

@ -34,6 +34,7 @@ type contentModerationConfigRequest struct {
AllGroups *bool `json:"all_groups"`
GroupIDs *[]int64 `json:"group_ids"`
RecordNonHits *bool `json:"record_non_hits"`
Thresholds *map[string]float64 `json:"thresholds"`
WorkerCount *int `json:"worker_count"`
QueueSize *int `json:"queue_size"`
BlockStatus *int `json:"block_status"`
@ -94,6 +95,7 @@ func (h *ContentModerationHandler) UpdateConfig(c *gin.Context) {
AllGroups: req.AllGroups,
GroupIDs: req.GroupIDs,
RecordNonHits: req.RecordNonHits,
Thresholds: req.Thresholds,
WorkerCount: req.WorkerCount,
QueueSize: req.QueueSize,
BlockStatus: req.BlockStatus,

View File

@ -177,6 +177,7 @@ type ContentModerationConfigView struct {
AllGroups bool `json:"all_groups"`
GroupIDs []int64 `json:"group_ids"`
RecordNonHits bool `json:"record_non_hits"`
Thresholds map[string]float64 `json:"thresholds"`
WorkerCount int `json:"worker_count"`
QueueSize int `json:"queue_size"`
BlockStatus int `json:"block_status"`
@ -249,6 +250,7 @@ type UpdateContentModerationConfigInput struct {
AllGroups *bool `json:"all_groups"`
GroupIDs *[]int64 `json:"group_ids"`
RecordNonHits *bool `json:"record_non_hits"`
Thresholds *map[string]float64 `json:"thresholds"`
WorkerCount *int `json:"worker_count"`
QueueSize *int `json:"queue_size"`
BlockStatus *int `json:"block_status"`
@ -607,6 +609,9 @@ func (s *ContentModerationService) UpdateConfig(ctx context.Context, input Updat
if input.RecordNonHits != nil {
cfg.RecordNonHits = *input.RecordNonHits
}
if input.Thresholds != nil {
cfg.Thresholds = mergeContentModerationThresholds(ContentModerationDefaultThresholds(), *input.Thresholds)
}
if input.ClearAPIKey {
cfg.APIKey = ""
cfg.APIKeys = []string{}
@ -1894,6 +1899,7 @@ func (s *ContentModerationService) configView(cfg *ContentModerationConfig) *Con
AllGroups: cfg.AllGroups,
GroupIDs: append([]int64(nil), cfg.GroupIDs...),
RecordNonHits: cfg.RecordNonHits,
Thresholds: cloneFloatMap(cfg.Thresholds),
WorkerCount: cfg.WorkerCount,
QueueSize: cfg.QueueSize,
BlockStatus: cfg.BlockStatus,

View File

@ -726,6 +726,37 @@ func TestContentModerationUpdateConfig_ReplacesAPIKeysWhenRequested(t *testing.T
require.Equal(t, []string{"sk-new-only"}, saved.apiKeys())
}
// TestContentModerationUpdateConfig_SavesCustomThresholds updates the moderation
// config with custom thresholds and checks both the returned view and the value
// written to the settings repo: an in-range value is kept as given, a value
// above 1 comes back as 1.0, and a category named "unknown" is not present.
func TestContentModerationUpdateConfig_SavesCustomThresholds(t *testing.T) {
	// Seed the repo with the serialized default config so the update starts from it.
	encoded, err := json.Marshal(defaultContentModerationConfig())
	require.NoError(t, err)

	settings := &contentModerationTestSettingRepo{values: map[string]string{
		SettingKeyContentModerationConfig: string(encoded),
	}}
	service := NewContentModerationService(settings, nil, nil, nil, nil, nil, nil)

	requested := map[string]float64{
		"sexual":     0.72,
		"harassment": 1.25,
		"unknown":    0.01,
	}
	input := UpdateContentModerationConfigInput{Thresholds: &requested}

	view, err := service.UpdateConfig(context.Background(), input)
	require.NoError(t, err)

	// The same expectations apply to the API view and to the persisted config.
	assertThresholds := func(got map[string]float64) {
		require.Equal(t, 0.72, got["sexual"])
		require.Equal(t, 1.0, got["harassment"])
		require.NotContains(t, got, "unknown")
	}
	assertThresholds(view.Thresholds)

	var persisted ContentModerationConfig
	require.NoError(t, json.Unmarshal([]byte(settings.values[SettingKeyContentModerationConfig]), &persisted))
	assertThresholds(persisted.Thresholds)
}
func TestExtractContentModerationInput_AnthropicImageSourceOnlyParticipatesInMemory(t *testing.T) {
body := []byte(`{
"messages": [

View File

@ -24,6 +24,7 @@ export interface ContentModerationConfig {
all_groups: boolean
group_ids: number[]
record_non_hits: boolean
thresholds: Record<string, number>
worker_count: number
queue_size: number
block_status: number
@ -98,6 +99,7 @@ export interface UpdateContentModerationConfig {
all_groups?: boolean
group_ids?: number[]
record_non_hits?: boolean
thresholds?: Record<string, number>
worker_count?: number
queue_size?: number
block_status?: number

View File

@ -2564,11 +2564,17 @@ export default {
lastCleanup: 'Last cleanup: {time}',
cleanupStats: 'Last cleanup deleted {hit} hits and {nonHit} non-hits',
riskSwitchOff: 'System switch off',
riskThresholds: 'Risk Thresholds',
riskThresholdsHint: 'Adjust hit thresholds by OpenAI Moderations category. Scores greater than or equal to the threshold count as hits.',
riskThresholdDefault: 'Default {value}',
riskThresholdReset: 'Restore defaults',
riskThresholdPercent: 'Threshold percentage',
tabs: {
basic: 'Basic',
scope: 'Scope',
runtime: 'Runtime',
response: 'Hit Notice',
riskThresholds: 'Risk Thresholds',
keywords: 'Keyword Block',
retention: 'Retention',
},

View File

@ -2641,11 +2641,17 @@ export default {
lastCleanup: '上次清理:{time}',
cleanupStats: '上次清理删除命中 {hit} 条,未命中 {nonHit} 条',
riskSwitchOff: '系统开关关闭',
riskThresholds: '风险阈值',
riskThresholdsHint: '按 OpenAI Moderations 分类调整命中阈值,分数达到或超过阈值即视为命中。',
riskThresholdDefault: '默认 {value}',
riskThresholdReset: '恢复默认阈值',
riskThresholdPercent: '阈值百分比',
tabs: {
basic: '基础',
scope: '审计范围',
runtime: '运行队列',
response: '命中通知',
riskThresholds: '风险阈值',
keywords: '关键词拦截',
retention: '日志保留',
},

View File

@ -794,6 +794,63 @@
</div>
</div>
<div v-else-if="activeSettingsTab === 'riskThresholds'" class="space-y-5">
<div class="flex flex-col gap-3 lg:flex-row lg:items-center lg:justify-between">
<div>
<h3 class="text-base font-semibold text-gray-900 dark:text-white">{{ t('admin.riskControl.riskThresholds') }}</h3>
<p class="mt-1 text-sm text-gray-500 dark:text-gray-400">{{ t('admin.riskControl.riskThresholdsHint') }}</p>
</div>
<button
type="button"
class="btn btn-secondary inline-flex items-center justify-center gap-2"
@click="resetRiskThresholds"
>
<Icon name="refresh" size="sm" />
{{ t('admin.riskControl.riskThresholdReset') }}
</button>
</div>
<div class="grid grid-cols-1 gap-3 md:grid-cols-2 xl:grid-cols-3">
<div
v-for="row in riskThresholdRows"
:key="row.category"
class="rounded-lg border border-gray-100 bg-gray-50 p-4 dark:border-dark-700 dark:bg-dark-900/30"
>
<div class="flex items-start justify-between gap-3">
<div class="min-w-0">
<label class="block truncate text-sm font-semibold text-gray-900 dark:text-white" :for="`risk-threshold-${row.category}`">
{{ row.category }}
</label>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.riskControl.riskThresholdDefault', { value: formatThresholdPercent(row.defaultValue) }) }}
</p>
</div>
<span class="inline-flex shrink-0 rounded-md bg-white px-2 py-1 font-mono text-xs font-medium text-gray-600 shadow-sm dark:bg-dark-800 dark:text-gray-300">
{{ formatThresholdPercent(row.value) }}
</span>
</div>
<div class="mt-3">
<label class="sr-only" :for="`risk-threshold-${row.category}`">
{{ t('admin.riskControl.riskThresholdPercent') }}
</label>
<div class="relative">
<input
:id="`risk-threshold-${row.category}`"
v-model.number="configForm.thresholds[row.category]"
:data-test="`risk-threshold-${row.category}`"
type="number"
min="0"
max="100"
step="0.1"
class="input pr-8 font-mono"
/>
<span class="pointer-events-none absolute right-3 top-1/2 -translate-y-1/2 text-gray-400">%</span>
</div>
</div>
</div>
</div>
</div>
<div v-else-if="activeSettingsTab === 'keywords'" class="space-y-5">
<div
class="flex items-start gap-3 rounded-lg border p-4"
@ -972,7 +1029,7 @@ import { useAppStore } from '@/stores/app'
import { extractApiErrorMessage } from '@/utils/apiError'
import { formatDateTime as formatDateTimeValue } from '@/utils/format'
type SettingsTab = 'basic' | 'scope' | 'runtime' | 'response' | 'retention' | 'keywords'
type SettingsTab = 'basic' | 'scope' | 'runtime' | 'response' | 'riskThresholds' | 'retention' | 'keywords'
type WorkerSlotState = 'active' | 'idle' | 'disabled'
type APIKeysWriteMode = 'append' | 'replace'
type OverviewIcon = 'shield' | 'key' | 'users' | 'document'
@ -992,11 +1049,32 @@ type ModerationScoreRow = {
threshold: number
hit: boolean
}
/** One card in the risk-threshold settings grid (see the `riskThresholdRows` computed). */
type RiskThresholdRow = {
// Moderation category key taken from riskThresholdCategories, e.g. 'sexual/minors'.
category: string
// Current form value for the category; rendered through formatThresholdPercent,
// i.e. treated as a percentage rather than a 0..1 fraction.
value: number
// Entry from riskThresholdDefaults for the category, shown as the "Default ..." hint.
defaultValue: number
}
const maxModerationTestImages = 1
const maxModerationTestImageSize = 8 * 1024 * 1024
const maxVisibleApiKeyRows: number = 3
const blockedKeywordMax = 10000
// Default risk threshold per moderation category, expressed as a PERCENTAGE
// (98 means 98%). The form edits percentages; buildRiskThresholdPayload divides
// by 100 before sending and riskThresholdsFromConfig multiplies config values by 100.
// These values seed configForm.thresholds and are what resetRiskThresholds restores.
// NOTE(review): presumably mirrors the backend's default thresholds — confirm the
// two lists stay in sync.
const riskThresholdDefaults: Record<string, number> = {
harassment: 98,
'harassment/threatening': 90,
hate: 65,
'hate/threatening': 65,
illicit: 95,
'illicit/violent': 95,
'self-harm': 65,
'self-harm/intent': 85,
'self-harm/instructions': 65,
sexual: 65,
'sexual/minors': 65,
violence: 95,
'violence/graphic': 95,
}
// Category keys in declaration order; this is the list every threshold helper
// iterates, so categories not listed above are never read from or written to the form.
const riskThresholdCategories = Object.keys(riskThresholdDefaults)
const { t } = useI18n()
const appStore = useAppStore()
@ -1054,6 +1132,7 @@ const configForm = reactive({
hit_retention_days: 180,
non_hit_retention_days: 3,
pre_hash_check_enabled: false,
thresholds: { ...riskThresholdDefaults } as Record<string, number>,
blocked_keywords_text: '',
keyword_blocking_mode: 'keyword_and_api' as KeywordBlockingMode,
model_filter_type: 'all' as ContentModerationModelFilterType,
@ -1081,6 +1160,7 @@ const settingsTabs = computed<Array<{ id: SettingsTab; label: string }>>(() => [
{ id: 'scope', label: t('admin.riskControl.tabs.scope') },
{ id: 'runtime', label: t('admin.riskControl.tabs.runtime') },
{ id: 'response', label: t('admin.riskControl.tabs.response') },
{ id: 'riskThresholds', label: t('admin.riskControl.tabs.riskThresholds') },
{ id: 'keywords', label: t('admin.riskControl.tabs.keywords') },
{ id: 'retention', label: t('admin.riskControl.tabs.retention') },
])
@ -1373,6 +1453,14 @@ const moderationScoreRows = computed<ModerationScoreRow[]>(() => {
.sort((a, b) => b.score - a.score)
})
// Rows for the risk-threshold grid: one entry per known category, pairing the
// value currently held by the form with that category's built-in default.
// A missing (null/undefined) form value falls back to the default.
const riskThresholdRows = computed<RiskThresholdRow[]>(() => {
  const rows: RiskThresholdRow[] = []
  for (const category of riskThresholdCategories) {
    const defaultValue = riskThresholdDefaults[category]
    const current = configForm.thresholds[category]
    rows.push({
      category,
      value: current ?? defaultValue,
      defaultValue,
    })
  }
  return rows
})
const inputDetailText = computed(() => {
if (!inputDetailRow.value) return '-'
return inputDetailRow.value.input_excerpt || inputDetailRow.value.error || '-'
@ -1445,6 +1533,7 @@ function applyConfig(config: ContentModerationConfig) {
configForm.hit_retention_days = config.hit_retention_days || 180
configForm.non_hit_retention_days = Math.min(Math.max(config.non_hit_retention_days || 3, 1), 3)
configForm.pre_hash_check_enabled = config.pre_hash_check_enabled ?? false
configForm.thresholds = riskThresholdsFromConfig(config.thresholds)
configForm.blocked_keywords_text = Array.isArray(config.blocked_keywords) ? config.blocked_keywords.join('\n') : ''
configForm.keyword_blocking_mode = normalizeKeywordBlockingMode(config.keyword_blocking_mode)
const modelFilter = normalizeModelFilter(config.model_filter)
@ -1524,6 +1613,7 @@ async function saveConfig() {
hit_retention_days: Number(configForm.hit_retention_days) || 180,
non_hit_retention_days: Math.min(Math.max(Number(configForm.non_hit_retention_days) || 3, 1), 3),
pre_hash_check_enabled: configForm.pre_hash_check_enabled,
thresholds: buildRiskThresholdPayload(),
blocked_keywords: blockedKeywordList.value,
keyword_blocking_mode: configForm.keyword_blocking_mode,
model_filter: modelFilterPayload,
@ -1988,6 +2078,41 @@ function buildModelFilterPayload(): ContentModerationModelFilter {
}
}
/**
 * Converts thresholds from a loaded config (fractions, where 1 means 100%) into
 * the percentage values edited by the settings form.
 *
 * Starts from a copy of `riskThresholdDefaults` and overrides only the known
 * categories for which the config holds a finite number; anything else in
 * `thresholds` (unknown categories, non-numeric values) is ignored.
 *
 * @param thresholds Threshold map from the config; may be null or undefined.
 * @returns A fresh map with one percentage (0–100) per known category.
 */
function riskThresholdsFromConfig(thresholds: Record<string, number> | null | undefined): Record<string, number> {
  const out: Record<string, number> = { ...riskThresholdDefaults }
  for (const category of riskThresholdCategories) {
    const value = thresholds?.[category]
    if (typeof value === 'number' && Number.isFinite(value)) {
      // `value * 100` is inexact for many fractions (0.57 * 100 === 56.99999999999999),
      // which would surface as float noise in the number input. Round to two
      // decimals of a percent — the same precision buildRiskThresholdPayload
      // keeps when it serialises with toFixed(4) — so nothing saveable is lost.
      const percent = Number((value * 100).toFixed(2))
      out[category] = clampPercent(percent)
    }
  }
  return out
}
/**
 * Builds the `thresholds` part of the save payload from the form.
 *
 * Form values are percentages; each one is clamped to [0, 100], divided by 100
 * and rounded to four decimals. Only the known categories are emitted.
 *
 * A field that is blank or not numeric falls back to the category default
 * instead of being sent as 0: `v-model.number` leaves `''` in the model when
 * the input is cleared, and `Number('')` is 0, which would otherwise be saved
 * as a threshold that every score reaches.
 *
 * @returns One fraction (0–1) per known category.
 */
function buildRiskThresholdPayload(): Record<string, number> {
  const payload: Record<string, number> = {}
  for (const category of riskThresholdCategories) {
    // Typed as unknown on purpose: at runtime the model can hold '' for a cleared input.
    const raw: unknown = configForm.thresholds[category]
    const isBlank = raw === undefined || raw === null || (typeof raw === 'string' && raw.trim() === '')
    const usable = !isBlank && Number.isFinite(Number(raw))
    const percent = usable ? clampPercent(raw) : riskThresholdDefaults[category]
    payload[category] = Number((percent / 100).toFixed(4))
  }
  return payload
}
/**
 * Restores every category in the form to its built-in default percentage.
 * Assigns a fresh copy so later edits never mutate `riskThresholdDefaults`.
 */
function resetRiskThresholds() {
  const defaultsCopy: Record<string, number> = Object.assign({}, riskThresholdDefaults)
  configForm.thresholds = defaultsCopy
}
/**
 * Coerces an arbitrary value to a percentage within [0, 100].
 *
 * The value is converted with `Number(...)`; anything that does not yield a
 * finite number (NaN, ±Infinity, undefined, non-numeric strings) becomes 0.
 * Note that `''` and `null` convert to 0 as well.
 *
 * @param value Candidate percentage of any type.
 * @returns The numeric value limited to the 0–100 range, or 0 when not finite.
 */
function clampPercent(value: unknown): number {
  const percent = Number(value)
  if (!Number.isFinite(percent) || percent <= 0) {
    return 0
  }
  return percent >= 100 ? 100 : percent
}
/**
 * Formats a percentage for display: clamps it via `clampPercent`, then renders
 * it with exactly one decimal place followed by a percent sign (e.g. `65.0%`).
 *
 * @param value Threshold as a percentage.
 * @returns The display string.
 */
function formatThresholdPercent(value: number): string {
  const bounded = clampPercent(value)
  const digits = bounded.toFixed(1)
  return digits + '%'
}
function parseBlockedKeywords(value: string): string[] {
const seen = new Set<string>()
const out: string[] = []

View File

@ -93,6 +93,10 @@ const baseConfig = (): ContentModerationConfig => ({
pre_hash_check_enabled: false,
blocked_keywords: [],
keyword_blocking_mode: 'keyword_and_api',
thresholds: {
harassment: 0.98,
sexual: 0.65,
},
model_filter: {
type: 'all',
models: [],
@ -224,4 +228,37 @@ describe('admin RiskControlView', () => {
}))
expect(showError).not.toHaveBeenCalled()
})
// Edits two category inputs on the Risk Thresholds settings tab and verifies that
// saving passes them to updateConfig as fractions (72 -> 0.72, 99 -> 0.99)
// without reporting an error.
it('submits edited risk control thresholds when saving moderation config', async () => {
  const stubs = {
    AppLayout: AppLayoutStub,
    BaseDialog: BaseDialogStub,
    Icon: true,
    Select: true,
    Toggle: true,
    Pagination: true,
    ModelWhitelistSelector: ModelWhitelistSelectorStub,
  }
  const view = mount(RiskControlView, { global: { stubs } })
  await flushPromises()

  const clickButton = (label: string) => findButtonByText(view, label).trigger('click')

  // Open the settings dialog and switch to the thresholds tab.
  await clickButton('admin.riskControl.openSettings')
  await clickButton('admin.riskControl.tabs.riskThresholds')

  // The inputs are edited as percentages.
  await view.get('[data-test="risk-threshold-sexual"]').setValue('72')
  await view.get('[data-test="risk-threshold-harassment"]').setValue('99')

  await clickButton('admin.riskControl.saveConfig')
  await flushPromises()

  const expectedThresholds = expect.objectContaining({
    sexual: 0.72,
    harassment: 0.99,
  })
  expect(updateConfig).toHaveBeenCalledWith(expect.objectContaining({
    thresholds: expectedThresholds,
  }))
  expect(showError).not.toHaveBeenCalled()
})
})