From 91da815993732e6536be8c702168822e482cd850 Mon Sep 17 00:00:00 2001 From: shaw Date: Wed, 20 May 2026 11:13:53 +0800 Subject: [PATCH] =?UTF-8?q?feat(risk-control):=20=E5=86=85=E5=AE=B9?= =?UTF-8?q?=E5=AE=A1=E8=AE=A1=E6=96=B0=E5=A2=9E=E5=85=B3=E9=94=AE=E8=AF=8D?= =?UTF-8?q?=E6=8B=A6=E6=88=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../admin/content_moderation_handler.go | 4 + .../internal/service/content_moderation.go | 127 ++++++++++- .../service/content_moderation_test.go | 209 ++++++++++++++++++ frontend/src/api/admin/riskControl.ts | 5 + frontend/src/i18n/locales/en.ts | 18 ++ frontend/src/i18n/locales/zh.ts | 18 ++ frontend/src/views/admin/RiskControlView.vue | 175 ++++++++++++++- 7 files changed, 550 insertions(+), 6 deletions(-) diff --git a/backend/internal/handler/admin/content_moderation_handler.go b/backend/internal/handler/admin/content_moderation_handler.go index 4266f5d8..6f0f2aab 100644 --- a/backend/internal/handler/admin/content_moderation_handler.go +++ b/backend/internal/handler/admin/content_moderation_handler.go @@ -46,6 +46,8 @@ type contentModerationConfigRequest struct { HitRetentionDays *int `json:"hit_retention_days"` NonHitRetentionDays *int `json:"non_hit_retention_days"` PreHashCheckEnabled *bool `json:"pre_hash_check_enabled"` + BlockedKeywords *[]string `json:"blocked_keywords"` + KeywordBlockingMode *string `json:"keyword_blocking_mode"` } type contentModerationAPIKeyTestRequest struct { @@ -103,6 +105,8 @@ func (h *ContentModerationHandler) UpdateConfig(c *gin.Context) { HitRetentionDays: req.HitRetentionDays, NonHitRetentionDays: req.NonHitRetentionDays, PreHashCheckEnabled: req.PreHashCheckEnabled, + BlockedKeywords: req.BlockedKeywords, + KeywordBlockingMode: req.KeywordBlockingMode, }) if err != nil { response.ErrorFrom(c, err) diff --git a/backend/internal/service/content_moderation.go b/backend/internal/service/content_moderation.go index 144222c2..6a7c9904 100644 --- a/backend/internal/service/content_moderation.go +++ b/backend/internal/service/content_moderation.go @@ -32,10 +32,17 @@ const ( contentModerationAPIKeysModeAppend = "append" contentModerationAPIKeysModeReplace = "replace" - ContentModerationActionAllow = "allow" - ContentModerationActionBlock = "block" - ContentModerationActionHashBlock = "hash_block" - ContentModerationActionError = "error" + ContentModerationActionAllow = "allow" + ContentModerationActionBlock = "block" + ContentModerationActionHashBlock = "hash_block" + ContentModerationActionKeywordBlock = "keyword_block" + ContentModerationActionError = "error" + + contentModerationKeywordCategory = "keyword" + + ContentModerationKeywordModeKeywordOnly = "keyword_only" + ContentModerationKeywordModeKeywordAndAPI = "keyword_and_api" + ContentModerationKeywordModeAPIOnly = "api_only" ContentModerationProtocolAnthropicMessages = "anthropic_messages" ContentModerationProtocolOpenAIResponses = "openai_responses" @@ -71,6 +78,8 @@ const ( maxContentModerationTestImages = maxContentModerationInputImages maxContentModerationTestImageBytes = 8 * 1024 * 1024 maxContentModerationTestImageDataURLBytes = 12 * 1024 * 1024 + maxContentModerationBlockedKeywords = 10000 + maxContentModerationBlockedKeywordRunes = 200 contentModerationCleanupInterval = 24 * time.Hour contentModerationCleanupTimeout = 30 * time.Minute @@ -142,6 +151,8 @@ type ContentModerationConfig struct { HitRetentionDays int `json:"hit_retention_days"` NonHitRetentionDays int `json:"non_hit_retention_days"` PreHashCheckEnabled bool `json:"pre_hash_check_enabled"` + BlockedKeywords []string `json:"blocked_keywords"` + KeywordBlockingMode string `json:"keyword_blocking_mode"` } type ContentModerationConfigView struct { @@ -171,6 +182,8 @@ type ContentModerationConfigView struct { HitRetentionDays int `json:"hit_retention_days"` NonHitRetentionDays int `json:"non_hit_retention_days"` PreHashCheckEnabled bool `json:"pre_hash_check_enabled"` + BlockedKeywords []string `json:"blocked_keywords"` + KeywordBlockingMode string `json:"keyword_blocking_mode"` } type ContentModerationAPIKeyStatus struct { @@ -240,6 +253,8 @@ type UpdateContentModerationConfigInput struct { HitRetentionDays *int `json:"hit_retention_days"` NonHitRetentionDays *int `json:"non_hit_retention_days"` PreHashCheckEnabled *bool `json:"pre_hash_check_enabled"` + BlockedKeywords *[]string `json:"blocked_keywords"` + KeywordBlockingMode *string `json:"keyword_blocking_mode"` } type ContentModerationCheckInput struct { @@ -560,6 +575,12 @@ func (s *ContentModerationService) UpdateConfig(ctx context.Context, input Updat if input.PreHashCheckEnabled != nil { cfg.PreHashCheckEnabled = *input.PreHashCheckEnabled } + if input.BlockedKeywords != nil { + cfg.BlockedKeywords = normalizeBlockedKeywords(*input.BlockedKeywords) + } + if input.KeywordBlockingMode != nil { + cfg.KeywordBlockingMode = strings.TrimSpace(*input.KeywordBlockingMode) + } if input.AllGroups != nil { cfg.AllGroups = *input.AllGroups } @@ -767,6 +788,44 @@ func (s *ContentModerationService) Check(ctx context.Context, input ContentModer "protocol", input.Protocol, "text_runes", len([]rune(content.Text)), "image_count", len(content.Images)) + if cfg.Mode == ContentModerationModePreBlock { + if cfg.KeywordBlockingMode != ContentModerationKeywordModeAPIOnly && len(cfg.BlockedKeywords) > 0 { + if keyword, hit := matchBlockedKeyword(content.Text, cfg.BlockedKeywords); hit { + slog.Info("content_moderation.keyword_block", + "user_id", input.UserID, + "api_key_id", input.APIKeyID, + "group_id", contentModerationLogGroupID(input.GroupID), + "endpoint", input.Endpoint, + "protocol", input.Protocol, + "keyword_blocking_mode", cfg.KeywordBlockingMode, + "keyword", keyword) + scores := map[string]float64{contentModerationKeywordCategory: 1.0} + log := s.buildLog(input, cfg, ContentModerationActionKeywordBlock, true, contentModerationKeywordCategory, 1.0, scores, content.ExcerptText(), nil, nil, "") + s.applyFlaggedSideEffects(ctx, cfg, log) + _ = s.repo.CreateLog(ctx, log) + return &ContentModerationDecision{ + Allowed: false, + Blocked: true, + Flagged: true, + Message: cfg.BlockMessage, + StatusCode: cfg.BlockStatus, + HighestCategory: contentModerationKeywordCategory, + HighestScore: 1.0, + CategoryScores: scores, + Action: ContentModerationActionKeywordBlock, + }, nil + } + } + if cfg.KeywordBlockingMode == ContentModerationKeywordModeKeywordOnly { + slog.Info("content_moderation.skip_api_keyword_only", + "user_id", input.UserID, + "api_key_id", input.APIKeyID, + "group_id", contentModerationLogGroupID(input.GroupID), + "endpoint", input.Endpoint, + "protocol", input.Protocol) + return allow, nil + } + } hashText := content.Hash() if cfg.PreHashCheckEnabled && s.hashCache != nil { matched, err := s.hashCache.HasFlaggedInputHash(ctx, hashText) @@ -1451,6 +1510,8 @@ func defaultContentModerationConfig() *ContentModerationConfig { HitRetentionDays: defaultContentModerationHitRetentionDays, NonHitRetentionDays: defaultContentModerationNonHitRetentionDays, PreHashCheckEnabled: false, + BlockedKeywords: []string{}, + KeywordBlockingMode: ContentModerationKeywordModeKeywordAndAPI, } } @@ -1529,6 +1590,8 @@ func (cfg *ContentModerationConfig) normalize() { } cfg.GroupIDs = normalizeInt64IDs(cfg.GroupIDs) cfg.Thresholds = mergeContentModerationThresholds(ContentModerationDefaultThresholds(), cfg.Thresholds) + cfg.BlockedKeywords = normalizeBlockedKeywords(cfg.BlockedKeywords) + cfg.KeywordBlockingMode = normalizeKeywordBlockingMode(cfg.KeywordBlockingMode) } func (cfg *ContentModerationConfig) includesGroup(groupID *int64) bool { @@ -1705,6 +1768,8 @@ func (s *ContentModerationService) configView(cfg *ContentModerationConfig) *Con HitRetentionDays: cfg.HitRetentionDays, NonHitRetentionDays: cfg.NonHitRetentionDays, PreHashCheckEnabled: cfg.PreHashCheckEnabled, + BlockedKeywords: append([]string(nil), cfg.BlockedKeywords...), + KeywordBlockingMode: cfg.KeywordBlockingMode, } } @@ -1944,6 +2009,60 @@ func normalizeInt64IDs(ids []int64) []int64 { return out } +func normalizeBlockedKeywords(in []string) []string { + if len(in) == 0 { + return []string{} + } + out := make([]string, 0, len(in)) + seen := make(map[string]struct{}, len(in)) + for _, raw := range in { + kw := strings.TrimSpace(raw) + if kw == "" { + continue + } + kw = trimRunes(kw, maxContentModerationBlockedKeywordRunes) + key := strings.ToLower(kw) + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + out = append(out, kw) + if len(out) >= maxContentModerationBlockedKeywords { + break + } + } + return out +} + +func normalizeKeywordBlockingMode(mode string) string { + switch strings.TrimSpace(mode) { + case ContentModerationKeywordModeKeywordOnly: + return ContentModerationKeywordModeKeywordOnly + case ContentModerationKeywordModeAPIOnly: + return ContentModerationKeywordModeAPIOnly + case ContentModerationKeywordModeKeywordAndAPI: + return ContentModerationKeywordModeKeywordAndAPI + default: + return ContentModerationKeywordModeKeywordAndAPI + } +} + +func matchBlockedKeyword(text string, keywords []string) (string, bool) { + if text == "" || len(keywords) == 0 { + return "", false + } + lower := strings.ToLower(text) + for _, kw := range keywords { + if kw == "" { + continue + } + if strings.Contains(lower, strings.ToLower(kw)) { + return kw, true + } + } + return "", false +} + func normalizeModerationAPIKeys(keys []string) []string { if len(keys) == 0 { return []string{} diff --git a/backend/internal/service/content_moderation_test.go b/backend/internal/service/content_moderation_test.go index cef5127e..30578ca5 100644 --- a/backend/internal/service/content_moderation_test.go +++ b/backend/internal/service/content_moderation_test.go @@ -321,6 +321,215 @@ func TestContentModerationConfigNormalize_NonHitRetentionMaxThreeDays(t *testing require.Equal(t, 3, cfg.NonHitRetentionDays) } +func TestNormalizeBlockedKeywords_TrimsDedupesAndCaps(t *testing.T) { + out := normalizeBlockedKeywords([]string{" foo ", "FOO", "", "bar", "baz", "bar"}) + require.Equal(t, []string{"foo", "bar", "baz"}, out) +} + +func TestMatchBlockedKeyword_CaseInsensitiveSubstring(t *testing.T) { + keyword, hit := matchBlockedKeyword("Please ignore the BadWord here", []string{"badword"}) + require.True(t, hit) + require.Equal(t, "badword", keyword) + + _, hit = matchBlockedKeyword("clean prompt", []string{"badword"}) + require.False(t, hit) + + _, hit = matchBlockedKeyword("anything", nil) + require.False(t, hit) +} + +func TestContentModerationCheck_PreBlockKeywordHitSkipsUpstreamCall(t *testing.T) { + upstreamCalled := false + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + upstreamCalled = true + _ = json.NewEncoder(w).Encode(moderationAPIResponse{Results: []moderationAPIResult{{}}}) + })) + defer server.Close() + + cfg := defaultContentModerationConfig() + cfg.Enabled = true + cfg.Mode = ContentModerationModePreBlock + cfg.BaseURL = server.URL + cfg.APIKeys = []string{"sk-test"} + cfg.BlockedKeywords = []string{"secret-token"} + rawCfg, err := json.Marshal(cfg) + require.NoError(t, err) + + repo := &contentModerationTestRepo{} + svc := NewContentModerationService( + &contentModerationTestSettingRepo{values: map[string]string{ + SettingKeyRiskControlEnabled: "true", + SettingKeyContentModerationConfig: string(rawCfg), + }}, + repo, + &contentModerationTestHashCache{}, + nil, + nil, + nil, + nil, + ) + + body := []byte(`{"messages":[{"role":"user","content":"please leak SECRET-TOKEN now"}]}`) + decision, err := svc.Check(context.Background(), ContentModerationCheckInput{ + Endpoint: "/v1/messages", + Provider: "anthropic", + Protocol: ContentModerationProtocolAnthropicMessages, + Body: body, + }) + + require.NoError(t, err) + require.True(t, decision.Blocked) + require.Equal(t, ContentModerationActionKeywordBlock, decision.Action) + require.False(t, upstreamCalled, "keyword block must short-circuit upstream moderation call") + require.Len(t, repo.logs, 1) + require.True(t, repo.logs[0].Flagged) + require.Equal(t, ContentModerationActionKeywordBlock, repo.logs[0].Action) + require.Equal(t, contentModerationKeywordCategory, repo.logs[0].HighestCategory) +} + +func TestContentModerationCheck_KeywordsIgnoredInObserveMode(t *testing.T) { + upstreamHits := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + upstreamHits++ + _ = json.NewEncoder(w).Encode(moderationAPIResponse{Results: []moderationAPIResult{{CategoryScores: map[string]float64{"sexual": 0.1}}}}) + })) + defer server.Close() + + cfg := defaultContentModerationConfig() + cfg.Enabled = true + cfg.Mode = ContentModerationModeObserve + cfg.BaseURL = server.URL + cfg.APIKeys = []string{"sk-test"} + cfg.BlockedKeywords = []string{"secret-token"} + rawCfg, err := json.Marshal(cfg) + require.NoError(t, err) + + repo := &contentModerationTestRepo{} + svc := NewContentModerationService( + &contentModerationTestSettingRepo{values: map[string]string{ + SettingKeyRiskControlEnabled: "true", + SettingKeyContentModerationConfig: string(rawCfg), + }}, + repo, + &contentModerationTestHashCache{}, + nil, + nil, + nil, + nil, + ) + + body := []byte(`{"messages":[{"role":"user","content":"please leak SECRET-TOKEN now"}]}`) + decision, err := svc.Check(context.Background(), ContentModerationCheckInput{ + Endpoint: "/v1/messages", + Provider: "anthropic", + Protocol: ContentModerationProtocolAnthropicMessages, + Body: body, + }) + + require.NoError(t, err) + require.True(t, decision.Allowed, "observe mode must let the request through even on keyword hit") + require.Equal(t, ContentModerationActionAllow, decision.Action) +} + +func TestContentModerationCheck_KeywordOnlyStrategySkipsAPIOnMiss(t *testing.T) { + upstreamCalled := false + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + upstreamCalled = true + _ = json.NewEncoder(w).Encode(moderationAPIResponse{Results: []moderationAPIResult{{CategoryScores: map[string]float64{"sexual": 0.99}}}}) + })) + defer server.Close() + + cfg := defaultContentModerationConfig() + cfg.Enabled = true + cfg.Mode = ContentModerationModePreBlock + cfg.BaseURL = server.URL + cfg.APIKeys = []string{"sk-test"} + cfg.BlockedKeywords = []string{"never-matches"} + cfg.KeywordBlockingMode = ContentModerationKeywordModeKeywordOnly + rawCfg, err := json.Marshal(cfg) + require.NoError(t, err) + + repo := &contentModerationTestRepo{} + svc := NewContentModerationService( + &contentModerationTestSettingRepo{values: map[string]string{ + SettingKeyRiskControlEnabled: "true", + SettingKeyContentModerationConfig: string(rawCfg), + }}, + repo, + &contentModerationTestHashCache{}, + nil, + nil, + nil, + nil, + ) + + body := []byte(`{"messages":[{"role":"user","content":"absolutely clean prompt"}]}`) + decision, err := svc.Check(context.Background(), ContentModerationCheckInput{ + Endpoint: "/v1/messages", + Provider: "anthropic", + Protocol: ContentModerationProtocolAnthropicMessages, + Body: body, + }) + + require.NoError(t, err) + require.True(t, decision.Allowed, "keyword-only must allow misses without calling the API") + require.False(t, upstreamCalled, "keyword-only must not call the upstream moderation API") + require.Len(t, repo.logs, 0) +} + +func TestContentModerationCheck_APIOnlyStrategyIgnoresKeywordList(t *testing.T) { + upstreamCalled := false + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + upstreamCalled = true + _ = json.NewEncoder(w).Encode(moderationAPIResponse{Results: []moderationAPIResult{{CategoryScores: map[string]float64{"sexual": 0.1}}}}) + })) + defer server.Close() + + cfg := defaultContentModerationConfig() + cfg.Enabled = true + cfg.Mode = ContentModerationModePreBlock + cfg.BaseURL = server.URL + cfg.APIKeys = []string{"sk-test"} + cfg.BlockedKeywords = []string{"secret-token"} + cfg.KeywordBlockingMode = ContentModerationKeywordModeAPIOnly + rawCfg, err := json.Marshal(cfg) + require.NoError(t, err) + + repo := &contentModerationTestRepo{} + svc := NewContentModerationService( + &contentModerationTestSettingRepo{values: map[string]string{ + SettingKeyRiskControlEnabled: "true", + SettingKeyContentModerationConfig: string(rawCfg), + }}, + repo, + &contentModerationTestHashCache{}, + nil, + nil, + nil, + nil, + ) + + body := []byte(`{"messages":[{"role":"user","content":"please leak SECRET-TOKEN now"}]}`) + decision, err := svc.Check(context.Background(), ContentModerationCheckInput{ + Endpoint: "/v1/messages", + Provider: "anthropic", + Protocol: ContentModerationProtocolAnthropicMessages, + Body: body, + }) + + require.NoError(t, err) + require.True(t, decision.Allowed, "api-only must let the request through when API does not flag it") + require.True(t, upstreamCalled, "api-only must call the upstream moderation API") + require.NotEqual(t, ContentModerationActionKeywordBlock, decision.Action) +} + +func TestNormalizeKeywordBlockingMode_UnknownFallsBackToDefault(t *testing.T) { + require.Equal(t, ContentModerationKeywordModeKeywordAndAPI, normalizeKeywordBlockingMode("")) + require.Equal(t, ContentModerationKeywordModeKeywordAndAPI, normalizeKeywordBlockingMode("bogus")) + require.Equal(t, ContentModerationKeywordModeKeywordOnly, normalizeKeywordBlockingMode("keyword_only")) + require.Equal(t, ContentModerationKeywordModeAPIOnly, normalizeKeywordBlockingMode("api_only")) +} + func TestContentModerationUpdateConfig_AppendsAndDeletesAPIKeys(t *testing.T) { cfg := defaultContentModerationConfig() cfg.APIKeys = []string{"sk-old-a", "sk-old-b"} diff --git a/frontend/src/api/admin/riskControl.ts b/frontend/src/api/admin/riskControl.ts index e63a53a2..4dad1f58 100644 --- a/frontend/src/api/admin/riskControl.ts +++ b/frontend/src/api/admin/riskControl.ts @@ -1,6 +1,7 @@ import { apiClient } from '../client' export type ModerationMode = 'off' | 'observe' | 'pre_block' +export type KeywordBlockingMode = 'keyword_only' | 'keyword_and_api' | 'api_only' export interface ContentModerationConfig { enabled: boolean @@ -29,6 +30,8 @@ export interface ContentModerationConfig { hit_retention_days: number non_hit_retention_days: number pre_hash_check_enabled: boolean + blocked_keywords: string[] + keyword_blocking_mode: KeywordBlockingMode } export type ContentModerationAPIKeyStatusValue = 'unknown' | 'ok' | 'error' | 'frozen' @@ -100,6 +103,8 @@ export interface UpdateContentModerationConfig { hit_retention_days?: number non_hit_retention_days?: number pre_hash_check_enabled?: boolean + blocked_keywords?: string[] + keyword_blocking_mode?: KeywordBlockingMode } export interface ContentModerationRuntimeStatus { diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts index 3048c0e6..9ac1466e 100644 --- a/frontend/src/i18n/locales/en.ts +++ b/frontend/src/i18n/locales/en.ts @@ -2547,8 +2547,25 @@ export default { scope: 'Scope', runtime: 'Runtime', response: 'Hit Notice', + keywords: 'Keyword Block', retention: 'Retention', }, + blockedKeywords: 'Blocked keywords', + blockedKeywordsPlaceholder: 'One keyword per line, e.g.:\nbadword1\nbadword2', + blockedKeywordsDescription: 'Matching is case-insensitive. Whether the upstream moderation API is invoked after a hit depends on the strategy below.', + blockedKeywordsPreBlockHint: 'Keyword blocking only takes effect in "Pre-block" mode.', + blockedKeywordsModeWarning: 'Current mode is "{mode}". Keyword blocking will not run until you switch to "Pre-block" mode.', + blockedKeywordCount: '{count} keywords configured', + blockedKeywordsLimit: 'Up to {max} keywords, each no longer than 200 characters. Duplicates are removed automatically.', + keywordBlockingMode: 'Moderation strategy', + keywordModeKeywordAndApi: 'Keyword + API', + keywordModeKeywordAndApiDesc: 'Block on keyword hit; otherwise fall through to the upstream moderation API.', + keywordModeKeywordOnly: 'Keyword only', + keywordModeKeywordOnlyDesc: 'Decide using keywords only; misses are allowed without calling the API, saving upstream cost.', + keywordModeKeywordOnlyNotice: 'Keyword-only strategy: requests that do not match any keyword are allowed without calling the upstream moderation API.', + keywordModeApiOnly: 'API only', + keywordModeApiOnlyDesc: 'Use the upstream moderation API only; the keyword list configured here is not consulted.', + keywordModeApiOnlyNotice: 'API-only strategy: the keyword list is not consulted; all requests go through the upstream moderation API.', overview: { status: 'Status', enabled: 'Enabled', @@ -2586,6 +2603,7 @@ export default { }, action: { block: 'Blocked', + keywordBlock: 'Keyword Blocked', error: 'Error', }, }, diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts index fe478c55..3e90405a 100644 --- a/frontend/src/i18n/locales/zh.ts +++ b/frontend/src/i18n/locales/zh.ts @@ -2624,8 +2624,25 @@ export default { scope: '审计范围', runtime: '运行队列', response: '命中通知', + keywords: '关键词拦截', retention: '日志保留', }, + blockedKeywords: '拦截关键词', + blockedKeywordsPlaceholder: '每行输入一个关键词,例如:\n敏感词1\n敏感词2', + blockedKeywordsDescription: '匹配忽略大小写;命中后会按下方策略决定是否调用上游审计接口。', + blockedKeywordsPreBlockHint: '关键词拦截仅在「前置拦截」模式下生效。', + blockedKeywordsModeWarning: '当前为「{mode}」模式,关键词拦截不会生效;请切换到「前置拦截」模式后再保存关键词。', + blockedKeywordCount: '已配置 {count} 个关键词', + blockedKeywordsLimit: '最多保存 {max} 个关键词,单个长度不超过 200 个字符;重复项会自动去重。', + keywordBlockingMode: '审计策略', + keywordModeKeywordAndApi: '关键词 + API', + keywordModeKeywordAndApiDesc: '命中关键词直接拦截;未命中时再调用上游审计接口。', + keywordModeKeywordOnly: '仅关键词', + keywordModeKeywordOnlyDesc: '只用关键词判断,未命中即放行,不调用上游审计接口,可显著降低 API 用量。', + keywordModeKeywordOnlyNotice: '当前为「仅关键词」策略:未命中关键词的请求将直接放行,不调用上游审计接口。', + keywordModeApiOnly: '仅 API', + keywordModeApiOnlyDesc: '只调用上游审计接口判断,本页的关键词列表将不会生效。', + keywordModeApiOnlyNotice: '当前为「仅 API」策略:关键词列表不会生效,请求会全部交给上游审计接口判断。', overview: { status: '运行状态', enabled: '已启用', @@ -2663,6 +2680,7 @@ export default { }, action: { block: '拦截', + keywordBlock: '关键词拦截', error: '异常', }, }, diff --git a/frontend/src/views/admin/RiskControlView.vue b/frontend/src/views/admin/RiskControlView.vue index 74db4772..acfcec77 100644 --- a/frontend/src/views/admin/RiskControlView.vue +++ b/frontend/src/views/admin/RiskControlView.vue @@ -728,6 +728,70 @@ +
+
+ +
+

{{ keywordNotice.title }}

+

{{ keywordNotice.description }}

+
+
+ +
+ +
+ +
+
+ +
+
+ + + {{ t('admin.riskControl.blockedKeywordCount', { count: blockedKeywordCount }) }} + +
+ +

+ {{ t('admin.riskControl.blockedKeywordsLimit', { max: blockedKeywordMax }) }} +

+
+
+
@@ -830,6 +894,7 @@ import type { ContentModerationLog, ContentModerationRuntimeStatus, ContentModerationTestAuditResult, + KeywordBlockingMode, ModerationMode, UpdateContentModerationConfig, } from '@/api/admin/riskControl' @@ -838,7 +903,7 @@ import { useAppStore } from '@/stores/app' import { extractApiErrorMessage } from '@/utils/apiError' import { formatDateTime as formatDateTimeValue } from '@/utils/format' -type SettingsTab = 'basic' | 'scope' | 'runtime' | 'response' | 'retention' +type SettingsTab = 'basic' | 'scope' | 'runtime' | 'response' | 'retention' | 'keywords' type WorkerSlotState = 'active' | 'idle' | 'disabled' type APIKeysWriteMode = 'append' | 'replace' type OverviewIcon = 'shield' | 'key' | 'users' | 'document' @@ -862,6 +927,7 @@ type ModerationScoreRow = { const maxModerationTestImages = 1 const maxModerationTestImageSize = 8 * 1024 * 1024 const maxVisibleApiKeyRows: number = 3 +const blockedKeywordMax = 10000 const { t } = useI18n() const appStore = useAppStore() @@ -919,6 +985,8 @@ const configForm = reactive({ hit_retention_days: 180, non_hit_retention_days: 3, pre_hash_check_enabled: false, + blocked_keywords_text: '', + keyword_blocking_mode: 'keyword_and_api' as KeywordBlockingMode, }) const pagination = reactive({ @@ -942,6 +1010,7 @@ const settingsTabs = computed>(() => [ { id: 'scope', label: t('admin.riskControl.tabs.scope') }, { id: 'runtime', label: t('admin.riskControl.tabs.runtime') }, { id: 'response', label: t('admin.riskControl.tabs.response') }, + { id: 'keywords', label: t('admin.riskControl.tabs.keywords') }, { id: 'retention', label: t('admin.riskControl.tabs.retention') }, ]) @@ -951,6 +1020,78 @@ const modeOptions = computed(() => [ { value: 'off', label: t('admin.riskControl.modeOff') }, ]) +const keywordBlockingModeOptions = computed>(() => [ + { + value: 'keyword_and_api', + label: t('admin.riskControl.keywordModeKeywordAndApi'), + description: t('admin.riskControl.keywordModeKeywordAndApiDesc'), + }, + { + value: 'keyword_only', + label: t('admin.riskControl.keywordModeKeywordOnly'), + description: t('admin.riskControl.keywordModeKeywordOnlyDesc'), + }, + { + value: 'api_only', + label: t('admin.riskControl.keywordModeApiOnly'), + description: t('admin.riskControl.keywordModeApiOnlyDesc'), + }, +]) + +type KeywordNoticeView = { + title: string + description: string + icon: 'infoCircle' | 'exclamationTriangle' + toneClass: string + iconClass: string + titleClass: string +} + +const keywordNoticeTones = { + info: { + icon: 'infoCircle' as const, + toneClass: 'border-primary-100 bg-primary-50/60 dark:border-primary-900/40 dark:bg-primary-900/10', + iconClass: 'mt-0.5 flex-shrink-0 text-primary-500 dark:text-primary-300', + titleClass: 'text-primary-700 dark:text-primary-200', + }, + warning: { + icon: 'exclamationTriangle' as const, + toneClass: 'border-amber-200 bg-amber-50 dark:border-amber-900/40 dark:bg-amber-900/20', + iconClass: 'mt-0.5 flex-shrink-0 text-amber-500 dark:text-amber-300', + titleClass: 'text-amber-700 dark:text-amber-200', + }, +} + +const keywordNotice = computed(() => { + const strategy = configForm.keyword_blocking_mode + if (strategy === 'api_only') { + return { + ...keywordNoticeTones.info, + title: t('admin.riskControl.keywordModeApiOnlyNotice'), + description: t('admin.riskControl.keywordModeApiOnlyDesc'), + } + } + if (configForm.mode !== 'pre_block') { + return { + ...keywordNoticeTones.warning, + title: t('admin.riskControl.blockedKeywordsModeWarning', { mode: modeLabel(configForm.mode) }), + description: t('admin.riskControl.blockedKeywordsDescription'), + } + } + if (strategy === 'keyword_only') { + return { + ...keywordNoticeTones.info, + title: t('admin.riskControl.keywordModeKeywordOnlyNotice'), + description: t('admin.riskControl.keywordModeKeywordOnlyDesc'), + } + } + return { + ...keywordNoticeTones.info, + title: t('admin.riskControl.blockedKeywordsPreBlockHint'), + description: t('admin.riskControl.blockedKeywordsDescription'), + } +}) + const resultOptions = computed(() => [ { value: '', label: t('admin.riskControl.result.all') }, { value: 'hit', label: t('admin.riskControl.result.hit') }, @@ -989,6 +1130,10 @@ const filteredGroups = computed(() => { const inputApiKeyCount = computed(() => parseApiKeys(configForm.api_keys_text).length) +const blockedKeywordList = computed(() => parseBlockedKeywords(configForm.blocked_keywords_text)) + +const blockedKeywordCount = computed(() => blockedKeywordList.value.length) + const pendingDeletedApiKeyCount = computed(() => pendingDeleteApiKeyHashes.value.length) const effectiveStoredApiKeyCount = computed(() => Math.max(0, configForm.api_key_count - pendingDeletedApiKeyCount.value)) @@ -1195,6 +1340,8 @@ function applyConfig(config: ContentModerationConfig) { configForm.hit_retention_days = config.hit_retention_days || 180 configForm.non_hit_retention_days = Math.min(Math.max(config.non_hit_retention_days || 3, 1), 3) configForm.pre_hash_check_enabled = config.pre_hash_check_enabled ?? false + configForm.blocked_keywords_text = Array.isArray(config.blocked_keywords) ? config.blocked_keywords.join('\n') : '' + configForm.keyword_blocking_mode = normalizeKeywordBlockingMode(config.keyword_blocking_mode) } async function loadAll() { @@ -1264,6 +1411,8 @@ async function saveConfig() { hit_retention_days: Number(configForm.hit_retention_days) || 180, non_hit_retention_days: Math.min(Math.max(Number(configForm.non_hit_retention_days) || 3, 1), 3), pre_hash_check_enabled: configForm.pre_hash_check_enabled, + blocked_keywords: blockedKeywordList.value, + keyword_blocking_mode: configForm.keyword_blocking_mode, } const keys = parseApiKeys(configForm.api_keys_text) if (!payload.clear_api_key && configForm.api_keys_mode === 'replace' && keys.length === 0) { @@ -1563,6 +1712,7 @@ function modeDescription(mode: ModerationMode): string { } function resultLabel(row: ContentModerationLog): string { + if (row.action === 'keyword_block') return t('admin.riskControl.action.keywordBlock') if (row.action === 'block') return t('admin.riskControl.action.block') if (row.action === 'error' || row.error) return t('admin.riskControl.action.error') if (row.flagged) return t('admin.riskControl.result.hit') @@ -1570,7 +1720,7 @@ function resultLabel(row: ContentModerationLog): string { } function resultBadgeClass(row: ContentModerationLog): string { - if (row.action === 'block') return 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-300' + if (row.action === 'block' || row.action === 'keyword_block') return 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-300' if (row.action === 'error' || row.error) return 'bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-300' if (row.flagged) return 'bg-pink-100 text-pink-700 dark:bg-pink-900/30 dark:text-pink-300' return 'bg-green-100 text-green-700 dark:bg-green-900/30 dark:text-green-300' @@ -1667,6 +1817,27 @@ function parseApiKeys(value: string): string[] { .filter((item, index, arr) => item && arr.indexOf(item) === index) } +function normalizeKeywordBlockingMode(value: unknown): KeywordBlockingMode { + if (value === 'keyword_only' || value === 'api_only' || value === 'keyword_and_api') { + return value + } + return 'keyword_and_api' +} + +function parseBlockedKeywords(value: string): string[] { + const seen = new Set() + const out: string[] = [] + for (const line of value.split(/\r?\n/)) { + const kw = line.trim() + if (!kw) continue + const key = kw.toLowerCase() + if (seen.has(key)) continue + seen.add(key) + out.push(kw) + } + return out +} + function violationCountText(row: ContentModerationLog): string { if (!row.flagged) return '-' return t('admin.riskControl.violationCount', { count: row.violation_count || 1 })