package douyin

import (
	"net/url"
	"regexp"
	"strings"
)

// Regular expressions used by the curl helpers. They are compiled once at
// package initialization instead of on every call.
var (
	// curlURLRe matches a single-quoted https URL without whitespace.
	curlURLRe = regexp.MustCompile(`'(https://[^'\s]+)'`)
	// curlCookieFlagRe matches the `-b 'cookie_string'` form.
	curlCookieFlagRe = regexp.MustCompile(`-b\s+'([^']+)'`)
	// curlCookieHeaderRe matches the `-H 'cookie: xxx'` form (case-insensitive).
	curlCookieHeaderRe = regexp.MustCompile(`(?i)-H\s+'cookie:\s*([^']+)'`)
	// curlSpaceRe matches a run of one or more whitespace characters.
	curlSpaceRe = regexp.MustCompile(`\s+`)
)

// CurlParams holds the values extracted from a curl command.
type CurlParams struct {
	URL    string     // first single-quoted https URL found in the command
	Cookie string     // cookie string taken from -b or from a cookie header
	Params url.Values // query parameters parsed from URL
}

// ParseCurlCommand extracts the URL, cookie and query parameters from a curl
// command string.
//
// Only a single-quoted https URL is recognized. The cookie is read from either
// `-b '...'` or `-H 'cookie: ...'`; when both are present the header form wins.
// Fields that cannot be found are left at their zero value (Params is always
// non-nil). The returned error is currently always nil.
func ParseCurlCommand(curlCmd string) (*CurlParams, error) {
	result := &CurlParams{
		Params: url.Values{},
	}

	// Extract the URL and, if it parses, its query parameters.
	if m := curlURLRe.FindStringSubmatch(curlCmd); len(m) > 1 {
		result.URL = m[1]
		if u, err := url.Parse(result.URL); err == nil {
			result.Params = u.Query()
		}
	}

	// Form 1: -b 'cookie_string'.
	if m := curlCookieFlagRe.FindStringSubmatch(curlCmd); len(m) > 1 {
		result.Cookie = m[1]
	}

	// Form 2: -H 'cookie: xxx'. Checked last so it overrides form 1.
	if m := curlCookieHeaderRe.FindStringSubmatch(curlCmd); len(m) > 1 {
		result.Cookie = m[1]
	}

	return result, nil
}

// ExtractAntiScrapParams returns a new url.Values containing only the
// anti-scraping parameters of params that have a non-empty first value.
func ExtractAntiScrapParams(params url.Values) url.Values {
	antiScrapKeys := []string{
		"__token",
		"verifyFp",
		"fp",
		"msToken",
		"a_bogus",
		"_lid",
	}

	result := url.Values{}
	for _, key := range antiScrapKeys {
		if v := params.Get(key); v != "" {
			result.Set(key, v)
		}
	}
	return result
}

// MergeParams returns a new url.Values holding every key of base, with the
// keys of antiScrap added on top (antiScrap wins on conflict).
//
// The value slices of both inputs are copied, so modifying the result never
// affects base or antiScrap.
func MergeParams(base, antiScrap url.Values) url.Values {
	result := make(url.Values, len(base)+len(antiScrap))

	// base first, antiScrap second so that antiScrap overrides base.
	for _, src := range []url.Values{base, antiScrap} {
		for k, v := range src {
			result[k] = append([]string{}, v...)
		}
	}
	return result
}

// IsAntiScrapParamsEmpty reports whether params carries none of the
// anti-scraping parameters it checks. A nil params is considered empty.
//
// Note: the key list here does not include "_lid", which
// ExtractAntiScrapParams does extract.
func IsAntiScrapParamsEmpty(params url.Values) bool {
	if params == nil {
		return true
	}

	antiScrapKeys := []string{"__token", "verifyFp", "fp", "msToken", "a_bogus"}
	for _, key := range antiScrapKeys {
		if params.Get(key) != "" {
			return false
		}
	}
	return true
}

// ParseCurlFromEnvOrFile parses the curl command given in envValue.
//
// When envValue is empty it returns (nil, nil) regardless of filePath: this
// function does not read the file itself, that is left to the caller.
func ParseCurlFromEnvOrFile(envValue string, filePath string) (*CurlParams, error) {
	if envValue == "" {
		// Nothing to parse; reading filePath is the caller's responsibility.
		return nil, nil
	}
	return ParseCurlCommand(envValue)
}

// CleanCurlCommand flattens a multi-line curl command into one line: shell
// line continuations and newlines become spaces, runs of whitespace are
// collapsed to a single space, and the result is trimmed.
func CleanCurlCommand(curlCmd string) string {
	// Drop backslash line continuations (LF and CRLF) and bare newlines.
	curlCmd = strings.ReplaceAll(curlCmd, "\\\n", " ")
	curlCmd = strings.ReplaceAll(curlCmd, "\\\r\n", " ")
	curlCmd = strings.ReplaceAll(curlCmd, "\n", " ")

	// Collapse consecutive whitespace into a single space.
	curlCmd = curlSpaceRe.ReplaceAllString(curlCmd, " ")

	return strings.TrimSpace(curlCmd)
}