2026-03-05 12:50:06 +08:00

131 lines
3.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package douyin
import (
"net/url"
"regexp"
"strings"
)
// CurlParams holds the pieces extracted from a curl command line:
// the request URL, the cookie string, and the URL's query parameters.
type CurlParams struct {
URL string // full request URL (first single-quoted https:// token in the command)
Cookie string // cookie string taken from -b '...' or -H 'cookie: ...'
Params url.Values // query parameters parsed out of URL
}
// Compiled once at package scope: regexp compilation is relatively
// expensive and compiled *Regexp values are safe for concurrent use,
// so there is no reason to rebuild them on every call.
var (
	curlURLRe     = regexp.MustCompile(`'(https://[^'\s]+)'`)
	curlCookieBRe = regexp.MustCompile(`-b\s+'([^']+)'`)
	curlCookieHRe = regexp.MustCompile(`(?i)-H\s+'cookie:\s*([^']+)'`)
)

// ParseCurlCommand parses a curl command string and extracts the URL,
// the Cookie header and the URL's query parameters.
//
// The URL is the first single-quoted https:// token. The cookie is
// recognized in two forms, -b 'cookie_string' and -H 'cookie: ...'
// (header name matched case-insensitively); when both are present the
// -H form wins, matching the original evaluation order. The returned
// error is always nil today but kept for interface stability.
func ParseCurlCommand(curlCmd string) (*CurlParams, error) {
	result := &CurlParams{
		Params: url.Values{},
	}
	// Extract the URL and, when it parses cleanly, its query string.
	if m := curlURLRe.FindStringSubmatch(curlCmd); len(m) > 1 {
		result.URL = m[1]
		if u, err := url.Parse(result.URL); err == nil {
			result.Params = u.Query()
		}
	}
	// Form 1: -b 'cookie_string'
	if m := curlCookieBRe.FindStringSubmatch(curlCmd); len(m) > 1 {
		result.Cookie = m[1]
	}
	// Form 2: -H 'cookie: ...' — overrides form 1 when both appear.
	if m := curlCookieHRe.FindStringSubmatch(curlCmd); len(m) > 1 {
		result.Cookie = m[1]
	}
	return result, nil
}
// ExtractAntiScrapParams 从参数中提取反爬虫相关参数
func ExtractAntiScrapParams(params url.Values) url.Values {
antiScrapKeys := []string{
"__token",
"verifyFp",
"fp",
"msToken",
"a_bogus",
"_lid",
}
result := url.Values{}
for _, key := range antiScrapKeys {
if v := params.Get(key); v != "" {
result.Set(key, v)
}
}
return result
}
// MergeParams 合并基础参数和反爬虫参数
func MergeParams(base, antiScrap url.Values) url.Values {
result := url.Values{}
// 先复制基础参数
for k, v := range base {
result[k] = append([]string{}, v...)
}
// 覆盖/添加反爬虫参数
for k, v := range antiScrap {
result[k] = v
}
return result
}
// IsAntiScrapParamsEmpty 检查反爬虫参数是否为空
func IsAntiScrapParamsEmpty(params url.Values) bool {
if params == nil {
return true
}
antiScrapKeys := []string{"__token", "verifyFp", "fp", "msToken", "a_bogus"}
for _, key := range antiScrapKeys {
if params.Get(key) != "" {
return false
}
}
return true
}
// ParseCurlFromEnvOrFile resolves a curl command from the environment
// value or a file path and parses it.
//
// The environment value takes priority: when non-empty it is parsed
// directly. File reading is intentionally delegated to the caller, so
// whenever the environment value is empty this returns (nil, nil) —
// regardless of whether filePath is set — signaling "nothing parsed".
func ParseCurlFromEnvOrFile(envValue string, filePath string) (*CurlParams, error) {
	if envValue != "" {
		return ParseCurlCommand(envValue)
	}
	return nil, nil
}
// whitespaceRe collapses any run of whitespace into one space. Compiled
// once at package scope instead of on every CleanCurlCommand call;
// compiled regexps are safe for concurrent use.
var whitespaceRe = regexp.MustCompile(`\s+`)

// CleanCurlCommand normalizes a multi-line curl command into a single
// line: backslash line continuations and bare newlines become spaces,
// runs of whitespace (including any stray \r from CRLF endings) are
// collapsed to one space, and the result is trimmed.
func CleanCurlCommand(curlCmd string) string {
	// Remove CRLF continuations first so their \r never survives,
	// then LF continuations, then bare newlines.
	curlCmd = strings.ReplaceAll(curlCmd, "\\\r\n", " ")
	curlCmd = strings.ReplaceAll(curlCmd, "\\\n", " ")
	curlCmd = strings.ReplaceAll(curlCmd, "\n", " ")
	// Collapse whitespace runs and trim the ends.
	return strings.TrimSpace(whitespaceRe.ReplaceAllString(curlCmd, " "))
}