package douyin

import (
	"net/url"
	"regexp"
	"strings"
)
// CurlParams holds the pieces extracted from a curl command line.
type CurlParams struct {
	// URL is the request URL (the first single-quoted https://... token).
	URL string
	// Cookie is the cookie string, taken from -b '...' or -H 'cookie: ...'.
	Cookie string
	// Params holds the query parameters parsed out of URL.
	Params url.Values
}
// ParseCurlCommand 解析curl命令,提取URL、Cookie和查询参数
|
||
func ParseCurlCommand(curlCmd string) (*CurlParams, error) {
|
||
result := &CurlParams{
|
||
Params: url.Values{},
|
||
}
|
||
|
||
// 提取URL
|
||
urlPattern := regexp.MustCompile(`'(https://[^'\s]+)'`)
|
||
urlMatch := urlPattern.FindStringSubmatch(curlCmd)
|
||
if len(urlMatch) > 1 {
|
||
result.URL = urlMatch[1]
|
||
// 解析URL中的查询参数
|
||
if u, err := url.Parse(result.URL); err == nil {
|
||
result.Params = u.Query()
|
||
}
|
||
}
|
||
|
||
// 提取Cookie (支持 -b 'xxx' 和 -H 'cookie: xxx' 两种格式)
|
||
// 格式1: -b 'cookie_string'
|
||
cookiePattern1 := regexp.MustCompile(`-b\s+'([^']+)'`)
|
||
cookieMatch1 := cookiePattern1.FindStringSubmatch(curlCmd)
|
||
if len(cookieMatch1) > 1 {
|
||
result.Cookie = cookieMatch1[1]
|
||
}
|
||
|
||
// 格式2: -H 'cookie: xxx' (不区分大小写)
|
||
cookiePattern2 := regexp.MustCompile(`(?i)-H\s+'cookie:\s*([^']+)'`)
|
||
cookieMatch2 := cookiePattern2.FindStringSubmatch(curlCmd)
|
||
if len(cookieMatch2) > 1 {
|
||
result.Cookie = cookieMatch2[1]
|
||
}
|
||
|
||
return result, nil
|
||
}
|
||
|
||
// ExtractAntiScrapParams 从参数中提取反爬虫相关参数
|
||
func ExtractAntiScrapParams(params url.Values) url.Values {
|
||
antiScrapKeys := []string{
|
||
"__token",
|
||
"verifyFp",
|
||
"fp",
|
||
"msToken",
|
||
"a_bogus",
|
||
"_lid",
|
||
}
|
||
|
||
result := url.Values{}
|
||
for _, key := range antiScrapKeys {
|
||
if v := params.Get(key); v != "" {
|
||
result.Set(key, v)
|
||
}
|
||
}
|
||
return result
|
||
}
|
||
|
||
// MergeParams 合并基础参数和反爬虫参数
|
||
func MergeParams(base, antiScrap url.Values) url.Values {
|
||
result := url.Values{}
|
||
// 先复制基础参数
|
||
for k, v := range base {
|
||
result[k] = append([]string{}, v...)
|
||
}
|
||
// 覆盖/添加反爬虫参数
|
||
for k, v := range antiScrap {
|
||
result[k] = v
|
||
}
|
||
return result
|
||
}
|
||
|
||
// IsAntiScrapParamsEmpty 检查反爬虫参数是否为空
|
||
func IsAntiScrapParamsEmpty(params url.Values) bool {
|
||
if params == nil {
|
||
return true
|
||
}
|
||
antiScrapKeys := []string{"__token", "verifyFp", "fp", "msToken", "a_bogus"}
|
||
for _, key := range antiScrapKeys {
|
||
if params.Get(key) != "" {
|
||
return false
|
||
}
|
||
}
|
||
return true
|
||
}
|
||
|
||
// ParseCurlFromEnvOrFile 从文件或环境变量读取curl命令并解析
|
||
func ParseCurlFromEnvOrFile(envValue string, filePath string) (*CurlParams, error) {
|
||
var curlCmd string
|
||
|
||
// 优先从环境变量读取
|
||
if envValue != "" {
|
||
curlCmd = envValue
|
||
}
|
||
|
||
// 如果环境变量为空,尝试从文件读取
|
||
if curlCmd == "" && filePath != "" {
|
||
// 这里不直接读取文件,由调用方处理
|
||
return nil, nil
|
||
}
|
||
|
||
if curlCmd == "" {
|
||
return nil, nil
|
||
}
|
||
|
||
return ParseCurlCommand(curlCmd)
|
||
}
|
||
|
||
// whitespaceRE collapses runs of whitespace (spaces, tabs, stray \r) into a
// single space; compiled once at package scope so CleanCurlCommand does not
// pay regexp.MustCompile on every call.
var whitespaceRE = regexp.MustCompile(`\s+`)

// CleanCurlCommand normalizes a multi-line curl command into a single line:
// backslash line continuations and bare newlines become spaces, runs of
// whitespace collapse to one space, and surrounding whitespace is trimmed.
func CleanCurlCommand(curlCmd string) string {
	// Handle Windows-style continuations ("\<CR><LF>") before Unix-style
	// ("\<LF>") so each continuation is consumed as a whole token.
	curlCmd = strings.ReplaceAll(curlCmd, "\\\r\n", " ")
	curlCmd = strings.ReplaceAll(curlCmd, "\\\n", " ")
	curlCmd = strings.ReplaceAll(curlCmd, "\n", " ")
	// Collapse repeated whitespace and trim the ends.
	return strings.TrimSpace(whitespaceRE.ReplaceAllString(curlCmd, " "))
}