package douyin

import (
	"net/url"
	"regexp"
	"strings"
)
// CurlParams holds the pieces extracted from a curl command line.
type CurlParams struct {
	// URL is the request URL (the first single-quoted https://... token).
	URL string
	// Cookie is the cookie string, taken from -b '...' or -H 'cookie: ...'.
	Cookie string
	// Params holds the query parameters parsed out of URL.
	Params url.Values
}
// ParseCurlCommand 解析curl命令,提取URL、Cookie和查询参数
|
||
func ParseCurlCommand(curlCmd string) (*CurlParams, error) {
|
||
result := &CurlParams{
|
||
Params: url.Values{},
|
||
}
|
||
|
||
// 提取URL
|
||
urlPattern := regexp.MustCompile(`'(https://[^'\s]+)'`)
|
||
urlMatch := urlPattern.FindStringSubmatch(curlCmd)
|
||
if len(urlMatch) > 1 {
|
||
result.URL = urlMatch[1]
|
||
// 解析URL中的查询参数
|
||
if u, err := url.Parse(result.URL); err == nil {
|
||
result.Params = u.Query()
|
||
}
|
||
}
|
||
|
||
// 提取Cookie (支持 -b 'xxx' 和 -H 'cookie: xxx' 两种格式)
|
||
// 格式1: -b 'cookie_string'
|
||
cookiePattern1 := regexp.MustCompile(`-b\s+'([^']+)'`)
|
||
cookieMatch1 := cookiePattern1.FindStringSubmatch(curlCmd)
|
||
if len(cookieMatch1) > 1 {
|
||
result.Cookie = cookieMatch1[1]
|
||
}
|
||
|
||
// 格式2: -H 'cookie: xxx' (不区分大小写)
|
||
cookiePattern2 := regexp.MustCompile(`(?i)-H\s+'cookie:\s*([^']+)'`)
|
||
cookieMatch2 := cookiePattern2.FindStringSubmatch(curlCmd)
|
||
if len(cookieMatch2) > 1 {
|
||
result.Cookie = cookieMatch2[1]
|
||
}
|
||
|
||
return result, nil
|
||
}
|
||
|
||
// ExtractAntiScrapParams 从参数中提取反爬虫相关参数
|
||
func ExtractAntiScrapParams(params url.Values) url.Values {
|
||
antiScrapKeys := []string{
|
||
"__token",
|
||
"verifyFp",
|
||
"fp",
|
||
"msToken",
|
||
"a_bogus",
|
||
"_lid",
|
||
}
|
||
|
||
result := url.Values{}
|
||
for _, key := range antiScrapKeys {
|
||
if v := params.Get(key); v != "" {
|
||
result.Set(key, v)
|
||
}
|
||
}
|
||
return result
|
||
}
|
||
|
||
// MergeParams 合并基础参数和反爬虫参数
|
||
func MergeParams(base, antiScrap url.Values) url.Values {
|
||
result := url.Values{}
|
||
// 先复制基础参数
|
||
for k, v := range base {
|
||
result[k] = append([]string{}, v...)
|
||
}
|
||
// 覆盖/添加反爬虫参数
|
||
for k, v := range antiScrap {
|
||
result[k] = v
|
||
}
|
||
return result
|
||
}
|
||
|
||
// IsAntiScrapParamsEmpty 检查反爬虫参数是否为空
|
||
func IsAntiScrapParamsEmpty(params url.Values) bool {
|
||
if params == nil {
|
||
return true
|
||
}
|
||
antiScrapKeys := []string{"__token", "verifyFp", "fp", "msToken", "a_bogus"}
|
||
for _, key := range antiScrapKeys {
|
||
if params.Get(key) != "" {
|
||
return false
|
||
}
|
||
}
|
||
return true
|
||
}
|
||
|
||
// ParseCurlFromEnvOrFile 从文件或环境变量读取curl命令并解析
|
||
func ParseCurlFromEnvOrFile(envValue string, filePath string) (*CurlParams, error) {
|
||
var curlCmd string
|
||
|
||
// 优先从环境变量读取
|
||
if envValue != "" {
|
||
curlCmd = envValue
|
||
}
|
||
|
||
// 如果环境变量为空,尝试从文件读取
|
||
if curlCmd == "" && filePath != "" {
|
||
// 这里不直接读取文件,由调用方处理
|
||
return nil, nil
|
||
}
|
||
|
||
if curlCmd == "" {
|
||
return nil, nil
|
||
}
|
||
|
||
return ParseCurlCommand(curlCmd)
|
||
}
|
||
|
||
// whitespaceRE collapses runs of whitespace (spaces, tabs, stray \r) into a
// single space; compiled once at package scope so CleanCurlCommand does not
// pay regexp.MustCompile on every call.
var whitespaceRE = regexp.MustCompile(`\s+`)

// CleanCurlCommand normalizes a multi-line curl command into a single line:
// backslash line continuations and bare newlines become spaces, runs of
// whitespace collapse to one space, and surrounding whitespace is trimmed.
func CleanCurlCommand(curlCmd string) string {
	// Handle Windows-style continuations ("\<CR><LF>") before Unix-style
	// ("\<LF>") so each continuation is consumed as a whole token.
	curlCmd = strings.ReplaceAll(curlCmd, "\\\r\n", " ")
	curlCmd = strings.ReplaceAll(curlCmd, "\\\n", " ")
	curlCmd = strings.ReplaceAll(curlCmd, "\n", " ")
	// Collapse repeated whitespace and trim the ends.
	return strings.TrimSpace(whitespaceRE.ReplaceAllString(curlCmd, " "))
}