patient-mini/api/ocrReact copy.js
2025-08-06 23:28:08 +08:00

232 lines
9.5 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Table-header aliases that denote the test item's name, in priority order.
// Only the first alias present on a row is renamed to `name`.
const OCR_NAME_ALIASES = [
  '项目',
  '项目名称',
  '检验项目',
  '检查项目',
  '检查项目名称',
  '项目全称',
  '中文名称',
  '分析项目',
  '实验名称',
  'N项目名称',
  '序号检查项目',
];

// Table-header aliases that denote the measured result, in priority order.
// Only the first alias present on a row is renamed to `value`.
const OCR_VALUE_ALIASES = [
  '结果',
  '值',
  '检验结果',
  '检查结果',
  '结果值',
  '结果浓度',
  '测定结果',
  '检验值',
];

// Return a copy of `item` in which the first key found in `aliases` is renamed to `target`.
function renameFirstAlias(item, aliases, target) {
  const alias = aliases.find((key) => key in item);
  if (alias === undefined) {
    return item;
  }
  const { [alias]: aliasedValue, ...rest } = item;
  return { ...rest, [target]: aliasedValue };
}

// Unify one parsed OCR row: alias keys -> `name` / `value`, parenthesised text removed from `name`.
function normalizeOcrItem(item) {
  let normalized = { ...item };
  normalized = renameFirstAlias(normalized, OCR_NAME_ALIASES, 'name');
  normalized = renameFirstAlias(normalized, OCR_VALUE_ALIASES, 'value');
  if (typeof normalized.name === 'string') {
    // Strip "(...)" groups, accepting both ASCII and full-width (U+FF08/U+FF09) parentheses.
    // The previous pattern started with a lazy `.*?` alternative that always matched the
    // empty string, so nothing was ever removed.
    normalized.name = normalized.name.replace(/[(\uFF08].*?[)\uFF09]/g, '').trim();
  }
  return normalized;
}

/**
 * Send an image URL to the DashScope OpenAI-compatible chat-completions endpoint
 * (model `qwen-vl-ocr-latest`) and return the parsed OCR result.
 *
 * The first choice's message content is handed to `parseOcrResult`. When that yields
 * an array, every row is normalised: the first matching name alias becomes `name`,
 * the first matching result alias becomes `value`, and parenthesised text is removed
 * from `name`. Non-array results are resolved unchanged.
 *
 * @param {string} url - Image URL placed in the request's `image_url` content part.
 * @returns {Promise<Array<Object>>} Resolves with the normalised rows. Rejects with the
 *   `wx.request` failure object when the request fails, or with an `Error` when the
 *   response does not have the expected `choices[0].message.content` shape or cannot
 *   be parsed.
 */
export const getOcrReact = (url) => {
  return new Promise((resolve, reject) => {
    wx.request({
      url: 'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
      method: 'POST',
      dataType: 'json', // Per the WeChat docs, the response body is JSON.parse'd once.
      header: {
        // SECURITY(review): this API key is hard-coded in client-side code, so anyone with
        // access to the source or the shipped package can read it. Rotate it and move the
        // call behind a server-side proxy.
        'Authorization': 'Bearer sk-52414b887aee47e4883caf16cbf801bd',
        'Content-Type': 'application/json'
      },
      data: {
        "model": "qwen-vl-ocr-latest",
        "messages": [{
          "role": "user",
          "content": [{
              "type": "image_url",
              "image_url": {
                "url": url
              },
              "min_pixels": 3136,
              "max_pixels": 6422528
            },
            {
              "type": "text",
              "text": "要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息,模糊或者强光遮挡的单个文字可以用英文问号?代替。返回数据格式以MD方式输出"
            }
          ]
        }]
      },
      success(res) {
        // This callback runs asynchronously, outside the Promise executor, so an exception
        // thrown here would otherwise leave the promise pending forever (e.g. when the API
        // answers with an error payload that has no `choices`).
        try {
          let data = parseOcrResult(res.data.choices[0].message.content);
          console.log(data);
          if (Array.isArray(data)) {
            data = data.map((item) => {
              const newItem = normalizeOcrItem(item);
              console.log(newItem);
              return newItem;
            });
          }
          resolve(data);
        } catch (err) {
          const error = new Error(`Unexpected OCR response: ${err && err.message ? err.message : err}`);
          error.cause = err;
          reject(error);
        }
      },
      fail(err) {
        console.log(err);
        // Network loss or an unreachable server ends up here; just propagate the failure.
        reject(err);
      },
    });
  });
};
/**
 * Parse a pipe-delimited Markdown table into an array of row objects.
 *
 * Blank lines are dropped first. The second remaining line then decides the layout:
 * - If every cell of that line is empty or consists only of dashes (e.g. `---|---`),
 *   the table is treated as headerless: columns are keyed `col1..colN`, where N is the
 *   number of `|`-separated cells in the first line (empty edge cells included), and
 *   the first line is kept as a data row.
 * - Otherwise the first line supplies the keys and the second line is skipped. Empty
 *   header cells (such as those produced by leading/trailing pipes) are ignored.
 *
 * NOTE(review): a standard Markdown table whose delimiter row is plain `---` takes the
 * headerless branch, so its header row comes back as data keyed `col1..colN`. Confirm
 * that this is the intended behaviour.
 *
 * @param {string} md - Markdown text containing the table.
 * @returns {Array<Object>} One object per data row; missing or empty cells become ''.
 *   Returns [] for non-string, empty, or whitespace-only input.
 */
function parseMarkdownTable(md) {
  if (typeof md !== 'string') {
    return [];
  }
  const lines = md.split('\n').filter((line) => line.trim().length > 0);
  // Guard: with no content lines there is no first row to read a header from.
  if (lines.length === 0) {
    return [];
  }

  const splitRow = (line) => line.split('|').map((cell) => cell.trim());
  const isSeparator = (line) =>
    splitRow(line).every((cell) => cell === '' || /^-+$/.test(cell));

  const headerless = lines.length > 1 && isSeparator(lines[1]);
  const header = headerless
    ? splitRow(lines[0]).map((_, i) => `col${i + 1}`)
    : splitRow(lines[0]);
  // In both layouts the second line is skipped; only the headerless layout keeps line one as data.
  const dataLines = headerless ? [lines[0], ...lines.slice(2)] : lines.slice(2);

  return dataLines.map((line) => {
    const cells = splitRow(line);
    const row = {};
    header.forEach((key, idx) => {
      if (key) {
        row[key] = cells[idx] || '';
      }
    });
    return row;
  });
}
/**
 * Extract test-item records from a string shaped like a ```json ... ``` code block.
 *
 * The text is not JSON.parse'd. Instead every `"key": value` pair is collected with a
 * regular expression, which tolerates truncated or otherwise malformed JSON. Only the
 * known item fields are kept. A new record starts whenever the key `序号` is seen or
 * a key that the current record already holds appears again, so rows without `序号`
 * are not merged into one object.
 *
 * Values are always returned as strings. Unquoted JSON numbers (e.g. `"序号": 1`) are
 * captured as their source text, so such pairs are no longer ignored.
 *
 * @param {string} str - Raw model output, optionally wrapped in a ```json fence.
 * @returns {Array<Object>} Records containing a subset of the known item fields;
 *   [] when nothing matches or `str` is not a string.
 */
function parseJsonBlock(str) {
  if (typeof str !== 'string') {
    return [];
  }
  // Strip the surrounding code-fence markers and normalise the "↵" newline glyph.
  const jsonStr = str.replace(/^[\s`]*```json[\s`]*|```$/g, '').replace(/↵/g, '\n').trim();
  // "key": "string"  or  "key": number
  const pairPattern = /"([^"]+)":\s*(?:"([^"]*)"|(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?))/g;
  const itemFields = ['序号', '项目名称', '缩写', '结果', '单位', '参考区间', '测定方法'];

  const items = [];
  let current = {};
  let match;
  while ((match = pairPattern.exec(jsonStr)) !== null) {
    const key = match[1];
    if (!itemFields.includes(key)) {
      continue;
    }
    const value = match[2] !== undefined ? match[2] : match[3];
    // `序号` or a repeated field marks the beginning of the next record.
    const startsNewItem = key === '序号' || Object.prototype.hasOwnProperty.call(current, key);
    if (startsNewItem && Object.keys(current).length > 0) {
      items.push(current);
      current = {};
    }
    current[key] = value;
  }
  if (Object.keys(current).length > 0) {
    items.push(current);
  }
  return items;
}
/**
 * Detect the format of the OCR model's reply and dispatch to the matching parser.
 *
 * Checks run in this order; the first hit wins:
 *  1. Trimmed content starts with a ```json fence      -> parseJsonBlock
 *  2. Content contains a `|...|` span and a run made only of whitespace, `-`, `:` and
 *     `|` between two pipes                            -> parseMarkdownTable
 *  3. Some line looks like a lab row: digits, name (CJK/Latin letters), number,
 *     unit, then a reference made of digits/`.`/`-`    -> parseLabResults
 * Anything else, including non-string content, yields an empty array.
 *
 * @param {string} content - Message content returned by the model.
 * @returns {Array<Object>} Parsed records, or [] when the format is not recognised.
 */
function parseOcrResult(content) {
  // A reply without textual content falls into the "unrecognised" bucket instead of throwing.
  if (typeof content !== 'string') {
    return [];
  }
  // JSON code block.
  if (/^```json/.test(content.trim())) {
    return parseJsonBlock(content);
  }
  // Markdown table.
  if (/\|.*\|/.test(content) && /\|[\s\-:|]+\|/.test(content)) {
    return parseMarkdownTable(content);
  }
  // Lab-result rows. Both the micro sign (U+00B5) and Greek small mu (U+03BC) are
  // accepted in units, since the two look identical but are distinct code points.
  const labRowPattern = /^\d+[\u4e00-\u9fa5A-Za-z]+[\d.]+[a-zA-Z\u00B5\u03BC\/]+[\d.\-]+/m;
  if (labRowPattern.test(content.replace(/↵/g, '\n'))) {
    return parseLabResults(content);
  }
  // Unknown format.
  return [];
}
/**
 * Parse run-together lab-result lines into structured records.
 *
 * Each line is expected to start with: an index (digits), a name (CJK or Latin
 * letters), a numeric value (digits and dots), then optionally a unit (Latin letters,
 * mu, `/`) and optionally a reference range (digits, `.`, `-`). Lines that do not
 * start with this shape are skipped. The "↵" glyph is treated as a line break.
 *
 * `value` is produced with `Number(...)`, so a malformed numeric run such as "1.2.3"
 * yields NaN.
 *
 * @param {string} str - Raw text, one result per line.
 * @returns {Array<{index: number, name: string, value: number, unit: string, reference: string}>}
 *   One record per matching line; [] for empty or non-string input.
 */
function parseLabResults(str) {
  if (typeof str !== 'string' || str === '') {
    return [];
  }
  // Built once for all lines. The unit class accepts both the micro sign (U+00B5) and
  // Greek small mu (U+03BC): they render identically, and missing either one would drop
  // the unit together with the reference range that follows it.
  const rowPattern = /^(\d+)([\u4e00-\u9fa5A-Za-z]+)([\d.]+)([a-zA-Z\u00B5\u03BC\/]+)?([\d.\-]+)?/;

  const result = [];
  for (const line of str.replace(/↵/g, '\n').split(/\n+/)) {
    const match = line.match(rowPattern);
    if (!match) {
      continue;
    }
    result.push({
      index: Number(match[1]),
      name: match[2],
      value: Number(match[3]),
      unit: match[4] || '',
      reference: match[5] || '',
    });
  }
  return result;
}