patient-mini/api/ocrReact.js
2025-08-06 23:28:08 +08:00

193 lines
6.8 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Runs OCR on an image through the DashScope OpenAI-compatible chat endpoint
 * (model `qwen-vl-ocr-latest`) and resolves with the parsed rows.
 *
 * The reply text is handed to `parseOcrResult`. When that yields an array,
 * every row is reduced to `{ name, value }` (see `normalizeOcrRows`).
 *
 * The promise rejects when the request fails, when the response carries no
 * string `choices[0].message.content` (for example an API error payload), or
 * when parsing throws. Without this, an exception inside the `success`
 * callback would leave the promise pending forever.
 *
 * @param {string} url - Image URL placed in the request's `image_url` part.
 * @returns {Promise<Array<Object>>} Rows produced by `parseOcrResult`, mapped
 *   to `{ name, value }` objects when the parser returns an array.
 */
export const getOcrReact = (url) => {
  return new Promise((resolve, reject) => {
    wx.request({
      url: 'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
      method: 'POST',
      dataType: 'json', // per the WeChat docs, the response body is JSON.parse'd once
      header: {
        // SECURITY NOTE(review): this API key is hard-coded and ships with the
        // client bundle. It should be rotated and the call moved behind a
        // server-side proxy; it is kept here only so existing behavior is unchanged.
        'Authorization': 'Bearer sk-52414b887aee47e4883caf16cbf801bd',
        'Content-Type': 'application/json'
      },
      data: {
        "model": "qwen-vl-ocr-latest",
        "messages": [{
          "role": "user",
          "content": [{
              "type": "image_url",
              "image_url": {
                "url": url
              },
              "min_pixels": 3136,
              "max_pixels": 6422528
            },
            {
              "type": "text",
              "text": "要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息,模糊或者强光遮挡的单个文字可以用英文问号?代替。返回数据格式以MD方式输出"
            }
          ]
        }]
      },
      success(res) {
        try {
          // Walk the response defensively: an error payload has no `choices`.
          const choices = res && res.data ? res.data.choices : undefined;
          const message = Array.isArray(choices) && choices[0] ? choices[0].message : undefined;
          const content = message ? message.content : undefined;
          if (typeof content !== 'string') {
            const statusCode = res ? res.statusCode : undefined;
            throw new Error(`OCR response has no message content (HTTP status: ${statusCode})`);
          }

          let data = parseOcrResult(content);
          console.log(data);
          // Unify field names so callers only deal with `name` / `value`.
          if (Array.isArray(data)) {
            data = normalizeOcrRows(data);
          }
          resolve(data);
        } catch (err) {
          console.log(err);
          reject(err);
        }
      },
      fail(err) {
        console.log(err);
        // Network loss or an unreachable server ends up here; just reject.
        reject(err);
      },
    });
  });
};

/**
 * Maps parsed OCR rows to `{ name, value }` objects.
 *
 * The source columns are discovered from the FIRST row: the key whose string
 * value contains "项目" becomes the name column and the key whose string value
 * contains "结果" becomes the value column (the last matching key wins).
 * Rows lacking a truthy value for a column simply omit that property.
 *
 * @param {Array<Object>} rows - Rows returned by the OCR parser.
 * @returns {Array<Object>} One `{ name?, value? }` object per input row.
 */
function normalizeOcrRows(rows) {
  let projectField = null;
  let resultField = null;

  if (rows.length > 0) {
    const firstRow = rows[0];
    Object.keys(firstRow).forEach((key) => {
      const cell = firstRow[key];
      if (typeof cell !== 'string') {
        return;
      }
      if (cell.includes('项目')) {
        projectField = key;
      }
      if (cell.includes('结果')) {
        resultField = key;
      }
    });
  }

  return rows.map((item) => {
    const newItem = {};
    if (projectField && item[projectField]) {
      newItem.name = item[projectField];
    }
    if (resultField && item[resultField]) {
      newItem.value = item[resultField];
    }
    return newItem;
  });
}
/**
 * Parses a pipe-delimited Markdown table into an array of row objects.
 *
 * Two layouts are handled:
 * - Line 2 is a plain dash delimiter row (`|---|---|`): columns are named
 *   `col1..colN` and line 1 is kept as an ordinary data row, so callers can
 *   inspect the header text as values.
 * - Otherwise line 1 supplies the keys. Line 2 is skipped only when it is a
 *   colon-aligned delimiter row (`:---|---:`); any other line 2 is real data
 *   and is kept instead of being silently dropped.
 *
 * Cells are split on `|` and trimmed; header cells that are empty (such as
 * those produced by leading/trailing pipes) are not used as keys. Missing
 * cells become the empty string.
 *
 * @param {string} md - Markdown text containing the table.
 * @returns {Array<Object>} One object per data row; `[]` for empty or
 *   non-string input.
 */
function parseMarkdownTable(md) {
  if (typeof md !== 'string') {
    return [];
  }

  // Split into lines and discard blank ones.
  const lines = md.split('\n').filter((line) => line.trim().length > 0);
  if (lines.length === 0) {
    return [];
  }

  const splitCells = (line) => line.split('|').map((cell) => cell.trim());

  // Plain delimiter row: every cell is empty or consists solely of dashes.
  const isSeparator = (line) =>
    splitCells(line).every((cell) => cell === '' || /^-+$/.test(cell));

  // Delimiter row that may carry alignment colons, e.g. `:---|:---:|---:`.
  const isAlignedSeparator = (line) =>
    line.includes('-') &&
    splitCells(line).every((cell) => cell === '' || /^:?-+:?$/.test(cell));

  let header = [];
  let dataLines = [];
  if (lines.length > 1 && isSeparator(lines[1])) {
    // Treat the table as headerless: generate column names and keep line 1 as data.
    const colCount = lines[0].split('|').length;
    header = Array.from({ length: colCount }, (_, i) => `col${i + 1}`);
    dataLines = [lines[0], ...lines.slice(2)];
  } else {
    // Line 1 is the header; drop line 2 only if it really is a delimiter row.
    header = splitCells(lines[0]);
    const hasDelimiterRow = lines.length > 1 && isAlignedSeparator(lines[1]);
    dataLines = lines.slice(hasDelimiterRow ? 2 : 1);
  }

  // Build one object per data line, keyed by the non-empty header cells.
  return dataLines.map((line) => {
    const cells = splitCells(line);
    const row = {};
    header.forEach((key, idx) => {
      if (key) {
        row[key] = cells[idx] || '';
      }
    });
    return row;
  });
}
/**
 * Extracts lab-item records from text wrapped in a ```json ... ``` fence.
 *
 * The text is not run through JSON.parse. Instead every `"key": "value"`
 * string pair is collected with a regular expression, and the pairs are
 * grouped into records: a "序号" key starts a new record whenever the
 * current one already holds data. Keys outside the known item fields are
 * ignored.
 *
 * @param {string} str - Raw model output, optionally fenced with ```json.
 * @returns {Array<Object>} Records holding the recognized item fields.
 */
function parseJsonBlock(str) {
  const knownFields = ['序号', '项目名称', '缩写', '结果', '单位', '参考区间', '测定方法'];

  // Strip the code-fence markers and turn "↵" glyphs into real newlines.
  const withoutFence = str.replace(/^[\s`]*```json[\s`]*|```$/g, '');
  const body = withoutFence.replace(/↵/g, '\n').trim();

  const pairPattern = /"([^"]+)":\s*"([^"]*)"/g;
  const records = [];
  let record = {};

  for (let hit = pairPattern.exec(body); hit !== null; hit = pairPattern.exec(body)) {
    const field = hit[1];
    const text = hit[2];

    // A fresh "序号" closes the record collected so far.
    const startsNextRecord = field === '序号' && Object.keys(record).length > 0;
    if (startsNextRecord) {
      records.push(record);
      record = {};
    }

    if (knownFields.includes(field)) {
      record[field] = text;
    }
  }

  // Flush the trailing record, if it gathered anything.
  if (Object.keys(record).length > 0) {
    records.push(record);
  }
  return records;
}
/**
 * Detects the format of the OCR reply and dispatches to the matching parser.
 *
 * Formats are probed in a fixed order and the first match wins:
 *   1. a ```json fenced block      -> parseJsonBlock
 *   2. a Markdown pipe table       -> parseMarkdownTable
 *   3. run-together lab result rows -> parseLabResults
 * Anything else yields an empty array.
 *
 * @param {string} content - Text returned by the OCR model.
 * @returns {Array<Object>} Parsed rows, or `[]` when no format is recognized.
 */
function parseOcrResult(content) {
  const routes = [
    {
      // Reply starts (after trimming) with a ```json fence.
      matches: (text) => /^```json/.test(text.trim()),
      parse: (text) => parseJsonBlock(text),
    },
    {
      // Reply contains a piped row as well as a dash/colon delimiter row.
      matches: (text) => /\|.*\|/.test(text) && /\|[\s\-:|]+\|/.test(text),
      parse: (text) => parseMarkdownTable(text),
    },
    {
      // Some line looks like: index + name + value + unit + reference range.
      matches: (text) =>
        /^\d+[\u4e00-\u9fa5A-Za-z]+[\d.]+[a-zA-Zμ\/]+[\d.\-]+/m.test(text.replace(/↵/g, '\n')),
      parse: (text) => parseLabResults(text),
    },
  ];

  const route = routes.find((candidate) => candidate.matches(content));
  // Unrecognized content: nothing to return.
  return route ? route.parse(content) : [];
}
/**
 * Parses run-together lab result lines into structured objects.
 *
 * Each line is expected to look like
 * `<index><name><value>[unit][reference]`, e.g. `1白细胞5.2g/L3.5-9.5`.
 * "↵" glyphs are treated as line breaks, surrounding whitespace on a line is
 * ignored, and lines that do not match the pattern are skipped.
 *
 * @param {string} str - Raw text containing one result per line.
 * @returns {Array<{index: number, name: string, value: number, unit: string, reference: string}>}
 *   Parsed rows; `[]` for empty or non-string input. `value` is produced by
 *   `Number()`, so a malformed numeric token such as `1.2.3` becomes `NaN`.
 */
function parseLabResults(str) {
  if (typeof str !== 'string' || str.length === 0) {
    return [];
  }

  // Groups: 1 index, 2 name (CJK/Latin letters), 3 value, 4 unit, 5 reference.
  const rowPattern = /^(\d+)([\u4e00-\u9fa5A-Za-z]+)([\d.]+)([a-zA-Zμ\/]+)?([\d.\-]+)?/;

  // Normalize the "↵" glyph to a real newline before splitting into lines.
  const lines = str.replace(/↵/g, '\n').split(/\n+/);

  const result = [];
  for (const rawLine of lines) {
    // The pattern is anchored at the line start, so strip indentation first.
    const match = rawLine.trim().match(rowPattern);
    if (!match) {
      continue;
    }
    result.push({
      index: Number(match[1]),
      name: match[2],
      value: Number(match[3]),
      unit: match[4] || '',
      reference: match[5] || '',
    });
  }
  return result;
}