193 lines
6.8 KiB
JavaScript
193 lines
6.8 KiB
JavaScript
/**
 * Reshape parsed OCR rows into `{ name, value }` records.
 *
 * The "item" and "result" columns are located on the first row: first by
 * looking for the header text inside the row's *values* (the shape produced
 * when a table's header line is kept as the first record under positional
 * keys), then, if that finds nothing, by looking at the *keys* themselves
 * (the shape produced when rows are already keyed by header text).
 *
 * When neither column can be located the rows are returned untouched, so
 * already-structured rows are not replaced by empty objects.
 *
 * @param {Array<Object>} rows - parsed rows
 * @returns {Array<Object>} remapped rows, or `rows` itself when no column matched
 */
function normalizeOcrRows(rows) {
  if (rows.length === 0) {
    return rows;
  }

  const firstRow = rows[0];
  const keys = Object.keys(firstRow);

  // The last matching key wins, mirroring an overwrite-on-every-match scan.
  const lastKeyWhere = (predicate) => {
    let found = null;
    keys.forEach((key) => {
      if (predicate(key)) {
        found = key;
      }
    });
    return found;
  };
  const valueContains = (text) => (key) =>
    typeof firstRow[key] === 'string' && firstRow[key].includes(text);
  const keyContains = (text) => (key) => key.includes(text);

  const projectField = lastKeyWhere(valueContains('项目')) || lastKeyWhere(keyContains('项目'));
  const resultField = lastKeyWhere(valueContains('结果')) || lastKeyWhere(keyContains('结果'));

  if (!projectField && !resultField) {
    return rows;
  }

  return rows.map((item) => {
    const mapped = {};
    if (projectField && item[projectField]) {
      mapped.name = item[projectField];
    }
    if (resultField && item[resultField]) {
      mapped.value = item[resultField];
    }
    return mapped;
  });
}

/**
 * Send an image URL to the DashScope OpenAI-compatible chat-completions
 * endpoint (model `qwen-vl-ocr-latest`) and resolve with the parsed OCR rows.
 *
 * The model's text reply is handed to `parseOcrResult`; when that yields an
 * array, its rows are unified to `{ name, value }` via `normalizeOcrRows`.
 *
 * @param {string} url - URL of the image to recognise
 * @returns {Promise<Array<Object>>} parsed rows; rejects when the request
 *   fails, when the reply carries no text content, or when parsing throws
 */
export const getOcrReact = (url) => {
  return new Promise((resolve, reject) => {
    wx.request({
      url: 'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
      method: 'POST',
      dataType: 'json', // per the WeChat docs the response body is run through JSON.parse once
      header: {
        // SECURITY(review): this credential is committed in client-side source and
        // should be rotated and moved behind a server-side proxy.
        'Authorization': 'Bearer sk-52414b887aee47e4883caf16cbf801bd',
        'Content-Type': 'application/json',
      },
      data: {
        "model": "qwen-vl-ocr-latest",
        "messages": [{
          "role": "user",
          "content": [
            {
              "type": "image_url",
              "image_url": {
                "url": url,
              },
              "min_pixels": 3136,
              "max_pixels": 6422528,
            },
            {
              "type": "text",
              "text": "要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息,模糊或者强光遮挡的单个文字可以用英文问号?代替。返回数据格式以MD方式输出",
            },
          ],
        }],
      },
      success(res) {
        // An exception escaping this callback would leave the promise pending
        // forever, so every failure below is turned into a rejection.
        try {
          const body = res ? res.data : null;
          const choices = body ? body.choices : null;
          const message = Array.isArray(choices) && choices.length > 0 ? choices[0].message : null;
          const content = message ? message.content : null;

          if (typeof content !== 'string') {
            const status = res ? res.statusCode : undefined;
            reject(new Error(`OCR response has no text content (statusCode: ${status})`));
            return;
          }

          let data = parseOcrResult(content);
          console.log(data);

          // Unify field names.
          if (Array.isArray(data)) {
            data = normalizeOcrRows(data);
          }
          resolve(data);
        } catch (err) {
          console.log(err);
          reject(err);
        }
      },
      fail(err) {
        console.log(err);
        // Network loss and unreachable servers both land here; just reject.
        reject(err);
      },
    });
  });
};
|
||
|
||
/**
 * Convert a pipe-delimited (Markdown-style) table into an array of row objects.
 *
 * Two layouts are handled:
 *  - Line 2 is a separator row (`---`, optionally with alignment colons):
 *    columns get positional keys `col1..colN` and line 1 is kept as the first
 *    record. `getOcrReact` in this file locates columns by scanning the first
 *    record's values, so the header text has to stay available as data.
 *  - Otherwise: line 1 supplies the keys (empty cells are skipped) and every
 *    following line is a data row.
 *
 * Cells are split on `|` and trimmed; a missing or empty cell becomes `''`.
 *
 * @param {string} md - table text
 * @returns {Array<Object>} one object per data line; `[]` for blank or non-string input
 */
function parseMarkdownTable(md) {
  if (typeof md !== 'string') {
    return [];
  }

  // Split into lines and drop blank ones.
  const lines = md.split('\n').filter((line) => line.trim().length > 0);
  if (lines.length === 0) {
    return [];
  }

  const splitRow = (line) => line.split('|').map((cell) => cell.trim());

  // A separator row has only empty cells or dashes, optionally wrapped in
  // alignment colons (":---", "---:", ":---:").
  const isSeparator = (line) =>
    splitRow(line).every((cell) => cell === '' || /^:?-+:?$/.test(cell));

  let header;
  let dataLines;
  if (lines.length > 1 && isSeparator(lines[1])) {
    // Positional keys; the line above the separator stays in the output.
    const columnCount = lines[0].split('|').length;
    header = Array.from({ length: columnCount }, (_, i) => `col${i + 1}`);
    dataLines = [lines[0], ...lines.slice(2)];
  } else {
    // No separator row: line 1 names the columns and everything after it is
    // data, so nothing beyond the header line may be skipped.
    header = splitRow(lines[0]);
    dataLines = lines.slice(1);
  }

  // Build one object per data line.
  return dataLines.map((line) => {
    const cells = splitRow(line);
    const row = {};
    header.forEach((key, idx) => {
      if (key) {
        row[key] = cells[idx] || '';
      }
    });
    return row;
  });
}
|
||
|
||
/**
 * Extract test-item records from a string shaped like ```json ... ```.
 *
 * The text is scanned for `"key": value` pairs rather than parsed as JSON, so
 * slightly malformed model output still yields data. Both quoted values and
 * bare numbers are captured; every captured value is returned as a string.
 * Only the keys listed in `itemFields` are kept.
 *
 * A new record starts when a "序号" key is seen, or when a key repeats inside
 * the record being built (which means the previous record has ended even
 * though no "序号" was present).
 *
 * @param {string} str - raw model output
 * @returns {Array<Object>} one object per detected record; `[]` for non-string input
 */
function parseJsonBlock(str) {
  if (typeof str !== 'string') {
    return [];
  }

  // Strip the surrounding code-fence markers.
  const jsonStr = str.replace(/^[\s`]*```json[\s`]*|```$/g, '').replace(/↵/g, '\n').trim();

  // "key": "value"  or  "key": 123 / -1.5
  const pairPattern = /"([^"]+)":\s*(?:"([^"]*)"|(-?\d+(?:\.\d+)?))/g;
  const itemFields = ['序号', '项目名称', '缩写', '结果', '单位', '参考区间', '测定方法'];

  const items = [];
  let current = {};
  let match;
  while ((match = pairPattern.exec(jsonStr)) !== null) {
    const key = match[1];
    const value = match[2] !== undefined ? match[2] : match[3];

    if (!itemFields.includes(key)) {
      continue;
    }

    const startsNewRecord = key === '序号' || key in current;
    if (startsNewRecord && Object.keys(current).length > 0) {
      items.push(current);
      current = {};
    }
    current[key] = value;
  }

  if (Object.keys(current).length > 0) {
    items.push(current);
  }
  return items;
}
|
||
|
||
/**
 * Detect the format of the OCR reply and dispatch to the matching parser.
 *
 * Checks, in order:
 *  1. text starting with a ```json fence          -> parseJsonBlock
 *  2. text containing a pipe row and a separator-like pipe row -> parseMarkdownTable
 *  3. a line shaped like index + name + value + unit + reference -> parseLabResults
 *
 * @param {string} content - text returned by the OCR model
 * @returns {Array<Object>} parsed rows; `[]` when no format matches or
 *   `content` is not a string
 */
function parseOcrResult(content) {
  // Anything but a string cannot be inspected; treat it as "no recognisable format".
  if (typeof content !== 'string') {
    return [];
  }

  // JSON code block?
  if (/^```json/.test(content.trim())) {
    return parseJsonBlock(content);
  }

  // Markdown table (a |...| row plus a row made of dashes/colons/pipes)?
  if (/\|.*\|/.test(content) && /\|[\s\-:|]+\|/.test(content)) {
    return parseMarkdownTable(content);
  }

  // Lab-result lines (digits + name + number + unit + reference range)?
  // Both micro-sign code points (U+00B5, U+03BC) are accepted in the unit.
  const labLine = /^\d+[\u4e00-\u9fa5A-Za-z]+[\d.]+[a-zA-Z\u00b5\u03bc\/]+[\d.\-]+/m;
  if (labLine.test(content.replace(/↵/g, '\n'))) {
    return parseLabResults(content);
  }

  // Unrecognised format.
  return [];
}
|
||
|
||
/**
 * Parse run-together lab-result lines into structured records.
 *
 * Each line is expected to look like
 * `<index><name><value>[<unit>][<reference>]`, e.g. `1白细胞5.2mmol/L3.5-9.5`.
 * Lines that do not match are skipped. The "↵" glyph is treated as a line break.
 *
 * @param {string} str - raw text
 * @returns {Array} records of `{ index: number, name: string, value: number,
 *   unit: string, reference: string }`; `[]` for empty or non-string input
 */
function parseLabResults(str) {
  if (typeof str !== 'string' || !str) {
    return [];
  }

  // index, name (CJK/Latin letters), numeric value, optional unit, optional reference.
  // The unit accepts both micro-sign code points (U+00B5 and U+03BC).
  // No `g` flag, so reusing one instance across lines is stateless.
  const linePattern = /^(\d+)([\u4e00-\u9fa5A-Za-z]+)([\d.]+)([a-zA-Z\u00b5\u03bc\/]+)?([\d.\-]+)?/;

  const records = [];
  // Normalise the special line-break glyph, then walk the non-empty lines.
  for (const line of str.replace(/↵/g, '\n').split(/\n+/)) {
    if (!line) {
      continue;
    }
    const match = linePattern.exec(line);
    if (!match) {
      continue;
    }
    records.push({
      index: Number(match[1]),
      name: match[2],
      value: Number(match[3]),
      unit: match[4] || '',
      reference: match[5] || '',
    });
  }
  return records;
}