patient-mini/api/ocr.js
2025-07-21 22:24:05 +08:00

119 lines
3.9 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Sends an image URL to the DashScope OpenAI-compatible chat-completions
 * endpoint (model `qwen-vl-ocr-latest`) and resolves with the parsed OCR result.
 *
 * The promise always settles: transport failures, non-2xx HTTP responses,
 * malformed response bodies and parser exceptions all lead to a rejection.
 *
 * @param {string} url - Image URL placed in the request's `image_url` content part.
 * @returns {Promise<Array<Object>>} Resolves with the value returned by `parseOcrResult`.
 */
export const getOcr = (url) => {
  return new Promise((resolve, reject) => {
    wx.request({
      url: 'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
      method: 'POST',
      // Per the original author's note: the WeChat docs say the response body
      // is passed through JSON.parse once when dataType is 'json'.
      dataType: 'json',
      header: {
        // FIXME(security): this API key is hard-coded in client-side code and
        // therefore visible to anyone who unpacks the mini program. Rotate it
        // and move the call behind a backend that holds the credential.
        'Authorization': 'Bearer sk-52414b887aee47e4883caf16cbf801bd',
        'Content-Type': 'application/json'
      },
      data: {
        "model": "qwen-vl-ocr-latest",
        "messages": [{
          "role": "user",
          "content": [{
              "type": "image_url",
              "image_url": {
                "url": url
              },
              "min_pixels": 3136,
              "max_pixels": 6422528
            },
            {
              "type": "text",
              "text": "要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息,模糊或者强光遮挡的单个文字可以用英文问号?代替。返回数据格式以MD方式输出"
            }
          ]
        }]
      },
      success(res) {
        // An exception thrown inside this callback would not reach the promise
        // and would leave it pending forever, so every failure path rejects.
        try {
          const statusCode = res.statusCode;
          // `success` also fires for HTTP error statuses (401, 429, 5xx, ...).
          if (typeof statusCode === 'number' && (statusCode < 200 || statusCode >= 300)) {
            reject(new Error(`OCR request failed with HTTP status ${statusCode}`));
            return;
          }
          const body = res.data;
          const choices = body && body.choices;
          const message = Array.isArray(choices) && choices.length > 0 ? choices[0].message : undefined;
          if (!message) {
            reject(new Error('OCR response did not contain choices[0].message'));
            return;
          }
          resolve(parseOcrResult(message.content));
        } catch (err) {
          reject(err);
        }
      },
      fail(err) {
        console.log(err)
        // Network loss or an unreachable server ends up here; just reject.
        reject(err);
      },
    });
  })
}
/**
 * Parses a Markdown pipe table into an array of row objects keyed by the
 * header cell text.
 *
 * - The header is the line directly above the first separator row
 *   (e.g. `| --- | :-: |`). If no separator row is found, the first non-blank
 *   line is treated as the header and data starts at the third non-blank line.
 * - Only the outer pipes of a row are stripped, so empty cells keep their
 *   position and yield `''` instead of shifting later values to the left.
 * - Lines after the header that contain no `|` (for example a closing code
 *   fence) are ignored.
 * - Rows shorter than the header leave the missing keys as `undefined`.
 *
 * @param {string} md - Text containing a Markdown table.
 * @returns {Array<Object>} One object per data row; `[]` for empty or non-string input.
 */
function parseMarkdownTable(md) {
  if (typeof md !== 'string') {
    return [];
  }

  // Split into trimmed, non-blank lines.
  const lines = md
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
  if (lines.length === 0) {
    return [];
  }

  // Remove at most one leading and one trailing pipe, then split on the rest.
  const splitRow = (line) =>
    line
      .replace(/^\|/, '')
      .replace(/\|$/, '')
      .split('|')
      .map((cell) => cell.trim());

  // A separator row consists solely of dash groups with optional alignment colons.
  const separatorPattern = /^\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)*\|?$/;
  const separatorIndex = lines.findIndex(
    (line, idx) => idx > 0 && line.includes('|') && separatorPattern.test(line)
  );

  const hasSeparator = separatorIndex > 0;
  const headerIndex = hasSeparator ? separatorIndex - 1 : 0;
  const dataStart = hasSeparator ? separatorIndex + 1 : 2;

  const header = splitRow(lines[headerIndex]);

  return lines
    .slice(dataStart)
    .filter((line) => line.includes('|'))
    .map((line) => {
      const cells = splitRow(line);
      const row = {};
      header.forEach((key, idx) => {
        // Skip unnamed header columns but keep index alignment with the cells.
        if (key) {
          row[key] = cells[idx];
        }
      });
      return row;
    });
}
/**
 * Extracts test-item records from a string shaped like a ```json ... ``` block.
 *
 * The text is not parsed as JSON. Instead, every `"key": value` pair is
 * collected with a regular expression, where `value` is either a quoted string
 * or a bare JSON number (kept as its source text, so all values are strings).
 * Pairs are then grouped into items: a new item starts each time the key
 * `序号` is seen while the current item already has fields. Only the keys
 * listed in `itemFields` are kept.
 *
 * @param {string} str - Raw model output, typically wrapped in a ```json fence.
 * @returns {Array<Object>} Extracted items; `[]` for non-string input or when nothing matches.
 */
function parseJsonBlock(str) {
  if (typeof str !== 'string') {
    return [];
  }
  // Strip the surrounding code-fence markers and turn "↵" glyphs into newlines.
  const jsonStr = str.replace(/^[\s`]*```json[\s`]*|```$/g, '').replace(/↵/g, '\n').trim();

  // Group 1: key. Group 2: quoted string value. Group 3: bare numeric value.
  // Capturing bare numbers matters because a numeric "序号" would otherwise be
  // skipped and every item would collapse into a single record.
  const pairPattern = /"([^"]+)"\s*:\s*(?:"([^"]*)"|(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?))/g;
  const pairs = [];
  let match;
  while ((match = pairPattern.exec(jsonStr)) !== null) {
    const value = match[2] !== undefined ? match[2] : match[3];
    pairs.push([match[1], value]);
  }

  // Group the flat pair list into items, using "序号" as the item boundary.
  const itemFields = ['序号', '项目名称', '缩写', '结果', '单位', '参考区间', '测定方法'];
  const items = [];
  let current = {};
  pairs.forEach(([key, value]) => {
    if (key === '序号' && Object.keys(current).length > 0) {
      items.push({ ...current });
      current = {};
    }
    if (itemFields.includes(key)) {
      current[key] = value;
    }
  });
  // Flush the last item.
  if (Object.keys(current).length > 0) {
    items.push({ ...current });
  }
  return items;
}
/**
 * Detects the format of the OCR model output and dispatches to the matching parser.
 *
 * - Content whose trimmed text starts with "```json" goes to `parseJsonBlock`.
 * - Content that contains a `|...|` span and a separator-like `|---|` span
 *   goes to `parseMarkdownTable`.
 * - Anything else, including non-string content such as `null`, yields `[]`.
 *
 * @param {string} content - Raw text returned by the model.
 * @returns {Array<Object>} Parsed rows/items, or `[]` when the format is not recognised.
 */
function parseOcrResult(content) {
  // The model may return null/undefined content; treat that as "nothing parsed"
  // instead of throwing on `.trim()`.
  if (typeof content !== 'string') {
    return [];
  }

  // JSON code block?
  const trimmed = content.trim();
  if (/^```json/.test(trimmed)) {
    return parseJsonBlock(content);
  }

  // Markdown table? Needs a pipe-delimited span plus a separator row made of
  // whitespace, dashes, colons and pipes.
  const hasPipeRow = /\|.*\|/.test(content);
  const hasSeparatorRow = /\|[\s\-:|]+\|/.test(content);
  if (hasPipeRow && hasSeparatorRow) {
    return parseMarkdownTable(content);
  }

  // Unrecognised format.
  return [];
}