119 lines
3.9 KiB
JavaScript
119 lines
3.9 KiB
JavaScript
/**
 * Sends an image URL to the DashScope OpenAI-compatible chat-completions
 * endpoint (model `qwen-vl-ocr-latest`) and resolves with the parsed OCR rows.
 *
 * @param {string} url - Image URL placed in the `image_url` part of the user message.
 * @returns {Promise<Array<Object>>} Resolves with whatever `parseOcrResult`
 *   returns for the first choice's message content. Rejects with the
 *   `wx.request` failure object on transport errors, or with an `Error` when
 *   the HTTP status is not 2xx, the response carries no string content, or
 *   parsing throws.
 */
export const getOcr = (url) => {
  return new Promise((resolve, reject) => {
    wx.request({
      url: 'https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
      method: 'POST',
      // Per the WeChat docs, `dataType: 'json'` makes the runtime JSON.parse the response body.
      dataType: 'json',
      header: {
        // SECURITY / FIXME: this API key ships inside client code, so anyone
        // holding the bundle can extract it. Move the call behind a server-side
        // proxy (or inject the key at runtime) and rotate this key.
        'Authorization': 'Bearer sk-52414b887aee47e4883caf16cbf801bd',
        'Content-Type': 'application/json',
      },
      data: {
        model: 'qwen-vl-ocr-latest',
        messages: [{
          role: 'user',
          content: [
            {
              type: 'image_url',
              image_url: {
                url: url,
              },
              min_pixels: 3136,
              max_pixels: 6422528,
            },
            {
              type: 'text',
              text: '要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息,模糊或者强光遮挡的单个文字可以用英文问号?代替。返回数据格式以MD方式输出',
            },
          ],
        }],
      },
      success(res) {
        // `success` fires for every HTTP response, including 4xx/5xx error
        // payloads. Anything thrown in here would leave the promise pending
        // forever, so every failure path must end in reject().
        try {
          const { statusCode, data } = res;
          if (statusCode < 200 || statusCode >= 300) {
            reject(new Error(`OCR request failed with HTTP status ${statusCode}`));
            return;
          }

          const firstChoice = data && Array.isArray(data.choices) ? data.choices[0] : undefined;
          const content = firstChoice && firstChoice.message ? firstChoice.message.content : undefined;
          if (typeof content !== 'string') {
            reject(new Error('OCR response does not contain any message content'));
            return;
          }

          resolve(parseOcrResult(content));
        } catch (err) {
          reject(err);
        }
      },
      fail(err) {
        console.log(err);
        // Network loss or an unreachable server ends up here; reject as-is.
        reject(err);
      },
    });
  });
};
|
||
|
||
/**
 * Parses the first Markdown pipe table found in `md` into row objects keyed
 * by the header cells.
 *
 * The header is the line directly above the first separator row
 * (`| --- | --- |`), so intro text or a code fence before the table is
 * ignored. If no separator row is found, the first non-empty line is treated
 * as the header and the second line is skipped. Cells are matched to headers
 * by position, so an empty cell yields `''` and does not shift later columns.
 * Row collection stops at the first line that contains no `|`.
 *
 * @param {string} md - Text containing a Markdown table.
 * @returns {Array<Object>} One object per data row; `[]` when `md` is not a
 *   string or contains no non-empty lines. A header with no matching cell in
 *   a short row maps to `undefined`.
 */
function parseMarkdownTable(md) {
  if (typeof md !== 'string') {
    return [];
  }

  // Strip only the outer pipes before splitting, so empty cells keep their position.
  const splitRow = (line) => {
    let row = line.trim();
    if (row.startsWith('|')) {
      row = row.slice(1);
    }
    if (row.endsWith('|')) {
      row = row.slice(0, -1);
    }
    return row.split('|').map((cell) => cell.trim());
  };

  // A separator row consists solely of cells such as `---`, `:---`, `---:` or `:---:`.
  const isSeparatorRow = (line) => splitRow(line).every((cell) => /^:?-+:?$/.test(cell));

  // Drop blank lines.
  const lines = md.split('\n').filter((line) => line.trim().length > 0);
  if (lines.length === 0) {
    return [];
  }

  const separatorIdx = lines.findIndex(
    (line, idx) => idx > 0 && lines[idx - 1].includes('|') && isSeparatorRow(line)
  );
  // Without a recognisable separator, keep the legacy layout: header on the first line.
  const headerIdx = separatorIdx > 0 ? separatorIdx - 1 : 0;
  const header = splitRow(lines[headerIdx]);

  const result = [];
  // Data rows start two lines below the header (the line in between is the separator).
  for (let i = headerIdx + 2; i < lines.length; i += 1) {
    const line = lines[i];
    if (!line.includes('|')) {
      // First non-table line marks the end of the table.
      break;
    }
    const cells = splitRow(line);
    const obj = {};
    header.forEach((key, idx) => {
      // Unnamed header columns are skipped but still consume their position.
      if (key) {
        obj[key] = cells[idx];
      }
    });
    result.push(obj);
  }

  return result;
}
|
||
|
||
/**
 * Extracts test-item records from a string shaped like a ```json ... ``` block.
 *
 * The text is not run through JSON.parse. Every `"key": "value"` pair whose
 * value is a double-quoted string is picked up with a regular expression, in
 * order of appearance. A `序号` key starts a new record whenever the current
 * record already holds at least one field; keys outside the known field list
 * are ignored.
 *
 * @param {string} str - Raw model output, optionally wrapped in code-fence markers.
 * @returns {Array<Object>} Records in order of appearance; empty when no known field is found.
 */
function parseJsonBlock(str) {
  // Remove the fence markers and turn the literal "↵" glyph into a real newline.
  const body = str
    .replace(/^[\s`]*```json[\s`]*|```$/g, '')
    .replace(/↵/g, '\n')
    .trim();

  const knownFields = new Set(['序号', '项目名称', '缩写', '结果', '单位', '参考区间', '测定方法']);
  const hasFields = (record) => Object.keys(record).length > 0;

  // Matches "key": "value" where the value is a quoted string.
  const pairPattern = /"([^"]+)":\s*"([^"]*)"/g;

  const records = [];
  let pending = {};

  for (let m = pairPattern.exec(body); m !== null; m = pairPattern.exec(body)) {
    const name = m[1];
    const text = m[2];

    // A new "序号" closes the record collected so far.
    if (name === '序号' && hasFields(pending)) {
      records.push(pending);
      pending = {};
    }

    if (knownFields.has(name)) {
      pending[name] = text;
    }
  }

  // Flush the trailing record, if any.
  if (hasFields(pending)) {
    records.push(pending);
  }

  return records;
}
|
||
|
||
/**
 * Detects the format of the OCR model output and dispatches to the matching parser.
 *
 * - Content that starts (after trimming) with a ```json fence goes to `parseJsonBlock`.
 * - Content containing a pipe-delimited line and a separator-like `|---|`
 *   sequence goes to `parseMarkdownTable`.
 * - Anything else, including `null`, `undefined` and non-string values,
 *   yields an empty array.
 *
 * @param {string} content - Message content returned by the model.
 * @returns {Array<Object>} Parsed rows, or `[]` when the format is not recognised.
 */
function parseOcrResult(content) {
  // A missing or non-string payload would otherwise throw on `.trim()`.
  if (typeof content !== 'string') {
    return [];
  }

  // JSON code block?
  if (/^```json/.test(content.trim())) {
    return parseJsonBlock(content);
  }

  // Markdown table? Requires a |...| line and a separator-looking |---| run.
  if (/\|.*\|/.test(content) && /\|[\s\-:|]+\|/.test(content)) {
    return parseMarkdownTable(content);
  }

  // Unrecognised format.
  return [];
}