"""Demo client for the ``ocr_table_parse`` HTTP API.

Reads a local image, base64-encodes it, sends it as a JSON body to the
endpoint and prints the decoded response.
"""
import base64
import json
import os
import sys
import urllib.error
import urllib.request

ENCODING = 'utf-8'

# Environment variable that supplies the API app code. Credentials must not
# be committed to the repository.
APPCODE_ENV_VAR = 'ALIYUN_OCR_APPCODE'


def get_img_base64(img_file):
    """Return the contents of *img_file* as a base64 text string.

    Args:
        img_file: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded with ``ENCODING``.
        An empty file yields an empty string.
    """
    with open(img_file, 'rb') as infile:
        raw = infile.read()
    return base64.b64encode(raw).decode(ENCODING)


def predict(url, appcode, img_base64, kv_configure, timeout=10):
    """Send one image to *url* and return the raw reply.

    Args:
        url: Endpoint to call.
        appcode: Credential sent as ``Authorization: APPCODE <appcode>``.
        img_base64: Base64 text of the image, sent under the ``image`` key.
        kv_configure: Optional dict of options. When not ``None`` it is
            JSON-encoded and sent as a string under the ``configure`` key;
            when ``None`` the key is omitted.
        timeout: Seconds to wait for the request (default 10).

    Returns:
        A ``(status, headers, body)`` tuple, where ``body`` is ``bytes``.
        An HTTP error status is returned in the same shape rather than
        raised.

    Raises:
        urllib.error.URLError: If the request fails without an HTTP
            response (for example, the connection cannot be established).
    """
    param = {'image': img_base64}
    if kv_configure is not None:
        # The service receives the options as a JSON string nested inside
        # the JSON body, not as a nested object.
        param['configure'] = json.dumps(kv_configure)
    data = json.dumps(param).encode(ENCODING)

    headers = {'Authorization': 'APPCODE {}'.format(appcode)}
    request = urllib.request.Request(url=url, headers=headers, data=data)
    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(request, timeout=timeout) as response:
            return response.status, response.headers, response.read()
    except urllib.error.HTTPError as err:
        # HTTPError is itself a response object; read it, then release it.
        with err:
            return err.code, err.headers, err.read()


def demo():
    """Run table OCR on ``1.png`` next to this script and print the result.

    The app code is read from the environment variable named by
    ``APPCODE_ENV_VAR``. Exits with a non-zero status when the credential
    is missing or the service does not answer with HTTP 200.
    """
    app_code = os.environ.get(APPCODE_ENV_VAR)
    if not app_code:
        sys.exit('Missing API credential: set the %s environment variable.'
                 % APPCODE_ENV_VAR)

    url = 'https://form.market.alicloudapi.com/api/predict/ocr_table_parse'
    # Resolve the sample image relative to this file so the demo does not
    # depend on the current working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    img_file = os.path.join(script_dir, '1.png')
    # Pass None instead of a dict to send the request without a
    # "configure" field.
    configure = {"format": "html", "dir_assure": False, "line_less": True}

    img_base64data = get_img_base64(img_file)
    stat, header, content = predict(url, app_code, img_base64data, configure)
    if stat != 200:
        print('Http status code: ', stat)
        print('Error msg in header: ', header.get('x-ca-error-message', ''))
        print('Error msg in body: ', content)
        sys.exit(1)

    print(content.decode(ENCODING))


if __name__ == '__main__':
    demo()
file mode 100644 index 00000000..2e3f6920 --- /dev/null +++ b/dsOcr/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/dsOcr/Doc/2024长春43所高中录取分数线.md b/dsOcr/Doc/2024长春43所高中录取分数线.md new file mode 100644 index 00000000..35c696c2 --- /dev/null +++ b/dsOcr/Doc/2024长春43所高中录取分数线.md @@ -0,0 +1,35 @@ +| | | | | +| --------- | -------------------- | ---- | ----- | +| 一批次666 | 东北师范大学附属中学 | 统招 | 721.6 | +| | 长春市十一高 | 统招 | 710.1 | +| | 长春市二中 | 统招 | 697 | +| | 吉林省实验中学 | 统招 | 690.1 | +| | 长春外国语 | 统招 | 689.6 | +| | 长春市第二实验 | 统招 | 682.1 | +| | 长春市实验 | 统招 | 676.8 | +| | 长春市六中 | 统招 | 666 | +| | 长春汽车厂六中 | 统招 | 666 | +| 二批次586 | 师大青华 | 私立 | 705.6 | +| | 吉大 | 私立 | 701.1 | +| | 二中精致 | 私立 | 687.3 | +| | 八中 | 统招 | 677 | +| | 五中 | 统招 | 664 | +| | 省实验繁荣 | 私立 | 662.3 | +| | 希望 | 统招 | 651.4 | +| | 师大净月慧泽 | 私立 | 651 | +| | 北师大博硕 | 私立 | 646.9 | +| | 十七中 | 统招 | 643.3 | +| | 汽三 | 统招 | 637.6 | +| | 一中 | 统招 | 633.5 | +| | 北湖 | 私立 | 630.5 | +| | 二十九中 | 统招 | 624.8 | +| | 养正 | 统招 | 623.3 | +| | 十中 | 统招 | 620.5 | +| | 文理 | 私立 | 616.4 | +| | 七中 | 统招 | 614.4 | +| | 朝中 | 统招 | 613.8 | +| | 一三七中学 | 统招 | 611.8 | +| | 解放 | 私立 | 608.2 | +| | 力旺 | 私立 | 586 | +| | 日章学园 | 私立 | 586 | +| | 清浦 | 私立 | 586 | \ No newline at end of file diff --git a/dsOcr/Doc/2024长春43所高中录取分数线.png b/dsOcr/Doc/2024长春43所高中录取分数线.png new file mode 100644 index 00000000..9d4065f8 Binary files /dev/null and b/dsOcr/Doc/2024长春43所高中录取分数线.png differ diff --git a/dsOcr/Doc/Mineru私有化部署.txt b/dsOcr/Doc/Mineru私有化部署.txt new file mode 100644 index 00000000..b8e31460 --- /dev/null +++ b/dsOcr/Doc/Mineru私有化部署.txt @@ -0,0 +1,5 @@ +私有化部署MinerU 与应用实践 +https://article.juejin.cn/post/7490746158648475657 + +在线解析图片 +https://mineru.net/OpenSourceTools/Extractor \ No newline at end of file diff --git a/dsOcr/Doc/长春市2025年中考各批次录取最低控制线.jpg b/dsOcr/Doc/长春市2025年中考各批次录取最低控制线.jpg new file mode 100644 index 00000000..2b46dc4c Binary files /dev/null and b/dsOcr/Doc/长春市2025年中考各批次录取最低控制线.jpg differ diff --git 
a/dsOcr/Doc/长春市2025年中考各批次录取最低控制线.txt b/dsOcr/Doc/长春市2025年中考各批次录取最低控制线.txt new file mode 100644 index 00000000..c933e71b --- /dev/null +++ b/dsOcr/Doc/长春市2025年中考各批次录取最低控制线.txt @@ -0,0 +1,7 @@ +长春市2025年中考各批次录取最低控制线确定如下: +第一批次城区普通高中 689分 +第二批次城区普通高中 590分 +第三批次城区普通高中 573分 +中等职业学校的综合高中班录取不低于第三批次城区普通高中最低控制线下60分。 +来源:长春市教育考试院 + diff --git a/dsOcr/Doc/长春市一批次高中学校介绍.docx b/dsOcr/Doc/长春市一批次高中学校介绍.docx new file mode 100644 index 00000000..b7f7ed94 Binary files /dev/null and b/dsOcr/Doc/长春市一批次高中学校介绍.docx differ