Skip to content

Commit 6b257ec

Browse files
authored
fix(ocr): support rapidocr 3.8 mobile model naming (#3277)
Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>
1 parent 60fc517 commit 6b257ec

File tree

4 files changed

+159 -53 lines changed

docling/models/stages/ocr/rapid_ocr_model.py

Lines changed: 40 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import logging
22
from collections.abc import Iterable
33
from pathlib import Path
4-
from typing import Literal, Optional, Type, TypedDict
4+
from typing import Literal, Type, TypedDict
55

66
import numpy
77
from docling_core.types.doc import BoundingBox, CoordOrigin
@@ -33,64 +33,57 @@ class _ModelPathDetail(TypedDict):
3333
path: str
3434

3535

36+
_RAPIDOCR_MODELSCOPE_RELEASE = "v3.8.0"
37+
_RAPIDOCR_MODELSCOPE_BASE_URL = (
38+
"https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve"
39+
)
40+
_RAPIDOCR_DEFAULT_MODEL_PATHS: dict[_ModelPathEngines, dict[_ModelPathTypes, str]] = {
41+
"onnxruntime": {
42+
"det_model_path": "onnx/PP-OCRv4/det/ch_PP-OCRv4_det_mobile.onnx",
43+
"cls_model_path": "onnx/PP-OCRv4/cls/ch_ppocr_mobile_v2.0_cls_mobile.onnx",
44+
"rec_model_path": "onnx/PP-OCRv4/rec/ch_PP-OCRv4_rec_mobile.onnx",
45+
"rec_keys_path": "paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt",
46+
"font_path": "resources/fonts/FZYTK.TTF",
47+
},
48+
"torch": {
49+
"det_model_path": "torch/PP-OCRv4/det/ch_PP-OCRv4_det_mobile.pth",
50+
"cls_model_path": "torch/PP-OCRv4/cls/ch_ptocr_mobile_v2.0_cls_mobile.pth",
51+
"rec_model_path": "torch/PP-OCRv4/rec/ch_PP-OCRv4_rec_mobile.pth",
52+
"rec_keys_path": "paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt",
53+
"font_path": "resources/fonts/FZYTK.TTF",
54+
},
55+
}
56+
57+
58+
def _build_model_detail(path: str) -> _ModelPathDetail:
59+
return {
60+
"url": f"{_RAPIDOCR_MODELSCOPE_BASE_URL}/{_RAPIDOCR_MODELSCOPE_RELEASE}/{path}",
61+
"path": path,
62+
}
63+
64+
3665
class RapidOcrModel(BaseOcrModel):
3766
_model_repo_folder = "RapidOcr"
38-
# from https://github.com/RapidAI/RapidOCR/blob/main/python/rapidocr/default_models.yaml
39-
# matching the default config in https://github.com/RapidAI/RapidOCR/blob/main/python/rapidocr/config.yaml
40-
# and naming f"{file_info.engine_type.value}.{file_info.ocr_version.value}.{file_info.task_type.value}"
67+
# Match the PP-OCRv4 mobile defaults used by RapidOCR 3.8+:
68+
# - default_models.yaml in RapidOCR 3.8.1 points at the v3.8.0 modelscope assets
69+
# - config.yaml defaults Det/Cls/Rec model_type to "mobile"
4170
_default_models: dict[
4271
_ModelPathEngines, dict[_ModelPathTypes, _ModelPathDetail]
4372
] = {
4473
"onnxruntime": {
45-
"det_model_path": {
46-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/onnx/PP-OCRv4/det/ch_PP-OCRv4_det_infer.onnx",
47-
"path": "onnx/PP-OCRv4/det/ch_PP-OCRv4_det_infer.onnx",
48-
},
49-
"cls_model_path": {
50-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/onnx/PP-OCRv4/cls/ch_ppocr_mobile_v2.0_cls_infer.onnx",
51-
"path": "onnx/PP-OCRv4/cls/ch_ppocr_mobile_v2.0_cls_infer.onnx",
52-
},
53-
"rec_model_path": {
54-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/onnx/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer.onnx",
55-
"path": "onnx/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer.onnx",
56-
},
57-
"rec_keys_path": {
58-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt",
59-
"path": "paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt",
60-
},
61-
"font_path": {
62-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/resources/fonts/FZYTK.TTF",
63-
"path": "fonts/FZYTK.TTF",
64-
},
74+
key: _build_model_detail(path)
75+
for key, path in _RAPIDOCR_DEFAULT_MODEL_PATHS["onnxruntime"].items()
6576
},
6677
"torch": {
67-
"det_model_path": {
68-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/torch/PP-OCRv4/det/ch_PP-OCRv4_det_infer.pth",
69-
"path": "torch/PP-OCRv4/det/ch_PP-OCRv4_det_infer.pth",
70-
},
71-
"cls_model_path": {
72-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/torch/PP-OCRv4/cls/ch_ptocr_mobile_v2.0_cls_infer.pth",
73-
"path": "torch/PP-OCRv4/cls/ch_ptocr_mobile_v2.0_cls_infer.pth",
74-
},
75-
"rec_model_path": {
76-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/torch/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer.pth",
77-
"path": "torch/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer.pth",
78-
},
79-
"rec_keys_path": {
80-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt",
81-
"path": "paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_infer/ppocr_keys_v1.txt",
82-
},
83-
"font_path": {
84-
"url": "https://www.modelscope.cn/models/RapidAI/RapidOCR/resolve/v3.5.0/resources/fonts/FZYTK.TTF",
85-
"path": "fonts/FZYTK.TTF",
86-
},
78+
key: _build_model_detail(path)
79+
for key, path in _RAPIDOCR_DEFAULT_MODEL_PATHS["torch"].items()
8780
},
8881
}
8982

9083
def __init__(
9184
self,
9285
enabled: bool,
93-
artifacts_path: Optional[Path],
86+
artifacts_path: Path | None,
9487
options: RapidOcrOptions,
9588
accelerator_options: AcceleratorOptions,
9689
):
@@ -167,10 +160,10 @@ def __init__(
167160
)
168161

169162
for model_path in (
163+
det_model_path,
170164
rec_keys_path,
171165
cls_model_path,
172166
rec_model_path,
173-
rec_keys_path,
174167
font_path,
175168
):
176169
if model_path is None:
@@ -224,7 +217,7 @@ def __init__(
224217
@staticmethod
225218
def download_models(
226219
backend: _ModelPathEngines,
227-
local_dir: Optional[Path] = None,
220+
local_dir: Path | None = None,
228221
force: bool = False,
229222
progress: bool = False,
230223
) -> Path:

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ dependencies = [
5555
'huggingface_hub (>=0.23,<2)',
5656
'requests (>=2.32.2,<3.0.0)',
5757
'ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"',
58-
'rapidocr (>=3.3,<4.0.0)',
58+
'rapidocr (>=3.8,<4.0.0)',
5959
'certifi (>=2024.7.4)',
6060
'rtree (>=1.3.0,<2.0.0)',
6161
'typer (>=0.12.5,<0.22.0)',
@@ -103,7 +103,7 @@ vlm = [
103103
"peft>=0.18.1",
104104
]
105105
rapidocr = [
106-
'rapidocr (>=3.3,<4.0.0)',
106+
'rapidocr (>=3.8,<4.0.0)',
107107
'onnxruntime (>=1.7.0,<2.0.0) ; python_version < "3.14"',
108108
]
109109
onnxruntime = [

tests/test_rapid_ocr_model.py

Lines changed: 113 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,113 @@
1+
import sys
2+
from enum import Enum
3+
from pathlib import Path
4+
from types import SimpleNamespace
5+
6+
import pytest
7+
8+
from docling.datamodel.accelerator_options import AcceleratorOptions
9+
from docling.datamodel.pipeline_options import RapidOcrOptions
10+
from docling.models.stages.ocr.rapid_ocr_model import RapidOcrModel
11+
12+
13+
@pytest.mark.parametrize(
14+
("backend", "det_name", "cls_name", "rec_name"),
15+
[
16+
(
17+
"onnxruntime",
18+
"ch_PP-OCRv4_det_mobile.onnx",
19+
"ch_ppocr_mobile_v2.0_cls_mobile.onnx",
20+
"ch_PP-OCRv4_rec_mobile.onnx",
21+
),
22+
(
23+
"torch",
24+
"ch_PP-OCRv4_det_mobile.pth",
25+
"ch_ptocr_mobile_v2.0_cls_mobile.pth",
26+
"ch_PP-OCRv4_rec_mobile.pth",
27+
),
28+
],
29+
)
30+
def test_rapidocr_default_models_use_3_8_mobile_assets(
31+
backend: str,
32+
det_name: str,
33+
cls_name: str,
34+
rec_name: str,
35+
):
36+
model_paths = RapidOcrModel._default_models[backend]
37+
38+
assert "/v3.8.0/" in model_paths["det_model_path"]["url"]
39+
assert model_paths["det_model_path"]["path"].endswith(det_name)
40+
assert model_paths["cls_model_path"]["path"].endswith(cls_name)
41+
assert model_paths["rec_model_path"]["path"].endswith(rec_name)
42+
assert model_paths["rec_keys_path"]["path"].endswith(
43+
"paddle/PP-OCRv4/rec/ch_PP-OCRv4_rec_mobile/ppocr_keys_v1.txt"
44+
)
45+
assert model_paths["font_path"]["path"] == "resources/fonts/FZYTK.TTF"
46+
47+
for detail in model_paths.values():
48+
assert "_infer" not in detail["path"]
49+
assert "_infer" not in detail["url"]
50+
51+
52+
@pytest.mark.parametrize(
53+
("backend", "det_name", "cls_name", "rec_name"),
54+
[
55+
(
56+
"onnxruntime",
57+
"ch_PP-OCRv4_det_mobile.onnx",
58+
"ch_ppocr_mobile_v2.0_cls_mobile.onnx",
59+
"ch_PP-OCRv4_rec_mobile.onnx",
60+
),
61+
(
62+
"torch",
63+
"ch_PP-OCRv4_det_mobile.pth",
64+
"ch_ptocr_mobile_v2.0_cls_mobile.pth",
65+
"ch_PP-OCRv4_rec_mobile.pth",
66+
),
67+
],
68+
)
69+
def test_rapidocr_model_initialization_uses_mobile_default_paths(
70+
monkeypatch: pytest.MonkeyPatch,
71+
tmp_path: Path,
72+
backend: str,
73+
det_name: str,
74+
cls_name: str,
75+
rec_name: str,
76+
):
77+
captured: dict[str, object] = {}
78+
79+
class FakeEngineType(str, Enum):
80+
ONNXRUNTIME = "onnxruntime"
81+
OPENVINO = "openvino"
82+
PADDLE = "paddle"
83+
TORCH = "torch"
84+
85+
class FakeRapidOCR:
86+
def __init__(self, params):
87+
captured["params"] = params
88+
89+
monkeypatch.setitem(
90+
sys.modules,
91+
"rapidocr",
92+
SimpleNamespace(EngineType=FakeEngineType, RapidOCR=FakeRapidOCR),
93+
)
94+
95+
model_root = tmp_path / RapidOcrModel._model_repo_folder
96+
for detail in RapidOcrModel._default_models[backend].values():
97+
file_path = model_root / detail["path"]
98+
file_path.parent.mkdir(parents=True, exist_ok=True)
99+
file_path.write_bytes(b"")
100+
101+
RapidOcrModel(
102+
enabled=True,
103+
artifacts_path=tmp_path,
104+
options=RapidOcrOptions(backend=backend),
105+
accelerator_options=AcceleratorOptions(device="cpu", num_threads=1),
106+
)
107+
108+
params = captured["params"]
109+
assert Path(params["Det.model_path"]).name == det_name
110+
assert Path(params["Cls.model_path"]).name == cls_name
111+
assert Path(params["Rec.model_path"]).name == rec_name
112+
assert Path(params["Rec.rec_keys_path"]).name == "ppocr_keys_v1.txt"
113+
assert Path(params["Global.font_path"]).name == "FZYTK.TTF"

uv.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)