Skip to content

Commit dd271b8

Browse files
authored
chore: optimize dependencies (#11)
1 parent 3c5dfb7 commit dd271b8

File tree

4 files changed

+127
-5602
lines changed

4 files changed

+127
-5602
lines changed

README.md

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,24 @@
22

33
### Installations
44

5-
- pip
5+
CPU-only (default):
66

7-
``` bassh
8-
pip install deepdoc-lib --extra-index-url https://huangpustar.github.io/deepdoc-lib/simple
9-
```
10-
11-
- pyproject
7+
```bash
8+
pip install git+https://github.com/xorbitsai/deepdoc-lib
9+
```
1210

13-
Add the following to pyproject.toml
11+
GPU (Linux x86_64 only):
1412

15-
For uv:
13+
```bash
14+
pip install "deepdoc-lib[gpu] @ git+https://github.com/xorbitsai/deepdoc-lib"
15+
```
1616

17-
```toml
18-
[[tool.uv.index]]
19-
name = "deepdoc"
20-
url = "https://huangpustar.github.io/deepdoc-lib/simple"
17+
Note: `onnxruntime` (CPU) and `onnxruntime-gpu` should not be installed together. If you're switching an existing environment to GPU, uninstall the CPU `onnxruntime` package first:
2118

22-
[project]
23-
dependencies = [
24-
"deepdoc-lib",
25-
]
26-
```
27-
```
19+
```bash
20+
pip uninstall -y onnxruntime
21+
pip install onnxruntime-gpu==1.19.2
22+
```
2823

2924
### Parser Usage
3025

deepdoc/common/misc_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,16 @@
1818

1919
logger = logging.getLogger(__name__)
2020

21+
2122
def pip_install_torch():
2223
"""
2324
Install torch based on system configuration.
2425
This is a simplified version for the independent library.
2526
"""
2627
try:
2728
import torch
29+
2830
logger.info("PyTorch is already installed")
2931
return True
3032
except ImportError:
31-
logger.warning("PyTorch not found. Please install manually: pip install torch")
3233
return False

pyproject.toml

Lines changed: 11 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -12,148 +12,61 @@ readme = "README.md"
1212
requires-python = ">=3.10,<3.14"
1313
dependencies = [
1414
"datrie==0.8.2",
15-
"akshare>=1.15.78,<2.0.0",
16-
"azure-storage-blob==12.22.0",
17-
"azure-identity==1.17.1",
18-
"azure-storage-file-datalake==12.16.0",
1915
"anthropic==0.69.0",
20-
"arxiv==2.1.3",
21-
"aspose-slides>=24.9.0,<25.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
2216
"beartype>=0.18.5,<0.19.0",
23-
"bio==1.7.1",
24-
"blinker==1.7.0",
25-
"boto3==1.34.140",
26-
"botocore==1.34.140",
27-
"cachetools==5.3.3",
17+
"beautifulsoup4>=4.12.0",
2818
"chardet==5.2.0",
29-
"cn2an==0.5.22",
30-
"cohere==5.6.2",
31-
"dashscope==1.20.11",
32-
"deepl==1.18.0",
3319
"demjson3==3.0.6",
34-
"discord-py==2.3.2",
35-
"duckduckgo-search>=7.2.0,<8.0.0",
36-
"editdistance==0.8.1",
37-
"elastic-transport==8.12.0",
38-
"elasticsearch==8.12.1",
39-
"elasticsearch-dsl==8.12.0",
40-
"filelock==3.15.4",
41-
"flask==3.0.3",
42-
"flask-cors==5.0.0",
43-
"flask-login==0.6.3",
44-
"flask-session==0.8.0",
45-
"google-search-results==2.4.2",
46-
"groq==0.9.0",
4720
"hanziconv==0.3.2",
48-
"html-text==0.6.2",
49-
"httpx[socks]==0.27.2",
50-
"huggingface-hub>=0.25.0,<0.26.0",
51-
"infinity-emb>=0.0.66,<0.0.67",
52-
"itsdangerous==2.1.2",
53-
"json-repair==0.35.0",
21+
"html5lib>=1.1",
22+
"jinja2>=3.1.0",
5423
"markdown==3.6",
55-
"markdown-to-json==2.1.1",
56-
"minio==7.2.4",
57-
"mistralai==0.4.2",
5824
"nltk==3.9.1",
5925
"numpy>=1.26.0,<2.0.0",
6026
"ollama==0.2.1",
61-
"onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
62-
"onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
27+
"onnxruntime==1.19.2",
6328
"openai>=1.45.0",
6429
"opencv-python==4.10.0.84",
6530
"opencv-python-headless==4.10.0.84",
6631
"openpyxl>=3.1.0,<4.0.0",
67-
"opendal>=0.45.0,<0.46.0",
6832
"pandas>=2.2.0,<3.0.0",
6933
"pdfplumber==0.10.4",
70-
"peewee==3.17.1",
7134
"pillow>=11.0.0",
72-
"protobuf>=5.27.2",
73-
"psycopg2-binary==2.9.9",
7435
"pyclipper==1.3.0.post5",
75-
"pycryptodomex==3.20.0",
76-
"pymysql>=1.1.1,<2.0.0",
7736
"pypdf==6.0.0",
78-
"python-dotenv==1.0.1",
79-
"python-dateutil==2.8.2",
8037
"python-pptx>=1.0.2,<2.0.0",
81-
"pywencai==0.12.2",
82-
"qianfan==0.4.6",
83-
"ranx==0.3.20",
84-
"readability-lxml==0.8.1",
85-
"valkey==6.0.2",
38+
"python-docx>=1.1.2,<2.0.0",
39+
"pyyaml>=6.0.0",
8640
"requests==2.32.2",
87-
"replicate==0.31.0",
88-
"roman-numbers==1.0.2",
89-
"ruamel-base==1.0.0",
90-
"ruamel-yaml>=0.18.6,<0.19.0",
91-
"scholarly==1.7.11",
9241
"scikit-learn==1.5.0",
93-
"selenium==4.22.0",
94-
"selenium-wire==5.1.0",
95-
"setuptools>=75.2.0,<76.0.0",
9642
"shapely==2.0.5",
9743
"six==1.16.0",
9844
"strenum==0.4.15",
99-
"tabulate==0.9.0",
100-
"tavily-python==0.5.1",
10145
"tencentcloud-sdk-python==3.0.1215",
102-
"tika==2.6.0",
10346
"tiktoken>=0.7.0",
104-
"umap_learn==0.5.6",
105-
"vertexai==1.64.0",
106-
"volcengine==1.0.194",
107-
"voyageai==0.2.3",
108-
"webdriver-manager==4.0.1",
109-
"werkzeug==3.0.6",
110-
"wikipedia==1.4.0",
111-
"word2number==1.1",
11247
"xgboost==1.6.0",
11348
"xpinyin==0.7.6",
114-
"yfinance==0.2.65",
11549
"zhipuai==2.0.1",
11650
"google-generativeai>=0.8.1,<0.9.0",
117-
"python-docx>=1.1.2,<2.0.0",
118-
"pypdf2>=3.0.1,<4.0.0",
119-
"graspologic>=3.4.1,<4.0.0",
120-
"mini-racer>=0.12.4,<0.13.0",
121-
"pyodbc>=5.2.0,<6.0.0",
122-
"flasgger>=0.9.7.1,<0.10.0",
123-
"xxhash>=3.5.0,<4.0.0",
12451
"trio>=0.29.0",
125-
"langfuse>=2.60.0",
126-
"debugpy>=1.8.13",
127-
"mcp>=1.9.4",
128-
"opensearch-py==2.7.1",
129-
"pluginlib==0.9.4",
130-
"click>=8.1.8",
131-
"python-calamine>=0.4.0",
132-
"litellm>=1.74.15.post1",
133-
"flask-mail>=0.10.0",
134-
"lark>=1.2.2",
135-
"torch",
52+
"setuptools>=75.2.0,<76.0.0",
53+
"huggingface-hub>=0.25.0,<0.26.0",
13654
]
13755

13856
[project.optional-dependencies]
139-
full = [
140-
"bcembedding==0.1.5",
141-
"flagembedding==1.2.10",
142-
"torch>=2.5.0,<3.0.0",
143-
"transformers>=4.35.0,<5.0.0",
57+
gpu = [
58+
"onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
59+
"torch",
14460
]
14561

14662
[dependency-groups]
14763
test = [
148-
"hypothesis>=6.132.0",
14964
"openpyxl>=3.1.5",
15065
"pillow>=10.4.0",
15166
"pytest>=8.3.5",
15267
"python-docx>=1.1.2",
15368
"python-pptx>=1.0.2",
154-
"reportlab>=4.4.1",
15569
"requests>=2.32.2",
156-
"requests-toolbelt>=1.0.0",
15770
]
15871

15972
[tool.setuptools]

0 commit comments

Comments
 (0)