Create DPBench evaluation datasets:
# Make the ground-truth
docling-eval create-gt --benchmark DPBench --output-dir ./benchmarks/DPBench-gt/
# Make predictions for different modalities.
docling-eval create-eval \
--benchmark DPBench \
--gt-dir ./benchmarks/DPBench-gt/gt_dataset/ \
--output-dir ./benchmarks/DPBench-e2e/ \
--prediction-provider Docling # use full-document predictions from docling
docling-eval create-eval \
--benchmark DPBench \
--gt-dir ./benchmarks/DPBench-gt/gt_dataset/ \
--output-dir ./benchmarks/DPBench-tables/ \
--prediction-provider TableFormer # use tableformer predictions only

Create the evaluation report:
docling-eval evaluate \
--modality layout \
--benchmark DPBench \
--output-dir ./benchmarks/DPBench-e2e/
Visualize the report:
docling-eval visualize \
--modality layout \
--benchmark DPBench \
--output-dir ./benchmarks/DPBench-e2e/

Create the evaluation report:
docling-eval evaluate \
--modality table_structure \
--benchmark DPBench \
--output-dir ./benchmarks/DPBench-tables/

Visualize the report:
docling-eval visualize \
--modality table_structure \
--benchmark DPBench \
--output-dir ./benchmarks/DPBench-tables/ Create the evaluation report:
docling-eval evaluate \
--modality reading_order \
--benchmark DPBench \
--output-dir ./benchmarks/DPBench-e2e/

Visualize the report:
docling-eval visualize \
--modality reading_order \
--benchmark DPBench \
--output-dir ./benchmarks/DPBench-e2e/

Create the evaluation report:
docling-eval evaluate \
--modality markdown_text \
--benchmark DPBench \
--output-dir ./benchmarks/DPBench-e2e/

Visualize the report:
docling-eval visualize \
--modality markdown_text \
--benchmark DPBench \
--output-dir ./benchmarks/DPBench-e2e/

![mAP[0.5:0.95] plot](/docling-project/docling-eval/raw/main/docs/evaluations/DPBench/evaluation_DPBench_layout_mAP_0.5_0.95.png)










