Visualization turns your knowledge graphs into interactive, explorable views. Docling Graph automatically generates HTML visualizations and markdown reports for every pipeline run.
In this guide:
- Interactive HTML graphs
- Markdown reports
- Graph statistics
- Customization options
- Integration examples
Every pipeline run automatically creates:
outputs/
├── visualization.html # Interactive graph
├── report.md # Markdown report
└── graph_stats.json # Statistics
from docling_graph import run_pipeline, PipelineConfig
config = PipelineConfig(
source="document.pdf",
template="templates.BillingDocument",
output_dir="outputs"
)
run_pipeline(config)
# Automatically generates:
# - outputs/visualization.html
# - outputs/report.md
# - outputs/graph_stats.json
✅ Interactive exploration
- Zoom and pan
- Node selection
- Search functionality
- Layout algorithms
✅ Visual styling
- Color-coded node types
- Edge labels
- Hover tooltips
- Responsive design
✅ Export options
- Save as image
- Share via URL
- Embed in websites
# Open in browser
open outputs/visualization.html # macOS
xdg-open outputs/visualization.html # Linux
start outputs/visualization.html # Windows
# Or double-click the file
from docling_graph.core.visualizers import InteractiveVisualizer
from docling_graph.core.converters import GraphConverter
# Convert models to graph
converter = GraphConverter()
graph, metadata = converter.pydantic_list_to_graph(models)
# Generate visualization
visualizer = InteractiveVisualizer()
visualizer.save_cytoscape_graph(
graph=graph,
output_path="my_graph.html",
open_browser=True # Automatically open
)
Markdown reports contain:
- Overview - Node/edge counts, timestamps
- Node Distribution - Types and percentages
- Edge Distribution - Relationship types
- Sample Nodes - Example entities
- Sample Edges - Example relationships
# Knowledge Graph Report
Automatically generated by docling-graph.
## Overview
- **Total Nodes**: 15
- **Total Edges**: 18
- **Source Models**: 1
- **Generated**: 2024-01-15 14:30:00
## Node Type Distribution
| Node Type | Count | Percentage |
|-----------|-------|------------|
| LineItem | 9 | 60.0% |
| Address | 3 | 20.0% |
| Organization | 2 | 13.3% |
| BillingDocument | 1 | 6.7% |
## Edge Type Distribution
| Edge Type | Count | Percentage |
|-----------|-------|------------|
| contains_item | 9 | 50.0% |
| located_at | 5 | 27.8% |
| has_total | 2 | 11.1% |
| issued_by | 1 | 5.6% |
| sent_to | 1 | 5.6% |
from docling_graph.core.visualizers import ReportGenerator
from docling_graph.core.converters import GraphConverter
# Convert models to graph
converter = GraphConverter()
graph, metadata = converter.pydantic_list_to_graph(models)
# Generate report
generator = ReportGenerator()
generator.visualize(
graph=graph,
output_path="my_report.md",
source_model_count=len(models),
include_samples=True
)
{
"node_count": 15,
"edge_count": 18,
"node_types": {
"BillingDocument": 1,
"Organization": 2,
"Address": 3,
"LineItem": 9
},
"edge_types": {
"issued_by": 1,
"sent_to": 1,
"located_at": 5,
"contains_item": 9,
"has_total": 2
},
"avg_degree": 2.4,
"density": 0.17,
"source_models": 1,
"created_at": "2024-01-15T14:30:00"
}
import json
# Load statistics
with open("outputs/graph_stats.json") as f:
stats = json.load(f)
# Analyze
print(f"Graph has {stats['node_count']} nodes")
print(f"Average degree: {stats['avg_degree']:.2f}")
print(f"Density: {stats['density']:.2f}")
# Most common node type
most_common = max(stats['node_types'], key=stats['node_types'].get)
print(f"Most common node type: {most_common}")
from docling_graph import run_pipeline, PipelineConfig
# Run pipeline (automatic visualization)
config = PipelineConfig(
source="invoice.pdf",
template="templates.BillingDocument",
output_dir="outputs"
)
run_pipeline(config)
# Open visualization
import webbrowser
webbrowser.open("file://outputs/visualization.html")
from docling_graph.core.visualizers import InteractiveVisualizer
from docling_graph.core.converters import GraphConverter
# Create graph
converter = GraphConverter()
graph, metadata = converter.pydantic_list_to_graph(models)
# Generate custom visualization
visualizer = InteractiveVisualizer()
html_path = visualizer.save_cytoscape_graph(
graph=graph,
output_path="custom_graph.html",
open_browser=False
)
print(f"Visualization saved to {html_path}")
from docling_graph import run_pipeline, PipelineConfig
from pathlib import Path
# Process multiple documents
for pdf_file in Path("documents").glob("*.pdf"):
output_dir = f"visualizations/{pdf_file.stem}"
config = PipelineConfig(
source=str(pdf_file),
template="templates.BillingDocument",
output_dir=output_dir
)
run_pipeline(config)
print(f"Visualization: {output_dir}/visualization.html")
import json
from pathlib import Path
# Analyze multiple reports
reports = []
for stats_file in Path("outputs").rglob("graph_stats.json"):
with open(stats_file) as f:
stats = json.load(f)
reports.append({
"file": stats_file.parent.name,
"nodes": stats["node_count"],
"edges": stats["edge_count"],
"density": stats["density"]
})
# Summary
import pandas as pd
df = pd.DataFrame(reports)
print(df.describe())
from docling_graph.core.visualizers import InteractiveVisualizer
from pathlib import Path
# Load CSV and create visualization
visualizer = InteractiveVisualizer()
html_path = visualizer.display_cytoscape_graph(
path=Path("outputs"), # Directory with nodes.csv and edges.csv
input_format="csv",
output_path="from_csv.html",
open_browser=True
)
from docling_graph.core.visualizers import InteractiveVisualizer
from pathlib import Path
# Load JSON and create visualization
visualizer = InteractiveVisualizer()
html_path = visualizer.display_cytoscape_graph(
path=Path("outputs/graph_data.json"),
input_format="json",
output_path="from_json.html",
open_browser=True
)
from docling_graph.core.visualizers import ReportGenerator
generator = ReportGenerator()
generator.visualize(
graph=graph,
output_path="custom_report.md",
source_model_count=len(models),
include_samples=False # Exclude sample nodes/edges
)
from docling_graph.core.utils import calculate_graph_stats
# Calculate custom statistics
metadata = calculate_graph_stats(graph, source_model_count=len(models))
print(f"Nodes: {metadata.node_count}")
print(f"Edges: {metadata.edge_count}")
print(f"Density: {metadata.density:.3f}")
print(f"Avg degree: {metadata.avg_degree:.2f}")
# Node type distribution
for node_type, count in metadata.node_types.items():
percentage = (count / metadata.node_count) * 100
print(f"{node_type}: {count} ({percentage:.1f}%)")
from flask import Flask, render_template
from docling_graph import run_pipeline, PipelineConfig
import json
app = Flask(__name__)
@app.route('/visualize/<doc_id>')
def visualize(doc_id):
# Load graph data
with open(f"outputs/{doc_id}/graph_stats.json") as f:
stats = json.load(f)
return render_template('dashboard.html',
stats=stats,
viz_url=f"/static/{doc_id}/visualization.html")
if __name__ == '__main__':
app.run(debug=True)
from docling_graph import run_pipeline, PipelineConfig
from pathlib import Path
import smtplib
from email.mime.text import MIMEText
def process_and_email(pdf_path, recipient):
"""Process document and email report."""
# Process document
config = PipelineConfig(
source=pdf_path,
template="templates.BillingDocument",
output_dir="temp_output"
)
run_pipeline(config)
# Read report
with open("temp_output/report.md") as f:
report = f.read()
# Email report
msg = MIMEText(report)
msg['Subject'] = f'Graph Report: {Path(pdf_path).name}'
msg['To'] = recipient
# Send email (configure SMTP)
# smtp.send_message(msg)
print(f"Report sent to {recipient}")
# ✅ Good - Keep visualizations enabled
config = PipelineConfig(
source="document.pdf",
template="templates.BillingDocument",
# Visualizations generated automatically
)
# ✅ Good - Verify graph quality
import json
with open("outputs/graph_stats.json") as f:
stats = json.load(f)
if stats["node_count"] == 0:
print("Warning: Empty graph")
if stats["edge_count"] == 0:
print("Warning: No relationships")
# ✅ Good - Structured output
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"visualizations/{timestamp}"
config = PipelineConfig(
source="document.pdf",
template="templates.BillingDocument",
output_dir=output_dir
)
Solution:
# Check file exists
import os
viz_path = "outputs/visualization.html"
if os.path.exists(viz_path):
print(f"✅ File exists: {viz_path}")
# Open manually
import webbrowser
webbrowser.open(f"file://{os.path.abspath(viz_path)}")
else:
print(f"❌ File not found: {viz_path}")
Solution:
# Check graph has nodes
import json
with open("outputs/graph_stats.json") as f:
stats = json.load(f)
if stats["node_count"] == 0:
print("Graph is empty - check extraction")
Solution:
# Check graph validity
from docling_graph.core.visualizers import ReportGenerator
generator = ReportGenerator()
if generator.validate_graph(graph):
print("✅ Graph is valid")
else:
print("❌ Graph is empty")
Now that you understand visualization:
- Neo4j Integration → Import into Neo4j
- Graph Analysis → Analyze graph structure
- CLI Guide → Use command-line tools