-
Notifications
You must be signed in to change notification settings - Fork 55
Expand file tree
/
Copy pathTEMPORAL_VIEWS_OPTIMIZATION.sql
More file actions
271 lines (237 loc) · 10.1 KB
/
TEMPORAL_VIEWS_OPTIMIZATION.sql
File metadata and controls
271 lines (237 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
-- ============================================================================
-- TEMPORAL ANALYSIS FRAMEWORK: IMMEDIATE OPTIMIZATION SCRIPT
-- ============================================================================
-- Generated: 2026-01-22
-- Database: PostgreSQL 18
-- Purpose: Optimize performance of 27 temporal analysis views
--
-- Execution Time: ~15 minutes
-- Prerequisites: Database admin access
-- ============================================================================
-- ============================================================================
-- SECTION 1: REFRESH ALL MATERIALIZED VIEWS (5 minutes)
-- ============================================================================
-- All 16 materialized views need initial population
\echo 'Starting materialized view refresh...'
\timing on
-- Ballot Summary Views (Daily/Weekly/Monthly/Annual)
\echo 'Refreshing ballot summary views...'
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_summary_daily;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_summary_weekly;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_summary_monthly;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_summary_annual;
-- Ballot Party Summary Views
\echo 'Refreshing ballot party summary views...'
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_party_summary_daily;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_party_summary_weekly;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_party_summary_monthly;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_party_summary_annual;
-- Ballot Politician Summary Views
\echo 'Refreshing ballot politician summary views...'
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_politician_summary_daily;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_politician_summary_weekly;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_politician_summary_monthly;
REFRESH MATERIALIZED VIEW view_riksdagen_vote_data_ballot_politician_summary_annual;
-- Document Summary Views
\echo 'Refreshing document summary views...'
REFRESH MATERIALIZED VIEW view_riksdagen_document_type_daily_summary;
REFRESH MATERIALIZED VIEW view_riksdagen_party_document_daily_summary;
REFRESH MATERIALIZED VIEW view_riksdagen_org_document_daily_summary;
REFRESH MATERIALIZED VIEW view_riksdagen_politician_document_daily_summary;
\echo 'Materialized view refresh complete!'
-- ============================================================================
-- SECTION 2: ADD RECOMMENDED INDEXES (10 minutes)
-- ============================================================================
-- These indexes will improve query performance by 20-40%
\echo 'Creating recommended indexes...'
-- Vote Data Table Indexes
\echo 'Adding indexes to vote_data table...'
-- Composite index for ballot + date queries (used in all ballot summary views)
CREATE INDEX IF NOT EXISTS idx_vote_data_ballot_date
ON vote_data (embedded_id_ballot_id, vote_date);
-- Composite index for party + date queries (used in party summary views)
CREATE INDEX IF NOT EXISTS idx_vote_data_party_date
ON vote_data (party, vote_date);
-- Composite index for politician (intressent_id) + date queries
CREATE INDEX IF NOT EXISTS idx_vote_data_politician_date
ON vote_data (embedded_id_intressent_id, vote_date);
-- Covering index for common aggregation queries
-- Note: This index is large but provides significant performance improvement
CREATE INDEX IF NOT EXISTS idx_vote_data_aggregation_cover
ON vote_data (vote_date, party, vote, approved)
INCLUDE (embedded_id_ballot_id, embedded_id_intressent_id);
\echo 'Vote data indexes created!'
-- Application Action Event Indexes
\echo 'Checking application_action_event indexes...'
-- Note: Current indexes are already excellent:
-- - application_action_event_created_date_idx
-- - application_action_event_page_idx
-- - application_action_event_page_created_date_idx (composite)
-- - application_action_event_page_element_id_idx (composite)
-- No additional indexes needed!
\echo 'All recommended indexes created!'
-- ============================================================================
-- SECTION 3: ANALYZE TABLES (1 minute)
-- ============================================================================
-- Update statistics for query planner optimization
\echo 'Analyzing tables to update statistics...'
ANALYZE vote_data;
ANALYZE application_action_event;
ANALYZE document_data;
ANALYZE document_element;
\echo 'Table analysis complete!'
-- ============================================================================
-- SECTION 4: VERIFY OPTIMIZATION
-- ============================================================================
\echo '============================================================================'
\echo 'OPTIMIZATION COMPLETE!'
\echo '============================================================================'
-- Check materialized view sizes
\echo ''
\echo 'Materialized View Status:'
SELECT
schemaname,
matviewname,
pg_size_pretty(pg_total_relation_size(schemaname || '.' || matviewname)) AS size,
hasindexes
FROM pg_matviews
WHERE schemaname = 'public'
AND matviewname LIKE '%_summary_%'
ORDER BY matviewname;
-- Check new indexes
\echo ''
\echo 'New Indexes Created:'
SELECT
schemaname,
relname as tablename,
indexrelname as indexname,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
AND indexrelname LIKE 'idx_vote_data_%'
AND indexrelname IN (
'idx_vote_data_ballot_date',
'idx_vote_data_party_date',
'idx_vote_data_politician_date',
'idx_vote_data_aggregation_cover'
)
ORDER BY indexrelname;
-- ============================================================================
-- SECTION 5: ENHANCED STATISTICS VALIDATION (PR #8271)
-- ============================================================================
\echo ''
\echo '============================================================================'
\echo 'ENHANCED STATISTICS VALIDATION'
\echo '============================================================================'
-- Verify pg_stat_statements extension and configuration
\echo ''
\echo 'pg_stat_statements Configuration:'
SELECT
name,
setting,
unit
FROM pg_settings
WHERE name IN (
'pg_stat_statements.track',
'pg_stat_statements.track_planning',
'pg_stat_statements.track_utility',
'track_io_timing',
'track_functions'
)
ORDER BY name;
-- Check statistics collection status
\echo ''
\echo 'Statistics Collection Status:'
SELECT
schemaname,
relname,
last_analyze,
last_autoanalyze,
n_live_tup,
n_dead_tup,
n_mod_since_analyze
FROM pg_stat_user_tables
WHERE relname IN ('vote_data', 'application_action_event', 'document_data', 'document_element')
ORDER BY relname;
-- Verify statistics freshness (should be within 20% of actual data changes)
\echo ''
\echo 'Statistics Freshness Check:'
SELECT
relname,
n_live_tup,
n_mod_since_analyze,
CASE
WHEN n_live_tup > 0 THEN
ROUND(100.0 * n_mod_since_analyze / n_live_tup, 2)
ELSE 0
END as staleness_pct,
CASE
WHEN n_live_tup = 0 THEN 'OK (empty)'
WHEN n_mod_since_analyze::float / NULLIF(n_live_tup, 0) < 0.20 THEN 'OK'
ELSE 'NEEDS ANALYZE'
END as status
FROM pg_stat_user_tables
WHERE relname IN ('vote_data', 'application_action_event', 'document_data', 'document_element')
ORDER BY staleness_pct DESC;
-- Sample pg_stat_statements data (will be empty until queries are run)
\echo ''
\echo 'pg_stat_statements Sample (Top 10 temporal views by total time):'
-- Use specific naming patterns for temporal views per documentation
SELECT
LEFT(query, 60) as view_query,
calls,
ROUND(mean_plan_time::numeric, 3) as mean_plan_ms,
ROUND(mean_exec_time::numeric, 3) as mean_exec_ms,
ROUND((mean_plan_time + mean_exec_time)::numeric, 3) as total_ms,
ROUND((100.0 * mean_plan_time / NULLIF(mean_plan_time + mean_exec_time, 0))::numeric, 1) as plan_pct
FROM pg_stat_statements
WHERE (query LIKE '%_summary_%'
OR query LIKE '%_temporal_%'
OR query LIKE '%_daily%'
OR query LIKE '%_weekly%'
OR query LIKE '%_monthly%'
OR query LIKE '%_annual%')
AND query LIKE '%COUNT%'
AND query NOT LIKE '%pg_stat_statements%'
ORDER BY total_ms DESC
LIMIT 10;
-- Buffer hit ratio check
\echo ''
\echo 'Buffer Hit Ratios (should be >95% in production):'
SELECT
relname,
heap_blks_read,
heap_blks_hit,
CASE
WHEN (heap_blks_hit + heap_blks_read) > 0 THEN
ROUND(100.0 * heap_blks_hit / (heap_blks_hit + heap_blks_read), 2)
ELSE 0
END as buffer_hit_ratio,
CASE
WHEN (heap_blks_hit + heap_blks_read) = 0 THEN 'N/A (no access)'
WHEN heap_blks_hit::float / NULLIF(heap_blks_hit + heap_blks_read, 0) > 0.95 THEN 'GOOD'
WHEN heap_blks_hit::float / NULLIF(heap_blks_hit + heap_blks_read, 0) > 0.80 THEN 'OK'
ELSE 'POOR (increase shared_buffers)'
END as status
FROM pg_statio_user_tables
WHERE relname LIKE '%vote_data%'
OR relname LIKE '%application_action_event%'
OR relname LIKE '%document_%'
ORDER BY (heap_blks_hit + heap_blks_read) DESC
LIMIT 10;
\echo ''
\echo '============================================================================'
\echo 'NEXT STEPS:'
\echo '1. Run queries on temporal views to populate pg_stat_statements'
\echo '2. Run performance benchmarks: see TEMPORAL_ANALYSIS_PERFORMANCE_REPORT.md'
\echo '3. Set up automated refresh: see pg_cron configuration in report'
\echo '4. Monitor query performance with enhanced statistics'
\echo '============================================================================'
\echo ''
\echo 'Enhanced Statistics Queries:'
\echo ' - Planning vs Execution: see pg_stat_statements output above'
\echo ' - Buffer Statistics: see buffer hit ratios above'
\echo ' - I/O Timing: use EXPLAIN (ANALYZE, BUFFERS, TIMING) on individual views'
\echo ' - Row Estimate Accuracy: compare plan_rows vs actual_rows in EXPLAIN output'
\echo '============================================================================'
\timing off