-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTask1_Environment_Setup.py
More file actions
182 lines (160 loc) · 6.73 KB
/
Task1_Environment_Setup.py
File metadata and controls
182 lines (160 loc) · 6.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# =============================================================================
# CSE512 — Data Warehousing and Mining | Spring 2023
# Task 1: Environment Setup & PostgreSQL Schema Creation
# Student: Sazzad Hossain | ID: 2135184050
# =============================================================================
# Instructions:
# 1. Install PostgreSQL and start the service
# 2. Run this script once to create the ecomdb schema and all tables
# 3. Requires: psycopg2-binary → pip install psycopg2-binary
# =============================================================================
import psycopg2
from psycopg2 import sql, OperationalError
# ── Connection Configuration ──────────────────────────────────────────────────
DB_CONFIG = {
"host": "localhost",
"port": 5432,
"dbname": "ecomdb",
"user": "ecomuser",
"password": "ecom123",
}
# ── DDL: Schema + All Tables ──────────────────────────────────────────────────
DDL_STATEMENTS = [
# Schema
"CREATE SCHEMA IF NOT EXISTS ecomdb;",
# DIM_TRANSACTION
"""
DROP TABLE IF EXISTS ecomdb.dim_transaction CASCADE;
CREATE TABLE ecomdb.dim_transaction (
payment_key VARCHAR(10) PRIMARY KEY,
trans_type VARCHAR(20) NOT NULL, -- 'cash' or 'card'
bank_name VARCHAR(100) -- NULL for cash transactions
);
""",
# DIM_CUSTOMER
"""
DROP TABLE IF EXISTS ecomdb.dim_customer CASCADE;
CREATE TABLE ecomdb.dim_customer (
coustomer_key VARCHAR(10) PRIMARY KEY, -- note: typo preserved for FK compatibility
name VARCHAR(100),
contact_no BIGINT,
nid BIGINT -- stored as hash in production
);
""",
# DIM_ITEM
"""
DROP TABLE IF EXISTS ecomdb.dim_item CASCADE;
CREATE TABLE ecomdb.dim_item (
item_key VARCHAR(10) PRIMARY KEY,
item_name VARCHAR(200) NOT NULL,
description VARCHAR(200),
unit_price NUMERIC(10, 2),
man_country VARCHAR(100),
supplier VARCHAR(200),
unit VARCHAR(20)
);
""",
# DIM_STORE
"""
DROP TABLE IF EXISTS ecomdb.dim_store CASCADE;
CREATE TABLE ecomdb.dim_store (
store_key VARCHAR(10) PRIMARY KEY,
division VARCHAR(50) NOT NULL, -- e.g. DHAKA, CHITTAGONG
district VARCHAR(50) NOT NULL,
upazila VARCHAR(50) NOT NULL
);
""",
# DIM_TIME
"""
DROP TABLE IF EXISTS ecomdb.dim_time CASCADE;
CREATE TABLE ecomdb.dim_time (
time_key VARCHAR(10) PRIMARY KEY,
date_val VARCHAR(30), -- stored as string: 'DD-MM-YYYY HH:MM'
hour INTEGER,
day INTEGER,
week VARCHAR(20),
month INTEGER,
quarter VARCHAR(5), -- Q1, Q2, Q3, Q4
year INTEGER
);
""",
# FACT_SALES
"""
DROP TABLE IF EXISTS ecomdb.fact_sales CASCADE;
CREATE TABLE ecomdb.fact_sales (
id SERIAL PRIMARY KEY,
payment_key VARCHAR(10) REFERENCES ecomdb.dim_transaction(payment_key),
coustomer_key VARCHAR(10) REFERENCES ecomdb.dim_customer(coustomer_key),
time_key VARCHAR(10) REFERENCES ecomdb.dim_time(time_key),
item_key VARCHAR(10) REFERENCES ecomdb.dim_item(item_key),
store_key VARCHAR(10) REFERENCES ecomdb.dim_store(store_key),
quantity INTEGER,
unit VARCHAR(20),
unit_price NUMERIC(10, 2),
total_price NUMERIC(12, 2)
);
""",
# FACT_INVENTORY (for Analytics #5: Item × Time Inventory Analytics)
"""
DROP TABLE IF EXISTS ecomdb.fact_inventory CASCADE;
CREATE TABLE ecomdb.fact_inventory (
inventory_id SERIAL PRIMARY KEY,
time_key VARCHAR(10) REFERENCES ecomdb.dim_time(time_key),
item_key VARCHAR(10) REFERENCES ecomdb.dim_item(item_key),
store_key VARCHAR(10) REFERENCES ecomdb.dim_store(store_key),
qty_sold INTEGER,
qty_received INTEGER,
current_stock INTEGER,
stock_turnover_rate NUMERIC(8, 4),
days_on_hand NUMERIC(8, 2),
reorder_flag BOOLEAN
);
""",
# Grants
"GRANT ALL ON SCHEMA ecomdb TO ecomuser;",
"GRANT ALL ON ALL TABLES IN SCHEMA ecomdb TO ecomuser;",
"GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA ecomdb TO ecomuser;",
]
# ── Main ──────────────────────────────────────────────────────────────────────
def create_schema():
print("=" * 60)
print("CSE512 — Task 1: Environment Setup")
print("=" * 60)
try:
conn = psycopg2.connect(**DB_CONFIG)
conn.autocommit = True
cur = conn.cursor()
print(f"[OK] Connected to PostgreSQL at {DB_CONFIG['host']}:{DB_CONFIG['port']}")
print(f" Database : {DB_CONFIG['dbname']}")
print(f" User : {DB_CONFIG['user']}\n")
for stmt in DDL_STATEMENTS:
stmt = stmt.strip()
if not stmt:
continue
# Print first line as label
label = stmt.split("\n")[0][:80]
cur.execute(stmt)
print(f" [DONE] {label}")
# Verify tables were created
cur.execute("""
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'ecomdb'
ORDER BY table_name;
""")
tables = [r[0] for r in cur.fetchall()]
print(f"\n[OK] Tables in schema 'ecomdb': {tables}")
cur.close()
conn.close()
print("\n[SUCCESS] Schema setup complete. Ready for Task 2 (ETL).")
except OperationalError as e:
print(f"[ERROR] Cannot connect to PostgreSQL: {e}")
print("\nTroubleshooting:")
print(" 1. Ensure PostgreSQL is running: sudo service postgresql start")
print(" 2. Create the database: createdb -U postgres ecomdb")
print(" 3. Create the user: psql -U postgres -c \"CREATE USER ecomuser WITH PASSWORD 'ecom123';\"")
print(" 4. Grant privileges: psql -U postgres -c \"GRANT ALL ON DATABASE ecomdb TO ecomuser;\"")
except Exception as e:
print(f"[ERROR] {type(e).__name__}: {e}")
if __name__ == "__main__":
create_schema()