-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathretail sales analysis.sql
More file actions
278 lines (205 loc) · 7.36 KB
/
retail sales analysis.sql
File metadata and controls
278 lines (205 loc) · 7.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
-- Creating database
CREATE DATABASE sales;
USE sales;
-- Creating tables
CREATE TABLE sales_store(
transaction_id VARCHAR(15),
customer_id VARCHAR(15),
customer_name VARCHAR(30),
customer_age INT,
gender VARCHAR(15),
product_id VARCHAR(15),
product_name VARCHAR(15),
product_category VARCHAR(15),
quantiy INT,
prce FLOAT,
payment_mode VARCHAR(15),
purchase_date DATE,
time_of_purchase TIME,
status VARCHAR(15)
);
-- Loading data into database
LOAD DATA INFILE '"C:\Users\tejes\Desktop\Data Analyst\Portfolio projects\SQL\Retail Store Sales Data.csv"'
INTO TABLE sales_store
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
SELECT * FROM sales_store;
-- Checking total records
SELECT COUNT(*) as total_records FROM sales_store;
-- Checking data types and structure
DESCRIBE sales_store;
-- Checking for duplicates on key fields
SELECT transaction_id, COUNT(*)
FROM sales_store
GROUP BY transaction_id
HAVING COUNT(transaction_id) >1;
-- duplicate transaction id
-- "TXN745076"
-- "TXN855235"
-- "TXN626832"
-- "TXN240646"
-- "TXN342128"
-- "TXN981773"
-- "TXN832908"
-- ============================================
-- DATA CLEANING
-- ============================================
-- copying data into new table for data cleaning
SELECT * INTO sales FROM sales_store;
CREATE TABLE sales LIKE sales_store;
INSERT INTO sales
SELECT * FROM sales_store;
WITH duplicates AS (
SELECT customer_id,
ROW_NUMBER() OVER (PARTITION BY transaction_id ORDER BY customer_id) AS row_num
FROM sales
)
DELETE FROM sales
WHERE customer_id IN (
SELECT customer_id FROM duplicates WHERE row_num > 1
);
SELECT transaction_id, COUNT(*)
FROM sales
GROUP BY transaction_id
HAVING COUNT(transaction_id) >1;
SELECT * FROM sales;
-- correction of headers
ALTER TABLE sales rename column quantiy to quantity;
ALTER TABLE sales rename column prce to price;
SELECT * FROM sales;
-- count null valus
SELECT COUNT(*) AS null_count
FROM sales
WHERE transaction_id IS NULL OR transaction_id = '';
DELETE FROM sales
WHERE transaction_id IS NULL OR transaction_id = '';
-- ============================================
-- EXPLORATORY DATA ANALYSIS
-- ============================================
-- Top 5 most selling products by quantity
SELECT DISTINCT status
from sales;
SELECT product_name, SUM(quantity) AS total_quantity_sold
FROM sales
WHERE status='delivered'
GROUP BY product_name
ORDER BY total_quantity_sold DESC
limit 5;
-- business problem: we don't know which products are most in demand.
-- business impact: helps priortize stock and boost sales through targeted promotions.
-- Products which are most frequently canceled
SELECT product_name, COUNT(*) AS total_cancelled
FROM sales
WHERE status='cancelled'
GROUP BY product_name
ORDER BY total_cancelled DESC
limit 5;
-- business problem: frequently cancellations affect revenue and customer trust
-- business impact: identify poor-performing products to improve quality or remove from catelog.
-- time of the day has highest number of purchases
Select * from sales;
SELECT
CASE
WHEN TIME(time_of_purchase) BETWEEN '05:00:00' AND '11:59:59' THEN 'Morning'
WHEN TIME(time_of_purchase) BETWEEN '12:00:00' AND '15:59:59' THEN 'Noon'
WHEN TIME(time_of_purchase) BETWEEN '16:00:00' AND '19:59:59' THEN 'Evening'
WHEN TIME(time_of_purchase) BETWEEN '20:00:00' AND '23:59:59' THEN 'Night'
WHEN TIME(time_of_purchase) BETWEEN '00:00:00' AND '04:59:59' THEN 'Night'
END AS time_of_day,
COUNT(*) AS total_order
FROM sales
GROUP BY
CASE
WHEN TIME(time_of_purchase) BETWEEN '05:00:00' AND '11:59:59' THEN 'Morning'
WHEN TIME(time_of_purchase) BETWEEN '12:00:00' AND '15:59:59' THEN 'Noon'
WHEN TIME(time_of_purchase) BETWEEN '16:00:00' AND '19:59:59' THEN 'Evening'
WHEN TIME(time_of_purchase) BETWEEN '20:00:00' AND '23:59:59' THEN 'Night'
WHEN TIME(time_of_purchase) BETWEEN '00:00:00' AND '04:59:59' THEN 'Night'
END
ORDER BY total_order DESC;
-- business problem solved: find the peak sales times
-- business impact:optimize staffing, promotions and server loads
-- top 5 highest spending customers
SELECT
customer_name,
CONCAT('₹', FORMAT(SUM(price * quantity), 0)) AS total_spend
FROM sales
GROUP BY customer_name
ORDER BY SUM(price * quantity) DESC
LIMIT 5;
-- business problem solved: identify VIP customers
-- business impact: personalized offers, loyality rewards, and retention.
SELECT * FROM sales;
-- product category to generate the highest revenue
SELECT
product_category,
CONCAT('₹', FORMAT(SUM(price * quantity), 0)) AS highest_revenue
FROM sales
GROUP BY product_category
ORDER BY SUM(price * quantity) DESC
LIMIT 1;
-- business problem solved: identify top-performing product categories.
-- business impact: refine product category, supply chain, and promotions.
-- allowing the business to invest more in high-margin or high-demand categories.
-- the return/cancellation rate per product category
-- cancellation
SELECT product_category,
CONCAT(ROUND(COUNT(CASE WHEN status = 'cancelled' THEN 1 END) * 100.0 / COUNT(*), 2),' %') AS cancelled_percent
FROM sales
GROUP BY product_category
ORDER BY cancelled_percent DESC;
-- return
SELECT product_category,
CONCAT(ROUND(COUNT(CASE WHEN status = 'returned' THEN 1 END) * 100.0 / COUNT(*), 2),' %') AS returned_percent
FROM sales
GROUP BY product_category
ORDER BY returned_percent DESC;
-- bussiness problem solved: monitor dissatisfaction trends per category
-- bussiness impact: reduce returns, improve product descriptions/expections,
-- helps indentify and fix product or logistics issues.
-- most preferred payment mode
SELECT payment_mode, COUNT(payment_mode) AS total_count
FROM sales
GROUP BY payment_mode
ORDER BY total_count desc;
-- business problem: know which payment options customer prefer
-- business impact: streamline payment processing, prioritizw popular modes.
-- how does age group affect purchasing behaviour
SELECT
CASE
WHEN customer_age BETWEEN 18 AND 25 THEN '18-25'
WHEN customer_age BETWEEN 26 AND 35 THEN '26-35'
WHEN customer_age BETWEEN 36 AND 50 THEN '36-50'
ELSE '51+'
END AS customer_Age,
CONCAT('₹', FORMAT(SUM(price * quantity), 0)) AS total_purchase
FROM sales
GROUP BY
CASE
WHEN customer_age BETWEEN 18 AND 25 THEN '18-25'
WHEN customer_age BETWEEN 26 AND 35 THEN '26-35'
WHEN customer_age BETWEEN 36 AND 50 THEN '36-50'
ELSE '51+'
END
ORDER BY SUM(price * quantity) DESC;
-- business problem solved: understand customer demographics.
-- business impact: targeted marketing and product recommendations by age group.
-- monthly sales trend
SELECT
DATE_FORMAT(purchase_date, '%Y-%m') AS month_year,
CONCAT('₹', FORMAT(SUM(price * quantity), 0)) AS total_sales,
SUM(quantity) AS total_quantity
FROM sales
GROUP BY DATE_FORMAT(purchase_date, '%Y-%m')
ORDER BY DATE_FORMAT(purchase_date, '%Y-%m');
-- business problem solved: sales fluctuations go unnoticed.
-- business impact: plan inventory and marketing according to seasonal trends.
-- are certain gender buying more specific product categories?
SELECT gender, product_category, COUNT(product_category) AS total_purchase
FROM sales
GROUP BY gender, product_category
ORDER BY gender;
-- business problem solved: gender-based product preferences.
-- business impact: personalized ads, gender-focused campaigns.