Quick reference for SELECT, Filtering, Aggregations, JOINs, CTEs, Window Functions, String/Date Functions, CASE WHEN & Common Patterns
Template:
SELECT column1, column2, expression AS alias FROM schema_name.table_name WHERE condition GROUP BY column1 HAVING aggregate_condition ORDER BY column1 ASC | DESC LIMIT n; -- MySQL/PostgreSQL (use TOP n in SQL Server)
Practical Example:
SELECT customer_id, customer_name, city, total_spend, ROUND(total_spend * 0.18, 2) AS gst_amount FROM customers WHERE city = 'Mumbai' AND total_spend > 5000 ORDER BY total_spend DESC LIMIT 10;
BETWEEN (inclusive on both ends):
SELECT * FROM orders WHERE order_date BETWEEN '2025-01-01' AND '2025-12-31'; WHERE revenue BETWEEN 1000 AND 5000;
IN (list of values):
WHERE city IN ('Delhi', 'Mumbai', 'Bengaluru'); WHERE status NOT IN ('cancelled', 'returned');
LIKE (pattern matching):
WHERE email LIKE '%@gmail.com' WHERE name LIKE 'Raj%' -- starts with WHERE code LIKE '_A%' -- 2nd char = A -- % = any sequence, _ = single char
IS NULL / COALESCE:
WHERE phone IS NULL; WHERE phone IS NOT NULL; -- Replace NULL with default SELECT COALESCE(phone, 'N/A') AS phone FROM customers;
Core aggregate functions:
SELECT COUNT(*) AS total_rows, COUNT(phone) AS non_null_phones, COUNT(DISTINCT city)AS unique_cities, SUM(revenue) AS total_revenue, AVG(revenue) AS avg_revenue, MIN(order_date) AS first_order, MAX(order_date) AS last_order FROM orders;
GROUP BY + HAVING:
SELECT region, COUNT(*) AS order_count, SUM(revenue) AS total_revenue, ROUND(AVG(revenue),2) AS avg_revenue FROM orders WHERE status = 'completed' GROUP BY region HAVING SUM(revenue) > 100000 ORDER BY total_revenue DESC;
(A) ∩ (B)
╔══╗ ╔══╗
║ ╠══╣ ║
╚══╝ ╚══╝
▲▲▲▲▲▲▲▲
matched
only
ALL A + B match ╔══╗ ╔══╗ ║██╠══╣ ║ ╚══╝ ╚══╝ ▲▲▲▲▲▲ all A rows NULLs for unmatched B
A match + ALL B
╔══╗ ╔══╗
║ ╠══╣██║
╚══╝ ╚══╝
▲▲▲▲
all B rows
NULLs for
unmatched A
ALL A + ALL B ╔══╗ ╔══╗ ║██╠══╣██║ ╚══╝ ╚══╝ ▲▲▲▲▲▲▲▲▲ NULLs on both sides where no match
Syntax examples:
-- INNER JOIN: only matched rows SELECT o.order_id, c.customer_name, o.revenue FROM orders o INNER JOIN customers c ON o.customer_id = c.customer_id; -- LEFT JOIN: all orders, customer info where available SELECT o.order_id, c.customer_name FROM orders o LEFT JOIN customers c ON o.customer_id = c.customer_id; -- Find orders with NO matching customer (orphaned records) SELECT o.order_id FROM orders o LEFT JOIN customers c ON o.customer_id = c.customer_id WHERE c.customer_id IS NULL; -- FULL OUTER JOIN (PostgreSQL / SQL Server) SELECT a.id, b.id FROM table_a a FULL OUTER JOIN table_b b ON a.id = b.id; -- CROSS JOIN: cartesian product (every A × every B) SELECT p.product, c.colour FROM products p CROSS JOIN colours c;
Inline subquery:
SELECT customer_id, total_spend FROM customers WHERE total_spend > ( SELECT AVG(total_spend) FROM customers );
Derived table (FROM subquery):
SELECT region, avg_rev FROM ( SELECT region, AVG(revenue) AS avg_rev FROM orders GROUP BY region ) region_stats WHERE avg_rev > 5000;
CTE with WITH (preferred):
WITH region_stats AS ( SELECT region, AVG(revenue) AS avg_rev, COUNT(*) AS order_count FROM orders GROUP BY region ), top_regions AS ( SELECT * FROM region_stats WHERE avg_rev > 5000 ) SELECT r.region, r.avg_rev, c.customer_name FROM top_regions r JOIN customers c ON c.region = r.region;
Syntax template:
function_name() OVER ( [PARTITION BY column] -- reset window per group [ORDER BY column] -- define row order [ROWS BETWEEN ... AND ...] -- optional frame )
Ranking functions:
SELECT name, salary, dept, ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) rn, RANK() OVER (PARTITION BY dept ORDER BY salary DESC) rnk, DENSE_RANK() OVER (PARTITION BY dept ORDER BY salary DESC) dr FROM employees;
LAG / LEAD (access previous/next row):
SELECT month, revenue, LAG(revenue, 1) OVER (ORDER BY month) AS prev_rev, LEAD(revenue, 1) OVER (ORDER BY month) AS next_rev FROM monthly_revenue;
NTILE (percentile buckets):
SELECT customer_id, total_spend, NTILE(4) OVER (ORDER BY total_spend) AS quartile FROM customers; -- quartile 4 = top 25% spenders
Running total + moving average:
SELECT sale_date, daily_sales, SUM(daily_sales) OVER (ORDER BY sale_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total, AVG(daily_sales) OVER (ORDER BY sale_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS ma_7day FROM daily_sales;
-- Concatenation CONCAT(first_name, ' ', last_name) first_name || ' ' || last_name -- PostgreSQL -- Case conversion UPPER(city) -- 'MUMBAI' LOWER(email) -- 'user@example.com' -- Length LEN(name) -- SQL Server LENGTH(name) -- MySQL / PostgreSQL -- Trim whitespace TRIM(name) LTRIM(name) -- left only RTRIM(name) -- right only
-- Substring extraction SUBSTRING(email, 1, CHARINDEX('@', email)-1) -- extract username before @ LEFT(phone, 4) -- first 4 chars RIGHT(phone, 4) -- last 4 chars -- Replace REPLACE(phone, '-', '') -- remove dashes -- Position of substring CHARINDEX('@', email) -- SQL Server POSITION('@' IN email) -- Standard SQL INSTR(email, '@') -- MySQL
Practical example — extract domain from email:
SELECT email, SUBSTRING(email, CHARINDEX('@', email)+1, LEN(email)) AS email_domain FROM customers;
Current date/time:
GETDATE() -- SQL Server: now NOW() -- MySQL: now CURRENT_DATE -- PostgreSQL: date only CURRENT_TIMESTAMP -- standard SQL
Add / subtract intervals:
-- SQL Server DATEADD(DAY, 30, order_date) -- +30 days DATEADD(MONTH, -3, GETDATE()) -- 3 months ago -- PostgreSQL order_date + INTERVAL '30 days' order_date - INTERVAL '3 months' -- MySQL DATE_ADD(order_date, INTERVAL 30 DAY)
Difference between dates:
-- SQL Server DATEDIFF(DAY, start_date, end_date) DATEDIFF(MONTH, signup_date, GETDATE()) -- PostgreSQL end_date - start_date -- returns integer days AGE(end_date, start_date) -- interval -- MySQL DATEDIFF(end_date, start_date) -- days only
Extract parts:
YEAR(order_date) -- 2025 MONTH(order_date) -- 3 DAY(order_date) -- 15 EXTRACT(DOW FROM order_date) -- day of week (PostgreSQL) FORMAT(order_date, 'yyyy-MM') -- '2025-03' (SQL Server)
Cohort month example:
SELECT customer_id, FORMAT(MIN(order_date), 'yyyy-MM') AS cohort_month, COUNT(*) AS total_orders FROM orders GROUP BY customer_id;
Simple CASE (match a value):
SELECT order_id, CASE status WHEN 'completed' THEN '✓ Done' WHEN 'pending' THEN '⏳ Wait' WHEN 'cancelled' THEN '✗ Void' ELSE '? Unknown' END AS status_label FROM orders;
Searched CASE (evaluate conditions):
SELECT customer_id, total_spend, CASE WHEN total_spend >= 100000 THEN 'Platinum' WHEN total_spend >= 50000 THEN 'Gold' WHEN total_spend >= 10000 THEN 'Silver' ELSE 'Bronze' END AS tier FROM customers;
CASE inside aggregation (pivot-style):
SELECT region, SUM(CASE WHEN product_cat = 'Electronics' THEN revenue ELSE 0 END) AS electronics_rev, SUM(CASE WHEN product_cat = 'Clothing' THEN revenue ELSE 0 END) AS clothing_rev, SUM(CASE WHEN product_cat = 'Groceries' THEN revenue ELSE 0 END) AS grocery_rev FROM orders GROUP BY region;
1. Running Total:
SELECT sale_date, daily_revenue, SUM(daily_revenue) OVER (ORDER BY sale_date ROWS UNBOUNDED PRECEDING) AS cumulative_revenue FROM daily_sales;
2. Year-over-Year (YoY) Comparison:
WITH yearly AS ( SELECT YEAR(order_date) AS yr, SUM(revenue) AS total_rev FROM orders GROUP BY YEAR(order_date) ) SELECT yr, total_rev, LAG(total_rev) OVER (ORDER BY yr) AS prev_yr_rev, ROUND( (total_rev - LAG(total_rev) OVER (ORDER BY yr)) * 100.0 / NULLIF(LAG(total_rev) OVER (ORDER BY yr), 0), 1) AS yoy_pct FROM yearly;
3. 7-Day Moving Average:
SELECT sale_date, daily_sales, ROUND(AVG(daily_sales) OVER ( ORDER BY sale_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW ), 2) AS ma_7day FROM daily_sales;
4. Top N Per Group (Top 3 products per region):
WITH ranked AS ( SELECT region, product_name, total_revenue, ROW_NUMBER() OVER ( PARTITION BY region ORDER BY total_revenue DESC ) AS rn FROM product_revenue ) SELECT region, product_name, total_revenue FROM ranked WHERE rn <= 3;
5. Cohort Retention Table:
WITH first_order AS ( SELECT customer_id, MIN(order_date) AS first_order_date, FORMAT(MIN(order_date), 'yyyy-MM') AS cohort_month FROM orders GROUP BY customer_id ), activity AS ( SELECT o.customer_id, f.cohort_month, DATEDIFF(MONTH, f.first_order_date, o.order_date) AS months_since_first FROM orders o JOIN first_order f ON o.customer_id = f.customer_id ) SELECT cohort_month, months_since_first, COUNT(DISTINCT customer_id) AS active_users FROM activity GROUP BY cohort_month, months_since_first ORDER BY cohort_month, months_since_first;