SQL REFERENCE
Subqueries
A subquery is a SELECT statement nested inside another statement. Subqueries can appear in WHERE, FROM, and SELECT clauses.
IN (Subquery)
Tests whether a value matches any row returned by a subquery. The subquery must return exactly one column.
-- Employees in departments located in Building A
SELECT name, salary
FROM employees
WHERE dept_id IN (
SELECT id FROM departments
WHERE location = 'Building A'
);
-- Products that have been ordered
SELECT name, price
FROM products
WHERE id IN (
SELECT DISTINCT product_id FROM order_items
);
NOT IN (Subquery)
Tests whether a value does not match any row returned by a subquery.
-- Employees NOT in any department
SELECT name, salary
FROM employees
WHERE dept_id NOT IN (
SELECT id FROM departments
);
-- Products that have never been ordered
SELECT name, price
FROM products
WHERE id NOT IN (
SELECT DISTINCT product_id FROM order_items
);
Warning: If the subquery returns any NULL values, NOT IN will return no rows. Filter NULLs in the subquery to avoid this:
SELECT name FROM employees
WHERE dept_id NOT IN (
SELECT id FROM departments WHERE id IS NOT NULL
);
FROM Clause Subqueries (Derived Tables)
A subquery in the FROM clause acts as a virtual table. It must be given an alias.
-- Use a derived table to filter on aggregated values
SELECT dept, avg_salary
FROM (
SELECT department AS dept, AVG(salary) AS avg_salary
FROM employees
GROUP BY department
) AS dept_stats
WHERE avg_salary > 80000
ORDER BY avg_salary DESC;
-- Top earner per department using derived table + window function
SELECT name, department, salary
FROM (
SELECT
name, department, salary,
ROW_NUMBER() OVER (
PARTITION BY department ORDER BY salary DESC
) AS rn
FROM employees
) AS ranked
WHERE rn = 1
ORDER BY salary DESC;
Scalar Subqueries
A subquery that returns exactly one row and one column can be used wherever a single value is expected.
-- Compare each salary to the company average
SELECT
name,
salary,
salary - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees
ORDER BY diff_from_avg DESC;
-- Find the highest-paid employee
SELECT name, salary
FROM employees
WHERE salary = (SELECT MAX(salary) FROM employees);
-- Use scalar subquery in CASE
SELECT
name,
salary,
CASE
WHEN salary > (SELECT AVG(salary) FROM employees) THEN 'Above Average'
ELSE 'At or Below Average'
END AS salary_band
FROM employees;
Nested Subqueries
Subqueries can be nested to arbitrary depth.
-- Employees in departments with above-average headcount
SELECT name, department
FROM employees
WHERE department IN (
SELECT department
FROM employees
GROUP BY department
HAVING COUNT(*) > (
SELECT AVG(dept_count) FROM (
SELECT COUNT(*) AS dept_count
FROM employees
GROUP BY department
) AS counts
)
);
Subqueries with Aggregation
-- Departments where total salary exceeds 500k
SELECT department, total_salary
FROM (
SELECT department, SUM(salary) AS total_salary
FROM employees
GROUP BY department
) AS dept_totals
WHERE total_salary > 500000
ORDER BY total_salary DESC;
Subqueries in INSERT
-- Archive high-salary employees
INSERT INTO high_earners (name, department, salary)
SELECT name, department, salary
FROM employees
WHERE salary > (SELECT AVG(salary) * 1.5 FROM employees);
Subquery vs. JOIN
Many subqueries can be rewritten as joins. Choose based on readability:
-- Subquery approach
SELECT name FROM employees
WHERE dept_id IN (SELECT id FROM departments WHERE location = 'Building A');
-- Equivalent JOIN approach
SELECT e.name
FROM employees e
INNER JOIN departments d ON e.dept_id = d.id
WHERE d.location = 'Building A';