SQL REFERENCE

Subqueries

A subquery is a SELECT statement nested inside another statement. Subqueries can appear in WHERE, FROM, and SELECT clauses.

IN (Subquery)

Tests whether a value matches any row returned by a subquery. The subquery must return exactly one column.

-- Employees in departments located in Building A
SELECT name, salary
FROM employees
WHERE dept_id IN (
  SELECT id FROM departments
  WHERE location = 'Building A'
);
-- Products that have been ordered
SELECT name, price
FROM products
WHERE id IN (
  SELECT DISTINCT product_id FROM order_items
);

NOT IN (Subquery)

Tests whether a value does not match any row returned by a subquery.

-- Employees NOT in any department
SELECT name, salary
FROM employees
WHERE dept_id NOT IN (
  SELECT id FROM departments
);

-- Products that have never been ordered
SELECT name, price
FROM products
WHERE id NOT IN (
  SELECT DISTINCT product_id FROM order_items
);

Warning: If the subquery returns any NULL values, NOT IN will return no rows. Filter NULLs in the subquery to avoid this:

SELECT name FROM employees
WHERE dept_id NOT IN (
  SELECT id FROM departments WHERE id IS NOT NULL
);

FROM Clause Subqueries (Derived Tables)

A subquery in the FROM clause acts as a virtual table. It must be given an alias.

-- Use a derived table to filter on aggregated values
SELECT dept, avg_salary
FROM (
  SELECT department AS dept, AVG(salary) AS avg_salary
  FROM employees
  GROUP BY department
) AS dept_stats
WHERE avg_salary > 80000
ORDER BY avg_salary DESC;
-- Top earner per department using derived table + window function
SELECT name, department, salary
FROM (
  SELECT
    name, department, salary,
    ROW_NUMBER() OVER (
      PARTITION BY department ORDER BY salary DESC
    ) AS rn
  FROM employees
) AS ranked
WHERE rn = 1
ORDER BY salary DESC;

Scalar Subqueries

A subquery that returns exactly one row and one column can be used wherever a single value is expected.

-- Compare each salary to the company average
SELECT
  name,
  salary,
  salary - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees
ORDER BY diff_from_avg DESC;
-- Find the highest-paid employee
SELECT name, salary
FROM employees
WHERE salary = (SELECT MAX(salary) FROM employees);
-- Use scalar subquery in CASE
SELECT
  name,
  salary,
  CASE
    WHEN salary > (SELECT AVG(salary) FROM employees) THEN 'Above Average'
    ELSE 'At or Below Average'
  END AS salary_band
FROM employees;

Nested Subqueries

Subqueries can be nested to arbitrary depth.

-- Employees in departments with above-average headcount
SELECT name, department
FROM employees
WHERE department IN (
  SELECT department
  FROM employees
  GROUP BY department
  HAVING COUNT(*) > (
    SELECT AVG(dept_count) FROM (
      SELECT COUNT(*) AS dept_count
      FROM employees
      GROUP BY department
    ) AS counts
  )
);

Subqueries with Aggregation

-- Departments where total salary exceeds 500k
SELECT department, total_salary
FROM (
  SELECT department, SUM(salary) AS total_salary
  FROM employees
  GROUP BY department
) AS dept_totals
WHERE total_salary > 500000
ORDER BY total_salary DESC;

Subqueries in INSERT

-- Archive high-salary employees
INSERT INTO high_earners (name, department, salary)
SELECT name, department, salary
FROM employees
WHERE salary > (SELECT AVG(salary) * 1.5 FROM employees);

Subquery vs. JOIN

Many subqueries can be rewritten as joins. Choose based on readability:

-- Subquery approach
SELECT name FROM employees
WHERE dept_id IN (SELECT id FROM departments WHERE location = 'Building A');

-- Equivalent JOIN approach
SELECT e.name
FROM employees e
INNER JOIN departments d ON e.dept_id = d.id
WHERE d.location = 'Building A';