#5 Subqueries and CTEs

#5.1 Introduction

subquery์™€ CTE๋ฅผ ํ™œ์šฉํ•ด ๋” ๋‚˜์€ ํ™œ์šฉ์ด ๊ฐ€๋Šฅ

-- List movies with a (rating|revenue) higher than the average (rating|revenue) of all movies.
-- List movies with a rating higher than the average rating of movies in their genre.
-- Find the directors with a career revenue higher than the average revenue of all directors.
-- Find the movies with a rating higher than the average rating of movies released in the same year.
-- Make a super slow query and optimize it as much as we can with what we know so far.

#5.1 Independent Subqueries

Subquery - ๋‹ค๋ฅธ ์ฟผ๋ฆฌ ๋‚ด๋ถ€์— ์žˆ๋Š” ์ฟผ๋ฆฌ๋กœ ๊ด„ํ˜ธ๋กœ ๊ฐ์‹ธ ์‚ฌ์šฉ

Independence Subquery - ์„œ๋ธŒ ์ฟผ๋ฆฌ์˜ ์ผ์ข…์œผ๋กœ ์ฟผ๋ฆฌ์˜ ๊ฒฐ๊ณผ๊ฐ€ ๋ฐ”๋€Œ์ง€ ์•Š๊ธฐ์— ํ•œ๋ฒˆ๋งŒ ์‹คํ–‰์‹œํ‚จ ํ›„ ๊ฐ’์„ ๊ธฐ์–ตํ•ด ๋‚˜๋จธ์ง€์— ํ™œ์šฉ

-- List movies with a (rating|revenue) higher
-- than the average (rating|revenue) of all movies.

SELECT title, rating, revenue
FROM movies
WHERE 
  rating > (SELECT avg(rating) FROM movies) AND
  revenue > (SELECT avg(revenue) FROM movies)

#5.2 CTEs

Common Table Expressions - Independent Subqueries๋ฅผ ์žฌ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๊ฒŒ ํ•œ๋‹ค.

SQL ์ฟผ๋ฆฌ๋ฅผ ๋” ์ž‘์€ ๋…ผ๋ฆฌ์  ๋‹จ์œ„๋กœ ๋‚˜๋ˆ  ๋ชจ๋“ˆ์‹ SQL ์ฟผ๋ฆฌ๋ฅผ ๊ตฌํ˜„ํ•œ๋‹ค.

View์™€ ๋น„์Šทํ•˜์ง€๋งŒ ์˜๊ตฌ์ ์ด์ง€ ์•Š์œผ๋ฉฐ ํ•ด๋‹น ์ฟผ๋ฆฌ์˜ context์—์„œ๋งŒ ์ƒ์„ฑ๋˜๊ณ  ๋ฒ„๋ ค์ง„๋‹ค.

-- CTE ์ƒ์„ฑํ•˜๊ธฐ
WITH cte_something AS (SELECT AVG(rating) FROM movies)


-- CTE EX
WITH avg_revenue_cte AS (SELECT AVG(revenue) FROM movies),
    avg_rating_cte AS (SELECT AVG(rating) FROM movies)

SELECT
    title, director, revenue, rating,
    round((SELECT * FROM avg_revenue_cte), 0) AS avg_revenue,
    round((SELECT * FROM avg_rating_cte), 0) AS avg_rating
FROM movies
WHERE
    revenue > (SELECT avg_revenue FROM avg_revenue_cte) AND
    rating > (SELECT avg_rating FROM avg_rating_cte);

#5.3 Correlated Subqueries

์ƒ๊ด€๊ด€๊ณ„ ์„œ๋ธŒ์ฟผ๋ฆฌ - ์™ธ๋ถ€ ์ฟผ๋ฆฌ์˜ ๊ฐ’์„ ์‚ฌ์šฉํ•˜๋Š” ์„œ๋ธŒ์ฟผ๋ฆฌ ์™ธ๋ถ€ ์ฟผ๋ฆฌ์—์„œ ์ฐธ์กฐ๋˜๋Š” ํ…Œ์ด๋ธ”์—์„œ ๋ฐ์ดํ„ฐ๋ฅผ ๊ฒ€์ƒ‰ํ•จ ์„œ๋ธŒ์ฟผ๋ฆฌ์˜ ์‹คํ–‰์ด ์™ธ๋ถ€ ์ฟผ๋ฆฌ์˜ ํ–‰์— ์˜ํ–ฅ์„ ๋ฐ›๊ธฐ ๋•Œ๋ฌธ์— "์ƒ๊ด€๋จ"์ด๋ผ๊ณ  ํ•จ ์ƒ๊ด€๊ด€๊ณ„๊ฐ€ ์—†๋Š” ์„œ๋ธŒ์ฟผ๋ฆฌ์—์„œ๋Š” ์„œ๋ธŒ์ฟผ๋ฆฌ๊ฐ€ ๋จผ์ € ๋…๋ฆฝ์ ์œผ๋กœ ์‹คํ–‰๋œ ๋‹ค์Œ ํ•ด๋‹น ๊ฒฐ๊ณผ๊ฐ€ ์™ธ๋ถ€ ์ฟผ๋ฆฌ์˜ ๊ฐ’์œผ๋กœ ์‚ฌ์šฉ๋˜์ง€๋งŒ, ์ƒ๊ด€ ๊ด€๊ณ„๊ฐ€ ์žˆ๋Š” ์„œ๋ธŒ์ฟผ๋ฆฌ๋Š” ์—ด์„ ์ฐธ์กฐํ•˜์—ฌ ์™ธ๋ถ€ ์ฟผ๋ฆฌ์— ์—ฐ๊ฒฐ๋˜๊ณ  ์™ธ๋ถ€ ์ฟผ๋ฆฌ์—์„œ ์ฒ˜๋ฆฌํ•˜๋Š” ๊ฐ ํ–‰์— ๋Œ€ํ•ด ํ•œ ๋ฒˆ์”ฉ ๋ฐ˜๋ณต์ ์œผ๋กœ ์‹คํ–‰๋œ๋‹ค.

-- Find the movies with a rating higher than
-- the average rating of movies released in the same year.

-- Correlated Subquery
SELECT 
    main_movies.title,
    main_movies.director,
    main_movies.rating,
    main_movies.release_date,
    (
        SELECT avg(inner_movies.rating)
        FROM movies AS inner_movies
        WHERE inner_movies.release_date = main_movies.release_date) AS year_average
    )
FROM movies AS main_movies
WHERE main_movies.rating > (
    SELECT avg(inner_movies.rating)
    FROM movies AS inner_movies
    WHERE innre_movies.release_date = main_movies.release_date
    );

#5.4 Correlated CTEs

WITH movie_avg_per_year AS (
    SELECT avg(inner_movies.rating)
    FROM movies AS inner_movies
    WHERE inner_movies.release_date = main_movies.release_date
)

SELECT 
    main_movies.title,
    main_movies.director,
    main_movies.rating,
    main_movies.release_date,
    (
        SELECT *
        FROM movie_avg_per_year) AS year_average
    )
FROM movies AS main_movies
WHERE main_movies.rating > (
    SELECT *
    FROM movie_avg_per_year
)

#5.5 Subquery Practice

-- List movies with a rating higher
-- than the average rating of movies in their genre.

WITH avg_rating_genre AS (
    SELECT avg(inner_movies.rating)
    FROM movies AS inner_movies
    WHERE inner_movies.genre = main_movies.genre
)

SELECT title
FROM movies AS main_movies
WHERE main_moveis.rating > (
    SELECT *
    FROM avg_rating_genre
)


-- Find the directors with a career revenue higher
-- than the average revenue of all directors.

WITH directors_revenues AS (
    SELECT sum(revenue) AS career_revenue
    FROM movies
    WHERE director IS NOT NULL AND revenue IS NOTT NULL
    GROUP BY director
), avg_director_carrer_revenue AS (
    SELECT avg(career_revenue)
    FROM directors_revenues
)

SELECT
    director,
    sum(revenue) AS total_revenue,
    avg_director_carrer_revenue AS peersAVG
FROM movies
WHERE
    director IS NOT NULL
    AND revenue IS NOT NULL
GROUP BY director
HAVING total_revenue > avg_director_carrer_revenue;

#5.6 Final Practice

WITH director_stats AS (
    SELECT
        director,
        COUNT(*) AS total_movies,
        AVG(rating) AS avg_rating,
        MAX(rating) AS best_rating,
        MIN(rating) AS worst_rating,
        MAX(budget) AS highest_budget,
        MIN(budget) AS lowest_budget
    FROM movies
    WHERE director IS NOT NULL AND budget IS NOT NULL AND rating IS NOT NULL
    GROUP BY director
)

SELECT 
        director,
        total_movies,
        avg_rating,
        best_rating,
        worst_rating,
        highest_budget,
        lowest_budget,
    (
        SELECT title
        FROM movies
        WHERE rating IS NOT NULL AND budget IS NOT NULL AND director = ds.director
        ORDER BY rating DESC
        LIMIT 1
    ) AS best_rated_movie,
    (
        SELECT title
        FROM movies
        WHERE rating IS NOT NULL AND budget IS NOT NULL AND director = ds.director
        ORDER BY rating ASC
        LIMIT 1
    ) AS worst_rated_movie,
    (
        SELECT title
        FROM movies
        WHERE rating IS NOT NULL AND budget IS NOT NULL AND director = ds.director
        ORDER BY budget DESC
        LIMIT 1
    ) AS best_expensive_movie,
    (
        SELECT title
        FROM movies
        WHERE rating IS NOT NULL AND budget IS NOT NULL AND director = ds.director
        ORDER BY rating ASC
        LIMIT 1
    ) AS least_expensive_movie
FROM director_stats AS ds;


-- CREATE INDEX idx_director ON movies (director);
-- DROP INDEX idx_director;

Last updated

Was this helpful?