#5 Subqueries and CTEs
#5.1 Introduction
subquery์ CTE๋ฅผ ํ์ฉํด ๋ ๋์ ํ์ฉ์ด ๊ฐ๋ฅ
-- List movies with a (rating|revenue) higher than the average (rating|revenue) of all movies.
-- List movies with a rating higher than the average rating of movies in their genre.
-- Find the directors with a career revenue higher than the average revenue of all directors.
-- Find the movies with a rating higher than the average rating of movies released in the same year.
-- Make a super slow query and optimize it as much as we can with what we know so far.
#5.1 Independent Subqueries
Subquery - ๋ค๋ฅธ ์ฟผ๋ฆฌ ๋ด๋ถ์ ์๋ ์ฟผ๋ฆฌ๋ก ๊ดํธ๋ก ๊ฐ์ธ ์ฌ์ฉ
Independence Subquery - ์๋ธ ์ฟผ๋ฆฌ์ ์ผ์ข ์ผ๋ก ์ฟผ๋ฆฌ์ ๊ฒฐ๊ณผ๊ฐ ๋ฐ๋์ง ์๊ธฐ์ ํ๋ฒ๋ง ์คํ์ํจ ํ ๊ฐ์ ๊ธฐ์ตํด ๋๋จธ์ง์ ํ์ฉ
-- List movies with a (rating|revenue) higher
-- than the average (rating|revenue) of all movies.
SELECT title, rating, revenue
FROM movies
WHERE
rating > (SELECT avg(rating) FROM movies) AND
revenue > (SELECT avg(revenue) FROM movies)
#5.2 CTEs
Common Table Expressions - Independent Subqueries๋ฅผ ์ฌ์ฌ์ฉํ ์ ์๊ฒ ํ๋ค.
SQL ์ฟผ๋ฆฌ๋ฅผ ๋ ์์ ๋ ผ๋ฆฌ์ ๋จ์๋ก ๋๋ ๋ชจ๋์ SQL ์ฟผ๋ฆฌ๋ฅผ ๊ตฌํํ๋ค.
View์ ๋น์ทํ์ง๋ง ์๊ตฌ์ ์ด์ง ์์ผ๋ฉฐ ํด๋น ์ฟผ๋ฆฌ์ context์์๋ง ์์ฑ๋๊ณ ๋ฒ๋ ค์ง๋ค.
-- CTE ์์ฑํ๊ธฐ
WITH cte_something AS (SELECT AVG(rating) FROM movies)
-- CTE EX
WITH avg_revenue_cte AS (SELECT AVG(revenue) FROM movies),
avg_rating_cte AS (SELECT AVG(rating) FROM movies)
SELECT
title, director, revenue, rating,
round((SELECT * FROM avg_revenue_cte), 0) AS avg_revenue,
round((SELECT * FROM avg_rating_cte), 0) AS avg_rating
FROM movies
WHERE
revenue > (SELECT avg_revenue FROM avg_revenue_cte) AND
rating > (SELECT avg_rating FROM avg_rating_cte);
#5.3 Correlated Subqueries
์๊ด๊ด๊ณ ์๋ธ์ฟผ๋ฆฌ - ์ธ๋ถ ์ฟผ๋ฆฌ์ ๊ฐ์ ์ฌ์ฉํ๋ ์๋ธ์ฟผ๋ฆฌ ์ธ๋ถ ์ฟผ๋ฆฌ์์ ์ฐธ์กฐ๋๋ ํ ์ด๋ธ์์ ๋ฐ์ดํฐ๋ฅผ ๊ฒ์ํจ ์๋ธ์ฟผ๋ฆฌ์ ์คํ์ด ์ธ๋ถ ์ฟผ๋ฆฌ์ ํ์ ์ํฅ์ ๋ฐ๊ธฐ ๋๋ฌธ์ "์๊ด๋จ"์ด๋ผ๊ณ ํจ ์๊ด๊ด๊ณ๊ฐ ์๋ ์๋ธ์ฟผ๋ฆฌ์์๋ ์๋ธ์ฟผ๋ฆฌ๊ฐ ๋จผ์ ๋ ๋ฆฝ์ ์ผ๋ก ์คํ๋ ๋ค์ ํด๋น ๊ฒฐ๊ณผ๊ฐ ์ธ๋ถ ์ฟผ๋ฆฌ์ ๊ฐ์ผ๋ก ์ฌ์ฉ๋์ง๋ง, ์๊ด ๊ด๊ณ๊ฐ ์๋ ์๋ธ์ฟผ๋ฆฌ๋ ์ด์ ์ฐธ์กฐํ์ฌ ์ธ๋ถ ์ฟผ๋ฆฌ์ ์ฐ๊ฒฐ๋๊ณ ์ธ๋ถ ์ฟผ๋ฆฌ์์ ์ฒ๋ฆฌํ๋ ๊ฐ ํ์ ๋ํด ํ ๋ฒ์ฉ ๋ฐ๋ณต์ ์ผ๋ก ์คํ๋๋ค.
-- Find the movies with a rating higher than
-- the average rating of movies released in the same year.
-- Correlated Subquery
SELECT
main_movies.title,
main_movies.director,
main_movies.rating,
main_movies.release_date,
(
SELECT avg(inner_movies.rating)
FROM movies AS inner_movies
WHERE inner_movies.release_date = main_movies.release_date) AS year_average
)
FROM movies AS main_movies
WHERE main_movies.rating > (
SELECT avg(inner_movies.rating)
FROM movies AS inner_movies
WHERE innre_movies.release_date = main_movies.release_date
);
#5.4 Correlated CTEs
WITH movie_avg_per_year AS (
SELECT avg(inner_movies.rating)
FROM movies AS inner_movies
WHERE inner_movies.release_date = main_movies.release_date
)
SELECT
main_movies.title,
main_movies.director,
main_movies.rating,
main_movies.release_date,
(
SELECT *
FROM movie_avg_per_year) AS year_average
)
FROM movies AS main_movies
WHERE main_movies.rating > (
SELECT *
FROM movie_avg_per_year
)
#5.5 Subquery Practice
-- List movies with a rating higher
-- than the average rating of movies in their genre.
WITH avg_rating_genre AS (
SELECT avg(inner_movies.rating)
FROM movies AS inner_movies
WHERE inner_movies.genre = main_movies.genre
)
SELECT title
FROM movies AS main_movies
WHERE main_moveis.rating > (
SELECT *
FROM avg_rating_genre
)
-- Find the directors with a career revenue higher
-- than the average revenue of all directors.
WITH directors_revenues AS (
SELECT sum(revenue) AS career_revenue
FROM movies
WHERE director IS NOT NULL AND revenue IS NOTT NULL
GROUP BY director
), avg_director_carrer_revenue AS (
SELECT avg(career_revenue)
FROM directors_revenues
)
SELECT
director,
sum(revenue) AS total_revenue,
avg_director_carrer_revenue AS peersAVG
FROM movies
WHERE
director IS NOT NULL
AND revenue IS NOT NULL
GROUP BY director
HAVING total_revenue > avg_director_carrer_revenue;
#5.6 Final Practice
WITH director_stats AS (
SELECT
director,
COUNT(*) AS total_movies,
AVG(rating) AS avg_rating,
MAX(rating) AS best_rating,
MIN(rating) AS worst_rating,
MAX(budget) AS highest_budget,
MIN(budget) AS lowest_budget
FROM movies
WHERE director IS NOT NULL AND budget IS NOT NULL AND rating IS NOT NULL
GROUP BY director
)
SELECT
director,
total_movies,
avg_rating,
best_rating,
worst_rating,
highest_budget,
lowest_budget,
(
SELECT title
FROM movies
WHERE rating IS NOT NULL AND budget IS NOT NULL AND director = ds.director
ORDER BY rating DESC
LIMIT 1
) AS best_rated_movie,
(
SELECT title
FROM movies
WHERE rating IS NOT NULL AND budget IS NOT NULL AND director = ds.director
ORDER BY rating ASC
LIMIT 1
) AS worst_rated_movie,
(
SELECT title
FROM movies
WHERE rating IS NOT NULL AND budget IS NOT NULL AND director = ds.director
ORDER BY budget DESC
LIMIT 1
) AS best_expensive_movie,
(
SELECT title
FROM movies
WHERE rating IS NOT NULL AND budget IS NOT NULL AND director = ds.director
ORDER BY rating ASC
LIMIT 1
) AS least_expensive_movie
FROM director_stats AS ds;
-- CREATE INDEX idx_director ON movies (director);
-- DROP INDEX idx_director;
Last updated
Was this helpful?