Post

SQL

Update

Swap Salary

1
2
3
4
5
6
UPDATE salary
SET
    sex = CASE sex
        WHEN 'm' THEN 'f'
        ELSE 'm'
    END;
1
2
UPDATE salary
SET sex = IF(sex='m', 'f', 'm');
1
2
UPDATE salary
SET sex = CHAR(ASCII('f') ^ ASCII('m') ^ ASCII(sex));

Percentage of Users Attended a Contest

1
2
3
4
5
6
7
8
9
10
11
SELECT
    contest_id,
    ROUND(COUNT(DISTINCT user_id) / (SELECT COUNT(*) FROM Users) * 100, 2) AS percentage
FROM
    Register
GROUP BY
    contest_id
ORDER BY
    percentage DESC,
    contest_id
;

Classes More Than 5 Students

1
2
3
4
5
6
7
8
9
10
11
SELECT
    class
FROM
    (SELECT
        class, COUNT(DISTINCT student) AS num
    FROM
        courses
    GROUP BY class) AS temp_table
WHERE
    num >= 5
;

Rising Temprature

1
2
3
4
5
6
7
8
9
SELECT
    weather.id AS 'Id'
FROM
    weather
JOIN
    weather w
    ON DATEDIFF(weather.recordDate, w.recordDate) = 1
        AND weather.Temperature > w.Temperature
;

Second Highest Salary

1
2
3
4
5
6
7
8
9
SELECT
    (SELECT DISTINCT
            Salary
        FROM
            Employee
        ORDER BY
            Salary DESC
        LIMIT 1 OFFSET 1) AS SecondHighestSalary
;
1
2
3
4
5
6
7
8
9
10
11
SELECT
    IFNULL(
      (SELECT DISTINCT
              Salary
          FROM
              Employee
          ORDER BY
              Salary DESC
          LIMIT 1 OFFSET 1),
    NULL) AS SecondHighestSalary
;
1
2
3
4
5
6
7
SELECT
    MAX(Salary) AS SecondHighestSalary
FROM
    Employee
WHERE
    Salary < (SELECT MAX(Salary) FROM Employee)
;

Employees Earning More Than Their Managers

1
2
3
4
5
6
SELECT
     a.NAME AS Employee
FROM Employee AS a JOIN Employee AS b
     ON a.ManagerId = b.Id
     AND a.Salary > b.Salary
;

IN

Highest Grade For Each Student

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
SELECT
    student_id,
    MIN(course_id) AS course_id,
    grade
FROM
    Enrollments
WHERE
    (student_id, grade) IN
    (SELECT
        student_id,
        MAX(grade)
    FROM
        Enrollments
    GROUP BY
        student_id)
GROUP BY
    student_id,
    grade
ORDER BY
    student_id
;

Customers Who Bought Products A and B but Not C

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
SELECT
    customer_id,
    customer_name
FROM
    Customers
WHERE
    customer_id IN (
        SELECT
            customer_id
        FROM
            Orders
        WHERE
            product_name = 'A'
        )
    AND customer_id IN (
        SELECT
            customer_id
        FROM
            Orders
        WHERE
            product_name = 'B'
        )
    AND customer_id NOT IN (
        SELECT
            customer_id
        FROM
            Orders
        WHERE
            product_name = 'C'
        )
;

GROUP BY

Multiple levels:

Active Businesses

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
SELECT
    business_id
FROM
    (SELECT
        event_type,
        AVG(occurences) AS ave_occurences
    FROM
        Events
    GROUP BY
        event_type) t
JOIN
    events e
    ON t.event_type = e.event_type
WHERE
    e.occurences > t.ave_occurences
GROUP BY
    business_id
HAVING
    COUNT(DISTINCT t.event_type) > 1
;

CASE Statement

Reformat Department Table

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
SELECT id, 
	SUM(CASE WHEN month = 'Jan' THEN revenue ELSE NULL END) AS Jan_Revenue,
	SUM(CASE WHEN month = 'Feb' THEN revenue ELSE NULL END) AS Feb_Revenue,
	SUM(CASE WHEN month = 'Mar' THEN revenue ELSE NULL END) AS Mar_Revenue,
	SUM(CASE WHEN month = 'Apr' THEN revenue ELSE NULL END) AS Apr_Revenue,
	SUM(CASE WHEN month = 'May' THEN revenue ELSE NULL END) AS May_Revenue,
	SUM(CASE WHEN month = 'Jun' THEN revenue ELSE NULL END) AS Jun_Revenue,
	SUM(CASE WHEN month = 'Jul' THEN revenue ELSE NULL END) AS Jul_Revenue,
	SUM(CASE WHEN month = 'Aug' THEN revenue ELSE NULL END) AS Aug_Revenue,
	SUM(CASE WHEN month = 'Sep' THEN revenue ELSE NULL END) AS Sep_Revenue,
	SUM(CASE WHEN month = 'Oct' THEN revenue ELSE NULL END) AS Oct_Revenue,
	SUM(CASE WHEN month = 'Nov' THEN revenue ELSE NULL END) AS Nov_Revenue,
	SUM(CASE WHEN month = 'Dec' THEN revenue ELSE NULL END) AS Dec_Revenue
FROM department
GROUP BY id
ORDER BY id
;

Evaluate Boolean Expression

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
SELECT
    e.left_operand,
    e.operator,
    e.right_operand,
    (
        CASE
            WHEN e.operator = '<' AND v1.value < v2.value THEN 'true'
            WHEN e.operator = '=' AND v1.value = v2.value THEN 'true'
            WHEN e.operator = '>' AND v1.value > v2.value THEN 'true'
            ELSE 'false'
        END
    ) AS value
FROM
    Expressions e
JOIN
    Variables v1
ON
    e.left_operand = v1.name
JOIN
    Variables v2
ON
    e.right_operand = v2.name
;

Team Scores in Football Tournament

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
SELECT
    team_id,
    team_name,
    IFNULL(
        SUM(CASE WHEN team_id = host_team AND host_goals > guest_goals THEN 3
            WHEN team_id = guest_team AND guest_goals > host_goals THEN 3
            WHEN team_id = host_team AND host_goals < guest_goals THEN 0
            WHEN team_id = guest_team AND guest_goals < host_goals THEN 0
            WHEN guest_goals = host_goals THEN 1
            END), 0) AS num_points
FROM
    Teams
LEFT JOIN
    Matches
    ON team_id = host_team OR team_id = guest_team
GROUP BY
    team_id
ORDER BY
    num_points DESC,
    team_id
;

League Statistics

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
SELECT
    team_name,
    COUNT(*) AS matches_played,
    SUM(CASE WHEN home_goals > away_goals THEN 3
        WHEN home_goals = away_goals THEN 1
        ELSE 0 END) AS points,
    SUM(home_goals) AS goal_for,
    SUM(away_goals) AS goal_against,
    SUM(home_goals) - SUM(away_goals) AS goal_diff
FROM
    -- views away team as home team
    (SELECT
        home_team_id,
        home_team_goals AS home_goals,
        away_team_goals AS away_goals
     FROM
        matches
     UNION ALL
     SELECT
        away_team_id AS home_team_id,
        away_team_goals AS home_goals,
        home_team_goals AS away_goals
     FROM
        matches
	 ) m
JOIN
    teams
    ON home_team_id = team_id
GROUP BY
    team_name
ORDER BY
    points DESC,
    goal_diff DESC,
    team_name
;

Exchange Seats

1
2
3
4
5
6
7
8
9
10
11
12
13
SELECT
    CASE
        WHEN seat.id % 2 <> 0 AND seat.id = (SELECT COUNT(*) FROM seat) THEN seat.id
        WHEN seat.id % 2 = 0 THEN seat.id - 1
    ELSE
        seat.id + 1
    END AS id,
    student 
FROM
    seat
ORDER BY
    id
;

Game Play Analysis I

1
2
3
4
5
6
7
8
SELECT
    player_id,
    MIN(event_date) AS first_login
FROM
    Activity
GROUP BY
    player_id
;

Game Play Analysis II

1
2
3
4
5
6
7
8
9
10
11
12
13
14
SELECT
    player_id,
    device_id
FROM Activity
WHERE
    (player_id, event_date) IN
        (SELECT
             player_id,
             MIN(event_date)
         FROM
             Activity
         GROUP BY
             player_id)
;

Game Play Analysis III

1
2
3
4
5
6
7
8
9
10
SELECT a1.player_id,
    a1.event_date,
    SUM(a2.games_played) AS games_played_so_far
FROM Activity a1
JOIN Activity a2
    ON a1.event_date >= a2.event_date
    AND a1.player_id = a2.player_id
GROUP BY
    a1.player_id, a1.event_date
;

MySQL uses three-valued logic – TRUE, FALSE and UNKNOWN. Anything compared to NULL evaluates to the third value: UNKNOWN. That “anything” includes NULL itself! That’s why MySQL provides the IS NULL and IS NOT NULL operators to specifically check for NULL.

WHERE

The Most Recent Orders for Each Product

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
SELECT
    product_name,
    p.product_id,
    order_id,
    order_date
FROM
    Products p
JOIN
    Orders o
    ON p.product_id = o.product_id
WHERE
    (o.product_id, o.order_date) IN (
        SELECT
            product_id,
            MAX(order_date) AS order_date
        FROM
            Orders
        GROUP BY
            product_id)
ORDER BY
    product_name,
    product_id,
    order_id
;

HAVING

value = max

Customer Placing the Largest Number of Orders

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
SELECT
    customer_number
FROM
    orders
GROUP BY
    customer_number
HAVING
    COUNT(order_number) = (
        SELECT
            COUNT(order_number) AS order_count
        FROM
            orders
        GROUP BY
            customer_number
        ORDER BY
            order_count DESC
        LIMIT 1
)
;

Sales Analysis I

1
2
3
4
5
6
7
8
9
10
11
12
13
14
SELECT
    seller_id
FROM
    Sales
GROUP BY
    seller_id
    HAVING SUM(price) >= ALL(
        SELECT
            SUM(price)
        FROM
            Sales
        GROUP BY
            seller_id)
;

Sales Analysis II

1
2
3
4
5
6
7
8
9
10
11
12
13
SELECT
    s.buyer_id
FROM
    Product p
JOIN
    Sales s
ON
    p.product_id = s.product_id 
GROUP BY
    s.buyer_id 
    HAVING
        SUM(p.product_name = 'S8') > 0 AND SUM(p.product_name = 'iPhone') = 0;
;

Customer Order Frequency

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
SELECT
    customer_id,
    name
FROM
    Customers
JOIN
    Orders
    USING(customer_id)
JOIN
    Product
    USING(product_id)
GROUP BY
    customer_id
    HAVING
        SUM(IF(LEFT(order_date, 7) = '2020-06', quantity, 0) * price) >= 100
        AND SUM(IF(LEFT(order_date, 7) = '2020-07', quantity, 0) * price) >= 100
;

Investments in 2016

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
SELECT
    SUM(insurance.TIV_2016) AS TIV_2016
FROM
    insurance
WHERE
    TIV_2015 IN
    (
        SELECT
            TIV_2015
        FROM
            insurance
        GROUP BY
            TIV_2015
            HAVING COUNT(*) > 1
    )
    AND CONCAT(LAT, LON) IN
    (
        SELECT
            CONCAT(LAT, LON)
        FROM
            insurance
        GROUP BY
            LAT, LON
            HAVING COUNT(*) = 1
    )
;

Consecutive Rows model

Customers With Strictly Increasing Purchases

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
WITH cte AS (
    SELECT
        customer_id,
        YEAR(MAX(order_date)) year,
        SUM(price) price
    FROM
        orders
    GROUP BY
        YEAR(order_date),
        customer_id)

SELECT
    t1.customer_id
FROM
    cte t1
LEFT JOIN
    cte t2
    ON t1.customer_id = t2.customer_id AND t1.year + 1 = t2.year AND t1.price < t2.price
GROUP BY
    t1.customer_id
    -- the last year contains NULL next year
    HAVING COUNT(*) - 1 = COUNT(t2.customer_id)
;

Consecutive Available Seats

1
2
3
4
5
6
7
8
9
10
11
SELECT
    DISTINCT c1.seat_id
FROM
    cinema c1
JOIN
    cinema c2
    ON ABS(c1.seat_id - c2.seat_id) = 1
    AND c1.free = 1 AND c2.free = 1
ORDER BY
    c1.seat_id;
;

EXISTS

Leetcodify Friends Recommendations

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
WITH allRecs AS (
    SELECT DISTINCT
        l1.user_id AS user1_id,
        l2.user_id AS user2_id
    FROM
        Listens l1
    INNER JOIN
        Listens l2
        ON l1.song_id = l2.song_id
        AND l1.day = l2.day
        AND l1.user_id < l2.user_id
    WHERE
        -- excludes existing friends
        NOT EXISTS(
            SELECT
                *
            FROM
                Friendship f
            WHERE
                l1.user_id = f.user1_id
                AND l2.user_id = f.user2_id)
    GROUP BY
        l1.user_id,
        l2.user_id,
        l1.day
        HAVING COUNT(DISTINCT l1.song_id) >= 3)

SELECT
    user1_id AS user_id,
    user2_id AS recommended_id
FROM
    allRecs
UNION
SELECT
    user2_id AS user_id,
    user1_id AS recommended_id
FROM
    allRecs
;

Friend Requests I: Overall Acceptance Rate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
SELECT
ROUND(
    IFNULL(
        (SELECT COUNT(*)
        FROM
            (SELECT DISTINCT requester_id,
                accepter_id
            FROM RequestAccepted) n)
        /
        (SELECT COUNT(*)
        FROM
            (SELECT DISTINCT sender_id,
                send_to_id
            FROM FriendRequest) d),
        0),
    2) AS accept_rate
;

Sales Person

1
2
3
4
5
6
7
8
9
SELECT s.name
FROM orders o
JOIN company c
    ON o.com_id = c.com_id
    AND c.name = 'RED'
RIGHT JOIN salesperson s
    ON s.sales_id = o.sales_id
WHERE o.sales_id IS NULL
;

Actors and Directors Who Cooperated At Least Three Times

1
2
3
4
5
6
7
8
9
SELECT
    actor_id,
    director_id
FROM
    ActorDirector
GROUP BY
    actor_id, director_id
    HAVING COUNT(actor_id) >= 3
;

Game Play Analysis IV

1
2
3
4
5
6
7
8
9
10
11
12
13
SELECT
    ROUND(COUNT(t2.player_id) / COUNT(t1.player_id), 2) AS fraction
FROM
    (SELECT
         player_id, MIN(event_date) AS first_login
     FROM
         Activity
     GROUP BY
         player_id) t1
LEFT JOIN
    Activity t2
    ON t1.player_id = t2.player_id AND t1.first_login = t2.event_date - 1
;

Get Highest Answer Rate Question

1
2
3
4
5
6
7
8
9
10
11
12
SELECT question_id as survey_log
FROM
    (SELECT
        question_id,
        SUM(CASE WHEN action="show" THEN 1 ELSE 0 END) as num_show,
        SUM(CASE WHEN action="answer" THEN 1 ELSE 0 END) as num_answer
     FROM survey_log
     GROUP BY question_id
) as q
ORDER BY (num_answer / num_show) DESC
LIMIT 1
;

Customers Who Bought All Products

1
2
3
4
5
6
7
SELECT customer_id
FROM customer c
GROUP BY customer_id
HAVING COUNT(DISTINCT product_key) =
    (SELECT COUNT(DISTINCT product_key)
     FROM product)
;

Tree Node

1
2
3
4
5
6
7
8
9
10
11
SELECT
    id,
    CASE WHEN p_id IS NULL THEN 'Root'
         WHEN id IN (SELECT p_id FROM tree) THEN 'Inner'
         ELSE 'Leaf'
    END AS Type
FROM
    tree
ORDER BY
    id
;

Aggregate Functions

AVG

Ads Performance

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
SELECT
    ad_id,
    IFNULL(
        ROUND(
-- AVG() does not consider NULL values
            AVG(
                CASE WHEN action = 'Clicked' THEN 1
                WHEN action = 'Viewed' THEN 0
                ELSE NULL END) * 100,
            2),
        0) AS ctr
FROM
    Ads
GROUP BY
    ad_id
ORDER BY
    ctr DESC, ad_id
;

OVER Clause

SELECT - OVER Clause (Transact-SQL)

Hopper Company Queries III

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
WITH RECURSIVE cte AS (
    SELECT
        1 AS month
    UNION ALL
    SELECT
        month + 1 AS month
    FROM
        cte
    WHERE
        month < 12
)

SELECT
    t1.month,
    ROUND(AVG(IFNULL(t2.monthly_distance, 0)) OVER(ORDER BY month ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING), 2) AS average_ride_distance,
    ROUND(AVG(IFNULL(t2.monthly_duration, 0)) OVER(ORDER BY month ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING), 2) AS average_ride_duration
FROM
    cte t1
LEFT JOIN
    (SELECT
        MONTH(r.requested_at) AS month,
        SUM(a.ride_distance) AS monthly_distance,
        SUM(ride_duration) AS monthly_duration
     FROM
        rides r
     JOIN
        acceptedrides a
        ON r.ride_id = a.ride_id
     WHERE
        YEAR(r.requested_at) = 2020
     GROUP BY
        month
    ) t2
    ON t1.month = t2.month
ORDER BY
    t1.month
LIMIT
    10
;

GROUP_CONCAT

1
2
3
4
GROUP_CONCAT([DISTINCT] expr [,expr ...]
             [ORDER BY {unsigned_integer | col_name | expr}
                 [ASC | DESC] [,col_name ...]]
             [SEPARATOR str_val])

The default separator between values in a group is comma (,)

Group Sold Products By The Date

1
2
3
4
5
6
7
8
9
10
11
SELECT
    sell_date,
    COUNT(DISTINCT product) AS num_sold,
    GROUP_CONCAT(DISTINCT product ORDER BY product) AS products
FROM
    Activities
GROUP BY
    sell_date
ORDER BY
    sell_date
;

Build the Equation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
SELECT
    CONCAT(GROUP_CONCAT(term ORDER BY power DESC SEPARATOR ''), '=0') AS equation
FROM (
    SELECT
        CONCAT(CASE WHEN factor > 0 THEN '+' ELSE '' END,
               factor,
               CASE WHEN power = 0 THEN '' ELSE 'X' END,
               CASE WHEN power = 0 OR power = 1 THEN '' ELSE '^' END,
               CASE WHEN power = 0 OR power = 1 THEN '' ELSE power END
        ) term,
        power
    FROM
        Terms
    ORDER BY
        power DESC) t
;

OVER

Calculate Salaries

1
2
3
4
5
6
7
8
9
10
11
12
13
14
SELECT
    company_id,
    employee_id,
    employee_name,
    CASE WHEN max_salary < 1000 THEN salary
         WHEN max_salary > 10000 THEN ROUND(salary * (1 - 0.49))
         ELSE ROUND(salary * (1 - 0.24)) END AS salary
FROM
    (SELECT
        s.*,
        MAX(salary) OVER(PARTITION BY company_id) AS max_salary
     FROM
        Salaries s) m
;

If PARTITION BY is not specified, the function treats all rows of the query result set as a single partition.

Nth Highest Salary

1
2
3
4
5
6
7
8
9
10
11
12
CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT
BEGIN
  DECLARE M INT;
  SET M = N - 1;
  RETURN (
      # Write your MySQL query statement below.
      SELECT DISTINCT Salary
      FROM Employee
      ORDER BY Salary DESC
      LIMIT M, 1
  );
END

Leetflex Banned Accounts

1
2
3
4
5
6
7
8
9
10
SELECT
    DISTINCT a.account_id
FROM
    LogInfo a, LogInfo b
-- BETWEEN is inclusive
WHERE
    a.login BETWEEN b.login AND b.logout
    AND a.account_id = b.account_id
    AND a.ip_address != b.ip_address
;

Immediate Food Delivery II

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
SELECT
    ROUND(100 * SUM(
        CASE order_date
            WHEN customer_pref_delivery_date THEN 1
            ELSE 0
        END) / COUNT(distinct customer_id), 2) AS immediate_percentage
FROM
    Delivery
WHERE
    (customer_id, order_date)
IN
    (SELECT
        customer_id, MIN(order_date) as min_date
    FROM
        Delivery
    GROUP BY
        customer_id
    )
;

Countries You Can Safely Invest In

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SELECT
    co.name AS country
FROM
    Person p
JOIN
    Calls ca
    ON p.id = ca.caller_id OR p.id = ca.callee_id
JOIN
    Country co
    ON co.country_code = LEFT(p.phone_number, 3)
GROUP BY
    co.name
HAVING
    AVG(duration) > (SELECT AVG(duration) FROM Calls)
;

UNION

Rearrange Products Table

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
SELECT
    product_id,
    'store1' AS store,
    store1 AS price
FROM
    Products
WHERE
    store1 IS NOT NULL

UNION

SELECT
    product_id,
    'store2' AS store,
    store2 AS price
FROM
    Products
WHERE
    store2 IS NOT NULL

UNION

SELECT
    product_id,
    'store3' AS store,
    store3 AS price
FROM
    Products
WHERE
    store3 IS NOT NULL
;

Product Price at a Given Date

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
SELECT
    DISTINCT product_id,
    10 AS price
FROM
    Products
GROUP BY
    product_id
HAVING
    MIN(change_date) > '2019-08-16'

UNION

SELECT
    product_id, new_price
FROM
    Products
WHERE
    (product_id, change_date)
IN
    (SELECT
        product_id, MAX(change_date) AS recent_date
    FROM
        Products
    WHERE
        change_date <= '2019-08-16'
    GROUP BY
        product_id)
;

Get the Second Most Recent Activity

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
SELECT
    *
FROM
    UserActivity
GROUP BY
    username
HAVING
    COUNT(*) = 1

UNION

SELECT
    u1.*
FROM
    UserActivity u1
LEFT JOIN
    UserActivity u2
    ON u1.username = u2.username AND u1.endDate < u2.endDate
GROUP BY
    u1.username, u1.endDate
HAVING
-- second most recent
    COUNT(u2.endDate) = 1
;

UNION ALL

  • UNION: only keeps unique records
  • UNION ALL: keeps all records, including duplicates

Monthly Transactions II

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
SELECT
    month,
    country,
    SUM(CASE WHEN state = 'approved' THEN 1 ELSE 0 END) AS approved_count,
    SUM(CASE WHEN state = 'approved' THEN amount ELSE 0 END) AS approved_amount,
    SUM(CASE WHEN state = 'back' THEN 1 ELSE 0 END) AS chargeback_count,
    SUM(CASE WHEN state = 'back' THEN amount ELSE 0 END) AS chargeback_amount
FROM
    (SELECT
        LEFT(c.trans_date, 7) AS month,
        country,
        'back' AS state,
        amount
    FROM
        Chargebacks c
    JOIN
        Transactions t
        ON c.trans_id = t.id

-- some trans_date exists in Chargebacks only
-- so we need to UNION the two tables
    UNION ALL

    SELECT
        LEFT(trans_date, 7) AS month,
        country,
        state,
        amount
    FROM
        Transactions
    WHERE
        state = 'approved'
    ) s
GROUP BY
    month, country
;

Last Person to Fit in the Elevator

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SELECT
    q1.person_name
FROM
    Queue q1
JOIN
    Queue q2
ON q1.turn >= q2.turn
GROUP BY
    q1.turn
HAVING
    SUM(q2.weight) <= 1000
ORDER BY
    q1.turn DESC
LIMIT 1
;

Median

Find Median Given Frequency of Numbers

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
SELECT
    AVG(Number) AS median
FROM
    Numbers n
WHERE
    -- let m = freq(x)
    -- (m + l) - (m - r) = l - r
    --   if l == r, x is median
    --   if l != r, x is median as long as m covers the diff
    Frequency >= ABS(
        -- m + l, where l is freq(numbers < x)
        (SELECT
            SUM(Frequency)
         FROM
            Numbers
         WHERE
            Number <= n.Number) -
        -- m + r, where r is freq(numbers > x)
        (SELECT
            SUM(Frequency)
         FROM
            Numbers
         WHERE
            Number >= n.Number))

Common Table Expressions (CTE)

Create a Session Bar Chart

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
WITH cte AS (
    SELECT
        '[0-5>' AS bin,
        0 AS min_duration,
        5 * 60 AS max_duration
    UNION ALL
    SELECT
        '[5-10>' AS bin,
        5 * 60 AS min_duration,
        10 * 60 AS max_duration
    UNION ALL
    SELECT
        '[10-15>' AS bin,
        10 * 60 AS min_duration,
        15 * 60 AS max_duration
    UNION ALL
    SELECT
        '15 or more' AS bin,
        15 * 60 as min_duration,
        ~0 AS max_duration
)

SELECT
    cte.bin,
    COUNT(s.session_id) AS total
FROM
    cte
LEFT JOIN
    Sessions s
    ON s.duration >= min_duration 
        AND s.duration < max_duration				 
GROUP BY
    cte.bin
;

A temporary named result set.

Restaurant Growth

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
WITH Day AS(
    SELECT visited_on, SUM(amount) AS day_sum
    FROM Customer
    GROUP BY visited_on)

SELECT
    a.visited_on AS visited_on,
    SUM(b.day_sum) AS amount,
    ROUND(AVG(b.day_sum), 2) AS average_amount
FROM
    Day a, Day b
WHERE
    DATEDIFF(a.visited_on, b.visited_on) BETWEEN 0 AND 6
GROUP BY
    a.visited_on
HAVING
    COUNT(b.visited_on) = 7
;

Suspicious Bank Accounts

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
WITH income AS(
    SELECT
        a.account_id,
        YEAR(t.day) * 12 + MONTH(t.day) AS month
    FROM
        Accounts a
    LEFT JOIN
        Transactions t
        USING(account_id)
    WHERE
        type = 'Creditor'
    GROUP BY
        a.account_id,
        DATE_FORMAT(t.day, "%Y-%m"),
        max_income
        HAVING SUM(amount) > max_income
)

SELECT
    DISTINCT t1.account_id
FROM
    income t1
JOIN
    income t2
    ON t1.account_id = t2.account_id
    -- can be generalized to n consecutive months
    AND t2.month - t1.month BETWEEN 0 AND 1
GROUP BY
    t1.account_id,
    t1.month
    HAVING COUNT(DISTINCT t2.month) = 2
ORDER BY
    t1.account_id
;

Find the Quiet Students in All Exams

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
WITH cte AS(
    SELECT
        exam_id,
        exam.student_id,
        student_name,
        score,
        RANK() OVER(PARTITION BY exam_id ORDER BY score) rank_asc,
        RANK() OVER(PARTITION BY exam_id ORDER BY score DESC) rank_desc 
    FROM
        exam
    LEFT JOIN
        student
        ON exam.student_id = student.student_id
)

SELECT
    DISTINCT student_id,
    student_name
FROM
    cte
WHERE
    student_id NOT IN (
        SELECT
            student_id
        FROM
            cte
        WHERE
            rank_asc = 1 or rank_desc = 1
    )
ORDER BY
    student_id
;

Number of Transactions per Visit

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
-- transactions per user per visist date
WITH vt AS(
    SELECT
        v.user_id,
        visit_date,
        COUNT(t.transaction_date) AS transaction_count
    FROM
        Visits v
    LEFT JOIN
        Transactions t
        ON v.visit_date = t.transaction_date AND v.user_id = t.user_id
    GROUP BY
        v.user_id,
        visit_date),

-- generates a table with numbers [0, row count of Transactions table]
    row_nums AS(
        SELECT
            ROW_NUMBER() OVER () AS rn
        FROM
            Transactions 
        UNION 
        SELECT 0) 
				 
SELECT
    rn AS transactions_count,
    COUNT(vt.transaction_count) AS visits_count
FROM
    vt
RIGHT JOIN
    row_nums
    ON transaction_count = rn
WHERE
    -- removes excessive row numbers
    rn <= (
        SELECT
            MAX(transaction_count)
        FROM
            vt)
GROUP BY
    rn
ORDER BY
    rn
;

Recursive CTE

1
2
3
4
5
6
7
8
9
10
11
12
WITH RECURSIVE expression_name (column_list)
AS
(
    -- Anchor member
    initial_query  
    UNION ALL
    -- Recursive member that references expression_name.
    recursive_query  
)
-- references expression name
SELECT *
FROM   expression_name

Flow chart

Ref: https://www.sqlservertutorial.net/sql-server-basics/sql-server-recursive-cte/

Find the Missing IDs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
WITH RECURSIVE Seq AS (
    SELECT
        1 AS ids
    UNION
    SELECT
        ids + 1
    FROM
        Seq
    WHERE
        ids < (
            SELECT
                MAX(customer_id)
            FROM
                Customers)
    )

SELECT
    ids
FROM
    Seq
WHERE
    ids NOT IN (
        SELECT
            customer_id
        FROM
            Customers)
;

Total Sales Amount by Year

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
WITH RECURSIVE dates as (
    SELECT
        MIN(period_start) AS start_date,
        MAX(period_end) AS end_date
    FROM
        Sales
	UNION ALL
    SELECT
        DATE_ADD(start_date, INTERVAL 1 DAY),
        end_date 
    FROM
        dates
    WHERE
        start_date < end_date
) 

SELECT
    CAST(p.product_id AS char) AS product_id,
    p.product_name AS product_name,
    CAST(YEAR(start_date) AS char) AS report_year,
    SUM(average_daily_sales) AS total_amount
FROM
    Product p 
LEFT JOIN
    Sales s
    ON p.product_id = s.product_id
LEFT JOIN
    dates d
    ON start_date BETWEEN period_start and period_end 
GROUP BY
    1, 3
ORDER BY
    1, 3
;

Find the Subtasks That Did Not Execute

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
WITH RECURSIVE cte AS (
    SELECT
        task_id,
        subtasks_count
    FROM
        Tasks
    UNION ALL
    SELECT
        task_id,
        subtasks_count - 1
    FROM
        cte
    WHERE
        subtasks_count > 1
)

SELECT
    task_id,
    subtasks_count AS subtask_id
FROM
    cte
WHERE
    (task_id, subtasks_count) NOT IN (SELECT * FROM Executed)
;

All People Report to the Given Manager

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
WITH RECURSIVE cte AS (
    SELECT
        employee_id
    FROM
        Employees
    WHERE
        manager_id = 1 AND employee_id != 1
    UNION ALL
    SELECT
        e.employee_id
    FROM
        cte c
    JOIN
        Employees e
        ON c.employee_id = e.manager_id
)

SELECT
    employee_id
FROM
    cte
;

JOIN

Grand Slam Titles

1
2
3
4
5
6
7
8
9
10
11
12
SELECT
    player_id,
    player_name,
    SUM(player_id = Wimbledon) + SUM(player_id = Fr_open) + SUM(player_id = US_open) + SUM(player_id = Au_open) AS grand_slams_count
FROM
    Players
JOIN
    Championships
    ON player_id = Wimbledon OR player_id = Fr_open OR player_id = US_open OR player_id=Au_open
GROUP BY
    player_id
;

LEFT JOIN

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
WITH Friends AS (
    SELECT
        user1_id,
        user2_id
    FROM
        Friendship
    UNION
    SELECT
        user2_id AS user1_id,
        user1_id AS user2_id
    FROM
        Friendship
)

SELECT
    user1_id AS user_id,
    l1.page_id,
    COUNT(DISTINCT user2_id) as friends_likes
FROM
    Friends
JOIN
    Likes l1
    -- friend likes the page
    ON user2_id = l1.user_id
LEFT JOIN
    Likes l2
    ON user1_id = l2.user_id AND l1.page_id = l2.page_id
WHERE
    -- user doesn't like the page
    l2.page_id is NULL
GROUP BY
    user_id,
    page_id
;

CROSS JOIN

Cartesian Product

All Valid Triplets That Can Represent a Country

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
SELECT
    sa.student_name AS member_a,
    sb.student_name AS member_b,
    sc.student_name AS member_c
FROM
    schoola sa
CROSS JOIN
    schoolb sb
CROSS JOIN
    schoolc sc
WHERE
    sa.student_name != sb.student_name
    AND sa.student_name != sc.student_name
    AND sb.student_name != sc.student_name
    AND sa.student_id != sc.student_id
    AND sb.student_id != sc.student_id
    AND sa.student_id != sb.student_id
;

Multiple JOINs

Number of Trusted Contacts of a Customer

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
SELECT
    i.invoice_id,
    cu.customer_name,
    i.price,
    COUNT(co.user_id) as contacts_cnt,
    COUNT(cu2.email) as trusted_contacts_cnt
FROM
    invoices i
JOIN
    Customers cu
    ON cu.customer_id = i.user_id
LEFT JOIN
    Contacts co
    ON co.user_id = cu.customer_id
LEFT JOIN
    Customers cu2
    ON cu2.email = co.contact_email
GROUP BY
    i.invoice_id, cu.customer_name, i.price
ORDER BY
    i.invoice_id
;

The Most Recent Three Orders

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
SELECT
    c.name AS customer_name,
    o1.customer_id,
    o1.order_id,
    o1.order_date
FROM
    Customers c
JOIN
    Orders o1    
    ON c.customer_id = o1.customer_id
JOIN
    Orders o2
    ON o1.customer_id = o2.customer_id
    AND o1.order_date <= o2.order_date
GROUP BY
    customer_name,
    c.customer_id,
    o1.order_id,
    o1.order_date
HAVING
    COUNT(o2.order_date) <= 3
ORDER BY
    customer_name,
    customer_id,
    order_date DESC
;

Implicit Join (= Cross Join)

Strong Friendship

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
WITH Friends AS (
    SELECT
        user1_id,
        user2_id
    FROM
        Friendship
    UNION
    SELECT
        user2_id AS user1_id,
        user1_id AS user2_id
    FROM
        Friendship
)

SELECT
    f.user1_id,
    f.user2_id,
    COUNT(f2.user2_id) AS common_friend
FROM
    Friendship f
JOIN
    Friends f1
    -- u1
    ON f.user1_id = f1.user1_id
JOIN friends f2
    -- u2
    ON f.user2_id = f2.user1_id
    -- common friends
    AND f1.user2_id = f2.user2_id
GROUP BY
    1, 2
    HAVING COUNT(3) >= 3

Consecutive Numbers

1
2
3
4
5
6
7
8
9
10
11
12
SELECT DISTINCT
    l1.Num AS ConsecutiveNums
FROM
    Logs l1,
    Logs l2,
    Logs l3
WHERE
    l1.Id = l2.Id - 1
    AND l2.Id = l3.Id - 1
    AND l1.Num = l2.Num
    AND l2.Num = l3.Num
;

Leetcodify Similar Friends

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SELECT DISTINCT
    l1.user_id AS user1_id,
    l2.user_id AS user2_id
FROM
    Listens l1,
    Listens l2
WHERE
    l1.song_id = l2.song_id
    AND l1.day = l2.day
    AND l1.user_id != l2.user_id
    AND (l1.user_id, l2.user_id) IN (SELECT * FROM Friendship)
GROUP BY
    1, 2, l1.day
    HAVING COUNT(DISTINCT l1.song_id) >= 3
;

String Functions

1
CONCAT(str1,str2,...)
1
LEFT(str,len)
1
LOWER(str)
1
2
3
4
SUBSTR(str,pos)
SUBSTR(str FROM pos)
SUBSTR(str,pos,len)
SUBSTR(str FROM pos FOR len)
1
TRIM(str)
1
CHAR_LENGTH(str)

Returns the length of the string str, measured in characters. A multibyte character counts as a single character. This means that for a string containing five 2-byte characters, LENGTH() returns 10, whereas CHAR_LENGTH() returns 5.

Date and Time Functions

1
DATE_FORMAT(date,format)

format

1
YEAR(date)
1
TIMESTAMPDIFF(unit,datetime_expr1,datetime_expr2)

Comparison Operators

Comparison operations result in a value of 1 (TRUE), 0 (FALSE), or NULL.

Number of Calls Between Two Persons

1
2
3
4
5
6
7
8
9
10
SELECT
    LEAST(from_id,to_id) AS person1,
    GREATEST(from_id,to_id) AS person2,
    COUNT(*) AS call_count,
    SUM(duration) AS total_duration
FROM
    Calls
GROUP BY
    person1, person2
;
1
COUNT(DISTINCT *)

Specifies that COUNT returns the number of unique nonnull values.

Window Functions

SUM

Cumulative sum.

The Number of Seniors and Juniors to Join the Company

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
WITH cte AS (
    SELECT
        employee_id,
        experience,
        SUM(salary) OVER(PARTITION BY experience ORDER BY salary) AS total_salary
    FROM
        Candidates)

SELECT
    'Senior' AS experience,
    COUNT(employee_id) AS accepted_candidates
FROM
    cte
WHERE
    experience = 'Senior'
    AND total_salary < 70000
UNION
SELECT
    'Junior' AS experience,
    COUNT(employee_id) AS accepted_candidates
FROM
    cte
WHERE
    experience = 'Junior'
    AND total_salary < (
        SELECT
            70000 - IFNULL(MAX(total_salary), 0)
        FROM
            cte
        WHERE
            experience = 'Senior'
            AND total_salary < 70000)

RANK

Tournament Winners

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
WITH c1 AS (
    SELECT
        player_id,
        group_id,
        SUM(score) AS scores
    FROM (
        SELECT
            first_player AS player_id,
            first_score AS score
        FROM matches
        UNION ALL
        SELECT
            second_player AS player_id,
            second_score AS score
        FROM
            matches
    ) t
    LEFT JOIN
        players
        USING (player_id)
    GROUP BY
        player_id,
        group_id
),

c2 AS (
    SELECT
        group_id,
        player_id,
        RANK() OVER (PARTITION BY group_id ORDER BY scores DESC, player_id) AS rk
    FROM
        c1
)

SELECT
    group_id,
    player_id
FROM
    c2
WHERE
    rk = 1
;

The Most Frequently Ordered Products for Each Customer

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
SELECT
    customer_id,
    product_id,
    product_name
FROM
    (SELECT
        o.customer_id,
        o.product_id,
        p.product_name,
        RANK() OVER (PARTITION BY customer_id ORDER BY COUNT(o.product_id) DESC) AS r
    FROM
        Orders o
    JOIN
        Products p
        ON o.product_id = p.product_id
    GROUP BY
        customer_id, product_id) t
WHERE
    r = 1
ORDER BY
    customer_id, product_id
;

Report Contiguous Dates

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
SELECT
    stats AS period_state,
    MIN(day) AS start_date,
    MAX(day) AS end_date
FROM
    (SELECT
        day,
        stats,
        rk,
        -- use the difference between global rank and relative rank as group id
        (RANK() OVER (ORDER BY day) - rk) AS inv
    FROM
        (SELECT
            fail_date AS day,
            'failed' AS stats,
            RANK() OVER (ORDER BY fail_date) AS rk
        FROM
            Failed
        WHERE
            fail_date BETWEEN '2019-01-01' AND '2019-12-31'
        UNION
        SELECT
            success_date AS day,
            'succeeded' AS stats,
            RANK() OVER (ORDER BY success_date) AS rk
        FROM
            Succeeded
        WHERE success_date BETWEEN '2019-01-01' AND '2019-12-31') t
    ) c
GROUP BY
    inv,
    stats
ORDER BY
    start_date
;

For example:

1
2
3
4
5
6
7
8
9
10
day   | overall_ranking| stats | rk | inv
| 2019-01-01 | 1 | success | 1 | 0
| 2019-01-02 | 2 | success | 2 | 0
| 2019-01-03 | 3 | success | 3 | 0
| 2019-01-04 | 4 | fail | 1 | 3
| 2019-01-05 | 5 | fail | 2 | 3
| 2019-01-06 | 6 | success | 4 | 2
| 2019-01-07 | 7 | success | 5 | 2
| 2019-01-08 | 8 | fail | 3 | 5
| 2019-01-09 | 9 | fail | 4 | 5

RANK

Arrange Table by Gender

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
WITH cte AS(
    SELECT *,
        RANK() OVER(PARTITION BY gender ORDER BY user_id) AS rnk,
        IF(gender = 'female', 0, IF(gender = 'other', 1, 2)) AS rnk2
    FROM Genders
)

SELECT
    user_id,
    gender
FROM
    cte
ORDER BY
    rnk,
    rnk2
;

Compute the Rank as a Percentage

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
SELECT
    student_id,
    department_id,
    COALESCE
        (ROUND(
            (RANK () OVER (PARTITION BY department_id ORDER BY mark DESC) - 1) * 100 / 
            (
                SELECT
                    COUNT(*) - 1
                FROM
                    Students s2
                WHERE
                    s1.department_id = s2.department_id)
            , 2), 0) AS percentage
FROM
    Students s1
;

DENSE_RANK

Rank Scores

1
2
3
4
5
6
SELECT
    score,
    DENSE_RANK() OVER (ORDER BY score DESC) AS `rank`
FROM
    scores
;

Find max/min

Group Employees of the Same Salary

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
SELECT
    *,
    DENSE_RANK() OVER(ORDER BY salary) AS team_id
FROM
    Employees
WHERE
    salary NOT IN(
        SELECT
            salary
        FROM
            Employees
        GROUP BY
            salary
        HAVING COUNT(*) = 1)
ORDER BY
    team_id,
    employee_id
;

First and Last Call On the Same Day

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
WITH cte AS (
    SELECT
        caller_id AS user_id,
        recipient_id,
        call_time
    FROM
        Calls
    UNION
    SELECT
        recipient_id AS user_id,
        caller_id AS recipient_id,
        call_time
    FROM
        Calls),

cte1 AS (
    SELECT
        user_id,
        recipient_id,
        DATE(call_time) as day,
        DENSE_RANK() OVER(PARTITION BY user_id, DATE(call_time) ORDER BY call_time ASC) AS asc_rank,
        DENSE_RANK() OVER(PARTITION BY user_id, DATE(call_time) ORDER BY call_time DESC) AS desc_rank
    FROM
        cte)

SELECT
    DISTINCT user_id
FROM
    cte1
WHERE
    asc_rank = 1 OR desc_rank = 1
GROUP BY
    user_id,
    day
    HAVING COUNT(DISTINCT recipient_id) = 1
;

LEAD

Biggest Window Between Visits

1
LEAD(expr [, N[, default]]) [null_treatment] over_clause

Returns the value of expr from the row that leads (follows) the current row by N rows within its partition. If there is no such row, the return value is default. If N is not specified, the default is 1.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
SELECT
    user_id,
    MAX(diff) AS biggest_window
FROM
    (SELECT
        user_id,
        DATEDIFF(LEAD(visit_date, 1, '2021-01-01') OVER(PARTITION BY user_id ORDER BY visit_date), visit_date) AS diff
     FROM
        userVisits) t
GROUP BY
    user_id
ORDER BY
    user_id
;

LAG

Consecutive Numbers

1
2
3
4
5
6
7
8
9
10
11
12
SELECT
    DISTINCT Num AS ConsecutiveNums
FROM
    (SELECT
        Num,
        LEAD(Num) OVER(ORDER BY id) AS lead_num,
        LAG(Num) OVER(ORDER BY id) AS lag_num
     FROM
        Logs) t
WHERE
    Num = lead_num AND Num = lag_num
;

Find Interview Candidates

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
WITH cte AS (
    SELECT
        user_id,
        name,
        mail,
        contest_id,
        user_id = gold_medal AS gold,
        user_id = silver_medal AS silver,
        user_id = bronze_medal AS bronze,
        LAG(contest_id, 2) OVER (PARTITION BY user_id ORDER BY contest_id) AS lagged_contest_id
    FROM
        Users
    LEFT JOIN
        Contests
        ON user_id = gold_medal OR user_id = silver_medal OR user_id = bronze_medal
)

SELECT
    name,
    mail
FROM
    cte
GROUP BY
    user_id
    -- contest_id is consecutive in Contests
    -- so whenever the lag difference is 2, there must be 3 consecutive wins
    HAVING SUM(gold) >= 3 OR SUM(contest_id - lagged_contest_id = 2) >= 1
;

ROW_NUMBER

Find the Start and End Number of Continuous Ranges

ROW_NUMBER numbers all rows sequentially (for example 1, 2, 3, 4, 5).

1
2
3
4
5
6
7
8
9
10
11
12
SELECT
    MIN(log_id) AS start_id,
    MAX(log_id) AS end_id
FROM
    (SELECT
        log_id,
        ROW_NUMBER() OVER(ORDER BY log_id) AS num
     FROM
        Logs) l
GROUP BY
    log_id - num
;
1
2
3
4
5
6
7
log_id, num, difference
1, 1, 0
2, 2, 0
3, 3, 0
7, 4, 3
8, 5, 3
10, 6, 4

Longest Winning Streak

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
WITH cte AS (
    SELECT
        player_id,
        result,
        match_day,
        ROW_NUMBER() OVER(PARTITION BY player_id ORDER BY match_day) AS rnk
    FROM
        matches
),

player_groups AS (
SELECT
    player_id,
    -- winning streak will be in the same group
    rnk - ROW_NUMBER() OVER(PARTITION BY player_id ORDER BY match_day) AS group_id
FROM
    cte
WHERE
    result = 'Win'
)

SELECT
    players.player_id,
    IFNULL(MAX(group_count.cnt), 0) AS longest_streak
FROM
    (SELECT
        DISTINCT player_id
     FROM
        matches) players
LEFT JOIN
    (SELECT
        player_id,
        group_id,
        COUNT(*) AS cnt
     FROM
        player_groups
     GROUP BY
        1, 2) group_count
    ON players.player_id = group_count.player_id
GROUP BY
    1
;

he Change in Global Rankings

1
2
3
4
5
6
7
8
9
10
11
SELECT
    t.team_id,
    t.name,
    CAST(ROW_NUMBER() OVER(ORDER BY points DESC, name) AS SIGNED) -
    CAST(ROW_NUMBER() OVER(ORDER BY points + points_change DESC, name) as SIGNED) as rank_diff
FROM
    TeamPoints t
JOIN
    PointsChange p
    ON t.team_id = p.team_id
;

Median Employee Salary

1
2
3
4
5
6
7
8
9
10
11
12
13
14
SELECT
    Id,
    Company,
    Salary
FROM
    (SELECT
        *,
        ROW_NUMBER() OVER(PARTITION BY COMPANY ORDER BY Salary, Id) AS rank_asc,
        ROW_NUMBER() OVER(PARTITION BY COMPANY ORDER BY Salary DESC, Id DESC) AS rank_desc
    FROM
        Employee) t
WHERE
    rank_asc BETWEEN rank_desc - 1 AND rank_desc + 1
;

Students Report By Geography

1
2
3
4
5
6
7
8
9
10
11
12
13
SELECT
    -- MAX considers only non-null values
    MAX(CASE WHEN continent = 'America' THEN name END) AS America,
    MAX(CASE WHEN continent = 'Asia' THEN name END) AS Asia,
    MAX(CASE WHEN continent = 'Europe' THEN name END) AS Europe
FROM
    (SELECT *,
        ROW_NUMBER() OVER(PARTITION BY continent ORDER BY name) AS row_id
     FROM
        student) t
GROUP BY
    row_id
;

FIRST_VALUE

Change Null Values in a Table to the Previous Value

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
WITH cte AS (
    SELECT
        id,
        drink,
        ROW_NUMBER() OVER () AS rn
    FROM
        CoffeeShop
),

cte2 AS (
    SELECT
        id,
        drink,
        rn,
        -- null row has same drink as the non-null row above
        SUM(1 - ISNULL(drink)) OVER (ORDER BY rn) AS drink_id
    FROM
        cte
)

SELECT
    id,
    FIRST_VALUE(drink) OVER (PARTITION BY drink_id) AS drink
FROM
    cte2
ORDER BY
    rn
;

Regular Expressions

Find Users With Valid E-Mails

1
2
3
4
5
6
7
8
9
SELECT
    user_id,
    name,
    mail
FROM
    Users
WHERE
    REGEXP_LIKE(mail, '^[A-Za-z]+[A-Za-z0-9\_\.\-]*@leetcode\.com')
;

User-defined Variables

The Number of Passengers in Each Bus II

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
-- accumulated number of passengers arrive before each bus
WITH cte AS (
    SELECT
        bus_id,
        b.arrival_time,
        capacity,
        count(passenger_id) AS cnt
    FROM
        Buses b
    LEFT JOIN
        Passengers p
        ON p.arrival_time <= b.arrival_time
    GROUP BY
        bus_id
    ORDER BY
        arrival_time
)

SELECT
    bus_id,
    passengers_cnt
FROM
    (SELECT
        bus_id,
        capacity,
        cnt,
        @passengers_cnt := LEAST(capacity, cnt - @accum) AS passengers_cnt,
        @accum := @accum + @passengers_cnt
    FROM
        cte,
        (SELECT
            -- @accum: number of people who already took earlier buses
            @accum := 0,
            -- @passengers_cnt: number of people who take the current bus
            @passengers_cnt := 0) init) t
ORDER BY
    bus_id
;

Prepared Statements

Prepared statements has the following benefits:

  • Less overhead of parsing the statement at each execution
  • Protection against SQL injection attacks

  • PREPARE
  • EXECUTE
  • DEALLOCATE PREPARE

Dynamic Pivoting of a Table

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
CREATE PROCEDURE PivotProducts()
BEGIN
    -- overrides GROUP_CONCAT default length (= 1024 characters)
    SET SESSION group_concat_max_len = 1000000;

    -- stores case-when statements for dynamically generated columns in a variable
    SET @case_stmt = NULL;
    SELECT GROUP_CONCAT(DISTINCT CONCAT('SUM(CASE WHEN store = "', store, '" THEN price END) AS ', store))
    INTO @case_stmt
    FROM products;

    -- inserts @case_stmt in the main query
    SET @sql = CONCAT('SELECT product_id, ', @case_stmt, ' FROM products GROUP BY product_id');

    PREPARE sql_query FROM @sql;
    EXECUTE sql_query;
    DEALLOCATE PREPARE sql_query;
END

INFORMATION_SCHEMA

INFORMATION_SCHEMA Table Reference

Dynamic Unpivoting of a Table

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
CREATE PROCEDURE UnpivotProducts()
BEGIN
    SET SESSION group_concat_max_len = 1000000;

    SET @sql = NULL;
    SELECT GROUP_CONCAT(
        CONCAT(
            'SELECT product_id, "', COLUMN_NAME, '" store, ', COLUMN_NAME,
            ' price FROM Products WHERE ', COLUMN_NAME, ' IS NOT NULL'
        ) SEPARATOR ' UNION '
    )
    INTO @sql
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_SCHEMA = 'test' AND TABLE_NAME = 'products' AND COLUMN_NAME != 'product_id';

    PREPARE sql_query FROM @sql;
    EXECUTE sql_query;
    DEALLOCATE PREPARE sql_query;
END
This post is licensed under CC BY 4.0 by the author.