Skip to content

HannaStselmashok/f1_fun_PostgreSQL

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

26 Commits
 
 
 
 
 
 
 
 

Repository files navigation

F1 race results 2023

Practicing PostgreSQL using f1 data

Uploading and transforming the data

Table 1 2023 Race Results

  1. Renaming the columns
ALTER TABLE public.formula1_data RENAME COLUMN "Track" TO track_country;
ALTER TABLE public.formula1_data RENAME COLUMN "Position" TO "position";
ALTER TABLE public.formula1_data RENAME COLUMN "No" TO driver_number;
ALTER TABLE public.formula1_data RENAME COLUMN "Driver" TO driver_name;
ALTER TABLE public.formula1_data RENAME COLUMN "Team" TO team_name;
ALTER TABLE public.formula1_data RENAME COLUMN "Starting Grid" TO starting_grid;
ALTER TABLE public.formula1_data RENAME COLUMN "Laps" TO laps;
ALTER TABLE public.formula1_data RENAME COLUMN "Time/Retired" TO finish_time_text;
ALTER TABLE public.formula1_data RENAME COLUMN "Points" TO points;
ALTER TABLE public.formula1_data RENAME COLUMN "Set Fastest Lap" TO has_set_fastest_lap;
ALTER TABLE public.formula1_data RENAME COLUMN "Fastest Lap Time" TO fastest_lap_time;

SELECT *
FROM formula1_data
LIMIT 5

image

  1. Changing data type to integer for position column (replace NC / DQ with 20)
--Expression ~ '^\d+$' check if the data in a cell is an integer number
--if it is - change data type to integer
--if it's not - replace the value with 20

SELECT 
    case 
        when position ~ '^\d+$'
        then cast(position as integer)
        else 20
    end as position
FROM 
    formula1_data

Table 2 races order

Fun queries

  1. Find the average position of every racer during the championship; total points, the share of points contributed by the team's lead driver
WITH drivers_ranked as (

    SELECT 
        team_name,
        driver_name,
        avg(
            case 
                when position ~ '^\d+$'
                then cast(position as integer)
                else 20
            end
        ) as average_position,
        sum(points) as sum_points,
        row_number()
        over (
            partition by team_name 
            order by sum(points) desc
            ) as row_number_drivers
    FROM 
        formula1_data
    GROUP BY 
        1, 2
    
), drivers_ranked_main as (

    SELECT 
        team_name,  
        driver_name,
        round(average_position, 2) as average_position,
        sum_points,
        row_number_drivers
    FROM
        drivers_ranked
    WHERE 
        row_number_drivers <= 2
    ORDER BY 
        1, 4 desc
        
)

SELECT 
    team_name,
    driver_name,
    average_position,
    sum_points,
    case 
        when row_number_drivers = 1
        then round(sum_points / sum(sum_points)
            over (partition by team_name) * 100, 2)
        else null 
    end as first_driver_ratio
FROM
    drivers_ranked_main

image

  1. Calculate a driver's championship points for each race; a driver's position in the championship for each race.
WITH drivers_sum_points as (

    SELECT 
        round as no_race,
        track_country,
        driver_name, 
        sum(points)
        over (
            partition by driver_name
            order by round
            ) as sum_points
    FROM
        formula1_data 
    LEFT JOIN 
        races_order 
        on lower(formula1_data.track_country) = replace(races_order.grand_prix_id, '-', ' ') 
    GROUP BY 
        round,
        track_country,
        driver_name,
        points

), drivers_in_championship as (
    
    SELECT 
        no_race,
        track_country,
        driver_name,
        sum_points,
        dense_rank()
        over (
            partition by no_race 
            order by sum_points desc
            ) as driver_in_championship
    FROM
        drivers_sum_points
    ORDER BY 
        no_race,
        driver_name
)

SELECT 
    *
FROM 
    drivers_in_championship
--WHERE 
--    driver_name = 'Carlos Sainz'
ORDER BY 
    no_race

image

  1. Display the rating of drivers based on the frequency of position changes in the championship.
WITH drivers_sum_points as (

    SELECT 
        round as no_race,
        track_country,
        driver_name, 
        sum(points)
        over (
            partition by driver_name 
            order by round) as sum_points
    FROM    
        formula1_data
    INNER JOIN 
        races_order on lower(formula1_data.track_country) = replace(races_order.grand_prix_id, '-', ' ') 
    GROUP BY 
        round,
        track_country,
        driver_name,
        points

), driver_place_in_champoinship as (

    SELECT 
        no_race,
        track_country,
        driver_name,
        sum_points,
        dense_rank() 
        over (
            partition by no_race 
            order by sum_points desc) as driver_in_championship
    FROM drivers_sum_points
    ORDER BY 
        no_race,
        driver_name

), driver_place_in_champoinship_rank as (

    SELECT 
        no_race,
        track_country,
        driver_name,
        sum_points,
        driver_in_championship,
        case   
            when driver_in_championship = lag(driver_in_championship, 1) 
            over (
                partition by driver_name 
                order by no_race)
            then 0
            else 1
        end as change_position_count
    FROM 
        driver_place_in_champoinship
    ORDER BY 
        3, 1
    
)

SELECT 
    driver_name,
    sum(change_position_count) as change_of_position
FROM 
    driver_place_in_champoinship_rank
GROUP BY 
    driver_name
ORDER BY 
    2 DESC

image

  1. For each race, display the driver who took the best lap
SELECT 
    round asno_race,
    track_country,
    driver_name,
    fastest_lap_time 
FROM
    formula1_data
INNER JOIN 
    races_order
    on lower(formula1_data.track_country) = replace(races_order.grand_prix_id, '-', ' ') 
WHERE
    has_set_fastest_lap = 'Yes'
ORDER BY 
    1

image

  1. Find the racers with the most intense battle during the championship
WITH drivers_sum_points as (

    SELECT 
        round as no_race,
        track_country,
        driver_name, 
        sum(points) over (partition by driver_name order by round) as sum_points
    FROM    
        formula1_data
    INNER JOIN
        races_order 
        on lower(formula1_data.track_country) = replace(races_order.grand_prix_id, '-', ' ') 
    GROUP BY 
        round,
        track_country,
        driver_name,
        points

), dif_points as (

    SELECT 
        dsp1.no_race,
        dsp1.track_country,
        dsp1.driver_name as driver_name_1,
        dsp2.driver_name as driver_name_2,
        abs(dsp1.sum_points - dsp2.sum_points) as difference_points
    FROM
        drivers_sum_points as dsp1
    INNER JOIN 
        drivers_sum_points as dsp2
        on dsp1.track_country = dsp2.track_country
        and dsp1.driver_name <> dsp2.driver_name
        and dsp1.driver_name < dsp2.driver_name
        
)

SELECT 
    driver_name_1,
    driver_name_2,
    round(avg(difference_points),2) as sweaty_avg,
    row_number() 
    over(
        order by round(avg(difference_points),2)) as sweaty_rank
FROM
    dif_points
GROUP BY 
    1, 2
ORDER BY 
    3

image image

About

Practicing SQL using f1 data

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published