Skip to content
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
models:

Check failure on line 1 in src/dbt/kipptaf/models/extracts/google/sheets/properties/rpt_gsheets__nsc_enrollment_new.yml

View workflow job for this annotation

GitHub Actions / Trunk Check

prettier

Incorrect formatting, autoformat by running 'trunk fmt'
- name: rpt_gsheets__nsc_enrollment_new
data_tests:
- dbt_utils.unique_combination_of_columns:
Expand All @@ -18,7 +18,7 @@
description: Salesforce Account ID of the college or university.
- name: start_date__c
data_type: date
description: Date the enrollment began (first day of the academic year).
description: First day of the earliest semester in this NSC enrollment period.
- name: actual_end_date__c
data_type: date
description: Date the enrollment ended per NSC data.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,54 +1,3 @@
with
nsc_with_account as (
select
n.contact_id,
n.enrollment_begin,
n.enrollment_end,
n.enrollment_status,
n.graduated,
n.two_year_four_year,

x.account_id,

extract(year from n.enrollment_begin) as enrollment_begin_year,

row_number() over (
partition by
n.contact_id, x.account_id, extract(year from n.enrollment_begin)
order by n.enrollment_begin desc
) as rn_recent,
from {{ ref("stg_nsc__student_tracker") }} as n
inner join
{{ ref("stg_google_sheets__kippadb__nsc_crosswalk") }} as x
on n.college_code_branch = x.college_code_nsc
and x.rn_college_code_nsc = 1
where n.record_found_y_n = 'Y'
),

nsc_enrollment as (
select
contact_id,
account_id,
enrollment_begin_year,

min(enrollment_begin) as enrollment_begin,
max(enrollment_end) as enrollment_end,

/* any_graduated: if any NSC row for this enrollment shows graduation */
countif(graduated = 'Y') > 0 as any_graduated,
/* any_withdrawn: W is the NSC single-character code for Withdrawn */
countif(enrollment_status = 'W') > 0 as any_withdrawn,

max(
if(rn_recent = 1, enrollment_status, null)
) as current_enrollment_status,
max(
if(rn_recent = 1, two_year_four_year, null)
) as current_two_year_four_year,
from nsc_with_account
group by contact_id, account_id, enrollment_begin_year
)

select
n.contact_id as student__c,
n.account_id as school__c,
Expand All @@ -71,14 +20,21 @@ select
case
n.current_two_year_four_year
when '4-year'
then "Bachelor's (4-year)"
then 'Bachelor''s (4-year)'
when '2-year'
then "Associate's (2 year)"
then 'Associate''s (2 year)'
end as pursuing_degree_type__c,
from nsc_enrollment as n
from {{ ref("int_nsc__enrollments") }} as n
left join
{{ ref("stg_kippadb__enrollment") }} as e
on n.contact_id = e.student
and n.account_id = e.school
and n.enrollment_begin_year = e.start_date_year
/*
Match on date proximity (±180 days) rather than calendar year to handle
enrollments that span an academic year boundary (e.g. Fall+Spring).
*/
and n.enrollment_begin
between date_sub(e.start_date, interval 180 day) and date_add(
e.start_date, interval 180 day
)
where e.id is null
Original file line number Diff line number Diff line change
@@ -1,58 +1,9 @@
with

Check failure on line 1 in src/dbt/kipptaf/models/extracts/google/sheets/rpt_gsheets__nsc_enrollment_updates.sql

View workflow job for this annotation

GitHub Actions / Trunk Check

sqlfmt

Incorrect formatting, autoformat by running 'trunk fmt'
nsc_with_account as (
select
n.contact_id,
n.enrollment_begin,
n.enrollment_end,
n.enrollment_status,
n.graduated,
n.two_year_four_year,

x.account_id,

extract(year from n.enrollment_begin) as enrollment_begin_year,

row_number() over (
partition by
n.contact_id, x.account_id, extract(year from n.enrollment_begin)
order by n.enrollment_begin desc
) as rn_recent,

from {{ ref("stg_nsc__student_tracker") }} as n
inner join
{{ ref("stg_google_sheets__kippadb__nsc_crosswalk") }} as x
on n.college_code_branch = x.college_code_nsc
and x.rn_college_code_nsc = 1
where n.record_found_y_n = 'Y'
),

nsc_enrollment as (
select
contact_id,
account_id,
enrollment_begin_year,

min(enrollment_begin) as enrollment_begin,
max(enrollment_end) as enrollment_end,

/* any_graduated: if any NSC row for this enrollment shows graduation */
countif(graduated = 'Y') > 0 as any_graduated,
/* any_withdrawn: W is the NSC single-character code for Withdrawn */
countif(enrollment_status = 'W') > 0 as any_withdrawn,

max(
if(rn_recent = 1, enrollment_status, null)
) as current_enrollment_status,

from nsc_with_account
group by contact_id, account_id, enrollment_begin_year
),

nsc_enrollment_derived as (
select
contact_id,
account_id,
enrollment_begin_year,
enrollment_begin,
enrollment_end,
current_enrollment_status,

Expand All @@ -64,7 +15,7 @@
else 'Attending'
end as derived_status,

from nsc_enrollment
from {{ ref("int_nsc__enrollments") }}
)

select
Expand All @@ -80,11 +31,23 @@
{{ ref("stg_kippadb__enrollment") }} as e
on n.contact_id = e.student
and n.account_id = e.school
and n.enrollment_begin_year = e.start_date_year
/*
Match on date proximity (±180 days) rather than calendar year to handle
enrollments that span an academic year boundary (e.g. Fall+Spring).
*/
and n.enrollment_begin
between date_sub(e.start_date, interval 180 day) and date_add(
e.start_date, interval 180 day
)
where
not e.do_not_overwrite_with_nsc_data
and (
n.enrollment_end is distinct from e.actual_end_date
or n.derived_status is distinct from e.status
or n.current_enrollment_status is distinct from e.attending_status
)
qualify
row_number() over (
partition by e.id
order by abs(date_diff(n.enrollment_begin, e.start_date, day))
) = 1
107 changes: 107 additions & 0 deletions src/dbt/kipptaf/models/nsc/intermediate/int_nsc__enrollments.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
with
nsc_with_account as (
    /*
    NSC student-tracker rows flagged record_found_y_n = 'Y', each tagged with
    the account_id that the crosswalk maps to the row's NSC college code.
    */
    select
        st.contact_id,
        xw.account_id,

        st.enrollment_begin,
        st.enrollment_end,
        st.enrollment_status,
        st.graduated,
        st.two_year_four_year,

        /*
        NSC may send several rows for one semester start; rank them so the
        row with the newest search_date gets rn_semester = 1
        */
        row_number() over (
            partition by st.contact_id, xw.account_id, st.enrollment_begin
            order by st.search_date desc
        ) as rn_semester,

    from {{ ref("stg_nsc__student_tracker") }} as st
    inner join
        {{ ref("stg_google_sheets__kippadb__nsc_crosswalk") }} as xw
        on st.college_code_branch = xw.college_code_nsc
    where st.record_found_y_n = 'Y' and xw.rn_college_code_nsc = 1
),

nsc_semesters as (
    /* one row per (contact, account, semester start): the rn_semester = 1 row */
    select
        ranked.contact_id,
        ranked.account_id,
        ranked.enrollment_begin,
        ranked.enrollment_end,
        ranked.enrollment_status,
        ranked.graduated,
        ranked.two_year_four_year,
    from nsc_with_account as ranked
    where ranked.rn_semester = 1
),

semesters_with_lag as (
    /*
    Attach to each semester the end date that the next step compares against
    this semester's begin date to detect a break in enrollment.
    */
    select
        contact_id,
        account_id,
        enrollment_begin,
        enrollment_end,
        enrollment_status,
        graduated,
        two_year_four_year,

        /*
        Latest enrollment_end across ALL earlier semesters for this
        (contact, account), not only the immediately preceding row. A plain
        lag() under-reports coverage when a short term is nested inside a
        longer one, or when the preceding row has a NULL enrollment_end, and
        that would split one continuous enrollment into two. The frame is
        empty for the first semester, so the result is NULL there.
        */
        max(enrollment_end) over (
            partition by contact_id, account_id
            order by enrollment_begin
            rows between unbounded preceding and 1 preceding
        ) as prev_enrollment_end,
    from nsc_semesters
),

enrollment_groups as (
    select
        contact_id,
        account_id,
        enrollment_begin,
        enrollment_end,
        enrollment_status,
        graduated,
        two_year_four_year,

        /*
        Running count of "enrollment starts" within each (contact, account),
        in enrollment_begin order. A semester starts a new enrollment period
        when it has no prior end date to compare against, or when more than
        200 days separate that prior end from this semester's begin.
        Summer break ≈ 90 days; a missing semester ≈ 270 days.
        */
        countif(
            prev_enrollment_end is null
            or date_diff(enrollment_begin, prev_enrollment_end, day) > 200
        ) over (
            partition by contact_id, account_id
            order by enrollment_begin
            rows between unbounded preceding and current row
        ) as enrollment_group,
    from semesters_with_lag
)

/* collapse semester rows into one row per (contact, account, enrollment_group) */
select
    contact_id,
    account_id,
    enrollment_group,

    /* the period spans the earliest begin through the latest end */
    min(enrollment_begin) as enrollment_begin,
    max(enrollment_end) as enrollment_end,

    /* false, never NULL, when no semester row matches */
    coalesce(logical_or(graduated = 'Y'), false) as any_graduated,
    coalesce(logical_or(enrollment_status = 'W'), false) as any_withdrawn,

    /* values taken from the semester with the latest enrollment_begin */
    array_agg(enrollment_status order by enrollment_begin desc limit 1)[
        ordinal(1)
    ] as current_enrollment_status,
    array_agg(two_year_four_year order by enrollment_begin desc limit 1)[
        ordinal(1)
    ] as current_two_year_four_year,

from enrollment_groups
group by contact_id, account_id, enrollment_group
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
models:
- name: int_nsc__enrollments
description: >
Collapses NSC semester-level rows into continuous enrollment periods using
a gaps-and-islands approach. Consecutive semesters at the same institution
are merged into a single enrollment; a gap of more than 200 days between a
semester's end and the next semester's begin signals a new enrollment
(summer break ≈ 90 days; a missing semester ≈ 270 days).
data_tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- contact_id
- account_id
- enrollment_group
config:
store_failures: true
columns:
- name: contact_id
data_type: string
description: Salesforce Contact ID of the KIPP alum.
- name: account_id
data_type: string
description: Salesforce Account ID of the college or university.
- name: enrollment_group
data_type: int64
description: >
Sequential integer, starting at 1, assigned per continuous enrollment
period within a (contact, institution) pair in enrollment_begin order.
Increments each time a gap > 200 days is detected between consecutive
semesters.
- name: enrollment_begin
data_type: date
description:
First day of the earliest semester in this enrollment period.
- name: enrollment_end
data_type: date
description: Last day of the latest semester in this enrollment period.
- name: any_graduated
data_type: boolean
description:
True if any NSC semester row within this enrollment has graduated = 'Y'.
- name: any_withdrawn
data_type: boolean
description: >
True if any NSC semester row within this enrollment has status 'W'
(Withdrawn).
- name: current_enrollment_status
data_type: string
description: >
NSC enrollment status code from the most recent semester in this
enrollment period (e.g. F = Full-time, H = Half-time, W = Withdrawn).
- name: current_two_year_four_year
data_type: string
description: >
Two-year/four-year classification from the most recent semester in
this enrollment period.
2 changes: 1 addition & 1 deletion src/dbt/kipptaf/package-lock.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
packages:
- name: dbt_external_tables
package: dbt-labs/dbt_external_tables
version: 0.12.1
version: 0.12.0
- name: dbt_utils
package: dbt-labs/dbt_utils
version: 1.3.3
Expand Down
Loading