Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions products/cscl/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ models:
+materialized: view
product:
+materialized: table
etl_dev_qa:
+materialized: table
log_files:
+materialized: table

on-run-start:
- '{{ create_pg_functions() }}'
Expand Down
12 changes: 10 additions & 2 deletions products/cscl/design_doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,16 @@ A somewhat confusingly named layer since shoreline, rail, and subway are not str

[stub]

### Error reporting
[stub]
#### Error reports

Errors are logged for the following conditions:
- If a preferred B7SC is not found in StreetName or in FeatureName. This is reflected/measured by segments missing face codes (`log__lion_segments_missing_facecode`)
- If a segment has an endpoint that is not joined to a Node (`log__lion_segments_missing_nodes`)
- If a segment does not join to an Atomic Polygon on either side (`log__lion_segments_missing_aps`)
- If a centerline or protosegment has a SEGLOCSTATUS in its source table that differs from its calculated Segment Locational Status (`log__lion_centerline_or_proto_seglocstatus_mismatch`)
- If a segment does not have a joined Atomic Polygon sharing the same borough code as the segment (`log__lion_segments_ap_boro_mismatch`)
- If a segment does not join to a NYPD BEAT polygon (`log__lion_segments_missing_nypd`)
- If a protosegment does not share a Segment ID with a geometry-modeled segment (`log__lion_protosegment_orphans`)


## Special Address Files (SAF)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ SELECT
co.globalid,
co.lionkey,
co.segmentid,
left(left_beat.post, 1) AS left_nypd_service_area,
left(right_beat.post, 1) AS right_nypd_service_area
-- sector and geo_type are taken from whichever beat polygon contains the offset point
left_beat.sector AS left_nypd_sector,
left_beat.geo_type AS left_beat_geo_type,
-- service area is the first character of post and is only populated for 'HP' beats
CASE WHEN left_beat.geo_type = 'HP' THEN left(left_beat.post, 1) END AS left_nypd_service_area,
right_beat.sector AS right_nypd_sector,
right_beat.geo_type AS right_beat_geo_type,
-- the right side must read post from right_beat (it previously read left_beat.post)
CASE WHEN right_beat.geo_type = 'HP' THEN left(right_beat.post, 1) END AS right_nypd_service_area
FROM segment_offsets AS co
-- using a cte around the reference can confuse the postgres planner so that it does not use the index
LEFT JOIN {{ source("recipe_sources", "dcp_cscl_nypdbeat") }} AS left_beat
ON
st_within(co.left_offset_point, left_beat.geom)
AND left_beat.geo_type = 'HP'
ON st_within(co.left_offset_point, left_beat.geom)
LEFT JOIN {{ source("recipe_sources", "dcp_cscl_nypdbeat") }} AS right_beat
ON
st_within(co.right_offset_point, right_beat.geom)
AND right_beat.geo_type = 'HP'
ON st_within(co.right_offset_point, right_beat.geom)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{{ config(
materialized = 'table',
indexes=[
{'columns': ['segmentid']},
{'columns': ['globalid']},
{'columns': ['nodeid']},
]
) }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,3 @@ segment_attributes AS (
)

SELECT * FROM segment_attributes
WHERE face_code IS NOT NULL -- TODO error report for this and maybe refactor to get this in a more logical place
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ feature_type_codes AS (
)
SELECT
proto.borough AS boroughcode,
facecode.face_code, -- TODO error report when null
facecode.face_code,
CASE
WHEN feature_type_codes.source_feature_class <> 'nonstreetfeatures' THEN proto.alt_segment_seqnum
ELSE seqnum.segment_seqnum
Expand Down Expand Up @@ -65,8 +65,7 @@ SELECT
proto.source_table,
proto.globalid
FROM proto
INNER JOIN primary_segments ON proto.segmentid = primary_segments.segmentid -- TODO error report for non-matches
LEFT JOIN primary_segments ON proto.segmentid = primary_segments.segmentid
LEFT JOIN facecode ON proto.b7sc = facecode.b7sc
LEFT JOIN seqnum ON proto.globalid = seqnum.globalid
LEFT JOIN feature_type_codes ON proto.feature_type_code IS NOT DISTINCT FROM feature_type_codes.code -- NULL -> centerline
WHERE facecode.face_code IS NOT NULL -- TODO - clean up in #2073
2 changes: 2 additions & 0 deletions products/cscl/models/intermediate/segments/int__segments.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ WITH segments AS (
SELECT
{{ dbt_utils.star(ref('int__primary_segments')) }}
FROM {{ ref("int__protosegments") }}
WHERE geom IS NOT NULL -- proxy for joined to a segment
)
SELECT
CONCAT(boroughcode, face_code, segment_seqnum) AS lionkey,
*
FROM segments
WHERE face_code IS NOT NULL
ORDER BY lionkey
14 changes: 14 additions & 0 deletions products/cscl/models/log_files/log.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Unions every individual log model into a single log relation.
-- The explicit list of refs below could be replaced by
-- dbt_utils.get_relations_by_pattern feeding dbt_utils.union_relations
-- if we either remove the dash from db names
-- or if dbt fixes a bug.

{% set log_models = [
    ref("log__lion_centerline_or_proto_seglocstatus_mismatch"),
    ref("log__lion_protosegment_orphans"),
    ref("log__lion_segments_ap_boro_mismatch"),
    ref("log__lion_segments_missing_nypd"),
    ref("log__lion_segments_missing_aps"),
    ref("log__lion_segments_missing_facecode"),
    ref("log__lion_segments_missing_nodes"),
] %}

{{ dbt_utils.union_relations(relations=log_models) }}
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- Logs centerline and protosegment (altsegmentdata) records whose SEGLOCSTATUS
-- in the source table differs from the segment locational status held in int__lion.
WITH lion AS (
    SELECT * FROM {{ ref("int__lion") }}
),
centerline AS (
    SELECT * FROM {{ ref("stg__centerline") }}
),
proto AS (
    SELECT * FROM {{ ref("stg__altsegmentdata_proto") }}
),
-- pair each lion record with the seglocstatus of the source row it came from
joined AS (
    SELECT
        lion.lionkey,
        lion.globalid,
        lion.segmentid,
        lion.segment_locational_status,
        CASE
            WHEN lion.source_table = 'centerline' THEN centerline.seglocstatus
            WHEN lion.source_table = 'altsegmentdata' THEN proto.seglocstatus
        END AS source_segment_locational_status,
        lion.source_table
    FROM lion
    -- centerline rows are matched on segmentid, protosegment rows on globalid
    LEFT JOIN centerline ON lion.source_table = 'centerline' AND lion.segmentid = centerline.segmentid
    LEFT JOIN proto ON lion.source_table = 'altsegmentdata' AND lion.globalid = proto.globalid
    WHERE lion.source_table IN ('centerline', 'altsegmentdata')
)
SELECT
    'seglocstatus mismatch' AS error_category,
    globalid,
    source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    segmentid AS record_id,
    -- Postgres FORMAT() only understands %-style specifiers; a NULL argument to %s renders as an empty string
    FORMAT(
        'The %s feature with an OID = %s has a SEGLOCSTATUS value that is different than what was found using the LION ETL rules. Feature = ''%s'', ETL Rules = ''%s''.',
        source_table,
        globalid,
        source_segment_locational_status,
        segment_locational_status
    ) AS message
FROM joined
-- IS DISTINCT FROM so that a NULL on only one side is also reported
WHERE segment_locational_status IS DISTINCT FROM source_segment_locational_status
13 changes: 13 additions & 0 deletions products/cscl/models/log_files/log__lion_protosegment_orphans.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- Logs protosegments that have no corresponding geometry-modeled segment,
-- identified here as int__protosegments rows with a NULL geom.
WITH orphans AS (
    SELECT
        globalid,
        source_table,
        segmentid
    FROM {{ ref('int__protosegments') }}
    WHERE geom IS NULL
)
SELECT
    'protosegment without geometry-modeled segment' AS error_category,
    orphans.globalid,
    orphans.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    orphans.segmentid AS record_id,
    FORMAT(
        'Protosegment with globalid "%s" and segmentid "%s" has no corresponding geometry-modeled segment.',
        orphans.globalid,
        CAST(orphans.segmentid AS INT)
    ) AS message
FROM orphans
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- Logs segments where neither joined atomic polygon shares the segment's borough code.
WITH segments AS (
    SELECT * FROM {{ ref("int__segments") }}
),
aps AS (
    SELECT * FROM {{ ref("int__segment_atomicpolygons") }}
)
SELECT
    'neither joined atomic polygon matches segment''s borocode' AS error_category,
    segments.globalid,
    segments.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    segments.segmentid AS record_id,
    -- the template is a single string literal built with ||; it takes seven %s arguments in the order listed below
    FORMAT(
        'Neither of the AtomicPolygon borough matches the segment borough for the %s feature with an OID = %s. '
        || 'Segment has boro ''%s'', '
        || 'left atomic polygon with atomicid ''%s'' has boro ''%s'', '
        || 'and right atomic polygon with atomicid ''%s'' has boro ''%s''.',
        segments.source_table,
        segments.globalid,
        aps.segment_borocode,
        aps.left_atomicid,
        aps.left_borocode,
        aps.right_atomicid,
        aps.right_borocode
    ) AS message
FROM aps
INNER JOIN segments ON aps.globalid = segments.globalid
WHERE
    aps.left_borocode IS DISTINCT FROM aps.segment_borocode
    AND aps.right_borocode IS DISTINCT FROM aps.segment_borocode
    AND (aps.left_borocode IS NOT NULL OR aps.right_borocode IS NOT NULL) -- to not duplicate rows from other test
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Logs int__lion records that have no atomic polygon id on either the left or the right side.
WITH unjoined AS (
    SELECT
        globalid,
        source_table,
        segmentid
    FROM {{ ref('int__lion') }}
    WHERE
        left_atomicid IS NULL
        AND right_atomicid IS NULL
)
SELECT
    'segment joined to no atomic polygon' AS error_category,
    unjoined.globalid,
    unjoined.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    unjoined.segmentid AS record_id,
    '' AS message
FROM unjoined
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Logs primary segments and protosegments that have no face code.
-- Both inputs are filtered before being stacked; the outer DISTINCT collapses repeated records.
WITH missing_facecode AS (
    SELECT
        globalid,
        source_table,
        segmentid
    FROM {{ ref("int__primary_segments") }}
    WHERE face_code IS NULL
    UNION ALL
    SELECT
        globalid,
        source_table,
        segmentid
    FROM {{ ref("int__protosegments") }}
    WHERE face_code IS NULL
)
SELECT DISTINCT
    'missing facecode' AS error_category,
    missing_facecode.globalid,
    missing_facecode.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    missing_facecode.segmentid AS record_id,
    '' AS message
FROM missing_facecode
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Logs int__lion records where the from-node id, the to-node id, or both are missing.
WITH unjoined AS (
    SELECT
        globalid,
        source_table,
        segmentid
    FROM {{ ref('int__lion') }}
    WHERE
        from_nodeid IS NULL
        OR to_nodeid IS NULL
)
SELECT
    'segment missing node' AS error_category,
    unjoined.globalid,
    unjoined.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    unjoined.segmentid AS record_id,
    '' AS message
FROM unjoined
16 changes: 16 additions & 0 deletions products/cscl/models/log_files/log__lion_segments_missing_nypd.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Logs segments whose int__segment_nypdbeat row has no NYPD sector on either side.
SELECT
    'segment joined to no nypd beat' AS error_category,
    segments.globalid,
    segments.source_table AS source_feature_layer,
    'segmentid' AS record_id_type,
    segments.segmentid AS record_id,
    '' AS message
FROM {{ ref('int__segments') }} AS segments
INNER JOIN {{ ref('int__segment_nypdbeat') }} AS nypd
    ON segments.lionkey = nypd.lionkey
WHERE
    nypd.left_nypd_sector IS NULL
    AND nypd.right_nypd_sector IS NULL
2 changes: 2 additions & 0 deletions products/cscl/poc_validation/dat_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
assert recipe.exports

for export in recipe.exports.datasets:
if export.name == 'log':
continue
formatting = (export.custom or {"formatting": export.name})["formatting"]
f_path = Path(f"seeds/text_formatting/text_formatting__{formatting}.csv")
dataset = {
Expand Down
4 changes: 4 additions & 0 deletions products/cscl/recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,7 @@ exports:
filename: SpecialSEDAT.txt
format: dat
custom: { formatting: sedat }

- name: log
filename: log.csv
format: csv
Loading