-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconstraints_post-import.sql
More file actions
255 lines (236 loc) · 10.3 KB
/
constraints_post-import.sql
File metadata and controls
255 lines (236 loc) · 10.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
-- B.1.177.75, B.1.160 and D.2 can only be used as infected variants
-- The reason: Each has only D614G + one Spike mutation which can be mixed
-- with a single mutation isolate
DO $$
DECLARE row isolates%rowtype;
BEGIN
FOR row IN SELECT * FROM isolates iso WHERE
var_name IN ('B.1.177.75', 'B.1.160', 'D.2', 'L452R variants', 'Unknown variant') AND (
EXISTS (
SELECT 1 FROM rx_potency pot WHERE
pot.iso_name=iso.iso_name
) OR
EXISTS (
SELECT 1 FROM rx_fold f WHERE
f.control_iso_name=iso.iso_name OR
f.iso_name=iso.iso_name
)
)
LOOP
RAISE EXCEPTION E'Variant \x1b[1m%\x1b[0m cannot be assigned to any isolate. It exist only because it is an infected variant in subject_infections. Unlink \x1b[1m%\x1b[0m from \x1b[1m%\x1b[0m.', row.var_name, row.iso_name, row.var_name;
END LOOP;
END
$$;
SELECT * FROM susc_results WHERE rx_type IS NULL;
-- rx_type must not be NULL
ALTER TABLE susc_results ALTER COLUMN rx_type SET NOT NULL;
-- for each variant with consensus_availability is TRUE, at least 1 consensus must exist
DO $$
DECLARE row variants%rowtype;
BEGIN
FOR row in SELECT * FROM variants V
WHERE
V.var_name != 'B' AND
V.consensus_availability IS TRUE AND
NOT EXISTS(
SELECT 1 FROM variant_consensus C WHERE
C.var_name = V.var_name
)
LOOP
RAISE EXCEPTION E'Variant \x1b[1m%\x1b[0m should have at least one variant_consensus record. Use `\x1b[1mmake sync-varcons\x1b[0m` to correct this error.', row.var_name;
END LOOP;
END
$$;
CREATE FUNCTION hasPlasmaOrIsolate(rname varchar, sname varchar) RETURNS boolean AS $$
SELECT EXISTS (
SELECT 1 FROM subject_plasma SbjP
WHERE rname = SbjP.ref_name AND sname = SbjP.subject_name
) OR EXISTS (
SELECT 1 FROM subject_isolates SbjI
WHERE rname = SbjI.ref_name AND sname = SbjI.subject_name
)
$$ LANGUAGE SQL;
CREATE FUNCTION hasSubsequentPlasma(rname varchar, sname varchar, event_date date) RETURNS boolean AS $$
SELECT EXISTS (
SELECT 1 FROM subject_plasma
WHERE rname = ref_name AND sname = subject_name AND event_date <= collection_date
)
$$ LANGUAGE SQL;
CREATE FUNCTION hasSubsequentIsolate(rname varchar, sname varchar, event_date date) RETURNS boolean AS $$
SELECT EXISTS (
SELECT 1 FROM subject_isolates
WHERE rname = ref_name AND sname = subject_name AND event_date <= collection_date
)
$$ LANGUAGE SQL;
-- for each subject there must be at least one record in subject_plasma or subject_isolates
DO $$
DECLARE row subjects%rowtype;
BEGIN
FOR row IN SELECT * FROM subjects LOOP
IF NOT hasPlasmaOrIsolate(row.ref_name, row.subject_name) THEN
RAISE EXCEPTION E'Subject ref_name=\x1b[1m%\x1b[0m subject_name=\x1b[1m%\x1b[0m doesn''t have any `subject_plasma` or `subject_isolates`', row.ref_name, row.subject_name;
END IF;
END LOOP;
END
$$;
-- for each subject_infections there must be at least one Subsequent record in subject_plasma or subject_isolates
DO $$
DECLARE row subject_infections%rowtype;
BEGIN
FOR row IN SELECT * FROM subject_infections LOOP
IF NOT hasSubsequentPlasma(row.ref_name, row.subject_name, row.infection_date) AND
NOT hasSubsequentIsolate(row.ref_name, row.subject_name, row.infection_date) THEN
RAISE EXCEPTION E'Subject ref_name=\x1b[1m%\x1b[0m subject_name=\x1b[1m%\x1b[0m doesn''t have any `subject_plasma` or `subject_isolates` dated after the infection date \x1b[1m%\x1b[0m', row.ref_name, row.subject_name, row.infection_date;
END IF;
END LOOP;
END
$$;
-- for each subject_vaccines there must be at least one Subsequent record in subject_plasma or subject_isolates
DO $$
DECLARE row subject_vaccines%rowtype;
BEGIN
FOR row IN SELECT * FROM subject_vaccines LOOP
IF NOT hasSubsequentPlasma(row.ref_name, row.subject_name, row.vaccination_date) AND
NOT hasSubsequentIsolate(row.ref_name, row.subject_name, row.vaccination_date) THEN
RAISE EXCEPTION E'Subject ref_name=\x1b[1m%\x1b[0m subject_name=\x1b[1m%\x1b[0m doesn''t have any `subject_plasma` or `subject_isolates` dated after the vaccination date \x1b[1m%\x1b[0m', row.ref_name, row.subject_name, row.vaccination_date;
END IF;
END LOOP;
END
$$;
-- for each iso_name in rx_potency, it must exist in ref_isolate_pairs either as
-- control_iso_name or as iso_name
CREATE FUNCTION isIsolateReferred(rname varchar, iname varchar) RETURNS boolean AS $$
SELECT EXISTS (
SELECT 1 FROM ref_isolate_pairs P
WHERE rname = P.ref_name AND (
iname = P.control_iso_name OR
iname = P.iso_name
)
) OR EXISTS (
SELECT 1 FROM ref_unpaired_isolates UP
WHERE
rname = UP.ref_name AND
iname = UP.iso_name
)
$$ LANGUAGE SQL;
DO $$
DECLARE row rx_potency%rowtype;
BEGIN
FOR row IN SELECT * FROM rx_potency LOOP
IF NOT isIsolateReferred(row.ref_name, row.iso_name) THEN
RAISE EXCEPTION E'Isolate \x1b[1m%\x1b[0m used by article \x1b[1m%\x1b[0m is neither referred as `control_iso_name` nor `iso_name` in `ref_isolate_pairs` table', row.iso_name, row.ref_name;
END IF;
END LOOP;
END
$$;
-- ALTER TABLE rx_potency
-- ADD CONSTRAINT chk_susc_results CHECK (
-- isSuscRecordCreated(ref_name, rx_name, iso_name) IS TRUE
-- );
DO $$
DECLARE row rx_potency%rowtype;
BEGIN
FOR row IN SELECT * FROM rx_potency pot JOIN rx_antibodies ab ON pot.ref_name = ab.ref_name AND pot.rx_name = ab.rx_name LOOP
IF row.potency_type::text NOT LIKE 'IC%' THEN
RAISE EXCEPTION E'An experiment record of monoclonal antibody must use IC50/IC80/ICXX as its `potency_type`. However, ref_name=\x1b[1m%\x1b[0m rx_name=\x1b[1m%\x1b[0m iso_name=\x1b[1m%\x1b[0m has potency_type=\x1b[1m%\x1b[0m.', row.ref_name, row.rx_name, row.iso_name, row.potency_type;
END IF;
END LOOP;
FOR row IN SELECT * FROM rx_potency pot JOIN subject_plasma plasma ON pot.ref_name = plasma.ref_name AND pot.rx_name = plasma.rx_name LOOP
IF (row.potency_type::text NOT LIKE 'NT%' AND row.potency_type::text NOT LIKE 'NC%') THEN
RAISE EXCEPTION E'An experiment record of plasma antibody must use NT50/NT80/NTXX/NCXX as its `potency_type`. However, ref_name=\x1b[1m%\x1b[0m rx_name=\x1b[1m%\x1b[0m iso_name=\x1b[1m%\x1b[0m has potency_type=\x1b[1m%\x1b[0m.', row.ref_name, row.rx_name, row.iso_name, row.potency_type;
END IF;
END LOOP;
END
$$;
DO $$
DECLARE row rx_fold%rowtype;
BEGIN
FOR row IN SELECT * FROM rx_fold fold JOIN rx_antibodies ab ON fold.ref_name = ab.ref_name AND fold.rx_name = ab.rx_name LOOP
IF row.potency_type::text NOT LIKE 'IC%' THEN
RAISE EXCEPTION E'An experiment record of monoclonal antibody must use IC50/IC80/ICXX as its `potency_type`. However, ref_name=\x1b[1m%\x1b[0m rx_name=\x1b[1m%\x1b[0m iso_name=\x1b[1m%\x1b[0m has potency_type=\x1b[1m%\x1b[0m.', row.ref_name, row.rx_name, row.iso_name, row.potency_type;
END IF;
END LOOP;
FOR row IN SELECT * FROM rx_fold fold JOIN subject_plasma plasma ON fold.ref_name = plasma.ref_name AND fold.rx_name = plasma.rx_name LOOP
IF (row.potency_type::text NOT LIKE 'NT%' AND row.potency_type::text NOT LIKE 'NC%') THEN
RAISE EXCEPTION E'An experiment record of plasma antibody must use NT50/NT80/NTXX/NCXX as its `potency_type`. However, ref_name=\x1b[1m%\x1b[0m rx_name=\x1b[1m%\x1b[0m iso_name=\x1b[1m%\x1b[0m has potency_type=\x1b[1m%\x1b[0m.', row.ref_name, row.rx_name, row.iso_name, row.potency_type;
END IF;
END LOOP;
END
$$;
DO $$
DECLARE _iso_aggkey VARCHAR;
DECLARE _iso_aggkeys VARCHAR[];
DECLARE _iso_names VARCHAR[];
BEGIN
_iso_aggkeys := (
SELECT ARRAY_AGG(iso_aggkey) FROM (
SELECT
iso_aggkey,
COUNT(
DISTINCT CASE WHEN var_name IS NULL THEN '__NULL_PH' ELSE var_name END
) AS num_variants
FROM isolate_pairs pair, isolates iso
WHERE pair.iso_name = iso.iso_name
GROUP BY iso_aggkey
) numvars
WHERE num_variants > 1 AND iso_aggkey IS NOT NULL
);
FOREACH _iso_aggkey IN ARRAY _iso_aggkeys LOOP
_iso_names := (
SELECT ARRAY_AGG(DISTINCT iso_name) FROM isolate_pairs WHERE iso_aggkey = _iso_aggkey
);
RAISE WARNING E'Subsequent isolates are identical at Spike mutation level but were assigned different var_name: \x1b[1m%\x1b[0m', ARRAY_TO_STRING(_iso_names, ', ');
END LOOP;
END
$$ LANGUAGE PLPGSQL;
DO $$
DECLARE _ref_with_unused_subject_plasma VARCHAR;
DECLARE _unused_subject_plasma VARCHAR;
BEGIN
FOR _ref_with_unused_subject_plasma in SELECT DISTINCT SbjP.ref_name
FROM subject_plasma SbjP
WHERE
NOT EXISTS (
SELECT 1 FROM susc_results S WHERE
SbjP.ref_name = S.ref_name AND
SbjP.rx_name = S.rx_name
) AND NOT EXISTS (
SELECT 1 FROM unlinked_susc_results UnS WHERE
SbjP.ref_name = UnS.ref_name AND
SbjP.rx_name = UnS.rx_name
) AND NOT EXISTS (
SELECT 1 FROM subject_treatments SbjRx WHERE
SbjP.ref_name = SbjRx.ref_name AND
SbjP.rx_name = SbjRx.rx_name
) AND NOT EXISTS (
SELECT 1 FROM invitro_selection_results IVRx WHERE
SbjP.ref_name = IVRx.ref_name AND
SbjP.rx_name = IVRx.rx_name
)
LOOP
_unused_subject_plasma := (
SELECT STRING_AGG(rx_name, ', ')
FROM subject_plasma SbjP
WHERE
NOT EXISTS (
SELECT 1 FROM susc_results S WHERE
SbjP.ref_name = S.ref_name AND
SbjP.rx_name = S.rx_name
) AND NOT EXISTS (
SELECT 1 FROM unlinked_susc_results UnS WHERE
SbjP.ref_name = UnS.ref_name AND
SbjP.rx_name = UnS.rx_name
) AND NOT EXISTS (
SELECT 1 FROM subject_treatments SbjRx WHERE
SbjP.ref_name = SbjRx.ref_name AND
SbjP.rx_name = SbjRx.rx_name
) AND NOT EXISTS (
SELECT 1 FROM invitro_selection_results IVRx WHERE
SbjP.ref_name = IVRx.ref_name AND
SbjP.rx_name = IVRx.rx_name
) AND SbjP.ref_name = _ref_with_unused_subject_plasma
);
RAISE WARNING E'Subsequent subject_plasma are not used by reference \x1b[1m%\x1b[0m: \x1b[1m%\x1b[0m', _ref_with_unused_subject_plasma, _unused_subject_plasma;
END LOOP;
END
$$ LANGUAGE PLPGSQL;