Open-Grok/closur2.i at main · CordyJ/Open-Grok · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
% Compute transitive closure (Second version, using sorting)
% Dec 1997, Ric Holt

% Algorithm uses new fast Rel Composition algorithm.
% Algorithm attempts to minimize sorting
proc transitiveClosure2 (R0Number, R1Number : str)
    % Implement transitive closure: R0 = R1+

    % Definitions:  Sn = R**1 + R**2 + ... + R**n
    %               Un = R**n - Sn-1 (Unique new part of Sn due to R**n)
    % Hence:        Sn =  Sn-1 + Un
    % In this we consider R to be R1 and we compute
    %               R0 := R1+, which is the limit as n -> infinity of Sn
    % Basic algorithm is:

    % STEP 0.  n := 1
    % STEP 1.  S := EMPTY  (S represents Sn-1)
    % STEP 2.  U := R1     (U represents Un)
    % STEP 3.  loop           (n increases by 1 each time thru the loop
    %              assert S = Sn-1  &  U = Un  &  Sn = S + U
    % STEP 4.      P := U * R1
    % STEP 5.      S := S + U
    %              assert S = Sn+1
    % STEP 6.      U := P - S
    %              assert U = Un+1
    % STEP 7.      exit when U = EMPTY
    % STEP 8.      n += 1
    % STEP 9.  end loop
    %          assert S = R1+

    % We consider n to be the 'ply' level, ie, how deep in jumps
    % along arcs we have progressed.  Let P be the number of plies
    % to finish algorithm.  (Note: P <= N , the number of tuples).
    % The big O timing is P * C where C is the time to perform steps 4-8.
    % Since each step in 4-8 is bounded by time proportional to the number
    % of tuples (t) (actually, by the number of tuples in a relation),
    % the big O timing is P * t.  For a worst-case (dense) graph, this
    % will be n**4, but effectively much faster for typical cases.
    % There are transitive closure algorithms that run in worst-case
    % time n**3, which might possibly be better.

    % In the implemented algorithm: n is redundant (but is computed anyway)

    % Use of Index arrays:   R1: Index1
    %                         S: Index3
    %                         P: Index2
    %                         U: Index2
    %                   delDups: Index2
    %                         T: tempIndex1 (temporary copy for S)

    % Make sure names are unique (use space inside name)
    const S := nameNum ("S ")
    const U := nameNum ("U ")
    const P := nameNum ("P ")

    % Collect R1 and sort index by src (for repeated use in U * R1);
    %           this will be the only sort of R1 in entire algorithm.
    % R1 uses Index1.
    var R1Count : int
    collectRelIndex (R1Number, R1Count, Index1)
    PigeonHoleSort (DBSrc, R1Count, Index1, tempIndex1, nameTemp1, tempIndex2)

    % STEP 0.  n := 1
    var N := 1

    const oldDBSize := DBSize

    % STEP 1.  S := EMPTY
    const SStart := DBSize + 1
    var SCount := 0

    % STEP 2.  U := R1
    var UCount := 0
    var UStart := DBSize + 1
    for i : 1 .. DBSize
        if DBRel (i) = R1Number then
            insertDB (DBSrc (i), U, DBTrg (i))
            UCount += 1
        end if
    end for
    % Eliminate any possible dup's in U, in case R1 had dup's
    deleteDuplicateTuples (UStart) % Could optimize this
    % Danger: 'deleteDuplicatePairsSpecial' clobbers work array Index2.
    UCount := DBSize - UStart + 1

    loop % STEP  3. loop  (in basic algorithm)
        exit when R1Count = 0 % Special case of null relation

        % Assert S = Sn-1  &  U = Un  &  Sn = S + U

        % STEP 4.  P := U * R1 (set up for this step)

        % Create index for U, sorted by target (for use in U * R1).
        % U uses Index2.  Note that U tuples are contiguous.
        const UStartLess1 := UStart - 1
        for i : 1 .. UCount
            Index2 (i) := i + UStartLess1
        end for
        PigeonHoleSort (DBTrg, UCount, Index2, tempIndex1, nameTemp1, tempIndex2)

        % Assert: R1 is sorted by src (done preceding loop body)

        % STEP 4.  P := U * R1 (Rel. Composition loop to carry out this step)
        var PCount := 0
        var PStart := DBSize + 1
        var kU, k1 := 1 % Count through the 2 index arrays (for U & R1)
        loop % Using common link (U trg to R1 src) merge U and R1
            const trgU := DBTrg (Index2 (kU))
            const src1 := DBSrc (Index1 (k1))
            if trgU < src1 then
                loop
                    % Advance the kU counter till match R1 (or exhaust kU)
                    kU += 1
                    exit when kU > UCount
                    exit when DBTrg (Index2 (kU)) >= src1
                end loop
            end if
            exit when kU > UCount

            if trgU > src1 then
                loop
                    % Advance the k1 counter till match U (or exhaust k1)
                    k1 += 1
                    exit when k1 > R1Count
                    exit when trgU <= DBSrc (Index1 (k1))
                end loop
            end if
            exit when k1 > R1Count % No more matching links

            if DBTrg (Index2 (kU)) = DBSrc (Index1 (k1)) then
                const link := DBTrg (Index2 (kU))

                var lastLinkU := UCount
                for i : kU + 1 .. UCount
                    if DBTrg (Index2 (i)) not= link then
                        lastLinkU := i - 1
                        exit
                    end if
                end for

                var lastLink1 := R1Count
                for i : k1 + 1 .. R1Count
                    if DBSrc (Index1 (i)) not= link then
                        lastLink1 := i - 1
                        exit
                    end if
                end for

                % For this matching link value, add all tuples to DB
                for mU : kU .. lastLinkU
                    for m1 : k1 .. lastLink1
                        PCount += 1
                        insertDB (DBSrc (Index2 (mU)), P,
                            DBTrg (Index1 (m1)))
                    end for
                end for
                kU := lastLinkU + 1
                k1 := lastLink1 + 1
            end if
            exit when kU > UCount or k1 > R1Count
        end loop % End of loop for P := U * R1 (STEP 4)
        % Assert: P now contains U * R1
        % In most cases, P includes dups,
        %       which will be effectively deleted in STEP 6. U := P - S

        % STEP 5. S := S + U  (set up by sorting U by pairs)
        % Assert: S and U are disjoint  (cause U is unique new part of Sn)
        % Assert: U's index is already in Index2; sort this index by pairs
        PigeonHoleSort (DBTrg, UCount, Index2, tempIndex1, nameTemp1, tempIndex2)
        PigeonHoleSort (DBSrc, UCount, Index2, tempIndex1, nameTemp1, tempIndex2)

        % STEP 5. S := S + U  (loop to do the union)
        % We know that S's tuples lie contiguously just beyond oldDBSize and
        % that U's tuples lie contiguously just beyond that.
        % To represent S, we simply extend (merge into) S's index,
        % so it also contains the items from U's index.

        % Create a copy T of S's index (Index3) to build a new index for S.
        var TCount := 0 % Temporary copy of S's index in tempIndex1
        % During merging of S and U, index for the union goes into tempIndex1

        % Count through the 2 index arrays
        kU := 1
        var kS := 1
        loop % Loop for S := S + U.  Maintains S's index sorted by pairs!
            % Consider sum to be written with U first: U + S
			loop
				exit when kU > UCount
				exit when kS <= SCount & GE_Pair23 (kU, kS)
				% Advance the kU counter till match S (or exhaust kU)
				% Found pair in U but not in S.
				% So, add it to T (temporary for S).
				TCount += 1
				tempIndex1 (TCount) := Index2 (kU)
				DBRel (Index2 (kU)) := S  % Redundant (for assertions)
				kU += 1
			end loop

			loop
				exit when kS > SCount
				exit when kU <= UCount & LE_Pair23 (kU, kS)
				% Advance the kS counter till match U (or exhaust kS)
				% Found pair in S but not in U.
				% So, add it to T (temporary for S).
				TCount += 1
				tempIndex1 (TCount) := Index3 (kS)
				DBRel (Index3 (kS)) := S
				kS += 1
			end loop

			exit when kS > SCount & kU > UCount

			% Since S and U are disjoint, the following is redundant
			if kU <= UCount & kS <= SCount & EQ_Pair23 (kU, kS) then

				% Following two loops only needed if DB contains duplicates
                var lastLinkU := UCount
                for i : kU + 1 .. UCount
                    put "###ERROR: Bug in Transitive Closure (1)"
                    quit
                    if not EQ_Pair23 (i, kS) then
                        lastLinkU := i - 1
                        exit
                    end if
                end for
                var lastLinkS := SCount
                for i : kS + 1 .. SCount
                    put "###ERROR: Bug in Transitive Closure (2)"
                    if not EQ_Pair23 (kU, i) then
                        lastLinkS := i - 1
                        exit
                    end if
                end for

                kU := lastLinkU + 1
                kS := lastLinkS + 1
            end if
        end loop % End of loop for union S + U in STEP 5
        assert TCount = SCount + UCount % T is merge of disjoint S and U
        SCount := TCount
        for i : 1 .. SCount % Copy S's new index into S's proper index
            Index3 (i) := tempIndex1 (i)
        end for

        % Assert S = Sn+1

        % STEP 6.  U := P - S  (Set up for Set Difference)
        % Assert: S is sorted by pairs (previous loop maintains this ordering)

        % Sort P by pairs (src & trg) for use in P - S
        % P uses Index2 (which is not longer used by U, which is now empty)
        % P is contiguous.
        const PStartLess1 := PStart - 1
        for i : 1 .. PCount
            Index2 (i) := i + PStartLess1
        end for
        PigeonHoleSort (DBTrg, PCount, Index2, tempIndex1, nameTemp1, tempIndex2)
        PigeonHoleSort (DBSrc, PCount, Index2, tempIndex1, nameTemp1, tempIndex2)

        % STEP 6.  U := P - S  (Loop to do Set Diff using sorted P and S)
        UCount := 0 % All of U's old elements were absorbed by S
        % Count through the 2 index arrays
        kS := 1
        var kP := 1
        loop % Loop for P - S.  Merge with common link from P to S
            exit when kP > PCount
            if kS > SCount or LT_Pair23 (kP, kS) then
                loop
                    % Advance the kP counter till match S (or exhaust kP)
                    % Found pair in P but not in S
                    % So, add it to U (for U := P - S),
                    const src := DBSrc (Index2 (kP))
                    const trg := DBTrg (Index2 (kP))
                    % Ignore dup's in P
                    if kP = PCount or
                            src not= DBSrc (Index2 (kP + 1)) or
                            trg not= DBTrg (Index2 (kP + 1)) then
                        DBRel (Index2 (kP)) := U % Just reuse P's element
                        UCount += 1
                    end if
                    kP += 1
                    exit when kP > PCount
                    exit when kS <= SCount & GE_Pair23 (kP, kS)
                end loop
            end if
            exit when kP > PCount
            assert kS <= SCount
            % To see this, inspect exits in preceding loop

            if GT_Pair23 (kP, kS) then
                loop
                    % Advance the kS counter till match P (or exhaust kS)
                    % Found pair in S but not in P; just ignore it.
                    kS += 1
                    exit when kS > SCount
                    exit when LE_Pair23 (kP, kS)
                end loop
            end if

            if kP <= PCount & kS <= SCount & EQ_Pair23 (kP, kS) then

                % Following two loops only needed if DB contains duplicates
                var lastLinkP := PCount
                for i : kP + 1 .. PCount
                    if not EQ_Pair23 (i, kS) then
                        lastLinkP := i - 1
                        exit
                    end if
                end for
                var lastLinkS := SCount
                for i : kS + 1 .. SCount
                    if not EQ_Pair23 (kP, i) then
                        lastLinkS := i - 1
                        exit
                    end if
                end for

                kP := lastLinkP + 1
                kS := lastLinkS + 1
            end if
        end loop % End of STEP 6. U := P - S

        % Delete relation P (value is now dead)
        deleteRelationTuples (P, PStart) % Could optimize this
        UStart := PStart
        assert UCount = DBSize - UStart + 1

        % Assert: U = Un+1

        % STEP 7. exit when U = EMPTY
        exit when UCount = 0

        % STEP 8. n += 1
        N += 1

    end loop % STEP 9.  end loop
    % Assert S: = R1+

    % Delete U
    DBSize := oldDBSize + SCount

    % Rename S as R0
    for i : oldDBSize + 1 .. DBSize
        assert DBRel (i) = S
        DBRel (i) := R0Number
    end for

    if verbal then
        put "   Added ", SCount, " tuples. DBSize is ", DBSize
    end if
    insertIntoListUnique (relNames, relNamesSize, R0Number)
end transitiveClosure2

procedure reflexiveTransitiveClosure (R0Number, R1Number : str)
    % Use (non-reflexive) transitive closure.
    const oldDBSize := DBSize % Assumes knowledge that
    %      transive closure algorithm leaves R0 tuples at end of DB arrays.
    transitiveClosure2 (R0Number, R1Number)
    % Then add the ID relation to R0 (then delete dup's)
    const notSeen := 1
    const seen := 0
    for i : 1 .. maxNames
        nameTemp1 (i) := notSeen
    end for
    for i : 1 .. oldDBSize
        nameTemp1 (DBSrc (i)) := seen
        nameTemp1 (DBTrg (i)) := seen
    end for
    for i : 1 .. maxNames
        if nameTemp1 (i) = seen then
            insertDB (i, R0Number, i)
        end if
    end for
    deleteDuplicateTuples (oldDBSize)  % Could optimize
end reflexiveTransitiveClosure