From 6b84b50db02d6817a657a6364fdf77c83f77b0f5 Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Tue, 3 Dec 2024 23:57:45 -0700 Subject: [PATCH 01/15] Added greedy optimizer --- inferf/python/greedy.py | 140 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 inferf/python/greedy.py diff --git a/inferf/python/greedy.py b/inferf/python/greedy.py new file mode 100644 index 000000000..71e620d04 --- /dev/null +++ b/inferf/python/greedy.py @@ -0,0 +1,140 @@ +import heapq +import json +from collections import defaultdict +import torch + + +class Edge: + def __init__(self, _id, parent_node, child_node, direction, num_input_features, num_input_rows, num_output_features, num_output_rows): + self._id = _id + self.parent_node = parent_node + self.child_node = child_node + self.direction = direction + self.num_input_features = num_input_features + self.num_input_rows = num_input_rows + self.num_output_features = num_output_features + self.num_output_rows = num_output_rows + + + +def performGreedy(edges, factorized_output_features, utility_threshold=0.5, k1=9.8167, k2=2.1713): + + def getUtility(num_input_features, num_input_rows, num_output_features, num_output_rows): + #return k1 * (num_output_features - factorized_output_features) + k2 * (num_output_rows*num_output_features - num_input_rows*num_input_features) + return k1 * (num_output_features - factorized_output_features) + k2 * (num_output_rows*num_output_features - num_input_rows*num_output_features) + + + labels = {} + utility = {} + node_to_edge = defaultdict(lambda: []) + gpq = [] + processed = set() + + for e in edges: + labels[e._id] = 0 + temp_list = node_to_edge[e.parent_node] + temp_list.append((e._id, e.direction)) + node_to_edge[e.parent_node] = temp_list + + utility[e._id] = getUtility(e.num_input_features, e.num_input_rows, e.num_output_features, e.num_output_rows) + if utility[e._id] >= utility_threshold: + heapq.heappush(gpq, (-1 * utility[e._id], e._id)) + + + child_edge_list = defaultdict(lambda: ["", ""]) + for e in edges: + child_node = e.child_node + if child_node in node_to_edge: + e1, direction1 = node_to_edge[child_node][0] + e2, direction2 = node_to_edge[child_node][1] + child_edge_list[e._id][direction1] = e1 + child_edge_list[e._id][direction2] = e2 + + def traverseBranch(eLocal): + if eLocal in processed: + return labels[eLocal] + + + if eLocal not in child_edge_list: + if utility[eLocal] >= utility_threshold: + labels[eLocal] = 1 + processed.add(eLocal) + return labels[eLocal] + else: + left, right = child_edge_list[eLocal] + + if left not in processed: + labels[left] = traverseBranch(left) + if right not in processed: + labels[right] = traverseBranch(right) + + if (labels[left] == 1 and labels[right] == 1) or (labels[left] == 1 and labels[right] == 2) or (labels[left] == 2 and labels[right] == 1): + labels[eLocal] = 2 + else: + if utility[eLocal] >= utility_threshold: + labels[eLocal] = 1 + processed.add(eLocal) + return labels[eLocal] + + + while gpq: + max_pair = heapq.heappop(gpq) + uCurrent = max_pair[0] + eCurrent = max_pair[1] + labels[eCurrent] = traverseBranch(eCurrent) + + return labels + + +def getEdgesFromJson(json_data): + idx = 0 + edge_list = [] + for json_obj in json_data: + join_id = json_obj['ID'] + left = json_obj['Left'] + right = json_obj['Right'] + tuple_left = json_obj['NumTuplesLeft'] + dim_left = json_obj['NumDimLeft'] + tuple_right = json_obj['NumTuplesRight'] + dim_right = json_obj['NumDimRight'] + tuple_output = json_obj['NumTuplesOutput'] + dim_output = json_obj['NumDimOutput'] + + edge = Edge(idx, join_id, left, 0, dim_left, tuple_left, dim_output, tuple_output) + edge_list.append(edge) + idx += 1 + + edge = Edge(idx, join_id, right, 1, dim_right, tuple_right, dim_output, tuple_output) + edge_list.append(edge) + idx += 1 + return edge_list + + + +def getNeuronInputSize(model_path, input_layer): + state_dict = torch.load(model_path) + input_weight = state_dict[f"{input_layer}.weight"] + return input_weight.shape[0] + + + + +if __name__ == "__main__": + file_path = "plans/4_3.txt" + num_neurons = getNeuronInputSize("plans/dummy.pth", "fc1") + + with open(file_path, "r") as file: + json_string = file.read() + + if json_string.startswith('R"(') and json_string.endswith(')"'): + json_string = json_string[3:-2] + json_data = json.loads(json_string) + + edge_list = getEdgesFromJson(json_data) + labels = performGreedy(edge_list, factorized_output_features=num_neurons, utility_threshold=0.5) + + print("Number of neurons in the split layer:", str(num_neurons)) + for edge in edge_list: + print(f"Plan: {edge.child_node} ---> {edge.parent_node} = {labels[edge._id]}") + + From 210d76d02dec674825c2e9e2de3affceceda6bb4 Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Tue, 3 Dec 2024 23:58:47 -0700 Subject: [PATCH 02/15] Created plans folder --- inferf/python/plans/test.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 inferf/python/plans/test.txt diff --git a/inferf/python/plans/test.txt b/inferf/python/plans/test.txt new file mode 100644 index 000000000..acbe86c7c --- /dev/null +++ b/inferf/python/plans/test.txt @@ -0,0 +1 @@ +abcd From e134e098fd896b1df8484bf569f78086a9989e1f Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Tue, 3 Dec 2024 23:59:26 -0700 Subject: [PATCH 03/15] Added test files for greedy optimizer --- inferf/python/plans/4_3.txt | 5 +++++ inferf/python/plans/dummy.pth | Bin 0 -> 52935 bytes 2 files changed, 5 insertions(+) create mode 100644 inferf/python/plans/4_3.txt create mode 100644 inferf/python/plans/dummy.pth diff --git a/inferf/python/plans/4_3.txt b/inferf/python/plans/4_3.txt new file mode 100644 index 000000000..261cff1c7 --- /dev/null +++ b/inferf/python/plans/4_3.txt @@ -0,0 +1,5 @@ +R"([ + {"ID":"0","Left":"table_1","Right":"table_2","Pred":"table_1.join_key1 = table_2.join_key2","ProbeKeys":"join_key1","BuildKeys":"join_key2","Projection":["join_key1","f_table_1","join_key2","f_table_2"],"NumTuplesLeft":1000,"NumDimLeft":20,"NumTuplesRight":300,"NumDimRight":39,"NumTuplesOutput":300,"NumDimOutput":59}, + {"ID":"1","Left":"0","Right":"table_3","Pred":"table_2.join_key2 = table_3.join_key3","ProbeKeys":"join_key2","BuildKeys":"join_key3","Projection":["join_key1","f_table_1","join_key2","f_table_2","join_key3","f_table_3"],"NumTuplesLeft":300,"NumDimLeft":59,"NumTuplesRight":12000,"NumDimRight":31,"NumTuplesOutput":12000,"NumDimOutput":90}, + {"ID":"2","Left":"1","Right":"table_4","Pred":"table_3.join_key3 = table_4.join_key4","ProbeKeys":"join_key3","BuildKeys":"join_key4","Projection":["join_key1","f_table_1","join_key2","f_table_2","join_key3","f_table_3","join_key4","f_table_4"],"NumTuplesLeft":12000,"NumDimLeft":90,"NumTuplesRight":120000,"NumDimRight":19,"NumTuplesOutput":120000,"NumDimOutput":109} +])" \ No newline at end of file diff --git a/inferf/python/plans/dummy.pth b/inferf/python/plans/dummy.pth new file mode 100644 index 0000000000000000000000000000000000000000..a18998400782f4f5a3ab0c9e65eb257e410b12e5 GIT binary patch literal 52935 zcmZ^~2~>^I_cxvfrI{iXN=2!VqSAf#4xy5i3}uXxA~X_0X&xj>18E?X1|b#Q=j-_uVvima^FL|JNs6+|$!<(Ay8V+A(}$aDdE02Qh~k4wW$hLmVm{2E_!(#{2vSid=Ajyi0(B+keLTA3Po#)&>PA z`iTC_N7CoN#-;=)`A9p=2o6yG9~;pDLmdW12Mqgff0f_>RhNL_|7$#^|9dtSROphhm@n$wn=P z+;wg6#KR-7^!N!9+AYRx)h*oc6A!6RQWK?`Eri_8i25%<47imWPWf(DQG;^7Mkc`Gl;|Wz&j5#DpwO6Et)gKRl-yS<= z@n{-dJm8PwQVY-{d=tOf=qpR^>1S3egXsAbgQg$2?-aQ2V zk`i|3zT?-&9Av}lzC!u!9*9gD&hj7Z1UJtinC>lM-MLMIS$@0^tA^PNY<)+NfA$sD zWL*yG4=1AZ{ByK4NMuv`i|Re)JE=8>$vEir?6Un9Bp}-i6IVS(fp+$D$M-B4wQ^1u~%#HK;J?d zc>97>kIcc%!P7Bh)h@6yb7Q$h#&|D8pWcxUW7!MfL%;-x&0j+A?%WnMytod#{#r20 z?*}pawk>u}Zzjz)Rir@?P#30z_4Wqb)4V}gB&&u-*6*427kNryEZ&2Zi_9 zR5@Or8F^(3%ezOilBQ9VQ6E7O#_#dt6~v}zoA~Z2$4Tz38jZa>mhJdC2ye2ZbhGs& zo*FiqKCE-0>TQ|SaO*pZRewOS1&>KMdk!2ZD1)%0IaKF4i<_gc3kSAVgcx{>713S`!YPjl~%Fw1>Injt41wN$W6Kb2@Z#I9?e8;AizySmsP@__6Ca zV@)PFu{{vd7ek5d`%$|$klI&WX1_j6#yOXjF{m;O>%5&&|KKqye?Fg*8ba9e4_)-s z`!sV{QAkVJ2I^R=in@=?L9SyexoC^mC@G2yrH<^vD~St8cHk_mxO@m@yI#Y!y+f>v z*C)~DH!8yO!NnA*8^Nr`oS|Ya5{9%NVk=~K(ea>r0y_mUN*H_=7B-9Htlal(r^6Qf zTiwlXUU|C^8Vuy{#o$Bq-zSgVjtW z?i{?)3L^1Mx1jHz0WEPGfxbFFSc+RWBo<0TlVnp0&jDsTNLSu%QCE%xydp$ zJ;Ay$h+|va$v18>9k{=M4Bl3=v8;}rD^kVnfA-SoiC%2khe^0CsEXN_rm%&FY|%9H z4%2TwLgfj?IMX#7T{6zo6JCK^)xDdqy7HPk*>sPYq#ma0nYMJ6pNf79bMTGaDTut~ z#;ScB@ze7;XqhdC7E^~<3*szjwE6;UIJ^tPhUUTUgqO_e@^n;gi^jTTt7*IM88m3~ z;@(6H@Vohb<~{BN+pJd(enpcoVM7?rb1A@+2;&5op_<*A`|G4-zb#`U!fL&sNL z?8*730xhd}a$GY6b8qzHR>ukS`Ob2f&BeoGmlD1_d^!v$7?FxW2pu@Pl@@FzD6Rg? zJjH*pvfW?!&4L58#AFAR&o1L5?v#?4cr|9;9LvNY2aIfalzaPL=-DxfWna9<3vH~R z^=&ZQa&R)PxHIk5g>MS{$jmmWC{-V%0DHu~5{ZPtsywo-~14 z6N=eTsZzF5B?Z0f6ZjFSBhd8D2ME#{VDsCpDDj~gtuz{pOFoy9d)YlEQ@#oe`hUBy*V6)SttopTStW zdk9lqm%&nZx})c~8s5Cnlq51wkcxF0byzQ=#k-E9MS(Ia*&@N3#5Q86)DoP%KE$)_lHHJU@fl=J5wCGP>W+3t#?Tkv z0;^TAJJ3cj7<|X=qn^|ZdM!Q+F@GwGRRbZ z2;F0fAasr)wR*;Zv~f4*_w6H-{Beln4%nl^qde#+R>9Vo~v2FOntvZH5%LP;#o+1UoG^vG$U1{GhQC2V!bj zwbN66m(o~LKVMF9(+nwTVwWJMR6_PUbp(PUD$ zA5-(bp^(HNBi8<^(1lLV&WxN67&cvP1K@3kUflSVYzddTp)$EV=t3k?+7 z;sF*@Y*6FQG<a^Cd|qA#?4z+kxc0?G_37oPkoFrxl^23 zKWnmz`BEpSTyPb|lYu{sV))?gah7qYf_e3YaAB9;!^@5_ocuqLy$j38+Pjd=+F(Iu zy_cb<&k+|1C(%IZe%RwampSVU##0GVXz13+y?$5C){JDhTuG4@P2o^umOT1i*Ps^) zsx+?p51UsWffM^JaC?|4=EM|3!$KKm?mC_IolwK3`PXQkjx&VXm|*WCL;4jEO~Io1 zu$|zA|F+Do{`5s1Yj1yJwM*A9%{Mb}yZ>P{pJ!fA^iLuX|^xe5}Q z?lvWB`MZoWi2jSsz3bVUNjKR`*C=YX-+=b*gP=hu$1;jMF@K2Inx>6*~^6F|*L?FyMQHhMbYYzEm4pG)+MAzw)4a z)MvUBBcK7jBqp485zFxbdpE^{IktGh#sqtO>VA@0$hqURq=}$AMitNOkmJWV#iDwT z6|G2;#MMGo*euzN-8RZ(I^GbauV><%#rOH%sMW%R7G*3IZ{)@JOuBHrnJ(-sWml$W zQI%CPH0rntpNyJ|c~?sW?l?#CT1Ge`;6Z{*Wsy`Cb-V6zqy}t7Aw)m`u3=uB>;WDDJG82OeUrY`IPg zmr-nM<*{F%0=-k1*Ec`7J#_+<{8gnug&=Gxz08LjIpG(FI?-M*0}doS#&#!lTqLJO zu{+0s$D@9}$36uQUOSBjzUs^=%-0nB4F*BnI ztQK}d;phNV9ASr^ANE4$o2mTIjB56I?RqL+=m`U1mC%1qb=g8OsiQ6oA;b% zUoAT!f9@|{DkqaJuhVBnnJO^EAr>bWrvoi*CO+^eEo_}YRh^O;YVZmURBpzZ?m6i0 zTqCHxHX9X^*V6N<9R6bNNj6Uq1L=iE@NIMN$Q@^D$}zoEG}eONlo3{t?M3ganb-Y?e7WZ<=66Uq9h(5U8ku)Sao(}v~3 zgRIqjyFxT6-nBrF%_`s_n28$?r&7@020UhcgLhM&O5-LxV;V(6$hIUC>MnJ2?bp6@ ze;!^zJ21yS{YsYnz+E8uNP~W!T0|@E=<{3W9b|s#-eh1W5B%6GiX!FEHsy)~cABW7Xt z$9Ry+3Wp%EaH@Ul$ci*i;&{_pT=PhGmQ(M*MJV1!zs1Y>0<}XR_h%I?AH4*lTK3YR z8!`AqEriY16RWu?3d?-V{H=asWMn!<-cVz+e}>cY zz!qqIq+#72D?`?4DmB(Vg*fCtTk(hIijGnldbJg7-2vVQPI2RgpMM475Zs>ld6@ryR>F$)nim#e%#e zAKCl6R?PaRGR^m#$+VJM;d^2aOZ`>P{0ltrR7e90<0|3R>`Dq7SBMeo#Ray9c2k4! z66>umV6U_Ds5Lhjhwa_X*Lfczr_NI(`}{8aOtoW+a) z9jLiRhssauGQYp~L3h>ui>%*Um;i zT`e^H+QIVwMZpf26Rh@cCJf{rql(_^Y{1);vR@2A8+Ua+LX_`Dp(I{V*(gkp~xF7+_MFEuOR0$5qNpaOCP#_OV+IKO7#868ly7$qA+S zFFuOg$M9G@X*a%>iJ(hWkz^qT_-@2&c3{>{HdPb@G`~)#M<35qRE-LBnd?`#uH@MF zuoyZMro*0@|AkfFHMDzl1KqQ=z!e!;lw189oaYx)GV^AJYwv)T&n)zOJc1r(H{quv zR@7(mO8D-`Gip{}i&wjNG@SjE%Zlvf{fm~f#GqV^p8FMI{nP1Q-Do;>q!D%>U&lV} zR;@99nn(XGM8SOL37DoEg*Ln2(yHTeD3iY(jUvZTOIkkm4eDkN2M^Gk#Wg6`8-%Y% z%Ar7e7XNI|N51EU8;1AW(0SKDbd%ji1$bL#^n-1*IwDO{#zxk2>2 zH4du8v{~`O6ug{0nhof5u&m8m*kdPWJ=b9nHlLl&A1S}j7RnvObsJ_=_>^1J{G|+H zLw4ch{ntSvDW5D8?xI|hGB&y^Gmu%2rNz=@5)gbM^Xzs{!6!&mA2%=|cWp-K)Wr#3Rv z6`~qMbR|g?7=U$}K7QUPjt%)≫g)l5$c!Hr}ly?{GCLe|3b8IL^a|O`|c?F$#mT z(ztJ}O`wol1Q{-`S^K8hq@@_nH+F@C!vYDK{;HJh-llNRBaP9=DvcsfoP}`#t>kc5 zjwE*|QiNkLc#Dn4CeuL>m!8Oe3fDpVlhe3+{(Uy|Um`js#8AY_72xpYC_MUlfd%h8 z&5rv7;zk8?T*wzO{|y7QLUkDGuUHGs`Gu&kc7WC8Fgg@i$Evo@K+mk5WMi*SIROT! zb!{naF-l~-e@0|6G>3GESJEa*| z!MPtrD1BCqG(6hyL=TVcZvtV})ZKJ(^D?%zUJ-pq3<%uTcB0SFAR5kgRm&Xv$F9k? zLStAiEjguxReUy`z92#Efi_g!tInq9)zMok2e{s8fM!FEWB%BysPKLY2(I72lg_T> zEc{2~7slY;CCMyKOpDKNIL-3z`QyP(0p2?qO|7^4nNH#b(2N_7)sN=kVFv>$J9wPg z74@^)_O0ae{v-3qTSJ;NV#s=A5DukMEYZuu3kx}%->!$wkIY%}jzo5%ES}H0*u}kz zi363RZLCRkJ+EI91Bv>tSXjb4w%g$Y>uRVG>TZrehp@Y#w)!CH$cAvddnVUYIt@Ld z@<4ju6D~1yC>D%6$d*-PfzHi8P#%?sd2z(~JPBa3@%w4nVrwedo{an780YTdY89k|SeY1hb)FX19=?l)?*r-48dYk1{sOD!W@F7XfAl>4moti3OMb2& z@%o?y*zVK|;yTB;FZVN{T`7Xg!y9JB}?PSBvW0}+UkEE@thQe$+j50Q%$E=q5cHFp_>8n;VC5s0+ z%K_#x=?ZH_zo`)Ogk z=}~$*Egj;`CW6guc?#X9Mn`MMurld6Y|9VB5}-S_UH!sXFmAuHxeTW1#r%qUAAp_s?Y`7Iy6VYf{K6biW(!$Rf z=p=TK(tY$uZj3UUJv0?{>JL(dx-xEdDu$)=OKEPTIGs9ooZJh|$i4O|-!7X%?9(ll zu%(yjb?S0=qgx5CEufccbK&BS7AWvtkA+8G!Ud)MaQB-#n*P%ytGq`7wNIm|`SB50 z{q->a{elFx*eqfaPZ!{f8}Zn?%TVZ~na!`tSj|#zY{B6Rw=jLTT*w`LuK8 zSR8qjlxi)pXK*-Cb1K|Qs)FZ*YPfKC0<(>|MN{{j;_r^m;>C3xA+UQ5SZKL5M;%Q{lU#9Y~9+rt8Am!Aza9O_@&Sc7@Y*@K6e9kl5Ue%Q%qru;=Es_)GEzSD>{JH#fJ@yJs)h zhh4W}-_T+fpX`9ry88%>!(mCCI;wt`;}&h{WS#cE!Dh#N{9rcCTF&Gb7rdtsJkvh{ zniz0twa+P3^X0Szc+XBX9L3rvml*#_sz&&C7Zn_uL|L|CP$Q!aDM{~O>y*>rbiEMy z^RjTrvyNNdeu)_>%mDF!5;WZ64rl3Lf|9wS`gB?fMrJ5d%C7H}K4LaoRC%9`_zAFl z<_^p~vW)3iSD}T*Y#7<+jH8d$LwRNp`?PL3nuubN5qdLF^I$lWn>v`PEuQdCdK4(q zR*x+?KaG^@595_HuDsCgFTC)Vp}K=daQnl_e2bWjXe{NBdhi)mKeK{=dg@eD7J*^c zEubd;CyRk;EPv1w-s{30{?ztda(^Dn`xLE3x#&;KHlrCQzF*F&9>(!b(-w(%sqIWA z@G03Rm4d|uWo&o(h^PNklO1MB;eWGfpSm?woiwr5T$TumqYpEa?RT+`jQE2-CTz{v zF7~NL8ZSyMMvV>Y8S{LLgYU~xnw2G~87rW!O9e}Kvkt9wZ?LYm-EjO#JrnEct#*^Y z0lD8eIQvMI)ro3l+ina2bv0XT*Vu@S+-D|CIwkaVbHIeIYI<7ZAfo8bfyJ{}Tq!L< zXPXMylqea>Xi$aOpR;NEpDV;R>JU`6Q`XqGe7Md?uJOVK$RB(VE@nS~~ z*vc<2b;X7ed+}?*MjDp-9*m{sain|#%lxQc(MJ+!(cFi8PIDc`9G=9_ ztFYr1eXS-J2R-a7lw!Yb#zN5-2UJ@23^a%OV@AhCy44?!+xCaUcag6orrCGX}FfLi?TFVVrst+J9?)ES;lc_Q+~rfFSFqt6_0 z%g#aFlMZG%s+O8=uOgQPqIl(vIa-`tPHvfvEH+OJLk8zie0?dJ3_4EXWh22`D~)fH zOo!KtC)2aDdSEKbZ4E8Yu;r!tB(v!@Y_S_F(T!^DQN=ju zcM*&Keh5sWKC;XMgE6apF>@%m#ZKn+h`6#+{I2$p3qO<&(T7WfcizckfADk)^KN3V z?klsFW2@NkRy{CZ5>0b7UZB~RO1k4}Ld##Qrc|475NwN}v-VaHHnA8cbK7D1V_m-8 z!V=Y7dSQe8ELQ!-my@Ww28JJo@@YY3xW;r0SU-J)QF{!4dDz3T?4cOKv_RkJAUv4c z$x<7eFv7bFF0774uZg3e^zIMf&!>ZF;}EWQb)E2yy(}dc$>Zzus@64gu0r#WGi2FZ zf*-8jz|LzMu&~~hj-FOWgC(K#)A<oV%>=W)TXXH@uwy%xpuu8Xd-tX*-eTO|&~O)rzK^d?HVJ{$$&+GPI4oSvyXCvT5~{H*M5 zkb9I*zs6bMWS?ymc6Tss7#NQe|E)!d!$r)!ilc&i>*(C8^RRht2qmR`!n)(J(7pR2 zTugsR&r)l_r(TY_c8wqt`2_5k(M-D>;<4(nG{49uhaGB^;r@P%A=5*#%y89MDEk(N zJwdP8kr`W|di_OIVNtBR)sEKqETcq^KTM;17^UChS=qcOOmX-Okt0W9#xZeR{%sA8 zF_UB!`CV+p#nWJ6P!Cc$JIO(zkFSu4W$nL$(QL&5exC1m2JfrU@5n6?$8nz-nU18_ zd-~w5sJ6U*krqB{z6m$7PUGt!865uPI~WZ2#1-|mw4^pgq~nO@EB&?Vp;qYqKJpyUxf16CKRSqgRji8S?%kY zW6$ah9%Hy^Cf%3*Ka&M~QlUpd3`^YG-eh45Z(68`m{hP|V$Ro_+b8Pi2=NS%F)sn5p-*!0sA@pFlJ0= z+^fjbRF`v?@1{psoKZ$!SILm4do0=-f8~GeK99XKR*~N62BuPW0uTO?z~dr+`0l>~ zJ7uRZn*|@)?iC`QckEnx6cLWkzkNk_yCbaXpAz0WAkRh|NFd{hSrG7XDy%iN2a8d` z+&H=Av_r9+r9SF}7bzE6`GxuPO3$1zJ!5(>b+F*nrXtc-pNpTasnMkK(!6`{H?+3U z#g4nz@$vNv6jZLEscWp6X6;(67;cDFQ86TUWIr6&s%8bA85Gi7#5}fz3bHzm!oW){ ztczy&_0D)GTkB5$Y!gv&Y(1YlsS}r1N09eA2Yg(q$2L}*Q_BuNw&Y0*RQBh9^%*hi zx%MRjKc{@m%{NA)@K(BO_=4(JH(^oo0V+^YqLI62;?*q~oR-)LQrFMHEK7mapZI_5 zbMq~1+8Twoo8n+yVIn=#e@qgUFX_E`1n0WyGNxWWW7T?M9V%rw)9;~C(6a5fm2688 z`TkfysiW`k7FV3vx2Huo+^(2(hs1E+Gi%|K_aL&k7)fQYAJPUJz$4q)Y-ig%XpdNr zkMcaR>wF^47CGr3wqb64rX*8(-% zzI!yAt7C+Yv$9xGNv!aZRK8$xsWgneG!Ij5*pkkP1a@G}d>ZAI$}ZeJfGwZ@vE5$n z%;t~`JqpZ&gEudO!u~#}9wB7q!_slvH!XTTHj_SxNm>t?G@S;^6rkf)FKl|~NCg_k zDC-r)DKFPV->iIoSo~hJdgDSH%nImvlQ$ZUEv2D*Bx^!sAHwL5>QLWy7&2aEQ&6Nj zm4yYe_s6ZUNA?`)`_#f9_nA!IEDpcUm&ctI57@}03>*lG=Z_eDk50YfO!;0SBDt$_{aMO@9}=WKL^A%rk*Tyv@)vM-p>qxEiJeP}iw zFleL32O*?1kDvnfvFMGHX_})Jov0Lrc~uECzaxuS=SDnWk%u?#Yh%pVLlmR87d-ZE zVGDJpVvogM6uTlM-Jq4I<@5j|*Dm9OPN%`aw+H!mrthFgcmlHauNLtUwbZlYA+Ncb zxUKSms1;^LH6tBarLK5Qp5kC@SydHk8a0SD4+OItmDfPpUarO`X*yk*xlh1noa7c? zJqmu88^Nt-C`nxV3d!EFO!~$G@@f78vvv~RKJSIbs!hC1zcm$>Euk_maa^N*pJt~I zWiK82xE=B7nCx#)Zo}5H(Yu`4f`>etdo~{ST$q3o*Scx!)nbf$?M6yAv#2J|5wpAQ zK*vmP8hkO2X{Hq7MooE?ev}PjJ2KI1pMcJY_(GMYB{V8!g3#ex6pqvf)?ndIj_&jD zjEf!#@7@;#NNUjJdn!1v+!U2AK0&Y26!`Qwonn;Yp!@1c>Z_Hu&L0vCnF-g~Yr{D# zbkSxENtjMnyNWo&);wr=r%jm~0$6t67PjPUESY6I=cbG(V+oCy$tS^)VyA|~iMB(y zIjxR`U5{hMRsE`;6+P`lz3x|r@rCZ!cP-v27e?>LIB%U5Cd@eh3AA0gCSTuMO|7n#?bDb!}* zz;<0eh^IfC#_zX-sasToa2r!ar6==Q=jO*Oa)>uOvuY|_O^js5iep)*yc39RisfBy zw3Cs#xb+S1(HQTXg0AV2C>G;J_7zc(cc`A#PP4;^&4IY@^Z_QVkjcIk8G(;#1SadR zBi)oUHnRHMLq(Mp6ftrfUkq)L?R{)+GN=AK1!zd(u931lv`v!>|kMaQqczoSnZP z4~+g!dKZULmdRmSvtXh~gSi}K_gRsK`As^#NELU2DgKxqh3e6xuwnN~dSG0Sd%m8A z-=k_on$8_mv|O34?J>a2?CngczzD88R+6BjpOp<(!YyGF*j+w^d#=3_`<)g+uV`(1 zMrbg#5x3FFP?62gmW0XAo(XabM7ohVF{rCFmb!e(!12&!)NMThAO3c+F=ujFz!_aI zAD+o3@@FZvKbjx(VLCO6=qjvV zWsMiAj?#`Hb4a%-kisgrpeb)f25S4Tv z<^;ZD$TaVjz>}YjU8l})!fyC4s&dO8%$Ca2V^O4hd|AFiNb%Y}P@4*j;QvU4NGhiqm zOa7*-m`r*LWL#Z_11rR+!mSg0IYh@cBV>OBy#3hhN7NF4CfRu8p@OJHY$e8~DGzXN(*U^ls4xht^Z&NT) z>;g+H)MfEdCtNZyokce;2k8N#%vMDl?w81|)4m9OrQ)31ur4;XF9q~wSAg_7Ul!7{ zfnKznXO_~wxW7@A$vg}vpO?SE;^_n&f6a*E2FozlgTc&Mq=iyh`JPc6WoJKY$jH}Y+-P2lGn@#?#d{ctyPI&Nzz z!KKEMwEN{q=K8Rgo2QmT<9n1bG`fp>?&);eIZBzn-MP-HejX(M8-8ro)IlWkx`oqQpncOP}?6*?_Yt?HPWOf1wX}JUxwWYX{+;NdgRASBZgvQm}O74OZo8K(P}};QV)| z;AXNC&OH_hfvf#V`eXq~HCy7G(7kMLhz_-F-V8(I?NIL5VAL8dYyEI$Dc!J?#a+3B z@E;e$N{XyS`owtFG2$uyi*B+aEi`}Q+hH5#q0RiOh>tHSz&qs^ zq3U}*xAK#PD8Fl@o`UJHv9`V1N_n`oQ_L}{dJ>M78-9be+6>&WLmM=|Ek@~WS8>bF zDl+no66q*huq)>>RY+Q6!o<4}$Iqbj(-Wy+3&Nsl5>Ud=r7wHJQS;9ap~vYDpwJeM z;b&y2YUpFW!PXom&8(&;kKL(i;}zBx-#`gbKiS#LdJ1~^j2m5p*f%)|73UO@`<_g; zMB9?4R8~P*s6P6eDd2RCJg6#L&Q`ruLhB{7Q0&`mjFm}X;=fz4w_zpbx<{j6dlW=G z7sE&;U_Vb4;}M@HXz6won|?XL&=I2uXEs|!PkbxvlyPP$X-nzkaXUyI7tXYL4N>d4 zGiMjH7bS*ub3#jP5s$4w5x*Q+q*@Br#;+yiXVWpz-;K??VFlTjR$*m~7&;32*w!`) zoY=OU2KT3v>-7PF?nZHXaWMtOpWmWu*Y{Ax?oQ~`X(78MzD&AHiC2)+U~WHGlS5EC z^IvqI`Mejcl^r_t11k8|bv-owX(G;S6`-!#LR$6n5-JTiW25mi*0=s1)Y)GK@5C*v z;A;^5{2W2+-HzdYza&0Fd%1`|&%}zUr&-s8gDmi=KfAd&i_MZh#xA~m!)~8AMzU&BZ2sd0{I%K3vG$jclmHU5J8|;dJ3j7U@qnq88gfXg6vfOS9=_t>0(h zyD{tW&0!_j2o)HTIE8ZSEbv!CHK*`LT6li08>+;lqo-0bD?99oYEefqw@{p(jgqKQ zsyso<eW$Z*fKGD)bTG%dEGFIC0Yv7^Egyvv}HBSiYkb_j_kj*0YmjGTVags!b#d z?fY<7kiom2TuKnLUBt=cL*9~yyzFd!Trt6bdX=|ev9~WjXZcI6K*JBiuN|TS83~M% zn}Ant+~T~AUh~K0#K`g2YBEgB;%pYKqoPl?SaMhk1u+4aZJ zmdx<%*Og@SULV~KtjDgKEAVxr1AG;ZpfS%G`t6h_(*j?#mix?N$8QmxLnpbHpT=^h z8^`kE;bVm3j(d^#m~fUPn~i2`63NDQ8&>pxg?%s6L2i|`b)k1QeQlPaXLn-Z*^&(O zGr1=S+UJkBOPB2}%Eo11BPdbJSD-yg2>o#eoYQ?fI-e8;0}CTiV*Gk~`r!&o^HHbg zZ|$LflPrtUPovH{e|i9Jgarra*|tpRy=e{uRbL=@UWG`r@68(yxeUW|mXoe;G?vKZ z zIrqOY@YFvIhp)A#;RjRr*S))OQF;?O4|1f?qm2|9KbG>(M&Lf%y)5jGEqpA`<6C6z zG0hLJ`05i1xLe~fchz2$BULZP5h7pGzU@J;k2mrvifydB`!;|2{8>?st4$AN|MEi1 zSU$1G1$S!I;Fs#NRvVM!SoH%Pnzgx;b^I+Pp@^%B+!RIcbXFq%t>He`KPS%{OKGm> zN!HW65jR@yq z0<(-*#gbGtDED6@N%LA{RUC~g{sdFb+Iqp1S@x*ltb=|H#jtV4Leag3LHJYe8h_2v zu14Xw4_CD*o8R2E3Cbm6*|OxZD7&N$dPKQ|FGRzPgPWjl&JX6EZ%G--d+~Xp9rpH% zGrDq^%+BF-hehAkwZZgK^6;WrjCkuFZ73ErnVCdrA zRC8-Pzq$AoU$rlVf}+%z$G)fh+e}+*>%75!UfzX;nVaF~y%z9X6u>SX+K0oWT2Ox3 zJj_>~1oJ&A@MOOO|D^XYh8Xv=Lvgj#WiSIe0`)nC$L(+u95H#~S^Tc2M`p`JddK1r zx*e^89;T(Z{rn8(F%SWPmcKy1ooC4#PQv0Rhj4U6D-E~o<1IB6sJzD#R_SE20$*k8 z^8N&#D>r20@D(TFHwX-xNHkl8nO7UgZ`%-|tti$Jn&?1SSqd~Y>Z9h7 z$I!t#q3GQ@(V0;VHZ3`9$Har^<9!07-)UnVyT+H^6C>x9d0-aXLf7O^vAzdhbpJ^f zI>!&Dh6DFNur&w6hQ~13b4qafkP7K5?;yASZ!msN94FHqgmuO5;d8eb>7T11;pIux z7{QbBptUqFp&Fm>UX0`Vw_wk4MGQAQ2@j*>1T(`MoZ?*~D@dAs> zdB{f1iRGKN29nxm8M;#tOIF@RxT4}S(-++bC|IB&;0~yeo0S~|8?^Y_6F|J9_c zWL-Pskn$pLIsJumtwpRXNe!nuYYl$@l7+M%oa!Gu{Vs_wmP>=%nYF^`dH1YV{N2GOC*|YmE;&56p&Bfd-@vMK*=QH? zovm0L$dZoRgVWI#7O^gy>lj#$!GUoU8s7*RK|)Aay%Hy7RWomg)0|!B4=6n1j$&_G z!E@ONin=uj{tmNYh7S*7!{|@+a=ACZJiS(wZ>+vFE&393edh!^BUTf5N{jACt!B5M zKLnMF9el5_0?roYa4R0X;wN7-#YlN6RFZS#LN6RA+rz^!#!eF_Z4t$~uO5T)iH{iZ zvQDsWZVG;G8f9(s;Uxv0bjI9m;@GrNfMwbX=;UB$6nnn_-&|Nu(fYrbVC`_W^_K@a z-8SXV-!;Z*J0od|(-ivBSHnx4^Fgm;SMl?^C1|l)4*xw~#jkfc17&9lV5*cYjJR@` z6%U@zVuRG7Z(=xg)+*Gj)8#PwxHhF$+`;LePg9dtIg^e}Bi9#4ncb&RbnlrWeHZrt z!yh|Y&+iOoBJ>#fm%8*2uW%()-*CUiCJ@jOkQ zcMM=;L_T*;VI&r1`7mw2ZrGn_1FU2sDH{x9g5(tXcP|Of@+Voq=^6NUZ60eYJV@W4 z^azT5PVf?+no`3C68!XFA+?{F-(cHjbS@xhjr$x8D)N=H*b?&Ttg= zkFp;3#g{1<#-n)R0p`lw;C9hA2yy&P8^t|%zu{_ht4{^pCtt^7E~Xs2rG-jCzaetL z3bt6MjEq;#A)|~LjQR%A`=8P^MaMl5*nCo$TL|vsOwiLVg-*j=zLei&BYco zs}=Xz_TWf*qOhFh$IW6L>x;kY3*D|0rD^%xtig^K?VMQ7sIQ`bfD zMorR$Xb{pYMN<0Ry_3wL38kV4A(<1Bq=25i6e2_BXb?h~9`Aks zgFbaTXP>p#`mT;#VU9EAjk`Zr`Fo^Mmf}fL9ygXkJ`_^w)&kUfTf%=$4T8po(fDri zP~Q4p1q2nJhJ<=Ek}23t9bX<%$HS?p7JZT7a#?Z=7UqSt;jD7W80>NI!DYfdF`%=N z@e2vZ_zp+iTPOHeb90$b$P!34euesyspO<*OG$&?Le88^yv93cC|_6&iu*Ij;%yzP z8y^h~WdSfHdIEWTNaVJ?I0g43ENJ4uL}qI78iMA26f)mmV9u`;wnOh4)DFJDE__U< zAqb$u)f6&bG=R2e)>F@uFl>C8gmMvCtbAe%^lKTS=0j_Yd9$2cO}wk( zuL{27an(g!z|niB)dk0i3-;Wq&H zoo3tC>|zf*#p(CyzYwmw9xvT4rmE~V?DhPDi#jU6FUXsYpe65D{2w#>kqRAW!!Y$> z16}&FlIq_XarS-7Afi;7duv8zLb2dTgrE9GCv&OyX~a8MGD(<7ZR`G3xek`8c3v_H_eRXcpyvSV z(yz0kB1>{tnuVJ>cEaxv399UWfdQ}A!i!(SQ0BZ6x{GzNBKJ}u2m6yBe&hoGb?;uB z<)VP*KKm(K!-GP%9>#l*lS#BT0B`H0;Wc5FzIP>?sqTEwJ3qb3W~Av*dZQeA9&zP& zUvs7(FBq*7ouqefR-*brM^XN}Z0t+kO~uz_=~r?yxQ46ZSIJ~%?l^%<%YGQKD64mW})2aKIHCtUjm`YWzV^9%N zlGF$8l$U(vE3Um@|3lWm!8Aq|7}8E}hb-Eu^#krqiK5gckC^33SGuHW3dapgus~LVX`VWVz0>+x ziA*fVpa{h3EZK=-VH>{n* z+{*g-oYH+*=dl25zKm$J)ie$4oOuM@BTiMhS@gnrj)%)u|FO&eJaHEv!I#FoyrBz`e_BNwPdvO*g_cn<^@>2yav64 zq^YAklzVe+E4nFXlU}|drp#5sjzAx_DR{7L-{WVzXYK$DI~+m}T&@Tip2g(1sh)Wo zu0#{lMPGg099l0GrP+`H3{|<1l8i z&r^+|inPCF+XE zqY>6IeEh)->i>R~wTth8JVB3_%v=hSPtT?35oN5*O9`G@8DL$ymT0c-WcWQS9$&6h z2k)*+oMdV(_@2>bZ!*ic=yQpTQyR@S$@*8v8#L1l-3YMH&Va=W7n8$~7Mp(GbNqO* zsrciLGUkpwL=xAX=x5|4>PaO2{uRPpr)Q&C?@T%xB@3;FS*T-KR27nC z!zm5OqxU8$eB*V1TErq~gl##?&9#P0Yo#&%=Ryz(zIWUFY+O3&D#-nJ66~jIU|rN` zO1-!YTc@Y8jG-Z<>GX{|dtw_XPrc1O7xdRYttcM4+M%j*Huf4xa&4R^vp z;nB&Csi%R{l1~1lVi+Egt>+rt_R`clX1HWkCON$dBmLx=C7u$L@@g+084<-4 zpQ&&Tw<@YSot`n*+9bB-_aR)+?m*HbZL#(3R;c;=7~Y)i!aFM$2wKZyezDRHl*t=M zxeCX*wW&YwU56Z|+?@qUC#^6wE`?h9b@8!XH@yFO9#n4cZE*UuhqZo#nZua?ke1%X4PJBw?9^4L>W&lIKDNS6>*vhl zQ!Z*7Wn;H$4$ILy21*A@!Q4CnPTw=Z$KS&#WJN3lL^?yFicYnW*eSl(+6lhD+lA>) z8QiM1rVuxOIcBbTi=PwSs7u&^)d;@9g9Vb*TjYVQL-bjS-4)ombT9XNlrjb_I7Q>W zXeOYwyI>FnT zZsUK?cEFCC$8qP%bi*fBO(s;_qe$65r->Z&y1;5> z5N0|}#GXu^pSj(LB|Y59y!WYMciS2G+u~By^fr$yyjQcCA3Et!Jx^KI)%@**6dLtC zlC!LM!Gf>8X0{~4=-j)^%qf9>Fa5%l3VI><-ErQdZwWaq*nskXI=KTD)oj>EIaJ#^ z4x{z_DE;kxlu)UG!kv>?))OcAW_k%vrkddSW9FpRa)+uvBnd2nJkV*@XV)%Y$nJy* zrzV1#V-wC@YDM37xzo8dO1Rd-kW&bXVNrF}{D8%5sCjptBp#)c-54|cT)viem8Gz7 zk3a}~o+`M|QUd`ee!#oE z=O8EZ7{%=T09|H@l(a1tkNG1yE{;Xbcn;hBZE$RK4~rikM@vwhc`rH1#8ydCooB7h zq6P{4w;QNv$qfkoFN_Pzs)E+M697y8vRdJuYFT*|K1*#uUtL%7zn4lrbuYoz;VfO( z-UBrehUi3{q+)GL_1D)^_vK{TJ35iRPrt_598$)qX16KW)Rq!M12B8f3s$bOkB#cl zqfKkKz=OTHBzyHZ(;X5_mn9}p=J~UzkXZ*a_RDgoru2c8=_x9@sfDLnUHQ12kzl#& zH0>^QhIAh*bgVY!>Q9DamGvQT&=St1?g&a-If5EXq_8rq9!6X!W~qh}SXl6w2JMrg zjjDz;z0nE3hb|+-{7Dr2Hh~#;8B^rThpc(=L(sUj8T}UwW{Gb{(!90b*~-28_;6`C zUy~gos`~4Rm*d8f{=5_1_aW!Ww)Z02W~0yh6s|`33(erQJ(X-O`GI&!yh!Y|4(+RK zW_Tu!Mt{Bwl{&w}X?7;=Yy^h^%T{yU3!$_!tGMl*FMg0AKtRtv5!hj?#y z3c5IfLKHWkYSIXPNuVjD94v&VTkB!&zvJj*xrv&z-$0)5_ucq#HZJ`elMWOPEp(pww=elDaN!XvWGLiuq9rD zC^SUIcDAEBj$IcB>jy2wP1)zceTWbFt$D`Un|@+6QI7^YEYb4NJhH0EMQixU zo7!fvS<6NO1Y}{;tD{sodKx-J#i8vXTTFC4z<*O6UTxkofh6Y};88Ve>?#)_T3C2wJA2o^0iAx7qUgcGs!>bI zuxj#5IPE*4vU>3gc*50Emy`{ZY! zxbdncCCraN$(5BP_U0w1Yg>`x$V?W$Hx8ffiXdgtag1`3#oTYY(4cdfg>F{E+FWr`nzKz1uI=d`+Hxj+Q)n1h;wo*#H4~>-2I$| z2hNA&!>TCV+RgIUUt&?l7lgMjPszLQ@f-F`p^yk4^uMQ6ee?cKcpG?{zZ~I+&sU4v zUN3N=c?AlrNo5FnK9~e@hZV`UHWcSvN@ZQ0QS9}hNSyNFGWO~v;K#?|xalfT-MAO9 zaYG`Q<*xwENLAA7n#azs`3>V%n~{_^N5%=`v2jr?z8*OnH`V;X$={XH=J{W?uVD+b zpL>jD%@F3KHWT4ZUIgMhC*r7`XEDWO7#Uyt!46B^WkuZ?EOf6f4S492uVpM*hYhEi zfLAo~Y&`_*6i0iPLD=b~h&LxM1^*wT@UQt=woYL--mzE0$1Y9WNWoYCz4;fX`5mj8 zhgGoT&fOFqc@Sqv3Yq1R-gxw&BzovOlE1q?r|@32+IMX@b@h+JH-VGr{F@o5Iv`GK zj8fQ-!U~v?bei)`_QDnC!qHyt0Hz8-?}1|m!ndNx-pugAgpY#W>^l;Bbk_2^UUqOu zp%BvZO8J`u?ckWJL=Boz$fDFSf7u-DkLtsiiQhyup^Hg#%W?cUPXfbU%oKLw()6+R z3aoM{=Jvj{Kwl}wQsyqA)W{T2)%prcF2)L8_$u1lWd=2~4#BkkBC2KjG{!Omup>R2=+09`yy7+l@9dMpFUym_BY73dZ(V@q6>em3t~F@j$0$fs$C zqInI6A!KA;gt5A3FhlGY1!`n*WxdyE#my{8kUop&rweb6-M&{!#LrN*elycMq0AiC z8Me znvE_0$enl~PE{rQG3W3)l58A94p|qV_g6AW2=m{F6>C}YFF9xbqud;`GYaLd-U(~NEVxYmiev;!~Vb| zVMZTdO=VYb@f#)dKB-5=$A(t3j#F5u><&do%*nK@f*u|%pzv`$=)1cI_Eg)_*T0o~ z_$7Bf(s>X^Pdpi8hMsvtDd_rbNYiSe>#t5hRe1&ld~1dB zG<7b1&_2v6*PwS@KUu)>*|=reC~ChVWcqETILqcmboWLw$^WnuJcXaC+!nS9e!z#= z=irDgXHHV}j8WB@k^50ZL?)NS(cfqdcjxH^wk)z2vrTv#cTFFvFe^ln)oci8pGC8? z$AkS!35pk=hCvef%yZiq*mubV=BbqNFP~)NGLt~GIp<4@-e=J(?_o45cL=)u-o;w= z#3=ap2J9-XAf-Rn+=}HVK=z6kNz7SQWm$JaqN66tEFf(oHBC8-aa+x>?&uzvnj@l3IvFfI z#}=msMAB_nH5{m!W;5sV#A@H`kHGVoCH5<&^Io#oL@VW{g2mMgip)aN+}1!AP7$PI zG~8Ct{WMo46H3c^%+cU%vcPW$hR~}gSdB&iD4c!(XG4~Q{hX2Hr09)i57SWVObT^( zNiy@hzoBA5I=6Es@n4QsvlN{<^x^gvNLsd(sW*3n!`I_DZBYSJvd-pHPoH4)J!Qg^$^Fr*}+j{{Xf5BKKI}M>REXga=-d^to~!DILurTf;ll7G#2FO=~e|V<8H476Zd9!qCi|Sn|kwtnKY5 zZtF%l)bCNk^5A6rU6TiX^+%bh$~R%3V$1E)70!!dDXe}xot|(H04xR!~WuT z;Cy8W?O2ilE^F3-Oz0g}Dll%39v_D6y(+VQ@r8flo(%_tY)A|~fD<*va6SJYj53bo zUUM(#@4R39#2emYrN5LvMn9xEh8nb!dy&`CkMK&oijrP}HM}Nz`+`)~N;5z6i=_gH~TNO1h z!NZ&eXDH!}SL4up%4w#Y?*y~!+#q(wJPLLiMLI90GuPRxnPh7Qwq;54Kab28H~Gs4TOe@zWs3bX7~3XkVaM1=a?lpe_3Qij*9j|GfL$4> z%@VTEu6>je+$uT~FXY6!OgO*9txRvwUD%ZV6O4AA2OCKzTKXy(YMYfwF=Qq5o&Ctx zF1`o4A){%J9OK7Lw5ImD8kQk8oeS8Qjn^YHndv|bj^))U=2sOOwIs9LGiubcFAk0@ zHvs8nuei29`DCb@igr3<$Z3Q*Bs^22?qF+@xDdvw+AOhsN(S0rMN&Psgk@=j;k#+e z_)DX`SaiA_jB3Bg43~70$%^qPSs+KfFV}F|Gc`zItSX(lGm7)IFMwjlKeTA896p*^ zPM=FW*mT&+Sy>7{$24tN;Vn(J{BE9!=W?DK@?prHn_$6hf&a2~NK+sk#7>!lt%A?@ zhe8tV%PZmVSQ6D2ti;2uCz;77YwGIVPwo}lxow;SKHZdr^O**H%GghfUgm-SKq2O2 zX46q^339!xO{3oji4;GKz<{^4oMvV%KfA&fOMKR`4S}~|jj9zNV*8ObEJ(zT^SU%x z(8elkY^16!53ikSnpwx;l366WAqCdP;bbEC z+54=+G26L~{nJmP#^TTHTeKGJ7Pw+V9|;WGqf?mCASvGRha4@-zX-`AUxD6uQxdyj zg=)FpP)Q39>a{XDh-?0)K)>P`NU)Z-b$ z_y0b`rUV&-deu`%bJxI};TCK`f(xm}zJ-!4HSk6M4Ex+Hk99Y7nEQ?6Hh1=XU|-s< zvHH15oaxDE*i^8O@<+bp8}_6zo4PLM`|cZeX!Ql~Uv``EaZ2b^qsA8&OVZa`N>d0qq$Yk0oeM~QSwxAR9T3P4#agzoV3;c28i zn3sP=gT>MGFyRzrC=SML5n@botvpgJY?o4|~}JYuE79;ogu0u$Uc! z`wWLb!997pZrTfCA8s<3>Ovk{JjiZtB!0a$0cS5yprY(O%;n%7PHB8BmpXMTehWFq z9qAKzd!y6v{R)m3ofw3?wG_^7wr11UL}Bz*vb|k(7#eFm@Zo~pBID=ZVfu3MQ%Z}qN-Wy69IgY@4B;cmE zsZ7=WF8|VKBmX|I99qunW2KPy@i|}5SDsG=hj=UUiw%XeH_@11bCk7jaAS{U+C-lt zgZVcakr=Y!E{oog!$p;d*b4DRFZZ1PFa7g>CzLc$H_dl zw`YL2&L5G-g@d$ung?cD9fKD#aa`@(!>~E66XKtXVf80N8oo%EsVtAhvl3o7^mhhl zV(k_uybSNBrri_<7teh}NeMl((QC{`S?l`U{M#-EyQ8F<{O{j)53U|G;1sFlvqOUS5v9A+JKJxB~fi*8J52(qXqpJQToqDRxH&-$#Z-8 zg4wf3X5tj=ygZyHI~;@MzZxLu!Pfu3oho(qYExUn z8&)ploy2x`g2TunE;dLNQf6-Dynlo-#qtbPySNb3UOG``??mKUjX|XBf`;Nzz`wc$ zLw_K*yL%m3i6UuRiy^MMEY0To3`6S$w)l5*0o0D#CTfdWL(OB>P)cWu)Y|BWK~$WPakjl%nM%!Ca~np7kpm(5J)@s7q_UJk$c7#RvT-`LX?F(z!!PU z*&9uoQoeNgZX0U|XyAI2<*D_~8cJBKLMz)9d7X`NRLG}7WRpMD#jfFg4=jSv_F>h< zATYIH&d?`MemX06IU7a-) z)N90ba;9-NN0XY-WK?lW$0os-aOT@Kx|2MWGRr@>-wGaqq*tx+7>Xu?&9aCsggj z_1x1UKj><*0QGsxXj4=+X3bnhc4sco&j?d?xkkd){cR4Ni&unjlNfw`^9B{&%cXsT zYjEREX^K8QlrGurV8iA2qjltURuOUmHHNf7%8XLFcZabBEj!ux1&a9N(-53v@e8tN zBVOgx3BQ)GQu&*BS^p=?*FJ{^d;YM#fAKK%vmWQ7x|B|O+w##fY{_-p9D&E{jxkw} zSx7)6ZXO$sb+3-`oyK!1xornF_`T!|u506fkr=(;53uo8<{&3Fo(26$gt*=QOz(~~ zCW$B0ncHeW`=jxJ@ga0nUq^jL*62InOyLivqQjY#Dl2heFS#U{ZVOtJ%*_vwrFH~f z4l*JP*FbfRRje&igY4@*L-Yts+!_6Wc_)qHltvuHpC_V0X7UO2^LL@5xU&KeJdstm zKA~~8#gG*gM{5`7!{(deEYt5D78m^oKHcA;)vJ_kI+R1sNl9ol!wMD~{9`TGN5Ss2 zT}m-Ks3KemQu3o*#prbOkJAKW~>(YRh=#% zXqfoVMC>DKq07Y`%(Di6CMjM%;+UfvGwNHgqrF7Q!S%syqpUB-j z`i^;w>|znwV{n1jX6&p?!mO1x1Vap<>Q)xZ{pSnmbg5J?@DJ`e^3uh`xMG9f3d?gP1o4Mo{{wInjW|I{%bh;N&=H6&lYxb0u#*r zBP9xK0>_pZs{a=V^@Q~2Yi$>6fLvAzzu1@ ziH=5C_;fWpmGp|@UgR+;t$5PPuY`XyR(TTvpTxG zFcp43|H1VwVQ8xJ2&Wq_!j2bOf=4_PWy}0<>(bNg@RUm4Z@fHnY7SscUm}1lyuy0L zU1;^rbIe6{J#Y1K67Kyz44xXRVxN-Golum;KFGfMybFpFZdc1waf|B8f{?6VS zO=TmunbY;1k6BXIU}iBgU*O^dp}b5DjScQ#QBNxHzD5N;_KgHHRbzG?mZ8qevHUWF z12nU7FJ8TVh~m}9GsmQ5=#r=nkM&mLStSSZSK7~}Nrkf~(G{R%wVJtViIdiun@s!Y zY1}i^gg$7MLx5Krr0DMex9)>%Qbaw5ZIq!51p|CE>JpRpzr=nmP^aFoaHeiq!5!Wc z&)n`mC9!j>vGd#-Z210!GX8r7&3(}*yDg?_X|Wbc%DjTKqaNt=(veh_TVTNWuk6`u zO_)%Ui0xOjU`nL0tLn<3D&-pXS{=bmLg4A`{|i2L1EP*Sm)Tj>Gg!>+qEK5|rjy(c zlKTYC)Ulb^uey=DG-EoB(K`a_x0TU;${5@KWT(OU@l9fNiq=h691 z0roURh-OI|Vc!bD^EuX(*8NrBy8>S`tCR2fJ{u=o0|#02EjxHvzaOXj3}v&dRVh^8 z8MGZ+S<6!|s9t`Qda8GjScw)lvQvrVejQ<{3lG!xGwtB_`4n!mQ=qAf#qguc0sJj^ zflro6LQ$=2A*DjXyM*E>d}sfSTbB+SM^(>(Nj$RkT-rG@eSOT z=F+c4XQ9|Com8(0nE`ef7U~QAFjKeD0>vBb$DYH?IPeRTd@gNU+qVG@_MKw(oi9W# zio0>gpd#Go*TSAYkffZGK@hGziJk?`qW#UcsZr)9d$Z;toW1RbfnNYjpX{N`LREAd z(F6wd4>8N;vcSrWX76`La$}9cDLUXZCU_UKsPlKp!MBX(wTXS)pbf$2svyfKpQ$A{ zimvVvbo=~l$`ZLyOKuLC>p!Kn-cLYb`7D;D<%F{4oy_#qAiTWeJ$?B5npM=B(y0R> zaHh?v(#d@o8Yg!O?}E9sY0DR|ZCQgN`$1SXNW{iWRiwqTQLIK)iX2r2V0Qgru7BhT z41O7dn#ZnDXIBKSsXorNWKX4-RV(PBz7aJC7eJorY8(+JhHqY0bKCxm!lLbBqdcJ5s?F?KLkOx>`pjZ`*W`T$c!%x(}H`k8nRNOTwu6*?3BQ2TO3fMl+_xQrAxh z%80#)Y7XI)ykx478TF$KSzlg$ZV(oYZ(v6ql+bXOIFc8vRpXZZMVdz9`yzo&Jur8drnFu<4 z{2M-b>=JtRq=-pBIL4C4d;*L3gHT^x$w!Q>0i7>%$=G89g$K>L;@jB5wOpD=2It3; z*hqmBH%gO=rd3jb)du=%r&H7N(P-K8J6 zvGHfgtoth4wOo%5K4@htb{(UQ_1aYQp^Vzr2MK$}r%bYVD@so;Lis%cAMcV5>UUJI zpZfb~$ov^JkfUIG%c&4O+E+o)=s4ctc{R(>JAoT?G;PCk>^T>khn&N(NYL4!20Ck6 zv2|l<6_E?o7r%f-gcICWD<%h}-Dr|0uzsieqTd!boZ*l} zvb#c1t7r-Y3w#8F=F=#a|B4wn+Clc8pM2)o!Sq}z44*oS^YYSioYd%ZVE1zZ>%1OD zrP2C$-rkp1I;TMNi|aH?UxVtc7qe^8Ib4KGEPB2SW_Gpl_$#9UWU_;>>r5QXIp`t! zEi(tFHko4OnUm<8U&Bh{Cc(Nx)%4GvK1@@V)rqppI3~5b-X>M>3{2c#&l)`2pxEy@zc%nBju^I^`jo^;=8+^m zTo{RG>g9xdgD`7G48z3oH~dxACUzxM34i#VgxzN^@}(2x(P!2NP~MP9&Kq>#>N4A| zzFL3Go7&1;GG2k--EcU+el+PjE8&H2qp7R?I1PMVBD}S;>6Y(Envum(Y05Ns8YTuV z=i8_!)*IesKVwCEk6}-2KASV{6I1S94~MeUFmY88S=_OpLaivccS7hii2Q`%7tUk$ zE_0O2JPdc@QZe`t(r)`*tUO8@ozBmtl4g7fdIyhdI@PZtmb;n5tEvc;|FxBZ9r7{q><;#HZy74Q zci{ZiU1mYc1g10V#s4Omvew1Qc>7s6rz5$OuJR@1ZhetteS{t(@i=Du*aLihEot=b z&7`38P~@x}0e9x^g;593a&LbEXC@{hhbo?O1}>uKL`_?n=qyUww2?*+8o_pzOJmNI z1!Vop5I1LD#-^`*VBqG>F77)D?7v;O++LnG4c>u^9O~HJqB?HohnhYq3x?<{8bnJP7Ah$_ZHB za5{M?>4Eok2 zaHA*8Cg=A`xL0owr1rfet6v@P??NIMp=^X(*Byp!#j6S1kMgqjron~2$)qf1g${!) zFh{zGOtO4f^bK=*f8r1epRkQOUE1J{Z8Dp8>L(j}CI#v)8IX;cHzo@G1f5^>>1tX8 zv|baqYHuIX#h!A?$n$~+t4uM$P7D78#zTix1$n-_k8>9f#Tzw-lyl__Scp$Sm8WS{ zO8dl^%4b>dZ;HXgXYz5biU|AOEkNfk8$50`nN$_$QnA>7 z$xmqDe6T=Lo=W}&VE@|HnA!M%9}%ic+Y1LH6F7xE?|#5bv!C#<}BcDu&zPh)WsZyIl=OTiDX5d3msMV>N0g{$!nl zwlM>#_iX1TJvOPFSk@9{T=-%f<-FZ~Ak(%!^QQ!Vf5H)W-2wVoCkiHy%R8$UeR@C2}}p%q*U*B3HJ8tFl|e%x*mZ<>Ng3 zX|W}#mMB_(Jrj1zm7?DJB(OT6K;DmsV^ZrqrvH94RgnX%KH*DI2JsjaQj3MRCS!JH z0c(m|2jk@vaHG;WR=oQ%Z@%jm+d50hw)DwTk-pg(m?ZIlo&q__bQ%6OUv(|zTcG_n|~V2ytlC4)%WO&O@C>5{nkFgfzCnmv2&lG$woSPSF zRmrnQ15;TnIFntq5!F<3Dec5-P;hW3$EX7= zCg=&+e$Zs`;y!dM(Ht-o{+4$7>F1RSw3z@GBavU%tk5|2$SHjFeB_iuRRe0~iL27>ci#>g$Lh&01;Nkpz+`h37 z>2K6|QvCCQI(^SFag9PMo$HV4g4X|Ia2ieMLsIHhhk@5?DDCS3n(@mRIiRRwjYQ_*b&n7MEXm+qWLG<-bN&Fy3+8_Mv_MmG%qyp)n}snVfiNLwMX@pkO zM`QW7n_yplk!DUDghk)o(5JQ#Juj+}j(R3ltt&!>!I~)R_*3*lOaXnaFD9#jAm}$3 zgr6-6*hPml>{HZ41+`RCNlt*-v-fekh1|l2YZ)wla5Akw*M$EHEXn3yB3;p%h-t!^ z^!QmXZ9k#KFl>Wc7Z+gO4r8-RcR|nxQ>!5zVIsUf2iN=*LP&*}t zEpjTU(#Vpav-^dvx+T*&c?f_N`yxU8loK<2ago0NorLne!|=&D14=vRO4mmS{SC8@ zV`6hSRxhe&PS>x1rj$SPwsxbUBp2ExJy~S?;yS2aa|Yj$9zOtVUmjY?o$0SBP}RTO9R;5Y;deF&FU%2w%H4yJ)V2O1V1 ziH7Uu(@X0@P<_6Q`92ws)zZmiW|G3pA{LTs#SoOcR)X7vyotTAUl}(qoKh!=+pbxY zh>`1V!te7B>2|mQ)}|HG_^GoXbwwVAXg0zKe^+vSUq+Myn8f@&sM%`9 zCVkW8s&*Y?F<(P4V}=L2JU@^M!`8va({gxK{fS`W-aw_DXK+ZCE?Yd-o|@)=grZSyE7 zRnRVUI;k@Mgpg}1s!|a=)PZu($?aJ&{#AKII}eSf)erBnoh@O!hLi%j8gCJ@d@^9D z@rp^ft)bw^OKfavqOhZU2<%Kz1&Eymk ze=rQg$Ib-R6lB%Brrg=f)-{i-X2HddYXCVu|fuaoi40W@ zHx=~5v&=D64Qi+a)pMrP-w|W!_hn78QoO?stk!2e*-P;F+6b!Ykbq&!QdzT8x1gP0 zz?kh`v^~L;n&*~Nx=$D1{8{i(jFP0*dpVRcOM!~FY{8gm|FL^N%&~c!D)jo=laFFA zlZZG7hGXN<^x!89{?ftTna?5n{#kI;LyYRvYjM7hBJJ0n4Hl7OQEi_;KYP_tc>8ub z)9D|MJDeG<+%8Ys6c*r#fE=cN;}yJ^yHB*t^BSD`v6I2%N_OIgg^f~%CCmD_oT^SW z!0zas_-*K9DqA4*j2g9b8+40UwQ>n-==i|=PslQVuWnKQ-kt32pfJ3?&JtHt3=$Zd zC+W#bMP`*P&N_cSVP%u8Son`CRiD<*B7?jkDD&k4WiS=;KIcfVR*FOi5> zPV*xy}_< zZF6DP)2GhT9DEcZPu)+5HgubXSr?e;UkIY~{}+s-kz% zPOu+XPkD)P*f_0`y;&~~&86$AGS*abA1YT)ZG2~eE*eKs-n^e_u6zroZl@u6&V6=n zYz{Q?Dp;d#f?z1jT0-sC8r2H6^Jfw7uw4QAyL`w(Hx%n#|1wv#w@kc=7j}j(`1-#U z+cPws2x+_oGeuV*wXv%Ij;FLUwEm7@jC4o!y39 zMp!zNkE-SyjC|OOm8uvVCUh#2Dd%}B7nDO9@sfTc1g(sR(GEAkBj5l!3Z22~-gVsS z3j-j6E9~s$ZDh3V7L{!1VUm&+FroeeyHX*^rRP~7ugTbr@sZHFIgHs%8h}OBvrx{; z4(H`s)7S=SbT2AkPH(^S1KH16hO8gDMF}2G2|b~|HH?;qR8r#>eLC~>95u^dg#I?< z+f}8Y(OMB@PAZ_o^doGO_D7^c^|Yd*ntajbJ?FzCF*=CP7_B|6ZTXQ{ zBHt`J!!p?ABWl!|Y>2m;gbta4BN%-yUz{rxym$6tFbfn04aZx$9$QSWHNs{Jph4i^#*p@IGi(g@L36|H^yZ3c+%p&odVyT#E&mwVA4}noz(>$Em8hF8{SxK6=6D30QssdM^C$3Cv7^T{0NO4&k4wK6;$jDSh0mQ?U^I%(i~S#5DM z@z~@Y@T;j4v%cwr@Q*w+$o$Rt%n8TH`!6x_O$ST5J?Z6c;rf!Mr^s{q#@HWEhQsYv zWX;-0_GD!UswHklD=vfenJEIn?zd>?%fF_YD(_i4t0?rS7XhW(L?jkh@V8R~vF$%f z;<)!^Z^pFxE^}c><@(LK`jo`v=i__5Gc;&Bi}}*0@ka9uXi>L^V~-c$Gp9%#%}Iix zJY5=hUXZ$F50FQN4w!1k`S7BrVbRq?nBi{-tGyf8R>l;K&b=ey3G>YMPMm`+v(};< z*L$o9E{5!oQi%M_vD2(pV_QErbMd{AH%%d*#CzmWU68=ElDBkO9yf?s5(%?n6)@;# z2YhwUCZ>Wbpi}MzYE*nDPi&@93rQaOcgcWW(JORrn1IU<=Rsb_6f>o<%OpGwX`^Qm z4aan!J3;MRPgXmwK2TcxLY+jrXu6MqO0^BW-o5}IRBPP*8Ze5po zJBHTFC}Ez>4+_e0#B13hCc-Hi7F1m%3bD^wk1%n#?-~W0rlnCOX<1qyf0np#8Tyx* zGa&MA7rrW;2aiPelOvojAoxAECc3@|o}8`2UZp$|xigzlGrYwdY?=m6+b>f2^jacA zUXlC^2e@TE%FgJEB>nRYVT8P;d9AKcaKn@8%vub$ilyM~M!++bcbVLsGFa!p@%29x zV06$gvgvj)emyz?B+BQY{nDxEU;Bp0j4p)iH`B4lZYSo&sDS?HZF1X87BmxYp>0V$ z={Vg^qSrgYwjwj!=9oc+gIoB!njK-P;XRmNZVd}pb+I~%MpWoPIf>b#3TmHIp|@BG zT18{YT|r7t|0jj&)AeAwt0fJ&>PEG;?!{HpCX?(to$QvBlkg%f95Ung;%yCY{P$}% zH^;aY2d@Y~iHKxUPG^{PR61S-2bukNBdN;#xeKcOL8#UwRX#$u*AXiEU;^sA_8@I{qj8CCE5|I`iY7(!W_8Cs zu<=C($@VqHz}2s4cNX`&l<20jge9>!n!pbeeb8Q63R?qf&{k^_9=dR!;MFEjUnjwF zy2>!VQ-XvWf8ehl8zh!%o$y6v1cY&ObWdVDnEU&xh#qx^ks~_9dZizv$(e(6nmw^~ z;#lw->ez~8KfHb;-PE$qg@3$5nf7_ugTT@nrj%QsDV*AcHPs2wxKN)FAR`__8f@!%I$A-D?3B%5-P(>K_fJtFwC@0DoSXGaJ_6kh zDG;|#pO&9?#k!BSOmnU()tvl*zkJpn6xDi2KhK+lqKO*dc;N-P@l^^8mahTfo8s8! zEDk>hPUECQp?r;|JrKYvqsFGjc(hxnKL3V0WSO2~FC;18ZMkh=eLI0>@XnIbGmBBD zm^)*!8^$KhTTqnrn983jAc~v^cmIQQ5Re!r;}?a@f}-__=IseIykjGo{Z*K|-*Rcj z%XeH4*aKAUM)|kAAChCM@}PQ-JEr-mm|S1y2K>+%6dFz^P5$v{PJ|?@zwf zG-u{?X%OgFQ&b+b#i!4YbF+vCF~cwh6$`IG@3BBQ?Y<6M6UFGrlQk%Ma~5p8nNJ@b z?q|eE3|b8)qw~#9=3!GDQ8+wI?CyRe$#3HM>7wCiXSR-=v%Hn|D_2Pz6Th6*adco5%-)igABV92 zzl&^{&;h<+$0BO^x(uc+{znZs|J2$OA!wwzf$Nya5&!1{`;k3#WIB;wm1m6FFl{W;K zgt!^}6W-JC{61Y!9h{7N#{EIQq=*h?EaG;nSLnyX-ngE7PCo7>VCXK(Xw5W**U=o) z(ZB|NNyyQA>;CYL-kAXPTezS9)fY|wwv(f|m#DM(Y`D~tk2Y`r5Yb9WK>G-)mYL2p zvF^}z&mZbl3OU|U3(8E%qYA;#andvqEIT5GIfuCQ{k3PLVaYwzxW5_Ca}!=&Zr_+j z?(9!u+7?{jHIp9F`AN*(ropB)1Vqe>!RP%c);hkPguhRqd&YMn^hXn-5C;w?tWkW) zX$YFN8TN$ir2g&c#6yIe6L*gUk@+VflAVvvjeEgdlPHgstTEe}?$>WEE{_@x()Z#KC)w zbaQtSSTAD0wMh~cFG`}(UN2NjlY}24Ut#skgZQdDg)VV3#yMhQaP`p@&|GAU1^HJn zIFtLG#4h8?{d$m6p#(?Yq>|0)b1`&WuKtjU3%8}?_?n_z$Im_v6!fzg8BtNRUVPGc zZ;=u{@N?(f1l!5g!fWKf3KMRoR9~K?BRg{0W}vL-`$)8!H9$6> zJWs6jmZ1BhCvZ>Y2oC4}VdpG10;8^EysE8%e@~vFqe9o|5Ai#sIA$R(Y7ipt9wmT~ zmMY{geF1tg+-G<~Jr$pJ2EDpM*zBLr$ukK>7;g?hO>GC#@z)3U^j1I{*Z2JwZ%-Fh zGyLNBOQR%`0C`(l1?Mha~Cs9}N*tu*-`^n9Ydrf1Tzg6MG zmQajXGo79IPXWwU`x3i<^Kt!^3J8BP#`kT0hK3x&us5-TSKu@a?dvC#{^fGS@54); z`{cXS==f5!da;k=ZW?fmTq)u@_MPV_)&rK4{W19bIvihfod`?KKymJju%_!CHl31Y z`(%6QaJMa1XW2l2k{I((bS9*wZ1r;08@h1lh z`P}nOJ_zM?DmYe55ca>GhsH0SLJ zjLdmMZfhptTcgL2#?1y7@9AN?QreiIvbQ9;#TgvPnei z%WPbj6NQ_e_rs|$9+xR9q4rrZ+#>pxJ=R8G^vN>J7~9Kuea<7o`>v8b-t(bnBLiio z3_d)g0P0^R;NQuHDId^jiF>>?PKxeGpQF`3Ow z#>7v2^!wI_CbuS&vxkqdUw!V934gzn>ES0hrobR{W@TW{x9_mPM~DqjI16<}uc&ic zFV%29N#7Jm5{AwL{|Y`M*Iz-})*nafBpWQRv!#AD!u7qg%)oAc0T}LA0Ig?hVDgy; z+~@w5c5KV1jR!+X>%2}n_tSfF|A-h&T3tZhPd%cerk(VDuQpD;wHS`u#*+85w4mvL zJ#mdoWp9b8qWYHz$h#*=HFwt1e;Sf_YJVQnz|JS4nq0r|M-P!6d;whtC~Dqvr9Owx zk=|2B(bi-aXgpgFnU8I-OxKOBd^`p6DsIz~!edOm*bUP7&IDe>Ws)92Cp^3Q3bqY% zvve9ppi*`cJxdkw>BgP3xbiUn0 zOnMQ4<~RPK;%p}rpL>@t>HnJ@{ntx{3bjH1Qygqgm8S}yWC<^52PA4%FdwE^K<^Vv z&K=+j`%HJ!iJ8e5Jn;y|+@6Z0?m6xMu^)8vIp$K9AuOqjgsE;ZC_A{9j!6H-md9GS zxH5-%_$!^diT|Lgd(P0BM{4-;^C{3-f1Ud5@PJ^$Dd_CC2u#&yz%z&;qAR#PtBW?B z`=Ois;+IgzkZ$;8z7ze6yW>G=AEgA?|NIJ@H+2c=9hT?z%oZ5maRj1jDv8U9^ZbDL4>aP`1R4>XLCnQ+;fc8^ zR*6UPwI0`@$-p!i9Qed_h-c%ks#|2XqB!PmX{Y|mj_~KtJNU6hhTkf0fI%bu)OdOV z&AXq*`8exgL*;6m%|_DaW|<_gD~w+``xCu)v7I=#@zLzdC)Q^9Nw)Zl4LUroglJoP z^6}gnbW={l<6Mt-OKc~xZ;fY0p1m?j`Rhv$UTPz@I;)5aHwVWbDB_z+JfJc*YoWh4 z!Bpn`KPaDc;rfG~i)8(TN1(TE5=OjQ0I7E(sR_pf*{9uz^-Y`6eOR00d%vaqi^I{w z$pWOl|0MNhnslh@4e90Yf=mM9bBoZRw={D}3*3@Igoz_9~xfmjE`J2DJJS{(~a zU;L zt6x(O@oC_5doFwFl^ptUdl|LhDpqn$6C86ICqDVE5M{g=rY82#f@lAz5XTRxj@E=e zHC-&8eG>GSWb)X96RrRH@aya~P%S^v?3cq2Vmm((JG`0+?Qf#D+d%JGaKV7v>M`U&##cAvISJU1?yWK!>RW&aW2nM0riM7wqc+e zMn+S4A5}P}VfPc1o-0PK{7ojWx*ubi#6D`9d!9~t9S0W;CxX5TB?0T7vDHg14Hg1`a1xQXCH6fcDzPkk=B20X^J&4A(uG>{&_e zgVV_p6OON8+yskUrjrTX4*~10z`J|mAR#rMehVnU%6U!@EBg#A)H7gx{Vf{i_LsR; z*u&NiHP9b<7qRudAlMr?(|&0?9B)a&z00delb|k_0R=*5;|%o3;v8aOzTDhPB-MI& zi)4tMWRpT+UdsZzXtM3oplIf)8JZ%L)b z2^g8rb$Yu_p>>!jwXU3t`ZSz*_y*ujJA)o3?!+cf4tu|!Wpa|QLhVA1b($E3VPnUM z`K=T5n$c?Z^PU>2U-^$W@*oHNRr6@d=vs`Ld6Mnxm{0ZwSDKEfUF9653=<5TKd-z3 zCg?^}%!$GEdydiRcN>u}`GtM$ngY8XuK;yVF_0SaAVEd?*oJ9zF<4oABN7`!@kz%q`cFTQzWp}B7HZp*b05wV({Eg@(J~PE zUT2B^y7%<{m+K_SDuJXuI!qUznMD)Ek5l8%QgEgB3Sh+Z61sCE(ZJl#b^QfJN?K;^R3;9=heh zt29$m)@h8kPGQXWx_Q+6*Gf=x-$|M{r@?}VMAM$vrmR+a4v6jN&R;}=@bT^$nCMeM zrw&DsCmd62)8<*QY4Ul@mDq{4tpVhqt|3*e(lO8#7ELmgJeWsMPQxt+Z(O~v91j_XVrJhAxFNcf>@5_>4csjBrZB?F z&1XpTVa_Y@stkm5?5QcA>&9uxbA1n6@=|mr=sq^Urlfa-=hTG!70c*$sSx!0_YkdP zUqa6J2rRTVryg~`7~6}ED8;eLN={DYW&%d)2L0?<<8|NYl-2}lxLFDhOqc>Y{)UsU zYoAcjDTX-w%$DeHOl5~fUO<|-BHevEkIc^0B&k}_(ZJ`fV0!;4lz*|r1Jw^m@zXolHe)GF z?>k9NdQYSN18yGG@h}Eh`k`dvOzfRJpqflxI*hn|&19AJ3`zKUJto{M7Wy+5ps3(~^l%IhYcjg1 z?W!EUUP>vmYK9UsRp=(&HP}dQ8-L~wCH?#2Xl~gV?3-+bo!q`cUTr5#{&|L2iciA= zvkaQKShikDV2qgWo5S%sJ>aWcF8S)>fLCwif!X^;cH^Qj(3^ON$jIIVnbdl&b18+p zrvIgmYsAe$?DmqG2RwGP58A77Yj1*EQT^QE$;l_a!Y)) z<|33e#DU$KVA%g!36_pOp@#cK@ltvusnpgXrkab%=hKHsR6;(x&-X04l|D>WMm?HYtlsMyjV%T8BInd`4wP#^&Pq4 zHGyNlsDn)0Bih)^xiIuw=_}KjG^z0!lG;gi{iAo8S@N;)p#K89`SVmX8+c9+{dX4Z z*UrEzK7#fAF+NOyWFM(HB#r#lkLbvjS3o|+;8;mL{a1XBOroNZvpmWGU?lm-Fjm&be z$YCNImG+xSA20-=H#*qoU<@*+T&U%+4`^!&fuMsbhCA$`UM;cY)`esU*vf+=vu`sZ zniDV{-w{6LIuG6m1X|N?%PE-NX&$x&C+;JRt^*cV`#Q` zH10SMh0l!jG32%qvqvqDFUD*`kCIw=xl*5Fw{ad4^F8?0sfSK@qCl*;zpG)_PO7l# zHWkk}imNV}qOF}hk^&Hj=`b%u9iLir{Pe@XH3?EIWaxXFD4*CcH#}9a~|de=4kBo&sa%q`%VryQMzO ze49b9cB^ByQw%W*4g#HsLGpL|G}bOF5Y^VM!nCO}xZETQpG}zsPtUCa(LzsB`I^fg zEI3YE`6Z0!GJx@KJRIregS)ULUGh=SlwT#xmp%NBw4Zh3^5RZRFS$keF3(N%`>a`; zI7jm2;2yZu{G5)ACc~rq<&bhchcUhQ78gDVfMY4P4D&Dw+^TgUHm?!Zi%o>w^5f{k zv9TJyMB=veHPCA_2jr%%Br(ddjOfWS^r_`|i2G)cHupf#8aqc)>(%J#qkVPqxA%~Q zn)&#JHwP^KUgPFNtw{0rFf+bdksIyYd`SBTnk0IRx_0ltKVcmBDlCaH zi=BawAIZ@)iAqX8jWCs*8^&FI3WfTqc>9Dmi@LEy*>N>lSma3UO&zfH*Ck*@PNVtn zdt`_AWA?|vE+TQc0FC7C!s|G3v=xzq8ciuP(QRfpl2S<9HA6^;3wJ-|i=okx?^J%) z4v=2L@nE|@as8xPR&zSX0AoQ)n7uzRE{~f`6XJmmqU@+1IF=VHx+)8O%jso7%TaER3Nd04)qDa{M?D; z(|oQ=eLar&MV~_6#Ra@nH$%2tq??Ebst@FLwc z!yR_4+6O$1BKTVP7H2OPqv4&}OuoPldfGr4qF#HWd43wz$j=4I=i#6>^$*$P`I`y9 zf0ekId9t_5rc+n0L*@knWbXMqc>7Ed9<WR`o4p>u0|uZv<+o;J|b+5|4)AIxT#4 zeG^D}Y{q+&55fbFS2WT@mim0uCUejIA_j}Oq95l)BXduJOJEExV9wHGPhS&Xp)Qg+ zHJ8SOoG&Ee9J~66&{bUiyLSz#UZ#MSJU+)4Q-pcrJUl%d&PFqj zN%1>FnAofj(`X{)`T1gXMb-pqG18Rl(H+&q z(K-LIxfZTG@4schH|5T(ycBVmwwty~=fi!@Kc9A?h$nP?8Z5l3&^#v| zZN#oJr;T5cpc6gB<6I*yeU(59MAcD};{xuAJO*xgsodJnj{FN)M;4ZeK!b%Ho87L1 zsX-|){5OoLoAZ!HZaj{bs}otbo@X!=xRvwTz9-^GjEFAhwX}#T#CE$d&OJh@x{nr& zWSrt3dv1x!m$`ljEx~yP3+Te-M)(h}^Ny(T;JaHX9#>sM7mBqIm5p}n&Ar-?G*6Dm zC2mLADHkvxWhd28)kf+4vS`$+j3rGKpnT#q*~Bqj{M1$1CsWqqzG?mNrp*f-2Zc$} zqYRSXbdBo{io@cv`Ecw%Y3_B%VzR$nhb1?SF}|^iTF-68U~YakUI;Kd$O7x?hDq;b z1nY9s8f%Zv3?sbo6$eHSpwtd;L0Z4bP92h8>|M*XU@SmwP7x}6NM zK46fMjebv3q@-a7%cGV%W&pQwL&F84IHn?lr?aP%>*mg6FuZ`~YHD7tb(<*o5{TGE*erIXy`#r>#V{DIAyk^}>K2uYU z<9?z=pA;aA>{gcOwdC{*9zSHt6V&V}|v^PxTE z1{q%+jlor~$=?!|Q?4DMpM>*q%&>*}$V|kimzuF*i8pCUS&5rgz6b4KYfuf$COhON zVBvQ|A~k`a(Z@6F=+_@~0b360^Z4{pCl8XO&#^l`Hj=gT`0VYYDp=MdhgY79!zK4c zAUBjq6~A01(Uabh7H?hDd%2zC4U0m3UIfkQXeR@Cb3n1E1gd^n@H|f3BNDfiY1_PS zq+fU^2@q0Z8hkym!&w+Sh81CflOnWdrC|~O2i~Y`MagA>plA7vZc4uiPF;7vf1wm~ zJe>!^caJfrZxq9SW){?Hr6)DLH^MlUThl33p=gnI5WVg1(OzF)poeyXX!8~N*61Bm zc`mdtgQr>O3H-~e3~=0^3Zg$Oh=^@HSu$=;q^7Nd zZC_`Tu9{zLJmU;GOFuCGrhKKh){Zla92u;icak=*6E;&i{Ee4-P#1rC@YZ9nt*Jk?q<3%=Z+jZi!RS_G!r_$na z8`JMe7r=Xf^DyDA>k_quo&;k=BXpJPS#!%Y5t)k|(OOQ(#owkEFayqld0+#GviwbeE?rG_R^= z)ja>e%zFyN^64TldvJt^*rlSxl`2xy-c6MrW-|dN?$gfOk*IpcnN0ID!>Q*?aqxL6 znssn2zuS^%y2ulKVJ3*@+=Qi56rkm;0iCM-jto{@g9iuQIEbSX9$4QC^_kAxjPgRH zy%xAhAr41h%JTaziGrl)E%fd?1nJ)kh{Tu$s3bff($Sq{T*QZ{3M_>3h27-Ia64bC z^C!8nBa7}5f6P8up@}`;FR{4?4}$MGJ@D(@3WoD;qs!oK6rGw2K@YZ~WuhF;&mq{R z76)hCwvtJL%J`p)G}n2Fry5)*g>Rig9)GGJwtA^TJ>9jw&`_52o^9k_&-7m|x6@8xmI`y0fzyO3I}6o68jQ>6IlAxtearE1&X zvpBf`l5;0g_wB1;B3BQ6pFRa$?z+OrJ`tE_PyrwIaNX%`k<@{6PP|_)i-VIS;ci|9 z=BbsS3&%#?^*RtejFV{bmkrpvb|>_#rm=ezUfFE;N!udJSJ8OtW z4G3UOWhR<9Oa`amyP*D}f()he=t05Dpfz)lUR+X$Rz`YIuuGEuclHb|7T-xnI@Qqp zq$+$nTSp7SGQq3w602SNo4v{|z~J>tY-XG;YJUyIsi;DS7TK|1CsxswdHlNLQF3sp zHc(Ld)B@sDD3vFr+pkKdoy(OLn*k4+U zeY5?Th>X=Bfw5qIu?RnVhoM@j24wk^WAvo8WVwkUZjPX^HeMXfj1$rE_z)YU{u;K% z7nAY7952B%6_?7p;jW-)EWLCY<+bO)$kt?zch^ePGx8Y;nKS&8J=H|hYcmEj;Z)Va znjJ`GB!v|lcF_2L z?zr>kYLvUSjH>?`gFV_S;9uNoTzs;Ewn}{^uBW*(VR?5vVEUKJeHvhQ3GBzrO?R0C zb9_m+^9Abm)suD3Q9#q^r^MjWUSt#_v5lPx!=2aZ{T<5mmS`cd%kyE4B5dH=%UfvG zz5~lIaJ;jiWK=2Li2r+bMEboUXJ2-ai541|87_`>n!C{cz;jZsn~LEj-_iPw2BzJ) zPYcd!;IvmJ*!^rXabF=p@2j2$(Y}|cQv0)Z7(!9=>`d_ATthX#33K_aKP&Oeo0EQPAeL^(f$HCP z!Z#1BFmR3wb_WUItFTYV@4e1AK085+f|l}(w#(5Tvu7Zb-bU|i2xSlSbb|DbU1*!L z9870&T>KV0)RFl~4K#KW33WmE;vLHber$)#ZJ}T=^*3={{gHB60qeT70yIqJ!L)}@ z&AxN9l*ulXf4B(kw*2L95G;b>ly^+rhFJ3WS3HJ4PooLSjri-zS|XJ9n-sq7WqX@# zNvCZU+oBuIS7f%3HJ)=|sIP@sIBukhlXa=s3p*M=WCcrnCc&A&ovfFz0hoP@LECE) zG-?aS2|w?S6#*T@tE`{QUR_A~N;~OxH!ZNO(IzfEDn#o0a`wCqLEn`MnZHIJ?4YZYO!>-+2pOZfmN2_1p-mi`PnWFMId}A-{;75@9 zxhL2fc~M;8bQcy@7Gc=CSW@Y8ntTj6Mn!!MnMu}!7OdXE?2yqUu?gGIr`Vq=-n$38 zUd+O-+HhPe+f02DtZ7yDJi56;JL_zZuKWEj)wtSY5_KpS?87 zsT%kT7lPQMAnJ17fn>YRhI{!%Zg|1vUyI>M=~{SXybh0v zdQkd43R=#la_hlh=DcGX92+l%1l{vQ;GGDUd#Hivm48I_Le zMfKI06RBfU8YzYAC?+)>PCcn121obdxQaBy)*54{i3W91F{3(loL|{!5Ov~rVLbmJ z)Q(QZB7HYt_2VFN{{@;8af?iiTM0+=*AU5hoDX!G2yAcH!s<6+RBM?xJ-x`2_^lMe zY^k&Chh^$8zpM=6HiR*5YmbnZJFq`o0{xQ}K)Ze>P+zNy zIJC7A&MNtU=Jv}Fv6sOcSJmp=QdZFoPvfBY+7{ZX(@$~=9O$(z7g5U831q&Bpv}^1 zT044z`qX97Cj;K-DAC8unUoAAxwZ86y&GiC!O4&rypPFj@=W!P+c(0RDD0 zIO#zw?JaE~yEDX5t27j6?7T_d+~b&3G08MUeLKc|>cINO5HQXDPIG3Jv2m~UsOWQX z+SYUk&*zmPKa7W2rH3&=_6IT3iKiw#EjTRT3wG9`Vuh2V>IJR|2>kEM&RS@qfPk17 z_rL$=-(&$10nbBw_8ign+;(uAw(iR169hJH6gXGNbK&K(54ObdzlunZmJbf}sb&g& z#C7R69up?=&xM$QTT#?SJ%F6do=Yt9ow1AX0Mu{D8mFV7iMe}I0TpLjw=tSMD`d!q z$IYkSEg@u8ts3n+IKcZ9rbP-{jxc@O0(hQP9~hq_O0?N2l}?;o$>gTG(HpD6=u^F| zRHyL}S&!CK=iqMgzw3A9#Fr8QLGE&KU;p3NPw)R;zpT%P$ddy>IP=g#Ms)c+oROr* z9JzWPq{}7(&X$0Mrr9{9aw5qbTgp6A$inCJ!1MA>sT78r^`SO_0? z$4(*>`VGi~E7c$-{t$0gh2hf0UPPrLjL}W!GM4eJcydny`T0ANkvKh_DR@2=Rc^TQo)(&OXKr$inaF#&6+Z;Dus)hg_;L?l zL*SSIF0bR(XV#pU5~lXmWAYP%xh4XbO4o3e&&i9ziGjJcc$ za*1c?n86S!$d5(+-YPnO_a(05!<~_C7B!PjEug-3OTbG$m-_v_PqePS2Vbu+oG&^X zvbIW?WmW8DjZ*b7EV!4wF(;IcYdu8uQFkJ5T0s`;8KI~nkia+jM3r3*>aHK*N1rsk zm$n<v#F$_X^$&eDj@$+%BImPY7aWQs*sW24;_5WdE-AM<>mE;$*0 z1rD+A%d5dlQIeSV&jVA39uiqG6ScCd$%dT<*g8`a_jt9T-$Wi}CdZ(=ULIt8bEm3C zvNShQ1TUYPj!E`YNbzcEbl9cKW*a3z@6NaIcxVCcU%3E(SnEM+i5+>8{StM22~{3k z&*;3>f>lpel2EroVr`-a0q3fjQ%_Do*z8pBtuQ3YVUOrDTTKkVx)rJxgt8hM>Lj_U zl{O!m086e{g2pUEW+-JjI`{k2&WGPYd4n^kHWe{Z6?NpwTY_@BFG+!gG8~+z2LF8) zz@rOe_`zot!K2EHc5aI&O$RDS=Ic7fHKCA{w}p_xxG=h9?ge6%;es8jBj``b1@Nfn zHv6olh)jJW3sUtxq^7Ka{&p>8+w#RZR`CZaFvFrx?*{i z1(wTwAa#=>vH$%g^p$$X{Jpyf1$Zw=yJ0A^G3Ep9xE2Sq8V&Hig8=^cnZQpS)rJ2i zDdC3`mE=~!0KI=U442(tFvs}?st6u|>&8ErK8sEa5Pb~Ew-#a3 zN$~6AIuv68C^tmVn6Zmw?`m_BF?WdXOpEXe_e{5cl!NO{yNIZv2bdLkfX?PFay?^&9yvRY zW^VFEAr$+xkmrYMNXe=1RBxXTZQ^nM z-!x11>c=*;a?*z@<_h?*TNJ_tF42N17x72iR%{>~AJT*49XLDVgWIdA>xgYlQ`*R-N80PjzUq4dkyEkZZau!|}>!;?O&7`TM z6#agTkvE~|NWyV1I&$kh?%bD#=Reoe^BWSOEt*dpU)aEZ?dL%@+kxH6y+5ZEnZTmq zTO4z(5KnTP1U0=9(j=fl`|KNtNRcV38#d7j-S6mpcpi{UdyY{WiU(HMlD0_$w7lap z$y;s-e;=CSvXx6f^>8>9+@Fe$f{)%uI|Z8$)qBcgFj=>QXK@Q$r)&Z9*qw&T#da5m~nHQ6Lp2Zc%u9gcPc!<%PlZUV=@@KCIHJTH5+|7o3(yaykDh%GoTzmZ`FBt~I(~D6s zBAhSRQV%i{wIN!ml-C~fm}W_`@Pq584hzN+8aK+`U0VnlGKY!srZUPatD%N>(%^I$ z=Uv!#9K{s_sp$AqSbCw6W}5mCk2UG=NKS+bi7SB1&|19TI~|g~euKB!F&O04Ks%SV zQ=6B+i9q;C>N!CO)0<0hB!TM%TsFh-3obZ~^OG3gp!GU7i48~nCl z*^kSt^h7y`<9r(m8$`iu-e>lh^nQNdJs%9v7J+`d+hkMV1{jD`hk-T$hFny@^!vK> zmd!-q6-^_)GuF_w@LKBY)QJ20uYp+&5iD%miparYo-Z% z*j1bkJ#3+NI$P@QoU@?%o2S&Pc!-nt$0SKp6idnX-7u^^g?NUj&=nK1F(>abiP>WX zy1wP`S3v-@&VR;&Bdx4+=3cs6hliE-ydi;Ig|Z%RNO;=}@~8AHI~Y5In8mF`gE|}L z>D(o_{Pi9vvDgT!&V@qds3|lhwqcFWbx8ZvNrZoygYrWYvUK_c^i3?JpV%fy9LOe7 zYn*Y1?IsA<%a58c2}=EHQP=1K8C`vX%Vl-g$;Ydyq)IK**Bp(ub6Bul z`sq4EiqgDjfB^z&6=_pkbMwMCNcPymI{j0>6JUm8Vui{iUmn^PT53;`%J8 znOn=0alQNQr%U-R=S@)I>vVj(Y!23}IgM!tv(Tw=6`VRA39G+9B&H&T#HXLKBW2nA zq#b(jM(_Y5JKzm3SF1x_=r=NBf-I?g^NL+PTMbH5V_1j&3Dl@l2_AC%1Rk9X`rlF^ zg}I6T)d#R;q>q|Nn?a)UH5|%3O{~gIXnV>^=;auNmfH z?;-xxglc+V?Fb{^Ih}cKH5K~iNfBPyUt+rQA@ny;bebeblxjYK^0!dz`?3%>Zsq)v z{JoGYt3}s2mt)TXC(z6EBWb3uS*eNT5Vuepe+wsKf&j->iI2i7E2nVRX*&7wI3KSE zyTiISxu|=)mK^XJBp(i5p#3vdO@|IlFk2t^n?_n3!|A3H{4&`}BE5|T*N8Xd?V15f zWiC_48!w8S$ws;1ph2fg_@h9sK06!=XW_qpG0xlg#1>L^D>8o6!?P6 z>m69X<_@WVm((mGhh`kRgH@-Zz;{_JQy_Q=?8Nu8Y13D79KZS4HoK36G4breV}oQZ zH?yPXnM576*ATzceN_0rUYgY4PArYnc_+=4>BcX6NxSw z8Z8@F(~1RwAdtBU)MsC$DqLP!EnUL@$4kUJispn~`NVYRb9{Vp2^ibwi0h*ZK*da; zKF*~T#`pc7&dxs=t1OJ;>h){zmO&_%Y`<(0h1z=Wd0wfZU2B*`ww-n@y?#qYwWZW% z$%s2Mh{xN88tXVE@HInk*2&98866kCjn)bi5~{1IR`bGP%?oIq%I{^pB^B(H(H5-{0J!#UnB$dp(NAqHJRA_3l<99Nr}8& zc6C4n#46=vYPvbj^v=iP#&sar5J8<*`r#!o;;PD3=r=CzM0=VDrWhK5t4sotUTbpv zTq5je8vW0j-9%J;n$u0{q`|!&SbgF!$lMNrL1i46j5C9}GG9y??18i#A?kj=Oy)Z% zrKXn)(MamDWkoC6gsp2Rs^5P{e7jil)J z=j4ReFfH8HOOk`ih}UF0nW?h`w+tE%;K- z;<)8D(jkIXT}#$e<%2?TX+x|mD19rpJSvH1r$|6hehfptOQx0`30QoM?P&_F!R77i zA;T-5xE3tH9S?foVn;eTS$PW1rU|Q!w(jJHr2*t+Lk2O{n=SKUu_umOenD51{Y{P3 z-C&LD9cmg?i4D5DG5eVkr`IXK1rKXL)3cVk*ssBYC5|wl76oRH^uc|PF@*l*08?d6 zG^bDt%4fBbf11xyC!GkWc=3b`rJR=a|6v9m-j7J#y{~AKAImW|GnEw7ia@6&snYC$ znEijVmoBy_CC(Y~)G;^#tr}X8*AFB%4{j-GGyQ9X zJHJXlu%Fl;_T}ss3~+f?R&W1x;-#5n?KnMZDl~!}5<`d__CwJPE6|u{2NQcJ<}S{K z$e})xmz0L}KZ>; z?Fx{3$Y^?&n8hB9z#|(?X+({bY-!Va^ZstVie9sejh(lT9>3j)ex;-&W|)|t2f;nN z=e+R6+kf{XR3alf*k;Mb@W8Njjt+~M5cWTdjRx Date: Wed, 4 Dec 2024 00:00:13 -0700 Subject: [PATCH 04/15] Deleted unnecessary file --- inferf/python/plans/test.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 inferf/python/plans/test.txt diff --git a/inferf/python/plans/test.txt b/inferf/python/plans/test.txt deleted file mode 100644 index acbe86c7c..000000000 --- a/inferf/python/plans/test.txt +++ /dev/null @@ -1 +0,0 @@ -abcd From 02e1c9f02e5f1945c27080290478169b8b60bb47 Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Sun, 15 Dec 2024 01:41:49 -0700 Subject: [PATCH 05/15] Genetic algorithm added --- inferf/python/genetic.py | 207 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 inferf/python/genetic.py diff --git a/inferf/python/genetic.py b/inferf/python/genetic.py new file mode 100644 index 000000000..bfd1bd282 --- /dev/null +++ b/inferf/python/genetic.py @@ -0,0 +1,207 @@ +import json +import random +from collections import defaultdict + +# class that define the properties of an input edge +class Edge: + def __init__(self, _id, parent_node, child_node, direction, num_input_features, num_input_rows, num_output_features, num_output_rows): + self._id = _id + self.parent_node = parent_node + self.child_node = child_node + self.direction = direction + self.num_input_features = num_input_features + self.num_input_rows = num_input_rows + self.num_output_features = num_output_features + self.num_output_rows = num_output_rows + + +# for each input edge, returns the height/distance from corresponding leaf edge. returns 1 for leaf edges +def getHeight(edge_list, child_edge_list): + height = {} + def helper(e_id): + if e_id in height: + return height[e_id] + + if e_id not in child_edge_list: + height[e_id] = 1 + return height[e_id] + else: + left, right = child_edge_list[e_id] + height[left] = helper(left) + height[right] = helper(right) + height[e_id] = max(height[left], height[right]) + 1 + return height[e_id] + + edge_heights = [] + for edge in edge_list: + height[edge._id] = helper(edge._id) + edge_heights.append((edge._id, height[edge._id])) + return edge_heights + + +# checks if the labels of various join edges are valid, specially looks for edges where label should be 2 +def validateChromosome(current, map_edge, child_edge_list, edge_heights): + keys = child_edge_list.keys() + for e_id, _ in edge_heights: + if e_id in keys: + left, right = child_edge_list[e_id] + lLeft, lRight = current[map_edge[left]], current[map_edge[right]] + if (lLeft == 1 and lRight == 1) or (lLeft == 1 and lRight == 2) or (lLeft == 2 and lRight == 1) or (lLeft == 2 and lRight == 2): + current[map_edge[e_id]] = 2 + else: + if current[map_edge[e_id]] == 2: + current[map_edge[e_id]] = random.randint(0, 1) + return current + + +# method to initialize n valid chromosomes representing various factorization plans +def initChromosome(n, k, map_edge, child_edge_list, edge_heights): + map_chrm = {} + count = 0 + while count < n: + current = [random.choice([0, 1]) for _ in range(k)] + current_updated = validateChromosome(current, map_edge, child_edge_list, edge_heights) + if current_updated not in map_chrm.values(): + map_chrm[count] = current_updated + count += 1 + return map_chrm + + +# method to exchange subtrees between two parents +def crossoverParents(parent1, parent2, map_edge, child_edge_list, edge_heights): + keys = list(child_edge_list.keys()) + targetKey = keys.pop(random.randrange(len(keys))) + offspring1, offspring2 = parent1.copy(), parent2.copy() + stack, subtree = [targetKey], [] + + while stack: + current = stack.pop() + subtree.append(current) + if current in keys: + stack.extend(childList[current]) + + for e_id in subtree: + offspring1[e_id], offspring2[e_id] = offspring2[e_id], offspring1[e_id] + + return validateChromosome(offspring1, map_edge, child_edge_list, edge_heights), validateChromosome(offspring2, map_edge, child_edge_list, edge_heights) + + +# method to make random change on the chromosomes generated after crossover between two parent chromosomes +def mutateChromosome(chromosome, map_edge, child_edge_list, edge_heights): + mutabel_edges = [i for i, label in enumerate(chromosome) if label in [0, 1]] + if len(mutabel_edges) > 0: + targetEdge = mutabel_edges.pop(random.randrange(len(mutabel_edges))) + chromosome[targetEdge] = 1 - chromosome[targetEdge] + return validateChromosome(chromosome, map_edge, child_edge_list, edge_heights) + + +def getFitness(chromosome): + """Fitness function that returns a random value.""" + return random.random() + + +# method to perform the genetic algorithm +def performGenetic(edges, num_join, factorized_output_features, p=2, max_iter=5, utility_threshold=0.5, k1=9.8167, k2=2.1713): + + labels = {} # stores labels of input join edges + map_edge = {} # maps edge id to a position index in the chromosome representation + node_to_edge = defaultdict(lambda: []) # maps a join node to its two child edges + + # initialize labels, map_edges, and node_to_edge + j = 0 + for e in edges: + map_edge[e._id] = j + j += 1 + labels[e._id] = 0 + temp_list = node_to_edge[e.parent_node] + temp_list.append((e._id, e.direction)) + node_to_edge[e.parent_node] = temp_list + + # child_edge_list holds the left and right child ids for each non-leaf edges + child_edge_list = defaultdict(lambda: ["", ""]) + for e in edges: + child_node = e.child_node + if child_node in node_to_edge: + e1, direction1 = node_to_edge[child_node][0] + e2, direction2 = node_to_edge[child_node][1] + child_edge_list[e._id][direction1] = e1 + child_edge_list[e._id][direction2] = e2 + + edge_heights = getHeight(edge_list, child_edge_list) + sorted_heights = sorted(edge_heights, key=lambda x: x[1], reverse=False) + + map_chrm = initChromosome(num_join*p, len(edges), map_edge, child_edge_list, edge_heights) + num_chrm = len(map_chrm.keys()) + bestChrm, bestFitness = map_chrm[0], float("-inf") + + # perform parent selection, crossover of parents, and mutation max_iter times to generate random chromosomes + for i in range(max_iter): + idx1, idx2 = random.sample(range(0, num_chrm), 2) + parent1, parent2 = map_chrm[idx1], map_chrm[idx2] + parent1, parent2 = crossoverParents(parent1, parent2, map_edge, child_edge_list, edge_heights) + parent1 = mutateChromosome(parent1, map_edge, child_edge_list, edge_heights) + parent2 = mutateChromosome(parent2, map_edge, child_edge_list, edge_heights) + fitness1, fitness2 = getFitness(parent1), getFitness(parent2) + + if fitness1 > bestFitness: + bestFitness = fitness1 + bestChrm = parent1 + if fitness2 > bestFitness: + bestFitness = fitness2 + bestChrm = parent2 + + return bestChrm, map_edge + + + +def getEdgesFromJson(json_data): + num_join = 0 + idx = 0 + edge_list = [] + for json_obj in json_data: + num_join += 1 + join_id = json_obj['ID'] + left = json_obj['Left'] + right = json_obj['Right'] + tuple_left = json_obj['NumTuplesLeft'] + dim_left = json_obj['NumDimLeft'] + tuple_right = json_obj['NumTuplesRight'] + dim_right = json_obj['NumDimRight'] + tuple_output = json_obj['NumTuplesOutput'] + dim_output = json_obj['NumDimOutput'] + + edge = Edge(idx, join_id, left, 0, dim_left, tuple_left, dim_output, tuple_output) + edge_list.append(edge) + idx += 1 + + edge = Edge(idx, join_id, right, 1, dim_right, tuple_right, dim_output, tuple_output) + edge_list.append(edge) + idx += 1 + return edge_list, num_join + + +def getNeuronInputSize(model_path, input_layer): + state_dict = torch.load(model_path) + input_weight = state_dict[f"{input_layer}.weight"] + return input_weight.shape[0] + + + + +if __name__ == "__main__": + + file_path = "plans/4_3.txt" + num_neurons = getNeuronInputSize("plans/dummy.pth", "fc1") + + with open(file_path, "r") as file: + json_string = file.read() + + if json_string.startswith('R"(') and json_string.endswith(')"'): + json_string = json_string[3:-2] + json_data = json.loads(json_string) + + edge_list, num_join = getEdgesFromJson(json_data) + selected_plan, map_edge = performGenetic(edge_list, num_join, factorized_output_features=64, utility_threshold=0.5) + + for edge in edge_list: + print(f"Plan: {edge.child_node} ---> {edge.parent_node} = {selected_plan[map_edge[edge._id]]}") From 7bdddcb0392c40f435b717f7978f598401419910 Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Sun, 15 Dec 2024 01:43:52 -0700 Subject: [PATCH 06/15] Greedy algorithm updated --- inferf/python/greedy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inferf/python/greedy.py b/inferf/python/greedy.py index 71e620d04..7441c7af2 100644 --- a/inferf/python/greedy.py +++ b/inferf/python/greedy.py @@ -68,7 +68,7 @@ def traverseBranch(eLocal): if right not in processed: labels[right] = traverseBranch(right) - if (labels[left] == 1 and labels[right] == 1) or (labels[left] == 1 and labels[right] == 2) or (labels[left] == 2 and labels[right] == 1): + if (labels[left] == 1 and labels[right] == 1) or (labels[left] == 1 and labels[right] == 2) or (labels[left] == 2 and labels[right] == 1) or (labels[left] == 2 and labels[right] == 2): labels[eLocal] = 2 else: if utility[eLocal] >= utility_threshold: From 2506b9004a6c82725fc09308c320a15fd8f20fc1 Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:31:17 -0700 Subject: [PATCH 07/15] Added code for rewriting with factorization --- inferf/RewriteFactorized.cpp | 1315 ++++++++++++++++++++++++++++++++++ 1 file changed, 1315 insertions(+) create mode 100644 inferf/RewriteFactorized.cpp diff --git a/inferf/RewriteFactorized.cpp b/inferf/RewriteFactorized.cpp new file mode 100644 index 000000000..35056e30c --- /dev/null +++ b/inferf/RewriteFactorized.cpp @@ -0,0 +1,1315 @@ +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "velox/type/Type.h" +#include "velox/exec/tests/utils/HiveConnectorTestBase.h" +#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h" +#include "velox/functions/prestosql/registration/RegistrationFunctions.h" +#include "velox/ml_functions/DecisionTree.h" +#include "velox/ml_functions/XGBoost.h" +#include "velox/ml_functions/tests/MLTestUtility.h" +#include "velox/parse/TypeResolver.h" +#include "velox/ml_functions/VeloxDecisionTree.h" +#include "velox/common/file/FileSystems.h" +#include "velox/dwio/dwrf/reader/DwrfReader.h" +#include "velox/dwio/parquet/RegisterParquetReader.h" +#include "velox/dwio/parquet/RegisterParquetWriter.h" +#include +#include "velox/connectors/hive/HiveConfig.h" +#include "velox/ml_functions/functions.h" +#include "velox/ml_functions/Concat.h" +#include "velox/ml_functions/NNBuilder.h" +#include +#include +#include "velox/ml_functions/VeloxDecisionTree.h" +#include "velox/expression/VectorFunction.h" +#include "velox/vector/ComplexVector.h" +#include "velox/vector/FlatVector.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace ml; +using namespace facebook::velox; +using namespace facebook::velox::test; +using namespace facebook::velox::exec; +using namespace facebook::velox::exec::test; +using namespace facebook::velox::core; + +/* + * The structure to describe the context of a neural network model architecture + */ +struct NNModelContext { + + int inputFeatures; + + int numLayers; + + int hiddenLayerNeurons; + + int outputLayerNeurons; + +}; + +/* + * The structure to describe the context of a decision tree model architecture + */ + +struct DTModelContext { + + int inputFeatures; + + int treeDepth; + +}; + + +/* + * The structure to describe the push down status of a feature + */ +struct FeatureStatus { + int isFeature; + + int isPushed; + + int vectorSize; + + int featureStartPos; + +}; + + +class VectorAddition : public MLFunction { + public: + VectorAddition(int inputDims) { + inputDims_ = inputDims; + } + + void apply( + const SelectivityVector& rows, + std::vector& args, + const TypePtr& type, + exec::EvalCtx& context, + VectorPtr& output) const override { + BaseVector::ensureWritable(rows, type, context.pool(), output); + + BaseVector* left = args[0].get(); + BaseVector* right = args[1].get(); + + exec::LocalDecodedVector leftHolder(context, *left, rows); + auto decodedLeftArray = leftHolder.get(); + auto baseLeftArray = + decodedLeftArray->base()->as()->elements(); + + exec::LocalDecodedVector rightHolder(context, *right, rows); + auto decodedRightArray = rightHolder.get(); + auto baseRightArray = rightHolder->base()->as()->elements(); + + float* input1Values = baseLeftArray->values()->asMutable(); + float* input2Values = baseRightArray->values()->asMutable(); + + int numInput = rows.size(); + + Eigen::Map< + Eigen::Matrix> + input1Matrix(input1Values, numInput, inputDims_); + Eigen::Map< + Eigen::Matrix> + input2Matrix(input2Values, numInput, inputDims_); + + std::vector> results; + + for (int i = 0; i < numInput; i++) { + //Eigen::Matrix vSum = input1Matrix.row(i) + input2Matrix.row(i); + Eigen::VectorXf vSum = input1Matrix.row(i) + input2Matrix.row(i); + std::vector curVec(vSum.data(), vSum.data() + vSum.size()); + //std::vector std_vector(vSum.data(), vSum.data() + vSum.size()); + results.push_back(curVec); + } + + VectorMaker maker{context.pool()}; + output = maker.arrayVector(results, REAL()); + } + + static std::vector> signatures() { + return {exec::FunctionSignatureBuilder() + .argumentType("array(REAL)") + .argumentType("array(REAL)") + .returnType("array(REAL)") + .build()}; + } + + static std::string getName() { + return "vector_addition"; + }; + + float* getTensor() const override { + // TODO: need to implement + return nullptr; + } + + CostEstimate getCost(std::vector inputDims) { + // TODO: need to implement + return CostEstimate(0, inputDims[0], inputDims[1]); + } + + private: + int inputDims_; +}; + + +class RewriteFactorized : HiveConnectorTestBase { + public: + RewriteFactorized() { + // Register Presto scalar functions. + functions::prestosql::registerAllScalarFunctions(); + // Register Presto aggregate functions. + aggregate::prestosql::registerAllAggregateFunctions(); + // Register type resolver with DuckDB SQL parser. + parse::registerTypeResolver(); + // HiveConnectorTestBase::SetUp(); + //parquet::registerParquetReaderFactory(); + + auto hiveConnector = + connector::getConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName) + ->newConnector(kHiveConnectorId, std::make_shared()); + connector::registerConnector(hiveConnector); + + // SetUp(); + + } + + ~RewriteFactorized() {} + + void SetUp() override { + // TODO: not used for now + // HiveConnectorTestBase::SetUp(); + // parquet::registerParquetReaderFactory(); + } + + void TearDown() override { + HiveConnectorTestBase::TearDown(); + } + + void TestBody() override { + } + + + static void waitForFinishedDrivers(const std::shared_ptr& task) { + + while (!task->isFinished()) { + + usleep(1000); // 0.01 second. + + } + } + + std::shared_ptr executor_{ + std::make_shared( + std::thread::hardware_concurrency())}; + + std::shared_ptr queryCtx_{ + std::make_shared(executor_.get())}; + + std::shared_ptr pool_{memory::MemoryManager::getInstance()->addLeafPool()}; + + VectorMaker maker{pool_.get()}; + + std::unordered_map tableName2RowVector; + std::unordered_map>> operatorParam2Weights; + std::unordered_map> tabel2Columns; + std::unordered_map allFeatureStatus; + std::vector modelOperators; + std::map factorizationPlans; + std::map featureStartPos; + int totalFeatures = 0; + + + + // Function to check if a string represents a valid integer + bool isInteger(const std::string& s) { + + if (s.empty() || (s.size() > 1 && s[0] == '0')) { + + return false; // prevent leading zeros for non-zero integers + + } + + for (char c : s) { + + if (!std::isdigit(c)) { + + return false; + } + } + + return true; + + } + + + + void findFactorizationPlans(std::string filePath) { + // Open the file + std::ifstream inputFile(filePath); + if (!inputFile.is_open()) { + std::cerr << "Failed to open the file: " << filePath << std::endl; + //return 1; + } + + // Map to store key-value pairs + std::map myMap; + + // Read the file line by line + std::string line; + while (std::getline(inputFile, line)) { + // Find the separator " = " to split the key and value + size_t separatorPos = line.find(" = "); + if (separatorPos != std::string::npos) { + // Extract the key and value + std::string key = line.substr(0, separatorPos); + int value = std::stoi(line.substr(separatorPos + 3)); // Convert value to int + + // Store in the map + myMap[key] = value; + } + } + + // Close the file + inputFile.close(); + + factorizationPlans = myMap; + } + + + + std::vector extractOperatorsInReverse(const std::string& input) { + std::vector operators; + std::regex operatorPattern(R"(([a-zA-Z_]+\d+)\()"); // Match patterns like mat_mul1(, mat_add2(, etc. + std::smatch match; + + std::string::const_iterator searchStart(input.cbegin()); + while (std::regex_search(searchStart, input.cend(), match, operatorPattern)) { + operators.push_back(match[1]); // Extract the operator name + searchStart = match.suffix().first; // Move the search start position + } + + // Reverse the order of operators to match actual execution order + std::reverse(operators.begin(), operators.end()); + return operators; + } + + + + std::vector> loadHDF5Array(const std::string& filename, const std::string& datasetName, int doPrint) { + H5::H5File file(filename, H5F_ACC_RDONLY); + H5::DataSet dataset = file.openDataSet(datasetName); + H5::DataSpace dataspace = dataset.getSpace(); + + // Get the number of dimensions + int rank = dataspace.getSimpleExtentNdims(); + // std::cout << "Rank: " << rank << std::endl; + + // Allocate space for the dimensions + std::vector dims(rank); + + // Get the dataset dimensions + dataspace.getSimpleExtentDims(dims.data(), nullptr); + + size_t rows; + size_t cols; + + if (rank == 1) { + rows = dims[0]; + cols = 1; + } + else if (rank == 2) { + rows = dims[0]; + cols = dims[1]; + } else { + throw std::runtime_error("Unsupported rank: " + std::to_string(rank)); + } + + // Read data into a 1D vector + std::vector flatData(rows * cols); + dataset.read(flatData.data(), H5::PredType::NATIVE_FLOAT); + + // Convert to 2D vector + std::vector> result(rows, std::vector(cols)); + for (size_t i = 0; i < rows; ++i) { + for (size_t j = 0; j < cols; ++j) { + result[i][j] = flatData[i * cols + j]; + if (doPrint == 1) + std::cout << result[i][j] << ", "; + } + if (doPrint == 1) + std::cout << std::endl; + } + + // Close the dataset and file + dataset.close(); + file.close(); + + return result; + } + + + + + void findWeights(std::string modelPath) { + // read the parameter weights from file + std::vector> w1 = loadHDF5Array(modelPath, "fc1.weight", 0); + std::vector> b1 = loadHDF5Array(modelPath, "fc1.bias", 0); + std::vector> w2 = loadHDF5Array(modelPath, "fc2.weight", 0); + std::vector> b2 = loadHDF5Array(modelPath, "fc2.bias", 0); + std::vector> w3 = loadHDF5Array(modelPath, "fc3.weight", 0); + std::vector> b3 = loadHDF5Array(modelPath, "fc3.bias", 0); + + // store the weights in map with same name as operator + operatorParam2Weights["mat_mul1"] = w1; + operatorParam2Weights["mat_add1"] = b1; + operatorParam2Weights["mat_mul2"] = w2; + operatorParam2Weights["mat_add2"] = b2; + operatorParam2Weights["mat_mul3"] = w3; + operatorParam2Weights["mat_add3"] = b3; + + std::cout << "Shape of mat_mul1 weight: " << w1.size() << ", " << w1[0].size() << std::endl; + } + + + + std::vector> extractSubweight(const std::vector>& matrix, int start, int n) { + std::vector> result; + + std::cout << "Extracting subweight" << std::endl; + std::cout << start << ", " << n << std::endl; + + // Ensure that the range [start, start + n) is within bounds + //int end = std::min(start + n, matrix.size()); + int end = start + n; + for (int i = start; i < end; ++i) { + result.push_back(matrix[i]); // Copy rows within the range + } + + return result; + } + + + + RowVectorPtr getTableFromCSVFile(VectorMaker & maker, std::string csvFilePath, std::string tableName, std::string joinKey) { + + std::ifstream file(csvFilePath.c_str()); + if (file.fail()) { + + std::cerr << "Error in reading data file:" << csvFilePath << std::endl; + exit(1); + + } + + std::string line; + + std::cout << tableName << std::endl; + + std::vector joinVec; + std::vector> featuresVec; + + // Ignore the first line (header) + if (std::getline(file, line)) { + std::cout << "Ignoring header: " << line << std::endl; + } + + int count = 0; + while (std::getline(file, line)) { + std::istringstream iss(line); + std::string numberStr; + std::vector features; + int colIndex = 0; + + while (std::getline(iss, numberStr, ',')) { + if (numberStr.size() >= 2 && numberStr.front() == '"' && numberStr.back() == '"') { + numberStr = numberStr.substr(1, numberStr.size() - 2); + } + + if (colIndex == 0) { + joinVec.push_back(std::stoi(numberStr)); + } + else { + features.push_back(std::stof(numberStr)); + } + colIndex += 1; + } + featuresVec.push_back(features); + count += 1; + } + file.close(); + + std::string featureCol = "f_" + tableName; + auto joinVector = maker.flatVector(joinVec); + auto featuresVector = maker.arrayVector(featuresVec, REAL()); + auto tableRowVector = maker.rowVector( + {joinKey, featureCol}, {joinVector, featuresVector} + ); + + std::vector cols = {joinKey, featureCol}; + tabel2Columns[tableName] = cols; + totalFeatures += featuresVec[0].size(); + + FeatureStatus fs1; + fs1.isFeature = 0; + allFeatureStatus[joinKey] = fs1; + + FeatureStatus fs2; + fs2.isFeature = 1; + fs2.isPushed = 0; + fs2.vectorSize = featuresVec[0].size(); + allFeatureStatus[featureCol] = fs2; + + return tableRowVector; + } + + + + +int sampleQuery() { + + return 29; + +} + +int sampleModel() { + + return 0; + +} + + +void sampleNNModelArch(int numInputFeatures, NNModelContext & nn) { + + nn.inputFeatures = numInputFeatures; + nn.numLayers = 3; + nn.hiddenLayerNeurons = 16; + nn.outputLayerNeurons = 2; + +} + +void sampleDTModelArch (int numInputFeatures, DTModelContext & dt) { + + dt.inputFeatures = numInputFeatures; + dt.treeDepth = 8; + +} + +bool replace(std::string& str, const std::string& from, const std::string& to) { + size_t start_pos = str.find(from); + if(start_pos == std::string::npos) + return false; + str.replace(start_pos, from.length(), to); + return true; +} + + + +void registerNNFunction(std::string op_name, std::vector> weightMatrice, int dim1, int dim2) { + + if (op_name.find("mat_mul") != std::string::npos) { + auto nnWeightVector = maker.arrayVector(weightMatrice, REAL()); + exec::registerVectorFunction( + op_name, + MatrixMultiply::signatures(), + std::make_unique( + std::move(nnWeightVector->elements()->values()->asMutable()), dim1, dim2) + ); + std::cout << "Registered a mat_mul function of name " << op_name << " with dimension " << dim1 << ", " << dim2 << endl; + } + + else if (op_name.find("mat_add") != std::string::npos) { + auto nnWeightVector = maker.arrayVector(weightMatrice, REAL()); + exec::registerVectorFunction( + op_name, + MatrixVectorAddition::signatures(), + std::make_unique( + std::move(nnWeightVector->elements()->values()->asMutable()), dim1) + ); + std::cout << "Registered a mat_add function of name " << op_name << " with dimension " << dim1 << endl; + } + + else if (op_name.find("relu") != std::string::npos) { + exec::registerVectorFunction( + op_name, Relu::signatures(), std::make_unique(), + {}, + true); + std::cout << "Registered a relu function of name " << op_name << endl; + } + + else if (op_name.find("softmax") != std::string::npos) { + exec::registerVectorFunction( + op_name, Softmax::signatures(), std::make_unique()); + std::cout << "Registered a softmax function of name " << op_name << endl; + } + + else if (op_name.find("vector_addition") != std::string::npos) { + exec::registerVectorFunction( + op_name, + VectorAddition::signatures(), + std::make_unique(dim1) + ); + std::cout << "Registered a vector_addition function of name " << op_name << " with dimension " << dim1 << endl; + } + + +} + + + + + +float * genWeight(int dim1, int dim2) { + + int total_size = dim1 * dim2; + + //generate weight matrix + float * weight = new float[total_size]; + for (int i = 0; i < total_size; i++) { + if (i % 2 == 0) { + weight[i] = 1.0; + } else { + weight[i] = 0.0; + } + } + return weight; +} + +bool addModelInferenceToQueryPlanAfterFactorize(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator) { + + std::string modelProjString = ""; + std::vector> emptyMatrix; + for (int i = modelOperators.size() - 1; i > 0; i--) { + std::string opName = modelOperators[i]; + if (opName.find("mat_mul") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + int dim2 = param[0].size(); + registerNNFunction(opName, param, dim1, dim2); + } + else if (opName.find("mat_add") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + registerNNFunction(opName, param, dim1, -1); + } + else { + registerNNFunction(opName, emptyMatrix, -1, -1); + } + modelProjString += opName + "("; + } + modelProjString += "features"; + + for (int i = modelOperators.size() - 1; i > 0; i--) { + modelProjString += ")"; + } + modelProjString += " AS output"; + + std::cout << "Inference Part: " << modelProjString << endl; + planBuilder.project({modelProjString}); + + return true; + +} + +bool addModelInferenceToQueryPlan(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator) { + + std::string modelProjString = ""; + std::vector> emptyMatrix; + for (int i = modelOperators.size() - 1; i >= 0; i--) { + std::string opName = modelOperators[i]; + if (opName.find("mat_mul") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + int dim2 = param[0].size(); + registerNNFunction(opName, param, dim1, dim2); + } + else if (opName.find("mat_add") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + registerNNFunction(opName, param, dim1, -1); + } + else { + registerNNFunction(opName, emptyMatrix, -1, -1); + } + modelProjString += opName + "("; + } + modelProjString += "features"; + + for (int i = modelOperators.size() - 1; i >= 0; i--) { + modelProjString += ")"; + } + modelProjString += " AS output"; + + std::cout << "Inference Part: " << modelProjString << endl; + planBuilder.project({modelProjString}); + + return true; + +} + + +bool rewriteWithFactorization(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator, std::string joinOrderStr) { + + // Create a JSON reader and root object + Json::CharReaderBuilder readerBuilder; + Json::Value root; // Root will hold the parsed JSON array + std::string errors; + + // Parse the JSON string + std::istringstream stream(joinOrderStr); + if (!Json::parseFromStream(readerBuilder, stream, &root, &errors)) { + std::cerr << "Error parsing JSON: " << errors << "\n"; + return 1; + } + //JSON Reading successful" + + std::unordered_map sources; //with filters and projections pushed down; + + std::string outName; + std::string leftName; + std::string rightName; + + std::string firstOpName = modelOperators[0]; // name of operator of split layer + std::vector> firstWeight = operatorParam2Weights[firstOpName]; // weight of the split layer + int fCurrentTotal = 0; + int numCols = firstWeight.size(); // number of columns in split layer + int numNeurons = firstWeight[0].size(); // number of neurons in split layer + int k = 0; + int addIdx = 0; + + std::vector> emptyMatrix; + bool isAdditionRegistered = false; + + // Iterate through the array + for (const auto& item : root) { + std::cout << "ID: " << item["ID"].asString() << "\n"; + std::cout << "Left: " << item["Left"].asString() << "\n"; + std::cout << "Right: " << item["Right"].asString() << "\n"; + std::cout << "Pred: " << item["Pred"].asString() << "\n"; + std::cout << "ProbeKeys: " << item["ProbeKeys"].asString() << "\n"; + std::cout << "BuildKeys: " << item["BuildKeys"].asString() << "\n"; + + std::string joinId = item["ID"].asString(); + std::string leftTable = item["Left"].asString(); + std::string rightTable = item["Right"].asString(); + std::string probKeys = item["ProbeKeys"].asString(); + std::string buildKeys = item["BuildKeys"].asString(); + int NumDimLeft = item["NumDimLeft"].asInt(); + int NumDimRight = item["NumDimRight"].asInt(); + + std::string leftFactorizationKey = leftTable + "--->" + joinId; + std::string rightFactorizationKey = rightTable + "--->" + joinId; + + std::cout << "left factorization key: " << leftFactorizationKey << std::endl; + std::cout << "right factorization key: " << leftFactorizationKey << std::endl; + + bool isLeftTableNotLeaf = isInteger(leftTable); + + if (isLeftTableNotLeaf == false) { + // left table is leaf + + featureStartPos[leftTable] = fCurrentTotal; + fCurrentTotal += NumDimLeft; + + std::string fName = tabel2Columns[leftTable][1]; + + if (factorizationPlans[leftFactorizationKey] == 1) { + // Doing factorization of left edge + std::vector> subWeight = extractSubweight(firstWeight, featureStartPos[leftTable], NumDimLeft); + std::string newOpName = firstOpName + "_" + std::to_string(k); + registerNNFunction(newOpName, subWeight, NumDimLeft, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + tabel2Columns[leftTable][1] = fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = 1; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + + auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[leftTable]}) + .project({tabel2Columns[leftTable][0], fNewNameFull}); + + sources[leftTable] = leftPlan; + } + else { + // Not Doing factorization of left edge + FeatureStatus fs = allFeatureStatus[fName]; + fs.featureStartPos = featureStartPos[leftTable]; + allFeatureStatus[fName] = fs; + + auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[leftTable]}) + .project({tabel2Columns[leftTable]}); + + sources[leftTable] = leftPlan; + } + + } + + bool isRightTableNotLeaf = isInteger(rightTable); + if (isRightTableNotLeaf == false) { + // right table is leaf + + featureStartPos[rightTable] = fCurrentTotal; + fCurrentTotal += NumDimRight; + + std::string fName = tabel2Columns[rightTable][1]; + + if (factorizationPlans[rightFactorizationKey] == 1) { + // Doing factorization of right edge + std::vector> subWeight = extractSubweight(firstWeight, featureStartPos[rightTable], NumDimRight); + std::string newOpName = firstOpName + "_" + std::to_string(k); + registerNNFunction(newOpName, subWeight, NumDimRight, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + tabel2Columns[rightTable][1] = fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = 1; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + + auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[rightTable]}) + .project({tabel2Columns[rightTable][0], fNewNameFull}); + + sources[rightTable] = rightPlan; + } + else { + // Not Doing factorization of right edge + FeatureStatus fs = allFeatureStatus[fName]; + fs.featureStartPos = featureStartPos[rightTable]; + allFeatureStatus[fName] = fs; + + auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[rightTable]}) + .project({tabel2Columns[rightTable]}); + + sources[rightTable] = rightPlan; + } + + } + + featureStartPos[joinId] = featureStartPos[leftTable]; + + + PlanBuilder left, right, out; + + if (sources.count(leftTable) > 0) { + + left = sources[leftTable]; + + } + + if (sources.count(rightTable) > 0) { + + right = sources[rightTable]; + + } + + //compose join features + std::vector leftProj; + std::vector joinCols; + int totalPushed = 0; + + if (factorizationPlans[leftFactorizationKey] == 1) { + // Factorizing all left features that were not pushed earlier + int newFactorizedCount = 0; + for (int i = 0; i < tabel2Columns[leftTable].size(); i++) { + std::string fName = tabel2Columns[leftTable][i]; + if (allFeatureStatus.count(fName) <= 0) { + std::cout << "Feature status not found for feature " << fName << std::endl; + continue; + } + FeatureStatus fs = allFeatureStatus[fName]; + if (fs.isFeature == 1 && fs.isPushed == 0) { + + // Feature not pushed earlier, pushing now + std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); + std::string newOpName = firstOpName + "_" + std::to_string(k); + registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = true; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + leftProj.push_back(fNewNameFull); + joinCols.push_back(fNewName); + newFactorizedCount += 1; + totalPushed += 1; + } + else { + // Feature not a feature or pushed earlier + leftProj.push_back(fName); + joinCols.push_back(fName); + if (fs.isFeature == 1) { + // Feature pushed earlier + totalPushed += 1; + } + } + } + + if (newFactorizedCount > 0) { + left = left.project({leftProj}); + } + } + else { + // Just adding all left columns without factorization + for (int i = 0; i < tabel2Columns[leftTable].size(); i++) { + std::string fName = tabel2Columns[leftTable][i]; + leftProj.push_back(fName); + joinCols.push_back(fName); + } + } + + + std::vector rightProj; + + if (factorizationPlans[rightFactorizationKey] == 1) { + // Factorizing all right features that were not pushed earlier + int newFactorizedCount = 0; + for (int i = 0; i < tabel2Columns[rightTable].size(); i++) { + std::string fName = tabel2Columns[rightTable][i]; + if (allFeatureStatus.count(fName) <= 0) { + std::cout << "Feature status not found for feature " << fName << std::endl; + continue; + } + FeatureStatus fs = allFeatureStatus[fName]; + if (fs.isFeature == 1 && fs.isPushed == 0) { + + // Feature not pushed earlier, pushing now + std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); + std::string newOpName = firstOpName + "_" + std::to_string(k); + registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = true; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + rightProj.push_back(fNewNameFull); + joinCols.push_back(fNewName); + newFactorizedCount += 1; + totalPushed += 1; + } + else { + // Feature not a feature or pushed earlier + rightProj.push_back(fName); + joinCols.push_back(fName); + if (fs.isFeature == 1) { + // Feature pushed earlier + totalPushed += 1; + } + } + } + + if (newFactorizedCount > 0) { + right = right.project({rightProj}); + } + } + else { + // Just adding all right columns without factorization + for (int i = 0; i < tabel2Columns[rightTable].size(); i++) { + std::string fName = tabel2Columns[rightTable][i]; + rightProj.push_back(fName); + joinCols.push_back(fName); + } + } + + tabel2Columns[joinId] = joinCols; + // Writing join + out = left.hashJoin( + {item["ProbeKeys"].asString()}, + {item["BuildKeys"].asString()}, + right.planNode(), + "", + {joinCols} + ); + + if (totalPushed > 1) { + // some features were pushed before join, they need to be aggregated + std::vector joinColsNew; + std::vector joinProj; + + bool isFirstVec = true; + std::string projString = ""; + for (int i = 0; i < joinCols.size(); i++) { + std::string fName = joinCols[i]; + FeatureStatus fs = allFeatureStatus[fName]; + if (fs.isFeature == 0 || fs.isPushed == 0) { + joinColsNew.push_back(fName); + joinProj.push_back(fName); + continue; + } + + if (isFirstVec == true) { + projString = fName; + isFirstVec = false; + continue; + } + + std::string newOpName = "vector_addition"; + std::string opName = newOpName + "(" + projString + ", " + fName + ")"; + projString = opName; + if (isAdditionRegistered == false) { + registerNNFunction(newOpName, emptyMatrix, numNeurons, -1); + isAdditionRegistered = true; + } + } + + std::string fNewName = "added_vec_" + std::to_string(addIdx); + addIdx += 1; + projString += " AS " + fNewName; + joinColsNew.push_back(fNewName); + joinProj.push_back(projString); + + FeatureStatus fs; + fs.isFeature = true; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + + out = out.project({joinProj}); + tabel2Columns[joinId] = joinColsNew; + } + // Finished Writing join + + outName = joinId; + sources[outName] = out; + + } + + planBuilder = sources[outName]; + + std::cout << "After the last join" << std::endl; + std::vector joinProj; + std::vector joinCols; + int newPushedCount = 0; + + // iterate over the final join projection output to compute not pushed feature + for (int i = 0; i < tabel2Columns[outName].size(); i++) { + std::string fName = tabel2Columns[outName][i]; + FeatureStatus fs = allFeatureStatus[fName]; + if (fs.isFeature == 1) { + if (fs.isPushed == 0) { + + // found a feature in the final join output which were not pushed + std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); + std::string newOpName = firstOpName + "_" + std::to_string(k); + std::cout << fName << ", " << fs.featureStartPos << ", " << fs.vectorSize << ", " << subWeight.size() << ", " << subWeight[0].size() << std::endl; + registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = true; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + joinProj.push_back(fNewNameFull); + joinCols.push_back(fNewName); + newPushedCount += 1; + } + else { + joinProj.push_back(fName); + joinCols.push_back(fName); + } + } + } + + if (newPushedCount > 0) { + // final join output projection changed + planBuilder = planBuilder.project({joinProj}); + } + + // perform aggregation of the final join output + std::string projString = joinCols[0]; + for (int i = 1; i < joinCols.size(); i++) { + std::string newOpName = "vector_addition"; + std::string opName = newOpName + "(" + projString + ", " + joinCols[i] + ")"; + projString = opName; + if (isAdditionRegistered == false) { + registerNNFunction(newOpName, emptyMatrix, numNeurons, -1); + isAdditionRegistered = true; + } + } + projString += " AS features"; + + planBuilder = planBuilder.project({projString}); + std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; + return true; + +} + + + +bool writeWithoutFactorization(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator, std::string joinOrderStr) { + + // Create a JSON reader and root object + Json::CharReaderBuilder readerBuilder; + Json::Value root; // Root will hold the parsed JSON array + std::string errors; + + // Parse the JSON string + std::istringstream stream(joinOrderStr); + if (!Json::parseFromStream(readerBuilder, stream, &root, &errors)) { + std::cerr << "Error parsing JSON: " << errors << "\n"; + return 1; + } + + std::unordered_map sources; //with filters and projections pushed down; + + std::string outName; + std::string leftName; + std::string rightName; + std::vector projections; + + std::string firstOpName = modelOperators[0]; + std::vector> firstWeight = operatorParam2Weights[firstOpName]; + int fCurrentTotal = 0; + int numCols = firstWeight.size(); + int numNeurons = firstWeight[0].size(); + int k = 0; + + // Iterate through the array + for (const auto& item : root) { + std::cout << "ID: " << item["ID"].asString() << "\n"; + std::cout << "Left: " << item["Left"].asString() << "\n"; + std::cout << "Right: " << item["Right"].asString() << "\n"; + std::cout << "Pred: " << item["Pred"].asString() << "\n"; + std::cout << "ProbeKeys: " << item["ProbeKeys"].asString() << "\n"; + std::cout << "BuildKeys: " << item["BuildKeys"].asString() << "\n"; + + std::string joinId = item["ID"].asString(); + std::string leftTable = item["Left"].asString(); + std::string rightTable = item["Right"].asString(); + std::string probKeys = item["ProbeKeys"].asString(); + std::string buildKeys = item["BuildKeys"].asString(); + int NumDimLeft = item["NumDimLeft"].asInt(); + int NumDimRight = item["NumDimRight"].asInt(); + + bool isLeftTableNotLeaf = isInteger(leftTable); + if (isLeftTableNotLeaf == false) { + // left table is leaf + fCurrentTotal += NumDimLeft; + + auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[leftTable]}) + .project({tabel2Columns[leftTable]}); + + sources[leftTable] = leftPlan; + + } + + bool isRightTableNotLeaf = isInteger(rightTable); + if (isRightTableNotLeaf == false) { + // right table is leaf + fCurrentTotal += NumDimRight; + + auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[rightTable]}) + .project({tabel2Columns[rightTable]}); + + sources[rightTable] = rightPlan; + + } + + + PlanBuilder left, right, out; + + //retrieve the corresponding PlanBuilder + if (sources.count(leftTable) > 0) { + + left = sources[leftTable]; + + } + + if (sources.count(rightTable) > 0) { + + right = sources[rightTable]; + + } + + //compose join + projections.clear(); + // Access Projection if it exists + if (item.isMember("Projection")) { + for (const auto& proj : item["Projection"]) { + std::cout << proj << std::endl; + projections.push_back(proj.asString()); + } + } + + std::cout << "Writing join" << std::endl; + out = left.hashJoin( + {item["ProbeKeys"].asString()}, + {item["BuildKeys"].asString()}, + right.planNode(), + "", + {projections} + ); + std::cout << "Finished Writing join" << std::endl; + + outName = joinId; + sources[outName] = out; + + } + planBuilder = sources[outName]; + + // After the final join + std::string projString=""; + bool isFirst = true; + for (std::string proj : projections) { + FeatureStatus fs = allFeatureStatus[proj]; + if (fs.isFeature == 1) { + if (isFirst) { + projString += proj; + isFirst = false; + } else { + projString += "," + proj; + } + } + } + projString = "concat(" + projString + ") as features"; + + planBuilder = planBuilder.project({projString}); + std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; + return true; + +} + + + +bool createAndExecuteQuery(std::string queryJsonStr, bool withFactorization) { + PlanBuilder planBuilder{pool_.get()}; + std::shared_ptr planNodeIdGenerator = std::make_shared(); + if (withFactorization) { + // Rewriting with factorization + rewriteWithFactorization(planBuilder, planNodeIdGenerator, queryJsonStr); + + // Step 2. Add model inference to query plan + addModelInferenceToQueryPlanAfterFactorize(planBuilder, planNodeIdGenerator); + } + else { + // Writing without factorization + writeWithoutFactorization(planBuilder, planNodeIdGenerator, queryJsonStr); + + // Step 2. Add model inference to query plan + addModelInferenceToQueryPlan(planBuilder, planNodeIdGenerator); + } + + //std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; + auto myPlan = planBuilder.planNode(); + std::cout << myPlan->toString(true, true) << std::endl; + std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); + auto results = exec::test::AssertQueryBuilder(myPlan).copyResults(pool_.get()); + std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + std::cout << "Time (sec) = " << (std::chrono::duration_cast(end - begin).count()) /1000000.0 << std::endl; + std::cout << "Results Size: " << results->size() << std::endl; + std::cout << "Results:" << results->toString(0, 5) << std::endl; + //std::cout << results->toString(0, results->size()) << std::endl; + return true; + +} + + +}; + +int main(int argc, char** argv) { + + setlocale(LC_TIME, "C"); + + folly::init(&argc, &argv, false); + memory::MemoryManager::initialize({}); + + RewriteFactorized rewriteObj; + + // Load table1 + std::cout << "Reading Table 1 CSV file" << std::endl; + RowVectorPtr table1Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_1.csv", "table_1", "join_key1"); + + std::cout << "Reading Table 2 CSV file" << std::endl; + // Load table2 + RowVectorPtr table2Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_2.csv", "table_2", "join_key2"); + + // Load table3 + std::cout << "Reading Table 3 CSV file" << std::endl; + RowVectorPtr table3Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_3.csv", "table_3", "join_key3"); + + // Load table4 + std::cout << "Reading Table 4 CSV file" << std::endl; + RowVectorPtr table4Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_4.csv", "table_4", "join_key4"); + + + rewriteObj.tableName2RowVector["table_1"] = table1Vec; + rewriteObj.tableName2RowVector["table_2"] = table2Vec; + rewriteObj.tableName2RowVector["table_3"] = table3Vec; + rewriteObj.tableName2RowVector["table_4"] = table4Vec; + + + // retrieve the weights and set to map + std::cout << "Reading model parameters" << std::endl; + rewriteObj.findWeights("resources/model/dummy.h5"); + + // retrieve the model operators from model expression IR + std::string modelInput = "softmax3(mat_add3(mat_mul3(relu2(mat_add2(mat_mul2(relu1(mat_add1(mat_mul1(features)))))))))"; + std::cout << "Extracting model operators" << std::endl; + std::vector operators = rewriteObj.extractOperatorsInReverse(modelInput); + rewriteObj.modelOperators = operators; + + std::string jsonString = R"([ + {"ID":"0","Left":"table_1","Right":"table_2","Pred":"table_1.join_key1 = table_2.join_key2","ProbeKeys":"join_key1","BuildKeys":"join_key2","Projection":["join_key1","f_table_1","join_key2","f_table_2"],"NumTuplesLeft":1000,"NumDimLeft":20,"NumTuplesRight":300,"NumDimRight":39,"NumTuplesOutput":300,"NumDimOutput":59}, + {"ID":"1","Left":"0","Right":"table_3","Pred":"table_2.join_key2 = table_3.join_key3","ProbeKeys":"join_key2","BuildKeys":"join_key3","Projection":["join_key1","f_table_1","join_key2","f_table_2","join_key3","f_table_3"],"NumTuplesLeft":300,"NumDimLeft":59,"NumTuplesRight":12000,"NumDimRight":31,"NumTuplesOutput":12000,"NumDimOutput":90} + ])"; + + std::cout << "Reading factorization Plan" << std::endl; + rewriteObj.findFactorizationPlans("resources/plans/factorization_plan4_3.txt"); + + std::cout << "Performing rewriting" << std::endl; + bool ret = rewriteObj.createAndExecuteQuery(jsonString, true); + + return 1; +} + From 485b99633d6f9bed02b2a5e48265596577b760bc Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:33:18 -0700 Subject: [PATCH 08/15] Added code for rewriting with factorization --- inferf/tests/RewriteFactorized.cpp | 1315 ++++++++++++++++++++++++++++ 1 file changed, 1315 insertions(+) create mode 100644 inferf/tests/RewriteFactorized.cpp diff --git a/inferf/tests/RewriteFactorized.cpp b/inferf/tests/RewriteFactorized.cpp new file mode 100644 index 000000000..35056e30c --- /dev/null +++ b/inferf/tests/RewriteFactorized.cpp @@ -0,0 +1,1315 @@ +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "velox/type/Type.h" +#include "velox/exec/tests/utils/HiveConnectorTestBase.h" +#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h" +#include "velox/functions/prestosql/registration/RegistrationFunctions.h" +#include "velox/ml_functions/DecisionTree.h" +#include "velox/ml_functions/XGBoost.h" +#include "velox/ml_functions/tests/MLTestUtility.h" +#include "velox/parse/TypeResolver.h" +#include "velox/ml_functions/VeloxDecisionTree.h" +#include "velox/common/file/FileSystems.h" +#include "velox/dwio/dwrf/reader/DwrfReader.h" +#include "velox/dwio/parquet/RegisterParquetReader.h" +#include "velox/dwio/parquet/RegisterParquetWriter.h" +#include +#include "velox/connectors/hive/HiveConfig.h" +#include "velox/ml_functions/functions.h" +#include "velox/ml_functions/Concat.h" +#include "velox/ml_functions/NNBuilder.h" +#include +#include +#include "velox/ml_functions/VeloxDecisionTree.h" +#include "velox/expression/VectorFunction.h" +#include "velox/vector/ComplexVector.h" +#include "velox/vector/FlatVector.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace ml; +using namespace facebook::velox; +using namespace facebook::velox::test; +using namespace facebook::velox::exec; +using namespace facebook::velox::exec::test; +using namespace facebook::velox::core; + +/* + * The structure to describe the context of a neural network model architecture + */ +struct NNModelContext { + + int inputFeatures; + + int numLayers; + + int hiddenLayerNeurons; + + int outputLayerNeurons; + +}; + +/* + * The structure to describe the context of a decision tree model architecture + */ + +struct DTModelContext { + + int inputFeatures; + + int treeDepth; + +}; + + +/* + * The structure to describe the push down status of a feature + */ +struct FeatureStatus { + int isFeature; + + int isPushed; + + int vectorSize; + + int featureStartPos; + +}; + + +class VectorAddition : public MLFunction { + public: + VectorAddition(int inputDims) { + inputDims_ = inputDims; + } + + void apply( + const SelectivityVector& rows, + std::vector& args, + const TypePtr& type, + exec::EvalCtx& context, + VectorPtr& output) const override { + BaseVector::ensureWritable(rows, type, context.pool(), output); + + BaseVector* left = args[0].get(); + BaseVector* right = args[1].get(); + + exec::LocalDecodedVector leftHolder(context, *left, rows); + auto decodedLeftArray = leftHolder.get(); + auto baseLeftArray = + decodedLeftArray->base()->as()->elements(); + + exec::LocalDecodedVector rightHolder(context, *right, rows); + auto decodedRightArray = rightHolder.get(); + auto baseRightArray = rightHolder->base()->as()->elements(); + + float* input1Values = baseLeftArray->values()->asMutable(); + float* input2Values = baseRightArray->values()->asMutable(); + + int numInput = rows.size(); + + Eigen::Map< + Eigen::Matrix> + input1Matrix(input1Values, numInput, inputDims_); + Eigen::Map< + Eigen::Matrix> + input2Matrix(input2Values, numInput, inputDims_); + + std::vector> results; + + for (int i = 0; i < numInput; i++) { + //Eigen::Matrix vSum = input1Matrix.row(i) + input2Matrix.row(i); + Eigen::VectorXf vSum = input1Matrix.row(i) + input2Matrix.row(i); + std::vector curVec(vSum.data(), vSum.data() + vSum.size()); + //std::vector std_vector(vSum.data(), vSum.data() + vSum.size()); + results.push_back(curVec); + } + + VectorMaker maker{context.pool()}; + output = maker.arrayVector(results, REAL()); + } + + static std::vector> signatures() { + return {exec::FunctionSignatureBuilder() + .argumentType("array(REAL)") + .argumentType("array(REAL)") + .returnType("array(REAL)") + .build()}; + } + + static std::string getName() { + return "vector_addition"; + }; + + float* getTensor() const override { + // TODO: need to implement + return nullptr; + } + + CostEstimate getCost(std::vector inputDims) { + // TODO: need to implement + return CostEstimate(0, inputDims[0], inputDims[1]); + } + + private: + int inputDims_; +}; + + +class RewriteFactorized : HiveConnectorTestBase { + public: + RewriteFactorized() { + // Register Presto scalar functions. + functions::prestosql::registerAllScalarFunctions(); + // Register Presto aggregate functions. + aggregate::prestosql::registerAllAggregateFunctions(); + // Register type resolver with DuckDB SQL parser. + parse::registerTypeResolver(); + // HiveConnectorTestBase::SetUp(); + //parquet::registerParquetReaderFactory(); + + auto hiveConnector = + connector::getConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName) + ->newConnector(kHiveConnectorId, std::make_shared()); + connector::registerConnector(hiveConnector); + + // SetUp(); + + } + + ~RewriteFactorized() {} + + void SetUp() override { + // TODO: not used for now + // HiveConnectorTestBase::SetUp(); + // parquet::registerParquetReaderFactory(); + } + + void TearDown() override { + HiveConnectorTestBase::TearDown(); + } + + void TestBody() override { + } + + + static void waitForFinishedDrivers(const std::shared_ptr& task) { + + while (!task->isFinished()) { + + usleep(1000); // 0.01 second. + + } + } + + std::shared_ptr executor_{ + std::make_shared( + std::thread::hardware_concurrency())}; + + std::shared_ptr queryCtx_{ + std::make_shared(executor_.get())}; + + std::shared_ptr pool_{memory::MemoryManager::getInstance()->addLeafPool()}; + + VectorMaker maker{pool_.get()}; + + std::unordered_map tableName2RowVector; + std::unordered_map>> operatorParam2Weights; + std::unordered_map> tabel2Columns; + std::unordered_map allFeatureStatus; + std::vector modelOperators; + std::map factorizationPlans; + std::map featureStartPos; + int totalFeatures = 0; + + + + // Function to check if a string represents a valid integer + bool isInteger(const std::string& s) { + + if (s.empty() || (s.size() > 1 && s[0] == '0')) { + + return false; // prevent leading zeros for non-zero integers + + } + + for (char c : s) { + + if (!std::isdigit(c)) { + + return false; + } + } + + return true; + + } + + + + void findFactorizationPlans(std::string filePath) { + // Open the file + std::ifstream inputFile(filePath); + if (!inputFile.is_open()) { + std::cerr << "Failed to open the file: " << filePath << std::endl; + //return 1; + } + + // Map to store key-value pairs + std::map myMap; + + // Read the file line by line + std::string line; + while (std::getline(inputFile, line)) { + // Find the separator " = " to split the key and value + size_t separatorPos = line.find(" = "); + if (separatorPos != std::string::npos) { + // Extract the key and value + std::string key = line.substr(0, separatorPos); + int value = std::stoi(line.substr(separatorPos + 3)); // Convert value to int + + // Store in the map + myMap[key] = value; + } + } + + // Close the file + inputFile.close(); + + factorizationPlans = myMap; + } + + + + std::vector extractOperatorsInReverse(const std::string& input) { + std::vector operators; + std::regex operatorPattern(R"(([a-zA-Z_]+\d+)\()"); // Match patterns like mat_mul1(, mat_add2(, etc. + std::smatch match; + + std::string::const_iterator searchStart(input.cbegin()); + while (std::regex_search(searchStart, input.cend(), match, operatorPattern)) { + operators.push_back(match[1]); // Extract the operator name + searchStart = match.suffix().first; // Move the search start position + } + + // Reverse the order of operators to match actual execution order + std::reverse(operators.begin(), operators.end()); + return operators; + } + + + + std::vector> loadHDF5Array(const std::string& filename, const std::string& datasetName, int doPrint) { + H5::H5File file(filename, H5F_ACC_RDONLY); + H5::DataSet dataset = file.openDataSet(datasetName); + H5::DataSpace dataspace = dataset.getSpace(); + + // Get the number of dimensions + int rank = dataspace.getSimpleExtentNdims(); + // std::cout << "Rank: " << rank << std::endl; + + // Allocate space for the dimensions + std::vector dims(rank); + + // Get the dataset dimensions + dataspace.getSimpleExtentDims(dims.data(), nullptr); + + size_t rows; + size_t cols; + + if (rank == 1) { + rows = dims[0]; + cols = 1; + } + else if (rank == 2) { + rows = dims[0]; + cols = dims[1]; + } else { + throw std::runtime_error("Unsupported rank: " + std::to_string(rank)); + } + + // Read data into a 1D vector + std::vector flatData(rows * cols); + dataset.read(flatData.data(), H5::PredType::NATIVE_FLOAT); + + // Convert to 2D vector + std::vector> result(rows, std::vector(cols)); + for (size_t i = 0; i < rows; ++i) { + for (size_t j = 0; j < cols; ++j) { + result[i][j] = flatData[i * cols + j]; + if (doPrint == 1) + std::cout << result[i][j] << ", "; + } + if (doPrint == 1) + std::cout << std::endl; + } + + // Close the dataset and file + dataset.close(); + file.close(); + + return result; + } + + + + + void findWeights(std::string modelPath) { + // read the parameter weights from file + std::vector> w1 = loadHDF5Array(modelPath, "fc1.weight", 0); + std::vector> b1 = loadHDF5Array(modelPath, "fc1.bias", 0); + std::vector> w2 = loadHDF5Array(modelPath, "fc2.weight", 0); + std::vector> b2 = loadHDF5Array(modelPath, "fc2.bias", 0); + std::vector> w3 = loadHDF5Array(modelPath, "fc3.weight", 0); + std::vector> b3 = loadHDF5Array(modelPath, "fc3.bias", 0); + + // store the weights in map with same name as operator + operatorParam2Weights["mat_mul1"] = w1; + operatorParam2Weights["mat_add1"] = b1; + operatorParam2Weights["mat_mul2"] = w2; + operatorParam2Weights["mat_add2"] = b2; + operatorParam2Weights["mat_mul3"] = w3; + operatorParam2Weights["mat_add3"] = b3; + + std::cout << "Shape of mat_mul1 weight: " << w1.size() << ", " << w1[0].size() << std::endl; + } + + + + std::vector> extractSubweight(const std::vector>& matrix, int start, int n) { + std::vector> result; + + std::cout << "Extracting subweight" << std::endl; + std::cout << start << ", " << n << std::endl; + + // Ensure that the range [start, start + n) is within bounds + //int end = std::min(start + n, matrix.size()); + int end = start + n; + for (int i = start; i < end; ++i) { + result.push_back(matrix[i]); // Copy rows within the range + } + + return result; + } + + + + RowVectorPtr getTableFromCSVFile(VectorMaker & maker, std::string csvFilePath, std::string tableName, std::string joinKey) { + + std::ifstream file(csvFilePath.c_str()); + if (file.fail()) { + + std::cerr << "Error in reading data file:" << csvFilePath << std::endl; + exit(1); + + } + + std::string line; + + std::cout << tableName << std::endl; + + std::vector joinVec; + std::vector> featuresVec; + + // Ignore the first line (header) + if (std::getline(file, line)) { + std::cout << "Ignoring header: " << line << std::endl; + } + + int count = 0; + while (std::getline(file, line)) { + std::istringstream iss(line); + std::string numberStr; + std::vector features; + int colIndex = 0; + + while (std::getline(iss, numberStr, ',')) { + if (numberStr.size() >= 2 && numberStr.front() == '"' && numberStr.back() == '"') { + numberStr = numberStr.substr(1, numberStr.size() - 2); + } + + if (colIndex == 0) { + joinVec.push_back(std::stoi(numberStr)); + } + else { + features.push_back(std::stof(numberStr)); + } + colIndex += 1; + } + featuresVec.push_back(features); + count += 1; + } + file.close(); + + std::string featureCol = "f_" + tableName; + auto joinVector = maker.flatVector(joinVec); + auto featuresVector = maker.arrayVector(featuresVec, REAL()); + auto tableRowVector = maker.rowVector( + {joinKey, featureCol}, {joinVector, featuresVector} + ); + + std::vector cols = {joinKey, featureCol}; + tabel2Columns[tableName] = cols; + totalFeatures += featuresVec[0].size(); + + FeatureStatus fs1; + fs1.isFeature = 0; + allFeatureStatus[joinKey] = fs1; + + FeatureStatus fs2; + fs2.isFeature = 1; + fs2.isPushed = 0; + fs2.vectorSize = featuresVec[0].size(); + allFeatureStatus[featureCol] = fs2; + + return tableRowVector; + } + + + + +int sampleQuery() { + + return 29; + +} + +int sampleModel() { + + return 0; + +} + + +void sampleNNModelArch(int numInputFeatures, NNModelContext & nn) { + + nn.inputFeatures = numInputFeatures; + nn.numLayers = 3; + nn.hiddenLayerNeurons = 16; + nn.outputLayerNeurons = 2; + +} + +void sampleDTModelArch (int numInputFeatures, DTModelContext & dt) { + + dt.inputFeatures = numInputFeatures; + dt.treeDepth = 8; + +} + +bool replace(std::string& str, const std::string& from, const std::string& to) { + size_t start_pos = str.find(from); + if(start_pos == std::string::npos) + return false; + str.replace(start_pos, from.length(), to); + return true; +} + + + +void registerNNFunction(std::string op_name, std::vector> weightMatrice, int dim1, int dim2) { + + if (op_name.find("mat_mul") != std::string::npos) { + auto nnWeightVector = maker.arrayVector(weightMatrice, REAL()); + exec::registerVectorFunction( + op_name, + MatrixMultiply::signatures(), + std::make_unique( + std::move(nnWeightVector->elements()->values()->asMutable()), dim1, dim2) + ); + std::cout << "Registered a mat_mul function of name " << op_name << " with dimension " << dim1 << ", " << dim2 << endl; + } + + else if (op_name.find("mat_add") != std::string::npos) { + auto nnWeightVector = maker.arrayVector(weightMatrice, REAL()); + exec::registerVectorFunction( + op_name, + MatrixVectorAddition::signatures(), + std::make_unique( + std::move(nnWeightVector->elements()->values()->asMutable()), dim1) + ); + std::cout << "Registered a mat_add function of name " << op_name << " with dimension " << dim1 << endl; + } + + else if (op_name.find("relu") != std::string::npos) { + exec::registerVectorFunction( + op_name, Relu::signatures(), std::make_unique(), + {}, + true); + std::cout << "Registered a relu function of name " << op_name << endl; + } + + else if (op_name.find("softmax") != std::string::npos) { + exec::registerVectorFunction( + op_name, Softmax::signatures(), std::make_unique()); + std::cout << "Registered a softmax function of name " << op_name << endl; + } + + else if (op_name.find("vector_addition") != std::string::npos) { + exec::registerVectorFunction( + op_name, + VectorAddition::signatures(), + std::make_unique(dim1) + ); + std::cout << "Registered a vector_addition function of name " << op_name << " with dimension " << dim1 << endl; + } + + +} + + + + + +float * genWeight(int dim1, int dim2) { + + int total_size = dim1 * dim2; + + //generate weight matrix + float * weight = new float[total_size]; + for (int i = 0; i < total_size; i++) { + if (i % 2 == 0) { + weight[i] = 1.0; + } else { + weight[i] = 0.0; + } + } + return weight; +} + +bool addModelInferenceToQueryPlanAfterFactorize(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator) { + + std::string modelProjString = ""; + std::vector> emptyMatrix; + for (int i = modelOperators.size() - 1; i > 0; i--) { + std::string opName = modelOperators[i]; + if (opName.find("mat_mul") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + int dim2 = param[0].size(); + registerNNFunction(opName, param, dim1, dim2); + } + else if (opName.find("mat_add") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + registerNNFunction(opName, param, dim1, -1); + } + else { + registerNNFunction(opName, emptyMatrix, -1, -1); + } + modelProjString += opName + "("; + } + modelProjString += "features"; + + for (int i = modelOperators.size() - 1; i > 0; i--) { + modelProjString += ")"; + } + modelProjString += " AS output"; + + std::cout << "Inference Part: " << modelProjString << endl; + planBuilder.project({modelProjString}); + + return true; + +} + +bool addModelInferenceToQueryPlan(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator) { + + std::string modelProjString = ""; + std::vector> emptyMatrix; + for (int i = modelOperators.size() - 1; i >= 0; i--) { + std::string opName = modelOperators[i]; + if (opName.find("mat_mul") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + int dim2 = param[0].size(); + registerNNFunction(opName, param, dim1, dim2); + } + else if (opName.find("mat_add") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + registerNNFunction(opName, param, dim1, -1); + } + else { + registerNNFunction(opName, emptyMatrix, -1, -1); + } + modelProjString += opName + "("; + } + modelProjString += "features"; + + for (int i = modelOperators.size() - 1; i >= 0; i--) { + modelProjString += ")"; + } + modelProjString += " AS output"; + + std::cout << "Inference Part: " << modelProjString << endl; + planBuilder.project({modelProjString}); + + return true; + +} + + +bool rewriteWithFactorization(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator, std::string joinOrderStr) { + + // Create a JSON reader and root object + Json::CharReaderBuilder readerBuilder; + Json::Value root; // Root will hold the parsed JSON array + std::string errors; + + // Parse the JSON string + std::istringstream stream(joinOrderStr); + if (!Json::parseFromStream(readerBuilder, stream, &root, &errors)) { + std::cerr << "Error parsing JSON: " << errors << "\n"; + return 1; + } + //JSON Reading successful" + + std::unordered_map sources; //with filters and projections pushed down; + + std::string outName; + std::string leftName; + std::string rightName; + + std::string firstOpName = modelOperators[0]; // name of operator of split layer + std::vector> firstWeight = operatorParam2Weights[firstOpName]; // weight of the split layer + int fCurrentTotal = 0; + int numCols = firstWeight.size(); // number of columns in split layer + int numNeurons = firstWeight[0].size(); // number of neurons in split layer + int k = 0; + int addIdx = 0; + + std::vector> emptyMatrix; + bool isAdditionRegistered = false; + + // Iterate through the array + for (const auto& item : root) { + std::cout << "ID: " << item["ID"].asString() << "\n"; + std::cout << "Left: " << item["Left"].asString() << "\n"; + std::cout << "Right: " << item["Right"].asString() << "\n"; + std::cout << "Pred: " << item["Pred"].asString() << "\n"; + std::cout << "ProbeKeys: " << item["ProbeKeys"].asString() << "\n"; + std::cout << "BuildKeys: " << item["BuildKeys"].asString() << "\n"; + + std::string joinId = item["ID"].asString(); + std::string leftTable = item["Left"].asString(); + std::string rightTable = item["Right"].asString(); + std::string probKeys = item["ProbeKeys"].asString(); + std::string buildKeys = item["BuildKeys"].asString(); + int NumDimLeft = item["NumDimLeft"].asInt(); + int NumDimRight = item["NumDimRight"].asInt(); + + std::string leftFactorizationKey = leftTable + "--->" + joinId; + std::string rightFactorizationKey = rightTable + "--->" + joinId; + + std::cout << "left factorization key: " << leftFactorizationKey << std::endl; + std::cout << "right factorization key: " << leftFactorizationKey << std::endl; + + bool isLeftTableNotLeaf = isInteger(leftTable); + + if (isLeftTableNotLeaf == false) { + // left table is leaf + + featureStartPos[leftTable] = fCurrentTotal; + fCurrentTotal += NumDimLeft; + + std::string fName = tabel2Columns[leftTable][1]; + + if (factorizationPlans[leftFactorizationKey] == 1) { + // Doing factorization of left edge + std::vector> subWeight = extractSubweight(firstWeight, featureStartPos[leftTable], NumDimLeft); + std::string newOpName = firstOpName + "_" + std::to_string(k); + registerNNFunction(newOpName, subWeight, NumDimLeft, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + tabel2Columns[leftTable][1] = fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = 1; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + + auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[leftTable]}) + .project({tabel2Columns[leftTable][0], fNewNameFull}); + + sources[leftTable] = leftPlan; + } + else { + // Not Doing factorization of left edge + FeatureStatus fs = allFeatureStatus[fName]; + fs.featureStartPos = featureStartPos[leftTable]; + allFeatureStatus[fName] = fs; + + auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[leftTable]}) + .project({tabel2Columns[leftTable]}); + + sources[leftTable] = leftPlan; + } + + } + + bool isRightTableNotLeaf = isInteger(rightTable); + if (isRightTableNotLeaf == false) { + // right table is leaf + + featureStartPos[rightTable] = fCurrentTotal; + fCurrentTotal += NumDimRight; + + std::string fName = tabel2Columns[rightTable][1]; + + if (factorizationPlans[rightFactorizationKey] == 1) { + // Doing factorization of right edge + std::vector> subWeight = extractSubweight(firstWeight, featureStartPos[rightTable], NumDimRight); + std::string newOpName = firstOpName + "_" + std::to_string(k); + registerNNFunction(newOpName, subWeight, NumDimRight, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + tabel2Columns[rightTable][1] = fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = 1; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + + auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[rightTable]}) + .project({tabel2Columns[rightTable][0], fNewNameFull}); + + sources[rightTable] = rightPlan; + } + else { + // Not Doing factorization of right edge + FeatureStatus fs = allFeatureStatus[fName]; + fs.featureStartPos = featureStartPos[rightTable]; + allFeatureStatus[fName] = fs; + + auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[rightTable]}) + .project({tabel2Columns[rightTable]}); + + sources[rightTable] = rightPlan; + } + + } + + featureStartPos[joinId] = featureStartPos[leftTable]; + + + PlanBuilder left, right, out; + + if (sources.count(leftTable) > 0) { + + left = sources[leftTable]; + + } + + if (sources.count(rightTable) > 0) { + + right = sources[rightTable]; + + } + + //compose join features + std::vector leftProj; + std::vector joinCols; + int totalPushed = 0; + + if (factorizationPlans[leftFactorizationKey] == 1) { + // Factorizing all left features that were not pushed earlier + int newFactorizedCount = 0; + for (int i = 0; i < tabel2Columns[leftTable].size(); i++) { + std::string fName = tabel2Columns[leftTable][i]; + if (allFeatureStatus.count(fName) <= 0) { + std::cout << "Feature status not found for feature " << fName << std::endl; + continue; + } + FeatureStatus fs = allFeatureStatus[fName]; + if (fs.isFeature == 1 && fs.isPushed == 0) { + + // Feature not pushed earlier, pushing now + std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); + std::string newOpName = firstOpName + "_" + std::to_string(k); + registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = true; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + leftProj.push_back(fNewNameFull); + joinCols.push_back(fNewName); + newFactorizedCount += 1; + totalPushed += 1; + } + else { + // Feature not a feature or pushed earlier + leftProj.push_back(fName); + joinCols.push_back(fName); + if (fs.isFeature == 1) { + // Feature pushed earlier + totalPushed += 1; + } + } + } + + if (newFactorizedCount > 0) { + left = left.project({leftProj}); + } + } + else { + // Just adding all left columns without factorization + for (int i = 0; i < tabel2Columns[leftTable].size(); i++) { + std::string fName = tabel2Columns[leftTable][i]; + leftProj.push_back(fName); + joinCols.push_back(fName); + } + } + + + std::vector rightProj; + + if (factorizationPlans[rightFactorizationKey] == 1) { + // Factorizing all right features that were not pushed earlier + int newFactorizedCount = 0; + for (int i = 0; i < tabel2Columns[rightTable].size(); i++) { + std::string fName = tabel2Columns[rightTable][i]; + if (allFeatureStatus.count(fName) <= 0) { + std::cout << "Feature status not found for feature " << fName << std::endl; + continue; + } + FeatureStatus fs = allFeatureStatus[fName]; + if (fs.isFeature == 1 && fs.isPushed == 0) { + + // Feature not pushed earlier, pushing now + std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); + std::string newOpName = firstOpName + "_" + std::to_string(k); + registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = true; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + rightProj.push_back(fNewNameFull); + joinCols.push_back(fNewName); + newFactorizedCount += 1; + totalPushed += 1; + } + else { + // Feature not a feature or pushed earlier + rightProj.push_back(fName); + joinCols.push_back(fName); + if (fs.isFeature == 1) { + // Feature pushed earlier + totalPushed += 1; + } + } + } + + if (newFactorizedCount > 0) { + right = right.project({rightProj}); + } + } + else { + // Just adding all right columns without factorization + for (int i = 0; i < tabel2Columns[rightTable].size(); i++) { + std::string fName = tabel2Columns[rightTable][i]; + rightProj.push_back(fName); + joinCols.push_back(fName); + } + } + + tabel2Columns[joinId] = joinCols; + // Writing join + out = left.hashJoin( + {item["ProbeKeys"].asString()}, + {item["BuildKeys"].asString()}, + right.planNode(), + "", + {joinCols} + ); + + if (totalPushed > 1) { + // some features were pushed before join, they need to be aggregated + std::vector joinColsNew; + std::vector joinProj; + + bool isFirstVec = true; + std::string projString = ""; + for (int i = 0; i < joinCols.size(); i++) { + std::string fName = joinCols[i]; + FeatureStatus fs = allFeatureStatus[fName]; + if (fs.isFeature == 0 || fs.isPushed == 0) { + joinColsNew.push_back(fName); + joinProj.push_back(fName); + continue; + } + + if (isFirstVec == true) { + projString = fName; + isFirstVec = false; + continue; + } + + std::string newOpName = "vector_addition"; + std::string opName = newOpName + "(" + projString + ", " + fName + ")"; + projString = opName; + if (isAdditionRegistered == false) { + registerNNFunction(newOpName, emptyMatrix, numNeurons, -1); + isAdditionRegistered = true; + } + } + + std::string fNewName = "added_vec_" + std::to_string(addIdx); + addIdx += 1; + projString += " AS " + fNewName; + joinColsNew.push_back(fNewName); + joinProj.push_back(projString); + + FeatureStatus fs; + fs.isFeature = true; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + + out = out.project({joinProj}); + tabel2Columns[joinId] = joinColsNew; + } + // Finished Writing join + + outName = joinId; + sources[outName] = out; + + } + + planBuilder = sources[outName]; + + std::cout << "After the last join" << std::endl; + std::vector joinProj; + std::vector joinCols; + int newPushedCount = 0; + + // iterate over the final join projection output to compute not pushed feature + for (int i = 0; i < tabel2Columns[outName].size(); i++) { + std::string fName = tabel2Columns[outName][i]; + FeatureStatus fs = allFeatureStatus[fName]; + if (fs.isFeature == 1) { + if (fs.isPushed == 0) { + + // found a feature in the final join output which were not pushed + std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); + std::string newOpName = firstOpName + "_" + std::to_string(k); + std::cout << fName << ", " << fs.featureStartPos << ", " << fs.vectorSize << ", " << subWeight.size() << ", " << subWeight[0].size() << std::endl; + registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); + std::string fNewName = "factorized_" + std::to_string(k); + std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; + k += 1; + + FeatureStatus fs; + fs.isFeature = true; + fs.isPushed = 1; + fs.vectorSize = numNeurons; + allFeatureStatus[fNewName] = fs; + joinProj.push_back(fNewNameFull); + joinCols.push_back(fNewName); + newPushedCount += 1; + } + else { + joinProj.push_back(fName); + joinCols.push_back(fName); + } + } + } + + if (newPushedCount > 0) { + // final join output projection changed + planBuilder = planBuilder.project({joinProj}); + } + + // perform aggregation of the final join output + std::string projString = joinCols[0]; + for (int i = 1; i < joinCols.size(); i++) { + std::string newOpName = "vector_addition"; + std::string opName = newOpName + "(" + projString + ", " + joinCols[i] + ")"; + projString = opName; + if (isAdditionRegistered == false) { + registerNNFunction(newOpName, emptyMatrix, numNeurons, -1); + isAdditionRegistered = true; + } + } + projString += " AS features"; + + planBuilder = planBuilder.project({projString}); + std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; + return true; + +} + + + +bool writeWithoutFactorization(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator, std::string joinOrderStr) { + + // Create a JSON reader and root object + Json::CharReaderBuilder readerBuilder; + Json::Value root; // Root will hold the parsed JSON array + std::string errors; + + // Parse the JSON string + std::istringstream stream(joinOrderStr); + if (!Json::parseFromStream(readerBuilder, stream, &root, &errors)) { + std::cerr << "Error parsing JSON: " << errors << "\n"; + return 1; + } + + std::unordered_map sources; //with filters and projections pushed down; + + std::string outName; + std::string leftName; + std::string rightName; + std::vector projections; + + std::string firstOpName = modelOperators[0]; + std::vector> firstWeight = operatorParam2Weights[firstOpName]; + int fCurrentTotal = 0; + int numCols = firstWeight.size(); + int numNeurons = firstWeight[0].size(); + int k = 0; + + // Iterate through the array + for (const auto& item : root) { + std::cout << "ID: " << item["ID"].asString() << "\n"; + std::cout << "Left: " << item["Left"].asString() << "\n"; + std::cout << "Right: " << item["Right"].asString() << "\n"; + std::cout << "Pred: " << item["Pred"].asString() << "\n"; + std::cout << "ProbeKeys: " << item["ProbeKeys"].asString() << "\n"; + std::cout << "BuildKeys: " << item["BuildKeys"].asString() << "\n"; + + std::string joinId = item["ID"].asString(); + std::string leftTable = item["Left"].asString(); + std::string rightTable = item["Right"].asString(); + std::string probKeys = item["ProbeKeys"].asString(); + std::string buildKeys = item["BuildKeys"].asString(); + int NumDimLeft = item["NumDimLeft"].asInt(); + int NumDimRight = item["NumDimRight"].asInt(); + + bool isLeftTableNotLeaf = isInteger(leftTable); + if (isLeftTableNotLeaf == false) { + // left table is leaf + fCurrentTotal += NumDimLeft; + + auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[leftTable]}) + .project({tabel2Columns[leftTable]}); + + sources[leftTable] = leftPlan; + + } + + bool isRightTableNotLeaf = isInteger(rightTable); + if (isRightTableNotLeaf == false) { + // right table is leaf + fCurrentTotal += NumDimRight; + + auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector[rightTable]}) + .project({tabel2Columns[rightTable]}); + + sources[rightTable] = rightPlan; + + } + + + PlanBuilder left, right, out; + + //retrieve the corresponding PlanBuilder + if (sources.count(leftTable) > 0) { + + left = sources[leftTable]; + + } + + if (sources.count(rightTable) > 0) { + + right = sources[rightTable]; + + } + + //compose join + projections.clear(); + // Access Projection if it exists + if (item.isMember("Projection")) { + for (const auto& proj : item["Projection"]) { + std::cout << proj << std::endl; + projections.push_back(proj.asString()); + } + } + + std::cout << "Writing join" << std::endl; + out = left.hashJoin( + {item["ProbeKeys"].asString()}, + {item["BuildKeys"].asString()}, + right.planNode(), + "", + {projections} + ); + std::cout << "Finished Writing join" << std::endl; + + outName = joinId; + sources[outName] = out; + + } + planBuilder = sources[outName]; + + // After the final join + std::string projString=""; + bool isFirst = true; + for (std::string proj : projections) { + FeatureStatus fs = allFeatureStatus[proj]; + if (fs.isFeature == 1) { + if (isFirst) { + projString += proj; + isFirst = false; + } else { + projString += "," + proj; + } + } + } + projString = "concat(" + projString + ") as features"; + + planBuilder = planBuilder.project({projString}); + std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; + return true; + +} + + + +bool createAndExecuteQuery(std::string queryJsonStr, bool withFactorization) { + PlanBuilder planBuilder{pool_.get()}; + std::shared_ptr planNodeIdGenerator = std::make_shared(); + if (withFactorization) { + // Rewriting with factorization + rewriteWithFactorization(planBuilder, planNodeIdGenerator, queryJsonStr); + + // Step 2. Add model inference to query plan + addModelInferenceToQueryPlanAfterFactorize(planBuilder, planNodeIdGenerator); + } + else { + // Writing without factorization + writeWithoutFactorization(planBuilder, planNodeIdGenerator, queryJsonStr); + + // Step 2. Add model inference to query plan + addModelInferenceToQueryPlan(planBuilder, planNodeIdGenerator); + } + + //std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; + auto myPlan = planBuilder.planNode(); + std::cout << myPlan->toString(true, true) << std::endl; + std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); + auto results = exec::test::AssertQueryBuilder(myPlan).copyResults(pool_.get()); + std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + std::cout << "Time (sec) = " << (std::chrono::duration_cast(end - begin).count()) /1000000.0 << std::endl; + std::cout << "Results Size: " << results->size() << std::endl; + std::cout << "Results:" << results->toString(0, 5) << std::endl; + //std::cout << results->toString(0, results->size()) << std::endl; + return true; + +} + + +}; + +int main(int argc, char** argv) { + + setlocale(LC_TIME, "C"); + + folly::init(&argc, &argv, false); + memory::MemoryManager::initialize({}); + + RewriteFactorized rewriteObj; + + // Load table1 + std::cout << "Reading Table 1 CSV file" << std::endl; + RowVectorPtr table1Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_1.csv", "table_1", "join_key1"); + + std::cout << "Reading Table 2 CSV file" << std::endl; + // Load table2 + RowVectorPtr table2Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_2.csv", "table_2", "join_key2"); + + // Load table3 + std::cout << "Reading Table 3 CSV file" << std::endl; + RowVectorPtr table3Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_3.csv", "table_3", "join_key3"); + + // Load table4 + std::cout << "Reading Table 4 CSV file" << std::endl; + RowVectorPtr table4Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_4.csv", "table_4", "join_key4"); + + + rewriteObj.tableName2RowVector["table_1"] = table1Vec; + rewriteObj.tableName2RowVector["table_2"] = table2Vec; + rewriteObj.tableName2RowVector["table_3"] = table3Vec; + rewriteObj.tableName2RowVector["table_4"] = table4Vec; + + + // retrieve the weights and set to map + std::cout << "Reading model parameters" << std::endl; + rewriteObj.findWeights("resources/model/dummy.h5"); + + // retrieve the model operators from model expression IR + std::string modelInput = "softmax3(mat_add3(mat_mul3(relu2(mat_add2(mat_mul2(relu1(mat_add1(mat_mul1(features)))))))))"; + std::cout << "Extracting model operators" << std::endl; + std::vector operators = rewriteObj.extractOperatorsInReverse(modelInput); + rewriteObj.modelOperators = operators; + + std::string jsonString = R"([ + {"ID":"0","Left":"table_1","Right":"table_2","Pred":"table_1.join_key1 = table_2.join_key2","ProbeKeys":"join_key1","BuildKeys":"join_key2","Projection":["join_key1","f_table_1","join_key2","f_table_2"],"NumTuplesLeft":1000,"NumDimLeft":20,"NumTuplesRight":300,"NumDimRight":39,"NumTuplesOutput":300,"NumDimOutput":59}, + {"ID":"1","Left":"0","Right":"table_3","Pred":"table_2.join_key2 = table_3.join_key3","ProbeKeys":"join_key2","BuildKeys":"join_key3","Projection":["join_key1","f_table_1","join_key2","f_table_2","join_key3","f_table_3"],"NumTuplesLeft":300,"NumDimLeft":59,"NumTuplesRight":12000,"NumDimRight":31,"NumTuplesOutput":12000,"NumDimOutput":90} + ])"; + + std::cout << "Reading factorization Plan" << std::endl; + rewriteObj.findFactorizationPlans("resources/plans/factorization_plan4_3.txt"); + + std::cout << "Performing rewriting" << std::endl; + bool ret = rewriteObj.createAndExecuteQuery(jsonString, true); + + return 1; +} + From 1614f1314bbda0e5b8d83d2f645b43b392092a3b Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:33:43 -0700 Subject: [PATCH 09/15] Delete inferf/RewriteFactorized.cpp --- inferf/RewriteFactorized.cpp | 1315 ---------------------------------- 1 file changed, 1315 deletions(-) delete mode 100644 inferf/RewriteFactorized.cpp diff --git a/inferf/RewriteFactorized.cpp b/inferf/RewriteFactorized.cpp deleted file mode 100644 index 35056e30c..000000000 --- a/inferf/RewriteFactorized.cpp +++ /dev/null @@ -1,1315 +0,0 @@ -#include -//#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "velox/type/Type.h" -#include "velox/exec/tests/utils/HiveConnectorTestBase.h" -#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h" -#include "velox/functions/prestosql/registration/RegistrationFunctions.h" -#include "velox/ml_functions/DecisionTree.h" -#include "velox/ml_functions/XGBoost.h" -#include "velox/ml_functions/tests/MLTestUtility.h" -#include "velox/parse/TypeResolver.h" -#include "velox/ml_functions/VeloxDecisionTree.h" -#include "velox/common/file/FileSystems.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" -#include "velox/dwio/parquet/RegisterParquetReader.h" -#include "velox/dwio/parquet/RegisterParquetWriter.h" -#include -#include "velox/connectors/hive/HiveConfig.h" -#include "velox/ml_functions/functions.h" -#include "velox/ml_functions/Concat.h" -#include "velox/ml_functions/NNBuilder.h" -#include -#include -#include "velox/ml_functions/VeloxDecisionTree.h" -#include "velox/expression/VectorFunction.h" -#include "velox/vector/ComplexVector.h" -#include "velox/vector/FlatVector.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -using namespace ml; -using namespace facebook::velox; -using namespace facebook::velox::test; -using namespace facebook::velox::exec; -using namespace facebook::velox::exec::test; -using namespace facebook::velox::core; - -/* - * The structure to describe the context of a neural network model architecture - */ -struct NNModelContext { - - int inputFeatures; - - int numLayers; - - int hiddenLayerNeurons; - - int outputLayerNeurons; - -}; - -/* - * The structure to describe the context of a decision tree model architecture - */ - -struct DTModelContext { - - int inputFeatures; - - int treeDepth; - -}; - - -/* - * The structure to describe the push down status of a feature - */ -struct FeatureStatus { - int isFeature; - - int isPushed; - - int vectorSize; - - int featureStartPos; - -}; - - -class VectorAddition : public MLFunction { - public: - VectorAddition(int inputDims) { - inputDims_ = inputDims; - } - - void apply( - const SelectivityVector& rows, - std::vector& args, - const TypePtr& type, - exec::EvalCtx& context, - VectorPtr& output) const override { - BaseVector::ensureWritable(rows, type, context.pool(), output); - - BaseVector* left = args[0].get(); - BaseVector* right = args[1].get(); - - exec::LocalDecodedVector leftHolder(context, *left, rows); - auto decodedLeftArray = leftHolder.get(); - auto baseLeftArray = - decodedLeftArray->base()->as()->elements(); - - exec::LocalDecodedVector rightHolder(context, *right, rows); - auto decodedRightArray = rightHolder.get(); - auto baseRightArray = rightHolder->base()->as()->elements(); - - float* input1Values = baseLeftArray->values()->asMutable(); - float* input2Values = baseRightArray->values()->asMutable(); - - int numInput = rows.size(); - - Eigen::Map< - Eigen::Matrix> - input1Matrix(input1Values, numInput, inputDims_); - Eigen::Map< - Eigen::Matrix> - input2Matrix(input2Values, numInput, inputDims_); - - std::vector> results; - - for (int i = 0; i < numInput; i++) { - //Eigen::Matrix vSum = input1Matrix.row(i) + input2Matrix.row(i); - Eigen::VectorXf vSum = input1Matrix.row(i) + input2Matrix.row(i); - std::vector curVec(vSum.data(), vSum.data() + vSum.size()); - //std::vector std_vector(vSum.data(), vSum.data() + vSum.size()); - results.push_back(curVec); - } - - VectorMaker maker{context.pool()}; - output = maker.arrayVector(results, REAL()); - } - - static std::vector> signatures() { - return {exec::FunctionSignatureBuilder() - .argumentType("array(REAL)") - .argumentType("array(REAL)") - .returnType("array(REAL)") - .build()}; - } - - static std::string getName() { - return "vector_addition"; - }; - - float* getTensor() const override { - // TODO: need to implement - return nullptr; - } - - CostEstimate getCost(std::vector inputDims) { - // TODO: need to implement - return CostEstimate(0, inputDims[0], inputDims[1]); - } - - private: - int inputDims_; -}; - - -class RewriteFactorized : HiveConnectorTestBase { - public: - RewriteFactorized() { - // Register Presto scalar functions. - functions::prestosql::registerAllScalarFunctions(); - // Register Presto aggregate functions. - aggregate::prestosql::registerAllAggregateFunctions(); - // Register type resolver with DuckDB SQL parser. - parse::registerTypeResolver(); - // HiveConnectorTestBase::SetUp(); - //parquet::registerParquetReaderFactory(); - - auto hiveConnector = - connector::getConnectorFactory( - connector::hive::HiveConnectorFactory::kHiveConnectorName) - ->newConnector(kHiveConnectorId, std::make_shared()); - connector::registerConnector(hiveConnector); - - // SetUp(); - - } - - ~RewriteFactorized() {} - - void SetUp() override { - // TODO: not used for now - // HiveConnectorTestBase::SetUp(); - // parquet::registerParquetReaderFactory(); - } - - void TearDown() override { - HiveConnectorTestBase::TearDown(); - } - - void TestBody() override { - } - - - static void waitForFinishedDrivers(const std::shared_ptr& task) { - - while (!task->isFinished()) { - - usleep(1000); // 0.01 second. - - } - } - - std::shared_ptr executor_{ - std::make_shared( - std::thread::hardware_concurrency())}; - - std::shared_ptr queryCtx_{ - std::make_shared(executor_.get())}; - - std::shared_ptr pool_{memory::MemoryManager::getInstance()->addLeafPool()}; - - VectorMaker maker{pool_.get()}; - - std::unordered_map tableName2RowVector; - std::unordered_map>> operatorParam2Weights; - std::unordered_map> tabel2Columns; - std::unordered_map allFeatureStatus; - std::vector modelOperators; - std::map factorizationPlans; - std::map featureStartPos; - int totalFeatures = 0; - - - - // Function to check if a string represents a valid integer - bool isInteger(const std::string& s) { - - if (s.empty() || (s.size() > 1 && s[0] == '0')) { - - return false; // prevent leading zeros for non-zero integers - - } - - for (char c : s) { - - if (!std::isdigit(c)) { - - return false; - } - } - - return true; - - } - - - - void findFactorizationPlans(std::string filePath) { - // Open the file - std::ifstream inputFile(filePath); - if (!inputFile.is_open()) { - std::cerr << "Failed to open the file: " << filePath << std::endl; - //return 1; - } - - // Map to store key-value pairs - std::map myMap; - - // Read the file line by line - std::string line; - while (std::getline(inputFile, line)) { - // Find the separator " = " to split the key and value - size_t separatorPos = line.find(" = "); - if (separatorPos != std::string::npos) { - // Extract the key and value - std::string key = line.substr(0, separatorPos); - int value = std::stoi(line.substr(separatorPos + 3)); // Convert value to int - - // Store in the map - myMap[key] = value; - } - } - - // Close the file - inputFile.close(); - - factorizationPlans = myMap; - } - - - - std::vector extractOperatorsInReverse(const std::string& input) { - std::vector operators; - std::regex operatorPattern(R"(([a-zA-Z_]+\d+)\()"); // Match patterns like mat_mul1(, mat_add2(, etc. - std::smatch match; - - std::string::const_iterator searchStart(input.cbegin()); - while (std::regex_search(searchStart, input.cend(), match, operatorPattern)) { - operators.push_back(match[1]); // Extract the operator name - searchStart = match.suffix().first; // Move the search start position - } - - // Reverse the order of operators to match actual execution order - std::reverse(operators.begin(), operators.end()); - return operators; - } - - - - std::vector> loadHDF5Array(const std::string& filename, const std::string& datasetName, int doPrint) { - H5::H5File file(filename, H5F_ACC_RDONLY); - H5::DataSet dataset = file.openDataSet(datasetName); - H5::DataSpace dataspace = dataset.getSpace(); - - // Get the number of dimensions - int rank = dataspace.getSimpleExtentNdims(); - // std::cout << "Rank: " << rank << std::endl; - - // Allocate space for the dimensions - std::vector dims(rank); - - // Get the dataset dimensions - dataspace.getSimpleExtentDims(dims.data(), nullptr); - - size_t rows; - size_t cols; - - if (rank == 1) { - rows = dims[0]; - cols = 1; - } - else if (rank == 2) { - rows = dims[0]; - cols = dims[1]; - } else { - throw std::runtime_error("Unsupported rank: " + std::to_string(rank)); - } - - // Read data into a 1D vector - std::vector flatData(rows * cols); - dataset.read(flatData.data(), H5::PredType::NATIVE_FLOAT); - - // Convert to 2D vector - std::vector> result(rows, std::vector(cols)); - for (size_t i = 0; i < rows; ++i) { - for (size_t j = 0; j < cols; ++j) { - result[i][j] = flatData[i * cols + j]; - if (doPrint == 1) - std::cout << result[i][j] << ", "; - } - if (doPrint == 1) - std::cout << std::endl; - } - - // Close the dataset and file - dataset.close(); - file.close(); - - return result; - } - - - - - void findWeights(std::string modelPath) { - // read the parameter weights from file - std::vector> w1 = loadHDF5Array(modelPath, "fc1.weight", 0); - std::vector> b1 = loadHDF5Array(modelPath, "fc1.bias", 0); - std::vector> w2 = loadHDF5Array(modelPath, "fc2.weight", 0); - std::vector> b2 = loadHDF5Array(modelPath, "fc2.bias", 0); - std::vector> w3 = loadHDF5Array(modelPath, "fc3.weight", 0); - std::vector> b3 = loadHDF5Array(modelPath, "fc3.bias", 0); - - // store the weights in map with same name as operator - operatorParam2Weights["mat_mul1"] = w1; - operatorParam2Weights["mat_add1"] = b1; - operatorParam2Weights["mat_mul2"] = w2; - operatorParam2Weights["mat_add2"] = b2; - operatorParam2Weights["mat_mul3"] = w3; - operatorParam2Weights["mat_add3"] = b3; - - std::cout << "Shape of mat_mul1 weight: " << w1.size() << ", " << w1[0].size() << std::endl; - } - - - - std::vector> extractSubweight(const std::vector>& matrix, int start, int n) { - std::vector> result; - - std::cout << "Extracting subweight" << std::endl; - std::cout << start << ", " << n << std::endl; - - // Ensure that the range [start, start + n) is within bounds - //int end = std::min(start + n, matrix.size()); - int end = start + n; - for (int i = start; i < end; ++i) { - result.push_back(matrix[i]); // Copy rows within the range - } - - return result; - } - - - - RowVectorPtr getTableFromCSVFile(VectorMaker & maker, std::string csvFilePath, std::string tableName, std::string joinKey) { - - std::ifstream file(csvFilePath.c_str()); - if (file.fail()) { - - std::cerr << "Error in reading data file:" << csvFilePath << std::endl; - exit(1); - - } - - std::string line; - - std::cout << tableName << std::endl; - - std::vector joinVec; - std::vector> featuresVec; - - // Ignore the first line (header) - if (std::getline(file, line)) { - std::cout << "Ignoring header: " << line << std::endl; - } - - int count = 0; - while (std::getline(file, line)) { - std::istringstream iss(line); - std::string numberStr; - std::vector features; - int colIndex = 0; - - while (std::getline(iss, numberStr, ',')) { - if (numberStr.size() >= 2 && numberStr.front() == '"' && numberStr.back() == '"') { - numberStr = numberStr.substr(1, numberStr.size() - 2); - } - - if (colIndex == 0) { - joinVec.push_back(std::stoi(numberStr)); - } - else { - features.push_back(std::stof(numberStr)); - } - colIndex += 1; - } - featuresVec.push_back(features); - count += 1; - } - file.close(); - - std::string featureCol = "f_" + tableName; - auto joinVector = maker.flatVector(joinVec); - auto featuresVector = maker.arrayVector(featuresVec, REAL()); - auto tableRowVector = maker.rowVector( - {joinKey, featureCol}, {joinVector, featuresVector} - ); - - std::vector cols = {joinKey, featureCol}; - tabel2Columns[tableName] = cols; - totalFeatures += featuresVec[0].size(); - - FeatureStatus fs1; - fs1.isFeature = 0; - allFeatureStatus[joinKey] = fs1; - - FeatureStatus fs2; - fs2.isFeature = 1; - fs2.isPushed = 0; - fs2.vectorSize = featuresVec[0].size(); - allFeatureStatus[featureCol] = fs2; - - return tableRowVector; - } - - - - -int sampleQuery() { - - return 29; - -} - -int sampleModel() { - - return 0; - -} - - -void sampleNNModelArch(int numInputFeatures, NNModelContext & nn) { - - nn.inputFeatures = numInputFeatures; - nn.numLayers = 3; - nn.hiddenLayerNeurons = 16; - nn.outputLayerNeurons = 2; - -} - -void sampleDTModelArch (int numInputFeatures, DTModelContext & dt) { - - dt.inputFeatures = numInputFeatures; - dt.treeDepth = 8; - -} - -bool replace(std::string& str, const std::string& from, const std::string& to) { - size_t start_pos = str.find(from); - if(start_pos == std::string::npos) - return false; - str.replace(start_pos, from.length(), to); - return true; -} - - - -void registerNNFunction(std::string op_name, std::vector> weightMatrice, int dim1, int dim2) { - - if (op_name.find("mat_mul") != std::string::npos) { - auto nnWeightVector = maker.arrayVector(weightMatrice, REAL()); - exec::registerVectorFunction( - op_name, - MatrixMultiply::signatures(), - std::make_unique( - std::move(nnWeightVector->elements()->values()->asMutable()), dim1, dim2) - ); - std::cout << "Registered a mat_mul function of name " << op_name << " with dimension " << dim1 << ", " << dim2 << endl; - } - - else if (op_name.find("mat_add") != std::string::npos) { - auto nnWeightVector = maker.arrayVector(weightMatrice, REAL()); - exec::registerVectorFunction( - op_name, - MatrixVectorAddition::signatures(), - std::make_unique( - std::move(nnWeightVector->elements()->values()->asMutable()), dim1) - ); - std::cout << "Registered a mat_add function of name " << op_name << " with dimension " << dim1 << endl; - } - - else if (op_name.find("relu") != std::string::npos) { - exec::registerVectorFunction( - op_name, Relu::signatures(), std::make_unique(), - {}, - true); - std::cout << "Registered a relu function of name " << op_name << endl; - } - - else if (op_name.find("softmax") != std::string::npos) { - exec::registerVectorFunction( - op_name, Softmax::signatures(), std::make_unique()); - std::cout << "Registered a softmax function of name " << op_name << endl; - } - - else if (op_name.find("vector_addition") != std::string::npos) { - exec::registerVectorFunction( - op_name, - VectorAddition::signatures(), - std::make_unique(dim1) - ); - std::cout << "Registered a vector_addition function of name " << op_name << " with dimension " << dim1 << endl; - } - - -} - - - - - -float * genWeight(int dim1, int dim2) { - - int total_size = dim1 * dim2; - - //generate weight matrix - float * weight = new float[total_size]; - for (int i = 0; i < total_size; i++) { - if (i % 2 == 0) { - weight[i] = 1.0; - } else { - weight[i] = 0.0; - } - } - return weight; -} - -bool addModelInferenceToQueryPlanAfterFactorize(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator) { - - std::string modelProjString = ""; - std::vector> emptyMatrix; - for (int i = modelOperators.size() - 1; i > 0; i--) { - std::string opName = modelOperators[i]; - if (opName.find("mat_mul") != std::string::npos) { - std::vector> param = operatorParam2Weights[opName]; - int dim1 = param.size(); - int dim2 = param[0].size(); - registerNNFunction(opName, param, dim1, dim2); - } - else if (opName.find("mat_add") != std::string::npos) { - std::vector> param = operatorParam2Weights[opName]; - int dim1 = param.size(); - registerNNFunction(opName, param, dim1, -1); - } - else { - registerNNFunction(opName, emptyMatrix, -1, -1); - } - modelProjString += opName + "("; - } - modelProjString += "features"; - - for (int i = modelOperators.size() - 1; i > 0; i--) { - modelProjString += ")"; - } - modelProjString += " AS output"; - - std::cout << "Inference Part: " << modelProjString << endl; - planBuilder.project({modelProjString}); - - return true; - -} - -bool addModelInferenceToQueryPlan(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator) { - - std::string modelProjString = ""; - std::vector> emptyMatrix; - for (int i = modelOperators.size() - 1; i >= 0; i--) { - std::string opName = modelOperators[i]; - if (opName.find("mat_mul") != std::string::npos) { - std::vector> param = operatorParam2Weights[opName]; - int dim1 = param.size(); - int dim2 = param[0].size(); - registerNNFunction(opName, param, dim1, dim2); - } - else if (opName.find("mat_add") != std::string::npos) { - std::vector> param = operatorParam2Weights[opName]; - int dim1 = param.size(); - registerNNFunction(opName, param, dim1, -1); - } - else { - registerNNFunction(opName, emptyMatrix, -1, -1); - } - modelProjString += opName + "("; - } - modelProjString += "features"; - - for (int i = modelOperators.size() - 1; i >= 0; i--) { - modelProjString += ")"; - } - modelProjString += " AS output"; - - std::cout << "Inference Part: " << modelProjString << endl; - planBuilder.project({modelProjString}); - - return true; - -} - - -bool rewriteWithFactorization(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator, std::string joinOrderStr) { - - // Create a JSON reader and root object - Json::CharReaderBuilder readerBuilder; - Json::Value root; // Root will hold the parsed JSON array - std::string errors; - - // Parse the JSON string - std::istringstream stream(joinOrderStr); - if (!Json::parseFromStream(readerBuilder, stream, &root, &errors)) { - std::cerr << "Error parsing JSON: " << errors << "\n"; - return 1; - } - //JSON Reading successful" - - std::unordered_map sources; //with filters and projections pushed down; - - std::string outName; - std::string leftName; - std::string rightName; - - std::string firstOpName = modelOperators[0]; // name of operator of split layer - std::vector> firstWeight = operatorParam2Weights[firstOpName]; // weight of the split layer - int fCurrentTotal = 0; - int numCols = firstWeight.size(); // number of columns in split layer - int numNeurons = firstWeight[0].size(); // number of neurons in split layer - int k = 0; - int addIdx = 0; - - std::vector> emptyMatrix; - bool isAdditionRegistered = false; - - // Iterate through the array - for (const auto& item : root) { - std::cout << "ID: " << item["ID"].asString() << "\n"; - std::cout << "Left: " << item["Left"].asString() << "\n"; - std::cout << "Right: " << item["Right"].asString() << "\n"; - std::cout << "Pred: " << item["Pred"].asString() << "\n"; - std::cout << "ProbeKeys: " << item["ProbeKeys"].asString() << "\n"; - std::cout << "BuildKeys: " << item["BuildKeys"].asString() << "\n"; - - std::string joinId = item["ID"].asString(); - std::string leftTable = item["Left"].asString(); - std::string rightTable = item["Right"].asString(); - std::string probKeys = item["ProbeKeys"].asString(); - std::string buildKeys = item["BuildKeys"].asString(); - int NumDimLeft = item["NumDimLeft"].asInt(); - int NumDimRight = item["NumDimRight"].asInt(); - - std::string leftFactorizationKey = leftTable + "--->" + joinId; - std::string rightFactorizationKey = rightTable + "--->" + joinId; - - std::cout << "left factorization key: " << leftFactorizationKey << std::endl; - std::cout << "right factorization key: " << leftFactorizationKey << std::endl; - - bool isLeftTableNotLeaf = isInteger(leftTable); - - if (isLeftTableNotLeaf == false) { - // left table is leaf - - featureStartPos[leftTable] = fCurrentTotal; - fCurrentTotal += NumDimLeft; - - std::string fName = tabel2Columns[leftTable][1]; - - if (factorizationPlans[leftFactorizationKey] == 1) { - // Doing factorization of left edge - std::vector> subWeight = extractSubweight(firstWeight, featureStartPos[leftTable], NumDimLeft); - std::string newOpName = firstOpName + "_" + std::to_string(k); - registerNNFunction(newOpName, subWeight, NumDimLeft, numNeurons); - std::string fNewName = "factorized_" + std::to_string(k); - std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; - tabel2Columns[leftTable][1] = fNewName; - k += 1; - - FeatureStatus fs; - fs.isFeature = 1; - fs.isPushed = 1; - fs.vectorSize = numNeurons; - allFeatureStatus[fNewName] = fs; - - auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) - .values({tableName2RowVector[leftTable]}) - .project({tabel2Columns[leftTable][0], fNewNameFull}); - - sources[leftTable] = leftPlan; - } - else { - // Not Doing factorization of left edge - FeatureStatus fs = allFeatureStatus[fName]; - fs.featureStartPos = featureStartPos[leftTable]; - allFeatureStatus[fName] = fs; - - auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) - .values({tableName2RowVector[leftTable]}) - .project({tabel2Columns[leftTable]}); - - sources[leftTable] = leftPlan; - } - - } - - bool isRightTableNotLeaf = isInteger(rightTable); - if (isRightTableNotLeaf == false) { - // right table is leaf - - featureStartPos[rightTable] = fCurrentTotal; - fCurrentTotal += NumDimRight; - - std::string fName = tabel2Columns[rightTable][1]; - - if (factorizationPlans[rightFactorizationKey] == 1) { - // Doing factorization of right edge - std::vector> subWeight = extractSubweight(firstWeight, featureStartPos[rightTable], NumDimRight); - std::string newOpName = firstOpName + "_" + std::to_string(k); - registerNNFunction(newOpName, subWeight, NumDimRight, numNeurons); - std::string fNewName = "factorized_" + std::to_string(k); - std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; - tabel2Columns[rightTable][1] = fNewName; - k += 1; - - FeatureStatus fs; - fs.isFeature = 1; - fs.isPushed = 1; - fs.vectorSize = numNeurons; - allFeatureStatus[fNewName] = fs; - - auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) - .values({tableName2RowVector[rightTable]}) - .project({tabel2Columns[rightTable][0], fNewNameFull}); - - sources[rightTable] = rightPlan; - } - else { - // Not Doing factorization of right edge - FeatureStatus fs = allFeatureStatus[fName]; - fs.featureStartPos = featureStartPos[rightTable]; - allFeatureStatus[fName] = fs; - - auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) - .values({tableName2RowVector[rightTable]}) - .project({tabel2Columns[rightTable]}); - - sources[rightTable] = rightPlan; - } - - } - - featureStartPos[joinId] = featureStartPos[leftTable]; - - - PlanBuilder left, right, out; - - if (sources.count(leftTable) > 0) { - - left = sources[leftTable]; - - } - - if (sources.count(rightTable) > 0) { - - right = sources[rightTable]; - - } - - //compose join features - std::vector leftProj; - std::vector joinCols; - int totalPushed = 0; - - if (factorizationPlans[leftFactorizationKey] == 1) { - // Factorizing all left features that were not pushed earlier - int newFactorizedCount = 0; - for (int i = 0; i < tabel2Columns[leftTable].size(); i++) { - std::string fName = tabel2Columns[leftTable][i]; - if (allFeatureStatus.count(fName) <= 0) { - std::cout << "Feature status not found for feature " << fName << std::endl; - continue; - } - FeatureStatus fs = allFeatureStatus[fName]; - if (fs.isFeature == 1 && fs.isPushed == 0) { - - // Feature not pushed earlier, pushing now - std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); - std::string newOpName = firstOpName + "_" + std::to_string(k); - registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); - std::string fNewName = "factorized_" + std::to_string(k); - std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; - k += 1; - - FeatureStatus fs; - fs.isFeature = true; - fs.isPushed = 1; - fs.vectorSize = numNeurons; - allFeatureStatus[fNewName] = fs; - leftProj.push_back(fNewNameFull); - joinCols.push_back(fNewName); - newFactorizedCount += 1; - totalPushed += 1; - } - else { - // Feature not a feature or pushed earlier - leftProj.push_back(fName); - joinCols.push_back(fName); - if (fs.isFeature == 1) { - // Feature pushed earlier - totalPushed += 1; - } - } - } - - if (newFactorizedCount > 0) { - left = left.project({leftProj}); - } - } - else { - // Just adding all left columns without factorization - for (int i = 0; i < tabel2Columns[leftTable].size(); i++) { - std::string fName = tabel2Columns[leftTable][i]; - leftProj.push_back(fName); - joinCols.push_back(fName); - } - } - - - std::vector rightProj; - - if (factorizationPlans[rightFactorizationKey] == 1) { - // Factorizing all right features that were not pushed earlier - int newFactorizedCount = 0; - for (int i = 0; i < tabel2Columns[rightTable].size(); i++) { - std::string fName = tabel2Columns[rightTable][i]; - if (allFeatureStatus.count(fName) <= 0) { - std::cout << "Feature status not found for feature " << fName << std::endl; - continue; - } - FeatureStatus fs = allFeatureStatus[fName]; - if (fs.isFeature == 1 && fs.isPushed == 0) { - - // Feature not pushed earlier, pushing now - std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); - std::string newOpName = firstOpName + "_" + std::to_string(k); - registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); - std::string fNewName = "factorized_" + std::to_string(k); - std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; - k += 1; - - FeatureStatus fs; - fs.isFeature = true; - fs.isPushed = 1; - fs.vectorSize = numNeurons; - allFeatureStatus[fNewName] = fs; - rightProj.push_back(fNewNameFull); - joinCols.push_back(fNewName); - newFactorizedCount += 1; - totalPushed += 1; - } - else { - // Feature not a feature or pushed earlier - rightProj.push_back(fName); - joinCols.push_back(fName); - if (fs.isFeature == 1) { - // Feature pushed earlier - totalPushed += 1; - } - } - } - - if (newFactorizedCount > 0) { - right = right.project({rightProj}); - } - } - else { - // Just adding all right columns without factorization - for (int i = 0; i < tabel2Columns[rightTable].size(); i++) { - std::string fName = tabel2Columns[rightTable][i]; - rightProj.push_back(fName); - joinCols.push_back(fName); - } - } - - tabel2Columns[joinId] = joinCols; - // Writing join - out = left.hashJoin( - {item["ProbeKeys"].asString()}, - {item["BuildKeys"].asString()}, - right.planNode(), - "", - {joinCols} - ); - - if (totalPushed > 1) { - // some features were pushed before join, they need to be aggregated - std::vector joinColsNew; - std::vector joinProj; - - bool isFirstVec = true; - std::string projString = ""; - for (int i = 0; i < joinCols.size(); i++) { - std::string fName = joinCols[i]; - FeatureStatus fs = allFeatureStatus[fName]; - if (fs.isFeature == 0 || fs.isPushed == 0) { - joinColsNew.push_back(fName); - joinProj.push_back(fName); - continue; - } - - if (isFirstVec == true) { - projString = fName; - isFirstVec = false; - continue; - } - - std::string newOpName = "vector_addition"; - std::string opName = newOpName + "(" + projString + ", " + fName + ")"; - projString = opName; - if (isAdditionRegistered == false) { - registerNNFunction(newOpName, emptyMatrix, numNeurons, -1); - isAdditionRegistered = true; - } - } - - std::string fNewName = "added_vec_" + std::to_string(addIdx); - addIdx += 1; - projString += " AS " + fNewName; - joinColsNew.push_back(fNewName); - joinProj.push_back(projString); - - FeatureStatus fs; - fs.isFeature = true; - fs.isPushed = 1; - fs.vectorSize = numNeurons; - allFeatureStatus[fNewName] = fs; - - out = out.project({joinProj}); - tabel2Columns[joinId] = joinColsNew; - } - // Finished Writing join - - outName = joinId; - sources[outName] = out; - - } - - planBuilder = sources[outName]; - - std::cout << "After the last join" << std::endl; - std::vector joinProj; - std::vector joinCols; - int newPushedCount = 0; - - // iterate over the final join projection output to compute not pushed feature - for (int i = 0; i < tabel2Columns[outName].size(); i++) { - std::string fName = tabel2Columns[outName][i]; - FeatureStatus fs = allFeatureStatus[fName]; - if (fs.isFeature == 1) { - if (fs.isPushed == 0) { - - // found a feature in the final join output which were not pushed - std::vector> subWeight = extractSubweight(firstWeight, fs.featureStartPos, fs.vectorSize); - std::string newOpName = firstOpName + "_" + std::to_string(k); - std::cout << fName << ", " << fs.featureStartPos << ", " << fs.vectorSize << ", " << subWeight.size() << ", " << subWeight[0].size() << std::endl; - registerNNFunction(newOpName, subWeight, fs.vectorSize, numNeurons); - std::string fNewName = "factorized_" + std::to_string(k); - std::string fNewNameFull = newOpName + "(" + fName + ") AS " + fNewName; - k += 1; - - FeatureStatus fs; - fs.isFeature = true; - fs.isPushed = 1; - fs.vectorSize = numNeurons; - allFeatureStatus[fNewName] = fs; - joinProj.push_back(fNewNameFull); - joinCols.push_back(fNewName); - newPushedCount += 1; - } - else { - joinProj.push_back(fName); - joinCols.push_back(fName); - } - } - } - - if (newPushedCount > 0) { - // final join output projection changed - planBuilder = planBuilder.project({joinProj}); - } - - // perform aggregation of the final join output - std::string projString = joinCols[0]; - for (int i = 1; i < joinCols.size(); i++) { - std::string newOpName = "vector_addition"; - std::string opName = newOpName + "(" + projString + ", " + joinCols[i] + ")"; - projString = opName; - if (isAdditionRegistered == false) { - registerNNFunction(newOpName, emptyMatrix, numNeurons, -1); - isAdditionRegistered = true; - } - } - projString += " AS features"; - - planBuilder = planBuilder.project({projString}); - std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; - return true; - -} - - - -bool writeWithoutFactorization(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator, std::string joinOrderStr) { - - // Create a JSON reader and root object - Json::CharReaderBuilder readerBuilder; - Json::Value root; // Root will hold the parsed JSON array - std::string errors; - - // Parse the JSON string - std::istringstream stream(joinOrderStr); - if (!Json::parseFromStream(readerBuilder, stream, &root, &errors)) { - std::cerr << "Error parsing JSON: " << errors << "\n"; - return 1; - } - - std::unordered_map sources; //with filters and projections pushed down; - - std::string outName; - std::string leftName; - std::string rightName; - std::vector projections; - - std::string firstOpName = modelOperators[0]; - std::vector> firstWeight = operatorParam2Weights[firstOpName]; - int fCurrentTotal = 0; - int numCols = firstWeight.size(); - int numNeurons = firstWeight[0].size(); - int k = 0; - - // Iterate through the array - for (const auto& item : root) { - std::cout << "ID: " << item["ID"].asString() << "\n"; - std::cout << "Left: " << item["Left"].asString() << "\n"; - std::cout << "Right: " << item["Right"].asString() << "\n"; - std::cout << "Pred: " << item["Pred"].asString() << "\n"; - std::cout << "ProbeKeys: " << item["ProbeKeys"].asString() << "\n"; - std::cout << "BuildKeys: " << item["BuildKeys"].asString() << "\n"; - - std::string joinId = item["ID"].asString(); - std::string leftTable = item["Left"].asString(); - std::string rightTable = item["Right"].asString(); - std::string probKeys = item["ProbeKeys"].asString(); - std::string buildKeys = item["BuildKeys"].asString(); - int NumDimLeft = item["NumDimLeft"].asInt(); - int NumDimRight = item["NumDimRight"].asInt(); - - bool isLeftTableNotLeaf = isInteger(leftTable); - if (isLeftTableNotLeaf == false) { - // left table is leaf - fCurrentTotal += NumDimLeft; - - auto leftPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) - .values({tableName2RowVector[leftTable]}) - .project({tabel2Columns[leftTable]}); - - sources[leftTable] = leftPlan; - - } - - bool isRightTableNotLeaf = isInteger(rightTable); - if (isRightTableNotLeaf == false) { - // right table is leaf - fCurrentTotal += NumDimRight; - - auto rightPlan = PlanBuilder(planNodeIdGenerator, pool_.get()) - .values({tableName2RowVector[rightTable]}) - .project({tabel2Columns[rightTable]}); - - sources[rightTable] = rightPlan; - - } - - - PlanBuilder left, right, out; - - //retrieve the corresponding PlanBuilder - if (sources.count(leftTable) > 0) { - - left = sources[leftTable]; - - } - - if (sources.count(rightTable) > 0) { - - right = sources[rightTable]; - - } - - //compose join - projections.clear(); - // Access Projection if it exists - if (item.isMember("Projection")) { - for (const auto& proj : item["Projection"]) { - std::cout << proj << std::endl; - projections.push_back(proj.asString()); - } - } - - std::cout << "Writing join" << std::endl; - out = left.hashJoin( - {item["ProbeKeys"].asString()}, - {item["BuildKeys"].asString()}, - right.planNode(), - "", - {projections} - ); - std::cout << "Finished Writing join" << std::endl; - - outName = joinId; - sources[outName] = out; - - } - planBuilder = sources[outName]; - - // After the final join - std::string projString=""; - bool isFirst = true; - for (std::string proj : projections) { - FeatureStatus fs = allFeatureStatus[proj]; - if (fs.isFeature == 1) { - if (isFirst) { - projString += proj; - isFirst = false; - } else { - projString += "," + proj; - } - } - } - projString = "concat(" + projString + ") as features"; - - planBuilder = planBuilder.project({projString}); - std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; - return true; - -} - - - -bool createAndExecuteQuery(std::string queryJsonStr, bool withFactorization) { - PlanBuilder planBuilder{pool_.get()}; - std::shared_ptr planNodeIdGenerator = std::make_shared(); - if (withFactorization) { - // Rewriting with factorization - rewriteWithFactorization(planBuilder, planNodeIdGenerator, queryJsonStr); - - // Step 2. Add model inference to query plan - addModelInferenceToQueryPlanAfterFactorize(planBuilder, planNodeIdGenerator); - } - else { - // Writing without factorization - writeWithoutFactorization(planBuilder, planNodeIdGenerator, queryJsonStr); - - // Step 2. Add model inference to query plan - addModelInferenceToQueryPlan(planBuilder, planNodeIdGenerator); - } - - //std::cout << "Plan: " << planBuilder.planNode()->toString(true, true) << std::endl; - auto myPlan = planBuilder.planNode(); - std::cout << myPlan->toString(true, true) << std::endl; - std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); - auto results = exec::test::AssertQueryBuilder(myPlan).copyResults(pool_.get()); - std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); - std::cout << "Time (sec) = " << (std::chrono::duration_cast(end - begin).count()) /1000000.0 << std::endl; - std::cout << "Results Size: " << results->size() << std::endl; - std::cout << "Results:" << results->toString(0, 5) << std::endl; - //std::cout << results->toString(0, results->size()) << std::endl; - return true; - -} - - -}; - -int main(int argc, char** argv) { - - setlocale(LC_TIME, "C"); - - folly::init(&argc, &argv, false); - memory::MemoryManager::initialize({}); - - RewriteFactorized rewriteObj; - - // Load table1 - std::cout << "Reading Table 1 CSV file" << std::endl; - RowVectorPtr table1Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_1.csv", "table_1", "join_key1"); - - std::cout << "Reading Table 2 CSV file" << std::endl; - // Load table2 - RowVectorPtr table2Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_2.csv", "table_2", "join_key2"); - - // Load table3 - std::cout << "Reading Table 3 CSV file" << std::endl; - RowVectorPtr table3Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_3.csv", "table_3", "join_key3"); - - // Load table4 - std::cout << "Reading Table 4 CSV file" << std::endl; - RowVectorPtr table4Vec = rewriteObj.getTableFromCSVFile(rewriteObj.maker, "resources/data/synthetic_dataset_4_3/table_4.csv", "table_4", "join_key4"); - - - rewriteObj.tableName2RowVector["table_1"] = table1Vec; - rewriteObj.tableName2RowVector["table_2"] = table2Vec; - rewriteObj.tableName2RowVector["table_3"] = table3Vec; - rewriteObj.tableName2RowVector["table_4"] = table4Vec; - - - // retrieve the weights and set to map - std::cout << "Reading model parameters" << std::endl; - rewriteObj.findWeights("resources/model/dummy.h5"); - - // retrieve the model operators from model expression IR - std::string modelInput = "softmax3(mat_add3(mat_mul3(relu2(mat_add2(mat_mul2(relu1(mat_add1(mat_mul1(features)))))))))"; - std::cout << "Extracting model operators" << std::endl; - std::vector operators = rewriteObj.extractOperatorsInReverse(modelInput); - rewriteObj.modelOperators = operators; - - std::string jsonString = R"([ - {"ID":"0","Left":"table_1","Right":"table_2","Pred":"table_1.join_key1 = table_2.join_key2","ProbeKeys":"join_key1","BuildKeys":"join_key2","Projection":["join_key1","f_table_1","join_key2","f_table_2"],"NumTuplesLeft":1000,"NumDimLeft":20,"NumTuplesRight":300,"NumDimRight":39,"NumTuplesOutput":300,"NumDimOutput":59}, - {"ID":"1","Left":"0","Right":"table_3","Pred":"table_2.join_key2 = table_3.join_key3","ProbeKeys":"join_key2","BuildKeys":"join_key3","Projection":["join_key1","f_table_1","join_key2","f_table_2","join_key3","f_table_3"],"NumTuplesLeft":300,"NumDimLeft":59,"NumTuplesRight":12000,"NumDimRight":31,"NumTuplesOutput":12000,"NumDimOutput":90} - ])"; - - std::cout << "Reading factorization Plan" << std::endl; - rewriteObj.findFactorizationPlans("resources/plans/factorization_plan4_3.txt"); - - std::cout << "Performing rewriting" << std::endl; - bool ret = rewriteObj.createAndExecuteQuery(jsonString, true); - - return 1; -} - From d4e3a4ac8134b60bd75c2ac15f1cc46b26a2405e Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:37:12 -0700 Subject: [PATCH 10/15] Added CMake file for factorized rewriting --- inferf/CMakeLists.txt | 80 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 inferf/CMakeLists.txt diff --git a/inferf/CMakeLists.txt b/inferf/CMakeLists.txt new file mode 100644 index 000000000..15abf21d5 --- /dev/null +++ b/inferf/CMakeLists.txt @@ -0,0 +1,80 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.0 FATAL_ERROR) +#set(CMAKE_PREFIX_PATH "$HOME/libtorch; $CONDA_PREFIX") +set(CMAKE_PREFIX_PATH "$HOME/libtorch/share/cmake/Torch") +set(CMAKE_PREFIX_PATH "$CONDA_PREFIX") +# include_directories(SYSTEM ${TORCH_INCLUDE_DIRS}) +# set(TORCH_LIBRARIES "$HOME/libtorch") + +find_package(Torch REQUIRED) +find_package(xgboost REQUIRED) +find_package(cpr REQUIRED) +find_package(jsoncpp REQUIRED) + +# include tokenizer cpp as a sub directory + + + +include_directories(/home/velox/third_party/tokenizers-cpp/include) +include_directories(/home/velox/third_party/tokenizers-cpp/src) + +add_library(tokenizer_cpp STATIC IMPORTED) +add_library(tokenizer_c STATIC IMPORTED) +set_target_properties(tokenizer_cpp PROPERTIES IMPORTED_LOCATION /home/velox/third_party/tokenizers-cpp/example/build/tokenizers/libtokenizers_cpp.a) +set_target_properties(tokenizer_c PROPERTIES IMPORTED_LOCATION /home/velox/third_party/tokenizers-cpp/example/build/tokenizers/libtokenizers_c.a) + + +# TODO: temporary disable it until we can fix the build of dependency +# add_executable(standalone_hf_tokenizer_test tests/StandaloneHFTokenizerTest.cpp) +# target_link_libraries(standalone_hf_tokenizer_test +# # FIXME: for some reason tokenizer cpp needs to be placed before tokenizer c, +# # needs fix to automatically compile it from 3rd library +# tokenizer_cpp +# tokenizer_c +# ) + + +include_directories("/usr/include/hdf5/serial") +include_directories("/home/h5cpp/build/src/h5cpp") + +find_package(h5cpp REQUIRED) +find_package(HDF5 COMPONENTS C CXX HL REQUIRED) + +link_directories( ${HDF5_LIBRARY_DIRS} ) +include_directories( ${HDF5_INCLUDE_DIRS} ) + + +add_executable(factorize_test tests/RewriteFactorized.cpp) +target_link_libraries( + factorize_test + velox_aggregates + velox_type + velox_vector + velox_vector_test_lib + velox_exec + velox_exec_test_lib + velox_tpch_connector + velox_memory + velox_common_base + velox_vector_fuzzer + openblas + ${TORCH_LIBRARIES} + jsoncpp_lib + h5cpp::h5cpp + hdf5_serial + ${HDF5_CXX_LIBRARIES} +) + From b35a297e8520c12632537ffa5e998c36efe5b621 Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:40:03 -0700 Subject: [PATCH 11/15] Added code for generating synthetic data --- inferf/python/synthesize_data.py | 136 +++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 inferf/python/synthesize_data.py diff --git a/inferf/python/synthesize_data.py b/inferf/python/synthesize_data.py new file mode 100644 index 000000000..5ae394541 --- /dev/null +++ b/inferf/python/synthesize_data.py @@ -0,0 +1,136 @@ +import numpy as np +import pandas as pd +import random +import os +import json + + +def get_join_statistics(join_id, left_table_name, right_table_name, left_table_actual, left_key, right_key, projection, left_rows, left_cols, right_rows, right_cols): + join_info = {} + join_info["ID"] = join_id + join_info["Left"] = left_table_name + join_info["Right"] = right_table_name + join_info["Pred"] = f"{left_table_actual}.{left_key} = {right_table_name}.{right_key}" + join_info["ProbeKeys"] = left_key + join_info["BuildKeys"] = right_key + join_info["Projection"] = projection + join_info["NumTuplesLeft"] = left_rows + join_info["NumDimLeft"] = left_cols + join_info["NumTuplesRight"] = right_rows + join_info["NumDimRight"] = right_cols + join_info["NumTuplesOutput"] = right_rows + join_info["NumDimOutput"] = left_cols + right_cols + return join_info + + +# Function to generate a random table with size controls +def generate_table_optimized(table_idx, join_key, num_rows, num_columns, ratio=None, join_key_values=None): + if join_key_values is None: + join_key_values = np.random.randint(1, num_rows + 1, size=num_rows) + else: + #ratio = 5 # Limit the duplication factor to control row growth + #num_rows = min(num_rows, len(join_key_values) * ratio) + #join_key_values = np.repeat(join_key_values, np.random.randint(1, ratio + 1))[:num_rows] + if ratio < 1: + join_key_values = np.random.choice(join_key_values, size=num_rows, replace=False) + else: + multiply_factor = int(ratio) + join_key_values = np.repeat(join_key_values, multiply_factor)#[:num_rows] + + feature_columns = np.random.rand(num_rows, num_columns) + data = pd.DataFrame(feature_columns, columns=[f"feature_{i}" for i in range(1, num_columns + 1)]) + data.insert(0, join_key, join_key_values) + return data + +# Main function to generate and save the dataset +def generate_synthetic_dataset(data_id="10_2", max_columns=1500, max_single_columns=150, num_tables=10, base_rows=1000, ratios=[0.6, 0.6, 15, 25, 2, 4, 0.8, 3, 10]): + output_dir = "synthetic_dataset_" + data_id + total_columns = 0 + tables = [] + #base_rows = random.randint(5000, 10000) # Number of rows in the first table + + plans_array = [] + join_serial = 0 + prev_join_id = None + left_rows = 0 + left_cols = 0 + projection = [] + + # Generate tables with size controls + for i in range(num_tables): + remaining_columns = max_columns - total_columns - (num_tables - len(tables) - 1) + if remaining_columns <= 0: + break + + num_columns = random.randint(10, min(max_single_columns, remaining_columns)) # Reduce max columns per table + total_columns += num_columns + + table_idx = i + 1 + join_key = "join_key" + str(table_idx) + + if i == 0: + num_rows = base_rows + table = generate_table_optimized(table_idx, join_key, num_rows, num_columns) + left_rows = num_rows + left_cols = num_columns + else: + #k = random.uniform(0.1, 0.5) # Reduce row growth factor + k = ratios[i - 1] + num_rows = int(len(tables[-1]) * k) + #if num_rows > 100000: + # num_rows = 100000 + prev_join_key = "join_key" + str(i) + table = generate_table_optimized(table_idx, join_key, num_rows, num_columns, ratio=k, join_key_values=tables[-1][prev_join_key].values) + + right_table_name = "table_" + str(i + 1) + if join_serial == 0: + left_table_name = "table_" + str(i) + left_table_actual = left_table_name + else: + left_table_name = prev_join_id + left_table_actual = "table_" + str(i) + join_id = str(join_serial) + + if len(projection) == 0: + projection.append(prev_join_key) + projection.append("f_" + left_table_name) + + projection.append(join_key) + projection.append("f_" + right_table_name) + + join_info = get_join_statistics(join_id, left_table_name, right_table_name, left_table_actual, prev_join_key, join_key, projection.copy(), left_rows, left_cols, num_rows, num_columns) + plans_array.append(join_info) + + left_rows = num_rows + left_cols += num_columns + prev_join_id = join_id + join_serial += 1 + + tables.append(table) + + # Save tables to CSV + os.makedirs(output_dir, exist_ok=True) + for idx, table in enumerate(tables, start=1): + table.to_csv(os.path.join(output_dir, f"table_{idx}.csv"), index=False) + print(f"Dataset generated and saved in {output_dir}") + + # Save the JSON array to a file + #with open(f"plans/{data_id}.json", "w") as json_file: + # json.dump(plans_array, json_file, indent=4) + + # Convert each JSON object in the array to a compact one-line format + formatted_objects = [json.dumps(obj, separators=(',', ':')) for obj in plans_array] + # Combine the formatted objects into a single JSON array + formatted_json_array = "[\n\t" + ",\n\t".join(formatted_objects) + "\n]" + # Wrap the formatted array in R"( ... )" + formatted_string = f'R"({formatted_json_array})"' + # Print the result + print(formatted_string) + # Save to a file + with open(f"plans/{data_id}.txt", "w") as file: + file.write(formatted_string) + + +# Run the script +if __name__ == "__main__": + generate_synthetic_dataset() From 2dc9665a8f11e5272e978e18d42d99840d560127 Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Wed, 19 Feb 2025 00:53:13 -0700 Subject: [PATCH 12/15] Added code for random 2-way join in job --- velox/ml_functions/tests/Job2WayJoin.cpp | 1445 ++++++++++++++++++++++ 1 file changed, 1445 insertions(+) create mode 100644 velox/ml_functions/tests/Job2WayJoin.cpp diff --git a/velox/ml_functions/tests/Job2WayJoin.cpp b/velox/ml_functions/tests/Job2WayJoin.cpp new file mode 100644 index 000000000..1934a2ab3 --- /dev/null +++ b/velox/ml_functions/tests/Job2WayJoin.cpp @@ -0,0 +1,1445 @@ +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "velox/type/Type.h" +#include "velox/exec/tests/utils/HiveConnectorTestBase.h" +#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h" +#include "velox/functions/prestosql/registration/RegistrationFunctions.h" +#include "velox/ml_functions/DecisionTree.h" +#include "velox/ml_functions/XGBoost.h" +#include "velox/ml_functions/tests/MLTestUtility.h" +#include "velox/parse/TypeResolver.h" +#include "velox/ml_functions/VeloxDecisionTree.h" +#include "velox/common/file/FileSystems.h" +#include "velox/dwio/dwrf/reader/DwrfReader.h" +#include "velox/dwio/parquet/RegisterParquetReader.h" +#include "velox/dwio/parquet/RegisterParquetWriter.h" +#include +#include "velox/connectors/hive/HiveConfig.h" +#include "velox/ml_functions/functions.h" +#include "velox/ml_functions/Concat.h" +#include "velox/ml_functions/NNBuilder.h" +#include +#include +#include "velox/ml_functions/VeloxDecisionTree.h" +#include "velox/expression/VectorFunction.h" +#include "velox/vector/ComplexVector.h" +#include "velox/vector/FlatVector.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace ml; +using namespace facebook::velox; +using namespace facebook::velox::test; +using namespace facebook::velox::exec; +using namespace facebook::velox::exec::test; +using namespace facebook::velox::core; + +/* + * The structure to describe the context of a neural network model architecture + */ +struct NNModelContext { + int inputFeatures; + + int numLayers; + + int hiddenLayerNeurons; + + int outputLayerNeurons; +}; + +/* + * The structure to describe the context of a decision tree model architecture + */ + +struct DTModelContext { + int inputFeatures; + + int treeDepth; +}; + + +/* + * The structure to describe the push down status of a feature + */ +struct FeatureStatus { + int isFeature; + + int isPushed; + + int vectorSize; + + int featureStartPos; + +}; + + +class VectorAddition : public MLFunction { + public: + VectorAddition(int inputDims) { + dims.push_back(inputDims); + } + + void apply( + const SelectivityVector& rows, + std::vector& args, + const TypePtr& type, + exec::EvalCtx& context, + VectorPtr& output) const override { + BaseVector::ensureWritable(rows, type, context.pool(), output); + + /*auto input_elements1 = args[0]->as()->elements(); + float* input1Values = input_elements1->values()->asMutable(); + + auto input_elements2 = args[1]->as()->elements(); + float* input2Values = input_elements2->values()->asMutable();*/ + + BaseVector* left = args[0].get(); + BaseVector* right = args[1].get(); + + exec::LocalDecodedVector leftHolder(context, *left, rows); + auto decodedLeftArray = leftHolder.get(); + auto baseLeftArray = decodedLeftArray->base()->as()->elements(); + + exec::LocalDecodedVector rightHolder(context, *right, rows); + auto decodedRightArray = rightHolder.get(); + auto baseRightArray = decodedRightArray->base()->as()->elements(); + + float* input1Values = baseLeftArray->values()->asMutable(); + float* input2Values = baseRightArray->values()->asMutable(); + + int numInput = rows.size(); + + Eigen::Map< + Eigen::Matrix> + input1Matrix(input1Values, numInput, dims[0]); + Eigen::Map< + Eigen::Matrix> + input2Matrix(input2Values, numInput, dims[0]); + + std::vector> results; + + Eigen::Matrix sumMat = input1Matrix + input2Matrix; + for (int i = 0; i < numInput; i++) { + std::vector curVec( + sumMat.row(i).data(), + sumMat.row(i).data() + sumMat.cols()); + results.push_back(curVec); + } + + VectorMaker maker{context.pool()}; + output = maker.arrayVector(results, REAL()); + } + + static std::vector> signatures() { + return {exec::FunctionSignatureBuilder() + .argumentType("array(REAL)") + .argumentType("array(REAL)") + .returnType("array(REAL)") + .build()}; + } + + static std::string getName() { + return "vector_addition"; + }; + + float* getTensor() const override { + // TODO: need to implement + return nullptr; + } + + CostEstimate getCost(std::vector inputDims) { + // TODO: need to implement + return CostEstimate(0, inputDims[0], inputDims[1]); + } + +}; + + + + +class GetFeatureVec : public MLFunction { + public: + + void apply( + const SelectivityVector& rows, + std::vector& args, + const TypePtr& type, + exec::EvalCtx& context, + VectorPtr& output) const override { + BaseVector::ensureWritable(rows, type, context.pool(), output); + + int64_t vecSizeLarge = 0; + if (args.size() == 2) { + // an optional parameter can be passed to enable the GPU for mat_mul + vecSizeLarge = args[1]->as>()->valueAt(0); + } + int vecSize = static_cast(vecSizeLarge); + + std::vector> results; + + for (int i = 0; i < rows.size(); i++) { + std::vector vec; + + for (int j = 0; j < vecSize; j++) { + if (j % 2 == 0) + vec.push_back(1.0); + else + vec.push_back(0.0); + } + results.push_back(vec); + } + + VectorMaker maker{context.pool()}; + output = maker.arrayVector(results, REAL()); + } + + static std::vector> signatures() { + return {exec::FunctionSignatureBuilder() + .argumentType("array(REAL)") + .argumentType("BIGINT") + .returnType("array(REAL)") + .build()}; + } + + static std::string getName() { + return "get_feature_vec"; + }; + + float* getTensor() const override { + // TODO: need to implement + return nullptr; + } + + CostEstimate getCost(std::vector inputDims) { + // TODO: need to implement + return CostEstimate(0, inputDims[0], inputDims[1]); + } + +}; + + + +class Job2WayJoin : HiveConnectorTestBase { + public: + Job2WayJoin() { + // Register Presto scalar functions. + functions::prestosql::registerAllScalarFunctions(); + // Register Presto aggregate functions. + aggregate::prestosql::registerAllAggregateFunctions(); + // Register type resolver with DuckDB SQL parser. + parse::registerTypeResolver(); + // HiveConnectorTestBase::SetUp(); + // parquet::registerParquetReaderFactory(); + + auto hiveConnector = + connector::getConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName) + ->newConnector( + kHiveConnectorId, std::make_shared()); + connector::registerConnector(hiveConnector); + + // SetUp(); + } + + ~Job2WayJoin() {} + + void SetUp() override { + // TODO: not used for now + // HiveConnectorTestBase::SetUp(); + // parquet::registerParquetReaderFactory(); + } + + void TearDown() override { + HiveConnectorTestBase::TearDown(); + } + + void TestBody() override {} + + static void waitForFinishedDrivers(const std::shared_ptr& task) { + while (!task->isFinished()) { + usleep(1000); // 0.01 second. + } + } + + std::shared_ptr executor_{ + std::make_shared( + std::thread::hardware_concurrency())}; + + std::shared_ptr queryCtx_{ + std::make_shared(executor_.get())}; + + std::shared_ptr pool_{ + memory::MemoryManager::getInstance()->addLeafPool()}; + + VectorMaker maker{pool_.get()}; + + std::unordered_map tableName2RowVector; + std::unordered_map>> operatorParam2Weights; + std::unordered_map> tabel2Columns; + std::vector modelOperators; + + // Function to check if a string represents a valid integer + bool isInteger(const std::string& s) { + if (s.empty() || (s.size() > 1 && s[0] == '0')) { + return false; // prevent leading zeros for non-zero integers + } + + for (char c : s) { + if (!std::isdigit(c)) { + return false; + } + } + + return true; + } + + + int getStringIndex(const std::vector& strVec, const std::string& target) { + auto it = std::find(strVec.begin(), strVec.end(), target); + + if (it != strVec.end()) { + // Calculate the index + int index = std::distance(strVec.begin(), it); + return index; + } else { + return -1; + } + + } + + + + std::vector extractOperatorsInReverse(const std::string& input) { + std::vector operators; + std::regex operatorPattern(R"(([a-zA-Z_]+\d+)\()"); // Match patterns like mat_mul1(, mat_add2(, etc. + std::smatch match; + + std::string::const_iterator searchStart(input.cbegin()); + while (std::regex_search(searchStart, input.cend(), match, operatorPattern)) { + operators.push_back(match[1]); // Extract the operator name + searchStart = match.suffix().first; // Move the search start position + } + + // Reverse the order of operators to match actual execution order + std::reverse(operators.begin(), operators.end()); + return operators; + } + + + + + std::vector> loadHDF5Array(const std::string& filename, const std::string& datasetName, int doPrint) { + H5::H5File file(filename, H5F_ACC_RDONLY); + H5::DataSet dataset = file.openDataSet(datasetName); + H5::DataSpace dataspace = dataset.getSpace(); + + // Get the number of dimensions + int rank = dataspace.getSimpleExtentNdims(); + // std::cout << "Rank: " << rank << std::endl; + + // Allocate space for the dimensions + std::vector dims(rank); + + // Get the dataset dimensions + dataspace.getSimpleExtentDims(dims.data(), nullptr); + + size_t rows; + size_t cols; + + if (rank == 1) { + rows = dims[0]; + cols = 1; + } + else if (rank == 2) { + rows = dims[0]; + cols = dims[1]; + } else { + throw std::runtime_error("Unsupported rank: " + std::to_string(rank)); + } + + // Read data into a 1D vector + std::vector flatData(rows * cols); + dataset.read(flatData.data(), H5::PredType::NATIVE_FLOAT); + + // Convert to 2D vector + std::vector> result(rows, std::vector(cols)); + for (size_t i = 0; i < rows; ++i) { + for (size_t j = 0; j < cols; ++j) { + result[i][j] = flatData[i * cols + j]; + if (doPrint == 1) + std::cout << result[i][j] << ", "; + } + if (doPrint == 1) + std::cout << std::endl; + } + + // Close the dataset and file + dataset.close(); + file.close(); + + return result; + } + + + + void findWeights(const std::string& modelPath) { + // read the parameter weights from file + std::vector> w1 = loadHDF5Array(modelPath, "fc1.weight", 0); + std::vector> b1 = loadHDF5Array(modelPath, "fc1.bias", 0); + std::vector> w2 = loadHDF5Array(modelPath, "fc2.weight", 0); + std::vector> b2 = loadHDF5Array(modelPath, "fc2.bias", 0); + std::vector> w3 = loadHDF5Array(modelPath, "fc3.weight", 0); + std::vector> b3 = loadHDF5Array(modelPath, "fc3.bias", 0); + + // store the weights in map with same name as operator + operatorParam2Weights["mat_mul1"] = w1; + operatorParam2Weights["mat_add1"] = b1; + operatorParam2Weights["mat_mul2"] = w2; + operatorParam2Weights["mat_add2"] = b2; + operatorParam2Weights["mat_mul3"] = w3; + operatorParam2Weights["mat_add3"] = b3; + + std::cout << "Shape of mat_mul1 weight: " << w1.size() << ", " << w1[0].size() << std::endl; + std::cout << "Shape of mat_add1 weight: " << b1.size() << std::endl; + std::cout << "Shape of mat_mul2 weight: " << w2.size() << ", " << w2[0].size() << std::endl; + std::cout << "Shape of mat_add2 weight: " << b2.size() << std::endl; + std::cout << "Shape of mat_mul3 weight: " << w3.size() << ", " << w3[0].size() << std::endl; + std::cout << "Shape of mat_add3 weight: " << b3.size() << std::endl; + } + + + std::vector> extractSubweight(const std::vector>& matrix, int start, int n) { + std::vector> result; + + std::cout << "Extracting subweight" << std::endl; + std::cout << start << ", " << n << std::endl; + + // Ensure that the range [start, start + n) is within bounds + //int end = std::min(start + n, matrix.size()); + int end = start + n; + for (int i = start; i < end; ++i) { + result.push_back(matrix[i]); // Copy rows within the range + } + + return result; + } + + + RowVectorPtr getTableFromCSVFile( + VectorMaker& maker, + std::string csvFilePath, + std::string tableName, + int k) { + std::ifstream file(csvFilePath.c_str()); + if (file.fail()) { + std::cerr << "Error in reading data file:" << csvFilePath << std::endl; + exit(1); + } + + std::cout << tableName << std::endl; + + std::unordered_map> colName2colHeader; + + std::unordered_map> colName2colType; + + colName2colType["aka_name"] = {0, 0, 1, 1, 1, 1, 1, 1}; + colName2colType["aka_title"] = {0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1}; + colName2colType["cast_info"] = {0, 0, 0, 0, 1, 0, 0}; + colName2colType["char_name"] = {0, 1, 1, 0, 1, 1, 1}; + colName2colType["comp_cast_type"] = {0, 1}; + colName2colType["company_name"] = {0, 1, 1, 0, 1, 1, 1}; + colName2colType["company_type"] = {0, 1}; + colName2colType["complete_cast"] = {0, 0, 0, 0}; + colName2colType["info_type"] = {0, 1}; + colName2colType["keyword"] = {0, 1, 1}; + colName2colType["kind_type"] = {0, 1}; + colName2colType["link_type"] = {0, 1}; + colName2colType["movie_companies"] = {0, 0, 0, 0, 1}; + colName2colType["movie_info_idx"] = {0, 0, 0, 1, 1}; + colName2colType["movie_keyword"] = {0, 0, 0}; + colName2colType["movie_link"] = {0, 0, 0, 0}; + colName2colType["name"] = {0, 1, 1, 0, 1, 1, 1, 1, 1}; + colName2colType["role_type"] = {0, 1}; + colName2colType["title"] = {0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1}; + colName2colType["movie_info"] = {0, 0, 0, 1, 1}; + colName2colType["person_info"] = {0, 0, 0, 1, 1}; + + std::vector aka_name_columns = { + "id", + "person_id", + "name", + "imdb_index", + "name_pcode_cf", + "name_pcode_ndf", + "surname_pcode", + "md5sum", + "an_features"}; + + colName2colHeader["aka_name"] = aka_name_columns; + + std::vector aka_title_columns = { + "id", + "movie_id", + "title", + "imdb_index", + "kind_id", + "production_year", + "phonetic_code", + "episode_of_id", + "season_nr", + "episode_nr", + "note", + "md5sum", + "at_features"}; + + colName2colHeader["aka_title"] = aka_title_columns; + + std::vector cast_info_columns = { + "id", + "person_id", + "movie_id", + "person_role_id", + "note", + "nr_order", + "role_id", + "ci_features"}; + + colName2colHeader["cast_info"] = cast_info_columns; + + std::vector char_name_columns = { + "id", + "name", + "imdb_index", + "imdb_id", + "name_pcode_nf", + "surname_pcode", + "md5sum", + "chn_features"}; + + colName2colHeader["char_name"] = char_name_columns; + + std::vector comp_cast_type_columns = { + "id", "kind", "cct_features"}; + + colName2colHeader["comp_cast_type"] = comp_cast_type_columns; + + std::vector company_name_columns = { + "id", + "name", + "country_code", + "imdb_id", + "name_pcode_nf", + "name_pcode_sf", + "md5sum", + "cn_features"}; + + colName2colHeader["company_name"] = company_name_columns; + + std::vector company_type_columns = { + "id", "kind", "ct_features"}; + + colName2colHeader["company_type"] = company_type_columns; + + std::vector complete_cast_columns = { + "id", "movie_id", "subject_id", "status_id", "cc_features"}; + + colName2colHeader["complete_cast"] = complete_cast_columns; + + std::vector info_type_columns = {"id", "info", "it_features"}; + + colName2colHeader["info_type"] = info_type_columns; + + std::vector keyword_columns = { + "id", "keyword", "phonetic_code", "k_features"}; + + colName2colHeader["keyword"] = keyword_columns; + + std::vector kind_type_columns = {"id", "kind", "kt_features"}; + + colName2colHeader["kind_type"] = kind_type_columns; + + std::vector link_type_columns = {"id", "link", "lt_features"}; + + colName2colHeader["link_type"] = link_type_columns; + + std::vector movie_companies_columns = { + "id", + "movie_id", + "company_id", + "company_type_id", + "note", + "mc_features"}; + + colName2colHeader["movie_companies"] = movie_companies_columns; + + std::vector movie_info_idx_columns = { + "id", "movie_id", "info_type_id", "info", "note", "mii_features"}; + + colName2colHeader["movie_info_idx"] = movie_info_idx_columns; + + std::vector movie_keyword_columns = { + "id", "movie_id", "keyword_id", "mk_features"}; + + colName2colHeader["movie_keyword"] = movie_keyword_columns; + + std::vector movie_link_columns = { + "id", "movie_id", "linked_movie_id", "link_type_id", "ml_features"}; + + colName2colHeader["movie_link"] = movie_link_columns; + + std::vector name_columns = { + "id", + "name", + "imdb_index", + "imdb_id", + "gender", + "name_pcode_cf", + "name_pcode_nf", + "surname_pcode", + "md5sum", + "n_features"}; + + colName2colHeader["name"] = name_columns; + + std::vector role_type_columns = {"id", "role", "rt_features"}; + + colName2colHeader["role_type"] = role_type_columns; + + std::vector title_columns = { + "id", + "title", + "imdb_index", + "kind_id", + "production_year", + "imdb_id", + "phonetic_code", + "episode_of_id", + "season_nr", + "episode_nr", + "series_years", + "md5sum", + "t_features"}; + + colName2colHeader["title"] = title_columns; + + std::vector movie_info_columns = { + "id", "movie_id", "info_type_id", "info", "note", "mi_features"}; + + colName2colHeader["movie_info"] = movie_info_columns; + + std::vector person_info_columns = { + "id", "person_id", "info_type_id", "info", "note", "pi_features"}; + + colName2colHeader["person_info"] = person_info_columns; + + std::string line; + + std::vector> intCols; + + std::vector> stringCols; + + std::vector colTypeIndex = colName2colType[tableName]; + + std::vector colIndexInType; + + int colIndex = 0; + + std::string cell; + + int numRows = 0; + + while (std::getline(file, line)) { + // std::cout << line << std::endl; + + // analyze the first line + std::stringstream iss(line); + + bool fragmentFlag = false; + + std::string fragmentedStr; + + colIndex = 0; + + // The JOB tables only have two types of columns: integer and string + while (std::getline(iss, cell, ',')) { + if ((fragmentFlag == false) && (cell.size() == 1) && (cell[0] == '"')) { + fragmentFlag = true; + + fragmentedStr = ","; + + continue; + + } else if ( + (fragmentFlag == true) && (cell.size() == 1) && (cell[0] == '"')) { + fragmentFlag = false; + + cell = fragmentedStr; + + fragmentedStr = ""; + + } else if ( + (fragmentFlag == false) && (cell[0] == '"') && + ((cell[cell.size() - 1] != '"') || + ((cell[cell.size() - 1] == '"') && + (cell[cell.size() - 2] == '\\')))) { + fragmentFlag = true; + + fragmentedStr = cell; + + continue; + + } else if ( + (fragmentFlag == true) && (cell[0] != '"') && + (cell[cell.size() - 1] == '"') && (cell[cell.size() - 2] != '\\')) { + fragmentFlag = false; + + fragmentedStr += cell; + + cell = fragmentedStr; + + fragmentedStr = ""; + + } else if (fragmentFlag == true) { + fragmentedStr += cell; + + continue; + } + + // std::cout << colIndex << ":" << cell << std::endl; + + if (!fragmentFlag) { + if (!colTypeIndex[colIndex]) { + // this is an integer column + + if (numRows == 0) { + if (cell == "") + + intCols.push_back(std::vector{INT_MIN}); + + else + + intCols.push_back(std::vector{stoi(cell)}); + + colIndexInType.push_back(intCols.size() - 1); + + } else { + int vecIndex = colIndexInType[colIndex]; + + if (cell == "") + + intCols[vecIndex].push_back(INT_MIN); + + else + + intCols[vecIndex].push_back(stoi(cell)); + } + + } else { + // this is a string column + + if (numRows == 0) { + stringCols.push_back(std::vector{cell}); + + colIndexInType.push_back(stringCols.size() - 1); + + } else { + int vecIndex = colIndexInType[colIndex]; + + stringCols[vecIndex].push_back(cell); + } + } + + colIndex++; + } + } + + if (colIndex < colTypeIndex.size()) { + // std::cout << "colIndex:" << colIndex << std::endl; + // std::cout << "colTypeIndex.size():"<< colTypeIndex.size() << + // std::endl; + + for (int i = colIndex; i < colTypeIndex.size(); i++) { + if (!colTypeIndex[i]) { + if (numRows == 0) { + intCols.push_back(std::vector{INT_MIN}); + + colIndexInType.push_back(intCols.size() - 1); + + } else { + int vecIndex = colIndexInType[i]; + + intCols[vecIndex].push_back(INT_MIN); + } + + } else { + if (numRows == 0) { + stringCols.push_back(std::vector{""}); + + colIndexInType.push_back(stringCols.size() - 1); + + } else { + int vecIndex = colIndexInType[i]; + + stringCols[vecIndex].push_back(""); + } + } + } + } + + colIndex = colTypeIndex.size(); + + /*if (numRows == 0) { + + for (int i = 0; i < colIndex; i++) { + + std::cout << colTypeIndex[i] << ":" << colIndexInType[i] << + std::endl; + + } + + }*/ + + numRows++; + } + + std::vector vecs; + + std::cout << "Building RowVector for this table with " << colIndex + << " columns and " << numRows << " rows." << std::endl; + + for (int i = 0; i < colIndex; i++) { + int type = colTypeIndex[i]; + + int vecIndex = colIndexInType[i]; + + // std::cout << i << ":" << type << ":" << vecIndex << std::endl; + + if (!type) { + auto vec = maker.flatVector(intCols[vecIndex]); + + vecs.push_back(vec); + + } else { + auto vec = maker.flatVector(stringCols[vecIndex]); + + vecs.push_back(vec); + } + } + + // to create the last column, which is a feature vector of length k + + if (k < 0) + k = 8; + + std::vector> inputVectors; + + for (int i = 0; i < numRows; i++) { + std::vector inputVector; + + for (int j = 0; j < k; j++) { + if (j % 2 == 0) + + inputVector.push_back(1.0); + + else + + inputVector.push_back(0.0); + } + + inputVectors.push_back(inputVector); + } + + auto inputArrayVector = maker.arrayVector(inputVectors, REAL()); + + vecs.push_back(inputArrayVector); + + RowVectorPtr myRowVector = + maker.rowVector(colName2colHeader[tableName], vecs); + + return myRowVector; + } + + int sampleQuery() { + return 29; + } + + int sampleModel() { + return 0; + } + + void sampleNNModelArch(int numInputFeatures, NNModelContext& nn) { + nn.inputFeatures = numInputFeatures; + nn.numLayers = 3; + nn.hiddenLayerNeurons = 16; + nn.outputLayerNeurons = 2; + } + + void sampleDTModelArch(int numInputFeatures, DTModelContext& dt) { + dt.inputFeatures = numInputFeatures; + dt.treeDepth = 8; + } + + bool replace(std::string& str, const std::string& from, const std::string& to) { + size_t start_pos = str.find(from); + if (start_pos == std::string::npos) + return false; + str.replace(start_pos, from.length(), to); + return true; + } + + + void registerNNFunction(const std::string& op_name, const std::vector>& weightMatrice, int dim1, int dim2) { + + if (op_name.find("mat_mul") != std::string::npos) { + auto nnWeightVector = maker.arrayVector(weightMatrice, REAL()); + exec::registerVectorFunction( + op_name, + MatrixMultiply::signatures(), + std::make_unique( + nnWeightVector->elements()->values()->asMutable(), dim1, dim2) + ); + std::cout << "Registered a mat_mul function of name " << op_name << " with dimension " << dim1 << ", " << dim2 << endl; + } + + else if (op_name.find("mat_add") != std::string::npos) { + auto nnWeightVector = maker.arrayVector(weightMatrice, REAL()); + exec::registerVectorFunction( + op_name, + MatrixVectorAddition::signatures(), + std::make_unique( + nnWeightVector->elements()->values()->asMutable(), dim1) + ); + std::cout << "Registered a mat_add function of name " << op_name << " with dimension " << dim1 << endl; + } + + else if (op_name.find("relu") != std::string::npos) { + exec::registerVectorFunction( + op_name, Relu::signatures(), std::make_unique(), + {}, + true); + std::cout << "Registered a relu function of name " << op_name << endl; + } + + else if (op_name.find("softmax") != std::string::npos) { + exec::registerVectorFunction( + op_name, Softmax::signatures(), std::make_unique()); + std::cout << "Registered a softmax function of name " << op_name << endl; + } + + else if (op_name.find("vector_addition") != std::string::npos) { + exec::registerVectorFunction( + op_name, + VectorAddition::signatures(), + std::make_unique(dim1) + ); + std::cout << "Registered a vector_addition function of name " << op_name << " with dimension " << dim1 << endl; + } + + +} + + + + + std::unordered_map getAllTableSources(std::shared_ptr planNodeIdGenerator) { + std::unordered_map + sources; // with filters and projections pushed down; + + auto an_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["aka_name"]}) + .project({"person_id as an_person_id", "name as an_name", "imdb_index as an_imdb_index", "an_features"}); + tabel2Columns["an"] = {"an_person_id", "an_name", "an_imdb_index", "an_features"}; + sources["an"] = an_a; + + auto at_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["aka_title"]}) + .project({"id as at_id", "movie_id as at_movie_id", "title as at_title", "imdb_index as at_imdb_index", "kind_id as at_kind_id", "at_features"}); + tabel2Columns["at"] = {"at_id", "at_movie_id", "at_title", "at_imdb_index", "at_kind_id", "at_features"}; + sources["at"] = at_a; + + auto ci_a = + PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["cast_info"]}) + .project( + {"movie_id as ci_movie_id", + "person_id as ci_person_id", + "role_id as ci_role_id", + "person_role_id as ci_person_role_id", + "ci_features"}) + .limit(0, 3624434, false); + tabel2Columns["ci"] = {"ci_movie_id", + "ci_person_id", + "ci_role_id", + "ci_person_role_id", + "ci_features"}; + sources["ci"] = ci_a; + + auto chn_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["char_name"]}) + .project({"id as chn_id", "name as chn_name", "imdb_index as chn_imdb_index", "imdb_id as chn_imdb_id", "chn_features"}); + tabel2Columns["chn"] = {"chn_id", "chn_name", "chn_imdb_index", "chn_imdb_id", "chn_features"}; + sources["chn"] = chn_a; + + auto cc_a = + PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["complete_cast"]}) + .project({"id as cc_id", "movie_id as cc_movie_id", "subject_id as cc_subject_id", "status_id as cc_status_id", "cc_features"}); + tabel2Columns["cc"] = {"cc_id", "cc_movie_id", "cc_subject_id", "cc_status_id", "cc_features"}; + sources["cc"] = cc_a; + + auto cct_a = + PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["comp_cast_type"]}) + .project({"id as cct_id", "kind as cct_kind", "cct_features"}); + tabel2Columns["cct"] = {"cct_id", "cct_kind", "cct_features"}; + sources["cct"] = cct_a; + + + auto cn_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["company_name"]}) + .project({"id as cn_id", "name as cn_name", "country_code as cn_country_code", "imdb_id as cn_imdb_id", "cn_features"}); + tabel2Columns["cn"] = {"cn_id", "cn_name", "cn_country_code", "cn_imdb_id", "cn_features"}; + sources["cn"] = cn_a; + + auto ct_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["company_type"]}) + .project({"id as ct_id", "kind as ct_kind", "ct_features"}); + tabel2Columns["ct"] = {"ct_id", "ct_kind", "ct_features"}; + sources["ct"] = ct_a; + + auto it_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["info_type"]}) + .project({"id as it_id", "info as it_info", "it_features"}); + tabel2Columns["it"] = {"it_id", "it_info", "it_features"}; + sources["it"] = it_a; + + auto kt_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["kind_type"]}) + .project({"id as kt_id", "kind as kt_kind", "kt_features"}); + tabel2Columns["kt"] = {"kt_id", "kt_kind", "kt_features"}; + sources["kt"] = kt_a; + + auto lt_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["link_type"]}) + .project({"id as lt_id", "link as lt_link", "lt_features"}); + tabel2Columns["lt"] = {"lt_id", "lt_link", "lt_features"}; + sources["lt"] = lt_a; + + auto k_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["keyword"]}) + .project({"id as k_id", "keyword as k_keyword", "k_features"}); + tabel2Columns["k"] = {"k_id", "k_keyword", "k_features"}; + sources["k"] = k_a; + + auto mc_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["movie_companies"]}) + .project({"id as mc_id", "movie_id as mc_movie_id", "company_id as mc_company_id", "company_type_id as mc_company_type_id", "mc_features"}); + tabel2Columns["mc"] = {"mc_id", "mc_movie_id", "mc_company_id", "mc_company_type_id", "mc_features"}; + sources["mc"] = mc_a; + + auto mi_a = + PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["movie_info"]}) + .project({"id as mi_id", "movie_id as mi_movie_id", "info_type_id as mi_info_type_id", "info as mi_info", "mi_features"}); + tabel2Columns["mi"] = {"mi_id", "mi_movie_id", "mi_info_type_id", "mi_info", "mi_features"}; + sources["mi"] = mi_a; + + auto mii_a = + PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["movie_info_idx"]}) + .project({"id as mii_id", "movie_id as mii_movie_id", "info_type_id as mii_info_type_id", "info as mii_info", "mii_features"}); + tabel2Columns["mii"] = {"mii_id", "mii_movie_id", "mii_info_type_id", "mii_info", "mii_features"}; + sources["mii"] = mii_a; + + auto mk_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["movie_keyword"]}) + .project({"id as mk_id", "movie_id as mk_movie_id", "keyword_id as mk_keyword_id", "mk_features"}); + tabel2Columns["mk"] = {"mk_id", "mk_movie_id", "mk_keyword_id", "mk_features"}; + sources["mk"] = mk_a; + + auto ml_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["movie_link"]}) + .project({"id as ml_id", "movie_id as ml_movie_id", "linked_movie_id as ml_linked_movie_id", "link_type_id as ml_link_type_id", "ml_features"}); + tabel2Columns["ml"] = {"ml_id", "ml_movie_id", "ml_linked_movie_id", "ml_link_type_id", "ml_features"}; + sources["ml"] = ml_a; + + auto n_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["name"]}) + .project({"id as n_id", "name as n_name", "imdb_index as n_imdb_index", "imdb_id as n_imdb_id", "n_features"}); + tabel2Columns["n"] = {"n_id", "n_name", "n_imdb_index", "n_imdb_id", "n_features"}; + sources["n"] = n_a; + + auto pi_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["person_info"]}) + .project( + {"id as pi_id", + "person_id as pi_person_id", + "info_type_id as pi_info_type_id", + "pi_features"}); + tabel2Columns["pi"] = {"pi_id", "pi_person_id", "pi_info_type_id", "pi_features"}; + sources["pi"] = pi_a; + + auto rt_a = PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["role_type"]}) + .project({"id as rt_id", "role as rt_role", "rt_features"}); + tabel2Columns["rt"] = {"rt_id", "rt_role", "rt_features"}; + sources["rt"] = rt_a; + + auto t_a = + PlanBuilder(planNodeIdGenerator, pool_.get()) + .values({tableName2RowVector["title"]}) + .project({"id as t_id", "title as t_title", "imdb_index as t_imdb_index", "kind_id as t_kind_id", "imdb_id as t_imdb_id", "t_features"}); + tabel2Columns["t"] = {"t_id", "t_title", "t_imdb_index", "t_kind_id", "t_imdb_id", "t_features"}; + sources["t"] = t_a; + + return sources; + } + + + +bool addModelInferenceToQueryPlanAfterFactorize(PlanBuilder & planBuilder, std::shared_ptr planNodeIdGenerator, const std::string& colFeature) { + + std::string modelProjString = ""; + std::vector> emptyMatrix; + for (int i = modelOperators.size() - 1; i > 0; i--) { + std::string opName = modelOperators[i]; + if (opName.find("mat_mul") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + int dim2 = param[0].size(); + registerNNFunction(opName, param, dim1, dim2); + } + else if (opName.find("mat_add") != std::string::npos) { + std::vector> param = operatorParam2Weights[opName]; + int dim1 = param.size(); + registerNNFunction(opName, param, dim1, -1); + } + else { + registerNNFunction(opName, emptyMatrix, -1, -1); + } + modelProjString += opName + "("; + } + modelProjString += colFeature; + + for (int i = modelOperators.size() - 1; i > 0; i--) { + modelProjString += ")"; + } + modelProjString += " AS output"; + + std::cout << "Inference Part: " << modelProjString << endl; + planBuilder.project({modelProjString}); + + return true; +} + + + +/* +param pushDown: 0 -> no push, 1 -> all push, 2 -> left push, 3 -> right push +*/ +bool createAndExecuteQuery(std::string leftTable, std::string rightTable, std::string probKey, std::string buildKey, int dimLeft, int dimRight, int pushDown) { + std::string firstOpName = modelOperators[0]; // name of operator of split layer + std::vector> firstWeight = operatorParam2Weights[firstOpName]; // weight of the split layer + int numCols = firstWeight.size(); // number of columns in split layer + int numNeurons = firstWeight[0].size(); // number of neurons in split layer + + //registering vector addition operator for later use as aggregation + std::vector> emptyMatrix; + registerNNFunction("vector_addition", emptyMatrix, numNeurons, -1); + + PlanBuilder planBuilder{pool_.get()}; + std::shared_ptr planNodeIdGenerator = std::make_shared(); + + // Fetch plan builders for each plan + std::unordered_map sources = getAllTableSources(planNodeIdGenerator); + + // fetch plan builders corresponding to left and right table + PlanBuilder leftPlan = sources[leftTable]; + PlanBuilder rightPlan = sources[rightTable]; + + // form the left and right feature columns + std::string lFeatureName = leftTable + "_features"; + std::string rFeatureName = rightTable + "_features"; + + std::vector projections; + projections.push_back(probKey); + projections.push_back(buildKey); + + std::string fNewName; //new name after applying operator on feature + std::string fNewNameFull; // fNewName with its full projection details + + // checking if push left feature + if (pushDown == 1 || pushDown == 2) { + std::vector> subWeight = extractSubweight(firstWeight, 0, dimLeft); + std::string newOpName = firstOpName + "_left"; + registerNNFunction(newOpName, subWeight, dimLeft, numNeurons); + fNewName = "factorized_left"; + fNewNameFull = newOpName + "(get_feature_vec(" + lFeatureName + ", " + std::to_string(dimLeft) + ")) AS " + fNewName; + } + else { + fNewName = "mapped_left"; + fNewNameFull = "get_feature_vec(" + lFeatureName + ", " + std::to_string(dimLeft) + ") AS " + fNewName; + } + projections.push_back(fNewName); + leftPlan= leftPlan.project({probKey, fNewNameFull}); + + // checking if push right feature + if (pushDown == 1 || pushDown == 3) { + std::vector> subWeight = extractSubweight(firstWeight, dimLeft, dimRight); + std::string newOpName = firstOpName + "_right"; + registerNNFunction(newOpName, subWeight, dimRight, numNeurons); + fNewName = "factorized_right"; + fNewNameFull = newOpName + "(get_feature_vec(" + rFeatureName + ", " + std::to_string(dimRight) + ")) AS " + fNewName; + } + else { + fNewName = "mapped_right"; + fNewNameFull = "get_feature_vec(" + rFeatureName + ", " + std::to_string(dimRight) + ") AS " + fNewName; + } + projections.push_back(fNewName); + rightPlan= rightPlan.project({buildKey, fNewNameFull}); + + // Perform the join + PlanBuilder out = leftPlan.hashJoin( + {probKey}, + {buildKey}, + rightPlan.planNode(), + "", + {projections} + ); + + // Apply operators after join + std::string projString; + + if (pushDown == 0) { + // no features pushed, so concatenate them and apply first operator in the model + registerNNFunction(firstOpName, firstWeight, dimLeft + dimRight, numNeurons); + projString = firstOpName + "(concat(mapped_left, mapped_right)) AS features"; + } + else if (pushDown == 1) { + // all features pushed, so just perform aggregation + projString = "vector_addition(factorized_left, factorized_right) AS features"; + } + else if (pushDown == 2) { + // only left features pushed, so apply first operator on right feature and perform aggregation + std::vector> subWeight = extractSubweight(firstWeight, dimLeft, dimRight); + std::string newOpName = firstOpName + "_right"; + registerNNFunction(newOpName, subWeight, dimRight, numNeurons); + std::string fNewNameFull = newOpName + "(mapped_right)"; + + projString = "vector_addition(factorized_left, " + fNewNameFull + ") AS features"; + } + + else { + // only right features pushed, so apply first operator on left feature and perform aggregation + std::vector> subWeight = extractSubweight(firstWeight, 0, dimLeft); + std::string newOpName = firstOpName + "_left"; + registerNNFunction(newOpName, subWeight, dimLeft, numNeurons); + std::string fNewNameFull = newOpName + "(mapped_left)"; + + projString = "vector_addition(" + fNewNameFull + ", factorized_right) AS features"; + } + + planBuilder = out.project({projString}); + addModelInferenceToQueryPlanAfterFactorize(planBuilder, planNodeIdGenerator, "features"); + + auto myPlan = planBuilder.planNode(); + std::cout << myPlan->toString(true, true) << std::endl; + std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); + auto results = exec::test::AssertQueryBuilder(myPlan).copyResults(pool_.get()); + std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + std::cout << "Time (sec) = " << (std::chrono::duration_cast(end - begin).count()) /1000000.0 << std::endl; + std::cout << "Results Size: " << results->size() << std::endl; + std::cout << "Results:" << results->toString(0, 5) << std::endl; + return true; + } + +}; + + +int main(int argc, char** argv) { + setlocale(LC_TIME, "C"); + malloc_trim(0); + + folly::init(&argc, &argv, false); + memory::MemoryManager::initialize({}); + + Job2WayJoin bench; + std::cout + << "[WARNING] the data path is hardcoded and needs to be modified accordingly." + << std::endl; + + // Load Aka Name table + RowVectorPtr akaNameVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/aka_name.csv", "aka_name", 8); + + // Load Aka Title table + RowVectorPtr akaTitleVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/aka_title.csv", "aka_title", 8); + + // Load Cast Info table + RowVectorPtr castInfoVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/cast_info.csv", "cast_info", 8); + + // Load Char Name table + RowVectorPtr charNameVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/char_name.csv", "char_name", 8); + + // Load Comp Cast Type table + RowVectorPtr compCastTypeVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/comp_cast_type.csv", "comp_cast_type", 8); + + // Load Company Name table + RowVectorPtr companyNameVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/company_name.csv", "company_name", 8); + + // Load Company Type table + RowVectorPtr companyTypeVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/company_type.csv", "company_type", 8); + + // Load Complete Cast table + RowVectorPtr completeCastVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/complete_cast.csv", "complete_cast", 8); + + // Load Info Type table + RowVectorPtr infoTypeVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/info_type.csv", "info_type", 8); + + // Load Keyword table + RowVectorPtr keywordVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/keyword.csv", "keyword", 8); + + // Load Kind Type table + RowVectorPtr kindTypeVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/kind_type.csv", "kind_type", 8); + + // Load Link Type table + RowVectorPtr linkTypeVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/link_type.csv", "link_type", 8); + + // Load Movie Companies table + RowVectorPtr movieCompaniesVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/movie_companies.csv", "movie_companies", 8); + + // Load Movie Info Index table + RowVectorPtr movieInfoIdxVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/movie_info_idx.csv", "movie_info_idx", 8); + + // Load Movie Keyword table + RowVectorPtr movieKeywordVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/movie_keyword.csv", "movie_keyword", 8); + + // Load Movie Link table + RowVectorPtr movieLinkVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/movie_link.csv", "movie_link", 8); + + // Load Name table + RowVectorPtr nameVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/name.csv", "name", 8); + + // Load Role Type table + RowVectorPtr roleTypeVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/role_type.csv", "role_type", 8); + + // Load Title table + RowVectorPtr titleVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/title.csv", "title", 8); + + // Load Movie Info table + RowVectorPtr movieInfoVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb/movie_info.csv", "movie_info", 8); + + // Load Person Info table + RowVectorPtr personInfoVec = bench.getTableFromCSVFile( + bench.maker, "resources/data/imdb//person_info.csv", "person_info", 8); + + bench.tableName2RowVector["aka_name"] = akaNameVec; + + bench.tableName2RowVector["aka_title"] = akaTitleVec; + + bench.tableName2RowVector["cast_info"] = castInfoVec; + + bench.tableName2RowVector["char_name"] = charNameVec; + + bench.tableName2RowVector["comp_cast_type"] = compCastTypeVec; + + bench.tableName2RowVector["company_name"] = companyNameVec; + + bench.tableName2RowVector["company_type"] = companyTypeVec; + + bench.tableName2RowVector["complete_cast"] = completeCastVec; + + bench.tableName2RowVector["info_type"] = infoTypeVec; + + bench.tableName2RowVector["keyword"] = keywordVec; + + bench.tableName2RowVector["kind_type"] = kindTypeVec; + + bench.tableName2RowVector["link_type"] = linkTypeVec; + + bench.tableName2RowVector["movie_companies"] = movieCompaniesVec; + + bench.tableName2RowVector["movie_info_idx"] = movieInfoIdxVec; + + bench.tableName2RowVector["movie_keyword"] = movieKeywordVec; + + bench.tableName2RowVector["movie_link"] = movieLinkVec; + + bench.tableName2RowVector["name"] = nameVec; + + bench.tableName2RowVector["role_type"] = roleTypeVec; + + bench.tableName2RowVector["title"] = titleVec; + + bench.tableName2RowVector["movie_info"] = movieInfoVec; + + bench.tableName2RowVector["person_info"] = personInfoVec; + + + exec::registerVectorFunction( + "get_feature_vec", + GetFeatureVec::signatures(), + std::make_unique()); + std::cout << "Completed registering function for get_feature_vec" << std::endl; + + // retrieve the weights and set to map + std::cout << "Reading model parameters" << std::endl; + bench.findWeights("resources/model/job_any_64.h5"); + + // retrieve the model operators from model expression IR + std::string modelInput = "softmax3(mat_add3(mat_mul3(relu2(mat_add2(mat_mul2(relu1(mat_add1(mat_mul1(features)))))))))"; + std::cout << "Extracting model operators" << std::endl; + std::vector operators = bench.extractOperatorsInReverse(modelInput); + bench.modelOperators = operators; + + std::cout << "Performing Join" << std::endl; + bool ret = bench.createAndExecuteQuery("ci", "rt", "ci_role_id", "rt_id", 50, 50, 3); + + return ret; +} + From 13085b0971c49709abe1741742b510eea14161da Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Wed, 19 Feb 2025 00:55:13 -0700 Subject: [PATCH 13/15] Added new entry for job 2-way join --- velox/ml_functions/CMakeLists.txt | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/velox/ml_functions/CMakeLists.txt b/velox/ml_functions/CMakeLists.txt index a38b20183..bb1ccc186 100644 --- a/velox/ml_functions/CMakeLists.txt +++ b/velox/ml_functions/CMakeLists.txt @@ -184,6 +184,27 @@ find_package(HDF5 COMPONENTS C CXX HL REQUIRED) link_directories( ${HDF5_LIBRARY_DIRS} ) include_directories( ${HDF5_INCLUDE_DIRS} ) +add_executable(job_two_join_test tests/Job2WayJoin.cpp) +target_link_libraries( + job_two_join_test + velox_aggregates + velox_type + velox_vector + velox_vector_test_lib + velox_exec + velox_exec_test_lib + velox_tpch_connector + velox_memory + velox_common_base + velox_vector_fuzzer + openblas + ${TORCH_LIBRARIES} + jsoncpp_lib + h5cpp::h5cpp + hdf5_serial + ${HDF5_CXX_LIBRARIES} +) + add_executable(fraud_detection_test tests/FraudDetectionTest.cpp) target_link_libraries( fraud_detection_test From af7a9c316f38bcae31560e32b3c855212544dffd Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Wed, 19 Feb 2025 00:57:19 -0700 Subject: [PATCH 14/15] Added model for job 2-way join --- resources/model/job_any_64.h5 | Bin 0 -> 34248 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 resources/model/job_any_64.h5 diff --git a/resources/model/job_any_64.h5 b/resources/model/job_any_64.h5 new file mode 100644 index 0000000000000000000000000000000000000000..f5fc94c526a3daae7c52eb499cb21b5150d1649d GIT binary patch literal 34248 zcmeFYdst0h_cvTBolsPYP*h4p6wzLDlt?M(5Ry_-q=P~cO6j1aR60nCP%1)n+G~s= z9TcJ*q6ncJL&&*z-@n`U`Qv_`>-p!oe%JF}*ZZ!0?LEd^bM3j-nsdxK$NY@X+-E&| z&Oo{0a$^5FWMsr7#T5Tb{pY9e?@<^2?=%0T|H*g$tBC)VQGaD_zke&@V*mPxiK+cP zKlpF?|9RcgC6<;B%?{IC4aw1mj=|8?bHO$|}s`q!Z;rYa+* z8vI|nn5cA_|H|*z@81Q={Ou_c;-ZXyAO95jHyQX>{@)RH3l`267wi8&`g5Sv|Nc4t zc?nI4|NV8-|H->MtuK25H{!`KaWc-J;{+m|Z{T~_s&B_1S zuKqnjii`i-uFU`5{~!5(U`Lz&7WMCf|0YB=_&>D&Hz@yeef+zn|LJGue;@Ln+37#i z{|EKs^&dt5P5$5QXMZ2!zg<*@Y@+QSWZ?NJaZ0Ux!VK)9DMq!CO1vgex@>=3);<;H zwJMQc`UgxJF&2hVA~QQ~M4Dfwz@lS1bV>R#cxs2CRbLYulH80P(j%$6tf^dm&JebC z*#S}#jt6O58*sNTW)24P;p1l=m_K<3%pRJ;xOde5!}@+PyWB{v3+qbo+&J$AKG8p@sd_nUN{JrHSGee-IeU> z(EdW-Y1=90bsPJ1w3DH?6It4=B;}XsB$yHc2KF;hr@5Xh8FhmT3rmJOwW=)oSUN~I zUBFY-b;1R<*U9bMeb7?W1eY9Hy6;j(KRzCzpWBs4Q+AWU!1osPTrK0e%lBbp`Y6zf zvqKB@cD(aFl5O~)%IqqYSlgoU)b-Y$ec08^(i5uqem0-@z8D*JcJu|3zhlpOHh2pj z*F|E=*4ymPx)`p`D-hGR#bDQ-bA0ODWK5W#%?=AU(3SJ`EUDUr`Sow6$$Kv`z4Vi) z?zn(+;2W9Fq&=W3YLkjO1L#=tn~CqvfMw$E_}20$Xdb?lgk=iQr?l;5ZpTH0r%5r zHH4mOz_DJjcz??dn6^2C_G?~*peQ%qbLvi(()u3XB}kzE5o=JO2+Fzb4>HY8G~{b6 z9X*5?Uf)Rr&PGt*79WuNDS=fp2ZM2MCfMf2()7Ew&_72D^!bILkekhh?ih~yN7$0~ zqEM9HnS;JEOR)W(6jkhf&e@cmrzo2MJoSAV*HXF{l(c6Ega@v3x<31%s%ryOWlw=E zmBpwYFc|u7{G|5G*ZkUtG7$e@0qyUu2X$T5ijP(4tbSfKIZVF=-?kaR>~A5|JfVg; z9M}$icNO74LNazYQZ~=FlIAm5vwh}Pdm-e!wSC}%;d#oyc{+R7aTfAVTH}YvrW@!v+rbjsB{$1 z#YVxDP9+dCa)X;){h@ZYKZH$Fpp*w)7&>S*W=-Ek=ApR&gU>?!-xUp{)Fj}r>rryqgN+&5ggOFvdKavX%!>S1B$CD__J z4uk_=vhg-|Ocy3jlDy-XuDQK1#-NoL(Xx5#d7DaG0R;<^_O18Ge)a(UdrQc8Yu zTOV7KrSwaB0WM@`Jc~WB902=_4)bd+9pqHIq^MR(K*2E@%-3QD^I5PTgU;+_CJEPB z>HH_m*>WW|Io!lsjcK6N{~YxWv8IC|9L~#`BZw^716GSSLap9-3|rKR>N7G}+tEw7 zvRfCv>{~?Pw;3C=P#Ky~9kv|IfEWF?;EgFR}3}QPUe1N7a~{ zrwnS!YpMU0DxB#e!*@T@C#Qa|nQ?>(-HMc<)6cV*Q>7T2o?XV0vrNG{c{^Qjnh%nK zSeBwU0RkQ*KvUu%2%4xv@%s%)H)<80nqADKR*nR8`^eo7yv@niUSe&f@n{mALw4q4 zIJbv(Oze~xR(;zEA*Z@vqNg6UD;1Qx)`dWAfC>ysRHTf#N!-HQ`{DICOR!9EqM2(3 zQ@v6N2C6J3!LXV1t3(Y9ez>6@pG>Y(OGx4OB9P!dp+Vz6^ z8hEhYNf9Kcu!pYHZ>Lv66&4n&f)e9vG4S^ul3h@O4l^u4>-c2&zB!y$=*H792V*ol zq*PJ*Tn`q`E~2)RdqMTeC$8emMu?m`6Bh<|u*M&I>B%F)(^?h$N7su?B>$03IBo{~{&?oovH)C;mrzWa4uo8|Os&ISQ{N~VXlYBJ#1022 z-dDveE<{uF?KZX}WC#0lCmPorP$QeE{h_*j8AQA%W@*)d2Ym~uEkKUYXb0@7|BAw% zu~5%cAg|k#^i2nn#Ij5L;_6Yj!c`U;7Kg9}?6JR>R*K`j-&{-E}qQm*7svtD71=L*NCSF;p% z39}T#aN#!p66}^1V}sK!GsDTVpwD}Dd4!uWn=vGrjr=whofMaW^MgDpNio8AC->5u zVhu3uk`i8=G72;bUAU6uNf?wYMFs3U8267ydNcxdoVzWoqyD5;U7_k9^?$}hml1xl15uF3`l#&I_$ zo#giVN@7uQ0n0mlk9Eh50)vY+H2eE0YS|`%yEbL9xn~m4B6b{wMr4$$-<+unzbqX?RaVyExVMH1o9vN0kq=o}<0g>yEveTXg2@5Wu!kDFya-M_Nb@~vrK7^et8d9Nk)R9e5e8LtUpG4EI?!pUm zjziAQgKU+SFh35(-WVPl0C zJFMOxKE2_=RAgJv4_ZQkX(3q^22qv#4@!v-0V_96>eP{jHxn&jlF}KLc}t0Wt}G|T zsPC-pe0lj2BRjUFDFNTQ9EOdX4zv1M^lQ-Qi^BjJM79WHzGMJ(C88~lnZNk?`eyAbWgTHg6WM_vdk z3+hL$f7ZZ-aR*`cl~c@i(4hQE(#Zxs2w2FxwG(| zOr=l|bAyG%J;Qf#PlWU9bs@Mtg-On?fx_|uT+w@Fs2tgc%7Hr6b#)&T|0Pd@D`iRQ zdoDXOClI2_Hh{m<50rdyg0770=B`9EqyDfkb~Uj-7BEBJaOP;X=wlC)xZlCTB7UNl z-cIuKtHx%R5AFL`6rB(5HPO7k zdI@b;&}4zWX&}2Mf;t78XyNX$qOtP~JgnZu&9}Y7hfhcYpQuNe95IvK^xp&Jk;mxa z?m&?E7(>m{YMk+}lIdYD6!`ad!(rAf3+kJn266_E=x4t)eqYX9njhN5ZdAEL_~u<$ zuAon^r?p~n$}F~cWfd-oo5`7nOl8jkc5xW?oeH&I~72yYtMUZZCuyJ>WtUQ=s$oV2ZIUfa?cUp+qVR*2-7lwuUJrwola0 zp%hnb@5b|Gu@E@>5%#64K}gvY>MaoG!~YDW>4OdkHSE3My|Wq_b!qTVzt;1EewL!* z#W|4ieJ_|vJjT1;58;gO3)u9$jQR~c$u7G_q0yR9h#OnZ`^78Jur>o|{y3jsnsR}R z?!04@G>pmX$0w|CT8^2U?QulIKBhKi0y>T2A;rR)&erZHqa$kMKT?zGB{#6fybqd4aN z!j?`1sNsC+X=L7%3B6s(WD!1^$%V6xI_v{o4c58y)s#xSx%XQvcQV< zC(YKKbVF7ivUlxargJvou4CD}Y_SyiozI{rhj;QWW1c|!S^+s^tYc1J?4fE*5%;8` zkrPN8vChL~U_am?-dA~r((}%ff^!J#pLk8Wj4oQCpSg*lXX;Wg9WYR{q%-li#0 zz4Y_mQp)jjWzA(*$>D_?nWs7Ot_~8+_t-EDTCfm2S1Q23KpRT55ip(SQV=4^s5mX0 zvTRnvsSDP$-S#59H#LB+povf-77Oo;=8#AJ3ii#|6Q-^>z^BV3vedS%;1O^YTk7(` zrhON0)oe;_ikdWWL?oDxaD}W28#Z-FCUd$}i7#bB>BzvzEbQqeSo9*UTTI6i2@ zj#0_e$uKgUK_qkKOV<&GoB0%jV5fMvvLgD9$4sQoW8~lJ{Kt zd1D}(KVTuBl3CA6PmP4I6|>;gjne6l#`#i&$z;NujjU3?fw{je;!Of7*wj8v)INVe zI8;0XGPw&V;kla5U0Fu*66v^6Z6$nKa0xsVpVK+Bc`T*ehKzWcm0R z?7TP-#wt&T)YP5O^Ck;6$El0P5H5u=0pA>##;AVgPuxSZ=DxN}H^vbX{?w6ob#~$XDjwPc>j#SjS2p!J<;WXyB zfOJYPTah`IovgoytH<{TotxfZ^gnPqtjKPj9F^FkPqPM}? z@Z;Avg5WqzWTVS3=S`u~xBGFIZYSG6E*mQHHloy}8ZKQglY$ocLf(Pf%>7m#b{iC# z7VSUF`98O$&=<4mh>kdQSUqJ0s&bH6HJp0*Fw#1If*y4lLq_CGYJ4dT?LQ*Oa`r50 zEK-K6*VK5e?X&ULesdVMvz%P324be^RW8$8oRER^U_v5l(ddK)kjTg9UYB1vbRCps-f~gsIL29m~nPFlYE&XtYZro~S?)_KO zlMBaj>KQB3L3d5S(R(Cy4edmgz!9M4JPS+gtZ?jsO1xh`o^#&oP2EjZ;L*Mt+GFBb z+=wyc`Z0J)FS!}Edxp>4ep98w&>!rj}rr%@<7CX>Ttp)St zPEnz~3eNK>V&_9o(Wb3DUOT7@!k4C;&H9tLfAtkM_KrG9j?!lS`z1;0-VQcwh8b0D zn+!#kQLxkT0L)59zRlN~zC@eCL(Ku?cXTj=5pqJkbCPx;h2Eg7QdNeC8ge9F+f%CjE$P{gaRK+%C zwDu812G}!S_XKni%mnw9D}=_PHBre#CmawGf`dozWfLn$!uj~KFr!$RjqVjtuYm=0 z*2Ati?;-et+`O4Zy<*^?Q4iI!dfw_J5VH?vDDY51#-R?{# zzo|`d_qGy=&3Vc^7x(b%JPWCKzbeyodPs$KmN2ibKNxwB0palo_*yo)!t>E~st^yw z()S+l{hkd7ip%M{#Z*|=cZenbtl(stwP;5CNwE8-&3&v^<)lAOfK76S?9B6S*u`2g z^1*FXb8(`85rbiB{z_8Z{F^<=YvMmSJtj9D3D%S1M*T`&Le!$)tlXy!u1%Q%b9dz7 z=bdwCqog71@z9`1rx5l@B^5g+jUapADEfDRU`)VXVaW*w)4Iawe%%xFHT@>+lt0DJ z?fiv1EcK~p>@1SkxI|6HbA{qP`|qcElo*+S8NJK{1kMsUD)h`zzV7B@@j;H7~LI7ZNa>U0|hm zPk`7T2S~>aocFD8Chs$dUd*ZHdhE9|x0_37M!gU!dpP!e!W_8LcAD?JDSCcx0SpPR zLyNoX$|w6K;g`J+$oXO-8{c;mB)^U!%NjWvxp6D}$lHTW2V6)(OdpH&LnzGq8atc4 zkNPySDe6fC>+nhkt>L>M|5XANE+P!Oy#hOv&qAYX6l)8Rp_IpsRPaZcj+tHO4g99V z-U=mlvQ5Y&4^^{{#R4`dMZoKqU4h(t6Dw}&Rbj-O6n;x(9ktp#<+693;%quqDZ9!U zxNFyh`o+Uw)s?}5*cFcSBDs^r1P^9`m+YA1!sF<(YbnjM5H~yiwg|h17Q(HG?c{ag zDwpdiW9I%cjrHnkfQP{#GW8z9bhBfKJ6^_@yjh60h8fUNn8vQYO~QKjWT8%~i?9ULUbZQyDLd(l>`Fj#L6Q_2pnw*b3(Ym2tg_OVWj^#IuccD_ z)HJaQv0G*oIU)iIcM93Ec^^^uAP~pLi^jRnVi3|ln4PLP%R2|&j zFLSeq5{um6?&2_#Q+1|Qw>LrI;CR^Go=j%WsjR!h63+F8!p6_LJsO^<)>OS~2yK)9_=TBTHQ#O?v$oP}%P@STuVt z*M8fSov4i>hp%NU{P`UCsFRGEt5;FOjbsekzlr85g~O`+-QX4a3yX{k>CeRsI4Us! z8q`O?hwPnfOim*d1sYPnW1%o;@+k7v`HDWDWd(r}U*L-K2KrtT0%@Yyn6l-A6nTR{~N7#i-&K6_EEl_H z#8TgH4SMH&0|(s7r}j~e0BdWoAt4sx1|1+nC?u#x&cvQC0f{bX}x_n}^Jy#g=1)Cf}0DT6+cUxar2L zE8AiB30p3u_7{3jJ<6mKCJS4q#X;505A63VebePzlTcb?6{&9B%VfmOAo24uc>MVY zg^ezPMY$C?qC5h`jK|QV=)Ng+AV0FHA`4iE)K{Pi%QX{BQs3=pY1-FnJOmCwv zgxtE0GyHl{D7}Vc5$|{`H*odHhFsa*tHG224@2bhXQLkKu%-s!W zl_N&u9a?$W6$HJuCh+W~Dp{m-VD8rg%tGWde=yhq1@8`0*`^p6KUshQca`B$YZWS* zttP|Bdn`!hmB_yjhrw4&&@5sbxJIkv>u+w9mY71j#^0hbJ$|GcUf0Vt9a2JRlS$}2j;F>S$|#>1&8aqC;os}bBUL3` zUTgPOQca%%w-2jQ#P}?%{hkah_N8PsAp|2jlF;*X2-xM{V~4a2X!f;nWENK`^qYH# zGA32?Mcb^P_f;=@SoR9t6KBJf4mZGJm&S0rPK)v}|k zJ+<+W7FvhTXUwA(wGqT$D&}{*3gRR0oW~yBH0T_&2b>*NL*$+?w%7k4MGLx_>s4e6 z^?KRu3kgi-dlh@*T#KqZ^q{Xj3)Sq#(eF%G{2Ek)Qg7UF#hd=z=1vRpRcU7rw#z}a zkqiE5D59Blqo}tsm%kyoiHo2174dUCOSmk@K8s@Dv*tc$pM2in-obJ#&gv*s4o)SP z=eyy{>J#`~HWdO+-euDR0+^QfF^Y43LID#pSl^>T(3iT8!rwe)-``Dw;K5n!+ad?{ zZEFtxxm3+sinD2nyaw5_X)w=qCf>6$f%|=v;5|1M3YXnrtA-*D&M=^7AuF&ny@#pX zTnv3m2Cz8jJj)8mfs5_qnEkg+Z19Yc5aqE0E=}@=)c8SQg|VRj#u@MQ#guDb83MU2 zJ6LJWU6yjah_;6hgBxpGDe~+l8g;0Sw3pVgDW%&fJ#;Kx=!yrYY4=byr;LxayiFVZ z2Y_mcGXxHE1=G=@-7ox;X{O@LdzDj>Nn7rbpX=;OK> z7=360+Kzk3Ho`^RqEyVKb4&3_w+E|p?2qS?Jn)=%I_`6;VAH!IS(E!cF6ogR`8^DW zl!w){_3b#2?ViFtcRWt*4=WKRgp}WP95WY=plU69%KP?#Dft{{-Y-(Yzc?5cU!DP# z1v)G%R}$V-_HkO<6wI6-{out~odjQE-e6|=FGg)guw(2p>`dv$s(dGq|C7&fEmj^@ zyULjLcdru2U5tbv5$>3KRHof8A-r)xZ{7!}1kS~r92V=KD5F`Nbt zFQF|@CFoUT9vQ8*LeKfRP*XYydJAqc=gp6~knC^lu2TgYxnMUd4c!C*nutkVW8lUY zbu%69I;NU^jA2wCTe@X33$z^#Rql&G@m4svEp-5YiF%=9`AuH^E^=??CV9ra@?XH3;25)ha=Q=i~;JM(T!fh@lO_JV7g@R8^ z1^t9lkpJ|UVcbhRR#8iH_9akb%Xqf_P7F;qu4GQ>D(r-Q6r^t3%>_zaK}-4$Max%# zbagFNNImA1p7ycA^#f?3^?r7CLm?{0)Xx~=>_<1}MN`i~88&R*L2|3!2h(2YgVw=g zRJ(Ew?Gb2^&}}10{`ZRnyWk&mF=8=4;`(_! zYp=kcyyt-Os}!kQW-|5exsA~t)4)336PnN6M8{W^aOa60^j;mqw$+|yhHZiT#H8Vr zxyl8COA6@+nnSnvPNCAGBiwTHC|0pih~srsq4iEP6r9Ts8HI3H{WTDQ&&1;#GNQq<&}76 z^EH&IcM|$$8#1oyG<7vT!x3e}xq&mkVo}FD3iLb+rMqu&(r2?lTEw|)=vj|?p2N`L zjxL`QP>XtN?=UI*<*ab*=7CB%N4(kopw!_;SN&G@1U2*Ss_WW(lTJwHd+Y^_(ae z+z(zUB(r9HDNuPZnvUPS4XU$D$ZUfI+13cz?z$u{W795tv@cc^YY1g7HEGzBYzEUL zCWA$1tSC;j1iCj|=lHvaIfb*D^gDeh4cbtE0Z)uM8K(_!|Kwr1_S6PO9aDjNtqh7U z7)!CW0a)GW#+jICGus*)aQ!xqoG0GI@sX1$z~&(C=8BnyhXrK59!-9mwCO^LIb3kt z%Fdau%j@$d$v7_-&D>;GQs2m}!k&JcruVb-K zqPQ2%EBSyoA8;!u(`s>R5XyJ5CEfOHfcqRczVSe;_m=b5U^(ou9J&sC-*sza3 z_h5ku50T7{>18P3d$St0!Ez2HU(e^a*0(dUmuq3wyH@H77eLACZ*Vsio1xORB7T#OHXW55 zN5-w!xG7u0*|ROZyzb=(l=nEC=@!jr^PhN&e5XPzu}~v}T3brZJcml_krnlC;}$rnjMoR5+UUP;0sMURoNPs7xx3B@Y&q@5oIVzF zjR7JYx>=mLrvDI)2}jwU_5B3hhfV42rvfNgD}aUZ&$u6TcX?kOeHN*hhu>bxgLKDp z_RO}0bKW%@RZcx%dYh`a%*0|8sE5*}$NI1&X9l_6PeT{8QkK z6q-Ikxu<4qP+}MjlyBnNkIjTXljn)v`8g-PGYbqJKgLT#T2Vtah&IhQ0tQc)Q^t%P zaM5)z_j|)jCKh^#+t7Ipdqi<`x6kis`<6rzRIZkFCXfOzj~aHD@Jn|pStjkGUc;Bd zXKMrD(x(<0-aS#EsL;(Gy=>$?U%f!1FU4?G{~{}lk6~5VU&N?VypihE~WDvYwajRh3;X{TG>c@C2Aou`aEfM970uz6EHtI zoc$PFf@Atm#~rMi{UT|WxcVYGKKFvVn?=0Iw9WYGO+2Tl{8*@55DjI~1K>}aGE_9A zqxWDrF!7r}p>plaZs|(u)z604i?ZP9@-*17{|Gn7uZ}Z`y$Nk$zvk@xN?nncoc%dT?5WAY zhGA==pWbhb*~deESO+;)pQfC8LmH4XfEMv5Fi`m|b{5!C$f~6R>%0t**nI>2p1TN5 z_SW)tbpmGOKNrF|aa>g4Pvd_|!?x!f`iohxkY%g*nBf{wb#*-SOtz(ZK{)n|JtMHm z^kf@u7qCMnukgmc0g!r12@Q`9GW!{N8|4CnLG=`|;4k-a!P0&ZW^j@ux=-Wk$rb2! zb|`79PXob}19ah*9X)Kg#bPE;LI2prIMFj3r6mr--Tc#F{c;{b;dn4LT7mNSjcLyO z4EV*#RD3YrOg-iTI6v?b?&?|tlecJ5XGWF?$KYXl(LHP`GlV386d4Af3V0`i*^kB#1HxTkY;?3pg2JZ98*5a zTfLR022*k7Be99Rns)J8S!X!+)gwt)=n1v)Y4AJ3jV2bKMz4&IsL-(qnzf&C?%}`{ zRVzXJrpKJbX2P9fz;4tZ<4Z2DrMqhs8LzEK_vJQ${qVz_TZ=D9ukS+1h?!75VKgjU z)W1TYtq(&S4p1zg&VDzlQ%2qdSfbgl;(fqe3Uog~x@w|WgZ3P%GvXh)y-TPH6H0%|s&o#i zqP5ZoB@oQ$&pPg1#XtQBJ-P^$v9Ecl_Wkr> zkS&N9<=x$4S)#dhBF?j?#$br=tsmgA^vO%d3PaODPT zseoSkYUU&thwnDK;I)8bY(=g#jniAKphJXd1aaq?I%6^l_nJ#W;&5QO!X~-@G9-%lrSDr+A!%_esH<+kkZ4cvuiXuWKU6A44HyF7N0f1EXbh!# zR&nX;UAP^)302R}BRjSST5}HL^sF@YnnbwX;hi8IK7jmX(lBF#Fa7*2OPa>9kT-HO z^t`pFD)%Jv&0B?mZDruQXAY*M+0jl%751oUD%rjn48HM?Sd`LKGFzMv=bB^4{c;1F zGw(7rCY*x#F=nK1G#^a!O5tIK2R}x|5gWcWFq$0858^_Z{azow@Wd_>TmKQ`dt}%M zl~Iu7)8Fj0$j_49D&nN>v_lga3+x@?$YSLs;mo^MG+!D)*#nQ`g&Fm1s=)%%D{Ew= zorkxiHQ3=baZKo3j(N$Csnoxh$uF{H+K<#}%z|*bHF`4*T`EhBx}y2Tp&KI4hOp?2 z1*|G&0o7G5rHY#KaDTlnUD0=DS^9}2JJcEkE4SdnmQxhtp9$6%JgKB=Hv6nLi&Ijl zVD8lneHJUQqnd+YWB;G1dh{H4X$ZhLR+7{ozQPUB`^f#c6le%zhU$RiniyydjHgc^bxY!iS$^k5ZhWUB^!2{L%hmFD?Z|1YlzcDvGHVz7 z&N_ny=Tx{+8|uildniaennB&-9hhvFjc+%PV{$*wu`kY_x!7-8K*&91y;l+`BxW0} zdAEm&$H~!Dot(YUTmUQlI2d6bkBHT6$EGLIB_p6hs-%w@P{PHBe?K2@?|1GB5-kH&b<25W% z>^Q{N1&ik1sZeO43L0Xb?5p-Zx{{f~-@kE+QxLvoUGJ}R9deb}(+~u{Pntm6I*qw! zzTv~OmyyHeBrOTK%KOV+<60I@PH<{i{d%$kguBF?< z!nxQA0W`GjA~)sxICWi^@R`^NK32h%HVxkgCd=xmZ`}tL+|foEnr<*grIGJ==)^ua zECkz-20=~2R1qd*NG`{Y@=M;;;9EG40r&KwW}OUsdEzLe9aq_>fo^)~r2RMNesqyC+mT|!W9;6>7xvlBsrPB#B#D=M!`!2um{Y&`B~ zCl8ADjir=h-^|z(?%ym+rm@PF10Z+9 zjp8iwm`^`(8hLmnWS^2l%jSh(|3(yN{q%q}N*;%W>(xb^gQpN_VZdUSKSiyVdDvUn zEEp7N4bs905Gp9KJPCCXCNcq4R}P^X#i8`|##!b#E_PeO$>SdCGci&&AEI zWeaP%xKq|^ z8g0UiR$RucZcyQd0rV!efV#h4VP;yX#6=y# zUb|xS3e97uuPDHy-IM8u)+^@FB1L*CLpbWvXNpNr88>0xxZiRWp^r8}*G5Tp|4uo5+PNB>DS?@+ zyABU)_QI2+?$kSREfz$mgWsoW^jlO*iM8{fr63u@hOMQDHVzX0m|*_;Skf@vN3J>* ze2i2L=XGKiXFJCp@)x_ZZ2fgKb6Y(G9bbpTC+P8cGEr2tL4ffy9tmRR#ZXknS7yEK zg=z2YVr)=OBis6MFmUQU?ESL{b%e{Q%6cL#^#0AIe&xA2M~V3M0l&O2i0en=?ua$iKf;sO}sQw04)xT3{(JJQlyNILVoA?3p!7O-dl)!WU+ zg^_6}bw7(rTz8UKw=bD~IS5`|O2P%svM?=o3SD$N&STdP){%Aye+3sav+v3@>w6*V z-lbfjHdYnR+abSHx>h*!+X;blECkw;l_0 z&dJgm!x(l%z8-C!tRVZKVpDVfQ{1~NI`H5}f6$1lW@AJ9S1f<9pLr;)B$vc#bh9G~ zG^3TRe&?ebm$Q_D5PDZ#PN_#~`Q=|j;fCpRy6mk$vzQmv z9+Du{;tC7v`!+GL45T@Y}gKh`cDqH0<3(yo3nKXtII}TkL2 znm?099t)&S69)5&#=y5*TVdcXJ$Ua?gC!zfOrEwkeU{DOBAfTKxYWzsE_pzogWqY* zgDARq)`ZK;zslS?omq;w9=6#kE ztVh08+XPj2b~D`rBEPHdE(^GP0BKDfX2rXcOWRa1k{iXB+q7ZkZ7Xh(G(n2gB$7yq z6xcX*F~xCnX=msGxb}_ zAQ*L)KHOQw?wi?B!ipgB@^&QIvr^#KFA=AOT7bIq7_wb74!+dP;G^bu@YB5y!)W8l zpf2JIk7_YSHO>wi=R|TZ&i7&N^$ql5_CsuIu7L*kYj{)T5XDY6ql3|h!A`ANxaDjZ zX?#D2%I8yXVAKdIco>bI`Kes`-Di|?d<+P9Puyj*mE2Dp#YuRD<>%N!)=>jWvph^S z`g2Lb=K;LAe;S$}S3>r&%eZ7h2Ht%o1Id|rq-Xp}m@w@)JaTCg>Un=>riWhO77^~! zQP}|6Pps(X{dnfWEhn!|0P7j@wEXx9IwKy+3b`;go7=~h?i9^Wmo_n(GvDE3;w;i= zkb;c&``FpjT9~A@7w*TuM=rXSs!ytmIALem4S2)yj!RM4>>b=ji4D*?M}ZC<$)F*g z!EjvbD5d`q;bgZ@F*of+U_WgqwOGfIfAB2mwQs|p2`fQUw2n$2<&41v9w34zz}Y2~ zLZmv;Q>l;*tcqtXt1I}4RykmA#rpJlCH%QgSZqIq4z$uPE-4%eSU&*8;b zD7Fl=lVZr+Vg?MUUrQ%rM0-eUA7YsNVDNw4pBjGIF*BJd?9RATv?XK=IA{V2t(QVo zumZ?7C*iFn1-wa)BJP;J3H2*`ap6<}UE6&crP4!~*PjP$)~;8iwe&37Z5HtmXBvxm zBxaa)crxd@;}2a@f69d&Kv1|d1#Rs7>CCUSXgm8Bx3X{_RW0jBb>;`D;X@KTsj!Ul znj1OS!`s-NFC$@I@-hCy@l=SDI82clxuU(tqd;A>$>sbN3(&W@#d23`Lt;NEXp$aa zI#3jYj@0uc%ep*vNJ!wm;3D%0+>aF^E^JDG3N4rI2cdn<{GrqZaJ?{xwa5O%e)nF0 zt*9-cCcVR$r*l}*fN1DhXhco7Cv!5F8~8?LZ?0yp9_J{xlZrxSusUxEY%U0arydhv z!1pZnV|oko6AK|Ni?7V_c+~&X-n&QD_2mcSZg!U{X-S`8VJW@P^ri@#)?el{rVgXg z1^3AJ!U6V2)6FWPRSu=?&x?okKPN7E-U}DPj3`ql9Ji2a`GgFaa+Pbl+56c>cs;+H zsV#^>_we5=Vf8&0H(kt94+@&(+hNq3v=-I0N?Dd>8`+FH%Ilt*&#VLv=WwnhY1EbB zpTptYaibDS{5=Vqgc$b2!BJ=_nZUR13?rGFk?cmW2m)U}7A=2~jn=MetVa7iw@-c) zUX?7OA!nXYTe>7>`2QpNqhv_65}oED0Bp%&8>G)y%@Q_&R17qD_|QPEUt-z|p0qD7|hX+y8YRJzJGe(RN*+ zy&wcvE0)2r?qbrjIg6V=KIOvfThaH@2KaI85oN!53<*(@^ud1>%HDIKo^6X+-{D+z z9I8jEZGv9?Y6zQgWCf~D-;S(b1FoFAi5|X}V{)1z`m31FUBx~UFX@YDdEr~p@f#x1E_D)VrVFx2Un!7g9K-T?koLK zM`IS+``J;z#r@1J;3{)Cumx;88pXGSeZgQrvB0js<6kseGOcq)qRQ;wpy8v55=kpj zW_t+uPnZYeL+q$wPnPJQS$_)uIiBnj2Ga*6AIm@Sx6!^~GQ=O!qPHOjF+%?x8oSOE zuU6VCFpP_7YyV7^?HLTZQxo9suhY0F-ikeC*Ur=-1S7s@^}z3yOAzf?qZhk-A`$4uTX0#8-hab8~i z%I6=Ar8LQB5W36)ed3G3u`82(*E<*bxKvpMvkh7Z#9S-8X?OnJ%c^)hBo=j(KC2+zyA%AMck!yJ_ z`3KhU^1jJrMlI@`8T2TGNDq!zLEXV3U}52UhHfm9>fMB6{~OGjyM~kmD6!(7wtg_ zomY_UAO#Zy2NM7CSge&+#!JT^!RG=mO5FQ}x0Tm|nBf`t_-zKWzvBtCxQ;eCFXhh` zTttN~f1JJXG)pin7F~9kMn}G+u~^AK%({IP4b?}3?A-{g)A-^ilfKlrLA^P?}1H{ z7VN-RMUvm31fNSr3jQ~}&=yxHK4&|E>y8Lz-r7!RXb9Yi`xa+Z@Y{>Iq?wchtBuMf~8Yj`|j=)g}`rw(}Hn6sqqq{!`;WEu* z;62b5Z6p71?0qo4%9w%GlZ~CWBzl)-B>Ig@L21k#C-m@s`LZQGIqW3@+M(vCeO zE}MXLGZ(TUt$Rqb$^qeQBN;2%L)AD%=2{nqF2=iPf0r$#kGNFo)w6?Z^T=Y3_2WtY z&vMEb)4;fqGkDc)%OHH}V73FxSX0my*1q8m-3ea;(On8uW9*6Nj+LX)c_#|&70y!% z(Xh!bfK~3%2Sw8?$aoV?YN}DxlG6m{2kMx+RthR<{eW!y*Fqjyg?+F{M5r+0n#w)>pv`^&ukgM#6OSN23>-h?C%f65Nqp|wZr z{|PugvE=<DlAGUZE z^74M+%w~!fEiu!;kwX<}%Jl>6MMD@R4lRPVJ?rVS;VxLPEeQY60ODShgHp0Kd90lV zD|42UZ}0+`G4(7fDq6<|YR$%q!quR4ZxY6~jwA1xhxoRU3viinE_={qL>DgCvv;?Y z@YnV(78KxvMb$Fod{)q!JsYTp5e4>mqSl zw>D;oH9%^{OiI5Y@Z%u@i~i*|ll+s3-XDtLUUw#>kM*#Wcy5n0pV&$z4p$tXjVVdf z`0ithr0{GbGc27b?nu}OdKVb?V9$F9Iwk1ZH1?6-!h!tAjd>8bFAF0Vm}1D0LD*|? z5Xyb;!wVU8Y8ox@V&Rt{CN}^(4z0%32Zmc&u5~2OW$US<{|=!(wVE#Ft%hO~ZOE$~ zK?g!4u}`uLV?G>Ymx}yRv2Ql%Y7Z6|d_O#PI~>pSg~I+{HuQXF8v89Lrsi9h$>o?L zYH4P&7w;~CsNXGo&{oeDXpRM5>42P2N&Fi$fn$rWq37@%@!<2hlxTg9jI3f< z_X|7mr?Ep&^`aqJYgd7Vl^mv3Npe;yWBJtkl9;0EK~L`mP?zWoZl4%WiB*TF=hj8+ zJb9l_7rDURsc~dKr2@*9e}jfqh1isCftQ0Lu(i4YZFf9mADsGw#pNZaJgtQSGTWH4 z%67{9dJcxnv_p)p5j>oHxP(_J7+LMj603sgg^xVN-W|nu_G^WBuhrmJyoAp0QsAvU zPGQ@y9clQXvr*)o!?f)q(8HlzG;@qZdBxu(SRiwQP5UyKDidC^ z2;I}+JsXGUb{fA(WK?3|r z>f|2kXJDl3X%?jXlCB%?;0?bX1NZba*86ENO+R=I-d;z1aX*s`ZFx*Gkt#2J6wFOu zZ%*o+mQ-uf#pmdnoS2tdy<6}nyG;qJ z>sjZsMyzzX$fvJKfthJ%aizj!yk?(<@mXH<=IA}tSXG0)OFZG@wCkkV@sgDUKgH34 z-fV{XTOps)ftT7N`TfK1LcOyWpA>nTG-oBiQFSAVa_T1Y8P51VyOzBge-k7u`ccF! zPbTwbEP4*!gjYAIfKqoR$!l-JwI=<_jwIZIwAg8=7gY*RH5{;evoUxt8O1zgBfyfo zN1E>qNo|G}t83rDe?7bq6yClUFF5IezE7r-S=TjikLicXM^;f&!4Y<;>lW+WR>TcU zdjyMH6-jz#8}6N31m;h-(wxJI&}sTaU{{Bc<6{-vsXQK2!tODPw>7BMJD&d>G=_8s zOyPp~ofvO51;xd^pszd%l+NtN-^-IBMp2cG8Xv@DQ)8&dErkU`5xVW|gN(c_%zT&( zeGNT@-ZM`yzQl-GLM9Vk$;oNc3kabZX zlLmIMrX(?0wO3F}?Q}Z+RtLO?r*S1GggU+N5eNz$fewNOwB00wa`j!vsc8h8BkVD0 z4r$ogvj9ECgW*NVDZc!eB>wg`r53|m%-31k%D^Up3R7dD(r^afzw0pe++F}l75BKO z@yh6}sl*PBErQ;4L+I#fHR|I}a)ArSS#^>b6Du#l+GTrjdHN8%sxTWY##|s7UtfZb zSD?_pomYukjwR55V_)1L^AoesFgZ@pxc0*_aR(^E#*b6Jork{r)HwOQ!@xtThT?~u zrkSPM6h2RnEP7KgXw*3}*{s4xtOLw(RHh{3^=zHnFD}XZChpi@k4k~}(L;3~ZT>S| z{O-9mR~qtzHykI6if@Ner>zC1mj1(k@y)iJx;B<`5>FyOfoCaA%VLWsMq=vA0YdHN z1AK5_${RS@BX3;-Ep^Up&(vUWxu;C0zY8_c^9Hp4wz9al&H%eL`mC?iB1w{ z;GL~Ei}VOWowreBlM+T}r4_B*^#e%8SP4r~l-T!0(cn1Jn(9sju&qb$Fq2hfs5CZR z(Cl7>8bx(b-j&D?wT=gwy{nmf@&^|CbOzX)6k%4mHD1%n=JVSpqVjuLjMoiE@7(X; z?(mq?9h4y2Y<89&=1jm%!D0L?y>D!t%}O|xGl(KazF^hTf);h~V03L9CHRr4;5{2l zSRSzp6JnzI-h#vM+(w1$MBz{(X~y(D)cL4~zq!sW2YI!r{Yhr78rDS|q^01w;8$%p0cUR@u-^{js~vDY>wt|Ecv`2 z3?8}Q6Deu@lX?eD+jS`E?<4kW@H#X%oWv6Q#nQXk`P^l%rR?+#W!ml42nnYY&njB*p?e}1wb)D0y7i-7KP0i^s8Hv8yN%h1`h)( z6f7`;7nH7%T}C~OQMf_7PTwTIE!(ML;8Z&Fa{$R&PRC91g=BTP9@R{2aaxEJx%91| zKqm{PXFeBJj%sEF5nuT;ix`eI-blHQ1g#(AP$W~$KGm#Y8Hc0EbKN|)+VM5KpUz`j z`z8oHuZv;Icf`Y*hmhArfx#LjLAzcTaQ=;3+05QtabCx9R(^jSKT_`@?A$VhM#^o2 z$P!N)qgTvi-*tlZiw4diatJz1Uyg5%^`e8oQm@srqsHoBUM#Z`E~uzr`E^|`vD7h z_l;L95Ip=Y4WO1SkD=!f&xZ_af_uB{N$>V;c2{XS#$Pv~Cb25bRXr!NGWp3|7M?-> zzlm%JH^55w9+G##JZiU*EMFa`jy{K*Xg^iJrT&6u-#!DQ)H-3uXnES)DKK*Tf-raN zc1jNjrOGMWE!nFAqT$h~=Q9)Iu6Rf|GxS z;V)-Du4kbFz3!a^)q^w0$EyqTKE}eG`Ye2><4;*yUeqb@P;SC}&PIizweut1r{OO; zPPIhyJSWy(9|`BRhggL+Uu0>ke95(P5>>zLg1c)qX!WJf!hHQnZ$~G)-F5-hipyEt zm93=JH;I04QASf63%um?o#|IO;nQ);_`4gG%4G+9XLsJe6=lAxrTq4E+C6_0eM_0f zHeCv*2v1cK$@r17S_$+u7L$xlui$^GNTQQ_Da<4i14eFwE7Rrajiwsi(d>en%s;&N z(;6lfI0dpFx-jqR(FB$DEUR`CR^L@7d&vq=x>dpMl^C(!OEaO!K8EBUe_@;U&0%?} zTBNW+FiX6=pLIx$#rW<6G~KO+>k3^VmOiqI)%7bwmsy|iro>2=4aec-!bD_W?uvey zcQdE1A+*o*3_n2n7}M4|1Wm08pyS*RUFC_n?pcPq2ckhMBLJ_an31nXBU%)RS1}<~e*#iGvv{_E@ zEfsX4DRU*@UF-u^XsE-2v7oq>ti;HS`ZR8NNasKHQSW2QGQbN`Iy?EB6Z4 z^T(b17v<78r^~R79ip3C0>P+48Ye%ULLb&FqMv>Gw6pjrbdL(a?DA(VYP-?zJ!Jx-P<& zjBQy4q}{qz>2vO@^!_jHS9h#tObHt>*p_mHc=^-s z4Wi+^GmN-(8Fo8=f#kbAEU$4E@sf7XQ*)aNN@fDvr%MB+Zn6r+4ncP~K+uz^Ba7XP zrQ4Ip)omz_mGUQ7)0en+j6JR#&agnLhL7&(;Xh^7vQI1K!nIT_COys?Bg;PGlK@ka zo~}kgwd-+){A=93dr-My=>lBbmk%DnmI6aF9WLFdf^%1-$$9>GKF=|cnq_yv<=)mNig2LVU_gWN!l*~fBe!rV-8vUJFt?7ga(I3vPM30s#4h+pZv!J-hzblJtF1n9z@8Oxa6o^<9nt%N#_>bNMI zP+S?bfsassEYy5kVX(BYH_Lwq@yM;@WWr(btvXt}vjns{=VRKmbV~8l!HCu+a9{Q^ zMe5jtQF}IL92P(e+P1Sd+m51=g@-`$B+=T`ll0p=7;HSw@LSifg*g_Z>E`YbdN#0> zNxMzMmZhbXCDh~0>_c#3Hmd{mwj0Ti7U}&P#r#Sbkz9Djd#-ri$rg_xdpg^S-oN zLoE1rX+y`KU$DEllXYptiesG@QtL1);UMPiv4`RB$Npi3QWK-_$l-H7RMWyZi1+gF_^WrhqtB) zG^M{c7nffK@=G@m&w?P~Zvoied&b$i?IKs*r}+C_5LtVuvW>Afcs;uiY&x1rfiCI% zT1&ytDWx70=T1Y*cTb_8dI0t#3Gtjf$t!1%;x|_V3x|(LpG8){7#Hez4Ba3K)@IgqrKlLf%P3x_P{XeR%bU z+@3$=j&%NF*mQA$&My&*`M3&N<|0b`5e)$|63Cp;^iU=xW*|RJ?SY zZBMraH)X-^p*;eWB_bK$BEpP?3(4TwPrgag9gR97&`htD_#xR$dCy$3ZeB+zi)B$$ zLQH<=u7kDD8t(F+zwE5pF&yEch^McgXPW=S!<@zj{y}pTygD#|&6%_Z5>j)>m{Y~C zZ|wzMMVE>~(oo7S0Rp3=;Dqi~rh585Cw+4*nJo|JM9cgsHv0lNSakAy-WYN|qEfDV z@(8_D8cZdYN1#&f0{6b@FdHxEia!)iV$b4aZtL#Z^oT#ozRW%g5rZycIc zcQgMTUL2G-;)}65wD068dbjlfcSrsSx5Ysf)#qxLk1hMdYDT+?4J5VDc=Hx`&|A+X z7CmB#Me5{HW-E9je?;X&ir}g?4~PB&tn_Terb=B(SlZ6JTov}J*@CyBw@%xYGwL{T&^x{>cL?j-G7hIk4=0<) z5@>X41!reC91Ck4&;eFajE60JepQ9T{BA+^WEE6>7RQ@iyvlWac|5K4Pc)7#5PUyh zNs~q)j}I!Uxbpl}P<=*1@F!YA%>!at^F2v?f948)TCkEyH-Cckkgr^xpWv&}?FXlJ z9>zUeY}H(iBXU#uIaZz6?;*Qep;zbz0qd?Yoc5NhaLXY-Sy zfD?FK&9(8gU{)+8bf2WGfz$9^>K~Zgr%pfoo7tzIv9xu;E%aHy@2WgXhYcGi6b|F@MOvwk^qFAJrVKofXSauqj*D`0C;i8!Y#h}jL#2mOFn zERnm*en{Cezt%eNynmeExqmQv+Ue-4Cc!XK4Yp;DO8FUQ0&>y z%r+|#a=&F${Hd9gKN=`|)^)z|vH>jAG{jp*>e%<|OIe!`?}Uw7iC)T{;uoU5?ER`T znim^G19BB<*qIN|v$77lygJeLqz&rc9!%C|33R_FimJDt;sRHlg`KDBNvv*y4)*uB zj>e@--lH1~DzbPxaSq7u^kOqVXR=gRU|8eD!xAuK)gjP)L$Pp#e?xua8VLQi8k+&MJ> zww)89X7oDVwf!Sk^hlLi#0Iih3xN%veu33LF{aT`f4GC=3#sHHLeAYmyz0*V(DU7p zi7eaU&n&>Vd6BGXS3Yl~y9-0?%(3C;5i*?r13u~xc{}lD*Tqk@9RfV>`_lF zU>RUwIS`@7pDSwFj*H3`P=fDnYIc-ku|wsl-6u@&68?elLq9;+jy;%hNCvEj?`0WI zJ7DCap^Ud_;Ea1JM4xQpAn4~6{)OK-rf8DMOIUBTd}=qHeYt)J%#+=jV$uQ57gE`} z&33$XNFnd~RF^H(SqzrXuCT&pLoR8iI`1Uo!qjG_L0@bTypQSODt`U7oYdbFd^*jT zO7>Y+5Pk!aoz>a5MQ6B`&!t48eDXo^N*z?)I>;(&qsu3`%v8wTbX*#2wZ3;8%EqW*@`8Cd zNboIswP6xj4fn@jq2_qsRu&U37U7%ZFqSadnL-i=lE(VAY}J@D-oWBKyE*FyOzc~Q zbsj40Nv#%kt+oc8P)|093Q*ci#!A&Hn$HV4Ni+21#lLHgVBDT=5_`VIMHNBlUZ6*H z1J0AqiClqWS&M-SLUGi#2=tnQkUu6J_u3IgY>*=53JYvdo-8)3+|Ck*&Sd=`3Au`} z9F*Sh04(bAaYYG7MR~HcEhT}|%KgJy-HuR^BZv7-`S^LlQ?9dr5G9GDz*qJbbGq_~ z#amj?iW6aMqs=^<9b*zcq1|Jkh7B`m{l}AR ziedwtxp0uT(I3cmf9Ak(NEACUvKFPzOyEaZm7taGZ5A}Jf%_Bn4)k3FpX`hXxZog3 zIs=1vj~~zYIe8}`rL=`}6=JT$nmV!#zfWBecd4N}p3F67Ld(6qu+DrX**o1X`{8gJ zTlbd2jvqr|tP9Ec^ypOE`OKeAgWP4mvrK?mpiVzra^SmQMrI$~19q_tD{#ezrI**BM%*6AvE z^R}z(ZvPuh_IED(6;{W+iLVg2h{ZJRbpgGNyFzYNK1_eeT7G4^;6t<73#4=xfSuWP zK^Pf=MM?f_vEpqw(|ZLDb{~T=M6kX}LFg|t0_MDp;}&U4;ihlOsFxoOQCkL)im@(j zZCy!2haRMD+DM<)EXA~M9C)?5amJw^12DC1y@aMUC3Of|uSmFb%NAi%$o$NcBG~DYiczm~@*N3))L}A8T4?nLuVTJXqbH zhI^ylF;ceUXSM#rZFQ9>S9?+g_NgXp=oo8!Ae=$NlA|c4!I>4l{)Ua#btF5dfQ6hX zgkzb*xn$FB+H3cLS!IN=nhzo1^Cg?PN(nh*6FJa48pL+o9so|WpYZnC_nB4PJht2J zAi5MkW@ES1lT%CtCg@v2Lv^|Mk6ITSX5S22g4RYQ{w*|XZ(~Z6`;+=G!Oy*}60_4g z*~G*hbm5sS-*J0^*sf85c6dg?#qYVO>lcg?gKS`lxg0z*Jc-xz>OjL^8nVsX_y!Fh z3_mf6A6Hj^!Mg{+BAZi8=5I9Xw`(AUxEv?x!eL~$zY%oh>OlRp4c9NGj-5}E!?3G| zsjX9qYPiqrXz?7bXV??&W=|RT*T`}43qLTA1>3;=;W^S+eGiANI7({2?||!#Xv|rh z2D9Z)L%;+-QHbpr#>bo$JAEyq;A6+3JM%gH{TNPa0|%kE#5fEbThE^nyyQpA@8Aq2 zcQeB!nb=?u$0nY6z-|`Bl{sCSLcVTxxX5k{s-4IeFLpE}lVcHVlWQ^7no2_Vz^`26 z8zKJjNaBV}jbP0l7UXu|r8uM|8J7Qk!&ItHU{p&Peaewx58o-`>v=k0uzVEcOK4E! z@o4t6*ah7u@1@RyDQMsQksLoKqKy4Jik*rnhr{#7eT^|9zwoN1JS&i)C zw@B>Ln}N6CIm*s9QU7&UkpI!GL)N$Dw+DPIFW``zZmZhr+P{vzalvBQ1d zwXElEF8=ZFU}>r?c)#r&Dz}ERfU1Egcc_gMr_RCS>jcl|>fbPbj|t1xam4n`a%A2f zfl5o-;n^b%qIrQZ9aXxsNJBWvFXG7SY zve9?{K$7pU^2_Q1&s6HkzVw>2Lxb{|jd2kqRu6{lhl602#!JEPJYT$W-4dFf8HsjZ zPQpb?4&_ewv7*p)G)dvX&pr=2^B%E_FH7*?5j%`O+Kx7#Q>k0HkIlYZp}zdZqUhCs z*pw14X#QHvT4xTxoAULL9!Z>%MK4=Czksr>iOF3zB3a!-RMu_@JO5m#sPv`u(AAhc zn);Jwkt!{De~Y&5KF9t{x&oiIsH!Ne_R?LhlV$NrkQU2gz(#zc~)JrqSCVdg4l&nCPN_8y1 z5R7o8fmE|r(Gi;$=dz$Di`ltiY0BSu1jD{nTwPLe zgYQh&rK=w%!sd<5tnAjinvvdV~n9^J5s(tjnoHZ>AG7j-rwAPwwo_G|H$sh^d;( zu)*8{RplF@Ftdf(%?m@NRlTf4U7GGT>tj=a75aR=f)B##X%FMcIAt?#+m{0?UPSPJ z8w}|3uXSwLqAu8fZy^mWJqNj|GFW+vQJ3u~G8>SAI@!lT&F?z1y19_5mJNmUVXEYL z{3EQ(U4^;va+Lb89JkJ$!M-1uz?th;!HZXVl)kl+y%D{o>`^zc@4N)2)h%JKR;7XW zAzf;Ee3hZGKBo}&i5>bKg)A`xdf)hR3PVju?$mB-t6Byn!rmyOv=)cFH-VPsb7Hqy z9$32T4jgcpY^iCwn~Se_L30x8(5-q0)vLT=(v>%X^A87qr5@g7SpvAH?56I3zar7C z+j#v%BW~HGgE_uyQPyTE`OB11pysP7dheIA5vPyi=Os4$_z#2lzh+f*SuTR5*$95e zbMZ~}iaha|&Q_m9w<%5({ZAekpPMH3 zGu4KiUT;p{J(l@6ncS8gvM_kc(+oSs*239qU|dCux2Oo zJmy8R7cPR;^QUN-^M%dx9mm!yrqT|>XeKl35gq6%rMVpz5WZI`mu4mf$&NnlV ze=SokyY)G3(Me;KZA~n6pFVU9@L~AA3~wA(L%*Iqm~c3a+a#gGmrvP@fro~XW%^Um zq+_?}$DN%h{=0|f#uPxN@>4c#Lekp!MQyESDPzIYJK;m8g4o=FL>pGdv1Xrq}4+ zz8*LdU&Ai$x5d}-*WsV6FuElf1+r6ov8z}bH^l|8kueV_F6JHht6EU=yh-KeSxc#9 zb~DqJ-9Rseda$YcK=fbohu5DkjpYx1u|WGS*!#~v*kt3xAm;&jg&e~GlXI|FsK@NL z*hX$iH7vfS7UOOW!L?30_%-Yboj)Lh+vMeFRCF;&jXupOUDE>T=L^vu1O0tZtmKM0;j{c{pctr$G{W6v6QqG}Of*zK?&4z-J&Ln<1Pl&gIX{^9X zb?q?_E0vgFs-F!ecvfdfonz}O%> z^{N=@Xa?J69L*eteqkPoE12pZXG&Qk595`r!FXpQs>Ta?#|y(jeCaq?r?!Ir5Krt| zHjsLp{_+jN{p^zai@lQCi0}ICq4?Vy(V_nX$b4^2Qhn0&iVS%}Sw-~LN#IRu25<^b zGiiBM1*yawM5kAgw8@|zD;ff*@>K{NRzJ(eI&Ne$KD>i;OGg}H{*Vh>XUwD&qu9NE z$AMNqFJS0CPgt;2t`cmMC* z;{Wyf|9pL{|6_fE{%w8!-|fTuzq>#FHSd32|Ig=N@gMV-`?vZ3f9wAE*FU$`(`3>P zul3&Rw*9wv{_E@2>sR^zk8#s~8#n#8akGCLH~XK)|JAH|&c8nJKd=9}0{^)J|G5JH z53j(8;4s+vL5k&UdBa!iIn1wHeiat~E)~2jHnGi@&k1l-4|A3{$42^ycy5{@D~{X3 zhOT-5ZcY2ZxJHW;&k5s87bimX-&Qsw_!&j?VwshpR=KL9A?BXrX}`pLQ16$By6t|D zkhFz78@1`ErZoK{O6F8=_@gY?kz#tPpo>eU#%{s4blO&Wpu8I2)WqYu6?LF{VK&^n j*aUm`&BN3|EqqXg7_K-UW}cg$qE*`$CM<{5fBpL}zS%)e literal 0 HcmV?d00001 From 98430457ef7c8150e23dcc7817d82f4edc64e1be Mon Sep 17 00:00:00 2001 From: Kanchan Chowdhury <43951087+kanchanchy@users.noreply.github.com> Date: Wed, 19 Feb 2025 01:01:11 -0700 Subject: [PATCH 15/15] Updated push-down parameter --- velox/ml_functions/tests/Job2WayJoin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/velox/ml_functions/tests/Job2WayJoin.cpp b/velox/ml_functions/tests/Job2WayJoin.cpp index 1934a2ab3..203d7d9c4 100644 --- a/velox/ml_functions/tests/Job2WayJoin.cpp +++ b/velox/ml_functions/tests/Job2WayJoin.cpp @@ -1438,7 +1438,7 @@ int main(int argc, char** argv) { bench.modelOperators = operators; std::cout << "Performing Join" << std::endl; - bool ret = bench.createAndExecuteQuery("ci", "rt", "ci_role_id", "rt_id", 50, 50, 3); + bool ret = bench.createAndExecuteQuery("ci", "rt", "ci_role_id", "rt_id", 50, 50, 1); return ret; }