From d55e9e9109dfa8c23025b0051218cef932a728d0 Mon Sep 17 00:00:00 2001 From: Clark Wang <107419732+OEG-Clark@users.noreply.github.com> Date: Mon, 12 Sep 2022 15:24:54 +0200 Subject: [PATCH] Add files via upload training code for SimGNN --- src/SimGNN/__pycache__/layers.cpython-39.pyc | Bin 0 -> 3416 bytes src/SimGNN/__pycache__/simgnn.cpython-39.pyc | Bin 0 -> 8217 bytes src/SimGNN/__pycache__/utils.cpython-39.pyc | Bin 0 -> 2495 bytes src/SimGNN/layers.py | 94 ++++++++ src/SimGNN/simgnn.py | 236 +++++++++++++++++++ src/SimGNN/test.py | 122 ++++++++++ src/SimGNN/utils.py | 87 +++++++ 7 files changed, 539 insertions(+) create mode 100644 src/SimGNN/__pycache__/layers.cpython-39.pyc create mode 100644 src/SimGNN/__pycache__/simgnn.cpython-39.pyc create mode 100644 src/SimGNN/__pycache__/utils.cpython-39.pyc create mode 100644 src/SimGNN/layers.py create mode 100644 src/SimGNN/simgnn.py create mode 100644 src/SimGNN/test.py create mode 100644 src/SimGNN/utils.py diff --git a/src/SimGNN/__pycache__/layers.cpython-39.pyc b/src/SimGNN/__pycache__/layers.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4dfeaadca5402fb66238a0371e294a1fabf0c6e GIT binary patch literal 3416 zcmbtWO>Y}T7@pa0yRlQ!KtiEn)mBP#iJHIxMS!SUQT0%(v~XEPT1|Gx&N};HX4g$? zIb2$)C;o$T%#YwF@CSw?r<}O=KzZNUjk9(VRF$!2JoD~+yiXoBq5E7hyPkk6V1~oc}PM%G+vX?;H0njjaRy7g3JspkXcii71C#9 z-9CX`NN1hOW_OqdMUh=U1>}{w;fmXXA_VJQf^E=QKq@ZK+N}Fg$gpx7zeSJ(Oh8mHHiT`MTV{LR- zZHM>R9x%ZYmx*4!?cNi;VFH(hb9ehZD9TfDT4&voW7x}R!jhx3_VZ-8MDjU8Z#Gs$=ACtGYA8hYD5g8ts@BIAa z@eUYfr?k&b%wEFsjloM{!O9lY+<}G|ZE8`6j#j4zEbX^(dar=uw5nhq*WZVl--MBXF;EKC8B=gHNDs}i2}x>>{+Jr#G22HIFc*|MU?Ow} zA{($C!vro<5;8OpoE4a(DmUGjKj*P4_(1R+vPaE53y~_rc(TjG5cb}7pYnVd7jD*d zi#_f>yx*x!7LZ3Go!bmD^FNMqpF`$k;%pOv@ec5&hf#9!4$6rVG?Wjek#Lqu3*s*s z#>#8bC|J5D?L6uwSrkewNu&YJJ&?3mw^V@2#`OMPyBBA>EcSyeE%=MVTPg%ga|{9= z`sFvTUG3%PK?YWl?OE&0g^08(Yu|#4=ZXugRobN2=@K2S%!f-g_qc&5ltQ`z3N?K{ zQSb`=1>gT=$l~gY?fYRC_`U)Vl~@AfNvuI5TNMrkJdRP_<6qSlP^l*W?mO~AF zq2bt$rC=}s`AosdSpF|*ECG0AqQi@WOzc;@@q>!diZ@DdajO|_sXLNK08}=NF*@@o ziDD+A;^j<$%w*RGbBI7nhS)ISPi$a=iK@V^W@GfEP^MItRrbVrUqPM4@z2gHd^waO ziQLC6LTE0Aas}wtk*)yUo|^*zUjqIzsUY7uH$x!?kU=BV0U-==6^=SVa|wj9&U*6< z5XChVBg~fie+)g@E)@LxDIba~&C5KyHiP~CZkz@C(%6kyej#uo3w#ST$HNs1Q0pHD z*r55&(MIkCCVNjxK>MS{bxNp>O{9q0*gB;Qu?h>Djglk463zaV$--D4n=1t5trY@nVvQ|`42`V-xPBmgT#};r;F@O* ze{-HUHuJ)rg0joH@V;=_>09%R49;z|HNVkT8ot*?6*ZNJj+9ipz7o#h3@RZ$2M4XU z%Xqm7eQ_Hb6)Lx|hn^IeFXApVQV-x!q+$pk8UoWz8iL0|6;lv+_*U|+Ottnm>w_)N z#%ln@dMiPe>_#c@(Jb$HEtPYMm0+Hn>erAG?4GH7 zXI5rJWW*OyVY6A&@cV}^e|7NQ8=CedRTh61D(~aT{tk)IgdS^tk?X$Bb;CC}H+_q9 z+jo!~aV4qvmBjU3oyMDSHL3YEE?aRuY4{B;+i^2l@mG>nf0gS_yq2u{>&b?{k!<># z$(FyBobk`-+AU2~g!@DjZlw3l_Rbxf{&}=jMGbAW=mP2&g?>kC)t_Uh+Fr}ZEp=Zc zxuay*??f#Qm1rsTjslzOf(mQ@IzZhpD_* zw6*NKF_39H%CcY(c4fZO4&(MP4pkJyX_n=U!&D}r3OX_z9OczG4Mot)(th6H;$al3 zp^VzJxy7GZRn_=-t_?v6}FE&m3kkAneuMY z_Gx@K4rNz8@@}NbV5mZ(4li}2)F9ivd^wa~bRTS|vU52+$Sz-b=dE|P-}?UB@B9G| zYEO?vr=#-p$=*aoX-Xe!$Hv4s(R6K$a$t@Pq}EuQ*wn_HG)70f7#(1D5Xvy|LfOf1 z3fUPZU|8m*2fe7Rwoec150isP2I*lChX+xNDSO=FrH5WW6_Hmo&*$y6oZQZagGkCI zHki9X(Cv3s5abrUGH;eE7W48(7OCL?i$98F)-rP|i{eAMhME4_>32WV^J);Z;}C)q zJk$Q?(#_qwpURZ>o!$N6r=Q#fm+lt)xtn#9PQSlBcqCV`);Tjw%~XnukoYUOSXnBAKEy8V zXSsFIMcOQHL(oo#ef3P2o7k>AJ4@)~`M2g5Z2r=qoSFRwhGY&B&1&dd#^_Q>ns1!+ z0=-DRF};3<0)$T^;UnSf5v|OgbxdMua|>$R=5Sl3Y;(BH<+iFKnwrNX&~??Am78j% zw>nuveVs>doY1P9vsSaWA#`rrDmn4yj1xB`6vvbkg}}{8{+xKe*E%C#MJ{bh)+u30 ztR8k_1^EoFCcW!t4Q7Gj_jPo}O20{wj_hzDS=5QFtk01hlG^t!B)>jf&DHIy8}S!dmIE zIW{MzfC4@-9-B%7>rJrE?m5DIYCN{a7RFb`)`|X#Mm3Cu(#BYnah0smf6o~>H51jhMOiEy3+r+6Tn@@ZG6@Kztpk_;#7btV(>&S8=fgKi+f}zQ+ z2i@qQZxb=vET}v(H?v^^{XvQ#--Jaueejnx?&MCfh1{a$PKimrIoAa<(!YcCvj!5a zvIcS4&{vR-F20~VW)qgAhO8J#4yz1>0;vpGF5?5p^k*d@x4KNygrv$vi!*j0QB^LX^)ZYLWT-l>p})>!>u9Yn ztn>HE>bc|k*gZ4|GCnmXRpf3lzE|tjCooQv<{0y6CPq`+3ffl7w#K*tJysq4XeMbN zrSf4Y1q+w=u&a&=wezCASTE^H5{{6+i6l4M(86R^ z7R0d%Hy{b)?kJLPP|pq}Zz5?m3sq&&Y328+`E5#mpAu3s@^>luJxVSk@z)1w8k3Qm zZTrQh9Z9eaK$P_AX?dMtAYqROK3$PdcH?nZI;Nk9mO#Q z&+Rl>W=SxP@ia}%R*<;5Wi@JU)m_2gS?+72dFe-&?0r0$Kr;7;gaHr76sE9{!ygh3 za=UN};SsST2*pts4Ya$WDOQkI#j03CUK8tL19@F+iY?>~aYmd)-W2B`=I8TfL8N_B zPLW)?=d>Mq_?~+xu)hKi1rQET>BHdOvfrX%Ib*L&*rH{z%;n7p&WIp19RN7z)lQ_? zgUs@J0Z+h!C~g@A2qx&ZKu0DaIRj%ng)_jefUA$rF75g2eF76RdGo{*g@t;IZyB+g z0vmp&?_sZ`&x%VzYBINZS1*NMemIP%sZFjA|U8w26H zK+7U%XAk%!evNMusI86w6~bfy-@d7$i~x1X1Dj1^ZgsP6A9%L3PDSUGC+}jjr+IP< zZ9lu50%%jw#|{lukbR$uoE1AXM$6C@Y{QId0RVm1r>rV4ADisZK6u?RvM- zO=ypXWreyg`{t7*c^~9BbATl`A>Hzi(Nxl&#|Ig7XYPc9LDUze{mv`pQohZ|^%Q9l zSs|Y%3d&J`jU@}ocP#y){+d2|b(zk~!CDeGb8@bK0>^ zFdqblDJ~56tld%s{UM#R@fMG_7usRyF}`SblszUN5GMVqhO)(F2W7~!awk<%AXu+P zOq|rA!73+!jFScov_5H`08ol5k6ux$WBZs)bOm#%=5Wm&SH`YE<7!taty2m?XpBCe znd=hu6~>rS+)LS=3d9|N!q*G%7g!ad15dSxk+F6_3=Nv%+Fkgmz?y^~;gtey+KfU8 zXqe|Bv2Y7}IA3a!F#sM8z9#-wqg0fV{_E@zML zyZke3fn{67W6`?wJ4Y%IQ9FFJ@7o^^()Lly@a;GXA4It& zqO8r*;#WQ>><}g*IHbZ@-UB^Gt31Q|&TTN|-8VX|=QdhfTg8R9S%`e=x(E{%7BG^c zzY#^V3dPaHizK;`frO=yJPDIhg%jtOB~=OxSxDVho|NCE}A}syI@bX!D}-hcv#HknA&nkbgw02yQLj#2CIqt#edN zXf8JoyXv1&$UGz(bOJ)Vzjn!`vlNBf8Dd#9yI8NaDD- z#d?8FJH%p>Q)0e7aadz~1|dP50{Q_u0h)n~oHcRBh-6*RNE8WSpjRJ5Z%`fLrjapY z4Pj1P1UnHGn~pxl8bBo9!3@?NB~Tk})oS>F62)HhAdH9n8DYD|N~G1u?ExZvs^GCj z0}2PI(P6p=N)FYL+{3BlUr@r@W1mWGO6Z#9YrI4-U%d<>$%uMQXq`eK9MX(sPRMI$ z`!_u0rFTJZ3bSt@-vVD;{Z)M$N!%|ygKY+sOB^6kBZxtHMuIqnwI}+|xy&DO5EFPp zm)y7sr~zsN@qx0q^e7w35~i}1Ly>}GgRZHt31T1=Z3(~$^cbgmLqbl}n^QP?j?%ko{jN}#cg&qmk2#Kd3W(MCJVVtkru+gIZh(Yov!3e(A> z_1Xs^KCHka^Y}cwo=mV-u5XVvOQ9kxKkeLWRisb-7H{YQiY!i2QKO0Yl)R79EQoY~ z#RFd0ud}mLREm?3yH)tF4^blInAMPI4*Y7^gNyp;wQoMr60r#=&s-mhh)-pNyhgfr zJw$@&`{x}^{vonA^4HCU&ApCRvbLdMRb74wB^T_a&g2se z|1UgWvrk%QueBoI#}Ij)k`E|ZrQ~Bu$PZ%;UuXx?<1(X!F1IA+Ng{=$_bl9h)&wN3 z@Mz zg^;Zbn<0ce2^Vhn`vu6^;3_Uo3WFxQZR#Mx1JCg1ME=rJd|t8f!6wcMyiCfDVJstM y$?*6Rz0`qv#W(BXyQNcn5akWO?6kRi6{3#e*GOeekIZ%lPzQqDK>Dh=)%{=kfcNqM literal 0 HcmV?d00001 diff --git a/src/SimGNN/__pycache__/utils.cpython-39.pyc b/src/SimGNN/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c95a50c1c208013adb0a38490f3b89cdfc4f17ee GIT binary patch literal 2495 zcmaJ@OK%)S5bmDG?(BN)1QLiH0vR9z_J9RJ;$fnIh)JYKS*#d1tQ;+!>GpWy*_m}u zZ=6`O7lM><;J`nykNFw=jX5E4@;N7Rfv->V14h`XtZIE@{AE?OUj+Tu1U z!=A85{8wi96SnK@L7s5qK4KH6bap+-r4u_b-{YK7|1n$V_JTplm!V7J#>Y&9R>Bw` zmuT!{?GWGfaTb{*&q9-jBb{U>G#e^R^SB6;EEHjuL@LA;$Encadaj4UEZT)jBca7G z6gn=J!WEL0QMw4ng_5Lb_Zj9=rL}(5I#R0qA4NKzwwa!wfpUgyzJw}fCA)KB!b`qu z;ooz1`5tg6`2vfb-<>TFRyY+OJm(?WrdFd=M5<~>Vw4z>COZnl&f-TE-+am|k1qO@ zJ%4w3X>CR4_f%wxwI5b)t=;JL)>e}tHDi;c#p38;)xMG@g^BXv=;~{@$}<$Ua}g*E zEJR8#UP<#vq{USThnl@^v-0i>T~v)C*G9?8+f)w=O_ucoA>}~l+ePKuEh&)S01bjz(r)IFa-Us+b?}4sY<&e4gub=uc6H4kOxJLn&wz!*+Se_CT1D zxW{*Xwy$?v37KRu9{65?$6rrU6>cYH0|O(G=rCUoV=YD-#nfYugxR3Sh92l5!keq# z_3YXMEv?L0XQ32EEQQO7C6>?+>z!$|*2BB_CfCtM<)yihPo2u6oq-8dY7i_E7KF}7 z=aDLk0o`k!%<@pNMw{z5(MDwP;pal2at)>U0M()OGi*}Ja0bXgq~!DP^E1|kh)6qs zSX{1&Gy>HPVMC5kY>kBmq0_u5Lh=%fW#k3KFB#tWc6k~g=+ z1Qy52EQQxaWX2*T*9(mnS&^>~r%pHkw}^7BmcrG8iz%&hUEg7!pa8ZWd~~C7u~FBp z+M}Z@RgL1R||*N*lQk-L{9P^vG590yHWwAE`{Ahj{W9E%K-VX?}*;`dB{4br-#< zW9i5yiaqvI2(k5xUAD)T*`4!i&cvCx2)?{@cO6PGdk!U%t&VZ;d8JJ-zd7`jy?x4E z`~Tq6BoSUR8Fu~3PmCHO9EuTci*R^9QQOvqD>o9R>sdgnMm{#9v8jU~0%%&Cw5ell zT2}(mMSY1QYF=^=LGUcspI~IlG^BW&KdosG&>dH zZ`o@Zyy@uAx88#JZs{R-HtIR(zB6gs(I!R%%C=?0=3N(3U;(;`bx!HV1diLYkW^kT z&lC{xC|g)dHL(H+Wl}xRi;%mmnqTbChXYzuHBD}Xdhul}=%2E)$Lf93 zp>BWnK&ZEQ3C=o=4{U({20In6qhfIJHUK@%9j9&OHu2^Q%yc{YBAR~79R9A@q-LK| zzmFN4hio{uLBev}GS0q@-Q$Hz9LM(H3VsmAsk%y0q&SO;a$CRyH|PW{ly=|+&3^$d C++0oo literal 0 HcmV?d00001 diff --git a/src/SimGNN/layers.py b/src/SimGNN/layers.py new file mode 100644 index 0000000..cf2be61 --- /dev/null +++ b/src/SimGNN/layers.py @@ -0,0 +1,94 @@ +# !/usr/bin/env python +# -*-coding:utf-8 -*- +# File : layers.py +# Author :Clark Wang +# version :python 3.x +import torch +from torch.nn import functional + +class AttentionModule(torch.nn.Module): + """ + SimGNN Attention Module to make a pass on graph. + """ + def __init__(self, args): + """ + :param args: Arguments object. + """ + super(AttentionModule, self).__init__() + self.args = args + self.setup_weights() + self.init_parameters() + + def setup_weights(self): + """ + Defining weights. + """ + self.weight_matrix = torch.nn.Parameter(torch.Tensor(self.args.filters_3, + self.args.filters_3)) + + def init_parameters(self): + """ + Initializing weights. + """ + torch.nn.init.xavier_uniform_(self.weight_matrix) + + def forward(self, embedding): + """ + Making a forward propagation pass to create a graph level representation. + :param embedding: Result of the GCN. + :return representation: A graph level representation vector. + """ + global_context = torch.mean(torch.matmul(embedding, self.weight_matrix), dim=0) + transformed_global = torch.tanh(global_context) + sigmoid_scores = torch.sigmoid(torch.mm(embedding, transformed_global.view(-1, 1))) + representation = torch.mm(torch.t(embedding), sigmoid_scores) + return representation + + +class TenorNetworkModule(torch.nn.Module): + """ + SimGNN Tensor Network module to calculate similarity vector. + """ + def __init__(self, args): + """ + :param args: Arguments object. + """ + super(TenorNetworkModule, self).__init__() + self.args = args + self.setup_weights() + self.init_parameters() + + def setup_weights(self): + """ + Defining weights. + """ + self.weight_matrix = torch.nn.Parameter(torch.Tensor(self.args.filters_3, + self.args.filters_3, + self.args.tensor_neurons)) + + self.weight_matrix_block = torch.nn.Parameter(torch.Tensor(self.args.tensor_neurons, + 2*self.args.filters_3)) + self.bias = torch.nn.Parameter(torch.Tensor(self.args.tensor_neurons, 1)) + + def init_parameters(self): + """ + Initializing weights. + """ + torch.nn.init.xavier_uniform_(self.weight_matrix) + torch.nn.init.xavier_uniform_(self.weight_matrix_block) + torch.nn.init.xavier_uniform_(self.bias) + + def forward(self, embedding_1, embedding_2): + """ + Making a forward propagation pass to create a similarity vector. + :param embedding_1: Result of the 1st embedding after attention. + :param embedding_2: Result of the 2nd embedding after attention. + :return scores: A similarity score vector. + """ + scoring = torch.mm(torch.t(embedding_1), self.weight_matrix.view(self.args.filters_3, -1)) + scoring = scoring.view(self.args.filters_3, self.args.tensor_neurons) + scoring = torch.mm(torch.t(scoring), embedding_2) + combined_representation = torch.cat((embedding_1, embedding_2)) + block_scoring = torch.mm(self.weight_matrix_block, combined_representation) + scores = torch.nn.functional.relu(scoring + block_scoring + self.bias) + return scores \ No newline at end of file diff --git a/src/SimGNN/simgnn.py b/src/SimGNN/simgnn.py new file mode 100644 index 0000000..b13d9fc --- /dev/null +++ b/src/SimGNN/simgnn.py @@ -0,0 +1,236 @@ +# !/usr/bin/env python +# -*-coding:utf-8 -*- +# File : simgnn.py +# Author :Clark Wang +# version :python 3.x +import glob +import torch +import random +import pandas as pd +import numpy as np +from tqdm import tqdm, trange +from torch.nn import functional +from torch_geometric.nn import GCNConv +from layers import AttentionModule, TenorNetworkModule +from utils import process_pair, calculate_loss, format_graph, load_json, load_feature + + +class SimGNN(torch.nn.Module): + """ + SimGNN: A Neural Network Approach to Fast Graph Similarity Computation + https://arxiv.org/abs/1808.05689 + """ + def __init__(self, args, number_of_labels): + """ + :param args: Arguments object. + :param number_of_labels: Number of node labels. + """ + super(SimGNN, self).__init__() + self.args = args + self.number_labels = number_of_labels + self.setup_layers() + + def calculate_bottleneck_features(self): + """ + Deciding the shape of the bottleneck layer. + """ + if self.args.histogram == True: + self.feature_count = self.args.tensor_neurons + self.args.bins + else: + self.feature_count = self.args.tensor_neurons + + def setup_layers(self): + """ + Creating the layers. + """ + self.calculate_bottleneck_features() + self.convolution_1 = GCNConv(self.number_labels, self.args.filters_1) + self.convolution_2 = GCNConv(self.args.filters_1, self.args.filters_2) + self.convolution_3 = GCNConv(self.args.filters_2, self.args.filters_3) + self.attention = AttentionModule(self.args) + self.tensor_network = TenorNetworkModule(self.args) + self.fully_connected_first = torch.nn.Linear(self.feature_count, + self.args.bottle_neck_neurons) + self.scoring_layer = torch.nn.Linear(self.args.bottle_neck_neurons, 1) + + def calculate_histogram(self, abstract_features_1, abstract_features_2): + """ + Calculate histogram from similarity matrix. + :param abstract_features_1: Feature matrix for graph 1. + :param abstract_features_2: Feature matrix for graph 2. + :return hist: Histsogram of similarity scores. + """ + scores = torch.mm(abstract_features_1, abstract_features_2).detach() + scores = scores.view(-1, 1) + hist = torch.histc(scores, bins=self.args.bins) + hist = hist/torch.sum(hist) + hist = hist.view(1, -1) + return hist + + def convolutional_pass(self, edge_index, features): + """ + Making convolutional pass. + :param edge_index: Edge indices. + :param features: Feature matrix. + :return features: Absstract feature matrix. + """ + features = self.convolution_1(features, edge_index) + features = torch.nn.functional.relu(features) + features = torch.nn.functional.dropout(features, + p=self.args.dropout, + training=True) + + features = self.convolution_2(features, edge_index) + features = torch.nn.functional.relu(features) + features = torch.nn.functional.dropout(features, + p=self.args.dropout, + training=True) + + features = self.convolution_3(features, edge_index) + return features + + def forward(self, data): + """ + Forward pass with graphs. + :param data: Data dictiyonary. + :return score: Similarity score. + """ + edge_index_1 = data["edge_index_1"] + edge_index_2 = data["edge_index_2"] + features_1 = data["features_1"] + features_2 = data["features_2"] + + abstract_features_1 = self.convolutional_pass(edge_index_1, features_1) + abstract_features_2 = self.convolutional_pass(edge_index_2, features_2) + + if self.args.histogram == True: + hist = self.calculate_histogram(abstract_features_1, + torch.t(abstract_features_2)) + + pooled_features_1 = self.attention(abstract_features_1) + pooled_features_2 = self.attention(abstract_features_2) + scores = self.tensor_network(pooled_features_1, pooled_features_2) + scores = torch.t(scores) + + if self.args.histogram == True: + scores = torch.cat((scores, hist), dim=1).view(1, -1) + + scores = torch.nn.functional.normalize(self.fully_connected_first(scores)) + score = torch.nn.functional.relu(self.scoring_layer(scores)) + return score + + +class SimGNNTrainer(object): + def __init__(self, args): + self.args = args + self.embedding_len = 1024 + self.get_pairs() + self.setup_model() + + def setup_model(self): + self.model = SimGNN(self.args, self.embedding_len) + + def get_pairs(self): + # data = glob.glob(self.args.data_path + '*.pt') + data = pd.read_csv(self.args.score_path) + ### Pairs + self.testing_pairs= data.sample(frac=0.2) + + self.training_pairs = data[~data.index.isin(self.testing_pairs.index)] + # print(self.training_pairs.head()) + + + def create_batches(self): + """ + Creating batches from the training graph list. + :return batches: List of lists with batches. + """ + # random.shuffle(self.training_pairs) + batches = [] + for graph in range(0, len(self.training_pairs), self.args.batch_size): + batches.append(self.training_pairs[graph:graph+self.args.batch_size]) + return batches + + ### need to train the datatype + def transfer_to_torch(self, data): + ''' + :param data: data.series from Score.csv + :return: graph pair as dict + ''' + new_dict = {} + graph_1 = process_pair(self.args.data_path + data['graph_1'] + '.pt') + graph_2 = process_pair(self.args.data_path + data['graph_2'] + '.pt') + json_g_1 = load_json(self.args.json_path + data['graph_1'] + '.json') + json_g_2 = load_json(self.args.json_path + data['graph_2'] + '.json') + # new_dict['graph_1'], new_dict['graph_2'] = graph_1, graph_2 + new_dict['features_1'] = load_feature(graph_1) + new_dict['features_2'] = load_feature(graph_2) + new_dict['target'] = torch.from_numpy(np.float64(data[self.args.sim_type]).reshape(1, 1)).view(-1).float() + # new_dict['target'] = data[self.args.sim_type] + edge_1 = torch.LongTensor(format_graph(json_g_1)) + edge_2 = torch.LongTensor(format_graph(json_g_2)) + new_dict['edge_index_1'], new_dict['edge_index_2'] = edge_1, edge_2 + return new_dict + + def process_batch(self, batch): + self.optimizer.zero_grad() + losses = 0 + for _, graph_pairs in batch.iterrows(): + data = self.transfer_to_torch(graph_pairs) + target = data['target'] + prediction = self.model(data).view(1) + # print(prediction) + # print(target) + losses = losses + torch.nn.functional.mse_loss(target, prediction) + losses.backward(retain_graph=True) + self.optimizer.step() + loss = losses.item() + return loss + + def fit(self): + self.optimizer = torch.optim.Adam(self.model.parameters(), + lr=self.args.learning_rate, + weight_decay=self.args.weight_decay) + self.model.train() + epochs = trange(self.args.epochs, leave=True, desc="Epoch") + for epoch in epochs: + batches = self.create_batches() + self.loss_sum = 0 + main_index = 0 + for index, batch in tqdm(enumerate(batches), total=len(batches), desc="Batches"): + loss_score = self.process_batch(batch) + main_index = main_index + len(batch) + self.loss_sum = self.loss_sum + loss_score * len(batch) + loss = self.loss_sum / main_index + epochs.set_description("Epoch (Loss=%g)" % round(loss, 5)) + + def score(self): + print("\n\nModel evaluation.\n") + self.model.eval() + self.scores = [] + self.ground_truth = [] + for _, row in self.testing_pairs.iterrows(): + data = self.transfer_to_torch(row) + self.ground_truth.append(data['target'].item()) + prediction = self.model(data).item() + print(prediction) + self.scores.append(calculate_loss(prediction, data['target'].item())) + self.print_evaluation() + + def print_evaluation(self): + """ + Printing the error rates. + """ + # print(self.ground_truth) + # print(type(self.ground_truth)) + norm_ged_mean = np.mean(self.ground_truth) + base_error = np.mean([(n - norm_ged_mean) ** 2 for n in self.ground_truth]) + model_error = np.mean(self.scores) + print("\nBaseline error: " + str(round(base_error, 5)) + ".") + print("\nModel test error: " + str(round(model_error, 5)) + ".") + + def save(self): + torch.save(self.model.state_dict(), self.args.save_path) + + def load(self): + self.model.load_state_dict(torch.load(self.args.load_path)) \ No newline at end of file diff --git a/src/SimGNN/test.py b/src/SimGNN/test.py new file mode 100644 index 0000000..aca25a1 --- /dev/null +++ b/src/SimGNN/test.py @@ -0,0 +1,122 @@ +# !/usr/bin/env python +# -*-coding:utf-8 -*- +# File : test.py +# Author :Clark Wang +# version :python 3.x +import argparse +from simgnn import * +from utils import * + +def parameter_parser(): + """ + A method to parse up command line parameters. + The default hyperparameters give a high performance model without grid search. + """ + parser = argparse.ArgumentParser(description="Run SimGNN.") + + parser.add_argument("--data-path", + nargs="?", + default="/home/repos/SimCal/data/post_process/", + help="Json data path for linking.") + + parser.add_argument("--json-path", + nargs="?", + default="/home/repos/SimCal/code/SimGNN/model/final_data/", + help="Folder with graph pair pts.") + + parser.add_argument("--score-path", + nargs="?", + default="/home/repos/SimCal/code/SimGNN/lean_simcal.csv", + help="DataFrame contains pairs and Sim Score.") + + parser.add_argument("--save-path", + type=str, + default='/home/repos/SimCal/code/SimGNN/frist_model.pt', + help="Where to save the trained model") + + parser.add_argument("--load-path", + type=str, + default=None, + help="Load a pretrained model") + + parser.add_argument("--sim_type", + type=str, + default='sbert', + help="Where to save the trained model") + + parser.set_defaults(histogram=True) + + parser.set_defaults(dropout_flag=True) + + parser.add_argument("--epochs", + type=int, + default=10, + help="Number of training epochs. Default is 5.") + + parser.add_argument("--filters-1", + type=int, + default=512, + help="Filters (neurons) in 1st convolution. Default is 128.") + + parser.add_argument("--filters-2", + type=int, + default=192, + help="Filters (neurons) in 2nd convolution. Default is 64.") + + parser.add_argument("--filters-3", + type=int, + default=32, + help="Filters (neurons) in 2nd convolution. Default is 64.") + + parser.add_argument("--tensor-neurons", + type=int, + default=16, + help="Neurons in tensor network layer. Default is 16.") + + parser.add_argument("--bottle-neck-neurons", + type=int, + default=16, + help="Bottle neck layer neurons. Default is 16.") + + parser.add_argument("--batch-size", + type=int, + default=64, + help="Number of graph pairs per batch. Default is 128.") + + parser.add_argument("--bins", + type=int, + default=16, + help="Similarity score bins. Default is 16.") + + parser.add_argument("--dropout", + type=float, + default=0.5, + help="Dropout probability. Default is 0.5.") + + parser.add_argument("--learning-rate", + type=float, + default=0.001, + help="Learning rate. Default is 0.0005.") + + parser.add_argument("--weight-decay", + type=float, + default=5 * 10 ** -4, + help="Adam weight decay. Default is 5*10^-4.") + + parser.add_argument("--histogram", + dest="histogram", + action="store_true") + + return parser.parse_args() + + +args = parameter_parser() +tab_printer(args) +trainer = SimGNNTrainer(args) +if args.load_path: + trainer.load() +else: + trainer.fit() +trainer.score() +if args.save_path: + trainer.save() diff --git a/src/SimGNN/utils.py b/src/SimGNN/utils.py new file mode 100644 index 0000000..b4ef384 --- /dev/null +++ b/src/SimGNN/utils.py @@ -0,0 +1,87 @@ +# !/usr/bin/env python +# -*-coding:utf-8 -*- +# File : utils.py +# Author :Clark Wang +# version :python 3.x +import math +from texttable import Texttable +import torch +import json + + +def tab_printer(args): + """ + Function to print the logs in a nice tabular format. + :param args: Parameters used for the model. + """ + args = vars(args) + keys = sorted(args.keys()) + t = Texttable() + t.add_rows([["Parameter", "Value"]]) + t.add_rows([[k.replace("_", " ").capitalize(), args[k]] for k in keys]) + print(t.draw()) + +def process_pair(path): + """ + Reading a json file with a pair of graphs. + :param path: Path to a JSON file. + :return data: Dictionary with data. + """ + data = torch.load(path) + return data + +def calculate_loss(prediction, target): + """ + Calculating the squared loss on the normalized GED. + :param prediction: Predicted log value of GED. + :param target: Factual log transofmed GED. + :return score: Squared error. + """ + prediction = -math.log(prediction) + target = -math.log(target) + score = (prediction-target)**2 + return score + +def calculate_normalized_ged(data): + """ + Calculating the normalized GED for a pair of graphs. + :param data: Data table. + :return norm_ged: Normalized GED score. + """ + norm_ged = data["ged"]/(0.5*(len(data["labels_1"])+len(data["labels_2"]))) + return norm_ged + + +def load_json(path): + data = json.load(open(path)) + return data + +def load_feature(data): + output = [] + for keys, vals in data.items(): + output.append(vals.view(1, -1)) + return torch.cat(output, dim=0) + + +def format_graph(data): + node_index = list(data.keys()) + from_list, to_list = [], [] + for keys, vals in data.items(): + if vals[0][0] == 'None': + pass + else: + from_node_index = node_index.index(keys) + for to_node in vals[0]: + to_node_index = node_index.index(to_node) + from_list.append(from_node_index) + to_list.append(to_node_index) + return [from_list, to_list] + +# data = process_pair('D:\\Projects\\UPM\\GNN\\data\\final_ae_Data\\Zasder3_Latent-Neural-Differential-Equations-for-Video-Generation.pt') +# output = load_feature(data) +# print(output) +# print(torch.cat(output, dim=0).size()) +# data = load_json('D:\\Projects\\UPM\\GNN\\data\\final_data\\2-Chae_A-NDFT.json') +# a = format_graph(data) +# print(torch.FloatTensor(a)) +