From 6f033c93c95b1290bb8a7c5be69c7f7370179e4c Mon Sep 17 00:00:00 2001 From: root Date: Mon, 12 Jan 2026 07:00:54 +0000 Subject: [PATCH 1/2] add SKNet --- .../build-in/Classification/SKNet/README.md | 10 + .../Classification/SKNet/coverage.txt | 3 + .../Classification/SKNet/resnet_loss.jpg | Bin 0 -> 33981 bytes .../Classification/SKNet/resnet_loss.txt | 29 + PyTorch/build-in/Classification/SKNet/run | 1 + .../build-in/Classification/SKNet/sknet.py | 242 +++++++ .../build-in/Classification/SKNet/sknet222.py | 145 ++++ .../Classification/SKNet/sknetdebug.py | 161 +++++ .../Classification/SKNet/weloTrain.py | 567 +++++++++++++++ .../Classification/SKNet/weloTrainStep.py | 647 ++++++++++++++++++ 10 files changed, 1805 insertions(+) create mode 100644 PyTorch/build-in/Classification/SKNet/README.md create mode 100644 PyTorch/build-in/Classification/SKNet/coverage.txt create mode 100644 PyTorch/build-in/Classification/SKNet/resnet_loss.jpg create mode 100644 PyTorch/build-in/Classification/SKNet/resnet_loss.txt create mode 100644 PyTorch/build-in/Classification/SKNet/run create mode 100644 PyTorch/build-in/Classification/SKNet/sknet.py create mode 100644 PyTorch/build-in/Classification/SKNet/sknet222.py create mode 100644 PyTorch/build-in/Classification/SKNet/sknetdebug.py create mode 100644 PyTorch/build-in/Classification/SKNet/weloTrain.py create mode 100644 PyTorch/build-in/Classification/SKNet/weloTrainStep.py diff --git a/PyTorch/build-in/Classification/SKNet/README.md b/PyTorch/build-in/Classification/SKNet/README.md new file mode 100644 index 000000000..b5b4f472d --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/README.md @@ -0,0 +1,10 @@ +# SKNet + +Try to implement the CVPR 2019 paper ["Selective Kernel Networks"](https://arxiv.org/abs/1903.06586) by PyTorch. 
+ +I implement the core Slective Kernel Module, as illustrated in the img: +![Slective Kernel Module](https://github.com/pppLang/SKNet/blob/master/img/img1.PNG) + +The core SKConv unit is implement as a Pytorch Module in untils.py. + +I intend to test in cifar-10 and imagenet, and test in some SR models. \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SKNet/coverage.txt b/PyTorch/build-in/Classification/SKNet/coverage.txt new file mode 100644 index 000000000..e15af6f05 --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/coverage.txt @@ -0,0 +1,3 @@ +all api: ['_amp_foreach_non_finite_check_and_unscale_', '_amp_update_scale_', '_copy_from', '_has_compatible_shallow_copy_type', '_local_scalar_dense', '_log_softmax', '_log_softmax_backward_data', '_reshape_alias', '_softmax', '_softmax_backward_data', 'add_', 'addmm', 'as_strided', 'as_strided_', 'cat', 'clone', 'convolution', 'convolution_backward', 'copy_', 'copy_stride', 'div', 'empty', 'empty_strided', 'eq', 'fill_', 'max_pool2d', 'maxpool2d_backward', 'maxpool2d_forward', 'mean', 'mm', 'mul', 'native_batch_norm', 'native_batch_norm_backward', 'nll_loss_backward', 'nll_loss_forward', 'reciprocal', 'relu_', 'sum', 'threshold_backward', 'topk', 'view', 'zero_'], total: 42 +fallback op: [], total: 0 +coverage rate: 100.00% diff --git a/PyTorch/build-in/Classification/SKNet/resnet_loss.jpg b/PyTorch/build-in/Classification/SKNet/resnet_loss.jpg new file mode 100644 index 0000000000000000000000000000000000000000..183a2ae4ec9ea4052f7018ab0daaa2d881c3c118 GIT binary patch literal 33981 zcmeEv1zcQNmVOZkkYFLWli;2pL4t-rA;H~~pam2JcM1s*Jdgkh8rb+a{ynF6 zJmlr&rC|~j5#Sc)Af z2cY5KynFAV1UiAr3yk}AgxtPSshD(6N?V9jhY#p^UfTQJx=l<%N=DAW$i&RT%FFkN zUqDbu@~MP#+yZ3j;OOM+;_Bw^|2iNr=uL1)^t<;lv2hDuo=^X5U0eUUvAMN8Y&vbcezk*xF8Qye6*YQ9-`lsP{DX%M{u9p z7nATwRBCC3_@8jRk;NLp%e?|ucQrxqWY*tz0r_)tRH?{^XHdq*J`l|+fR6CSwR_)=kiw44c~+2N zatEBzRm7xskd>Y4N???r87(MPQ!!dGQW(u~cS}e1#JE-h0eGd1k-d}5In*F1tZW(g 
zg=Jmxep{4qaf#E*oLg9XuDP5IYN^wk4|m>KD&ss-ZqnbD59W8gjV@2HiEi~SMZ4r6 zuC37?0cg8>9XqR1UPY{BPe*uPWO)S}rPU;Gc9E}tT9Pv=!_}&d(?-)L^%d|QgHjM?#Rm286$Wiy$2 zoXfUBekDQvV|9zM8=>Y_1y0BiUDR7{Z!@OE(}I0U%LVp>y_nn`UB^o{(AnFI{G06s zg)E=RZpMlWZB)y}eSsVaKo{X(?m)J9)%Qb;j-CliA^?V9SUW(1B-Cuk7T&wEYb-~g zV8h9=R8pE`5a&EPo?_Qc!@QBZ$tH~ebT*NXQr$gPb`5yEL@sNo(T4YBlOn&8Y)gg6trXkdCLneYysb)(mq1k93jlyXW!X+_pTBo`Su1!S_Id0(fv1h z-a6bEhh7Dg*AC*;F$K_8i6O3+IX_^8TVNd6XA`;p>8 zwv#>-SPdsyFY3a+b(Y^)`k*DuD@`emdY&28Tyc`qRv27lQ3k}UDygoFWmI-~$Mvz^ zZs6e&_H*28OD~6ZA^sq2tUSXDZlx4f`tbFr!?D-mdZ@J>Y~i9TLol90t+r0xv86H+ zVrNmUc)MncIDtzOZ^y|)(NOb46k!zy_aMnJ-YBqn?oCF9Xu*`e$X7Hh!qo1&XP&+hELfB~u?uZW%*xz)f)sYic!XWTrl0gdB8&GcRMW*QiJLu0P5qmHYf6I&39#Uwn*e}q0?1k-BZoDX)W{W^9_ozkZRm= zyf+OTMS9RkP0)#z-iMo6jUdZmGKzbxH&FsrKHv12@v8lVdn|P*`fi0d$#FKA-u1>u zVm-He&nW~1oY4IJyH4`FnGdNZVw|$Jgfuc-+&i-Nsqs^Y1GUzpyj4mUI!%en>n z5w)k~6T184Udyi~_+t?Ov3T*wFcXuKZGl07f+=BxROknNP&~)}Y*zHbvxbMI5uy+H zMIYQ^%5pGtVhXO^3Fv0n1^1_Xjx|Z}4-m|8|>Y$2U zO)%$ISSVBuqgsaEg`z{^OKw*OHxQLhpkLp=&0V3x|4}v76nwzf6l7*%Iz>k>sviE? 
zFFVY+$fY$3hyY+Ib&Oh5o5wv*E$yOKqlBc$tvhv@2DQ7uQJzA|prOh^%BmPxHujT} zB4tA-2tZZ^%BFi{SOpa$mY^+Hh*#M|Ui^Z)!bKl2Tw?RIae>MYl@i)#(8#gLXSZJ^ z2`&3ca^HX(Q~Kn6BIamW!and~dwocKO|l|&mYU-lcq~!cQ0n(sud8r1`o410=Eo$8 z7rp4HVQZ)DOj8Y9=F4mzS%(@hnVE*TiMo2dfazE2fft^Q3AlPWfVHI|jI_;}y`+tubB%ajRnm2xtvW@-(XWvWOKMZpY>UG22$5n1jL|J<5ZvVd|px$_h!FB2b?B$ zG9swa1=A=;08l<*rcE{|69=r$;iM<~n9gfQp8>BFrO~ZI4y{vgi}>_*5@vV8+1l{Uctp|_ zZ0-9RZlu0q*nIHHL(OqwY~Lv*kO1n(2p_xRIPPK4e_+eUh-_Scv?v-;`L zC8HFXw@wT}zPKD>U&zMA4(mmvf6KGGcy}?$@@XEJvAKv-d9CDfrcjh3e0R>8y=p`# zN7~vEjg8a8e8EQqsD#9hJP^TlReTtSscnrC+q!UcIy>cqn=+J3z8a2^6i^p~cp1lC zV1K+o!zzU&z4b5x;3mn;DKg2oAyn9~z%c6Rk-u40uD=x*`TWH#G8*s^DepSUy01~3 zSbH^5)8*HXt9aC|nnrjsHgO941cr)HWd-;ZQfSsPq-HNg4VyVVlu--38KjD!V*0fe z$Ay*h@>bI?5P+v~FW*$7fd>+JzC>qiGIqQ!k` zG-vLV2=?TzVnndL8>!SI6{Uu@wvNnYxHn+sbu*+N(molvGbN%Ph#`8Knj=L?jSICe zz5hu;QW48H%c%djJx zl~wqYYkbZKjy8*tiaGN&b+nRBK~&1g4}l6KdWU=wwJSTjmI-P88^hP^q6bO zUhMXoFiNhDe4gv7S}^%S*EW{u)}1V^t|vwqZz*CoN(&_Yv9R&pwYkE;8>tJD7JhWAk_uZDBfgauO{_$X%CERIv#SU(8!5Jf=;+|9pz>}h21i%L$a>$D^^h3MS z-#@}>5CQmty0m{uHqHfzn}WQ73TlTeO4?~a=mvu4d#6t#2nh=HVDX@8<#Jf9a&I5x$gH-DsF zkA6bE24VM-(UHu{78z9h%=`oM!am@?#&XAlqx{Ic6n?_Ic#ybe?%TYnHv~Dj{R8uo z6vja^uLXZ3^HTZFyheY*y!w#Li)_jElK6xMcH>XXYY-JORQ(A7SR(v^HO~Iid+z%~ z&jSFCKXhf(7wrbF#J>KlD^|;b4&9d-re}Wa-x(}u0cCZ0F|hd?2^axLi}ev={U6eH z!9fZHpbLE#{-w?Z9xG`;<&eye0KCPiz7BBQzh)1t69O=&g&>dgq%|3O*Ysl(4F)x5 ze#%!oGd}cbWxP1vIQH|Mu@mPdRB?}r&(mzTMRpAHRP3BhI^^Uwk+(+sg)o1*@kcxI zyHFb6;3v}2tZT9&RXB1A0oX}^JQODY8%bQKRC-~o>vx{fgfwryLI8RkA*;Zj{-Jv% zfB*==AYGj^@)tDAaC4>aH%O14(8M^`CENS&M+Hz;>6Y<{;AxB(R`{CO&_y*(x=>|!>-qjBqAq>)^$0?S@ptkLw|qs00Lm{{Agp=mMnYcG6QIopj@rM+NoYSc|WK} z-i`bso#b}Fg2HEZo9tB8j0b?VFo1{Ici||shP*XaAa|T(0?yxXt|ITz9L-QFkMjdI ze_&0&W&Hdzy;mLgvZDJ+W0en2MWgPlrpJk&AmP+}Wk&K#-#86bnHFKs*3m89A6i-` zsTb|dyDo>EiNP2Oud}Nkdqy(YHPf_wb@QksuwuRZuC)ckA1?`o3t?K zD2)>}vr=+&EvxP8nqV62c!n5bUBWB}55bg02HM}g{3WB zEFT+98ue{j6o66N@U33SM3PlES3kA^8|_Okj(JS3|8X$Y3s;nYK2>2r% z>UanM%`l{9u}}RJ6`7YgG1}ulCPWb-QaA+*36fF6#|>n$3t5qD)Ge2>siInD!79gC 
z2VJ?r2c3F)XH4Bg+bVCU*<7@*R7xuZKH|C-O_5_F(_s)T{v^_2Ce-W6!1j~a^Hnas0S=Bt+mraKGz?rKiEFrs){#D&*=wO?<#=E34Kbsuyx{?&?q?- zF;DYp7t=~GLQjrUJM-Q6MI{%neK^PGzj~#24e%S=?744HmA5~Kj&_D;Tj(B+fcIfL zHy=_-3*avWdfp&hTZ?*oYet=*n-FDT0%sr~U%`pJ)U{f@I()9Im8vmU9qoEZ_wFl^ zZr)wnw*1tO)LTSdvYHZnGV6_S&lLu6Y0ON!Hjn40z$O1gWeg=LhjM5snD?Z%VYpHrlv%51!9jJO zyPxicxIvK`DMoZFDPM86VdM3uWO<5wra6hgHD_nsFdgoU;gk6p{$3t4IL|f5Sl(v;x*`RVT6$7~i5k)5xoea0tw~ALy z%Wv714sZoN(`s2Q5jvzU7`WL|HST3PIawEFD!o4{Rr4R>U!d+cP#{EUa~+g#kOA-O z8H`cSZC|=wVbN2|^v_9>;qR{&FI)>0E!6&!8vINqhZzJQXuUp(56mh7E)W+5!pY*U zBDYyPgJ2TGmDo|;WL0Av1G5QQS6vI~->0(!ob~_~_YRo1rHTc9Ip@8nv>XGefq$A9 zj*ebS=|%tyuuVxY2JzzIb#QYDDPvbi8~)BiS;Eo;u=Lz+Z6%~3Xw2PzM~^QtZRJ}( z9Rq_S@)65QND(Td{80{NF&IP&z6yD$Hb#JP8tZu`QEd=echgIS+<5`2c{X%v#_GM8@B|k6PK{D*Z2(% zT;A+CM&8+s*S~yTGM*K^ZdU7tR?4$f2fi(F)3?=}mO6-N!R-h#SkWAhq)61BDaX59 zUz=CODM>;-Z{_S{ zD3os#l4{(|z;EZZH|z*2Aas6-*L=TP_(E)B#;70Ck^Yh_lrNb6{2rvkNAPhomj3pL z6?szl>BlSZdTeG^Pl3sk7cz&dClXd1}kMW@lhMwXxj_(7r84YI#IU(puG# z%5;Lqd$d^2#a3V6iMmuCA3)_>l%X5VtMdQ(gi+dbp^_T@z^2OBPGGIy*A+p!ks{8l z=$Uzfks!{?cZn3_|@|j|_XMb2EE5DnsqAYCUAckSH*=Ky!%Z-lrc6`X+er;eTR~C@1EepPaH7c-1QU;M zvzHK-o>Z+uatXb>lpk9Wlxm-jm;RYc!diKHU6o|JVIYUk<2(0m6T|(ZtdxW7H10!t z&r&VZc(zAWmK32B2|3F((>9rJ9B1@Uh0V9Lz&3aHM%dmQm#O3OOroiODPTINj}uZ0 zY74eV3mNMB(OdfNG5xU)D+D0lHnj-yS?ov=hJ=;p^|sf9l|y2eRhQ%&FeHPa-)}WK z!$MNTVR>;XqCb(tKa<42_my%Z906GSuD34?9wPwq_lhF^Yjg8wjeG9u-@2b8OO2eK z98MlDu5yEbG;z0FW>9vg%vuh+*!}+M}^}`Ij3rHud^Mb{!oLZR>vH-kop7e zFKwDo#bs}SElRyuh)?cd6?Uj zzt=bTm|>Dp!=Yw`>kpUfwZ_b+!U?deWBYqCLG&J$5h>FbpP@}BwUJhKOsW<{DR zp$QwY^CTA+aOtvWzqN>KqYF20V-u-Zrtm-wU$qlHC1B+vArsAT?!KXQd9F!;Ys*t( z&_h{pf#uMLxhF>jT~m)`UmQC~$x9bdRo7iZB%T~)Gk6ss07_%iX(4P5chW+b0e`>) zr5Ik=6o?F4|IF>06h{EIXE&`<{`uJUXRJf2g&x?0K zf)k%>zAM|-&w%`R77%JEcEPx}N+FvGI1TI(<-nnf&yP2Po>X1J z2cx8bMabTJnxA1(ph?3<7wkNIj(%&W`&As!!;r2RNK6*10&rgsF^{JN2Ib> zr(C7JqtPJ{`mRfiGdNH7!=RPhpslIx@zA8Pp1NOX-Sjt8MH{(6ry5Pu@oy8C*Df@=c1{X~2FH_oW&~_)>CBGkDaatNlX)gH?I-I3Mk@>8T9XGtIXVOVhUZ3) 
zLC|<409e2LGYb6CUXge@UOhA^wnTHm@0@)FFGu5ld^jDcOP<}-8H)$L!LfgjVFt>dH5Khi*F&YF?3&Q8SN1DjWE` zEJ(OXdx1NRZboCqhuKgURrut=Y51A_$Wvr&vq`{qjC`y}?F;0PbDXac`=IKa#=-|O zDv16{G-aYPvQeRE1|E6xFs@!h^wK;3RH!a9Upq-&uBJjYHN)tAwYWqVT*Eo!%PY0e zuqyMXjLQrRb#&Ep9IIMRE*iJ{vi(~9FUMlSP0j7HtnRjAo1R=H_6Q;Xs@I)E6a7+T zwLlHC3eEy`)ATy`074brkAJV}{8zXh2XYEN;Tweh%p}$ zdGV6P0yXWo+L|Ks4mId&-C11=A+Nq2`*xeXLel`#>M-NITLu!&11R0Ieyr~Cx@7W;A?saWmzuLkf`Jyu7` z@>v)xJe4k+yC&YhoZmP=0CYXOcTb>-sf^i;Ytbgk2_3?fo#Kx*E}8RQ6B0!cBqY=$ zS9lb>EmG4)mXR2RpLogG5v^2)b3^huN?NPk5S#Va-o1zz2 zQWYipuIIT1o3FIyG`;GLK*)ri99{P3QgGl#! zFSBWM1E`yEQz#!hWaaY)`gwHJBK z$AA34zlgdzPJe##@GLR(C52q7pOL;{$3M@v7mnxG*8I8 zv$@AmTcQ1o%VI_3H3EPuswxt0R51I2!@81d7i1&+*7!~hoMa>HY9hu|s%L;_-rE3& zHD_M#vDjdaiWE`l+yCJ}{F&JOZm*Mn2}!L!{Uy}>9aDh+Y1FN%+RSci`zVfYdj*(m zPE2wuaproGF(ra!WDx)jQsjyc1wXw)B|VTvMqo$Zd(;>nG0#W|J&k?}&FGY;^9?Wr z;IY@n(_(2(Dud$n^$Vjtq&n3pzWRx!Kz(d5s--nlY)yK@>`lO+*>?8vjeJ*m6|h<> zG5_7WrnlDRlk4Nw1us{{!ogZB%Y)EZXo|t`Am#|x6vh?TdFeOYn_wFE1cSBika}nET>aPdF!92JD958aAci z3=asA!EAkL#8;yD2a4i!ei+9^?ujM2Wap!&MiD7I?w+IB%+8Z1kjbb@DMBp?pMR^_ zf02#;n={Qnsu>%Fa#Kx;YUs1kMe9$&oD3aLL0hMM#@b^8Dc1S7FX9}i-U7E5nkBS2 zhTCLDG2uh%x5&^@#Fi&&x5X^xnd^0tbBOKy8aKphI#X5dys>$3^ppYV{(jo{0l9wK z(REKrBuFh1ZR*yn2mfq!QiI3<>Y=gmW~E~ zdbCrBrig};`*2=ecm~PD-4=)X9$I3yvF96+J*5R1TTMXop^4Ct_zhj+oE-MO7Y!8= z9xhx2kuj0p<02s9jG{N7_Gu(%eD+5QdZQZs1t+HnX(RrELj>5&`OQ(YawpS?xi59X zGoEx`c|_KfxUz2Km0hj9+iLlQ!^-x?ze6|XEFgnI-by0hQtN1{qfe(0nG!Zxe9(b) zPdgZu?s1UFj(KZ*`!4J ziYI7=1ZUyv3oO>777>~%P;G_19e_K!Q{ir=MC}Xk4x53 zKDji}3?S5aB3xQNe!ll?y>G9=2x;DkOn&7SpxwZvqaBt4*f-r`9NilIix1pBF?u^zUrkByTqdjchV>XoAxgn9mAekfT#^;AaP4bPHSRq zW)aH+nkd(kt?nfd48nlk>Kfe9bqdY-Xnnea)xPi|;n7G=H>z-M=j4UvxWn=;;Z~-N zfmqi<3OaOxk2hKV~!pS0?+gWc5u5qE^R(iC? 
zi|mG7W~Jwe)LW9j-(e57mW^OgSAVK1&f!x_?;EW|)F=gT@a9*!4zPa3bDf{<5P9ht zSSR<_!uI#Nk^2)&`0(r%aTJ>;0PpscHPF`sVSvlzj>4H)DRV2T1}vm96Ka6Do*6u1 z;kg&@me^4Kpv6IJhg{{|`8JowkwJ6cOhH;>mzc}3*Ow6H!muZQh41Wy<%~^6%{rC^efgqZY3V> zjj{tJ=)6DGRD1xz)+}Dg+(af!i1x-$jiQEzrHGztf9!)~x z)TI9_8U7W0n%}`^(PGGj*12-oR?M>Lxs0j8u_)BzRnfxf&S=)S( zojdK(Iv!;>1z3PgKb3BlC|_F?yO!UOxXL+jfl-z*xT!CJ&OW-1L}LS`Y7YVM)ja(n zP5t+0rv56q*J3e7(gO&M;dC;`sdm7aVY21H>bA7fx`BZ`{86BcH3we~;m4EufCSWh zX+=tJI+_?OpjPjq(h=;L6^Pm!nJ7=m)dBc~g<6sevucKxb8)q`C1!~0K%6wiu0RO)0~cF^tYDje7lkzq`AhToi`0b9(&@ z^K(rapW8wr?dWLfs(Uo+Sbs@pLoFedQeP#TN={#YMN@orodS+U< zzr%+Eg}%@%rS!SPd60dA^PuB)_(><~mVt=9dofG~26~Yf89S*$V_bWJtW9E{LIAiv z6_r&M=5H84vW$UJU6&v@l38j>cW5-O?W{b`CtGv`-|Yu{CZ!_vIM%I9%9+#C9<*{| zr^s+ayP*O@Y4pmME=^B=uh*EebowYiFD$xpa>ehDgpa=iHvVJ+{)TwYUoUP!H+oYM zRcOJlDMIf!PXKZ1WQ9Cbff{KF4F>HNnXD`WV}z2^q?R@=;C!X}$sdsjW=myBZnv_i zD&dBa{XxL)R0MK+M)ZWz!wQacV8gCwLJW{=g1L73c91-+Z3-Fm=FH6MwH|TvD3M%! zEF;*j6EGp85-ar{|9tGg@(Btl9qj}#=u`Z_fQO*5sf@YPo06+8MG2b1=hrjcnRSb! zw*3iQ?(ejwcXyiC!rhtEs(*|b{nYdN7yTWXq4^6bMMu29lz$w;kowK#M?}3aiK=KN zwC(d(i;Ft_rp9)51iJdX^mQHW_FB;>fK^_UF%^B}K=<2fRu3_Zh8?vMT2IE zF+W;g?WLrQpE`(vk-%w!KSgXa`pI&94cT*!*Ioy*ZjU@{Y#fc*t~eU z8>cC@Wc$XF<)!PwjOPk9=Ze^loHepgv36^s-u>??Uto(ptWdHAnHGs>^k)t*&^lQR z;)=!X7$`_NfvMIrCPq0ovm_}Zm8EeE1ns-IStXM>>C$4D0vLn+f%gM7san>^y&Oa_ zYu|$IZfS8OBypQnL7Xtw#h$$qjTu!DIO=X=8BAuC&>`V3pN0l52qs>Kd=&}){G|u4 zxTerO#8uUVVntStYLoI6siWwk?nXYDtVdD*dd3x7q{{x0?V?pCgy;z6Ta zR|b`7II{B6CLj{bSFf+#y`3gBfx#6iyM;Ht4htmMK=o4Za-;4&Y zOKjCdEb5M;JwUr7v4+3!k@w-8_(2aca$WiUoajlLWJYGwM~+!py4WMhFsk$_uYl*p ziq6AQaF=Tvn_X3s$&cQe*X$sqXHxrz2pazHv$LOAg8v2eV2cbM4Di%qR}HVzEarq} z>=LzXXJ79hobXL)P#MWpO;mIjaK=6C5}?aV?h|PTDat;UH#1kol{=g#$wBU{Ak_9n zmY2#b4+Y`MG1jD08H zdX>WR6{Fq@5s6^7w418N=VFkI%_PnXvT*thYcEi`sSa4N^pbHfTAcGAa@ynV=<(*N zpS@M9EE1}?PMrHr;C0E6@xK2k#s5`|=U?Ex-zOEE_G8Hs>Mt^2lY+lk$ncw18eu2d zwod4NV`ajjYSV9Zz@McDj?rxCAXOz7G*h5U+*l@SstV>(3rNIG4KhO(&2Hor7!g|R 
z7l3Yp2YPp$31+$k*zO;RiN;h4X74H4c6DTspD#Cm1*_qUA!$x)!p`7{Qr!JLvTc#@7wF@%(n?-h*+!km{PBwT(R#?e4U}4sn2=y){I6c% z^@l0s`l}&4UoOmGsgRnmG7`B@2 zdS~QWx}t)4)-I5rm5~S;xpXY)RT}l;v$1%^e5*oGsgbVe=A1pAy2#j7NPfQFv-t49 zbSnZ`dZl%_{IQP3qHliUmHc-?{!`xnoeWNq z(u#D9ajq+de)Q2iqm_Z^R|^;OH9LYArQ6J?1!*-3awCg6ha>?NUWyC}&%A^@-q|ue z>}SQ)ki-qIH0Z+EnW^wZ>x)-RH!Io)8|NA5&cwcKds_Tu?dZu`&Y8Y48vNJPTeR*) zb%?@aB?F!-Bc=8)3$+O}R-rWYM?O2g=hqio28};kX7T5elE3xUUMM_fG4Oi;jd6aX zG$rAR`Emp9PJ4Pvv*xI4mTy|1!!RGV!>+J3u9Qj@u3z-H&0Wnx+UTxD=BIdW^1XSn z@i|8Cm%x`o8#@ct3K|ur&5-Xa+HNt)9YxG*T|lCW@n(7ZO+GJx+hb;lfr7?Zr?dz_ zxt<15!{NN1lpU5$h&pcgHfC2r_VC`Wjjk8xMLOx{ue*@Prlj>>HCwtIQ+Kp8CIRAnW(h z)V@hx9I-Y{#$L4u5J!Xyy%x7*%^eAx(zsW&$#_KBQh?|D{=EF7&r3W!0s|K$**Pc6 zKfZkVz`4iBJECrT7OIH<=3?x>RVfL( zQzKSx(Z)yu@4q({X(7Fzf=o-nZ^z4fJnqL`4o;CzY`e8RoUGE!4u4V?vUX|&edoHU zIM&%~n^Qbhg!lYf`C3Wo$5@rx53#DJOUPK2k*|{SD2J1PXZ?pGAoFUV|!@nYy{ytjyC({1EK-hnoDooGcdQ>-&JmK;JRwjuq-O7ghnnt(9i>qBo zh-a&=%u+e0tGZb!@d|_T-Rb9|vXBuz1|slq;+C}H6%UuhB**Ezu>MnQ`_yS+yz?}> zaRo~)j-o9Wcbjws;1+Q(nTO9qH;YD2wJ}JA*kJ1TFibLQCyKbN23_jvpt<)xjg#15 zreKR+GKKfkFJMvG{fl_TV0PBeg`SmTSjb&IG3Op3#Pj?8+!y=WAPbahF z2_2pe%Y83rQWdygYuVCmh&!@>?I@JnFEt~#Q++ySUse~WdOPZ^#8QZES7T$Gl!fyQ z%gFc*9$@cNFPW1<8MBBL!dNCeim-f^rI34?-0y-kuDx1>6j|ic;Z27pDpHnafAbvt z4eJqqOWVIwsq>FDfVjV2x2*r`l_md}jey%HggLS;kFWx@su~Sd_vXGuDUCV7tSm!v zy4x4nKT2~Wx5%XcJ0k$C)w0>p_d(STRf}pGU&KDWh1@zv(z{9B0xC(Cn=)Or2U0)> ztDCzOvF8uzOS&F(tYs|`en`t>VaTs5? 
zU$`ZGL_zKhfrmz(?vc^zM))w;yjol0)|tz6&FgFWLe$jAdGva{zYVghWe1?DM@&!Y~z&kJQTby8)u9vevF zv|?b)r0CQ|B#89oI)J=5D%-3oT%@UYB-OIrq>(w`URYovt1fqi=XZMB&&tNT*q^J0xdMR{D_h4>J{z_l-bPpJcHXonNCLbEKsnJ9})4ZC zLRKAe!01LEXA#oeWET^OsL!A65hAc_gb>li3xW^;{w2y&vA9E_c-z>7&~M6Z&r2wy zPK)&QpfiJOAy;x^1m$yW!_8|AsuyqpLxtoJcrU$A%0Q&CcaHbWqG>$Va=i?rj&*iz zMe7Y!7R8A(+Of$It2_E+>`PTO)t|4YUlu1rMcA0GjN3~PfP#Yt9*=gx_r7DeE69lPFXwlvem3C8WbT94<6t!{T{|mF!XnaZUA!!>ezK zlw};AtOTpJsab9x?oYy+?{ySWr#U;v*`#b-Bsgk{rg(V~&1}si()UO_oa<<&_i?`@ zpUTbDP}h=D&)Ta0UZW$4{P9m&8}>Vf1k88H#{>P%s&!wN(JQfjN!I6L<0<`;qkc1( zX-GZZEEk-wn-Ho|tKCZ2-d<3(8x^@6rM{Y6({2iFIPk=>H*){q|wY4(QZFHl4!_4bZkE zZnBV&^EPL4+1lk1XvnqWa-n_1i%vU!$EE9N%FQZ5XqRcB-kn82^{c&X76QJgcjSEk|B-<`9OIs=5Hm|TF#cCObU zy>UluGn`gEBu4Yju#0A%Pbuo9Rd+t_8NYePFRi~y7kNk|%ztMt3(6RwaVm{1Cbw>O z&{Wr3;hHiaG0bUA_wJMr+neH$UGrE^DsOAJm49*Cs-k**^dRrdNwDI*?nU+m{_efr z0+W>P;=WzxBB8}rQMxLXb!e1wL`tn0>H$aONHH+eH3nU;4Re3)`lGm5fXTXw^d7(0 zJ0ox83S1ryQb8TL-g>3Ikqw}f!~NFwoV9SX5vgxkfEt*%V75F{7%kSiWagajd0Ah^ zwb1P%Tdyaxr<+P#H5*=aPC{~QvF~&a`I@ln0B>E65-H)&EGs)hoht3tUCl1EsK|*O zQu0Q^fy}kc%{BIZVej?h7M#_!=x9PKrGs#mljH{mjqdpwU%=m=9tO;L&OvQ8X6IT_ zOJO0tm*Wnirqo)+?_h?#U)eadI8C$TVZ{4uPL5if!p}ESzKO`-&Za(xP_uNe;_=@X z!P8nTx_ZfTvJ;ZsC7zj)c6qFH#7PU zLiY>a6g;W<0BD_vtI%(lKvGC71mND0$6U1*GHs814rOVz6vc1ig;kzR9A)gSaweR8 z6Sj*M+#elDvN}YSNhV7Mz4RS2mA-(4mz61NlsbO{zn{0JdJFKGL3}ZY?DBL&-gD3E+JcOuI71|_I#>Ow>1tRB^o1h40|pxv2909OKb8X2_sbTKx949t`*3sPqz>0t~=z6vDhwl zEYdbVHxi)HR|T$s@5`Gf8s5*NB(fpBCdth|8@ospj2RZ~$!1NeFl!_V#|x=&`>Gwg z$zrpY30zSklwT>NCewXX*RAHKf`#`v0tYx@addo~5B7*Tb)TC{EV`QM=qGxI+`!&- zSr+HSL`=My2QP;*vLl*{V6!YxZ014MaLPi`$4i?I$n9pZQp&rJMwO>b)9ULR>(2W_ zlWCBi`2ww@W0Y;q;VhuKEdYeb0Mdy0V75}9J@T<)={wbmQUxmNIzAp;9T?VS+uG(TRoNv@+V7gF?Qz|p62ib^5 zMIPi6s(tlp@2S7z6*3CoheI0T?w74oI=pbD}^h5)=n3U{b6I;=E1{=L92swHpo{!VG=7&!X z5dfQRI8mk)CrC7XEo9R?90Q0!CtHFs`K=pHxc|+74elFjbiNGvQmmrponAJTc^#RB zcM+ruSqwl9xXYPlB4qu`Bpg-@)u>~!Pb=z4e^#Qw$`1As5f65|W-%Tuc=1-2D5=3l zh`a4mWG9hXh;N03G@1-wS4b 
zelAyRnSTTbtJ_I|(jmF=s>X;aeheMbN$0_ctdP%l-Q!r9F!R&Eqm0_D4l6?bFQ=#4 zv=OIFFvd$Q*yVrRl$!gWM&idt{_F)J&rYi<6vUgi8sl;14i|MyMh zF&x4JC%J4&tr#7pT;5nzC&XyK&4^^9y<6zNh%r(Zl8XEO?asM-NmN+eqoG;S!!K`U zq%jVUmDZS1x!?p|Y;Z59K^Xo5LUOxR2#_cz=|S502{wNnf~*8Ha9=v zI{f_9FYow2-HaiGX482%m?=~z7%LC`om7Le&KnKN_OWOHRW}jWFXvGKNVY%+2&NAw z&+H?i%^7&6n&IQk0DxIu0~}Hg61I$vcZwiO5y(91_Z{7DHN_xHNa{A=nEPVTaYsjG z48dQXr1|+xT0kTj_;*7;e{g@{8QCAWA^?)LVclrDeK}NbE8IC&0X=dhE1aK@yKGA2 zLVht(3KeMN6sUXTE!IFpd*-zafe%b$*60p6fy@6!z`tU6{(-psZv^}ahX3o~@$ZX( zybF&;LLLyuf0aQ1KK)?Da$)#rRSP!`PDt=&35P*dTlDrUn$q|=wEkVR6Q}>Ut$?*e zRz0o?AAc$DHht)~0{T>?|ZM>r1z)HSmv{<>ZVW-lsR8_Ex zHsbXiOp!N88w!*?F``sR&Ai^751X-QP;q-T+MvpwK!Mwq_<(j*;HKik_?;aJ;K7`b z!g~Ah!ooJmXh$hu|4zO@xV;AYxH7t|2)FE7{Pf>cw}@-hK|JwLS2Wuz&+@E=4qxJC z0CKBKRmGv44-0E=0m%O*A~YdGK9HT`YTkO;f7)NsDI0yR{K$dS5cIY!!6g+9VDi#A zDjLIbZlZy{ZlD&FA8xKzR#Wk&^S0gv2l-t^efFp(%838h-nm94VL)-5WZKF{KIUsa zvf*R1!h8f6NVHURN;79oLC8!EbrOkB=;1UqOED{IGkcghAq7nl!ccpPCYm6=Ky8Y+ zrlQLfUuAROcFwlb*FNm)z2EM+=iGDe{r~QDgX>Ypaqj_oXeK&hHm>*#HT!Jz`JlYA zu(J_B9V>_-*$tw!Gs<5B3pNNF1d?g?4A^!&KzHTZmg7z@8K4W<+i1K&Em{%YAxJWJ z9=|D&{#fi;pzDq(5R9cnil=DzBK?u`r#`%vWw9m>HXyGivj}Y}1@@`*d4K0;dN-Vh zKwm~XKQyFfp={lq3LSXOeLG=7pIAyV_{_pV&5q$a5}eqQcbjSNeitA{JB@p}(w}$A ze|vIjhlD%db8Ln^ztw-W%LV-Tq2XsWdFH!Q;Xj#L5YVQ4%18qEO^BfBer~$p9>n-D z%82W|HV*KFpY5cEV0WKUbI;E;(%yZ&I{GEDgVa}BxnyL#;efo1)6?d8giAqLt!?B# zsNOPF=~u^@@1oapCd`(Lq)F`kYF$NKg4Ppj(q}cwp^D18?z+>fd`<=RZXrklkc1BE_oyI`Q&}U9#MS% z^9=^}gD>(SzUwU}@x6=`c}`(8_4j+6?aIr{sjxkw$xt~zvubC&LNGkLDu<7S&alahZH^H6c`=x@eaCMwhr}BE5qzRz+J7s*$to~DAb=;Dn~8MK0`>C zda*yWSpD4dU1l}oG0ToGZbW<0_37m-0#Uy2;xc2-aRFc48I$hUz{@uF_jYl@FZx*h zh=A%*`zY+RQ~EJ9_|5vE0m8u5SuopXFOv)eTZZ|;tPQCf*V?I14_f@v@bT>gdw=7f4<Y7_0RZj4-k&G2zxg1Ku(fb8`x6QAkBKYG z+3&c$3n^z=#}XxFxq>_#Ih?c3Yqu<_ArXpZ?)jYNT|jDrM$D!C~+b~cP2+4WbV zoFHGBuprcl2uPlO(ngG@u|H}3{x@2tl9{E`z;5f#NO(o&X*C;bwW#jkUae{SydB{^ zy@sPY4sU=t-^wqsja{%c=$7{do_kDVyJ6fw+Ep4-sniV+1@FlM*88_2GN@g^6DG9e z)KZM8>d9cok&~2YP40@cD!6tlw5iK|T0bcnjg}@Pd4X+^;@y{Ql?}dwo#Fs@*Xd_@ 
z_RGbMAGLPV)@{ns~h)d^K#dl?YTTo1Vs^6i#uB&hbrJJXgc_g@fTJ3A+feOY(oQ*7;4 zTYEB37O5R;c(UH!QBZSY(<{}d6XZnb+WCt4R@U50eN6n_#mG^8=lB4cZ_||#rw1l& zChhsgs=P?iahQ`OqDx05%J&HHSLTe5rUm|Ju(aJVv^!0EuK>^7ht}IOek-d5QoGo) z@}w|6rB<=cAqE$F-MZD{f**#dBY9)?s7hC4j6P4)*sm?bU zx2_Kw(FMwC)8|C0S>uS}kp7_phw7ts-3~Pkb@a4ezxG|Q4>*OE$-*)?kpZU zz-#_PJ=DQHTqbjw;S|{i^M1}^%t?~XM#^p7^ z10W=;KO;^}S+(dU%}o{>?vxR*Lvgj?d)@#W+^r*QH(6Jb(>a+Ru3mpbTK;iz=6~P+ N@%zYgVE6lhKLKg&E4=^! literal 0 HcmV?d00001 diff --git a/PyTorch/build-in/Classification/SKNet/resnet_loss.txt b/PyTorch/build-in/Classification/SKNet/resnet_loss.txt new file mode 100644 index 000000000..5ba79e51b --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/resnet_loss.txt @@ -0,0 +1,29 @@ +=== CUDA === +6.722200 5.131600 5.915500 5.783700 6.252200 59.203100 20.966800 35.868000 34.632800 46.643600 +35.153800 41.165300 48.390600 28.469700 27.430200 37.822800 32.582000 19.891400 12.522000 12.106000 +16.621300 15.517600 17.301500 14.923800 7.623500 6.523400 5.043700 14.833500 10.851100 6.979500 +7.734900 8.155800 8.726100 8.512200 12.779800 6.853000 5.218800 6.705100 7.197300 4.713900 +6.400400 6.716300 5.617700 5.742200 6.246100 5.667000 4.404100 6.937000 5.252400 4.816400 +4.772900 4.983900 4.612300 5.179200 4.505900 4.680700 4.637700 4.376000 4.464400 4.775400 +4.465300 4.737300 4.744100 4.904800 5.029800 5.647500 4.488800 6.481400 4.438000 4.585000 +4.657200 4.438700 4.719200 4.987800 4.421600 5.077100 4.496800 4.625500 4.993700 4.719200 +4.710400 4.707500 4.793900 4.690900 5.836400 4.506600 4.635700 4.522500 4.782700 4.443800 +4.710900 4.803700 4.661100 4.900900 4.777800 4.624000 4.619100 4.593500 4.877000 4.713900 + +=== SDAA === +6.725100 5.131800 31.123000 42.451500 38.739300 44.960900 44.694100 34.868200 33.409200 34.249800 +31.345700 20.143100 14.624300 28.907700 15.078900 18.812300 13.294900 33.630900 14.099100 18.008300 +11.372100 12.844500 15.206500 7.287600 10.127400 9.049800 6.249500 4.945300 7.943400 5.042500 +5.852500 4.924800 
6.449200 9.627900 7.587900 5.962900 4.840300 5.529800 10.456100 5.598600 +5.537600 4.422900 4.964400 5.057100 5.792000 6.228000 5.322800 4.571300 4.973600 4.879900 +4.813000 4.700200 4.487500 5.230500 4.220700 4.823200 4.521000 5.182100 5.922900 4.387200 +4.585900 4.917500 4.928200 4.788600 4.723600 4.774400 4.610400 4.845700 4.514200 4.766100 +5.018600 4.715300 4.779800 4.624300 4.476300 4.760500 4.502700 4.383500 4.783700 4.772500 +4.583000 4.667500 4.724600 4.572800 4.492700 4.439900 4.571000 4.491200 4.741700 4.707000 +4.671900 4.492900 4.674800 4.914600 4.663600 4.661100 4.769000 4.473900 4.906200 4.654300 + +=== RESULT === +MeanRelativeError: 0.11245239121555989 +MeanAbsoluteError: -0.43448099999999995 +Rule,mean_absolute_error -0.43448099999999995 +pass mean_relative_error=0.11245239121555989 <= 0.05 or mean_absolute_error=-0.43448099999999995 <= 0.0002 diff --git a/PyTorch/build-in/Classification/SKNet/run b/PyTorch/build-in/Classification/SKNet/run new file mode 100644 index 000000000..17276b713 --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/run @@ -0,0 +1 @@ +bash ../sdaaTest.sh sknet 2 5 diff --git a/PyTorch/build-in/Classification/SKNet/sknet.py b/PyTorch/build-in/Classification/SKNet/sknet.py new file mode 100644 index 000000000..88a812b48 --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/sknet.py @@ -0,0 +1,242 @@ +import torch +import torch.nn as nn +import math + +class SafeGroupConv2d(nn.Module): + """ + 修改版:在初始化时强制对齐原生卷积的随机权重。 + """ + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=False): + super(SafeGroupConv2d, self).__init__() + + assert in_channels % groups == 0 + assert out_channels % groups == 0 + + self.groups = groups + self.convs = nn.ModuleList() + + in_channels_per_group = in_channels // groups + out_channels_per_group = out_channels // groups + + # ======================================================= + # 【核心魔法】:影子层策略 (Shadow Layer Strategy) + # 1. 
创建一个临时的、原生的、带 groups 的卷积层 + # 它的唯一作用就是按照 PyTorch 标准逻辑消耗随机种子 + # ======================================================= + shadow_conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, # 这里用原始的 groups + bias=bias + ) + + # 2. 获取这个影子层初始化好的权重 (Shape: [Out, In/G, K, K]) + # 因为此时全局 Seed 是固定的,所以这里生成的权重和 CUDA 上的一模一样 + master_weight = shadow_conv.weight.data + + # 3. 将权重在输出通道维度(dim=0)切分成 groups 份 + # 每一份 Shape: [Out/G, In/G, K, K] + split_weights = torch.chunk(master_weight, groups, dim=0) + + if bias: + master_bias = shadow_conv.bias.data + split_bias = torch.chunk(master_bias, groups, dim=0) + + # ======================================================= + # 4. 创建实际运行的小卷积,并将权重“塞”进去 + # ======================================================= + for i in range(groups): + # 创建小卷积 (groups=1, SDAA 支持) + mini_conv = nn.Conv2d( + in_channels_per_group, + out_channels_per_group, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=1, + bias=bias + ) + + # 【强制覆盖权重】 + mini_conv.weight.data = split_weights[i].clone() + + if bias: + mini_conv.bias.data = split_bias[i].clone() + + self.convs.append(mini_conv) + + # 5. 
影子层完成了它的使命,在此处会被自动回收,不占用训练显存 + + def forward(self, x): + x_splits = torch.chunk(x, self.groups, dim=1) + results = [] + for i, conv in enumerate(self.convs): + out = conv(x_splits[i]) + results.append(out) + return torch.cat(results, dim=1) + + +class SKConv(nn.Module): + def __init__(self, channels, branches=2, groups=32, reduce=16, stride=1, len=32): + super(SKConv, self).__init__() + len = max(channels // reduce, len) + self.convs = nn.ModuleList([]) + + for i in range(branches): + # SKNet 的核心机制:不同分支使用不同的 dilation 和 padding + dilation = 1 + i + padding = 1 + i + + # === 修改逻辑开始 === + # 检测是否会触发 TecoDNN 的不支持配置 (groups > 1 且 dilation > 1) + if groups > 1 and dilation > 1: + # 使用自定义的拆解版卷积,避开报错 + conv_layer = SafeGroupConv2d( + channels, channels, kernel_size=3, stride=stride, + padding=padding, dilation=dilation, groups=groups, bias=False + ) + else: + # 正常情况(如 dilation=1)继续使用原生算子,保持最高性能 + conv_layer = nn.Conv2d( + channels, channels, kernel_size=3, stride=stride, + padding=padding, dilation=dilation, groups=groups, bias=False + ) + # === 修改逻辑结束 === + + self.convs.append(nn.Sequential( + conv_layer, + nn.BatchNorm2d(channels), + nn.ReLU(inplace=True) + )) + + self.gap = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Sequential( + nn.Conv2d(channels, len, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(len), + nn.ReLU(inplace=True) + ) + self.fcs = nn.ModuleList([]) + for i in range(branches): + self.fcs.append( + nn.Conv2d(len, channels, kernel_size=1, stride=1) + ) + self.softmax = nn.Softmax(dim=1) + + def forward(self, x): + x = [conv(x) for conv in self.convs] + x = torch.stack(x, dim=1) + attention = torch.sum(x, dim=1) + attention = self.gap(attention) + attention = self.fc(attention) + attention = [fc(attention) for fc in self.fcs] + attention = torch.stack(attention, dim=1) + attention = self.softmax(attention) + x = torch.sum(x * attention, dim=1) + return x + + +class SKUnit(nn.Module): + def __init__(self, in_channels, mid_channels, out_channels, 
branches=2, group=32, reduce=16, stride=1, len=32): + super(SKUnit, self).__init__() + + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(mid_channels), + nn.ReLU(inplace=True) + ) + + self.conv2 = SKConv(mid_channels, branches=branches, groups=group, reduce=reduce, stride=stride, len=len) + + self.conv3 = nn.Sequential( + nn.Conv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(out_channels) + ) + + if in_channels == out_channels: + self.shortcut = nn.Sequential() + else: + self.shortcut = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_channels) + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + residual = x + residual = self.shortcut(residual) + + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + x += residual + return self.relu(x) + + +class sknet(nn.Module): + def __init__(self, num_classes, num_block_lists=[3, 4, 6, 3]): + super(sknet, self).__init__() + self.basic_conv = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + ) + + self.stage_1 = self._make_layer(64, 128, 256, nums_block=num_block_lists[0], stride=1) + self.stage_2 = self._make_layer(256, 256, 512, nums_block=num_block_lists[1], stride=2) + self.stage_3 = self._make_layer(512, 512, 1024, nums_block=num_block_lists[2], stride=2) + self.stage_4 = self._make_layer(1024, 1024, 2048, nums_block=num_block_lists[3], stride=2) + + self.gap = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Linear(2048, num_classes) + + for m in self.modules(): + if isinstance(m, (nn.Conv2d, nn.Linear)): + nn.init.kaiming_normal_(m.weight, mode='fan_in') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) 
+ + def _make_layer(self, in_channels, mid_channels, out_channels, nums_block, stride=1): + layers = [SKUnit(in_channels, mid_channels, out_channels, stride=stride)] + for _ in range(1, nums_block): + layers.append(SKUnit(out_channels, mid_channels, out_channels)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.basic_conv(x) + x = self.stage_1(x) + x = self.stage_2(x) + x = self.stage_3(x) + x = self.stage_4(x) + x = self.gap(x) + x = x.view(x.size(0), -1) + + x = self.classifier(x) + return x + + +def SKNet(num_classes=1000, depth=50): + assert depth in [50, 101], 'depth invalid' + key2blocks = { + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + } + model = sknet(num_classes, key2blocks[depth]) + return model + +def Model(num_classes): # welo + r"""Return your custom model + """ + return SKNet(num_classes=num_classes) + + \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SKNet/sknet222.py b/PyTorch/build-in/Classification/SKNet/sknet222.py new file mode 100644 index 000000000..781bb064b --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/sknet222.py @@ -0,0 +1,145 @@ +import torch +from torch import nn + + +class SKConv(nn.Module): + def __init__(self, channels, branches=2, groups=32, reduce=16, stride=1, len=32): + super(SKConv, self).__init__() + len = max(channels // reduce, len) + self.convs = nn.ModuleList([]) + + for i in range(branches): + self.convs.append(nn.Sequential( + nn.Conv2d(channels, channels, kernel_size=3, stride=stride, padding=1 + i, dilation=1 + i, + groups=groups, bias=False), + nn.BatchNorm2d(channels), + nn.ReLU(inplace=True) + )) + + self.gap = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Sequential( + nn.Conv2d(channels, len, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(len), + nn.ReLU(inplace=True) + ) + self.fcs = nn.ModuleList([]) + for i in range(branches): + self.fcs.append( + nn.Conv2d(len, channels, kernel_size=1, stride=1) + ) + self.softmax = nn.Softmax(dim=1) + + def 
forward(self, x): + x = [conv(x) for conv in self.convs] + x = torch.stack(x, dim=1) + attention = torch.sum(x, dim=1) + attention = self.gap(attention) + attention = self.fc(attention) + attention = [fc(attention) for fc in self.fcs] + attention = torch.stack(attention, dim=1) + attention = self.softmax(attention) + x = torch.sum(x * attention, dim=1) + return x + + +class SKUnit(nn.Module): + def __init__(self, in_channels, mid_channels, out_channels, branches=2, group=32, reduce=16, stride=1, len=32): + super(SKUnit, self).__init__() + + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(mid_channels), + nn.ReLU(inplace=True) + ) + + self.conv2 = SKConv(mid_channels, branches=branches, groups=group, reduce=reduce, stride=stride, len=len) + + self.conv3 = nn.Sequential( + nn.Conv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(out_channels) + ) + + if in_channels == out_channels: # when dim not change, input_features could be added diectly to out + self.shortcut = nn.Sequential() + else: # when dim not change, input_features should also change dim to be added to out + self.shortcut = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(out_channels) + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + residual = x + residual = self.shortcut(residual) + + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + x += residual + return self.relu(x) + + +class sknet(nn.Module): + def __init__(self, num_classes, num_block_lists=[3, 4, 6, 3]): + super(sknet, self).__init__() + self.basic_conv = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + ) + + self.stage_1 = self._make_layer(64, 128, 256, nums_block=num_block_lists[0], stride=1) + self.stage_2 = 
self._make_layer(256, 256, 512, nums_block=num_block_lists[1], stride=2) + self.stage_3 = self._make_layer(512, 512, 1024, nums_block=num_block_lists[2], stride=2) + self.stage_4 = self._make_layer(1024, 1024, 2048, nums_block=num_block_lists[3], stride=2) + + self.gap = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Linear(2048, num_classes) + + for m in self.modules(): + if isinstance(m, (nn.Conv2d, nn.Linear)): + nn.init.kaiming_normal_(m.weight, mode='fan_in') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + def _make_layer(self, in_channels, mid_channels, out_channels, nums_block, stride=1): + layers = [SKUnit(in_channels, mid_channels, out_channels, stride=stride)] + for _ in range(1, nums_block): + layers.append(SKUnit(out_channels, mid_channels, out_channels)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.basic_conv(x) + x = self.stage_1(x) + x = self.stage_2(x) + x = self.stage_3(x) + x = self.stage_4(x) + x = self.gap(x) + x = x.view(x.size(0), -1) + + x = self.classifier(x) + return x + + +def SKNet(num_classes=1000, depth=50): + assert depth in [50, 101], 'depth invalid' + key2blocks = { + 50: [3, 4, 6, 3], + 101: [3, 4, 23, 3], + } + model = sknet(num_classes, key2blocks[depth]) + return model + +def Model(num_classes): # welo + r"""Return your custom model + """ + return SKNet(num_classes=num_classes) + + + +# 问题由底层 tecodnn(torch-sdaa 后端)不支持 groups>1 与 dilation>1 同时出现的卷积配置,当模型在 SKConv 某些分支构造 kernel=3, dilation=2, padding=2, groups=32 时,底层返回 TECODNN_STATUS_NOT_SUPPORTED 并抛出错误,导致训练中断。 diff --git a/PyTorch/build-in/Classification/SKNet/sknetdebug.py b/PyTorch/build-in/Classification/SKNet/sknetdebug.py new file mode 100644 index 000000000..2a0ccdb7d --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/sknetdebug.py @@ -0,0 +1,161 @@ +import torch +from torch import nn + +class DebugConv2d(nn.Conv2d): + def forward(self, x): + try: + 
class SKConv(nn.Module):
    """Selective-Kernel convolution (debug variant built on DebugConv2d).

    Runs `branches` parallel grouped convolutions with growing receptive
    fields, then fuses them with learned per-branch channel attention.

    Fix: the original branches used kernel_size=3 with dilation=1+i and
    groups=32.  The tecodnn backend rejects groups>1 combined with dilation>1
    (TECODNN_STATUS_NOT_SUPPORTED, see the error note in this file), so branch
    i now uses a dense kernel of size 3 + 2*i with dilation=1.  Padding stays
    1 + i, so the receptive field and the output size are unchanged.
    """

    def __init__(self, channels, branches=2, groups=32, reduce=16, stride=1, len=32):
        super(SKConv, self).__init__()
        len = max(channels // reduce, len)  # attention bottleneck width
        self.convs = nn.ModuleList([])
        for i in range(branches):
            self.convs.append(nn.Sequential(
                # Dense kernel 3+2*i / dilation 1 replaces kernel 3 / dilation 1+i:
                # same receptive field, no unsupported groups+dilation combination.
                DebugConv2d(channels, channels, kernel_size=3 + 2 * i, stride=stride,
                            padding=1 + i, dilation=1, groups=groups, bias=False),
                nn.BatchNorm2d(channels),
                nn.ReLU(inplace=True)
            ))
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        # Squeeze: channels -> len bottleneck used to compute branch attention.
        self.fc = nn.Sequential(
            DebugConv2d(channels, len, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(len),
            nn.ReLU(inplace=True)
        )
        # One excitation head per branch, mapping the bottleneck back to channels.
        self.fcs = nn.ModuleList([])
        for i in range(branches):
            self.fcs.append(
                DebugConv2d(len, channels, kernel_size=1, stride=1)
            )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Per-branch features stacked as (N, branches, C, H, W).
        x = [conv(x) for conv in self.convs]
        x = torch.stack(x, dim=1)
        # Fuse -> global pool -> squeeze -> per-branch excitation.
        attention = torch.sum(x, dim=1)
        attention = self.gap(attention)
        attention = self.fc(attention)
        attention = [fc(attention) for fc in self.fcs]
        attention = torch.stack(attention, dim=1)
        # Softmax over the branch dimension, then attention-weighted sum.
        attention = self.softmax(attention)
        x = torch.sum(x * attention, dim=1)
        return x


class SKUnit(nn.Module):
    """Residual Selective-Kernel bottleneck (debug variant).

    1x1 reduce -> SKConv -> 1x1 expand, added to an identity or projection
    shortcut.  Parameter `len` is the minimum attention-bottleneck width
    (name kept for backward compatibility although it shadows builtin len).
    """

    def __init__(self, in_channels, mid_channels, out_channels, branches=2, group=32, reduce=16,
                 stride=1, len=32):
        super(SKUnit, self).__init__()

        self.conv1 = nn.Sequential(
            DebugConv2d(in_channels, mid_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True)
        )

        self.conv2 = SKConv(mid_channels, branches=branches, groups=group, reduce=reduce, stride=stride, len=len)

        self.conv3 = nn.Sequential(
            DebugConv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

        # Bug fix: identity shortcut is only correct when both channels and
        # spatial size are preserved; with stride != 1 a projection is required
        # even if in_channels == out_channels.
        if in_channels == out_channels and stride == 1:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                DebugConv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        residual = self.shortcut(x)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x += residual
        return self.relu(x)


class sknet(nn.Module):
    """SKNet backbone (debug variant): stem + 4 SKUnit stages + linear head."""

    def __init__(self, num_classes, num_block_lists=[3, 4, 6, 3]):
        super(sknet, self).__init__()
        # ImageNet-style stem: 7x7/2 conv + 3x3/2 max-pool.
        self.basic_conv = nn.Sequential(
            DebugConv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        self.stage_1 = self._make_layer(64, 128, 256, nums_block=num_block_lists[0], stride=1)
        self.stage_2 = self._make_layer(256, 256, 512, nums_block=num_block_lists[1], stride=2)
        self.stage_3 = self._make_layer(512, 512, 1024, nums_block=num_block_lists[2], stride=2)
        self.stage_4 = self._make_layer(1024, 1024, 2048, nums_block=num_block_lists[3], stride=2)

        self.gap = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(2048, num_classes)

        # Kaiming init for convs/linear, constant init for BN.
        for m in self.modules():
            if isinstance(m, (DebugConv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_in')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def _make_layer(self, in_channels, mid_channels, out_channels, nums_block, stride=1):
        # First unit handles channel change / stride; the rest keep shape.
        layers = [SKUnit(in_channels, mid_channels, out_channels, stride=stride)]
        for _ in range(1, nums_block):
            layers.append(SKUnit(out_channels, mid_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.basic_conv(x)
        x = self.stage_1(x)
        x = self.stage_2(x)
        x = self.stage_3(x)
        x = self.stage_4(x)
        x = self.gap(x)
        x = x.view(x.size(0), -1)

        x = self.classifier(x)
        return x


def SKNet(num_classes=1000, depth=50):
    """Build an SKNet-50 or SKNet-101 classifier; other depths raise AssertionError."""
    assert depth in [50, 101], 'depth invalid'
    key2blocks = {
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
    }
    model = sknet(num_classes, key2blocks[depth])
    return model


def Model(num_classes):  # welo
    r"""Return your custom model
    """
    return SKNet(num_classes=num_classes)
已存在:{already}") + else: + os.environ["CUBLAS_WORKSPACE_CONFIG"] = config + print(f"[seed_utils] 已设置 CUBLAS_WORKSPACE_CONFIG={config} (注意:请在 import torch 前设置以保证生效)") + +def set_global_seed(seed: int = 42, set_threads: bool = True): + """ + 统一随机性设置。注意:若希望完全发挥效果,请在主脚本入口(import torch 之前) + 先调用 ensure_cublas_workspace(...) 或在 shell 中 export CUBLAS_WORKSPACE_CONFIG。 + """ + ensure_cublas_workspace() # 会设置 env 并提醒 + os.environ["PYTHONHASHSEED"] = str(seed) + + if set_threads: + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + + random.seed(seed) + np.random.seed(seed) + + # 现在导入 torch(晚导入以便前面 env 生效) + import torch + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + # 强制确定性(如果存在不确定性算子,PyTorch 会报错并提示) + try: + torch.use_deterministic_algorithms(True) + except Exception as e: + print("[seed_utils] 设置 deterministic 模式时出错:", e) + print("[seed_utils] 请确认 CUBLAS_WORKSPACE_CONFIG 已在 import torch 之前设置。") + + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + if set_threads: + torch.set_num_threads(1) + torch.set_num_interop_threads(1) + + print(f"[seed_utils] 全局 seed 已设置为 {seed}") + +set_global_seed(2025) + +""" +通用训练模版(优先从本地导入 Model -> 支持 DDP / 单卡,AMP,resume,日志,checkpoint) +保存为 train_template_localmodel.py +""" +import torch +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as tv_models + +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler + +from torch.sdaa import amp +# from torch.cuda import amp + + +# ---------------------------- +# Helper utilities (self-contained) +# ---------------------------- +class AverageMeter(object): + def __init__(self, 
def accuracy(output, target, topk=(1,)):
    """Compute precision@k for each k in `topk`.

    Returns a list of 0-dim tensors holding percentages, matching the
    historical interface of this helper (one tensor per requested k).
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Top-maxk predicted class indices, transposed to (maxk, N).
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        # Boolean hit mask against the broadcast targets.
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        results = []
        for k in topk:
            # Hits within the first k ranks, flattened and summed (0-dim tensor),
            # then converted in place to a percentage of the batch.
            hits = correct[:k].reshape(-1).float().sum()
            results.append(hits.mul_(100.0 / batch_size))
        return results


def save_checkpoint(state, is_best, save_dir, filename='checkpoint.pth'):
    """Persist `state` under `save_dir`; duplicate to model_best.pth when `is_best`."""
    torch.save(state, os.path.join(save_dir, filename))
    if is_best:
        torch.save(state, os.path.join(save_dir, 'model_best.pth'))


def set_seed(seed, deterministic=False):
    """Seed python/numpy/torch RNGs; toggle cudnn determinism vs. benchmark mode."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Deterministic mode forfeits cudnn autotuning; benchmark mode is the inverse.
    cudnn.deterministic = bool(deterministic)
    cudnn.benchmark = not deterministic
default=42, type=int, help='random seed') + parser.add_argument('--arch', default='None', type=str, help='model name') + parser.add_argument('--deterministic', action='store_true', help='set cudnn deterministic (may be slower)') + parser.add_argument('--dataset', default='cifar10', choices=['cifar10','cifar100','imagenet','custom'], help='which dataset') + parser.add_argument('--datapath', default='./data', type=str, help='dataset root / imagenet root / custom root') + parser.add_argument('--imagenet_dir', default='./imagenet', type=str, help='if dataset=imagenet, path to imagenet root') + parser.add_argument('--custom_eval_dir', default=None, help='if dataset=custom, provide val dir') + parser.add_argument('--num_workers', default=4, type=int, help='dataloader workers per process') + parser.add_argument('--epochs', default=200, type=int) + parser.add_argument('--batch_size', default=128, type=int) + parser.add_argument('--model_name', default='resnet18', help='torchvision model name or python path e.g. 
mypkg.mymodule.Model (used if no local Model)') + parser.add_argument('--num_classes', default=None, type=int, help='override num classes (auto-detect for common sets)') + parser.add_argument('--pretrained', action='store_true', help='use torchvision pretrained weights when available') + parser.add_argument('--optimizer', default='sgd', choices=['sgd','adam','adamw'], help='optimizer') + parser.add_argument('--lr', '--learning_rate', default=0.1, type=float) + parser.add_argument('--momentum', default=0.9, type=float) + parser.add_argument('--weight_decay', default=5e-4, type=float) + parser.add_argument('--nesterov', action='store_true') + parser.add_argument('--scheduler', default='multistep', choices=['multistep','step','cosine','none'], help='lr scheduler') + parser.add_argument('--milestones', default='100,150', type=str, help='milestones for multistep (comma sep)') + parser.add_argument('--step_size', default=30, type=int, help='step size for StepLR or cosine max epochs') + parser.add_argument('--gamma', default=0.1, type=float) + parser.add_argument('--scheduler_step_per_batch', action='store_true', help='call scheduler.step() per batch (for some schedulers)') + parser.add_argument('--resume', default='', type=str, help='path to checkpoint to resume from') + parser.add_argument('--start_epoch', default=0, type=int) + parser.add_argument('--print_freq', default=100, type=int) + parser.add_argument('--save_freq', default=10, type=int, help='save checkpoint every N epochs (rank0 only)') + parser.add_argument('--amp', action='store_true', default = True,help='use automatic mixed precision (AMP)') + parser.add_argument('--grad_accum_steps', default=1, type=int, help='gradient accumulation steps') + parser.add_argument('--local_rank', default=None, type=int, help='local rank passed by torchrun (if any). 
Use -1 or None for non-distributed') + parser.add_argument('--cutmix_prob', default=0.0, type=float) + parser.add_argument('--beta', default=1.0, type=float) + parser.add_argument('--seed_sampler', default=False, action='store_true', help='set sampler epoch seeds to make deterministic distributed shuffling') + args = parser.parse_args() + args.milestones = [int(x) for x in args.milestones.split(',')] if args.milestones else [] + return args + +# ---------------------------- +# build model (优先 LocalModel) +# ---------------------------- +def build_model_with_local_priority(args, device=None): + """ + 用参数 args.arch 作为模块名导入 Model() + 如果模块不存在或没有 Model 类,则报错停止。 + """ + try: + # 动态导入模块,比如 args.arch = "rexnet" + mod = importlib.import_module(args.arch) + Model = getattr(mod, "Model") # 从模块中获取 Model 类 + except Exception as e: + raise RuntimeError( + f"无法导入模型模块 '{args.arch}' 或未找到类 Model。" + f"\n错误信息:{e}" + ) + + # 解析数据集类别数 + if args.dataset == 'cifar10': + num_classes = 10 + elif args.dataset == 'cifar100': + num_classes = 100 + else: + print(f"[ERROR] 不支持的数据集类型:{args.dataset},无法确定类别数。程序终止。") + sys.exit(1) + + + # 实例化 + try: + model = Model(num_classes) + except Exception as e: + raise RuntimeError( + f"Model() 实例化失败,请检查模型构造函数。\n错误信息:{e}" + ) + + return model + +# ---------------------------- +# Data loader factory +# ---------------------------- +def build_dataloaders(args, rank, world_size): + if args.dataset == 'cifar10' or args.dataset == 'cifar100': + # mean = (0.4914, 0.4822, 0.4465) + # std = (0.2470, 0.2435, 0.2616) if args.dataset == 'cifar10' else (0.2023, 0.1994, 0.2010) + # train_transform = transforms.Compose([ + # transforms.RandomCrop(32, padding=4), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # transforms.Normalize(mean, std), + # ]) + # test_transform = transforms.Compose([ + # transforms.ToTensor(), + # transforms.Normalize(mean, std), + # ]) + + # 使用 ImageNet 风格的 224 输入 + mean = (0.485, 0.456, 0.406) + std = (0.229, 0.224, 0.225) + 
train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), # 随机裁剪并缩放到 224x224 + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean, std), + ]) + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean, std), + ]) + + root = args.datapath + if args.dataset == 'cifar10': + train_set = datasets.CIFAR10(root=root, train=True, download=False, transform=train_transform) + val_set = datasets.CIFAR10(root=root, train=False, download=False, transform=test_transform) + num_classes = 10 + else: + train_set = datasets.CIFAR100(root=root, train=True, download=False, transform=train_transform) + val_set = datasets.CIFAR100(root=root, train=False, download=False, transform=test_transform) + num_classes = 100 + + elif args.dataset == 'imagenet': + train_dir = os.path.join(args.imagenet_dir, 'train') + val_dir = os.path.join(args.imagenet_dir, 'val') + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)), + ]) + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)), + ]) + train_set = datasets.ImageFolder(train_dir, train_transform) + val_set = datasets.ImageFolder(val_dir, test_transform) + num_classes = args.num_classes or 1000 + + elif args.dataset == 'custom': + train_dir = os.path.join(args.datapath, 'train') + val_dir = args.custom_eval_dir or os.path.join(args.datapath, 'val') + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + ]) + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + ]) + train_set = 
datasets.ImageFolder(train_dir, train_transform) + val_set = datasets.ImageFolder(val_dir, test_transform) + num_classes = len(train_set.classes) + else: + raise ValueError("Unknown dataset") + + if dist.is_initialized() and world_size > 1: + train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=rank, shuffle=True) + else: + train_sampler = None + + train_loader = DataLoader(train_set, + batch_size=args.batch_size, + shuffle=(train_sampler is None), + num_workers=args.num_workers, + pin_memory=True, + sampler=train_sampler, + drop_last=False) + val_loader = DataLoader(val_set, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.num_workers, + pin_memory=True) + + return train_loader, val_loader, num_classes, train_sampler + +# ---------------------------- +# Train & validate +# ---------------------------- +def train_one_epoch(args, epoch, model, criterion, optimizer, train_loader, device, scaler, scheduler=None, train_sampler=None): + batch_time = AverageMeter('Time') + data_time = AverageMeter('Data') + losses = AverageMeter('Loss') + top1 = AverageMeter('Acc@1') + top5 = AverageMeter('Acc@5') + + model.train() + end = time.time() + optimizer.zero_grad() + + iters = len(train_loader) + for i, (images, targets) in enumerate(train_loader): + data_time.update(time.time() - end) + images = images.to(device, non_blocking=True) + targets = targets.to(device, non_blocking=True) + + if args.amp: + with amp.autocast(): + outputs = model(images) + loss = criterion(outputs, targets) / args.grad_accum_steps + else: + outputs = model(images) + loss = criterion(outputs, targets) / args.grad_accum_steps + + if args.amp: + scaler.scale(loss).backward() + else: + loss.backward() + + if (i + 1) % args.grad_accum_steps == 0: + if args.amp: + scaler.step(optimizer) + scaler.update() + else: + optimizer.step() + optimizer.zero_grad() + if scheduler is not None and args.scheduler_step_per_batch: + scheduler.step() + + with torch.no_grad(): + acc1, 
def validate(args, model, val_loader, criterion, device):
    """Evaluate `model` on `val_loader`.

    Returns an OrderedDict with the averaged validation loss, top-1 and
    top-5 accuracy over the whole loader.
    """
    loss_meter = AverageMeter('Loss')
    acc1_meter = AverageMeter('Acc@1')
    acc5_meter = AverageMeter('Acc@5')

    model.eval()
    with torch.no_grad():
        for images, targets in tqdm(val_loader):
            images = images.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            outputs = model(images)
            loss = criterion(outputs, targets)

            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            n = images.size(0)
            loss_meter.update(loss.item(), n)
            acc1_meter.update(acc1.item(), n)
            acc5_meter.update(acc5.item(), n)

    return OrderedDict([('loss', loss_meter.avg), ('acc1', acc1_meter.avg), ('acc5', acc5_meter.avg)])
= 1 + + if distributed: + torch.cuda.set_device(args.local_rank) + device = torch.device('cuda', args.local_rank) + else: + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + set_seed(args.seed + (rank if distributed else 0), deterministic=args.deterministic) + + save_dir = os.path.join('models', args.name) + if rank == 0: + os.makedirs(save_dir, exist_ok=True) + with open(os.path.join(save_dir, 'args.json'), 'w') as f: + json.dump(vars(args), f, indent=2) + if distributed: + dist.barrier() + + train_loader, val_loader, auto_num_classes, train_sampler = build_dataloaders(args, rank, world_size) + if args.num_classes is None: + args.num_classes = auto_num_classes + + # 使用本地 Model 优先(LocalModel 已在文件顶部尝试导入) + model = build_model_with_local_priority(args, device) + model.to(device) + + if distributed: + model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank, find_unused_parameters=True) + + criterion = nn.CrossEntropyLoss().to(device) + params = [p for p in model.parameters() if p.requires_grad] + if args.optimizer == 'sgd': + optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum, + weight_decay=args.weight_decay, nesterov=args.nesterov) + elif args.optimizer == 'adam': + optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) + elif args.optimizer == 'adamw': + optimizer = optim.AdamW(params, lr=args.lr, weight_decay=args.weight_decay) + else: + raise ValueError('Unknown optimizer') + + scheduler = None + if args.scheduler == 'multistep': + scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones, gamma=args.gamma) + elif args.scheduler == 'step': + scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma) + elif args.scheduler == 'cosine': + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) + elif args.scheduler == 'none': + scheduler = None + + scaler = amp.GradScaler() if args.amp else None + + 
start_epoch = args.start_epoch + best_acc = 0.0 + if args.resume: + if os.path.isfile(args.resume): + ckpt = torch.load(args.resume, map_location='cpu') + model_state = ckpt.get('state_dict', ckpt) + if isinstance(model, DDP): + model.module.load_state_dict(model_state) + else: + model.load_state_dict(model_state) + if 'optimizer' in ckpt: + optimizer.load_state_dict(ckpt['optimizer']) + start_epoch = ckpt.get('epoch', start_epoch) + best_acc = ckpt.get('best_acc', best_acc) + print(f"=> resumed from {args.resume}, start_epoch={start_epoch}") + else: + print(f"=> resume path {args.resume} not found") + + log_columns = ['epoch', 'lr', 'loss', 'acc1', 'acc5', 'val_loss', 'val_acc1', 'val_acc5'] + log_df = pd.DataFrame(columns=log_columns) + + total_epochs = args.epochs + for epoch in range(start_epoch, total_epochs): + if train_sampler is not None: + if args.seed_sampler: + train_sampler.set_epoch(epoch + args.seed) + else: + train_sampler.set_epoch(epoch) + + if rank == 0: + print(f"==== Epoch {epoch}/{total_epochs - 1} ====") + + train_log = train_one_epoch(args, epoch, model, criterion, optimizer, train_loader, device, scaler, scheduler, train_sampler) + val_log = validate(args, model, val_loader, criterion, device) + current_lr = optimizer.param_groups[0]['lr'] + + if rank == 0: + print("Epoch[{}]: train_loss {:.4f} acc1 {:.2f} acc5 {:.2f} | val_loss {:.4f} acc1 {:.2f} acc5 {:.2f} lr {:.6f}".format( + epoch, train_log['loss'], train_log['acc1'], train_log['acc5'], + val_log['loss'], val_log['acc1'], val_log['acc5'], current_lr + )) + row = { + 'epoch': epoch, + 'lr': current_lr, + 'loss': train_log['loss'], + 'acc1': train_log['acc1'], + 'acc5': train_log['acc5'], + 'val_loss': val_log['loss'], + 'val_acc1': val_log['acc1'], + 'val_acc5': val_log['acc5'], + } + # log_df = log_df.append(row, ignore_index=True) + new_row_df = pd.DataFrame([row]) + log_df = pd.concat([log_df, new_row_df], ignore_index=True) + log_df.to_csv(os.path.join(save_dir, 'log.csv'), 
index=False) + + is_best = val_log['acc1'] > best_acc + if is_best: + best_acc = val_log['acc1'] + if (epoch % args.save_freq == 0) or is_best or (epoch == total_epochs - 1): + state = { + 'epoch': epoch, + 'state_dict': model.module.state_dict() if isinstance(model, DDP) else model.state_dict(), + 'best_acc': best_acc, + 'optimizer': optimizer.state_dict(), + 'args': vars(args) + } + save_checkpoint(state, is_best, save_dir, filename=f'checkpoint_epoch_{epoch}.pth') + + if dist.is_initialized(): + dist.barrier() + if rank == 0: + print("Training finished. Best val acc1: {:.2f}".format(best_acc)) + +if __name__ == '__main__': + # set_global_seed(2025) # 任意数字都行 + main() \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SKNet/weloTrainStep.py b/PyTorch/build-in/Classification/SKNet/weloTrainStep.py new file mode 100644 index 000000000..2c191729c --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/weloTrainStep.py @@ -0,0 +1,647 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +import os +import random +import sys +import time +import json +import argparse +from collections import OrderedDict +from pathlib import Path +import numpy as np +import pandas as pd +from tqdm import tqdm +import importlib + +os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # 强烈推荐在 shell/最顶端设置 +os.environ["PYTHONHASHSEED"] = "12345" +os.environ["OMP_NUM_THREADS"] = "1" +os.environ["MKL_NUM_THREADS"] = "1" + +def ensure_cublas_workspace(config=":4096:8"): + """ + 尝试为 cuBLAS 设置可复现 workspace。强烈建议在主脚本入口处(import torch 之前) + 通过 export 设置该 env。此函数会在运行时设置,但如果 torch 已经被 import, + 则可能为时已晚——函数会打印提醒。 + """ + already = os.environ.get("CUBLAS_WORKSPACE_CONFIG") + if already: + print(f"[seed_utils] CUBLAS_WORKSPACE_CONFIG 已存在:{already}") + else: + os.environ["CUBLAS_WORKSPACE_CONFIG"] = config + print(f"[seed_utils] 已设置 CUBLAS_WORKSPACE_CONFIG={config} (注意:请在 import torch 前设置以保证生效)") + +def set_global_seed(seed: int = 42, set_threads: bool = True): + """ + 
统一随机性设置。注意:若希望完全发挥效果,请在主脚本入口(import torch 之前) + 先调用 ensure_cublas_workspace(...) 或在 shell 中 export CUBLAS_WORKSPACE_CONFIG。 + """ + ensure_cublas_workspace() # 会设置 env 并提醒 + os.environ["PYTHONHASHSEED"] = str(seed) + + if set_threads: + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + + random.seed(seed) + np.random.seed(seed) + + # 现在导入 torch(晚导入以便前面 env 生效) + import torch + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + # 强制确定性(如果存在不确定性算子,PyTorch 会报错并提示) + try: + torch.use_deterministic_algorithms(True) + except Exception as e: + print("[seed_utils] 设置 deterministic 模式时出错:", e) + print("[seed_utils] 请确认 CUBLAS_WORKSPACE_CONFIG 已在 import torch 之前设置。") + + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + if set_threads: + torch.set_num_threads(1) + torch.set_num_interop_threads(1) + + print(f"[seed_utils] 全局 seed 已设置为 {seed}") + +set_global_seed(2025) + +""" +通用训练模版(优先从本地导入 Model -> 支持 DDP / 单卡,AMP,resume,日志,checkpoint) +保存为 train_template_localmodel.py +""" +import torch +import torch.nn as nn +import torch.optim as optim +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as tv_models + +import torch.distributed as dist +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler + +from torch.sdaa import amp +# from torch.cuda import amp + + +# ---------------------------- +# Helper utilities (self-contained) +# ---------------------------- +class AverageMeter(object): + def __init__(self, name='Meter', fmt=':.4f'): + self.name = name + self.fmt = fmt + self.reset() + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + def update(self, val, n=1): + self.val = val + self.sum += val * n + 
def accuracy(output, target, topk=(1,)):
    """Precision@k for each requested k, returned as 0-dim percentage tensors."""
    with torch.no_grad():
        maxk = max(topk)
        batch = target.size(0)

        # (N, C) logits -> (maxk, N) predicted class indices.
        _, top_idx = output.topk(maxk, 1, True, True)
        top_idx = top_idx.t()
        hit_mask = top_idx.eq(target.view(1, -1).expand_as(top_idx))

        out = []
        for k in topk:
            # Count hits within the first k ranks; scale in place to a percentage.
            k_hits = hit_mask[:k].reshape(-1).float().sum()
            out.append(k_hits.mul_(100.0 / batch))
        return out


def save_checkpoint(state, is_best, save_dir, filename='checkpoint.pth'):
    """Write the checkpoint; additionally copy it to model_best.pth on a new best."""
    target_path = os.path.join(save_dir, filename)
    torch.save(state, target_path)
    if is_best:
        torch.save(state, os.path.join(save_dir, 'model_best.pth'))


def set_seed(seed, deterministic=False):
    """Seed all RNGs; optionally force cudnn into deterministic (non-benchmark) mode."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        cudnn.deterministic, cudnn.benchmark = True, False
    else:
        cudnn.deterministic, cudnn.benchmark = False, True
parser.add_argument('--dataset', default='cifar10', choices=['cifar10','cifar100','imagenet','custom'], help='which dataset') + parser.add_argument('--datapath', default='./data', type=str, help='dataset root / imagenet root / custom root') + parser.add_argument('--imagenet_dir', default='./imagenet', type=str, help='if dataset=imagenet, path to imagenet root') + parser.add_argument('--custom_eval_dir', default=None, help='if dataset=custom, provide val dir') + parser.add_argument('--num_workers', default=4, type=int, help='dataloader workers per process') + parser.add_argument('--epochs', default=200, type=int) + parser.add_argument('--steps', default=0, type=int, help='max steps to run (if >0, training will stop when global_step reaches this).') + parser.add_argument('--batch_size', default=128, type=int) + parser.add_argument('--model_name', default='resnet18', help='torchvision model name or python path e.g. mypkg.mymodule.Model (used if no local Model)') + parser.add_argument('--num_classes', default=None, type=int, help='override num classes (auto-detect for common sets)') + parser.add_argument('--pretrained', action='store_true', help='use torchvision pretrained weights when available') + parser.add_argument('--optimizer', default='sgd', choices=['sgd','adam','adamw'], help='optimizer') + parser.add_argument('--lr', '--learning_rate', default=0.1, type=float) + parser.add_argument('--momentum', default=0.9, type=float) + parser.add_argument('--weight_decay', default=5e-4, type=float) + parser.add_argument('--nesterov', action='store_true') + parser.add_argument('--scheduler', default='multistep', choices=['multistep','step','cosine','none'], help='lr scheduler') + parser.add_argument('--milestones', default='100,150', type=str, help='milestones for multistep (comma sep)') + parser.add_argument('--step_size', default=30, type=int, help='step size for StepLR or cosine max epochs') + parser.add_argument('--gamma', default=0.1, type=float) + 
parser.add_argument('--scheduler_step_per_batch', action='store_true', help='call scheduler.step() per batch (for some schedulers)') + parser.add_argument('--resume', default='', type=str, help='path to checkpoint to resume from') + parser.add_argument('--start_epoch', default=0, type=int) + parser.add_argument('--print_freq', default=100, type=int) + parser.add_argument('--save_freq', default=10, type=int, help='save checkpoint every N epochs (rank0 only)') + parser.add_argument('--amp', action='store_true', default = True,help='use automatic mixed precision (AMP)') + parser.add_argument('--grad_accum_steps', default=1, type=int, help='gradient accumulation steps') + parser.add_argument('--local_rank', default=None, type=int, help='local rank passed by torchrun (if any). Use -1 or None for non-distributed') + parser.add_argument('--cutmix_prob', default=0.0, type=float) + parser.add_argument('--beta', default=1.0, type=float) + parser.add_argument('--seed_sampler', default=False, action='store_true', help='set sampler epoch seeds to make deterministic distributed shuffling') + args = parser.parse_args() + args.milestones = [int(x) for x in args.milestones.split(',')] if args.milestones else [] + return args + +# ---------------------------- +# build model (优先 LocalModel) +# ---------------------------- +def build_model_with_local_priority(args, device=None): + """ + 用参数 args.arch 作为模块名导入 Model() + 如果模块不存在或没有 Model 类,则报错停止。 + """ + try: + # 动态导入模块,比如 args.arch = "rexnet" + mod = importlib.import_module(args.arch) + Model = getattr(mod, "Model") # 从模块中获取 Model 类 + except Exception as e: + raise RuntimeError( + f"无法导入模型模块 '{args.arch}' 或未找到类 Model。" + f"\n错误信息:{e}" + ) + + # 解析数据集类别数 + if args.dataset == 'cifar10': + num_classes = 10 + elif args.dataset == 'cifar100': + num_classes = 100 + else: + print(f"[ERROR] 不支持的数据集类型:{args.dataset},无法确定类别数。程序终止。") + sys.exit(1) + + + # 实例化 + try: + model = Model(num_classes) + except Exception as e: + raise RuntimeError( + 
f"Model() 实例化失败,请检查模型构造函数。\n错误信息:{e}" + ) + + return model + +# ---------------------------- +# Data loader factory +# ---------------------------- +def build_dataloaders(args, rank, world_size): + if args.dataset == 'cifar10' or args.dataset == 'cifar100': + mean = (0.4914, 0.4822, 0.4465) + std = (0.2470, 0.2435, 0.2616) if args.dataset == 'cifar10' else (0.2023, 0.1994, 0.2010) + # train_transform = transforms.Compose([ + # transforms.RandomCrop(32, padding=4), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # transforms.Normalize(mean, std), + # ]) + # test_transform = transforms.Compose([ + # transforms.ToTensor(), + # transforms.Normalize(mean, std), + # ]) + + train_transform = transforms.Compose([ # 2025/12/3 从visformer模型开始 + transforms.Resize(256), # 先放大到 256 + transforms.RandomCrop(224), # 再随机裁剪为 224(更符合 ImageNet 风格增强) + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize(mean, std), + ]) + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean, std), + ]) + root = args.datapath + if args.dataset == 'cifar10': + train_set = datasets.CIFAR10(root=root, train=True, download=False, transform=train_transform) + val_set = datasets.CIFAR10(root=root, train=False, download=False, transform=test_transform) + num_classes = 10 + else: + train_set = datasets.CIFAR100(root=root, train=True, download=False, transform=train_transform) + val_set = datasets.CIFAR100(root=root, train=False, download=False, transform=test_transform) + num_classes = 100 + + elif args.dataset == 'imagenet': + train_dir = os.path.join(args.imagenet_dir, 'train') + val_dir = os.path.join(args.imagenet_dir, 'val') + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)), + ]) + test_transform = 
transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)), + ]) + train_set = datasets.ImageFolder(train_dir, train_transform) + val_set = datasets.ImageFolder(val_dir, test_transform) + num_classes = args.num_classes or 1000 + + elif args.dataset == 'custom': + train_dir = os.path.join(args.datapath, 'train') + val_dir = args.custom_eval_dir or os.path.join(args.datapath, 'val') + train_transform = transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + ]) + test_transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + ]) + train_set = datasets.ImageFolder(train_dir, train_transform) + val_set = datasets.ImageFolder(val_dir, test_transform) + num_classes = len(train_set.classes) + else: + raise ValueError("Unknown dataset") + + if dist.is_initialized() and world_size > 1: + train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=rank, shuffle=True) + else: + train_sampler = None + + train_loader = DataLoader(train_set, + batch_size=args.batch_size, + shuffle=(train_sampler is None), + num_workers=args.num_workers, + pin_memory=True, + sampler=train_sampler, + drop_last=False) + val_loader = DataLoader(val_set, + batch_size=args.batch_size, + shuffle=False, + num_workers=args.num_workers, + pin_memory=True) + + return train_loader, val_loader, num_classes, train_sampler + +# ---------------------------- +# Train & validate +# ---------------------------- +def train_one_epoch(args, epoch, model, criterion, optimizer, train_loader, device, scaler, scheduler=None, train_sampler=None, global_step_start=0, max_global_steps=None): + """ + 现在支持:若 max_global_steps 非 None,则当 global_step 达到该值时提前退出 + 返回: epoch_summary_dict, step_logs_list, global_step_end + step_logs_list: list of dicts with per-step info (for logging to CSV 
if需要) + """ + batch_time = AverageMeter('Time') + data_time = AverageMeter('Data') + losses = AverageMeter('Loss') + top1 = AverageMeter('Acc@1') + top5 = AverageMeter('Acc@5') + + model.train() + end = time.time() + optimizer.zero_grad() + + iters = len(train_loader) + step_logs = [] + global_step = global_step_start + + for i, (images, targets) in enumerate(train_loader): + # check global steps limit + if (max_global_steps is not None) and (global_step >= max_global_steps): + break + + data_time.update(time.time() - end) + images = images.to(device, non_blocking=True) + targets = targets.to(device, non_blocking=True) + + if args.amp: + with amp.autocast(): + outputs = model(images) + loss = criterion(outputs, targets) / args.grad_accum_steps + else: + outputs = model(images) + loss = criterion(outputs, targets) / args.grad_accum_steps + + if args.amp: + scaler.scale(loss).backward() + else: + loss.backward() + + if (i + 1) % args.grad_accum_steps == 0: + if args.amp: + scaler.step(optimizer) + scaler.update() + else: + optimizer.step() + optimizer.zero_grad() + if scheduler is not None and args.scheduler_step_per_batch: + scheduler.step() + + with torch.no_grad(): + acc1, acc5 = accuracy(outputs, targets, topk=(1,5)) + losses.update(loss.item() * args.grad_accum_steps, images.size(0)) + top1.update(acc1.item(), images.size(0)) + top5.update(acc5.item(), images.size(0)) + + batch_time.update(time.time() - end) + end = time.time() + + # increment global step AFTER processing this batch + global_step += 1 + + # per-step print (controlled by print_freq) + # 输出格式调整为:Epoch[23]:step[1/32] step_train_loss 3.0075 acc1 25.95 acc5 54.46 + # 使用 i+1 / iters 更贴近人类可读的“第几步 / 总步数(该 epoch 内)” + if ((global_step % args.print_freq == 0) or (i == iters - 1)) and ((dist.get_rank() if dist.is_initialized() else 0) == 0): + lr = optimizer.param_groups[0]['lr'] + # note: losses.val is 当前 batch 的 loss(经过 grad_accum 处理后还原),losses.avg 是到目前为止的 epoch 平均 + 
print(f"Epoch[{epoch}]:step[{i+1}/{iters}] step_train_loss {losses.val:.4f} acc1 {top1.val:.2f} acc5 {top5.val:.2f}") + + # collect per-step log + step_logs.append({ + 'epoch': epoch, + 'batch_idx': i, + 'global_step': global_step, + 'lr': optimizer.param_groups[0]['lr'], + 'loss': losses.val, + 'loss_avg': losses.avg, + 'acc1': top1.val, + 'acc1_avg': top1.avg, + 'acc5': top5.val, + 'acc5_avg': top5.avg, + 'time': batch_time.val + }) + + # if reached max_global_steps inside epoch, break (handled at loop start next iter) + if (max_global_steps is not None) and (global_step >= max_global_steps): + # optional message + if (dist.get_rank() if dist.is_initialized() else 0) == 0: + print(f"[Info] 达到 max_global_steps={max_global_steps},将在 epoch 内提前停止。") + break + + if scheduler is not None and not args.scheduler_step_per_batch: + scheduler.step() + + return OrderedDict([('loss', losses.avg), ('acc1', top1.avg), ('acc5', top5.avg)]), step_logs, global_step + +def validate(args, model, val_loader, criterion, device): + losses = AverageMeter('Loss') + top1 = AverageMeter('Acc@1') + top5 = AverageMeter('Acc@5') + + model.eval() + with torch.no_grad(): + for i, (images, targets) in enumerate(tqdm(val_loader)): + images = images.to(device, non_blocking=True) + targets = targets.to(device, non_blocking=True) + outputs = model(images) + loss = criterion(outputs, targets) + acc1, acc5 = accuracy(outputs, targets, topk=(1,5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1.item(), images.size(0)) + top5.update(acc5.item(), images.size(0)) + return OrderedDict([('loss', losses.avg), ('acc1', top1.avg), ('acc5', top5.avg)]) + +# ---------------------------- +# Main +# ---------------------------- +def main(): + args = parse_args() + + # handle local_rank from env if not provided + local_rank_env = os.environ.get('LOCAL_RANK', None) + if args.local_rank is None and local_rank_env is not None: + args.local_rank = int(local_rank_env) + + distributed = (args.local_rank 
is not None and args.local_rank != -1) + if distributed: + dist.init_process_group(backend='nccl', init_method='env://') + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + + if distributed: + torch.cuda.set_device(args.local_rank) + device = torch.device('cuda', args.local_rank) + else: + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + set_seed(args.seed + (rank if distributed else 0), deterministic=args.deterministic) + + save_dir = os.path.join('models', args.name) + if rank == 0: + os.makedirs(save_dir, exist_ok=True) + with open(os.path.join(save_dir, 'args.json'), 'w') as f: + json.dump(vars(args), f, indent=2) + if distributed: + dist.barrier() + + train_loader, val_loader, auto_num_classes, train_sampler = build_dataloaders(args, rank, world_size) + if args.num_classes is None: + args.num_classes = auto_num_classes + + # 使用本地 Model 优先(LocalModel 已在文件顶部尝试导入) + model = build_model_with_local_priority(args, device) + model.to(device) + + if distributed: + model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank, find_unused_parameters=True) + + criterion = nn.CrossEntropyLoss().to(device) + params = [p for p in model.parameters() if p.requires_grad] + if args.optimizer == 'sgd': + optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum, + weight_decay=args.weight_decay, nesterov=args.nesterov) + elif args.optimizer == 'adam': + optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) + elif args.optimizer == 'adamw': + optimizer = optim.AdamW(params, lr=args.lr, weight_decay=args.weight_decay) + else: + raise ValueError('Unknown optimizer') + + scheduler = None + if args.scheduler == 'multistep': + scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones, gamma=args.gamma) + elif args.scheduler == 'step': + scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma) + elif 
args.scheduler == 'cosine': + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) + elif args.scheduler == 'none': + scheduler = None + + scaler = amp.GradScaler() if args.amp else None + + start_epoch = args.start_epoch + best_acc = 0.0 + if args.resume: + if os.path.isfile(args.resume): + ckpt = torch.load(args.resume, map_location='cpu') + model_state = ckpt.get('state_dict', ckpt) + if isinstance(model, DDP): + model.module.load_state_dict(model_state) + else: + model.load_state_dict(model_state) + if 'optimizer' in ckpt: + optimizer.load_state_dict(ckpt['optimizer']) + start_epoch = ckpt.get('epoch', start_epoch) + best_acc = ckpt.get('best_acc', best_acc) + print(f"=> resumed from {args.resume}, start_epoch={start_epoch}") + else: + print(f"=> resume path {args.resume} not found") + + log_columns = ['epoch', 'lr', 'loss', 'acc1', 'acc5', 'val_loss', 'val_acc1', 'val_acc5'] + log_df = pd.DataFrame(columns=log_columns) + # step-level log + step_log_columns = ['epoch', 'batch_idx', 'global_step', 'lr', 'loss', 'loss_avg', 'acc1', 'acc1_avg', 'acc5', 'acc5_avg', 'time'] + step_log_df = pd.DataFrame(columns=step_log_columns) + + total_epochs = args.epochs + # global_step计数器(训练过程中跨epoch持续) + global_step = 0 + + epoch = start_epoch + # loop until either epoch criteria or step criteria met + while True: + if train_sampler is not None: + if args.seed_sampler: + train_sampler.set_epoch(epoch + args.seed) + else: + train_sampler.set_epoch(epoch) + + if rank == 0: + print(f"==== Epoch {epoch}/{total_epochs - 1} ====") + + # 如果传入了 args.steps (>0),则把剩余允许的 step 数传给 train_one_epoch, + # 否则 max_global_steps=None(按整 epoch 执行完) + if args.steps and args.steps > 0: + max_global_steps = args.steps + else: + max_global_steps = None + + train_log, step_logs, global_step = train_one_epoch( + args, epoch, model, criterion, optimizer, train_loader, device, scaler, + scheduler, train_sampler, global_step_start=global_step, max_global_steps=max_global_steps + ) 
+ + # 如果启用了按 steps 的模式且已经达到上限,直接退出 main(跳过 validate) + if max_global_steps is not None and global_step >= max_global_steps: + if rank == 0: + print(f"[Main] 达到 max_global_steps={max_global_steps}(global_step={global_step}),提前退出训练(跳过验证)。") + # 直接返回 main(),不再执行后续 validate / 保存逻辑 + return + + # 验证并记录 epoch 级别日志(如果在 step 模式下很可能在中间某个 epoch 提前结束,但我们仍做一次 validate) + val_log = validate(args, model, val_loader, criterion, device) + current_lr = optimizer.param_groups[0]['lr'] + + if rank == 0: + # epoch summary print, 格式与示例对齐 + print(f"Epoch[{epoch}]: epoch_train_loss {train_log['loss']:.4f} acc1 {train_log['acc1']:.2f} acc5 {train_log['acc5']:.2f} | " + f"val_loss {val_log['loss']:.4f} acc1 {val_log['acc1']:.2f} acc5 {val_log['acc5']:.2f} lr {current_lr:.6f}") + row = { + 'epoch': epoch, + 'lr': current_lr, + 'loss': train_log['loss'], + 'acc1': train_log['acc1'], + 'acc5': train_log['acc5'], + 'val_loss': val_log['loss'], + 'val_acc1': val_log['acc1'], + 'val_acc5': val_log['acc5'], + } + new_row_df = pd.DataFrame([row]) + log_df = pd.concat([log_df, new_row_df], ignore_index=True) + log_df.to_csv(os.path.join(save_dir, 'log.csv'), index=False) + + is_best = val_log['acc1'] > best_acc + if is_best: + best_acc = val_log['acc1'] + if (epoch % args.save_freq == 0) or is_best or ( (max_global_steps is None) and (epoch == total_epochs - 1) ) : + state = { + 'epoch': epoch, + 'state_dict': model.module.state_dict() if isinstance(model, DDP) else model.state_dict(), + 'best_acc': best_acc, + 'optimizer': optimizer.state_dict(), + 'args': vars(args) + } + save_checkpoint(state, is_best, save_dir, filename=f'checkpoint_epoch_{epoch}.pth') + + # increment epoch + epoch += 1 + + # stopping conditions: + # 1) if steps mode enabled and reached steps -> stop + if args.steps and args.steps > 0: + if global_step >= args.steps: + if rank == 0: + print(f"[Main] 已达到指定 steps={args.steps}(global_step={global_step}),训练结束。") + break + + # 2) if steps not used, stop when epoch >= epochs + else: 
+ if epoch >= total_epochs: + if rank == 0: + print(f"[Main] 已达到指定 epochs={total_epochs}(epoch={epoch}),训练结束。") + break + + if dist.is_initialized(): + dist.barrier() + if rank == 0: + print("Training finished. Best val acc1: {:.2f}".format(best_acc)) + +if __name__ == '__main__': + main() \ No newline at end of file From 9b1f8fe03edea6ba1fcee988ac567b8afd0c272f Mon Sep 17 00:00:00 2001 From: root Date: Mon, 12 Jan 2026 07:07:48 +0000 Subject: [PATCH 2/2] fix: cleanup code and update --- .../build-in/Classification/SKNet/README.md | 10 - .../Classification/SKNet/coverage.txt | 3 - .../build-in/Classification/SKNet/readme.md | 65 ++ .../Classification/SKNet/requirement.txt | 89 +++ .../Classification/SKNet/resnet_loss.jpg | Bin 33981 -> 0 bytes .../Classification/SKNet/resnet_loss.txt | 29 - PyTorch/build-in/Classification/SKNet/run | 1 - .../build-in/Classification/SKNet/sknet222.py | 145 ----- .../Classification/SKNet/sknetdebug.py | 161 ----- .../Classification/SKNet/weloTrain.py | 567 ------------------ 10 files changed, 154 insertions(+), 916 deletions(-) delete mode 100644 PyTorch/build-in/Classification/SKNet/README.md delete mode 100644 PyTorch/build-in/Classification/SKNet/coverage.txt create mode 100644 PyTorch/build-in/Classification/SKNet/readme.md create mode 100644 PyTorch/build-in/Classification/SKNet/requirement.txt delete mode 100644 PyTorch/build-in/Classification/SKNet/resnet_loss.jpg delete mode 100644 PyTorch/build-in/Classification/SKNet/resnet_loss.txt delete mode 100644 PyTorch/build-in/Classification/SKNet/run delete mode 100644 PyTorch/build-in/Classification/SKNet/sknet222.py delete mode 100644 PyTorch/build-in/Classification/SKNet/sknetdebug.py delete mode 100644 PyTorch/build-in/Classification/SKNet/weloTrain.py diff --git a/PyTorch/build-in/Classification/SKNet/README.md b/PyTorch/build-in/Classification/SKNet/README.md deleted file mode 100644 index b5b4f472d..000000000 --- a/PyTorch/build-in/Classification/SKNet/README.md +++ 
/dev/null @@ -1,10 +0,0 @@ -# SKNet - -Try to implement the CVPR 2019 paper ["Selective Kernel Networks"](https://arxiv.org/abs/1903.06586) by PyTorch. - -I implement the core Slective Kernel Module, as illustrated in the img: -![Slective Kernel Module](https://github.com/pppLang/SKNet/blob/master/img/img1.PNG) - -The core SKConv unit is implement as a Pytorch Module in untils.py. - -I intend to test in cifar-10 and imagenet, and test in some SR models. \ No newline at end of file diff --git a/PyTorch/build-in/Classification/SKNet/coverage.txt b/PyTorch/build-in/Classification/SKNet/coverage.txt deleted file mode 100644 index e15af6f05..000000000 --- a/PyTorch/build-in/Classification/SKNet/coverage.txt +++ /dev/null @@ -1,3 +0,0 @@ -all api: ['_amp_foreach_non_finite_check_and_unscale_', '_amp_update_scale_', '_copy_from', '_has_compatible_shallow_copy_type', '_local_scalar_dense', '_log_softmax', '_log_softmax_backward_data', '_reshape_alias', '_softmax', '_softmax_backward_data', 'add_', 'addmm', 'as_strided', 'as_strided_', 'cat', 'clone', 'convolution', 'convolution_backward', 'copy_', 'copy_stride', 'div', 'empty', 'empty_strided', 'eq', 'fill_', 'max_pool2d', 'maxpool2d_backward', 'maxpool2d_forward', 'mean', 'mm', 'mul', 'native_batch_norm', 'native_batch_norm_backward', 'nll_loss_backward', 'nll_loss_forward', 'reciprocal', 'relu_', 'sum', 'threshold_backward', 'topk', 'view', 'zero_'], total: 42 -fallback op: [], total: 0 -coverage rate: 100.00% diff --git a/PyTorch/build-in/Classification/SKNet/readme.md b/PyTorch/build-in/Classification/SKNet/readme.md new file mode 100644 index 000000000..38a481446 --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/readme.md @@ -0,0 +1,65 @@ +```markdown +## 1. 模型链接 +- 原始仓库链接: +https://github.com/huggingface/pytorch-image-models?tab=readme-ov-file#models + +## 2. 快速开始 + +使用本模型执行训练的主要流程如下: + +1. **基础环境安装**:介绍训练前需要完成的基础环境检查和安装。 +2. **获取数据集**:介绍如何获取训练所需的数据集。 +3. **构建环境**:介绍如何构建模型运行所需要的环境。 +4. 
**启动训练**:介绍如何运行训练。 + +### 2.1 基础环境安装 + +请参考主仓库的基础环境安装章节,完成训练前的基础环境检查和安装(如驱动、固件等)。 + +### 2.2 准备数据集 + +#### 2.2.1 获取数据集 + +Res2Net 训练使用 **CIFAR-100** 数据集。该数据集为开源数据集,包含 100 个类别的 60000 张彩色图像。 + +#### 2.2.2 处理数据集 + +请确保数据集已下载并解压。根据训练脚本的默认配置,建议将数据集存放在模型目录的上级 `data` 目录中(即 `../data`),或者根据实际路径修改训练命令中的 `--datapath` 参数。 + +### 2.3 构建环境 + +所使用的环境下需包含 PyTorch 框架虚拟环境。 + +1. 执行以下命令,启动虚拟环境(根据实际环境名称修改): + + ```bash + conda activate torch_env_py310 + +``` + +2. 安装 Python 依赖。确保已安装项目所需的依赖包: +```bash +pip install -r requirements.txt + +``` + + + +### 2.4 启动训练 + +1. 在构建好的环境中,进入模型训练脚本所在目录。 + +2. 运行训练。该模型支持单机单卡训练。 +执行以下命令启动训练(使用 CIFAR-100 数据集,Batch Size 为 128): +```bash +python weloTrainStep.py \ + --name train \ + --arch sknet \ + --print_freq 1 \ + --steps 100 \ + --dataset cifar100 \ + --datapath ../data \ + --batch_size 8 \ + --epochs 100 + +``` diff --git a/PyTorch/build-in/Classification/SKNet/requirement.txt b/PyTorch/build-in/Classification/SKNet/requirement.txt new file mode 100644 index 000000000..7394b3319 --- /dev/null +++ b/PyTorch/build-in/Classification/SKNet/requirement.txt @@ -0,0 +1,89 @@ +addict==2.4.0 +aliyun-python-sdk-core==2.16.0 +aliyun-python-sdk-kms==2.16.5 +anyio==4.11.0 +astunparse==1.6.3 +certifi==2024.12.14 +cffi==2.0.0 +charset-normalizer==3.4.1 +click==8.3.1 +colorama==0.4.6 +contourpy==1.3.2 +crcmod==1.7 +cryptography==46.0.3 +cycler==0.12.1 +einops==0.8.1 +exceptiongroup==1.3.1 +filelock==3.14.0 +fonttools==4.60.1 +fsspec==2024.12.0 +future @ file:///croot/future_1730902796226/work +git-filter-repo==2.47.0 +h11==0.16.0 +hf-xet==1.2.0 +httpcore==1.0.9 +httpx==0.28.1 +huggingface_hub==1.1.5 +idna==3.10 +inplace-abn @ git+https://github.com/mapillary/inplace_abn.git@b50bfe9c7cd7116a3ab091a352b48d6ba5ee701c +Jinja2==3.1.5 +jmespath==0.10.0 +joblib==1.5.2 +kiwisolver==1.4.9 +Markdown==3.10 +markdown-it-py==4.0.0 +MarkupSafe==3.0.2 +matplotlib==3.10.7 +mdurl==0.1.2 +mmdet==3.3.0 +mmengine==0.10.7 +model-index==0.1.11 +mpmath==1.3.0 +networkx==3.4.2 
+numpy==1.23.5 +opencv-python==4.12.0.88 +opendatalab==0.0.10 +openmim==0.3.9 +openxlab==0.1.3 +ordered-set==4.1.0 +oss2==2.17.0 +packaging @ file:///croot/packaging_1734472117206/work +pandas==2.3.3 +pillow==11.1.0 +platformdirs==4.5.1 +pycocotools==2.0.11 +pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work +pycryptodome==3.23.0 +Pygments==2.19.2 +pyparsing==3.2.5 +python-dateutil==2.9.0.post0 +pytz==2023.4 +PyYAML @ file:///croot/pyyaml_1728657952215/work +requests==2.28.2 +rich==13.4.2 +safetensors==0.7.0 +scikit-learn==1.7.2 +scipy==1.15.3 +shapely==2.1.2 +shellingham==1.5.4 +six @ file:///tmp/build/80754af9/six_1644875935023/work +sniffio==1.3.1 +sympy==1.13.3 +tabulate==0.9.0 +termcolor==3.2.0 +terminaltables==3.1.10 +threadpoolctl==3.6.0 +timm==1.0.22 +tomli==2.3.0 +torch @ file:///apps/torch-2.4.0a0%2Bgit4451b0e-cp310-cp310-linux_x86_64.whl#sha256=2e472c916044cac5a1a0e0d8b0e12bb943d8522b24ff826c8014dd444dccd378 +torch_sdaa @ file:///apps/torch_sdaa-2.0.0-cp310-cp310-linux_x86_64.whl#sha256=5aa57889b002e1231fbf806642e1353bfa016297bc25178396e89adc2b1f92e7 +torchaudio @ file:///apps/torchaudio-2.0.2%2Bda3eb8d-cp310-cp310-linux_x86_64.whl#sha256=46525c02fb7eaa8dafea860428de3d01e437ba8d6ff2cc228d7c71975ac4054b +torchdata @ file:///apps/torchdata-0.6.1%2Be1feeb2-py3-none-any.whl#sha256=aa2dc1a7732ea68adfad186978049bf68cc1afdbbdd1e17a8024227ab770e433 +torchtext @ file:///apps/torchtext-0.15.2a0%2B4571036-cp310-cp310-linux_x86_64.whl#sha256=7e42c684ba366f97b59ec37488bf95e416cce3892b6589200d2b3ad159ee5788 +torchvision @ file:///apps/torchvision-0.15.1a0%2B42759b1-cp310-cp310-linux_x86_64.whl#sha256=4b904db2d50102415536bc764bbc31c669b90b1b014f90964e9eccaadb2fd9eb +tqdm==4.65.2 +typer-slim==0.20.0 +typing_extensions==4.15.0 +tzdata==2025.2 +urllib3==1.26.20 +yapf==0.43.0 diff --git a/PyTorch/build-in/Classification/SKNet/resnet_loss.jpg b/PyTorch/build-in/Classification/SKNet/resnet_loss.jpg deleted file mode 100644 index 
183a2ae4ec9ea4052f7018ab0daaa2d881c3c118..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 33981 zcmeEv1zcQNmVOZkkYFLWli;2pL4t-rA;H~~pam2JcM1s*Jdgkh8rb+a{ynF6 zJmlr&rC|~j5#Sc)Af z2cY5KynFAV1UiAr3yk}AgxtPSshD(6N?V9jhY#p^UfTQJx=l<%N=DAW$i&RT%FFkN zUqDbu@~MP#+yZ3j;OOM+;_Bw^|2iNr=uL1)^t<;lv2hDuo=^X5U0eUUvAMN8Y&vbcezk*xF8Qye6*YQ9-`lsP{DX%M{u9p z7nATwRBCC3_@8jRk;NLp%e?|ucQrxqWY*tz0r_)tRH?{^XHdq*J`l|+fR6CSwR_)=kiw44c~+2N zatEBzRm7xskd>Y4N???r87(MPQ!!dGQW(u~cS}e1#JE-h0eGd1k-d}5In*F1tZW(g zg=Jmxep{4qaf#E*oLg9XuDP5IYN^wk4|m>KD&ss-ZqnbD59W8gjV@2HiEi~SMZ4r6 zuC37?0cg8>9XqR1UPY{BPe*uPWO)S}rPU;Gc9E}tT9Pv=!_}&d(?-)L^%d|QgHjM?#Rm286$Wiy$2 zoXfUBekDQvV|9zM8=>Y_1y0BiUDR7{Z!@OE(}I0U%LVp>y_nn`UB^o{(AnFI{G06s zg)E=RZpMlWZB)y}eSsVaKo{X(?m)J9)%Qb;j-CliA^?V9SUW(1B-Cuk7T&wEYb-~g zV8h9=R8pE`5a&EPo?_Qc!@QBZ$tH~ebT*NXQr$gPb`5yEL@sNo(T4YBlOn&8Y)gg6trXkdCLneYysb)(mq1k93jlyXW!X+_pTBo`Su1!S_Id0(fv1h z-a6bEhh7Dg*AC*;F$K_8i6O3+IX_^8TVNd6XA`;p>8 zwv#>-SPdsyFY3a+b(Y^)`k*DuD@`emdY&28Tyc`qRv27lQ3k}UDygoFWmI-~$Mvz^ zZs6e&_H*28OD~6ZA^sq2tUSXDZlx4f`tbFr!?D-mdZ@J>Y~i9TLol90t+r0xv86H+ zVrNmUc)MncIDtzOZ^y|)(NOb46k!zy_aMnJ-YBqn?oCF9Xu*`e$X7Hh!qo1&XP&+hELfB~u?uZW%*xz)f)sYic!XWTrl0gdB8&GcRMW*QiJLu0P5qmHYf6I&39#Uwn*e}q0?1k-BZoDX)W{W^9_ozkZRm= zyf+OTMS9RkP0)#z-iMo6jUdZmGKzbxH&FsrKHv12@v8lVdn|P*`fi0d$#FKA-u1>u zVm-He&nW~1oY4IJyH4`FnGdNZVw|$Jgfuc-+&i-Nsqs^Y1GUzpyj4mUI!%en>n z5w)k~6T184Udyi~_+t?Ov3T*wFcXuKZGl07f+=BxROknNP&~)}Y*zHbvxbMI5uy+H zMIYQ^%5pGtVhXO^3Fv0n1^1_Xjx|Z}4-m|8|>Y$2U zO)%$ISSVBuqgsaEg`z{^OKw*OHxQLhpkLp=&0V3x|4}v76nwzf6l7*%Iz>k>sviE? 
zFFVY+$fY$3hyY+Ib&Oh5o5wv*E$yOKqlBc$tvhv@2DQ7uQJzA|prOh^%BmPxHujT} zB4tA-2tZZ^%BFi{SOpa$mY^+Hh*#M|Ui^Z)!bKl2Tw?RIae>MYl@i)#(8#gLXSZJ^ z2`&3ca^HX(Q~Kn6BIamW!and~dwocKO|l|&mYU-lcq~!cQ0n(sud8r1`o410=Eo$8 z7rp4HVQZ)DOj8Y9=F4mzS%(@hnVE*TiMo2dfazE2fft^Q3AlPWfVHI|jI_;}y`+tubB%ajRnm2xtvW@-(XWvWOKMZpY>UG22$5n1jL|J<5ZvVd|px$_h!FB2b?B$ zG9swa1=A=;08l<*rcE{|69=r$;iM<~n9gfQp8>BFrO~ZI4y{vgi}>_*5@vV8+1l{Uctp|_ zZ0-9RZlu0q*nIHHL(OqwY~Lv*kO1n(2p_xRIPPK4e_+eUh-_Scv?v-;`L zC8HFXw@wT}zPKD>U&zMA4(mmvf6KGGcy}?$@@XEJvAKv-d9CDfrcjh3e0R>8y=p`# zN7~vEjg8a8e8EQqsD#9hJP^TlReTtSscnrC+q!UcIy>cqn=+J3z8a2^6i^p~cp1lC zV1K+o!zzU&z4b5x;3mn;DKg2oAyn9~z%c6Rk-u40uD=x*`TWH#G8*s^DepSUy01~3 zSbH^5)8*HXt9aC|nnrjsHgO941cr)HWd-;ZQfSsPq-HNg4VyVVlu--38KjD!V*0fe z$Ay*h@>bI?5P+v~FW*$7fd>+JzC>qiGIqQ!k` zG-vLV2=?TzVnndL8>!SI6{Uu@wvNnYxHn+sbu*+N(molvGbN%Ph#`8Knj=L?jSICe zz5hu;QW48H%c%djJx zl~wqYYkbZKjy8*tiaGN&b+nRBK~&1g4}l6KdWU=wwJSTjmI-P88^hP^q6bO zUhMXoFiNhDe4gv7S}^%S*EW{u)}1V^t|vwqZz*CoN(&_Yv9R&pwYkE;8>tJD7JhWAk_uZDBfgauO{_$X%CERIv#SU(8!5Jf=;+|9pz>}h21i%L$a>$D^^h3MS z-#@}>5CQmty0m{uHqHfzn}WQ73TlTeO4?~a=mvu4d#6t#2nh=HVDX@8<#Jf9a&I5x$gH-DsF zkA6bE24VM-(UHu{78z9h%=`oM!am@?#&XAlqx{Ic6n?_Ic#ybe?%TYnHv~Dj{R8uo z6vja^uLXZ3^HTZFyheY*y!w#Li)_jElK6xMcH>XXYY-JORQ(A7SR(v^HO~Iid+z%~ z&jSFCKXhf(7wrbF#J>KlD^|;b4&9d-re}Wa-x(}u0cCZ0F|hd?2^axLi}ev={U6eH z!9fZHpbLE#{-w?Z9xG`;<&eye0KCPiz7BBQzh)1t69O=&g&>dgq%|3O*Ysl(4F)x5 ze#%!oGd}cbWxP1vIQH|Mu@mPdRB?}r&(mzTMRpAHRP3BhI^^Uwk+(+sg)o1*@kcxI zyHFb6;3v}2tZT9&RXB1A0oX}^JQODY8%bQKRC-~o>vx{fgfwryLI8RkA*;Zj{-Jv% zfB*==AYGj^@)tDAaC4>aH%O14(8M^`CENS&M+Hz;>6Y<{;AxB(R`{CO&_y*(x=>|!>-qjBqAq>)^$0?S@ptkLw|qs00Lm{{Agp=mMnYcG6QIopj@rM+NoYSc|WK} z-i`bso#b}Fg2HEZo9tB8j0b?VFo1{Ici||shP*XaAa|T(0?yxXt|ITz9L-QFkMjdI ze_&0&W&Hdzy;mLgvZDJ+W0en2MWgPlrpJk&AmP+}Wk&K#-#86bnHFKs*3m89A6i-` zsTb|dyDo>EiNP2Oud}Nkdqy(YHPf_wb@QksuwuRZuC)ckA1?`o3t?K zD2)>}vr=+&EvxP8nqV62c!n5bUBWB}55bg02HM}g{3WB zEFT+98ue{j6o66N@U33SM3PlES3kA^8|_Okj(JS3|8X$Y3s;nYK2>2r% z>UanM%`l{9u}}RJ6`7YgG1}ulCPWb-QaA+*36fF6#|>n$3t5qD)Ge2>siInD!79gC 
z2VJ?r2c3F)XH4Bg+bVCU*<7@*R7xuZKH|C-O_5_F(_s)T{v^_2Ce-W6!1j~a^Hnas0S=Bt+mraKGz?rKiEFrs){#D&*=wO?<#=E34Kbsuyx{?&?q?- zF;DYp7t=~GLQjrUJM-Q6MI{%neK^PGzj~#24e%S=?744HmA5~Kj&_D;Tj(B+fcIfL zHy=_-3*avWdfp&hTZ?*oYet=*n-FDT0%sr~U%`pJ)U{f@I()9Im8vmU9qoEZ_wFl^ zZr)wnw*1tO)LTSdvYHZnGV6_S&lLu6Y0ON!Hjn40z$O1gWeg=LhjM5snD?Z%VYpHrlv%51!9jJO zyPxicxIvK`DMoZFDPM86VdM3uWO<5wra6hgHD_nsFdgoU;gk6p{$3t4IL|f5Sl(v;x*`RVT6$7~i5k)5xoea0tw~ALy z%Wv714sZoN(`s2Q5jvzU7`WL|HST3PIawEFD!o4{Rr4R>U!d+cP#{EUa~+g#kOA-O z8H`cSZC|=wVbN2|^v_9>;qR{&FI)>0E!6&!8vINqhZzJQXuUp(56mh7E)W+5!pY*U zBDYyPgJ2TGmDo|;WL0Av1G5QQS6vI~->0(!ob~_~_YRo1rHTc9Ip@8nv>XGefq$A9 zj*ebS=|%tyuuVxY2JzzIb#QYDDPvbi8~)BiS;Eo;u=Lz+Z6%~3Xw2PzM~^QtZRJ}( z9Rq_S@)65QND(Td{80{NF&IP&z6yD$Hb#JP8tZu`QEd=echgIS+<5`2c{X%v#_GM8@B|k6PK{D*Z2(% zT;A+CM&8+s*S~yTGM*K^ZdU7tR?4$f2fi(F)3?=}mO6-N!R-h#SkWAhq)61BDaX59 zUz=CODM>;-Z{_S{ zD3os#l4{(|z;EZZH|z*2Aas6-*L=TP_(E)B#;70Ck^Yh_lrNb6{2rvkNAPhomj3pL z6?szl>BlSZdTeG^Pl3sk7cz&dClXd1}kMW@lhMwXxj_(7r84YI#IU(puG# z%5;Lqd$d^2#a3V6iMmuCA3)_>l%X5VtMdQ(gi+dbp^_T@z^2OBPGGIy*A+p!ks{8l z=$Uzfks!{?cZn3_|@|j|_XMb2EE5DnsqAYCUAckSH*=Ky!%Z-lrc6`X+er;eTR~C@1EepPaH7c-1QU;M zvzHK-o>Z+uatXb>lpk9Wlxm-jm;RYc!diKHU6o|JVIYUk<2(0m6T|(ZtdxW7H10!t z&r&VZc(zAWmK32B2|3F((>9rJ9B1@Uh0V9Lz&3aHM%dmQm#O3OOroiODPTINj}uZ0 zY74eV3mNMB(OdfNG5xU)D+D0lHnj-yS?ov=hJ=;p^|sf9l|y2eRhQ%&FeHPa-)}WK z!$MNTVR>;XqCb(tKa<42_my%Z906GSuD34?9wPwq_lhF^Yjg8wjeG9u-@2b8OO2eK z98MlDu5yEbG;z0FW>9vg%vuh+*!}+M}^}`Ij3rHud^Mb{!oLZR>vH-kop7e zFKwDo#bs}SElRyuh)?cd6?Uj zzt=bTm|>Dp!=Yw`>kpUfwZ_b+!U?deWBYqCLG&J$5h>FbpP@}BwUJhKOsW<{DR zp$QwY^CTA+aOtvWzqN>KqYF20V-u-Zrtm-wU$qlHC1B+vArsAT?!KXQd9F!;Ys*t( z&_h{pf#uMLxhF>jT~m)`UmQC~$x9bdRo7iZB%T~)Gk6ss07_%iX(4P5chW+b0e`>) zr5Ik=6o?F4|IF>06h{EIXE&`<{`uJUXRJf2g&x?0K zf)k%>zAM|-&w%`R77%JEcEPx}N+FvGI1TI(<-nnf&yP2Po>X1J z2cx8bMabTJnxA1(ph?3<7wkNIj(%&W`&As!!;r2RNK6*10&rgsF^{JN2Ib> zr(C7JqtPJ{`mRfiGdNH7!=RPhpslIx@zA8Pp1NOX-Sjt8MH{(6ry5Pu@oy8C*Df@=c1{X~2FH_oW&~_)>CBGkDaatNlX)gH?I-I3Mk@>8T9XGtIXVOVhUZ3) 
zLC|<409e2LGYb6CUXge@UOhA^wnTHm@0@)FFGu5ld^jDcOP<}-8H)$L!LfgjVFt>dH5Khi*F&YF?3&Q8SN1DjWE` zEJ(OXdx1NRZboCqhuKgURrut=Y51A_$Wvr&vq`{qjC`y}?F;0PbDXac`=IKa#=-|O zDv16{G-aYPvQeRE1|E6xFs@!h^wK;3RH!a9Upq-&uBJjYHN)tAwYWqVT*Eo!%PY0e zuqyMXjLQrRb#&Ep9IIMRE*iJ{vi(~9FUMlSP0j7HtnRjAo1R=H_6Q;Xs@I)E6a7+T zwLlHC3eEy`)ATy`074brkAJV}{8zXh2XYEN;Tweh%p}$ zdGV6P0yXWo+L|Ks4mId&-C11=A+Nq2`*xeXLel`#>M-NITLu!&11R0Ieyr~Cx@7W;A?saWmzuLkf`Jyu7` z@>v)xJe4k+yC&YhoZmP=0CYXOcTb>-sf^i;Ytbgk2_3?fo#Kx*E}8RQ6B0!cBqY=$ zS9lb>EmG4)mXR2RpLogG5v^2)b3^huN?NPk5S#Va-o1zz2 zQWYipuIIT1o3FIyG`;GLK*)ri99{P3QgGl#! zFSBWM1E`yEQz#!hWaaY)`gwHJBK z$AA34zlgdzPJe##@GLR(C52q7pOL;{$3M@v7mnxG*8I8 zv$@AmTcQ1o%VI_3H3EPuswxt0R51I2!@81d7i1&+*7!~hoMa>HY9hu|s%L;_-rE3& zHD_M#vDjdaiWE`l+yCJ}{F&JOZm*Mn2}!L!{Uy}>9aDh+Y1FN%+RSci`zVfYdj*(m zPE2wuaproGF(ra!WDx)jQsjyc1wXw)B|VTvMqo$Zd(;>nG0#W|J&k?}&FGY;^9?Wr z;IY@n(_(2(Dud$n^$Vjtq&n3pzWRx!Kz(d5s--nlY)yK@>`lO+*>?8vjeJ*m6|h<> zG5_7WrnlDRlk4Nw1us{{!ogZB%Y)EZXo|t`Am#|x6vh?TdFeOYn_wFE1cSBika}nET>aPdF!92JD958aAci z3=asA!EAkL#8;yD2a4i!ei+9^?ujM2Wap!&MiD7I?w+IB%+8Z1kjbb@DMBp?pMR^_ zf02#;n={Qnsu>%Fa#Kx;YUs1kMe9$&oD3aLL0hMM#@b^8Dc1S7FX9}i-U7E5nkBS2 zhTCLDG2uh%x5&^@#Fi&&x5X^xnd^0tbBOKy8aKphI#X5dys>$3^ppYV{(jo{0l9wK z(REKrBuFh1ZR*yn2mfq!QiI3<>Y=gmW~E~ zdbCrBrig};`*2=ecm~PD-4=)X9$I3yvF96+J*5R1TTMXop^4Ct_zhj+oE-MO7Y!8= z9xhx2kuj0p<02s9jG{N7_Gu(%eD+5QdZQZs1t+HnX(RrELj>5&`OQ(YawpS?xi59X zGoEx`c|_KfxUz2Km0hj9+iLlQ!^-x?ze6|XEFgnI-by0hQtN1{qfe(0nG!Zxe9(b) zPdgZu?s1UFj(KZ*`!4J ziYI7=1ZUyv3oO>777>~%P;G_19e_K!Q{ir=MC}Xk4x53 zKDji}3?S5aB3xQNe!ll?y>G9=2x;DkOn&7SpxwZvqaBt4*f-r`9NilIix1pBF?u^zUrkByTqdjchV>XoAxgn9mAekfT#^;AaP4bPHSRq zW)aH+nkd(kt?nfd48nlk>Kfe9bqdY-Xnnea)xPi|;n7G=H>z-M=j4UvxWn=;;Z~-N zfmqi<3OaOxk2hKV~!pS0?+gWc5u5qE^R(iC? 
zi|mG7W~Jwe)LW9j-(e57mW^OgSAVK1&f!x_?;EW|)F=gT@a9*!4zPa3bDf{<5P9ht zSSR<_!uI#Nk^2)&`0(r%aTJ>;0PpscHPF`sVSvlzj>4H)DRV2T1}vm96Ka6Do*6u1 z;kg&@me^4Kpv6IJhg{{|`8JowkwJ6cOhH;>mzc}3*Ow6H!muZQh41Wy<%~^6%{rC^efgqZY3V> zjj{tJ=)6DGRD1xz)+}Dg+(af!i1x-$jiQEzrHGztf9!)~x z)TI9_8U7W0n%}`^(PGGj*12-oR?M>Lxs0j8u_)BzRnfxf&S=)S( zojdK(Iv!;>1z3PgKb3BlC|_F?yO!UOxXL+jfl-z*xT!CJ&OW-1L}LS`Y7YVM)ja(n zP5t+0rv56q*J3e7(gO&M;dC;`sdm7aVY21H>bA7fx`BZ`{86BcH3we~;m4EufCSWh zX+=tJI+_?OpjPjq(h=;L6^Pm!nJ7=m)dBc~g<6sevucKxb8)q`C1!~0K%6wiu0RO)0~cF^tYDje7lkzq`AhToi`0b9(&@ z^K(rapW8wr?dWLfs(Uo+Sbs@pLoFedQeP#TN={#YMN@orodS+U< zzr%+Eg}%@%rS!SPd60dA^PuB)_(><~mVt=9dofG~26~Yf89S*$V_bWJtW9E{LIAiv z6_r&M=5H84vW$UJU6&v@l38j>cW5-O?W{b`CtGv`-|Yu{CZ!_vIM%I9%9+#C9<*{| zr^s+ayP*O@Y4pmME=^B=uh*EebowYiFD$xpa>ehDgpa=iHvVJ+{)TwYUoUP!H+oYM zRcOJlDMIf!PXKZ1WQ9Cbff{KF4F>HNnXD`WV}z2^q?R@=;C!X}$sdsjW=myBZnv_i zD&dBa{XxL)R0MK+M)ZWz!wQacV8gCwLJW{=g1L73c91-+Z3-Fm=FH6MwH|TvD3M%! zEF;*j6EGp85-ar{|9tGg@(Btl9qj}#=u`Z_fQO*5sf@YPo06+8MG2b1=hrjcnRSb! zw*3iQ?(ejwcXyiC!rhtEs(*|b{nYdN7yTWXq4^6bMMu29lz$w;kowK#M?}3aiK=KN zwC(d(i;Ft_rp9)51iJdX^mQHW_FB;>fK^_UF%^B}K=<2fRu3_Zh8?vMT2IE zF+W;g?WLrQpE`(vk-%w!KSgXa`pI&94cT*!*Ioy*ZjU@{Y#fc*t~eU z8>cC@Wc$XF<)!PwjOPk9=Ze^loHepgv36^s-u>??Uto(ptWdHAnHGs>^k)t*&^lQR z;)=!X7$`_NfvMIrCPq0ovm_}Zm8EeE1ns-IStXM>>C$4D0vLn+f%gM7san>^y&Oa_ zYu|$IZfS8OBypQnL7Xtw#h$$qjTu!DIO=X=8BAuC&>`V3pN0l52qs>Kd=&}){G|u4 zxTerO#8uUVVntStYLoI6siWwk?nXYDtVdD*dd3x7q{{x0?V?pCgy;z6Ta zR|b`7II{B6CLj{bSFf+#y`3gBfx#6iyM;Ht4htmMK=o4Za-;4&Y zOKjCdEb5M;JwUr7v4+3!k@w-8_(2aca$WiUoajlLWJYGwM~+!py4WMhFsk$_uYl*p ziq6AQaF=Tvn_X3s$&cQe*X$sqXHxrz2pazHv$LOAg8v2eV2cbM4Di%qR}HVzEarq} z>=LzXXJ79hobXL)P#MWpO;mIjaK=6C5}?aV?h|PTDat;UH#1kol{=g#$wBU{Ak_9n zmY2#b4+Y`MG1jD08H zdX>WR6{Fq@5s6^7w418N=VFkI%_PnXvT*thYcEi`sSa4N^pbHfTAcGAa@ynV=<(*N zpS@M9EE1}?PMrHr;C0E6@xK2k#s5`|=U?Ex-zOEE_G8Hs>Mt^2lY+lk$ncw18eu2d zwod4NV`ajjYSV9Zz@McDj?rxCAXOz7G*h5U+*l@SstV>(3rNIG4KhO(&2Hor7!g|R 
z7l3Yp2YPp$31+$k*zO;RiN;h4X74H4c6DTspD#Cm1*_qUA!$x)!p`7{Qr!JLvTc#@7wF@%(n?-h*+!km{PBwT(R#?e4U}4sn2=y){I6c% z^@l0s`l}&4UoOmGsgRnmG7`B@2 zdS~QWx}t)4)-I5rm5~S;xpXY)RT}l;v$1%^e5*oGsgbVe=A1pAy2#j7NPfQFv-t49 zbSnZ`dZl%_{IQP3qHliUmHc-?{!`xnoeWNq z(u#D9ajq+de)Q2iqm_Z^R|^;OH9LYArQ6J?1!*-3awCg6ha>?NUWyC}&%A^@-q|ue z>}SQ)ki-qIH0Z+EnW^wZ>x)-RH!Io)8|NA5&cwcKds_Tu?dZu`&Y8Y48vNJPTeR*) zb%?@aB?F!-Bc=8)3$+O}R-rWYM?O2g=hqio28};kX7T5elE3xUUMM_fG4Oi;jd6aX zG$rAR`Emp9PJ4Pvv*xI4mTy|1!!RGV!>+J3u9Qj@u3z-H&0Wnx+UTxD=BIdW^1XSn z@i|8Cm%x`o8#@ct3K|ur&5-Xa+HNt)9YxG*T|lCW@n(7ZO+GJx+hb;lfr7?Zr?dz_ zxt<15!{NN1lpU5$h&pcgHfC2r_VC`Wjjk8xMLOx{ue*@Prlj>>HCwtIQ+Kp8CIRAnW(h z)V@hx9I-Y{#$L4u5J!Xyy%x7*%^eAx(zsW&$#_KBQh?|D{=EF7&r3W!0s|K$**Pc6 zKfZkVz`4iBJECrT7OIH<=3?x>RVfL( zQzKSx(Z)yu@4q({X(7Fzf=o-nZ^z4fJnqL`4o;CzY`e8RoUGE!4u4V?vUX|&edoHU zIM&%~n^Qbhg!lYf`C3Wo$5@rx53#DJOUPK2k*|{SD2J1PXZ?pGAoFUV|!@nYy{ytjyC({1EK-hnoDooGcdQ>-&JmK;JRwjuq-O7ghnnt(9i>qBo zh-a&=%u+e0tGZb!@d|_T-Rb9|vXBuz1|slq;+C}H6%UuhB**Ezu>MnQ`_yS+yz?}> zaRo~)j-o9Wcbjws;1+Q(nTO9qH;YD2wJ}JA*kJ1TFibLQCyKbN23_jvpt<)xjg#15 zreKR+GKKfkFJMvG{fl_TV0PBeg`SmTSjb&IG3Op3#Pj?8+!y=WAPbahF z2_2pe%Y83rQWdygYuVCmh&!@>?I@JnFEt~#Q++ySUse~WdOPZ^#8QZES7T$Gl!fyQ z%gFc*9$@cNFPW1<8MBBL!dNCeim-f^rI34?-0y-kuDx1>6j|ic;Z27pDpHnafAbvt z4eJqqOWVIwsq>FDfVjV2x2*r`l_md}jey%HggLS;kFWx@su~Sd_vXGuDUCV7tSm!v zy4x4nKT2~Wx5%XcJ0k$C)w0>p_d(STRf}pGU&KDWh1@zv(z{9B0xC(Cn=)Or2U0)> ztDCzOvF8uzOS&F(tYs|`en`t>VaTs5? 
zU$`ZGL_zKhfrmz(?vc^zM))w;yjol0)|tz6&FgFWLe$jAdGva{zYVghWe1?DM@&!Y~z&kJQTby8)u9vevF zv|?b)r0CQ|B#89oI)J=5D%-3oT%@UYB-OIrq>(w`URYovt1fqi=XZMB&&tNT*q^J0xdMR{D_h4>J{z_l-bPpJcHXonNCLbEKsnJ9})4ZC zLRKAe!01LEXA#oeWET^OsL!A65hAc_gb>li3xW^;{w2y&vA9E_c-z>7&~M6Z&r2wy zPK)&QpfiJOAy;x^1m$yW!_8|AsuyqpLxtoJcrU$A%0Q&CcaHbWqG>$Va=i?rj&*iz zMe7Y!7R8A(+Of$It2_E+>`PTO)t|4YUlu1rMcA0GjN3~PfP#Yt9*=gx_r7DeE69lPFXwlvem3C8WbT94<6t!{T{|mF!XnaZUA!!>ezK zlw};AtOTpJsab9x?oYy+?{ySWr#U;v*`#b-Bsgk{rg(V~&1}si()UO_oa<<&_i?`@ zpUTbDP}h=D&)Ta0UZW$4{P9m&8}>Vf1k88H#{>P%s&!wN(JQfjN!I6L<0<`;qkc1( zX-GZZEEk-wn-Ho|tKCZ2-d<3(8x^@6rM{Y6({2iFIPk=>H*){q|wY4(QZFHl4!_4bZkE zZnBV&^EPL4+1lk1XvnqWa-n_1i%vU!$EE9N%FQZ5XqRcB-kn82^{c&X76QJgcjSEk|B-<`9OIs=5Hm|TF#cCObU zy>UluGn`gEBu4Yju#0A%Pbuo9Rd+t_8NYePFRi~y7kNk|%ztMt3(6RwaVm{1Cbw>O z&{Wr3;hHiaG0bUA_wJMr+neH$UGrE^DsOAJm49*Cs-k**^dRrdNwDI*?nU+m{_efr z0+W>P;=WzxBB8}rQMxLXb!e1wL`tn0>H$aONHH+eH3nU;4Re3)`lGm5fXTXw^d7(0 zJ0ox83S1ryQb8TL-g>3Ikqw}f!~NFwoV9SX5vgxkfEt*%V75F{7%kSiWagajd0Ah^ zwb1P%Tdyaxr<+P#H5*=aPC{~QvF~&a`I@ln0B>E65-H)&EGs)hoht3tUCl1EsK|*O zQu0Q^fy}kc%{BIZVej?h7M#_!=x9PKrGs#mljH{mjqdpwU%=m=9tO;L&OvQ8X6IT_ zOJO0tm*Wnirqo)+?_h?#U)eadI8C$TVZ{4uPL5if!p}ESzKO`-&Za(xP_uNe;_=@X z!P8nTx_ZfTvJ;ZsC7zj)c6qFH#7PU zLiY>a6g;W<0BD_vtI%(lKvGC71mND0$6U1*GHs814rOVz6vc1ig;kzR9A)gSaweR8 z6Sj*M+#elDvN}YSNhV7Mz4RS2mA-(4mz61NlsbO{zn{0JdJFKGL3}ZY?DBL&-gD3E+JcOuI71|_I#>Ow>1tRB^o1h40|pxv2909OKb8X2_sbTKx949t`*3sPqz>0t~=z6vDhwl zEYdbVHxi)HR|T$s@5`Gf8s5*NB(fpBCdth|8@ospj2RZ~$!1NeFl!_V#|x=&`>Gwg z$zrpY30zSklwT>NCewXX*RAHKf`#`v0tYx@addo~5B7*Tb)TC{EV`QM=qGxI+`!&- zSr+HSL`=My2QP;*vLl*{V6!YxZ014MaLPi`$4i?I$n9pZQp&rJMwO>b)9ULR>(2W_ zlWCBi`2ww@W0Y;q;VhuKEdYeb0Mdy0V75}9J@T<)={wbmQUxmNIzAp;9T?VS+uG(TRoNv@+V7gF?Qz|p62ib^5 zMIPi6s(tlp@2S7z6*3CoheI0T?w74oI=pbD}^h5)=n3U{b6I;=E1{=L92swHpo{!VG=7&!X z5dfQRI8mk)CrC7XEo9R?90Q0!CtHFs`K=pHxc|+74elFjbiNGvQmmrponAJTc^#RB zcM+ruSqwl9xXYPlB4qu`Bpg-@)u>~!Pb=z4e^#Qw$`1As5f65|W-%Tuc=1-2D5=3l zh`a4mWG9hXh;N03G@1-wS4b 
zelAyRnSTTbtJ_I|(jmF=s>X;aeheMbN$0_ctdP%l-Q!r9F!R&Eqm0_D4l6?bFQ=#4 zv=OIFFvd$Q*yVrRl$!gWM&idt{_F)J&rYi<6vUgi8sl;14i|MyMh zF&x4JC%J4&tr#7pT;5nzC&XyK&4^^9y<6zNh%r(Zl8XEO?asM-NmN+eqoG;S!!K`U zq%jVUmDZS1x!?p|Y;Z59K^Xo5LUOxR2#_cz=|S502{wNnf~*8Ha9=v zI{f_9FYow2-HaiGX482%m?=~z7%LC`om7Le&KnKN_OWOHRW}jWFXvGKNVY%+2&NAw z&+H?i%^7&6n&IQk0DxIu0~}Hg61I$vcZwiO5y(91_Z{7DHN_xHNa{A=nEPVTaYsjG z48dQXr1|+xT0kTj_;*7;e{g@{8QCAWA^?)LVclrDeK}NbE8IC&0X=dhE1aK@yKGA2 zLVht(3KeMN6sUXTE!IFpd*-zafe%b$*60p6fy@6!z`tU6{(-psZv^}ahX3o~@$ZX( zybF&;LLLyuf0aQ1KK)?Da$)#rRSP!`PDt=&35P*dTlDrUn$q|=wEkVR6Q}>Ut$?*e zRz0o?AAc$DHht)~0{T>?|ZM>r1z)HSmv{<>ZVW-lsR8_Ex zHsbXiOp!N88w!*?F``sR&Ai^751X-QP;q-T+MvpwK!Mwq_<(j*;HKik_?;aJ;K7`b z!g~Ah!ooJmXh$hu|4zO@xV;AYxH7t|2)FE7{Pf>cw}@-hK|JwLS2Wuz&+@E=4qxJC z0CKBKRmGv44-0E=0m%O*A~YdGK9HT`YTkO;f7)NsDI0yR{K$dS5cIY!!6g+9VDi#A zDjLIbZlZy{ZlD&FA8xKzR#Wk&^S0gv2l-t^efFp(%838h-nm94VL)-5WZKF{KIUsa zvf*R1!h8f6NVHURN;79oLC8!EbrOkB=;1UqOED{IGkcghAq7nl!ccpPCYm6=Ky8Y+ zrlQLfUuAROcFwlb*FNm)z2EM+=iGDe{r~QDgX>Ypaqj_oXeK&hHm>*#HT!Jz`JlYA zu(J_B9V>_-*$tw!Gs<5B3pNNF1d?g?4A^!&KzHTZmg7z@8K4W<+i1K&Em{%YAxJWJ z9=|D&{#fi;pzDq(5R9cnil=DzBK?u`r#`%vWw9m>HXyGivj}Y}1@@`*d4K0;dN-Vh zKwm~XKQyFfp={lq3LSXOeLG=7pIAyV_{_pV&5q$a5}eqQcbjSNeitA{JB@p}(w}$A ze|vIjhlD%db8Ln^ztw-W%LV-Tq2XsWdFH!Q;Xj#L5YVQ4%18qEO^BfBer~$p9>n-D z%82W|HV*KFpY5cEV0WKUbI;E;(%yZ&I{GEDgVa}BxnyL#;efo1)6?d8giAqLt!?B# zsNOPF=~u^@@1oapCd`(Lq)F`kYF$NKg4Ppj(q}cwp^D18?z+>fd`<=RZXrklkc1BE_oyI`Q&}U9#MS% z^9=^}gD>(SzUwU}@x6=`c}`(8_4j+6?aIr{sjxkw$xt~zvubC&LNGkLDu<7S&alahZH^H6c`=x@eaCMwhr}BE5qzRz+J7s*$to~DAb=;Dn~8MK0`>C zda*yWSpD4dU1l}oG0ToGZbW<0_37m-0#Uy2;xc2-aRFc48I$hUz{@uF_jYl@FZx*h zh=A%*`zY+RQ~EJ9_|5vE0m8u5SuopXFOv)eTZZ|;tPQCf*V?I14_f@v@bT>gdw=7f4<Y7_0RZj4-k&G2zxg1Ku(fb8`x6QAkBKYG z+3&c$3n^z=#}XxFxq>_#Ih?c3Yqu<_ArXpZ?)jYNT|jDrM$D!C~+b~cP2+4WbV zoFHGBuprcl2uPlO(ngG@u|H}3{x@2tl9{E`z;5f#NO(o&X*C;bwW#jkUae{SydB{^ zy@sPY4sU=t-^wqsja{%c=$7{do_kDVyJ6fw+Ep4-sniV+1@FlM*88_2GN@g^6DG9e z)KZM8>d9cok&~2YP40@cD!6tlw5iK|T0bcnjg}@Pd4X+^;@y{Ql?}dwo#Fs@*Xd_@ 
z_RGbMAGLPV)@{ns~h)d^K#dl?YTTo1Vs^6i#uB&hbrJJXgc_g@fTJ3A+feOY(oQ*7;4 zTYEB37O5R;c(UH!QBZSY(<{}d6XZnb+WCt4R@U50eN6n_#mG^8=lB4cZ_||#rw1l& zChhsgs=P?iahQ`OqDx05%J&HHSLTe5rUm|Ju(aJVv^!0EuK>^7ht}IOek-d5QoGo) z@}w|6rB<=cAqE$F-MZD{f**#dBY9)?s7hC4j6P4)*sm?bU zx2_Kw(FMwC)8|C0S>uS}kp7_phw7ts-3~Pkb@a4ezxG|Q4>*OE$-*)?kpZU zz-#_PJ=DQHTqbjw;S|{i^M1}^%t?~XM#^p7^ z10W=;KO;^}S+(dU%}o{>?vxR*Lvgj?d)@#W+^r*QH(6Jb(>a+Ru3mpbTK;iz=6~P+ N@%zYgVE6lhKLKg&E4=^! diff --git a/PyTorch/build-in/Classification/SKNet/resnet_loss.txt b/PyTorch/build-in/Classification/SKNet/resnet_loss.txt deleted file mode 100644 index 5ba79e51b..000000000 --- a/PyTorch/build-in/Classification/SKNet/resnet_loss.txt +++ /dev/null @@ -1,29 +0,0 @@ -=== CUDA === -6.722200 5.131600 5.915500 5.783700 6.252200 59.203100 20.966800 35.868000 34.632800 46.643600 -35.153800 41.165300 48.390600 28.469700 27.430200 37.822800 32.582000 19.891400 12.522000 12.106000 -16.621300 15.517600 17.301500 14.923800 7.623500 6.523400 5.043700 14.833500 10.851100 6.979500 -7.734900 8.155800 8.726100 8.512200 12.779800 6.853000 5.218800 6.705100 7.197300 4.713900 -6.400400 6.716300 5.617700 5.742200 6.246100 5.667000 4.404100 6.937000 5.252400 4.816400 -4.772900 4.983900 4.612300 5.179200 4.505900 4.680700 4.637700 4.376000 4.464400 4.775400 -4.465300 4.737300 4.744100 4.904800 5.029800 5.647500 4.488800 6.481400 4.438000 4.585000 -4.657200 4.438700 4.719200 4.987800 4.421600 5.077100 4.496800 4.625500 4.993700 4.719200 -4.710400 4.707500 4.793900 4.690900 5.836400 4.506600 4.635700 4.522500 4.782700 4.443800 -4.710900 4.803700 4.661100 4.900900 4.777800 4.624000 4.619100 4.593500 4.877000 4.713900 - -=== SDAA === -6.725100 5.131800 31.123000 42.451500 38.739300 44.960900 44.694100 34.868200 33.409200 34.249800 -31.345700 20.143100 14.624300 28.907700 15.078900 18.812300 13.294900 33.630900 14.099100 18.008300 -11.372100 12.844500 15.206500 7.287600 10.127400 9.049800 6.249500 4.945300 7.943400 5.042500 -5.852500 4.924800 6.449200 9.627900 
7.587900 5.962900 4.840300 5.529800 10.456100 5.598600 -5.537600 4.422900 4.964400 5.057100 5.792000 6.228000 5.322800 4.571300 4.973600 4.879900 -4.813000 4.700200 4.487500 5.230500 4.220700 4.823200 4.521000 5.182100 5.922900 4.387200 -4.585900 4.917500 4.928200 4.788600 4.723600 4.774400 4.610400 4.845700 4.514200 4.766100 -5.018600 4.715300 4.779800 4.624300 4.476300 4.760500 4.502700 4.383500 4.783700 4.772500 -4.583000 4.667500 4.724600 4.572800 4.492700 4.439900 4.571000 4.491200 4.741700 4.707000 -4.671900 4.492900 4.674800 4.914600 4.663600 4.661100 4.769000 4.473900 4.906200 4.654300 - -=== RESULT === -MeanRelativeError: 0.11245239121555989 -MeanAbsoluteError: -0.43448099999999995 -Rule,mean_absolute_error -0.43448099999999995 -pass mean_relative_error=0.11245239121555989 <= 0.05 or mean_absolute_error=-0.43448099999999995 <= 0.0002 diff --git a/PyTorch/build-in/Classification/SKNet/run b/PyTorch/build-in/Classification/SKNet/run deleted file mode 100644 index 17276b713..000000000 --- a/PyTorch/build-in/Classification/SKNet/run +++ /dev/null @@ -1 +0,0 @@ -bash ../sdaaTest.sh sknet 2 5 diff --git a/PyTorch/build-in/Classification/SKNet/sknet222.py b/PyTorch/build-in/Classification/SKNet/sknet222.py deleted file mode 100644 index 781bb064b..000000000 --- a/PyTorch/build-in/Classification/SKNet/sknet222.py +++ /dev/null @@ -1,145 +0,0 @@ -import torch -from torch import nn - - -class SKConv(nn.Module): - def __init__(self, channels, branches=2, groups=32, reduce=16, stride=1, len=32): - super(SKConv, self).__init__() - len = max(channels // reduce, len) - self.convs = nn.ModuleList([]) - - for i in range(branches): - self.convs.append(nn.Sequential( - nn.Conv2d(channels, channels, kernel_size=3, stride=stride, padding=1 + i, dilation=1 + i, - groups=groups, bias=False), - nn.BatchNorm2d(channels), - nn.ReLU(inplace=True) - )) - - self.gap = nn.AdaptiveAvgPool2d((1, 1)) - self.fc = nn.Sequential( - nn.Conv2d(channels, len, kernel_size=1, stride=1, 
bias=False), - nn.BatchNorm2d(len), - nn.ReLU(inplace=True) - ) - self.fcs = nn.ModuleList([]) - for i in range(branches): - self.fcs.append( - nn.Conv2d(len, channels, kernel_size=1, stride=1) - ) - self.softmax = nn.Softmax(dim=1) - - def forward(self, x): - x = [conv(x) for conv in self.convs] - x = torch.stack(x, dim=1) - attention = torch.sum(x, dim=1) - attention = self.gap(attention) - attention = self.fc(attention) - attention = [fc(attention) for fc in self.fcs] - attention = torch.stack(attention, dim=1) - attention = self.softmax(attention) - x = torch.sum(x * attention, dim=1) - return x - - -class SKUnit(nn.Module): - def __init__(self, in_channels, mid_channels, out_channels, branches=2, group=32, reduce=16, stride=1, len=32): - super(SKUnit, self).__init__() - - self.conv1 = nn.Sequential( - nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=1, bias=False), - nn.BatchNorm2d(mid_channels), - nn.ReLU(inplace=True) - ) - - self.conv2 = SKConv(mid_channels, branches=branches, groups=group, reduce=reduce, stride=stride, len=len) - - self.conv3 = nn.Sequential( - nn.Conv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False), - nn.BatchNorm2d(out_channels) - ) - - if in_channels == out_channels: # when dim not change, input_features could be added diectly to out - self.shortcut = nn.Sequential() - else: # when dim not change, input_features should also change dim to be added to out - self.shortcut = nn.Sequential( - nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(out_channels) - ) - - self.relu = nn.ReLU(inplace=True) - - def forward(self, x): - residual = x - residual = self.shortcut(residual) - - x = self.conv1(x) - x = self.conv2(x) - x = self.conv3(x) - x += residual - return self.relu(x) - - -class sknet(nn.Module): - def __init__(self, num_classes, num_block_lists=[3, 4, 6, 3]): - super(sknet, self).__init__() - self.basic_conv = nn.Sequential( - nn.Conv2d(3, 64, kernel_size=7, 
stride=2, padding=3, bias=False), - nn.BatchNorm2d(64), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - ) - - self.stage_1 = self._make_layer(64, 128, 256, nums_block=num_block_lists[0], stride=1) - self.stage_2 = self._make_layer(256, 256, 512, nums_block=num_block_lists[1], stride=2) - self.stage_3 = self._make_layer(512, 512, 1024, nums_block=num_block_lists[2], stride=2) - self.stage_4 = self._make_layer(1024, 1024, 2048, nums_block=num_block_lists[3], stride=2) - - self.gap = nn.AdaptiveAvgPool2d(1) - self.classifier = nn.Linear(2048, num_classes) - - for m in self.modules(): - if isinstance(m, (nn.Conv2d, nn.Linear)): - nn.init.kaiming_normal_(m.weight, mode='fan_in') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - - def _make_layer(self, in_channels, mid_channels, out_channels, nums_block, stride=1): - layers = [SKUnit(in_channels, mid_channels, out_channels, stride=stride)] - for _ in range(1, nums_block): - layers.append(SKUnit(out_channels, mid_channels, out_channels)) - return nn.Sequential(*layers) - - def forward(self, x): - x = self.basic_conv(x) - x = self.stage_1(x) - x = self.stage_2(x) - x = self.stage_3(x) - x = self.stage_4(x) - x = self.gap(x) - x = x.view(x.size(0), -1) - - x = self.classifier(x) - return x - - -def SKNet(num_classes=1000, depth=50): - assert depth in [50, 101], 'depth invalid' - key2blocks = { - 50: [3, 4, 6, 3], - 101: [3, 4, 23, 3], - } - model = sknet(num_classes, key2blocks[depth]) - return model - -def Model(num_classes): # welo - r"""Return your custom model - """ - return SKNet(num_classes=num_classes) - - - -# 问题由底层 tecodnn(torch-sdaa 后端)不支持 groups>1 与 dilation>1 同时出现的卷积配置,当模型在 SKConv 某些分支构造 kernel=3, dilation=2, padding=2, groups=32 时,底层返回 TECODNN_STATUS_NOT_SUPPORTED 并抛出错误,导致训练中断。 diff --git a/PyTorch/build-in/Classification/SKNet/sknetdebug.py 
b/PyTorch/build-in/Classification/SKNet/sknetdebug.py deleted file mode 100644 index 2a0ccdb7d..000000000 --- a/PyTorch/build-in/Classification/SKNet/sknetdebug.py +++ /dev/null @@ -1,161 +0,0 @@ -import torch -from torch import nn - -class DebugConv2d(nn.Conv2d): - def forward(self, x): - try: - out = super().forward(x) - print(f"[Debug][OK] Conv2d: " - f"in={tuple(x.shape)}, out={tuple(out.shape)}, " - f"kernel={self.kernel_size}, stride={self.stride}, " - f"padding={self.padding}, dilation={self.dilation}, " - f"groups={self.groups}") - return out - except Exception as e: - print(f"[Debug][ERROR] Conv2d failed: " - f"in={tuple(x.shape)}, kernel={self.kernel_size}, " - f"stride={self.stride}, padding={self.padding}, " - f"dilation={self.dilation}, groups={self.groups}") - raise e - -# [Debug][ERROR] Conv2d failed: in=(128, 128, 56, 56), kernel=(3, 3), stride=(1, 1), padding=(2, 2), dilation=(2, 2), groups=32 - -# 底层返回 TECODNN_STATUS_NOT_SUPPORTED 并抛出错误,导致训练中断。 - - -class SKConv(nn.Module): - def __init__(self, channels, branches=2, groups=32, reduce=16, stride=1, len=32): - super(SKConv, self).__init__() - len = max(channels // reduce, len) - self.convs = nn.ModuleList([]) - for i in range(branches): - self.convs.append(nn.Sequential( - DebugConv2d(channels, channels, kernel_size=3, stride=stride, padding=1 + i, dilation=1 + i, - groups=groups, bias=False), - nn.BatchNorm2d(channels), - nn.ReLU(inplace=True) - )) - self.gap = nn.AdaptiveAvgPool2d((1, 1)) - self.fc = nn.Sequential( - DebugConv2d(channels, len, kernel_size=1, stride=1, bias=False), - nn.BatchNorm2d(len), - nn.ReLU(inplace=True) - ) - self.fcs = nn.ModuleList([]) - for i in range(branches): - self.fcs.append( - DebugConv2d(len, channels, kernel_size=1, stride=1) - ) - self.softmax = nn.Softmax(dim=1) - - def forward(self, x): - x = [conv(x) for conv in self.convs] - x = torch.stack(x, dim=1) - attention = torch.sum(x, dim=1) - attention = self.gap(attention) - attention = self.fc(attention) - 
attention = [fc(attention) for fc in self.fcs] - attention = torch.stack(attention, dim=1) - attention = self.softmax(attention) - x = torch.sum(x * attention, dim=1) - return x - - -class SKUnit(nn.Module): - def __init__(self, in_channels, mid_channels, out_channels, branches=2, group=32, reduce=16, stride=1, len=32): - super(SKUnit, self).__init__() - - self.conv1 = nn.Sequential( - DebugConv2d(in_channels, mid_channels, kernel_size=1, stride=1, bias=False), - nn.BatchNorm2d(mid_channels), - nn.ReLU(inplace=True) - ) - - self.conv2 = SKConv(mid_channels, branches=branches, groups=group, reduce=reduce, stride=stride, len=len) - - self.conv3 = nn.Sequential( - DebugConv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False), - nn.BatchNorm2d(out_channels) - ) - - if in_channels == out_channels: # when dim not change, input_features could be added diectly to out - self.shortcut = nn.Sequential() - else: # when dim not change, input_features should also change dim to be added to out - self.shortcut = nn.Sequential( - DebugConv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(out_channels) - ) - - self.relu = nn.ReLU(inplace=True) - - def forward(self, x): - residual = x - residual = self.shortcut(residual) - - x = self.conv1(x) - x = self.conv2(x) - x = self.conv3(x) - x += residual - return self.relu(x) - - -class sknet(nn.Module): - def __init__(self, num_classes, num_block_lists=[3, 4, 6, 3]): - super(sknet, self).__init__() - self.basic_conv = nn.Sequential( - DebugConv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False), - nn.BatchNorm2d(64), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - ) - - self.stage_1 = self._make_layer(64, 128, 256, nums_block=num_block_lists[0], stride=1) - self.stage_2 = self._make_layer(256, 256, 512, nums_block=num_block_lists[1], stride=2) - self.stage_3 = self._make_layer(512, 512, 1024, nums_block=num_block_lists[2], stride=2) - self.stage_4 = 
self._make_layer(1024, 1024, 2048, nums_block=num_block_lists[3], stride=2) - - self.gap = nn.AdaptiveAvgPool2d(1) - self.classifier = nn.Linear(2048, num_classes) - - for m in self.modules(): - if isinstance(m, (DebugConv2d, nn.Linear)): - nn.init.kaiming_normal_(m.weight, mode='fan_in') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - - def _make_layer(self, in_channels, mid_channels, out_channels, nums_block, stride=1): - layers = [SKUnit(in_channels, mid_channels, out_channels, stride=stride)] - for _ in range(1, nums_block): - layers.append(SKUnit(out_channels, mid_channels, out_channels)) - return nn.Sequential(*layers) - - def forward(self, x): - x = self.basic_conv(x) - x = self.stage_1(x) - x = self.stage_2(x) - x = self.stage_3(x) - x = self.stage_4(x) - x = self.gap(x) - x = x.view(x.size(0), -1) - - x = self.classifier(x) - return x - - -def SKNet(num_classes=1000, depth=50): - assert depth in [50, 101], 'depth invalid' - key2blocks = { - 50: [3, 4, 6, 3], - 101: [3, 4, 23, 3], - } - model = sknet(num_classes, key2blocks[depth]) - return model - -def Model(num_classes): # welo - r"""Return your custom model - """ - return SKNet(num_classes=num_classes) - diff --git a/PyTorch/build-in/Classification/SKNet/weloTrain.py b/PyTorch/build-in/Classification/SKNet/weloTrain.py deleted file mode 100644 index 79fcf2400..000000000 --- a/PyTorch/build-in/Classification/SKNet/weloTrain.py +++ /dev/null @@ -1,567 +0,0 @@ -#!/usr/bin/env python3 -# coding: utf-8 - -import os -import random -import sys -import time -import json -import argparse -from collections import OrderedDict -from pathlib import Path -import numpy as np -import pandas as pd -from tqdm import tqdm -import importlib - -os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # 强烈推荐在 shell/最顶端设置 -os.environ["PYTHONHASHSEED"] = "12345" -os.environ["OMP_NUM_THREADS"] = "1" -os.environ["MKL_NUM_THREADS"] = "1" - -def 
ensure_cublas_workspace(config=":4096:8"): - """ - 尝试为 cuBLAS 设置可复现 workspace。强烈建议在主脚本入口处(import torch 之前) - 通过 export 设置该 env。此函数会在运行时设置,但如果 torch 已经被 import, - 则可能为时已晚——函数会打印提醒。 - """ - already = os.environ.get("CUBLAS_WORKSPACE_CONFIG") - if already: - print(f"[seed_utils] CUBLAS_WORKSPACE_CONFIG 已存在:{already}") - else: - os.environ["CUBLAS_WORKSPACE_CONFIG"] = config - print(f"[seed_utils] 已设置 CUBLAS_WORKSPACE_CONFIG={config} (注意:请在 import torch 前设置以保证生效)") - -def set_global_seed(seed: int = 42, set_threads: bool = True): - """ - 统一随机性设置。注意:若希望完全发挥效果,请在主脚本入口(import torch 之前) - 先调用 ensure_cublas_workspace(...) 或在 shell 中 export CUBLAS_WORKSPACE_CONFIG。 - """ - ensure_cublas_workspace() # 会设置 env 并提醒 - os.environ["PYTHONHASHSEED"] = str(seed) - - if set_threads: - os.environ["OMP_NUM_THREADS"] = "1" - os.environ["MKL_NUM_THREADS"] = "1" - - random.seed(seed) - np.random.seed(seed) - - # 现在导入 torch(晚导入以便前面 env 生效) - import torch - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - # 强制确定性(如果存在不确定性算子,PyTorch 会报错并提示) - try: - torch.use_deterministic_algorithms(True) - except Exception as e: - print("[seed_utils] 设置 deterministic 模式时出错:", e) - print("[seed_utils] 请确认 CUBLAS_WORKSPACE_CONFIG 已在 import torch 之前设置。") - - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - if set_threads: - torch.set_num_threads(1) - torch.set_num_interop_threads(1) - - print(f"[seed_utils] 全局 seed 已设置为 {seed}") - -set_global_seed(2025) - -""" -通用训练模版(优先从本地导入 Model -> 支持 DDP / 单卡,AMP,resume,日志,checkpoint) -保存为 train_template_localmodel.py -""" -import torch -import torch.nn as nn -import torch.optim as optim -import torch.backends.cudnn as cudnn -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as tv_models - -import torch.distributed as dist -from torch.nn.parallel import DistributedDataParallel as DDP -from 
torch.utils.data import DataLoader -from torch.utils.data.distributed import DistributedSampler - -from torch.sdaa import amp -# from torch.cuda import amp - - -# ---------------------------- -# Helper utilities (self-contained) -# ---------------------------- -class AverageMeter(object): - def __init__(self, name='Meter', fmt=':.4f'): - self.name = name - self.fmt = fmt - self.reset() - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / max(1, self.count) - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} (avg {avg' + self.fmt + '})' - return fmtstr.format(name=self.name, val=self.val, avg=self.avg) - -def accuracy(output, target, topk=(1,)): - """Computes the precision@k for the specified values of k - 返回一个 list,每个元素是 tensor(百分比形式) - """ - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - # output: (N, C) -> pred: (maxk, N) - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() # (maxk, N) - correct = pred.eq(target.view(1, -1).expand_as(pred)) # (maxk, N) bool - - res = [] - for k in topk: - # 把前 k 行展平后求和(返回 0-dim tensor),随后换算为百分比 - correct_k = correct[:k].reshape(-1).float().sum() # 注意:不传 keepdim - # 乘以 100.0 / batch_size,保持返回 tensor(和之前代码兼容) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - -def save_checkpoint(state, is_best, save_dir, filename='checkpoint.pth'): - save_path = os.path.join(save_dir, filename) - torch.save(state, save_path) - if is_best: - best_path = os.path.join(save_dir, 'model_best.pth') - torch.save(state, best_path) - -def set_seed(seed, deterministic=False): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - if deterministic: - cudnn.deterministic = True - cudnn.benchmark = False - else: - cudnn.deterministic = False - cudnn.benchmark = True - -# ---------------------------- -# Argument 
parser -# ---------------------------- -def parse_args(): - parser = argparse.ArgumentParser(description='Generic PyTorch training template (DDP/AMP) with LocalModel priority') - parser.add_argument('--name', default='run', type=str, help='experiment name (log/checkpoints dir)') - parser.add_argument('--seed', default=42, type=int, help='random seed') - parser.add_argument('--arch', default='None', type=str, help='model name') - parser.add_argument('--deterministic', action='store_true', help='set cudnn deterministic (may be slower)') - parser.add_argument('--dataset', default='cifar10', choices=['cifar10','cifar100','imagenet','custom'], help='which dataset') - parser.add_argument('--datapath', default='./data', type=str, help='dataset root / imagenet root / custom root') - parser.add_argument('--imagenet_dir', default='./imagenet', type=str, help='if dataset=imagenet, path to imagenet root') - parser.add_argument('--custom_eval_dir', default=None, help='if dataset=custom, provide val dir') - parser.add_argument('--num_workers', default=4, type=int, help='dataloader workers per process') - parser.add_argument('--epochs', default=200, type=int) - parser.add_argument('--batch_size', default=128, type=int) - parser.add_argument('--model_name', default='resnet18', help='torchvision model name or python path e.g. 
mypkg.mymodule.Model (used if no local Model)') - parser.add_argument('--num_classes', default=None, type=int, help='override num classes (auto-detect for common sets)') - parser.add_argument('--pretrained', action='store_true', help='use torchvision pretrained weights when available') - parser.add_argument('--optimizer', default='sgd', choices=['sgd','adam','adamw'], help='optimizer') - parser.add_argument('--lr', '--learning_rate', default=0.1, type=float) - parser.add_argument('--momentum', default=0.9, type=float) - parser.add_argument('--weight_decay', default=5e-4, type=float) - parser.add_argument('--nesterov', action='store_true') - parser.add_argument('--scheduler', default='multistep', choices=['multistep','step','cosine','none'], help='lr scheduler') - parser.add_argument('--milestones', default='100,150', type=str, help='milestones for multistep (comma sep)') - parser.add_argument('--step_size', default=30, type=int, help='step size for StepLR or cosine max epochs') - parser.add_argument('--gamma', default=0.1, type=float) - parser.add_argument('--scheduler_step_per_batch', action='store_true', help='call scheduler.step() per batch (for some schedulers)') - parser.add_argument('--resume', default='', type=str, help='path to checkpoint to resume from') - parser.add_argument('--start_epoch', default=0, type=int) - parser.add_argument('--print_freq', default=100, type=int) - parser.add_argument('--save_freq', default=10, type=int, help='save checkpoint every N epochs (rank0 only)') - parser.add_argument('--amp', action='store_true', default = True,help='use automatic mixed precision (AMP)') - parser.add_argument('--grad_accum_steps', default=1, type=int, help='gradient accumulation steps') - parser.add_argument('--local_rank', default=None, type=int, help='local rank passed by torchrun (if any). 
Use -1 or None for non-distributed') - parser.add_argument('--cutmix_prob', default=0.0, type=float) - parser.add_argument('--beta', default=1.0, type=float) - parser.add_argument('--seed_sampler', default=False, action='store_true', help='set sampler epoch seeds to make deterministic distributed shuffling') - args = parser.parse_args() - args.milestones = [int(x) for x in args.milestones.split(',')] if args.milestones else [] - return args - -# ---------------------------- -# build model (优先 LocalModel) -# ---------------------------- -def build_model_with_local_priority(args, device=None): - """ - 用参数 args.arch 作为模块名导入 Model() - 如果模块不存在或没有 Model 类,则报错停止。 - """ - try: - # 动态导入模块,比如 args.arch = "rexnet" - mod = importlib.import_module(args.arch) - Model = getattr(mod, "Model") # 从模块中获取 Model 类 - except Exception as e: - raise RuntimeError( - f"无法导入模型模块 '{args.arch}' 或未找到类 Model。" - f"\n错误信息:{e}" - ) - - # 解析数据集类别数 - if args.dataset == 'cifar10': - num_classes = 10 - elif args.dataset == 'cifar100': - num_classes = 100 - else: - print(f"[ERROR] 不支持的数据集类型:{args.dataset},无法确定类别数。程序终止。") - sys.exit(1) - - - # 实例化 - try: - model = Model(num_classes) - except Exception as e: - raise RuntimeError( - f"Model() 实例化失败,请检查模型构造函数。\n错误信息:{e}" - ) - - return model - -# ---------------------------- -# Data loader factory -# ---------------------------- -def build_dataloaders(args, rank, world_size): - if args.dataset == 'cifar10' or args.dataset == 'cifar100': - # mean = (0.4914, 0.4822, 0.4465) - # std = (0.2470, 0.2435, 0.2616) if args.dataset == 'cifar10' else (0.2023, 0.1994, 0.2010) - # train_transform = transforms.Compose([ - # transforms.RandomCrop(32, padding=4), - # transforms.RandomHorizontalFlip(), - # transforms.ToTensor(), - # transforms.Normalize(mean, std), - # ]) - # test_transform = transforms.Compose([ - # transforms.ToTensor(), - # transforms.Normalize(mean, std), - # ]) - - # 使用 ImageNet 风格的 224 输入 - mean = (0.485, 0.456, 0.406) - std = (0.229, 0.224, 0.225) - 
train_transform = transforms.Compose([ - transforms.RandomResizedCrop(224), # 随机裁剪并缩放到 224x224 - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - transforms.Normalize(mean, std), - ]) - test_transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean, std), - ]) - - root = args.datapath - if args.dataset == 'cifar10': - train_set = datasets.CIFAR10(root=root, train=True, download=False, transform=train_transform) - val_set = datasets.CIFAR10(root=root, train=False, download=False, transform=test_transform) - num_classes = 10 - else: - train_set = datasets.CIFAR100(root=root, train=True, download=False, transform=train_transform) - val_set = datasets.CIFAR100(root=root, train=False, download=False, transform=test_transform) - num_classes = 100 - - elif args.dataset == 'imagenet': - train_dir = os.path.join(args.imagenet_dir, 'train') - val_dir = os.path.join(args.imagenet_dir, 'val') - train_transform = transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)), - ]) - test_transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225)), - ]) - train_set = datasets.ImageFolder(train_dir, train_transform) - val_set = datasets.ImageFolder(val_dir, test_transform) - num_classes = args.num_classes or 1000 - - elif args.dataset == 'custom': - train_dir = os.path.join(args.datapath, 'train') - val_dir = args.custom_eval_dir or os.path.join(args.datapath, 'val') - train_transform = transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - ]) - test_transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - ]) - train_set = 
datasets.ImageFolder(train_dir, train_transform) - val_set = datasets.ImageFolder(val_dir, test_transform) - num_classes = len(train_set.classes) - else: - raise ValueError("Unknown dataset") - - if dist.is_initialized() and world_size > 1: - train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=rank, shuffle=True) - else: - train_sampler = None - - train_loader = DataLoader(train_set, - batch_size=args.batch_size, - shuffle=(train_sampler is None), - num_workers=args.num_workers, - pin_memory=True, - sampler=train_sampler, - drop_last=False) - val_loader = DataLoader(val_set, - batch_size=args.batch_size, - shuffle=False, - num_workers=args.num_workers, - pin_memory=True) - - return train_loader, val_loader, num_classes, train_sampler - -# ---------------------------- -# Train & validate -# ---------------------------- -def train_one_epoch(args, epoch, model, criterion, optimizer, train_loader, device, scaler, scheduler=None, train_sampler=None): - batch_time = AverageMeter('Time') - data_time = AverageMeter('Data') - losses = AverageMeter('Loss') - top1 = AverageMeter('Acc@1') - top5 = AverageMeter('Acc@5') - - model.train() - end = time.time() - optimizer.zero_grad() - - iters = len(train_loader) - for i, (images, targets) in enumerate(train_loader): - data_time.update(time.time() - end) - images = images.to(device, non_blocking=True) - targets = targets.to(device, non_blocking=True) - - if args.amp: - with amp.autocast(): - outputs = model(images) - loss = criterion(outputs, targets) / args.grad_accum_steps - else: - outputs = model(images) - loss = criterion(outputs, targets) / args.grad_accum_steps - - if args.amp: - scaler.scale(loss).backward() - else: - loss.backward() - - if (i + 1) % args.grad_accum_steps == 0: - if args.amp: - scaler.step(optimizer) - scaler.update() - else: - optimizer.step() - optimizer.zero_grad() - if scheduler is not None and args.scheduler_step_per_batch: - scheduler.step() - - with torch.no_grad(): - acc1, 
def validate(args, model, val_loader, criterion, device):
    """Evaluate *model* on *val_loader*; return average loss, top-1 and top-5 accuracy."""
    loss_meter = AverageMeter('Loss')
    acc1_meter = AverageMeter('Acc@1')
    acc5_meter = AverageMeter('Acc@5')

    model.eval()
    with torch.no_grad():
        for batch, labels in tqdm(val_loader):
            batch = batch.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            logits = model(batch)
            batch_loss = criterion(logits, labels)
            prec1, prec5 = accuracy(logits, labels, topk=(1, 5))

            n = batch.size(0)
            loss_meter.update(batch_loss.item(), n)
            acc1_meter.update(prec1.item(), n)
            acc5_meter.update(prec5.item(), n)

    return OrderedDict([('loss', loss_meter.avg),
                        ('acc1', acc1_meter.avg),
                        ('acc5', acc5_meter.avg)])
def main():
    """Entry point: parse args, set up (optionally distributed) training state,
    build data/model/optimizer, then run the train/validate epoch loop with
    CSV logging and checkpointing."""
    args = parse_args()

    # handle local_rank from env if not provided (torchrun sets LOCAL_RANK)
    local_rank_env = os.environ.get('LOCAL_RANK', None)
    if args.local_rank is None and local_rank_env is not None:
        args.local_rank = int(local_rank_env)

    # Distributed mode is inferred purely from local_rank being set.
    distributed = (args.local_rank is not None and args.local_rank != -1)
    if distributed:
        dist.init_process_group(backend='nccl', init_method='env://')
        rank = dist.get_rank()
        world_size = dist.get_world_size()
    else:
        rank = 0
        world_size = 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        device = torch.device('cuda', args.local_rank)
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Per-rank seed offset so data augmentation differs across ranks.
    set_seed(args.seed + (rank if distributed else 0), deterministic=args.deterministic)

    # Only rank 0 creates the run directory and persists the arguments.
    save_dir = os.path.join('models', args.name)
    if rank == 0:
        os.makedirs(save_dir, exist_ok=True)
        with open(os.path.join(save_dir, 'args.json'), 'w') as f:
            json.dump(vars(args), f, indent=2)
    if distributed:
        dist.barrier()

    train_loader, val_loader, auto_num_classes, train_sampler = build_dataloaders(args, rank, world_size)
    if args.num_classes is None:
        args.num_classes = auto_num_classes

    # Prefer the local Model implementation (LocalModel import is attempted at the top of the file).
    model = build_model_with_local_priority(args, device)
    model.to(device)

    if distributed:
        model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank, find_unused_parameters=True)

    criterion = nn.CrossEntropyLoss().to(device)
    # Only parameters with requires_grad are optimized (supports frozen layers).
    params = [p for p in model.parameters() if p.requires_grad]
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                              weight_decay=args.weight_decay, nesterov=args.nesterov)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamw':
        optimizer = optim.AdamW(params, lr=args.lr, weight_decay=args.weight_decay)
    else:
        raise ValueError('Unknown optimizer')

    scheduler = None
    if args.scheduler == 'multistep':
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.milestones, gamma=args.gamma)
    elif args.scheduler == 'step':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
    elif args.scheduler == 'cosine':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
    elif args.scheduler == 'none':
        scheduler = None

    scaler = amp.GradScaler() if args.amp else None

    start_epoch = args.start_epoch
    best_acc = 0.0
    if args.resume:
        if os.path.isfile(args.resume):
            ckpt = torch.load(args.resume, map_location='cpu')
            # Accept both wrapped checkpoints ({'state_dict': ...}) and raw state dicts.
            model_state = ckpt.get('state_dict', ckpt)
            if isinstance(model, DDP):
                model.module.load_state_dict(model_state)
            else:
                model.load_state_dict(model_state)
            if 'optimizer' in ckpt:
                optimizer.load_state_dict(ckpt['optimizer'])
            # NOTE(review): checkpoints below store the last *completed* epoch,
            # so resuming from ckpt['epoch'] repeats that epoch — confirm intended.
            # Scheduler state is not restored here either.
            start_epoch = ckpt.get('epoch', start_epoch)
            best_acc = ckpt.get('best_acc', best_acc)
            print(f"=> resumed from {args.resume}, start_epoch={start_epoch}")
        else:
            print(f"=> resume path {args.resume} not found")

    log_columns = ['epoch', 'lr', 'loss', 'acc1', 'acc5', 'val_loss', 'val_acc1', 'val_acc5']
    log_df = pd.DataFrame(columns=log_columns)

    total_epochs = args.epochs
    for epoch in range(start_epoch, total_epochs):
        # Reshuffle the distributed sampler each epoch (optionally seed-offset).
        if train_sampler is not None:
            if args.seed_sampler:
                train_sampler.set_epoch(epoch + args.seed)
            else:
                train_sampler.set_epoch(epoch)

        if rank == 0:
            print(f"==== Epoch {epoch}/{total_epochs - 1} ====")

        train_log = train_one_epoch(args, epoch, model, criterion, optimizer, train_loader, device, scaler, scheduler, train_sampler)
        val_log = validate(args, model, val_loader, criterion, device)
        current_lr = optimizer.param_groups[0]['lr']

        # Rank 0 owns console logging and the CSV training log.
        if rank == 0:
            print("Epoch[{}]: train_loss {:.4f} acc1 {:.2f} acc5 {:.2f} | val_loss {:.4f} acc1 {:.2f} acc5 {:.2f} lr {:.6f}".format(
                epoch, train_log['loss'], train_log['acc1'], train_log['acc5'],
                val_log['loss'], val_log['acc1'], val_log['acc5'], current_lr
            ))
            row = {
                'epoch': epoch,
                'lr': current_lr,
                'loss': train_log['loss'],
                'acc1': train_log['acc1'],
                'acc5': train_log['acc5'],
                'val_loss': val_log['loss'],
                'val_acc1': val_log['acc1'],
                'val_acc5': val_log['acc5'],
            }
            # log_df = log_df.append(row, ignore_index=True)
            # DataFrame.append was removed in pandas 2.x; concat is the replacement.
            new_row_df = pd.DataFrame([row])
            log_df = pd.concat([log_df, new_row_df], ignore_index=True)
            log_df.to_csv(os.path.join(save_dir, 'log.csv'), index=False)

        # NOTE(review): checkpointing is not guarded by rank == 0 here, so in
        # multi-rank runs every rank writes the same file — confirm / guard.
        is_best = val_log['acc1'] > best_acc
        if is_best:
            best_acc = val_log['acc1']
        if (epoch % args.save_freq == 0) or is_best or (epoch == total_epochs - 1):
            state = {
                'epoch': epoch,
                'state_dict': model.module.state_dict() if isinstance(model, DDP) else model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                'args': vars(args)
            }
            save_checkpoint(state, is_best, save_dir, filename=f'checkpoint_epoch_{epoch}.pth')

    # Sync ranks before the final summary print.
    if dist.is_initialized():
        dist.barrier()
    if rank == 0:
        print("Training finished. Best val acc1: {:.2f}".format(best_acc))


if __name__ == '__main__':
    # set_global_seed(2025)  # any number works
    main()