From f9a2599b4f34ef16dd571938efe9496a89f378ac Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Tue, 7 Jan 2025 12:38:22 +0100 Subject: [PATCH 01/12] v1/feature/83 Add support for SQLite --- CODE_OF_CONDUCT.md | 15 ++ Cortex.sln | 6 + src/Cortex.Mediator/Assets/license.md | 2 +- .../Assets/license.md | 2 +- src/Cortex.States.SQLite/Assets/cortex.png | Bin 0 -> 7179 bytes src/Cortex.States.SQLite/Assets/license.md | 20 ++ .../Cortex.States.SQLite.csproj | 55 ++++ .../SqliteKeyValueStateStore.cs | 238 ++++++++++++++++++ 8 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 src/Cortex.States.SQLite/Assets/cortex.png create mode 100644 src/Cortex.States.SQLite/Assets/license.md create mode 100644 src/Cortex.States.SQLite/Cortex.States.SQLite.csproj create mode 100644 src/Cortex.States.SQLite/SqliteKeyValueStateStore.cs diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..5531f68 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,15 @@ +# Cortex Code of Conduct + +**All creatures are welcome**: We aim to create a safe space for all community members, regardless of their age, race, gender, sexual orientation, physical appearance or disability, choice of text editor, or any other qualities by which living beings can be discriminated against. + +**Be excellent to each other**: We do not tolerate verbal or physical harassment, violence or intimidation. + +We do not tolerate life forms who refuse to share this openness and respect towards others: Creatures that are not excellent to others are not welcome. + +We continuously strive to make our community a better place for everyone – in the best tradition of hackers we "build, test, improve, reiterate". In this ongoing adventure, we rely on the support, courage, and creativity of all members of the Cortex community. 
+ +If you are made uncomfortable in your role as a Cortex community member, please let us know: You can reach us at cortex@buildersoft.io. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. + +#### Attribution + +This Code of Conduct is adapted from the [CCC event CoC](https://www.ccc.de/en/updates/2016/a-reminder-to-be-excellent-to-each-other) \ No newline at end of file diff --git a/Cortex.sln b/Cortex.sln index a87ef46..a8ca664 100644 --- a/Cortex.sln +++ b/Cortex.sln @@ -45,6 +45,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.States.ClickHouse", EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.Streams.Http", "src\Cortex.Streams.Http\Cortex.Streams.Http.csproj", "{20BD7107-8199-4CA8-815B-4D156B522B82}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.States.SQLite", "src\Cortex.States.SQLite\Cortex.States.SQLite.csproj", "{19167D25-6383-46B4-9449-B9E364F809FF}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -134,6 +136,10 @@ Global {20BD7107-8199-4CA8-815B-4D156B522B82}.Debug|Any CPU.Build.0 = Debug|Any CPU {20BD7107-8199-4CA8-815B-4D156B522B82}.Release|Any CPU.ActiveCfg = Release|Any CPU {20BD7107-8199-4CA8-815B-4D156B522B82}.Release|Any CPU.Build.0 = Release|Any CPU + {19167D25-6383-46B4-9449-B9E364F809FF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {19167D25-6383-46B4-9449-B9E364F809FF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {19167D25-6383-46B4-9449-B9E364F809FF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {19167D25-6383-46B4-9449-B9E364F809FF}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/Cortex.Mediator/Assets/license.md 
b/src/Cortex.Mediator/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Mediator/Assets/license.md +++ b/src/Cortex.Mediator/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States.MSSqlServer/Assets/license.md b/src/Cortex.States.MSSqlServer/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.States.MSSqlServer/Assets/license.md +++ b/src/Cortex.States.MSSqlServer/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States.SQLite/Assets/cortex.png b/src/Cortex.States.SQLite/Assets/cortex.png new file mode 100644 index 0000000000000000000000000000000000000000..a4f9727d04f91f61f20720bd64b9c139fb041236 GIT binary patch literal 7179 zcmV+m9Q5OfP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGf5&!@T5&_cPe*6Fc8=y%C^C!)jU*}Z_TUZ5zkPS!zgzWpiKoUY0LP$tR!Xokt%Ag{H3XX^@B8wnv03972 z_hwv1kf6A*-1mH+Tg$Cm_f}O`27H#NQDPYKIv zcTCHy`fO@u)s1PH)sbnLH4!`}kZK|c%WX|xUTdf>GJS=u8BkWepa#-RXr-xDk(t@H zW@WFA%*qkfWGlBRGCQ}KE6>y#u7Wj@IeBX$a|&84%`IF@nqxf9Eo!w@+!~oz+$N|! zGOwgPGT%~1WPT~v)1{qUWzc%A^7WAg4s}HqRBUi)VHMOZs3)?px+juI+89Z!G1VJc zB&xQLlmylF3mS+d)%Qn|>-%q{Gz@%}+OT;?VqO2Az$`8yqwdcMnN^=m&#Gn9GOO9N zWmSgoa}7%%RkMWUH7sFyEou6S8a90e9_zTW>Pa(9t!!{;=E_xE+0bgztQ@G(RxZ@U zX6H7t+4KlCv)TDgY<6BVo1M3Y&B-^_!sZmT(Bo4DE$penwd|>awfwQDmCY?QRMf`i z7PlFX?QCvIJDW%9VDpMQ_+#lhHowFWelF`|^GiF~)8*^A%D5JkcX3sij~m#6$_@0m zu(F#itm@{El|3x6Y9mXm>S2l1c-+Vmt9w~uO)pzi-OCo$^sz;?rZ%x8Q+55MWT<|? 
zR>L4ErD2dQZhVHNG;H}~QR9|B$4$wo*gJiBEt{HI$)=I2xEuhhIXqJqL=&J0vCala z5N(h%O%XsY0Gvq>4G}=VGamuY)|~uiL2C?vYdAnt09d$|;L#&sk_?c=ZERj~yDb1L z>0t9s0idC$%Q^*_5G9Z#17M~3$VC820LaAwn(8AZK>|nur~sJ;c=dyOfYHG-0D8c)!3BgD z0ANym|IN6m>E(B(W>gS3OJ2?cFb6y?Pk02*2B>qM3!dyehx-I@f^^Pv!7~6xXt|F=bO5v<25_GNQSUxC#H8AO4sJ?%xdNQ*J~@1Hcqd#$aK3;6*i@;(*!qp zN$K#ErDYCqdVq%egycmAs0*E1fQrN=5J1U&?wU^l&=op;GEa^BbZsAjqqfh>eP;6F z)Ao6|Z&6L30M3*Oo+ETR-KT=*2%XxtPvO3(3wF_Ye(o~>8t!w1PapS14NvJlvF#Jx zr)&H4@D^1Y;3h9EV~`Hsl#Ft1yQhN}IO}{ANsFp%P!{Yu=K-8E>wGlM^MYqOFId^& z2%iLQaft=a3!W})l0KJV3MhJsTZ0AEbxpuK9u{Elz-8;ROww zR?TOCTT;p_`2Zw{iqPrGJT3QmCoihd8BOM?bf27cD&6OV=tx|mt@-5SMRuQ8=yZio zXXvycikWBcaNHyroC=-<(Ct1seEPUg8#$OY0`0p9-Gr zJ{g`@!7jLOL4{YlXVrYeag)-D-QWax7C_l`CNRIYFQbZ{KY1z6$Kr!#EgZ*Wg$`|~Vo!z(PSdoO8a_bY<-k(PJvp>_%Gv>m7eXoO8y z=v38xA~-=V_sQW?)$U2~rleO;=quWCh;^O(o^87Do2U9N{K`5{e9LlrUu6j^#7=7= z^K^|iM3;Gn?{VS6|5NG*Skr-zS?{?!Y}3WxJ=J&q7q<4T&slowP6s>-T#)3&a-QwJ zg;hNOSL6bxgy$1FEqErlrDbgSx;<>mwFhkL2aIjG<}W;N@XCF*V*N{$o67aDm_{0CMM*lVjE#{Dj{8=BvSl58QYDE=yg#mCogYr)>Me!L!;uW!7o8S@;_g zl7_uQY-^OC2f{JS*1bsWqcU+(hEBQd6W{?{d}`5fJiw)yk^ly7`)qjlJuY(RJKqrq z`GfcguyO3dif(EzUGOZ31~>troplQEFvkURT|{^Q=UekNvpM)VZw)KievEA%dfrbnvSA!cLKpGf(cmS-Gnz;po5gd(_bb!i_sP^nlvAr;IF94NvVp2S5{E zIe{x2j!!EhSb%2%93eco6oY=f@81m-UN#$TMCJ2m2ny;!--6vrS|s-=;JMtVsQK{x z#ZBAU#4ZIzS;aOu?=L=xFyyPUm3?Au_nm}G zSbMn7S+E-pEG2LS!||zwHaG#E*!EfQ6af9;A!OnMw7>mzcz7Q$)_Ul3`k>r-s{YQf z&a>g+c?iv$uRaJBBI%f+2drT5@6;XkD29qGXyza-z)iHk1%yWcqj%nyCZ5l%AP$=9nz{{A|*f(qAK$Qw5&SJ zo?;tL{umCL@!pybd`vwo2)C-yhLO6Dw|gDjaV9t?JOP~4_C;OuneH?GpWBB}^e|le zag}$AW%vAz`a|7QB%*g8o`;LG_RW8Vt~&u6od!uYy_Dq0U1st~gFVI<0$vHgCFeW9 zY2hhCr&r?Qf@c7ngiJHNoWjXST(|Mu9k#gnSvqH6=P3$yBu=={wJ%+went7Uf(X!h z-~WXzZrVoeq^hse9xCz}qzwws23No)rt09S0S1Am1V|yXgw4pRV@44sR;1jC7Xs)LiHjQyTH$)pCo|DzJmknGB*FnHD z$os71*<)0+QxUHkDiUUvsUTYIo+5d105l-ZFV=!{!t(V?P4YG}AheuQ< z)a-emPmxfTfpUQ9H^!gPggt-s-G2;kgHCgTT|RX;9RB!N9ozB9ZX| zC?+lh(FY)Yh70OmycjxJ!EqZ-{m7DPHj(pOlXI8VSTHMgo+MZULl4-%l?OEJ5)_`9 
z(Pp&nqIS~RQSCoP!Z}bY&#D&NJyVLBFFJ5unWq3x0WgUB;?oN0L#}=Ev(RZd;I$w5 znl1!QRFdFfPE1bke%62S4+_m(COm)VAtDT1xySPR-{571QM}KRM;lZ_MXEts6QFA; zw9b8|16!j67Zjc@boz9c;XFw15r)DzZxBWORmK|j4NgF5PGS zm+!Fw$N|sWdE)pWr0VN*zzZ}*!U@j?XipDx^|lm-0Y}oo696jUd1alV^A}LqXi93!NSS@!Xt^``O^oeP4KkLkM3S8lMvPE8BGbE_G&O zSOp#2xInA-f57@K{6;+-NMz8F40r&k255&)Kx;ev74^(xYZ0-#OziKJ0jff0xXe@P zaVZ!nI7}*&F!yQT0hY^wYabyvt@HHoT#1VeFSU>ga2wjm_3!?`+K+t0I!}Dhx=#K`zsKJ#c;`7hI8_PnDx<%n&g8Sr z!jJQb?hEfc?!GyX3>@hZz>@&tSiDhOd}r?l!k8HzeONU`<1)T0q+4D7<$MGH@|7DY0!)`sNf0U{Nc%2ryo3k zYk8F5MCl*~1Sr76`LGIe<9oNQf*FoLRtTr`T28yoHk`i6)*H~)9sibok2Z4f@_qjM zf^u?^wsN8?5{EPmb(x)1}`npzBZk zK+o?!^Aiu9M%Xmr;kebWen8VL(9`1XsMZV>1%xNpe5UigK7Hn?bf27c3htXV*5HC9E-H8v zQVLjNO&@DL^o0>3BNXanKw4qK>g9W^^Tc=LJcQTM9q-Uo1{xL`!*%Sgf?ZrmYrvb8 z+cXy7py=U=p_6`}TEx;@o?~r?zoa&5vz(0h=^8Jt0lwzN^VD`=`IvGYy9CcYNNZ1# za5+x{FL2wZSWPsB;6&*lx&d+-0FD2b?l|twAOi|v6&dNmiVZZ|T3k`<4bVGuqCYdJ z`@8_k?z7syoW@uNr-o;`58)D7R_oy}Jo_{KgdP}_8OU5QZBX8Xz_ovi1TR?ld~^kS z41qHh0GVslES5%vl{{;4J9? z7P1M+d2D6RJ~ntY+3(wPe{{`Su0WFVqlEuC$BTYv9L^HWA zV=cfwTA*TX1!4iyWSsz~+Unn3^U3i1O9l&`0dU5D1Govv`7A!Igw^i37<#IBw8HCP z?j*u8k`;R!kP)~~129qxa52`At{d)Zax&K5Asn8Q`3=YVpf$Fp2mId zn$OF9qVs~)e6bBKxq#X=EbZ;T9BwrYg!V9OB7$1h9~cut4myz&XdXp z^=9L-3+~B9IkX3H&zYN{lM#fxhJ9CQur&;L90Tapf5mE`YBpKtJeQ=5Q-bFRn-)NQ z$w1%)fYVpj#U{8XlXBUtye8IuG!i-)!EsHmen=MzdtuU+P9foKzIvbK558e7ht_UB zr)c*`8h~Ll&*@n)2o7x;QgQ6#720W7t$Hj%+YBPyqKm9x;Em8B6YwzKWogTE)WI!p zf2D`#?>?6U1GsN`R())NL$3&W{mY*_75XCcjPPfcTt7(R7TshxHzwFt?LHU!W-wRj zV4J9ksYP0NYJfI8ch;$cXE@JFUL4MgA#lhblk-?My0eDvg$ipBdi$}Z?h}i09o=O( z|BS2#w&smbL&L*Ofl+hJCPsg$1y2TOZg;E*oBUf+)HRyMd|DWY2|=M9{Q(l5YZS8LvDQJ_zf6|OeXj5+*L)g) zCd7mlF#-xlnb`aj=d?B?5Z$K>of`KAtogTb+Y7WWR~Tq?%(aKKqYuKZ ze15HjaXz$n*kl{i`@+JbLjT}DDNG|2b8rFSxfEcKmbUw-9vg5^BxchW*!8~8SJTKB z!%sM7>-C4MY{v--d3G`qPy)Os5_4#lSI?QBLc_yx)w|Ep#mD;*UFQqWbBctVbsBBo zSb)P_j&;qLfgHAdL3@bRGzc3wpZprZh4v0*LLSZ0#H69H?+M2h48BP-(5#TDa-XW* zbHTISr)>K&s~#^nTx3`si{QiV--Qc=wE~#Y>&zfS%1BnQlq;8Zh7C6Xi4VZuW0~!{ 
z>AVhjDhEpLQ#(-nT2sM&j}sj3YV`9}>^e0fXjmVBFOqmN(H|Nw#QB(>{5M$u7oS>8 zAunvmL^*?nBiObd=hVT|g-#2e!g-FwCF3!IgWGcZUgLLjm^<3oUJzfrFoD7&ptxVh z5ruBAj_xl+kEh!p>HlED{XIoMcrbKOj8F>0Z`>UN7C}Rz#)TNx@HGm z_%O)`2YsK~5c2}duOSOJ3cmJiaOe-#eDG81;|tOr;yD=g#Kkc z-20a~LZ`0g)3$vTqYn;uI(7@fnzXP*HZDZ%ix=qw2MK$~TCs6=&zakFEL6GcG_4N^ zbo`=R+A1F#0*vH6m+w4j?BnU&ghvT48uz7-GB`lPLWS11z6c$9Z~?HKDj~Bf&_$=( zET@rr@y%^EXe9gELzdI`nsJU`j>R`n4PJQ=DzxJmEJnd=B4nB?YhCbE;Zp@qRC@X7 zf#cm-6|DZ{%OeUoSgVcpg12k#*Kxe2(Hnv-4P~d8S>KX1&jw0t%=2Musm)mvHl=WbI^evqCKK8=qryV7NafQRsjtQ3W}d=6Ro47iAO zFNgnDF1Wt!@Yif+b|VeIn(&kWgSv0jz$NF?8^bPkVLmXqex(#`AKi z`wK~oR|13!+4|O3Y<6z5f2L`M`|MS0?V-=ZM~>@^zAKe?Zaa5SQUdgWH)XWJVdJwf zj>8AVG;r=*A85F^iP^@Qf?#88+x~%X47p5=KGq}1=aYr37~ilFcDxYBEUN9JbBX)6 zXu+w%rwDFo*(iZSm`TZ}FRG39F2(cFV~;m))?!|QBe11=*U29OK{JKtqW$9M3HCbp zE;;P663xws&x7qe(9=UniemR9EkN0MCcsezXAF=wzV;!#dB{x7l`@{weDD*RfEpx& zgd4GL41=)Ge55$1hY&IW0N)s54l(%&pNW5a<8SPlf4I*n3(whf;TM+C{=BfNR@&-c z0Z|9ghFCg!;2>Ol%u>ScbCu7Zrf>Jq2|KT@=z5vWSWy=U8l68mk2WQ(*>m1?zChZ2 zj=uX8#d(mB%p*V{jck-`dz)45R-Ds1_gPl5?Fh{Z11xkL$U8}dcHfdw1_$9T#RcXI z?)Ado@w^}zq$v9GUGeUXR>!P_YGw zSNL?mTU;^@ZuF#srv_-c&kLRfa2(udO9wAN=+wX)2lrT|f+qlUgiSki>flWp2lqIo zb6*sp(}b8-JPz)$OR)`z4xZM1odhs- z99)b@2TuSf!W#z{Lki+N6JToLIJnqS0C?lzVpv-D84y!AxI3-`#}y+|!6R^Y0Pd#y zfN{l+6z)qYycst!Iqv|5(%lD)D<&j`P4t{hOer`JH!(H)FB4OWzCm&4I%r%mq(a)B z1FR;d7W^d+|DH_Bc_Kcw;Pv?A{9DxKAz>L;Ea?u|fYF4My#E2S{{wP}i{H;-Y-|7k N002ovPDHLkV1i-n!nFVZ literal 0 HcmV?d00001 diff --git a/src/Cortex.States.SQLite/Assets/license.md b/src/Cortex.States.SQLite/Assets/license.md new file mode 100644 index 0000000..530f621 --- /dev/null +++ b/src/Cortex.States.SQLite/Assets/license.md @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2024 Buildersoft + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/Cortex.States.SQLite/Cortex.States.SQLite.csproj b/src/Cortex.States.SQLite/Cortex.States.SQLite.csproj new file mode 100644 index 0000000..7581fd5 --- /dev/null +++ b/src/Cortex.States.SQLite/Cortex.States.SQLite.csproj @@ -0,0 +1,55 @@ + + + + net9.0;net8.0;net7.0 + + 1.0.1 + 1.0.1 + Buildersoft Cortex Framework + Buildersoft + Buildersoft,EnesHoxha + Copyright © Buildersoft 2025 + + Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
+ + + https://github.com/buildersoftio/cortex + cortex vortex mediator eda streaming distributed streams states sqlite + + 1.0.1 + license.md + cortex.png + Cortex.States.SQLite + True + True + True + + Just as the Cortex in our brains handles complex processing efficiently, Cortex Data Framework brings brainpower to your data management! + https://buildersoft.io/ + README.md + + + + + True + \ + + + True + + + + True + + + + + + + + + + + + + diff --git a/src/Cortex.States.SQLite/SqliteKeyValueStateStore.cs b/src/Cortex.States.SQLite/SqliteKeyValueStateStore.cs new file mode 100644 index 0000000..1b75095 --- /dev/null +++ b/src/Cortex.States.SQLite/SqliteKeyValueStateStore.cs @@ -0,0 +1,238 @@
using Microsoft.Data.Sqlite;
using System;
using System.Collections.Generic;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;

namespace Cortex.States.Sqlite
{
    /// <summary>
    /// Key/value state store that keeps its entries in a single SQLite table.
    /// </summary>
    /// <remarks>
    /// The table has a TEXT primary-key column <c>key</c> and a nullable TEXT column <c>value</c>;
    /// keys and values are converted to and from strings by the configured (de)serializers.
    /// Every operation opens its own connection from the connection string and closes it when
    /// the operation (or, for the lazy enumerations, the enumeration) finishes.
    /// </remarks>
    /// <typeparam name="TKey">Type of the keys held by the store.</typeparam>
    /// <typeparam name="TValue">Type of the values held by the store.</typeparam>
    public class SqliteKeyValueStateStore<TKey, TValue> : IStateStore<TKey, TValue>, IDisposable
    {
        // Serializes table creation. The field is static, i.e. shared by every instance of the
        // same closed generic type, so it lives for the whole process and must never be
        // disposed by an individual instance (see Dispose).
        private static readonly SemaphoreSlim _initializationLock = new SemaphoreSlim(1, 1);

        private readonly string _connectionString;

        // Table name already quoted as an SQL identifier, ready to be embedded in statements.
        private readonly string _quotedTableName;

        private readonly Func<TKey, string> _keySerializer;
        private readonly Func<TValue, string> _valueSerializer;
        private readonly Func<string, TKey> _keyDeserializer;
        private readonly Func<string, TValue> _valueDeserializer;

        private volatile bool _isInitialized;

        /// <summary>Friendly name of the store, as passed to the constructor.</summary>
        public string Name { get; }

        /// <summary>
        /// Initializes a new instance of the SqliteKeyValueStateStore and creates the backing
        /// table if it does not exist yet.
        /// </summary>
        /// <param name="name">A friendly name for the store.</param>
        /// <param name="connectionString">
        /// The connection string to the SQLite database, for example "Data Source=MyDatabase.db".
        /// Because every operation opens a new connection, a private in-memory database
        /// ("Data Source=:memory:") is not suitable: each connection would see its own empty
        /// database without the table. For in-memory use, pass a named shared-cache database
        /// (e.g. "Data Source=StateStore;Mode=Memory;Cache=Shared") and keep one connection to
        /// it open for as long as the data is needed.
        /// </param>
        /// <param name="tableName">The name of the table to use for storing state entries.</param>
        /// <param name="keySerializer">Optional key serializer. If not provided, JSON serialization is used.</param>
        /// <param name="valueSerializer">Optional value serializer. If not provided, JSON serialization is used.</param>
        /// <param name="keyDeserializer">Optional key deserializer. If not provided, JSON deserialization is used.</param>
        /// <param name="valueDeserializer">Optional value deserializer. If not provided, JSON deserialization is used.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="name"/>, <paramref name="connectionString"/> or
        /// <paramref name="tableName"/> is null, empty or whitespace.
        /// </exception>
        public SqliteKeyValueStateStore(
            string name,
            string connectionString,
            string tableName,
            Func<TKey, string> keySerializer = null,
            Func<TValue, string> valueSerializer = null,
            Func<string, TKey> keyDeserializer = null,
            Func<string, TValue> valueDeserializer = null)
        {
            if (string.IsNullOrWhiteSpace(name))
                throw new ArgumentNullException(nameof(name));
            if (string.IsNullOrWhiteSpace(connectionString))
                throw new ArgumentNullException(nameof(connectionString));
            if (string.IsNullOrWhiteSpace(tableName))
                throw new ArgumentNullException(nameof(tableName));

            Name = name;
            _connectionString = connectionString;
            _quotedTableName = QuoteIdentifier(tableName);

            // Assign custom or default (JSON-based) serializers/deserializers.
            _keySerializer = keySerializer ?? (key => JsonSerializer.Serialize(key));
            _valueSerializer = valueSerializer ?? (value => JsonSerializer.Serialize(value));
            _keyDeserializer = keyDeserializer ?? (str => JsonSerializer.Deserialize<TKey>(str)!);
            _valueDeserializer = valueDeserializer ?? (str => JsonSerializer.Deserialize<TValue>(str)!);

            // Create the table before the instance is handed to the caller. Every await in
            // InitializeAsync uses ConfigureAwait(false), so blocking here does not depend on
            // the caller's synchronization context.
            InitializeAsync().GetAwaiter().GetResult();
        }

        /// <summary>
        /// Quotes <paramref name="identifier"/> as a double-quoted SQL identifier, doubling any
        /// embedded double quote. Unlike the bracket form, this quoting has an escape for its
        /// own delimiter, so no character of the table name can terminate the identifier early.
        /// </summary>
        private static string QuoteIdentifier(string identifier)
        {
            return "\"" + identifier.Replace("\"", "\"\"") + "\"";
        }

        /// <summary>
        /// Creates the backing table if it is missing. Guarded by the shared semaphore and a
        /// re-check of the initialized flag once the semaphore has been acquired.
        /// </summary>
        private async Task InitializeAsync()
        {
            if (_isInitialized) return;

            await _initializationLock.WaitAsync().ConfigureAwait(false);
            try
            {
                if (_isInitialized) return;

                var createTableSql =
                    $"CREATE TABLE IF NOT EXISTS {_quotedTableName} (" +
                    "[key] TEXT NOT NULL PRIMARY KEY, " +
                    "[value] TEXT);";

                using (var connection = new SqliteConnection(_connectionString))
                {
                    await connection.OpenAsync().ConfigureAwait(false);

                    using (var cmd = new SqliteCommand(createTableSql, connection))
                    {
                        await cmd.ExecuteNonQueryAsync().ConfigureAwait(false);
                    }
                }

                _isInitialized = true;
            }
            finally
            {
                _initializationLock.Release();
            }
        }

        /// <summary>Throws when the table-creation step has not completed.</summary>
        /// <exception cref="InvalidOperationException">The store is not initialized.</exception>
        private void EnsureInitialized()
        {
            if (!_isInitialized)
            {
                throw new InvalidOperationException("SqliteKeyValueStateStore is not properly initialized.");
            }
        }

        /// <summary>
        /// Returns the value stored under <paramref name="key"/>, or <c>default</c> when the key
        /// is absent or its stored value is NULL.
        /// </summary>
        public TValue Get(TKey key)
        {
            EnsureInitialized();

            var serializedKey = _keySerializer(key);

            var sql = $"SELECT [value] FROM {_quotedTableName} WHERE [key] = @key;";
            using (var connection = new SqliteConnection(_connectionString))
            using (var cmd = new SqliteCommand(sql, connection))
            {
                cmd.Parameters.AddWithValue("@key", (object)serializedKey ?? DBNull.Value);

                connection.Open();

                // Anything that is not a string (no row, or a NULL column) maps to default.
                var result = cmd.ExecuteScalar() as string;
                if (result == null)
                    return default;
                return _valueDeserializer(result);
            }
        }

        /// <summary>
        /// Inserts the entry, or replaces the value when <paramref name="key"/> already exists.
        /// </summary>
        public void Put(TKey key, TValue value)
        {
            EnsureInitialized();

            var serializedKey = _keySerializer(key);
            var serializedValue = _valueSerializer(value);

            // INSERT OR REPLACE is SQLite's upsert: a primary-key conflict replaces the row.
            var sql = $"INSERT OR REPLACE INTO {_quotedTableName} ([key], [value]) VALUES (@key, @value);";

            using (var connection = new SqliteConnection(_connectionString))
            using (var cmd = new SqliteCommand(sql, connection))
            {
                cmd.Parameters.AddWithValue("@key", (object)serializedKey ?? DBNull.Value);
                cmd.Parameters.AddWithValue("@value", (object)serializedValue ?? DBNull.Value);

                connection.Open();
                cmd.ExecuteNonQuery();
            }
        }

        /// <summary>Returns whether a row exists for <paramref name="key"/>.</summary>
        public bool ContainsKey(TKey key)
        {
            EnsureInitialized();

            var serializedKey = _keySerializer(key);

            var sql = $"SELECT COUNT(*) FROM {_quotedTableName} WHERE [key] = @key;";
            using (var connection = new SqliteConnection(_connectionString))
            using (var cmd = new SqliteCommand(sql, connection))
            {
                cmd.Parameters.AddWithValue("@key", (object)serializedKey ?? DBNull.Value);

                connection.Open();
                var count = (long)cmd.ExecuteScalar()!; // In SQLite, COUNT returns long
                return count > 0;
            }
        }

        /// <summary>Deletes the row for <paramref name="key"/>; does nothing when it is absent.</summary>
        public void Remove(TKey key)
        {
            EnsureInitialized();

            var serializedKey = _keySerializer(key);

            var sql = $"DELETE FROM {_quotedTableName} WHERE [key] = @key;";
            using (var connection = new SqliteConnection(_connectionString))
            using (var cmd = new SqliteCommand(sql, connection))
            {
                cmd.Parameters.AddWithValue("@key", (object)serializedKey ?? DBNull.Value);

                connection.Open();
                cmd.ExecuteNonQuery();
            }
        }

        /// <summary>
        /// Lazily enumerates every entry of the table. The connection is opened when enumeration
        /// starts and stays open until the enumerator is exhausted or disposed. Rows whose value
        /// column is NULL are yielded with a <c>default</c> value.
        /// </summary>
        public IEnumerable<KeyValuePair<TKey, TValue>> GetAll()
        {
            EnsureInitialized();

            var sql = $"SELECT [key], [value] FROM {_quotedTableName};";
            using (var connection = new SqliteConnection(_connectionString))
            using (var cmd = new SqliteCommand(sql, connection))
            {
                connection.Open();
                using (var reader = cmd.ExecuteReader())
                {
                    while (reader.Read())
                    {
                        var serializedKey = reader.GetString(0);
                        var serializedValue = reader.IsDBNull(1) ? null : reader.GetString(1);

                        var key = _keyDeserializer(serializedKey);
                        var value = serializedValue == null ? default : _valueDeserializer(serializedValue);

                        yield return new KeyValuePair<TKey, TValue>(key, value!);
                    }
                }
            }
        }

        /// <summary>
        /// Lazily enumerates every key of the table. The connection is opened when enumeration
        /// starts and stays open until the enumerator is exhausted or disposed.
        /// </summary>
        public IEnumerable<TKey> GetKeys()
        {
            EnsureInitialized();

            var sql = $"SELECT [key] FROM {_quotedTableName};";
            using (var connection = new SqliteConnection(_connectionString))
            using (var cmd = new SqliteCommand(sql, connection))
            {
                connection.Open();
                using (var reader = cmd.ExecuteReader())
                {
                    while (reader.Read())
                    {
                        var serializedKey = reader.GetString(0);
                        yield return _keyDeserializer(serializedKey);
                    }
                }
            }
        }

        /// <summary>
        /// Releases the store. The instance owns no disposable state: connections are opened
        /// and closed per operation.
        /// </summary>
        public void Dispose()
        {
            // Deliberately does NOT dispose _initializationLock: the semaphore is static, and
            // disposing it here would make the constructor of every later instance fail with
            // ObjectDisposedException when it waits on the semaphore.
            GC.SuppressFinalize(this);
        }
    }
}
From 704915604e30094b967ab90f80325ce425864320 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Tue, 7 Jan 2025 13:04:34 +0100 Subject: [PATCH 02/12] [v1/general]: Update year to 2025 --- src/Cortex.Mediator/Cortex.Mediator.csproj | 2 +- src/Cortex.States.Cassandra/Assets/license.md | 2 +- src/Cortex.States.Cassandra/Cortex.States.Cassandra.csproj | 2 +- src/Cortex.States.ClickHouse/Assets/license.md | 2 +- src/Cortex.States.ClickHouse/Cortex.States.ClickHouse.csproj | 2 +- src/Cortex.States.MSSqlServer/Cortex.States.MSSqlServer.csproj | 2 +- src/Cortex.States.MongoDb/Assets/license.md | 2 +- src/Cortex.States.MongoDb/Cortex.States.MongoDb.csproj | 2 +- src/Cortex.States.PostgreSQL/Assets/license.md | 2 +- src/Cortex.States.PostgreSQL/Cortex.States.PostgreSQL.csproj | 2 +- src/Cortex.States.RocksDb/Assets/license.md | 2 +- src/Cortex.States.RocksDb/Cortex.States.RocksDb.csproj | 2 +- src/Cortex.States.SQLite/Assets/license.md | 2 +- src/Cortex.States/Assets/license.md | 2 +- src/Cortex.States/Cortex.States.csproj | 2 +- src/Cortex.Streams.AWSSQS/Assets/license.md | 2 +- src/Cortex.Streams.AWSSQS/Cortex.Streams.AWSSQS.csproj | 2 +- src/Cortex.Streams.AzureBlobStorage/Assets/license.md | 2 +- .../Cortex.Streams.AzureBlobStorage.csproj | 2 +- src/Cortex.Streams.AzureServiceBus/Assets/license.md | 2 
+- .../Cortex.Streams.AzureServiceBus.csproj | 2 +- src/Cortex.Streams.Files/Assets/license.md | 2 +- src/Cortex.Streams.Files/Cortex.Streams.Files.csproj | 2 +- src/Cortex.Streams.Http/Assets/license.md | 2 +- src/Cortex.Streams.Http/Cortex.Streams.Http.csproj | 2 +- src/Cortex.Streams.Kafka/Assets/license.md | 2 +- src/Cortex.Streams.Kafka/Cortex.Streams.Kafka.csproj | 2 +- src/Cortex.Streams.Pulsar/Assets/license.md | 2 +- src/Cortex.Streams.Pulsar/Cortex.Streams.Pulsar.csproj | 2 +- src/Cortex.Streams.RabbitMQ/Assets/license.md | 2 +- src/Cortex.Streams.RabbitMQ/Cortex.Streams.RabbitMQ.csproj | 2 +- src/Cortex.Streams.S3/Assets/license.md | 2 +- src/Cortex.Streams.S3/Cortex.Streams.S3.csproj | 2 +- src/Cortex.Streams/Assets/license.md | 2 +- src/Cortex.Streams/Cortex.Streams.csproj | 2 +- src/Cortex.Telemetry.OpenTelemetry/Assets/license.md | 2 +- .../Cortex.Telemetry.OpenTelemetry.csproj | 2 +- src/Cortex.Telemetry/Assets/license.md | 2 +- src/Cortex.Telemetry/Cortex.Telemetry.csproj | 2 +- 39 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/Cortex.Mediator/Cortex.Mediator.csproj b/src/Cortex.Mediator/Cortex.Mediator.csproj index 0dc2c70..aacd7de 100644 --- a/src/Cortex.Mediator/Cortex.Mediator.csproj +++ b/src/Cortex.Mediator/Cortex.Mediator.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Buildersoft Cortex Mediator is a library for .NET applications that implements the mediator pattern. It helps to reduce dependencies between objects by allowing in-process messaging without direct communication. Instead, objects communicate through Cortex Mediator, making them less coupled and more maintainable.. 
diff --git a/src/Cortex.States.Cassandra/Assets/license.md b/src/Cortex.States.Cassandra/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.States.Cassandra/Assets/license.md +++ b/src/Cortex.States.Cassandra/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States.Cassandra/Cortex.States.Cassandra.csproj b/src/Cortex.States.Cassandra/Cortex.States.Cassandra.csproj index 30b781c..ee91411 100644 --- a/src/Cortex.States.Cassandra/Cortex.States.Cassandra.csproj +++ b/src/Cortex.States.Cassandra/Cortex.States.Cassandra.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.States.ClickHouse/Assets/license.md b/src/Cortex.States.ClickHouse/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.States.ClickHouse/Assets/license.md +++ b/src/Cortex.States.ClickHouse/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States.ClickHouse/Cortex.States.ClickHouse.csproj b/src/Cortex.States.ClickHouse/Cortex.States.ClickHouse.csproj index e40cbcb..92a3681 100644 --- a/src/Cortex.States.ClickHouse/Cortex.States.ClickHouse.csproj +++ b/src/Cortex.States.ClickHouse/Cortex.States.ClickHouse.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.States.MSSqlServer/Cortex.States.MSSqlServer.csproj b/src/Cortex.States.MSSqlServer/Cortex.States.MSSqlServer.csproj index 86bf0bc..7ca2e78 100644 --- a/src/Cortex.States.MSSqlServer/Cortex.States.MSSqlServer.csproj +++ b/src/Cortex.States.MSSqlServer/Cortex.States.MSSqlServer.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. diff --git a/src/Cortex.States.MongoDb/Assets/license.md b/src/Cortex.States.MongoDb/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.States.MongoDb/Assets/license.md +++ b/src/Cortex.States.MongoDb/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States.MongoDb/Cortex.States.MongoDb.csproj b/src/Cortex.States.MongoDb/Cortex.States.MongoDb.csproj index d65c4e7..2135316 100644 --- a/src/Cortex.States.MongoDb/Cortex.States.MongoDb.csproj +++ b/src/Cortex.States.MongoDb/Cortex.States.MongoDb.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, 
processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. diff --git a/src/Cortex.States.PostgreSQL/Assets/license.md b/src/Cortex.States.PostgreSQL/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.States.PostgreSQL/Assets/license.md +++ b/src/Cortex.States.PostgreSQL/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States.PostgreSQL/Cortex.States.PostgreSQL.csproj b/src/Cortex.States.PostgreSQL/Cortex.States.PostgreSQL.csproj index fd0a4eb..84285e2 100644 --- a/src/Cortex.States.PostgreSQL/Cortex.States.PostgreSQL.csproj +++ b/src/Cortex.States.PostgreSQL/Cortex.States.PostgreSQL.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.States.RocksDb/Assets/license.md b/src/Cortex.States.RocksDb/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.States.RocksDb/Assets/license.md +++ b/src/Cortex.States.RocksDb/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States.RocksDb/Cortex.States.RocksDb.csproj b/src/Cortex.States.RocksDb/Cortex.States.RocksDb.csproj index 96660c6..44c31b0 100644 --- a/src/Cortex.States.RocksDb/Cortex.States.RocksDb.csproj +++ b/src/Cortex.States.RocksDb/Cortex.States.RocksDb.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.States.SQLite/Assets/license.md b/src/Cortex.States.SQLite/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.States.SQLite/Assets/license.md +++ b/src/Cortex.States.SQLite/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States/Assets/license.md b/src/Cortex.States/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.States/Assets/license.md +++ b/src/Cortex.States/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.States/Cortex.States.csproj b/src/Cortex.States/Cortex.States.csproj index 0e7ec29..e09b8c0 100644 --- a/src/Cortex.States/Cortex.States.csproj +++ b/src/Cortex.States/Cortex.States.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.AWSSQS/Assets/license.md b/src/Cortex.Streams.AWSSQS/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.AWSSQS/Assets/license.md +++ b/src/Cortex.Streams.AWSSQS/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.AWSSQS/Cortex.Streams.AWSSQS.csproj b/src/Cortex.Streams.AWSSQS/Cortex.Streams.AWSSQS.csproj index e2b5f7a..36b4f05 100644 --- a/src/Cortex.Streams.AWSSQS/Cortex.Streams.AWSSQS.csproj +++ b/src/Cortex.Streams.AWSSQS/Cortex.Streams.AWSSQS.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.AzureBlobStorage/Assets/license.md b/src/Cortex.Streams.AzureBlobStorage/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.AzureBlobStorage/Assets/license.md +++ b/src/Cortex.Streams.AzureBlobStorage/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.AzureBlobStorage/Cortex.Streams.AzureBlobStorage.csproj b/src/Cortex.Streams.AzureBlobStorage/Cortex.Streams.AzureBlobStorage.csproj index 78851a2..2825d59 100644 --- a/src/Cortex.Streams.AzureBlobStorage/Cortex.Streams.AzureBlobStorage.csproj +++ b/src/Cortex.Streams.AzureBlobStorage/Cortex.Streams.AzureBlobStorage.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.AzureServiceBus/Assets/license.md b/src/Cortex.Streams.AzureServiceBus/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.AzureServiceBus/Assets/license.md +++ b/src/Cortex.Streams.AzureServiceBus/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.AzureServiceBus/Cortex.Streams.AzureServiceBus.csproj b/src/Cortex.Streams.AzureServiceBus/Cortex.Streams.AzureServiceBus.csproj index 0529ad9..54f975e 100644 --- a/src/Cortex.Streams.AzureServiceBus/Cortex.Streams.AzureServiceBus.csproj +++ b/src/Cortex.Streams.AzureServiceBus/Cortex.Streams.AzureServiceBus.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.Files/Assets/license.md b/src/Cortex.Streams.Files/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.Files/Assets/license.md +++ b/src/Cortex.Streams.Files/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.Files/Cortex.Streams.Files.csproj b/src/Cortex.Streams.Files/Cortex.Streams.Files.csproj index 8c6de69..cbe0811 100644 --- a/src/Cortex.Streams.Files/Cortex.Streams.Files.csproj +++ b/src/Cortex.Streams.Files/Cortex.Streams.Files.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.Http/Assets/license.md b/src/Cortex.Streams.Http/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.Http/Assets/license.md +++ b/src/Cortex.Streams.Http/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.Http/Cortex.Streams.Http.csproj b/src/Cortex.Streams.Http/Cortex.Streams.Http.csproj index 4916512..6213806 100644 --- a/src/Cortex.Streams.Http/Cortex.Streams.Http.csproj +++ b/src/Cortex.Streams.Http/Cortex.Streams.Http.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.Kafka/Assets/license.md b/src/Cortex.Streams.Kafka/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.Kafka/Assets/license.md +++ b/src/Cortex.Streams.Kafka/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.Kafka/Cortex.Streams.Kafka.csproj b/src/Cortex.Streams.Kafka/Cortex.Streams.Kafka.csproj index 5005fc5..67b0f6a 100644 --- a/src/Cortex.Streams.Kafka/Cortex.Streams.Kafka.csproj +++ b/src/Cortex.Streams.Kafka/Cortex.Streams.Kafka.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.Pulsar/Assets/license.md b/src/Cortex.Streams.Pulsar/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.Pulsar/Assets/license.md +++ b/src/Cortex.Streams.Pulsar/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.Pulsar/Cortex.Streams.Pulsar.csproj b/src/Cortex.Streams.Pulsar/Cortex.Streams.Pulsar.csproj index 49a1de9..9f64003 100644 --- a/src/Cortex.Streams.Pulsar/Cortex.Streams.Pulsar.csproj +++ b/src/Cortex.Streams.Pulsar/Cortex.Streams.Pulsar.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.RabbitMQ/Assets/license.md b/src/Cortex.Streams.RabbitMQ/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.RabbitMQ/Assets/license.md +++ b/src/Cortex.Streams.RabbitMQ/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.RabbitMQ/Cortex.Streams.RabbitMQ.csproj b/src/Cortex.Streams.RabbitMQ/Cortex.Streams.RabbitMQ.csproj index 1d20b45..6549495 100644 --- a/src/Cortex.Streams.RabbitMQ/Cortex.Streams.RabbitMQ.csproj +++ b/src/Cortex.Streams.RabbitMQ/Cortex.Streams.RabbitMQ.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams.S3/Assets/license.md b/src/Cortex.Streams.S3/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams.S3/Assets/license.md +++ b/src/Cortex.Streams.S3/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams.S3/Cortex.Streams.S3.csproj b/src/Cortex.Streams.S3/Cortex.Streams.S3.csproj index e093aa0..268d309 100644 --- a/src/Cortex.Streams.S3/Cortex.Streams.S3.csproj +++ b/src/Cortex.Streams.S3/Cortex.Streams.S3.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Streams/Assets/license.md b/src/Cortex.Streams/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Streams/Assets/license.md +++ b/src/Cortex.Streams/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Streams/Cortex.Streams.csproj b/src/Cortex.Streams/Cortex.Streams.csproj index 5e1df3f..73ffdc6 100644 --- a/src/Cortex.Streams/Cortex.Streams.csproj +++ b/src/Cortex.Streams/Cortex.Streams.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Telemetry.OpenTelemetry/Assets/license.md b/src/Cortex.Telemetry.OpenTelemetry/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Telemetry.OpenTelemetry/Assets/license.md +++ b/src/Cortex.Telemetry.OpenTelemetry/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Telemetry.OpenTelemetry/Cortex.Telemetry.OpenTelemetry.csproj b/src/Cortex.Telemetry.OpenTelemetry/Cortex.Telemetry.OpenTelemetry.csproj index dae0173..c4f1d6c 100644 --- a/src/Cortex.Telemetry.OpenTelemetry/Cortex.Telemetry.OpenTelemetry.csproj +++ b/src/Cortex.Telemetry.OpenTelemetry/Cortex.Telemetry.OpenTelemetry.csproj @@ -9,7 +9,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
diff --git a/src/Cortex.Telemetry/Assets/license.md b/src/Cortex.Telemetry/Assets/license.md index 530f621..3c845d4 100644 --- a/src/Cortex.Telemetry/Assets/license.md +++ b/src/Cortex.Telemetry/Assets/license.md @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2024 Buildersoft +Copyright (c) 2025 Buildersoft Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/src/Cortex.Telemetry/Cortex.Telemetry.csproj b/src/Cortex.Telemetry/Cortex.Telemetry.csproj index d4ccc47..666324a 100644 --- a/src/Cortex.Telemetry/Cortex.Telemetry.csproj +++ b/src/Cortex.Telemetry/Cortex.Telemetry.csproj @@ -8,7 +8,7 @@ Buildersoft Cortex Framework Buildersoft Buildersoft,EnesHoxha - Copyright © Buildersoft 2024 + Copyright © Buildersoft 2025 Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
From b60bfcf64e45355551c41d7f1b5eb732bf5c6a57 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Tue, 7 Jan 2025 19:38:20 +0100 Subject: [PATCH 03/12] [v1/feature/69]: Add TABLE Apis Rename IStateStore to IDataStore, Rename IStateStore to IDataStore, Update README.md file --- README.md | 3 +++ .../CassandraStateStore.cs | 2 +- .../ClickHouseStateStore.cs | 2 +- .../SqlServerKeyValueStateStore.cs | 2 +- .../SqlServerStateStore.cs | 2 +- .../MongoDbStateStore.cs | 2 +- .../PostgresKeyValueStateStore.cs | 2 +- .../PostgresStateStore.cs | 2 +- .../RocksDbStateStore.cs | 2 +- .../SqliteKeyValueStateStore.cs | 2 +- src/Cortex.States/Abstractions/IStateStore.cs | 5 ++--- src/Cortex.States/Abstractions/ITable.cs | 12 +++++++++++ src/Cortex.States/InMemoryStateStore.cs | 2 +- .../Operators/IStatefulOperator.cs | 2 +- src/Cortex.States/Table.cs | 20 +++++++++++++++++++ .../Abstractions/IBranchStreamBuilder.cs | 8 ++++---- src/Cortex.Streams/Abstractions/IStream.cs | 4 ++-- .../Abstractions/IStreamBuilder.cs | 20 +++++++++---------- src/Cortex.Streams/BranchStreamBuilder.cs | 8 ++++---- .../Operators/AggregateOperator.cs | 6 +++--- .../Operators/AggregateSilentlyOperator.cs | 6 +++--- .../Operators/GroupByKeyOperator.cs | 6 +++--- .../Operators/GroupByKeySilentlyOperator.cs | 6 +++--- .../Operators/SessionWindowOperator.cs | 10 +++++----- .../Operators/SlidingWindowOperator.cs | 10 +++++----- .../Operators/TumblingWindowOperator.cs | 10 +++++----- src/Cortex.Streams/Stream.cs | 10 +++++----- src/Cortex.Streams/StreamBuilder.cs | 20 +++++++++---------- 28 files changed, 110 insertions(+), 76 deletions(-) create mode 100644 src/Cortex.States/Abstractions/ITable.cs create mode 100644 src/Cortex.States/Table.cs diff --git a/README.md b/README.md index 4cd9b0e..3647c7f 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,9 @@ - **Cortex.States.ClickHouse:** Persistent state storage using Clickhouse. 
[![NuGet Version](https://img.shields.io/nuget/v/Cortex.States.ClickHouse?label=Cortex.States.ClickHouse)](https://www.nuget.org/packages/Cortex.States.ClickHouse) +- **Cortex.States.SQLite:** Persistent state storage using SQLite. +[![NuGet Version](https://img.shields.io/nuget/v/Cortex.States.SQLite?label=Cortex.States.SQLite)](https://www.nuget.org/packages/Cortex.States.SQLite) + - **Cortex.Telemetry:** Core library to add support for Tracing and Metrics. [![NuGet Version](https://img.shields.io/nuget/v/Cortex.Telemetry?label=Cortex.Telemetry)](https://www.nuget.org/packages/Cortex.Telemetry) diff --git a/src/Cortex.States.Cassandra/CassandraStateStore.cs b/src/Cortex.States.Cassandra/CassandraStateStore.cs index 2c358eb..d750b9e 100644 --- a/src/Cortex.States.Cassandra/CassandraStateStore.cs +++ b/src/Cortex.States.Cassandra/CassandraStateStore.cs @@ -9,7 +9,7 @@ namespace Cortex.States.Cassandra { - public class CassandraStateStore : IStateStore + public class CassandraStateStore : IDataStore { private readonly ISession _session; private readonly string _keyspace; diff --git a/src/Cortex.States.ClickHouse/ClickHouseStateStore.cs b/src/Cortex.States.ClickHouse/ClickHouseStateStore.cs index 9236d32..c6844c6 100644 --- a/src/Cortex.States.ClickHouse/ClickHouseStateStore.cs +++ b/src/Cortex.States.ClickHouse/ClickHouseStateStore.cs @@ -8,7 +8,7 @@ namespace Cortex.States.ClickHouse { - public class ClickHouseStateStore : IStateStore, IDisposable + public class ClickHouseStateStore : IDataStore, IDisposable where TValue : new() { private readonly string _connectionString; diff --git a/src/Cortex.States.MSSqlServer/SqlServerKeyValueStateStore.cs b/src/Cortex.States.MSSqlServer/SqlServerKeyValueStateStore.cs index 615fefc..0c930ee 100644 --- a/src/Cortex.States.MSSqlServer/SqlServerKeyValueStateStore.cs +++ b/src/Cortex.States.MSSqlServer/SqlServerKeyValueStateStore.cs @@ -7,7 +7,7 @@ namespace Cortex.States.MSSqlServer { - public class SqlServerKeyValueStateStore : 
IStateStore, IDisposable + public class SqlServerKeyValueStateStore : IDataStore, IDisposable { private readonly string _connectionString; private readonly string _schemaName; diff --git a/src/Cortex.States.MSSqlServer/SqlServerStateStore.cs b/src/Cortex.States.MSSqlServer/SqlServerStateStore.cs index 807cd01..4a71176 100644 --- a/src/Cortex.States.MSSqlServer/SqlServerStateStore.cs +++ b/src/Cortex.States.MSSqlServer/SqlServerStateStore.cs @@ -8,7 +8,7 @@ namespace Cortex.States.MSSqlServer { - public class SqlServerStateStore : IStateStore, IDisposable + public class SqlServerStateStore : IDataStore, IDisposable where TValue : new() { private readonly string _connectionString; diff --git a/src/Cortex.States.MongoDb/MongoDbStateStore.cs b/src/Cortex.States.MongoDb/MongoDbStateStore.cs index 3cf999c..ae9332b 100644 --- a/src/Cortex.States.MongoDb/MongoDbStateStore.cs +++ b/src/Cortex.States.MongoDb/MongoDbStateStore.cs @@ -11,7 +11,7 @@ namespace Cortex.States.MongoDb /// /// The type of the key. Must be serializable by MongoDB. /// The type of the value. Must be serializable by MongoDB. 
- public class MongoDbStateStore : IStateStore + public class MongoDbStateStore : IDataStore { private readonly IMongoCollection> _collection; public string Name { get; } diff --git a/src/Cortex.States.PostgreSQL/PostgresKeyValueStateStore.cs b/src/Cortex.States.PostgreSQL/PostgresKeyValueStateStore.cs index 82578d9..df007e3 100644 --- a/src/Cortex.States.PostgreSQL/PostgresKeyValueStateStore.cs +++ b/src/Cortex.States.PostgreSQL/PostgresKeyValueStateStore.cs @@ -7,7 +7,7 @@ namespace Cortex.States.PostgreSQL { - public class PostgresKeyValueStateStore : IStateStore, IDisposable + public class PostgresKeyValueStateStore : IDataStore, IDisposable { private readonly string _connectionString; private readonly string _schemaName; diff --git a/src/Cortex.States.PostgreSQL/PostgresStateStore.cs b/src/Cortex.States.PostgreSQL/PostgresStateStore.cs index 03acb60..344d5f2 100644 --- a/src/Cortex.States.PostgreSQL/PostgresStateStore.cs +++ b/src/Cortex.States.PostgreSQL/PostgresStateStore.cs @@ -7,7 +7,7 @@ namespace Cortex.States.PostgreSQL { - public class PostgresStateStore : IStateStore, IDisposable where TValue : new() + public class PostgresStateStore : IDataStore, IDisposable where TValue : new() { private readonly string _connectionString; private readonly string _schemaName; diff --git a/src/Cortex.States.RocksDb/RocksDbStateStore.cs b/src/Cortex.States.RocksDb/RocksDbStateStore.cs index 2b840bf..487ed63 100644 --- a/src/Cortex.States.RocksDb/RocksDbStateStore.cs +++ b/src/Cortex.States.RocksDb/RocksDbStateStore.cs @@ -6,7 +6,7 @@ namespace Cortex.States.RocksDb { - public class RocksDbStateStore : IStateStore + public class RocksDbStateStore : IDataStore { private readonly RocksDbSharp.RocksDb _db; private readonly ColumnFamilyHandle _handle; diff --git a/src/Cortex.States.SQLite/SqliteKeyValueStateStore.cs b/src/Cortex.States.SQLite/SqliteKeyValueStateStore.cs index 1b75095..fe9db30 100644 --- a/src/Cortex.States.SQLite/SqliteKeyValueStateStore.cs +++ 
b/src/Cortex.States.SQLite/SqliteKeyValueStateStore.cs @@ -7,7 +7,7 @@ namespace Cortex.States.Sqlite { - public class SqliteKeyValueStateStore : IStateStore, IDisposable + public class SqliteKeyValueStateStore : IDataStore, IDisposable { private readonly string _connectionString; private readonly string _tableName; diff --git a/src/Cortex.States/Abstractions/IStateStore.cs b/src/Cortex.States/Abstractions/IStateStore.cs index 2195f46..7b17990 100644 --- a/src/Cortex.States/Abstractions/IStateStore.cs +++ b/src/Cortex.States/Abstractions/IStateStore.cs @@ -2,13 +2,12 @@ namespace Cortex.States { - public interface IStateStore + public interface IDataStore { string Name { get; } } - - public interface IStateStore : IStateStore + public interface IDataStore : IDataStore { TValue Get(TKey key); void Put(TKey key, TValue value); diff --git a/src/Cortex.States/Abstractions/ITable.cs b/src/Cortex.States/Abstractions/ITable.cs new file mode 100644 index 0000000..9b129dc --- /dev/null +++ b/src/Cortex.States/Abstractions/ITable.cs @@ -0,0 +1,12 @@ +using System.Collections.Generic; + +namespace Cortex.States.Abstractions +{ + public interface ITable + { + TValue Get(TKey key); + bool ContainsKey(TKey key); + IEnumerable> GetAll(); + IEnumerable GetKeys(); + } +} diff --git a/src/Cortex.States/InMemoryStateStore.cs b/src/Cortex.States/InMemoryStateStore.cs index a7b0fbb..e898aed 100644 --- a/src/Cortex.States/InMemoryStateStore.cs +++ b/src/Cortex.States/InMemoryStateStore.cs @@ -4,7 +4,7 @@ namespace Cortex.States { - public class InMemoryStateStore : IStateStore + public class InMemoryStateStore : IDataStore { private readonly ConcurrentDictionary _store = new ConcurrentDictionary(); diff --git a/src/Cortex.States/Operators/IStatefulOperator.cs b/src/Cortex.States/Operators/IStatefulOperator.cs index 810c164..47cd0c8 100644 --- a/src/Cortex.States/Operators/IStatefulOperator.cs +++ b/src/Cortex.States/Operators/IStatefulOperator.cs @@ -4,6 +4,6 @@ namespace 
Cortex.States.Operators { public interface IStatefulOperator { - IEnumerable GetStateStores(); + IEnumerable GetStateStores(); } } diff --git a/src/Cortex.States/Table.cs b/src/Cortex.States/Table.cs new file mode 100644 index 0000000..9b5c9de --- /dev/null +++ b/src/Cortex.States/Table.cs @@ -0,0 +1,20 @@ +using Cortex.States.Abstractions; +using System.Collections.Generic; + +namespace Cortex.States +{ + public class Table : ITable + { + private readonly IDataStore _dataStore; + + public Table(IDataStore dataStore) + { + _dataStore = dataStore; + } + + public TValue Get(TKey key) => _dataStore.Get(key); + public bool ContainsKey(TKey key) => _dataStore.ContainsKey(key); + public IEnumerable> GetAll() => _dataStore.GetAll(); + public IEnumerable GetKeys() => _dataStore.GetKeys(); + } +} diff --git a/src/Cortex.Streams/Abstractions/IBranchStreamBuilder.cs b/src/Cortex.Streams/Abstractions/IBranchStreamBuilder.cs index 1ccc642..3e2008e 100644 --- a/src/Cortex.Streams/Abstractions/IBranchStreamBuilder.cs +++ b/src/Cortex.Streams/Abstractions/IBranchStreamBuilder.cs @@ -46,7 +46,7 @@ public interface IBranchStreamBuilder IBranchStreamBuilder GroupBySilently( Func keySelector, string stateStoreName = null, - IStateStore> stateStore = null); + IDataStore> stateStore = null); /// /// Groups the stream data by a specified key selector silently. @@ -58,7 +58,7 @@ IBranchStreamBuilder GroupBySilently( IBranchStreamBuilder>> GroupBy( Func keySelector, string stateStoreName = null, - IStateStore> stateStore = null); + IDataStore> stateStore = null); /// /// Aggregates the stream data using a specified aggregation function. @@ -71,7 +71,7 @@ IBranchStreamBuilder AggregateSilently( Func keySelector, Func aggregateFunction, string stateStoreName = null, - IStateStore stateStore = null); + IDataStore stateStore = null); /// /// Aggregates the stream data using a specified aggregation function silently in the background. 
@@ -84,7 +84,7 @@ IBranchStreamBuilder> Aggregate keySelector, Func aggregateFunction, string stateStoreName = null, - IStateStore stateStore = null); + IDataStore stateStore = null); diff --git a/src/Cortex.Streams/Abstractions/IStream.cs b/src/Cortex.Streams/Abstractions/IStream.cs index 8ce3fb9..2c9e895 100644 --- a/src/Cortex.Streams/Abstractions/IStream.cs +++ b/src/Cortex.Streams/Abstractions/IStream.cs @@ -12,7 +12,7 @@ public interface IStream string GetStatus(); IReadOnlyDictionary> GetBranches(); - TStateStore GetStateStoreByName(string name) where TStateStore : IStateStore; - IEnumerable GetStateStoresByType() where TStateStore : IStateStore; + TStateStore GetStateStoreByName(string name) where TStateStore : IDataStore; + IEnumerable GetStateStoresByType() where TStateStore : IDataStore; } } diff --git a/src/Cortex.Streams/Abstractions/IStreamBuilder.cs b/src/Cortex.Streams/Abstractions/IStreamBuilder.cs index e49275d..e237326 100644 --- a/src/Cortex.Streams/Abstractions/IStreamBuilder.cs +++ b/src/Cortex.Streams/Abstractions/IStreamBuilder.cs @@ -88,7 +88,7 @@ public interface IStreamBuilder IStreamBuilder GroupBySilently( Func keySelector, string stateStoreName = null, - IStateStore> stateStore = null); + IDataStore> stateStore = null); /// /// Groups the stream data by a specified key selector silently. @@ -100,7 +100,7 @@ IStreamBuilder GroupBySilently( IStreamBuilder>> GroupBy( Func keySelector, string stateStoreName = null, - IStateStore> stateStore = null); + IDataStore> stateStore = null); /// /// Aggregates the stream data using a specified aggregation function. @@ -113,7 +113,7 @@ IStreamBuilder AggregateSilently( Func keySelector, Func aggregateFunction, string stateStoreName = null, - IStateStore stateStore = null); + IDataStore stateStore = null); /// /// Aggregates the stream data using a specified aggregation function silently in the background. 
@@ -126,7 +126,7 @@ IStreamBuilder> Aggregate( Func keySelector, Func aggregateFunction, string stateStoreName = null, - IStateStore stateStore = null); + IDataStore stateStore = null); /// @@ -148,8 +148,8 @@ IStreamBuilder TumblingWindow( Func, TWindowOutput> windowFunction, string windowStateStoreName = null, string windowResultsStateStoreName = null, - IStateStore> windowStateStore = null, - IStateStore, TWindowOutput> windowResultsStateStore = null); + IDataStore> windowStateStore = null, + IDataStore, TWindowOutput> windowResultsStateStore = null); /// @@ -173,8 +173,8 @@ IStreamBuilder SlidingWindow( Func, TWindowOutput> windowFunction, string windowStateStoreName = null, string windowResultsStateStoreName = null, - IStateStore, List> windowStateStore = null, - IStateStore, TWindowOutput> windowResultsStateStore = null); + IDataStore, List> windowStateStore = null, + IDataStore, TWindowOutput> windowResultsStateStore = null); /// /// Adds a session window operator to the stream. @@ -195,8 +195,8 @@ IStreamBuilder SessionWindow( Func, TSessionOutput> sessionFunction, string sessionStateStoreName = null, string sessionResultsStateStoreName = null, - IStateStore> sessionStateStore = null, - IStateStore, TSessionOutput> sessionResultsStateStore = null); + IDataStore> sessionStateStore = null, + IDataStore, TSessionOutput> sessionResultsStateStore = null); IStreamBuilder SetNext(IOperator customOperator); diff --git a/src/Cortex.Streams/BranchStreamBuilder.cs b/src/Cortex.Streams/BranchStreamBuilder.cs index 8b9bb63..5780214 100644 --- a/src/Cortex.Streams/BranchStreamBuilder.cs +++ b/src/Cortex.Streams/BranchStreamBuilder.cs @@ -116,7 +116,7 @@ public void Sink(ISinkOperator sinkOperator) } - public IBranchStreamBuilder>> GroupBy(Func keySelector, string stateStoreName = null, States.IStateStore> stateStore = null) + public IBranchStreamBuilder>> GroupBy(Func keySelector, string stateStoreName = null, States.IDataStore> stateStore = null) { if (stateStore == 
null) { @@ -148,7 +148,7 @@ public IBranchStreamBuilder>> GroupBy GroupBySilently(Func keySelector, string stateStoreName = null, States.IStateStore> stateStore = null) + public IBranchStreamBuilder GroupBySilently(Func keySelector, string stateStoreName = null, States.IDataStore> stateStore = null) { if (stateStore == null) { @@ -180,7 +180,7 @@ public IBranchStreamBuilder GroupBySilently(Func> Aggregate(Func keySelector, Func aggregateFunction, string stateStoreName = null, States.IStateStore stateStore = null) + public IBranchStreamBuilder> Aggregate(Func keySelector, Func aggregateFunction, string stateStoreName = null, States.IDataStore stateStore = null) { //private readonly Func _keySelector if (stateStore == null) @@ -213,7 +213,7 @@ public IBranchStreamBuilder> Aggregate AggregateSilently(Func keySelector, Func aggregateFunction, string stateStoreName = null, States.IStateStore stateStore = null) + public IBranchStreamBuilder AggregateSilently(Func keySelector, Func aggregateFunction, string stateStoreName = null, States.IDataStore stateStore = null) { //private readonly Func _keySelector if (stateStore == null) diff --git a/src/Cortex.Streams/Operators/AggregateOperator.cs b/src/Cortex.Streams/Operators/AggregateOperator.cs index 6c79621..1b9b8cb 100644 --- a/src/Cortex.Streams/Operators/AggregateOperator.cs +++ b/src/Cortex.Streams/Operators/AggregateOperator.cs @@ -12,7 +12,7 @@ public class AggregateOperator : IOperator, IStatefu { private readonly Func _keySelector; private readonly Func _aggregateFunction; - private readonly IStateStore _stateStore; + private readonly IDataStore _stateStore; private IOperator _nextOperator; // Telemetry fields @@ -23,7 +23,7 @@ public class AggregateOperator : IOperator, IStatefu private Action _incrementProcessedCounter; private Action _recordProcessingTime; - public AggregateOperator(Func keySelector, Func aggregateFunction, IStateStore stateStore) + public AggregateOperator(Func keySelector, Func 
aggregateFunction, IDataStore stateStore) { _keySelector = keySelector; _aggregateFunction = aggregateFunction; @@ -122,7 +122,7 @@ public void SetNext(IOperator nextOperator) } } - public IEnumerable GetStateStores() + public IEnumerable GetStateStores() { yield return _stateStore; } diff --git a/src/Cortex.Streams/Operators/AggregateSilentlyOperator.cs b/src/Cortex.Streams/Operators/AggregateSilentlyOperator.cs index 8332f1a..0e73e74 100644 --- a/src/Cortex.Streams/Operators/AggregateSilentlyOperator.cs +++ b/src/Cortex.Streams/Operators/AggregateSilentlyOperator.cs @@ -11,7 +11,7 @@ public class AggregateSilentlyOperator : IOperator, IS { private readonly Func _keySelector; private readonly Func _aggregateFunction; - private readonly IStateStore _stateStore; + private readonly IDataStore _stateStore; private IOperator _nextOperator; // Telemetry fields @@ -22,7 +22,7 @@ public class AggregateSilentlyOperator : IOperator, IS private Action _incrementProcessedCounter; private Action _recordProcessingTime; - public AggregateSilentlyOperator(Func keySelector, Func aggregateFunction, IStateStore stateStore) + public AggregateSilentlyOperator(Func keySelector, Func aggregateFunction, IDataStore stateStore) { _keySelector = keySelector; _aggregateFunction = aggregateFunction; @@ -124,7 +124,7 @@ public void SetNext(IOperator nextOperator) } } - public IEnumerable GetStateStores() + public IEnumerable GetStateStores() { yield return _stateStore; } diff --git a/src/Cortex.Streams/Operators/GroupByKeyOperator.cs b/src/Cortex.Streams/Operators/GroupByKeyOperator.cs index d0f8e05..5ac8715 100644 --- a/src/Cortex.Streams/Operators/GroupByKeyOperator.cs +++ b/src/Cortex.Streams/Operators/GroupByKeyOperator.cs @@ -10,7 +10,7 @@ namespace Cortex.Streams.Operators public class GroupByKeyOperator : IOperator, IStatefulOperator, ITelemetryEnabled { private readonly Func _keySelector; - private readonly IStateStore> _stateStore; + private readonly IDataStore> _stateStore; private 
IOperator _nextOperator; // Telemetry fields @@ -21,7 +21,7 @@ public class GroupByKeyOperator : IOperator, IStatefulOperator, IT private Action _incrementProcessedCounter; private Action _recordProcessingTime; - public GroupByKeyOperator(Func keySelector, IStateStore> stateStore) + public GroupByKeyOperator(Func keySelector, IDataStore> stateStore) { _keySelector = keySelector; _stateStore = stateStore; @@ -118,7 +118,7 @@ public void SetNext(IOperator nextOperator) } } - public IEnumerable GetStateStores() + public IEnumerable GetStateStores() { yield return _stateStore; } diff --git a/src/Cortex.Streams/Operators/GroupByKeySilentlyOperator.cs b/src/Cortex.Streams/Operators/GroupByKeySilentlyOperator.cs index b982071..14374ba 100644 --- a/src/Cortex.Streams/Operators/GroupByKeySilentlyOperator.cs +++ b/src/Cortex.Streams/Operators/GroupByKeySilentlyOperator.cs @@ -10,7 +10,7 @@ namespace Cortex.Streams.Operators public class GroupByKeySilentlyOperator : IOperator, IStatefulOperator, ITelemetryEnabled { private readonly Func _keySelector; - private readonly IStateStore> _stateStore; + private readonly IDataStore> _stateStore; private IOperator _nextOperator; // Telemetry fields @@ -21,7 +21,7 @@ public class GroupByKeySilentlyOperator : IOperator, IStatefulOper private Action _incrementProcessedCounter; private Action _recordProcessingTime; - public GroupByKeySilentlyOperator(Func keySelector, IStateStore> stateStore) + public GroupByKeySilentlyOperator(Func keySelector, IDataStore> stateStore) { _keySelector = keySelector; _stateStore = stateStore; @@ -121,7 +121,7 @@ public void SetNext(IOperator nextOperator) } } - public IEnumerable GetStateStores() + public IEnumerable GetStateStores() { yield return _stateStore; } diff --git a/src/Cortex.Streams/Operators/SessionWindowOperator.cs b/src/Cortex.Streams/Operators/SessionWindowOperator.cs index f6caec4..e4f076b 100644 --- a/src/Cortex.Streams/Operators/SessionWindowOperator.cs +++ 
b/src/Cortex.Streams/Operators/SessionWindowOperator.cs @@ -19,8 +19,8 @@ public class SessionWindowOperator : IOperator, IS private readonly Func _keySelector; private readonly TimeSpan _inactivityGap; private readonly Func, TSessionOutput> _sessionFunction; - private readonly IStateStore> _sessionStateStore; - private readonly IStateStore, TSessionOutput> _sessionResultsStateStore; + private readonly IDataStore> _sessionStateStore; + private readonly IDataStore, TSessionOutput> _sessionResultsStateStore; private IOperator _nextOperator; // Telemetry fields @@ -39,8 +39,8 @@ public SessionWindowOperator( Func keySelector, TimeSpan inactivityGap, Func, TSessionOutput> sessionFunction, - IStateStore> sessionStateStore, - IStateStore, TSessionOutput> sessionResultsStateStore = null) + IDataStore> sessionStateStore, + IDataStore, TSessionOutput> sessionResultsStateStore = null) { _keySelector = keySelector ?? throw new ArgumentNullException(nameof(keySelector)); _inactivityGap = inactivityGap; @@ -242,7 +242,7 @@ private void SessionExpirationCallback(object state) } } - public IEnumerable GetStateStores() + public IEnumerable GetStateStores() { yield return _sessionStateStore; if (_sessionResultsStateStore != null) diff --git a/src/Cortex.Streams/Operators/SlidingWindowOperator.cs b/src/Cortex.Streams/Operators/SlidingWindowOperator.cs index 89d128b..a2391c8 100644 --- a/src/Cortex.Streams/Operators/SlidingWindowOperator.cs +++ b/src/Cortex.Streams/Operators/SlidingWindowOperator.cs @@ -22,8 +22,8 @@ public class SlidingWindowOperator : IOperator, ISt private readonly TimeSpan _windowDuration; private readonly TimeSpan _slideInterval; private readonly Func, TWindowOutput> _windowFunction; - private readonly IStateStore, List> _windowStateStore; - private readonly IStateStore, TWindowOutput> _windowResultsStateStore; + private readonly IDataStore, List> _windowStateStore; + private readonly IDataStore, TWindowOutput> _windowResultsStateStore; private IOperator 
_nextOperator; // Telemetry fields @@ -43,8 +43,8 @@ public SlidingWindowOperator( TimeSpan windowDuration, TimeSpan slideInterval, Func, TWindowOutput> windowFunction, - IStateStore, List> windowStateStore, - IStateStore, TWindowOutput> windowResultsStateStore = null) + IDataStore, List> windowStateStore, + IDataStore, TWindowOutput> windowResultsStateStore = null) { _keySelector = keySelector ?? throw new ArgumentNullException(nameof(keySelector)); _windowDuration = windowDuration; @@ -238,7 +238,7 @@ private List GetWindowStartTimes(DateTime eventTime) return windowStartTimes; } - public IEnumerable GetStateStores() + public IEnumerable GetStateStores() { yield return _windowStateStore; if (_windowResultsStateStore != null) diff --git a/src/Cortex.Streams/Operators/TumblingWindowOperator.cs b/src/Cortex.Streams/Operators/TumblingWindowOperator.cs index d79a2d1..69f1c76 100644 --- a/src/Cortex.Streams/Operators/TumblingWindowOperator.cs +++ b/src/Cortex.Streams/Operators/TumblingWindowOperator.cs @@ -20,8 +20,8 @@ public class TumblingWindowOperator : IOperator, IS private readonly Func _keySelector; private readonly TimeSpan _windowDuration; private readonly Func, TWindowOutput> _windowFunction; - private readonly IStateStore> _windowStateStore; - private readonly IStateStore, TWindowOutput> _windowResultsStateStore; + private readonly IDataStore> _windowStateStore; + private readonly IDataStore, TWindowOutput> _windowResultsStateStore; private IOperator _nextOperator; // Telemetry fields @@ -40,8 +40,8 @@ public TumblingWindowOperator( Func keySelector, TimeSpan windowDuration, Func, TWindowOutput> windowFunction, - IStateStore> windowStateStore, - IStateStore, TWindowOutput> windowResultsStateStore = null) + IDataStore> windowStateStore, + IDataStore, TWindowOutput> windowResultsStateStore = null) { _keySelector = keySelector ?? 
throw new ArgumentNullException(nameof(keySelector)); _windowDuration = windowDuration; @@ -253,7 +253,7 @@ private DateTime GetWindowStartTime(DateTime timestamp) return new DateTime(windowStartTicks, DateTimeKind.Utc); } - public IEnumerable GetStateStores() + public IEnumerable GetStateStores() { yield return _windowStateStore; if (_windowResultsStateStore != null) diff --git a/src/Cortex.Streams/Stream.cs b/src/Cortex.Streams/Stream.cs index e261d60..d062a94 100644 --- a/src/Cortex.Streams/Stream.cs +++ b/src/Cortex.Streams/Stream.cs @@ -123,15 +123,15 @@ public IReadOnlyDictionary> GetBranches() return branchDict; } - public IEnumerable GetStateStores() + public IEnumerable GetStateStores() { var visitedOperators = new HashSet(); - var stateStores = new List(); + var stateStores = new List(); CollectStateStores(_operatorChain, stateStores, visitedOperators); return stateStores; } - private void CollectStateStores(IOperator op, List stateStores, HashSet visitedOperators) + private void CollectStateStores(IOperator op, List stateStores, HashSet visitedOperators) { if (op == null || visitedOperators.Contains(op)) return; @@ -161,14 +161,14 @@ private void CollectStateStores(IOperator op, List stateStores, Has } } - public TStateStore GetStateStoreByName(string name) where TStateStore : IStateStore + public TStateStore GetStateStoreByName(string name) where TStateStore : IDataStore { return GetStateStores() .OfType() .FirstOrDefault(store => store.Name == name); } - public IEnumerable GetStateStoresByType() where TStateStore : IStateStore + public IEnumerable GetStateStoresByType() where TStateStore : IDataStore { return GetStateStores() .OfType(); diff --git a/src/Cortex.Streams/StreamBuilder.cs b/src/Cortex.Streams/StreamBuilder.cs index 46f4fb1..f257efe 100644 --- a/src/Cortex.Streams/StreamBuilder.cs +++ b/src/Cortex.Streams/StreamBuilder.cs @@ -258,7 +258,7 @@ public IStreamBuilder AddBranch(string name, Action GroupBySilently(Func keySelector, string 
stateStoreName = null, States.IStateStore> stateStore = null) + public IStreamBuilder GroupBySilently(Func keySelector, string stateStoreName = null, States.IDataStore> stateStore = null) { if (stateStore == null) { @@ -285,7 +285,7 @@ public IStreamBuilder GroupBySilently(Func return new StreamBuilder(_name, _firstOperator, _lastOperator, _sourceAdded); } - public IStreamBuilder AggregateSilently(Func keySelector, Func aggregateFunction, string stateStoreName = null, States.IStateStore stateStore = null) + public IStreamBuilder AggregateSilently(Func keySelector, Func aggregateFunction, string stateStoreName = null, States.IDataStore stateStore = null) { //private readonly Func _keySelector if (stateStore == null) @@ -315,7 +315,7 @@ public IStreamBuilder AggregateSilently(Func>> GroupBy(Func keySelector, string stateStoreName = null, IStateStore> stateStore = null) + public IStreamBuilder>> GroupBy(Func keySelector, string stateStoreName = null, IDataStore> stateStore = null) { if (stateStore == null) { @@ -342,7 +342,7 @@ public IStreamBuilder>> GroupBy(Fun return new StreamBuilder>>(_name, _firstOperator, _lastOperator, _sourceAdded); } - public IStreamBuilder> Aggregate(Func keySelector, Func aggregateFunction, string stateStoreName = null, IStateStore stateStore = null) + public IStreamBuilder> Aggregate(Func keySelector, Func aggregateFunction, string stateStoreName = null, IDataStore stateStore = null) { if (stateStore == null) { @@ -394,8 +394,8 @@ public IStreamBuilder TumblingWindow( Func, TWindowOutput> windowFunction, string windowStateStoreName = null, string windowResultsStateStoreName = null, - IStateStore> windowStateStore = null, - IStateStore, TWindowOutput> windowResultsStateStore = null) + IDataStore> windowStateStore = null, + IDataStore, TWindowOutput> windowResultsStateStore = null) { if (windowStateStore == null) { @@ -453,8 +453,8 @@ public IStreamBuilder SlidingWindow( Func, TWindowOutput> windowFunction, string windowStateStoreName = 
null, string windowResultsStateStoreName = null, - IStateStore, List> windowStateStore = null, - IStateStore, TWindowOutput> windowResultsStateStore = null) + IDataStore, List> windowStateStore = null, + IDataStore, TWindowOutput> windowResultsStateStore = null) { if (windowStateStore == null) { @@ -509,8 +509,8 @@ public IStreamBuilder SessionWindow( Func, TSessionOutput> sessionFunction, string sessionStateStoreName = null, string sessionResultsStateStoreName = null, - IStateStore> sessionStateStore = null, - IStateStore, TSessionOutput> sessionResultsStateStore = null) + IDataStore> sessionStateStore = null, + IDataStore, TSessionOutput> sessionResultsStateStore = null) { if (sessionStateStore == null) { From 6a5b4f6c5ad0309a796cbc0bbd357751c28cacb2 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Thu, 9 Jan 2025 14:26:12 +0100 Subject: [PATCH 04/12] [v1/feature/86]: Implement MS SQL Server CDC Source Add SqlServerRecord, Add SqlServerSettings, Add SqlServerSourceOperator --- Cortex.sln | 6 + .../Cortex.Streams.MSSqlServer.csproj | 17 + .../SqlServerRecord.cs | 26 ++ .../SqlServerSettings.cs | 16 + .../SqlServerSourceOperator.cs | 436 ++++++++++++++++++ 5 files changed, 501 insertions(+) create mode 100644 src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj create mode 100644 src/Cortex.Streams.MSSqlServer/SqlServerRecord.cs create mode 100644 src/Cortex.Streams.MSSqlServer/SqlServerSettings.cs create mode 100644 src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs diff --git a/Cortex.sln b/Cortex.sln index a8ca664..6e9a85e 100644 --- a/Cortex.sln +++ b/Cortex.sln @@ -47,6 +47,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.Streams.Http", "src\ EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.States.SQLite", "src\Cortex.States.SQLite\Cortex.States.SQLite.csproj", "{19167D25-6383-46B4-9449-B9E364F809FF}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.Streams.MSSqlServer", 
"src\Cortex.Streams.MSSqlServer\Cortex.Streams.MSSqlServer.csproj", "{81A01446-A8AA-4F9D-BB9B-B66E21B2C348}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -140,6 +142,10 @@ Global {19167D25-6383-46B4-9449-B9E364F809FF}.Debug|Any CPU.Build.0 = Debug|Any CPU {19167D25-6383-46B4-9449-B9E364F809FF}.Release|Any CPU.ActiveCfg = Release|Any CPU {19167D25-6383-46B4-9449-B9E364F809FF}.Release|Any CPU.Build.0 = Release|Any CPU + {81A01446-A8AA-4F9D-BB9B-B66E21B2C348}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {81A01446-A8AA-4F9D-BB9B-B66E21B2C348}.Debug|Any CPU.Build.0 = Debug|Any CPU + {81A01446-A8AA-4F9D-BB9B-B66E21B2C348}.Release|Any CPU.ActiveCfg = Release|Any CPU + {81A01446-A8AA-4F9D-BB9B-B66E21B2C348}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj new file mode 100644 index 0000000..355fa28 --- /dev/null +++ b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj @@ -0,0 +1,17 @@ + + + + net8.0 + enable + enable + + + + + + + + + + + diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerRecord.cs b/src/Cortex.Streams.MSSqlServer/SqlServerRecord.cs new file mode 100644 index 0000000..a2a373b --- /dev/null +++ b/src/Cortex.Streams.MSSqlServer/SqlServerRecord.cs @@ -0,0 +1,26 @@ +namespace Cortex.Streams.MSSqlServer +{ + /// + /// Represents a generic CDC record, carrying enough information + /// to understand what changed and how. + /// + public class SqlServerRecord + { + /// + /// The operation name or type, e.g. 'INSERT', 'UPDATE', 'DELETE'. + /// + public string Operation { get; set; } // Insert, Update, Delete, etc. + + + /// + /// The primary key or other columns in the changed record, + /// serialized for demonstration. 
+ /// + public Dictionary Data { get; set; } // Column names -> values + + /// + /// Timestamp or time of the record, if needed. + /// + public DateTime ChangeTime { get; set; } + } +} diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerSettings.cs b/src/Cortex.Streams.MSSqlServer/SqlServerSettings.cs new file mode 100644 index 0000000..3942dc4 --- /dev/null +++ b/src/Cortex.Streams.MSSqlServer/SqlServerSettings.cs @@ -0,0 +1,16 @@ +namespace Cortex.Streams.MSSqlServer +{ + public class SqlServerSettings + { + public bool DoInitialLoad { get; set; } + public TimeSpan PullInterval { get; set; } + public bool ConfigureCDCInServer { get; set; } + + public SqlServerSettings() + { + DoInitialLoad = true; + PullInterval = TimeSpan.FromSeconds(3); + ConfigureCDCInServer = false; + } + } +} diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs b/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs new file mode 100644 index 0000000..a3926da --- /dev/null +++ b/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs @@ -0,0 +1,436 @@ +using Cortex.States; +using Cortex.Streams.Operators; +using Microsoft.Data.SqlClient; +using System.Data; +using System.Security.Cryptography; +using System.Text; + +namespace Cortex.Streams.MSSqlServer +{ + /// + /// MSSQL CDC Source Operator that optionally performs an initial load of the table, + /// then continues reading incremental changes via CDC. + /// Now we skip duplicates by storing a hash of the last record we emitted. 
+ /// + public class SqlServerSourceOperator : ISourceOperator + { + private readonly string _connectionString; + private readonly string _schemaName; + private readonly string _tableName; + private readonly bool _autoEnableCdc; + private readonly bool _doInitialLoad; + + private readonly int _pollIntervalMs; + private Thread _pollingThread; + private bool _stopRequested; + + // Checkpoint store + private readonly IDataStore _checkpointStore; + + // Key for the last LSN processed + private readonly string _checkpointKey; + + // Key to track if the initial load is done + private readonly string _initialLoadCheckpointKey; + + // Key to store the last emitted record's hash + private readonly string _lastRecordHashKey; + + public SqlServerSourceOperator( + string connectionString, + string schemaName, + string tableName, + SqlServerSettings sqlServerSettings = null, + IDataStore checkpointStore = null) + { + _connectionString = connectionString; + _schemaName = schemaName; + _tableName = tableName; + + sqlServerSettings ??= new SqlServerSettings(); + _autoEnableCdc = sqlServerSettings.ConfigureCDCInServer; + _doInitialLoad = sqlServerSettings.DoInitialLoad; + + // Using Milliseconds from the original code + _pollIntervalMs = sqlServerSettings.PullInterval.Milliseconds; + + _checkpointStore = checkpointStore + ?? new InMemoryStateStore($"{_schemaName}.{_tableName}.STORE"); + + // A unique key that identifies this table’s LSN checkpoint + _checkpointKey = $"{_schemaName}.{_tableName}.CDC.LSN"; + + // A unique key that identifies if the initial load is done + _initialLoadCheckpointKey = $"{_schemaName}.{_tableName}.INITIAL_LOAD_DONE"; + + // A unique key to store the last emitted record's hash + _lastRecordHashKey = $"{_schemaName}.{_tableName}.CDC.LAST_HASH"; + } + + public void Start(Action emit) + { + // Optionally enable CDC if requested + if (_autoEnableCdc && !IsCdcEnabledForTable()) + { + EnableCdcForTable(); + } + + // 1. 
If doInitialLoad = true and we haven't done it yet, run initial load + if (_doInitialLoad && _checkpointStore.Get(_initialLoadCheckpointKey) == null) + { + Console.WriteLine("Starting one-time initial load..."); + RunInitialLoad(emit); + + // Mark initial load as completed + _checkpointStore.Put(_initialLoadCheckpointKey, new byte[] { 0x01 }); + Console.WriteLine("Initial load completed."); + } + else + { + Console.WriteLine("Skipping initial load (already done or disabled)."); + } + + // 2. Initialize the LSN checkpoint if we don’t already have one + byte[] lastCommittedLsn = _checkpointStore.Get(_checkpointKey); + if (lastCommittedLsn == null) + { + // By default, start from "current" so we only see future changes + lastCommittedLsn = GetMaxLsn(); + _checkpointStore.Put(_checkpointKey, lastCommittedLsn); + } + + // 3. Start CDC polling + _stopRequested = false; + _pollingThread = new Thread(() => PollCdcChanges(emit)); + _pollingThread.IsBackground = true; + _pollingThread.Start(); + } + + public void Stop() + { + _stopRequested = true; + _pollingThread?.Join(); + } + + /// + /// Polls the CDC table periodically for new changes, + /// using the last LSN from the checkpoint store. + /// + private void PollCdcChanges(Action emit) + { + string captureInstanceName = $"{_schemaName}_{_tableName}"; + + while (!_stopRequested) + { + try + { + byte[] lastCommittedLsn = _checkpointStore.Get(_checkpointKey); + byte[] maxLsn = GetMaxLsn(); + if (maxLsn == null) + { + Thread.Sleep(_pollIntervalMs); + continue; + } + + // If maxLSN <= lastCommitted, there's nothing new + if (CompareLsn(maxLsn, lastCommittedLsn) <= 0) + { + Thread.Sleep(_pollIntervalMs); + continue; + } + + // Get changes + var changes = GetChangesSinceLastLsn(captureInstanceName, lastCommittedLsn, maxLsn); + + // Retrieve the last record's hash we stored + var lastHashBytes = _checkpointStore.Get(_lastRecordHashKey); + string lastHash = lastHashBytes == null ? 
null : Encoding.UTF8.GetString(lastHashBytes); + + // Process changes in LSN order to ensure proper checkpointing + foreach (var change in changes) + { + if (_stopRequested) + break; + + // Compute a hash of this record + string currentHash = ComputeHash(change); + + // If it's the same as the last emitted, skip + if (currentHash == lastHash) + { + continue; + } + + // Otherwise, emit + emit(change); + + // Update last-hash checkpoint + lastHash = currentHash; + _checkpointStore.Put(_lastRecordHashKey, Encoding.UTF8.GetBytes(lastHash)); + } + + // Update the LSN checkpoint if new changes arrived + if (changes.Count > 0) + { + _checkpointStore.Put(_checkpointKey, maxLsn); + } + + Thread.Sleep(_pollIntervalMs); + } + catch (Exception ex) + { + Console.WriteLine($"Error in CDC polling: {ex}"); + Thread.Sleep(5000); + } + } + } + + /// + /// Reads all data from the base table and emits it once. + /// + private void RunInitialLoad(Action emit) + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + cmd.CommandText = $"SELECT * FROM [{_schemaName}].[{_tableName}] (NOLOCK)"; + + using (var reader = cmd.ExecuteReader()) + { + while (reader.Read()) + { + var record = new SqlServerRecord + { + Operation = "InitialLoad", + Data = new Dictionary(), + ChangeTime = DateTime.UtcNow, + }; + + // Populate the data dictionary with all columns + for (int i = 0; i < reader.FieldCount; i++) + { + string colName = reader.GetName(i); + if (colName.StartsWith("__$")) + continue; + + object value = reader.GetValue(i); + record.Data[colName] = value == DBNull.Value ? 
null : value; + } + + emit(record); + } + } + } + } + + private bool IsCdcEnabledForTable() + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + + // Check if database-level CDC is enabled + cmd.CommandText = @" + SELECT is_cdc_enabled + FROM sys.databases + WHERE name = DB_NAME();"; + + bool isDbCdcEnabled = Convert.ToBoolean(cmd.ExecuteScalar()); + if (!isDbCdcEnabled) + return false; + + // Check if table-level CDC is enabled + cmd.CommandText = @" + SELECT COUNT(*) + FROM sys.tables t + JOIN sys.schemas s ON t.schema_id = s.schema_id + WHERE s.name = @schemaName + AND t.name = @tableName + AND t.is_tracked_by_cdc = 1;"; + + cmd.Parameters.AddWithValue("@schemaName", _schemaName); + cmd.Parameters.AddWithValue("@tableName", _tableName); + + int count = (int)cmd.ExecuteScalar(); + return (count > 0); + } + } + + private void EnableCdcForTable() + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + + // 1. Enable CDC at the database level if not already + cmd.CommandText = @" + IF NOT EXISTS ( + SELECT 1 FROM sys.databases + WHERE name = DB_NAME() AND is_cdc_enabled = 1 + ) + BEGIN + EXEC sys.sp_cdc_enable_db; + END + "; + cmd.ExecuteNonQuery(); + + // 2. 
Enable CDC on the table + cmd.CommandText = $@" + EXEC sys.sp_cdc_enable_table + @source_schema = '{_schemaName}', + @source_name = '{_tableName}', + @capture_instance = '{_schemaName}_{_tableName}', + @role_name = NULL, + @supports_net_changes = 0; + "; + cmd.ExecuteNonQuery(); + + Console.WriteLine($"CDC enabled for table [{_schemaName}].[{_tableName}]."); + } + } + + private byte[] GetMaxLsn() + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + cmd.CommandText = "SELECT sys.fn_cdc_get_max_lsn();"; + object result = cmd.ExecuteScalar(); + if (result == DBNull.Value) return null; + return (byte[])result; + } + } + + private byte[] GetMinLsn(string captureInstanceName) + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + cmd.CommandText = + $"SELECT sys.fn_cdc_get_min_lsn('{captureInstanceName}');"; + object result = cmd.ExecuteScalar(); + if (result == DBNull.Value) return null; + return (byte[])result; + } + } + + private List GetChangesSinceLastLsn( + string captureInstanceName, + byte[] fromLsn, + byte[] toLsn) + { + var changes = new List(); + string functionName = $"cdc.fn_cdc_get_all_changes_{captureInstanceName}"; + + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + // Use 'all update old' to get both old & new update rows + cmd.CommandText = $@" + SELECT * + FROM {functionName}(@from_lsn, @to_lsn, 'all update old') + "; + + cmd.Parameters.Add(new SqlParameter("@from_lsn", SqlDbType.VarBinary, 10) { Value = fromLsn }); + cmd.Parameters.Add(new SqlParameter("@to_lsn", SqlDbType.VarBinary, 10) { Value = toLsn }); + + using (var reader = cmd.ExecuteReader()) + { + while (reader.Read()) + { + var record = new SqlServerRecord + { + Operation = GetOperationName(Convert.ToInt32(reader["__$operation"])), + Data = new Dictionary(), + ChangeTime = 
DateTime.UtcNow, + }; + + for (int i = 0; i < reader.FieldCount; i++) + { + string colName = reader.GetName(i); + if (colName.StartsWith("__$")) continue; + + object value = reader.GetValue(i); + record.Data[colName] = (value == DBNull.Value) ? null : value; + } + + changes.Add(record); + } + } + } + + // Filter out rows that are the "old" side of an update + // (operation code 3 = "Update (old)") + changes = changes + .Where(c => c.Operation != "Update (old)") + .ToList(); + + return changes; + } + + + + private string GetOperationName(int operationCode) + { + switch (operationCode) + { + case 1: return "Delete (old)"; + case 2: return "Insert"; + case 3: return "Update (old)"; + case 4: return "Update (new)"; + case 5: return "Delete (new)"; + default: return $"Unknown ({operationCode})"; + } + } + + /// + /// Compare two varbinary(10) LSNs: + /// -1 if lsnA < lsnB + /// 0 if lsnA == lsnB + /// 1 if lsnA > lsnB + /// + private int CompareLsn(byte[] lsnA, byte[] lsnB) + { + if (lsnA == null && lsnB == null) return 0; + if (lsnA == null) return -1; + if (lsnB == null) return 1; + + for (int i = 0; i < 10; i++) + { + if (lsnA[i] < lsnB[i]) return -1; + if (lsnA[i] > lsnB[i]) return 1; + } + return 0; + } + + /// + /// Computes an MD5 hash from the SqlServerRecord's Data dictionary. + /// You could also use JSON serialization, SHA256, etc. + /// + private string ComputeHash(SqlServerRecord record) + { + // Build a stable string from the record's Data + // Sort by key so that the order is deterministic + var sb = new StringBuilder(); + foreach (var kv in record.Data.OrderBy(x => x.Key)) + { + // "Key=Value;" + sb.Append(kv.Key).Append('=').Append(kv.Value ?? 
"null").Append(';'); + } + + // Get MD5 hash of that string + using (var md5 = MD5.Create()) + { + var bytes = Encoding.UTF8.GetBytes(sb.ToString()); + var hashBytes = md5.ComputeHash(bytes); + return Convert.ToBase64String(hashBytes); + } + } + } +} From 17df5a6fe1e335f94375255d98d29659dbe2cf76 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Thu, 9 Jan 2025 17:41:22 +0100 Subject: [PATCH 05/12] [v1/feature/86]: Implementation of MS Sql CDC Source Operator Update SqlServerSourceOperator, Add Assets for Nuget Release --- .../Assets/cortex.png | Bin 0 -> 7179 bytes .../Assets/license.md | 20 + .../Cortex.Streams.MSSqlServer.csproj | 48 +- .../SqlServerRecord.cs | 5 +- .../SqlServerSettings.cs | 4 +- .../SqlServerSourceOperator.cs | 13 +- .../SqlServerSourceOperatorExperiment.cs | 483 ++++++++++++++++++ 7 files changed, 564 insertions(+), 9 deletions(-) create mode 100644 src/Cortex.Streams.MSSqlServer/Assets/cortex.png create mode 100644 src/Cortex.Streams.MSSqlServer/Assets/license.md create mode 100644 src/Cortex.Streams.MSSqlServer/SqlServerSourceOperatorExperiment.cs diff --git a/src/Cortex.Streams.MSSqlServer/Assets/cortex.png b/src/Cortex.Streams.MSSqlServer/Assets/cortex.png new file mode 100644 index 0000000000000000000000000000000000000000..a4f9727d04f91f61f20720bd64b9c139fb041236 GIT binary patch literal 7179 zcmV+m9Q5OfP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGf5&!@T5&_cPe*6Fc8=y%C^C!)jU*}Z_TUZ5zkPS!zgzWpiKoUY0LP$tR!Xokt%Ag{H3XX^@B8wnv03972 z_hwv1kf6A*-1mH+Tg$Cm_f}O`27H#NQDPYKIv zcTCHy`fO@u)s1PH)sbnLH4!`}kZK|c%WX|xUTdf>GJS=u8BkWepa#-RXr-xDk(t@H zW@WFA%*qkfWGlBRGCQ}KE6>y#u7Wj@IeBX$a|&84%`IF@nqxf9Eo!w@+!~oz+$N|! 
zGOwgPGT%~1WPT~v)1{qUWzc%A^7WAg4s}HqRBUi)VHMOZs3)?px+juI+89Z!G1VJc zB&xQLlmylF3mS+d)%Qn|>-%q{Gz@%}+OT;?VqO2Az$`8yqwdcMnN^=m&#Gn9GOO9N zWmSgoa}7%%RkMWUH7sFyEou6S8a90e9_zTW>Pa(9t!!{;=E_xE+0bgztQ@G(RxZ@U zX6H7t+4KlCv)TDgY<6BVo1M3Y&B-^_!sZmT(Bo4DE$penwd|>awfwQDmCY?QRMf`i z7PlFX?QCvIJDW%9VDpMQ_+#lhHowFWelF`|^GiF~)8*^A%D5JkcX3sij~m#6$_@0m zu(F#itm@{El|3x6Y9mXm>S2l1c-+Vmt9w~uO)pzi-OCo$^sz;?rZ%x8Q+55MWT<|? zR>L4ErD2dQZhVHNG;H}~QR9|B$4$wo*gJiBEt{HI$)=I2xEuhhIXqJqL=&J0vCala z5N(h%O%XsY0Gvq>4G}=VGamuY)|~uiL2C?vYdAnt09d$|;L#&sk_?c=ZERj~yDb1L z>0t9s0idC$%Q^*_5G9Z#17M~3$VC820LaAwn(8AZK>|nur~sJ;c=dyOfYHG-0D8c)!3BgD z0ANym|IN6m>E(B(W>gS3OJ2?cFb6y?Pk02*2B>qM3!dyehx-I@f^^Pv!7~6xXt|F=bO5v<25_GNQSUxC#H8AO4sJ?%xdNQ*J~@1Hcqd#$aK3;6*i@;(*!qp zN$K#ErDYCqdVq%egycmAs0*E1fQrN=5J1U&?wU^l&=op;GEa^BbZsAjqqfh>eP;6F z)Ao6|Z&6L30M3*Oo+ETR-KT=*2%XxtPvO3(3wF_Ye(o~>8t!w1PapS14NvJlvF#Jx zr)&H4@D^1Y;3h9EV~`Hsl#Ft1yQhN}IO}{ANsFp%P!{Yu=K-8E>wGlM^MYqOFId^& z2%iLQaft=a3!W})l0KJV3MhJsTZ0AEbxpuK9u{Elz-8;ROww zR?TOCTT;p_`2Zw{iqPrGJT3QmCoihd8BOM?bf27cD&6OV=tx|mt@-5SMRuQ8=yZio zXXvycikWBcaNHyroC=-<(Ct1seEPUg8#$OY0`0p9-Gr zJ{g`@!7jLOL4{YlXVrYeag)-D-QWax7C_l`CNRIYFQbZ{KY1z6$Kr!#EgZ*Wg$`|~Vo!z(PSdoO8a_bY<-k(PJvp>_%Gv>m7eXoO8y z=v38xA~-=V_sQW?)$U2~rleO;=quWCh;^O(o^87Do2U9N{K`5{e9LlrUu6j^#7=7= z^K^|iM3;Gn?{VS6|5NG*Skr-zS?{?!Y}3WxJ=J&q7q<4T&slowP6s>-T#)3&a-QwJ zg;hNOSL6bxgy$1FEqErlrDbgSx;<>mwFhkL2aIjG<}W;N@XCF*V*N{$o67aDm_{0CMM*lVjE#{Dj{8=BvSl58QYDE=yg#mCogYr)>Me!L!;uW!7o8S@;_g zl7_uQY-^OC2f{JS*1bsWqcU+(hEBQd6W{?{d}`5fJiw)yk^ly7`)qjlJuY(RJKqrq z`GfcguyO3dif(EzUGOZ31~>troplQEFvkURT|{^Q=UekNvpM)VZw)KievEA%dfrbnvSA!cLKpGf(cmS-Gnz;po5gd(_bb!i_sP^nlvAr;IF94NvVp2S5{E zIe{x2j!!EhSb%2%93eco6oY=f@81m-UN#$TMCJ2m2ny;!--6vrS|s-=;JMtVsQK{x z#ZBAU#4ZIzS;aOu?=L=xFyyPUm3?Au_nm}G zSbMn7S+E-pEG2LS!||zwHaG#E*!EfQ6af9;A!OnMw7>mzcz7Q$)_Ul3`k>r-s{YQf z&a>g+c?iv$uRaJBBI%f+2drT5@6;XkD29qGXyza-z)iHk1%yWcqj%nyCZ5l%AP$=9nz{{A|*f(qAK$Qw5&SJ zo?;tL{umCL@!pybd`vwo2)C-yhLO6Dw|gDjaV9t?JOP~4_C;OuneH?GpWBB}^e|le 
zag}$AW%vAz`a|7QB%*g8o`;LG_RW8Vt~&u6od!uYy_Dq0U1st~gFVI<0$vHgCFeW9 zY2hhCr&r?Qf@c7ngiJHNoWjXST(|Mu9k#gnSvqH6=P3$yBu=={wJ%+went7Uf(X!h z-~WXzZrVoeq^hse9xCz}qzwws23No)rt09S0S1Am1V|yXgw4pRV@44sR;1jC7Xs)LiHjQyTH$)pCo|DzJmknGB*FnHD z$os71*<)0+QxUHkDiUUvsUTYIo+5d105l-ZFV=!{!t(V?P4YG}AheuQ< z)a-emPmxfTfpUQ9H^!gPggt-s-G2;kgHCgTT|RX;9RB!N9ozB9ZX| zC?+lh(FY)Yh70OmycjxJ!EqZ-{m7DPHj(pOlXI8VSTHMgo+MZULl4-%l?OEJ5)_`9 z(Pp&nqIS~RQSCoP!Z}bY&#D&NJyVLBFFJ5unWq3x0WgUB;?oN0L#}=Ev(RZd;I$w5 znl1!QRFdFfPE1bke%62S4+_m(COm)VAtDT1xySPR-{571QM}KRM;lZ_MXEts6QFA; zw9b8|16!j67Zjc@boz9c;XFw15r)DzZxBWORmK|j4NgF5PGS zm+!Fw$N|sWdE)pWr0VN*zzZ}*!U@j?XipDx^|lm-0Y}oo696jUd1alV^A}LqXi93!NSS@!Xt^``O^oeP4KkLkM3S8lMvPE8BGbE_G&O zSOp#2xInA-f57@K{6;+-NMz8F40r&k255&)Kx;ev74^(xYZ0-#OziKJ0jff0xXe@P zaVZ!nI7}*&F!yQT0hY^wYabyvt@HHoT#1VeFSU>ga2wjm_3!?`+K+t0I!}Dhx=#K`zsKJ#c;`7hI8_PnDx<%n&g8Sr z!jJQb?hEfc?!GyX3>@hZz>@&tSiDhOd}r?l!k8HzeONU`<1)T0q+4D7<$MGH@|7DY0!)`sNf0U{Nc%2ryo3k zYk8F5MCl*~1Sr76`LGIe<9oNQf*FoLRtTr`T28yoHk`i6)*H~)9sibok2Z4f@_qjM zf^u?^wsN8?5{EPmb(x)1}`npzBZk zK+o?!^Aiu9M%Xmr;kebWen8VL(9`1XsMZV>1%xNpe5UigK7Hn?bf27c3htXV*5HC9E-H8v zQVLjNO&@DL^o0>3BNXanKw4qK>g9W^^Tc=LJcQTM9q-Uo1{xL`!*%Sgf?ZrmYrvb8 z+cXy7py=U=p_6`}TEx;@o?~r?zoa&5vz(0h=^8Jt0lwzN^VD`=`IvGYy9CcYNNZ1# za5+x{FL2wZSWPsB;6&*lx&d+-0FD2b?l|twAOi|v6&dNmiVZZ|T3k`<4bVGuqCYdJ z`@8_k?z7syoW@uNr-o;`58)D7R_oy}Jo_{KgdP}_8OU5QZBX8Xz_ovi1TR?ld~^kS z41qHh0GVslES5%vl{{;4J9? 
z7P1M+d2D6RJ~ntY+3(wPe{{`Su0WFVqlEuC$BTYv9L^HWA zV=cfwTA*TX1!4iyWSsz~+Unn3^U3i1O9l&`0dU5D1Govv`7A!Igw^i37<#IBw8HCP z?j*u8k`;R!kP)~~129qxa52`At{d)Zax&K5Asn8Q`3=YVpf$Fp2mId zn$OF9qVs~)e6bBKxq#X=EbZ;T9BwrYg!V9OB7$1h9~cut4myz&XdXp z^=9L-3+~B9IkX3H&zYN{lM#fxhJ9CQur&;L90Tapf5mE`YBpKtJeQ=5Q-bFRn-)NQ z$w1%)fYVpj#U{8XlXBUtye8IuG!i-)!EsHmen=MzdtuU+P9foKzIvbK558e7ht_UB zr)c*`8h~Ll&*@n)2o7x;QgQ6#720W7t$Hj%+YBPyqKm9x;Em8B6YwzKWogTE)WI!p zf2D`#?>?6U1GsN`R())NL$3&W{mY*_75XCcjPPfcTt7(R7TshxHzwFt?LHU!W-wRj zV4J9ksYP0NYJfI8ch;$cXE@JFUL4MgA#lhblk-?My0eDvg$ipBdi$}Z?h}i09o=O( z|BS2#w&smbL&L*Ofl+hJCPsg$1y2TOZg;E*oBUf+)HRyMd|DWY2|=M9{Q(l5YZS8LvDQJ_zf6|OeXj5+*L)g) zCd7mlF#-xlnb`aj=d?B?5Z$K>of`KAtogTb+Y7WWR~Tq?%(aKKqYuKZ ze15HjaXz$n*kl{i`@+JbLjT}DDNG|2b8rFSxfEcKmbUw-9vg5^BxchW*!8~8SJTKB z!%sM7>-C4MY{v--d3G`qPy)Os5_4#lSI?QBLc_yx)w|Ep#mD;*UFQqWbBctVbsBBo zSb)P_j&;qLfgHAdL3@bRGzc3wpZprZh4v0*LLSZ0#H69H?+M2h48BP-(5#TDa-XW* zbHTISr)>K&s~#^nTx3`si{QiV--Qc=wE~#Y>&zfS%1BnQlq;8Zh7C6Xi4VZuW0~!{ z>AVhjDhEpLQ#(-nT2sM&j}sj3YV`9}>^e0fXjmVBFOqmN(H|Nw#QB(>{5M$u7oS>8 zAunvmL^*?nBiObd=hVT|g-#2e!g-FwCF3!IgWGcZUgLLjm^<3oUJzfrFoD7&ptxVh z5ruBAj_xl+kEh!p>HlED{XIoMcrbKOj8F>0Z`>UN7C}Rz#)TNx@HGm z_%O)`2YsK~5c2}duOSOJ3cmJiaOe-#eDG81;|tOr;yD=g#Kkc z-20a~LZ`0g)3$vTqYn;uI(7@fnzXP*HZDZ%ix=qw2MK$~TCs6=&zakFEL6GcG_4N^ zbo`=R+A1F#0*vH6m+w4j?BnU&ghvT48uz7-GB`lPLWS11z6c$9Z~?HKDj~Bf&_$=( zET@rr@y%^EXe9gELzdI`nsJU`j>R`n4PJQ=DzxJmEJnd=B4nB?YhCbE;Zp@qRC@X7 zf#cm-6|DZ{%OeUoSgVcpg12k#*Kxe2(Hnv-4P~d8S>KX1&jw0t%=2Musm)mvHl=WbI^evqCKK8=qryV7NafQRsjtQ3W}d=6Ro47iAO zFNgnDF1Wt!@Yif+b|VeIn(&kWgSv0jz$NF?8^bPkVLmXqex(#`AKi z`wK~oR|13!+4|O3Y<6z5f2L`M`|MS0?V-=ZM~>@^zAKe?Zaa5SQUdgWH)XWJVdJwf zj>8AVG;r=*A85F^iP^@Qf?#88+x~%X47p5=KGq}1=aYr37~ilFcDxYBEUN9JbBX)6 zXu+w%rwDFo*(iZSm`TZ}FRG39F2(cFV~;m))?!|QBe11=*U29OK{JKtqW$9M3HCbp zE;;P663xws&x7qe(9=UniemR9EkN0MCcsezXAF=wzV;!#dB{x7l`@{weDD*RfEpx& zgd4GL41=)Ge55$1hY&IW0N)s54l(%&pNW5a<8SPlf4I*n3(whf;TM+C{=BfNR@&-c z0Z|9ghFCg!;2>Ol%u>ScbCu7Zrf>Jq2|KT@=z5vWSWy=U8l68mk2WQ(*>m1?zChZ2 
zj=uX8#d(mB%p*V{jck-`dz)45R-Ds1_gPl5?Fh{Z11xkL$U8}dcHfdw1_$9T#RcXI z?)Ado@w^}zq$v9GUGeUXR>!P_YGw zSNL?mTU;^@ZuF#srv_-c&kLRfa2(udO9wAN=+wX)2lrT|f+qlUgiSki>flWp2lqIo zb6*sp(}b8-JPz)$OR)`z4xZM1odhs- z99)b@2TuSf!W#z{Lki+N6JToLIJnqS0C?lzVpv-D84y!AxI3-`#}y+|!6R^Y0Pd#y zfN{l+6z)qYycst!Iqv|5(%lD)D<&j`P4t{hOer`JH!(H)FB4OWzCm&4I%r%mq(a)B z1FR;d7W^d+|DH_Bc_Kcw;Pv?A{9DxKAz>L;Ea?u|fYF4My#E2S{{wP}i{H;-Y-|7k N002ovPDHLkV1i-n!nFVZ literal 0 HcmV?d00001 diff --git a/src/Cortex.Streams.MSSqlServer/Assets/license.md b/src/Cortex.Streams.MSSqlServer/Assets/license.md new file mode 100644 index 0000000..3c845d4 --- /dev/null +++ b/src/Cortex.Streams.MSSqlServer/Assets/license.md @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2025 Buildersoft + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj index 355fa28..70866ce 100644 --- a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj +++ b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj @@ -1,10 +1,48 @@  - - net8.0 - enable - enable - + + net9.0;net8.0;net7.0 + + 1.0.1 + 1.0.1 + Buildersoft Cortex Framework + Buildersoft + Buildersoft,EnesHoxha + Copyright © Buildersoft 2025 + + Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. + + + https://github.com/buildersoftio/cortex + cortex cdc mediator eda streaming distributed streams states mssql + + 1.0.1 + license.md + cortex.png + Cortex.Streams.MSSqlServer + True + True + True + + Just as the Cortex in our brains handles complex processing efficiently, Cortex Data Framework brings brainpower to your data management! 
+ https://buildersoft.io/ + README.md + + + + + True + \ + + + True + + + + True + + + diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerRecord.cs b/src/Cortex.Streams.MSSqlServer/SqlServerRecord.cs index a2a373b..4ab7f29 100644 --- a/src/Cortex.Streams.MSSqlServer/SqlServerRecord.cs +++ b/src/Cortex.Streams.MSSqlServer/SqlServerRecord.cs @@ -1,4 +1,7 @@ -namespace Cortex.Streams.MSSqlServer +using System; +using System.Collections.Generic; + +namespace Cortex.Streams.MSSqlServer { /// /// Represents a generic CDC record, carrying enough information diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerSettings.cs b/src/Cortex.Streams.MSSqlServer/SqlServerSettings.cs index 3942dc4..6746728 100644 --- a/src/Cortex.Streams.MSSqlServer/SqlServerSettings.cs +++ b/src/Cortex.Streams.MSSqlServer/SqlServerSettings.cs @@ -1,4 +1,6 @@ -namespace Cortex.Streams.MSSqlServer +using System; + +namespace Cortex.Streams.MSSqlServer { public class SqlServerSettings { diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs b/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs index a3926da..1789c31 100644 --- a/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs +++ b/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs @@ -1,9 +1,13 @@ using Cortex.States; using Cortex.Streams.Operators; using Microsoft.Data.SqlClient; +using System; +using System.Collections.Generic; using System.Data; +using System.Linq; using System.Security.Cryptography; using System.Text; +using System.Threading; namespace Cortex.Streams.MSSqlServer { @@ -73,6 +77,9 @@ public void Start(Action emit) if (_autoEnableCdc && !IsCdcEnabledForTable()) { EnableCdcForTable(); + + // Sleep for 10s; believe that MS SQL Server will enable CDC for the connected table! + Thread.Sleep(10000); } // 1. 
If doInitialLoad = true and we haven't done it yet, run initial load @@ -170,7 +177,10 @@ private void PollCdcChanges(Action emit) } // Update the LSN checkpoint if new changes arrived - if (changes.Count > 0) + if (changes.Count() <= 0) + { + } + else { _checkpointStore.Put(_checkpointKey, maxLsn); } @@ -411,7 +421,6 @@ private int CompareLsn(byte[] lsnA, byte[] lsnB) /// /// Computes an MD5 hash from the SqlServerRecord's Data dictionary. - /// You could also use JSON serialization, SHA256, etc. /// private string ComputeHash(SqlServerRecord record) { diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperatorExperiment.cs b/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperatorExperiment.cs new file mode 100644 index 0000000..bd42be2 --- /dev/null +++ b/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperatorExperiment.cs @@ -0,0 +1,483 @@ +using Cortex.States; +using Cortex.Streams.Operators; +using Microsoft.Data.SqlClient; +using System; +using System.Collections.Generic; +using System.Data; +using System.Linq; +using System.Security.Cryptography; +using System.Text; +using System.Threading; + +namespace Cortex.Streams.MSSqlServer +{ + /// + /// MSSQL CDC Source Operator that optionally performs an initial load of the table, + /// then continues reading incremental changes via CDC. + /// Now we skip duplicates by storing a hash of the last record we emitted. 
+ /// + internal class SqlServerSourceOperatorExperiment : ISourceOperator + { + private readonly string _connectionString; + private readonly string _schemaName; + private readonly string _tableName; + private readonly bool _autoEnableCdc; + private readonly bool _doInitialLoad; + + private readonly int _pollIntervalMs; + private Thread _pollingThread; + private bool _stopRequested; + + // Checkpoint store + private readonly IDataStore _checkpointStore; + + // Key for the last LSN processed + private readonly string _checkpointKey; + + // Key to track if the initial load is done + private readonly string _initialLoadCheckpointKey; + + // Key to store the last emitted record's hash + private readonly string _lastRecordHashKey; + + public SqlServerSourceOperatorExperiment( + string connectionString, + string schemaName, + string tableName, + SqlServerSettings sqlServerSettings = null, + IDataStore checkpointStore = null) + { + _connectionString = connectionString; + _schemaName = schemaName; + _tableName = tableName; + + sqlServerSettings ??= new SqlServerSettings(); + _autoEnableCdc = sqlServerSettings.ConfigureCDCInServer; + _doInitialLoad = sqlServerSettings.DoInitialLoad; + + // Using Milliseconds from the original code + _pollIntervalMs = sqlServerSettings.PullInterval.Milliseconds; + + _checkpointStore = checkpointStore + ?? 
new InMemoryStateStore($"{_schemaName}.{_tableName}.STORE"); + + // A unique key that identifies this table’s LSN checkpoint + _checkpointKey = $"{_schemaName}.{_tableName}.CDC.LSN"; + + // A unique key that identifies if the initial load is done + _initialLoadCheckpointKey = $"{_schemaName}.{_tableName}.INITIAL_LOAD_DONE"; + + // A unique key to store the last emitted record's hash + _lastRecordHashKey = $"{_schemaName}.{_tableName}.CDC.LAST_HASH"; + } + + public void Start(Action emit) + { + // Optionally enable CDC if requested + if (_autoEnableCdc && !IsCdcEnabledForTable()) + { + EnableCdcForTable(); + + // Wait for the capture instance to appear, + // so we don’t get “insufficient arguments” right away + string captureInstanceName = $"{_schemaName}_{_tableName}"; + WaitForCaptureInstance(captureInstanceName, 5000); + Thread.Sleep(5000); + } + + // 1. If doInitialLoad = true and we haven't done it yet, run initial load + if (_doInitialLoad && _checkpointStore.Get(_initialLoadCheckpointKey) == null) + { + Console.WriteLine("Starting one-time initial load..."); + RunInitialLoad(emit); + + // Mark initial load as completed + _checkpointStore.Put(_initialLoadCheckpointKey, new byte[] { 0x01 }); + Console.WriteLine("Initial load completed."); + } + else + { + Console.WriteLine("Skipping initial load (already done or disabled)."); + } + + // 2. Initialize the LSN checkpoint if we don’t already have one + byte[] lastCommittedLsn = _checkpointStore.Get(_checkpointKey); + if (lastCommittedLsn == null) + { + // By default, start from "current" so we only see future changes + lastCommittedLsn = GetMaxLsn(); + _checkpointStore.Put(_checkpointKey, lastCommittedLsn); + } + + // 3. 
Start CDC polling + _stopRequested = false; + _pollingThread = new Thread(() => PollCdcChanges(emit)); + _pollingThread.IsBackground = true; + _pollingThread.Start(); + } + + public void Stop() + { + _stopRequested = true; + _pollingThread?.Join(); + } + + /// + /// Polls the CDC table periodically for new changes, + /// using the last LSN from the checkpoint store. + /// + private void PollCdcChanges(Action emit) + { + string captureInstanceName = $"{_schemaName}_{_tableName}"; + + while (!_stopRequested) + { + try + { + byte[] lastCommittedLsn = _checkpointStore.Get(_checkpointKey); + byte[] maxLsn = GetMaxLsn(); + if (maxLsn == null) + { + Thread.Sleep(_pollIntervalMs); + continue; + } + + // If maxLSN <= lastCommitted, there's nothing new + if (CompareLsn(maxLsn, lastCommittedLsn) <= 0) + { + Thread.Sleep(_pollIntervalMs); + continue; + } + + // Get changes + var changes = GetChangesSinceLastLsn(captureInstanceName, lastCommittedLsn, maxLsn); + + // Retrieve the last record's hash we stored + var lastHashBytes = _checkpointStore.Get(_lastRecordHashKey); + string lastHash = lastHashBytes == null ? 
null : Encoding.UTF8.GetString(lastHashBytes); + + // Process changes in LSN order to ensure proper checkpointing + foreach (var change in changes) + { + if (_stopRequested) + break; + + // Compute a hash of this record + string currentHash = ComputeHash(change); + + // If it's the same as the last emitted, skip + if (currentHash == lastHash) + { + continue; + } + + // Otherwise, emit + emit(change); + + // Update last-hash checkpoint + lastHash = currentHash; + _checkpointStore.Put(_lastRecordHashKey, Encoding.UTF8.GetBytes(lastHash)); + } + + // Update the LSN checkpoint if new changes arrived + if (changes.Count() > 0) + { + _checkpointStore.Put(_checkpointKey, maxLsn); + } + + Thread.Sleep(_pollIntervalMs); + } + catch (Exception ex) + { + Console.WriteLine($"Error in CDC polling: {ex}"); + Thread.Sleep(5000); + } + } + } + + /// + /// Reads all data from the base table and emits it once. + /// + private void RunInitialLoad(Action emit) + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + cmd.CommandText = $"SELECT * FROM [{_schemaName}].[{_tableName}] (NOLOCK)"; + + using (var reader = cmd.ExecuteReader()) + { + while (reader.Read()) + { + var record = new SqlServerRecord + { + Operation = "InitialLoad", + Data = new Dictionary(), + ChangeTime = DateTime.UtcNow, + }; + + // Populate the data dictionary with all columns + for (int i = 0; i < reader.FieldCount; i++) + { + string colName = reader.GetName(i); + if (colName.StartsWith("__$")) + continue; + + object value = reader.GetValue(i); + record.Data[colName] = value == DBNull.Value ? 
null : value; + } + + emit(record); + } + } + } + } + + private bool IsCdcEnabledForTable() + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + + // Check if database-level CDC is enabled + cmd.CommandText = @" + SELECT is_cdc_enabled + FROM sys.databases + WHERE name = DB_NAME();"; + + bool isDbCdcEnabled = Convert.ToBoolean(cmd.ExecuteScalar()); + if (!isDbCdcEnabled) + return false; + + // Check if table-level CDC is enabled + cmd.CommandText = @" + SELECT COUNT(*) + FROM sys.tables t + JOIN sys.schemas s ON t.schema_id = s.schema_id + WHERE s.name = @schemaName + AND t.name = @tableName + AND t.is_tracked_by_cdc = 1;"; + + cmd.Parameters.AddWithValue("@schemaName", _schemaName); + cmd.Parameters.AddWithValue("@tableName", _tableName); + + int count = (int)cmd.ExecuteScalar(); + return (count > 0); + } + } + + private void EnableCdcForTable() + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + + // 1. Enable CDC at the database level if not already + cmd.CommandText = @" + IF NOT EXISTS ( + SELECT 1 FROM sys.databases + WHERE name = DB_NAME() AND is_cdc_enabled = 1 + ) + BEGIN + EXEC sys.sp_cdc_enable_db; + END + "; + cmd.ExecuteNonQuery(); + + // 2. Enable CDC on the table + cmd.CommandText = $@" + EXEC sys.sp_cdc_enable_table + @source_schema = '{_schemaName}', + @source_name = '{_tableName}', + @capture_instance = '{_schemaName}_{_tableName}', + @role_name = NULL, + @supports_net_changes = 0; + "; + cmd.ExecuteNonQuery(); + + Console.WriteLine($"CDC enabled for table [{_schemaName}].[{_tableName}]."); + } + } + + + /// + /// Poll cdc.change_tables to confirm the capture instance is created. + /// This prevents "insufficient arguments" if we call fn_cdc_get_all_changes too soon. 
+ /// + private void WaitForCaptureInstance(string captureInstanceName, int timeoutMs) + { + int elapsed = 0; + while (elapsed < timeoutMs) + { + Thread.Sleep(500); + + if (CaptureInstanceExists(captureInstanceName)) + return; // Found it + + elapsed += 500; + } + Console.WriteLine($"Warning: capture instance '{captureInstanceName}' not found within {timeoutMs} ms."); + } + + private bool CaptureInstanceExists(string captureInstanceName) + { + using var conn = new SqlConnection(_connectionString); + conn.Open(); + using var cmd = conn.CreateCommand(); + cmd.CommandText = @" + SELECT COUNT(*) + FROM cdc.change_tables + WHERE capture_instance = @capInst; + "; + cmd.Parameters.AddWithValue("@capInst", captureInstanceName); + + int count = (int)cmd.ExecuteScalar(); + return count > 0; + } + + + private byte[] GetMaxLsn() + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + cmd.CommandText = "SELECT sys.fn_cdc_get_max_lsn();"; + object result = cmd.ExecuteScalar(); + if (result == DBNull.Value) return null; + return (byte[])result; + } + } + + private byte[] GetMinLsn(string captureInstanceName) + { + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + cmd.CommandText = + $"SELECT sys.fn_cdc_get_min_lsn('{captureInstanceName}');"; + object result = cmd.ExecuteScalar(); + if (result == DBNull.Value) return null; + return (byte[])result; + } + } + + private List GetChangesSinceLastLsn( + string captureInstanceName, + byte[] fromLsn, + byte[] toLsn) + { + var changes = new List(); + string functionName = $"cdc.fn_cdc_get_all_changes_{captureInstanceName}"; + + using (var conn = new SqlConnection(_connectionString)) + using (var cmd = conn.CreateCommand()) + { + conn.Open(); + // Use 'all update old' to get both old & new update rows + cmd.CommandText = $@" + SELECT * + FROM {functionName}(@from_lsn, @to_lsn, 'all update old') + "; + + 
cmd.Parameters.Add(new SqlParameter("@from_lsn", SqlDbType.VarBinary, 10) { Value = fromLsn }); + cmd.Parameters.Add(new SqlParameter("@to_lsn", SqlDbType.VarBinary, 10) { Value = toLsn }); + + using (var reader = cmd.ExecuteReader()) + { + while (reader.Read()) + { + var record = new SqlServerRecord + { + Operation = GetOperationName(Convert.ToInt32(reader["__$operation"])), + Data = new Dictionary(), + ChangeTime = DateTime.UtcNow, + }; + + for (int i = 0; i < reader.FieldCount; i++) + { + string colName = reader.GetName(i); + if (colName.StartsWith("__$")) continue; + + object value = reader.GetValue(i); + record.Data[colName] = (value == DBNull.Value) ? null : value; + } + + changes.Add(record); + } + } + } + + // Filter out rows that are the "old" side of an update + // (operation code 3 = "Update (old)") + changes = changes + .Where(c => c.Operation != "Update (old)") + .ToList(); + + return changes; + } + + + + private string GetOperationName(int operationCode) + { + switch (operationCode) + { + case 1: return "Delete (old)"; + case 2: return "Insert"; + case 3: return "Update (old)"; + case 4: return "Update (new)"; + case 5: return "Delete (new)"; + default: return $"Unknown ({operationCode})"; + } + } + + /// + /// Compare two varbinary(10) LSNs: + /// -1 if lsnA < lsnB + /// 0 if lsnA == lsnB + /// 1 if lsnA > lsnB + /// + private int CompareLsn(byte[] lsnA, byte[] lsnB) + { + if (lsnA == null && lsnB == null) return 0; + if (lsnA == null) return -1; + if (lsnB == null) return 1; + + for (int i = 0; i < 10; i++) + { + if (lsnA[i] < lsnB[i]) return -1; + if (lsnA[i] > lsnB[i]) return 1; + } + return 0; + } + + /// + /// Computes an MD5 hash from the SqlServerRecord's Data dictionary. + /// You could also use JSON serialization, SHA256, etc. 
+ /// + private string ComputeHash(SqlServerRecord record) + { + // Build a stable string from the record's Data + // Sort by key so that the order is deterministic + var sb = new StringBuilder(); + foreach (var kv in record.Data.OrderBy(x => x.Key)) + { + // "Key=Value;" + sb.Append(kv.Key).Append('=').Append(kv.Value ?? "null").Append(';'); + } + + // Get MD5 hash of that string + using (var md5 = MD5.Create()) + { + var bytes = Encoding.UTF8.GetBytes(sb.ToString()); + var hashBytes = md5.ComputeHash(bytes); + return Convert.ToBase64String(hashBytes); + } + } + } +} From 8d9e62d358a047f79a6d38da72c0fc505cfc2bd6 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Fri, 10 Jan 2025 14:18:42 +0100 Subject: [PATCH 06/12] [v1/feature/86]: Rename SqlServerSourceAdapter to SqlServerCDCSourceAdapter --- ...lServerSourceOperator.cs => SqlServerCDCSourceOperator.cs} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/Cortex.Streams.MSSqlServer/{SqlServerSourceOperator.cs => SqlServerCDCSourceOperator.cs} (99%) diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs b/src/Cortex.Streams.MSSqlServer/SqlServerCDCSourceOperator.cs similarity index 99% rename from src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs rename to src/Cortex.Streams.MSSqlServer/SqlServerCDCSourceOperator.cs index 1789c31..a317188 100644 --- a/src/Cortex.Streams.MSSqlServer/SqlServerSourceOperator.cs +++ b/src/Cortex.Streams.MSSqlServer/SqlServerCDCSourceOperator.cs @@ -16,7 +16,7 @@ namespace Cortex.Streams.MSSqlServer /// then continues reading incremental changes via CDC. /// Now we skip duplicates by storing a hash of the last record we emitted. 
/// - public class SqlServerSourceOperator : ISourceOperator + public class SqlServerCDCSourceOperator : ISourceOperator { private readonly string _connectionString; private readonly string _schemaName; @@ -40,7 +40,7 @@ public class SqlServerSourceOperator : ISourceOperator // Key to store the last emitted record's hash private readonly string _lastRecordHashKey; - public SqlServerSourceOperator( + public SqlServerCDCSourceOperator( string connectionString, string schemaName, string tableName, From 31f1bb34aedcdc6038bdd8ed8b0b5b9d6c22aedd Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Sun, 12 Jan 2025 19:23:48 +0100 Subject: [PATCH 07/12] [v1/feature/89]: Add Base implementation for PostgreSQL CDC --- Cortex.sln | 6 + .../Assets/cortex.png | Bin 0 -> 7179 bytes .../Assets/license.md | 20 + .../Cortex.Streams.PostgreSQL.csproj | 17 + .../PostgresCDCSourceOperator.cs | 468 ++++++++++++++++++ .../PostgresRecord.cs | 24 + .../PostgresSettings.cs | 33 ++ 7 files changed, 568 insertions(+) create mode 100644 src/Cortex.Streams.PostgreSQL/Assets/cortex.png create mode 100644 src/Cortex.Streams.PostgreSQL/Assets/license.md create mode 100644 src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj create mode 100644 src/Cortex.Streams.PostgreSQL/PostgresCDCSourceOperator.cs create mode 100644 src/Cortex.Streams.PostgreSQL/PostgresRecord.cs create mode 100644 src/Cortex.Streams.PostgreSQL/PostgresSettings.cs diff --git a/Cortex.sln b/Cortex.sln index 6e9a85e..3d51707 100644 --- a/Cortex.sln +++ b/Cortex.sln @@ -49,6 +49,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.States.SQLite", "src EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.Streams.MSSqlServer", "src\Cortex.Streams.MSSqlServer\Cortex.Streams.MSSqlServer.csproj", "{81A01446-A8AA-4F9D-BB9B-B66E21B2C348}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.Streams.PostgreSQL", "src\Cortex.Streams.PostgreSQL\Cortex.Streams.PostgreSQL.csproj", 
"{0E60F75D-C44B-428A-9252-A11C365E2C56}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -146,6 +148,10 @@ Global {81A01446-A8AA-4F9D-BB9B-B66E21B2C348}.Debug|Any CPU.Build.0 = Debug|Any CPU {81A01446-A8AA-4F9D-BB9B-B66E21B2C348}.Release|Any CPU.ActiveCfg = Release|Any CPU {81A01446-A8AA-4F9D-BB9B-B66E21B2C348}.Release|Any CPU.Build.0 = Release|Any CPU + {0E60F75D-C44B-428A-9252-A11C365E2C56}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0E60F75D-C44B-428A-9252-A11C365E2C56}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0E60F75D-C44B-428A-9252-A11C365E2C56}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0E60F75D-C44B-428A-9252-A11C365E2C56}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/Cortex.Streams.PostgreSQL/Assets/cortex.png b/src/Cortex.Streams.PostgreSQL/Assets/cortex.png new file mode 100644 index 0000000000000000000000000000000000000000..a4f9727d04f91f61f20720bd64b9c139fb041236 GIT binary patch literal 7179 zcmV+m9Q5OfP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGf5&!@T5&_cPe*6Fc8=y%C^C!)jU*}Z_TUZ5zkPS!zgzWpiKoUY0LP$tR!Xokt%Ag{H3XX^@B8wnv03972 z_hwv1kf6A*-1mH+Tg$Cm_f}O`27H#NQDPYKIv zcTCHy`fO@u)s1PH)sbnLH4!`}kZK|c%WX|xUTdf>GJS=u8BkWepa#-RXr-xDk(t@H zW@WFA%*qkfWGlBRGCQ}KE6>y#u7Wj@IeBX$a|&84%`IF@nqxf9Eo!w@+!~oz+$N|! zGOwgPGT%~1WPT~v)1{qUWzc%A^7WAg4s}HqRBUi)VHMOZs3)?px+juI+89Z!G1VJc zB&xQLlmylF3mS+d)%Qn|>-%q{Gz@%}+OT;?VqO2Az$`8yqwdcMnN^=m&#Gn9GOO9N zWmSgoa}7%%RkMWUH7sFyEou6S8a90e9_zTW>Pa(9t!!{;=E_xE+0bgztQ@G(RxZ@U zX6H7t+4KlCv)TDgY<6BVo1M3Y&B-^_!sZmT(Bo4DE$penwd|>awfwQDmCY?QRMf`i z7PlFX?QCvIJDW%9VDpMQ_+#lhHowFWelF`|^GiF~)8*^A%D5JkcX3sij~m#6$_@0m zu(F#itm@{El|3x6Y9mXm>S2l1c-+Vmt9w~uO)pzi-OCo$^sz;?rZ%x8Q+55MWT<|? 
zR>L4ErD2dQZhVHNG;H}~QR9|B$4$wo*gJiBEt{HI$)=I2xEuhhIXqJqL=&J0vCala z5N(h%O%XsY0Gvq>4G}=VGamuY)|~uiL2C?vYdAnt09d$|;L#&sk_?c=ZERj~yDb1L z>0t9s0idC$%Q^*_5G9Z#17M~3$VC820LaAwn(8AZK>|nur~sJ;c=dyOfYHG-0D8c)!3BgD z0ANym|IN6m>E(B(W>gS3OJ2?cFb6y?Pk02*2B>qM3!dyehx-I@f^^Pv!7~6xXt|F=bO5v<25_GNQSUxC#H8AO4sJ?%xdNQ*J~@1Hcqd#$aK3;6*i@;(*!qp zN$K#ErDYCqdVq%egycmAs0*E1fQrN=5J1U&?wU^l&=op;GEa^BbZsAjqqfh>eP;6F z)Ao6|Z&6L30M3*Oo+ETR-KT=*2%XxtPvO3(3wF_Ye(o~>8t!w1PapS14NvJlvF#Jx zr)&H4@D^1Y;3h9EV~`Hsl#Ft1yQhN}IO}{ANsFp%P!{Yu=K-8E>wGlM^MYqOFId^& z2%iLQaft=a3!W})l0KJV3MhJsTZ0AEbxpuK9u{Elz-8;ROww zR?TOCTT;p_`2Zw{iqPrGJT3QmCoihd8BOM?bf27cD&6OV=tx|mt@-5SMRuQ8=yZio zXXvycikWBcaNHyroC=-<(Ct1seEPUg8#$OY0`0p9-Gr zJ{g`@!7jLOL4{YlXVrYeag)-D-QWax7C_l`CNRIYFQbZ{KY1z6$Kr!#EgZ*Wg$`|~Vo!z(PSdoO8a_bY<-k(PJvp>_%Gv>m7eXoO8y z=v38xA~-=V_sQW?)$U2~rleO;=quWCh;^O(o^87Do2U9N{K`5{e9LlrUu6j^#7=7= z^K^|iM3;Gn?{VS6|5NG*Skr-zS?{?!Y}3WxJ=J&q7q<4T&slowP6s>-T#)3&a-QwJ zg;hNOSL6bxgy$1FEqErlrDbgSx;<>mwFhkL2aIjG<}W;N@XCF*V*N{$o67aDm_{0CMM*lVjE#{Dj{8=BvSl58QYDE=yg#mCogYr)>Me!L!;uW!7o8S@;_g zl7_uQY-^OC2f{JS*1bsWqcU+(hEBQd6W{?{d}`5fJiw)yk^ly7`)qjlJuY(RJKqrq z`GfcguyO3dif(EzUGOZ31~>troplQEFvkURT|{^Q=UekNvpM)VZw)KievEA%dfrbnvSA!cLKpGf(cmS-Gnz;po5gd(_bb!i_sP^nlvAr;IF94NvVp2S5{E zIe{x2j!!EhSb%2%93eco6oY=f@81m-UN#$TMCJ2m2ny;!--6vrS|s-=;JMtVsQK{x z#ZBAU#4ZIzS;aOu?=L=xFyyPUm3?Au_nm}G zSbMn7S+E-pEG2LS!||zwHaG#E*!EfQ6af9;A!OnMw7>mzcz7Q$)_Ul3`k>r-s{YQf z&a>g+c?iv$uRaJBBI%f+2drT5@6;XkD29qGXyza-z)iHk1%yWcqj%nyCZ5l%AP$=9nz{{A|*f(qAK$Qw5&SJ zo?;tL{umCL@!pybd`vwo2)C-yhLO6Dw|gDjaV9t?JOP~4_C;OuneH?GpWBB}^e|le zag}$AW%vAz`a|7QB%*g8o`;LG_RW8Vt~&u6od!uYy_Dq0U1st~gFVI<0$vHgCFeW9 zY2hhCr&r?Qf@c7ngiJHNoWjXST(|Mu9k#gnSvqH6=P3$yBu=={wJ%+went7Uf(X!h z-~WXzZrVoeq^hse9xCz}qzwws23No)rt09S0S1Am1V|yXgw4pRV@44sR;1jC7Xs)LiHjQyTH$)pCo|DzJmknGB*FnHD z$os71*<)0+QxUHkDiUUvsUTYIo+5d105l-ZFV=!{!t(V?P4YG}AheuQ< z)a-emPmxfTfpUQ9H^!gPggt-s-G2;kgHCgTT|RX;9RB!N9ozB9ZX| zC?+lh(FY)Yh70OmycjxJ!EqZ-{m7DPHj(pOlXI8VSTHMgo+MZULl4-%l?OEJ5)_`9 
z(Pp&nqIS~RQSCoP!Z}bY&#D&NJyVLBFFJ5unWq3x0WgUB;?oN0L#}=Ev(RZd;I$w5 znl1!QRFdFfPE1bke%62S4+_m(COm)VAtDT1xySPR-{571QM}KRM;lZ_MXEts6QFA; zw9b8|16!j67Zjc@boz9c;XFw15r)DzZxBWORmK|j4NgF5PGS zm+!Fw$N|sWdE)pWr0VN*zzZ}*!U@j?XipDx^|lm-0Y}oo696jUd1alV^A}LqXi93!NSS@!Xt^``O^oeP4KkLkM3S8lMvPE8BGbE_G&O zSOp#2xInA-f57@K{6;+-NMz8F40r&k255&)Kx;ev74^(xYZ0-#OziKJ0jff0xXe@P zaVZ!nI7}*&F!yQT0hY^wYabyvt@HHoT#1VeFSU>ga2wjm_3!?`+K+t0I!}Dhx=#K`zsKJ#c;`7hI8_PnDx<%n&g8Sr z!jJQb?hEfc?!GyX3>@hZz>@&tSiDhOd}r?l!k8HzeONU`<1)T0q+4D7<$MGH@|7DY0!)`sNf0U{Nc%2ryo3k zYk8F5MCl*~1Sr76`LGIe<9oNQf*FoLRtTr`T28yoHk`i6)*H~)9sibok2Z4f@_qjM zf^u?^wsN8?5{EPmb(x)1}`npzBZk zK+o?!^Aiu9M%Xmr;kebWen8VL(9`1XsMZV>1%xNpe5UigK7Hn?bf27c3htXV*5HC9E-H8v zQVLjNO&@DL^o0>3BNXanKw4qK>g9W^^Tc=LJcQTM9q-Uo1{xL`!*%Sgf?ZrmYrvb8 z+cXy7py=U=p_6`}TEx;@o?~r?zoa&5vz(0h=^8Jt0lwzN^VD`=`IvGYy9CcYNNZ1# za5+x{FL2wZSWPsB;6&*lx&d+-0FD2b?l|twAOi|v6&dNmiVZZ|T3k`<4bVGuqCYdJ z`@8_k?z7syoW@uNr-o;`58)D7R_oy}Jo_{KgdP}_8OU5QZBX8Xz_ovi1TR?ld~^kS z41qHh0GVslES5%vl{{;4J9? z7P1M+d2D6RJ~ntY+3(wPe{{`Su0WFVqlEuC$BTYv9L^HWA zV=cfwTA*TX1!4iyWSsz~+Unn3^U3i1O9l&`0dU5D1Govv`7A!Igw^i37<#IBw8HCP z?j*u8k`;R!kP)~~129qxa52`At{d)Zax&K5Asn8Q`3=YVpf$Fp2mId zn$OF9qVs~)e6bBKxq#X=EbZ;T9BwrYg!V9OB7$1h9~cut4myz&XdXp z^=9L-3+~B9IkX3H&zYN{lM#fxhJ9CQur&;L90Tapf5mE`YBpKtJeQ=5Q-bFRn-)NQ z$w1%)fYVpj#U{8XlXBUtye8IuG!i-)!EsHmen=MzdtuU+P9foKzIvbK558e7ht_UB zr)c*`8h~Ll&*@n)2o7x;QgQ6#720W7t$Hj%+YBPyqKm9x;Em8B6YwzKWogTE)WI!p zf2D`#?>?6U1GsN`R())NL$3&W{mY*_75XCcjPPfcTt7(R7TshxHzwFt?LHU!W-wRj zV4J9ksYP0NYJfI8ch;$cXE@JFUL4MgA#lhblk-?My0eDvg$ipBdi$}Z?h}i09o=O( z|BS2#w&smbL&L*Ofl+hJCPsg$1y2TOZg;E*oBUf+)HRyMd|DWY2|=M9{Q(l5YZS8LvDQJ_zf6|OeXj5+*L)g) zCd7mlF#-xlnb`aj=d?B?5Z$K>of`KAtogTb+Y7WWR~Tq?%(aKKqYuKZ ze15HjaXz$n*kl{i`@+JbLjT}DDNG|2b8rFSxfEcKmbUw-9vg5^BxchW*!8~8SJTKB z!%sM7>-C4MY{v--d3G`qPy)Os5_4#lSI?QBLc_yx)w|Ep#mD;*UFQqWbBctVbsBBo zSb)P_j&;qLfgHAdL3@bRGzc3wpZprZh4v0*LLSZ0#H69H?+M2h48BP-(5#TDa-XW* zbHTISr)>K&s~#^nTx3`si{QiV--Qc=wE~#Y>&zfS%1BnQlq;8Zh7C6Xi4VZuW0~!{ 
z>AVhjDhEpLQ#(-nT2sM&j}sj3YV`9}>^e0fXjmVBFOqmN(H|Nw#QB(>{5M$u7oS>8 zAunvmL^*?nBiObd=hVT|g-#2e!g-FwCF3!IgWGcZUgLLjm^<3oUJzfrFoD7&ptxVh z5ruBAj_xl+kEh!p>HlED{XIoMcrbKOj8F>0Z`>UN7C}Rz#)TNx@HGm z_%O)`2YsK~5c2}duOSOJ3cmJiaOe-#eDG81;|tOr;yD=g#Kkc z-20a~LZ`0g)3$vTqYn;uI(7@fnzXP*HZDZ%ix=qw2MK$~TCs6=&zakFEL6GcG_4N^ zbo`=R+A1F#0*vH6m+w4j?BnU&ghvT48uz7-GB`lPLWS11z6c$9Z~?HKDj~Bf&_$=( zET@rr@y%^EXe9gELzdI`nsJU`j>R`n4PJQ=DzxJmEJnd=B4nB?YhCbE;Zp@qRC@X7 zf#cm-6|DZ{%OeUoSgVcpg12k#*Kxe2(Hnv-4P~d8S>KX1&jw0t%=2Musm)mvHl=WbI^evqCKK8=qryV7NafQRsjtQ3W}d=6Ro47iAO zFNgnDF1Wt!@Yif+b|VeIn(&kWgSv0jz$NF?8^bPkVLmXqex(#`AKi z`wK~oR|13!+4|O3Y<6z5f2L`M`|MS0?V-=ZM~>@^zAKe?Zaa5SQUdgWH)XWJVdJwf zj>8AVG;r=*A85F^iP^@Qf?#88+x~%X47p5=KGq}1=aYr37~ilFcDxYBEUN9JbBX)6 zXu+w%rwDFo*(iZSm`TZ}FRG39F2(cFV~;m))?!|QBe11=*U29OK{JKtqW$9M3HCbp zE;;P663xws&x7qe(9=UniemR9EkN0MCcsezXAF=wzV;!#dB{x7l`@{weDD*RfEpx& zgd4GL41=)Ge55$1hY&IW0N)s54l(%&pNW5a<8SPlf4I*n3(whf;TM+C{=BfNR@&-c z0Z|9ghFCg!;2>Ol%u>ScbCu7Zrf>Jq2|KT@=z5vWSWy=U8l68mk2WQ(*>m1?zChZ2 zj=uX8#d(mB%p*V{jck-`dz)45R-Ds1_gPl5?Fh{Z11xkL$U8}dcHfdw1_$9T#RcXI z?)Ado@w^}zq$v9GUGeUXR>!P_YGw zSNL?mTU;^@ZuF#srv_-c&kLRfa2(udO9wAN=+wX)2lrT|f+qlUgiSki>flWp2lqIo zb6*sp(}b8-JPz)$OR)`z4xZM1odhs- z99)b@2TuSf!W#z{Lki+N6JToLIJnqS0C?lzVpv-D84y!AxI3-`#}y+|!6R^Y0Pd#y zfN{l+6z)qYycst!Iqv|5(%lD)D<&j`P4t{hOer`JH!(H)FB4OWzCm&4I%r%mq(a)B z1FR;d7W^d+|DH_Bc_Kcw;Pv?A{9DxKAz>L;Ea?u|fYF4My#E2S{{wP}i{H;-Y-|7k N002ovPDHLkV1i-n!nFVZ literal 0 HcmV?d00001 diff --git a/src/Cortex.Streams.PostgreSQL/Assets/license.md b/src/Cortex.Streams.PostgreSQL/Assets/license.md new file mode 100644 index 0000000..3c845d4 --- /dev/null +++ b/src/Cortex.Streams.PostgreSQL/Assets/license.md @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2025 Buildersoft + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj b/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj new file mode 100644 index 0000000..c9de90d --- /dev/null +++ b/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj @@ -0,0 +1,17 @@ + + + + net8.0 + enable + enable + + + + + + + + + + + diff --git a/src/Cortex.Streams.PostgreSQL/PostgresCDCSourceOperator.cs b/src/Cortex.Streams.PostgreSQL/PostgresCDCSourceOperator.cs new file mode 100644 index 0000000..a99d05b --- /dev/null +++ b/src/Cortex.Streams.PostgreSQL/PostgresCDCSourceOperator.cs @@ -0,0 +1,468 @@ +using Cortex.States; +using Cortex.Streams.Operators; +using Npgsql; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Security.Cryptography; +using System.Text; +using System.Text.Json.Nodes; +using System.Threading.Tasks; + +namespace Cortex.Streams.PostgreSQL +{ + /// + /// PostgreSQL CDC Source Operator that: + /// 1. Optionally configures logical replication for a table (via publication + slot). + /// 2. Performs an initial load of the table if requested. + /// 3. Polls changes from a replication slot using wal2json. + /// 4. 
Emits new records, skipping duplicates with a hashed checkpoint. + /// 5. Uses robust error handling and exponential back-off. + /// + public class PostgresSourceOperator : ISourceOperator, IDisposable + { + private readonly string _connectionString; + private readonly string _schemaName; + private readonly string _tableName; + private readonly bool _autoEnableCdc; + private readonly bool _doInitialLoad; + + private readonly int _pollIntervalMs; + private readonly int _maxBackOffSeconds; + + private readonly IDataStore _checkpointStore; + + // Keys for checkpoint store + private readonly string _checkpointKey; + private readonly string _initialLoadCheckpointKey; + private readonly string _lastRecordHashKey; + + // Logical replication details + private readonly string _slotName; + private readonly string _publicationName; + + // Thread & cancellation + private Thread _pollingThread; + private bool _stopRequested; + private bool _disposed; + + public PostgresSourceOperator( + string connectionString, + string schemaName, + string tableName, + string slotName = "my_slot", + string publicationName = "my_publication", + PostgresSettings postgresSettings = null, + IDataStore checkpointStore = null) + { + if (string.IsNullOrWhiteSpace(connectionString)) + throw new ArgumentException("Connection string cannot be null or empty.", nameof(connectionString)); + if (string.IsNullOrWhiteSpace(schemaName)) + throw new ArgumentException("Schema name cannot be null or empty.", nameof(schemaName)); + if (string.IsNullOrWhiteSpace(tableName)) + throw new ArgumentException("Table name cannot be null or empty.", nameof(tableName)); + + postgresSettings ??= new PostgresSettings(); + + _checkpointStore = checkpointStore + ?? 
new InMemoryStateStore($"{_schemaName}.{_tableName}.STORE"); + + _connectionString = connectionString; + _schemaName = schemaName; + _tableName = tableName; + + _autoEnableCdc = postgresSettings.ConfigureCDCInServer; + _doInitialLoad = postgresSettings.DoInitialLoad; + _pollIntervalMs = (int)postgresSettings.PullInterval.TotalMilliseconds; + _maxBackOffSeconds = postgresSettings.MaxBackOffSeconds; + + // Checkpoint keys + _checkpointKey = $"{_schemaName}.{_tableName}.CDC.LSN"; + _initialLoadCheckpointKey = $"{_schemaName}.{_tableName}.INITIAL_LOAD_DONE"; + _lastRecordHashKey = $"{_schemaName}.{_tableName}.CDC.LAST_HASH"; + + _slotName = slotName; + _publicationName = publicationName; + } + + /// + /// Start the operator. This sets up CDC (if requested) and launches the polling thread. + /// + public void Start(Action emit) + { + if (emit == null) throw new ArgumentNullException(nameof(emit)); + + Console.WriteLine($"Starting PostgreSQL CDC operator for {_schemaName}.{_tableName}..."); + + // 1. Optionally configure logical replication (publication + slot). + if (_autoEnableCdc) + { + EnsureLogicalReplicationSetup(); + } + + // 2. Perform initial load if needed. + if (_doInitialLoad && _checkpointStore.Get(_initialLoadCheckpointKey) == null) + { + Console.WriteLine($"Performing initial load for {_schemaName}.{_tableName}..."); + RunInitialLoad(emit); + _checkpointStore.Put(_initialLoadCheckpointKey, new byte[] { 0x01 }); + Console.WriteLine($"Initial load completed for {_schemaName}.{_tableName}"); + } + else + { + Console.WriteLine($"Skipping initial load for {_schemaName}.{_tableName} (already done or disabled)."); + } + + // 3. If we have no saved LSN, we can either start from the slot's current position or from scratch. + var lastLsnBytes = _checkpointStore.Get(_checkpointKey); + if (lastLsnBytes == null) + { + // For production, it's typical to set the checkpoint to the current tip + // so we only see future changes. 
But you can also read from the slot's + // start if you want old changes that are still retained. + Console.WriteLine($"No existing LSN checkpoint found for {_schemaName}.{_tableName}. Will start from slot's current position."); + } + else + { + var lastLsn = Encoding.UTF8.GetString(lastLsnBytes); + Console.WriteLine($"Found existing LSN checkpoint = {lastLsn} for {_schemaName}.{_tableName}."); + } + + // 4. Spin up the background polling thread. + _stopRequested = false; + _pollingThread = new Thread(() => PollCdcChanges(emit)) + { + IsBackground = true, + Name = $"PostgresCdcPolling_{_schemaName}_{_tableName}" + }; + _pollingThread.Start(); + } + + /// + /// Stop the operator gracefully. + /// + public void Stop() + { + Console.WriteLine($"Stop requested for PostgreSQL CDC operator {_schemaName}.{_tableName}."); + _stopRequested = true; + _pollingThread?.Join(); + Console.WriteLine($"PostgreSQL CDC operator stopped for {_schemaName}.{_tableName}."); + } + + /// + /// Implements the main CDC polling logic with exponential back-off on errors. + /// + private void PollCdcChanges(Action emit) + { + int backOffSeconds = 1; + + while (!_stopRequested) + { + try + { + var lastLsnBytes = _checkpointStore.Get(_checkpointKey); + string lastLsn = lastLsnBytes == null ? null : Encoding.UTF8.GetString(lastLsnBytes); + + // Retrieve new changes from the slot + var (newChanges, latestLsn) = GetChangesSinceLastLsn(lastLsn); + + // Retrieve the last record's hash we stored (for dedup) + var lastHashBytes = _checkpointStore.Get(_lastRecordHashKey); + string lastHash = lastHashBytes == null ? 
null : Encoding.UTF8.GetString(lastHashBytes); + + // Emit each new record + foreach (var change in newChanges) + { + if (_stopRequested) break; + + // Compute a hash of this record to detect duplicates + var currentHash = ComputeHash(change); + if (currentHash == lastHash) + { + Console.WriteLine($"Skipping duplicate record for {_schemaName}.{_tableName}."); + continue; + } + + emit(change); + + // Update last-hash checkpoint + lastHash = currentHash; + _checkpointStore.Put(_lastRecordHashKey, Encoding.UTF8.GetBytes(lastHash)); + } + + // Update LSN checkpoint if new changes arrived + if (newChanges.Any() && !string.IsNullOrEmpty(latestLsn)) + { + _checkpointStore.Put(_checkpointKey, Encoding.UTF8.GetBytes(latestLsn)); + Console.WriteLine($"Updated LSN checkpoint to {latestLsn} for {_schemaName}.{_tableName}."); + } + + // Reset back-off if we succeeded + backOffSeconds = 1; + + // Sleep before the next poll + Thread.Sleep(_pollIntervalMs); + } + catch (Exception ex) + { + Console.WriteLine($"Error in Postgres CDC polling loop for {_schemaName}.{_tableName}."); + + // Exponential back-off + Thread.Sleep(TimeSpan.FromSeconds(backOffSeconds)); + backOffSeconds = Math.Min(backOffSeconds * 2, _maxBackOffSeconds); + } + } + } + + /// + /// Reads the entire table once and emits each row as an Operation='InitialLoad'. + /// + private void RunInitialLoad(Action emit) + { + using var conn = new NpgsqlConnection(_connectionString); + conn.Open(); + + // For production usage, consider chunking or paging if the table is large. 
+ string sql = $@"SELECT * FROM ""{_schemaName}"".""{_tableName}"";"; + using var cmd = new NpgsqlCommand(sql, conn) + { + CommandTimeout = 180 // or as appropriate for large table scans + }; + + using var reader = cmd.ExecuteReader(); + while (reader.Read()) + { + if (_stopRequested) + break; + + var record = new PostgresRecord + { + Operation = "InitialLoad", + Data = new Dictionary(), + ChangeTime = DateTime.UtcNow + }; + + for (int i = 0; i < reader.FieldCount; i++) + { + string colName = reader.GetName(i); + object value = reader.GetValue(i); + record.Data[colName] = (value == DBNull.Value) ? null : value; + } + + emit(record); + } + } + + /// + /// Ensures that a publication and replication slot exist for this table, if requested. + /// + private void EnsureLogicalReplicationSetup() + { + Console.WriteLine($"Ensuring logical replication setup for table {_schemaName}.{_tableName}..."); + + using var conn = new NpgsqlConnection(_connectionString); + conn.Open(); + + // 1. Create publication if it doesn't exist + // We do not have an IF NOT EXISTS for CREATE PUBLICATION in older PG versions, + // so we can check pg_publication or catch an error. + try + { + string createPubSql = $@" + CREATE PUBLICATION ""{_publicationName}"" + FOR TABLE ""{_schemaName}"".""{_tableName}""; + "; + using var pubCmd = new NpgsqlCommand(createPubSql, conn); + pubCmd.ExecuteNonQuery(); + Console.WriteLine($"Created publication '{_publicationName}' for {_schemaName}.{_tableName}."); + } + catch (PostgresException ex) when (ex.SqlState == "42710") + { + // 42710 = duplicate_object + Console.WriteLine($"Publication '{_publicationName}' already exists, skipping."); + } + + // 2. Create replication slot if it doesn't exist + // Similar approach with a try/catch. 
+ try + { + string createSlotSql = $@" + SELECT * FROM pg_create_logical_replication_slot('{_slotName}', 'wal2json'); + "; + using var slotCmd = new NpgsqlCommand(createSlotSql, conn); + slotCmd.ExecuteNonQuery(); + Console.WriteLine($"Created logical replication slot '{_slotName}' for {_schemaName}.{_tableName}."); + } + catch (PostgresException ex) when (ex.SqlState == "42710") + { + Console.WriteLine($"Logical replication slot '{_slotName}' already exists, skipping." ); + } + + Console.WriteLine($"Logical replication enabled for table {_schemaName}.{_tableName}. Publication: {_publicationName}, Slot: {_slotName}"); + } + + /// + /// Retrieves changes since the last known LSN using pg_logical_slot_get_changes(...). + /// Returns the list of PostgresRecords and the highest LSN observed. + /// + private (List, string) GetChangesSinceLastLsn(string lastLsn) + { + var changes = new List(); + string newLastLsn = lastLsn; + + // Choose whether to pass lastLsn or NULL + // If lastLsn is NULL, we start from slot's current position + var sql = lastLsn == null + ? $@"SELECT location, xid, data + FROM pg_logical_slot_get_changes('{_slotName}', NULL, NULL, + 'pretty-print', 'false', + 'include-lsn', 'true', + 'include-timestamp', 'true');" + : $@"SELECT location, xid, data + FROM pg_logical_slot_get_changes('{_slotName}', '{lastLsn}', NULL, + 'pretty-print', 'false', + 'include-lsn', 'true', + 'include-timestamp', 'true');"; + + using var conn = new NpgsqlConnection(_connectionString); + conn.Open(); + + using var cmd = new NpgsqlCommand(sql, conn) + { + CommandTimeout = 60 + }; + + using var reader = cmd.ExecuteReader(); + while (reader.Read()) + { + if (_stopRequested) break; + + // location: text (LSN), e.g. 
"0/16B6A98" + // xid: int + // data: JSON describing the changes + var location = reader.GetString(0); + var xid = reader.GetInt32(1); + var json = reader.GetString(2); + + newLastLsn = location; // track the highest location + + // Parse the wal2json output + // Typically, it looks like: + // { + // "change": [ + // { + // "kind": "insert", + // "schema": "public", + // "table": "mytable", + // "columnnames": ["id","name"], + // "columntypes": ["integer","text"], + // "columnvalues": [1,"Alice"] + // } + // ] + // } + + try + { + var rootNode = JsonNode.Parse(json); + var changeArray = rootNode?["change"] as JsonArray; + if (changeArray == null || changeArray.Count == 0) + continue; + + foreach (var changeObj in changeArray) + { + if (_stopRequested) break; + + var kind = changeObj?["kind"]?.ToString()?.ToLowerInvariant() ?? "unknown"; + var colNames = changeObj?["columnnames"] as JsonArray; + var colValues = changeObj?["columnvalues"] as JsonArray; + + // For updates or deletes, wal2json can present different fields, e.g. "oldkeys" + // or "keynames"/"keyvalues". We simplify by focusing on inserts/updates here, + // but handle "delete" with oldkeys if needed. + + var record = new PostgresRecord + { + Operation = kind switch + { + "insert" => "INSERT", + "update" => "UPDATE", + "delete" => "DELETE", + _ => "UNKNOWN" + }, + Data = new Dictionary(), + ChangeTime = DateTime.UtcNow + }; + + if (colNames != null && colValues != null) + { + for (int i = 0; i < colNames.Count; i++) + { + string name = colNames[i]?.ToString(); + JsonNode val = colValues[i]; + record.Data[name] = val?.GetValue(); + } + } + else if (kind == "delete") + { + // For DELETE, wal2json can present "oldkeys": { "keynames": [...], "keyvalues": [...] 
} + var oldKeys = changeObj?["oldkeys"]; + var keyNames = oldKeys?["keynames"] as JsonArray; + var keyValues = oldKeys?["keyvalues"] as JsonArray; + if (keyNames != null && keyValues != null) + { + for (int i = 0; i < keyNames.Count; i++) + { + string name = keyNames[i]?.ToString(); + record.Data[name] = keyValues[i]?.GetValue(); + } + } + } + + changes.Add(record); + } + } + catch (Exception parseEx) + { + Console.WriteLine($"Failed to parse wal2json output for XID={xid} at LSN={location}. JSON: {json}"); + } + } + + return (changes, newLastLsn); + } + + /// + /// Compute an MD5 hash from the PostgresRecord's Data dictionary, + /// sorted by key for deterministic ordering. + /// + private string ComputeHash(PostgresRecord record) + { + var sb = new StringBuilder(); + foreach (var kv in record.Data.OrderBy(x => x.Key)) + { + sb.Append(kv.Key).Append('=').Append(kv.Value ?? "null").Append(';'); + } + + using var md5 = MD5.Create(); + var bytes = Encoding.UTF8.GetBytes(sb.ToString()); + var hashBytes = md5.ComputeHash(bytes); + return Convert.ToBase64String(hashBytes); + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + protected virtual void Dispose(bool disposing) + { + if (_disposed) return; + if (disposing) + { + // free managed resources + Stop(); + } + _disposed = true; + } + } +} diff --git a/src/Cortex.Streams.PostgreSQL/PostgresRecord.cs b/src/Cortex.Streams.PostgreSQL/PostgresRecord.cs new file mode 100644 index 0000000..35f5351 --- /dev/null +++ b/src/Cortex.Streams.PostgreSQL/PostgresRecord.cs @@ -0,0 +1,24 @@ +namespace Cortex.Streams.PostgreSQL +{ + /// + /// Represents a generic CDC record for PostgreSQL. + /// + public class PostgresRecord + { + /// + /// The operation name or type, e.g. 'INSERT', 'UPDATE', 'DELETE', 'InitialLoad'. + /// + public string Operation { get; set; } + + /// + /// The primary key or other columns in the changed record, + /// as a dictionary of column names to values. 
+ /// + public Dictionary Data { get; set; } + + /// + /// The approximate time of this change or emission. + /// + public DateTime ChangeTime { get; set; } + } +} diff --git a/src/Cortex.Streams.PostgreSQL/PostgresSettings.cs b/src/Cortex.Streams.PostgreSQL/PostgresSettings.cs new file mode 100644 index 0000000..2e9007a --- /dev/null +++ b/src/Cortex.Streams.PostgreSQL/PostgresSettings.cs @@ -0,0 +1,33 @@ +namespace Cortex.Streams.PostgreSQL +{ + public class PostgresSettings + { + /// + /// Whether to perform an initial table load before starting the CDC stream. + /// + public bool DoInitialLoad { get; set; } + + /// + /// Interval at which we poll the replication slot for new changes (if not streaming). + /// + public TimeSpan PullInterval { get; set; } + + /// + /// If true, will attempt to create the needed publication and logical slot if they don’t exist. + /// + public bool ConfigureCDCInServer { get; set; } + + /// + /// Maximum wait time for back-off in seconds if an error repeatedly occurs. 
+ /// + public int MaxBackOffSeconds { get; set; } + + public PostgresSettings() + { + DoInitialLoad = true; + PullInterval = TimeSpan.FromSeconds(3); + ConfigureCDCInServer = false; + MaxBackOffSeconds = 60; + } + } +} \ No newline at end of file From b7eb1fbb7ab32a58de8cf7b87d2606bb70b6ccb1 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Sat, 18 Jan 2025 17:35:04 +0100 Subject: [PATCH 08/12] [v1/feature/86]: Implementation of CDC for PostgreSQL Add PostgresRecord, Add PostgresSettings, Update PostgresCDCSourceOperator --- .../Cortex.Streams.MSSqlServer.csproj | 12 +- .../Cortex.Streams.PostgreSQL.csproj | 64 ++++-- .../PostgresCDCSourceOperator.cs | 194 ++++++++++-------- .../PostgresRecord.cs | 5 +- .../PostgresSettings.cs | 35 +++- 5 files changed, 207 insertions(+), 103 deletions(-) diff --git a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj index 70866ce..ff5ec02 100644 --- a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj +++ b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj @@ -44,12 +44,12 @@ - - - + + + - - - + + + diff --git a/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj b/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj index c9de90d..84bb8c3 100644 --- a/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj +++ b/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj @@ -1,17 +1,55 @@  - - net8.0 - enable - enable - - - - - - - - - + + net9.0;net8.0;net7.0 + + 1.0.1 + 1.0.1 + Buildersoft Cortex Framework + Buildersoft + Buildersoft,EnesHoxha + Copyright © Buildersoft 2025 + + Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. 
By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. + + + https://github.com/buildersoftio/cortex + cortex cdc mediator eda streaming distributed streams states postgres + + 1.0.1 + license.md + cortex.png + Cortex.Streams.PostgreSQL + True + True + True + + Just as the Cortex in our brains handles complex processing efficiently, Cortex Data Framework brings brainpower to your data management! + https://buildersoft.io/ + README.md + + + + + True + \ + + + True + + + + True + + + + + + + + + + + diff --git a/src/Cortex.Streams.PostgreSQL/PostgresCDCSourceOperator.cs b/src/Cortex.Streams.PostgreSQL/PostgresCDCSourceOperator.cs index a99d05b..542d7aa 100644 --- a/src/Cortex.Streams.PostgreSQL/PostgresCDCSourceOperator.cs +++ b/src/Cortex.Streams.PostgreSQL/PostgresCDCSourceOperator.cs @@ -1,13 +1,14 @@ using Cortex.States; using Cortex.Streams.Operators; using Npgsql; -using System; -using System.Collections.Generic; -using System.Linq; +using Microsoft.Extensions.Logging; using System.Security.Cryptography; using System.Text; using System.Text.Json.Nodes; -using System.Threading.Tasks; +using System.Threading; +using System; +using System.Collections.Generic; +using System.Linq; namespace Cortex.Streams.PostgreSQL { @@ -26,6 +27,7 @@ public class PostgresSourceOperator : ISourceOperator, IDisposab private readonly string _tableName; private readonly bool _autoEnableCdc; private readonly bool _doInitialLoad; + private readonly ReplicaIdentityMode _replicaIdentity; private readonly int _pollIntervalMs; private readonly int _maxBackOffSeconds; @@ -46,6 +48,9 @@ public class PostgresSourceOperator : ISourceOperator, IDisposab private bool _stopRequested; private bool _disposed; + // Optional logger + private readonly ILogger _logger; + public PostgresSourceOperator( string connectionString, 
string schemaName, @@ -53,7 +58,8 @@ public PostgresSourceOperator( string slotName = "my_slot", string publicationName = "my_publication", PostgresSettings postgresSettings = null, - IDataStore checkpointStore = null) + IDataStore checkpointStore = null, + ILogger logger = null) // <-- Optional ILogger { if (string.IsNullOrWhiteSpace(connectionString)) throw new ArgumentException("Connection string cannot be null or empty.", nameof(connectionString)); @@ -65,7 +71,7 @@ public PostgresSourceOperator( postgresSettings ??= new PostgresSettings(); _checkpointStore = checkpointStore - ?? new InMemoryStateStore($"{_schemaName}.{_tableName}.STORE"); + ?? new InMemoryStateStore($"{schemaName}.{tableName}.STORE"); _connectionString = connectionString; _schemaName = schemaName; @@ -73,6 +79,7 @@ public PostgresSourceOperator( _autoEnableCdc = postgresSettings.ConfigureCDCInServer; _doInitialLoad = postgresSettings.DoInitialLoad; + _replicaIdentity = postgresSettings.ReplicaIdentity; _pollIntervalMs = (int)postgresSettings.PullInterval.TotalMilliseconds; _maxBackOffSeconds = postgresSettings.MaxBackOffSeconds; @@ -83,6 +90,9 @@ public PostgresSourceOperator( _slotName = slotName; _publicationName = publicationName; + + // Store logger (can be null) + _logger = logger; } /// @@ -92,7 +102,7 @@ public void Start(Action emit) { if (emit == null) throw new ArgumentNullException(nameof(emit)); - Console.WriteLine($"Starting PostgreSQL CDC operator for {_schemaName}.{_tableName}..."); + LogInformation($"Starting PostgreSQL CDC operator for {_schemaName}.{_tableName}..."); // 1. Optionally configure logical replication (publication + slot). if (_autoEnableCdc) @@ -103,29 +113,26 @@ public void Start(Action emit) // 2. Perform initial load if needed. 
if (_doInitialLoad && _checkpointStore.Get(_initialLoadCheckpointKey) == null) { - Console.WriteLine($"Performing initial load for {_schemaName}.{_tableName}..."); + LogInformation($"Performing initial load for {_schemaName}.{_tableName}..."); RunInitialLoad(emit); _checkpointStore.Put(_initialLoadCheckpointKey, new byte[] { 0x01 }); - Console.WriteLine($"Initial load completed for {_schemaName}.{_tableName}"); + LogInformation($"Initial load completed for {_schemaName}.{_tableName}"); } else { - Console.WriteLine($"Skipping initial load for {_schemaName}.{_tableName} (already done or disabled)."); + LogInformation($"Skipping initial load for {_schemaName}.{_tableName} (already done or disabled)."); } // 3. If we have no saved LSN, we can either start from the slot's current position or from scratch. var lastLsnBytes = _checkpointStore.Get(_checkpointKey); if (lastLsnBytes == null) { - // For production, it's typical to set the checkpoint to the current tip - // so we only see future changes. But you can also read from the slot's - // start if you want old changes that are still retained. - Console.WriteLine($"No existing LSN checkpoint found for {_schemaName}.{_tableName}. Will start from slot's current position."); + LogInformation($"No existing LSN checkpoint found for {_schemaName}.{_tableName}. Will start from slot's current position."); } else { var lastLsn = Encoding.UTF8.GetString(lastLsnBytes); - Console.WriteLine($"Found existing LSN checkpoint = {lastLsn} for {_schemaName}.{_tableName}."); + LogInformation($"Found existing LSN checkpoint = {lastLsn} for {_schemaName}.{_tableName}."); } // 4. Spin up the background polling thread. 
@@ -143,10 +150,10 @@ public void Start(Action emit) /// public void Stop() { - Console.WriteLine($"Stop requested for PostgreSQL CDC operator {_schemaName}.{_tableName}."); + LogInformation($"Stop requested for PostgreSQL CDC operator {_schemaName}.{_tableName}."); _stopRequested = true; _pollingThread?.Join(); - Console.WriteLine($"PostgreSQL CDC operator stopped for {_schemaName}.{_tableName}."); + LogInformation($"PostgreSQL CDC operator stopped for {_schemaName}.{_tableName}."); } /// @@ -179,7 +186,7 @@ private void PollCdcChanges(Action emit) var currentHash = ComputeHash(change); if (currentHash == lastHash) { - Console.WriteLine($"Skipping duplicate record for {_schemaName}.{_tableName}."); + LogInformation($"Skipping duplicate record for {_schemaName}.{_tableName}."); continue; } @@ -194,7 +201,7 @@ private void PollCdcChanges(Action emit) if (newChanges.Any() && !string.IsNullOrEmpty(latestLsn)) { _checkpointStore.Put(_checkpointKey, Encoding.UTF8.GetBytes(latestLsn)); - Console.WriteLine($"Updated LSN checkpoint to {latestLsn} for {_schemaName}.{_tableName}."); + LogInformation($"Updated LSN checkpoint to {latestLsn} for {_schemaName}.{_tableName}."); } // Reset back-off if we succeeded @@ -205,7 +212,7 @@ private void PollCdcChanges(Action emit) } catch (Exception ex) { - Console.WriteLine($"Error in Postgres CDC polling loop for {_schemaName}.{_tableName}."); + LogError($"Error in Postgres CDC polling loop for {_schemaName}.{_tableName}.", ex); // Exponential back-off Thread.Sleep(TimeSpan.FromSeconds(backOffSeconds)); @@ -258,14 +265,33 @@ private void RunInitialLoad(Action emit) /// private void EnsureLogicalReplicationSetup() { - Console.WriteLine($"Ensuring logical replication setup for table {_schemaName}.{_tableName}..."); + LogInformation($"Ensuring logical replication setup for table {_schemaName}.{_tableName}..."); using var conn = new NpgsqlConnection(_connectionString); conn.Open(); - // 1. 
Create publication if it doesn't exist - // We do not have an IF NOT EXISTS for CREATE PUBLICATION in older PG versions, - // so we can check pg_publication or catch an error. + // 1. Adjust REPLICA IDENTITY if requested + if (_replicaIdentity != ReplicaIdentityMode.None) + { + // Build the SQL statement for altering the table + string replicaIdentitySql = _replicaIdentity switch + { + ReplicaIdentityMode.Default => + $@"ALTER TABLE ""{_schemaName}"".""{_tableName}"" REPLICA IDENTITY DEFAULT;", + ReplicaIdentityMode.Full => + $@"ALTER TABLE ""{_schemaName}"".""{_tableName}"" REPLICA IDENTITY FULL;", + _ => null + }; + + if (!string.IsNullOrEmpty(replicaIdentitySql)) + { + using var alterCmd = new NpgsqlCommand(replicaIdentitySql, conn); + alterCmd.ExecuteNonQuery(); + LogInformation($"Set table {_schemaName}.{_tableName} to REPLICA IDENTITY = {_replicaIdentity}."); + } + } + + // 2. Create publication if it doesn't exist try { string createPubSql = $@" @@ -274,16 +300,14 @@ CREATE PUBLICATION ""{_publicationName}"" "; using var pubCmd = new NpgsqlCommand(createPubSql, conn); pubCmd.ExecuteNonQuery(); - Console.WriteLine($"Created publication '{_publicationName}' for {_schemaName}.{_tableName}."); + LogInformation($"Created publication '{_publicationName}' for {_schemaName}.{_tableName}."); } - catch (PostgresException ex) when (ex.SqlState == "42710") + catch (PostgresException ex) when (ex.SqlState == "42710") // 42710 = duplicate_object { - // 42710 = duplicate_object - Console.WriteLine($"Publication '{_publicationName}' already exists, skipping."); + LogInformation($"Publication '{_publicationName}' already exists, skipping."); } - // 2. Create replication slot if it doesn't exist - // Similar approach with a try/catch. + // 3. 
Create replication slot if it doesn't exist try { string createSlotSql = $@" @@ -291,14 +315,15 @@ CREATE PUBLICATION ""{_publicationName}"" "; using var slotCmd = new NpgsqlCommand(createSlotSql, conn); slotCmd.ExecuteNonQuery(); - Console.WriteLine($"Created logical replication slot '{_slotName}' for {_schemaName}.{_tableName}."); + LogInformation($"Created logical replication slot '{_slotName}' for {_schemaName}.{_tableName}."); } - catch (PostgresException ex) when (ex.SqlState == "42710") + catch (PostgresException ex) when (ex.SqlState == "42710") // 42710 = duplicate_object { - Console.WriteLine($"Logical replication slot '{_slotName}' already exists, skipping." ); + LogInformation($"Logical replication slot '{_slotName}' already exists, skipping."); } - Console.WriteLine($"Logical replication enabled for table {_schemaName}.{_tableName}. Publication: {_publicationName}, Slot: {_slotName}"); + LogInformation($"Logical replication enabled for table {_schemaName}.{_tableName}. " + + $"Publication: {_publicationName}, Slot: {_slotName}"); } /// @@ -310,16 +335,10 @@ CREATE PUBLICATION ""{_publicationName}"" var changes = new List(); string newLastLsn = lastLsn; - // Choose whether to pass lastLsn or NULL - // If lastLsn is NULL, we start from slot's current position - var sql = lastLsn == null - ? 
$@"SELECT location, xid, data + // For simplicity, ignoring lastLsn in the query and letting the slot's position track: + var sql = + $@"SELECT lsn::text, xid::text, data FROM pg_logical_slot_get_changes('{_slotName}', NULL, NULL, - 'pretty-print', 'false', - 'include-lsn', 'true', - 'include-timestamp', 'true');" - : $@"SELECT location, xid, data - FROM pg_logical_slot_get_changes('{_slotName}', '{lastLsn}', NULL, 'pretty-print', 'false', 'include-lsn', 'true', 'include-timestamp', 'true');"; @@ -327,39 +346,17 @@ FROM pg_logical_slot_get_changes('{_slotName}', '{lastLsn}', NULL, using var conn = new NpgsqlConnection(_connectionString); conn.Open(); - using var cmd = new NpgsqlCommand(sql, conn) - { - CommandTimeout = 60 - }; - + using var cmd = new NpgsqlCommand(sql, conn) { CommandTimeout = 60 }; using var reader = cmd.ExecuteReader(); while (reader.Read()) { if (_stopRequested) break; - // location: text (LSN), e.g. "0/16B6A98" - // xid: int - // data: JSON describing the changes - var location = reader.GetString(0); - var xid = reader.GetInt32(1); - var json = reader.GetString(2); - - newLastLsn = location; // track the highest location - - // Parse the wal2json output - // Typically, it looks like: - // { - // "change": [ - // { - // "kind": "insert", - // "schema": "public", - // "table": "mytable", - // "columnnames": ["id","name"], - // "columntypes": ["integer","text"], - // "columnvalues": [1,"Alice"] - // } - // ] - // } + string lsnString = reader.GetString(0); + string xidString = reader.GetString(1); + string json = reader.GetString(2); + + newLastLsn = lsnString; // track the highest LSN try { @@ -376,10 +373,6 @@ FROM pg_logical_slot_get_changes('{_slotName}', '{lastLsn}', NULL, var colNames = changeObj?["columnnames"] as JsonArray; var colValues = changeObj?["columnvalues"] as JsonArray; - // For updates or deletes, wal2json can present different fields, e.g. "oldkeys" - // or "keynames"/"keyvalues". 
We simplify by focusing on inserts/updates here, - // but handle "delete" with oldkeys if needed. - var record = new PostgresRecord { Operation = kind switch @@ -393,27 +386,29 @@ FROM pg_logical_slot_get_changes('{_slotName}', '{lastLsn}', NULL, ChangeTime = DateTime.UtcNow }; + // If REPLICA IDENTITY FULL is set, wal2json includes colNames/colValues for delete + // Otherwise it might only have "oldkeys" if (colNames != null && colValues != null) { for (int i = 0; i < colNames.Count; i++) { - string name = colNames[i]?.ToString(); - JsonNode val = colValues[i]; - record.Data[name] = val?.GetValue(); + string name = colNames[i]?.ToString()!; + record.Data[name] = colValues[i]?.GetValue()!; } } else if (kind == "delete") { - // For DELETE, wal2json can present "oldkeys": { "keynames": [...], "keyvalues": [...] } + // Fallback to oldkeys if no colNames for DELETE var oldKeys = changeObj?["oldkeys"]; var keyNames = oldKeys?["keynames"] as JsonArray; var keyValues = oldKeys?["keyvalues"] as JsonArray; + if (keyNames != null && keyValues != null) { for (int i = 0; i < keyNames.Count; i++) { - string name = keyNames[i]?.ToString(); - record.Data[name] = keyValues[i]?.GetValue(); + string name = keyNames[i]?.ToString()!; + record.Data[name] = keyValues[i]?.GetValue()!; } } } @@ -423,7 +418,7 @@ FROM pg_logical_slot_get_changes('{_slotName}', '{lastLsn}', NULL, } catch (Exception parseEx) { - Console.WriteLine($"Failed to parse wal2json output for XID={xid} at LSN={location}. JSON: {json}"); + LogError($"Failed to parse wal2json output for XID={xidString} at LSN={lsnString}. 
JSON: {json}.", parseEx); } } @@ -464,5 +459,40 @@ protected virtual void Dispose(bool disposing) } _disposed = true; } + + // -------------------------------------------------------------------- + // LOGGING HELPERS: If _logger is null, we fall back to Console.WriteLine + // -------------------------------------------------------------------- + private void LogInformation(string message) + { + if (_logger != null) + { + _logger.LogInformation(message); + } + else + { + Console.WriteLine(message); + } + } + + private void LogError(string message, Exception ex = null) + { + if (_logger != null) + { + _logger.LogError(ex, message); + } + else + { + // Log with exception details on console if present + if (ex != null) + { + Console.WriteLine($"ERROR: {message}\n{ex}"); + } + else + { + Console.WriteLine($"ERROR: {message}"); + } + } + } } } diff --git a/src/Cortex.Streams.PostgreSQL/PostgresRecord.cs b/src/Cortex.Streams.PostgreSQL/PostgresRecord.cs index 35f5351..ac780ac 100644 --- a/src/Cortex.Streams.PostgreSQL/PostgresRecord.cs +++ b/src/Cortex.Streams.PostgreSQL/PostgresRecord.cs @@ -1,4 +1,7 @@ -namespace Cortex.Streams.PostgreSQL +using System; +using System.Collections.Generic; + +namespace Cortex.Streams.PostgreSQL { /// /// Represents a generic CDC record for PostgreSQL. diff --git a/src/Cortex.Streams.PostgreSQL/PostgresSettings.cs b/src/Cortex.Streams.PostgreSQL/PostgresSettings.cs index 2e9007a..17c4c47 100644 --- a/src/Cortex.Streams.PostgreSQL/PostgresSettings.cs +++ b/src/Cortex.Streams.PostgreSQL/PostgresSettings.cs @@ -1,4 +1,6 @@ -namespace Cortex.Streams.PostgreSQL +using System; + +namespace Cortex.Streams.PostgreSQL { public class PostgresSettings { @@ -22,12 +24,43 @@ public class PostgresSettings /// public int MaxBackOffSeconds { get; set; } + /// + /// Determines the REPLICA IDENTITY mode for the target table, if we are configuring CDC in the server. + /// - None: Do not alter replica identity. 
+ /// - Default: Set REPLICA IDENTITY to DEFAULT. + /// - Full: Set REPLICA IDENTITY FULL. + /// + public ReplicaIdentityMode ReplicaIdentity { get; set; } + public PostgresSettings() { DoInitialLoad = true; PullInterval = TimeSpan.FromSeconds(3); ConfigureCDCInServer = false; MaxBackOffSeconds = 60; + + ReplicaIdentity = ReplicaIdentityMode.None; // default to no changes } } + + /// + /// Represents possible settings for REPLICA IDENTITY on a PostgreSQL table. + /// + public enum ReplicaIdentityMode + { + /// + /// Do not alter the table’s REPLICA IDENTITY; leave it as-is. + /// + None, + + /// + /// Use the default replica identity, typically the primary key only. + /// + Default, + + /// + /// Use REPLICA IDENTITY FULL, meaning deleted rows emit all columns, not just PK. + /// + Full + } } \ No newline at end of file From 85346bef27e9b2296e074ccb5fbfa12b874b3192 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Sat, 18 Jan 2025 17:38:19 +0100 Subject: [PATCH 09/12] [v1/feature/86]: Add logging in SqlServerCDCSourceOperator --- .../Cortex.Streams.MSSqlServer.csproj | 1 + .../SqlServerCDCSourceOperator.cs | 169 ++++++++++++++---- 2 files changed, 131 insertions(+), 39 deletions(-) diff --git a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj index ff5ec02..1383cd8 100644 --- a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj +++ b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj @@ -46,6 +46,7 @@ + diff --git a/src/Cortex.Streams.MSSqlServer/SqlServerCDCSourceOperator.cs b/src/Cortex.Streams.MSSqlServer/SqlServerCDCSourceOperator.cs index a317188..6c58872 100644 --- a/src/Cortex.Streams.MSSqlServer/SqlServerCDCSourceOperator.cs +++ b/src/Cortex.Streams.MSSqlServer/SqlServerCDCSourceOperator.cs @@ -1,6 +1,7 @@ using Cortex.States; using Cortex.Streams.Operators; using Microsoft.Data.SqlClient; +using Microsoft.Extensions.Logging; using System; using 
System.Collections.Generic; using System.Data; @@ -12,9 +13,9 @@ namespace Cortex.Streams.MSSqlServer { /// - /// MSSQL CDC Source Operator that optionally performs an initial load of the table, - /// then continues reading incremental changes via CDC. - /// Now we skip duplicates by storing a hash of the last record we emitted. + /// MSSQL CDC (Change Data Capture) Source Operator that optionally performs an initial load of the table, + /// then continues reading incremental changes via CDC. + /// Duplicates are skipped by storing and comparing a hash of the last record emitted. /// public class SqlServerCDCSourceOperator : ISourceOperator { @@ -23,7 +24,6 @@ public class SqlServerCDCSourceOperator : ISourceOperator private readonly string _tableName; private readonly bool _autoEnableCdc; private readonly bool _doInitialLoad; - private readonly int _pollIntervalMs; private Thread _pollingThread; private bool _stopRequested; @@ -40,12 +40,29 @@ public class SqlServerCDCSourceOperator : ISourceOperator // Key to store the last emitted record's hash private readonly string _lastRecordHashKey; + // Optional logger (may be null) + private readonly ILogger _logger; + + /// + /// Creates a new instance of which can optionally + /// perform an initial load of the table, then read incremental changes via CDC. + /// Duplicates are skipped by storing a hash of the last record emitted. + /// + /// The connection string to the SQL Server instance. + /// The schema name containing the table. + /// The table name from which to read changes. + /// Optional settings to configure CDC and polling intervals. + /// Optional data store for saving checkpoints (LSN, last-hash, etc.). + /// + /// An optional . If null, messages will be written to the console. 
+ /// public SqlServerCDCSourceOperator( string connectionString, string schemaName, string tableName, SqlServerSettings sqlServerSettings = null, - IDataStore checkpointStore = null) + IDataStore checkpointStore = null, + ILogger logger = null) // <-- Optional ILogger { _connectionString = connectionString; _schemaName = schemaName; @@ -69,32 +86,42 @@ public SqlServerCDCSourceOperator( // A unique key to store the last emitted record's hash _lastRecordHashKey = $"{_schemaName}.{_tableName}.CDC.LAST_HASH"; + + // Store the logger (can be null) + _logger = logger; } + /// + /// Starts the CDC operator by optionally enabling CDC on the table (if requested), + /// performing a one-time initial load (if requested), and starting the background thread + /// that polls for new changes. + /// + /// Action to call for each new record. public void Start(Action emit) { // Optionally enable CDC if requested if (_autoEnableCdc && !IsCdcEnabledForTable()) { + LogInformation("Enabling CDC for table..."); EnableCdcForTable(); - // Sleep for 10s; believe that MS SQL Server will enable CDC for the connected table! + // Sleep for 10s to allow MS SQL Server to enable CDC on the table Thread.Sleep(10000); } // 1. If doInitialLoad = true and we haven't done it yet, run initial load if (_doInitialLoad && _checkpointStore.Get(_initialLoadCheckpointKey) == null) { - Console.WriteLine("Starting one-time initial load..."); + LogInformation("Starting one-time initial load..."); RunInitialLoad(emit); // Mark initial load as completed _checkpointStore.Put(_initialLoadCheckpointKey, new byte[] { 0x01 }); - Console.WriteLine("Initial load completed."); + LogInformation("Initial load completed."); } else { - Console.WriteLine("Skipping initial load (already done or disabled)."); + LogInformation("Skipping initial load (already done or disabled)."); } // 2. 
Initialize the LSN checkpoint if we don’t already have one @@ -103,26 +130,39 @@ public void Start(Action emit) { // By default, start from "current" so we only see future changes lastCommittedLsn = GetMaxLsn(); - _checkpointStore.Put(_checkpointKey, lastCommittedLsn); + if (lastCommittedLsn != null) + { + _checkpointStore.Put(_checkpointKey, lastCommittedLsn); + LogInformation("Initialized LSN checkpoint to the current max LSN."); + } } // 3. Start CDC polling _stopRequested = false; - _pollingThread = new Thread(() => PollCdcChanges(emit)); - _pollingThread.IsBackground = true; + _pollingThread = new Thread(() => PollCdcChanges(emit)) + { + IsBackground = true + }; _pollingThread.Start(); + LogInformation("CDC polling thread started."); } + /// + /// Stops the CDC operator by signaling the background thread to stop and waiting for it to finish. + /// public void Stop() { + LogInformation("Stop requested. Waiting for polling thread to complete..."); _stopRequested = true; _pollingThread?.Join(); + LogInformation("Polling thread stopped."); } /// - /// Polls the CDC table periodically for new changes, - /// using the last LSN from the checkpoint store. + /// Polls the CDC function table periodically for new changes, using the last LSN from + /// the checkpoint store, and emits each new record found. /// + /// Action to call for each new record. 
private void PollCdcChanges(Action emit) { string captureInstanceName = $"{_schemaName}_{_tableName}"; @@ -164,9 +204,7 @@ private void PollCdcChanges(Action emit) // If it's the same as the last emitted, skip if (currentHash == lastHash) - { continue; - } // Otherwise, emit emit(change); @@ -177,27 +215,27 @@ private void PollCdcChanges(Action emit) } // Update the LSN checkpoint if new changes arrived - if (changes.Count() <= 0) - { - } - else + if (changes.Count > 0) { _checkpointStore.Put(_checkpointKey, maxLsn); + LogInformation($"Updated LSN checkpoint to: {BitConverter.ToString(maxLsn)}"); } Thread.Sleep(_pollIntervalMs); } catch (Exception ex) { - Console.WriteLine($"Error in CDC polling: {ex}"); + LogError("Error in CDC polling.", ex); + // Backoff a bit Thread.Sleep(5000); } } } /// - /// Reads all data from the base table and emits it once. + /// Reads all data from the base table once and emits it, primarily used for the one-time initial load. /// + /// Action to call for each record. private void RunInitialLoad(Action emit) { using (var conn = new SqlConnection(_connectionString)) @@ -234,6 +272,10 @@ private void RunInitialLoad(Action emit) } } + /// + /// Checks whether CDC is enabled for the current database and table. + /// + /// True if CDC is enabled for this table, otherwise false. private bool IsCdcEnabledForTable() { using (var conn = new SqlConnection(_connectionString)) @@ -268,6 +310,9 @@ FROM sys.tables t } } + /// + /// Enables CDC at the database level (if needed) and on the specific table. + /// private void EnableCdcForTable() { using (var conn = new SqlConnection(_connectionString)) @@ -298,10 +343,14 @@ EXEC sys.sp_cdc_enable_table "; cmd.ExecuteNonQuery(); - Console.WriteLine($"CDC enabled for table [{_schemaName}].[{_tableName}]."); + LogInformation($"CDC enabled for table [{_schemaName}].[{_tableName}]."); } } + /// + /// Retrieves the current maximum LSN from the server. + /// + /// The varbinary(10) max LSN, or null if none. 
private byte[] GetMaxLsn() { using (var conn = new SqlConnection(_connectionString)) @@ -315,20 +364,32 @@ private byte[] GetMaxLsn() } } + /// + /// Retrieves the minimum LSN for the given capture instance. + /// + /// The capture instance name (schema_table). + /// The varbinary(10) min LSN, or null if none. private byte[] GetMinLsn(string captureInstanceName) { using (var conn = new SqlConnection(_connectionString)) using (var cmd = conn.CreateCommand()) { conn.Open(); - cmd.CommandText = - $"SELECT sys.fn_cdc_get_min_lsn('{captureInstanceName}');"; + cmd.CommandText = $"SELECT sys.fn_cdc_get_min_lsn('{captureInstanceName}');"; object result = cmd.ExecuteScalar(); if (result == DBNull.Value) return null; return (byte[])result; } } + /// + /// Queries the CDC function table for changes between the specified LSN range. + /// Filters out "old" updates (operation 3). + /// + /// The capture instance name (schema_table). + /// Starting LSN (inclusive). + /// Ending LSN (inclusive). + /// A list of new or updated records. 
private List GetChangesSinceLastLsn( string captureInstanceName, byte[] fromLsn, @@ -343,9 +404,9 @@ private List GetChangesSinceLastLsn( conn.Open(); // Use 'all update old' to get both old & new update rows cmd.CommandText = $@" - SELECT * - FROM {functionName}(@from_lsn, @to_lsn, 'all update old') - "; + SELECT * + FROM {functionName}(@from_lsn, @to_lsn, 'all update old') + "; cmd.Parameters.Add(new SqlParameter("@from_lsn", SqlDbType.VarBinary, 10) { Value = fromLsn }); cmd.Parameters.Add(new SqlParameter("@to_lsn", SqlDbType.VarBinary, 10) { Value = toLsn }); @@ -375,8 +436,7 @@ private List GetChangesSinceLastLsn( } } - // Filter out rows that are the "old" side of an update - // (operation code 3 = "Update (old)") + // Filter out rows that are the "old" side of an update (operation code 3 = "Update (old)") changes = changes .Where(c => c.Operation != "Update (old)") .ToList(); @@ -384,8 +444,11 @@ private List GetChangesSinceLastLsn( return changes; } - - + /// + /// Returns a human-readable operation name from a CDC operation code. + /// + /// The integer CDC operation code. + /// A descriptive string for the operation. private string GetOperationName(int operationCode) { switch (operationCode) @@ -401,9 +464,9 @@ private string GetOperationName(int operationCode) /// /// Compare two varbinary(10) LSNs: - /// -1 if lsnA < lsnB - /// 0 if lsnA == lsnB - /// 1 if lsnA > lsnB + /// -1 if lsnA < lsnB, + /// 0 if lsnA == lsnB, + /// 1 if lsnA > lsnB. /// private int CompareLsn(byte[] lsnA, byte[] lsnB) { @@ -420,12 +483,13 @@ private int CompareLsn(byte[] lsnA, byte[] lsnB) } /// - /// Computes an MD5 hash from the SqlServerRecord's Data dictionary. + /// Computes an MD5 hash from the SqlServerRecord's Data dictionary + /// to detect duplicates. The data is sorted by key for deterministic ordering. /// + /// The to hash. + /// A Base64-encoded MD5 hash string. 
private string ComputeHash(SqlServerRecord record) { - // Build a stable string from the record's Data - // Sort by key so that the order is deterministic var sb = new StringBuilder(); foreach (var kv in record.Data.OrderBy(x => x.Key)) { @@ -433,7 +497,6 @@ private string ComputeHash(SqlServerRecord record) sb.Append(kv.Key).Append('=').Append(kv.Value ?? "null").Append(';'); } - // Get MD5 hash of that string using (var md5 = MD5.Create()) { var bytes = Encoding.UTF8.GetBytes(sb.ToString()); @@ -441,5 +504,33 @@ private string ComputeHash(SqlServerRecord record) return Convert.ToBase64String(hashBytes); } } + + #region Logging Helpers + + private void LogInformation(string message) + { + if (_logger != null) + { + _logger.LogInformation(message); + } + else + { + Console.WriteLine(message); + } + } + + private void LogError(string message, Exception ex) + { + if (_logger != null) + { + _logger.LogError(ex, message); + } + else + { + Console.WriteLine($"ERROR: {message}\n{ex}"); + } + } + + #endregion } } From 330682d57936b2a3eceaab4d7006513c737375f5 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Sat, 18 Jan 2025 17:54:21 +0100 Subject: [PATCH 10/12] Add CDC prefix in the name --- .../Cortex.Streams.MSSqlServer.csproj | 2 +- src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj index 1383cd8..7a6d362 100644 --- a/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj +++ b/src/Cortex.Streams.MSSqlServer/Cortex.Streams.MSSqlServer.csproj @@ -19,7 +19,7 @@ 1.0.1 license.md cortex.png - Cortex.Streams.MSSqlServer + Cortex.Streams.CDC.MSSqlServer True True True diff --git a/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj b/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj index 84bb8c3..4b9cd24 100644 --- 
a/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj +++ b/src/Cortex.Streams.PostgreSQL/Cortex.Streams.PostgreSQL.csproj @@ -19,7 +19,7 @@ 1.0.1 license.md cortex.png - Cortex.Streams.PostgreSQL + Cortex.Streams.CDC.PostgreSQL True True True From 027abc573daa3d2900efaad27eee87637362c76e Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Sat, 18 Jan 2025 22:53:31 +0100 Subject: [PATCH 11/12] [v1/feature/90]: Add MongoDb CDC support. Compare to SQL Server & PostgreSQL MongoDb has two different implementation for MongoDbRecord and MongoDbRecord. Add MongoDbCDCSourceOperator, Add MongoDbCDCSourceOperator --- Cortex.sln | 6 + src/Cortex.Streams.MongoDb/Assets/cortex.png | Bin 0 -> 7179 bytes src/Cortex.Streams.MongoDb/Assets/license.md | 20 + .../Cortex.Streams.MongoDb.csproj | 55 +++ .../MongoDbCDCSettings.cs | 36 ++ .../MongoDbCDCSourceOperator.cs | 417 ++++++++++++++++++ .../MongoDbCDCSourceOperator_Typed.cs | 356 +++++++++++++++ src/Cortex.Streams.MongoDb/MongoDbRecord.cs | 45 ++ 8 files changed, 935 insertions(+) create mode 100644 src/Cortex.Streams.MongoDb/Assets/cortex.png create mode 100644 src/Cortex.Streams.MongoDb/Assets/license.md create mode 100644 src/Cortex.Streams.MongoDb/Cortex.Streams.MongoDb.csproj create mode 100644 src/Cortex.Streams.MongoDb/MongoDbCDCSettings.cs create mode 100644 src/Cortex.Streams.MongoDb/MongoDbCDCSourceOperator.cs create mode 100644 src/Cortex.Streams.MongoDb/MongoDbCDCSourceOperator_Typed.cs create mode 100644 src/Cortex.Streams.MongoDb/MongoDbRecord.cs diff --git a/Cortex.sln b/Cortex.sln index 3d51707..5449f85 100644 --- a/Cortex.sln +++ b/Cortex.sln @@ -51,6 +51,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.Streams.MSSqlServer" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Cortex.Streams.PostgreSQL", "src\Cortex.Streams.PostgreSQL\Cortex.Streams.PostgreSQL.csproj", "{0E60F75D-C44B-428A-9252-A11C365E2C56}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Cortex.Streams.MongoDb", "src\Cortex.Streams.MongoDb\Cortex.Streams.MongoDb.csproj", "{FC86D3AB-778D-45D7-AF36-1F89FC16DE55}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -152,6 +154,10 @@ Global {0E60F75D-C44B-428A-9252-A11C365E2C56}.Debug|Any CPU.Build.0 = Debug|Any CPU {0E60F75D-C44B-428A-9252-A11C365E2C56}.Release|Any CPU.ActiveCfg = Release|Any CPU {0E60F75D-C44B-428A-9252-A11C365E2C56}.Release|Any CPU.Build.0 = Release|Any CPU + {FC86D3AB-778D-45D7-AF36-1F89FC16DE55}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FC86D3AB-778D-45D7-AF36-1F89FC16DE55}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FC86D3AB-778D-45D7-AF36-1F89FC16DE55}.Release|Any CPU.ActiveCfg = Release|Any CPU + {FC86D3AB-778D-45D7-AF36-1F89FC16DE55}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/Cortex.Streams.MongoDb/Assets/cortex.png b/src/Cortex.Streams.MongoDb/Assets/cortex.png new file mode 100644 index 0000000000000000000000000000000000000000..a4f9727d04f91f61f20720bd64b9c139fb041236 GIT binary patch literal 7179 zcmV+m9Q5OfP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGf5&!@T5&_cPe*6Fc8=y%C^C!)jU*}Z_TUZ5zkPS!zgzWpiKoUY0LP$tR!Xokt%Ag{H3XX^@B8wnv03972 z_hwv1kf6A*-1mH+Tg$Cm_f}O`27H#NQDPYKIv zcTCHy`fO@u)s1PH)sbnLH4!`}kZK|c%WX|xUTdf>GJS=u8BkWepa#-RXr-xDk(t@H zW@WFA%*qkfWGlBRGCQ}KE6>y#u7Wj@IeBX$a|&84%`IF@nqxf9Eo!w@+!~oz+$N|! zGOwgPGT%~1WPT~v)1{qUWzc%A^7WAg4s}HqRBUi)VHMOZs3)?px+juI+89Z!G1VJc zB&xQLlmylF3mS+d)%Qn|>-%q{Gz@%}+OT;?VqO2Az$`8yqwdcMnN^=m&#Gn9GOO9N zWmSgoa}7%%RkMWUH7sFyEou6S8a90e9_zTW>Pa(9t!!{;=E_xE+0bgztQ@G(RxZ@U zX6H7t+4KlCv)TDgY<6BVo1M3Y&B-^_!sZmT(Bo4DE$penwd|>awfwQDmCY?QRMf`i z7PlFX?QCvIJDW%9VDpMQ_+#lhHowFWelF`|^GiF~)8*^A%D5JkcX3sij~m#6$_@0m zu(F#itm@{El|3x6Y9mXm>S2l1c-+Vmt9w~uO)pzi-OCo$^sz;?rZ%x8Q+55MWT<|? 
zR>L4ErD2dQZhVHNG;H}~QR9|B$4$wo*gJiBEt{HI$)=I2xEuhhIXqJqL=&J0vCala z5N(h%O%XsY0Gvq>4G}=VGamuY)|~uiL2C?vYdAnt09d$|;L#&sk_?c=ZERj~yDb1L z>0t9s0idC$%Q^*_5G9Z#17M~3$VC820LaAwn(8AZK>|nur~sJ;c=dyOfYHG-0D8c)!3BgD z0ANym|IN6m>E(B(W>gS3OJ2?cFb6y?Pk02*2B>qM3!dyehx-I@f^^Pv!7~6xXt|F=bO5v<25_GNQSUxC#H8AO4sJ?%xdNQ*J~@1Hcqd#$aK3;6*i@;(*!qp zN$K#ErDYCqdVq%egycmAs0*E1fQrN=5J1U&?wU^l&=op;GEa^BbZsAjqqfh>eP;6F z)Ao6|Z&6L30M3*Oo+ETR-KT=*2%XxtPvO3(3wF_Ye(o~>8t!w1PapS14NvJlvF#Jx zr)&H4@D^1Y;3h9EV~`Hsl#Ft1yQhN}IO}{ANsFp%P!{Yu=K-8E>wGlM^MYqOFId^& z2%iLQaft=a3!W})l0KJV3MhJsTZ0AEbxpuK9u{Elz-8;ROww zR?TOCTT;p_`2Zw{iqPrGJT3QmCoihd8BOM?bf27cD&6OV=tx|mt@-5SMRuQ8=yZio zXXvycikWBcaNHyroC=-<(Ct1seEPUg8#$OY0`0p9-Gr zJ{g`@!7jLOL4{YlXVrYeag)-D-QWax7C_l`CNRIYFQbZ{KY1z6$Kr!#EgZ*Wg$`|~Vo!z(PSdoO8a_bY<-k(PJvp>_%Gv>m7eXoO8y z=v38xA~-=V_sQW?)$U2~rleO;=quWCh;^O(o^87Do2U9N{K`5{e9LlrUu6j^#7=7= z^K^|iM3;Gn?{VS6|5NG*Skr-zS?{?!Y}3WxJ=J&q7q<4T&slowP6s>-T#)3&a-QwJ zg;hNOSL6bxgy$1FEqErlrDbgSx;<>mwFhkL2aIjG<}W;N@XCF*V*N{$o67aDm_{0CMM*lVjE#{Dj{8=BvSl58QYDE=yg#mCogYr)>Me!L!;uW!7o8S@;_g zl7_uQY-^OC2f{JS*1bsWqcU+(hEBQd6W{?{d}`5fJiw)yk^ly7`)qjlJuY(RJKqrq z`GfcguyO3dif(EzUGOZ31~>troplQEFvkURT|{^Q=UekNvpM)VZw)KievEA%dfrbnvSA!cLKpGf(cmS-Gnz;po5gd(_bb!i_sP^nlvAr;IF94NvVp2S5{E zIe{x2j!!EhSb%2%93eco6oY=f@81m-UN#$TMCJ2m2ny;!--6vrS|s-=;JMtVsQK{x z#ZBAU#4ZIzS;aOu?=L=xFyyPUm3?Au_nm}G zSbMn7S+E-pEG2LS!||zwHaG#E*!EfQ6af9;A!OnMw7>mzcz7Q$)_Ul3`k>r-s{YQf z&a>g+c?iv$uRaJBBI%f+2drT5@6;XkD29qGXyza-z)iHk1%yWcqj%nyCZ5l%AP$=9nz{{A|*f(qAK$Qw5&SJ zo?;tL{umCL@!pybd`vwo2)C-yhLO6Dw|gDjaV9t?JOP~4_C;OuneH?GpWBB}^e|le zag}$AW%vAz`a|7QB%*g8o`;LG_RW8Vt~&u6od!uYy_Dq0U1st~gFVI<0$vHgCFeW9 zY2hhCr&r?Qf@c7ngiJHNoWjXST(|Mu9k#gnSvqH6=P3$yBu=={wJ%+went7Uf(X!h z-~WXzZrVoeq^hse9xCz}qzwws23No)rt09S0S1Am1V|yXgw4pRV@44sR;1jC7Xs)LiHjQyTH$)pCo|DzJmknGB*FnHD z$os71*<)0+QxUHkDiUUvsUTYIo+5d105l-ZFV=!{!t(V?P4YG}AheuQ< z)a-emPmxfTfpUQ9H^!gPggt-s-G2;kgHCgTT|RX;9RB!N9ozB9ZX| zC?+lh(FY)Yh70OmycjxJ!EqZ-{m7DPHj(pOlXI8VSTHMgo+MZULl4-%l?OEJ5)_`9 
z(Pp&nqIS~RQSCoP!Z}bY&#D&NJyVLBFFJ5unWq3x0WgUB;?oN0L#}=Ev(RZd;I$w5 znl1!QRFdFfPE1bke%62S4+_m(COm)VAtDT1xySPR-{571QM}KRM;lZ_MXEts6QFA; zw9b8|16!j67Zjc@boz9c;XFw15r)DzZxBWORmK|j4NgF5PGS zm+!Fw$N|sWdE)pWr0VN*zzZ}*!U@j?XipDx^|lm-0Y}oo696jUd1alV^A}LqXi93!NSS@!Xt^``O^oeP4KkLkM3S8lMvPE8BGbE_G&O zSOp#2xInA-f57@K{6;+-NMz8F40r&k255&)Kx;ev74^(xYZ0-#OziKJ0jff0xXe@P zaVZ!nI7}*&F!yQT0hY^wYabyvt@HHoT#1VeFSU>ga2wjm_3!?`+K+t0I!}Dhx=#K`zsKJ#c;`7hI8_PnDx<%n&g8Sr z!jJQb?hEfc?!GyX3>@hZz>@&tSiDhOd}r?l!k8HzeONU`<1)T0q+4D7<$MGH@|7DY0!)`sNf0U{Nc%2ryo3k zYk8F5MCl*~1Sr76`LGIe<9oNQf*FoLRtTr`T28yoHk`i6)*H~)9sibok2Z4f@_qjM zf^u?^wsN8?5{EPmb(x)1}`npzBZk zK+o?!^Aiu9M%Xmr;kebWen8VL(9`1XsMZV>1%xNpe5UigK7Hn?bf27c3htXV*5HC9E-H8v zQVLjNO&@DL^o0>3BNXanKw4qK>g9W^^Tc=LJcQTM9q-Uo1{xL`!*%Sgf?ZrmYrvb8 z+cXy7py=U=p_6`}TEx;@o?~r?zoa&5vz(0h=^8Jt0lwzN^VD`=`IvGYy9CcYNNZ1# za5+x{FL2wZSWPsB;6&*lx&d+-0FD2b?l|twAOi|v6&dNmiVZZ|T3k`<4bVGuqCYdJ z`@8_k?z7syoW@uNr-o;`58)D7R_oy}Jo_{KgdP}_8OU5QZBX8Xz_ovi1TR?ld~^kS z41qHh0GVslES5%vl{{;4J9? z7P1M+d2D6RJ~ntY+3(wPe{{`Su0WFVqlEuC$BTYv9L^HWA zV=cfwTA*TX1!4iyWSsz~+Unn3^U3i1O9l&`0dU5D1Govv`7A!Igw^i37<#IBw8HCP z?j*u8k`;R!kP)~~129qxa52`At{d)Zax&K5Asn8Q`3=YVpf$Fp2mId zn$OF9qVs~)e6bBKxq#X=EbZ;T9BwrYg!V9OB7$1h9~cut4myz&XdXp z^=9L-3+~B9IkX3H&zYN{lM#fxhJ9CQur&;L90Tapf5mE`YBpKtJeQ=5Q-bFRn-)NQ z$w1%)fYVpj#U{8XlXBUtye8IuG!i-)!EsHmen=MzdtuU+P9foKzIvbK558e7ht_UB zr)c*`8h~Ll&*@n)2o7x;QgQ6#720W7t$Hj%+YBPyqKm9x;Em8B6YwzKWogTE)WI!p zf2D`#?>?6U1GsN`R())NL$3&W{mY*_75XCcjPPfcTt7(R7TshxHzwFt?LHU!W-wRj zV4J9ksYP0NYJfI8ch;$cXE@JFUL4MgA#lhblk-?My0eDvg$ipBdi$}Z?h}i09o=O( z|BS2#w&smbL&L*Ofl+hJCPsg$1y2TOZg;E*oBUf+)HRyMd|DWY2|=M9{Q(l5YZS8LvDQJ_zf6|OeXj5+*L)g) zCd7mlF#-xlnb`aj=d?B?5Z$K>of`KAtogTb+Y7WWR~Tq?%(aKKqYuKZ ze15HjaXz$n*kl{i`@+JbLjT}DDNG|2b8rFSxfEcKmbUw-9vg5^BxchW*!8~8SJTKB z!%sM7>-C4MY{v--d3G`qPy)Os5_4#lSI?QBLc_yx)w|Ep#mD;*UFQqWbBctVbsBBo zSb)P_j&;qLfgHAdL3@bRGzc3wpZprZh4v0*LLSZ0#H69H?+M2h48BP-(5#TDa-XW* zbHTISr)>K&s~#^nTx3`si{QiV--Qc=wE~#Y>&zfS%1BnQlq;8Zh7C6Xi4VZuW0~!{ 
z>AVhjDhEpLQ#(-nT2sM&j}sj3YV`9}>^e0fXjmVBFOqmN(H|Nw#QB(>{5M$u7oS>8 zAunvmL^*?nBiObd=hVT|g-#2e!g-FwCF3!IgWGcZUgLLjm^<3oUJzfrFoD7&ptxVh z5ruBAj_xl+kEh!p>HlED{XIoMcrbKOj8F>0Z`>UN7C}Rz#)TNx@HGm z_%O)`2YsK~5c2}duOSOJ3cmJiaOe-#eDG81;|tOr;yD=g#Kkc z-20a~LZ`0g)3$vTqYn;uI(7@fnzXP*HZDZ%ix=qw2MK$~TCs6=&zakFEL6GcG_4N^ zbo`=R+A1F#0*vH6m+w4j?BnU&ghvT48uz7-GB`lPLWS11z6c$9Z~?HKDj~Bf&_$=( zET@rr@y%^EXe9gELzdI`nsJU`j>R`n4PJQ=DzxJmEJnd=B4nB?YhCbE;Zp@qRC@X7 zf#cm-6|DZ{%OeUoSgVcpg12k#*Kxe2(Hnv-4P~d8S>KX1&jw0t%=2Musm)mvHl=WbI^evqCKK8=qryV7NafQRsjtQ3W}d=6Ro47iAO zFNgnDF1Wt!@Yif+b|VeIn(&kWgSv0jz$NF?8^bPkVLmXqex(#`AKi z`wK~oR|13!+4|O3Y<6z5f2L`M`|MS0?V-=ZM~>@^zAKe?Zaa5SQUdgWH)XWJVdJwf zj>8AVG;r=*A85F^iP^@Qf?#88+x~%X47p5=KGq}1=aYr37~ilFcDxYBEUN9JbBX)6 zXu+w%rwDFo*(iZSm`TZ}FRG39F2(cFV~;m))?!|QBe11=*U29OK{JKtqW$9M3HCbp zE;;P663xws&x7qe(9=UniemR9EkN0MCcsezXAF=wzV;!#dB{x7l`@{weDD*RfEpx& zgd4GL41=)Ge55$1hY&IW0N)s54l(%&pNW5a<8SPlf4I*n3(whf;TM+C{=BfNR@&-c z0Z|9ghFCg!;2>Ol%u>ScbCu7Zrf>Jq2|KT@=z5vWSWy=U8l68mk2WQ(*>m1?zChZ2 zj=uX8#d(mB%p*V{jck-`dz)45R-Ds1_gPl5?Fh{Z11xkL$U8}dcHfdw1_$9T#RcXI z?)Ado@w^}zq$v9GUGeUXR>!P_YGw zSNL?mTU;^@ZuF#srv_-c&kLRfa2(udO9wAN=+wX)2lrT|f+qlUgiSki>flWp2lqIo zb6*sp(}b8-JPz)$OR)`z4xZM1odhs- z99)b@2TuSf!W#z{Lki+N6JToLIJnqS0C?lzVpv-D84y!AxI3-`#}y+|!6R^Y0Pd#y zfN{l+6z)qYycst!Iqv|5(%lD)D<&j`P4t{hOer`JH!(H)FB4OWzCm&4I%r%mq(a)B z1FR;d7W^d+|DH_Bc_Kcw;Pv?A{9DxKAz>L;Ea?u|fYF4My#E2S{{wP}i{H;-Y-|7k N002ovPDHLkV1i-n!nFVZ literal 0 HcmV?d00001 diff --git a/src/Cortex.Streams.MongoDb/Assets/license.md b/src/Cortex.Streams.MongoDb/Assets/license.md new file mode 100644 index 0000000..3c845d4 --- /dev/null +++ b/src/Cortex.Streams.MongoDb/Assets/license.md @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2025 Buildersoft + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/Cortex.Streams.MongoDb/Cortex.Streams.MongoDb.csproj b/src/Cortex.Streams.MongoDb/Cortex.Streams.MongoDb.csproj new file mode 100644 index 0000000..a315882 --- /dev/null +++ b/src/Cortex.Streams.MongoDb/Cortex.Streams.MongoDb.csproj @@ -0,0 +1,55 @@ + + + + net9.0;net8.0;net7.0 + + 1.0.1 + 1.0.1 + Buildersoft Cortex Framework + Buildersoft + Buildersoft,EnesHoxha + Copyright © Buildersoft 2025 + + Cortex Data Framework is a robust, extensible platform designed to facilitate real-time data streaming, processing, and state management. It provides developers with a comprehensive suite of tools and libraries to build scalable, high-performance data pipelines tailored to diverse use cases. By abstracting underlying streaming technologies and state management solutions, Cortex Data Framework enables seamless integration, simplified development workflows, and enhanced maintainability for complex data-driven applications. 
+ + + https://github.com/buildersoftio/cortex + cortex cdc eda streaming distributed streams states mongodb + + 1.0.1 + license.md + cortex.png + Cortex.Streams.CDC.MongoDb + True + True + True + + Just as the Cortex in our brains handles complex processing efficiently, Cortex Data Framework brings brainpower to your data management! + https://buildersoft.io/ + README.md + + + + + True + \ + + + True + + + + True + + + + + + + + + + + + + diff --git a/src/Cortex.Streams.MongoDb/MongoDbCDCSettings.cs b/src/Cortex.Streams.MongoDb/MongoDbCDCSettings.cs new file mode 100644 index 0000000..f7e383d --- /dev/null +++ b/src/Cortex.Streams.MongoDb/MongoDbCDCSettings.cs @@ -0,0 +1,36 @@ +using System; + +namespace Cortex.Streams.MongoDb +{ + public class MongoDbCDCSettings + { + /// + /// Whether to perform an initial scan/load of the entire collection + /// before starting the Change Stream. + /// + public bool DoInitialLoad { get; set; } + + /// + /// For MongoDB, we use a Change Stream (which blocks). However, we may still + /// use this interval as a minimal delay between reconnection attempts or + /// to check if the operator was stopped. + /// + public TimeSpan Delay { get; set; } + + /// + /// Maximum back-off in seconds if repeated errors occur while listening + /// to the MongoDB Change Stream. + /// + public int MaxBackOffSeconds { get; set; } + + public MongoDbCDCSettings() + { + DoInitialLoad = true; + + // Typically not used for "polling" as we do streaming, but can be used + // for a small wait loop to check cancellation or errors. 
+ Delay = TimeSpan.FromSeconds(3); + MaxBackOffSeconds = 60; + } + } +} diff --git a/src/Cortex.Streams.MongoDb/MongoDbCDCSourceOperator.cs b/src/Cortex.Streams.MongoDb/MongoDbCDCSourceOperator.cs new file mode 100644 index 0000000..52f11f0 --- /dev/null +++ b/src/Cortex.Streams.MongoDb/MongoDbCDCSourceOperator.cs @@ -0,0 +1,417 @@ +using Cortex.States; +using Cortex.Streams.Operators; +using Microsoft.Extensions.Logging; +using MongoDB.Bson; +using MongoDB.Driver; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Security.Cryptography; +using System.Text; + +namespace Cortex.Streams.MongoDb +{ + /// + /// MongoDB CDC Source Operator that: + /// 1. Optionally performs an initial collection load if requested. + /// 2. Uses MongoDB Change Streams to capture inserts, updates, deletes, and replaces. + /// 3. Stores the last resume token to allow restart from the correct position. + /// 4. Skips duplicate records using a hash checkpoint. + /// 5. Uses robust error handling and exponential back-off on failures. 
/// </summary>
    public class MongoDbCDCSourceOperator : ISourceOperator<MongoDbRecord>, IDisposable
    {
        private readonly IMongoDatabase _database;
        private readonly string _collectionName;

        private readonly bool _doInitialLoad;
        private readonly int _delayMs;
        private readonly int _maxBackOffSeconds;

        private readonly IDataStore<string, byte[]> _checkpointStore;

        // Keys for checkpoint store
        private readonly string _checkpointKey;
        private readonly string _initialLoadCheckpointKey;
        private readonly string _lastRecordHashKey;

        private IMongoCollection<BsonDocument> _collection;

        // Thread & cancellation
        private Thread _pollingThread;

        // Written by Stop() and read by the polling thread, hence volatile.
        private volatile bool _stopRequested;
        private bool _disposed;

        // Optional logger
        private readonly ILogger _logger;

        /// <summary>
        /// Creates the operator for one collection of the given database.
        /// </summary>
        /// <param name="database">Database that owns the watched collection.</param>
        /// <param name="collectionName">Name of the collection to watch.</param>
        /// <param name="mongoSettings">Initial-load / delay / back-off settings; defaults are used when null.</param>
        /// <param name="checkpointStore">Store for resume token, initial-load marker and last hash; an in-memory store is used when null.</param>
        /// <param name="logger">Optional logger; messages go to the console when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="database"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="collectionName"/> is null, empty or whitespace.</exception>
        public MongoDbCDCSourceOperator(IMongoDatabase database,
            string collectionName,
            MongoDbCDCSettings mongoSettings = null,
            IDataStore<string, byte[]> checkpointStore = null,
            ILogger logger = null)
        {
            if (database == null)
                throw new ArgumentNullException(nameof(database));
            if (string.IsNullOrWhiteSpace(collectionName))
                throw new ArgumentException("Collection name cannot be null or empty.", nameof(collectionName));

            mongoSettings ??= new MongoDbCDCSettings();

            // If no checkpoint store is provided, default to an in-memory store.
            _checkpointStore = checkpointStore
                ?? new InMemoryStateStore<string, byte[]>($"{database.DatabaseNamespace.DatabaseName}.{collectionName}.STORE");

            _database = database;
            _collectionName = collectionName;

            _doInitialLoad = mongoSettings.DoInitialLoad;
            _delayMs = (int)mongoSettings.Delay.TotalMilliseconds;
            _maxBackOffSeconds = mongoSettings.MaxBackOffSeconds;

            // Define checkpoint keys
            var dbName = database.DatabaseNamespace.DatabaseName;
            _checkpointKey = $"{dbName}.{collectionName}.CDC.ResumeToken";
            _initialLoadCheckpointKey = $"{dbName}.{collectionName}.INITIAL_LOAD_DONE";
            _lastRecordHashKey = $"{dbName}.{collectionName}.CDC.LAST_HASH";

            _logger = logger;
        }

        /// <summary>
        /// Start the operator. Uses the provided IMongoDatabase to set up the collection,
        /// performs optional initial load, and launches the background thread
        /// that reads from the change stream.
        /// </summary>
        /// <param name="emit">Callback invoked for every record produced.</param>
        /// <exception cref="ArgumentNullException"><paramref name="emit"/> is null.</exception>
        public void Start(Action<MongoDbRecord> emit)
        {
            if (emit == null) throw new ArgumentNullException(nameof(emit));

            var dbName = _database.DatabaseNamespace.DatabaseName;
            LogInformation($"Starting MongoDB CDC operator for {dbName}.{_collectionName}...");

            // Clear a stop request left over from an earlier Stop(); otherwise the
            // initial load below would exit immediately yet be marked as done.
            _stopRequested = false;

            // 1. Select the collection
            _collection = _database.GetCollection<BsonDocument>(_collectionName);

            // 2. Perform initial load if needed
            if (_doInitialLoad && _checkpointStore.Get(_initialLoadCheckpointKey) == null)
            {
                LogInformation($"Performing initial load for {dbName}.{_collectionName}...");
                RunInitialLoad(emit);
                _checkpointStore.Put(_initialLoadCheckpointKey, new byte[] { 0x01 });
                LogInformation($"Initial load completed for {dbName}.{_collectionName}.");
            }
            else
            {
                LogInformation($"Skipping initial load for {dbName}.{_collectionName} (already done or disabled).");
            }

            // 3. Check if we have an existing resume token
            var lastResumeTokenBytes = _checkpointStore.Get(_checkpointKey);
            if (lastResumeTokenBytes == null)
            {
                LogInformation($"No existing resume token found for {dbName}.{_collectionName}. Will start fresh.");
            }
            else
            {
                LogInformation($"Found existing resume token for {dbName}.{_collectionName}.");
            }

            // 4. Spin up the background polling (streaming) thread
            _pollingThread = new Thread(() => PollCdcChanges(emit))
            {
                IsBackground = true,
                Name = $"MongoCdcPolling_{dbName}_{_collectionName}"
            };
            _pollingThread.Start();
        }

        /// <summary>
        /// Stops the operator gracefully: requests the polling thread to stop and waits for it to exit.
        /// </summary>
        public void Stop()
        {
            var dbName = _database.DatabaseNamespace.DatabaseName;
            LogInformation($"Stop requested for MongoDB CDC operator {dbName}.{_collectionName}.");
            _stopRequested = true;

            // Never join the current thread (Stop() called from inside the emit callback).
            var thread = _pollingThread;
            if (thread != null && thread != Thread.CurrentThread)
            {
                thread.Join();
            }

            LogInformation($"MongoDB CDC operator stopped for {dbName}.{_collectionName}.");
        }

        /// <summary>
        /// The main CDC logic that connects to the MongoDB Change Stream and reads events.
        /// Includes exponential back-off if errors occur.
        /// </summary>
        private void PollCdcChanges(Action<MongoDbRecord> emit)
        {
            var dbName = _database.DatabaseNamespace.DatabaseName;
            int backOffSeconds = 1;

            while (!_stopRequested)
            {
                try
                {
                    // 1. Retrieve the last resume token from checkpoint store
                    BsonDocument resumeToken = null;
                    var lastResumeTokenBytes = _checkpointStore.Get(_checkpointKey);
                    if (lastResumeTokenBytes != null)
                    {
                        var json = Encoding.UTF8.GetString(lastResumeTokenBytes);
                        resumeToken = BsonDocument.Parse(json);
                    }

                    // 2. Create a change stream with optional resume token
                    var options = new ChangeStreamOptions
                    {
                        FullDocument = ChangeStreamFullDocumentOption.UpdateLookup,
                        ResumeAfter = resumeToken
                    };

                    var pipeline = new EmptyPipelineDefinition<ChangeStreamDocument<BsonDocument>>();

                    using var cursor = _collection.Watch(pipeline, options);
                    LogInformation($"MongoDB Change Stream opened for {dbName}.{_collectionName}.");

                    // 3. Retrieve last-record-hash to skip duplicates
                    var lastHashBytes = _checkpointStore.Get(_lastRecordHashKey);
                    string lastHash = lastHashBytes == null ? null : Encoding.UTF8.GetString(lastHashBytes);

                    // 4. Read changes batch by batch. ToEnumerable() would block inside the
                    //    driver until the next event arrives, so Stop() could hang on an idle
                    //    collection; MoveNext() returns (possibly with an empty batch) after
                    //    each await window, letting the stop flag be re-checked.
                    while (!_stopRequested && cursor.MoveNext())
                    {
                        foreach (var change in cursor.Current)
                        {
                            if (_stopRequested) break;

                            // Update resume token checkpoint
                            // NOTE(review): the token is persisted before emit, so a record whose
                            // emit throws is not replayed after the stream is reopened — confirm intended.
                            var currentResumeToken = change.ResumeToken?.AsBsonDocument;
                            if (currentResumeToken != null)
                            {
                                var currentResumeTokenString = currentResumeToken.ToJson();
                                _checkpointStore.Put(_checkpointKey, Encoding.UTF8.GetBytes(currentResumeTokenString));
                            }

                            // Convert the change stream document to a MongoDbRecord
                            var record = ConvertChangeToRecord(change);

                            // Deduplicate
                            // NOTE(review): content-based dedup also drops two genuine consecutive
                            // events that carry identical data — confirm this is acceptable.
                            var currentHash = ComputeHash(record);
                            if (currentHash == lastHash)
                            {
                                LogInformation($"Skipping duplicate record for {dbName}.{_collectionName}.");
                                continue;
                            }

                            // Emit the record
                            emit(record);

                            // Update lastHash checkpoint
                            lastHash = currentHash;
                            _checkpointStore.Put(_lastRecordHashKey, Encoding.UTF8.GetBytes(lastHash));
                        }
                    }

                    SleepUnlessStopped(TimeSpan.FromMilliseconds(_delayMs));
                    // Reset back-off if we successfully processed
                    backOffSeconds = 1;
                }
                catch (Exception ex)
                {
                    LogError($"Error in MongoDB CDC polling loop for {dbName}.{_collectionName}.", ex);

                    // Exponential back-off, never below one second even if the configured
                    // maximum is zero or negative.
                    SleepUnlessStopped(TimeSpan.FromSeconds(backOffSeconds));
                    backOffSeconds = Math.Max(1, Math.Min(backOffSeconds * 2, _maxBackOffSeconds));
                }
            }
        }

        /// <summary>
        /// Sleeps for up to <paramref name="duration"/> in short slices, returning early once a stop is requested.
        /// </summary>
        private void SleepUnlessStopped(TimeSpan duration)
        {
            const int sliceMs = 200;
            var remainingMs = duration.TotalMilliseconds;

            while (remainingMs > 0 && !_stopRequested)
            {
                // At least 1 ms per slice so that a fractional remainder cannot loop forever.
                var slice = (int)Math.Max(1, Math.Min(sliceMs, remainingMs));
                Thread.Sleep(slice);
                remainingMs -= slice;
            }
        }

        /// <summary>
        /// Reads the entire collection once and emits each document as Operation = 'InitialLoad'.
        /// For very large collections, consider chunking/paging to avoid memory issues.
        /// </summary>
        private void RunInitialLoad(Action<MongoDbRecord> emit)
        {
            var filter = Builders<BsonDocument>.Filter.Empty;

            // Dispose the cursor so the server-side cursor is released even on early exit.
            using var cursor = _collection.Find(filter).ToCursor();

            while (cursor.MoveNext())
            {
                foreach (var doc in cursor.Current)
                {
                    if (_stopRequested) break;

                    var record = new MongoDbRecord
                    {
                        Operation = "InitialLoad",
                        Data = BsonDocumentToDictionary(doc),
                        ChangeTime = DateTime.UtcNow
                    };

                    emit(record);
                }
                if (_stopRequested) break;
            }
        }

        /// <summary>
        /// Converts a change stream document to a <see cref="MongoDbRecord"/>.
        /// </summary>
        private MongoDbRecord ConvertChangeToRecord(ChangeStreamDocument<BsonDocument> change)
        {
            var record = new MongoDbRecord
            {
                Data = new Dictionary<string, object>(),
                ChangeTime = DateTime.UtcNow
            };

            switch (change.OperationType)
            {
                case ChangeStreamOperationType.Insert:
                    record.Operation = "INSERT";
                    record.Data = BsonDocumentToDictionary(change.FullDocument);
                    break;

                case ChangeStreamOperationType.Update:
                    record.Operation = "UPDATE";
                    record.Data = BsonDocumentToDictionary(change.FullDocument);
                    break;

                case ChangeStreamOperationType.Replace:
                    record.Operation = "REPLACE";
                    record.Data = BsonDocumentToDictionary(change.FullDocument);
                    break;

                case ChangeStreamOperationType.Delete:
                    record.Operation = "DELETE";
                    // DocumentKey typically has the _id
                    if (change.DocumentKey != null)
                    {
                        var docKeyDict = BsonDocumentToDictionary(change.DocumentKey);
                        foreach (var kv in docKeyDict)
                        {
                            record.Data[kv.Key] = kv.Value;
                        }
                    }
                    break;

                default:
                    // Any other operation type is reported by its upper-cased enum name with empty data.
                    record.Operation = change.OperationType.ToString().ToUpperInvariant();
                    break;
            }

            return record;
        }

        /// <summary>
        /// Helper to convert a BsonDocument to a Dictionary. A null document yields an empty dictionary.
        /// </summary>
        private Dictionary<string, object> BsonDocumentToDictionary(BsonDocument doc)
        {
            if (doc == null) return new Dictionary<string, object>();

            var dict = new Dictionary<string, object>();
            foreach (var element in doc.Elements)
            {
                dict[element.Name] = BsonValueToNative(element.Value);
            }
            return dict;
        }

        /// <summary>
        /// Recursively converts a BsonValue to a .NET object (e.g. string, int, nested dictionaries, etc.).
        /// </summary>
        private object BsonValueToNative(BsonValue value)
        {
            if (value == null || value.IsBsonNull) return null;

            switch (value.BsonType)
            {
                case BsonType.Document:
                    return BsonDocumentToDictionary(value.AsBsonDocument);

                case BsonType.Array:
                    var list = new List<object>();
                    foreach (var item in value.AsBsonArray)
                    {
                        list.Add(BsonValueToNative(item));
                    }
                    return list;

                case BsonType.Boolean: return value.AsBoolean;
                case BsonType.DateTime: return value.ToUniversalTime();
                case BsonType.Double: return value.AsDouble;
                case BsonType.Int32: return value.AsInt32;
                case BsonType.Int64: return value.AsInt64;
                case BsonType.Decimal128: return Convert.ToDecimal(value.AsDecimal128);
                case BsonType.String: return value.AsString;
                case BsonType.ObjectId: return value.AsObjectId.ToString();
                default:
                    // Fallback for types not explicitly handled
                    return value.ToString();
            }
        }

        /// <summary>
        /// Compute an MD5 hash from the record's Data dictionary, sorted by key for deterministic ordering.
        /// Nested dictionaries and lists are serialized by content, so documents that differ only in
        /// nested fields produce different hashes. MD5 is used as a fingerprint only, not for security.
        /// </summary>
        private string ComputeHash(MongoDbRecord record)
        {
            var sb = new StringBuilder();
            AppendCanonical(sb, record.Data);

            using var md5 = MD5.Create();
            var bytes = Encoding.UTF8.GetBytes(sb.ToString());
            var hashBytes = md5.ComputeHash(bytes);
            return Convert.ToBase64String(hashBytes);
        }

        /// <summary>
        /// Appends a culture-independent, order-stable text form of a value produced by
        /// <see cref="BsonValueToNative"/> to <paramref name="sb"/>.
        /// </summary>
        private static void AppendCanonical(StringBuilder sb, object value)
        {
            switch (value)
            {
                case null:
                    sb.Append("null");
                    break;

                case Dictionary<string, object> map:
                    sb.Append('{');
                    // Ordinal ordering keeps the hash independent of the current culture.
                    foreach (var kv in map.OrderBy(x => x.Key, StringComparer.Ordinal))
                    {
                        sb.Append(kv.Key).Append('=');
                        AppendCanonical(sb, kv.Value);
                        sb.Append(';');
                    }
                    sb.Append('}');
                    break;

                case List<object> items:
                    sb.Append('[');
                    foreach (var item in items)
                    {
                        AppendCanonical(sb, item);
                        sb.Append(',');
                    }
                    sb.Append(']');
                    break;

                case DateTime timestamp:
                    // Round-trip format keeps sub-second precision.
                    sb.Append(timestamp.ToString("O", System.Globalization.CultureInfo.InvariantCulture));
                    break;

                case IFormattable formattable:
                    sb.Append(formattable.ToString(null, System.Globalization.CultureInfo.InvariantCulture));
                    break;

                default:
                    sb.Append(value);
                    break;
            }
        }

        /// <summary>
        /// Stops the operator and releases it.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Stops the polling thread on first disposal; later calls do nothing.
        /// </summary>
        protected virtual void Dispose(bool disposing)
        {
            if (_disposed) return;
            if (disposing)
            {
                Stop();
            }
            _disposed = true;
        }

        // --------------------------------------------------------------------
        // LOGGING HELPERS
        // --------------------------------------------------------------------

        /// <summary>
        /// Writes an informational message to the logger, or to the console when no logger was supplied.
        /// </summary>
        private void LogInformation(string message)
        {
            if (_logger != null)
                _logger.LogInformation(message);
            else
                Console.WriteLine(message);
        }

        /// <summary>
        /// Writes an error (with optional exception) to the logger, or to the console when no logger was supplied.
        /// </summary>
        private void LogError(string message, Exception ex = null)
        {
            if (_logger != null)
            {
                _logger.LogError(ex, message);
            }
            else
            {
                if (ex != null)
                {
                    Console.WriteLine($"ERROR: {message}\n{ex}");
                }
                else
                {
                    Console.WriteLine($"ERROR: {message}");
                }
            }
        }
    }
}
diff --git a/src/Cortex.Streams.MongoDb/MongoDbCDCSourceOperator_Typed.cs b/src/Cortex.Streams.MongoDb/MongoDbCDCSourceOperator_Typed.cs new file mode 100644 index 0000000..83e8b96 --- /dev/null +++ b/src/Cortex.Streams.MongoDb/MongoDbCDCSourceOperator_Typed.cs @@ -0,0 +1,356 @@ +using Cortex.Streams.Operators; +using Microsoft.Extensions.Logging; +using MongoDB.Bson.Serialization; +using MongoDB.Bson; +using MongoDB.Driver; +using System; +using System.Linq; +using System.Security.Cryptography; +using System.Text; +using Cortex.States; + +namespace Cortex.Streams.MongoDb +{ + /// + /// A generic MongoDB CDC Source Operator that: + /// 1. Optionally performs an initial collection load (deserializing docs into T). + /// 2. Uses MongoDB Change Streams to capture inserts, updates, deletes, etc. + /// 3. Stores the last resume token for correct restart. + /// 4. Skips duplicate records using an MD5 hash of the serialized document. + /// 5.
/// Uses robust error handling and exponential back-off on failures.
    /// </summary>
    /// <typeparam name="T">Type each document is deserialized into.</typeparam>
    public class MongoDbCDCSourceOperator<T> : ISourceOperator<MongoDbRecord<T>>, IDisposable where T : new()
    {
        private readonly IMongoDatabase _database;
        private readonly string _collectionName;

        private readonly bool _doInitialLoad;
        private readonly int _pollIntervalMs;
        private readonly int _maxBackOffSeconds;

        private readonly IDataStore<string, byte[]> _checkpointStore;

        // Checkpoint keys
        private readonly string _checkpointKey;
        private readonly string _initialLoadCheckpointKey;
        private readonly string _lastRecordHashKey;

        // Collection reference
        private IMongoCollection<BsonDocument> _collection;

        // Thread control
        private Thread _pollingThread;

        // Written by Stop() and read by the polling thread, hence volatile.
        private volatile bool _stopRequested;
        private bool _disposed;

        // Optional logger
        private readonly ILogger<MongoDbCDCSourceOperator<T>> _logger;

        /// <summary>
        /// Creates the operator for one collection of the given database.
        /// </summary>
        /// <param name="database">Database that owns the watched collection.</param>
        /// <param name="collectionName">Name of the collection to watch.</param>
        /// <param name="mongoSettings">Initial-load / delay / back-off settings; defaults are used when null.</param>
        /// <param name="checkpointStore">Store for resume token, initial-load marker and last hash; an in-memory store is used when null.</param>
        /// <param name="logger">Optional logger; messages go to the console when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="database"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="collectionName"/> is null, empty or whitespace.</exception>
        public MongoDbCDCSourceOperator(
            IMongoDatabase database,
            string collectionName,
            MongoDbCDCSettings mongoSettings = null,
            IDataStore<string, byte[]> checkpointStore = null,
            ILogger<MongoDbCDCSourceOperator<T>> logger = null)
        {
            _database = database ?? throw new ArgumentNullException(nameof(database));
            if (string.IsNullOrWhiteSpace(collectionName))
                throw new ArgumentException("Collection name cannot be null or empty.", nameof(collectionName));

            mongoSettings ??= new MongoDbCDCSettings();
            _checkpointStore = checkpointStore
                ?? new InMemoryStateStore<string, byte[]>($"{database.DatabaseNamespace.DatabaseName}.{collectionName}.STORE");

            _collectionName = collectionName;
            _doInitialLoad = mongoSettings.DoInitialLoad;
            _pollIntervalMs = (int)mongoSettings.Delay.TotalMilliseconds;
            _maxBackOffSeconds = mongoSettings.MaxBackOffSeconds;

            // Build checkpoint keys
            var dbName = _database.DatabaseNamespace.DatabaseName;
            _checkpointKey = $"{dbName}.{collectionName}.CDC.ResumeToken";
            _initialLoadCheckpointKey = $"{dbName}.{collectionName}.INITIAL_LOAD_DONE";
            _lastRecordHashKey = $"{dbName}.{collectionName}.CDC.LAST_HASH";

            _logger = logger;
        }

        /// <summary>
        /// Start the operator. Sets up the collection, performs optional initial load,
        /// and spawns a thread that listens to the MongoDB change stream.
        /// </summary>
        /// <param name="emit">Callback invoked for every record produced.</param>
        /// <exception cref="ArgumentNullException"><paramref name="emit"/> is null.</exception>
        public void Start(Action<MongoDbRecord<T>> emit)
        {
            if (emit == null) throw new ArgumentNullException(nameof(emit));

            var dbName = _database.DatabaseNamespace.DatabaseName;
            LogInformation($"Starting MongoDB CDC operator for {dbName}.{_collectionName}...");

            // Clear a stop request left over from an earlier Stop(); otherwise the
            // initial load below would exit immediately yet be marked as done.
            _stopRequested = false;

            // 1. Select the collection
            _collection = _database.GetCollection<BsonDocument>(_collectionName);

            // 2. Initial Load (once)
            if (_doInitialLoad && _checkpointStore.Get(_initialLoadCheckpointKey) == null)
            {
                LogInformation($"Performing initial load for {dbName}.{_collectionName}...");
                RunInitialLoad(emit);
                _checkpointStore.Put(_initialLoadCheckpointKey, new byte[] { 0x01 });
                LogInformation($"Initial load completed for {dbName}.{_collectionName}.");
            }
            else
            {
                LogInformation($"Skipping initial load for {dbName}.{_collectionName} (already done or disabled).");
            }

            // 3. Check existing resume token
            var lastResumeTokenBytes = _checkpointStore.Get(_checkpointKey);
            if (lastResumeTokenBytes == null)
            {
                LogInformation($"No existing resume token found for {dbName}.{_collectionName}. Will start fresh.");
            }
            else
            {
                LogInformation($"Found existing resume token for {dbName}.{_collectionName}.");
            }

            // 4. Spin up background thread
            _pollingThread = new Thread(() => PollCdcChanges(emit))
            {
                IsBackground = true,
                Name = $"MongoCdcPolling_{dbName}_{_collectionName}"
            };
            _pollingThread.Start();
        }

        /// <summary>
        /// Gracefully stops the operator: requests the polling thread to stop and waits for it to exit.
        /// </summary>
        public void Stop()
        {
            var dbName = _database.DatabaseNamespace.DatabaseName;
            LogInformation($"Stop requested for MongoDB CDC operator {dbName}.{_collectionName}.");
            _stopRequested = true;

            // Never join the current thread (Stop() called from inside the emit callback).
            var thread = _pollingThread;
            if (thread != null && thread != Thread.CurrentThread)
            {
                thread.Join();
            }

            LogInformation($"MongoDB CDC operator stopped for {dbName}.{_collectionName}.");
        }

        /// <summary>
        /// The main CDC polling logic with a change stream + exponential back-off on errors.
        /// </summary>
        private void PollCdcChanges(Action<MongoDbRecord<T>> emit)
        {
            var dbName = _database.DatabaseNamespace.DatabaseName;
            int backOffSeconds = 1;

            while (!_stopRequested)
            {
                try
                {
                    // 1. Load the last resume token
                    BsonDocument resumeToken = null;
                    var lastResumeTokenBytes = _checkpointStore.Get(_checkpointKey);
                    if (lastResumeTokenBytes != null)
                    {
                        var tokenJson = Encoding.UTF8.GetString(lastResumeTokenBytes);
                        resumeToken = BsonDocument.Parse(tokenJson);
                    }

                    // 2. Configure the change stream
                    var options = new ChangeStreamOptions
                    {
                        FullDocument = ChangeStreamFullDocumentOption.UpdateLookup,
                        ResumeAfter = resumeToken
                    };

                    var pipeline = new EmptyPipelineDefinition<ChangeStreamDocument<BsonDocument>>();
                    using var cursor = _collection.Watch(pipeline, options);

                    LogInformation($"MongoDB Change Stream opened for {dbName}.{_collectionName}.");

                    // 3. Check last-record-hash to avoid duplicates
                    var lastHashBytes = _checkpointStore.Get(_lastRecordHashKey);
                    string lastHash = lastHashBytes == null ? null : Encoding.UTF8.GetString(lastHashBytes);

                    // 4. Read from the cursor batch by batch. ToEnumerable() would block inside
                    //    the driver until the next event arrives, so Stop() could hang on an idle
                    //    collection; MoveNext() returns (possibly with an empty batch) after each
                    //    await window, letting the stop flag be re-checked.
                    while (!_stopRequested && cursor.MoveNext())
                    {
                        foreach (var change in cursor.Current)
                        {
                            if (_stopRequested) break;

                            // (a) Update the resume token checkpoint
                            // NOTE(review): the token is persisted before emit, so a record whose
                            // conversion or emit throws is not replayed — confirm intended.
                            var currentResumeToken = change.ResumeToken?.AsBsonDocument;
                            if (currentResumeToken != null)
                            {
                                var tokenString = currentResumeToken.ToJson();
                                _checkpointStore.Put(_checkpointKey, Encoding.UTF8.GetBytes(tokenString));
                            }

                            // (b) Convert this to a typed record
                            var record = ConvertChangeToRecord(change);

                            // (c) Compute a hash for dedup (based on JSON)
                            var docForHash = change.FullDocument?.ToJson()
                                             ?? change.DocumentKey?.ToJson()
                                             ?? string.Empty;
                            var currentHash = ComputeMd5Base64(docForHash);

                            // (d) Check if it's a duplicate
                            // NOTE(review): content-based dedup also drops two genuine consecutive
                            // events that carry the same document snapshot — confirm acceptable.
                            if (currentHash == lastHash)
                            {
                                LogInformation($"Skipping duplicate record for {dbName}.{_collectionName}.");
                                continue;
                            }

                            // (e) Emit and update the last hash
                            emit(record);
                            lastHash = currentHash;
                            _checkpointStore.Put(_lastRecordHashKey, Encoding.UTF8.GetBytes(lastHash));
                        }
                    }

                    // Pause before re-opening the stream, unless a stop was requested
                    SleepUnlessStopped(TimeSpan.FromMilliseconds(_pollIntervalMs));

                    // Reset back-off if we got here without exceptions
                    backOffSeconds = 1;
                }
                catch (Exception ex)
                {
                    LogError($"Error in MongoDB CDC polling loop for {dbName}.{_collectionName}.", ex);

                    // Exponential back-off, never below one second even if the configured
                    // maximum is zero or negative.
                    SleepUnlessStopped(TimeSpan.FromSeconds(backOffSeconds));
                    backOffSeconds = Math.Max(1, Math.Min(backOffSeconds * 2, _maxBackOffSeconds));
                }
            }
        }

        /// <summary>
        /// Sleeps for up to <paramref name="duration"/> in short slices, returning early once a stop is requested.
        /// </summary>
        private void SleepUnlessStopped(TimeSpan duration)
        {
            const int sliceMs = 200;
            var remainingMs = duration.TotalMilliseconds;

            while (remainingMs > 0 && !_stopRequested)
            {
                // At least 1 ms per slice so that a fractional remainder cannot loop forever.
                var slice = (int)Math.Max(1, Math.Min(sliceMs, remainingMs));
                Thread.Sleep(slice);
                remainingMs -= slice;
            }
        }

        /// <summary>
        /// Loads the entire collection once, deserializing each document into T,
        /// and emitting a record with Operation="InitialLoad".
        /// </summary>
        private void RunInitialLoad(Action<MongoDbRecord<T>> emit)
        {
            var filter = Builders<BsonDocument>.Filter.Empty;

            // Dispose the cursor so the server-side cursor is released even on early exit.
            using var cursor = _collection.Find(filter).ToCursor();

            while (cursor.MoveNext())
            {
                foreach (var doc in cursor.Current)
                {
                    if (_stopRequested) break;

                    T typedDoc = BsonSerializer.Deserialize<T>(doc);

                    var record = new MongoDbRecord<T>
                    {
                        Operation = "InitialLoad",
                        Data = typedDoc,
                        ChangeTime = DateTime.UtcNow
                    };

                    emit(record);
                }
                if (_stopRequested) break;
            }
        }

        /// <summary>
        /// Converts a change stream event into a <see cref="MongoDbRecord{T}"/>.
        /// </summary>
        private MongoDbRecord<T> ConvertChangeToRecord(ChangeStreamDocument<BsonDocument> change)
        {
            var record = new MongoDbRecord<T>
            {
                ChangeTime = DateTime.UtcNow,
            };

            switch (change.OperationType)
            {
                case ChangeStreamOperationType.Insert:
                    record.Operation = "INSERT";
                    record.Data = DeserializeOrDefault(change.FullDocument);
                    break;

                case ChangeStreamOperationType.Update:
                    record.Operation = "UPDATE";
                    record.Data = DeserializeOrDefault(change.FullDocument);
                    break;

                case ChangeStreamOperationType.Replace:
                    record.Operation = "REPLACE";
                    record.Data = DeserializeOrDefault(change.FullDocument);
                    break;

                case ChangeStreamOperationType.Delete:
                    record.Operation = "DELETE";
                    // For deletes, FullDocument is normally null.
                    // If you only have the _id, you can build a partial T or just leave it as default.
                    record.Data = default;
                    break;

                default:
                    // Could handle drop, rename, invalidate, etc.
                    record.Operation = change.OperationType.ToString().ToUpperInvariant();
                    record.Data = default;
                    break;
            }

            return record;
        }

        /// <summary>
        /// Deserializes a document into T, or returns default(T) when the document is null.
        /// </summary>
        private static T DeserializeOrDefault(BsonDocument document)
        {
            return document != null
                ? BsonSerializer.Deserialize<T>(document)
                : default;
        }

        /// <summary>
        /// Computes an MD5 hash of the given string, returns Base64.
        /// MD5 is used as a fingerprint only, not for security.
        /// </summary>
        private static string ComputeMd5Base64(string raw)
        {
            using var md5 = MD5.Create();
            var bytes = Encoding.UTF8.GetBytes(raw);
            var hash = md5.ComputeHash(bytes);
            return Convert.ToBase64String(hash);
        }

        /// <summary>
        /// Stops the operator and releases it.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Stops the polling thread on first disposal; later calls do nothing.
        /// </summary>
        protected virtual void Dispose(bool disposing)
        {
            if (_disposed) return;
            if (disposing)
            {
                Stop();
            }
            _disposed = true;
        }

        // --------------------------------------------------------------------
        // LOGGING HELPERS
        // --------------------------------------------------------------------

        /// <summary>
        /// Writes an informational message to the logger, or to the console when no logger was supplied.
        /// </summary>
        private void LogInformation(string message)
        {
            if (_logger != null)
                _logger.LogInformation(message);
            else
                Console.WriteLine(message);
        }

        /// <summary>
        /// Writes an error (with optional exception) to the logger, or to the console when no logger was supplied.
        /// </summary>
        private void LogError(string message, Exception ex = null)
        {
            if (_logger != null)
            {
                _logger.LogError(ex, message);
            }
            else
            {
                Console.WriteLine(ex != null
                    ? $"ERROR: {message}\n{ex}"
                    : $"ERROR: {message}");
            }
        }
    }
}
diff --git a/src/Cortex.Streams.MongoDb/MongoDbRecord.cs b/src/Cortex.Streams.MongoDb/MongoDbRecord.cs new file mode 100644 index 0000000..001a085 --- /dev/null +++ b/src/Cortex.Streams.MongoDb/MongoDbRecord.cs @@ -0,0 +1,45 @@
using System;
using System.Collections.Generic;

namespace Cortex.Streams.MongoDb
{
    /// <summary>
    /// Represents a generic CDC record for MongoDB.
    /// </summary>
    public class MongoDbRecord
    {
        /// <summary>
        /// The operation type: 'INSERT', 'UPDATE', 'DELETE', 'REPLACE', or 'InitialLoad'.
        /// </summary>
        public string Operation { get; set; }

        /// <summary>
        /// The document data (fields and values) after the operation (for insert/update/replace).
        /// For deletes, this may contain only the _id or other keys available before deletion.
        /// </summary>
        public Dictionary<string, object> Data { get; set; }

        /// <summary>
        /// The approximate time (UTC) when this change was captured or emitted.
        /// </summary>
        public DateTime ChangeTime { get; set; }
    }

    /// <summary>
    /// Represents a CDC record for MongoDB whose document is deserialized into <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">Type the document is deserialized into.</typeparam>
    public class MongoDbRecord<T>
    {
        /// <summary>
        /// The operation type: 'INSERT', 'UPDATE', 'DELETE', 'REPLACE', or 'InitialLoad'.
        /// </summary>
        public string Operation { get; set; }

        /// <summary>
        /// The document data, deserialized into type T.
        /// For deletes, this may be default(T) if the document no longer exists.
        /// </summary>
        public T Data { get; set; }

        /// <summary>
        /// The approximate time (UTC) when this change was captured or emitted.
        /// </summary>
        public DateTime ChangeTime { get; set; }
    }
}
From a3323537bc8933a85abdbda8f07e64a4acd18339 Mon Sep 17 00:00:00 2001 From: Enes Hoxha Date: Sat, 18 Jan 2025 22:57:08 +0100 Subject: [PATCH 12/12] [v1/feature/90]: Update README --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 3647c7f..66e606b 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,15 @@ - **Cortex.Streams.Http:** Implementation of Http Source and Sink operators. [![NuGet Version](https://img.shields.io/nuget/v/Cortex.Streams.Http?label=Cortex.Streams.Http)](https://www.nuget.org/packages/Cortex.Streams.Http) +- **Cortex.Streams.CDC.MSSqlServer:** CDC adapter for Microsoft Sql Server. +[![NuGet Version](https://img.shields.io/nuget/v/Cortex.Streams.CDC.MSSqlServer?label=Cortex.Streams.CDC.MSSqlServer)](https://www.nuget.org/packages/Cortex.Streams.CDC.MSSqlServer) + +- **Cortex.Streams.CDC.PostgreSQL:** CDC adapter for PostgreSQL. +[![NuGet Version](https://img.shields.io/nuget/v/Cortex.Streams.CDC.PostgreSQL?label=Cortex.Streams.CDC.PostgreSQL)](https://www.nuget.org/packages/Cortex.Streams.CDC.PostgreSQL) + +- **Cortex.Streams.CDC.MongoDb:** CDC adapter for MongoDb. +[![NuGet Version](https://img.shields.io/nuget/v/Cortex.Streams.CDC.MongoDb?label=Cortex.Streams.CDC.MongoDb)](https://www.nuget.org/packages/Cortex.Streams.CDC.MongoDb) + - **Cortex.States:** Core state management functionalities.
[![NuGet Version](https://img.shields.io/nuget/v/Cortex.States?label=Cortex.States)](https://www.nuget.org/packages/Cortex.States)