diff --git a/Project.toml b/Project.toml
index 0f2df88..2deeccd 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,6 +12,7 @@ Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e"
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[compat]
Adapt = "4.1.1"
diff --git a/editflow_code/Project.toml b/editflow_code/Project.toml
new file mode 100644
index 0000000..632a12f
--- /dev/null
+++ b/editflow_code/Project.toml
@@ -0,0 +1,26 @@
+[deps]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CannotWaitForTheseOptimisers = "16124dda-d9fe-413b-a880-e3f4df3aa341"
+Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+Flowfusion = "5b4e93c8-7b6e-4682-b400-fc3b238f52b1"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+ForwardBackward = "e879419d-bb0f-4252-adee-d266c51ac92d"
+Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
+IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a"
+LearningSchedules = "7e85a9e7-c65d-495e-8ec0-baa06f07ebbf"
+Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e"
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
+OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
+Onion = "fdebf6c2-71da-43a1-b539-c3bc3e09c5c6"
+Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
+RandomFeatureMaps = "780baa95-dd42-481b-93db-80fe3d88832c"
+Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
diff --git a/editflow_code/abs.txt b/editflow_code/abs.txt
new file mode 100644
index 0000000..b914fde
--- /dev/null
+++ b/editflow_code/abs.txt
@@ -0,0 +1,108 @@
+QLQLQESGPGLVKPSETLSLTCTVPGGPISSSSYYWGWIRQPPGKGLEWIGSIYYSGSTYYNPSLKSRVTISVDTSKNQSSLKLSSGTAADTAVYYCYSNYKVDYYVDVLGKGTTVTGSS
+EGKRVECGGGGGGRGGARRGGCEEEGVTFDEEGMSGGSQARGKGREGVSGRNWKGGSTGYADSVKGRFTISRDNAKNSLYLQMNSLRAEDTAFSSCAIYITLFEFLYSSFDISGQGTLFTFSS
+EGQRVECGGGGGQRGRDRRGECIEEGVTFGEEGMSGGGQAKGKGREGVGGRRSKEEGGTTEEGVSVKGRFTISRDDSKSIAYLQMNSLKTEDTAVYYCTRALSADYYSSGISLYYSSLDVSGKGTPFTFSS
+EVHLVESGGGLVQPGGSLRLSCAASVLTFSSFALSWVLHAPGKALEWVSAISGSGGCTYYAVTVKGRFTISREKSKNTLYMQMNSLRAEDTAVYCCAKDQQQLFSDWGQETMVTVSS
+EVQLVESGGDLVQPGGSLRLSCAISGFTVSNNHVTWFRQAPGKGLEWVSLIYNAGSTIYADSVKGRFTISRENSKNIVYLQMNSVTAEGTAVYYCLGLGAKAVWGQGTVVTVSA
+QVQLVQSGAEVKKPGASVKISCTASGYTITNHYMHWVRQAPGQGPEWVGVINPSGDRTVYAQKFQGKVTMTRDTSTSTVYMEVSSLKSDDTAVYYCARDNSAQEKSWWFDPWGQGTLVTVSS
+EVQLVESGGGLVQPGGSLILSCAASGFTFSSHWMGWVRQAPGEGLEWVANLNQDGSETYYVDSLKGRFTISRDNTKNSLCLQMNSLRTEDTAVYFCARLGYRLAEYWGQGTLVTVSA
+QVQLVQSGAEVKKPGASVKISCTASGYTITNHYMHWVRQAPGQGPEWVGVINPSGDRTVYAQKFQGKDTMTRDTSTSTVYMEVSSLKSDDTAVYYCARDNSAQEKSWWFVHWGQGTLVTVSS
+QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYGMQWVRQAPGKGLEWVAVISDDGSTKHYADSVNGRFTVSRDNSKNTLYLQMNSLRAEDTAVYYCAKEKGGNYMPLDYWGQGTLVIVSA
+EVQLVESGGGPVKPGGSLRLSCTVSGFTFTSYSMDWVRQAPGQGLEWLSYIHNGGNDVSYADSVRGRFTISRDNAKNSVDLQMNSLRAEDTAVYYCVRDYDYSFDYWGQGILVTVSS
+QVQLVQSGTEVKKPGASVKVSCKASGYTFTSYAMRWVRQAPGQRLEWMGWINAGDGNTKYSQKFQDRVTITRDTSTSTAYMELSSLRSEDTAVYYCAREMVVVMRGRSDTASGGKGTLVTVSS
+QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYGMHWVRQAPGKGLEWVAVIWYDGSNKYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDVVTGYSNLSGLGYWGQGTLVTVSP
+EVQLVESGGGLVQTGGSLRLSCAASGFTIINYWMHWVRQAPGKGLVWVSHINGDGRRTTYADSVKGRFTISRDDAKNTVYLQMNSLRDEDTAVYYCARDRFYSPGHWGQGILVTVSS
+QVQVVQSGAEVKRPGASVTVSCKPSGYTFTSYGISWVRQAPGQGLEWMGWISIYNGNTKYAQTLQGRVTLTRDTSTNTAYMELRSLRSDDTAVYYCARGGPACCNGGTCYGDAFDFLGQGTTGTVSS
+EVQLVESGGGLVQPGGSLILSCAASGFTFSSHWMGWVRQAPGEGLEWVANLNQDGSETYYVDSLKGRFTISRDNTKNSLYLQMNSLRTEDTAVYFCARLGYRLAEYWGQGTLVTVSA
+QVQLGQSGAEVEKPGASVKVSCKASGYTFTGYYMHWVRQAPGQGLEWMGWINPNSGGTNYAQKFQGWVTMTRDTSISTAYMELSRLRSDDTAVYYCARDYSSGWYYFDYWGQGTLVIVSP
+QVELRESGRVMVRPSETLSLTCTVSGGSISSYYWSWIRQPPGKGLEWIGYIYYSGSTNYNPSLKSRVTISVDTSKNQFSLKLGSVTAADTAVYYCARRKVVPADISQPHNDAFDIWGQGIMDSVSS
+QLQLQESGPGLVKPSETLSLTCTVSGGSISSSSYYWGWIRQPPGKGLEGIGSIYYSGSTYYNPSLKSRVTISVDTSKNQFSLKLSSVTAADTAVYYCARHRSGLELLSYYYYYMDVWRKGTTHTFSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYYMHWVRQAPGQGLEWMGIINPSGGSTSYAQKFQGRVTMTRDTSTSTVYMELSSLRSEDTAVYYCARVGVITGTTGGWFDPWGQGTLVTVSS
+QVQLVQSGAEVKRAGASVKVSCKASGYTFTSYYMHWVRQAPGQGLEWMGIINPSGGSTSYAQKFQGRVTMTRDTSTSTVYMELSSLRSEDTAVYYCATSGVGATPFDYWGQGTLVTVSS
+QVLLVQSGTEVKKPGASVKVSCKAYGYSFTTYGTTWVRQAQGLEYMGWISPYNGDTNYAQELQSRVTMTTDTSTSTAYMELRSLTYDDTAVYCWTRSGLQYDSGMNGGGQGTAVTVSS
+QVQLVQSGAEVKKPGASVKVSCRTSGYSFTTYAIHWVRQAHGQRLAWVGCINADNGNTQYSQSFQGRVTITRDTSASTAYMELSSLRSEDAAVYYCARDLAFYGSFGGDYCGQGALIAFSS
+QVQLQESGPGLVKPSETLYLTCTVSGGSISSYYWSWIRQPPGKGLEWIGYIYYSGSTNYNPSLKSRVTKSVDTSKNQFSPKLSSVTAADTAVYYCARDGGGSGSYYPHYYYYYGMDVWGQGFSDTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCAKAGGYSYGWGLFDPWGQGTLVTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYAMHWVRQAPGQRLEWMGWINAGNGNTKYSQKFQGRVTITRDTSASTAYMELSSLRSEDTAVYYCARSITPSGYDYVYFDYWGQGTLFTVSS
+EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYEMNWVRQAPGKGLEWVSYISSSGSTIYYADSVKGRFTISRDNAKNSLYLQMNSLRAEDTAVYYCARDGSGGSLGWFDPWGQGTLVTVSS
+EVQLVESGGGLVQPGGSLRLSCAASGFTFSNYAMTWVRQAPGKGLEWVSSISEAGGSSYYADSVKGHFTISRDNSKNTLYLQMNSLRAEDTAVYYCAKGLLYSGRQALVDYWGQGTLVTVFS
+QVQLQQSGPGLVKPSQTLSLTCAISGDSVSSNSAVWNWIRQSPSRGLEWLGRTYYRSKWYNDYAVSMKSRVTINTDTSRNQFSLQLNSVTPEDTAVYFCARDQSGWYATSWYFDFWGQGPLATLSS
+EVQLVESGGGLVQPGGSLRLSCAASGFTVSSNYMSWVRQAPGKGLEWVSVIYSGVTTYYADSVKGGLTISRDTSKNTLYLQMNSLRAEDTAVYYCARDRSRNWGFDCLGQGTLGTGFS
+QVQLVQSGAEVKKPGASVKVSCKTSGYTFSTYAITWVRQAPGQGLEWVGWISGYNGNTRYGEKLLGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCVAAPTSSDCRSTNCPRGHYYYGVDVWGQGTTVTVSS
+EVQLVESGGGLVQPGGSLTLSCAASAFLFSSYWMHWVRQAPGKGLVWASRINSDGRSTKYADSVTGRFTTSRDNAKNTLYLQMNSRRDEDPAVHYCAREFGPALDDWGQGTLVTVAS
+QVQLQESGPGLVKPSETLPLTCTVSGVSISSYLWSWIRQSPGKGLEWIGYIHYSVSTNYNPSLKSRVTMSVDTSKKQFSLKLSSVTAADTAVYYCARCTGAGRSACDAFDIWGQGTMVTVSS
+QVQLVQSGAEVKKPGSSVNVSCKASGGTFSSYTISWVRQAPGQGLEWMGRIIPILGIANYAQKFQGRVTITADKSTSTAYMELSSLRSEDTAVYYCARGLWFGELLYSWFDPWGQGTLFSVSS
+QVQRVQSGAEVKKPGASVKCSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARDLQGENYDFWSGYYTGPVWYMDVWGKVFTDTVSS
+QVTLRDSVXALVKPTHTLTLTCTFSGFSLSSSGMCVSWIRHPPGNSLEWLALIDWDDDKYYSTSLKTRLTISKDTYKNQVVLTMTNMDPVDTATYYCSRIQAEEGTEYYIDYRGQGNLVNVSS
+QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYGMHWVRQAPGKGLEWVAVIWYDGSNKYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVDYCERPAAIWSGSYIDDWSQGTLVIVSS
+EVQLVESGGGLVQPGGSLRLSCAASGFILTNYWMNWVRQAPGKGLEWVANIKRDGSETYYVDSVKGRFTISRDIAKNLLYLQMNSLRAEDTAIYYCVRGSAGAKDYWGQGSLVNVSS
+QVQLVQSGAEVKKPGASVKISCTASGYTLTNHYMHWVRQAPGQGPEWVGVINPSGDRTVYAQKFQGKVTMTRDTSTSTVYMEVSSLKSDDTAVYYCARDNSAQEKSWWFDPWGQGILVTVSS
+EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYSMNWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCAKHPGSIAGEFDYWGQGTLVTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVNMTTDTSTSTAYMELRSLRSDDTAVYYCARDGDLAVAGISQDWGRGTLVTISS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYYMHWVRQAPGQGLEWMGIINPSGGSTSYAQKFQGRVTMTRDTSTSTVYMELSSLRSEDTAVYYCARVRKCGGDCKRVWFDPWGQGTLVIVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVILTSDTSTSTAYMQLRSLRSDDTAVYYCARTPERQLGARSEYWGERALGSVSS
+EVQLVTSEVGPLEPGGSLKLSCETTGFIFSDSGMHWVRQASGKGLEWVGRIRSKANSYTTAYAASVKGRFTISRDDSKNTAYLQMNSLKTEDTAVYYCTRYPAGQIMPTPFDYWGQGALVTFSS
+QVYLQQSGPGLVKPSQTLSLTCAISGDSVSSNIAAWNWIRQSPSRGLEWLGRTYYKSKWYNDYAVSVKSRITINPATTKNQFSLQLNSVTPEDTAVYYCARGPGGLVGFDFWGQGTLVTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARGRYYDSSGYPLYWYFDLWGRGTLVTVSS
+QVQLVQSGAAVKKPGASVRVSCQASGYTFTDYMMHWVRQAPGQRLEWMGWITSGNGNTKYSENFQGRVTITRDTSASTAYMELSSLTSEDTAVYYCARALTRSSNWFDPWGQGSFDTIST
+EVQVVEHGGGLVQPGGSLRLSCAASGFTVSSNYMSWVRQAPGKGLEWVSVIYSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARGRIAFSYWGQGTLVTVSS
+QVQLVQYGSDVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARDLRAVAGTIFRGVLSYWGQGILVTVSS
+QVLLVQSGAEVKKPGASVKLSCKGFGFMFTRYAIHWVRQAPGQGLEWMGCIVPVDGKTYYSQRFQDKVTITRDTSASTAYVDVSRLTPEDTAVYYCARDLVGAGYFDYWGQGALVTVSP
+EVQLVESGGGLVKPGGSLRLSCAASGFTFSNAWMSWVRQAPGKGLGWVGRIKSKTDGGTTDYAAPVKGRFTISRDDSKNTLYLQMNSLKTEDPSVYYCTRDLRTPVVRIVEWLHYTQGGGTPTTVTPSS
+EVHLVESGGGLVKPGGSLRLSCAASGFTVSSNYMSWVRQAPGKGLEWVSVIYSGGSAYYADSVKGRFTISRHNSKNTLYLQMNSLRAEDTAVYYCGRCRRRAAAGTGYYDDFDIWGQGTMVTVIS
+QVQLQESGPGLVKPSETLSLTCTVSGGSISSYYWSWIRQPPGKGLEWIGYIYYSGSTNYNPSLKSRVTISVDTSKNQFSLKLSSVTAADTAVYYCARMKYGSELLDYYYYMDVWGKGTTVTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGIRWVRQAPGQGLEWMGWISADNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARVRGLSLGFDYWGQGTLVNVSS
+QVRLVQYGANVKKPGASVKVSCKASGYTFTGYYMHWVRQAPGQGLEWMGWINPNSGGTNYAQKFQGWVTMTRDTSISTAYMELSRLRSDDTAVYYCARDQNINDAFDIWGQGIIDTVSS
+EVQMAKYGGGPDKPGGSMKITCAASGFTFSSYSMNWVRQAPGKGLEWVSSISSSSSYIYYADSVKGRFTISRDNAKNSLYLQMNSLRAEDTAVYYCARDGKRGSVVVAATLGWGYYYYGMDVLFHWTTVTVSS
+QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYGMHWVRQAPGKGLEWVAVIWYDGSNKYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCASLMSFGESRYYYYYGMDVWGQGSTVSVAS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTNYGISWVRQAPGQGLEWMGWISAHNGNTNYAQNLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARGTKAFDIWGQVIIVTVSS
+QVQLVQSGAEVKKPGASVKISCTASGYTITNHYMHWVRQAPGQGPEWVGVIDPSGDRTVYAQKFQGKVTMTRDTSTSTVYMEVSSLKSDDTAVYYCARDNSAQEKSWWFDPWGQGTLVTVSS
+EVQLVESGGGLVQPGGSLRLSCAASGFTFSSFAMSWVRQAPGKGLEWVSGIRGSGGNTHYGDSVKGRFTISRDNSKNTLYLQIDGLRAEDTALYYCAKATGVANFWSHDAFDIWGQGTIVTVSS
+QEKGGQYGGKVKKPRSWVKVSGKASRGTFSSYTISWVRQAPGQGLEWMGRIIPILGIANYAQKFQGRVTITADKSTSTAYMELSSLRSEDTAVYYCASSGYCSSTSCYDPLKDAFDIWGQGNMVIDYS
+QVQLQESGPGLVKPSETLPLTCTVSGVSLSSYLWSWIRQSPGKGLEWIGYIHYSVSTNYNPSLKSRVTMSVDTSKKQFSLKLSSVTAADTAVYYCARCTGAGRSACDAFDIWGQGTMVTVSS
+EVQLVESGGGLVKPGGSLRLSCAASGFTFSSYSMNWVRQAPGKGLEWVSSISSSSSYIYYADSVKGRFTISRDNAKNSLYLQMNSLRAEDTAVYYCARARAPWGQGNLVTLSS
+QLQLQESGPGLVKPSETLSLNCTVSGGSISSSSYYWGWIRQPPGKGLEWIGSIYYSGSTYYNPSLKSRVTISVDTSKNQFSLKLSSVTAADTAVYYCARPSHMITFGGVIVTRSFDYWGQRTLVTVSS
+QVQLVQSGAEVKKPGSSVKVSCKASGGTFSSYTISWVRQAPGQGLEWMGRIIPILGIANYAQKFQGRVTITADKSTSTAYMELSSLRSEDTAVYYCARGGGYCSGGSCYPDAFDIWGQGTMVTVAS
+QVQLVQSGAAVKTPGASVKVSCKASGYNFTNYGISWVRQAPGQGLEWMGWISAHNGNTNYAQNLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCAGGTKAFDIWGQGTMVIVFS
+QITLKESDPTLVKPTQTLTLTCTFSVFSLSTSGVGVGWIRQPPGKALEWLALIYWDDDKRYSPSLKSSLTITKDTSKNQVVLTMTNMDPVDTATYYCAHRPYGDARDADDIGDQGAMVTVSS
+QVQLVQSGAEVKKPGSSVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARDGAVSHTSLECSGGSCYDAFDIWGQVTMNTVSS
+QVQLVQSGAEVQKPGASVKVSCKASGYIFTSYFIHWMRQAPGQGLEWMGSINPRGGNTNYALKFQGRVTMTRDTSTGTVHMELSRLGSEHTAVFYCAKGSGRPDLARLSRFGPWGQGTLGIVSS
+QVQLVKSGDEVKKPGASVKVYCKASGYTFTSYAMHWVRQAPGQRLEWMGWINAGNGNTKYSQKFQGRVTITRDTSASTAYMELSSLRSEDTAVYYCARVVVPAAMVNWFDPWGQGTLVTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYAMHWVRQAPGQRLEWMGWINAGNGNTKYSQKFQGRVTITRDTSASTAYMELSSLRSEDTAVYYCASAIVATMAFDYWGQGTLVTVSS
+QVQLVQSDADVKKPLASVEVSCKASGGTFSSYTISWVRQAPGQGLEWMGRIIPILGIAHYAQKFQGRVTITADKSTSTAYMELSSLRSEDTAVYYCASEEKGSMMVARAFDYWGQVILVTASS
+QVQLVQSGTEVKRPGASVKVACKAYGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARGHGECSSTSCYGLNVDDYWGQGTLVTVSS
+EVQLVESGGGLVQPGGSLILSCAASGFTFSSHWMGWVRQAPGEGLEWVANLNQDGSETYYVDSLKGRFTISRDNTKNSLYLQMNSLRTEDTAMYFCARLGYRLAEYWGQGTLVTVSA
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTGYYMHWVRQAPGQGLEWMGWINPNSGGTNYAQKFQGWVTMTRDTSISTAYMELSRLRSDDTAVYYCAREVVLTYYYGSGSYYNVPGSPGGQYYFDYWGQGTLVIVSS
+EVQLVESGGGLVQPGGSLILSCAASGFTFSSHWMGWVRQAPGEGLEWVANLNQDGSDTYYVDSLKGRFTISRDNTKNSLYLQMNSLRTEDTDVYFCARLGYRLAEYWGQGTRVIVSA
+EVQLVESGGGLVKPGGSLRLSCAASGFTFSNACMSWVRQAPGKGLEWVGRIKSKTDGGTTDYAAPVKGRFTISRDDSKNTLYLQMNSLKTEDTAVYYCTTVSGDIVVVPAAIPTRGSIAVAGTFGYWGQGALVTVSS
+EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLEWVAAISGSGGSTYYADSVKCRFTSSRDNSKNTLYLQMNSLRAEDTAVYYCAKVSRYYYGSGIEFDYRGQGTLVNVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKIQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARGVYPDDSSDHPFDYWGQGTLVSVSP
+QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVAVISYDGSNKYYADSVKGRFTISRDNSKSTLYLQMNSLRAEDTAVYYCARDPDGYWSGGSCYLYYFDYWGQGILVTGSA
+QITLKESGPTLVKPTQTLTLTCTFSGFSLSTSGVGVGWIRQPPGKALEWLALIYWDDDKRYSPSLKSRLTITKDTSKSQVVLTMTNMDPVDTATYYCAHFGFGELLLENGAQGTLVTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARDRPTVGGSTRIGFRGPGNLGTGSS
+EVQVVESGGGLVQPGGSLRLSCAATGFTVNRNHVKWVRQAPGKGLECVSVILDTGVTYYADAVKGRFTISRDNSKNTVNLEMNSLRAEDTAIYYCGGYGANSVWGQGSLGTVSP
+EVELVESGGGLVQPGGSLRLSCAASGFNCNIYAMHWVRQAPGKGLEFLSSVLGNGAKRQYASSVKGSFTMSRDNSKTTLHLHMGSLRPDDTALYYCARDKDSGYAFVYWGQGALLTVSS
+QVQLVQSGAEVKKPGSSVKVSCKASGDTFSSYTISWVRQAPGQGLEWMGRIIPILGIANYAQKFQGRVTITADKSTSTAYMELSSLRSEDTAVYYCASLAYCGGDWYSTNRNYYFDDWGQGTLINVSS
+EVQLVKSGGGLVKPGGSLRLSCAASGFTFSNAWMSWVRQAPGKGLEWVGRIKSKTDGGTTDYAAPVKGRFTISRDDSKNTLYLQMNSLKTEDTAVYYCTTDAVPVGYWGQGFLVTASS
+QVQRVQAGAAVKKPGTSVKVSCKASGGTFSSYTISWVRQAPGQGLEWMGRIIPILGIANYAQKFQGRVTITADKSTSTAYMELSSLRSEDTAVYYCARDYGGRLAAMGYYWGQGSLVTVAS
+QVQLQESGPGLVKPSGTLSLTCAVSGGSISSSNWWSWVRQPPGKGLEWIGGIYHSGSTNYNPSLKSRVTISVDKSKNQFFLKLSSVTAADTAVYYCASDNWNYSWFDPWGGEALVAISS
+QVQLVQSGAAVKKPGASVQVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARESGGWYYFDNWGQGTLVTVSS
+EVLLVXXXGGLVQPXXSLRLSXAASGFTFTDYWMHWVRQSPGKWLVWVSRINNDVSDTIYADSVKGRFTLSRDKAKNTLYLQMNSLRVEYTAVYYCARGGWSHGFDIWGQGKMVTVSS
+EVQLVESGGGLVQPGVSLRLSCAASGFTFSSYAMHWVRQAPGKGLEYVSAISSTRGSTYSANAVKGRFTISIENSKKTLYLQMGRLRAEDSAVYYWASMGNLRYLDWLPTVPDDFEIWGEGAMGTVSS
+QVQLVQSGAEVKKPGASVKVFCKASGYTFTNYIMHWVRQAPGQRLEWMGWISAGNGNTNFSQDFQDRVTFTRDTSESTDYMELSSLNSEDTAVYYCAREGFEPWGQGNLVNVSS
+QVQLVQSGAEVKKTGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARSSDIVVVPDYYGMDVWGQGTTVTVSS
+QLQLQESGPGLVKPSETLSPTCTVSGGSISSSSYYCGWIRRPPGKGLEWIGSIYYSGSTYYNPSLKSRVTISVDTSQNQFSLKLSSVTAADPAVDYCARHLVERPFGYWGQGTLGTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCASDYYDTSLPGDYWRPRTLVPVSS
+QVQLVPSGAAVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCAREWVGFDPWGQGTLVTVSP
+QLQLQESGPGLVKPSETLSLTCTVSAGSISSNNYFWGWIREPPGKGLEWIGSVYSSGSTYYNPSLKGRVTISVDTSKNQFSLKLTSVTAADTAVYYCATPWRATYYYGSSGYYGWDAFDTWGQGTMVTVFS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQATGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARGDILPVYFCHFGAQGPGVTVST
+QVQLVESGGGVVQPGRSLRVSCAASGFTFSNYGMHWVRQAPGRGLEWVAVILKDGSKDDYADSVKGRFTISRENSKNTLYLQMNSLRAEDTVFFFCGRDDEGVQNGFVGGGQGKMAPFFS
+EVQLVASGGVLVKPEGSLRLSCAASGFTFSSYSMNWVRQAPGKGLEWVSSISSSSSYIYYADSVKGRFTISRDNAKNSLYLQMKSLRAEDTAVYYCATSGYDWRYWGQGSLLTVSS
+EVQVVESGGDLVHPGGSLRLSCVASGFTVSNNHVSWVRQAPGKGLEWVSFIHNSGTTHYADSVKGRFTISRDNSRNTVYLQMNSVRVEDTAVYYCMGYGGNSVWGQGTLVTVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTNYGINWVRQAPGQGLEWMGWIGAYNAYTNYAQKLQGRVTMTTDASTNTAYMELRSLRSDDTAVYYCAREYSGYDLEDYWSQGTLVTVAS
+EVQLVESGGGPVKPGGSLRLSCTVSGFTFTSYSMDWVRQAPGQGLEWLSYIHNGGNDVSYADSVRGRFTISRDNAKNSVDLQMNSLRAEDTAVYYCVRDYDYSFDDWGQGILVTVSS
+HLQLKESGTRLLKPSETLSLTCTVSCSYISSSSYYWGWIRQPPGKGLEWIGSIYYSGSTYYNPSLKSRVTISVDTSKNQFSLKLSSVTAADTAVYYCARPLVVPAAYFDYWGQGTLVIVSS
+QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARDGPSYSSGTGIDYWGQGTLFTVSS
+QVQLVQSGSEVKKSGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARDAGAAMADYGMDVWGQGTTVTVSS
+QVQLVQAGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARDFPTYYYDSSGYYPAGWGHGTLVTVSS
+QVKLVQSGAEVKKPGSSVKVSCKASGGTFSSYTISWVRQAPGQGLEWMGRIIPILGIANYAQKFQGRVTITADKSTSTAYMELSSLRSEDTAVYYCAREGSGPNWFDPWGQGTLVTVFS
+EVQLVESGGGLVKPGESLRLSCAASGFTFSNAWMGWVRLAPGKGLEWLGRFINKPGGGTADYSAPVRGRYIISRDDSKDTLYFEMKSLKTEDTGVYYCTTSLTTGGFDFWGQGTMVTVSS
\ No newline at end of file
diff --git a/editflow_code/analysis_notebook copy 2.ipynb b/editflow_code/analysis_notebook copy 2.ipynb
new file mode 100644
index 0000000..532ad3a
--- /dev/null
+++ b/editflow_code/analysis_notebook copy 2.ipynb
@@ -0,0 +1,3051 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `~/dev/Flowfusion.jl/editflow_code`\n",
+ "\u001b[92m\u001b[1mPrecompiling\u001b[22m\u001b[39m project...\n",
+ " 2645.6 ms\u001b[33m ✓ \u001b[39mFlowfusion\n",
+ " 1 dependency successfully precompiled in 4 seconds. 470 already precompiled.\n",
+ " \u001b[33m1\u001b[39m dependency precompiled but a different version is currently loaded. Restart julia to access the new version. Otherwise, loading dependents of this package may trigger further precompilation to work with the unexpected version.\n",
+ "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Project.toml`\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Manifest.toml`\n",
+ "WARNING: redefinition of constant Main.AA20. This may fail, cause incorrect answers, or produce other errors.\n",
+ "WARNING: redefinition of constant Main.TOK2ID_AA. This may fail, cause incorrect answers, or produce other errors.\n",
+ "WARNING: redefinition of constant Main.PM. This may fail, cause incorrect answers, or produce other errors.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "plot_len_dist (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "using Pkg\n",
+ "Pkg.activate(@__DIR__)\n",
+ "Pkg.instantiate()\n",
+ "using Revise\n",
+ "\n",
+ "using Random\n",
+ "using Statistics\n",
+ "using Adapt\n",
+ "using Functors\n",
+ "using Flux\n",
+ "using Onion\n",
+ "using RandomFeatureMaps\n",
+ "using Zygote\n",
+ "\n",
+ "Pkg.develop(path=joinpath(@__DIR__, \"..\"))\n",
+ "using Flowfusion\n",
+ "const FF = Flowfusion\n",
+ "\n",
+ "include(joinpath(@__DIR__, \"prob_model.jl\"))\n",
+ "include(joinpath(@__DIR__, \"helper_funcs.jl\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "to_same_device (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import CUDA\n",
+ "\n",
+ "# Device helpers.\n",
+ "# CUDA is probed once; if the probe itself throws, behave as if no GPU is present.\n",
+ "const _gpu_enabled = try\n",
+ "    CUDA.has_cuda()\n",
+ "catch\n",
+ "    false\n",
+ "end\n",
+ "\n",
+ "# Adapt `x` to CuArray storage when the GPU is enabled; otherwise return `x` as is.\n",
+ "function to_dev(x)\n",
+ "    _gpu_enabled || return x\n",
+ "    return Adapt.adapt(CUDA.CuArray, x)\n",
+ "end\n",
+ "\n",
+ "# Adapt `x` to plain Array storage when the GPU is enabled; otherwise return `x` as is.\n",
+ "function to_cpu(x)\n",
+ "    _gpu_enabled || return x\n",
+ "    return Adapt.adapt(Array, x)\n",
+ "end\n",
+ "\n",
+ "# Move `x` to the same device type as `y`: CuArray when the GPU is enabled and `y` is a\n",
+ "# CuArray, plain Array in every other case.\n",
+ "function to_same_device(x, y)\n",
+ "    if _gpu_enabled && y isa CUDA.CuArray\n",
+ "        return Adapt.adapt(CUDA.CuArray, x)\n",
+ "    end\n",
+ "    return Adapt.adapt(Array, x)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "make_minibatch (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Minibatch builder (uses true PM)\n",
+ "#\n",
+ "# Arguments\n",
+ "#   B - number of (x0, x1, t) examples to draw.\n",
+ "#   P - the edit-flow process; only its vocabulary size `P.k` is read here.\n",
+ "# Keywords\n",
+ "#   rng        - random number generator used for every draw in this function.\n",
+ "#   max_x0_len - largest length drawn for a source sequence (default 10, the value\n",
+ "#                that used to be hard-coded).\n",
+ "# Returns `(x0s, x1s, ts)`: two length-`B` vectors of `FF.DiscreteState` and the result\n",
+ "# of `rand(rng, Float32, B)`.\n",
+ "#\n",
+ "# NOTE: target sequences come from the global `PM`, which is not defined in this cell.\n",
+ "function make_minibatch(B::Int, P::FF.EditFlow; rng=Random.default_rng(), max_x0_len::Int=10)\n",
+ "    max_x0_len >= 1 ||\n",
+ "        throw(ArgumentError(\"max_x0_len must be at least 1, got $max_x0_len\"))\n",
+ "    K = P.k\n",
+ "    x0s = Vector{FF.DiscreteState}(undef, B)\n",
+ "    x1s = Vector{FF.DiscreteState}(undef, B)\n",
+ "    for b in 1:B\n",
+ "        # x1 from true PM\n",
+ "        seq1 = sample(PM; rng=rng)\n",
+ "        # Explicit check rather than @assert, which may be disabled at some\n",
+ "        # optimization levels and must not be relied on for data validation.\n",
+ "        all(tok -> 1 <= tok <= K, seq1) ||\n",
+ "            error(\"sample(PM) produced a token outside 1:$K\")\n",
+ "        x1s[b] = FF.DiscreteState(K, seq1)\n",
+ "        # x0: uniform tokens with random length in 1:max_x0_len (no BOS in x0)\n",
+ "        L0 = rand(rng, 1:max_x0_len)\n",
+ "        seq0 = rand(rng, 1:K, L0)\n",
+ "        x0s[b] = FF.DiscreteState(K, seq0)\n",
+ "    end\n",
+ "    ts = rand(rng, Float32, B)\n",
+ "    return x0s, x1s, ts\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Model definition\n",
+ "# `layers` holds the NamedTuple of sub-layers built by the keyword constructor below;\n",
+ "# the struct is registered with Flux through `Flux.@layer`.\n",
+ "struct EditFlowModel{L}\n",
+ "    layers::L\n",
+ "end\n",
+ "Flux.@layer EditFlowModel\n",
+ "\n",
+ "# Keyword constructor.\n",
+ "#   d         - embedding width and input width of every output head.\n",
+ "#   num_heads - forwarded to each `Onion.AdaTransformerBlock`; RoPE uses `d ÷ num_heads`.\n",
+ "#   nlayers   - number of transformer blocks.\n",
+ "#   rff_dim   - output size of the random Fourier features applied to the time input.\n",
+ "#   cond_dim  - size of the conditioning vector handed to each block.\n",
+ "#   max_len   - second argument of `RoPE` (default 4096, the value that used to be\n",
+ "#               hard-coded).\n",
+ "#   K         - vocabulary size (required). The embedding table has K + 2 entries; the\n",
+ "#               insertion and substitution heads each emit K values per position.\n",
+ "function EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128,\n",
+ "                       max_len::Int=4096, K::Int)\n",
+ "    embedding = Flux.Embedding(K + 2 => d)\n",
+ "    time_embed = Flux.Chain(RandomFourierFeatures(1 => rff_dim, 1.0f0), Dense(rff_dim => cond_dim))\n",
+ "    blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers]\n",
+ "    # Separate heads: token logits for insert/substitute, and one scalar per position\n",
+ "    # for each of the three edit types.\n",
+ "    ins_q = Dense(d => K, bias=false)\n",
+ "    sub_q = Dense(d => K, bias=false)\n",
+ "    ins_lambda = Dense(d => 1, bias=false)\n",
+ "    sub_lambda = Dense(d => 1, bias=false)\n",
+ "    del_lambda = Dense(d => 1, bias=false)\n",
+ "    rope = RoPE(d ÷ num_heads, max_len)\n",
+ "    return EditFlowModel((; embedding, time_embed, blocks, ins_q, sub_q, ins_lambda, sub_lambda, del_lambda, rope, K))\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Forward pass\n",
+ "# Inputs: `t` is a scalar or a collection of times; `Xt_ms` is the state whose token\n",
+ "# tensor is read with `FF.tensor` and whose mask is read with `FF.getlmask`.\n",
+ "# Returns the tuple (ins_head, sub_head, lambda_ins, lambda_sub, lambda_del), each the\n",
+ "# output of the matching Dense head applied to the final hidden states.\n",
+ "# NOTE: the token range check reads the global process `P`, not a field of the model.\n",
+ "function (model::EditFlowModel)(t, Xt_ms)\n",
+ "    m = model.layers\n",
+ "    X = FF.tensor(Xt_ms)\n",
+ "    X = ndims(X) == 1 ? reshape(X, :, 1) : X  # a single sequence becomes a batch of one\n",
+ "    L, B = size(X)\n",
+ "\n",
+ "    pmask = Zygote.@ignore FF.getlmask(Xt_ms)\n",
+ "    # Validate token ids off the AD tape. Tokens are shifted by +1 below to form the\n",
+ "    # embedding indices, so a negative token would yield an index below 1.\n",
+ "    Zygote.@ignore begin\n",
+ "        if maximum(X) > P.padding_token\n",
+ "            error(\"X contains values greater than the padding token; illegal token values in input\")\n",
+ "        end\n",
+ "        if minimum(X) < 0\n",
+ "            error(\"X contains negative values; illegal token values in input\")\n",
+ "        end\n",
+ "    end\n",
+ "    Xp = X .+ 1\n",
+ "\n",
+ "    H = m.embedding(Xp)\n",
+ "\n",
+ "    # A scalar time is repeated for the whole batch; anything else is cast to Float32.\n",
+ "    t = ndims(t) == 0 ? fill(Float32(t), B) : Float32.(t)\n",
+ "    cond = m.time_embed(reshape(t, 1, B))\n",
+ "\n",
+ "    # Put every auxiliary input on the same device as the activations `H`.\n",
+ "    cond = to_same_device(cond, H)\n",
+ "    pmask = Zygote.@ignore to_same_device(pmask, H)\n",
+ "    rope = Zygote.@ignore to_same_device(m.rope[1:L], H)\n",
+ "\n",
+ "    for blk in m.blocks\n",
+ "        H = blk(H; cond, rope, kpad_mask=pmask)\n",
+ "    end\n",
+ "\n",
+ "    ins_head = m.ins_q(H)\n",
+ "    sub_head = m.sub_q(H)\n",
+ "    lambda_ins = m.ins_lambda(H)\n",
+ "    lambda_sub = m.sub_lambda(H)\n",
+ "    lambda_del = m.del_lambda(H)\n",
+ "    return (ins_head, sub_head, lambda_ins, lambda_sub, lambda_del)\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Forward pass -- disabled variant, kept commented out: it returns a single `m.head_combined(H)` output\n",
+ "# function (model::EditFlowModel)(t, Xt_ms)\n",
+ "# m = model.layers\n",
+ "# X = FF.tensor(Xt_ms)\n",
+ "# X = ndims(X) == 1 ? reshape(X, :, 1) : X\n",
+ "# L, B = size(X)\n",
+ "\n",
+ "# pmask = Zygote.@ignore FF.getlmask(Xt_ms)\n",
+ "# Xp = X .+ 1\n",
+ "# #println(\"min/max X: \", minimum(X), \" / \", maximum(X), \" pad=\", P.padding_token, \" lat=\", P.latent_token)\n",
+ "# #@assert all((0 .<= X) .& (X .<= P.padding_token))\n",
+ "# mX = Zygote.@ignore maximum(X)\n",
+ "# nX = Zygote.@ignore minimum(X)\n",
+ "# @assert (0 <= nX) && (mX <= P.padding_token)\n",
+ "\n",
+ " \n",
+ "# H = m.embedding(Xp)\n",
+ "\n",
+ "# t = ndims(t) == 0 ? fill(Float32(t), B) : Float32.(t)\n",
+ "# cond = m.time_embed(reshape(t, 1, B))\n",
+ "\n",
+ "# cond = to_same_device(cond, H)\n",
+ "# pmask = Zygote.@ignore to_same_device(pmask, H)\n",
+ "# rope = Zygote.@ignore to_same_device(m.rope[1:L], H)\n",
+ "\n",
+ "# for blk in m.blocks\n",
+ "# H = blk(H; cond, rope, kpad_mask=pmask)\n",
+ "# end\n",
+ "# return m.head_combined(H)\n",
+ "# end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "train_editflow! (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Training (GPU if available)\n",
+ "# Runs `epochs * steps_per_epoch` Adam updates on minibatches from `make_minibatch`\n",
+ "# and returns the trained model after mapping `to_cpu` over it.\n",
+ "#\n",
+ "# Keywords\n",
+ "#   epochs, steps_per_epoch - sizes of the outer and inner training loops.\n",
+ "#   batch_size  - number of sequences per minibatch.\n",
+ "#   lr          - Adam learning rate; any Real is accepted and converted to Float32.\n",
+ "#   seed        - seeds a local MersenneTwister and also reseeds the global RNG.\n",
+ "#   print_every - log the loss whenever `step` is a multiple of this value.\n",
+ "function train_editflow!(P::FF.EditFlow,\n",
+ "                         model;\n",
+ "                         epochs::Int=1,\n",
+ "                         steps_per_epoch::Int=100,\n",
+ "                         batch_size::Int=64,\n",
+ "                         lr::Real=1f-2,\n",
+ "                         seed::Int=42,\n",
+ "                         print_every::Int=25)\n",
+ "\n",
+ "    rng = Random.MersenneTwister(seed)\n",
+ "    Random.seed!(seed)\n",
+ "\n",
+ "    # Move model to device (GPU if available)\n",
+ "    model = Functors.fmap(to_dev, model)\n",
+ "    opt_state = Flux.setup(Flux.Adam(Float32(lr)), model)\n",
+ "\n",
+ "    # The BOS row is identical on every step, so build it once; `vcat` copies it.\n",
+ "    bos_row = fill(P.bos_token, 1, batch_size)\n",
+ "\n",
+ "    for epoch in 1:epochs\n",
+ "        for step in 1:steps_per_epoch\n",
+ "\n",
+ "            # 1) Minibatch Sampling\n",
+ "            x0s, x1s, ts = make_minibatch(batch_size, P; rng=rng)\n",
+ "\n",
+ "            # 2) Align and batch\n",
+ "            Z0, Z1 = FF.align_and_batch(P, x0s, x1s)\n",
+ "\n",
+ "            # 3) Interpolate\n",
+ "            Zt, Xt = FF.interpolate_Z_elementwise(P, Z0, Z1, ts)\n",
+ "\n",
+ "            # 4) Prepend BOS\n",
+ "            Zt = vcat(bos_row, Zt)\n",
+ "            Xt = vcat(bos_row, Xt)\n",
+ "            Z1 = vcat(bos_row, Z1)\n",
+ "\n",
+ "            # 5) Masks and multipliers\n",
+ "            transition_mask = FF.transition_mask_from_Xt(P, Xt)\n",
+ "            edit_multiplier = FF.remaining_edits(P, Zt, Z1, Xt)\n",
+ "\n",
+ "            # 6) Scheduler scaling: dκ(t) / (1 - κ(t)), with the denominator clamped\n",
+ "            #    at 1f-3 so the ratio stays finite when κ(t) is close to 1.\n",
+ "            den = 1f0 .- P.κ.(ts)\n",
+ "            den = max.(den, 1f-3)\n",
+ "            scheduler_scaling = P.dκ.(ts) ./ den\n",
+ "\n",
+ "            # 7) Masked state\n",
+ "            lmask = Xt .!= P.padding_token\n",
+ "            cmask = trues(size(lmask))\n",
+ "            Xt_ms = FF.MaskedState(FF.DiscreteState(P.k, Xt), cmask, lmask)\n",
+ "\n",
+ "            ts_d = to_dev(ts)\n",
+ "            Xt_ms_d = to_dev(Xt_ms)\n",
+ "            Tmask_d = to_dev(transition_mask)\n",
+ "            Emult_d = to_dev(edit_multiplier)\n",
+ "            sched_d = to_dev(reshape(Float32.(scheduler_scaling), 1, 1, :))\n",
+ "\n",
+ "            # 8) Forward + loss + update\n",
+ "            loss, grad = Flux.withgradient(model) do m\n",
+ "                M = m(ts_d, Xt_ms_d)\n",
+ "                l = FF.edit_loss(P, M, Tmask_d, Emult_d, sched_d; eps=1f-8)\n",
+ "                # Diagnostics only, kept off the AD tape. The model output can be a\n",
+ "                # tuple of arrays, so take the maximum of each entry and then the\n",
+ "                # maximum of those; a plain `maximum(M)` would try to compare arrays.\n",
+ "                Zygote.@ignore begin\n",
+ "                    if isnan(l)\n",
+ "                        println(\"Maximum element of M: \", maximum(maximum, M))\n",
+ "                    end\n",
+ "                end\n",
+ "                l\n",
+ "            end\n",
+ "            Flux.update!(opt_state, model, grad[1])\n",
+ "\n",
+ "            if step % print_every == 0\n",
+ "                @info \"train\" epoch step loss=Float32(loss)\n",
+ "            end\n",
+ "        end\n",
+ "    end\n",
+ "\n",
+ "    return Functors.fmap(to_cpu, model)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
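+ "# Init process and model. `P` is also read as a global inside the model's forward pass, so run this cell before calling `model`.\n",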
+ "K = PM.K\n",
+ "P = FF.EditFlow(K; bos_token=0, impl=\"positionwise_reparam\")\n",
+ "model = EditFlowModel(; d=128, num_heads=8, nlayers=4, rff_dim=128, cond_dim=128, K=K)\n",
+ "nothing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.972092f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.547047f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.814123f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.86299f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.428902f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.44519f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.755375f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.63697f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.66145f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.980005f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.676495f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.823967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.604435f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.628433f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.269749f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.370718f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.981895f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.29564f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.05313f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.661705f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.352457f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.975006f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.44165f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.114304f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.830158f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.29099f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.880302f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.773508f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.42518f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.253447f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.465221f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.716324f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.315796f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.590342f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.11945f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.802334f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.715399f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.133106f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.506771f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.31068f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.549904f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.21441f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.020954f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.809784f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.816322f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 11.10005f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.037218f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.556658f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.747284f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.18045f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.459362f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.183155f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.935974f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.975842f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.257816f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.16243f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.237125f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.72068f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.045029f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.03865f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.980974f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.974388f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.994131f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.29249f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.863419f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.127146f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.681866f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.440746f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.46564f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.074389f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.455484f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.068605f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.59634f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.282074f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.803133f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.92038f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.747942f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.94925f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.425396f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.448444f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.657696f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.587557f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.831514f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.390194f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.020035f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.410772f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.76912f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.478642f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.846231f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.842266f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.358135f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.717499f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.117746f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.582458f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.147333f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.345675f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.380396f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.366549f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.646837f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.5616f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.525606f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.963268f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.720358f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.79974f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.13392f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.455585f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.213562f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.929577f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.667606f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.59198f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.14951f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.727703f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.535524f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.531374f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.03439f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.629353f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.897205f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.220057f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.027626f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.112152f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.379562f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.807442f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.60457f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.335194f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.965631f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.327007f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.268284f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.73925f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.675303f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.893906f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.89526f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.98605f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.154684f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.895681f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.107021f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.837322f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.70697f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.26533f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.145313f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.1321f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.53107f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.040863f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.176334f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.094204f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.033104f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.512857f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.793964f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.60687f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.829811f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.069487f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.734646f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.18549f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.256248f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.401571f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.009022f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.209995f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.12656f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.539986f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.397396f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.079025f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.479889f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.610525f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.227245f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.67744f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.710548f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.983185f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.211796f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.314728f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.877901f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.496384f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.785358f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.737282f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.9102f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.662106f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.906403f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.716015f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.618082f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.765678f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.711426f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.011974f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.7238f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.592659f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.709558f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.07872f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.348526f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.064106f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.897255f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.324472f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.630274f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.075851f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.555912f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.24704f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.994556f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.781471f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.19625f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.328653f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.941914f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.934614f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.049961f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.934216f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.771145f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.813606f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.86628f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.586544f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.481121f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.066532f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.385017f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.676384f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.874306f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.355312f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.09821f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.159306f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.657879f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.845825f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.372375f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.752993f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.873785f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.484112f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.912094f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.653551f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.556973f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.944939f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.665516f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.347084f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.694008f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.613409f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.544956f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.902126f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.099602f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.433033f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.63831f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.330196f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.579561f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.511242f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.326582f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.177572f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.340986f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.477205f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.986431f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.133919f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.872013f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.14673f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.704659f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.198008f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.918495f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.065418f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.116703f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.001947f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.781761f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.540459f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.281252f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.846443f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.743122f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.816837f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.741148f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.262049f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.01141f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.787575f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.283468f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.458675f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.277485f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.03348f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.936434f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.165476f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.269474f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.315693f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.033718f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.826042f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.53991f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.090122f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.082125f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.51723f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.586445f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.018702f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.69442f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.83265f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.189922f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.178204f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.848724f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.149426f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.197594f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.3241f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.188025f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.411463f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.432335f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.71156f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.088894f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.21531f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.149452f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.175003f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.178764f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.635054f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.653725f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.785437f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.671728f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.6824f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.098778f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.858032f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.040945f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.54076f0\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel(\n",
+ " Embedding(22 => 128), \u001b[90m# 2_816 parameters\u001b[39m\n",
+ " Chain(\n",
+ " RandomFourierFeatures{Float32, Matrix{Float32}}(Float32[-4.5109034 -3.5286963 … 9.479959 7.8329725]),\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " [\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9967958, 0.9869625, 0.99037206, 0.9952497, 0.9702036, 0.9800837, 0.9745326, 0.9878916, 0.9594757, 0.9897762 … 0.9973829, 0.98189306, 0.99930227, 0.9924687, 0.96413124, 0.98626226, 0.98451954, 0.99558693, 0.98469424, 0.9983752], Float32[-0.0005225724, 0.008076679, -0.004573487, -0.008991586, -0.0022569797, 0.00772917, 0.022327727, 0.0013664486, -0.006174539, 0.00085590524 … -0.0055099158, 0.0036732075, -0.001059735, 0.012229179, -0.00652443, 0.000418949, 0.0061954665, 0.0028194247, -0.0041092695, 0.00022841303], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.98412365, 0.98356694, 0.9991844, 0.98061776, 1.0074458, 1.0001413, 0.98833823, 0.9987704, 0.991204, 0.99532646 … 0.9848799, 1.0059643, 0.98044896, 0.98279643, 1.0155826, 0.9848778, 0.9933505, 0.9947882, 0.9984975, 0.99049044], Float32[0.0103082, 0.010853426, -7.58663f-5, 0.009344174, 0.004389455, -0.0016753467, 0.0062471745, -0.0046117036, -0.016211234, 0.015867772 … -0.012600643, -0.0019048655, 0.0023564878, 0.008244524, -0.00075314543, 0.0050747017, -0.006136866, -0.0053728544, -0.0018225668, 0.023820948], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.99398494, 0.99072254, 0.98964727, 0.9913998, 0.96825975, 0.9956356, 0.9806926, 0.9927313, 0.9783374, 1.0013367 … 0.9955111, 0.9714232, 0.9932761, 0.9916518, 0.97599345, 0.99484235, 0.9863062, 0.9868813, 0.99386036, 0.9897667], Float32[0.002271547, 0.0012500598, -0.0013109404, -0.009307533, -0.005532772, 0.00332578, -0.0062586013, -0.004487046, -0.009959927, -0.001968619 … -0.0092587015, 0.0016701458, 0.004358063, -0.0034356923, 0.008127138, -0.0012910413, 0.01754202, 0.01152308, -0.0062666866, 0.011563656], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[1.0095794, 1.0033473, 1.0020524, 1.0036801, 1.0157589, 1.0096356, 1.0005842, 1.0092266, 0.9975341, 0.98216057 … 1.0134289, 0.9880383, 1.0035987, 1.0086101, 1.008442, 1.0066518, 1.0115547, 1.0150577, 0.9883698, 1.0045927], Float32[0.0018625382, 0.0074362713, 0.010829576, 0.00023766467, -0.009477628, -0.005826937, -0.001142101, -0.016898308, -0.012365104, 0.01040461 … -0.0031639615, -0.008584582, -0.012951189, 0.0051505445, -0.0012443908, 0.008775254, 0.002358304, 7.299369f-5, -0.0044311574, 0.01911879], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9964801, 0.9907213, 0.9829918, 0.9904836, 1.0075997, 0.9931966, 0.967814, 0.994991, 0.99343467, 0.9698302 … 1.0127897, 0.98682904, 0.9882866, 0.9758389, 0.9728117, 1.0221164, 0.97261125, 0.9949346, 1.0151469, 0.99335915], Float32[-0.014417081, 0.006284324, -0.009420766, 0.0028740377, -0.007851509, -0.0016916802, 0.00028211612, 0.004104854, -0.0010160299, 0.0028311377 … 0.0061248355, -0.003470834, 0.0037332277, -0.00510081, 0.00078595727, -0.0052126795, 0.0049999044, -0.007494009, -0.0076053017, -0.0034516104], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.98102707, 0.9881729, 1.0145462, 1.0161818, 1.010741, 1.0101572, 1.0054857, 0.9960153, 1.0076207, 0.9982568 … 1.0110102, 0.97780424, 1.0103778, 1.0270656, 1.0007466, 1.0155469, 1.0250667, 0.9943188, 1.0158885, 0.9926699], Float32[-0.020408258, -0.007663947, -0.002164433, 0.0045097587, -0.004046006, -0.0084987385, -0.0032783668, -0.0019530786, -0.0018806072, 0.0014927586 … 0.002657746, -0.004882839, -0.0094046, 0.0029575205, -0.010758868, 0.005579408, -0.00012946833, -0.0011488283, -0.0051159984, 0.012121034], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.97678447, 0.9843075, 0.97703946, 1.0089988, 0.9998969, 0.99314827, 1.005877, 0.9890229, 0.98229724, 0.9969458 … 0.9900987, 0.99567735, 0.9876625, 0.99173796, 1.0165083, 0.9832781, 0.9971774, 1.0104102, 0.9998328, 0.99126804], Float32[-0.0036148801, -0.0061858366, 0.0043971017, 0.0063074683, 0.005699646, 0.0021153758, -0.0040238057, 0.005645371, 0.001263952, 0.009946144 … -0.0067127184, -0.0050887824, -0.0009767002, -0.016533986, 0.011165631, -0.00066996843, 0.0014427631, -0.007045365, -0.0015280164, -0.0040101036], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[1.0210867, 1.0158178, 1.0208023, 1.047025, 1.0237092, 1.018517, 1.0044487, 0.9989978, 0.99009013, 1.0100825 … 1.01286, 1.0051826, 1.0109634, 1.0528429, 1.0127131, 1.0134708, 1.0189143, 0.9862647, 1.0223742, 0.98723614], Float32[-0.0045142854, -0.008482742, -0.0018320186, 0.0036619315, -0.010827755, -0.0021696852, -0.010647439, -0.0046669086, -0.007829798, 0.005749223 … -0.005271609, -0.015943931, 0.00100527, -0.01039895, -0.005209468, 0.00091932324, 0.0036592623, 0.009832175, -0.005363889, 0.01747159], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " ],\n",
+ " Dense(128 => 20; bias=false), \u001b[90m# 2_560 parameters\u001b[39m\n",
+ " Dense(128 => 20; bias=false), \u001b[90m# 2_560 parameters\u001b[39m\n",
+ " Dense(128 => 1; bias=false), \u001b[90m# 128 parameters\u001b[39m\n",
+ " Dense(128 => 1; bias=false), \u001b[90m# 128 parameters\u001b[39m\n",
+ " Dense(128 => 1; bias=false), \u001b[90m# 128 parameters\u001b[39m\n",
+ " RoPE{Matrix{Float32}}(Float32[1.0 0.5403023 … -0.87528455 -0.065975994; 1.0 0.95041525 … 0.9552474 0.8159282; … ; 1.0 0.9999995 … -0.57972294 -0.5789079; 1.0 0.99999994 … 0.2726629 0.27235872], Float32[0.0 0.84147096 … -0.48360822 -0.9978212; 0.0 0.3109836 … 0.29580808 0.5781532; … ; 0.0 0.0009999999 … -0.8148137 -0.8153929; 0.0 0.0003162278 … 0.9621096 0.9621958]),\n",
+ " 20,\n",
+ ") \u001b[90m # Total: 84 trainable arrays, \u001b[39m1_339_648 parameters,\n",
+ "\u001b[90m # plus 3 non-trainable, 65_600 parameters, summarysize \u001b[39m5.365 MiB."
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model = train_editflow!(P, model; epochs=50, steps_per_epoch=150, batch_size=256, lr=1f-4)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MersenneTwister(42)"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rng=Random.MersenneTwister(42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10-element Vector{String}:\n",
+ " \"EVQLVEWGGGLVQPVRS\"\n",
+ " \"QVQLVKSGSPLKKPGADKV\"\n",
+ " \"QVQLVESGGGLVKCG\"\n",
+ " \"QMQLQESGPGLVKPGASVK\"\n",
+ " \"EVQEVGGGLVQPGGLSLSL\"\n",
+ " \"EVTL\"\n",
+ " \"QVQLVESGGGVVQPGRSGA\"\n",
+ " \"EVQLVQKGGL\"\n",
+ " \"NTLQESSGVGLV\"\n",
+ " \"EVYVVEHGGGL\""
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model_samples_any = sample_gen_n(P, model; n=10, ts=0f0:0.01f0:1f0, rng)\n",
+ "model_final_states = map(final_state_from_gen, model_samples_any)\n",
+ "model_strings = [state_to_PM_string(P, s) for s in model_final_states]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10-element Vector{String}:\n",
+ " \"EVQLVESGGGLVKPPGQLR\"\n",
+ " \"QVQLVQSKAEVKKPGASVKV\"\n",
+ " \"EGQRVECGGGGGQRG\"\n",
+ " \"EVQLVESGGGLV\"\n",
+ " \"QVQLVQSGAAV\"\n",
+ " \"EVQLVESGGGLVQPGGSLTL\"\n",
+ " \"QVQLVESGGGVVQPCR\"\n",
+ " \"QVLLVQSGTEVFKPGASMKV\"\n",
+ " \"QVLLVQSTAEVKK\"\n",
+ " \"QITLKESDPTLVKP\""
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_states = [FF.DiscreteState(P.k, sample(PM; rng=rng)) for i in 1:10]\n",
+ "train_strings = [state_to_PM_string(P, s) for s in train_states]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1000-element Vector{String}:\n",
+ " \"EVLHVVEGGG\"\n",
+ " \"QVQLVQSGAEAK\"\n",
+ " \"QVQLQESGPGLAKPSEVLSLV\"\n",
+ " \"EVQLVESGGGLVKPGHSLR\"\n",
+ " \"QVQLVKTGMEVKKPGASVK\"\n",
+ " \"QVQLQESTPGLVKPSVSETL\"\n",
+ " \"QVQLVQSGAAVKTPGASV\"\n",
+ " \"EVQLVESGGGLV\"\n",
+ " \"QVQLVQSGGALV\"\n",
+ " \"MVMLVPSGAEVKKP\"\n",
+ " \"QVQLVRSGQALVK\"\n",
+ " \"QVQLVQSGAAV\"\n",
+ " \"QKQLLQSGGVLVKPSTLSL\"\n",
+ " ⋮\n",
+ " \"EVQLVESCSGGLVKPGGGS\"\n",
+ " \"QVQHVQSGAEYKKPGASK\"\n",
+ " \"QVQLVQSDAEV\"\n",
+ " \"QVQLVQSGAEVEQP\"\n",
+ " \"QNLKQPSGAEVKV\"\n",
+ " \"QVQLVKSGAGLVKPGASVKV\"\n",
+ " \"QVQLVQYGQGNVKKPGASVRT\"\n",
+ " \"QVQLVYSGAEVK\"\n",
+ " \"QVLLVESGGGVVQTGAS\"\n",
+ " \"QVQLVQSGHAEVKRPGA\"\n",
+ " \"QVQLQQSGPAEVKPKPCSL\"\n",
+ " \"QLQLVQSGAEVKK\""
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sampling and Levenshtein plot\n",
+ "using Random\n",
+ "n = 1000\n",
+ "# n model samples (final states → strings)\n",
+ "model_samples_any = sample_gen_n(P, model; n=n, ts=0f0:0.01f0:1f0, rng=rng)\n",
+ "model_final_states = map(final_state_from_gen, model_samples_any)\n",
+ "model_strings = [state_to_PM_string(P, s) for s in model_final_states]\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00\u001b[39m\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "m = 1000\n",
+ "p=100000\n",
+ "# Build training strings from PM parents (the data PM was built on)\n",
+ "train_states = [FF.DiscreteState(P.k, sample(PM; rng=rng)) for i in 1:m]\n",
+ "train_strings = [state_to_PM_string(P, s) for s in train_states]\n",
+ "\n",
+ "# Independent validation strings sampled from PM\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng=rng)) for i in 1:p]\n",
+ "val_strings = [state_to_PM_string(P, s) for s in val_states]\n",
+ "#val_strings = [s for s in val_strings if !(s in train_strings)]\n",
+ "\n",
+ "# Plot normalized Levenshtein vs. training set (val/train are independent draws from PM; overlaps are not filtered out)\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings, train_strings; title=\"Model vs True (normalized Levenshtein)\")\n",
+ "display(plt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00\u001b[39m\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "model_strings_bos = [\">\" * state_to_PM_string(P, s) for s in model_final_states]\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings_bos, train_strings; title=\"Model vs True (normalized Levenshtein)\")\n",
+ "display(plt)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "\n",
+ "n=1000000\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng)) for i in 1:n]\n",
+ "val_strings = [state_to_PM_string(P, s) for s in val_states]\n",
+ "\n",
+ "\n",
+ "\n",
+ "pLen = plot_len_dist(\"model_vs_true_len\", val_strings, model_strings; title=\"Length distribution: Natural vs Generated\")\n",
+ "display(pLen)\n",
+ "\n",
+ "pAA = plot_AA_dist(\"model_vs_true_AA\", val_strings, model_strings; title=\"AA frequency: Natural vs Generated\")\n",
+ "display(pAA)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Natural 'A' frequency: 0.047142480905978404\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Share of 'A' among all characters of `val_strings`.\n",
+ "println(\"Natural 'A' frequency: \", sum(count(==('A'), s) for s in val_strings) / sum(length, val_strings))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Natural 'A' frequency: 0.04109772423025435\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Share of 'A' among all characters of the model-generated `model_strings`.\n",
+ "println(\"Generated 'A' frequency: \", sum(count(==('A'), s) for s in model_strings) / sum(length, model_strings))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MersenneTwister(42)"
+ ]
+ },
+ "execution_count": 183,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rng = Random.MersenneTwister(42)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "execution_count": 226,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "n=1000000\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng)) for i in 1:n]\n",
+ "val_strings = [state_to_PM_string(P, s) for s in val_states]\n",
+ "pLen = plot_len_dist(\"model_vs_true_len\", val_strings, val_strings; title=\"Length distribution: Natural vs Generated\")\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.11.6",
+ "language": "julia",
+ "name": "julia-1.11"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/editflow_code/analysis_notebook copy.ipynb b/editflow_code/analysis_notebook copy.ipynb
new file mode 100644
index 0000000..67c138b
--- /dev/null
+++ b/editflow_code/analysis_notebook copy.ipynb
@@ -0,0 +1,4242 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `~/dev/Flowfusion.jl/editflow_code`\n",
+ "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Project.toml`\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Manifest.toml`\n",
+ "WARNING: using StatsBase.sample in module Main conflicts with an existing identifier.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "plot_len_dist (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "using Pkg\n",
+ "Pkg.activate(@__DIR__)\n",
+ "Pkg.instantiate()\n",
+ "using Revise\n",
+ "\n",
+ "using Random\n",
+ "using Statistics\n",
+ "using Adapt\n",
+ "using Functors\n",
+ "using Flux\n",
+ "using Onion\n",
+ "using RandomFeatureMaps\n",
+ "using Zygote\n",
+ "\n",
+ "Pkg.develop(path=joinpath(@__DIR__, \"..\"))\n",
+ "using Flowfusion\n",
+ "const FF = Flowfusion\n",
+ "\n",
+ "include(joinpath(@__DIR__, \"prob_model.jl\"))\n",
+ "include(joinpath(@__DIR__, \"helper_funcs.jl\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "to_same_device (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import CUDA\n",
+ "\n",
+ "# Device helpers\n",
+ "const _gpu_enabled = try\n",
+ " CUDA.has_cuda()\n",
+ "catch\n",
+ " false\n",
+ "end\n",
+ "\n",
+ "to_dev(x) = _gpu_enabled ? Adapt.adapt(CUDA.CuArray, x) : x\n",
+ "to_cpu(x) = _gpu_enabled ? Adapt.adapt(Array, x) : x\n",
+ "# move x to the same device type as y (Array or CuArray)\n",
+ "to_same_device(x, y) = (_gpu_enabled && (y isa CUDA.CuArray)) ? Adapt.adapt(CUDA.CuArray, x) : Adapt.adapt(Array, x)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "make_minibatch (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Minibatch builder (uses true PM)\n",
+ "function make_minibatch(B::Int, P::FF.EditFlow; rng=Random.default_rng())\n",
+ " K = P.k\n",
+ " x0s = Vector{FF.DiscreteState}(undef, B)\n",
+ " x1s = Vector{FF.DiscreteState}(undef, B)\n",
+ " for b in 1:B\n",
+ " # x1 from true PM\n",
+ " seq1 = sample(PM; rng=rng)\n",
+ " @assert all(1 .<= seq1 .<= K)\n",
+ " x1s[b] = FF.DiscreteState(K, seq1)\n",
+ " # x0: uniform tokens with random length in 1:10 (no BOS in x0)\n",
+ " L0 = rand(rng, 1:10)\n",
+ " seq0 = rand(rng, 1:K, L0)\n",
+ " x0s[b] = FF.DiscreteState(K, seq0)\n",
+ " end\n",
+ " ts = rand(rng, Float32, B)\n",
+ " return x0s, x1s, ts\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Model definition\n",
+ "struct EditFlowModel{L}\n",
+ " layers::L\n",
+ "end\n",
+ "Flux.@layer EditFlowModel\n",
+ "#=\n",
+ "function EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, K::Int)\n",
+ " embedding = Flux.Embedding(K + 2 => d)\n",
+ " time_embed = Flux.Chain(RandomFourierFeatures(1 => rff_dim, 1.0f0), Dense(rff_dim => cond_dim))\n",
+ " blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers]\n",
+ " head_combined = Dense(d => 2K + 1, bias=false)\n",
+ " rope = RoPE(d ÷ num_heads, 4096)\n",
+ " return EditFlowModel((; embedding, time_embed, blocks, head_combined, rope, K))\n",
+ "end\n",
+ "=#\n",
+ "function EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, K::Int)\n",
+ " embedding = Flux.Embedding(K + 2 => d)\n",
+ " time_embed = Flux.Chain(RandomFourierFeatures(1 => rff_dim, 1.0f0), Dense(rff_dim => cond_dim))\n",
+ " blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers]\n",
+ " ins_q = Dense(d => K, bias=false)\n",
+ " sub_q = Dense(d => K, bias=false)\n",
+ " ins_lambda = Dense(d => 1, bias=false)\n",
+ " sub_lambda = Dense(d => 1, bias=false)\n",
+ " del_lambda = Dense(d => 1, bias=false)\n",
+ " rope = RoPE(d ÷ num_heads, 4096)\n",
+ " return EditFlowModel((; embedding, time_embed, blocks, ins_q, sub_q, ins_lambda, sub_lambda, del_lambda, rope, K))\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Forward pass\n",
+ "function (model::EditFlowModel)(t, Xt_ms)\n",
+ " m = model.layers\n",
+ " X = FF.tensor(Xt_ms)\n",
+ " X = ndims(X) == 1 ? reshape(X, :, 1) : X\n",
+ " L, B = size(X)\n",
+ "\n",
+ " pmask = Zygote.@ignore FF.getlmask(Xt_ms)\n",
+ " Xp = X .+ 1\n",
+ " #println(\"min/max X: \", minimum(X), \" / \", maximum(X), \" pad=\", P.padding_token, \" lat=\", P.latent_token)\n",
+ " if maximum(X) > P.padding_token\n",
+ " error(\"X contains values greater than the padding token; illegal token values in input\")\n",
+ " end\n",
+ "\n",
+ " H = m.embedding(Xp)\n",
+ "\n",
+ " t = ndims(t) == 0 ? fill(Float32(t), B) : Float32.(t)\n",
+ " cond = m.time_embed(reshape(t, 1, B))\n",
+ "\n",
+ " cond = to_same_device(cond, H)\n",
+ " pmask = Zygote.@ignore to_same_device(pmask, H)\n",
+ " rope = Zygote.@ignore to_same_device(m.rope[1:L], H)\n",
+ "\n",
+ " for blk in m.blocks\n",
+ " H = blk(H; cond, rope, kpad_mask=pmask)\n",
+ " end\n",
+ "\n",
+ " ins_head = m.ins_q(H)\n",
+ " sub_head = m.sub_q(H)\n",
+ " lambda_ins = m.ins_lambda(H)\n",
+ " lambda_sub = m.sub_lambda(H)\n",
+ " lambda_del = m.del_lambda(H)\n",
+ " return (ins_head, sub_head, lambda_ins, lambda_sub, lambda_del)\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Forward pass, single-head (head_combined) variant; this cell is fully commented out\n",
+ "# function (model::EditFlowModel)(t, Xt_ms)\n",
+ "# m = model.layers\n",
+ "# X = FF.tensor(Xt_ms)\n",
+ "# X = ndims(X) == 1 ? reshape(X, :, 1) : X\n",
+ "# L, B = size(X)\n",
+ "\n",
+ "# pmask = Zygote.@ignore FF.getlmask(Xt_ms)\n",
+ "# Xp = X .+ 1\n",
+ "# #println(\"min/max X: \", minimum(X), \" / \", maximum(X), \" pad=\", P.padding_token, \" lat=\", P.latent_token)\n",
+ "# #@assert all((0 .<= X) .& (X .<= P.padding_token))\n",
+ "# mX = Zygote.@ignore maximum(X)\n",
+ "# nX = Zygote.@ignore minimum(X)\n",
+ "# @assert (0 <= nX) && (mX <= P.padding_token)\n",
+ "\n",
+ " \n",
+ "# H = m.embedding(Xp)\n",
+ "\n",
+ "# t = ndims(t) == 0 ? fill(Float32(t), B) : Float32.(t)\n",
+ "# cond = m.time_embed(reshape(t, 1, B))\n",
+ "\n",
+ "# cond = to_same_device(cond, H)\n",
+ "# pmask = Zygote.@ignore to_same_device(pmask, H)\n",
+ "# rope = Zygote.@ignore to_same_device(m.rope[1:L], H)\n",
+ "\n",
+ "# for blk in m.blocks\n",
+ "# H = blk(H; cond, rope, kpad_mask=pmask)\n",
+ "# end\n",
+ "# return m.head_combined(H)\n",
+ "# end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "train_editflow! (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Training (GPU if available)\n",
+ "# Train `model` on minibatches drawn by `make_minibatch` and return the trained\n",
+ "# model after passing it through `to_cpu`.\n",
+ "#\n",
+ "# Use the return value: `model` is rebound to `Functors.fmap(to_dev, model)` below,\n",
+ "# so the trained parameters are those of the returned object.\n",
+ "#\n",
+ "# Keywords:\n",
+ "#   epochs, steps_per_epoch - loop counts; one optimiser step per inner iteration\n",
+ "#   batch_size              - number of (x0, x1, t) triples requested per step\n",
+ "#   lr                      - learning rate passed to `Flux.Adam`\n",
+ "#   seed                    - seeds both the local MersenneTwister and the global RNG\n",
+ "#   print_every             - log the loss whenever `step % print_every == 0`\n",
+ "function train_editflow!(P::FF.EditFlow,\n",
+ "                         model;\n",
+ "                         epochs::Int=1,\n",
+ "                         steps_per_epoch::Int=100,\n",
+ "                         batch_size::Int=64,\n",
+ "                         lr::Float32=1f-2,\n",
+ "                         seed::Int=42,\n",
+ "                         print_every::Int=25)\n",
+ "\n",
+ "    rng = Random.MersenneTwister(seed)\n",
+ "    Random.seed!(seed)\n",
+ "\n",
+ "    # Move model to device (GPU if available)\n",
+ "    model = Functors.fmap(to_dev, model)\n",
+ "    opt_state = Flux.setup(Flux.Adam(lr), model)\n",
+ "\n",
+ "    for epoch in 1:epochs\n",
+ "        for step in 1:steps_per_epoch\n",
+ "\n",
+ "            # 1) Minibatch Sampling\n",
+ "            x0s, x1s, ts = make_minibatch(batch_size, P; rng=rng)\n",
+ "\n",
+ "            # 2) Align and batch\n",
+ "            Z0, Z1 = FF.align_and_batch(P, x0s, x1s)\n",
+ "\n",
+ "            # 3) Interpolate\n",
+ "            Zt, Xt = FF.interpolate_Z_elementwise(P, Z0, Z1, ts)\n",
+ "\n",
+ "            # 4) Prepend BOS (one extra leading row on each of Zt, Xt and Z1)\n",
+ "            bos = P.bos_token\n",
+ "            Zt = vcat(fill(bos, 1, batch_size), Zt)\n",
+ "            Xt = vcat(fill(bos, 1, batch_size), Xt)\n",
+ "            Z1 = vcat(fill(bos, 1, batch_size), Z1)\n",
+ "\n",
+ "            # 5) Masks and multipliers\n",
+ "            transition_mask = FF.transition_mask_from_Xt(P, Xt)\n",
+ "            edit_multiplier = FF.remaining_edits(P, Zt, Z1, Xt)\n",
+ "            #@assert size(transition_mask) == size(edit_multiplier)\n",
+ "\n",
+ "            # 6) Scheduler scaling: dκ(t) / (1 - κ(t)), with the denominator\n",
+ "            #    floored at 1f-3 so the ratio stays bounded\n",
+ "            den = 1f0 .- P.κ.(ts)\n",
+ "            den = max.(den, 1f-3)\n",
+ "            scheduler_scaling = P.dκ.(ts) ./ den\n",
+ "\n",
+ "            # 7) Masked state: lmask is false exactly at padding tokens, cmask is all true\n",
+ "            lmask = Xt .!= P.padding_token\n",
+ "            cmask = trues(size(lmask))\n",
+ "            Xt_ms = FF.MaskedState(FF.DiscreteState(P.k, Xt), cmask, lmask)\n",
+ "\n",
+ "            ts_d = to_dev(ts)\n",
+ "            Xt_ms_d = to_dev(Xt_ms)\n",
+ "            Tmask_d = to_dev(transition_mask)\n",
+ "            Emult_d = to_dev(edit_multiplier)\n",
+ "            sched_d = to_dev(reshape(Float32.(scheduler_scaling), 1, 1, :))\n",
+ "\n",
+ "            # 8) Forward + loss + update\n",
+ "            loss, grad = Flux.withgradient(model) do m\n",
+ "                M = m(ts_d, Xt_ms_d)\n",
+ "                l = FF.edit_loss(P, M, Tmask_d, Emult_d, sched_d; eps=1f-8)\n",
+ "                if isnan(l)\n",
+ "                    # M is a tuple of head outputs, so reduce each array first and\n",
+ "                    # then across the tuple; kept out of the gradient (diagnostic only).\n",
+ "                    Zygote.@ignore println(\"Maximum element of M: \", maximum(maximum, M))\n",
+ "                end\n",
+ "                l\n",
+ "            end\n",
+ "            Flux.update!(opt_state, model, grad[1])\n",
+ "\n",
+ "            if step % print_every == 0\n",
+ "                @info \"train\" epoch step loss=Float32(loss)\n",
+ "            end\n",
+ "        end\n",
+ "    end\n",
+ "\n",
+ "    return Functors.fmap(to_cpu, model)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Init process and model\n",
+ "K = PM.K\n",
+ "P = FF.EditFlow(K; bos_token=0, impl=\"positionwise_reparam\")\n",
+ "model = EditFlowModel(; d=128, num_heads=8, nlayers=4, rff_dim=128, cond_dim=128, K=K)\n",
+ "nothing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 82.07346f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 62.159813f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 54.437492f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 52.70323f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 46.34868f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 47.708572f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 46.801437f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 43.88524f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 46.17478f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 46.61335f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 44.934906f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 41.69713f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 45.287468f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 43.52704f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 39.66895f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 39.165527f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 40.31173f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.59504f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 40.564255f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.86731f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 40.385826f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 40.322083f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 39.391945f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.939816f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.111343f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.31593f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.190735f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.458412f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 39.276863f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 38.577705f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.672207f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.816956f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.438004f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.36937f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.889923f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.536453f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.919273f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.35535f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.92321f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.54946f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.466896f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.90836f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.573483f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.769262f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.593307f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.809368f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.314964f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.868347f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.30793f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.449608f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.058853f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.30246f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.396187f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.03281f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.141605f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.48079f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.728176f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.799397f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.22338f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.56803f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.286154f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.165651f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.54279f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.8528f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.07912f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.126358f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.789497f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.40204f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.934967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.35504f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.06865f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.93891f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.98565f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.669083f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.357887f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.41867f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.702007f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.948685f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.02884f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.43153f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.269884f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.48836f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.304775f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.295242f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.564354f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.63555f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.694923f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.10176f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.823997f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.046465f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.151978f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.241508f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.599903f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.371204f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.115711f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.063667f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.288649f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.17442f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.832388f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.63764f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.523653f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.578972f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.127888f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.43596f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.935762f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.834002f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.114868f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.446186f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.983818f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.701529f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.964905f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.798264f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.764925f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.253445f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.058111f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.854343f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.131287f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.974604f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.332983f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.150225f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.560247f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.277634f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.871674f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.280323f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.3564f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.779842f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.763954f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.29353f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.820616f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.870682f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.851711f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.26233f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.022175f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.356493f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.398968f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.326962f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.917953f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.692986f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.146017f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.466309f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.152166f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.562721f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.2849f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.194546f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.014896f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.774408f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.552523f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.040781f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.373951f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.159891f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.581429f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.509647f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.619043f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.09383f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.292168f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.818909f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.22113f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.000816f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.185415f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.700916f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.716354f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.38358f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.860315f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.69223f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.02826f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.024052f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.986008f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.56084f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.256157f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.557503f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.805897f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.765497f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.463768f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.466944f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.504349f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.668938f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.541996f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.095675f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.885004f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.725517f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.679646f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.332363f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.44205f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.786064f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.905752f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.435757f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.278011f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.903355f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.621813f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.800285f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.840708f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.018559f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.744186f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.735939f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.753193f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.21335f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.902012f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.089495f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.58031f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.263393f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.389595f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.411808f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.020435f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.144455f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.2962f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.897776f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.124271f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.111145f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.478966f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.792439f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.627457f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.22819f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.252316f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.160707f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.44091f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.25015f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.912188f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.750805f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.955189f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.026106f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.814238f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.040257f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.69309f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.04459f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.733395f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.58453f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.76516f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.80672f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.192831f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.523682f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.81498f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.908417f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.202335f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.47245f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.739391f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.068176f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.204624f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.831722f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.325375f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.1208f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.505514f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.5488f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.15102f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.176058f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.531853f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.99091f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.7192f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.728962f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.610317f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.416164f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.267582f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.809444f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.121967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.335537f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.019642f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.637943f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.238605f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.979734f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.764843f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.702675f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.530308f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.94001f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.980862f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.945194f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.208347f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.52969f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.940765f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.146969f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.750103f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.741966f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.725286f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.318342f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.266163f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.628586f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.748112f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.119446f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.42128f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.901266f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.349586f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.926575f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.67647f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.600336f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.054993f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.485657f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.708344f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.638538f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.087189f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.382866f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.803917f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.354397f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.697845f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.903067f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.217096f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.466969f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.123995f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.66383f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.066828f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.641502f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.865017f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.352922f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.092247f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.752254f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.05304f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.390366f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.090763f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.176765f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.52108f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.761932f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.511892f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.29064f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.123562f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.654358f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.53424f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.85925f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.881779f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.724129f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.388699f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.270184f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.898977f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.655685f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.33598f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.403103f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.912945f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.995174f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.486912f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.728872f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.981642f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.466846f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.494558f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.157269f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.891315f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.81565f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.700804f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.016748f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 12.17991f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.762886f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.146517f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.501875f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.152218f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.110748f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.041103f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.101912f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.75323f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.365854f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.630274f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.837723f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.288212f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.135359f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.50134f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.57468f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.161716f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.87622f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.94062f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.8095f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.677917f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.46408f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.636616f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.922928f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.925663f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.531475f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.56285f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.698103f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.562912f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.336823f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.004013f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.274128f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.192417f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.432222f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.7392f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.985062f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.17378f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.799166f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.425634f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.468296f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = -5.995845f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.998158f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.038448f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.033749f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.00533f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.06829f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.032722f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.024298f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.170773f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.376472f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.3689f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.082571f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.11168f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.405136f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.619356f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.924015f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.195768f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.6123f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.629646f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.263647f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.222168f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.27013f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.022396f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.813812f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.486214f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.982243f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.693348f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.587395f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.629896f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.485079f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.933212f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.13412f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.339632f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.508022f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.1389f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.179682f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.82854f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.925406f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.995193f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.171566f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.341312f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.789955f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.980728f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.528828f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.014193f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.833584f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.698025f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.143219f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.284584f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.312044f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.3766f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.132614f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.464508f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.122822f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.225592f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.98212f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.111795f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.57614f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.375362f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.394024f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.27369f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.573868f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.740505f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.441765f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.771257f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.316023f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.580448f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.979656f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.400307f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.783073f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.628641f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.261189f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.686115f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.199797f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.52521f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.92347f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.527836f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.664387f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.153656f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.139687f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.865797f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.617004f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.611313f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.56675f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.861835f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.752243f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.810802f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.026962f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.86181f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.10205f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.015896f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.185467f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.688988f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.525013f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.49948f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.364954f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.283792f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.86265f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.039726f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.208265f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.437443f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.15319f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.790619f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.035023f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.27858f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.90172f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.833107f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.40643f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.92462f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.017815f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.769001f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.803432f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.338768f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.840986f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.032543f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.793646f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.137222f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.951649f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.809486f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.11989f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.108162f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.218895f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.576492f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.534967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.878292f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.561852f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.066605f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.652271f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.827291f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.628416f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.40998f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.966393f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.59112f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.124773f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.860865f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.764847f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.170559f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.344856f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.094692f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.650303f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.443104f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.755693f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.28392f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.591637f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.558187f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.37761f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.453766f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.134775f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.580048f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.17911f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.458458f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.700092f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.836933f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.738186f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.815561f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.215977f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.129654f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.794102f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.125484f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.227345f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.045433f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.293316f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.518925f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.422106f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.499228f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.184135f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.26989f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.96262f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.837223f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.046673f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.653591f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.075289f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.342976f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.946255f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.191769f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.609737f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.93655f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.087547f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.67813f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.700335f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.010113f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.786858f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.679712f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.27972f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.283361f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.018883f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.054487f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.607964f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.42205f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.083143f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.265385f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.279564f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.667408f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.51435f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.896326f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.182985f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.115044f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.596788f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.342295f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.938116f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.91387f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.638369f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.230114f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.580563f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.14f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.359524f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.14893f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.068623f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.713148f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.837017f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.895924f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.283958f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.11446f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.290411f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.527283f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.730885f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.029686f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.297504f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.37918f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.384432f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.19099f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.480999f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.165169f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.805332f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.753227f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.786406f0\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel(\n",
+ " Embedding(22 => 128), \u001b[90m# 2_816 parameters\u001b[39m\n",
+ " Chain(\n",
+ " RandomFourierFeatures{Float32, Matrix{Float32}}(Float32[7.531563 7.089197 … 2.0401998 3.328936]),\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " [\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9836579, 0.9530255, 0.9908016, 0.96244663, 0.99054164, 0.98432475, 0.9993702, 0.97146636, 0.9746882, 0.97592133 … 0.96757305, 0.98850816, 0.9889486, 0.9910423, 0.9700498, 0.9524227, 0.9703022, 0.9919154, 0.9982276, 0.98223925], Float32[-0.0059723244, -0.026827456, -0.005359013, 0.005184757, -0.015056814, 0.0036728473, -0.00411119, 0.014544279, -0.013788157, -0.0062612076 … -0.008408193, 0.007813087, 0.0057916404, -0.002667083, -0.0019434843, 0.0012296549, 0.0013133418, 0.0023761056, 0.00048080317, 0.006975226], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9946981, 0.9968043, 0.98153776, 0.9809628, 0.99675304, 0.93618464, 0.9842954, 0.9745212, 1.0101839, 1.0036527 … 1.0099263, 0.99696696, 0.9861587, 0.9825586, 0.9812812, 0.97964483, 0.98476547, 0.98760384, 0.9870029, 0.979692], Float32[0.006245078, 0.00454478, -0.015628312, 0.006516256, -0.009261778, -0.01909565, 0.005845533, 0.008189242, -0.00068665465, 0.0016260961 … -0.0041808817, -0.00029761743, 0.0026925767, -0.007287982, -0.01596789, 0.00494587, -0.00235075, -0.0046186335, 0.0087447725, -0.0045568403], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.97298455, 0.9863549, 0.9892886, 0.9950389, 0.9884353, 0.9859241, 0.94629824, 0.9905537, 0.997098, 0.9910578 … 0.98902434, 0.9732674, 0.993341, 0.98686963, 0.97267485, 0.9891902, 0.9939604, 0.98106074, 0.989807, 0.9971157], Float32[0.0012234193, 0.0013402402, -0.0051020766, -0.0023925323, 0.0024530701, -0.007705464, 0.027468415, -0.0025647462, -0.0012954248, -0.0012756804 … -0.0062676566, -0.004466568, -0.005843617, -0.0037935532, -0.0069217044, -0.0014938526, 0.0044353334, -0.0068317032, -0.001554352, -0.006626264], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.98990935, 0.9927325, 0.99345046, 0.9694228, 1.0037662, 0.98568547, 0.99712723, 0.9872776, 1.0082906, 1.0044862 … 0.99839866, 1.0037067, 0.99369234, 0.99665964, 0.9925385, 0.958198, 0.97938985, 0.99434316, 1.0101033, 0.9903174], Float32[0.0046547917, -0.019893773, -0.009588573, 0.0028432165, 0.0047589364, -0.0028043785, 0.00833395, 0.009257588, 0.0071648816, 0.006460758 … -0.0042559416, -0.0088890875, -0.0017841165, -0.0037446455, -0.0112923, -0.05279327, 0.018110534, -0.0060070534, 0.005424199, -0.009653852], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9813191, 0.97900933, 0.99497646, 0.9740931, 0.99270153, 0.9512806, 0.98416686, 0.98227394, 0.99777186, 0.9847334 … 0.97772944, 0.99356306, 0.99567527, 0.9884459, 0.99413466, 0.9872411, 0.9937668, 0.9934419, 0.9858429, 1.0006208], Float32[-0.008844349, -0.003362618, 0.007866318, 0.007047815, -0.0043394393, -0.011138873, 0.011454338, 0.0073125726, -0.0019595535, -0.005712127 … 0.0042020027, 0.012437841, 0.008444573, -0.010965995, 0.0033074883, 0.0013927073, -0.0025926381, 0.004861451, 0.0003271947, 0.0013459908], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.99184936, 1.0132029, 0.9949654, 0.99911726, 0.9930749, 1.004498, 1.0130638, 1.0191455, 1.0075065, 0.97904897 … 1.0130671, 1.0166669, 1.0298382, 1.0103947, 0.99601674, 0.97281563, 0.99154127, 1.0031415, 1.0141926, 0.9984006], Float32[0.0064145937, -0.012695074, -0.024113664, 0.0008978927, 0.008383268, -0.0046254005, 0.0058898213, -0.00048160343, 0.011751346, -0.001727852 … -0.011738534, -0.008813223, -0.0048925155, -0.002928767, -0.007028266, -0.028897885, 0.016255967, -0.008061163, -0.0006376766, -0.011933948], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9871817, 0.97967505, 0.9925497, 0.9899778, 0.9875628, 1.0040362, 0.9646963, 0.98949474, 0.98805684, 0.9880806 … 1.0172434, 0.9862294, 1.0149201, 0.94580567, 1.0022846, 0.98350805, 0.99963933, 0.9940302, 1.0096109, 0.97280717], Float32[0.002797166, -0.00076662557, 0.00028469562, -0.0044771708, -0.0028138815, 0.003455172, 0.0049372525, 0.005800529, 0.0032799544, 0.0068818713 … 0.02613455, 0.006609633, 0.0013952806, 0.011928056, -0.008288531, -0.016250463, 0.006419934, -0.0055078357, -0.01332255, -0.016116258], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[1.0026083, 0.9906121, 1.0252106, 0.9807042, 1.0165613, 0.9926678, 1.0275302, 1.0164462, 1.0170248, 1.0145394 … 1.0104992, 1.0050902, 1.0078901, 1.0107113, 1.0065619, 0.99085945, 0.99034745, 1.0093502, 1.0229908, 0.9981014], Float32[0.00017921702, -0.014748752, 0.0016491384, 0.0112701375, 0.008428353, 0.0039549926, 0.016354358, -0.0023426865, 0.0004367487, 0.0045339833 … 0.0015938681, 0.0045206565, -0.00032513068, 0.0024292, 0.005038348, -0.020209014, -0.0013112819, 0.0021825603, -0.00091132557, 0.0007438597], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " ],\n",
+ " Dense(128 => 20; bias=false), \u001b[90m# 2_560 parameters\u001b[39m\n",
+ " Dense(128 => 20; bias=false), \u001b[90m# 2_560 parameters\u001b[39m\n",
+ " Dense(128 => 1; bias=false), \u001b[90m# 128 parameters\u001b[39m\n",
+ " Dense(128 => 1; bias=false), \u001b[90m# 128 parameters\u001b[39m\n",
+ " Dense(128 => 1; bias=false), \u001b[90m# 128 parameters\u001b[39m\n",
+ " RoPE{Matrix{Float32}}(Float32[1.0 0.5403023 … -0.87528455 -0.065975994; 1.0 0.95041525 … 0.9552474 0.8159282; … ; 1.0 0.9999995 … -0.57972294 -0.5789079; 1.0 0.99999994 … 0.2726629 0.27235872], Float32[0.0 0.84147096 … -0.48360822 -0.9978212; 0.0 0.3109836 … 0.29580808 0.5781532; … ; 0.0 0.0009999999 … -0.8148137 -0.8153929; 0.0 0.0003162278 … 0.9621096 0.9621958]),\n",
+ " 20,\n",
+ ") \u001b[90m # Total: 84 trainable arrays, \u001b[39m1_339_648 parameters,\n",
+ "\u001b[90m # plus 3 non-trainable, 65_600 parameters, summarysize \u001b[39m5.365 MiB."
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model = train_editflow!(P, model; epochs=100, steps_per_epoch=150, batch_size=256, lr=1f-4)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MersenneTwister(42)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rng=Random.MersenneTwister(42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10-element Vector{String}:\n",
+ " \"QVQLVQSGAEVKKPGA\"\n",
+ " \"QVLVQSGAGAVKKPRASVKV\"\n",
+ " \"QVQLVKSGEVLVKP\"\n",
+ " \"EVQLMESGGPYKP\"\n",
+ " \"QVQLVQSGAEVKKPGASVKV\"\n",
+ " \"QVQLVQSGTMVKKPGASS\"\n",
+ " \"QVQVVESGGGAVVQPGA\"\n",
+ " \"QVQLVQSGAEVEKPGA\"\n",
+ " \"QVQLVESGGGVVSQLGS\"\n",
+ " \"QVQWYQYGGPVKK\""
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model_samples_any = sample_gen_n(P, model; n=10, ts=0f0:0.01f0:1f0, rng)\n",
+ "model_final_states = map(final_state_from_gen, model_samples_any)\n",
+ "model_strings = [state_to_PM_string(P, s) for s in model_final_states]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "10-element Vector{String}:\n",
+ " \"EVQLVESGGGLVKPPGQLR\"\n",
+ " \"QVQLVQSKAEVKKPGASVKV\"\n",
+ " \"EGQRVECGGGGGQRG\"\n",
+ " \"EVQLVESGGGLV\"\n",
+ " \"QVQLVQSGAAV\"\n",
+ " \"EVQLVESGGGLVQPGGSLTL\"\n",
+ " \"QVQLVESGGGVVQPCR\"\n",
+ " \"QVLLVQSGTEVFKPGASMKV\"\n",
+ " \"QVLLVQSTAEVKK\"\n",
+ " \"QITLKESDPTLVKP\""
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_states = [FF.DiscreteState(P.k, sample(PM; rng=rng)) for i in 1:10]\n",
+ "train_strings = [state_to_PM_string(P, s) for s in train_states]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1000-element Vector{String}:\n",
+ " \"QVQLVESGGRV\"\n",
+ " \"QLQLGQSGAVV\"\n",
+ " \"QVQLVESGGGVVQPGR\"\n",
+ " \"QITLKESYPTLVKPS\"\n",
+ " \"EVQLVLESGGLVQPGASLRL\"\n",
+ " \"EEQLVESGGGVVQPG\"\n",
+ " \"QVQLVQSGAEVK\"\n",
+ " \"QVQLVQSGGEPGCVGTLRL\"\n",
+ " \"QQLTQKYGTAVKEGASVHV\"\n",
+ " \"QVQLVLSSAEVKKP\"\n",
+ " \"QLQLQESGPGLVKP\"\n",
+ " \"EVQLVESGGYLVQPGG\"\n",
+ " \"QLQLQESGPGVVQPGRSL\"\n",
+ " ⋮\n",
+ " \"QVQLVQSGAEVK\"\n",
+ " \"QVQLVESGGGVKPGR\"\n",
+ " \"EVHLVESGGGLVKPGGS\"\n",
+ " \"Q\"\n",
+ " \"QVQLVQYGANVVKPGASVKV\"\n",
+ " \"QVLLVQSGAEVKKYGTSVHV\"\n",
+ " \"QVQLVESSQGHVMQQPGR\"\n",
+ " \"QVQRAQAGAESKKPGTSVKV\"\n",
+ " \"QIQIKETGPGLFK\"\n",
+ " \"QVQLQKSGPGLVK\"\n",
+ " \"QVVQLQQPGAYVK\"\n",
+ " \"MVLVQSGAEV\""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sampling and Levenshtein plot\n",
+ "using Random\n",
+ "n = 1000\n",
+ "# n model samples (final states → strings)\n",
+ "model_samples_any = sample_gen_n(P, model; n=n, ts=0f0:0.01f0:1f0, rng=rng)\n",
+ "model_final_states = map(final_state_from_gen, model_samples_any)\n",
+ "model_strings = [state_to_PM_string(P, s) for s in model_final_states]\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00\u001b[39m\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "m = 100\n",
+ "p=100000\n",
+ "# Build training strings from PM parents (the data PM was built on)\n",
+ "train_states = [FF.DiscreteState(P.k, sample(PM; rng=rng)) for i in 1:m]\n",
+ "train_strings = [state_to_PM_string(P, s) for s in train_states]\n",
+ "\n",
+ "# Independent validation strings sampled from PM\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng=rng)) for i in 1:p]\n",
+ "val_strings = [state_to_PM_string(P, s) for s in val_states]\n",
+ "#val_strings = [s for s in val_strings if !(s in train_strings)]\n",
+ "\n",
+ "# Plot normalized Levenshtein vs. training set (val/train are independent PM samples; overlap is not filtered out)\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings, train_strings; title=\"Model vs True (normalized Levenshtein)\")\n",
+ "display(plt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "model_strings_bos = [\">\" * state_to_PM_string(P, s) for s in model_final_states]\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings_bos, train_strings; title=\"Model vs True (normalized Levenshtein)\")\n",
+ "display(plt)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAGQCAIAAAD9V4nPAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nOzdd2AT5f8H8CejTdOZDronUMpqWZYylC1aERFEEJClyFREEHEiCig/UZYMWbJUlggyVVD2hkJLWwp00ZnuljY7ufv98Wi+MbnWtpC05d6vv5q75+4+uSZ55+6e5yJgWZYAAADwlbChCwAAAGhI4oYuABqj33///dKlS/n5+SzLfvrpp/7+/g1dETRS33zzTXJy8v/93/95eXk1dC0kOzs7Pz/fYDB4enq2bNlSKMQXfW4XL17cunXrCy+8MGjQoIaupVHAC6WJ2bdvn0AgEAgEP/zww382nj17tkAgEAqFaWlptd/EnDlznn322QULFuzatWvv3r0PHjx4iHrhfzZu3Ej/d5MmTbKc6+HhIRAIKisr67TO7OzsDRs2nD179hHVWGfHjh37/vvv61r2o1VUVDRv3rzAwMDg4OCYmJgePXpERETIZLJXXnnlzJkzDVjYI3Hw4MENGzYoFIpHuM47d+5s2LAhLi7uEa6zSUMQNjFbtmwx+6M6Wq2WhiXLstu3b6/l+svLy1euXBkcHJyXl1deXl5aWtq6deuHKRgsbd269fbt249kVUlJSVOmTPnpp58eydqaomvXrnXo0OGrr75Sq9Vjx45duHDhkiVL3n777fDw8N27d/fu3fv3339v6Bofytdffz1lypSysrKGLuRxhlOjTUleXt7vv//eunVriURy8uTJjIyMsLCw6hofPny4qKho6NChR44c2bp166efflqbM0WpqakGg6FXr15+fn6PtHb4m7u7e1lZ2fz58/fu3dvQtTR5OTk5gwYNKiwsnDp16tKlS52dnU3nXr169Z133lGpVA1VHjQVCMKmZPv27Xq9fty4cRKJZM6cOdu2bVuwYEF1jekh4zvvvCMQCH755Ze//vprwIABNazcYDDcvHnzypUrhJCqqqrr168TQtzd3Zs3b15aWpqRkeHr6xsQEJCenn7y5MmysrIRI0YEBwfTZTMyMs6cOVNQUODt7d2nT5/Q0FDL9ZeUlBw7dqygoCAkJCQ2NtbJySkuLs7Ozi4yMpI2kMvlubm5ISEhZlebkpOTVSpVp06dzII8Jyfn5MmTcrnc3d29d+/e4eHhpnMLCgpycnKCgoK8vb2Tk5NPnz6t0Wg6dOjQp08fgUBgWV5GRsbZs2cLCwu9vLxatWrVrVs3oVCo1Wpv3bolkUjat29v1l6j0SQmJjo4OLRr166GvWpp+PDhZ86c2bdv36VLl7p161Zz48LCwsuXL2dlZWm12rCwsH79+rm6uprWnJqaSggpKiqi/y9CCH3K5eXlaWlpPj4+gYGBpitMT08vKytr06aNo6MjIYRhmBs3bkil0rZt2z548ODYsWM5OTnR0dG9evWic69fv56SklJQUODm5hYdHd2xY8c6PVlCSFpaWnl5ecuWLd3c3DhnhYeHG59UfHz8rVu35HK5TCYLCAh48sknXVxcalj5Rx99VFhYOHTo0HXr1lnOjY6OPnXqVHl5udl0nU539uzZ5ORkvV4fHh7ev39/BwcH0wY3b94UCAQdOnRQq9VHjhzJzMxs1qzZwIEDfX19LbeiVqtPnjx57949lmXbtm3bp08fOzs741zOPfzEE0/07t2b/LOH79y5I5fLLfcwfY1VVVURQm7dulVQUEAIEYlEpm30ev25c+cSExN1Ol3Lli0HDBgglUoti7xx48a5c+cEAkHXrl27du1awy7lKRaajtatWwuFwqysrIKCAjs7u5CQEIPBwNkyPz9fLBaHhoYyDHPgwAFCyOjRo2teOee1wOHDh7Msu3PnTkLIvHnz3n33XWOKHD58mGXZysrKMWPGmEaLSCR655139Hq96cr37dtn+jno6+t74cIFe3v7oKAgY5svv/ySELJ+/XqzwmjSqFQq4xS1Wj1lyhSRSGRc
Ib3wptFojG2WLVtGCFm+fPmMGTNMn9GAAQMUCoXp+svLy0eOHGmWjuHh4SzLMgwTHh4uEonu379vVhX9njF58mSWZXfv3j19+vSff/655j28YcMGQsi0adPomczevXubznV3dyeEPHjwwDhl2LBhZtnv5ub2008/GRuMHj3a8l+2cuVKlmXpP3327NlmNbz00kuEkBs3btCH9EO2Xbt2Bw8eNP6DpkyZwrLs2bNnmzVrZrbyp59+uri42HSF/fv3J4Skp6dX96y/+uorQsjHH39sNl2v1wcEBNjb2xcWFrIsW1FRMXDgQLPN2dnZZWdnV7fm8vJyGjm3bt2qro2lkydPmp1HCQoKOnv2rGkbV1dXT0/Pq1evmn6NcHR0/PXXX83Wtn//frNzJ61atUpISDA2oNf22rVrd+jQIZlMVqc9nJ6ebvn/dXFxMa783LlzZt///P39//zzT9MKtVrtmDFjTNsMHjyYfm/4/PPPa7/fHm8IwiaDdojo378/fUi7e504cYKzMQ2VTz/9lGVZnU7n4+Pj4OBQWlpaw/r1ev3x48f/7//+jxDy3HPPHT9+/Pjx4/Hx8ew/QRgcHOzp6fnVV1+dPHny6NGjd+/e1ev1/fr1I4QMHDjwt99+S0lJOXr0aJcuXQghH330kXHN9MhPKpWuWrUqMzPz5s2b48eP9/HxEYlE9QhChmGGDh1KCHnyyScPHTqUkpLyxx9/PPXUU4SQadOmGZeiQRgWFubv77958+Zr164dOHAgIiLCuFsojUYTExNjXFtqaurly5fXrl3br18/2oB+jn/22WdmVfXo0YMQcu3aNZZladbOmjWrht3LmgQhwzCdO3c2+/dZBuGAAQM++eSTP/744/bt29evX1+6dKmbm5tYLI6Li6MNbt269cUXXxBCnn/++eP/yMrKYusYhJ6ens7Ozm+++eahQ4fOnj1LP0n3798fGxu7Y8eOy5cv37lz5/fff6dBNWTIENMV/mcQyuVysVgcGBho9t3o6NGjhJBhw4bRh7NmzaIPz58/n5WVdevWrb1797788ss1BOGRI0cIISEhIdU1sHT58mWJROLk5LR48eKrV6/Gx8d/9dVXUqnUxcUlLS3N2MzV1dXR0dHHx2fChAnHjx+/fPky/Qook8nKy8uNzQ4fPiwUCj08PFasWBEXF3f9+vVPPvlELBb7+vrSdGf/CUK6h2fMmGG6hw8cOGC2h5955hlCyAsvvECXVSqVx48fp6dMdu7cSf+/f/31F51LDzQdHR0XLFhw+fLlhISEZcuWOTk5OTo63r5921jkW2+9RQjp0qXLyZMns7KyDh8+3KJFi4CAAAShKQRhk/Haa68RQrZt20Yf7tmzhxAyZswYzsatW7cWCASpqan04cyZMwkha9eu/c+t/PHHH4SQN954w3QiDUJCyOnTp02nb926lRAyaNAghmGMEysrKwMDAx0cHIyfBTSzV69ebWzDMAw9T1uPIDx48CAhpEePHqYfrBqNpnXr1iKRyPhxRoPQ0dExMzPT2Cw+Pl4gELRr18445dtvvyWE9OrVy/Ro0lRxcbGDg4PZ53hycjIh5IknnqAP33rrLYFAYJk6ZoxByP4TA0888YRx11kGoaV9+/YRQiZNmmSccuzYMULI1KlTzVrWKQgJIe+++27NxbMsq9PpoqOjBQJBRkaGceJ/BiH7zwvA7Ehl5MiRhBDjMVZUVJRIJFIqlf9ZhtHq1avpl7DaL9K5c2eBQPDbb7+ZTqQv49dee804hZ6qffPNN02bDRs2jBBiPO7XarVBQUF2dnb0y5ARfRl/+OGH9KGxt2ct93DXrl0FAoHp/qRf8iy/EPTs2ZMQsn//ftOJu3fvJoSMGjWKPrx//75QKHR1dTU9jr979y49m4IgNEKv0aZBoVDs3bvXycmJvhsJIUOGDPH09Ny3b59ld7Lz58+npKT07NmzRYsWdMq4ceNILTqa1qx79+700pHRjh07CCEfffSR6XlFZ2fn
cePG0QsnhBClUvn777/LZDIa5JRAIHj33XfrVwbd6Icffmh6atTe3n7SpEkGg8Gsi+CIESNCQkKMD6Oionx9fWk00in0LOWCBQvs7e05N+fp6fnyyy/n5OT89ttvxon0zNKUKVPow1WrVjEM880339T+WcTGxvbt2/fatWu//PJL7ZcaNGiQWCym13EfIYFA8N577/1nM7FYPGjQIJZlr169Wqf1jx8/nhCybds245SKioqDBw96e3vHxsbSKe7u7gaDoU5rrqioIIRYXkRcvHjx0yY+/vhjOj0pKSkuLi46OpoeeBmNGzfO2dmZfqUwNXfuXNOHTz/9NCEkIyODPjx9+nR2dnZsbCw9BWI0ffp0gUBgtrZHvofT09PPnz8fGRn54osvmk4fMWKEl5cXTXpCyMGDBxmGmTBhgqenp7FNeHj4Cy+88J/F8Ao6yzQNe/bsqaysHD9+vLFfnL29/fDhw9evX7979+6pU6eaNqaBR8OP6tKlS2Rk5NWrVxMSEqKioupXQ9u2bc2m3LhxgxCyd+/eX3/91XT6zZs3CSGZmZmEEHoGtVWrVmbX8OvR7cJ0o0eOHDEbPHfnzh3jRo1atWpltriPj09+fn5lZSX91k9L7dSpUw1bnDZt2o4dOzZs2ECPbFQq1Q8//ODi4kKPaeptyZIl3bp1+/DDD4cMGSIWc7wTy8vLv/7666NHj+bl5dGOElRJScnDbNdSs2bNLC9WEUJOnDjx7bffJicn5+TkqNVq4/Ti4uI6rX/IkCFeXl779u1bvXo1za1du3apVKpp06YZ+5W89tprp0+f7tu3b69evQYMGNC/f/+uXbvW3M/ZycmJEGLZKTQ9PZ12HdLpdFVVVRKJhE6nw+bUavX7779vtoiDg0N+fr5GozE2dnR0DAoKMm3j4+NDCJHL5fQhfR0WFRVZrk0ikZi9Dh/5HqbPxWAwWG5dLBYXFxdXVFTIZDJ66qJDhw5mbTp27Lh///6aN8ErCMKmgWabVqul1/Ao+hGwZcsW0yBUKBT0rGlGRoZpY3rmbevWrfScYT1Y3jqkvLxcIBDQM0tm3N3d6XdSevLN29vbrEGzZs04e2/+J9oJcNeuXZwbNZtC+0aaop+ttDa9Xq9SqaRSqbEXA6fu3bt37tz5yJEjWVlZwcHBe/bsKSsrmzZtWs0dGv9T165dBw8efPDgwe3bt5seLlMPHjzo1q3bnTt3oqKiRo0a5eHhQY9Z58+fr9frH2a7ljhvCrNp06bJkyc7OTnFxsaOGDGCfm84d+7c4cOH61qAvb39yJEj16xZs2/fvgkTJpB/jg5Nv6vRw7JvvvnmzJkzp06d+vjjj/38/L744gvanhPtyWI8RDPavHnz5s2bCSH79+83nkEh/7xy7t69m52dbbk2d3d3lUplDEKpVGr2+jR95RjXFh8fn5KSYrYqqVRqXA/FuYc3b978xhtvmO3h8+fPHzp06D/3MN16eno6Pd9u+VxorNJ3n2UGW74feQ5B2ATcu3fv3LlzhJCdO3caL9cZXblyxfQ4b+/evfQ2H/RahZnt27cvWbKkutOANbPMLRcXlwcPHmRkZFj2jDeih7CmBzQUvYJoOoV+0DAMY9bS7J4a9IJHXFwc5yCNOhGLxU5OTgqFoqyszDJETU2ePHnq1Klbt26dP38+/eh54403HnLrhJDFixcfPnx4/vz5o0aNMpu1cePGO3fuTJ48ef369caJVVVVH3zwQW3WTHemwWAwm268KGjK8j+r1+s/+OADqVR69epV0zsqlJeXHz58uDYFmBk/fvyaNWu2bds2YcKEe/fuXbp0qXPnzmZHKsOGDRs2bFhxcfHp06cPHz78008/TZw40c3NjfaNstSzZ0+BQHDnzp2cnByzUSKc6BeXUaNGff/99/V4Cpxre/vtt2l/pZpx7uH333/fcg9/+OGHhw4dquXWX3zxRcsPBMtmhYWFZtMt3488h2uETcDWrVtZlh0/fvxxC9OnTyf/vvpC
jx2XLl1q2bhXr14lJSW0r90j0alTJ5ZlL168WEObiIgIOzu7u3fvKpVK0+nGcW9GdJCW2Vu0qqoqKyvLbKOEkAsXLjxM5WZrsyzGzJgxY9zc3DZt2pSYmHjhwoWYmJiaz6bWUvv27ceMGZObm2s5DC4+Pp4Q8sorr5hOpP1FTafQU4uWBxB0Z5p9ArIsa3n4wik3N7e4uLhTp05m9xWq9025oqOjIyMjT58+nZ6ebnw9c7b08vJ66aWXtmzZQr8B/Pzzz9Wt09fX95lnnmEYZunSpbWpgf7Lan651t5Drq32e5jzX0y3funSJbbGnw+iHc3oWdyat8J3DdFDB+rAYDDQaxV0JIOZzMxMgUDg7e2t1WpZlk1LSxMIBB4eHpx9IGk3k8GDB9ewuRp6jb7//vtmjWkAx8TEmA7yo5RKpbGb5ZAhQwghK1asMM5lGKZv377k371G6TW/bt26ma7H2NPBuAn6fZkOTzbbqEajMT5xegbYdKMUHbdg7ARPP227d++uVqtr2C3sP2Mk6JH35s2bTWedOnVq5cqVZ86cqXkNpr1GjdLT0+3t7b28vOhZXOOTeuedd8g/34Eog8FAd5qPj49xIr1V2zPPPGO2raKiIoFA4Ovra/qvMd6f1nIcodnilZWVAoEgNDRUp9MZJ545c4Ye2axatco4sTa9Rik6EOXTTz8NCgoyDh803aJZe9o76aWXXqphnXFxcRKJRCgUcvaIph2RaPcTlmWNo1a2bNli2di0ADqO0KyBWUdcrVYbEhIiEAiOHj1aw9qM4wgtGwiFQrM9fPbsWbqH6WBQ6tVXXyWEmI10ZP/pNcr5xI1bz87OFovFzs7Opns7OTkZvUbNIAgbO3oA17Zt2+oa0AFttBf1hx9+SLg601NVVVXOzs5isTgvL6+6tdUpCA0GA+0/EhUVtWnTpsuXL1+5cmXPnj1vvvmmTCYzDltMSEiQSCQSiWTJkiVJSUkXL14cMWJEQECA2ThCnU5Hz3aOGzfuzz//PHLkyOuvvy6TyegVDtMPdDpAuFWrVuvWrbtw4cL169d//vnn2bNnN2vWzDhipJZBqNPp6D0+nnjiib179yYmJp4+fXrFihU9e/Y0WzA5OZl+SLm5uVVVVZnOqus4QrPpdKQXZQxCOkrE29t7+/btKSkpf/3116BBgwICAiQSiWkQqtVqT09PsVg8Y8aMb7/9dv369cbR5XSAyoABA44ePXrixAl6Io6OJf/PIGRZlt5/ZOTIkRcvXkxOTl69erWHh0fz5s3rHYR5eXlisZjm/dChQ83m+vv7T5gwYc+ePdeuXUtKStq9e3fLli0JIbt37655tTt37qQX5GJiYr7++utDhw799ttv27dvnzVrFn3lmN5KIi4uzsnJSSgUTp069eDBgwkJCSdOnFi3bl2fPn3GjRtnbFabIGRZ9sSJE3Z2dvb29nPmzDl69GhCQsLvv/++cuXKrl27zps3j7apLghZlqUDWEeMGGG5h02DcOXKlYSQ9u3bf/nll+vXr//+++/p9MTERFdXV3oriQMHDiQkJPz555/r16/v37//yJEjjYvPmTOHEBIZGXn48OG7d+/u3r07ODiY3hMKQWiEIGzshg8fTgj54osvqmuwZs0aQsgLL7xgPHas4dCEXov66quvqmtQpyBkWVatVs+ZM8fsDlUikahv376mN3A5cuSIaQfugICAa9eumd1ZhmXZy5cvm17G9/HxOXPmjOWdZfR6/fz582mnQSOhUNizZ0/jN99aBiHLslVVVRMmTDDroNixY0fLJ0tHdM2YMcNs+kMGYWFhobHfjelh7pw5c0yvLTVv3vzmzZvOzs6mQciy7O+//27aOdb4GZqZmUlvIEA5OTnt3LmzujvLWFZ7+/Zt0zuwCASCWbNm0b1avyBk/xlQSEyGDxpFR0eTf3Nyclq6dGltVnv9+vWBAwdaXoeLiIhYtWoVPVlidOPGDctteXt7f/vtt8Y2tQxClmVPnz5teY+9
wMBA4w2AaghCyz389ttvL1++nPw7CNVq9cSJE42drk3vLHPr1q3u3bubbd3Ly+ubb74xttHpdGbXs19++WV6IgRBaCRg8Qv1jVtWVpZer/fz8+O8hSAhRKPR5ObmikSigIAAejktLCysug6ZFRUVJSUlTk5OtC+4JZVKlZ+f7+LiYtrTrKqqqrCwUCaTeXh4cC5VVlZ24cKF7OxsR0dHPz+/zp07m8Ye9eDBgxMnThQVFQUFBfXr18/BwYEe3JhdAqysrDx+/HhRUZGvr+/AgQOlUmlOTg6906bZk6qsrDx//vz9+/clEomfn1/Hjh1NnxR9pp6enmYdeXJzczUaTWhoqFny5eXlnTt3rqysTCaTtWnThnOQSffu3S9duhQfH282t6ioqLy83N3dvebf5KusrCwqKnJ1dbVslp+fT/sAmxWWlpZ2/fr1ioqKFi1a9OrVSywW05PhpoMjKbVaLZfLGYYxfcparfavv/66f/++TCYbOHCgu7t7QUGBQqEIDAykHaZYls3IyLC3t+fsbKLRaM6fP5+enu7o6Pjkk08GBwfTverl5WW8O2hubq5SqQwLC+McAWKmrKyMDnsNCQkxHQZKyeXyuLg4epE4ODg4Ojra9Maq/6moqOjSpUuFhYU6nc7b2zsqKooeU3KiYwoVCkWzZs1CQkI6depkWg/nTlYqlfSOoGavbYZh6C1SlUqlr69vWFhYVFSU8bVa8x7WarXnzp2z3MOWr1tCCP3f0ROqptPpjYeqqqq8vLyCg4O7dOliuW9TUlIuXrwoEAg6derUoUMH+lJ0d3evuY8YfyAIocFwBmGjdeXKlZiYmN69e586daqhawGARwnDJwBqotPpsrOzS0tL6WBNy/HLANDUIQgBapKdnW28U92kSZOeffbZhq0HAB45BCE0mMWLF1ve+aWx8fDwWLJkiYODQ8eOHWn/UgB4zOAaIQAA8BruLAMAALyGIAQAAF5DEAIAAK8hCAEAgNcQhAAAwGsIQgAA4LWmOo5QpVJ9++237733XkMXUh8Mw5jd6LIBNapi6GCe+v1yvTWwLNt4imlU/6lGVUxje9nYeOdoNJqq0iJSzTi4motxdPes7ibG1tCo3lCmmuo4wuzs7J49ezaV21SaqaysNP7aQINrVMWo1WqRSER/ibTBsSyrVCrNfuOiATWq/1SjKkar1RJC6G3EGwMb75xr165d2bOlXZAv51ydTlfdG+peTn7L2BF9+vSxYnH/plAopFJp4/kKZdRUjwgBAIAQotPpJEJBiKf5r1UY51YXhPmFRfQ7BCAIAQCasNTU1Bv37juIHTjnGhiDSGj+q0xUQmqWv9ftgQMHWrO6pgFBCADQtAmlrs7NOX5BkxBiMBgsf57w76XyS5vopbFHrtGdqwUAALAlBCEAAPAaghAAAHgNQQgAALyGIAQAAF5DEAIAAK/xa/jEd999t2nTpoauonHdnurhi7G3tz969KhMJntUJQEA2BK/gvDWrVtPP/308OHDG7qQx8rgwYPLy8sRhADQRPErCAkhQUFBXbp0aegqHisSiaShSwAAqL/GcoIOAACgQSAIAQCA1xCEAADAawhCAADgNQQhAADwGt+D8LnBQwS2cvLkyYZ+un/Dr3ECABjxbviEmfIHleO/OxH6RB9rb+jAe8MePHhgOuXcuXOrVq2aMmVK//796ZTx48cvWLAgLCyMcw1yuTwpKcnY+GG4uLhkZWX5+Pg8/KoAAJo6vh8RNqCsrKz9+/fPmjXLYDDQKQcOHCgvL6+u/Y0bNz766CNbVQcAwBcIwobUqVMne3v7H3/80Wx6Zmbm4sWLJ0yY8O677yYlJRFCVCrVli1bsrOz33//fRqHmzdvTkhIoO3j4uK2bdtGCCkuLl64cOGtW7fefPPNdevWKZXK9evXT5kyZdq0aQcOHLDtkwMAaBoQhA1JKBQuWrRo/vz5Go3GdPqNGzfc3NxGjRoVGhrap0+f+/fvi0Si8PBwJyenLl26
dO7cmRCye/fulJQU2j4xMXHfvn2EkNLS0i+//HLOnDlPPvlk27ZtCwsLs7OzBw8e3K9fv48++mjr1q02f4oAAI0d368RNrjY2NjmzZuvX79+5syZxolDhw4lhFRUVISHh58/f/7AgQNvv/32k08++eeff7788ss1r1ClUm3cuDEkJIQ+XLRokVqtLigomDVr1s6dOydMmGC1pwIA0CQhCBvekiVLnn/++YkTJxqnnDp1avLkyW5ubjKZLDU11ZhqteHm5mZsX1RUNHLkyJycnJCQkPLycrVa/YhLBwBo+nBqtOF17dq1W7duK1asME55++23ly1bdvXq1ePHjw8YMIBlWUKIQCAwXcrOzk6n09G/KysrTacb/16+fHlERMTdu3ePHz/+8ccf0/UAAIApBGGjsHjx4uXLlxuvFCoUCnt7e0JIVlbWL7/8Qid6enrK5XJj+LVs2fLMmTOEEKVSadndxmw9Go1m7dq11n4WAABNEYKwUYiMjHz++eeNQbhgwYJRo0bFxMQMHTp0wIABdGKXLl26desWGhpKx//NmjXr+PHjbdq0iY6O7tChA+dqp0+ffuDAgS5dunTo0KFt27bG6XZ2dmbHlwAAvMX3a4RCgeD8poUJ+7+z9oaykq4JBK+bThk9evTo0aOND7dv3759+3b696uvvjps2LCSkpLAwEBjYgmFwl27dhnbh4WFpaam5ubm+vr6Gk+HtmrVqqioyNgmIiIiLS0tLy/Px8fH9FcDq6qqHvXzAwBoqvgehN8u/zo1NdUmmxrRr1+/2rd2dHR0dHSsuY1QKAwKCqq5jVgsDg4Orv12AQD4hu9B2LZt29DQUNtsy9nZ2TYbAgCA2uN7EA57ccjZs2dFIpG1N6RQqo4cPfr0009be0NWpdPp1Gq1i4tLQxcCAPDI8D0IlVVVP33y9lNRbay9oTFfrlEqlZbTL126tGrVqsTEREdHx9atW7/22mu9evWydjFG169f12g0PXr0qGX7P//8c8GCBZcuXbJqVWr8xMAAACAASURBVAAAtoReow1pz549AwcO7NSp07Zt27777rsePXpMmjTJlgUcOnRo9+7dttwiAEBjw/cjwgakVqvffPPNb7755o033qBTOnbsOHbsWGODX3/99fDhw4SQ0aNH9+3blxBy+vTp4uJipVJ55MiR0NDQDz74wM3NjRBiMBg2bdp0/vx5mUw2ffr01q1bE0J++umngICAixcvxsXFrV69+u7duz///HNeXl5oaOjbb78dEBBw9+7dEydOqNXq999/PzQ0dOrUqSzL7tix4+TJk1KpdPLkyR07dqSV7Nq16/Dhw/7+/vQ2pwAAjxMcETaYq1evFhcXmyYfIUQqldI/vvnmm8WLFw8dOnTQoEGvv/76iRMnCCGXLl2aPn16cnLy66+/fvv27TfffJM2Hjt27PHjxydOnBgVFdW3b9/s7GxCyMGDB1999VWRSDRhwgSJRJKYmNizZ8+ZM2e6uLj07t1bo9F4eno2b948KChowIABTzzxBCFk5syZP/3006uvvtqjR4/nnnsuOTmZELJ58+b58+ePHj06KioKvwMFAI8fHBE2GLlc7uXl5eDgQB9+9NFHxcXF9I+AgIAFCxbcvHmzRYsWhJCqqqq1a9fSkfURERFffvklIcTX1/fZZ58lhCQnJ584cSInJ8fe3r5v377Jyclbt2795JNPCCHPP//83Llz6fqnTp2q1WrlcvmYMWN2794dFxfXvXv35s2bl5WV0TXn5+dv2bJFLpfT3q3379/fsGHDihUrli9fvmzZsueee44Qkp6efvToUZvvKgAAK0IQNhhXV9fy8nKDwUD7rLZv376ysvKtt96aMmWKwWBQKpUjR46kLdVqtYeHB/07PDyc/uHp6VlWVkYISU5OVqlUxg4vFRUVNCAJIaZ3k1m5cuWyZcsiIiJkMllRUVF+fr5ZPSkpKQaDoU+fPvRhZWUlPUxMS0uLjIykE6OiohCEAPCYQRA2
mOjoaKFQ+Ndff9ExFaNGjSKEvPPOO4QQGnt//fWXq6ur2VJCofnZbE9PT39//2vXrlluQiz++//74MGDefPmZWdnN2vWjBDSvn174428jXfi9vT0dHZ2vnr1qtnd12QyWVlZGf1Fi9LS0od81gAAjQ2uETYYDw+P2bNnT5s2zTgaIS8vz2AwEELc3NwGDhw4f/58+lClUtHLdZxiYmI0Gs2WLVvow+Li4szMTLM2Op2OZVl6w+5Dhw4Z19asWbPMzEyahW3btvX19V22bBmdVVFRQe+5M2jQoG+//ZZlWfpLh4/s+QMANA4Iwoa0ePHiqVOnDh8+XCaTBQYG9urV69NPP23Tpg0hZMuWLdnZ2UFBQe3btw8PD6c/NOHg4GC875pQKJTJZIQQR0fHgwcPbty4MSwsLCIiIiYmJiMjgxDi5ORkvL+op6fnvHnz2rdv365duy1btgwcOJD+KsXIkSPVanVYWNgrr7wiFot/+eWXI0eOBAcHt2nTpkOHDklJSYSQL774Ii0tLTQ0tGPHjj179sRoegB4zAia6G/UZWdn9+zZMysrq05LzZgxo127dtOnTzdO6de7l+5BWTN32aMu0NylxJT1m78fPHgw59zS0lJHR0djxxkjvV5fWVnp7u5em02o1WqdTldDUKnVaq1Wa3m61YxWq1Wr1WbNKisrHRwcTH/s0CgsLOzkyZOP5E51arVaJBJxbsX2WJZVKpVOTk4NXcjfKisrG8+3kEZVjFarJYTQ73aNgY13zo4dO67+/lv/fs9wzjX2QrB06fwpn8ioWbNmWbO6f1EoFFKp1PL6ToPj+zXClavXpKSk2GBDY8Xi/v37VzfX2BfGjFgsrmUKEkIcHBwso7RODSh7e3vLz5TG86kHAPBo8T0IIyMjjV0iAQCAhxrdISoAAIAtIQgBAIDX+H5qFGyMZdndP+5QK6o45+p1eqFQIKzm2r6Xr//zQ160ZnUAwEcIQrAphmGSr158qWNLzrkGoUEgFAqFAstZSrXmfFwughAAHjkEIdiaQCAI9fXmnKXX64VCIWfv6gcKJZErrFwaAPARv4JQKpXOnTv3448/buhCHit0iGFDVwEAUE/8CsIvv/yyMfyQUFVVFf2Fh8bg4YsRiUT/OUjfiGXZ1LSMy3GOnHMZhhEIBGY3O6UUak1qJu50CgCPHr+C0M7Orvbj061HLBY3nvHpNi6GYRh5aUWZU1B1c6sLwkq2MrfwnpWrAwA+4lcQQqMgEDi4cn8dqSEIdU3yVoAA0ARgHCEAAPAaghAAAHgNQQgAALyGIAQAAF5DEAIAAK8hCAEAgNesNXxCrVa/++67hw8f9vT0nD9//pAhQ8wapKenb9myJT4+3tXV9YcffjBOf/31142/O9+hQ4evv/7aShUCAAAQ6wXhokWLEhISzp8/f+vWrREjRsTHx4eFhZk2yM7OViqVLVu2/OWXX0ynX7x48d133+3QoQPBr6IDwD+WL/miND+Hc5aBYQghIq5b1BJC3H0DZn/Q8PeTgsbMKkHIsuzGjRt37doVEBAQEBAwaNCgLVu2fP7556Ztevfu3bt3799++80sCAkhERERXbp0sUZhANBEKUqL3uofLbHj+MgyGAyEEBHXr3fp9IY1p25YvTho4qwShEVFRYWFhZ07d6YPO3XqdOXKldovPnPmTIlEEhUV9cknnwQEBFijQgBoWhiGMej1eo6bDv0dhCzLcfMhvcHAMAZr1wZNnVWCsKSkRCAQGE9symSywsLCWi77/vvvt2rVimXZ1atX9+nTJz4+3tGR4wbNKpUqLy/P9Mahs2fPfueddx6+eBtQKBScdxFrEDYuRqvVMgxDP7ks1XCLNYOBYQyGqiruX/S1BpZlVSoV58drg+Dzy4YQciUuPkBXKeY67KP/I+6XDcNcuXnPli8bYvOdo1ara3hPVTedEMKwjEajseXOUSqVBoOB83fWrMfBwUEs/o+ks0oQuru7syxbVVVFf5SgoqLC09Oz
lsuOGzeO/hEdHR0UFHT27NlnnnnGsplUKvXz80tISDBOcXZ2trOze+jabYFl2cbz6xM2Lkar1QqFQs6zWIQQwT8sZ4lEQqFIZMtSWZYVCoVOTk4222LN+PyyIYRoGeLaupu9hOMHv1iGIYQIuD5e9TqtLu6ejUu18c5xcHCo4T1FqjlpTAgRCoQSicSWpQoEAqlUauMgrA2rBGGzZs1cXV1v374dExNDCElJSWnRokVdVyIWi52dnVUqVXUNBAJBY/gpCQAAaNKskswikejVV19dunSpXq+/c+fOL7/8Qo/z5HL59OnTabZpNJr09HS5XK7X69PT03NzcwkhBQUFFy9e1Ov1Op1u+fLlRUVF3bt3t0aFAAAAlLUOURctWqRSqTw9PXv06PH5559HRUURQqqqqg4fPqzT6QghaWlpTz/99MKFCyUSydNPP/3WW28RQh48eDBx4kRHR0eZTLZz586DBw/6+PhYqUIAAABivXGE7u7uR44c0el0ptftWrZsaRws37Zt27S0NLOlwsPDU1JSGIZhWbaGU94AAACPinV/mLd+vVca4aVUAAB4XCFyAACA1xCEAADAawhCAADgNQQhAADwGoIQAAB4DUEIAAC8hiAEAABeQxACAACvIQgBAIDXEIQAAMBrCEIAAOA1BCEAAPAaghAAAHgNQQgAALyGIAQAAF5DEAIAAK8hCAEAgNcQhAAAwGsIQgAA4DUEIQAA8BqCEAAAeA1BCAAAvIYgBAAAXkMQAgAAryEIAQCA1xCEAADAawhCAADgNQQhAADwGoIQAAB4DUEIAAC8hiAEAABeQxACAACvIQgBAIDXEIQAAMBrCEIAAOA1BCEAAPAaghAAAHgNQQgAALyGIAQAAF5DEAIAAK8hCAEAgNcQhAAAwGsIQgAA4DUEIQAA8BqCEAAAeA1BCAAAvIYgBAAAXkMQAgAAryEIAQCA1xCEAADAawhCAADgNXEN8woKCvLz8w0Gg+nELl26WLkkAAAA2+EOwsTExClTply4cMFyFsuyVi4JAADAdriDcNSoUcXFxatWrWrVqpVIJLJxTQAAADbDEYQVFRVJSUn79+8fMmSI7QsCAACwJY7OMgKBgBDi7+9v82IAAABsjSMIXV1dBw0adOjQIdtXAwAAYGPc1whnzZr12muvlZaWxsbG+vr6ms5Cr1EAAHiccAfhmDFjCgoK1qxZs2bNGrNZ6DUKAACPE+4g3LNnj1artXEpAAAAtscdhL169bJxHQAAAA2ipjvLlJaWJiUl5eTk+Pn5tWnTxsfHx2ZlAQAA2AZ3EBoMhrlz565Zs8Z4glQkEo0dO3bt2rVSqdSG5QEAAFgXdxDOnz9/xYoVr7766ssvv+zv719UVHTo0KGNGzcSQrZs2WLbCgEAAKyIIwj1ev2aNWs+/PDDRYsWGSc+++yz7dq1mzlz5jfffOPh4WHDCgEAAKyIY0B9YWFhRUXFiBEjzKaPHDnSYDCkp6fbpDAAAABb4AhCFxcXoVCYmZlpNp1OcXNzs35VAAAANsIdhL17937rrbeuXLlinJicnPz666+3bt26ZcuWNiwPAADAurg7y6xbt65v374xMTEhISG0s0xaWpqbm9uxY8foLbkBAAAeDxxHhISQiIiIxMTEJUuWREZG6vX68PDwTz/9NDk5uVu3bjauDwAAwKqqHVDv4eExb968efPm2bIaAAAAG+M+IgQAAOCJ/x0RHjlyZNGiRdOmTRs3btygQYNKS0s5F7h48aKtagPgl8TExB8Pnahurlartbe355wlFgnHDY0NDw+3WmkAj7P/BaFUKvXx8XFyciKENGvWzM7OruGqAuCj0tLSHOeWAe25r8SzOh1bzbvy/s3TZWVl1iwN4HH2vyDs169fv3796N9bt25tmHIA+E1s7+Dg6s45S1jDEaG9xJpFATzmuK8R7tixo6CgwGxiQUHBhg0brF8SAACA7XAH4dy5c9PS0swmpqenT5kypZbrLSoqmjhxYpcu
XUaOHHn//n3LBnFxcYsWLRo9evSaNWtMp9+9e3fYsGFdunSZOnVqeXl5LTcHAABQP3XoNapQKOgVxNp49dVXCSE//PBDcHDwkCFDWJY1a3Dq1Cm5XF5UVHT16lXjRIPB8Nxzz0VGRu7YsaOsrOyNN96ofXkAAAD18K9xhLdu3aKdQlUq1a+//pqYmGicpdfrf/rpp4iIiNqs9N69e6dPny4sLHR1dV2yZMm2bdvOnj1r9qv3s2fPJoR8+OGHeXl5xonHjh3T6/WfffYZIWTVqlVBQUF5eXn+/v4P8QQBmgyFQlGel+uYnsw5V6fTVdeFrTw/S6VysWZpAI+zfwXhiRMnaD4RQr766iuzpqGhoZs3b67NSm/duhUREeHq6koIEYlEnTt3TkhIMAvC6haMjo6mf/v4+AQGBiYlJSEIgSdSU1Mdbv4lKUvhnGvHMEIh9ykcUcbd9BbS3r17W7M6gMfWv4LwtddeGzJkCCEkJiZm3bp1nTt3Ns7y9vZ2dnau5UoLCwtlMpnxobu7e2FhYf0WtOyzQ6lUqvz8/LCwMOOUqVOnzpgxo5YVNiyFQtF4btlq42K0Wi3DMAaDgXMuwzACgYCzHoOBYQyGqqoqKxf4PyzLqlQqy7P61qPRaALdXbpHtuOcazAYRCIR56xTpXkajcaWO8f2r2H6suF85bAMQwgRcP2nDAYDa2BsuWeIzXeOWq2u4T1V3XRCCMMyNn7ZKJVKg8FQ3fc5K3FwcBCLq72HGvWv2W5ubm5ubhqNZtq0aS1atGjevHn9Nuzm5qZUKo0Pq6qqavnjTW5ubqaRWVlZaZqLpqRSqbe3959//mmcYhwE2fixLFv7bxXWZuNitFqtUCis7gNd8A/LWSKRUCgS2bJUlmWFQqEtX1QSiUQkFFW3cwgh1c0SCoUSicTGO8fGr2H6suHcA6xAQAgRcH28soxIIBLauFQb7xwHB4ca3lOkhpeNwNYvG4FAIJVKbRyEtcFRUElJycKFCzUaTb1XGhoamp6ertfr6cN79+6ZHrrVvODdu3fp3yqVKicnJzQ0tLrGIpGouYmmkoIAANCocASht7e3TCaTy+X1XmlMTIy7u/sPP/xACPnjjz+Ki4tjY2MJIVevXl27dm0NC7744ospKSkXLlwghGzatCkiIqJ9+/b1LgMAAOA/cQShWCz+4IMPPvvss1pe2ONYqVC4devWjz/+uEWLFmPHjt2yZYtUKiWExMfH79ixg7b57rvvPDw8vvvuu4MHD3p4eCxYsIAQIpPJNm3aNHjw4BYtWixbtmzTpk31fFoAAAC1w30J8d69e1lZWc2bN4+JifH09DSdtWfPntqs98knn7x//35BQYHpbUsnTZo0adIk+vfUqVOnTp1queDIkSOHDh1aXFzs6+vbCE8lAwDAY4Y7CO/fv0+v6lVUVFRUVNRv1SKRqH4jH+zt7TFkAgAAbIM7CP/44w8b1wEAANAgcO4RAAB4rdphhiUlJZs3b75582ZOTo6vr2/79u0nTZqEM5YAAPCY4T4ivHPnTlRU1Lx5886dO6fRaK5du7ZgwYJ27dpdvnzZxvUBAABYFXcQTp06VSgUnj9/Pisr6/Lly+np6fHx8YGBgRMmTLDlHacAAACsjSMIKysrz5w5s3r16h49ehgnRkZGbtmyJSUlJTU11YblAQAAWBd3EDIMY3lTNHrr0XqPpgAAAGiEuG+x5urqunfvXrPpe/fupbf3tElhAAAAtsDRa1QsFs+YMWPx4sXZ2dkjRozw8/MrKio6fPjwhg0bxo4d6+HhYfsqAQAArIR7+MTChQtVKtXatWu3bdtGp4hEorFjx65Zs8aGtQEAAFgddxCKRKLly5d/9NFHly9fLi0tdXNzi46O9vPzs3FxAAAA1lbT7/Z6eXkNGjTIZqUAAADYXrVBKJfL169ff/PmzdzcXHpnmcmTJ9fwM7kAADxRVlZWVlZW3VyFQlHD74SHhobid3UaG+4gvHLlSmxsbHl5efv2
7X18fHJzc3/77beVK1fu27fv2WeftXGJAACNyvEjh/LiLzs6OHDO1el0xt+eM1NQpZ4w+/2goCBrVgd1xh2EEydO9PX1vXDhQkREBJ2SnZ09ZsyY8ePHZ2dn29vb27BCAIDGhTEYurcMah0cwDlXq9VW9yH504UE3JyrEeI4Qi8oKEhOTl69erUxBQkhQUFBmzZtKiwsTEpKsmF5AAAA1sURhI6OjkKhsFmzZmbTvby8CCE1nPsGAABocjhOjbq4uAwaNGjdunVmowa/++67zp07t2rVyla1AQA0RuXl5cWVRfl23H1earhGWFpaotfrrVka1Af3NcKXXnpp9uzZ8fHxw4YN8/X1LSoqOnbs2Llz5xYuXGi89doTTzxheT9SAIDHXuKdeyUGVZlByjmXYQxCoYhz1q2M3P6FhbhRZWPDHYTz5s0rLS09f/78+fPnTafPnj3b+PfGjRsnTZpk3eoAABoflhCJWzOXIO7TYwaDQSTiDkLR9QRr1gX1xB2Ely9fNhgMNS9peRERAACgyeEOwpCQEBvXAQAA0CBqusXa3bt3b968mZOT4+fn1759+8jISJuVBQAAYBvcQahWqydOnLhr1y7Tic8888zOnTvd3d1tUhgAAIAtcHf/nTNnzs8//zx37tzr16/n5ubGx8cvXLjw3Llzr732mo3rAwAAsCqOI0KtVrt169Yvvvhi7ty5dIq/v39UVFRYWNjYsWMLCwu9vb1tWyQ8lKysrIXfbtYwAs65NYx5IoQM79/thedwd1kAeJxxBGFxcbFSqYyNjTWb/txzz7Ese//+fQRh06LRaFjv8Ob9RnHOreG+iPI7NxWqHGuWBgDQ8DhOjcpkMrFYnJiYaDadTsGoCQAAeJxw32v02Weffeutt3799VeGYejEkydPTpgwoXPnzvhJQgAAeJxwd5ZZt26dl5fXiy++6OTkFBYW5uTk1K9fP61Wu2PHDhvXBwAAYFXcwycCAwNv3LixZ8+eM2fOPHjwwNHRsXv37qNHj3ZxcbFxfQAAAFbFEYRlZWUjRoxYsGDBuHHjxo0bZ/uaAAAAbIbj1CjDMCdOnKjuprEAAACPE44g9PT0jIyMvHjxou2rAQAAsDHua4Tff//9yJEjJRLJCy+84O/vLxRy96kBAABo6riD8Pnnny8oKJgxY8aMGTPMZrEsa/2q4FFiWVajUqgflHHO1ep0TDV3ltEoHuiEOmuWBgDQ8LiD8OOPP1YoFDYuBawkOztbfnY/c9/8DgkUY2CEIu4j/qrSwsRWzckrw61ZHQBAA+MOwjfffNPGdYD1MAwT6OL4Ys9ozrk1/Jp2YkJcBfMfv88MANDUmQdhfHz8d999d/fuXW9v79jYWAyfAACAx9u/gjAhIaFHjx5KpdLZ2VmhUOzatev+/fuffPJJQxUHAABgbf+6OLR8+XI7O7sLFy5UVlYWFhb269dv6dKler2+oYoDAACwtn8F4e3btydMmNC9e3dCiJeX1+LFiysrK7OzsxuoNgAAAKv7VxAWFBQEBwcbH4aEhBBC5HK5rYsCAACwlX9dI2RZViD43++Y078xcBCgZizLZmVlGQz16WHr4uKC3/gEaFjmvUa/++67o0eP0r+1Wi0hZNasWW5ubsYGx48ft1lxAE1CZWXllpVL/Z0kdV1Qq9PZ+4dNnvmONaoCgFr6VxAGBwfn5uamp6cbpzRv3rykpKSkpMTmhQE0GSzLOopFI3t0rOuC+SVlpws11igJAGrvX0F45syZhqoDAACgQXDfWQYA6kSlVFVWVtZ1qaqqKrUa9+4BaGAIQoCHVVlZeT3pTpiLY10XLKl4EFeCm/oCNDAEIcDDYlmWiMRu4Z3ruqAqL0tfnGCNkgCg9vBDgwAAwGsIQgAA4DUEIQAA8BqCEAAAeA1BCAAAvIYgBAAAXkMQAgAAryEIAQCA1xCEAADAawhCAADgNQQhAADwGoIQAAB4DUEIAAC8hiAEAABeQxACAACvIQgBAIDXEIQA
AMBrCEIAAOA1BCEAAPAaghAAAHgNQQgAALyGIAQAAF5DEAIAAK8hCAEAgNcQhAAAwGsIQgAA4DWxldbLsuyaNWuOHDkik8nee++9Tp06WbY5c+bMqlWr1Gr1yJEjx44dSycuWLAgPz+f/t2qVas5c+ZYqUIAAABivSD89ttv165du3r16sTExP79+6ekpHh7e5s2uHPnzvPPP79y5UofH5833njDwcHh5ZdfJoTs2bNn8ODBLVq0IIT4+flZqTwAsI2UO3duJN2tx4JCARnQq4enp+cjLwnAjLWCcOXKlcuXLx8wYMCAAQNOnDixdevW9957z7TBunXrRowYMXHiRELIggULVqxYQYOQEPLCCy/07NnTSoUBgC0l30k9kKaW+YXWdcHK1GuREQUIQrABqwRhWVlZenp6jx496MMePXpcv37drE1cXNyECROMDWbOnMmyrEAgIIQsXbp0/fr1nTt3njJlilQqtUaF1dHr9QkJCSzL1mPZZs2aBQcHP/KSAJq0zPS0qpT7wrLcui5Ylp5YXNzOGiUBmLFKEBYUFBBC3N3d6UNPT086xayNsYGHh4dara6oqJDJZCNGjAgLC2NZduPGjbt37z579qxYzFGkSqUqLCw0vfQ4adKkcePGPWTlcrn84PfrWnq61HXBSqVKHNL6lfGv1aZxVVVV3UurP5VKxTCMwWDgnFvddEIIyxi0Wl1lZeUjLEar1dZQDMMwAkIEQo4+XAYDwxgMj7aYmrEsS3fdf7ZUKBQMw9awJ6tjMLAG/f+elEajMTCGevynGIbRaDS23Dm1fw0XyvNaG4pbOvvUdRMXlIVyudz4pBjGQFm2ZBmGECLg+v5qMBis8bLR6fTVFUNq/E8ZGEapVD7aetRqdf3e4Axr65eNUqnU6/VCrje49Tg4ONjZ2dXcxipB6OzsTAhRq9VOTk6EEIVC4eJiHi3Ozs4qlYr+rVQqhUIhbbxgwQI6cfjw4cHBwefOnevTp4/lJqRSqbu7+6ZNm4xTQkJCLLdSV1VVVR5uLoO7cXTtqVlanjyRkdS+gIcvtfakUqlQKBSJRNU1qG6WQCiyt7N7tKVqtdoaihH8g6tIoVAksuV+Y1lWJBLRl2XNnJychEJBDXu4OiKRQCT+35OSSCQioage/ymhUCiR1OHl90jUcnN2dnZSJ2d/X9+6rl/qIJFKpcatCIUiyrIlKxCQar4/sYzIGi8bOztxdcVQ1c0SCYWOjo6Pth4HB4f6vcGFAlu/bIRCIf04stkWa8kqQejj4yORSNLT0yMjIwkhGRkZQUFBZm2Cg4MzMjLo3xkZGX5+fmah7ezs7OPjU1RUVN1W7OzsunTp8qhrBwAAfrFKMtvZ2b300ksbN24khJSUlPzyyy+vvPIKIaSsrGzlypVarZYQMnLkyB9//FGpVBJCNm7cOHLkSEJIeXl5YWEhXcnRo0czMzOjo6OtUSEAAABlrV6jixYteuaZZzp27CiXy4cPH96rVy9CiFwunzVr1vjx4+3t7YcPH75///5WrVq5ubk5ODisXr2aEJKdnd29e/fg4GCDwVBSUrJhw4bQ0FArVQgAAECsF4RhYWG3b99OSUnx8PAwDgds3bq1QqFwdHQkhIjF4t27d2dlZSmVyoiICHpZKDIysri4OCMjQywWh4aG/ucVTgAAgIdkrSAkhIhEonbt/tX7WSAQ0BQ0shxv4ODg0KZNG+tVBQAAYMqKQdgUPXjw4GZCUiDR1HXB/NKKLGkza5QEAABWhSD8F5VKVakXsoFRdV1QrUsrKSmxRkkAAGBVCEJzAoFALKnz7WxEXKP+AQCg8Wt0AxsBAABsCUEIAAC8hiAEAABeQxACAACvIQgBAIDXEIQAAMBrCEIAAOA1BCEAAPAaghAAAHgNQQgAALyGIAQAAF5DEAIAAK8hCAEAgNcQhAAAwGsIQgAA4DUEIQAA8BqCEAAAeA1B2SOxTQAAE7ZJREFUCAAAvIYgBAAAXkMQAgAAryEIAQCA
1xCEAADAawhCAADgNQQhAADwGoIQAAB4DUEIAAC8hiAEAABeQxACAACvIQgBAIDXEIQAAMBrCEIAAOA1BCEAAPAaghAAAHgNQQgAALyGIAQAAF5DEAIAAK8hCAEAgNcQhAAAwGsIQgAA4DUEIQAA8BqCEAAAeA1BCAAAvIYgBAAAXkMQAgAAr4kbugAAABvJzc39cdsWAVufZaN7PtmnT59HXBA0DghCAOCL+Pj4a6dPtW3eoq4LFhTIy6qUCMLHFYIQAHjE1VXWKbpnXZdKuHFFZY1qoHHANUIAAOA1BCEAAPAaghAAAHgNQQgAALyGIAQAAF5DEAIAAK8hCAEAgNcwjhAAoAEwDLNj188VivoMUAwN8HnhuWcfeUm8hSAEAGgAer3+4MkL0g4D67qgTqVMvncRQfgIIQgBABqATqcrvX2tk7TOH8IalTIr9741SuItBCEAQANgWdZOKOjdMaquC1aUlWTnZFqhIv5CZxkAAOA1BCEAAPAaghAAAHgNQQgAALyGIAQAAF5Dr9FGbee+A3klFfVY0NNZOmH0iEdeDwDA4wdB2Kidj79DogaJ7CV1WoplmOQLuyeMtlJRAACPFQRhY+fmF2Ln4FinRVjGoLBSNQAAjx1cIwQAAF7DESEAADwaBoMhISGBYRjOuSqVSiKRCIXcB2AeHh5hYWHWrK5aCMJGLftuYoFmo1BsX7fFWLbibqJ1KmoscnJyThw9RFi2rgsKiCCmd9/WrVtboyoAnissLHz3k8VCV2/OuQxjEAqERCCwnGXQatoEe65Z9pWVC+SGIGzUNEV5A9pESBzq9nHPMIa9xXlWKqmRyMvLq0y52SUsoK4LpmTnZQSHIggBrEGpVDqqygY/1Z1zLmMwCIXcQVhUkHc7P8fK1VULQWgVDMNoNJrq5qpUKrG42j0vlUpNHzbz9JQ6OtVt6wZ9ndo3RWlpabczctwk0v9u+m9378vLkpJjY2PpQ5VKpVarOVuyLKtSqbRaLedcoVDo5uZW163bhk6n0+vr8xoQCoUSSd26KAOYEQqFPs2acc6qIQj1agXJz7dyadVCEFrF8uXLj/yyTyDgPhXOMEx1Z8kZln1//vxnnnnGmtU9DliWFTnK3MI713VBUXElS/53hD3zrZmpKSnV/KdYhmWF1fwTDSyz/ccfQkJC6lqADSz6elVyTlF1L78auIoN65YusrOzI4RUVVUd+3U/o9dxtlSr1Q4ODtWtp3mbdtFdY+q6dYCGgiC0CoVC8VRUVOfoHhzzWLa4uNirmm9Mx/44qlDYdOxDeVmZh4eHoJpgtjGFQmEnFjtI63ycV28alfql/v2DQppbztLrdFVVVTJ3d84Ft+3+sYaDfmsoLS1tVs3LxszNi2ddZd72dRx1Qwi5Fx+nUqloEKalpe35fmPHAC/OlooqpaOjVCDk+GpfrlBeutbSlkGoUqlYQpyc6nbixEoYg6GivNzD07OhC/lb7V82Go1GqVTWYxNisdjFxaU2LR88eODk7ExfYI2KtYJQrVavWrUqLi6ubdu2s2bNcnV1tWzz66+/7tu3z9nZecaMGe3ataMTS0tLly1blpaWFhMTM2PGjEa4yx6SVqu9l5paXRDaXnp6ulQqdazFh8hvJ/46cSWhHptwEAnefm10bd6NBXK5g1QaGBhYj608chUVFXn5+dUFoY0xDJNy+3YtP9GErKFf+1YeXtwdFmqw9k6csb+fVqsVO7q27/MiZ8sbN24EN2/OeXI49W7yvbLyum76YRQWFbEs21AdDs1UKRSZ9+83niBMSUnx8PAQiUT/2XL2zLdzU1OqO1lVAxXDbvlxp6+v73+2zMnN9fX19fLi/nbVgKwVhJMnT87JyXnrrbd27NgxfPjwP/74w6zBzz//PGPGjGXLlmVmZj711FNJSUl+fn6EkNjY2BYtWowYMeLrr79OTU1dvXq1lSqEujp18lT8vXypa52DoTIvbdjTT9XyQ7weHUEB4OGVV1Q8/WQf/4Cg
ui64de9OlUpV29aN8g1ulSDMzc3dvXv3/fv3fX19Y2Njvb29b9682bFjR9M2X3/99eLFi8eMGUMIuXHjxsaNG+fPn3/mzJmMjIzz58+LxeKoqKjIyMjPP//cw8PDGkVCXakqy5/ylbZoVefv3UcKbtfvlAsA2JJQJBSK63ESjuMMedNilSC8fv16y5Yt6ZGyg4ND165dL126ZBqEBoPh2rVrO3bsoA979+7922+/EUIuX77co0cP2qOyRYsWnp6e8fHxffv2tUaRUA8O9vbOdezCSggRNY4LkAAAnASsFQ5UN2zY8MMPP5w5c4Y+fOWVVyIiIj777DNjA7lc7ufnV1JSQo/2du7cuXTp0ri4uDlz5pSXl2/evJk269Sp09y5c0eP5rh7dFJSUocOHUwvUfj6+tbmJHXNlEqlQaHw86rzBTy9TptRUubn50MfygsKglxdJQ5cnT5YVqFUODk5c66nrKKiimXc/7kuVZCT29zPr849WVg2NTfXN+jvK20VFRUShvGScZ/SVCoVUgcp5ya0Wk1WWZlxr8rlBSHuMrs63gGcEFJYUmKQ2NPL6QzDFOXltwjgHv+n0WiEAsK5CZYxpMkLfAP86cOSkhKZnZ2bC8e155pp1Mrsikpf33/+U3nysGaeIq5vwYxBr9VqHaTcXU5yCgsc3Nxoz0mtVqsoLQ3yqfPLj9Hr0ouKff396MOCgoIAZxcHR84tsoqqKidn7i4JVYqqYrXaeOlFnpvf3NdbKPzvy0JmMvPzZN7e9JuoQqFglSrfai7nqJRKiUQi5LrypNdqMkvLfP2MLxt5sExmL6m2i2l1SivKFSxrfC/Ic3Ja+gdw9rzXaTUsS+w5B36wbGpenm/g36+3iooKKcN6yGR1LUarUWeVVfga3+DyglB3mZjrhcoYDBqNurohTwUlJayDxNnZmRDCMExxfn5z/zqPhWUYQ3p+gW/g3++F4uJiT4m9izP3e0FRVeXk7MR50KZRK7MrHvzvDV79e6Fm2QVyR3d3OvBGrVarKyoCvX04W6pVKnt7O6GI4wDMoNelF5X4+T/sZ7ilYcOGzZgxo+Y2VjkilEqlph3q1Gq147/f23SonLGNsYFUKi0sLKxhQaPWrVvPmTPH0+SKdGBgoLd3nXsH2B7LspmZmY3kwj4hJDMzMyQkRMD1+WJ7xcXFEomklj3QrE2j0ZSUlPj7+zd0IX/LyMhoPC+b7OxsPz+/GobD2lJ5eTlrEpkNS6/X5+fnBwXV+UqblTSql01eXp6np6eNx6rW5ulb5XUcGBiYnZ3Nsiz9eM3Kynr55ZdNG7i5ubm4uGRlZdEOMllZWbSjYGBg4NmzZ2kbvV6fl5dXXQdCkUj0f//3f9YoHgAAeMUqF2969uzJMAztKZqQkHD37t3nnnuOEHLv3r1jx47RNi+99NL27dsJIWq1es+ePS+99BIh5IUXXrh69Wpqaioh5MCBA15eXp0713nENAAAQO1Z5YjQ3t5+xYoVY8aM6dq169WrV5csWULPWpw4cWL9+vX07lbz58/v27dvUlKSXC4PDw8fOnQoIcTf3/+TTz7p2bNn586dr1279v3339djUAsAAEDtWaWzDFVYWJiUlNSqVauAf3pGKBSKqqoqH5+/r6NqNJpr1645Ozt36NDBdMGsrKy0tLSoqCjPRjMoFQAAHldWDEIAAIDGr1F0+uKV9PT0S5cuCQSC9u3bR0ZGNlQZGo3m0qVLplPatm1b25u/WEd5efm5c+cKCwuDg4Ojo6Mb/Lcd5HJ5fn5+p06dGraMysrKhISEnj17NmwZVGpqKr0VFO34p1Kpzp8/379//wbsdZyRkXHhwgWWZXv06NG8Occ9Y23pwYMHp0+fLikp8fPz69Hj/9u725Amvz4O4GeWilE+a4Uz+7e0ViZm5tMLH8oezGKSTdwmm6OsJDVLCoMIo14ISfbCNghzl4rNkIUzmWT2AC5bkmUomppTyZaIutQ51NTrfnHdXazdFv83987G
fp9XnsN58WUyf1znOv5ODK7zz9+/fx8aGoqJ+a3XcUdHh7e3N5Y28R0dHR4eHqa/nTdv3vzzzz/WcyQbkcBSlpeXc3Jy3N3dU1NTRSLRrl27OBwOrjDDw8MIobi4uAO/vHr1ClcYkiQJgnBzc0tMTDx79uyhQ4c2bdpUV1eHMQ9Jkg8fPoyIiMCbgSTJt2/frl+/HneK/yosLEQIlZaWUsOhoSGE0PLyMsY8np6ePB6Pz+d7eXlduXIFVxKSJDs7O319fTkcTk5OzpEjR6Kjo3El0Wq1a9eupY7uUwwGw4YNGzQaDZY89fX1TCZTr9dTw8bGRj8/v6mpKSxhVgWF0HJKSkr8/f2Hh4ep4crKikKhwBWGKoSzs7O4ApjSaDROTk4vXrygZ0ZGRvAWZhIK4WoKCwtDQkJ8fHxmZmZI3IWwurra09Ozv7+fGn758sXb25sgCCxhSJJMT0+/fPkyPTQYDLiSkCQZHx9fXFxMDwmC2LFjB8Y8PB4vKyuLJEm9Xu/v769SqTCG+V9wJtNy7t+/f+3aNXprgsFgnDx5Em8kKyGVSlNTUw8cOEDPbNmyJT4+Hl8i8EeRkZExMTF3797FHQSVl5dfuHAhMDCQGrJYrJycHIlEgiuP0Wg07T2N91oosVhcUVFBDwmCOHPmDMY8EomkqampqakpNzc3KSmJvhnbSkAhtBCDwTA0NIT9hZMZLpd7/JevX7/iitHd3U23ojUYDFqtVqvVjo2N4coD/q64uPjevXumTaCw6OvrM/s/47CwsK6uLlx5CgoK5HI5m80+f/68QqFYWlrClQQhxOVyx8fH3717hxAaHh5ua2ujbjjAxd3dXSKR8Pl8tVpdUlKCMcmq4LCMhSwvLyOErO16xXPnztH3jGO85WNlZYX+ZFpaWgoKCqanp6OiohobG3FFAn+xc+dODodTXFycl5eHMcbi4qLZF8rR0RHjsZ3Y2FitVqtSqdRqdXZ2tkQiaW5u/jcXAf4/uLi4pKWlEQQRGRlJEMTRo0epNl4YnThxgsViiUQiK+mhaAqeCC3E1dXV19e3t7cXd5DfJCYmHv0F407O9u3b6U8mJSVlcHCQOpQBrNbNmzfLy8upd4S4BAYG9vT0mM709PTQO6VYeHh4CAQCqVTa2dnZ2tpKPZDhIhaL5XK50Wisrq4Wi8UYk9CcnZ2dnJxwp1gFFEILYTAYmZmZd+7cmZuboyc/ffqEMZL1EAqFNTU1AwMDuIOAfysgIEAsFt++fRtjBi6XW1ZWptfrqeGPHz/Kysow/sU3/Wq7ubmtWbMGby/7qKgoPz+//Px8g8GQnJyMMYn1g61RyykqKvr48WNoaCiXy/Xw8NBoNCMjI+/fv8cY6dKlS/QFAqdOnTp48CCWGMePH8/Pzw8PDxcKhUFBQZOTk7W1tRwOB0sYK7SwsJCdnU0Pi4qK6PZMGF2/fp3FYmEMcPHixdbW1v379wuFQgaDUV1dHRUVlZubiysPl8tdt25dRESEo6NjbW1tWFhYeHg4rjCUzMzMq1evFhQUWNtLGWsDnWUsiiTJ5ubmtrY2hFBwcDCHw8G1UWAwGORyuelMdHR0cHAwljCU7u5ulUql1+uZTGZsbCzGbgOU/v7+3t5e7PV4fHxcqVSazqSlpeHqNtDe3j4/Px8bG0sNW1patFptVlYWrkcf6gulVqv7+vra2tq6urowXsY0Pj7e3Nw8MDDAYDB2796dkpKCvfxMTk4qFIqkpCQruRZKqVQGBQWx2WzcQcxBIQQA2DySJLOzs799+6ZQKKzzLRSwZlAIAQAA2DU4LAMAAMCuQSEEAABg16AQAgAAsGtQCAEAANg1KIQAAADsGhRCAAAAdg0KIQA26caNGywW69atW39aMD8/HxISwmKxoHcdAH8HhRAA27O4uCiVSnU6nUQi+dN1P0qlsqurS6fTyWQyC8cDwLZAIQTA9jQ0NExMTJSUlIyNjTU1Na26pqKiYt++fTwer6qqiroFDACwKugs
A4DtSU5OHhkZ6e7uZrPZbDb7yZMnZgtGR0e3bt1aWlq6Z8+ehIQElUplbXeCA2A94IkQABuj0+mePXsmEokQQhkZGU+fPh0bGzNbI5PJHBwc0tPT4+Litm3bBrujAPwFFEIAbExlZSVJkjweDyEkEolIknz06JHpApIkKysrjx075uPjw2AwBAKBUqmcmJjAlBcAaweFEAAbU1VVdfjwYSaTiRBiMplxcXEVFRWmC16/fj04OCgUCqmhUCj8+fOnWbEEANDgYl4AbIlarf78+XNoaOiDBw+oGS8vr5cvX7a3t0dERFAzBEE4ODjodDp6zcaNG2UyWV5eHp7QAFg3OCwDgC05ffp0ZWWlq6ur6eTMzExWVpZUKqV+3rx5M0LI2dmZXrC0tDQ7O/vhw4e9e/daODAA1g+2RgGwGXNzc3V1dRkZGVO/4/P5crncaDQihGpra41G4/Pnz00XjI6Ouri4wJEZAFYFhRAAm/H48ePZ2VmBQGA2LxAIpqen6+vrEUIymSwgICA6Otp0gaura1JSUk1NzcLCguXiAmAjoBACYDNkMpmvr29CQoLZfGJiIvUWsK+vT6PRZGRkMBgMszV8Pn9qaqqhocFSYQGwGfCOEAAAgF2DJ0IAAAB2DQohAAAAuwaFEAAAgF2DQggAAMCuQSEEAABg16AQAgAAsGtQCAEAANi1/wA56QbUFTfwNAAAAABJRU5ErkJggg==",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Draw n sequences with sample(PM; rng) and decode each state to a string.\n",
+ "n = 1_000_000\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng)) for _ in 1:n]\n",
+ "val_strings = map(s -> state_to_PM_string(P, s), val_states)\n",
+ "\n",
+ "# Length distributions: val_strings vs. model_strings.\n",
+ "pLen = plot_len_dist(\n",
+ "    \"model_vs_true_len\", val_strings, model_strings;\n",
+ "    title=\"Length distribution: Natural vs Generated\",\n",
+ ")\n",
+ "display(pLen)\n",
+ "\n",
+ "# AA frequencies for the same two sets.\n",
+ "pAA = plot_AA_dist(\n",
+ "    \"model_vs_true_AA\", val_strings, model_strings;\n",
+ "    title=\"AA frequency: Natural vs Generated\",\n",
+ ")\n",
+ "display(pAA)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Natural 'A' frequency: 0.047142480905978404\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Fraction of all characters in val_strings that are 'A'.\n",
+ "# (The former isdefined(...) ternary had two identical branches, so it guarded nothing.)\n",
+ "println(\"Natural 'A' frequency: \", sum(s -> count(==('A'), s), val_strings) / sum(length, val_strings))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Natural 'A' frequency: 0.04109772423025435\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Fraction of all characters in model_strings that are 'A'.\n",
+ "# Labelled \"Generated\" (model_strings is the generated set in the plot titles) so this\n",
+ "# printout is not confused with the val_strings one.\n",
+ "println(\"Generated 'A' frequency: \", sum(s -> count(==('A'), s), model_strings) / sum(length, model_strings))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MersenneTwister(42)"
+ ]
+ },
+ "execution_count": 183,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rng = Random.MersenneTwister(42)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "execution_count": 226,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Re-draw n sequences with sample(PM; rng) and plot their length distribution.\n",
+ "# NOTE(review): both series passed below are val_strings although the title reads\n",
+ "# \"Natural vs Generated\" -- confirm this self-comparison is intentional.\n",
+ "n = 1_000_000\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng)) for _ in 1:n]\n",
+ "val_strings = map(s -> state_to_PM_string(P, s), val_states)\n",
+ "pLen = plot_len_dist(\n",
+ "    \"model_vs_true_len\", val_strings, val_strings;\n",
+ "    title=\"Length distribution: Natural vs Generated\",\n",
+ ")\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.11.6",
+ "language": "julia",
+ "name": "julia-1.11"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/editflow_code/analysis_notebook.ipynb b/editflow_code/analysis_notebook.ipynb
new file mode 100644
index 0000000..c32ea1c
--- /dev/null
+++ b/editflow_code/analysis_notebook.ipynb
@@ -0,0 +1,2655 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `~/dev/Flowfusion.jl/editflow_code`\n",
+ "\u001b[92m\u001b[1mPrecompiling\u001b[22m\u001b[39m project...\n",
+ " 2266.3 ms\u001b[32m ✓ \u001b[39mFlowfusion\n",
+ " 1 dependency successfully precompiled in 4 seconds. 470 already precompiled.\n",
+ "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Project.toml`\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Manifest.toml`\n",
+ "WARNING: using StatsBase.sample in module Main conflicts with an existing identifier.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "plot_len_dist (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "using Pkg\n",
+ "Pkg.activate(@__DIR__)\n",
+ "Pkg.instantiate()\n",
+ "using Revise\n",
+ "\n",
+ "using Random\n",
+ "using Statistics\n",
+ "using Adapt\n",
+ "using Functors\n",
+ "using Flux\n",
+ "using Onion\n",
+ "using RandomFeatureMaps\n",
+ "using Zygote\n",
+ "\n",
+ "Pkg.develop(path=joinpath(@__DIR__, \"..\"))\n",
+ "using Flowfusion\n",
+ "const FF = Flowfusion\n",
+ "\n",
+ "include(joinpath(@__DIR__, \"prob_model.jl\"))\n",
+ "include(joinpath(@__DIR__, \"helper_funcs.jl\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "to_same_device (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import CUDA\n",
+ "\n",
+ "# Device helpers\n",
+ "# `_gpu_enabled` is evaluated once here; if the CUDA query throws, it falls back to false.\n",
+ "const _gpu_enabled = try\n",
+ "    CUDA.has_cuda()\n",
+ "catch\n",
+ "    false\n",
+ "end\n",
+ "\n",
+ "# Adapt `x` to CuArray storage when the GPU is enabled; otherwise return `x` untouched.\n",
+ "function to_dev(x)\n",
+ "    _gpu_enabled || return x\n",
+ "    return Adapt.adapt(CUDA.CuArray, x)\n",
+ "end\n",
+ "\n",
+ "# Adapt `x` back to Array storage when the GPU is enabled; otherwise return `x` untouched.\n",
+ "function to_cpu(x)\n",
+ "    _gpu_enabled || return x\n",
+ "    return Adapt.adapt(Array, x)\n",
+ "end\n",
+ "\n",
+ "# move x to the same device type as y (Array or CuArray)\n",
+ "function to_same_device(x, y)\n",
+ "    target = (_gpu_enabled && (y isa CUDA.CuArray)) ? CUDA.CuArray : Array\n",
+ "    return Adapt.adapt(target, x)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "make_minibatch (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Minibatch builder (uses true PM)\n",
+ "#\n",
+ "# Returns `(x0s, x1s, ts)` for a batch of `B` pairs:\n",
+ "#   x1s[b] - `sample(PM; rng)` wrapped as a DiscreteState over K = P.k tokens\n",
+ "#   x0s[b] - tokens drawn uniformly from 1:K, with a length drawn from `x0_lengths`\n",
+ "#   ts     - B Float32 values from `rand(rng, Float32, B)`\n",
+ "# `x0_lengths` defaults to 1:10, the range that used to be hard-coded.\n",
+ "function make_minibatch(B::Int, P::FF.EditFlow; rng=Random.default_rng(), x0_lengths=1:10)\n",
+ "    K = P.k\n",
+ "    x0s = Vector{FF.DiscreteState}(undef, B)\n",
+ "    x1s = Vector{FF.DiscreteState}(undef, B)\n",
+ "    for b in 1:B\n",
+ "        # x1 from true PM; every token has to lie in 1:K\n",
+ "        seq1 = sample(PM; rng=rng)\n",
+ "        @assert all(tok -> 1 <= tok <= K, seq1) \"PM sample contains a token outside 1:K\"\n",
+ "        x1s[b] = FF.DiscreteState(K, seq1)\n",
+ "        # x0: uniform tokens with random length (no BOS in x0).\n",
+ "        # Draw order (length, then tokens) is kept so seeded runs reproduce.\n",
+ "        L0 = rand(rng, x0_lengths)\n",
+ "        x0s[b] = FF.DiscreteState(K, rand(rng, 1:K, L0))\n",
+ "    end\n",
+ "    ts = rand(rng, Float32, B)\n",
+ "    return x0s, x1s, ts\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Model definition\n",
+ "# `layers` holds a NamedTuple with every sub-layer plus the vocabulary size K.\n",
+ "struct EditFlowModel{L}\n",
+ "    layers::L\n",
+ "end\n",
+ "Flux.@layer EditFlowModel\n",
+ "\n",
+ "# Keyword constructor.\n",
+ "#   d         - embedding / transformer width\n",
+ "#   num_heads - attention heads per block; RoPE is built for d ÷ num_heads dims\n",
+ "#   nlayers   - number of AdaTransformerBlocks\n",
+ "#   rff_dim   - random Fourier feature count for the time embedding\n",
+ "#   cond_dim  - width of the conditioning vector fed to each block\n",
+ "#   max_len   - number of positions in the RoPE table (default 4096, previously hard-coded)\n",
+ "#   K         - vocabulary size (required); the embedding table has K + 2 rows\n",
+ "# The output head maps d => 2K + 1 values per position\n",
+ "# (presumably the per-position edit rates -- confirm against FF.edit_loss).\n",
+ "function EditFlowModel(;\n",
+ "    d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, max_len::Int=4096, K::Int,\n",
+ ")\n",
+ "    embedding = Flux.Embedding(K + 2 => d)\n",
+ "    time_embed = Flux.Chain(\n",
+ "        RandomFourierFeatures(1 => rff_dim, 1.0f0),\n",
+ "        Dense(rff_dim => cond_dim),\n",
+ "    )\n",
+ "    blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers]\n",
+ "    head_combined = Dense(d => 2K + 1, bias=false)\n",
+ "    rope = RoPE(d ÷ num_heads, max_len)\n",
+ "    return EditFlowModel((; embedding, time_embed, blocks, head_combined, rope, K))\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Forward pass\n",
+ "# `t` is a scalar or a collection of times; `Xt_ms` is the masked state for the batch.\n",
+ "# Returns `head_combined` applied to the output of the last transformer block.\n",
+ "function (model::EditFlowModel)(t, Xt_ms)\n",
+ "    net = model.layers\n",
+ "    tokens = FF.tensor(Xt_ms)\n",
+ "    if ndims(tokens) == 1\n",
+ "        # a single sequence becomes a one-column batch\n",
+ "        tokens = reshape(tokens, :, 1)\n",
+ "    end\n",
+ "    seqlen, nbatch = size(tokens)\n",
+ "\n",
+ "    pmask = Zygote.@ignore FF.getlmask(Xt_ms)\n",
+ "    # shift token ids up by one before the embedding lookup\n",
+ "    H = net.embedding(tokens .+ 1)\n",
+ "\n",
+ "    # broadcast a scalar time to the whole batch; otherwise just convert to Float32\n",
+ "    tvec = ndims(t) == 0 ? fill(Float32(t), nbatch) : Float32.(t)\n",
+ "    cond = net.time_embed(reshape(tvec, 1, nbatch))\n",
+ "\n",
+ "    # put everything on the device that holds the embeddings\n",
+ "    cond = to_same_device(cond, H)\n",
+ "    pmask = Zygote.@ignore to_same_device(pmask, H)\n",
+ "    rope = Zygote.@ignore to_same_device(net.rope[1:seqlen], H)\n",
+ "\n",
+ "    for blk in net.blocks\n",
+ "        H = blk(H; cond=cond, rope=rope, kpad_mask=pmask)\n",
+ "    end\n",
+ "    return net.head_combined(H)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "train_editflow! (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Training (GPU if available)\n",
+ "#\n",
+ "# Runs epochs * steps_per_epoch Adam updates on minibatches from `make_minibatch` and\n",
+ "# returns the trained model after mapping `to_cpu` over it.\n",
+ "#\n",
+ "# Keywords:\n",
+ "#   epochs, steps_per_epoch - loop bounds\n",
+ "#   batch_size              - sequences per minibatch\n",
+ "#   lr                      - Adam learning rate; any Real, converted to Float32\n",
+ "#   seed                    - seeds a local MersenneTwister and also the global RNG\n",
+ "#   print_every             - log the loss every this many steps; <= 0 disables logging\n",
+ "function train_editflow!(\n",
+ "    P::FF.EditFlow,\n",
+ "    model;\n",
+ "    epochs::Int=1,\n",
+ "    steps_per_epoch::Int=100,\n",
+ "    batch_size::Int=64,\n",
+ "    lr::Real=1f-2,\n",
+ "    seed::Int=42,\n",
+ "    print_every::Int=25,\n",
+ ")\n",
+ "    rng = Random.MersenneTwister(seed)\n",
+ "    Random.seed!(seed)\n",
+ "\n",
+ "    # Move model to device (GPU if available)\n",
+ "    model = Functors.fmap(to_dev, model)\n",
+ "    # Float32(lr) lets callers pass ordinary literals such as lr=1e-3\n",
+ "    opt_state = Flux.setup(Flux.Adam(Float32(lr)), model)\n",
+ "\n",
+ "    for epoch in 1:epochs\n",
+ "        for step in 1:steps_per_epoch\n",
+ "\n",
+ "            # 1) Minibatch Sampling\n",
+ "            x0s, x1s, ts = make_minibatch(batch_size, P; rng=rng)\n",
+ "\n",
+ "            # 2) Align and batch\n",
+ "            Z0, Z1 = FF.align_and_batch(P, x0s, x1s)\n",
+ "\n",
+ "            # 3) Interpolate\n",
+ "            Zt, Xt = FF.interpolate_Z_elementwise(P, Z0, Z1, ts)\n",
+ "\n",
+ "            # 4) Prepend BOS (one extra leading row of P.bos_token on Zt, Xt and Z1)\n",
+ "            bos = P.bos_token\n",
+ "            Zt = vcat(fill(bos, 1, batch_size), Zt)\n",
+ "            Xt = vcat(fill(bos, 1, batch_size), Xt)\n",
+ "            Z1 = vcat(fill(bos, 1, batch_size), Z1)\n",
+ "\n",
+ "            # 5) Masks and multipliers\n",
+ "            transition_mask = FF.transition_mask_from_Xt(P, Xt)\n",
+ "            edit_multiplier = FF.remaining_edits(P, Zt, Z1, Xt)\n",
+ "            @assert size(transition_mask) == size(edit_multiplier)\n",
+ "\n",
+ "            # 6) Scheduler scaling: dκ(t) / (1 - κ(t)), with the denominator floored at 1f-3\n",
+ "            den = 1f0 .- P.κ.(ts)\n",
+ "            den = max.(den, 1f-3)\n",
+ "            scheduler_scaling = P.dκ.(ts) ./ den\n",
+ "\n",
+ "            # 7) Masked state\n",
+ "            lmask = Xt .!= P.padding_token\n",
+ "            cmask = trues(size(lmask))\n",
+ "            Xt_ms = FF.MaskedState(FF.DiscreteState(P.k, Xt), cmask, lmask)\n",
+ "\n",
+ "            ts_d = to_dev(ts)\n",
+ "            Xt_ms_d = to_dev(Xt_ms)\n",
+ "            Tmask_d = to_dev(transition_mask)\n",
+ "            Emult_d = to_dev(edit_multiplier)\n",
+ "            sched_d = to_dev(reshape(Float32.(scheduler_scaling), 1, 1, :))\n",
+ "\n",
+ "            # 8) Forward + loss + update\n",
+ "            loss, grad = Flux.withgradient(model) do m\n",
+ "                M = m(ts_d, Xt_ms_d)\n",
+ "                l = FF.edit_loss(P, M, Tmask_d, Emult_d, sched_d; eps=1f-8)\n",
+ "                if isnan(l)\n",
+ "                    println(\"Maximum element of M: \", maximum(M))\n",
+ "                end\n",
+ "                l\n",
+ "            end\n",
+ "            Flux.update!(opt_state, model, grad[1])\n",
+ "\n",
+ "            # print_every <= 0 means silent; the guard also avoids `step % 0`\n",
+ "            if print_every > 0 && step % print_every == 0\n",
+ "                @info \"train\" epoch step loss=Float32(loss)\n",
+ "            end\n",
+ "        end\n",
+ "    end\n",
+ "\n",
+ "    return Functors.fmap(to_cpu, model)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Init process and model\n",
+ "K = PM.K\n",
+ "P = FF.EditFlow(K; bos_token=0, impl=\"positionwise\")\n",
+ "model = EditFlowModel(; d=128, num_heads=8, nlayers=4, rff_dim=128, cond_dim=128, K=K)\n",
+ "nothing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 77.32212f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 62.010212f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 54.765823f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 55.06506f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 51.20858f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 49.92301f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 49.66504f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 48.609978f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 51.766243f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 46.577633f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 46.013363f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 44.984547f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 45.69627f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 48.54598f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 46.083076f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 41.404823f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 41.41694f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 40.346214f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 44.24388f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 42.988144f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 44.356007f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 43.046814f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 44.238304f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 42.40614f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 40.038864f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 39.248238f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 39.251083f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 38.37234f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 39.504143f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 38.68367f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 39.95724f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.87685f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.32936f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.77718f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.4764f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.697372f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.233315f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.579445f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.07529f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.277664f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.53584f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.566807f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.145203f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.07258f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.562103f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.07919f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.078888f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 37.076546f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 36.745842f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.205795f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.40426f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.041412f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.140083f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.82329f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.56766f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.306625f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.299297f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.544945f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.594986f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.99458f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 35.273575f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.578995f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.579735f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.745888f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.89444f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.12862f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.716965f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.7956f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.485348f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.210342f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.249763f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.991463f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.865524f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.39278f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.85763f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.728132f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.789724f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.589245f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.698822f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.386007f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.859806f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 33.39788f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.370934f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.334873f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.145905f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.896702f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.935232f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.63628f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.987965f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.22433f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.780943f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.084726f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.95314f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.072214f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.637058f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.1978f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.311502f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.983727f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.378075f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.824142f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.99437f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.723846f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.51981f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 34.55582f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.413897f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.88297f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.492455f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.25613f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.992672f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.894299f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.46797f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.885025f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.204609f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.554102f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.592083f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.41414f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.871262f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.159111f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.342709f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.10995f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.296486f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.800013f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.61753f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.39122f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.753365f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.641577f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.24447f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.80323f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.964045f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.822504f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.006264f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.91555f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.783787f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.890472f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.845434f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.83451f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 32.009922f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.013016f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.106924f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.63213f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.449276f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.05313f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.290436f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.276487f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.076471f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.027508f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.74162f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.38826f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.541903f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.33956f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.591618f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.27103f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.449995f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.65526f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.825336f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.996181f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.86678f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.68143f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.710367f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.882233f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.672073f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.578857f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.669641f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.462646f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.811617f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.703606f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.874252f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.179556f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.148922f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.66884f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.42384f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.84734f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.043884f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.35019f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.323244f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.033892f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.690865f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.976942f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.335669f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.302813f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 30.665878f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.722029f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.110176f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.456757f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.793577f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.014038f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.246342f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.316242f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.664661f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.209648f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.616825f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.14788f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.519016f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.73012f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.564869f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.706131f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.197008f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 31.765541f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.374762f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.70539f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.18553f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.417723f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.525036f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.731668f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.166397f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.808641f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.353008f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.624584f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.626465f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.990967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.641216f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.256786f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.595907f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.807907f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.43351f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 29.224606f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.267601f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.160694f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.640324f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.291248f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.828138f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.923212f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.349394f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.088284f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.915966f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.723827f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.686745f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.787931f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.630884f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.99827f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.101845f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.79374f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.24427f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.814526f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.683496f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.020454f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.851532f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.559927f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.804356f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.212154f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.894348f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.453125f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.238836f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.200993f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.51118f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.900951f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.565336f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.649536f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.295158f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.369617f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.885021f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.096455f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.068813f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.79005f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.681047f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.360323f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.9206f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.990232f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.18895f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.445778f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.310482f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.915564f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.857944f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.920141f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.909576f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.859642f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.192644f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.794025f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.398401f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.175182f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 28.37204f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.062489f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.663717f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.957298f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.496971f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.304325f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.922512f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.699911f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.254108f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.706352f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.18057f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.411335f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.421764f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.939445f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.996864f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.648907f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.776127f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.542099f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.333237f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.898983f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.80355f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.847319f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.493822f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.442886f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.023598f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.138752f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.160217f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.487549f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.946358f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.53986f0\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel(\n",
+ " Embedding(22 => 128), \u001b[90m# 2_816 parameters\u001b[39m\n",
+ " Chain(\n",
+ " RandomFourierFeatures{Float32, Matrix{Float32}}(Float32[0.37109837 7.704748 … 5.5441585 1.6017374]),\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " [\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9961103, 0.98354983, 0.98296183, 0.9941034, 0.9855069, 0.97056925, 0.98523986, 0.98571247, 0.992952, 0.9959597 … 0.9968077, 0.974237, 0.9965491, 0.99233574, 0.975247, 0.9818705, 0.97323513, 0.9854002, 0.9814506, 1.0009559], Float32[-0.0038236545, 0.0061107413, -0.00062911137, 0.0057799933, -0.002794507, -0.001555508, -0.00058775704, -0.011436283, -0.0010510351, 0.0030899069 … 0.0054938016, 0.0007393659, -0.0042932453, -0.00040333753, 0.01101881, -0.0013433272, 0.0003267441, 0.006723721, -0.0017793853, 0.0063674543], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9982594, 1.0053198, 0.9896972, 0.9885052, 1.0034368, 0.9959786, 1.0024047, 1.0017761, 1.0079848, 0.998037 … 0.99845004, 1.0012459, 1.0023891, 0.9972836, 1.0016316, 0.9928665, 0.9821593, 1.001839, 0.9991159, 0.98322225], Float32[0.0077800294, 0.006313712, 0.005019753, 0.009399522, 0.0057638027, 0.008210472, -0.00041128998, 0.009714841, 0.005229109, 0.010837439 … -0.008408365, 0.0010688071, -0.0026440201, -0.0047563454, -0.00029993433, -0.00017475362, 0.010465704, 0.0038032355, 0.008328137, 0.0069600623], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.98340446, 0.99515694, 0.99383044, 0.98744017, 0.98178893, 0.99770486, 0.9998854, 0.98325014, 0.99851084, 0.97928643 … 0.9928049, 0.9935282, 0.99334264, 0.9797028, 0.9886869, 0.9849082, 0.96643585, 0.99244535, 0.9834009, 0.9998993], Float32[-0.0050134575, -0.0036863368, 0.0032099392, -0.000613435, -0.0024439872, -0.0025708228, -0.0032925045, -0.0032033226, 0.006190524, 0.0064956564 … 0.005046286, -0.0013077608, -0.0036956323, -0.0070013213, -0.0011397419, -7.354888f-5, -0.00049049803, 0.0032066598, 0.00010148762, 0.0008781409], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.99697554, 0.9989613, 0.98683894, 0.9949222, 0.99189675, 0.9979521, 1.0014259, 0.9904876, 0.9937987, 1.0045815 … 1.0036364, 1.0192485, 0.9905473, 0.992812, 1.007178, 1.0157473, 1.0048788, 1.0006422, 1.005701, 1.0034955], Float32[0.01035669, 0.007019663, 0.0067280596, 0.0077844895, 0.011074385, 0.005554042, 0.009326161, 0.0038617663, -0.0066101057, 0.010390683 … 0.0052756458, -0.0034369635, -0.0035258406, -0.009991266, 0.003326182, -0.00122832, 0.010301022, 0.0126599455, -0.005512634, -0.00085900724], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.99811625, 0.9854664, 0.9989415, 0.99436367, 1.0049703, 0.9964015, 0.9879783, 1.0056946, 1.000729, 0.99081683 … 1.0057596, 1.0069537, 0.99401844, 1.0044988, 0.9727968, 0.99472237, 0.9930699, 0.9901404, 0.99556744, 0.99798906], Float32[-0.0072035347, -0.0070618317, 0.00017180148, 0.0028987664, -0.00460521, -0.0011136559, 0.0010147326, -0.0032454734, 0.0025976172, 0.003775801 … -0.00087325997, 0.0020796831, 0.0005583409, 0.0042034183, 0.0023550594, -0.0021972822, 0.0001533411, 0.0024601675, 0.0048599523, 0.0049486584], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[1.0113374, 1.0362657, 1.0129801, 1.0054824, 0.9959235, 1.0161504, 1.0031482, 1.0127438, 1.0027536, 1.0205426 … 1.0534416, 0.99659115, 1.0074198, 1.027664, 0.99709916, 0.9974137, 1.005346, 0.98684317, 1.0034771, 0.9989774], Float32[-0.0014052559, 0.0030515736, -0.0021614903, 0.0066673947, 0.007202584, 0.0017106866, -0.0010570075, 0.0007347914, 0.0047691455, -0.00045394935 … 0.0019431848, -0.007668013, 0.006576849, -0.0028465583, 0.0037018328, -0.007993788, 0.006362465, 0.008466869, -0.007678316, 0.004118309], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9981497, 1.002889, 0.98744977, 1.0060855, 0.985402, 0.98569524, 0.9988142, 0.99421, 0.97521454, 0.98611385 … 1.004141, 1.0106245, 0.9919724, 0.99938774, 0.9956309, 1.0020154, 1.014488, 1.0070881, 0.9995131, 0.9986625], Float32[-0.00467435, -0.0041207974, 0.003574141, 0.0018393839, -0.00774704, -0.0004402911, 0.009702865, -0.0013263967, -0.0007161403, 0.0039062018 … 0.00023826491, 0.0005687163, -0.005481451, -0.002329852, 0.00050594786, 0.0018227947, -0.0053469585, -0.0071314457, -0.004511974, 0.00010769644], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[1.0128297, 1.0101146, 1.0129786, 0.9997293, 1.0020403, 0.99318516, 1.0349357, 1.0035822, 1.0150418, 1.0350649 … 1.0229619, 1.0016078, 1.0106664, 1.0060273, 1.0075238, 0.9961528, 1.0068473, 1.0010078, 1.0145574, 1.0092921], Float32[0.0013683313, 0.006150891, 0.0034406697, 0.005812049, 0.012213179, 0.0059094066, -0.0019328047, 0.0070142355, 0.0056433696, 0.00019984429 … 0.002948093, -0.011170231, 0.009000667, 0.0016508012, 0.007526409, -0.005769551, 0.0012698864, 0.0066841603, 0.00037845137, -0.0011874678], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " ],\n",
+ " Dense(128 => 41; bias=false), \u001b[90m# 5_248 parameters\u001b[39m\n",
+ " RoPE{Matrix{Float32}}(Float32[1.0 0.5403023 … -0.87528455 -0.065975994; 1.0 0.95041525 … 0.9552474 0.8159282; … ; 1.0 0.9999995 … -0.57972294 -0.5789079; 1.0 0.99999994 … 0.2726629 0.27235872], Float32[0.0 0.84147096 … -0.48360822 -0.9978212; 0.0 0.3109836 … 0.29580808 0.5781532; … ; 0.0 0.0009999999 … -0.8148137 -0.8153929; 0.0 0.0003162278 … 0.9621096 0.9621958]),\n",
+ " 20,\n",
+ ") \u001b[90m # Total: 80 trainable arrays, \u001b[39m1_339_392 parameters,\n",
+ "\u001b[90m # plus 3 non-trainable, 65_600 parameters, summarysize \u001b[39m5.364 MiB."
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model = train_editflow!(P, model; epochs=50, steps_per_epoch=150, batch_size=256, lr=1f-4)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MersenneTwister(123023)"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rng = Random.MersenneTwister(123023)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1000-element Vector{String}:\n",
+ " \"QVQLVQSLAEVK\"\n",
+ " \"QVQLVESGGIFK\"\n",
+ " \"TPQLQESCPGLWVKPG\"\n",
+ " \"QVQLVQSGAEVKK\"\n",
+ " \"QQHLVQSGAEVK\"\n",
+ " \"QVQAVKSGGEVKPAGGKSR\"\n",
+ " \"QVQLVESGTGLVKLP\"\n",
+ " \"EVLVERGGQLVQPGNKLS\"\n",
+ " \"QTQLDQSGRLVKPSGTL\"\n",
+ " \"QFQLVQSVAA\"\n",
+ " \"VVQLVQSGAEVKKKPGASVVK\"\n",
+ " \"QVQLVESGGVLVQPGGSLV\"\n",
+ " \"QVQLVQSGTEVKKPGASVKSVK\"\n",
+ " ⋮\n",
+ " \"QVQLVQSGAEVKK\"\n",
+ " \"EVLQEGGGLVQRSL\"\n",
+ " \"QVQLVESDGAEVKKP\"\n",
+ " \"EVELMESGGGLVQPGGSL\"\n",
+ " \"EHHLVESGGGL\"\n",
+ " \"EVQLVQSGAEV\"\n",
+ " \"EVTLVEVGPL\"\n",
+ " \"QVQLLVPSGAEVKKPGS\"\n",
+ " \"EVQLVQDVGLVKCGGS\"\n",
+ " \"QVQLVQSGTEVK\"\n",
+ " \"QVQLVESGGTVKK\"\n",
+ " \"QVQLVQSGFEVKK\""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sampling and Levenshtein plot\n",
+ "using Random\n",
+ "n = 1000\n",
+ "# n model samples (final states → strings)\n",
+ "model_samples_any = sample_gen_n(P, model; n=n, ts=0f0:0.01f0:1f0, rng=rng)\n",
+ "model_final_states = map(final_state_from_gen, model_samples_any)\n",
+ "model_strings = [state_to_PM_string(P, s) for s in model_final_states]\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Build training strings from PM parents (the data PM was built on)\n",
+ "train_states = [FF.DiscreteState(P.k, sample(PM; rng=rng)) for i in 1:100]\n",
+ "train_strings = [state_to_PM_string(P, s) for s in train_states]\n",
+ "\n",
+ "# Independent validation strings sampled from PM\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng=rng)) for i in 1:n]\n",
+ "val_strings = [state_to_PM_string(P, s) for s in val_states]\n",
+ "#val_strings = [s for s in val_strings if !(s in train_strings)]\n",
+ "\n",
+ "# Plot normalized Levenshtein vs. training set (distinct val/train)\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings, train_strings; title=\"Model vs True (normalized Levenshtein)\")\n",
+ "display(plt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "model_strings_bos = [\">\" * state_to_PM_string(P, s) for s in model_final_states]\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings_bos, train_strings; title=\"Model vs True (normalized Levenshtein)\")\n",
+ "display(plt)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "\n",
+ "pAA = plot_AA_dist(\"model_vs_true_AA\", val_strings, model_strings; title=\"AA frequency: Natural vs Generated\")\n",
+ "display(pAA)\n",
+ "\n",
+ "pLen = plot_len_dist(\"model_vs_true_len\", val_strings, model_strings; title=\"Length distribution: Natural vs Generated\")\n",
+ "display(pLen)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Natural 'A' frequency: 0.04726267529665588\n"
+ ]
+ }
+ ],
+ "source": [
+ "println(\"Natural 'A' frequency: \", (isdefined(Main, :val_strings) ? sum(count(==('A'), s) for s in val_strings) / sum(length.(val_strings)) : sum(count(==('A'), s) for s in val_strings) / sum(length.(val_strings))))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Generated 'A' frequency: 0.04435456690259521\n"
+ ]
+ }
+ ],
+ "source": [
+ "println(\"Generated 'A' frequency: \", (isdefined(Main, :model_strings) ? sum(count(==('A'), s) for s in model_strings) / sum(length.(model_strings)) : sum(count(==('A'), s) for s in model_strings) / sum(length.(model_strings))))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.11.6",
+ "language": "julia",
+ "name": "julia-1.11"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/editflow_code/editflowsrun.jl b/editflow_code/editflowsrun.jl
new file mode 100644
index 0000000..3ef36e0
--- /dev/null
+++ b/editflow_code/editflowsrun.jl
@@ -0,0 +1,199 @@
+using Pkg
+Pkg.activate(@__DIR__)
+#Pkg.instantiate()
+using Revise
+
+using Random
+using Statistics
+using Adapt
+using Functors
+using Flux
+using Onion
+using RandomFeatureMaps
+using Zygote
+Pkg.develop(path=joinpath(@__DIR__, ".."))
+using Flowfusion
+const FF = Flowfusion
+include(joinpath(@__DIR__, "prob_model.jl"))
+include(joinpath(@__DIR__, "helper_funcs.jl"))
+
+import CUDA
+
+# Device helpers: `_gpu_enabled` is whatever `CUDA.has_cuda()` reports, or false if that call throws.
+const _gpu_enabled = try
+ CUDA.has_cuda()
+catch
+ false
+end
+# to_dev / to_cpu adapt x to CuArray / Array storage when `_gpu_enabled` is true; otherwise x is returned as-is.
+to_dev(x) = _gpu_enabled ? Adapt.adapt(CUDA.CuArray, x) : x
+to_cpu(x) = _gpu_enabled ? Adapt.adapt(Array, x) : x
+# Move x to the same array type as y: CuArray when the GPU is enabled and y is a CuArray, otherwise Array.
+to_same_device(x, y) = (_gpu_enabled && (y isa CUDA.CuArray)) ? Adapt.adapt(CUDA.CuArray, x) : Adapt.adapt(Array, x)
+
+# Draw a training minibatch: B source states, B target states and B times (Float32).
+# Targets are sampled from the global PM; sources are short runs of uniform-random tokens.
+function make_minibatch(B::Int, P::FF.EditFlow; rng=Random.default_rng())
+    K = P.k
+    sources = Vector{FF.DiscreteState}(undef, B)
+    targets = Vector{FF.DiscreteState}(undef, B)
+    for idx in eachindex(sources, targets)
+        # Target first, then source, so `rng` is consumed in a fixed order per entry.
+        seq1 = sample(PM; rng=rng)
+        @assert all(1 .<= seq1 .<= K)
+        targets[idx] = FF.DiscreteState(K, seq1)
+        # Source: length uniform in 1:10, tokens uniform in 1:K (no BOS token).
+        source_len = rand(rng, 1:10)
+        source_tokens = rand(rng, 1:K, source_len)
+        sources[idx] = FF.DiscreteState(K, source_tokens)
+    end
+    times = rand(rng, Float32, B)
+    return sources, targets, times
+end
+
+struct EditFlowModel{L} # wrapper type: every sub-layer lives in the single `layers` field
+ layers::L
+end
+Flux.@layer EditFlowModel
+# Keyword constructor: builds the sub-layers below and stores them, together with K, in a NamedTuple.
+function EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, K::Int)
+ embedding = Flux.Embedding(K + 2 => d) # K + 2 rows; presumably K tokens plus BOS and padding ids (TODO confirm)
+ time_embed = Flux.Chain(RandomFourierFeatures(1 => rff_dim, 1.0f0), Dense(rff_dim => cond_dim)) # time -> conditioning vector
+ blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers]
+ head_combined = Dense(d => 2K + 1, bias=false) # 2K + 1 outputs per position; presumably K ins + K sub + 1 del (TODO confirm)
+ rope = RoPE(d ÷ num_heads, 4096) # built for per-head width d ÷ num_heads; 4096 is presumably the max length (TODO confirm)
+ return EditFlowModel((; embedding, time_embed, blocks, head_combined, rope, K))
+end
+
+function (model::EditFlowModel)(t, Xt_ms) # forward pass: time(s) t and masked token state -> head outputs
+ m = model.layers
+ X = FF.tensor(Xt_ms)
+ X = ndims(X) == 1 ? reshape(X, :, 1) : X # a 1-d input becomes a single-column matrix
+ L, B = size(X)
+ # The mask lookup is wrapped in Zygote.@ignore, so it is left out of differentiation.
+ pmask = Zygote.@ignore FF.getlmask(Xt_ms)
+ Xp = X .+ 1 # shift ids by one before the embedding lookup (id 0 -> row 1)
+ H = m.embedding(Xp)
+ # A 0-dim t is repeated B times as Float32; anything else is converted elementwise to Float32.
+ t = ndims(t) == 0 ? fill(Float32(t), B) : Float32.(t)
+ cond = m.time_embed(reshape(t, 1, B))
+ # Put the conditioning, the mask and the rotary table on the same array type as H.
+ cond = to_same_device(cond, H)
+ pmask = Zygote.@ignore to_same_device(pmask, H)
+ rope = Zygote.@ignore to_same_device(m.rope[1:L], H)
+ # Every block receives the same cond, rope and key-padding mask.
+ for blk in m.blocks
+ H = blk(H; cond, rope, kpad_mask=pmask)
+ end
+ return m.head_combined(H)
+end
+
+# Train `model` on minibatches from `make_minibatch` (GPU if available); returns the model passed through `to_cpu`.
+function train_editflow!(P::FF.EditFlow,
+ model;
+ epochs::Int=1,
+ steps_per_epoch::Int=100,
+ batch_size::Int=64,
+ lr::Float32=1f-2,
+ seed::Int=42,
+ print_every::Int=25)
+ # Seed both the local RNG (used for minibatch sampling) and the global RNG with `seed`.
+ rng = Random.MersenneTwister(seed)
+ Random.seed!(seed)
+
+ # Move model to device (GPU if available); callers should use the returned model, not the argument
+ model = Functors.fmap(to_dev, model)
+ opt_state = Flux.setup(Flux.Adam(lr), model)
+
+ for epoch in 1:epochs
+ for step in 1:steps_per_epoch
+
+ # 1) Minibatch sampling: source states, target states and one time per batch entry
+ x0s, x1s, ts = make_minibatch(batch_size, P; rng=rng)
+
+ # Presumably aligns each (x0, x1) pair and batches the aligned sequences (see FF.align_and_batch)
+ Z0, Z1 = FF.align_and_batch(P, x0s, x1s)
+
+ # Interpolate between Z0 and Z1 at times ts; yields Zt and Xt
+ Zt, Xt = FF.interpolate_Z_elementwise(P, Z0, Z1, ts)
+
+ # Prepend a BOS row to Zt, Xt and Z1
+ bos = P.bos_token
+ Zt = vcat(fill(bos, 1, batch_size), Zt)
+ Xt = vcat(fill(bos, 1, batch_size), Xt)
+ Z1 = vcat(fill(bos, 1, batch_size), Z1)
+ # 2) Transition mask and remaining-edit multipliers (must have equal sizes)
+ transition_mask = FF.transition_mask_from_Xt(P, Xt)
+ edit_multiplier = FF.remaining_edits(P, Zt, Z1, Xt)
+
+ @assert size(transition_mask) == size(edit_multiplier)
+ # Scheduler scaling dκ(t) / (1 - κ(t)), with the denominator clamped below at 1f-6
+ den = 1f0 .- P.κ.(ts)
+ den = max.(den, 1f-6)
+ scheduler_scaling = P.dκ.(ts) ./ den
+
+ # 3) Masked state (CPU → device)
+ lmask = Xt .!= P.padding_token
+ cmask = trues(size(lmask))
+ Xt_ms = FF.MaskedState(FF.DiscreteState(P.k, Xt), cmask, lmask)
+
+ ts_d = to_dev(ts)
+ Xt_ms_d = to_dev(Xt_ms)
+ Tmask_d = to_dev(transition_mask)
+ Emult_d = to_dev(edit_multiplier)
+ sched_d = to_dev(reshape(Float32.(scheduler_scaling), 1, 1, :))
+
+ # 4) Forward + loss + update (on device)
+ loss, grad = Flux.withgradient(model) do m
+ M = m(ts_d, Xt_ms_d)
+ l = FF.edit_loss(P, M, Tmask_d, Emult_d, sched_d; eps=1f-8)
+ # Debug aid: print the largest model output when the loss is NaN
+ if isnan(l)
+ println("Maximum element of M: ", maximum(M))
+ end
+ l
+ end
+ Flux.update!(opt_state, model, grad[1])
+
+ if step % print_every == 0
+ @info "train2" epoch step loss=Float32(loss)
+ end
+ end
+ end
+
+ # Move model back to CPU for sampling
+ return Functors.fmap(to_cpu, model)
+end
+
+# PM is the target probability model and K is the alphabet size
+K = PM.K
+println("K: ", K)
+# Process definition, model construction and training for the position-wise variant.
+P = FF.EditFlow(K; bos_token=0, impl=:positionwise)
+
+model = EditFlowModel(; d=128, num_heads=8, nlayers=4, rff_dim=128, cond_dim=128, K=K)
+
+# Train; returned model is on CPU
+model = train_editflow!(P, model; epochs=10, steps_per_epoch=150, batch_size=256, lr=1f-3)
+# Reference samples drawn from the target model, printed as amino-acid strings.
+rng = Random.MersenneTwister(42)
+println("\n=== True PM samples (20) ===")
+for i in 1:20
+    seq = sample(PM; rng=rng) # token ids, used below to index AA20
+    aa_str = String(collect(AA20[seq]))
+    println("[", i, "] ", aa_str)
+end
+
+samples = sample_gen_10_strings(P, model; ts=0f0:0.01f0:1f0)
+# Report the actual number of samples returned instead of a hard-coded count.
+println("\n=== Model samples (", length(samples), ") ===")
+for (i, s) in enumerate(samples)
+    if s isa AbstractString
+        println("[", i, "] ", s)
+    elseif s isa AbstractVector{<:AbstractString}
+        last_str = isempty(s) ? "" : s[end]
+        println("[", i, "] traj_last=", last_str, " (len=", length(s), ")")
+    else
+        println("[", i, "] (raw) ", s)
+    end
+end
diff --git a/editflow_code/figures/model_vs_true.pdf b/editflow_code/figures/model_vs_true.pdf
new file mode 100644
index 0000000..6f4f1a7
Binary files /dev/null and b/editflow_code/figures/model_vs_true.pdf differ
diff --git a/editflow_code/gapwise_editflowsrun.jl b/editflow_code/gapwise_editflowsrun.jl
new file mode 100644
index 0000000..385dbaf
--- /dev/null
+++ b/editflow_code/gapwise_editflowsrun.jl
@@ -0,0 +1,209 @@
+using Pkg
+Pkg.activate(@__DIR__)
+#Pkg.instantiate()
+using Revise
+
+using Random
+using Statistics
+using Adapt
+using Functors
+using Flux
+using Onion
+using RandomFeatureMaps
+using Zygote
+Pkg.develop(path=joinpath(@__DIR__, ".."))
+using Flowfusion
+const FF = Flowfusion
+include(joinpath(@__DIR__, "prob_model.jl"))
+include(joinpath(@__DIR__, "helper_funcs.jl"))
+
+import CUDA
+
+# Device helpers: `_gpu_enabled` is whatever `CUDA.has_cuda()` reports, or false if that call throws.
+const _gpu_enabled = try
+ CUDA.has_cuda()
+catch
+ false
+end
+# to_dev / to_cpu adapt x to CuArray / Array storage when `_gpu_enabled` is true; otherwise x is returned as-is.
+to_dev(x) = _gpu_enabled ? Adapt.adapt(CUDA.CuArray, x) : x
+to_cpu(x) = _gpu_enabled ? Adapt.adapt(Array, x) : x
+# Move x to the same array type as y: CuArray when the GPU is enabled and y is a CuArray, otherwise Array.
+to_same_device(x, y) = (_gpu_enabled && (y isa CUDA.CuArray)) ? Adapt.adapt(CUDA.CuArray, x) : Adapt.adapt(Array, x)
+
+# Draw a training minibatch: B source states, B target states and B times (Float32).
+# Targets are sampled from the global PM; sources are short runs of uniform-random tokens.
+function make_minibatch(B::Int, P::FF.EditFlow; rng=Random.default_rng())
+    K = P.k
+    sources = Vector{FF.DiscreteState}(undef, B)
+    targets = Vector{FF.DiscreteState}(undef, B)
+    for idx in eachindex(sources, targets)
+        # Target first, then source, so `rng` is consumed in a fixed order per entry.
+        seq1 = sample(PM; rng=rng)
+        @assert all(1 .<= seq1 .<= K)
+        targets[idx] = FF.DiscreteState(K, seq1)
+        # Source: length uniform in 1:10, tokens uniform in 1:K (no BOS token).
+        source_len = rand(rng, 1:10)
+        source_tokens = rand(rng, 1:K, source_len)
+        sources[idx] = FF.DiscreteState(K, source_tokens)
+    end
+    times = rand(rng, Float32, B)
+    return sources, targets, times
+end
+
+
+############################
+# Gap-wise model + training
+############################
+
+# --- helpers: build L+1 gap embeddings from L site embeddings along dim 2 (pick one) ---
+gaps_from_sites_dup(H) = hcat(@view(H[:,1:1,:]), @view(H[:,2:end,:]), @view(H[:,end:end,:]))
+# gaps_from_sites_dup: first site, sites 2:end, then the last site again.
+# gaps_from_sites_avg: first site, means of adjacent site pairs, then the last site.
+function gaps_from_sites_avg(H)
+ d, L, B = size(H)
+ if L == 1
+ middle = @view H[:, 1:0, :] # a single site has no adjacent pairs: empty middle section
+ else
+ @views middle = 0.5f0 .* (H[:, 1:L-1, :] .+ H[:, 2:L, :])
+ end
+ # cat with dims=2 is safer on GPU than hcat for some combinations of views
+ return cat(@view(H[:, 1:1, :]), middle, @view(H[:, L:L, :]); dims=2) # d×(L+1)×B
+end
+
+struct EditFlowModel{L} # wrapper type: every sub-layer lives in the single `layers` field
+ layers::L
+end
+Flux.@layer EditFlowModel
+# Keyword constructor: builds the sub-layers below and stores them, together with K, in a NamedTuple.
+function EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, K::Int)
+ embedding = Flux.Embedding(K + 2 => d) # K + 2 rows; presumably K tokens plus BOS and padding ids (TODO confirm)
+ time_embed = Flux.Chain(RandomFourierFeatures(1 => rff_dim, 1.0f0), Dense(rff_dim => cond_dim)) # time -> conditioning vector
+ blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers]
+ # --- split heads: insertions are scored on gaps, substitutions/deletions on sites ---
+ head_ins = Dense(d => K, bias=false) # applied on gaps (L+1)
+ head_sub = Dense(d => K, bias=false) # applied on sites (L)
+ head_del = Dense(d => 1, bias=false) # applied on sites (L)
+ rope = RoPE(d ÷ num_heads, 4096) # built for per-head width d ÷ num_heads; 4096 is presumably the max length (TODO confirm)
+ return EditFlowModel((; embedding, time_embed, blocks, head_ins, head_sub, head_del, rope, K))
+end
+
+# Forward: returns M of shape (2K+1, L+1, B), stacking insertion, substitution and deletion outputs
+function (model::EditFlowModel)(t, Xt_ms)
+ m = model.layers
+ X = FF.tensor(Xt_ms)
+ X = ndims(X) == 1 ? reshape(X, :, 1) : X
+ L, B = size(X)
+ # The mask lookup is wrapped in Zygote.@ignore, so it is left out of differentiation.
+ pmask = Zygote.@ignore FF.getlmask(Xt_ms)
+ Xp = X .+ 1 # embedding is 1-indexed
+ H = m.embedding(Xp) # d×L×B
+ # A 0-dim t is repeated B times as Float32; anything else is converted elementwise to Float32.
+ t = ndims(t) == 0 ? fill(Float32(t), B) : Float32.(t)
+ cond = m.time_embed(reshape(t, 1, B))
+ # Put the conditioning, the mask and the rotary table on the same array type as H.
+ cond = to_same_device(cond, H)
+ pmask = Zygote.@ignore to_same_device(pmask, H)
+ rope = Zygote.@ignore to_same_device(m.rope[1:L], H)
+
+ for blk in m.blocks
+ H = blk(H; cond, rope, kpad_mask=pmask) # d×L×B
+ end
+
+ # --- gap embeddings & heads ---
+ Hg = gaps_from_sites_avg(H) # d×(L+1)×B (or use gaps_from_sites_dup)
+ ins = m.head_ins(Hg) # K×(L+1)×B
+ sub = m.head_sub(H) # K×L×B
+ del = m.head_del(H) # 1×L×B
+
+ # pad sub/del with a zero last column to reach L+1
+ @views sub_pad = cat(sub, sub[:, 1:1, :].*0; dims=2)
+ @views del_pad = cat(del, del[:, 1:1, :].*0; dims=2)
+ # NOTE(review): `.*0` keeps NaN/Inf from the first column in the padding column — confirm this is acceptable
+ # final combined logits (untransformed): (2K+1)×(L+1)×B
+ return vcat(ins, sub_pad, del_pad)
+end
+
+# --- training loop (gap-wise): runs on the GPU if available, returns the model passed through `to_cpu` ---
+function train_editflow!(P::FF.EditFlow,
+ model;
+ epochs::Int=1,
+ steps_per_epoch::Int=100,
+ batch_size::Int=64,
+ lr::Float32=1f-2,
+ seed::Int=42,
+ print_every::Int=25)
+ # Seed both the local RNG (used for minibatch sampling) and the global RNG with `seed`.
+ rng = Random.MersenneTwister(seed)
+ Random.seed!(seed)
+
+ # Move model to device; callers should use the returned model, not the argument
+ model = Functors.fmap(to_dev, model)
+ opt_state = Flux.setup(Flux.Adam(lr), model)
+
+ for epoch in 1:epochs
+ for step in 1:steps_per_epoch
+ # 1) minibatch: sample states and times, then batch and interpolate at times ts
+ x0s, x1s, ts = make_minibatch(batch_size, P; rng=rng)
+ Z0, Z1 = FF.align_and_batch(P, x0s, x1s)
+ Zt, Xt = FF.interpolate_Z_elementwise(P, Z0, Z1, ts)
+
+ # prepend a BOS row to Zt, Xt and Z1
+ bos = P.bos_token
+ Zt = vcat(fill(bos, 1, batch_size), Zt)
+ Xt = vcat(fill(bos, 1, batch_size), Xt)
+ Z1 = vcat(fill(bos, 1, batch_size), Z1)
+
+ # 2) gap-wise masks & multipliers; dim 2 must be one longer than the rows of Xt
+ transition_mask = FF.transition_mask_from_Xt_gapwise(P, Xt) # (2K+1, L+1, B)
+ edit_multiplier = FF.remaining_edits_gapwise(P, Zt, Z1, Xt) # (2K+1, L+1, B)
+ @assert size(transition_mask) == size(edit_multiplier)
+ @assert size(transition_mask, 2) == size(Xt, 1) + 1
+
+ # scheduler scaling dκ(t) / (1 - κ(t)), with the denominator clamped below at 1f-2
+ den = 1f0 .- P.κ.(ts); den = max.(den, 1f-2)
+ scheduler_scaling = P.dκ.(ts) ./ den # length B
+
+ # 3) masked state → device
+ lmask = Xt .!= P.padding_token
+ cmask = trues(size(lmask))
+ Xt_ms = FF.MaskedState(FF.DiscreteState(P.k, Xt), cmask, lmask)
+
+ ts_d = to_dev(ts)
+ Xt_ms_d = to_dev(Xt_ms)
+ Tmask_d = to_dev(transition_mask)
+ Emult_d = to_dev(edit_multiplier)
+ sched_d = to_dev(reshape(Float32.(scheduler_scaling), 1, 1, :))
+
+ # 4) forward pass + loss + parameter update
+ loss, grad = Flux.withgradient(model) do m
+ M = m(ts_d, Xt_ms_d) # (2K+1, L+1, B)
+ # shape sanity: model output, mask and multiplier must all agree
+ @assert size(M) == size(Tmask_d) == size(Emult_d)
+ FF.edit_loss_gapwise(P, M, Tmask_d, Emult_d, sched_d; eps=1f-8)
+ end
+ Flux.update!(opt_state, model, grad[1])
+
+ if step % print_every == 0
+ @info "train-gapwise" epoch step loss=Float32(loss)
+ end
+ end
+ end
+ # Move model back to CPU before returning
+ return Functors.fmap(to_cpu, model)
+end
+
+# PM is the target probability model and K is the alphabet size
+K = PM.K
+println("K: ", K)
+# NOTE(review): impl is a String here but a Symbol (:positionwise) in editflowsrun.jl — confirm FF.EditFlow accepts both
+P = FF.EditFlow(K; bos_token=0, impl="positionwise_reparam")
+
+model = EditFlowModel(; d=128, num_heads=8, nlayers=4, rff_dim=128, cond_dim=128, K=K)
+
+# Train; returned model is on CPU
+model = train_editflow!(P, model; epochs=10, steps_per_epoch=150, batch_size=256, lr=1f-3)
+
+
+
+
diff --git a/editflow_code/gapwise_notebook.ipynb b/editflow_code/gapwise_notebook.ipynb
new file mode 100644
index 0000000..4d36385
--- /dev/null
+++ b/editflow_code/gapwise_notebook.ipynb
@@ -0,0 +1,2554 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `~/dev/Flowfusion.jl/editflow_code`\n",
+ "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Project.toml`\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Manifest.toml`\n",
+ "WARNING: redefinition of constant Main.AA20. This may fail, cause incorrect answers, or produce other errors.\n",
+ "WARNING: redefinition of constant Main.TOK2ID_AA. This may fail, cause incorrect answers, or produce other errors.\n",
+ "WARNING: redefinition of constant Main.PM. This may fail, cause incorrect answers, or produce other errors.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "plot_len_dist (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "using Pkg\n",
+ "Pkg.activate(@__DIR__)\n",
+ "Pkg.instantiate()\n",
+ "using Revise\n",
+ "\n",
+ "using Random\n",
+ "using Statistics\n",
+ "using Adapt\n",
+ "using Functors\n",
+ "using Flux\n",
+ "using Onion\n",
+ "using RandomFeatureMaps\n",
+ "using Zygote\n",
+ "\n",
+ "Pkg.develop(path=joinpath(@__DIR__, \"..\"))\n",
+ "using Flowfusion\n",
+ "const FF = Flowfusion\n",
+ "\n",
+ "include(joinpath(@__DIR__, \"prob_model.jl\"))\n",
+ "include(joinpath(@__DIR__, \"helper_funcs.jl\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "to_same_device (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import CUDA\n",
+ "\n",
+ "# Device helpers\n",
+ "const _gpu_enabled = try\n",
+ " CUDA.has_cuda()\n",
+ "catch\n",
+ " false\n",
+ "end\n",
+ "\n",
+ "to_dev(x) = _gpu_enabled ? Adapt.adapt(CUDA.CuArray, x) : x\n",
+ "to_cpu(x) = _gpu_enabled ? Adapt.adapt(Array, x) : x\n",
+ "# move x to the same device type as y (Array or CuArray)\n",
+ "to_same_device(x, y) = (_gpu_enabled && (y isa CUDA.CuArray)) ? Adapt.adapt(CUDA.CuArray, x) : Adapt.adapt(Array, x)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "make_minibatch (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Minibatch sampler: x1 is drawn from the global PM (defined by the includes in the first cell)\n",
+ "\n",
+ "function make_minibatch(B::Int, P::FF.EditFlow; rng=Random.default_rng())\n",
+ " K = P.k\n",
+ " x0s = Vector{FF.DiscreteState}(undef, B)\n",
+ " x1s = Vector{FF.DiscreteState}(undef, B)\n",
+ " for b in 1:B\n",
+ " # x1 from true PM\n",
+ " seq1 = sample(PM; rng=rng)\n",
+ " @assert all(1 .<= seq1 .<= K)\n",
+ " x1s[b] = FF.DiscreteState(K, seq1)\n",
+ " # x0: uniform tokens with random length in 1:10 (no BOS in x0)\n",
+ " L0 = rand(rng, 1:10)\n",
+ " seq0 = rand(rng, 1:K, L0)\n",
+ " x0s[b] = FF.DiscreteState(K, seq0)\n",
+ " end\n",
+ " ts = rand(rng, Float32, B)\n",
+ " return x0s, x1s, ts\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "gaps_from_sites_avg (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Gap-wise helpers\n",
+ "\n",
+ "gaps_from_sites_dup(H) = hcat(@view(H[:,1:1,:]), @view(H[:,2:end,:]), @view(H[:,end:end,:]))\n",
+ "\n",
+ "function gaps_from_sites_avg(H)\n",
+ " d, L, B = size(H)\n",
+ " if L == 1\n",
+ " middle = @view H[:, 1:0, :]\n",
+ " else\n",
+ " @views middle = 0.5f0 .* (H[:, 1:L-1, :] .+ H[:, 2:L, :])\n",
+ " end\n",
+ " # safer on GPU than hcat in some view combinations\n",
+ " return cat(@view(H[:, 1:1, :]), middle, @view(H[:, L:L, :]); dims=2) # d×(L+1)×B\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Model definition (gap-wise heads)\n",
+ "\n",
+ "struct EditFlowModel{L}\n",
+ " layers::L\n",
+ "end\n",
+ "Flux.@layer EditFlowModel\n",
+ "\n",
+ "function EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, K::Int)\n",
+ " embedding = Flux.Embedding(K + 2 => d)\n",
+ " time_embed = Flux.Chain(RandomFourierFeatures(1 => rff_dim, 1.0f0), Dense(rff_dim => cond_dim))\n",
+ " blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers]\n",
+ " # split heads: ins on gaps, sub/del on sites\n",
+ " head_ins = Dense(d => K, bias=false) # gaps (L+1)\n",
+ " head_sub = Dense(d => K, bias=false) # sites (L)\n",
+ " head_del = Dense(d => 1, bias=false) # sites (L)\n",
+ " rope = RoPE(d ÷ num_heads, 4096)\n",
+ " return EditFlowModel((; embedding, time_embed, blocks, head_ins, head_sub, head_del, rope, K))\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Forward: returns logits of shape (2K+1, L+1, B); rows are stacked as [ins (K); sub (K); del (1)]\n",
+ "function (model::EditFlowModel)(t, Xt_ms)\n",
+ " m = model.layers\n",
+ " X = FF.tensor(Xt_ms)\n",
+ " X = ndims(X) == 1 ? reshape(X, :, 1) : X\n",
+ " L, B = size(X)\n",
+ "\n",
+ " pmask = Zygote.@ignore FF.getlmask(Xt_ms)\n",
+ " Xp = X .+ 1 # embedding is 1-indexed\n",
+ " H = m.embedding(Xp) # d×L×B\n",
+ "\n",
+ " t = ndims(t) == 0 ? fill(Float32(t), B) : Float32.(t)\n",
+ " cond = m.time_embed(reshape(t, 1, B))\n",
+ "\n",
+ " cond = to_same_device(cond, H)\n",
+ " pmask = Zygote.@ignore to_same_device(pmask, H)\n",
+ " rope = Zygote.@ignore to_same_device(m.rope[1:L], H)\n",
+ "\n",
+ " for blk in m.blocks\n",
+ " H = blk(H; cond, rope, kpad_mask=pmask) # d×L×B\n",
+ " end\n",
+ "\n",
+ " # gap embeddings & heads\n",
+ " Hg = gaps_from_sites_avg(H) # d×(L+1)×B\n",
+ " ins = m.head_ins(Hg) # K×(L+1)×B\n",
+ " sub = m.head_sub(H) # K×L×B\n",
+ " del = m.head_del(H) # 1×L×B\n",
+ "\n",
+ " # pad sub/del with a zero FIRST column to align sites with gaps (site i ↔ gap i+1)\n",
+ " @views sub_pad = cat(sub[:, 1:1, :].*0, sub; dims=2)\n",
+ " @views del_pad = cat(del[:, 1:1, :].*0, del; dims=2)\n",
+ "\n",
+ " return vcat(ins, sub_pad, del_pad) # (2K+1)×(L+1)×B\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "train_editflow! (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Training loop (gap-wise)\n",
+ "function train_editflow!(P::FF.EditFlow,\n",
+ " model;\n",
+ " epochs::Int=1,\n",
+ " steps_per_epoch::Int=100,\n",
+ " batch_size::Int=64,\n",
+ " lr::Float32=1f-2,\n",
+ " seed::Int=42,\n",
+ " print_every::Int=25)\n",
+ "\n",
+ " rng = Random.MersenneTwister(seed)\n",
+ " Random.seed!(seed)\n",
+ "\n",
+ " # device\n",
+ " model = Functors.fmap(to_dev, model)\n",
+ " opt_state = Flux.setup(Flux.Adam(lr), model)\n",
+ "\n",
+ " for epoch in 1:epochs\n",
+ " for step in 1:steps_per_epoch\n",
+ " # 1) minibatch\n",
+ " x0s, x1s, ts = make_minibatch(batch_size, P; rng=rng)\n",
+ " Z0, Z1 = FF.align_and_batch(P, x0s, x1s)\n",
+ " Zt, Xt = FF.interpolate_Z_elementwise(P, Z0, Z1, ts)\n",
+ "\n",
+ " # prepend BOS to all streams\n",
+ " bos = P.bos_token\n",
+ " Zt = vcat(fill(bos, 1, batch_size), Zt)\n",
+ " Xt = vcat(fill(bos, 1, batch_size), Xt)\n",
+ " Z1 = vcat(fill(bos, 1, batch_size), Z1)\n",
+ "\n",
+ " # 2) gap-wise masks & multipliers\n",
+ " transition_mask = FF.transition_mask_from_Xt(P, Xt) # (2K+1, L+1, B)\n",
+ " edit_multiplier = FF.remaining_edits(P, Zt, Z1, Xt) # (2K+1, L+1, B)\n",
+ " @assert size(transition_mask) == size(edit_multiplier)\n",
+ " @assert size(transition_mask, 2) == size(Xt, 1) + 1\n",
+ "\n",
+ " # scheduler\n",
+ " den = 1f0 .- P.κ.(ts); den = max.(den, 1f-3)\n",
+ " scheduler_scaling = P.dκ.(ts) ./ den # length B\n",
+ "\n",
+ " # 3) masked state → device\n",
+ " lmask = Xt .!= P.padding_token\n",
+ " cmask = trues(size(lmask))\n",
+ " Xt_ms = FF.MaskedState(FF.DiscreteState(P.k, Xt), cmask, lmask)\n",
+ "\n",
+ " ts_d = to_dev(ts)\n",
+ " Xt_ms_d = to_dev(Xt_ms)\n",
+ " Tmask_d = to_dev(transition_mask)\n",
+ " Emult_d = to_dev(edit_multiplier)\n",
+ " sched_d = to_dev(reshape(Float32.(scheduler_scaling), 1, 1, :))\n",
+ "\n",
+ " # 4) fwd + loss + update\n",
+ " loss, grad = Flux.withgradient(model) do m\n",
+ " M = m(ts_d, Xt_ms_d) # (2K+1, L+1, B)\n",
+ " # shape sanity\n",
+ " @assert size(M) == size(Tmask_d) == size(Emult_d)\n",
+ " FF.edit_loss(P, M, Tmask_d, Emult_d, sched_d; eps=1f-8)\n",
+ " end\n",
+ " Flux.update!(opt_state, model, grad[1])\n",
+ "\n",
+ " if step % print_every == 0\n",
+ " @info \"train-gapwise\" epoch step loss=Float32(loss)\n",
+ " end\n",
+ " end\n",
+ " end\n",
+ "\n",
+ " return Functors.fmap(to_cpu, model)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "K: 20\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Init the EditFlow process and the model (no training happens in this cell)\n",
+ "K = PM.K\n",
+ "println(\"K: \", K)\n",
+ "\n",
+ "#alpha_exponent = 3.0f0\n",
+ "P = FF.EditFlow(K; bos_token=0)\n",
+ "\n",
+ "model = EditFlowModel(; d=128, num_heads=8, nlayers=4, rff_dim=128, cond_dim=128, K=K)\n",
+ "nothing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.042582143f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.051574655f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040682144f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03176706f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04301755f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.047801197f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03131537f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.025587518f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040904406f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.051154993f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038289122f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040351614f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.026478458f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.052063473f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02528245f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.049059216f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045367613f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.0028266194f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.021066202f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033532213f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038553096f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04018806f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04420363f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.046000928f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.032879625f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04658139f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033914283f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.044273406f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.053182956f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.051027846f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.043055456f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.0364012f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033408396f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03524468f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.046422645f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04579732f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.056083847f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02740052f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.036104627f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040208943f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.029784188f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031641074f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04278183f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045025032f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.05684315f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038700566f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.037000623f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.047675125f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03150429f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.05844419f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.026736975f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.029746149f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035454303f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.049596604f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02378349f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.043790184f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03123553f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04705058f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.037064686f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04445659f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.051929086f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02436034f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.022771671f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.044702657f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045137305f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.041192025f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.029864756f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038833927f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.041693743f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.023466088f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.046086684f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04445929f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.05211716f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.036415737f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035114527f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.05363507f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031739093f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034422822f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045822714f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03548678f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04809461f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040762138f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.022662576f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034381583f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02072578f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04044771f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.043389153f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033454575f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02941765f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03229374f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.030144595f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04410399f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.014296858f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045350045f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.01808934f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.037948772f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040850688f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.047814716f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03897f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.039783433f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.046952866f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.050687954f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.027305104f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034051463f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04046725f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035221793f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.015419234f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.032407716f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03907629f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04017909f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.042050876f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.06224002f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.032651253f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034311622f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.050489604f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.01942078f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04674881f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031366825f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.030259585f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.020899203f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03374313f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.026232053f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03176389f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04193217f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.039803006f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.0016402872f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034368455f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.044954106f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.049633063f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.051525604f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.039586365f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.056642942f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04322856f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03591118f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.039617103f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033589836f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045645736f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04366725f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04571055f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04562773f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.037686266f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04003264f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03928131f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04958935f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.044996995f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.017311074f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03966754f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045690224f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045542553f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.043954458f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02862454f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.027264602f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040636502f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04547987f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03184007f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04121754f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.046075374f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.042077787f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02793716f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.047131248f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031824037f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.049015008f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03991092f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.041570164f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031357594f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.023561478f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.015707273f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04599042f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035853077f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.043617148f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03299003f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.036430042f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.041398343f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.004706221f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.037196025f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.05408126f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040643804f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.028125618f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.039175157f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.046623144f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03860229f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04580462f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.026982484f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.01907624f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033030905f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04366968f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.019387946f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.041732203f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038875856f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033090573f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03511978f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.049247496f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034137912f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.025268145f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04250137f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035059683f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.020794798f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.045273773f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.028975278f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03930358f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.025179597f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.026635196f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038387585f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03936877f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.041131224f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.037794285f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031822115f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.0286325f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03954857f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034084857f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.032853782f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.015939223f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038161334f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.044323556f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.021606334f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.024146589f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.046376158f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04099659f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.0414833f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.027421055f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.05385631f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03387799f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03678792f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03864511f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031337883f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035680942f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.028983377f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.022545489f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033676624f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04305654f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.018965978f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03362689f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.026229195f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.033605922f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.02983573f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040450968f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.042170297f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.014729615f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034614924f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03768231f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.032214597f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04054915f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.05015796f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.046196725f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034610253f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.024003044f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03458842f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04691034f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035118952f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.028442157f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.037072852f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.017393941f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03015057f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.036646903f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.028592978f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.047810607f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04006023f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.029726554f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038993478f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.036169015f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035097905f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.036238242f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.013541234f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.040243626f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.042542692f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.032190338f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03143801f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.011104978f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04108549f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.021270731f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.030110102f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.04132489f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.014701558f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038577426f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.013419241f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.036465287f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.0129490765f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.042824633f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03329256f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038794056f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.042093784f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.039231405f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.016737167f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03380588f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.032274134f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03857955f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035523675f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03874997f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.032295678f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031139178f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03532363f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.034379814f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.021528132f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.03387867f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.015707625f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.027938733f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.031691514f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.035845827f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.038091443f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain-gapwise\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 0.042518776f0\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel(\n",
+ " Embedding(22 => 128), \u001b[90m# 2_816 parameters\u001b[39m\n",
+ " Chain(\n",
+ " RandomFourierFeatures{Float32, Matrix{Float32}}(Float32[-3.4897678 12.4698105 … 6.522122 -6.1615295]),\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " [\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9485438, 0.9517056, 0.91631305, 0.93132836, 0.8952846, 0.9048501, 0.9849782, 0.97568834, 0.9770145, 0.9543596 … 0.95225495, 0.9640162, 0.96162915, 0.95250344, 0.9518388, 0.93140155, 0.95342207, 0.9564777, 0.9472097, 0.9704547], Float32[-0.0025767968, -0.005531265, -0.057881825, -0.0020387296, -0.011449936, -0.011851602, 0.002365834, -0.011678252, -0.014808152, 0.009572189 … 0.0022765587, -0.020436158, -0.015536882, 0.0023218014, 7.319261f-5, 0.00029178447, 0.010181397, -0.008527656, 0.00060043234, 0.009188443], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.92454726, 1.0295289, 0.98453337, 1.0147902, 0.9616023, 0.9640124, 0.96462286, 0.95139396, 0.968303, 1.0446378 … 0.97232085, 0.9544489, 0.94105947, 1.01344, 0.97555476, 0.9792089, 1.0025232, 0.95133525, 0.9576563, 1.0143557], Float32[0.0027763864, -0.00047614917, -0.015428355, 0.021620063, -0.017400159, 0.005237624, -0.015351549, -0.012930975, -0.02459172, -0.017333504 … -0.010810377, -0.015740385, -0.009417565, 0.008541248, 0.00019140948, -0.005366987, -0.0088723535, 0.005091113, 0.0075245644, 0.026632788], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9579994, 0.9476913, 0.9554934, 0.9522036, 0.9599546, 0.9532042, 0.9783103, 0.9622565, 0.9757522, 0.9626176 … 0.97050655, 0.87086666, 0.97092474, 0.9631059, 0.97171074, 0.96746814, 0.97048044, 0.9612386, 0.9372323, 0.97935456], Float32[-0.01209553, -0.010203717, -0.010120125, -0.0020847165, 0.011635498, 0.00048273787, -0.007999752, 6.894706f-5, -0.021175401, -0.0140660945 … -0.0046253526, -0.0031032036, -0.012982736, 0.03514903, 0.0010869585, -0.013199106, -0.0038628152, -0.002296497, 0.02071869, 0.011734843], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.985139, 0.9774747, 1.0477108, 0.98501253, 0.98935205, 0.9846256, 0.9944955, 0.9971468, 0.98940533, 1.011992 … 0.94710165, 1.0043437, 1.0460154, 1.0454555, 0.97295123, 1.0074944, 1.0127513, 1.004472, 1.0336586, 0.98678666], Float32[0.009985156, 0.0148912845, 0.0007550778, 0.013916604, -0.0054427087, -0.0115058925, 0.016221523, 0.00095696026, -0.010937472, -0.013677097 … -0.013091496, 0.0013413352, -0.013482028, 0.00044459678, -0.0081370445, -0.02209245, -0.015265387, -0.00967068, 0.0014689483, 0.0148694515], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9705351, 1.0028347, 0.9740206, 0.969771, 0.9363874, 0.9498614, 0.9676518, 0.9835581, 0.9829488, 0.9746027 … 1.003019, 0.9874021, 0.9921866, 0.95297444, 0.9610126, 0.98681086, 0.98136693, 0.9986948, 0.9301441, 0.9700858], Float32[-0.0007444257, -0.029364536, -0.0039363527, -0.000186554, -0.005866427, -0.009861192, -0.002090479, -0.0008000579, 0.008326826, -0.019328756 … 0.008347517, 0.00077981426, -0.0090034325, 0.0023879372, -0.014871054, -0.02297807, 0.0025592789, -0.0046469, 0.011648754, 0.023409285], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[1.0317453, 1.0391508, 1.0567257, 0.9506647, 1.0375222, 0.95721054, 0.99698806, 1.0370752, 1.0125376, 1.0758225 … 1.0426638, 1.0156405, 1.1337172, 0.98136586, 0.99563056, 1.0322379, 1.0522087, 1.0129474, 1.0208626, 0.98538053], Float32[-0.021828685, 0.016727783, -0.014718995, 0.017482841, 0.0045893975, -0.029517096, -0.018830335, 0.0072353478, -0.014692814, -0.0018668761 … 0.0047888295, -0.013738959, -0.02322664, -0.010887477, -0.01950592, -0.007834701, -0.00089994207, 0.006822281, 0.005051074, 0.02711348], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9345033, 1.0022833, 0.9521148, 0.9575872, 0.8875572, 1.0460182, 0.9967136, 1.0072346, 0.9725838, 1.0341343 … 0.99518543, 0.98995036, 1.0307875, 1.0181413, 1.0229013, 0.9724673, 0.96660626, 1.0165946, 0.8986929, 0.9959466], Float32[-0.043202747, 0.004392449, 0.005028139, 0.033483025, -0.0054316483, -0.005291531, -0.022794988, -0.014869429, 0.013712466, -0.023869297 … 0.020763112, -0.008715548, 0.0013027693, 0.0014196149, 0.0013407562, -0.026271563, 0.013075633, -0.009605205, 0.0029699882, 0.005611375], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[1.0118818, 1.0250593, 1.0372803, 0.9790395, 1.0693208, 1.015231, 1.0267678, 1.0174215, 1.0019836, 1.0813804 … 1.043039, 1.0293376, 1.194062, 0.99059707, 1.0406202, 0.9874126, 1.0517399, 1.0073861, 1.0147883, 1.0668176], Float32[0.0047795386, -0.0060442886, -0.008400408, 0.029895015, -0.015548748, -0.022354802, -0.019373512, 0.023813745, -0.018122695, -0.007683282 … 0.005050846, -0.027234506, -0.0535788, -0.02090459, -0.0033181268, -0.032578275, 0.02245798, 0.00907158, 0.009862278, 0.0163724], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " ],\n",
+ " Dense(128 => 20; bias=false), \u001b[90m# 2_560 parameters\u001b[39m\n",
+ " Dense(128 => 20; bias=false), \u001b[90m# 2_560 parameters\u001b[39m\n",
+ " Dense(128 => 1; bias=false), \u001b[90m# 128 parameters\u001b[39m\n",
+ " RoPE{Matrix{Float32}}(Float32[1.0 0.5403023 … -0.87528455 -0.065975994; 1.0 0.95041525 … 0.9552474 0.8159282; … ; 1.0 0.9999995 … -0.57972294 -0.5789079; 1.0 0.99999994 … 0.2726629 0.27235872], Float32[0.0 0.84147096 … -0.48360822 -0.9978212; 0.0 0.3109836 … 0.29580808 0.5781532; … ; 0.0 0.0009999999 … -0.8148137 -0.8153929; 0.0 0.0003162278 … 0.9621096 0.9621958]),\n",
+ " 20,\n",
+ ") \u001b[90m # Total: 82 trainable arrays, \u001b[39m1_339_392 parameters,\n",
+ "\u001b[90m # plus 3 non-trainable, 65_600 parameters, summarysize \u001b[39m5.364 MiB."
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Train; returned model is on CPU\n",
+ "model = train_editflow!(P, model; epochs=50, steps_per_epoch=150, batch_size=256, lr=3f-4)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "=== Gap-wise model samples (20) ===\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[33m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[33m\u001b[1mWarning: \u001b[22m\u001b[39mLayer with Float32 parameters got Float64 input.\n",
+ "\u001b[33m\u001b[1m│ \u001b[22m\u001b[39m The input will be converted, but any earlier layers may be very slow.\n",
+ "\u001b[33m\u001b[1m│ \u001b[22m\u001b[39m layer = Dense(128 => 128; bias=false) \u001b[90m# 16_384 parameters\u001b[39m\n",
+ "\u001b[33m\u001b[1m│ \u001b[22m\u001b[39m summary(x) = \"128×2 Matrix{Float64}\"\n",
+ "\u001b[33m\u001b[1m└ \u001b[22m\u001b[39m\u001b[90m@ Flux ~/.julia/packages/Flux/uRn8o/src/layers/stateless.jl:60\u001b[39m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1] EVHLQESGGGL\n",
+ "[2] QVQLVQSGAGVKPSTNTVPVSVE\n",
+ "[3] QVQLVQSKAEVKKHGGASVK\n",
+ "[4] EVQLVSYGGRVKK\n",
+ "[5] QVQLQQGYGAKKKVK\n",
+ "[6] QVQLVQSGAE\n",
+ "[7] QVQLVQSGAEVK\n",
+ "[8] QVQLVQSGAAVE\n",
+ "[9] VVQLVESGGGLVK\n",
+ "[10] QVQLVQSSGAEVSKPWGA\n",
+ "[11] QVQLQECGKGLNQH\n",
+ "[12] LVQLVQGSAEVKKK\n",
+ "[13] QVQLVQSGAEVKKPGASVK\n",
+ "[14] QVLVQSGAEVMVKPGA\n",
+ "[15] QQVKVQSGAEVKVKPG\n",
+ "[16] QVLLVQSGAQV\n",
+ "[17] EVQVESSGGGPTSP\n",
+ "[18] QVQLQESGGLVEKP\n",
+ "[19] QVQLQESGGGVKVPK\n",
+ "[20] EVQLVESGGGLVL\n"
+ ]
+ }
+ ],
+ "source": [
+ "function sample_gapwise_strings(P, model; N=20, ts=0.0f0:0.01f0:1.0f0, rng=Random.default_rng())\n",
+ " out = String[]\n",
+ " tg = collect(ts)\n",
+ " for _ in 1:N\n",
+ " x0s, _, _ = make_minibatch(1, P; rng=rng) # ta ett x0 per sample\n",
+ " x0 = x0s[1]\n",
+ " # lägg till BOS längst fram (träningen hade alltid BOS i pos 1)\n",
+ " x0_tokens = collect(FF.tensor(x0))\n",
+ " x0_bos = FF.DiscreteState(P.k, vcat(P.bos_token, x0_tokens))\n",
+ "\n",
+ " x = FF.rollout_gapwise(P, model, x0_bos, tg; rng=rng)\n",
+ " seq = collect(FF.tensor(x))\n",
+ " seq = filter(t -> t != P.bos_token && t != P.padding_token, seq)\n",
+ " push!(out, String(collect(AA20[seq])))\n",
+ " end\n",
+ " return out\n",
+ "end\n",
+ "\n",
+ "# Användning\n",
+ "rng = Random.MersenneTwister(42)\n",
+ "ts = 0.0f0:0.02f0:1.0f0\n",
+ "println(\"\\n=== Gap-wise model samples (20) ===\")\n",
+ "for (i, s) in enumerate(sample_gapwise_strings(P, model; N=20, ts=ts, rng=rng))\n",
+ " println(\"[\", i, \"] \", s)\n",
+ "end"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "500-element Vector{String}:\n",
+ " \"EVQLVESGGGLQLP\"\n",
+ " \"QVQLVESGGGVVQPGRS\"\n",
+ " \"QVQLKQSGAEVKKPSGAAVE\"\n",
+ " \"VQLVESGGGDVKPGGSLRL\"\n",
+ " \"QVQLQESGPGGLVK\"\n",
+ " \"QVQLVQSGAEVKK\"\n",
+ " \"EVQLVESGGGLVKPSGT\"\n",
+ " \"QVQVYQVGSETQKRP\"\n",
+ " \"EVQLLESGGVLVQP\"\n",
+ " \"QVQLVQSGAEVWK\"\n",
+ " \"EVQLVDSGGGLV\"\n",
+ " \"QVELRESGRVMVHPRG\"\n",
+ " \"VQLVQSGAEVK\"\n",
+ " ⋮\n",
+ " \"QVQVLEGGGGVVQPSRLSAK\"\n",
+ " \"EVQLVESGGGLVQP\"\n",
+ " \"QVQLVQSGARVKKPEAS\"\n",
+ " \"QVEGQQMGAGVKRG\"\n",
+ " \"EVYLVQPYESGYLQTGSLFL\"\n",
+ " \"QVMLQESGPGLVVK\"\n",
+ " \"EVQLVESGGGLVQPGKS\"\n",
+ " \"QVQLVQSGAEVKKPGSSV\"\n",
+ " \"EVQLVASGHVLVQTLS\"\n",
+ " \"QVQLVQSGAEVVKPSET\"\n",
+ " \"QVQLVQSQAETKKP\"\n",
+ " \"QVQLVQSGAVKKPGASVK\""
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sampling and Levenshtein plot\n",
+ "using Random\n",
+ "n = 500\n",
+ "\n",
+ "model_strings = sample_gapwise_strings(P, model; N=n, ts=ts, rng=rng)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "\n",
+ "n2 = 500\n",
+ "# Build training strings from PM parents (the data PM was built on)\n",
+ "train_states = [FF.DiscreteState(P.k, sample(PM; rng=Random.MersenneTwister(1000+i))) for i in 1:n2]\n",
+ "train_strings = [state_to_PM_string(P, s) for s in train_states]\n",
+ "\n",
+ "# Independent validation strings sampled from PM\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng=Random.MersenneTwister(1000+i+2*n2))) for i in 1:n2]\n",
+ "val_strings = [state_to_PM_string(P, s) for s in val_states]\n",
+ "#val_strings = [s for s in val_strings if !(s in train_strings)]\n",
+ "\n",
+ "# Plot normalized Levenshtein vs. training set (distinct val/train)\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings, train_strings; title=\"Model vs True (normalized Levenshtein)\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "100000-element Vector{String}:\n",
+ " \"EVQDVESGGGLV\"\n",
+ " \"EVQSVESGGGLVKPGGSLR\"\n",
+ " \"QVQLVPSGAAVKKP\"\n",
+ " \"SVQLVQSGAEVKKP\"\n",
+ " \"QVQLVQSWADCKKPLASVEV\"\n",
+ " \"EVQLVQSGGGLVQTGGS\"\n",
+ " \"EVQLLESGGGLVQ\"\n",
+ " \"QPQLVKPGDEVKKPGASVKV\"\n",
+ " \"FVTLRDSVALV\"\n",
+ " \"QITLKESDPTNVKP\"\n",
+ " \"QVQLVKSGAEV\"\n",
+ " \"QVQLVQSGPEVKKP\"\n",
+ " \"QVQKVQSGAEVKRP\"\n",
+ " ⋮\n",
+ " \"QVQLQNSGPGLVK\"\n",
+ " \"QVQLQQSGPGLVKPSQTLSL\"\n",
+ " \"QEKGGQEGGKV\"\n",
+ " \"QVQLVQSGAEVKKP\"\n",
+ " \"QVQLVESGGGVVQPGRSLR\"\n",
+ " \"QVQLVQSGAAV\"\n",
+ " \"QVQLVESGGG\"\n",
+ " \"EVNLVESMGGL\"\n",
+ " \"QVQLVQSGIAVKKPGA\"\n",
+ " \"QVQLPQSGAEVKKPGASVK\"\n",
+ " \"QVQLQESGPGLVK\"\n",
+ " \"QVQLVQSGAEVKKPGS\""
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model_samples_any = sample_gen_n(P, model; n=10, ts=0f0:0.01f0:1f0, rng=Random.MersenneTwister(42))\n",
+ "model_final_states = map(final_state_from_gen, model_samples_any)\n",
+ "model_strings_old = [state_to_PM_string(P, s) for s in model_final_states]\n",
+ "\n",
+ "n3 = 500\n",
+ "# Build training strings from PM parents (the data PM was built on)\n",
+ "train_states = [FF.DiscreteState(P.k, sample(PM; rng=Random.MersenneTwister(1000+i))) for i in 1:n3]\n",
+ "train_strings = [state_to_PM_string(P, s) for s in train_states]\n",
+ "\n",
+ "# Independent validation strings sampled from PM\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng=Random.MersenneTwister(1000+i+2*n3))) for i in 1:n3]\n",
+ "val_strings = [state_to_PM_string(P, s) for s in val_states]\n",
+ "#val_strings = [s for s in val_strings if !(s in train_strings)]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"Hej\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "pAA = plot_AA_dist(\"model_vs_true_AA\", val_strings, model_strings; title=\"AA frequency: Natural vs Generated\")\n",
+ "display(pAA)\n",
+ "\n",
+ "pLen = plot_len_dist(\"model_vs_true_len\", val_strings, model_strings; title=\"Length distribution: Natural vs Generated\")\n",
+ "display(pLen)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.11.6",
+ "language": "julia",
+ "name": "julia-1.11"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/editflow_code/helper_funcs.jl b/editflow_code/helper_funcs.jl
new file mode 100644
index 0000000..6a27246
--- /dev/null
+++ b/editflow_code/helper_funcs.jl
@@ -0,0 +1,180 @@
+using StringDistances, Statistics, ProgressMeter, Plots, StatsBase
+
+
+# Sampling helpers
+"""
+    state_to_PM_string(P, st) -> String
+
+Render the tokens of `st` as text. Token `0` is written as `'>'`, tokens equal
+to `P.padding_token` (when `P` has that field) are dropped, and every other
+token `tok` is written as `AA20[tok]`.
+"""
+function state_to_PM_string(P::FF.EditFlow, st::FF.DiscreteState)
+    # Fallback value used when `P` has no `padding_token` field.
+    pad = hasfield(typeof(P), :padding_token) ? P.padding_token : -1
+    return sprint() do io
+        for tok in FF.tensor(st)
+            if tok == 0
+                print(io, '>')
+            elseif tok != pad
+                print(io, AA20[tok])
+            end
+        end
+    end
+end
+
+# Apply `state_to_PM_string` to every state in `sts`, returning one string per state.
+function states_to_PM_strings(P::FF.EditFlow, sts::Vector{<:FF.DiscreteState})
+    return map(sts) do st
+        state_to_PM_string(P, st)
+    end
+end
+
+"""
+    sample_gen_n(P, model; n=10, ts=0f0:0.01f0:1f0, rng=Random.default_rng())
+
+Run `FF.gen` `n` times. Each run starts from the first state of a fresh
+`make_minibatch(1, P; rng)` draw with `P.bos_token` prepended. A leading BOS
+token is removed again from every `FF.DiscreteState` that `FF.gen` returns
+(single state or vector of states); any other return value is stored untouched.
+
+Returns a `Vector{Any}` of length `n`. `rng` is forwarded to `make_minibatch` only.
+"""
+function sample_gen_n(P::FF.EditFlow, model; n::Int=10, ts=0f0:0.01f0:1f0, rng=Random.default_rng())
+    bos = P.bos_token
+
+    # Drop the first token when it is BOS; otherwise hand the state back as-is.
+    function strip_bos_state(st::FF.DiscreteState)
+        xs = collect(FF.tensor(st))
+        (isempty(xs) || xs[1] != bos) && return st
+        return FF.DiscreteState(P.k, xs[2:end])
+    end
+
+    gens = Vector{Any}(undef, n)
+    for i in 1:n
+        x0s, _, _ = make_minibatch(1, P; rng=rng)
+        start = FF.DiscreteState(P.k, vcat([bos], collect(FF.tensor(x0s[1]))))
+        res = FF.gen(P, start, model, ts)
+        gens[i] = if res isa FF.DiscreteState
+            strip_bos_state(res)
+        elseif res isa AbstractVector{<:FF.DiscreteState}
+            strip_bos_state.(res)
+        else
+            res
+        end
+    end
+    return gens
+end
+
+"""
+    sample_gen_10_strings(P, model; n=20, ts=0f0:0.01f0:1f0, rng=Random.default_rng())
+
+Generate `n` samples with `sample_gen_n` and convert them to strings.
+
+Each element of the result is
+- a `String` when the sample is a single `FF.DiscreteState`,
+- a `Vector{String}` when the sample is a vector of states,
+- the raw sample otherwise.
+
+# Keywords
+- `n`: number of samples. Defaults to 20, the value that used to be hard-coded
+  (the "10" in the function name is historical).
+- `ts`: time grid forwarded to `sample_gen_n`.
+- `rng`: random number generator forwarded to `sample_gen_n`.
+"""
+function sample_gen_10_strings(P::FF.EditFlow, model; n::Int=20, ts=0f0:0.01f0:1f0,
+                               rng=Random.default_rng())
+    outs = sample_gen_n(P, model; n=n, ts=ts, rng=rng)
+    return map(outs) do o
+        if o isa FF.DiscreteState
+            state_to_PM_string(P, o)
+        elseif o isa AbstractVector{<:FF.DiscreteState}
+            states_to_PM_strings(P, o)
+        else
+            o
+        end
+    end
+end
+
+# Reduce a generation result to one value: a single state is returned unchanged,
+# a vector of states yields its last element, anything else is passed through.
+function final_state_from_gen(res)
+    if res isa FF.DiscreteState
+        return res
+    elseif res isa AbstractVector{<:FF.DiscreteState}
+        return last(res)
+    end
+    return res
+end
+
+"""
+    to_aa_tokens(P, st) -> Vector{Int}
+
+Collect the tokens of `st` as `Int`s, skipping token `0` and tokens equal to
+`P.padding_token` (when `P` has that field).
+"""
+function to_aa_tokens(P::FF.EditFlow, st::FF.DiscreteState)
+    # Fallback value used when `P` has no `padding_token` field.
+    pad = hasfield(typeof(P), :padding_token) ? P.padding_token : -1
+    return Int[Int(tok) for tok in FF.tensor(st) if !(tok == 0 || tok == pad)]
+end
+
+"""
+    min_lev_to_set(queries, refs; normalize=true) -> Vector{Float64}
+
+For every string in `queries`, compute the smallest Levenshtein distance to any
+string in `refs`.
+
+With `normalize=true` the distance is divided by the longer of the query and
+the reference that achieved the minimum. Two empty strings give `0.0`, not
+`NaN`.
+
+# Throws
+- `ArgumentError` if `queries` is non-empty but `refs` is empty, because no
+  minimum exists in that case.
+"""
+function min_lev_to_set(queries::AbstractVector{<:AbstractString},
+                        refs::AbstractVector{<:AbstractString}; normalize::Bool=true)
+    out = Vector{Float64}(undef, length(queries))
+    isempty(queries) && return out
+    isempty(refs) &&
+        throw(ArgumentError("refs must contain at least one reference string"))
+
+    d = Levenshtein()
+    ref_lens = length.(refs)
+    @showprogress for (i, q) in enumerate(queries)
+        Lq = length(q)
+        best = typemax(Int)
+        best_Lr = 1
+        for (r, Lr) in zip(refs, ref_lens)
+            # |Lq - Lr| is a lower bound on the edit distance, so a reference
+            # that cannot beat the current best is skipped without evaluating it.
+            abs(Lq - Lr) >= best && continue
+            dist = evaluate(d, q, r)
+            if dist < best
+                best = dist
+                best_Lr = Lr
+                best == 0 && break  # an exact match cannot be improved
+            end
+        end
+        # The extra `1` keeps the denominator positive when both strings are empty.
+        out[i] = normalize ? best / max(Lq, best_Lr, 1) : best
+    end
+    return out
+end
+
+"""
+    plot_lev_dist(name, val_seqs, gen_seqs, train_seqs; title, max_seqs=1000)
+
+Overlay two histograms of the minimum normalized Levenshtein distance to
+`train_seqs`: one for `val_seqs` (labelled "Natural") and one for `gen_seqs`
+(labelled "Generated"). Returns the plot object.
+
+Any input longer than `max_seqs` is randomly subsampled to `max_seqs` elements
+without replacement. `name` is currently unused, because saving the figure to
+disk is disabled.
+"""
+function plot_lev_dist(name, val_seqs, gen_seqs, train_seqs; title="Novelty vs. training set", max_seqs=1000)
+    # Subsample without replacement: duplicates would shrink the effective
+    # reference set and bias the distance distribution.
+    sample_if_large(seqs, n=max_seqs) =
+        length(seqs) > n ? StatsBase.sample(seqs, n; replace=false) : seqs
+
+    val_sample = sample_if_large(val_seqs)
+    gen_sample = sample_if_large(gen_seqs)
+    train_sample = sample_if_large(train_seqs)
+
+    val_lev = min_lev_to_set(val_sample, train_sample; normalize=true)
+    gen_lev = min_lev_to_set(gen_sample, train_sample; normalize=true)
+
+    # Shared bin edges for both histograms; fall back to [0, 1] when every
+    # distance is identical so the range is not degenerate.
+    minv = min(minimum(val_lev), minimum(gen_lev))
+    maxv = max(maximum(val_lev), maximum(gen_lev))
+    bins = (minv == maxv) ? range(0.0, 1.0, length=25) : range(minv, maxv, length=25)
+
+    # NOTE(review): bars are normalized with `:probability`, yet the y-axis is
+    # labelled "Density" — confirm which one is intended.
+    p = histogram(val_lev; normalize=:probability, alpha=0.5, label="Natural",
+                  bins=bins, xlabel="Min normalized Levenshtein", ylabel="Density", title=title)
+    histogram!(gen_lev; normalize=:probability, alpha=0.5, label="Generated", bins=bins)
+    return p
+end
+
+# Count every element across all sequences in `seqs`.
+# Returns `(labels, counts)`: the sorted distinct elements and the count map.
+function AA_counts(seqs)
+    tally = countmap(Iterators.flatten(seqs))
+    symbols = collect(keys(tally))
+    sort!(symbols)
+    return symbols, tally
+end
+
+"""
+    plot_AA_dist(name, real_seqs, gen_seqs; title="Normalized AA frequencies")
+
+Bar chart of per-letter proportions over the `AA20` alphabet for `real_seqs`
+("Natural") and `gen_seqs` ("Generated"). Characters outside `AA20` are
+ignored, and letters that never occur get a zero bar. `name` is not used.
+Returns the plot object.
+"""
+function plot_AA_dist(name, real_seqs, gen_seqs; title="Normalized AA frequencies")
+    # Fixed x-axis: always the full alphabet, in `AA20` order.
+    labels = collect(AA20)
+    alphabet = Set(labels)
+
+    # Proportion of each alphabet letter in `seqs`; all zeros when nothing is counted.
+    function proportions(seqs)
+        tally = countmap(Iterators.flatten((ch for s in seqs for ch in s if ch in alphabet)))
+        raw = Float64[get(tally, c, 0) for c in labels]
+        total = sum(raw)
+        return total > 0 ? raw ./ total : fill(0.0, length(raw))
+    end
+
+    r = proportions(real_seqs)
+    g = proportions(gen_seqs)
+
+    # Two-column matrix: one bar series per column.
+    p = bar(string.(labels), [r g];
+            label=["Natural" "Generated"], xlabel="AA", ylabel="Proportion",
+            title=title, alpha=0.5)
+    return p
+end
+
+"""
+    len_dist(seqs; fig_name="len_dist.pdf", sort_labels=true) -> (lengths, proportions)
+
+Empirical length distribution of `seqs`. The length of a sequence is the
+number of its characters that belong to `AA20`, so `'>'` and other characters
+are not counted. Returns the distinct lengths (sorted when `sort_labels`) and
+the fraction of sequences having each length. Empty input gives
+`(Int[], Float64[])`. `fig_name` is not used.
+"""
+function len_dist(seqs; fig_name="len_dist.pdf", sort_labels=true)
+    if isempty(seqs)
+        return (Int[], Float64[])
+    end
+    alphabet = Set(AA20)
+    lens = [count(in(alphabet), s) for s in seqs]
+    tally = countmap(lens)  # length => number of sequences
+    Ls = collect(keys(tally))
+    if sort_labels
+        sort!(Ls)
+    end
+    total = max(length(lens), 1)
+    props = [tally[L] / total for L in Ls]
+    return Ls, props
+end
+"""
+    plot_len_dist(name, real_seqs, gen_seqs; title="Sequence length distribution")
+
+Bar chart comparing the length distributions (see `len_dist`) of `real_seqs`
+("Natural") and `gen_seqs` ("Generated") on a shared x-axis. `name` is not
+used, because saving the figure is disabled. Returns the plot object.
+"""
+function plot_len_dist(name, real_seqs, gen_seqs; title="Sequence length distribution")
+    real_labels, real_props = len_dist(real_seqs)
+    gen_labels, gen_props = len_dist(gen_seqs)
+
+    # Shared x-axis: every length seen in either set.
+    all_labels = sort!(union(real_labels, gen_labels))
+
+    # Re-express one distribution on the shared axis; absent lengths become 0.0.
+    function aligned(labels, props)
+        table = Dict(labels .=> props)
+        return [get(table, L, 0.0) for L in all_labels]
+    end
+    r = aligned(real_labels, real_props)
+    g = aligned(gen_labels, gen_props)
+
+    p = bar(string.(all_labels), [r g];
+            label=["Natural" "Generated"], xlabel="Length", ylabel="Proportion",
+            title=title, alpha=0.5)
+    return p
+end
\ No newline at end of file
diff --git a/editflow_code/jack.jl b/editflow_code/jack.jl
new file mode 100644
index 0000000..bd7ea17
--- /dev/null
+++ b/editflow_code/jack.jl
@@ -0,0 +1,200 @@
+using Pkg
+Pkg.activate(".")
+ENV["CUDA_VISIBLE_DEVICES"] = 1
+using Revise, Random, Statistics, Adapt, Functors, Flux, Onion, RandomFeatureMaps, Zygote, CannotWaitForTheseOptimisers, CodecZlib, CSV, LearningSchedules, DataFrames, Distributions
+using Flowfusion
+const FF = Flowfusion
+include("scripts/model_training/editflows/prob_model.jl")
+include("scripts/model_training/editflows/helper_funcs.jl")
+include("scripts/misc/data_loading.jl")
+import CUDA
+
+# Device helpers
+# CUDA is probed once at load time; any error while probing counts as "no GPU".
+const _gpu_enabled = try
+    CUDA.has_cuda()
+catch
+    false
+end
+
+# Adapt `x` to CuArray storage when the GPU is enabled; otherwise return it unchanged.
+function to_dev(x)
+    _gpu_enabled || return x
+    return Adapt.adapt(CUDA.CuArray, x)
+end
+
+# Adapt `x` to plain Array storage when the GPU is enabled; otherwise return it unchanged.
+function to_cpu(x)
+    _gpu_enabled || return x
+    return Adapt.adapt(Array, x)
+end
+
+# Adapt `x` to the storage type of `y`: CuArray when the GPU is enabled and `y`
+# is a CuArray, plain Array in every other case.
+function to_same_device(x, y)
+    target = (_gpu_enabled && (y isa CUDA.CuArray)) ? CUDA.CuArray : Array
+    return Adapt.adapt(target, x)
+end
+
+# Load PM target (brings PM and AA20)
+
+# Container for the whole network. `layers` holds every sub-layer in a single
+# field; the keyword constructor in this file fills it with a NamedTuple.
+# The type parameter `L` keeps the field concretely typed.
+struct EditFlowModel{L}
+ layers::L
+end
+# Register the type with Flux via its `@layer` macro.
+Flux.@layer EditFlowModel
+
+"""
+    EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, K)
+
+Build the network for a vocabulary of `K` tokens.
+
+- `embedding`: `K + 2` rows of width `d`.
+- `time_embed`: random Fourier features (`1 => rff_dim`) followed by a `Dense`
+  layer into `cond_dim`.
+- `blocks`: `nlayers` `Onion.AdaTransformerBlock`s.
+- `head_combined`: bias-free `Dense` from `d` to `2K + 1` outputs.
+- `rope`: `RoPE` of head dimension `d ÷ num_heads`, built with `512` as its
+  second argument.
+"""
+function EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, K::Int)
+    layers = (
+        embedding = Flux.Embedding(K + 2 => d),
+        time_embed = Flux.Chain(
+            RandomFourierFeatures(1 => rff_dim, 1.0f0),
+            Dense(rff_dim => cond_dim),
+        ),
+        blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers],
+        head_combined = Dense(d => 2K + 1, bias=false),
+        rope = RoPE(d ÷ num_heads, 512),
+        K = K,
+    )
+    return EditFlowModel(layers)
+end
+
+# Forward pass. `t` is a scalar or a per-sequence collection of times; `Xt_ms`
+# is the masked token state. Returns the output of `head_combined` applied to
+# the final hidden states.
+function (model::EditFlowModel)(t, Xt_ms)
+    net = model.layers
+    tokens = FF.tensor(Xt_ms)
+    if ndims(tokens) == 1
+        # Treat a single sequence as a batch of one.
+        tokens = reshape(tokens, :, 1)
+    end
+    seqlen, nbatch = size(tokens)
+
+    # Token ids are shifted by one before the embedding lookup.
+    hidden = net.embedding(tokens .+ 1)
+
+    # One Float32 time per batch column; a scalar `t` is repeated for every column.
+    times = ndims(t) == 0 ? fill(Float32(t), nbatch) : Float32.(t)
+    cond = to_same_device(net.time_embed(reshape(times, 1, nbatch)), hidden)
+
+    # The mask and the rotary table are excluded from differentiation.
+    pmask = Zygote.@ignore to_same_device(FF.getlmask(Xt_ms), hidden)
+    rope = Zygote.@ignore to_same_device(net.rope[1:seqlen], hidden)
+
+    for blk in net.blocks
+        hidden = blk(hidden; cond, rope, kpad_mask=pmask)
+    end
+    return net.head_combined(hidden)
+end
+
+# Training sequences, one per line; the whole file is loaded into memory.
+data = readlines("data/oas_heavy_8M.txt")
+
+# NOTE(review): `PM` is not defined in this file; presumably it comes from the
+# included prob_model.jl — confirm.
+K = PM.K
+P = FF.EditFlow(K; bos_token=0)
+epochs = 1; batch_size = 64
+
+model = EditFlowModel(; d=512, num_heads=8, nlayers=12, rff_dim=512, cond_dim=512, K=K)
+const START_LR = 1f-3
+const MAX_LR = 1f-3
+const UP_GAMMA = 1.00f0
+const DOWN_GAMMA = 0.99994398f0
+
+# NOTE(review): `sched` is built here and again below, but it is not referenced
+# anywhere else in this file; the training loop takes its rate from `get_lr`.
+sched = burnin_learning_schedule(START_LR, MAX_LR, UP_GAMMA, DOWN_GAMMA)
+# Zero the scale/shift projection weights of both adaptive norms in every block.
+for layer in model.layers.blocks
+ layer.attention_norm.scale.weight .= 0.0f0
+ layer.attention_norm.shift.weight .= 0.0f0
+ layer.ffn_norm.scale.weight .= 0.0f0
+ layer.ffn_norm.shift.weight .= 0.0f0
+end
+
+# NOTE(review): `seed` is never assigned in this file. Unless an included file
+# defines it, the next line throws UndefVarError — confirm and define it.
+rng = Random.MersenneTwister(seed)
+Random.seed!(seed)
+global_step = 1
+model = Functors.fmap(to_dev, model)
+sched = burnin_learning_schedule(START_LR, MAX_LR, UP_GAMMA, DOWN_GAMMA)
+
+# Learning-rate warm-down: linear from 1f-3 at step 0 to 1f-7 at step
+# DECAY_STEPS, constant afterwards.
+total_iters = div(512000, batch_size)
+DECAY_STEPS = 8000
+get_lr(i) = 1f-3 + (1f-7 - 1f-3) * (Float32(min(i, DECAY_STEPS)) / Float32(DECAY_STEPS))
+opt_state = Flux.setup(Muon(eta=get_lr(1)), model)
+
+# Main training loop. Each step draws a random batch from `data`, builds the
+# aligned source/target pair, interpolates it at random times, and takes one
+# optimiser step on the edit loss. Batches that fail alignment, hit a
+# DimensionMismatch, or produce a negative loss are skipped; skipped batches do
+# not advance `global_step`.
+for epoch in 1:epochs
+    # Alternative data source (disabled): stream batches instead of sampling `data`.
+    # for (step, seqs) in enumerate(stream_heavy_batches(path; L=140, batch=batch_size, total=10^6))
+    for step in 1:total_iters
+        # This loop sits at file top level, where assigning to an existing
+        # global inside a `for` body creates a new local instead. Without this
+        # declaration, reading `global_step` below throws UndefVarError.
+        global global_step
+
+        seqs = [rand(data) for _ in 1:batch_size]
+        xpairs = format_for_editflows(seqs)
+        x0s = [p[1] for p in xpairs]
+        x1s = [p[2] for p in xpairs]
+        ts = rand(Float32, batch_size)
+        # Align sources with targets; skip the batch on known data errors.
+        Z0, Z1 = try
+            FF.align_and_batch(P, x0s, x1s)
+        catch e
+            if e isa LoadError || e isa ArgumentError
+                @warn "skip batch due to Data loading error" err=e
+                continue
+            else
+                rethrow()
+            end
+        end
+        Zt, Xt = FF.interpolate_Z_elementwise(P, Z0, Z1, ts)
+        # Prepend a row of BOS tokens to every sequence in the batch.
+        bos = P.bos_token
+        Zt = vcat(fill(bos, 1, batch_size), Zt)
+        Xt = vcat(fill(bos, 1, batch_size), Xt)
+        Z1 = vcat(fill(bos, 1, batch_size), Z1)
+
+        transition_mask = FF.transition_mask_from_Xt(P, Xt)
+        edit_multiplier = FF.remaining_edits(P, Zt, Z1, Xt)
+        # The denominator is offset by 0.2 (presumably to keep the scaling
+        # bounded as κ(t) approaches 1 — TODO confirm).
+        den = (1f0 .- P.κ.(ts)) .+ 0.2f0
+        scheduler_scaling = P.dκ.(ts) ./ den
+        # Build the masked state on the CPU, then move all inputs to the device.
+        lmask = Xt .!= P.padding_token
+        cmask = trues(size(lmask))
+        Xt_ms = FF.MaskedState(FF.DiscreteState(P.k, Xt), cmask, lmask)
+        ts_d = to_dev(ts)
+        Xt_ms_d = to_dev(Xt_ms)
+        Tmask_d = to_dev(transition_mask)
+        Emult_d = to_dev(edit_multiplier)
+        sched_d = to_dev(reshape(Float32.(scheduler_scaling), 1, 1, :))
+        loss, grad = try
+            Flux.withgradient(model) do m
+                M = m(ts_d, Xt_ms_d)
+                FF.edit_loss(P, M, Tmask_d, Emult_d, sched_d; eps=1f-8)
+            end
+        catch e
+            if e isa DimensionMismatch
+                @warn "skip batch due to DimensionMismatch" err=e
+                continue
+            else
+                rethrow()
+            end
+        end
+        if Float32(loss) < 0
+            @warn "negative loss at step $step"
+            continue
+        end
+        # Append one CSV row per successful step.
+        open("losses/EDITFLOWS.csv", "a") do io
+            println(io, "$(global_step),$(epoch),$(step),$(Float32(loss)),$(get_lr(step)),$(time())")
+        end
+        Flux.adjust!(opt_state, get_lr(step))
+        Flux.update!(opt_state, model, grad[1])
+        if step % 10 == 0
+            @info "EditFlows" epoch step loss=Float32(loss)
+        end
+        if step % 500 == 0
+            # Periodic qualitative check: sample from a CPU copy of the model.
+            samples = sample_gen_10_strings(P, Functors.fmap(to_cpu, model), n=5)
+            CSV.write("gens/samples_step_$(step).csv",DataFrame(step = fill(step, length(samples)), sample = samples))
+        end
+        global_step += 1
+    end
+end
+
+# Persist the trained weights as a CPU-side Flux state in a JLD2 file.
+model_name = "EDITFLOWS"
+using JLD2
+model_state = Flux.state(cpu(model))
+model_state_file = "models/model_state_"*model_name*".jld2"
+# Create the target directory first so a finished run is not lost to a missing folder.
+mkpath(dirname(model_state_file))
+JLD2.@save model_state_file model_state
+
+# `losses` is not populated anywhere in this script (per-step losses already go
+# to losses/EDITFLOWS.csv), so the plain-text dump is written only when a
+# `losses` collection exists, instead of crashing with UndefVarError.
+if @isdefined(losses)
+    open("losses/loss_$(model_name).txt", "w") do io
+        println.(io, losses)
+    end
+else
+    @warn "`losses` is not defined; skipping losses/loss_$(model_name).txt"
+end
+
+# Reference output: 20 sequences drawn directly from the PM target.
+rng = Random.MersenneTwister(42)
+println("\n=== True PM samples (20) ===")
+foreach(1:20) do i
+    seq = sample(PM; rng=rng)  # token ids indexing into AA20
+    println("[", i, "] ", String(collect(AA20[seq])))
+end
+
+# Model output: generate on a CPU copy, remove the '>' marker, write one entry per line.
+gens = sample_gen_10_strings(P, Functors.fmap(to_cpu, model); ts=0f0:0.005f0:1f0, n=500)
+gens = map(g -> replace(g, ">" => ""), gens)
+open("gens/$(model_name)_gens.txt", "w") do io
+    foreach(g -> println(io, g), gens)
+end
+
+# Echo every generated entry; trajectories are summarised by their last string.
+println("\n=== Model samples (10) ===")
+for (i, s) in enumerate(gens)
+    tag = string("[", i, "] ")
+    if s isa AbstractVector{<:AbstractString}
+        last_str = isempty(s) ? "" : s[end]
+        println(tag, "traj_last=", last_str, " (len=", length(s), ")")
+    elseif s isa AbstractString
+        println(tag, s)
+    else
+        println(tag, "(raw) ", s)
+    end
+end
diff --git a/editflow_code/jackcomparison.jl b/editflow_code/jackcomparison.jl
new file mode 100644
index 0000000..e69de29
diff --git a/editflow_code/prob_model.jl b/editflow_code/prob_model.jl
new file mode 100644
index 0000000..08f3a22
--- /dev/null
+++ b/editflow_code/prob_model.jl
@@ -0,0 +1,164 @@
+# ===================== Target: ProfileMixtureEOS from abs.txt =====================
+using Random  # provides AbstractRNG, MersenneTwister and default_rng used in this file
+const AA20 = collect("ACDEFGHIKLMNPQRSTVWY")  # 20 AA alphabet (no '#', no '-')
+const TOK2ID_AA = Dict{Char,Int}(c => i for (i,c) in enumerate(AA20)) # 1..20
+const K_AA = length(AA20)
+
+# Encode `line` as AA ids: strip, uppercase, skip characters not in TOK2ID_AA, and keep
+# at most `maxlen` ids, with `maxlen` drawn once from `trunc_range` using `rng`.
+function encode_line_AA(line::AbstractString; rng::AbstractRNG, trunc_range::UnitRange{Int}=10:20)
+    maxlen = rand(rng, trunc_range)
+    s = uppercase(strip(line))
+    out = Int[]
+    sizehint!(out, clamp(maxlen, 0, length(s)))
+    for ch in s
+        # Check the cap before pushing so that a drawn cap <= 0 yields an empty result.
+        length(out) >= maxlen && break
+        haskey(TOK2ID_AA, ch) && push!(out, TOK2ID_AA[ch])
+    end
+    return out
+end
+
+# Mixture over parent sequences of per-position token profiles with an end-of-sequence hazard.
+struct ProfileMixtureEOS
+    K::Int  # alphabet size; the constructor below requires parent tokens in 1..K
+    parents::Vector{Vector{Int}}  # one token sequence per mixture component
+    weights::Vector{Float64}  # mixture weights (normalized by the constructor below)
+    base_token_probs::Vector{Vector{Vector{Float64}}} # per m, per t
+    L::Vector{Int}  # L[m] == length(parents[m])
+    background::Vector{Float64}  # token distribution used past the end of a parent
+    mode::Symbol  # :hard or :soft
+    h_tail::Float64  # end hazard for t > L_m (:soft mode)
+    h_inside::Float64 # end hazard for t ≤ L_m
+    alpha::Float64  # α passed to the constructor
+    beta::Float64  # β passed to the constructor
+end
+
+# Build the mixture: profile[m][t] is α .* background with β added at parents[m][t],
+# renormalized. Empty `weights` means uniform; `background` defaults to uniform over 1:K.
+function ProfileMixtureEOS(parents::Vector{Vector{Int}};
+        K::Union{Int,Nothing}=nothing, weights::AbstractVector=Float64[],
+        α::Real=1.0, β::Real=20.0,
+        background::Union{AbstractVector{<:Real},Nothing}=nothing,
+        mode::Symbol=:soft, h_tail::Real=0.99, h_inside::Real=0.001)
+    N = length(parents)
+    N > 0 || throw(ArgumentError("parents must contain at least one sequence"))
+    K === nothing && (K = maximum([isempty(p) ? 1 : maximum(p) for p in parents]))
+    @assert all(all(1 .<= p .<= K) for p in parents)
+    @assert mode in (:hard, :soft)
+    mode === :soft && @assert 0.0 < h_tail < 1.0
+    mode === :soft && @assert 0.0 < h_inside < 1.0
+    # These vectors are indexed under @inbounds later on, so their lengths are checked here.
+    w = isempty(weights) ? fill(1.0/N, N) : collect(weights ./ sum(weights))
+    length(w) == N || throw(ArgumentError("expected $N weights, got $(length(w))"))
+    bg = background === nothing ? fill(1.0/K, K) : collect(background ./ sum(background))
+    length(bg) == K || throw(ArgumentError("background must have length $K, got $(length(bg))"))
+    base_token_probs = Vector{Vector{Vector{Float64}}}(undef, N)
+    for (m, seq) in enumerate(parents)
+        base_token_probs[m] = map(seq) do a
+            λv = α .* bg
+            λv[a] += β
+            return λv ./ sum(λv)
+        end
+    end
+    return ProfileMixtureEOS(K, parents, w, base_token_probs, length.(parents), bg, mode,
+        float(h_tail), float(h_inside), float(α), float(β))
+end
+
+# End-of-sequence hazard of component `m` at position `t`: in :hard mode it is 0.0 up to
+# L[m] and 1.0 after it; in any other mode it is h_inside up to L[m] and h_tail after it.
+@inline function _hazard(M::ProfileMixtureEOS, m::Int, t::Int)
+    within_parent = t <= M.L[m]
+    hard_mode = M.mode === :hard
+    hard_mode && return within_parent ? 0.0 : 1.0
+    return within_parent ? M.h_inside : M.h_tail
+end
+
+# Stable log-sum-exp: shift by max(v); a non-finite maximum is returned unchanged.
+@inline function _logsumexp(v::AbstractVector{<:Real})
+    vmax = maximum(v)
+    isfinite(vmax) || return vmax
+    return vmax + log(foldl((acc, x) -> acc + exp(x - vmax), v; init=0.0))
+end
+
+# Log-probability that component `m` emits exactly `x` and then stops. Each position
+# contributes (1 - hazard) * token probability; the stop after the last token
+# contributes the hazard at position length(x) + 1. Any zero factor gives -Inf.
+function _logpmf_component(M::ProfileMixtureEOS, m::Int, x::Vector{Int})
+    Lm = M.L[m]
+    logp = 0.0
+    @inbounds for (t, tok) in enumerate(x)
+        # Positions inside the parent use its profile, later ones the background.
+        tokprob = t <= Lm ? M.base_token_probs[m][t][tok] : M.background[tok]
+        step = (1.0 - _hazard(M, m, t)) * tokprob
+        step <= 0.0 && return -Inf
+        logp += log(step)
+    end
+    stop = _hazard(M, m, length(x) + 1)
+    # With x empty the loop is skipped and this reduces to log(stop).
+    return stop > 0.0 ? logp + log(stop) : -Inf
+end
+
+# Mixture log-probability of `x`: log-sum-exp over components m of
+# log(weights[m]) + _logpmf_component(M, m, x).
+function logpmf(M::ProfileMixtureEOS, x::Vector{Int})
+    @assert all(1 .<= x .<= M.K) "PMF expects tokens in 1..K_AA"
+    terms = similar(M.weights)
+    map!(m -> log(M.weights[m]) + _logpmf_component(M, m, x), terms, eachindex(M.parents))
+    return _logsumexp(terms)
+end
+pmf(M::ProfileMixtureEOS, x::Vector{Int}) = exp(logpmf(M, x))  # linear-scale probability
+
+# Inverse-CDF draw of an index of `p`; strict `<` never picks a p[i] == 0 entry at r == 0.
+@inline function _catdraw(rng::AbstractRNG, p::AbstractVector{<:Real})
+    r = rand(rng); acc = 0.0
+    @inbounds for i in eachindex(p)
+        acc += p[i]; r < acc && return i
+    end
+    return lastindex(p)  # reached only when the running sum never exceeds r
+end
+
+# Sample one sequence from component `m`: at each position first test the stop hazard
+# (no uniform is drawn when the hazard is 0), otherwise emit a token and move on.
+function _sample_from_component(M::ProfileMixtureEOS, m::Int; rng::AbstractRNG)
+    tokens = Int[]
+    for t in Iterators.countfrom(1)
+        h = _hazard(M, m, t)
+        (h > 0.0 && rand(rng) < h) && return tokens
+        dist = t <= M.L[m] ? M.base_token_probs[m][t] : M.background
+        push!(tokens, _catdraw(rng, dist))
+    end
+end
+
+function sample(M::ProfileMixtureEOS; rng::AbstractRNG=Random.default_rng(), return_component::Bool=false)
+ m = _catdraw(rng, M.weights) # component index drawn according to M.weights
+ seq = _sample_from_component(M, m; rng=rng) # token sequence from that component, same rng
+ return return_component ? (seq, m) : seq # tuple only when the caller asks for the component
+end
+import Random: rand
+rand(rng::AbstractRNG, M::ProfileMixtureEOS) = sample(M; rng=rng) # rand(rng, M) forwards to sample
+rand(M::ProfileMixtureEOS) = sample(M) # uses sample's default rng
+
+# Read at most `maxlines` lines of `file`, encode each with `encode_line_AA` (one shared
+# MersenneTwister(seed)), and build the mixture over K_AA tokens from the non-empty results.
+function build_target_from_abs(; file::AbstractString=joinpath(@__DIR__, "abs.txt"),
+        maxlines::Int=100, seed::Int=0,
+        α=1.0, β=20.0, mode::Symbol=:soft, h_tail=0.99, h_inside=0.001,
+        trunc_range::UnitRange{Int}=10:20)
+    @assert isfile(file) "abs.txt not found at $file"
+    rng = Random.MersenneTwister(seed)
+    parents = Vector{Int}[]
+    open(file, "r") do io
+        for (lineno, raw) in enumerate(eachline(io))
+            lineno <= maxlines || break
+            encoded = encode_line_AA(raw; rng=rng, trunc_range=trunc_range)
+            isempty(encoded) || push!(parents, encoded)
+        end
+    end
+    @assert !isempty(parents) "No non-empty sequences in first $maxlines lines after filtering."
+    return ProfileMixtureEOS(parents; K=K_AA, α=α, β=β, mode=mode, h_tail=h_tail, h_inside=h_inside)
+end
+
+# Module-level target built when this file is loaded, from the abs.txt next to it.
+const PM = build_target_from_abs(file=joinpath(@__DIR__, "abs.txt"))
+
diff --git a/editflow_code/test/analysis_notebook.ipynb b/editflow_code/test/analysis_notebook.ipynb
new file mode 100644
index 0000000..662e339
--- /dev/null
+++ b/editflow_code/test/analysis_notebook.ipynb
@@ -0,0 +1,3753 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Activating\u001b[22m\u001b[39m project at `~/dev/Flowfusion.jl/editflow_code`\n",
+ "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Project.toml`\n",
+ "\u001b[32m\u001b[1m No Changes\u001b[22m\u001b[39m to `~/dev/Flowfusion.jl/editflow_code/Manifest.toml`\n",
+ "WARNING: using StatsBase.sample in module Main conflicts with an existing identifier.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "plot_len_dist (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "using Pkg\n",
+ "Pkg.activate(@__DIR__)\n",
+ "Pkg.instantiate()\n",
+ "using Revise\n",
+ "\n",
+ "using Random\n",
+ "using Statistics\n",
+ "using Adapt\n",
+ "using Functors\n",
+ "using Flux\n",
+ "using Onion\n",
+ "using RandomFeatureMaps\n",
+ "using Zygote\n",
+ "\n",
+ "Pkg.develop(path=joinpath(@__DIR__, \"..\"))\n",
+ "using Flowfusion\n",
+ "const FF = Flowfusion\n",
+ "\n",
+ "include(joinpath(@__DIR__, \"prob_model.jl\"))\n",
+ "include(joinpath(@__DIR__, \"helper_funcs.jl\"))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "to_same_device (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import CUDA\n",
+ "\n",
+ "# Device helpers\n",
+ "const _gpu_enabled = try\n",
+ " CUDA.has_cuda()\n",
+ "catch\n",
+ " false\n",
+ "end\n",
+ "\n",
+ "to_dev(x) = _gpu_enabled ? Adapt.adapt(CUDA.CuArray, x) : x\n",
+ "to_cpu(x) = _gpu_enabled ? Adapt.adapt(Array, x) : x\n",
+ "# move x to the same device type as y (Array or CuArray)\n",
+ "to_same_device(x, y) = (_gpu_enabled && (y isa CUDA.CuArray)) ? Adapt.adapt(CUDA.CuArray, x) : Adapt.adapt(Array, x)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "make_minibatch (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Minibatch builder (uses true PM)\n",
+ "function make_minibatch(B::Int, P::FF.EditFlow; rng=Random.default_rng())\n",
+ " K = P.k\n",
+ " x0s = Vector{FF.DiscreteState}(undef, B)\n",
+ " x1s = Vector{FF.DiscreteState}(undef, B)\n",
+ " for b in 1:B\n",
+ " # x1 from true PM\n",
+ " seq1 = sample(PM; rng=rng)\n",
+ " @assert all(1 .<= seq1 .<= K)\n",
+ " x1s[b] = FF.DiscreteState(K, seq1)\n",
+ " # x0: uniform tokens with random length in 1:10 (no BOS in x0)\n",
+ " L0 = rand(rng, 1:10)\n",
+ " seq0 = rand(rng, 1:K, L0)\n",
+ " x0s[b] = FF.DiscreteState(K, seq0)\n",
+ " end\n",
+ " ts = rand(rng, Float32, B)\n",
+ " return x0s, x1s, ts\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Model definition\n",
+ "struct EditFlowModel{L}\n",
+ " layers::L\n",
+ "end\n",
+ "Flux.@layer EditFlowModel\n",
+ "\n",
+ "function EditFlowModel(; d=128, num_heads=8, nlayers=6, rff_dim=128, cond_dim=128, K::Int)\n",
+ " embedding = Flux.Embedding(K + 2 => d)\n",
+ " time_embed = Flux.Chain(RandomFourierFeatures(1 => rff_dim, 1.0f0), Dense(rff_dim => cond_dim))\n",
+ " blocks = [Onion.AdaTransformerBlock(d, cond_dim, num_heads) for _ in 1:nlayers]\n",
+ " head_combined = Dense(d => 2K + 1, bias=false)\n",
+ " rope = RoPE(d ÷ num_heads, 4096)\n",
+ " return EditFlowModel((; embedding, time_embed, blocks, head_combined, rope, K))\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Forward pass\n",
+ "function (model::EditFlowModel)(t, Xt_ms)\n",
+ " m = model.layers\n",
+ " X = FF.tensor(Xt_ms)\n",
+ " X = ndims(X) == 1 ? reshape(X, :, 1) : X\n",
+ " L, B = size(X)\n",
+ "\n",
+ " pmask = Zygote.@ignore FF.getlmask(Xt_ms)\n",
+ " Xp = X .+ 1\n",
+ " H = m.embedding(Xp)\n",
+ "\n",
+ " t = ndims(t) == 0 ? fill(Float32(t), B) : Float32.(t)\n",
+ " cond = m.time_embed(reshape(t, 1, B))\n",
+ "\n",
+ " cond = to_same_device(cond, H)\n",
+ " pmask = Zygote.@ignore to_same_device(pmask, H)\n",
+ " rope = Zygote.@ignore to_same_device(m.rope[1:L], H)\n",
+ "\n",
+ " for blk in m.blocks\n",
+ " H = blk(H; cond, rope, kpad_mask=pmask)\n",
+ " end\n",
+ " return m.head_combined(H)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "train_editflow! (generic function with 1 method)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Training (GPU if available)\n",
+ "function train_editflow!(P::FF.EditFlow,\n",
+ " model;\n",
+ " epochs::Int=1,\n",
+ " steps_per_epoch::Int=100,\n",
+ " batch_size::Int=64,\n",
+ " lr::Float32=1f-2,\n",
+ " seed::Int=42,\n",
+ " print_every::Int=25)\n",
+ "\n",
+ " rng = Random.MersenneTwister(seed)\n",
+ " Random.seed!(seed)\n",
+ "\n",
+ " # Move model to device (GPU if available)\n",
+ " model = Functors.fmap(to_dev, model)\n",
+ " opt_state = Flux.setup(Flux.Adam(lr), model)\n",
+ "\n",
+ " for epoch in 1:epochs\n",
+ " for step in 1:steps_per_epoch\n",
+ "\n",
+ " # 1) Minibatch Sampling\n",
+ " x0s, x1s, ts = make_minibatch(batch_size, P; rng=rng)\n",
+ "\n",
+ " # 2) Align and batch\n",
+ " Z0, Z1 = FF.align_and_batch(P, x0s, x1s)\n",
+ "\n",
+ " # 3) Interpolate\n",
+ " Zt, Xt = FF.interpolate_Z_elementwise(P, Z0, Z1, ts)\n",
+ "\n",
+ " # 4) Prepend BOS\n",
+ " bos = P.bos_token\n",
+ " Zt = vcat(fill(bos, 1, batch_size), Zt)\n",
+ " Xt = vcat(fill(bos, 1, batch_size), Xt)\n",
+ " Z1 = vcat(fill(bos, 1, batch_size), Z1)\n",
+ " \n",
+ " # 5) Masks and multipliers\n",
+ " transition_mask = FF.transition_mask_from_Xt(P, Xt)\n",
+ " edit_multiplier = FF.remaining_edits(P, Zt, Z1, Xt)\n",
+ " @assert size(transition_mask) == size(edit_multiplier)\n",
+ "\n",
+ " # 6) Scheduler scaling\n",
+ " den = 1f0 .- P.κ.(ts)\n",
+ " den = max.(den, 1f-3)\n",
+ " scheduler_scaling = P.dκ.(ts) ./ den\n",
+ "\n",
+ " # 7) Masked state\n",
+ " lmask = Xt .!= P.padding_token\n",
+ " cmask = trues(size(lmask))\n",
+ " Xt_ms = FF.MaskedState(FF.DiscreteState(P.k, Xt), cmask, lmask)\n",
+ "\n",
+ " ts_d = to_dev(ts)\n",
+ " Xt_ms_d = to_dev(Xt_ms)\n",
+ " Tmask_d = to_dev(transition_mask)\n",
+ " Emult_d = to_dev(edit_multiplier)\n",
+ " sched_d = to_dev(reshape(Float32.(scheduler_scaling), 1, 1, :))\n",
+ "\n",
+ " # 8) Forward + loss + update\n",
+ " loss, grad = Flux.withgradient(model) do m\n",
+ " M = m(ts_d, Xt_ms_d)\n",
+ " l = FF.edit_loss(P, M, Tmask_d, Emult_d, sched_d; eps=1f-8)\n",
+ " if isnan(l)\n",
+ " println(\"Maximum element of M: \", maximum(M))\n",
+ " end\n",
+ " l\n",
+ " end\n",
+ " Flux.update!(opt_state, model, grad[1])\n",
+ "\n",
+ " if step % print_every == 0\n",
+ " @info \"train\" epoch step loss=Float32(loss)\n",
+ " end\n",
+ " end\n",
+ " end\n",
+ "\n",
+ " return Functors.fmap(to_cpu, model)\n",
+ "end\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Init process and model\n",
+ "K = PM.K\n",
+ "P = FF.EditFlow(K; bos_token=0, gap_wise=false)\n",
+ "model = EditFlowModel(; d=128, num_heads=8, nlayers=4, rff_dim=128, cond_dim=128, K=K)\n",
+ "nothing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.746237f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.981628f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.213884f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.454155f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.665428f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 1\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.873983f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.157003f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 15.275973f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.903381f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.729305f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.102013f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 2\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.672424f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.530746f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.67213f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.382526f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.963814f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.835972f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 3\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.291992f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.19426f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.382185f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.135967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.92569f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.028893f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 4\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.427214f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.69117f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.73476f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.384754f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.658775f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.132454f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 5\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.998142f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.203682f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.36901f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.243134f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.106112f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.82871f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 6\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.493202f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.290234f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.923895f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.832842f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.057846f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.94542f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 7\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.881554f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.718155f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.733717f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.073797f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.92004f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.047007f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 8\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.872967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.382488f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.645565f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.287525f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.113968f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.087315f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 9\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.874767f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.860031f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.602049f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.410063f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 10.562942f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.848387f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 10\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.861128f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.541365f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.236982f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.957115f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.865005f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.583748f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 11\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.02132f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.584347f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.580858f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.981087f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.591656f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.066517f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 12\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.051542f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.067421f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.05304f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.487747f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.222967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.143532f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 13\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.575806f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.158926f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.09401f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.851517f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.892365f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.7897f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 14\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.985878f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.019676f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.797548f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.55872f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.278553f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.254448f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 15\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.536222f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.821463f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.80231f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.376482f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.044758f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.442654f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 16\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.176807f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.091465f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.013334f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.81593f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.212975f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.84259f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 17\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.185429f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.470398f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.427452f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.94579f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.695702f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.645878f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 18\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.073719f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.841595f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.278421f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.68238f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.269619f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.283346f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 19\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.181093f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.541328f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.628794f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.634392f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.7462f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.353523f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 20\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.927622f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.720219f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.47013f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.810509f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.401003f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.512375f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 21\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.377796f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.350807f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.468513f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.804567f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.310434f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.925941f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 22\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.39031f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.91434f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.262634f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.324049f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.413176f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 27.26462f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 23\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.746334f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.85251f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.560282f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.03707f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.268227f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.471123f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 24\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.593208f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.767986f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.076988f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.13642f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.121529f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.215782f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 25\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.73236f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.485447f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.659838f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.290493f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.489616f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.764778f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 26\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.014236f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.763737f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.692177f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.253918f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.51273f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.86855f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 27\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.306889f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.108227f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.949062f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.045507f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.165205f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 12.919155f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 28\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.903103f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.889954f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.677599f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.235676f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.818413f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.454956f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 29\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.443413f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.767849f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.12367f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.95783f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.690418f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.498676f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 30\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.116188f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.376179f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.445965f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.826906f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.523788f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.482994f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 31\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.550964f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.193125f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.838432f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.289452f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.098053f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.513762f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 32\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.862543f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.810226f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.037594f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.468763f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.604746f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.249472f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 33\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.916328f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.879818f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.453798f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.617702f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.713636f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.131275f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 34\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.763681f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.138496f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.24766f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.000917f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.266335f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.831928f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 35\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.104105f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.901894f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.866064f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.89682f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.663845f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.070297f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 36\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.728453f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.615274f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.564804f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.089449f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.273602f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.159435f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 37\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.562922f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.734241f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.008451f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 8.860302f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.15115f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.32642f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 38\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.513498f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.381474f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.2743f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.010727f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.264381f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.826624f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 39\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.921288f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.755466f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.364662f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.126223f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.64931f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.564432f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 40\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.728107f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.909348f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.832344f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.323364f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.952206f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.968285f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 41\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.83363f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.986597f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.239515f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.450031f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.893978f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.543922f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 42\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.674696f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.830986f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.515713f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.128778f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.751575f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.633709f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 43\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.873392f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.810955f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.402004f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.716564f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.452545f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.890793f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 44\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.448315f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.888472f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.899237f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.195805f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.420376f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.594948f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 45\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 9.004853f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.887007f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.23485f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.913727f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.218992f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.94302f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 46\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.745415f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.451485f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.83382f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.155079f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.894794f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.932127f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 47\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.041811f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.656693f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.531162f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.119312f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.319973f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.268059f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 48\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.691246f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.474157f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.733646f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.136124f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.320087f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.540567f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 49\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.147991f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.83776f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.327303f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.39455f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.208961f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.616371f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 50\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.594536f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.58604f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.868906f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.821358f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.95021f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.622738f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 51\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.146685f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.15935f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.340935f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.693542f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.80805f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 26.283953f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 52\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.714622f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.754047f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.48253f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.189365f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.135786f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.220694f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 53\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.33191f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.619081f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.133217f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.216085f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.81441f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.997604f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 54\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.05939f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.644253f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.239815f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.43597f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.8848f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.401718f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 55\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.64897f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.992f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.69675f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.861366f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.179195f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = -11.587956f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 56\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.98692f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.81665f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.926167f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.971962f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.27145f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.326494f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 57\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.14252f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.31263f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.074333f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.830984f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.307228f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.105434f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 58\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.807196f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.067299f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.049335f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.681822f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.253326f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.409986f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 59\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.683414f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.863966f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 15.980584f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.086948f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.897078f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.529032f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 60\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.82579f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.25418f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.34762f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.448723f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.228636f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.144558f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 61\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.490255f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.441963f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.559408f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.865923f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.954279f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.403667f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 62\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.830547f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.544434f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.088642f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 5.486371f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.702126f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.829082f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 63\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.889729f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.04778f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.33698f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.940506f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.096375f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.802752f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 64\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.215298f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.598557f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.455418f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.71409f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.750744f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.510859f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 65\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.624943f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.001049f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.153976f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.199299f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.778515f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.019203f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 66\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.27776f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.881886f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.737415f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.321026f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.270313f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.653442f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 67\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.653757f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.06898f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.069595f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.993654f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.809895f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.706173f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 68\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.573029f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.283886f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.783833f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.422552f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.811619f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.002556f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 69\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.116966f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.023243f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.249037f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.984207f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.499313f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.772282f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 70\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.921055f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.7816f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.670277f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.296196f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.030935f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.194506f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 71\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.43831f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.557142f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.490828f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.104092f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.674341f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.07388f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 72\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.306126f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.135044f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.159412f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.045418f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.40194f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.572552f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 73\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.981903f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.257917f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.122993f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.006748f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.172253f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.008356f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 74\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.84019f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.151068f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.485172f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.153769f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.290163f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.762228f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 75\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.199116f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.930504f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.04787f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.809868f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.353975f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.562157f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 76\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.483915f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.138283f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.523014f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.520844f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.04196f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.441517f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 77\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.367897f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.163624f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.393862f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.718964f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.644243f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.149078f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 78\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.814312f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.866508f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.337425f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.624027f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.402237f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.410057f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 79\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.08833f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.47953f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.978199f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.085361f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.211655f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.508362f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 80\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.466614f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.391834f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.903198f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.485249f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.64059f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.317387f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 81\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.682806f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.254333f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.734877f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.537487f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.094929f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.800623f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 82\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.080275f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.002201f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.30722f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.413658f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.382023f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.645979f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 83\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.672878f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.800962f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.22847f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.500309f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.74908f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.878939f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 84\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.335938f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.956377f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.555515f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.946405f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.589813f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.090248f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 85\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.336586f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.855642f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.779556f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.152008f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.799198f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.729807f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 86\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.73042f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.806095f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.405361f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.57449f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.7407f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.59734f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 87\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.25967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.743744f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.98431f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 15.31344f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.085484f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.487297f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 88\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.491331f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 16.660719f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.529898f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.550985f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.375748f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.937206f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 89\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.631706f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 17.535595f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.669437f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.55685f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.808851f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.87554f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 90\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.389326f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.405817f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.464638f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.111252f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.999516f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.218391f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 91\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.187355f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.420967f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.060745f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.60073f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.225628f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.36546f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 92\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.678913f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.350622f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.95152f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.601427f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.351856f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.845188f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 93\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.619637f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.598852f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 24.265049f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 25.088186f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.131905f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.927309f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 94\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.709093f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.480736f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.990673f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.88835f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.45876f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.846493f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 95\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.63565f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.12495f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.763638f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.331306f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.594742f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.491268f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 96\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.579607f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.1447f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.514366f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 18.976524f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.168167f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.450222f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 97\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.13343f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.17472f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.017311f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.751623f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.171782f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 19.81949f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 98\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.225515f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.012215f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.367924f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.439997f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.901167f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.113953f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 99\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.226475f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 25\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 23.480293f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 50\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.56205f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 75\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 20.577469f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 100\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.884739f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 125\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 21.706923f0\n",
+ "\u001b[36m\u001b[1m┌ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mtrain\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m epoch = 100\n",
+ "\u001b[36m\u001b[1m│ \u001b[22m\u001b[39m step = 150\n",
+ "\u001b[36m\u001b[1m└ \u001b[22m\u001b[39m loss = 22.257725f0\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "EditFlowModel(\n",
+ " Embedding(22 => 128), \u001b[90m# 2_816 parameters\u001b[39m\n",
+ " Chain(\n",
+ " RandomFourierFeatures{Float32, Matrix{Float32}}(Float32[4.107619 -1.6246507 … -3.6645346 -1.3967112]),\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " [\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.96080273, 0.93640083, 0.9547064, 0.8971493, 0.95765316, 0.92207366, 0.89724094, 0.9440924, 0.94302225, 0.94956416 … 0.94014513, 0.9212964, 0.91572565, 0.9039559, 0.9231686, 0.9042348, 0.9314332, 0.87426275, 0.8752597, 0.9407146], Float32[-0.025268404, 0.0020312557, -0.010115526, 0.020099802, 0.018506693, 0.0025694135, -0.023937318, -0.038941544, -0.038218282, -0.008356358 … 0.0045030406, 0.029052407, -0.039042093, -0.009993242, 0.003901428, -0.0070870365, -0.037610408, -0.06006574, 0.015338725, 0.011129025], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9615473, 0.97570944, 0.9112915, 0.96853894, 0.9237012, 0.9523371, 0.9733101, 0.9436969, 1.0086424, 0.98759335 … 0.96384525, 0.98503447, 0.9836052, 0.95819885, 0.969784, 0.9607591, 0.9873663, 1.0246212, 0.97779596, 0.963771], Float32[-0.027013712, -0.0054144724, 0.0071041402, -0.015267112, -0.05662798, -0.00019226706, 0.017086789, 0.010009103, 0.004368342, -0.022684177 … 0.008213096, 0.01543913, 0.008588103, -0.042858012, 0.005013047, -0.0087100575, -0.00035417278, -0.0044492367, -3.3963931f-6, -0.0070771035], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.94601697, 0.97375804, 0.9406857, 0.953404, 0.95697165, 0.9183645, 0.93461883, 0.93530893, 0.98690313, 0.92145735 … 0.8612234, 0.87250346, 0.98907375, 0.956685, 0.9498322, 0.9437079, 0.9624967, 0.949929, 0.97197264, 0.97001827], Float32[0.0011167054, -0.014636964, 0.0022019444, 0.0121619, -0.013723163, 0.001547043, 0.011414896, -0.0012122085, -0.006493586, -0.021626603 … 0.069353715, -0.012018405, 0.004048702, 0.004974579, -0.017438088, -0.010104225, -0.009309289, 4.483607f-5, -0.012501754, -0.006304365], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.97332007, 1.0309724, 0.9687911, 0.97576725, 0.96835095, 0.9971229, 0.993208, 0.9710502, 1.003863, 0.9820828 … 0.982708, 0.92708766, 1.0036467, 0.9794132, 0.99849373, 0.9908135, 0.9964946, 1.0093178, 0.9900424, 0.9617081], Float32[-0.016477505, -0.014338043, 0.040039297, -0.029744757, -0.029461078, -0.041657545, 0.027084501, 0.041064046, -0.025039643, 0.0060120723 … 0.0095715625, -0.0138390735, 0.0026735286, -0.050670695, -0.024575308, 0.006731998, -0.007667666, -6.0379556f-5, -0.0033010722, -0.048479233], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.94413495, 0.9591206, 0.9815761, 0.9860674, 0.9643858, 0.99948543, 0.95805496, 0.9728332, 0.9892072, 0.93138975 … 0.92429256, 1.0070444, 0.9443377, 0.9681403, 0.9447193, 0.9323913, 0.9746358, 0.99652237, 0.96340334, 0.97654444], Float32[-0.03537166, -0.006217378, -0.007379122, 0.009719772, -0.0314247, 0.016313963, -0.0015389565, 0.021076221, 0.010470716, -0.02639406 … 0.0006755494, 0.005295955, 0.017049158, -0.0132954335, 0.005307746, 0.02653267, -0.008163272, 0.009826559, 0.030178566, -0.0014013725], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.93666166, 1.0321827, 0.9726321, 0.98780626, 0.9613221, 1.0248475, 1.0161513, 0.96915793, 0.9948018, 1.0429728 … 1.0270013, 1.0185184, 1.0062677, 0.9811149, 1.0302101, 1.0181782, 1.0179951, 1.0333631, 1.0274625, 0.9702701], Float32[-0.06427912, 0.002420664, 0.047289338, -0.034010448, -0.03299984, -0.013876393, 0.023457177, 0.020834371, 0.010680823, -0.032694057 … -0.003318202, -0.0250829, 0.027400794, -0.031808987, -0.024685763, -0.00258377, -0.011304101, -0.01021114, -0.0004895934, -0.020227717], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " TransformerBlock(\n",
+ " Attention(\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " Dense(128 => 128; bias=false), \u001b[90m# 16_384 parameters\u001b[39m\n",
+ " identity,\n",
+ " identity,\n",
+ " Onion.var\"#32#34\"(),\n",
+ " 16,\n",
+ " 16,\n",
+ " 8,\n",
+ " 8,\n",
+ " ),\n",
+ " StarGLU(\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(512 => 128; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " Dense(128 => 512; bias=false), \u001b[90m# 65_536 parameters\u001b[39m\n",
+ " NNlib.swish,\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.98147184, 0.9223102, 1.0042982, 0.97599345, 0.98011535, 0.9964276, 0.9762427, 0.94932944, 0.96026486, 0.95611215 … 0.9575027, 0.9455124, 0.99811995, 0.9754665, 0.9862882, 0.9942666, 0.9694813, 0.96348906, 0.95470506, 0.9039463], Float32[-0.014831864, 9.283005f-5, -0.008477221, 0.040162712, 0.022272639, 0.0436828, -0.009014154, 0.055058725, 0.030263849, -0.040652655 … -0.010325852, -0.023909276, 0.009594226, 0.00861934, -0.0045753345, -0.015833346, 0.019944627, 0.011142825, 0.025711212, 0.026980491], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " AdaLN(\n",
+ " Onion.LayerNorm{Vector{Float32}, Vector{Float32}, Float32}(Float32[0.9566518, 1.0749155, 1.0178746, 1.0410086, 1.0265093, 1.0722593, 1.0385908, 1.0496945, 1.1038729, 1.003326 … 1.0357652, 1.005023, 1.0573806, 1.0052382, 0.98908836, 1.0178303, 1.110416, 1.0397507, 1.0142217, 1.0475482], Float32[-0.08760491, 0.051100586, 0.08073487, -0.080240734, -0.037350997, -0.064348176, 0.01896439, 0.012915793, 0.041853514, -0.02849257 … -0.012428334, -0.040537376, 0.026203565, -0.026402708, -0.039873, -0.0071864277, 0.023559058, -0.015315336, 0.05209827, -0.06641875], 1.0f-6), \u001b[90m# 256 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " Dense(128 => 128), \u001b[90m# 16_512 parameters\u001b[39m\n",
+ " ),\n",
+ " identity,\n",
+ " ),\n",
+ " ],\n",
+ " Dense(128 => 41; bias=false), \u001b[90m# 5_248 parameters\u001b[39m\n",
+ " RoPE{Matrix{Float32}}(Float32[1.0 0.5403023 … -0.87528455 -0.065975994; 1.0 0.95041525 … 0.9552474 0.8159282; … ; 1.0 0.9999995 … -0.57972294 -0.5789079; 1.0 0.99999994 … 0.2726629 0.27235872], Float32[0.0 0.84147096 … -0.48360822 -0.9978212; 0.0 0.3109836 … 0.29580808 0.5781532; … ; 0.0 0.0009999999 … -0.8148137 -0.8153929; 0.0 0.0003162278 … 0.9621096 0.9621958]),\n",
+ " 20,\n",
+ ") \u001b[90m # Total: 80 trainable arrays, \u001b[39m1_339_392 parameters,\n",
+ "\u001b[90m # plus 3 non-trainable, 65_600 parameters, summarysize \u001b[39m5.364 MiB."
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model = train_editflow!(P, model; epochs=100, steps_per_epoch=150, batch_size=256, lr=1f-4)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Sampling and Levenshtein plot\n",
+ "using Random\n",
+ "n = 1000\n",
+ "# 100 model samples (final states → strings)\n",
+ "model_samples_any = sample_gen_n(P, model; n=n, ts=0f0:0.01f0:1f0, rng=Random.MersenneTwister(42))\n",
+ "model_final_states = map(final_state_from_gen, model_samples_any)\n",
+ "model_strings = [state_to_PM_string(P, s) for s in model_final_states]\n",
+ "\n",
+ "# Build training strings from PM parents (the data PM was built on)\n",
+ "train_states = [FF.DiscreteState(P.k, sample(PM; rng=Random.MersenneTwister(1000+i))) for i in 1:100]\n",
+ "train_strings = [state_to_PM_string(P, s) for s in train_states]\n",
+ "\n",
+ "# Independent validation strings sampled from PM\n",
+ "val_states = [FF.DiscreteState(P.k, sample(PM; rng=Random.MersenneTwister(1000+i+2*n))) for i in 1:n]\n",
+ "val_strings = [state_to_PM_string(P, s) for s in val_states]\n",
+ "#val_strings = [s for s in val_strings if !(s in train_strings)]\n",
+ "\n",
+ "# Plot normalized Levenshtein vs. training set (distinct val/train)\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings, train_strings; title=\"Model vs True (normalized Levenshtein)\")\n",
+ "display(plt)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "model_strings_bos = [\">\" * state_to_PM_string(P, s) for s in model_final_states]\n",
+ "plt = plot_lev_dist(\"model_vs_true\", val_strings, model_strings_bos, train_strings; title=\"Model vs True (normalized Levenshtein)\")\n",
+ "display(plt)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "\n",
+ "pAA = plot_AA_dist(\"model_vs_true_AA\", val_strings, model_strings; title=\"AA frequency: Natural vs Generated\")\n",
+ "display(pAA)\n",
+ "\n",
+ "pLen = plot_len_dist(\"model_vs_true_len\", val_strings, model_strings; title=\"Length distribution: Natural vs Generated\")\n",
+ "display(pLen)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Natural 'A' frequency: 0.047142480905978404\n"
+ ]
+ }
+ ],
+ "source": [
+ "println(\"Natural 'A' frequency: \", (isdefined(Main, :val_strings) ? sum(count(==('A'), s) for s in val_strings) / sum(length.(val_strings)) : sum(count(==('A'), s) for s in val_strings) / sum(length.(val_strings))))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Natural 'A' frequency: 0.04109772423025435\n"
+ ]
+ }
+ ],
+ "source": [
+ "println(\"Natural 'A' frequency: \", (isdefined(Main, :model_strings) ? sum(count(==('A'), s) for s in model_strings) / sum(length.(model_strings)) : sum(count(==('A'), s) for s in model_strings) / sum(length.(model_strings))))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.11.6",
+ "language": "julia",
+ "name": "julia-1.11"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.11.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/editflow_code/test/runtests.jl b/editflow_code/test/runtests.jl
new file mode 100644
index 0000000..98dc973
--- /dev/null
+++ b/editflow_code/test/runtests.jl
@@ -0,0 +1,271 @@
+using Pkg
+Pkg.activate(joinpath(@__DIR__, ".."))
+Pkg.instantiate()
+Pkg.develop(path=joinpath(@__DIR__, "..", ".."))
+using Flowfusion
+const FF = Flowfusion
+using Test
+
+@testset "EditFlow remaining_edits, transition mask, remove/pad, loss" begin
+
+ tokens = 21
+ pad = 22
+ lat = 23
+ bos = 0
+ # The expectations below use the position-wise array layout (2K+1, L, B).
+ # The default implementation is `positionwise_reparam`, whose `remaining_edits`
+ # and `transition_mask_from_Xt` return tuples, so select `:positionwise` here.
+ P = Flowfusion.EditFlow(tokens; transform=identity, padding_token=pad, latent_token=lat, bos_token=bos, impl=:positionwise)
+
+ # ─────────────────────────────────────────────────────────────────────
+ # remaining_edits: simple 1-column case
+ Zt = [0; 7; 23; 23; 4; 23; 22;;]
+ Xt = [0; 7; 4; 22;;]
+ Z1 = [0; 7; 20; 20; 4; 10; 22;;]
+ expected = zeros(Float32, 2*tokens+1, size(Xt)...)
+ expected[20,2,1] = 2
+ expected[10,3,1] = 1
+ got = FF.remaining_edits(P, Zt, Z1, Xt)
+
+ @test got == expected
+ # Two-column case with inserts/subs
+ Zt = [0 0; 7 23; 15 15; 15 15; 23 4; 2 2; 22 22;]
+ Xt = [0 0; 7 15; 15 15; 15 4; 2 2]
+ Z1 = [0 0; 7 7; 20 20; 20 20; 5 4; 10 10; 22 22;]
+ expected = zeros(Float32, 2*tokens+1, size(Xt)...)
+ expected[5, 4, 1] = 1
+ expected[tokens+10,5,1] = 1
+ expected[tokens+10,5,2] = 1
+ expected[tokens+20, 4, 1] = 1
+ expected[tokens+20, 3, 2] = 1
+ expected[tokens+20, 3, 1] = 1
+ expected[tokens+20, 2, 2] = 1
+ expected[7, 1, 2] = 1
+ got = FF.remaining_edits(P, Zt, Z1, Xt)
+ @test got == expected
+
+ # deletions case
+ Zt = [0 0; 7 7; 20 20; 20 20; 4 4; 19 19; 22 22;]
+ Xt = [0 0; 7 7; 20 20; 20 20; 4 4; 19 19; 22 22;]
+ Z1 = [0 0; 7 7; 23 23; 20 23; 4 4; 23 23; 22 22;]
+ expected = zeros(Float32, 2*tokens+1, size(Xt)...)
+ expected[2*tokens+1,3,1] = 1
+ expected[2*tokens+1,6,1] = 1
+ expected[2*tokens+1,3,2] = 1
+ expected[2*tokens+1,4,2] = 1
+ expected[2*tokens+1,6,2] = 1
+ got = FF.remaining_edits(P, Zt, Z1, Xt)
+ @test got == expected
+
+ # mixed inserts/subs case
+ Zt = [0 0; 7 7; 15 15; 15 15; 23 4; 2 2; 22 22;]
+ Xt = [0 0; 7 7; 15 15; 15 15; 2 4; 22 2;]
+ Z1 = [0 0; 7 7; 20 20; 20 20; 5 4; 10 10; 22 22;]
+ expected = zeros(Float32, 2*tokens+1, size(Xt)...)
+ expected[5, 4, 1] = 1
+ expected[tokens+20,3,1] = 1
+ expected[tokens+20,4,1] = 1
+ expected[tokens+10,5,1] = 1
+ expected[tokens+20,3,2] = 1
+ expected[tokens+20,4,2] = 1
+ expected[tokens+10,6,2] = 1
+ got = FF.remaining_edits(P, Zt, Z1, Xt)
+ @test got == expected
+
+ # ─────────────────────────────────────────────────────────────────────
+ # transition mask from Xt
+ Xt = [0 0; 7 7; 20 20; 20 20; 5 4; 10 22; 22 22;]
+ expected = ones(Float32, 2*tokens+1, size(Xt)...)
+ # sample 1 (no self-sub mask for BOS=0)
+ expected[tokens+7,2,1] = 0
+ expected[tokens+20,3,1] = 0
+ expected[tokens+20,4,1] = 0
+ expected[tokens+5,5,1] = 0
+ expected[tokens+10,6,1] = 0
+ expected[:,7,1] .= 0
+ expected[tokens+1:2*tokens+1,1,1] .= 0
+ # sample 2 (no self-sub mask for BOS=0)
+ expected[tokens+7,2,2] = 0
+ expected[tokens+20,3,2] = 0
+ expected[tokens+20,4,2] = 0
+ expected[tokens+4,5,2] = 0
+ expected[:,6:7,2] .= 0
+ expected[tokens+1:2*tokens+1,1,2] .= 0
+
+
+ got = FF.transition_mask_from_Xt(P, Xt)
+
+ # Display all indices in `got` where the value is zero
+ zero_indices = findall(x -> x == 0, got)
+ #@info "Indices in 'got' that are zero:" zero_indices
+ @test got == expected
+
+ # ─────────────────────────────────────────────────────────────────────
+ # loss equivalence under identity transform
+ edit_multiplier = [0; 2;; 1; 0;;; 1; 0;; 0; 1;;;] # (2,2,2)
+ transition_mask = [1; 0;; 1; 0;;; 1; 0;; 0; 1;;;] # (2,2,2)
+ M = [0.1; 0.2;; 0.3; 0.4;;; 0.5; 0.6;; 0.7; 0.8;;;] # (2,2,2)
+ t = [0.3; 0.7;;] # (1,2)
+ k(t)=t; dk(t)=1
+ scheduler_scaling = dk.(t) ./ (-k.(t) .+ 1)
+ # manual loss
+ l = (0.1+0.3+0.5+0.8 - (1/(1-0.3)*(2*log(0.2)+log(0.3)) + 1/(1-0.7)*(log(0.5)+log(0.8))))/2
+ got = Flowfusion.edit_loss(P, M, transition_mask, edit_multiplier, scheduler_scaling; op_mask=nothing, eps=0)
+ @test isapprox(got, l; atol=1e-7, rtol=1e-7)
+
+end
+
+
+@testset "EditFlow gap-wise inserts: remaining_edits, transition mask, loss" begin
+ tokens = 21
+ pad = 22
+ lat = 23
+ bos = 0
+ P = Flowfusion.EditFlow(tokens; transform=identity, padding_token=pad, latent_token=lat, bos_token=bos)
+
+ # ─────────────────────────────────────────────────────────────────────
+ # remaining_edits: simple 1-column case (gap-wise)
+ # Xt has L=4 rows → gaps = 1..4
+ Zt = [0; 7; 23; 23; 4; 23; 22;;]
+ Xt = [0; 7; 4; 22;;]
+ Z1 = [0; 7; 20; 20; 4; 10; 22;;]
+ expected = zeros(Float32, 2*tokens+1, size(Xt,1)+1, size(Xt,2)) # (2K+1, L+1=5? careful: size(Xt,1)=4 -> L+1=5 ; but real Lb=3 → valid gaps 1..4)
+ # Inserts are on gaps:
+ # - two '20' inserted between 7 and 4 -> gap 3
+ # - one '10' inserted after 4 -> gap 4
+ expected[20, 3, 1] = 2
+ expected[10, 4, 1] = 1
+ got = FF.remaining_edits_gapwise(P, Zt, Z1, Xt)
+ @test got == expected
+
+ # Two-column case with inserts/subs (gap-wise inserts)
+ Zt = [0 0; 7 23; 15 15; 15 15; 23 4; 2 2; 22 22;]
+ Xt = [0 0; 7 15; 15 15; 15 4; 2 2] # L=5 → gaps 1..6
+ Z1 = [0 0; 7 7; 20 20; 20 20; 5 4; 10 10; 22 22;]
+ expected = zeros(Float32, 2*tokens+1, size(Xt,1)+1, size(Xt,2)) # (2K+1, 6, 2)
+
+ # sample 1 (b=1):
+ # subs: 15→20 at sites 3 & 4, 2→10 at site 5
+ expected[tokens+20, 3, 1] = 1
+ expected[tokens+20, 4, 1] = 1
+ expected[tokens+10, 5, 1] = 1
+ # insert: LAT→5 occurs after site 4 (between 15 and 2) → gap 5
+ expected[5, 5, 1] = 1
+
+ # sample 2 (b=2):
+ # insert: LAT→7 occurs after site 1 (between BOS and first real) → gap 2
+ expected[7, 2, 2] = 1
+ # subs: 15→20 at sites 2 & 3, 2→10 at site 5
+ expected[tokens+20, 2, 2] = 1
+ expected[tokens+20, 3, 2] = 1
+ expected[tokens+10, 5, 2] = 1
+
+ got = FF.remaining_edits_gapwise(P, Zt, Z1, Xt)
+ @test got == expected
+
+ # deletions case (gap-wise array but deletions live on sites)
+ Zt = [0 0; 7 7; 20 20; 20 20; 4 4; 19 19; 22 22;]
+ Xt = [0 0; 7 7; 20 20; 20 20; 4 4; 19 19; 22 22;] # L=7 → gaps 1..8
+ Z1 = [0 0; 7 7; 23 23; 20 23; 4 4; 23 23; 22 22;]
+ expected = zeros(Float32, 2*tokens+1, size(Xt,1)+1, size(Xt,2))
+
+ # sample 1 deletions at sites 3 and 6
+ expected[2*tokens+1, 3, 1] = 1
+ expected[2*tokens+1, 6, 1] = 1
+ # sample 2 deletions at sites 3,4,6
+ expected[2*tokens+1, 3, 2] = 1
+ expected[2*tokens+1, 4, 2] = 1
+ expected[2*tokens+1, 6, 2] = 1
+
+ got = FF.remaining_edits_gapwise(P, Zt, Z1, Xt)
+ @test got == expected
+
+ # mixed inserts/subs case (gap-wise)
+ Zt = [0 0; 7 7; 15 15; 15 15; 23 4; 2 2; 22 22;]
+ Xt = [0 0; 7 7; 15 15; 15 15; 2 4; 22 2;] # L=6 → gaps 1..7
+ Z1 = [0 0; 7 7; 20 20; 20 20; 5 4; 10 10; 22 22;]
+ expected = zeros(Float32, 2*tokens+1, size(Xt,1)+1, size(Xt,2))
+
+ # sample 1: subs at sites 3,4 and 5; insert '5' at gap 5
+ expected[tokens+20, 3, 1] = 1
+ expected[tokens+20, 4, 1] = 1
+ expected[tokens+10, 5, 1] = 1
+ expected[5, 5, 1] = 1
+
+ # sample 2: subs at sites 3,4 and 6
+ expected[tokens+20, 3, 2] = 1
+ expected[tokens+20, 4, 2] = 1
+ expected[tokens+10, 6, 2] = 1
+
+ got = FF.remaining_edits_gapwise(P, Zt, Z1, Xt)
+ @test got == expected
+
+ # ─────────────────────────────────────────────────────────────────────
+ # transition mask from Xt (gap-wise)
+ Xt = [0 0; 7 7; 20 20; 20 20; 5 4; 10 22; 22 22;] # L=7 → gaps 1..8
+ expected = ones(Float32, 2*tokens+1, size(Xt,1)+1, size(Xt,2))
+
+ # Helper to apply rules programmatically (mirrors gap-wise mask fn)
+ # Fill column `b` of the expected gap-wise mask `E` (shape (2K+1, L+1, B)) from
+ # the token column `x`. Channels 1:tokens are inserts (indexed by gap),
+ # tokens+1:2*tokens are substitutions and 2*tokens+1 is deletion (indexed by site).
+ # Padding is assumed to be trailing, so the first `Lb` rows are the real sites.
+ function fill_expected_mask!(E, x::Vector{Int}, b::Int)
+     L = length(x)
+     Lb = count(!=(pad), x)
+     subdel = (tokens+1):(2*tokens+1)
+
+     # sub/del never valid at last column (gap L+1)
+     E[subdel, L+1, b] .= 0
+
+     # pre-BOS insert forbidden
+     E[1:tokens, 1, b] .= 0
+
+     # padding sites: only sub/del are switched off here. The insert channel at
+     # gap Lb+1 (append after the last real token) must stay enabled.
+     if Lb < L
+         E[subdel, (Lb+1):L, b] .= 0
+     end
+     # inserts beyond gap (Lb+1) off
+     if (Lb+1) < (L+1)
+         E[1:tokens, (Lb+2):(L+1), b] .= 0
+     end
+
+     for i in 1:Lb
+         # named `tok` (not `t`) so the closure does not capture the test set's `t`
+         tok = x[i]
+         if tok == bos
+             # block all subs at BOS and delete BOS
+             E[(tokens+1):(2*tokens), i, b] .= 0
+             E[2*tokens+1, i, b] = 0
+         else
+             # self-sub off
+             E[tokens + tok, i, b] = 0
+         end
+     end
+     return E
+ end
+
+ expected = fill_expected_mask!(expected, Xt[:,1], 1)
+ expected = fill_expected_mask!(expected, Xt[:,2], 2)
+
+ got = FF.transition_mask_from_Xt_gapwise(P, Xt)
+ @test got == expected
+
+ # ─────────────────────────────────────────────────────────────────────
+ # loss equivalence under identity transform — with L+1 axis
+ # Make tiny tensors of shape (C=2, L+1=3, B=2); third column is masked out.
+ edit_multiplier = reshape(Float32[
+ 0, 2, 0, 1, 0, 0, # batch 1 (cols 1..3)
+ 1, 0, 0, 0, 1, 0 # batch 2
+ ], (2, 3, 2))
+
+ transition_mask = reshape(Float32[
+ 1, 0, 0, 1, 0, 0,
+ 1, 0, 0, 0, 1, 0
+ ], (2, 3, 2))
+
+ M = reshape(Float32[
+ 0.1, 0.2, 0.0, 0.3, 0.4, 0.0,
+ 0.5, 0.6, 0.0, 0.7, 0.8, 0.0
+ ], (2, 3, 2))
+
+ t = reshape(Float32[0.3, 0.7], (1, 2)) # per-batch scheduler t
+ k(t)=t; dk(t)=1
+ scheduler_scaling = dk.(t) ./ (1 .- k.(t))
+
+ # Manual loss matches your earlier numbers (3rd col masked out)
+ l = (0.1+0.3+0.5+0.8 - (1/(1-0.3)*(2*log(0.2)+log(0.3)) + 1/(1-0.7)*(log(0.5)+log(0.8))))/2
+ got = Flowfusion.edit_loss(P, M, transition_mask, edit_multiplier, scheduler_scaling; op_mask=nothing, eps=1e-9)
+ @test isapprox(got, l; atol=1e-7, rtol=1e-7)
+end
diff --git a/src/Flowfusion.jl b/src/Flowfusion.jl
index 5dfc5b5..cdfe28c 100644
--- a/src/Flowfusion.jl
+++ b/src/Flowfusion.jl
@@ -19,7 +19,7 @@ Later:
module Flowfusion
-using ForwardBackward, OneHotArrays, Adapt, Manifolds, NNlib, LogExpFunctions, Distributions
+using ForwardBackward, OneHotArrays, Adapt, Manifolds, NNlib, LogExpFunctions, Distributions, Random
include("types.jl")
include("mask.jl")
@@ -32,6 +32,7 @@ include("batching.jl")
include("indel.jl")
include("dist_dfm.jl")
+include("editflows.jl")
export
#Processes not in ForwardBackward.jl
@@ -39,6 +40,7 @@ export
NoisyInterpolatingDiscreteFlow,
DoobMatchingFlow,
OUFlow,
+ EditFlow,
MaskedState,
Guide,
tangent_guide,
diff --git a/src/editflows.jl b/src/editflows.jl
new file mode 100644
index 0000000..9f1b395
--- /dev/null
+++ b/src/editflows.jl
@@ -0,0 +1,1161 @@
+# Which EditFlow formulation to run. The dispatching functions in this file
+# (`transition_mask_from_Xt`, `remaining_edits`, `step`) branch on this value.
+@enum EditFlowImpl::UInt8 GAPWISE POSITIONWISE POSITIONWISE_REPARAM
+
+# Normalise a user-supplied selector to `EditFlowImpl`.
+# Enum values pass through, Symbols are converted to strings, and strings are
+# matched case-insensitively; an unrecognised name raises an error.
+_coerce_impl(x::EditFlowImpl) = x
+_coerce_impl(x::Symbol) = _coerce_impl(String(x))
+function _coerce_impl(x::AbstractString)
+    key = lowercase(String(x))
+    key == "gapwise" && return GAPWISE
+    key == "positionwise" && return POSITIONWISE
+    key == "positionwise_reparam" && return POSITIONWISE_REPARAM
+    return error("Unknown EditFlow implementation: $x")
+end
+
+# `nothing` selects the default (POSITIONWISE_REPARAM); anything else is coerced.
+_resolve_impl(::Nothing) = POSITIONWISE_REPARAM
+_resolve_impl(impl) = _coerce_impl(impl)
+
+# Parameter bundle for an edit-flow process over integer token sequences.
+# `impl` decides which formulation the dispatching functions in this file use.
+struct EditFlow <: DiscreteProcess
+    k::Int # alphabet size (tokens 1..k)
+    transform::Function # maps unconstrained logits to positive rates
+    κ::Function # scheduler on [0,1] → [0,1] for bridge keep-probability
+    dκ::Function # derivative of κ
+    padding_token::Int # padding token id used in X1
+    latent_token::Int # latent placeholder used in Z0 / Zt
+    bos_token::Int # beginning-of-sequence token id; the mask builders forbid substituting/deleting a BOS site
+    impl::EditFlowImpl # implementation selector
+end
+
+# Selector names that `_coerce_impl` recognises (it lowercases its input first).
+const EDITFLOW_DISPATCH_STRINGS = (
+    "gapwise",
+    "positionwise",
+    "positionwise_reparam",
+)
+
+"""
+    EditFlow(k; transform=NNlib.softplus, κ=identity, dκ=t -> one(t),
+             padding_token=k + 1, latent_token=k + 2, bos_token=0, impl=nothing)
+
+Keyword constructor for `EditFlow` with an alphabet of `k` tokens.
+
+By default the scheduler is linear (`κ = identity`, `dκ ≡ 1`), the padding and
+latent tokens are `k + 1` and `k + 2`, and the BOS token is `0`. `impl` may be an
+`EditFlowImpl`, a `Symbol`, a string, or `nothing` (which selects the default
+implementation).
+"""
+function EditFlow(k;
+        transform = NNlib.softplus,
+        κ = identity,
+        dκ = t -> one(t),
+        padding_token::Int = k + 1,
+        latent_token::Int = k + 2,
+        bos_token::Int = 0,
+        impl = nothing)  # accepts EditFlowImpl | Symbol | String | nothing
+    return EditFlow(k, transform, κ, dκ, padding_token, latent_token, bos_token,
+                    _resolve_impl(impl))
+end
+
+"""
+ EditFlow_cubic(k; transform=NNlib.softplus, padding_token=k+1, latent_token=k+2, bos_token=0)
+
+Convenience constructor that uses a cubic scheduler:
+- κ(t) = t^3
+- dκ(t) = 3t^2
+
+Compatible with both scalar and broadcasted usage (the codebase typically calls `P.κ.(ts)` and also `P.κ(ts[j])`).
+"""
+function EditFlow_cubic(k;
+        transform = NNlib.softplus,
+        padding_token::Int = k + 1,
+        latent_token::Int = k + 2,
+        bos_token::Int = 0,
+        impl = nothing)  # EditFlowImpl | Symbol | String | nothing (default impl)
+    # Cubic schedule and its derivative, written for scalar `t`.
+    cube = t -> t^3
+    dcube = t -> 3 * t^2
+    return EditFlow(k, transform, cube, dcube, padding_token, latent_token, bos_token,
+                    _resolve_impl(impl))
+end
+
+
+"""
+    EditFlow_alpha(k, alpha::Real; transform=NNlib.softplus, padding_token=k+1,
+                   latent_token=k+2, bos_token=0, impl=nothing)
+
+Build an `EditFlow` with the power-law schedule
+- κ(t) = t^alpha
+- dκ(t) = alpha * t^(alpha - 1)
+
+`alpha` may be any real number (`2`, `2.0`, `2f0`, ...). The result type of
+`κ`/`dκ` follows Julia's promotion of `t` and `alpha`, so pass a `Float32`
+exponent to keep `Float32` times in `Float32`. For `alpha < 1`, `dκ(0)` is not
+finite. `impl` accepts an `EditFlowImpl`, a `Symbol`, a string, or `nothing`.
+"""
+function EditFlow_alpha(k, alpha::Real;
+        transform = NNlib.softplus,
+        padding_token::Int = k + 1,
+        latent_token::Int = k + 2,
+        bos_token::Int = 0,
+        impl = nothing)
+    κ_alpha(t) = t^alpha
+    dκ_alpha(t) = alpha * (t^(alpha - 1))
+    impl_enum = _resolve_impl(impl)
+    return EditFlow(k, transform, κ_alpha, dκ_alpha, padding_token, latent_token,
+                    bos_token, impl_enum)
+end
+
+"""
+ EditFlow_constant(k; C=0.5, eps=1e-3, transform=NNlib.softplus,
+ padding_token=k+1, latent_token=k+2, bos_token=0)
+
+Create an `EditFlow` whose scheduler is constant in time:
+ κ(t) = C (clamped to (eps, 1-eps) for numerical stability)
+ dκ(t) = 0
+
+The definitions are broadcasting-friendly and work for scalars and arrays on CPU/GPU.
+"""
+function EditFlow_constant(k; C=0.5, eps=1e-3, transform = NNlib.softplus,
+        padding_token::Int = k + 1,
+        latent_token::Int = k + 2,
+        bos_token::Int = 0,
+        impl = nothing)  # EditFlowImpl | Symbol | String | nothing (default impl)
+    # Keep the constant strictly inside (0, 1).
+    Cc = clamp(float(C), float(eps), 1.0 - float(eps))
+    # Broadcast-friendly closures (scalar or array). The constant is converted to
+    # the floating-point type of `t` first, so Float32 inputs stay Float32 rather
+    # than being promoted to Float64 by the captured constant; integer `t` still
+    # yields a Float64 result as before.
+    κ_const = t -> convert(float(eltype(t)), Cc) .+ zero.(t)
+    dκ_const = t -> zero.(t)
+    impl_enum = _resolve_impl(impl)
+    return EditFlow(k, transform, κ_const, dκ_const, padding_token, latent_token,
+                    bos_token, impl_enum)
+end
+
+
+# Insert `n` copies of `tok` into `v`, each at a position drawn uniformly from
+# the current insertion points (before the first element ... after the last).
+function _pad_with_latents!(rng, v::Vector{Int}, n::Integer, tok::Int)
+    for _ in 1:n
+        pos = rand(rng, 0:length(v))
+        insert!(v, pos + 1, tok)
+    end
+    return v
+end
+
+"""
+    align_and_batch(P::EditFlow, x0s, x1s; rng=Random.default_rng()) -> (Z0, Z1)
+
+Align each pair `(x0s[b], x1s[b])` to a common length and batch the results.
+
+For every pair, the shorter sequence receives `P.latent_token` at uniformly random
+positions until both have the same length. The aligned sequences are then written
+column by column into two `Int` matrices of size `(maxlen, B)`, with the unused
+tail of each column filled with `P.padding_token`.
+
+An empty batch yields two `0×0` matrices. `x0s` and `x1s` must have equal length.
+"""
+function align_and_batch(P::EditFlow,
+        x0s::Vector{<:DiscreteState},
+        x1s::Vector{<:DiscreteState};
+        rng=Random.default_rng())
+    @assert length(x0s) == length(x1s)
+    B = length(x0s)
+    ltok = P.latent_token
+    pad = P.padding_token
+
+    z0s = Vector{Vector{Int}}(undef, B)
+    z1s = Vector{Vector{Int}}(undef, B)
+
+    for b in 1:B
+        z0 = Vector{Int}(collect(tensor(x0s[b])))
+        z1 = Vector{Int}(collect(tensor(x1s[b])))
+        # Only the shorter side is padded; equal lengths need no latents.
+        d = length(z1) - length(z0)
+        if d > 0
+            _pad_with_latents!(rng, z0, d, ltok)
+        elseif d < 0
+            _pad_with_latents!(rng, z1, -d, ltok)
+        end
+        z0s[b] = z0
+        z1s[b] = z1
+    end
+
+    # `init=0` keeps the empty batch from throwing on an empty reduction.
+    maxlen = maximum(length, z0s; init=0)
+    Z0 = fill(Int(pad), maxlen, B)
+    Z1 = fill(Int(pad), maxlen, B)
+    for b in 1:B
+        lb = length(z0s[b])
+        Z0[1:lb, b] .= z0s[b]
+        Z1[1:lb, b] .= z1s[b]
+    end
+
+    return Z0, Z1
+end
+
+"""
+    interpolate_Z_elementwise(P::EditFlow, Z0, Z1, ts; rng=Random.default_rng()) -> (Zt, Xt)
+
+Sample an intermediate aligned state `Zt` between `Z0` and `Z1`, and its
+collapsed form `Xt`.
+
+For column `j`, each entry independently takes the `Z1` value with probability
+`clamp(P.κ(ts[j]), 0, 1)` and the `Z0` value otherwise; entries where `Z1` holds
+the padding token stay padding. `Xt` is `Zt` with latent and padding tokens
+removed from each column and the column padded back to `L` rows with
+`P.padding_token`.
+
+`Z0` and `Z1` must have the same size `(L, B)` and `ts` must have length `B`.
+Pass `rng` for reproducible draws; the default is the task-local default RNG.
+"""
+function interpolate_Z_elementwise(P::EditFlow,
+        Z0::AbstractMatrix{<:Integer},
+        Z1::AbstractMatrix{<:Integer},
+        ts::AbstractVector;
+        rng=Random.default_rng())
+    @assert size(Z0) == size(Z1)
+    L, B = size(Z1)
+    @assert length(ts) == B
+    pad = P.padding_token
+    ltok = P.latent_token
+
+    Zt = similar(Z1)
+    # Start fully padded; the element type matches what concatenating token
+    # columns with `Int` padding would produce.
+    Xt = fill(convert(promote_type(eltype(Z1), Int), pad), L, B)
+    for j in 1:B
+        keep = clamp(P.κ(ts[j]), 0f0, 1f0)
+        n_real = 0
+        for i in 1:L
+            z1 = Z1[i, j]
+            # A random number is drawn only for non-padding targets.
+            zt = if z1 == pad
+                pad
+            elseif rand(rng, Float32) < keep
+                z1
+            else
+                Z0[i, j]
+            end
+            Zt[i, j] = zt
+            # Compact the real tokens to the top of the Xt column.
+            if zt != ltok && zt != pad
+                n_real += 1
+                Xt[n_real, j] = zt
+            end
+        end
+    end
+
+    return Zt, Xt
+end
+
+# Build the transition mask for `Xt` using the formulation selected by `P.impl`.
+# The return shape depends on the implementation: the gap-wise and position-wise
+# variants return a single array, the reparameterised variant a tuple of arrays.
+function transition_mask_from_Xt(P::EditFlow, Xt::AbstractMatrix{<:Integer})
+    impl = P.impl
+    impl == GAPWISE && return transition_mask_from_Xt_gapwise(P, Xt)
+    impl == POSITIONWISE && return transition_mask_from_Xt_positionwise(P, Xt)
+    impl == POSITIONWISE_REPARAM &&
+        return transition_mask_from_Xt_positionwise_reparam(P, Xt)
+    return error("Unknown EditFlow implementation: $(P.impl)")
+end
+
+# Position-wise transition mask of shape (2K+1, L, B) with entries 0/1 (Float32).
+# Channel layout along the first axis: 1:K inserts, K+1:2K substitutions,
+# 2K+1 deletion. Padding sites are fully masked, the BOS site (row 1 only) may
+# not be substituted or deleted, and a token may not be substituted by itself.
+function transition_mask_from_Xt_positionwise(P::EditFlow, Xt::AbstractMatrix{<:Integer})
+    K = P.k
+    n_rows, n_batch = size(Xt)
+    mask = ones(Float32, 2K + 1, n_rows, n_batch)
+    for c in 1:n_batch, i in 1:n_rows
+        x = Xt[i, c]
+        @assert x != P.latent_token
+        if x == P.padding_token
+            mask[:, i, c] .= 0
+        elseif x == P.bos_token
+            @assert i == 1 #should only be BOS at position 1
+            mask[(K + 1):(2K + 1), i, c] .= 0
+        elseif 1 <= x <= K
+            # forbid sub-to-current-token only for valid tokens 1..K
+            mask[K + x, i, c] = 0
+        end
+    end
+    return mask
+end
+# Gap-wise transition mask of shape (2K+1, L+1, B) with entries 0/1 (Float32).
+# Insert channels (1:K) are indexed by gap 1..L+1; substitution (K+1:2K) and
+# deletion (2K+1) channels are indexed by site 1..L, with column L+1 always off.
+function transition_mask_from_Xt_gapwise(P::EditFlow, Xt::AbstractMatrix{<:Integer})
+    K = P.k
+    PAD = P.padding_token
+    BOS = P.bos_token
+    subdel = (K + 1):(2K + 1)
+
+    L, B = size(Xt)
+    M = ones(Float32, 2K + 1, L + 1, B)
+
+    for b in 1:B
+        col = view(Xt, :, b)
+        # number of non-padding entries (incl. BOS): sites 1..n_real, gaps 1..n_real+1
+        n_real = count(!=(PAD), col)
+
+        if n_real < L
+            M[1:K, (n_real + 2):(L + 1), b] .= 0f0      # no inserts past gap n_real+1
+            M[subdel, (n_real + 1):(L + 1), b] .= 0f0   # no sub/del past the last site
+        end
+        # the extra gap column never carries sub/del
+        M[subdel, L + 1, b] .= 0f0
+
+        for i in 1:n_real
+            tok = col[i]
+            if tok == BOS
+                M[subdel, i, b] .= 0f0    # BOS can be neither substituted nor deleted
+            else
+                M[K + tok, i, b] = 0f0    # no substitution by the current token
+            end
+        end
+
+        # (policy) nothing may be inserted before the first site
+        M[1:K, 1, b] .= 0f0
+    end
+    return M
+end
+
+"""
+    transition_mask_from_Xt_positionwise_reparam(P::EditFlow, Xt)
+
+Build the 0/1 (`Float32`) masks for the reparameterised position-wise formulation.
+
+Returns `(ins_q_mask, sub_q_mask, ins_lambda_mask, sub_lambda_mask, del_lambda_mask)`.
+The two `q` masks have shape `(k, L, B)` and the three `lambda` masks `(1, L, B)`,
+where `(L, B) = size(Xt)`.
+
+Rules applied to each column (the first `Lb` rows, `Lb` being the number of
+non-padding entries, are treated as the real sites):
+- rows after `Lb` are switched off in all five masks;
+- if row 1 holds the BOS token, substitution and deletion are switched off there;
+- every other real site has substitution by its own token switched off. This
+  includes row 1 when the column does not start with BOS.
+
+Throws `ArgumentError` if a real, non-BOS site holds a token outside `1:k`.
+"""
+function transition_mask_from_Xt_positionwise_reparam(P::EditFlow, Xt::AbstractMatrix{<:Integer})
+    tokens = P.k
+    pad = P.padding_token
+    bos = P.bos_token
+    xt_len, B = size(Xt)
+    ins_q_mask = ones(Float32, tokens, xt_len, B)
+    sub_q_mask = ones(Float32, tokens, xt_len, B)
+    ins_lambda_mask = ones(Float32, 1, xt_len, B)
+    sub_lambda_mask = ones(Float32, 1, xt_len, B)
+    del_lambda_mask = ones(Float32, 1, xt_len, B)
+    for b in 1:B
+        x = view(Xt, :, b)
+        Lb = count(!=(pad), x)
+
+        # Mask the padding
+        tail = (Lb + 1):xt_len
+        ins_q_mask[:, tail, b] .= 0
+        sub_q_mask[:, tail, b] .= 0
+        ins_lambda_mask[:, tail, b] .= 0
+        sub_lambda_mask[:, tail, b] .= 0
+        del_lambda_mask[:, tail, b] .= 0
+
+        # Forbid sub/del at BOS iff BOS is present at row 1
+        has_bos = Lb >= 1 && x[1] == bos
+        if has_bos
+            sub_q_mask[:, 1, b] .= 0
+            sub_lambda_mask[:, 1, b] .= 0
+            del_lambda_mask[:, 1, b] .= 0
+        end
+
+        # Mask the self-substitutions. Row 1 is skipped only when it is BOS, so a
+        # column without BOS gets its first token masked and validated as well.
+        first_site = has_bos ? 2 : 1
+        for i in first_site:Lb
+            t = x[i]
+            1 <= t <= tokens || throw(ArgumentError("Invalid token: $t"))
+            sub_q_mask[t, i, b] = 0
+        end
+    end
+    return (ins_q_mask, sub_q_mask, ins_lambda_mask, sub_lambda_mask, del_lambda_mask)
+end
+
+"""
+    remaining_edits(P::EditFlow, Zt, Z1, Xt, dense=false)
+
+Compute the edits still needed to turn `Zt` into `Z1`, using the formulation
+selected by `P.impl`.
+
+`dense` is forwarded to the position-wise implementations, which return index
+tuples instead of arrays when it is `true`. The gap-wise implementation has no
+dense form, so `dense` is ignored there.
+"""
+function remaining_edits(P::EditFlow, Zt::Matrix{Int}, Z1::Matrix{Int}, Xt::Matrix{Int}, dense=false)
+    if P.impl == GAPWISE
+        return remaining_edits_gapwise(P, Zt, Z1, Xt)
+    elseif P.impl == POSITIONWISE
+        return remaining_edits_positionwise(P, Zt, Z1, Xt, dense)
+    elseif P.impl == POSITIONWISE_REPARAM
+        return remaining_edits_positionwise_reparam(P, Zt, Z1, Xt, dense)
+    else
+        error("Unknown EditFlow implementation: $(P.impl)")
+    end
+end
+
+"""
+    remaining_edits_positionwise(P::EditFlow, Zt, Z1, Xt, dense=false)
+
+Compute the remaining edits in the position-wise layout.
+
+The insert, substitution and deletion counts are the ones produced by
+`remaining_edits_positionwise_reparam`; this function only changes the packaging.
+With `dense == false` (default) the three arrays are stacked along the first axis
+into one `Float32` array of shape `(2k+1, size(Xt, 1), B)`: rows `1:k` inserts,
+`k+1:2k` substitutions, `2k+1` deletions. With `dense == true` the tuple
+`(dense_inserts, dense_subs, dense_dels)` of index vectors is returned unchanged.
+"""
+function remaining_edits_positionwise(P::EditFlow, Zt::AbstractMatrix{<:Integer}, Z1::AbstractMatrix{<:Integer}, Xt::AbstractMatrix{<:Integer}, dense=false)
+    # Single source of truth for the edit computation: the reparam variant.
+    edits = remaining_edits_positionwise_reparam(P, Zt, Z1, Xt, dense)
+    dense == true && return edits
+    insert_edits, sub_edits, del_edits = edits
+    return vcat(insert_edits, sub_edits, del_edits)
+end
+
+# Remaining edits in the gap-wise layout, returned as a Float32 array of shape
+# (2K+1, L+1, B) with (L, B) = size(Xt). Rows 1:K count inserts per gap, rows
+# K+1:2K substitutions per site and row 2K+1 deletions per site; the site-indexed
+# channels get an all-zero column at L+1 so everything can be stacked.
+function remaining_edits_gapwise(P::EditFlow, Zt::AbstractMatrix{<:Integer}, Z1::AbstractMatrix{<:Integer}, Xt::AbstractMatrix{<:Integer})
+    K = P.k
+    PAD = P.padding_token
+    LAT = P.latent_token
+    L, B = size(Xt)
+
+    # "real" means an actual site token: anything except latent and padding
+    is_real(tok) = tok != LAT && tok != PAD
+
+    ins = zeros(Float32, K, L + 1, B)   # gaps
+    sub = zeros(Float32, K, L, B)       # sites
+    del = zeros(Float32, 1, L, B)       # sites
+
+    for b in 1:B
+        n_sites = 0   # real tokens seen so far in this column of Zt
+        for r in 1:size(Zt, 1)
+            zt = Zt[r, b]
+            z1 = Z1[r, b]
+            if is_real(zt)
+                n_sites += 1
+                (1 <= n_sites <= L) || continue
+                if z1 == LAT
+                    del[1, n_sites, b] += 1          # real -> latent
+                elseif z1 != PAD && z1 != zt
+                    sub[z1, n_sites, b] += 1         # real -> different real
+                end
+            elseif zt == LAT && is_real(z1)
+                gap = n_sites + 1                    # gap right after the last site seen
+                if 1 <= gap <= L + 1
+                    ins[z1, gap, b] += 1             # latent -> real
+                end
+            end
+        end
+    end
+
+    # zero column at gap L+1 for the site-indexed channels
+    zero_col(A) = zeros(Float32, size(A, 1), 1, size(A, 3))
+    return vcat(ins, hcat(sub, zero_col(sub)), hcat(del, zero_col(del)))   # (2K+1, L+1, B)
+end
+
+# Remaining edits for the reparameterised position-wise formulation.
+#
+# With dense == false, returns (insert_edits, sub_edits, del_edits): Float32 arrays
+# of shape (k, L, B), (k, L, B) and (1, L, B), where L = size(Xt, 1) and
+# B = size(Z1, 2). With dense == true, returns three (rows, cols, samples) index
+# tuples instead; substitution rows are offset by k and deletion rows are 2k+1.
+function remaining_edits_positionwise_reparam(P::EditFlow, Zt::AbstractMatrix{<:Integer}, Z1::AbstractMatrix{<:Integer}, Xt::AbstractMatrix{<:Integer}, dense=false)
+    K = P.k
+    lat = P.latent_token
+    pad = P.padding_token
+    n_sites = size(Xt, 1)
+    (_, n_batch) = size(Z1)
+
+    # site[r, b]: number of real (non-pad, non-latent) tokens in Zt[1:r, b]; this
+    # is used as the second index of every edit found at alignment row r.
+    site = cumsum((Zt .!= pad) .& (Zt .!= lat), dims=1)
+
+    # Insertions: Zt is latent and Z1 is a valid token 1..K. Counts accumulate.
+    ins_tok = Z1 .* Int64.((Zt .== lat) .& (1 .≤ Z1 .≤ K))
+    ins_at = findall(!iszero, ins_tok)
+    ins_rows = ins_tok[ins_at]
+    ins_cols = site[ins_at]
+    ins_samples = [I[2] for I in ins_at]
+    insert_edits = zeros(Float32, (K, n_sites, n_batch))
+    for (row, col, smp) in zip(ins_rows, ins_cols, ins_samples)
+        insert_edits[row, col, smp] += 1
+    end
+
+    # Substitutions: neither side latent and the two tokens differ. Entries whose
+    # Z1 token is 0 drop out because the search is over nonzero products.
+    sub_tok = Z1 .* ((Zt .!= lat) .& (Z1 .!= lat) .& (Z1 .!= Zt))
+    sub_at = findall(!iszero, sub_tok)
+    sub_rows = sub_tok[sub_at]
+    sub_cols = site[sub_at]
+    sub_samples = [I[2] for I in sub_at]
+    sub_edits = zeros(Float32, (K, n_sites, n_batch))
+    for (row, col, smp) in zip(sub_rows, sub_cols, sub_samples)
+        sub_edits[row, col, smp] = 1
+    end
+
+    # Deletions: Z1 is latent while Zt is not.
+    del_at = findall((Z1 .== lat) .& (Zt .!= lat))
+    del_cols = site[del_at]
+    del_samples = [I[2] for I in del_at]
+    del_edits = zeros(Float32, (1, n_sites, n_batch))
+    for (col, smp) in zip(del_cols, del_samples)
+        del_edits[1, col, smp] = 1
+    end
+
+    if dense == true
+        dense_inserts = (ins_rows, ins_cols, ins_samples)
+        dense_subs = (sub_rows .+ K, sub_cols, sub_samples)
+        dense_dels = (fill(Int64(2 * K + 1), length(del_cols)), del_cols, del_samples)
+        return (dense_inserts, dense_subs, dense_dels)
+    end
+    return (insert_edits, sub_edits, del_edits)
+end
+
+
+ """
+     step(P::EditFlow, Xt, hat, s1, s2)
+
+ Advance `Xt` from time `s1` to `s2` by forwarding to the step routine selected by
+ `P.impl` (`GAPWISE`, `POSITIONWISE` or `POSITIONWISE_REPARAM`). Any other value raises
+ an error.
+ """
+ function step(P::EditFlow,
+               Xt::DiscreteState{<:AbstractArray{<:Signed}},
+               hat,
+               s1::Real, s2::Real)
+     # Note the argument order: the gap-wise and reparam steps take `hat` before `Xt`.
+     P.impl == GAPWISE && return step_gapwise(P, hat, Xt, s1, s2)
+     P.impl == POSITIONWISE && return step_positionwise(P, Xt, hat, s1, s2)
+     P.impl == POSITIONWISE_REPARAM && return step_positionwise_reparam(P, hat, Xt, s1, s2)
+     error("Unknown EditFlow implementation: $(P.impl)")
+ end
+
+ """
+     pick_index(w; rng=Random.default_rng()) -> Int
+
+ Draw an index of `w` with probability proportional to its entries. Negative entries are
+ treated as zero weight. Fails an assertion when the total weight is not finite and
+ positive.
+
+ The draw returns the first index whose cumulative weight is strictly greater than the
+ uniform sample, so an entry with zero weight is never selected, not even when the
+ sample is exactly `0`.
+ """
+ @inline function pick_index(w::AbstractVector{<:Real}; rng::AbstractRNG = Random.default_rng())::Int
+     # treat negatives as zero; assert we have some mass
+     cs = cumsum(max.(w, zero(eltype(w))))
+     total = cs[end]
+     @assert isfinite(total) && total > 0 "pick_index: all weights ≤ 0 or non-finite"
+     u = rand(rng) * total
+     # last index with cs ≤ u, plus one == first index with cs > u
+     idx = searchsortedlast(cs, u) + 1
+     # u can round up to `total`; fall back to the first index that reaches the total,
+     # which always carries positive weight.
+     return idx > lastindex(cs) ? searchsortedfirst(cs, total) : idx
+ end
+
+
+ """
+     step_positionwise(P, Xt, hat, s1, s2; rng=Random.default_rng())
+
+ One Euler step of the edit process for a single 1-D `DiscreteState`, using the
+ position-wise output layout.
+
+ `P.transform(hat)` is split by `part_output` into insert, substitute and delete rates,
+ and only batch slice 1 is used. Each site fires at most one event (delete or substitute)
+ with probability `1 - exp(-dt * rate)`, and each gap receives at most one insertion.
+ If the insert rates have one column per site instead of one per gap, gap `s` reuses the
+ column of site `clamp(s, 1, n)`.
+
+ When the first token equals `P.bos_token`, that site is neither substituted nor deleted
+ and nothing is inserted in front of it.
+
+ # Keywords
+ - `rng`: random number generator used for every draw (defaults to the task-local RNG,
+   which is what the previous global `rand()` calls used).
+
+ # Throws
+ - `DimensionMismatch` if the model output covers more sites than the sequence has tokens.
+ """
+ function step_positionwise(P::EditFlow,
+                            Xt::DiscreteState{<:AbstractArray{<:Signed}},
+                            hat,
+                            s1::Real, s2::Real;
+                            rng::AbstractRNG = Random.default_rng())
+
+     @assert ndims(Xt.state) == 1 "EditFlow.step only supports 1D DiscreteState"
+
+     # Rates for batch slice 1, copied to plain CPU arrays
+     pins, psub, pdel = part_output(P, P.transform(hat))
+     ins = Array(pins[:, :, 1])
+     sub = Array(psub[:, :, 1])
+     del = vec(Array(pdel[1, :, 1]))
+
+     K, n = size(sub, 1), size(sub, 2)
+     @assert size(ins, 1) == K
+     @assert length(del) == n
+
+     # Ensure gaps shape (K, n+1)
+     ins_gaps = if size(ins, 2) == n + 1
+         ins
+     elseif size(ins, 2) == n
+         tmp = similar(ins, K, n + 1)
+         if n == 0
+             # no site to borrow rates from: the single gap gets zero rate
+             fill!(tmp, zero(eltype(tmp)))
+         else
+             for s in 0:n
+                 @views tmp[:, s + 1] .= ins[:, clamp(s, 1, n)]
+             end
+         end
+         tmp
+     else
+         error("EditFlow.step: bad ins size $(size(ins))")
+     end
+
+     dt = float(s2 - s1)
+     x = collect(tensor(Xt))
+     # The loops below index `x` by the site count of the model output.
+     length(x) >= n || throw(DimensionMismatch(
+         "EditFlow.step: model output covers $n sites but the sequence has $(length(x)) tokens"))
+
+     # Forbid self-substitutions
+     for i in 1:n
+         tok = x[i]
+         if 1 ≤ tok ≤ K
+             sub[tok, i] = zero(eltype(sub))
+         end
+     end
+
+     # Forbid editing BOS explicitly
+     has_bos = n > 0 && x[1] == P.bos_token
+     if has_bos
+         sub[:, 1] .= 0
+         del[1] = 0
+     end
+
+     # ---- site events (delete/sub) ----
+     to_delete = falses(n)
+     sub_to = zeros(Int, n)
+     for i in 1:n
+         sub_col = @view sub[:, i]
+         r_del = del[i]
+         r_sub_total = sum(sub_col)
+         r_tot = r_del + r_sub_total
+         if r_tot > 0 && rand(rng) < (1 - exp(-dt * r_tot))
+             u = rand(rng) * r_tot
+             if u < r_del
+                 to_delete[i] = true
+             elseif r_sub_total > 0
+                 # negatives count as zero weight, as in pick_index
+                 sub_to[i] = _pick_index1d!(rng, cumsum(max.(sub_col, zero(eltype(sub)))))
+             end
+         end
+     end
+
+     # ---- gap insertions (≤1 per gap) ----
+     ins_tok = fill(0, n + 1)
+     start_gap = has_bos ? 1 : 0   # skip the gap in front of BOS
+     for s in start_gap:n
+         gap_col = @view ins_gaps[:, s + 1]
+         r_ins_total = sum(gap_col)
+         if r_ins_total > 0 && rand(rng) < (1 - exp(-dt * r_ins_total))
+             ins_tok[s + 1] = _pick_index1d!(rng, cumsum(max.(gap_col, zero(eltype(ins_gaps)))))
+         end
+     end
+
+     # ---- build new sequence ----
+     result = Int[]
+     if ins_tok[1] != 0
+         push!(result, ins_tok[1])
+     end
+     for i in 1:n
+         if !to_delete[i]
+             push!(result, sub_to[i] == 0 ? x[i] : sub_to[i])
+         end
+         if ins_tok[i + 1] != 0
+             push!(result, ins_tok[i + 1])
+         end
+     end
+     return DiscreteState(Xt.K, result)
+ end
+
+ """
+     part_output(P::EditFlow, M::AbstractArray)
+
+ Split `M` along its first axis into three slices: rows `1:K` (insert), rows `K+1:2K`
+ (substitute) and the single row `2K+1` (delete), with `K = P.k`. The delete slice keeps
+ its leading singleton axis.
+ """
+ function part_output(P::EditFlow, M::AbstractArray)
+     K = P.k
+     ins_rows = 1:K
+     sub_rows = K+1:2K
+     del_rows = 2K+1:2K+1
+     return M[ins_rows, :, :], M[sub_rows, :, :], M[del_rows, :, :]
+ end
+
+ # Utility: draw an index from a cumulative-weight vector `cs` (nondecreasing, 1D).
+ # The index returned is the first one whose cumulative weight is strictly greater than
+ # the uniform sample, so an entry that adds no weight is never selected, not even when
+ # the sample is exactly 0. Fails an assertion when the total mass `cs[end]` is not
+ # finite and positive.
+ @inline function _pick_index1d!(rng::AbstractRNG, cs::AbstractVector{<:Real})::Int
+     total = cs[end]
+     @assert isfinite(total) && total > 0 "pick_index: total mass ≤ 0 or non-finite"
+     u = rand(rng) * total
+     # last index with cs ≤ u, plus one == first index with cs > u
+     idx = searchsortedlast(cs, u) + 1
+     # u can round up to `total`; fall back to the first index that reaches the total
+     return idx > lastindex(cs) ? searchsortedfirst(cs, total) : idx
+ end
+
+ """
+     step_gapwise(P, hat, Xt, t1, t2; rng=Random.default_rng(), transform=P.transform)
+
+ One CTMC Euler step over a single `DiscreteState`, using gap-wise insertions.
+
+ `hat` is the already-computed model output for `Xt`; this function does not call the
+ model. `transform(hat)` is read as rates with rows `1:K` for insertions (one column per
+ gap), rows `K+1:2K` for substitutions and row `2K+1` for deletions (first `n` columns),
+ where `K = P.k` and `n` is the sequence length. Only batch slice 1 is used.
+
+ Each site fires at most one event (delete or substitute) with probability
+ `1 - exp(-dt * rate)`, and each gap receives at most one insertion. Self-substitutions are
+ forbidden. When the first token equals `P.bos_token`, that site is not edited and
+ nothing is inserted in front of it.
+ """
+ function step_gapwise(
+     P::EditFlow,
+     hat,                                   # model output for Xt (not the model itself)
+     Xt::DiscreteState{<:AbstractVector{<:Signed}},
+     t1::Real, t2::Real;                    # times, with dt = t2 - t1 > 0
+     rng::AbstractRNG = Random.default_rng(),
+     transform::Function = P.transform,
+ )
+     @assert t2 > t1
+     x = collect(tensor(Xt))                # Vector{Int}
+     K = P.k
+     n = length(x)
+
+     # Rates from the gap-wise head, copied to plain CPU arrays
+     R = transform(hat)
+     ins = Array(R[1:K, :, 1])              # one column per gap
+     sub = Array(R[K+1:2K, 1:n, 1])         # (K, n)
+     del = vec(Array(R[2K+1, 1:n, 1]))      # (n,)
+
+     # Enforce CTMC constraints: no self-sub, protect BOS
+     if n > 0 && x[1] == P.bos_token
+         sub[:, 1] .= 0
+         del[1] = 0
+         # also disallow pre-BOS insert (gap 1)
+         ins[:, 1] .= 0
+     end
+     # forbid self-substitutions
+     for i in 1:n
+         tok = x[i]
+         if 1 <= tok <= K
+             sub[tok, i] = 0
+         else
+             sub[:, i] .= 0                 # non-vocab token → block subs at site i
+         end
+     end
+
+     dt = Float64(t2 - t1)
+
+     # -------- sample site events (delete or substitute) --------
+     to_delete = falses(n)
+     sub_to = zeros(Int, n)
+     for i in 1:n
+         r_del = max(del[i], 0.0)
+         r_sub_total = sum(@view sub[:, i])
+         r_tot = r_del + r_sub_total
+         if r_tot > 0 && rand(rng) < (1 - exp(-dt * r_tot))
+             u = rand(rng) * r_tot
+             if u < r_del
+                 to_delete[i] = true
+             elseif r_sub_total > 0
+                 # draw new token from sub[:, i]
+                 cs = cumsum(@view sub[:, i])
+                 sub_to[i] = _pick_index1d!(rng, cs)
+             end
+         end
+     end
+
+     # -------- sample gap insertions (≤1 per gap) --------
+     ins_tok = fill(0, n + 1)
+     for g in 1:(n + 1)
+         r_ins_total = sum(@view ins[:, g])
+         if r_ins_total > 0 && rand(rng) < (1 - exp(-dt * r_ins_total))
+             cs = cumsum(@view ins[:, g])
+             ins_tok[g] = _pick_index1d!(rng, cs)
+         end
+     end
+
+     # -------- build new sequence --------
+     result = Int[]
+     if ins_tok[1] != 0
+         push!(result, ins_tok[1])
+     end
+     for i in 1:n
+         if !to_delete[i]
+             push!(result, sub_to[i] == 0 ? x[i] : sub_to[i])
+         end
+         if ins_tok[i + 1] != 0
+             push!(result, ins_tok[i + 1])
+         end
+     end
+
+     return DiscreteState(Xt.K, result)
+ end
+
+ """
+     rollout_gapwise(P, model, x0, ts; rng=Random.default_rng())
+
+ Simulate the gap-wise edit process from `x0` along the sorted time grid `ts`
+ (all values in `[0, 1]`).
+
+ For every consecutive pair `(ts[k], ts[k+1])` the current sequence is wrapped in a
+ single-column `MaskedState` with all-true masks, `model([Float32(ts[k])], state)` is
+ evaluated, and the result is passed to `step_gapwise`.
+
+ An empty `ts` returns `x0` unchanged. Repeated times in `ts` are zero-length steps and
+ are skipped, because `step_gapwise` requires `t2 > t1`.
+ """
+ function rollout_gapwise(P::EditFlow, model, x0::DiscreteState, ts::AbstractVector; rng=Random.default_rng())
+     isempty(ts) && return x0
+     @assert issorted(ts) && first(ts) >= 0 && last(ts) <= 1
+     K = P.k
+     x = x0
+     for k in 1:length(ts)-1
+         t_now, t_next = ts[k], ts[k+1]
+         # zero-length interval: nothing can happen, and step_gapwise would reject it
+         t_next > t_now || continue
+
+         x_tilde = collect(tensor(x))       # Vector{Int}
+         n = length(x_tilde)
+         # Build masked state for the model: every position is valid
+         lmask = trues(n)
+         cmask = trues(n)
+         Xt_ms = MaskedState(DiscreteState(K, reshape(x_tilde, :, 1)), reshape(cmask, :, 1), reshape(lmask, :, 1))
+
+         hat = model([Float32(t_now)], Xt_ms)
+         x = step_gapwise(P, hat, x, t_now, t_next; rng=rng)
+     end
+     return x
+ end
+
+ # --- helper: turn a 3-D array (first slice along axis 3) or a 2-D array into a plain
+ # --- `Array` matrix; any other rank is an error.
+ @inline function _KL(A)
+     rank = ndims(A)
+     if rank == 3
+         return Array(view(A, :, :, 1))
+     elseif rank == 2
+         return Array(A)
+     end
+     error("Expected (K,L,1) or (K,L), got size $(size(A))")
+ end
+
+ # Helper: extract a plain vector from the first row of a 3-D array (first slice along
+ # axis 3), the first row of a 2-D array, or a 1-D array; any other rank is an error.
+ @inline function _L(A)
+     rank = ndims(A)
+     if rank == 3
+         return vec(Array(view(A, 1, :, 1)))
+     elseif rank == 2
+         return vec(Array(view(A, 1, :)))
+     elseif rank == 1
+         return vec(Array(A))
+     end
+     error("Expected (1,L,1) or (1,L) or (L,), got size $(size(A))")
+ end
+
+"""
+One CTMC Euler step for POSITIONWISE_REPARAM.
+
+`hat` is a tuple (ins_q_logits, sub_q_logits, ins_lambda, sub_lambda, del_lambda)
+for the current sequence, with shapes (K,L,1)/(K,L) for logits and (1,L,1)/(1,L)/L for lambdas.
+Insertions are defined per *site* here: gap g=2..L+1 uses site g-1; gap 1 (pre-BOS) is blocked.
+"""
+ function step_positionwise_reparam(
+     P::EditFlow,
+     hat,                                   # (ins_q_logits, sub_q_logits, ins_λ, sub_λ, del_λ)
+     Xt::DiscreteState{<:AbstractVector{<:Signed}},
+     t1::Real, t2::Real;
+     rng::AbstractRNG = Random.default_rng(),
+ )
+     @assert t2 > t1
+     dt = Float64(t2 - t1)
+
+     # Unpack the five heads into plain CPU arrays: token distributions via softmax over
+     # the first axis, total rates via P.transform.
+     ins_q_logits, sub_q_logits, ins_λ_raw, sub_λ_raw, del_λ_raw = hat
+     ins_q = NNlib.softmax(_KL(ins_q_logits); dims=1)
+     sub_q = NNlib.softmax(_KL(sub_q_logits); dims=1)
+     ins_λ = P.transform(_L(ins_λ_raw))
+     sub_λ = P.transform(_L(sub_λ_raw))
+     del_λ = P.transform(_L(del_λ_raw))
+
+     x = collect(tensor(Xt))                # Vector{Int}
+     K = P.k
+     L = length(x)
+
+     # Empty sequence: there is no site to take insertion rates from, so nothing changes.
+     if L == 0
+         return Xt
+     end
+
+     # The loops below run under @inbounds, so check the shapes they rely on up front.
+     if size(sub_q, 1) < K ||
+        min(size(ins_q, 2), size(sub_q, 2), length(ins_λ), length(sub_λ), length(del_λ)) < L
+         throw(DimensionMismatch(
+             "step_positionwise_reparam: model output is too small for a sequence of length $L with K = $K"))
+     end
+
+     # --- forbid BOS edits ---
+     if x[1] == P.bos_token
+         sub_q[:, 1] .= 0
+         sub_λ[1] = 0
+         del_λ[1] = 0
+     end
+
+     # --- forbid self-substitutions (zero prob, no renorm needed) ---
+     @inbounds for i in 1:L
+         tok = x[i]
+         if 1 <= tok <= K
+             sub_q[tok, i] = 0
+         else
+             # Non-vocab token at site i → block substitutions at this site
+             sub_q[:, i] .= 0
+             sub_λ[i] = 0
+         end
+     end
+
+     # -------- site events (delete or substitute) --------
+     to_delete = falses(L)
+     sub_to = zeros(Int, L)
+     @inbounds for i in 1:L
+         r_del = max(del_λ[i], 0.0)
+         r_sub_total = max(sub_λ[i], 0.0) * sum(@view sub_q[:, i])
+         r_tot = r_del + r_sub_total
+         if r_tot > 0 && rand(rng) < (1 - exp(-dt * r_tot))
+             u = rand(rng) * r_tot
+             if u < r_del
+                 to_delete[i] = true
+             elseif r_sub_total > 0
+                 # sample token ∝ sub_q[:, i]
+                 cs = cumsum(@view sub_q[:, i])
+                 sub_to[i] = _pick_index1d!(rng, cs)
+             end
+         end
+     end
+
+     # -------- insertions (≤1 per gap) --------
+     # The gap after site i takes its rates from site i. The gap in front of the first
+     # site is never sampled, so ins_after has one entry per site.
+     ins_after = zeros(Int, L)
+     @inbounds for i in 1:L
+         r_vec = (@view ins_q[:, i]) .* ins_λ[i]
+         r_tot = sum(r_vec)
+         if r_tot > 0 && rand(rng) < (1 - exp(-dt * r_tot))
+             cs = cumsum(r_vec)
+             ins_after[i] = _pick_index1d!(rng, cs)
+         end
+     end
+
+     # -------- build new sequence --------
+     result = Int[]
+     @inbounds for i in 1:L
+         if !to_delete[i]
+             push!(result, sub_to[i] == 0 ? x[i] : sub_to[i])
+         end
+         if ins_after[i] != 0
+             push!(result, ins_after[i])
+         end
+     end
+
+     return DiscreteState(Xt.K, result)
+ end
+
+
+#=
+@inline _KL(A) = ndims(A) == 3 ? Array(@view A[:, :, 1]) :
+ ndims(A) == 2 ? Array(A) :
+ error("Expected (K,L,1) or (K,L), got size $(size(A))")
+
+@inline _L(A) = ndims(A) == 3 ? vec(Array(@view A[1, :, 1])) :
+ ndims(A) == 2 ? vec(Array(@view A[1, :])) :
+ ndims(A) == 1 ? vec(Array(A)) :
+ error("Expected (1,L,1) or (1,L) or (L,), got size $(size(A))")
+
+function step_positionwise_reparam(
+ P::EditFlow,
+ hat, # (ins_q_logits, sub_q_logits, ins_λ, sub_λ, del_λ)
+ Xt::DiscreteState{<:AbstractVector{<:Signed}},
+ t1::Real, t2::Real;
+ rng::AbstractRNG = Random.default_rng(),
+)
+ @assert t2 > t1
+ dt = Float64(t2 - t1)
+
+ # unpack → arrays
+ ins_q_logits, sub_q_logits, ins_λ_raw, sub_λ_raw, del_λ_raw = hat
+ ins_q = NNlib.softmax(_KL(ins_q_logits); dims=1) # (K,L)
+ sub_q = NNlib.softmax(_KL(sub_q_logits); dims=1) # (K,L)
+ ins_λ = P.transform(_L(ins_λ_raw)) # (L,)
+ sub_λ = P.transform(_L(sub_λ_raw)) # (L,)
+ del_λ = P.transform(_L(del_λ_raw)) # (L,)
+
+ x = collect(tensor(Xt))
+ K = P.k
+ L = length(x)
+ if L == 0
+ return Xt
+ end
+
+ # --- κ-scaling to encourage growth over time ---
+ κt = clamp(float(P.κ(t2)), 0.0, 1.0)
+ ins_λ .*= κt
+ del_λ .*= (1 - κt)
+ # (sub_λ unchanged; keep neutral)
+
+ # --- BOS policy + small insert floor at first site (gap 2) ---
+ if x[1] == P.bos_token
+ sub_q[:, 1] .= 0
+ sub_λ[1] = 0
+ del_λ[1] = 0
+ λmin = eltype(ins_λ)(1e-3)
+ ins_λ[1] = max(ins_λ[1], λmin)
+ end
+
+ # --- forbid self-substitutions / non-vocab guards ---
+ @inbounds for i in 1:L
+ tok = x[i]
+ if 1 <= tok <= K
+ sub_q[tok, i] = 0
+ else
+ sub_q[:, i] .= 0
+ sub_λ[i] = 0
+ end
+ end
+
+ # -------- site events (delete or substitute) --------
+ to_delete = falses(L)
+ sub_to = zeros(Int, L)
+ @inbounds for i in 1:L
+ r_del = max(del_λ[i], 0.0)
+ r_sub_total = max(sub_λ[i], 0.0) * sum(@view sub_q[:, i])
+ r_tot = r_del + r_sub_total
+ if r_tot > 0 && rand(rng) < (1 - exp(-dt * r_tot))
+ u = rand(rng) * r_tot
+ if u < r_del
+ to_delete[i] = true
+ elseif r_sub_total > 0
+ cs = cumsum(@view sub_q[:, i])
+ sub_to[i] = _pick_index1d!(rng, cs)
+ end
+ end
+ end
+
+ # -------- gap insertions (≤1 per gap) --------
+ # gap g=1..L+1 ; g==1 (pre-BOS) blocked; use site (g-1), with L reused for g==L+1
+ ins_tok = fill(0, L + 1)
+ @inbounds for g in 2:(L+1)
+ src = min(g - 1, L)
+ r_vec = (@view ins_q[:, src]) .* ins_λ[src]
+ r_tot = sum(r_vec)
+ if r_tot > 0 && rand(rng) < (1 - exp(-dt * r_tot))
+ cs = cumsum(r_vec)
+ ins_tok[g] = _pick_index1d!(rng, cs)
+ end
+ end
+ ins_tok[1] = 0 # block pre-BOS insert
+
+ # -------- build new sequence --------
+ result = Int[]
+ if ins_tok[1] != 0; push!(result, ins_tok[1]); end
+ @inbounds for i in 1:L
+ if !to_delete[i]
+ a = (sub_to[i] == 0) ? x[i] : sub_to[i]
+ push!(result, a)
+ end
+ if ins_tok[i + 1] != 0
+ push!(result, ins_tok[i + 1])
+ end
+ end
+
+ # -------- BOS-only guard (rare fallback) --------
+ if length(result) == 1 && result[1] == P.bos_token
+ r_vec = (@view ins_q[:, 1]) .* ins_λ[1] # use first site’s insertion rates
+ if sum(r_vec) > 0
+ cs = cumsum(r_vec)
+ push!(result, _pick_index1d!(rng, cs))
+ end
+ end
+
+ return DiscreteState(Xt.K, result)
+end
+
+=#
+
+
+#=
+function edit_loss(P::EditFlow,
+ M::AbstractArray,
+ transition_mask::AbstractArray,
+ edit_multiplier::AbstractArray,
+ scheduler_scaling;
+ op_mask=nothing,
+ eps=1e-8)
+ R = P.transform(M)
+ OM = isnothing(op_mask) ? one(eltype(R)) .* ones(eltype(R), size(R)) : op_mask
+ term1 = sum(transition_mask .* (OM .* R); dims=(1,2))
+ scl = reshape(scheduler_scaling, 1, 1, :)
+ term2 = sum(scl .* edit_multiplier .* log.(R .+ eps); dims=(1,2))
+ return mean(term1 .- term2)
+end
+=#
+
+
+ """
+     edit_loss(P::EditFlow, M, transition_mask, edit_multiplier, scheduler_scaling; op_mask=nothing, eps=1e-8)
+
+ Compute the training loss by forwarding to the loss routine selected by `P.impl`
+ (`GAPWISE`, `POSITIONWISE` or `POSITIONWISE_REPARAM`). `op_mask` is not forwarded to the
+ `POSITIONWISE_REPARAM` loss, which has no such keyword. Any other `P.impl` raises an
+ error instead of silently returning `nothing`.
+ """
+ function edit_loss(P::EditFlow, M, transition_mask, edit_multiplier, scheduler_scaling; op_mask=nothing, eps=1e-8)
+     if P.impl == GAPWISE
+         return edit_loss_gapwise(P, M, transition_mask, edit_multiplier, scheduler_scaling; op_mask=op_mask, eps=eps)
+     elseif P.impl == POSITIONWISE
+         return edit_loss_positionwise(P, M, transition_mask, edit_multiplier, scheduler_scaling; op_mask=op_mask, eps=eps)
+     elseif P.impl == POSITIONWISE_REPARAM
+         return edit_loss_positionwise_reparam(P, M, transition_mask, edit_multiplier, scheduler_scaling; eps=eps)
+     else
+         error("Unknown EditFlow implementation: $(P.impl)")
+     end
+ end
+
+"""
+ edit_loss_positionwise(P::EditFlow, M, transition_mask, edit_multiplier, scheduler_scaling; op_mask=nothing, eps=1e-8)
+
+Loss matching the reference: mean(sum(transition_mask .* (op_mask .* R)) - sum(scheduler_scaling .* edit_multiplier .* log R)),
+where R = transform(M).
+Shapes:
+- M, transition_mask, edit_multiplier, op_mask: (2K+1, n, B)
+- scheduler_scaling: (1, B) or (B,) broadcastable to (1,1,B)
+"""
+ function edit_loss_positionwise(P::EditFlow,
+                                 M, transition_mask, edit_multiplier, scheduler_scaling;
+                                 op_mask=nothing, eps=1e-8)
+
+     rates = P.transform(M)                 # expected to be non-negative
+     T = eltype(rates)
+
+     # Without an op mask, a scalar one leaves both terms untouched.
+     opm = isnothing(op_mask) ? one(T) : op_mask
+
+     # Rate term: masked rates summed over the first two axes, one value per sample.
+     exit_rates = sum(transition_mask .* (opm .* rates); dims=(1,2))
+
+     # Data term: rates are floored at eps before the log so it stays finite.
+     log_rates = log.(max.(rates, T(eps)))
+     scale = reshape(scheduler_scaling, 1, 1, :)
+     data_term = sum(scale .* (edit_multiplier .* opm) .* log_rates; dims=(1,2))
+
+     return mean(exit_rates .- data_term)
+ end
+
+
+ """
+     edit_loss_gapwise(P::EditFlow, M, transition_mask, edit_multiplier, scheduler_scaling; op_mask=nothing, eps=1f-8)
+
+ Loss for the gap-wise implementation.
+
+ The rates `P.transform(M)` are clamped to `[eps, 1f3]`. For each sample the loss is the
+ masked sum of rates minus the scaled sum of `edit_multiplier .* log(rate)`, divided by
+ the number of active mask entries; the result is the mean over samples.
+
+ The clamp bounds are converted to the element type of the rates, so a `Float64` `eps`
+ (as forwarded by `edit_loss`) does not promote `Float32` rates to `Float64`.
+ """
+ function edit_loss_gapwise(P::EditFlow,
+                            M, transition_mask, edit_multiplier, scheduler_scaling;
+                            op_mask=nothing, eps=1f-8)
+
+     # 1) transform -> positive rates
+     R = P.transform(M)
+     T = eltype(R)
+
+     # 2) cap the rates to avoid Inf/NaN in both the rate term and the log term
+     #    (adjust RMAX if needed; 1e3–1e4 often works well)
+     RMAX = T(1f3)
+     R = clamp.(R, T(eps), RMAX)
+
+     # 3) optional op mask
+     OM = isnothing(op_mask) ? one(eltype(R)) : op_mask
+     mask = transition_mask .* OM
+
+     # 4) regularizer (sum of valid rates), summed per sample: 1×1×B
+     term1 = sum(mask .* R; dims=(1,2))
+
+     # 5) data term (log) – safe because the rates were clamped above
+     logR = log.(R)
+     scl = reshape(scheduler_scaling, 1, 1, :)
+     term2 = sum(scl .* (edit_multiplier .* OM) .* logR; dims=(1,2))
+
+     # 6) normalise by the number of active mask entries for a stable scale
+     active = sum(mask; dims=(1,2))
+     loss_per_batch = (term1 .- term2) ./ (active .+ 1f-8)
+
+     return mean(loss_per_batch)
+ end
+
+
+#=
+function edit_loss_positionwise_reparam(P::EditFlow,
+ M, transition_mask, edit_multiplier, scheduler_scaling; eps=1e-8)
+ (ins_q_mask, sub_q_mask, ins_lambda_mask, sub_lambda_mask, del_lambda_mask) = transition_mask
+ (ins_q, sub_q, ins_lambda, sub_lambda, del_lambda) = M
+ # pre-softmax mask (safe -Inf with Float32 works; on some accelerators prefer -1e9f0)
+ #ins_q = NNlib.softmax(ins_q_logits .+ ifelse.(ins_q_mask .> 0, 0f0, negInf), dims=1)
+ #sub_q = NNlib.softmax(sub_q_logits .+ ifelse.(sub_q_mask .> 0, 0f0, negInf), dims=1)
+ # ins_q = NNlib.softmax(ins_q, dims=1)
+ # sub_q = NNlib.softmax(sub_q, dims=1)
+
+ # probs
+ ins_q = NNlib.softmax(ins_q; dims=1); ins_q .*= ins_q_mask
+ sub_q = NNlib.softmax(sub_q; dims=1); sub_q .*= sub_q_mask
+
+ ins_lambda = P.transform(ins_lambda); ndims(ins_lambda)==2 && (ins_lambda = reshape(ins_lambda, 1, size(ins_lambda,1), size(ins_lambda,2)))
+ sub_lambda = P.transform(sub_lambda); ndims(sub_lambda)==2 && (sub_lambda = reshape(sub_lambda, 1, size(sub_lambda,1), size(sub_lambda,2)))
+ del_lambda = P.transform(del_lambda); ndims(del_lambda)==2 && (del_lambda = reshape(del_lambda, 1, size(del_lambda,1), size(del_lambda,2)))
+
+ # regularizer (sum of *allowed* mass)
+ sum_ins_q = sum(ins_q; dims=1)
+ sum_sub_q = sum(sub_q; dims=1)
+ loss_term_1 = sum(ins_lambda .* ins_lambda_mask .* sum_ins_q .+
+ sub_lambda .* sub_lambda_mask .* sum_sub_q .+
+ del_lambda .* del_lambda_mask; dims=(1,2))
+
+
+ #loss_term_1 = sum(ins_lambda.* ins_lambda_mask + sub_lambda.* sub_lambda_mask + del_lambda.* del_lambda_mask, dims=(1,2))
+ scl = reshape(scheduler_scaling, 1, 1, :) # (1,1,B)
+
+ # ins_lambda = reshape(ins_lambda, 1, size(ins_lambda)...)
+ # sub_lambda = reshape(sub_lambda, 1, size(sub_lambda)...)
+ # del_lambda = reshape(del_lambda, 1, size(del_lambda)...)
+
+ ins_rate_log = log.(max.(ins_lambda .* ins_q, eltype(ins_lambda)(eps)))
+ sub_rate_log = log.(max.(sub_lambda .* sub_q, eltype(sub_lambda)(eps)))
+ del_rate_log = log.(max.(del_lambda, eltype(del_lambda)(eps)))
+
+ (ins_q_edits, sub_q_edits, del_lambda_edits) = edit_multiplier
+ loss_term_2 = sum(scl .* (ins_q_edits .* ins_rate_log), dims=(1,2))
+ + sum(scl .* (sub_q_edits .* sub_rate_log), dims=(1,2))
+ + sum(scl .* (del_lambda_edits .* del_rate_log), dims=(1,2))
+ return mean(loss_term_1 .- loss_term_2)
+end
+=#
+
+ """
+     edit_loss_positionwise_reparam(P::EditFlow, M, transition_mask, edit_multiplier, scheduler_scaling; eps=1e-8)
+
+ Loss for the reparameterised position-wise implementation.
+
+ `M` is a 5-tuple `(ins_q_logits, sub_q_logits, ins_lambda, sub_lambda, del_lambda)`,
+ `transition_mask` is the matching 5-tuple of masks, and `edit_multiplier` is a 3-tuple of
+ insert, substitute and delete edit indicators. Token distributions are the masked
+ softmax of the logits over the first axis; total rates are `P.transform` of the lambda
+ heads, with 2-D results reshaped to have a leading singleton axis.
+
+ Returns the mean over samples of (sum of allowed rates) minus (scaled sum of
+ `edits .* log(rate)`), where rates are floored at `eps` before the log.
+ """
+ function edit_loss_positionwise_reparam(P::EditFlow,
+                                         M, transition_mask, edit_multiplier, scheduler_scaling; eps=1e-8)
+
+     ins_q_mask, sub_q_mask, ins_lambda_mask, sub_lambda_mask, del_lambda_mask = transition_mask
+     ins_q_logits, sub_q_logits, ins_lambda_raw, sub_lambda_raw, del_lambda_raw = M
+
+     # Masked token distributions
+     ins_q = NNlib.softmax(ins_q_logits; dims=1) .* ins_q_mask
+     sub_q = NNlib.softmax(sub_q_logits; dims=1) .* sub_q_mask
+
+     # Total rates; a 2-D result gets a leading singleton axis so it broadcasts against q
+     function as_rate(raw)
+         λ = P.transform(raw)
+         return ndims(λ) == 2 ? reshape(λ, 1, size(λ, 1), size(λ, 2)) : λ
+     end
+     ins_lambda = as_rate(ins_lambda_raw)
+     sub_lambda = as_rate(sub_lambda_raw)
+     del_lambda = as_rate(del_lambda_raw)
+
+     # Term 1: sum of allowed exit rates per sample
+     sum_ins_q = sum(ins_q; dims=1)
+     sum_sub_q = sum(sub_q; dims=1)
+     loss_term_1 = sum(ins_lambda .* ins_lambda_mask .* sum_ins_q .+
+                       sub_lambda .* sub_lambda_mask .* sum_sub_q .+
+                       del_lambda .* del_lambda_mask; dims=(1,2))
+
+     # Term 2: data term per sample, with each rate floored at eps before the log
+     scl = reshape(scheduler_scaling, 1, 1, :)
+     ins_q_edits, sub_q_edits, del_lambda_edits = edit_multiplier
+     ins_rate_log = log.(max.(ins_lambda .* ins_q, eltype(ins_lambda)(eps)))
+     sub_rate_log = log.(max.(sub_lambda .* sub_q, eltype(sub_lambda)(eps)))
+     del_rate_log = log.(max.(del_lambda, eltype(del_lambda)(eps)))
+
+     ins_data = sum(scl .* (ins_q_edits .* ins_rate_log); dims=(1,2))
+     sub_data = sum(scl .* (sub_q_edits .* sub_rate_log); dims=(1,2))
+     del_data = sum(scl .* (del_lambda_edits .* del_rate_log); dims=(1,2))
+     loss_term_2 = ins_data + sub_data + del_data
+
+     return mean(loss_term_1 .- loss_term_2)
+ end
+
+"""
+getlmask(P::EditFlow, Xt::AbstractMatrix{<:Integer})
+
+Build lmask of shape (xt_length, B) from padded Xt.
+"""
+ # Elementwise mask with the same shape as `Xt`: true wherever the entry is not
+ # `P.padding_token`.
+ getlmask(P::EditFlow, Xt::AbstractMatrix{<:Integer}) = Xt .!= P.padding_token
diff --git a/src/types.jl b/src/types.jl
index e3c30b1..e6a2126 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -52,6 +52,9 @@ UState = Union{State,MaskedState, Guide}
#This is for all Flow types where the mixture probabilities are directly defined, and the gen is done via probability velocities.
abstract type ConvexInterpolatingDiscreteFlow <: DiscreteProcess end #https://arxiv.org/pdf/2407.15595
+# Edit-based discrete processes (insert/substitute/delete)
+abstract type DiscreteIndelProcess <: DiscreteProcess end
+
struct InterpolatingDiscreteFlow <: ConvexInterpolatingDiscreteFlow
κ::Function
κ̇::Function