From 3593338c754f1903ca04a3e3a4d8bae9e9f5355f Mon Sep 17 00:00:00 2001
From: Ahmad <ahmadzafarani2013@gmail.com>
Date: Wed, 3 May 2023 01:00:49 +0430
Subject: [PATCH 1/4] using first paper codes

---
 Manifest.toml                          | 253 +++++++++++++++++--------
 Project.toml                           |   7 +-
 src/Backtests/MultipleTestReporting.jl |  32 ++++
 src/Backtests/TestSetOverfitting.jl    |  67 ++++---
 src/Features/Clustering.jl             |  96 +++++-----
 5 files changed, 297 insertions(+), 158 deletions(-)
 create mode 100644 src/Backtests/MultipleTestReporting.jl

diff --git a/Manifest.toml b/Manifest.toml
index 5f3f964..4986c41 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -3,8 +3,11 @@
 julia_version = "1.7.2"
 manifest_format = "2.0"
 
-[[deps.ArgTools]]
-uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+[[deps.ArrayLayouts]]
+deps = ["FillArrays", "LinearAlgebra", "SparseArrays"]
+git-tree-sha1 = "4aff5fa660eb95c2e0deb6bcdabe4d9a96bc4667"
+uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
+version = "0.8.18"
 
 [[deps.Artifacts]]
 uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -12,11 +15,41 @@ uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 [[deps.Base64]]
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 
+[[deps.BlockArrays]]
+deps = ["ArrayLayouts", "FillArrays", "LinearAlgebra"]
+git-tree-sha1 = "3b15c61bcece7c426ea641d143c808ace3661973"
+uuid = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
+version = "0.16.25"
+
+[[deps.BlockDiagonals]]
+deps = ["ChainRulesCore", "FillArrays", "FiniteDifferences", "LinearAlgebra"]
+git-tree-sha1 = "ffd635c19b56f50d1d4278d876219644299b5711"
+uuid = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
+version = "0.1.41"
+
+[[deps.ChainRulesCore]]
+deps = ["Compat", "LinearAlgebra", "SparseArrays"]
+git-tree-sha1 = "c6d890a52d2c4d55d326439580c3b8d0875a77d9"
+uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+version = "1.15.7"
+
+[[deps.ChangesOfVariables]]
+deps = ["LinearAlgebra", "Test"]
+git-tree-sha1 = "f84967c4497e0e1955f9a582c232b02847c5f589"
+uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
+version = "0.1.7"
+
+[[deps.Clustering]]
+deps = ["Distances", "LinearAlgebra", "NearestNeighbors", "Printf", "Random", "SparseArrays", "Statistics", "StatsBase"]
+git-tree-sha1 = "a3213fa9d35edf589d0c6303f95850f7641fe2dc"
+uuid = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
+version = "0.15.1"
+
 [[deps.Compat]]
-deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
-git-tree-sha1 = "78bee250c6826e1cf805a88b7f1e86025275d208"
+deps = ["Dates", "LinearAlgebra", "UUIDs"]
+git-tree-sha1 = "7a60c856b9fa189eb34f5f8a6f6b5529b7942957"
 uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
-version = "3.46.0"
+version = "4.6.1"
 
 [[deps.CompilerSupportLibraries_jll]]
 deps = ["Artifacts", "Libdl"]
@@ -28,15 +61,15 @@ uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
 version = "4.1.1"
 
 [[deps.DataAPI]]
-git-tree-sha1 = "fb5f5316dd3fd4c5e7c30a24d50643b73e37cd40"
+git-tree-sha1 = "e8119c1a33d267e16108be441a287a6981ba1630"
 uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
-version = "1.10.0"
+version = "1.14.0"
 
 [[deps.DataFrames]]
-deps = ["Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
-git-tree-sha1 = "daa21eb85147f72e41f6352a57fccea377e310a9"
+deps = ["Compat", "DataAPI", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SnoopPrecompile", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
+git-tree-sha1 = "aa51303df86f8626a962fccb878430cdb0a97eee"
 uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-version = "1.3.4"
+version = "1.5.0"
 
 [[deps.DataStructures]]
 deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
@@ -57,19 +90,29 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
 deps = ["Mmap"]
 uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 
-[[deps.Distributed]]
-deps = ["Random", "Serialization", "Sockets"]
-uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+[[deps.Distances]]
+deps = ["LinearAlgebra", "SparseArrays", "Statistics", "StatsAPI"]
+git-tree-sha1 = "49eba9ad9f7ead780bfb7ee319f962c811c6d3b2"
+uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+version = "0.10.8"
 
 [[deps.DocStringExtensions]]
 deps = ["LibGit2"]
-git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b"
+git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d"
 uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-version = "0.8.6"
+version = "0.9.3"
+
+[[deps.FillArrays]]
+deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"]
+git-tree-sha1 = "7072f1e3e5a8be51d525d64f63d3ec1287ff2790"
+uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
+version = "0.13.11"
 
-[[deps.Downloads]]
-deps = ["ArgTools", "LibCURL", "NetworkOptions"]
-uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+[[deps.FiniteDifferences]]
+deps = ["ChainRulesCore", "LinearAlgebra", "Printf", "Random", "Richardson", "SparseArrays", "StaticArrays"]
+git-tree-sha1 = "3f605dd6db5640c5278f2551afc9427656439f42"
+uuid = "26cc04aa-876d-5657-8c51-4c34ba976000"
+version = "0.12.26"
 
 [[deps.Formatting]]
 deps = ["Printf"]
@@ -81,36 +124,46 @@ version = "0.4.2"
 deps = ["Random"]
 uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
 
+[[deps.InlineStrings]]
+deps = ["Parsers"]
+git-tree-sha1 = "9cc2baf75c6d09f9da536ddf58eb2f29dedaf461"
+uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
+version = "1.4.0"
+
 [[deps.InteractiveUtils]]
 deps = ["Markdown"]
 uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 
+[[deps.InverseFunctions]]
+deps = ["Test"]
+git-tree-sha1 = "6667aadd1cdee2c6cd068128b3d226ebc4fb0c67"
+uuid = "3587e190-3f89-42d0-90ee-14403ec27112"
+version = "0.1.9"
+
 [[deps.InvertedIndices]]
-git-tree-sha1 = "bee5f1ef5bf65df56bdd2e40447590b272a5471f"
+git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038"
 uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
-version = "1.1.0"
+version = "1.3.0"
+
+[[deps.IrrationalConstants]]
+git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2"
+uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
+version = "0.2.2"
 
 [[deps.IteratorInterfaceExtensions]]
 git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
 uuid = "82899510-4779-5014-852e-03e436cf321d"
 version = "1.0.0"
 
-[[deps.LibCURL]]
-deps = ["LibCURL_jll", "MozillaCACerts_jll"]
-uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
-
-[[deps.LibCURL_jll]]
-deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
-uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+[[deps.LaTeXStrings]]
+git-tree-sha1 = "f2355693d6778a178ade15952b7ac47a4ff97996"
+uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
+version = "1.3.0"
 
 [[deps.LibGit2]]
 deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
 uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
 
-[[deps.LibSSH2_jll]]
-deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
-uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
-
 [[deps.Libdl]]
 uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 
@@ -118,6 +171,12 @@ uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 deps = ["Libdl", "libblastrampoline_jll"]
 uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
+[[deps.LogExpFunctions]]
+deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"]
+git-tree-sha1 = "0a1b7c2863e44523180fdb3146534e265a91870b"
+uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
+version = "0.3.23"
+
 [[deps.Logging]]
 uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 
@@ -125,21 +184,20 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 deps = ["Base64"]
 uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
 
-[[deps.MbedTLS_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
-
 [[deps.Missings]]
 deps = ["DataAPI"]
-git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f"
+git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272"
 uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
-version = "1.0.2"
+version = "1.1.0"
 
 [[deps.Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
 
-[[deps.MozillaCACerts_jll]]
-uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+[[deps.NearestNeighbors]]
+deps = ["Distances", "StaticArrays"]
+git-tree-sha1 = "2c3726ceb3388917602169bed973dbc97f1b51a8"
+uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
+version = "0.4.13"
 
 [[deps.NetworkOptions]]
 uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
@@ -149,13 +207,15 @@ deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
 
 [[deps.OrderedCollections]]
-git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
+git-tree-sha1 = "d321bf2de576bf25ec4d3e4360faca399afca282"
 uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-version = "1.4.1"
+version = "1.6.0"
 
-[[deps.Pkg]]
-deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
-uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+[[deps.Parsers]]
+deps = ["Dates", "SnoopPrecompile"]
+git-tree-sha1 = "478ac6c952fddd4399e71d4779797c538d0ff2bf"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.5.8"
 
 [[deps.PooledArrays]]
 deps = ["DataAPI", "Future"]
@@ -163,11 +223,23 @@ git-tree-sha1 = "a6062fe4063cdafe78f4a0a81cfffb89721b30e7"
 uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
 version = "1.4.2"
 
+[[deps.PrecompileTools]]
+deps = ["Preferences"]
+git-tree-sha1 = "2e47054ffe7d0a8872e977c0d09eb4b3d162ebde"
+uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+version = "1.0.2"
+
+[[deps.Preferences]]
+deps = ["TOML"]
+git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d"
+uuid = "21216c6a-2e73-6563-6e65-726566657250"
+version = "1.3.0"
+
 [[deps.PrettyTables]]
-deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"]
-git-tree-sha1 = "dfb54c4e414caa595a1f2ed759b160f5a3ddcba5"
+deps = ["Crayons", "Formatting", "LaTeXStrings", "Markdown", "Reexport", "StringManipulation", "Tables"]
+git-tree-sha1 = "548793c7859e28ef026dba514752275ee871169f"
 uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
-version = "1.3.1"
+version = "2.2.3"
 
 [[deps.Printf]]
 deps = ["Unicode"]
@@ -182,42 +254,91 @@ deps = ["SHA", "Serialization"]
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [[deps.RecipesBase]]
-git-tree-sha1 = "6bf3f380ff52ce0832ddd3a2a7b9538ed1bcca7d"
+deps = ["PrecompileTools"]
+git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff"
 uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
-version = "1.2.1"
+version = "1.3.4"
 
 [[deps.Reexport]]
 git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
 uuid = "189a3867-3050-52da-a836-e630ba90ab69"
 version = "1.2.2"
 
+[[deps.Richardson]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "e03ca566bec93f8a3aeb059c8ef102f268a38949"
+uuid = "708f8203-808e-40c0-ba2d-98a6953ed40d"
+version = "1.4.0"
+
 [[deps.SHA]]
 uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 
+[[deps.SentinelArrays]]
+deps = ["Dates", "Random"]
+git-tree-sha1 = "77d3c4726515dca71f6d80fbb5e251088defe305"
+uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
+version = "1.3.18"
+
 [[deps.Serialization]]
 uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 
-[[deps.SharedArrays]]
-deps = ["Distributed", "Mmap", "Random", "Serialization"]
-uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+[[deps.Shuffle]]
+deps = ["Random"]
+git-tree-sha1 = "b812fb30d6d8b295b71dd5a4102d1ae7b60698e3"
+uuid = "bf21e494-c40e-4daa-abfb-de5ec0aad010"
+version = "0.1.1"
+
+[[deps.SnoopPrecompile]]
+deps = ["Preferences"]
+git-tree-sha1 = "e760a70afdcd461cf01a575947738d359234665c"
+uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c"
+version = "1.0.3"
 
 [[deps.Sockets]]
 uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
 
 [[deps.SortingAlgorithms]]
 deps = ["DataStructures"]
-git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508"
+git-tree-sha1 = "a4ada03f999bd01b3a25dcaa30b2d929fe537e00"
 uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
-version = "1.0.1"
+version = "1.1.0"
 
 [[deps.SparseArrays]]
 deps = ["LinearAlgebra", "Random"]
 uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
+[[deps.StaticArrays]]
+deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"]
+git-tree-sha1 = "c262c8e978048c2b095be1672c9bee55b4619521"
+uuid = "90137ffa-7385-5640-81b9-e52037218182"
+version = "1.5.24"
+
+[[deps.StaticArraysCore]]
+git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a"
+uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
+version = "1.4.0"
+
 [[deps.Statistics]]
 deps = ["LinearAlgebra", "SparseArrays"]
 uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
+[[deps.StatsAPI]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "45a7769a04a3cf80da1c1c7c60caf932e6f4c9f7"
+uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
+version = "1.6.0"
+
+[[deps.StatsBase]]
+deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"]
+git-tree-sha1 = "d1bf48bfcc554a3761a133fe3a9bb01488e06916"
+uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+version = "0.33.21"
+
+[[deps.StringManipulation]]
+git-tree-sha1 = "46da2434b41f41ac3594ee9816ce5541c6096123"
+uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e"
+version = "0.3.0"
+
 [[deps.TOML]]
 deps = ["Dates"]
 uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
@@ -230,13 +351,9 @@ version = "1.0.1"
 
 [[deps.Tables]]
 deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits", "Test"]
-git-tree-sha1 = "5ce79ce186cc678bbb5c5681ca3379d1ddae11a1"
+git-tree-sha1 = "1544b926975372da01227b382066ab70e574a3ec"
 uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
-version = "1.7.0"
-
-[[deps.Tar]]
-deps = ["ArgTools", "SHA"]
-uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
+version = "1.10.1"
 
 [[deps.Test]]
 deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
@@ -244,9 +361,9 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [[deps.TimeSeries]]
 deps = ["Dates", "DelimitedFiles", "DocStringExtensions", "RecipesBase", "Reexport", "Statistics", "Tables"]
-git-tree-sha1 = "3c91141a9f2276c37c3b6bc2bd83e652d50fecbc"
+git-tree-sha1 = "3dd965ee9ce5e1857172cffa6d8985cd8b299585"
 uuid = "9e3dc215-6440-5c97-bce1-76c03772f85e"
-version = "0.23.0"
+version = "0.23.1"
 
 [[deps.UUIDs]]
 deps = ["Random", "SHA"]
@@ -255,18 +372,6 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 [[deps.Unicode]]
 uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 
-[[deps.Zlib_jll]]
-deps = ["Libdl"]
-uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
-
 [[deps.libblastrampoline_jll]]
 deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-
-[[deps.nghttp2_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-
-[[deps.p7zip_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
diff --git a/Project.toml b/Project.toml
index dc73a1c..09a7cd8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,14 +4,17 @@ authors = ["RiskLab AI <research@risklab.ai>"]
 version = "0.0.1"
 
 [deps]
+BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
+BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
+Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Shuffle = "bf21e494-c40e-4daa-abfb-de5ec0aad010"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TimeSeries = "9e3dc215-6440-5c97-bce1-76c03772f85e"
 
 [compat]
-julia = ">= 1.3"
 DataFrames = ">= 1.0"
 TimeSeries = ">= 0.13.0"
-
+julia = ">= 1.3"
diff --git a/src/Backtests/MultipleTestReporting.jl b/src/Backtests/MultipleTestReporting.jl
new file mode 100644
index 0000000..db552f9
--- /dev/null
+++ b/src/Backtests/MultipleTestReporting.jl
@@ -0,0 +1,32 @@
+"""
+List of Collaborators, Developers, and Research Assistants (in alphabetical order)
+Ahmad Zaferani
+"""
+
+using DataFrames
+include("../Features/Clustering.jl") # directory is `Features`; a lowercase path fails on case-sensitive filesystems
+include("TestSetOverfitting.jl")
+
+#---------------------------------------------------
+"""
+function: a template for reporting results of backtests on financial strategies
+reference: Fabozzi, F, De Prado, M (2018) Being Honest in Backtest Reporting: A Template for Disclosing Multiple Tests
+doi: https://doi.org/10.3905/jpm.2018.45.1.141
+"""
+function BacktestResultTemplate(nTrials::Integer, # number of trials
+    familySize::Union{Integer,Nothing}, # number of significantly different experiments; `nothing` derives it by clustering
+    correlation::Union{DataFrame,Nothing}, # corr dataframe of backtest returns; required when familySize is `nothing`
+    familyWiseErrorRate::Union{Float64,Nothing}, # family-wise error rate (not read by the body yet)
+    powerOfTest::Union{Float64,Nothing}, # power of the test (not read by the body yet)
+)::Nothing
+    if familySize === nothing
+        println("familySize not provided; using clustering on correlation matrix of backtest returns...")
+        @assert correlation !== nothing "correlation must be provided"
+        correlationNew, clusters, silh = clusterKMeansTop(correlation)
+        familySize = length(clusters) # one family per cluster returned by clusterKMeansTop
+        println("calculated familySize: ", familySize)
+    else
+        @assert nTrials >= familySize "familySize must be less equal to nTrials"
+    end
+    return nothing
+end
diff --git a/src/Backtests/TestSetOverfitting.jl b/src/Backtests/TestSetOverfitting.jl
index a0f8499..3e7300c 100644
--- a/src/Backtests/TestSetOverfitting.jl
+++ b/src/Backtests/TestSetOverfitting.jl
@@ -15,22 +15,22 @@ refernce: De Prado, M (2020) Machine Learning for Asset Managers
 methodology: page 110, snippet 8.1
 """
 function expectedMaxSharpeRatio(nTrials, # number of trials
-                                meanSharpeRatio, # mean Sharpe Ratio
-                                stdSharpeRatio) # standard deviation of Sharpe Ratios
+    meanSharpeRatio, # mean Sharpe Ratio (added back after scaling below)
+    stdSharpeRatio) # standard deviation of Sharpe Ratios (scales the standardized value below)
 
     emc = MathConstants.eulergamma # euler gamma constant
 
-    sharpeRatio = (1 - emc) * quantile(Normal(0, 1), 1 - 1 / nTrials) + emc * quantile(Normal(0, 1) , 1 - 1 / (nTrials * MathConstants.e)) # get expected value of sharpe ratio by using false strategy theorem
+    sharpeRatio = (1 - emc) * quantile(Normal(0, 1), 1 - 1 / nTrials) + emc * quantile(Normal(0, 1), 1 - 1 / (nTrials * MathConstants.e)) # false strategy theorem: standard-normal quantiles at 1 - 1/nTrials and 1 - 1/(nTrials * e), weighted by (1 - emc) and emc
     sharpeRatio = meanSharpeRatio + stdSharpeRatio * sharpeRatio # get max Sharpe Ratio, controlling for SBuMT
-    
+
     return sharpeRatio
 end
 
 #---------------------------------------------------
 function generatedMaxSharpeRatio(nSims, # number of simulations 
-                            nTrials, # number of trials
-                            stdSharpeRatio, # mean Sharpe Ratio
-                            meanSharpeRatio) # standard deviation of Sharpe Ratios
+    nTrials, # iterable of trial counts; one batch of simulations is generated per entry
+    stdSharpeRatio, # standard deviation of Sharpe Ratios (note: passed before the mean)
+    meanSharpeRatio) # mean Sharpe Ratio
 
     rng = MersenneTwister(1234) # create random number generator
     out = DataFrame() # initialize output
@@ -39,11 +39,11 @@ function generatedMaxSharpeRatio(nSims, # number of simulations
     for nTrials_ in nTrials
         #1) Simulated Sharpe ratios
         sharpeRatio = randn(rng, (Int64(nSims), Int64(nTrials_))) # generate random numbers for Sharpe Ratios
-        sharpeRatio = (sharpeRatio .- mean(sharpeRatio, dims = 2)) ./std(sharpeRatio, dims = 2) # standardize Sharpe Ratios 
+        sharpeRatio = (sharpeRatio .- mean(sharpeRatio, dims=2)) ./ std(sharpeRatio, dims=2) # standardize Sharpe Ratios 
         sharpeRatio = meanSharpeRatio .+ sharpeRatio .* stdSharpeRatio # set the mean and standard deviation
-        
+
         #2) Store output
-        output = DataFrame(maxSharpeRatio = vec(maximum(sharpeRatio, dims = 2)), nTrials = nTrials_) # generate output
+        output = DataFrame(maxSharpeRatio=vec(maximum(sharpeRatio, dims=2)), nTrials=nTrials_) # generate output
         append!(out, output) # append output
     end
     return out
@@ -56,13 +56,13 @@ refernce: De Prado, M (2020) Machine Learning for Asset Managers
 methodology: page 112, snippet 8.2
 """
 function meanAndStdError(nSims0, # number of max{SR} used to estimate E[max{SR}]
-                         nSims1, # number of errors on which std is computed
-                         nTrials, # array of numbers of SR used to derive max{SR}
-                         stdSharpeRatio, # mean Sharpe Ratio
-                         meanSharpeRatio) # standard deviation of Sharpe Ratios
+    nSims1, # number of errors on which std is computed
+    nTrials, # array of numbers of SR used to derive max{SR}
+    stdSharpeRatio, # standard deviation of Sharpe Ratios (note: passed before the mean)
+    meanSharpeRatio) # mean Sharpe Ratio
 
     # Compute standard deviation of errors per nTrial
-    sharpeRatio0 = DataFrame(nT = nTrials, ExpectedMaxSR = [expectedMaxSharpeRatio(i, meanSharpeRatio, stdSharpeRatio) for i in nTrials]) # compute expected max Sharpe Ratios
+    sharpeRatio0 = DataFrame(nT=nTrials, ExpectedMaxSR=[expectedMaxSharpeRatio(i, meanSharpeRatio, stdSharpeRatio) for i in nTrials]) # one expected max Sharpe Ratio per entry of nTrials
     error = DataFrame() # initialize errors
     out = DataFrame() # initialize output
 
@@ -73,7 +73,7 @@ function meanAndStdError(nSims0, # number of max{SR} used to estimate E[max{SR}]
         error_[!, :ExpectedMaxSR] = sharpeRatio0.ExpectedMaxSR # add expected max Sharpe Ratios
         error_[!, :err] = error_.maxSharpeRatio ./ error_.ExpectedMaxSR .- 1 # calculate errors
         append!(error, error_) # append errors
-    end    
+    end
 
     out[!, :meanErr] = combine(groupby(error, :nTrials), :err => mean; renamecols=false).err # calculate mean errors
     out[!, :nTrials] = combine(groupby(error, :nTrials), :err => mean; renamecols=false).nTrials # add number of trials
@@ -89,23 +89,23 @@ refernce: De Prado, M (2020) Machine Learning for Asset Managers
 methodology: page 119, snippet 8.3
 """
 function estimatedSharpeRatioZStatistics(sharpeRatio, # estimated Sharpe Ratio
-               t, # number of observations
-               sharpeRatio_ = 0, # true Sharpe Ratio
-               skew = 0, # skewness of returns
-               kurt = 3) # kurtosis of returns
+    t, # number of observations
+    sharpeRatio_=0, # true Sharpe Ratio the estimate is compared against
+    skew=0, # skewness of returns
+    kurt=3) # kurtosis of returns
 
-    z = (sharpeRatio - sharpeRatio_)*(t - 1)^0.5 # calculate first part of z statistic
-    z /= (1 - skew*sr + (kurt - 1) / 4*sr^2)^0.5 # calculate z statistic
+    z = (sharpeRatio - sharpeRatio_) * (t - 1)^0.5 # numerator: gap between estimated and true Sharpe Ratio scaled by sqrt(t - 1)
+    z /= (1 - skew * sharpeRatio + (kurt - 1) / 4 * sharpeRatio^2)^0.5 # divide by the skew/kurtosis adjustment at the estimated Sharpe Ratio (was the undefined name `sr`)
 
     return z
 end
 
 #---------------------------------------------------
 function strategyType1ErrorProbability(z, # z statistic for the estimated Sharpe Ratios
-                    k = 1) # number of tests
+    k=1) # number of tests; used as the exponent in the multi-testing correction
 
     α = cdf(Normal(0, 1), -z) # find false positive rate
-    α_k = 1 - (1 - α) ^ k # correct for multi-testing 
+    α_k = 1 - (1 - α)^k # correct for multi-testing: 1 minus (1 - α) raised to the k-th power
 
     return α_k
 end
@@ -118,25 +118,24 @@ refernce: De Prado, M (2020) Machine Learning for Asset Managers
 methodology: page 121, snippet 8.4
 """
 function thetaForType2Error(sharpeRatio, # estimated Sharpe Ratio
-               t, # number of observations
-               sharpeRatio_ = 0, # true Sharpe Ratio
-               skew = 0, # skewness of returns
-               kurt = 3) # kurtosis of returns
+    t, # number of observations
+    sharpeRatio_=0, # true Sharpe Ratio (numerator of θ)
+    skew=0, # skewness of returns
+    kurt=3) # kurtosis of returns
 
-    θ = sharpeRatio_*(t - 1)^0.5 # calculate first part of theta
-    θ /= (1 - skew*sharpeRatio + (kurt - 1) / 4*sharpeRatio^2)^0.5 # calculate theta
+    θ = sharpeRatio_ * (t - 1)^0.5 # numerator: true Sharpe Ratio scaled by sqrt(t - 1)
+    θ /= (1 - skew * sharpeRatio + (kurt - 1) / 4 * sharpeRatio^2)^0.5 # divide by the skew/kurtosis adjustment at the estimated Sharpe Ratio
 
     return θ
 end
 
 #---------------------------------------------------
 function strategyType2ErrorProbability(α, # type I error
-                    k, # number of tests
-                    θ) # calculated theta parameter
+    k, # number of tests
+    θ) # calculated theta parameter (subtracted from the critical value below)
 
-    z = quantile(Normal(0, 1),(1 - α) ^ (1 / k)) # perform Sidak’s correction
+    z = quantile(Normal(0, 1), (1 - α)^(1 / k)) # perform Sidak’s correction: standard-normal quantile at (1 - α)^(1 / k)
     β = cdf(Normal(0, 1), z - θ) # calculate false negative rate
 
     return β
 end
-
diff --git a/src/Features/Clustering.jl b/src/Features/Clustering.jl
index 682442a..c9b8242 100644
--- a/src/Features/Clustering.jl
+++ b/src/Features/Clustering.jl
@@ -1,4 +1,3 @@
-#using KernelEstimator
 using Distributions
 using LinearAlgebra
 using DataFrames
@@ -12,6 +11,7 @@ using MarketData
 using CSV
 using StatsBase
 using BlockDiagonals
+using PyCall
 @pyimport sklearn.metrics as Metrics
 
 """----------------------------------------------------------------------
@@ -23,10 +23,10 @@ function percentChange(prices::DataFrames.DataFrame)
     returns = DataFrames.DataFrame() # empty dataframe of returns
     for sym in names(prices)[2:end]
         data = prices[!, Symbol(sym)] # prices of each name
-        ret = Array{Float64}(undef,length(data)) # returns of each name
+        ret = Array{Float64}(undef, length(data)) # returns of each name
         ret[1] = NaN
         for i in 2:length(data)
-            ret[i] = (data[i]/data[i-1]) - 1 # calculate returns of each name
+            ret[i] = (data[i] / data[i-1]) - 1 # calculate returns of each name
         end
         returns[!, Symbol(sym)] = ret # append returns into dataframe
     end
@@ -38,17 +38,17 @@ function: Clustering
 reference: De Prado, M. (2020) Advances in financial machine learning. John Wiley & Sons.
 methodology: Snipet 4.1, Page 56
 ----------------------------------------------------------------------"""
-function clusterKMeansBase(correlation; 
-                           numberClusters = 10, 
-                           iterations = 10)
-    distance = sqrt.((1 .- correlation)/2) # distance matrix
+function clusterKMeansBase(correlation; # correlation matrix (indexed by integer positions below)
+    numberClusters=10, # largest cluster count tried; the inner loop runs 2:numberClusters
+    iterations=10) # number of times the whole 2:numberClusters sweep is repeated
+    distance = sqrt.((1 .- correlation) / 2) # distance matrix derived element-wise from the correlations
     silh, kmeansOut = [NaN], [NaN] # initial value for silh, kmeans
     for init ∈ 1:iterations
         for i ∈ 2:numberClusters
             kmeans_ = kmeans(distance, i) # clustering distance with maximum cluster i
             silh_ = Metrics.silhouette_samples(distance, assignments(kmeans_)) # silh score of clustering
-            statistic = (mean(silh_)/std(silh_), mean(silh)/std(silh)) # calculate t-statistic
-            if isnan(statistic[2]) || statistic[1]>statistic[2]
+            statistic = (mean(silh_) / std(silh_), mean(silh) / std(silh)) # mean/std of silhouette scores: (candidate, best so far)
+            if isnan(statistic[2]) || statistic[1] > statistic[2] # NaN baseline (initial placeholder) or strictly better candidate
                 silh, kmeansOut = silh_, kmeans_ # replace better clustering
             end
         end
@@ -56,8 +56,8 @@ function clusterKMeansBase(correlation;
     indexSorted = sortperm(assignments(kmeansOut)) # sort arguments based on clustering
     correlationSorted = correlation[indexSorted, indexSorted] # new corr matrix based on clustering
     # dictionary of clustering
-    clusters = Dict("$i"=> filter(p->assignments(kmeansOut)[p] == i, indexSorted) for i in unique(assignments(kmeansOut)))
-    silh = DataFrames.DataFrame(silh = silh) # dataframe of silh scores
+    clusters = Dict("$i" => filter(p -> assignments(kmeansOut)[p] == i, indexSorted) for i in unique(assignments(kmeansOut))) # string label => positions assigned to that label
+    silh = DataFrames.DataFrame(silh=silh) # dataframe of silh scores
     return correlationSorted, clusters, silh, indexSorted
 end
 
@@ -67,27 +67,27 @@ reference: De Prado, M. (2020) Advances in financial machine learning. John Wile
 methodology: Snipet 4.2, Page 58
 ----------------------------------------------------------------------"""
 function makeNewOutputs(correlation, # corr dataframe
-                        clusters,   # cluster 1
-                        clusters2)  # cluster 2
+    clusters,   # cluster 1: first dictionary of clusters to merge (values are looked up in `assets` below)
+    clusters2)  # cluster 2: second dictionary of clusters, numbered after the first
     assets = names(correlation)# name of the columns of corr dataframe 
     # merge two clusters
     clustersNew = Dict()
     for i in keys(clusters)
-        clustersNew[length(keys(clustersNew)) + 1] = clusters[i] 
+        clustersNew[length(keys(clustersNew))+1] = clusters[i] # re-key with the next consecutive integer
     end
     for i in keys(clusters2)
-        clustersNew[length(keys(clustersNew)) + 1] = clusters2[i]
+        clustersNew[length(keys(clustersNew))+1] = clusters2[i] # numbering continues after the first dictionary
     end
     indexNew = [j for i in keys(clustersNew) for j in clustersNew[i]] # sorted index of assets
     correlationNew = correlation[indexin(indexNew, assets), indexin(indexNew, assets)] # new corr matrix
-    distance = sqrt.((1 .- Matrix(correlation))/2) # distance matrix
+    distance = sqrt.((1 .- Matrix(correlation)) / 2) # distance matrix computed from the original (unsorted) correlation
     labelsKmeans = zeros(size(distance)[2]) # initial labels
     for i in keys(clustersNew)
-        index = indexin(clustersNew[i], assets) 
+        index = indexin(clustersNew[i], assets) # positions of this cluster's members within `assets`
         labelsKmeans[index] .= i # label for clusters
     end
-    silhNew = DataFrames.DataFrame(index = assets, silh = Metrics.silhouette_samples(distance, labelsKmeans)) # silh series
-    return correlationNew,clustersNew,silhNew
+    silhNew = DataFrames.DataFrame(index=assets, silh=Metrics.silhouette_samples(distance, labelsKmeans)) # silh series, one row per asset
+    return correlationNew, clustersNew, silhNew
 end
 
 """----------------------------------------------------------------------
@@ -95,22 +95,22 @@ function: clustering (ONC)
 reference: De Prado, M. (2020) Advances in financial machine learning. John Wiley & Sons.
 methodology: Snipet 4.2, Page 58
 ----------------------------------------------------------------------"""
-function clusterKMeansTop(correlation; # corr dataframe  
-                          numberClusters = nothing, # number of clusters
-                          iterations = 10) # number of iterations
+function clusterKMeansTop(correlation, # corr dataframe
+    numberClusters=nothing, # number of clusters
+    iterations=10) # number of iterations
     if isnothing(numberClusters)
         numberClusters = size(correlation)[2] - 1 # set number of cluster
     end
     assets = names(correlation) # names of columns
     # clustering
-    correlationSorted, clusters, silh, indexSorted = clusterKMeansBase(Matrix(correlation), numberClusters = min(numberClusters, size(correlation)[2] - 1), iterations = 10)
+    correlationSorted, clusters, silh, indexSorted = clusterKMeansBase(Matrix(correlation), numberClusters=min(numberClusters, size(correlation)[2] - 1), iterations=10)
     correlationSorted = DataFrames.DataFrame(correlationSorted, :auto) # dataframe of correlationSorted
     DataFrames.rename!(correlationSorted, Symbol.(names(correlationSorted)) .=> assets[indexSorted]) # rename columns of the dataframe of correlationSorted
     clusters = Dict("$i" => assets[clusters[i]] for i in keys(clusters)) # dictionary of clusters
     # calcultae t-statistic of each cluster
-    clusterTstats = Dict("$i" => mean(silh[indexin(clusters[i], assets), :silh])/std(silh[indexin(clusters[i], assets), :silh]) for i in keys(clusters))
-    tStatMean = sum(values(clusterTstats))/length(clusterTstats) # mean of t-statistics
-    redoClusters = [i for i in keys(clusterTstats) if clusterTstats[i]<tStatMean] # select clusters which have t-stat lower than mean
+    clusterTstats = Dict("$i" => mean(silh[indexin(clusters[i], assets), :silh]) / std(silh[indexin(clusters[i], assets), :silh]) for i in keys(clusters))
+    tStatMean = sum(values(clusterTstats)) / length(clusterTstats) # mean of t-statistics
+    redoClusters = [i for i in keys(clusterTstats) if clusterTstats[i] < tStatMean] # select clusters which have t-stat lower than mean
     if length(redoClusters) <= 1
         return correlationSorted, clusters, silh
     else
@@ -119,12 +119,12 @@ function clusterKMeansTop(correlation; # corr dataframe
         assets_ = names(correlationTemp) # names of dataframe
         tStatMean = mean([clusterTstats[i] for i in redoClusters]) # mean of t-stats redoclusters
         # call again clusterKMeansTop
-        correlationSorted2, clusters2, silh2 = clusterKMeansTop(correlationTemp, numberClusters = min(numberClusters, size(correlationTemp)[2] - 1), iterations = iterations)
+        correlationSorted2, clusters2, silh2 = clusterKMeansTop(correlationTemp, min(numberClusters, size(correlationTemp)[2] - 1), iterations) # positional call: the signature declares numberClusters/iterations as optional positionals, not keywords
         # Make new outputs, if necessary
         correlationNew, clustersNew, silhNew = makeNewOutputs(correlation, Dict("$i" => clusters[i] for i in keys(clusters) if i ∉ redoClusters), clusters2)
         # mean of t-stats new output
-        newTstatMean = mean([mean(silhNew[indexin(clustersNew[i], silhNew.index), :silh])/
-                             std(silhNew[indexin(clustersNew[i], silhNew.index), :silh]) 
+        newTstatMean = mean([mean(silhNew[indexin(clustersNew[i], silhNew.index), :silh]) /
+                             std(silhNew[indexin(clustersNew[i], silhNew.index), :silh])
                              for i in keys(clustersNew)])
         if newTstatMean <= tStatMean
             return correlationSorted, clusters, silh
@@ -140,9 +140,9 @@ reference: De Prado, M. (2020) Advances in financial machine learning. John Wile
 methodology: Snipet 4.3, Page 61
 ----------------------------------------------------------------------"""
 function randomCovarianceSub(numberObservations, # number of observations
-                             numberColumns, # number of cols
-                             σ, # sigma for normal distribution
-                             domain) # range for rand
+    numberColumns, # number of cols
+    σ, # sigma for normal distribution
+    domain) # range for rand
     # Sub correlation matrix
     if numberColumns == 1
         return ones(1, 1)
@@ -160,17 +160,17 @@ reference: De Prado, M. (2020) Advances in financial machine learning. John Wile
 methodology: Snipet 4.3, Page 61
 ----------------------------------------------------------------------"""
 function randomBlockCovariance(numberColumns, # number of cols
-                               numberBlocks; # number of blocks
-                               blockSizeMin = 1, # minimum size of block
-                               σ = 1., # sigma for normal distribution
-                               domain = nothing) # range for rand
+    numberBlocks; # number of blocks
+    blockSizeMin=1, # minimum size of block
+    σ=1.0, # sigma for normal distribution
+    domain=nothing) # range for rand
     # Generate a block random correlation matrix
-    parts = sort(StatsBase.sample(domain,  1:numberColumns - (blockSizeMin - 1)*numberBlocks - 1, numberBlocks - 1, replace = false))
-    append!(parts, numberColumns - (blockSizeMin - 1)*numberBlocks)
-    parts = append!([parts[1]], diff(parts)) .-1 .+ blockSizeMin
+    parts = sort(StatsBase.sample(domain, 1:numberColumns-(blockSizeMin-1)*numberBlocks-1, numberBlocks - 1, replace=false))
+    append!(parts, numberColumns - (blockSizeMin - 1) * numberBlocks)
+    parts = append!([parts[1]], diff(parts)) .- 1 .+ blockSizeMin
     covariance = nothing
     for column in parts
-        thisCovariance = randomCovarianceSub(Int(max(column*(column + 1)/2., 100)), column, σ, domain) # sub covariance
+        thisCovariance = randomCovarianceSub(Int(max(column * (column + 1) / 2.0, 100)), column, σ, domain) # sub covariance
         if isnothing(covariance)
             covariance = copy(thisCovariance) #copy covariance
         else
@@ -186,14 +186,14 @@ reference: De Prado, M. (2020) Advances in financial machine learning. John Wile
 methodology: Snipet 4.3, Page 61
 ----------------------------------------------------------------------"""
 function randomBlockCorrelation(numberColumns,  # number of cols
-                                numberBlocks;  # number of blocks
-                                randomState = nothing,  # for rand data
-                                blockSizeMin = 1) # minimum size of block
+    numberBlocks;  # number of blocks
+    randomState=nothing,  # for rand data
+    blockSizeMin=1) # minimum size of block
     # set seed
     domain = MersenneTwister(randomState)
     # generate 2 random block diagram cov matrix
-    covariance1 = randomBlockCovariance(numberColumns, numberBlocks, blockSizeMin = blockSizeMin, σ = .5, domain = domain)
-    covariance2 = randomBlockCovariance(numberColumns, 1, blockSizeMin = blockSizeMin, σ = 1., domain = domain) # add noise
+    covariance1 = randomBlockCovariance(numberColumns, numberBlocks, blockSizeMin=blockSizeMin, σ=0.5, domain=domain)
+    covariance2 = randomBlockCovariance(numberColumns, 1, blockSizeMin=blockSizeMin, σ=1.0, domain=domain) # add noise
     covariance1 += covariance2 # add 2 cov matrix
     correlation = covToCorr(covariance1) # corr matrix
     correlation = DataFrames.DataFrame(correlation, :auto) # dataframe of corr matrix
@@ -207,8 +207,8 @@ end
 ----------------------------------------------------------------------"""
 function covToCorr(covariance) # covariance matrix
     std = sqrt.((diag(covariance))) # standard deviations
-    correlation = covariance./(std.*std') # create correlation matrix
-    correlation[correlation .< -1] .= -1 # numerical error
-    correlation[correlation .> 1] .= 1 # numerical error
+    correlation = covariance ./ (std .* std') # create correlation matrix
+    correlation[correlation.<-1] .= -1 # numerical error
+    correlation[correlation.>1] .= 1 # numerical error
     return correlation
 end

From 9a37d3ad2c7128fe8e38c75bed6ef91239fc5aa8 Mon Sep 17 00:00:00 2001
From: Ahmad <ahmadzafarani2013@gmail.com>
Date: Fri, 5 May 2023 11:28:31 +0430
Subject: [PATCH 2/4] second reference paper

---
 src/Backtests/MultipleTestReporting.jl | 28 ++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/Backtests/MultipleTestReporting.jl b/src/Backtests/MultipleTestReporting.jl
index db552f9..d55b517 100644
--- a/src/Backtests/MultipleTestReporting.jl
+++ b/src/Backtests/MultipleTestReporting.jl
@@ -15,18 +15,38 @@ doi: https://doi.org/10.3905/jpm.2018.45.1.141
 """
 function BacktestResultTemplate(nTrials::Int64, # number of trials
     familySize::Union{UInt64,Nothing}, #  number of significantly different experiments
-    correlation::Union{DataFrame,Nothing}, # corr dataframe
-    familyWiseErrorRate::Union{Float64,Nothing}, # family-wise error rate
+    clusteringArgs::Union{Array{DataFrame,UInt64,UInt64},Nothing}, # corr dataframe, number of clusters, number of iterations
+    familyWiseErrorRate::Union{UFloat64,Nothing}, # family-wise error rate
+    typeOneErrorArgs::Union{Array{Float64,UInt64},Nothing}, # z statistic for the estimated Sharpe Ratios, number of tests
+    CalculatePowerOfTest::Bool, # should calculate power of the test
     powerOfTest::Union{Float64,Nothing}, # power of the test
+    typeTwoErrorArgs::Union{Array{UInt64,Float64},Nothing}, # number of tests, calculated theta parameter
 )::Nothing
     if familySize === nothing
         println("familySize not provided; using clustering on correlation matrix of backtest returns...")
-        @assert correlation !== nothing "correlation must be provided"
-        correlationNew, clusters, silh = clusterKMeansTop(correlation)
+        @assert clusteringArgs !== nothing "clustering function arguments must be provided"
+        correlationNew, clusters, silh = clusterKMeansTop(clusteringArgs...)
         familySize = length(clusters)
         println("calculated familySize: ", familySize)
     else
         @assert nTrials >= familySize "familySize must be less equal to nTrials"
     end
 
+    if familyWiseErrorRate === nothing
+        println("familyWiseErrorRate not provided; using Sharpe Ratio type 1 error under multiple testing...")
+        @assert typeOneErrorArgs !== nothing "Type 1 Error Probability function arguments must be provided"
+        familyWiseErrorRate = strategyType1ErrorProbability(typeOneErrorArgs...)
+        println("calculated familyWiseErrorRate: ", familyWiseErrorRate)
+    end
+
+    if CalculatePowerOfTest == false
+        println("skipping calculating optional parameter: powerOfTest ...")
+    else
+        @assert powerOfTest === nothing "powerOfTest must not be provided"
+        println("powerOfTest not provided, using Sharpe Ratio type 2 error under multiple testing...")
+        @assert typeTwoErrorArgs !== nothing "Type 2 Error Probability function arguments must be provided"
+        insert!(typeTwoErrorArgs, 1, familyWiseErrorRate)
+        powerOfTest = strategyType2ErrorProbability(typeTwoErrorArgs...)
+        println("calculated powerOfTest: ", powerOfTest)
+    end
 end

From a4a678b74a0ea696da9cc2b838944465b810cfd1 Mon Sep 17 00:00:00 2001
From: Ahmad <ahmadzafarani2013@gmail.com>
Date: Fri, 5 May 2023 17:09:33 +0430
Subject: [PATCH 3/4] first test set created

---
 src/Backtests/MultipleTestReporting.jl      | 17 +++++++++-------
 test/Backtests/MultipleTestReportingTest.jl | 22 +++++++++++++++++++++
 test/runtests.jl                            |  3 +--
 3 files changed, 33 insertions(+), 9 deletions(-)
 create mode 100644 test/Backtests/MultipleTestReportingTest.jl

diff --git a/src/Backtests/MultipleTestReporting.jl b/src/Backtests/MultipleTestReporting.jl
index d55b517..04ec8a4 100644
--- a/src/Backtests/MultipleTestReporting.jl
+++ b/src/Backtests/MultipleTestReporting.jl
@@ -14,14 +14,16 @@ refernce: Fabozzi, F, De Prado, M (2018) Being Honest in Backtest Reporting: A T
 doi: https://doi.org/10.3905/jpm.2018.45.1.141
 """
 function BacktestResultTemplate(nTrials::Int64, # number of trials
-    familySize::Union{UInt64,Nothing}, #  number of significantly different experiments
-    clusteringArgs::Union{Array{DataFrame,UInt64,UInt64},Nothing}, # corr dataframe, number of clusters, number of iterations
-    familyWiseErrorRate::Union{UFloat64,Nothing}, # family-wise error rate
-    typeOneErrorArgs::Union{Array{Float64,UInt64},Nothing}, # z statistic for the estimated Sharpe Ratios, number of tests
-    CalculatePowerOfTest::Bool, # should calculate power of the test
-    powerOfTest::Union{Float64,Nothing}, # power of the test
-    typeTwoErrorArgs::Union{Array{UInt64,Float64},Nothing}, # number of tests, calculated theta parameter
+    familySize::Union{Int64,Nothing}=nothing, # number of significantly different experiments
+    clusteringArgs::Union{Tuple{DataFrame,Int,Int},Nothing}=nothing, # correlation dataframe, number of clusters, number of iterations
+    familyWiseErrorRate::Union{Float64,Nothing}=nothing, # family-wise error rate
+    typeOneErrorArgs::Union{Tuple{Float64,Int64},Nothing}=nothing, # z statistic for the estimated Sharpe Ratios, number of tests
+    CalculatePowerOfTest::Bool=false, # should calculate power of the test
+    powerOfTest::Union{Float64,Nothing}=nothing, # power of the test
+    typeTwoErrorArgs::Union{Tuple{Int64,Float64},Nothing}=nothing # number of tests, calculated theta parameter
 )::Nothing
+    @assert nTrials > 0 "nTrials must be a positive integer"
+
     if familySize === nothing
         println("familySize not provided; using clustering on correlation matrix of backtest returns...")
         @assert clusteringArgs !== nothing "clustering function arguments must be provided"
@@ -29,6 +31,7 @@ function BacktestResultTemplate(nTrials::Int64, # number of trials
         familySize = length(clusters)
         println("calculated familySize: ", familySize)
     else
+        @assert familySize > 0 "familySize must be a positive integer"
         @assert nTrials >= familySize "familySize must be less equal to nTrials"
     end
 
diff --git a/test/Backtests/MultipleTestReportingTest.jl b/test/Backtests/MultipleTestReportingTest.jl
new file mode 100644
index 0000000..3e5ef37
--- /dev/null
+++ b/test/Backtests/MultipleTestReportingTest.jl
@@ -0,0 +1,22 @@
+"""
+List of Collaborators, Developers, and Research Assistants (in alphabetical order)
+Ahmad Zaferani
+"""
+
+using Test
+include("../../src/Backtests/MultipleTestReporting.jl")
+
+
+@testset "BacktestResultTemplate" begin
+    @testset "test arguments" begin
+        @test_throws AssertionError BacktestResultTemplate(-1)
+        @test_throws AssertionError BacktestResultTemplate(1, -1)
+        @test_throws AssertionError BacktestResultTemplate(1, 5)
+        @test_throws AssertionError BacktestResultTemplate(1, 1, nothing)
+        @test BacktestResultTemplate(1, 1, nothing, 1.0) === nothing
+    end
+
+    @testset "call dependencies" begin
+        @test BacktestResultTemplate(1, 1, nothing, 1.0) === nothing
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 5f1abd5..18f3018 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,2 +1 @@
-using Test 
-@test true 
+include("Backtests/MultipleTestReportingTest.jl")

From 9027cef502acad437fc613a8ecd90f6759c6bb4a Mon Sep 17 00:00:00 2001
From: Ahmad <ahmadzafarani2013@gmail.com>
Date: Fri, 5 May 2023 22:42:06 +0430
Subject: [PATCH 4/4] add mocking package - unit test dependencies

---
 Manifest.toml                               | 11 ++++
 Project.toml                                |  1 +
 src/Backtests/MultipleTestReporting.jl      | 31 +++++----
 test/Backtests/MultipleTestReportingTest.jl | 69 ++++++++++++++++++---
 4 files changed, 93 insertions(+), 19 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 4986c41..ceb1171 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -102,6 +102,11 @@ git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d"
 uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 version = "0.9.3"
 
+[[deps.ExprTools]]
+git-tree-sha1 = "c1d06d129da9f55715c6c212866f5b1bddc5fa00"
+uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
+version = "0.1.9"
+
 [[deps.FillArrays]]
 deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"]
 git-tree-sha1 = "7072f1e3e5a8be51d525d64f63d3ec1287ff2790"
@@ -193,6 +198,12 @@ version = "1.1.0"
 [[deps.Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
 
+[[deps.Mocking]]
+deps = ["Compat", "ExprTools"]
+git-tree-sha1 = "782e258e80d68a73d8c916e55f8ced1de00c2cea"
+uuid = "78c3b35d-d492-501b-9361-3d52fe80e533"
+version = "0.7.6"
+
 [[deps.NearestNeighbors]]
 deps = ["Distances", "StaticArrays"]
 git-tree-sha1 = "2c3726ceb3388917602169bed973dbc97f1b51a8"
diff --git a/Project.toml b/Project.toml
index 09a7cd8..a20f9c8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,6 +9,7 @@ BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
 Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Mocking = "78c3b35d-d492-501b-9361-3d52fe80e533"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Shuffle = "bf21e494-c40e-4daa-abfb-de5ec0aad010"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/src/Backtests/MultipleTestReporting.jl b/src/Backtests/MultipleTestReporting.jl
index 04ec8a4..aa1e59b 100644
--- a/src/Backtests/MultipleTestReporting.jl
+++ b/src/Backtests/MultipleTestReporting.jl
@@ -4,6 +4,7 @@ Ahmad Zaferani
 """
 
 using DataFrames
+using Mocking
 include("../features/Clustering.jl")
 include("TestSetOverfitting.jl")
 
@@ -13,7 +14,7 @@ function: a Template for Reporting Results of Backtest on Financial Strategies
 refernce: Fabozzi, F, De Prado, M (2018) Being Honest in Backtest Reporting: A Template for Disclosing Multiple Tests
 doi: https://doi.org/10.3905/jpm.2018.45.1.141
 """
-function BacktestResultTemplate(nTrials::Int64, # number of trials
+function backtestResultTemplate(nTrials::Int64, # number of trials
     familySize::Union{Int64,Nothing}=nothing, # number of significantly different experiments
     clusteringArgs::Union{Tuple{DataFrame,Int,Int},Nothing}=nothing, # correlation dataframe, number of clusters, number of iterations
     familyWiseErrorRate::Union{Float64,Nothing}=nothing, # family-wise error rate
@@ -21,35 +22,41 @@ function BacktestResultTemplate(nTrials::Int64, # number of trials
     CalculatePowerOfTest::Bool=false, # should calculate power of the test
     powerOfTest::Union{Float64,Nothing}=nothing, # power of the test
     typeTwoErrorArgs::Union{Tuple{Int64,Float64},Nothing}=nothing # number of tests, calculated theta parameter
-)::Nothing
+)::String
+    buffer = IOBuffer()
+
     @assert nTrials > 0 "nTrials must be a positive integer"
 
     if familySize === nothing
-        println("familySize not provided; using clustering on correlation matrix of backtest returns...")
+        println(buffer, "familySize not provided; using clustering on correlation matrix of backtest returns...")
         @assert clusteringArgs !== nothing "clustering function arguments must be provided"
-        correlationNew, clusters, silh = clusterKMeansTop(clusteringArgs...)
+        correlationNew, clusters, silh = @mock clusterKMeansTop(clusteringArgs...)
         familySize = length(clusters)
-        println("calculated familySize: ", familySize)
+        println(buffer, "calculated familySize: ", familySize)
     else
         @assert familySize > 0 "familySize must be a positive integer"
         @assert nTrials >= familySize "familySize must be less equal to nTrials"
     end
 
     if familyWiseErrorRate === nothing
-        println("familyWiseErrorRate not provided; using Sharpe Ratio type 1 error under multiple testing...")
+        println(buffer, "familyWiseErrorRate not provided; using Sharpe Ratio type 1 error under multiple testing...")
         @assert typeOneErrorArgs !== nothing "Type 1 Error Probability function arguments must be provided"
-        familyWiseErrorRate = strategyType1ErrorProbability(typeOneErrorArgs...)
-        println("calculated familyWiseErrorRate: ", familyWiseErrorRate)
+        familyWiseErrorRate = @mock strategyType1ErrorProbability(typeOneErrorArgs...)
+        println(buffer, "calculated familyWiseErrorRate: ", familyWiseErrorRate)
     end
 
     if CalculatePowerOfTest == false
-        println("skipping calculating optional parameter: powerOfTest ...")
+        println(buffer, "skipping calculating optional parameter: powerOfTest ...")
     else
         @assert powerOfTest === nothing "powerOfTest must not be provided"
-        println("powerOfTest not provided, using Sharpe Ratio type 2 error under multiple testing...")
+        println(buffer, "powerOfTest not provided, using Sharpe Ratio type 2 error under multiple testing...")
         @assert typeTwoErrorArgs !== nothing "Type 2 Error Probability function arguments must be provided"
+        typeTwoErrorArgs = [typeTwoErrorArgs...]
         insert!(typeTwoErrorArgs, 1, familyWiseErrorRate)
-        powerOfTest = strategyType2ErrorProbability(typeTwoErrorArgs...)
-        println("calculated powerOfTest: ", powerOfTest)
+        powerOfTest = @mock strategyType2ErrorProbability(typeTwoErrorArgs...)
+        println(buffer, "calculated powerOfTest: ", powerOfTest)
     end
+
+    println(buffer, "Strategy Results are as follows:\n1. number of trials: $nTrials\n2. number of significantly different experiments: $familySize\n3. family-wise error rate: $familyWiseErrorRate\n4. power of the test: $powerOfTest")
+    return String(take!(buffer))
 end
diff --git a/test/Backtests/MultipleTestReportingTest.jl b/test/Backtests/MultipleTestReportingTest.jl
index 3e5ef37..b2e1c3d 100644
--- a/test/Backtests/MultipleTestReportingTest.jl
+++ b/test/Backtests/MultipleTestReportingTest.jl
@@ -4,19 +4,74 @@ Ahmad Zaferani
 """
 
 using Test
+using Mocking
+using DataFrames
 include("../../src/Backtests/MultipleTestReporting.jl")
 
+Mocking.activate()
 
-@testset "BacktestResultTemplate" begin
+function generateSuccessOutput(nTrials, familySize, familyWiseErrorRate, powerOfTest, # the four values printed in the final summary
+    familySizeNotProvided=false, calculatedFamilySize=0, familyWiseErrorRateNotProvided=false, # flags choose which log lines are written
+    calculatedFamilyWizeErrorRate=0, skipPowerOfTest=true, calculatedPowerOfTest=0) # calculated* values replace the given ones when their branch runs
+    familySizeNotProvidedMsg = "familySize not provided; using clustering on correlation matrix of backtest returns..." # literal messages; the tests compare this output with backtestResultTemplate's return value
+    calculatedFamilySizeMsg = "calculated familySize: $calculatedFamilySize"
+    familyWiseErrorRateNotProvidedMsg = "familyWiseErrorRate not provided; using Sharpe Ratio type 1 error under multiple testing..."
+    calculatedFamilyWizeErrorRateMsg = "calculated familyWiseErrorRate: $calculatedFamilyWizeErrorRate"
+    skipPowerOfTestMsg = "skipping calculating optional parameter: powerOfTest ..."
+    powerOfTestNotProvidedMsg = "powerOfTest not provided, using Sharpe Ratio type 2 error under multiple testing..."
+    calculatedPowerOfTestMsg = "calculated powerOfTest: $calculatedPowerOfTest"
+
+    buffer = IOBuffer() # lines are accumulated here and returned as one String
+    if familySizeNotProvided == true # family size was computed: log it and report the calculated value
+        println(buffer, familySizeNotProvidedMsg)
+        println(buffer, calculatedFamilySizeMsg)
+        familySize = calculatedFamilySize
+    end
+    if familyWiseErrorRateNotProvided == true # error rate was computed: log it and report the calculated value
+        println(buffer, familyWiseErrorRateNotProvidedMsg)
+        println(buffer, calculatedFamilyWizeErrorRateMsg)
+        familyWiseErrorRate = calculatedFamilyWizeErrorRate
+    end
+    if skipPowerOfTest == true # power of test is either skipped or computed; exactly one branch logs
+        println(buffer, skipPowerOfTestMsg)
+    else
+        println(buffer, powerOfTestNotProvidedMsg)
+        println(buffer, calculatedPowerOfTestMsg)
+        powerOfTest = calculatedPowerOfTest
+    end
+    println(buffer, "Strategy Results are as follows:\n1. number of trials: $nTrials\n2. number of significantly different experiments: $familySize\n3. family-wise error rate: $familyWiseErrorRate\n4. power of the test: $powerOfTest") # summary block, always written last
+    return String(take!(buffer))
+end
+
+@testset "backtestResultTemplate" begin # argument validation, mocked dependencies, optional powerOfTest handling
     @testset "test arguments" begin
-        @test_throws AssertionError BacktestResultTemplate(-1)
-        @test_throws AssertionError BacktestResultTemplate(1, -1)
-        @test_throws AssertionError BacktestResultTemplate(1, 5)
-        @test_throws AssertionError BacktestResultTemplate(1, 1, nothing)
-        @test BacktestResultTemplate(1, 1, nothing, 1.0) === nothing
+        @test_throws AssertionError backtestResultTemplate(-1) # nTrials must be positive
+        @test_throws AssertionError backtestResultTemplate(1, -1) # familySize must be positive
+        @test_throws AssertionError backtestResultTemplate(1, 5) # familySize may not exceed nTrials
+        @test_throws AssertionError backtestResultTemplate(1, 1, nothing) # neither familyWiseErrorRate nor typeOneErrorArgs given
+        @test backtestResultTemplate(1, 1, nothing, 1.0) == generateSuccessOutput(1, 1, 1.0, nothing)
     end
 
     @testset "call dependencies" begin
-        @test BacktestResultTemplate(1, 1, nothing, 1.0) === nothing
+        twentyCluster = @patch clusterKMeansTop(correlation, numberClusters=nothing, iterations=10) = nothing, 1:20, nothing # stub: clusters = 1:20, so the reported familySize is 20
+        apply(twentyCluster) do
+            @test backtestResultTemplate(1, nothing, (DataFrame(), 1, 1), 1.0) == generateSuccessOutput(1, 10, 1.0, nothing, true, 20)
+        end
+
+        zeroPointSixError = @patch strategyType1ErrorProbability(z, k=1) = 0.6 # stub: fixed family-wise error rate
+        apply(zeroPointSixError) do
+            @test backtestResultTemplate(20, 10, nothing, nothing, (0.1, 1)) == generateSuccessOutput(20, 10, 1.0, nothing, false, 0, true, 0.6)
+        end
+    end
+
+    @testset "optional parameters" begin
+        @test backtestResultTemplate(1, 1, nothing, 1.0, nothing, false, 0.44) == generateSuccessOutput(1, 1, 1.0, 0.44)
+        @test_throws AssertionError backtestResultTemplate(1, 1, nothing, 1.0, nothing, true) # typeTwoErrorArgs missing
+        @test_throws AssertionError backtestResultTemplate(1, 1, nothing, 1.0, nothing, true, 1.0) # powerOfTest given although it is to be calculated
+
+        zeroPointFourtyFourError = @patch strategyType2ErrorProbability(α, k, θ) = 0.44 # stub: fixed power of the test
+        apply(zeroPointFourtyFourError) do
+            @test backtestResultTemplate(1, 1, nothing, 1.0, nothing, true, nothing, (1, 1.0)) == generateSuccessOutput(1, 1, 1.0, nothing, false, 0, false, 0, false, 0.44)
+        end
     end
 end