From e8dba9fae3bda4d811b2983fd4b042b9261f095a Mon Sep 17 00:00:00 2001
From: beguinotj
Date: Tue, 14 Oct 2025 21:55:57 +0200
Subject: [PATCH 1/4] Leakage to success

---
 docs/conf.py                                  |   1 +
 docs/refs.bib                                 |  36 ++
 docs/source/papers.rst                        |   8 +-
 src/scalib/postprocessing/__init__.py         |  15 +-
 .../postprocessing/leakage_to_success.py      | 329 ++++++++++++++++++
 tests/test_leakage_to_success.py              |  79 +++++
 6 files changed, 462 insertions(+), 6 deletions(-)
 create mode 100644 src/scalib/postprocessing/leakage_to_success.py
 create mode 100644 tests/test_leakage_to_success.py

diff --git a/docs/conf.py b/docs/conf.py
index f4a1eb44..cd34735d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -58,6 +58,7 @@
     "scalib.version",
     "scalib.build_config",
     "numpy",
+    "scipy",
     "cpuinfo",
 ]

diff --git a/docs/refs.bib b/docs/refs.bib
index 5fbb0f0b..4955bc50 100644
--- a/docs/refs.bib
+++ b/docs/refs.bib
@@ -89,3 +89,39 @@ @inproceedings{rankest_histograms
   publisher = {Springer},
   year = {2016}
 }
+
+@inproceedings{mgl,
+  author    = {Julien B{\'{e}}guinot and
+               Wei Cheng and
+               Sylvain Guilley and
+               Yi Liu and
+               Lo{\"{\i}}c Masure and
+               Olivier Rioul and
+               Fran{\c{c}}ois{-}Xavier Standaert},
+  editor    = {Elif Bilge Kavun and
+               Michael Pehl},
+  title     = {Removing the Field Size Loss from Duc et al.'s Conjectured Bound for
+               Masked Encodings},
+  booktitle = {Constructive Side-Channel Analysis and Secure Design - 14th International
+               Workshop, {COSADE} 2023, Munich, Germany, April 3-4, 2023, Proceedings},
+  series    = {Lecture Notes in Computer Science},
+  volume    = {13979},
+  pages     = {86--104},
+  publisher = {Springer},
+  year      = {2023}
+}
+
+@inproceedings{figMerit,
+  author    = {Julien B{\'{e}}guinot and
+               Olivier Rioul and
+               Lo{\"{\i}}c Masure and
+               Fran{\c{c}}ois{-}Xavier Standaert and
+               Wei Cheng and
+               Sylvain Guilley},
+  title     = {Scalable Information Theoretic Evaluation of the Rank Statistics in Side-Channel Attacks},
+  booktitle = {{CHES}},
+  series    = {Lecture Notes in Computer Science},
+  publisher = {Springer},
+  year      = {2026}
+}
diff --git a/docs/source/papers.rst b/docs/source/papers.rst
index bae53093..2534dc99 100644
--- a/docs/source/papers.rst
+++ b/docs/source/papers.rst
@@ -69,14 +69,12 @@ Help us keeping it up-to-date: add your publications by
 12. "Low-Latency Masking with Arbitrary Protection Order Based on Click
     Elements", M. Simões, L. Bossuet, N. Bruneau, V. Grosso, P. Haddad,
     T. Sarno, HOST 2023.
-13. "Removing the Field Size Loss from Duc et al.'s Conjectured Bound for
-    Masked Encodings.", Béguinot, J. et al., COSADE 2023.
-14. "Self-timed Masking: Implementing Masked S-Boxes Without Registers."
+13. "Self-timed Masking: Implementing Masked S-Boxes Without Registers."
     Simões, M., Bossuet, L., Bruneau, N., Grosso, V., Haddad, P., Sarno,
     T., CARDIS 2022
-15. "Prime-Field Masking in Hardware and its Soundness against Low-Noise SCA
+14. "Prime-Field Masking in Hardware and its Soundness against Low-Noise SCA
     Attacks", Cassiers, G., Masure, L., Momin, C., Moos, T., Standaert,
     F.-X., TCHES 2023.
-16. "Effective and Efficient Masking with Low Noise Using Small-Mersenne-Prime
+15. "Effective and Efficient Masking with Low Noise Using Small-Mersenne-Prime
     Ciphers.", Masure, L., Méaux, P., Moos, T., Standaert, F. X., EUROCRYPT
     2023.
diff --git a/src/scalib/postprocessing/__init__.py b/src/scalib/postprocessing/__init__.py
index 85b2cf0d..e5c33091 100644
--- a/src/scalib/postprocessing/__init__.py
+++ b/src/scalib/postprocessing/__init__.py
@@ -10,8 +10,21 @@
     :nosignatures:
 
     scalib.postprocessing.rankestimation
+    scalib.postprocessing.leakage_to_success
 """
 
-__all__ = ["rankestimation"]
+__all__ = [
+    "rankestimation",
+    "success_rate",
+    "guessing_entropy",
+    "log_guessing_entropy",
+    "median",
+]
 
 from .rankestimation import rank_nbin, rank_accuracy
+from .leakage_to_success import (
+    success_rate,
+    guessing_entropy,
+    log_guessing_entropy,
+    median,
+)
diff --git a/src/scalib/postprocessing/leakage_to_success.py b/src/scalib/postprocessing/leakage_to_success.py
new file mode 100644
index 00000000..71e3b4ad
--- /dev/null
+++ b/src/scalib/postprocessing/leakage_to_success.py
@@ -0,0 +1,329 @@
+r"""Lower bounds on the log-guessing entropy, the log of the guessing entropy and the log of the median rank, and an upper bound on the probability of a successful attack in the presence of key enumeration.
+
+The bounds depend on the leakage as measured by mutual information, the size of the secret key (number of bits) and possibly the number of keys enumerated.
+
+Examples
+--------
+>>> from scalib.postprocessing import success_rate, guessing_entropy, log_guessing_entropy, median
+>>> import numpy as np
+>>> key_size=128
+>>> enumeration_effort=32
+>>> mi = np.random.uniform(low=0, high=key_size, size=10**3)
+>>> sr = success_rate(mi,key_size,enumeration_effort)
+>>> med = median(mi,key_size)
+>>> lg = log_guessing_entropy(mi,key_size)
+>>> ge = guessing_entropy(mi, key_size)
+
+Reference
+---------
+.. currentmodule:: scalib.postprocessing.leakage_to_success
+
+.. autosummary::
+   :toctree:
+   :nosignatures:
+   :recursive:
+
+   success_rate
+   guessing_entropy
+   log_guessing_entropy
+   median
+
+Notes
+-----
+The bounds are based on the article :footcite:p:`figMerit`.
+
+References
+----------
+.. footbibliography::
+"""
+
+__all__ = ["success_rate", "guessing_entropy", "log_guessing_entropy", "median"]
+import numpy as np
+from scipy.special import gamma, bernoulli, factorial
+
+
+def binary_divergence_log(
+    log_p: np.ndarray[float], log_q: np.ndarray[float]
+) -> np.ndarray[float]:
+    p = np.exp(log_p)
+    q = np.exp(log_q)
+    return p * (log_p - log_q) + (1 - p) * (np.log1p(-p) - np.log1p(-q))
+
+
+def success_rate(
+    mutual_information: np.ndarray[float],
+    key_size: int,
+    enumeration_effort: float,
+    base=2,
+    niter=30,
+):
+    r"""Output an upper bound on the logarithm in base 'base' of the probability of correctly guessing a secret key of size key_size bits
+    when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary and the adversary is allowed to enumerate up to
+    2 ** enumeration_effort key hypotheses.
+
+    Parameters
+    ----------
+    mutual_information : array_like, f64
+        Upper bound on the mutual information expressed in base 'base'
+    key_size : int
+        Number of bits of the secrets
+    enumeration_effort : array_like, f64
+        Logarithm in base 2 of the number of key hypotheses that the adversary is allowed to enumerate
+    base : f64
+        Base for the information used (by default base=2 i.e. the base is the bit)
+    niter : int
+        Number of iterations of the dichotomic search used internally. A higher niter improves the precision
+        but also increases the runtime, and vice versa. By default it is set to 30 iterations.
+
+    Returns
+    -------
+    Upper bound on the logarithm in base 'base' of the probability of correctly guessing a secret key of size key_size bits when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary and the adversary is allowed to enumerate up to 2^enumeration_effort key hypotheses.
+    """
+
+    MI_nats = mutual_information * np.log(base)
+    log_q = (enumeration_effort - key_size) * np.log(2)  # in nats
+
+    # Initial interval for the dichotomic search
+    log_p_lb = np.array(log_q).reshape((1,))
+    log_p_ub = np.array(0).reshape((1,))
+
+    # Dichotomic search
+    for _ in range(niter):
+        log_p_mid = (log_p_lb + log_p_ub) / 2
+        y = binary_divergence_log(log_p_mid, log_q) - MI_nats
+        log_p_lb, log_p_ub = np.where(
+            y > 0, (log_p_lb, log_p_mid), (log_p_mid, log_p_ub)
+        )
+    return log_p_ub / np.log(base)  # in base 'base'
+
+
+def f(x):
+    return np.where(x > 0, x - np.expm1(x) * np.log1p(-np.exp(-x)), 0)
+
+
+def f_inv(y, niter=20):
+
+    # Ensure that y is an array
+    y = np.atleast_1d(y)
+
+    # Initial guess
+    x_lb = np.maximum(np.log(1), y - 1 + np.log1p(np.exp(1 - y) / 2)).reshape((-1,))
+    x_ub = np.maximum(np.log(2), y - 1 + np.log1p(np.exp(1 - y) / 2 + 1)).reshape((-1,))
+
+    assert (f(x_lb) <= y).all()
+    assert (f(x_ub) >= y).all()
+
+    # Dichotomic search
+    for _ in range(niter):
+        x_mid = (x_lb + x_ub) / 2
+        x_lb, x_ub = np.where(f(x_mid) - y < 0, (x_mid, x_ub), (x_lb, x_mid))
+
+    return x_lb
+
+
+def guessing_entropy(mutual_information, key_size, base=2):
+    r"""Output a lower bound on the logarithm in base 'base' of the guessing entropy
+    when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+
+    Parameters
+    ----------
+    mutual_information : array_like, f64
+        Upper bound on the mutual information expressed in base 'base'
+    key_size : int
+        Number of bits of the secrets
+    base : f64
+        Unit for the logarithms used (by default base=2 i.e. the unit is the bit)
+
+    Returns
+    -------
+    Lower bound on the logarithm in base 'base' of the guessing entropy
+    """
+    MI_nats = mutual_information * np.log(base)
+    key_size_nats = key_size * np.log(2)
+    entropy_nats = key_size_nats - MI_nats
+    sqrt = np.sqrt(np.tanh(key_size_nats / 2) * 2 * MI_nats / 3)
+
+    finite_size_bound = np.zeros_like(mutual_information)
+    mask = sqrt < 1
+    finite_size_bound[mask] = (
+        key_size_nats
+        - np.log(2)
+        + np.log1p(np.exp(-key_size_nats))
+        + np.log1p(-sqrt[mask])
+    )
+    generic_bound = f_inv(entropy_nats)
+    bound = np.max([finite_size_bound, generic_bound], axis=0) / np.log(base)
+    return bound
+
+
+def partial_sum_zeta(a, p, q):
+    r"""Output the partial sum sum_{i=p}^q i^{-a}
+
+    Parameters
+    ----------
+    a : array_like, f64
+        Evaluation points
+    p,q : int
+        Summation boundaries
+
+    Returns
+    -------
+    The sum sum_{i=p}^q i^{-a}
+    """
+    I = np.arange(p, q + 1).reshape((-1, 1)).astype(float)
+    return np.sum(I ** -np.expand_dims(a, axis=0), axis=0)
+
+
+def euler_maclaurin_correction(a, k, log_M, order):
+    if order == 0:
+        return np.zeros_like(a)
+    d_O = (np.arange(1, order + 1) * 2 - 1).reshape((order,) + (1,) * a.ndim)
+    derivatives_kp1 = -((k + 1) ** (-a - d_O)) * gamma(a + d_O) / gamma(a)
+    derivatives_M = -np.exp2(log_M * (-a - d_O)) * gamma(a + d_O) / gamma(a)
+    EM_coeff = (
+        bernoulli(2 * order)[2::2] / factorial(np.arange(1, order + 1) * 2)
+    ).reshape((order,) + (1,) * a.ndim)
+    return np.sum((derivatives_M - derivatives_kp1) * EM_coeff, axis=0)
+
+
+def log_zeta_larger_than_1(a, log_M, order, k):
+
+    # Partial Sum
+    partial_sum = partial_sum_zeta(a, 1, k)
+
+    # Boundary corrections
+    correction = ((k + 1) ** (-a) + np.exp2(-a * log_M)) / 2
+
+    # Integral term
+    integral = (np.exp2((1 - a) * log_M) - (k + 1) ** (1 - a)) / (1 - a)
+
+    # Euler-Maclaurin correction
+    correction += euler_maclaurin_correction(a, k, log_M, order)
+
+    return np.log(partial_sum + integral + correction)
+
+
+def log_zeta_less_than_1(a, log_M, order, k):
+    log_M_nats = log_M * np.log(2)
+    partial_sum = partial_sum_zeta(a, 1, k)
+    correction = ((k + 1) ** (-a) + np.exp2(-a * log_M)) / 2
+
+    # Euler-Maclaurin correction
+    correction += euler_maclaurin_correction(a, k, log_M, order)
+
+    return (
+        (1 - a) * log_M_nats
+        - np.log1p(-a)
+        + np.log1p(
+            (1 - a) * (partial_sum + correction) * np.exp2((a - 1) * log_M)
+            - np.exp2((a - 1) * (log_M - np.log2(k + 1)))
+        )
+    )
+
+
+def log_zeta(a: np.ndarray[float], log_M: float, order=10, k=50):
+
+    # Ensure that a is treated as a NumPy array
+    a = np.asarray(a)
+
+    # If there are fewer than 2 ** 10 terms we compute the exact sum
+    if log_M < 10:
+        return np.log(partial_sum_zeta(a, 1, int(np.exp2(log_M))))
+
+    # Otherwise, we estimate the sum numerically via:
+    # - the exact sum of the first k terms sum_{i=1}^k i^{-a}
+    # - a comparison to an integral for the "sum tail" sum_{i=k+1}^M i^{-a}
+    # - possibly corrective terms as given by the Euler-Maclaurin summation formula
+
+    # Allocate an array to store the result
+    log_zeta_a = np.empty_like(a, dtype=float)
+
+    a_less_than_1 = a < 1
+    a_equal_1 = a == 1
+    a_larger_than_1 = a > 1
+
+    if a_equal_1.any():
+        log_M_nats = log_M * np.log(2)
+        log_zeta_a[a_equal_1] = np.log(
+            log_M_nats + np.euler_gamma + np.log1p(np.exp(-log_M_nats) / 2)
+        )
+    if a_less_than_1.any():
+        log_zeta_a[a_less_than_1] = log_zeta_less_than_1(
+            a[a_less_than_1], log_M, order, k
+        )
+    if a_larger_than_1.any():
+        log_zeta_a[a_larger_than_1] = log_zeta_larger_than_1(
+            a[a_larger_than_1], log_M, order, k
+        )
+
+    return log_zeta_a
+
+
+def log_guessing_entropy(mutual_information, key_size, base=2, order=10, k=50):
+    r"""Output a lower bound on the logarithm in base 'base' of the log-guessing entropy of the rank of a key of size key_size
+    when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+
+    Parameters
+    ----------
+    mutual_information : array_like, f64
+        Upper bound on the mutual information expressed in base 'base'
+    key_size : int
+        Number of bits of the secrets
+    base : f64
+        Base for the information used (by default base=2 i.e. the base is the bit)
+    order : int
+        Number of Euler-Maclaurin corrective terms used in the evaluation (by default order=10)
+    k : int
+        Number of terms in the sum that we compute exactly (by default k=50)
+
+    Returns
+    -------
+    Lower bound on the logarithm in base 'base' of the log-guessing entropy of the rank of a key of size key_size when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+    """
+    mutual_information = np.atleast_1d(mutual_information)
+
+    MI_nats = mutual_information * np.log(base)
+    key_size_nats = key_size * np.log(2)
+    entropy_nats = key_size_nats - MI_nats
+
+    A = np.hstack(
+        [np.geomspace(10**-10, 1, 90), 1, np.linspace(1 + 10**-10, 5, 10)]
+    ).reshape((-1,))
+    LG = np.max(
+        (np.expand_dims(entropy_nats, axis=1) - log_zeta(A, key_size, order, k)) / A,
+        axis=1,
+    )
+    return np.maximum(0, LG / np.log(base))
+
+
+def median(
+    mutual_information: np.ndarray[float], key_size: int, base: float = 2
+) -> np.ndarray[float]:
+    r"""Output a lower bound on the logarithm in base 'base' of the median rank of the key of size key_size
+    when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+
+    Parameters
+    ----------
+    mutual_information : array_like, f64
+        Upper bound on the mutual information expressed in base 'base'
+    key_size : int
+        Number of bits of the secrets
+    base : f64
+        Base for the information used (by default base=2 i.e. the base is the bit)
+
+    Returns
+    -------
+    Lower bound on the logarithm in base 'base' of the median rank of the key of size key_size when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+ """ + + # We use log1p and exp1m to have a stable expression + # We deal with eventual overflows in the exponential for large mutual information next + median = (key_size - 1) / np.log2(base) + np.log1p( + -np.sqrt(-np.expm1(-2 * mutual_information * np.log(base))) + ) / np.log(base) + + # To avoid numerical overflows we use an assymptotic expansion for large values of the mutual information + large_mi = mutual_information > 12 + expansion_for_large_mi = np.maximum( + 0, (key_size - 2) / np.log2(base) - 2 * mutual_information + ) + + return np.where(large_mi, expansion_for_large_mi, median) diff --git a/tests/test_leakage_to_success.py b/tests/test_leakage_to_success.py new file mode 100644 index 00000000..e72d49ca --- /dev/null +++ b/tests/test_leakage_to_success.py @@ -0,0 +1,79 @@ +import pytest +import numpy as np + +from scalib.postprocessing import ( + success_rate, + guessing_entropy, + log_guessing_entropy, + median, +) + + +def test_succss_rate(): + key_size = 128 + + mi = np.random.uniform(low=0, high=key_size, size=10**3) + + # Check bound for typical key lenght + sr_32 = success_rate(mi, key_size, enumeration_effort=32) + assert (-key_size <= sr_32).all() + assert (sr_32 <= 0).all() + + # Check that sr increase with enumeration effort + sr_64 = success_rate(mi, key_size, enumeration_effort=64) + assert (sr_64 >= sr_32).all() + + # Check wether it works for large keys + key_size = 1024 + mi = np.random.uniform(low=0, high=key_size, size=10**3) + sr = success_rate(mi, key_size, enumeration_effort=32) + assert (-key_size <= sr).all() + assert (sr <= 0).all() + + +def test_guessing_entropy(): + # Check bound for typical key length + key_size = 128 + mi = np.random.uniform(low=0, high=key_size, size=10**3) + ge = guessing_entropy(mi, key_size) + assert (ge >= 0).all() + assert (ge <= key_size).all() + + # Check wether it works for large keys + key_size = 1024 + mi = np.random.uniform(low=0, high=key_size, size=10**3) + ge = guessing_entropy(mi, key_size) + assert (ge >= 0).all() + assert (ge <= key_size).all() + + +def test_log_guessing_entropy(): + # Check wether it works for typical key lenght + key_size = 128 + mi = np.random.uniform(low=0, high=key_size, size=10**3) + lg = log_guessing_entropy(mi, key_size) + assert (lg >= 0).all() + assert (lg <= key_size).all() + + # Check wether it works for large keys + key_size = 1024 + mi = np.random.uniform(low=0, high=key_size, size=10**3) + lg = log_guessing_entropy(mi, key_size) + assert (lg >= 0).all() + assert (lg <= key_size).all() + + +def test_median(): + # Check wether it works for typical key lenght + key_size = 128 + mi = np.random.uniform(low=0, high=key_size, size=10**3) + med = median(mi, key_size) + assert (med >= 0).all() + assert (med <= key_size).all() + + # Check wether it works for large keys + key_size = 1024 + mi = np.random.uniform(low=0, high=key_size, size=10**3) + med = median(mi, key_size) + assert (med >= 0).all() + assert (med <= key_size).all() From f102c84be2fa56662c9f7719e811be02b25dddf3 Mon Sep 17 00:00:00 2001 From: beguinotj Date: Tue, 14 Oct 2025 22:45:56 +0200 Subject: [PATCH 2/4] remove silley assert that was producing occasional errors --- src/scalib/postprocessing/leakage_to_success.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/scalib/postprocessing/leakage_to_success.py b/src/scalib/postprocessing/leakage_to_success.py index 71e3b4ad..f9a5fc4c 100644 --- a/src/scalib/postprocessing/leakage_to_success.py +++ b/src/scalib/postprocessing/leakage_to_success.py 
@@ -97,7 +97,10 @@ def success_rate(
 
 
 def f(x):
-    return np.where(x > 0, x - np.expm1(x) * np.log1p(-np.exp(-x)), 0)
+    fx = np.zeros_like(x)
+    mask = (x > 0)
+    fx[mask] = x[mask] - np.expm1(x[mask]) * np.log1p(-np.exp(-x[mask]))
+    return fx
 
 
 def f_inv(y, niter=20):
@@ -109,14 +112,13 @@ def f_inv(y, niter=20):
     # Initial guess
     x_lb = np.maximum(np.log(1), y - 1 + np.log1p(np.exp(1 - y) / 2)).reshape((-1,))
     x_ub = np.maximum(np.log(2), y - 1 + np.log1p(np.exp(1 - y) / 2 + 1)).reshape((-1,))
 
-    assert (f(x_lb) <= y).all()
-    assert (f(x_ub) >= y).all()
-
     # Dichotomic search
     for _ in range(niter):
         x_mid = (x_lb + x_ub) / 2
         x_lb, x_ub = np.where(f(x_mid) - y < 0, (x_mid, x_ub), (x_lb, x_mid))
 
+    if (abs(f(x_lb)-y) > 10**-2).any():
+        print("Inversion error exceeds 10^{-2}; consider increasing niter")
     return x_lb
 
@@ -140,9 +142,9 @@ def guessing_entropy(mutual_information, key_size, base=2):
     MI_nats = mutual_information * np.log(base)
     key_size_nats = key_size * np.log(2)
     entropy_nats = key_size_nats - MI_nats
-    sqrt = np.sqrt(np.tanh(key_size_nats / 2) * 2 * MI_nats / 3)
 
     finite_size_bound = np.zeros_like(mutual_information)
+    sqrt = np.sqrt(np.tanh(key_size_nats / 2) * 2 * MI_nats / 3)
     mask = sqrt < 1
     finite_size_bound[mask] = (
         key_size_nats

From 75119c7ba00866c361675d301e58c3b307bbe1c8 Mon Sep 17 00:00:00 2001
From: beguinotj
Date: Tue, 14 Oct 2025 22:49:27 +0200
Subject: [PATCH 3/4] missing formatting

---
 src/scalib/postprocessing/leakage_to_success.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/scalib/postprocessing/leakage_to_success.py b/src/scalib/postprocessing/leakage_to_success.py
index f9a5fc4c..2c79bfa9 100644
--- a/src/scalib/postprocessing/leakage_to_success.py
+++ b/src/scalib/postprocessing/leakage_to_success.py
@@ -98,7 +98,7 @@ def success_rate(
 
 def f(x):
     fx = np.zeros_like(x)
-    mask = (x > 0)
+    mask = x > 0
     fx[mask] = x[mask] - np.expm1(x[mask]) * np.log1p(-np.exp(-x[mask]))
     return fx
 
@@ -117,7 +117,7 @@ def f_inv(y, niter=20):
     for _ in range(niter):
         x_mid = (x_lb + x_ub) / 2
         x_lb, x_ub = np.where(f(x_mid) - y < 0, (x_mid, x_ub), (x_lb, x_mid))
 
-    if (abs(f(x_lb)-y) > 10**-2).any():
+    if (abs(f(x_lb) - y) > 10**-2).any():
         print("Inversion error exceeds 10^{-2}; consider increasing niter")
 
     return x_lb

From 800520d4e8b19f9aacfcf880904c1c99a49954c8 Mon Sep 17 00:00:00 2001
From: beguinotj
Date: Mon, 20 Oct 2025 12:13:51 +0200
Subject: [PATCH 4/4] update based on code review

---
 .../postprocessing/leakage_to_success.py      | 300 +++++++++++++-----
 tests/test_leakage_to_success.py              |  28 +-
 2 files changed, 247 insertions(+), 81 deletions(-)

diff --git a/src/scalib/postprocessing/leakage_to_success.py b/src/scalib/postprocessing/leakage_to_success.py
index 2c79bfa9..5252459d 100644
--- a/src/scalib/postprocessing/leakage_to_success.py
+++ b/src/scalib/postprocessing/leakage_to_success.py
@@ -2,17 +2,39 @@
 
 The bounds depend on the leakage as measured by mutual information, the size of the secret key (number of bits) and possibly the number of keys enumerated.
 
-Examples
---------
->>> from scalib.postprocessing import success_rate, guessing_entropy, log_guessing_entropy, median
->>> import numpy as np
->>> key_size=128
->>> enumeration_effort=32
->>> mi = np.random.uniform(low=0, high=key_size, size=10**3)
->>> sr = success_rate(mi,key_size,enumeration_effort)
->>> med = median(mi,key_size)
->>> lg = log_guessing_entropy(mi,key_size)
->>> ge = guessing_entropy(mi, key_size)
+Say that an adversary acquires side-channel information about a uniformly distributed secret K through some side-channel observation Y.
+At the end of a side-channel attack the adversary produces a ranking of the different key hypotheses from most likely to least likely.
+We let :math:`\mathrm{rank}(K|Y)` be the position of the correct key in this ranking.
+The probability distribution of :math:`\mathrm{rank}(K|Y)` provides security guarantees about the performance of the attacker.
+It is hard to obtain :math:`\mathrm{rank}(K|Y)` for a full key; however, it is often possible to upper bound the informational leakage :math:`I(K;Y)`.
+Using :math:`I(K;Y)` we provide bounds on some statistics of the rank:
+
+* The probability of success of level L:
+
+.. math::
+    \mathbb{P}_{s,L}(K|Y) = \mathbb{P}( \mathrm{rank}(K|Y) \leq L),
+
+which bounds the probability of a successful attack when L keys are enumerated;
+
+* The guessing entropy:
+
+.. math::
+    G(K|Y) = \mathbb{E}[\mathrm{rank}(K|Y)],
+
+which corresponds to the average enumeration time needed to recover the secret key;
+
+* The median rank, which is the minimum number of hypotheses to enumerate to achieve a success rate of 1/2, i.e. the minimum value m such that
+
+.. math::
+    \mathrm{Med}(K|Y) = \min \{ m \mid \mathbb{P}( \mathrm{rank}(K|Y) \leq m) \geq 1/2 \};
+
+* The log-guessing entropy,
+
+.. math::
+    \mathrm{LG}(K|Y) = \mathbb{E}[ \log_2 \mathrm{rank}(K|Y)],
+
+which corresponds to the average security parameter of the implementation (it is more conservative than the guessing entropy).
+
 
 Reference
 ---------
@@ -39,14 +61,27 @@
 __all__ = ["success_rate", "guessing_entropy", "log_guessing_entropy", "median"]
 import numpy as np
 from scipy.special import gamma, bernoulli, factorial
+from scipy.optimize import brentq
 
 
-def binary_divergence_log(
-    log_p: np.ndarray[float], log_q: np.ndarray[float]
-) -> np.ndarray[float]:
+def binary_divergence_log(log_p: float, log_q: float) -> float:
+    r"""Outputs the binary divergence d(p||q) in a numerically stable way when log(p) and log(q) are given.
+
+    Parameters
+    ----------
+    log_p, log_q : float
+
+    Returns
+    -------
+    An estimation of the binary divergence d(p||q).
+    """
+    if log_p == 0:
+        return -log_q
+
     p = np.exp(log_p)
     q = np.exp(log_q)
-    return p * (log_p - log_q) + (1 - p) * (np.log1p(-p) - np.log1p(-q))
+
+    return p * (log_p - log_q) - np.expm1(log_p) * (np.log1p(-p) - np.log1p(-q))
 
 
 def success_rate(
@@ -54,11 +89,13 @@ def success_rate(
     key_size: int,
     enumeration_effort: float,
     base=2,
-    niter=30,
-):
-    r"""Output an upper bound on the logarithm in base 'base' of the probability of correctly guessing a secret key of size key_size bits
+) -> np.ndarray[float]:
+    r"""Output an upper bound on the logarithm in base 2 of the probability of correctly guessing a secret key of size key_size bits
     when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary and the adversary is allowed to enumerate up to
-    2 ** enumeration_effort key hypotheses.
+    :math:`L = 2^{\mathrm{enumeration \; effort}}` key hypotheses.
+
math:: + \log_2 \mathbb{P}_{s,L}(K|Y) = \log_2 \mathbb{P}( \mathrm{rank}(K|Y) \leq L). Parameters ---------- @@ -77,54 +114,91 @@ def success_rate( Returns ------- Output an upper bound on the logarithm in base 'base' of the probability of correctly guessing a secret key of of size key_size bits when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary and the adversary is allowed to enumerate up to 2^enumeration_effort key_hypotheses. + + + Examples + -------- + >>> from scalib.postprocessing import success_rate + >>> import numpy as np + >>> key_size=128 + >>> enumeration_effort=32 + >>> mi = np.random.uniform(low=0, high=key_size, size=1) + >>> sr = success_rate(mi,key_size,enumeration_effort) """ + if not (mutual_information >= 0).all(): + raise ValueError("Invalid inputs, the mutual information should be positive.") + if not (mutual_information * np.log2(base) <= key_size).all(): + raise ValueError( + "Invalid inputs, the mutual information in bits should be less than the key_size." + ) - MI_nats = mutual_information * np.log(base) + mutual_information = np.atleast_1d(mutual_information) + mi_nats = mutual_information * np.log(base) log_q = (enumeration_effort - key_size) * np.log(2) # in nats - # Initial intervall for the dichotomic search - log_p_lb = np.array(log_q).reshape((1,)) - log_p_ub = np.array(0).reshape((1,)) + log_sr = np.empty_like(mutual_information) + for i in range(len(mutual_information)): + if mi_nats[i] >= -log_q: + log_sr[i] = 0 + else: + log_sr[i] = brentq( + lambda log_p: binary_divergence_log(log_p, log_q) - mi_nats[i], + 0, + log_q, + xtol=10**-2, + maxiter=100, + full_output=False, + ) + + return log_sr / np.log(2) # in bits - # Dichotomic serach - for _ in range(niter): - log_p_mid = (log_p_lb + log_p_ub) / 2 - y = binary_divergence_log(log_p_mid, log_q) - MI_nats - log_p_lb, log_p_ub = np.where( - y > 0, (log_p_lb, log_p_mid), (log_p_mid, log_p_ub) - ) - return log_p_ub / np.log(base) # in base 'base' +def massey_inequality(x: float) -> float: + r""" + This sub-function implements the optimal version of Massey's inequality. -def f(x): - fx = np.zeros_like(x) - mask = x > 0 - fx[mask] = x[mask] - np.expm1(x[mask]) * np.log1p(-np.exp(-x[mask])) - return fx + Massey's inequality when optimaly writen reads as H(X|Y) >= G(X|Y) Hb( G(X|Y)^{-1}) where Hb is the binary entropy. + Let x = log( G(X|Y) ) this reads as H(X|Y) >= x - (exp(x)-1) * log(1 - exp(-x)). + + We implement this function using exp1m and log1p to be stable. + """ + if x == 0: + return 0 + return x - np.expm1(x) * np.log1p(-np.exp(-x)) -def f_inv(y, niter=20): +def massey_inequality_inverse(y: float) -> float: + r""" + This sub-function implements the inverse of the optimal Massey's inequality using brent's rootfinding algorithm. 
+ """ # Ensure that y is an array y = np.atleast_1d(y) - # Initial guess - x_lb = np.maximum(np.log(1), y - 1 + np.log1p(np.exp(1 - y) / 2)).reshape((-1,)) - x_ub = np.maximum(np.log(2), y - 1 + np.log1p(np.exp(1 - y) / 2 + 1)).reshape((-1,)) - - # Dichotomic search - for _ in range(niter): - x_mid = (x_lb + x_ub) / 2 - x_lb, x_ub = np.where(f(x_mid) - y < 0, (x_mid, x_ub), (x_lb, x_mid)) + inverse = np.zeros_like(y) + for i in range(len(y)): + # A precise approximation of the inverse would be y - 1 + log(1 + exp(1-y[i])/2) + lb = np.maximum(0, y[i] - 1) + ub = y[i] + inverse[i] = brentq( + lambda x: massey_inequality(x) - y[i], + lb, + ub, + xtol=10**-5, + maxiter=100, + full_output=False, + ) + return inverse - if (abs(f(x_lb) - y) > 10**-2).any(): - print("Inversion error exceeds 10^{-2} consider increasing niter") - return x_lb +def guessing_entropy( + mutual_information: np.ndarray[float], key_size: int, base=2 +) -> np.ndarray[float]: + r"""Output a lower bound on the logarithm in base 2 of the guessing entropy + when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary: -def guessing_entropy(mutual_information, key_size, base=2): - r"""Output a lower bound on the logarithm in base 'base' of the guessing entropy - when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary. + .. math:: + \mathrm{G}(K|Y) = \log_2 \mathbb{E}[ \mathrm{rank}(K|Y)]. Parameters ---------- @@ -138,7 +212,22 @@ def guessing_entropy(mutual_information, key_size, base=2): Returns ------- Lower bound on the logarithm in base 'base' of the guessing entropy + + Examples + -------- + >>> from scalib.postprocessing import guessing_entropy + >>> import numpy as np + >>> key_size=128 + >>> mi = np.random.uniform(low=0, high=key_size, size=1) + >>> ge = guessing_entropy(mi, key_size) """ + if not (mutual_information >= 0).all(): + raise ValueError("Invalid inputs, the mutual information should be positive.") + if not (mutual_information * np.log2(base) <= key_size).all(): + raise ValueError( + "Invalid inputs, the mutual information in bits should be less than the key_size." + ) + MI_nats = mutual_information * np.log(base) key_size_nats = key_size * np.log(2) entropy_nats = key_size_nats - MI_nats @@ -152,12 +241,12 @@ def guessing_entropy(mutual_information, key_size, base=2): + np.log1p(np.exp(-key_size_nats)) + np.log1p(-sqrt[mask]) ) - generic_bound = f_inv(entropy_nats) - bound = np.max([finite_size_bound, generic_bound], axis=0) / np.log(base) + generic_bound = massey_inequality_inverse(entropy_nats) + bound = np.max([finite_size_bound, generic_bound], axis=0) / np.log(2) # in bits return bound -def partial_sum_zeta(a, p, q): +def partial_sum_zeta(a: np.ndarray[float], p: int, q: int) -> np.ndarray[float]: r"""Output the partial sum sum_{i=p}^q i^{-a} Parameters @@ -174,7 +263,16 @@ def partial_sum_zeta(a, p, q): return np.sum(I ** -np.expand_dims(a, axis=0), axis=0) -def euler_maclaurin_correction(a, k, log_M, order): +def euler_maclaurin_correction( + a: np.ndarray[float], k: int, log_M: float, order: int +) -> np.ndarray[float]: + r"""This sub-function computes the first "order" Euler-MacLaurin corrective terms needed to approximate the sum + sum_{i=k}^{exp(log_M)} i^{-a}. + + These terms depends on both the Bernoulli numbers and the evalutation of the derivatives of i -> i^{-a}. 
+    See https://en.wikipedia.org/wiki/Euler%E2%80%93Maclaurin_formula
+
+    """
     if order == 0:
         return np.zeros_like(a)
     d_O = (np.arange(1, order + 1) * 2 - 1).reshape((order,) + (1,) * a.ndim)
@@ -186,8 +284,12 @@ def euler_maclaurin_correction(a, k, log_M, order):
     return np.sum((derivatives_M - derivatives_kp1) * EM_coeff, axis=0)
 
 
-def log_zeta_larger_than_1(a, log_M, order, k):
-
+def log_zeta_larger_than_1(
+    a: np.ndarray[float], log_M: float, order: int, k: int
+) -> np.ndarray[float]:
+    r"""This sub-function approximates the log of the truncated zeta function log(sum_{i=1}^{exp(log_M)} i^{-a})
+    for a parameter a > 1.
+    """
     # Partial Sum
     partial_sum = partial_sum_zeta(a, 1, k)
 
@@ -203,7 +305,12 @@ def log_zeta_larger_than_1(a, log_M, order, k):
     return np.log(partial_sum + integral + correction)
 
 
-def log_zeta_less_than_1(a, log_M, order, k):
+def log_zeta_less_than_1(
+    a: np.ndarray[float], log_M: float, order: int, k: int
+) -> np.ndarray[float]:
+    r"""This sub-function approximates the log of the truncated zeta function log(sum_{i=1}^{exp(log_M)} i^{-a})
+    for a parameter 0 < a < 1.
+    """
     log_M_nats = log_M * np.log(2)
     partial_sum = partial_sum_zeta(a, 1, k)
     correction = ((k + 1) ** (-a) + np.exp2(-a * log_M)) / 2
@@ -222,8 +329,12 @@ def log_zeta_less_than_1(a, log_M, order, k):
     )
 
 
-def log_zeta(a: np.ndarray[float], log_M: float, order=10, k=50):
-
+def log_zeta(
+    a: np.ndarray[float], log_M: float, order: int = 10, k: int = 50
+) -> np.ndarray[float]:
+    r"""This sub-function approximates the log of the truncated zeta function log(sum_{i=1}^{exp(log_M)} i^{-a}).
+    Depending on whether 0 < a < 1, a = 1 or a > 1, it uses a specific expression tailored to ensure numerical stability.
+    """
     # Ensure that a is treated as a NumPy array
     a = np.asarray(a)
 
@@ -260,9 +371,18 @@ def log_zeta(a: np.ndarray[float], log_M: float, order=10, k=50):
     return log_zeta_a
 
 
-def log_guessing_entropy(mutual_information, key_size, base=2, order=10, k=50):
-    r"""Output a lower bound on the logarithm in base 'base' of the log-guessing entropy of the rank of a key of size key_size
-    when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+def log_guessing_entropy(
+    mutual_information: np.ndarray[float],
+    key_size: int,
+    base: int = 2,
+    order: int = 10,
+    k: int = 50,
+) -> np.ndarray[float]:
+    r"""Output a lower bound on the log-guessing entropy in base 2 of the rank of a key of size key_size
+    when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary:
+
+    .. math::
+        \mathrm{LG}(K|Y) = \mathbb{E}[\log_2 \mathrm{rank}(K|Y)].
 
     Parameters
     ----------
@@ -279,7 +399,22 @@ def log_guessing_entropy(mutual_information, key_size, base=2, order=10, k=50):
     Returns
     -------
     Lower bound on the logarithm in base 'base' of the log-guessing entropy of the rank of a key of size key_size when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+
+    Examples
+    --------
+    >>> from scalib.postprocessing import log_guessing_entropy
+    >>> import numpy as np
+    >>> key_size=128
+    >>> mi = np.random.uniform(low=0, high=key_size, size=1)
+    >>> lg = log_guessing_entropy(mi,key_size)
     """
+    if not (mutual_information >= 0).all():
+        raise ValueError("Invalid inputs, the mutual information should be positive.")
+    if not (mutual_information * np.log2(base) <= key_size).all():
+        raise ValueError(
+            "Invalid inputs, the mutual information in bits should be less than the key_size."
+        )
+
     mutual_information = np.atleast_1d(mutual_information)
 
     MI_nats = mutual_information * np.log(base)
     key_size_nats = key_size * np.log(2)
     entropy_nats = key_size_nats - MI_nats
@@ -293,14 +428,17 @@ def log_guessing_entropy(mutual_information, key_size, base=2, order=10, k=50):
         (np.expand_dims(entropy_nats, axis=1) - log_zeta(A, key_size, order, k)) / A,
         axis=1,
     )
-    return np.maximum(0, LG / np.log(base))
+    return np.maximum(0, LG / np.log(2))  # in bits
 
 
 def median(
     mutual_information: np.ndarray[float], key_size: int, base: float = 2
 ) -> np.ndarray[float]:
-    r"""Output a lower bound on the logarithm in base 'base' of the median rank of the key of size key_size
-    when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+    r"""Output a lower bound on the logarithm in base 2 of the median rank of the key of size key_size
+    when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary:
+
+    .. math::
+        \log_2 \mathrm{Med}(K|Y) = \log_2 \min \{ m \mid \mathbb{P}( \mathrm{rank}(K|Y) \leq m) \geq 1/2 \}.
 
     Parameters
     ----------
@@ -314,18 +452,34 @@ def median(
     Returns
     -------
     Lower bound on the logarithm in base 'base' of the median rank of the key of size key_size when a leakage upper bounded by the mutual information 'mutual_information' is disclosed to the adversary.
+
+    Examples
+    --------
+    >>> from scalib.postprocessing import median
+    >>> import numpy as np
+    >>> key_size=128
+    >>> mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    >>> med = median(mi,key_size)
     """
+    if not (mutual_information >= 0).all():
+        raise ValueError("Invalid inputs, the mutual information should be positive.")
+    if not (mutual_information * np.log2(base) <= key_size).all():
+        raise ValueError(
+            "Invalid inputs, the mutual information in bits should be less than the key_size."
+        )
+    med = np.zeros_like(mutual_information)
 
     # We use log1p and expm1 to have a stable expression
     # We deal with possible overflows in the exponential for large mutual information next
-    median = (key_size - 1) / np.log2(base) + np.log1p(
-        -np.sqrt(-np.expm1(-2 * mutual_information * np.log(base)))
-    ) / np.log(base)
+    sqrt = np.sqrt(-np.expm1(-2 * mutual_information * np.log(base)))
+    med[sqrt < 1] = key_size - 1 + np.log1p(-sqrt[sqrt < 1]) / np.log(2)
 
     # To avoid numerical overflows we use an asymptotic expansion for large values of the mutual information
-    large_mi = mutual_information > 12
-    expansion_for_large_mi = np.maximum(
-        0, (key_size - 2) / np.log2(base) - 2 * mutual_information
+    # sqrt = 1 means that 1-exp(-2 * mutual_information * np.log(base)) is numerically simplified to 1 (large value of mutual_information)
+    # Hence 1-sqrt is zero and the log overflows.
+    # In this case we use the Taylor expansion log(1-sqrt(1-exp(-x))) = -(log 2) - x
+    med[sqrt == 1] = np.maximum(
+        0, key_size - 2 - 2 * mutual_information[sqrt == 1] * np.log2(base)
     )
 
-    return np.where(large_mi, expansion_for_large_mi, median)
+    return med
diff --git a/tests/test_leakage_to_success.py b/tests/test_leakage_to_success.py
index e72d49ca..7da98454 100644
--- a/tests/test_leakage_to_success.py
+++ b/tests/test_leakage_to_success.py
@@ -10,9 +10,12 @@
 
 
 def test_success_rate():
+
+    rng = np.random.default_rng(seed=42)
+
     key_size = 128
 
-    mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    mi = rng.uniform(low=0, high=key_size, size=10**3)
 
     # Check bound for typical key length
     sr_32 = success_rate(mi, key_size, enumeration_effort=32)
@@ -25,55 +28,64 @@ def test_success_rate():
     # Check that sr increases with enumeration effort
     sr_64 = success_rate(mi, key_size, enumeration_effort=64)
     assert (sr_64 >= sr_32).all()
 
     # Check whether it works for large keys
     key_size = 1024
-    mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    mi = rng.uniform(low=0, high=key_size, size=10**3)
     sr = success_rate(mi, key_size, enumeration_effort=32)
     assert (-key_size <= sr).all()
     assert (sr <= 0).all()
 
 
 def test_guessing_entropy():
+
+    rng = np.random.default_rng(seed=42)
+
     # Check bound for typical key length
     key_size = 128
-    mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    mi = rng.uniform(low=0, high=key_size, size=10**3)
     ge = guessing_entropy(mi, key_size)
     assert (ge >= 0).all()
     assert (ge <= key_size).all()
 
     # Check whether it works for large keys
     key_size = 1024
-    mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    mi = rng.uniform(low=0, high=key_size, size=10**3)
     ge = guessing_entropy(mi, key_size)
     assert (ge >= 0).all()
     assert (ge <= key_size).all()
 
 
 def test_log_guessing_entropy():
+
+    rng = np.random.default_rng(seed=42)
+
     # Check whether it works for typical key length
     key_size = 128
-    mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    mi = rng.uniform(low=0, high=key_size, size=10**3)
     lg = log_guessing_entropy(mi, key_size)
     assert (lg >= 0).all()
     assert (lg <= key_size).all()
 
     # Check whether it works for large keys
     key_size = 1024
-    mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    mi = rng.uniform(low=0, high=key_size, size=10**3)
     lg = log_guessing_entropy(mi, key_size)
     assert (lg >= 0).all()
     assert (lg <= key_size).all()
 
 
 def test_median():
+
+    rng = np.random.default_rng(seed=42)
+
    # Check whether it works for typical key length
     key_size = 128
-    mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    mi = rng.uniform(low=0, high=key_size, size=10**3)
     med = median(mi, key_size)
     assert (med >= 0).all()
     assert (med <= key_size).all()
 
     # Check whether it works for large keys
     key_size = 1024
-    mi = np.random.uniform(low=0, high=key_size, size=10**3)
+    mi = rng.uniform(low=0, high=key_size, size=10**3)
     med = median(mi, key_size)
     assert (med >= 0).all()
     assert (med <= key_size).all()
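
A quick end-to-end sketch of the API added by this series, assuming the patches are applied and scalib is importable. The 128-bit key, the 2**40 enumeration budget and the MI values are illustrative choices, not values fixed by the patch:

    # Sketch: turn assumed MI upper bounds into security margins.
    import numpy as np

    from scalib.postprocessing import (
        success_rate,
        guessing_entropy,
        log_guessing_entropy,
        median,
    )

    key_size = 128           # bits, e.g. an AES-128 key (illustrative)
    enumeration_effort = 40  # log2 of the enumerated keys (illustrative)
    mi = np.array([0.1, 1.0, 10.0, 50.0])  # assumed MI upper bounds, in bits

    log2_sr = success_rate(mi, key_size, enumeration_effort)  # upper bound
    log2_ge = guessing_entropy(mi, key_size)                  # lower bound
    lg = log_guessing_entropy(mi, key_size)                   # lower bound
    log2_med = median(mi, key_size)                           # lower bound

    for row in zip(mi, log2_sr, log2_ge, lg, log2_med):
        print("MI=%5.1f  log2 SR <= %7.2f  log2 GE >= %6.2f  LG >= %6.2f  log2 Med >= %6.2f" % row)

As in the tests above, the log success rate stays in [-key_size, 0] while the three rank bounds stay in [0, key_size], and all of them loosen as the assumed MI grows.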
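
When only per-subkey leakage estimates are available, a full-key MI budget can be assembled before calling these bounds. The sketch below relies on an assumption not made by the patch itself: that the per-byte leakages are mutually independent, in which case mutual information adds up across bytes. The per-byte figure is hypothetical:

    # Sketch: aggregate hypothetical per-byte MI estimates into a full-key bound.
    # Assumption: the 16 key bytes leak through mutually independent channels,
    # so I(K;Y) = sum_i I(K_i;Y_i); the 0.35 bit per S-box figure is made up.
    import numpy as np

    from scalib.postprocessing import log_guessing_entropy

    per_byte_mi = np.full(16, 0.35)                 # bits per S-box (hypothetical)
    full_key_mi = np.atleast_1d(per_byte_mi.sum())  # 5.6 bits against 128 bits

    # Remaining average security level of the full key, in bits.
    print(log_guessing_entropy(full_key_mi, key_size=128))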