-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
33 lines (24 loc) · 1.09 KB
/
utils.py
File metadata and controls
33 lines (24 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import math
from collections import Counter
def shannon_entropy(data: str) -> float:
    """
    Calculate the Shannon entropy of a string, in bits per character.

    Entropy is a measure of randomness or unpredictability in data. The higher
    the entropy, the more random the string is likely to be - a key
    characteristic of secrets such as API keys, tokens, database URLs, etc.

    Args:
        data (str): the input string for calculating entropy.

    Returns:
        float: the Shannon entropy value (higher value -> more random).
        Always non-negative; 0.0 for an empty string or a string made of a
        single repeated character.

    Sources consulted:
        - https://www.sciencedirect.com/topics/engineering/shannon-entropy
        - https://www.reddit.com/r/learnpython/comments/g1sdkh/python_programming_challenge_calculating_shannon/
    """
    # An empty string carries no information; return a float to honour the
    # annotated return type (and avoid dividing by a zero length below).
    if not data:
        return 0.0

    # Count the frequency of each character in the given string.
    counter = Counter(data)
    length = len(data)

    # Shannon entropy: H = sum(p * log2(1 / p)) with p = count / length.
    # This is the same quantity as -sum(p * log2(p)), but every term is
    # non-negative, so a single-symbol string yields 0.0 rather than -0.0.
    return sum(
        (count / length) * math.log2(length / count)
        for count in counter.values()
    )