From 682023a25a1ffc5e13491a09f541cf85a3f1b36b Mon Sep 17 00:00:00 2001 From: Abhin Shah Date: Tue, 22 Feb 2022 16:34:33 -0500 Subject: [PATCH 1/4] Adding code for frequency estimation and updating code for mean estimation --- rcc_dp/frequency_estimation/config.py | 58 ++++ rcc_dp/frequency_estimation/experiment.py | 302 ++++++++++++++++++ .../experiment_coding_cost.py | 198 ++++++++++++ .../frequency_estimation/experiment_test.py | 41 +++ rcc_dp/frequency_estimation/miracle.py | 177 ++++++++++ rcc_dp/frequency_estimation/modify_pi.py | 151 +++++++++ rcc_dp/frequency_estimation/modify_pi_test.py | 82 +++++ rcc_dp/frequency_estimation/rhr.py | 141 ++++++++ rcc_dp/frequency_estimation/ss.py | 82 +++++ rcc_dp/frequency_estimation/unbias.py | 112 +++++++ rcc_dp/{ => mean_estimation}/BUILD | 0 rcc_dp/{ => mean_estimation}/config.py | 11 +- rcc_dp/{ => mean_estimation}/experiment.py | 67 ++-- .../mean_estimation/experiment_coding_cost.py | 183 +++++++++++ .../{ => mean_estimation}/experiment_test.py | 18 +- .../{ => mean_estimation}/get_parameters.py | 4 +- .../get_parameters_test.py | 6 +- rcc_dp/{ => mean_estimation}/miracle.py | 0 rcc_dp/{ => mean_estimation}/miracle_test.py | 4 +- rcc_dp/{ => mean_estimation}/modify_pi.py | 0 .../{ => mean_estimation}/modify_pi_test.py | 6 +- .../{ => mean_estimation}/optimize_unbias.py | 2 +- rcc_dp/{ => mean_estimation}/privunit.py | 0 rcc_dp/{ => mean_estimation}/privunit_test.py | 2 +- rcc_dp/{ => mean_estimation}/sqkr.py | 0 rcc_dp/{ => mean_estimation}/sqkr_test.py | 2 +- 26 files changed, 1600 insertions(+), 49 deletions(-) create mode 100644 rcc_dp/frequency_estimation/config.py create mode 100644 rcc_dp/frequency_estimation/experiment.py create mode 100644 rcc_dp/frequency_estimation/experiment_coding_cost.py create mode 100644 rcc_dp/frequency_estimation/experiment_test.py create mode 100644 rcc_dp/frequency_estimation/miracle.py create mode 100644 rcc_dp/frequency_estimation/modify_pi.py create mode 100644 
rcc_dp/frequency_estimation/modify_pi_test.py create mode 100644 rcc_dp/frequency_estimation/rhr.py create mode 100644 rcc_dp/frequency_estimation/ss.py create mode 100644 rcc_dp/frequency_estimation/unbias.py rename rcc_dp/{ => mean_estimation}/BUILD (100%) rename rcc_dp/{ => mean_estimation}/config.py (88%) rename rcc_dp/{ => mean_estimation}/experiment.py (86%) create mode 100644 rcc_dp/mean_estimation/experiment_coding_cost.py rename rcc_dp/{ => mean_estimation}/experiment_test.py (67%) rename rcc_dp/{ => mean_estimation}/get_parameters.py (97%) rename rcc_dp/{ => mean_estimation}/get_parameters_test.py (96%) rename rcc_dp/{ => mean_estimation}/miracle.py (100%) rename rcc_dp/{ => mean_estimation}/miracle_test.py (94%) rename rcc_dp/{ => mean_estimation}/modify_pi.py (100%) rename rcc_dp/{ => mean_estimation}/modify_pi_test.py (96%) rename rcc_dp/{ => mean_estimation}/optimize_unbias.py (99%) rename rcc_dp/{ => mean_estimation}/privunit.py (100%) rename rcc_dp/{ => mean_estimation}/privunit_test.py (98%) rename rcc_dp/{ => mean_estimation}/sqkr.py (100%) rename rcc_dp/{ => mean_estimation}/sqkr_test.py (98%) diff --git a/rcc_dp/frequency_estimation/config.py b/rcc_dp/frequency_estimation/config.py new file mode 100644 index 00000000..e6b89dd7 --- /dev/null +++ b/rcc_dp/frequency_estimation/config.py @@ -0,0 +1,58 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Base configuration.""" + +from ml_collections.config_dict import config_dict + + +def get_config(): + """Returns config dictionary for model.""" + config = dict( + name="defaults", + # Either use geometric, zipf, or uniform i.e., data variable + # can take one of "geometric", "zipf", "uniform". + distribution="zipf", + lbd_geometric=0.8, + degree_zipf=1.0, + # Flags to indicate which methods to compare. + run_approx_miracle=False, + run_miracle=False, + run_modified_miracle=True, + run_ss=True, + run_rhr=True, + encoding_type="fast", # Can take either fast or normal + # Common parameters. + num_itr=1, + coding_cost=14, + coding_cost_multiplier=1, + approx_coding_cost_multiplier=3, + approx_t=6, + # Specific parameters (leave them as they are for now). + delta=10**(-6), + alpha=1.0, + # Variation. + vary="eps", # Can take one of "cc", "k", "n", "eps". + cc_space=[6, 8, 10, 12], + k_space=[200, 400, 600, 800, 1000], + n_space=[2000, 4000, 6000, 8000, 10000], + eps_space=list(range(1, 9)), + # Defaults. + n=5000, + k=500, + t=3, + epsilon_target=6, + ) + config = config_dict.ConfigDict(config) + config.lock() # Prevent addition of new fields. + return config \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/experiment.py b/rcc_dp/frequency_estimation/experiment.py new file mode 100644 index 00000000..3ab43d91 --- /dev/null +++ b/rcc_dp/frequency_estimation/experiment.py @@ -0,0 +1,302 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Experiment definitions (i.e., evaluation of miracle, rhr, subset selection +methods when either the data dimension k, the number of users n, or the +privacy parameter epsilon is varied).""" + +import json +import math +import time +import matplotlib +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['text.usetex'] = True +import matplotlib.pyplot as plt +import matplotlib.lines as mlines +import numpy as np +from rcc_dp.frequency_estimation import miracle +from rcc_dp.frequency_estimation import modify_pi +from rcc_dp.frequency_estimation import rhr +from rcc_dp.frequency_estimation import ss +from rcc_dp.frequency_estimation import unbias + + +def generate_geometric_distribution(k,lbd): + elements = range(0,k) + prob = [(1-lbd)*math.pow(lbd,x)/(1-math.pow(lbd,k)) for x in elements] + return prob + + +def generate_uniform_distribution(k): + raw_distribution = [1] * k + sum_raw = sum(raw_distribution) + prob = [float(y)/float(sum_raw) for y in raw_distribution] + return prob + + +def generate_zipf_distribution(k,degree): + raw_distribution = [1/(float(i)**(degree)) for i in range(1,k+1)] + sum_raw = sum(raw_distribution) + prob = [float(y)/float(sum_raw) for y in raw_distribution] + return prob + + +def evaluate(work_path, config, file_open=open): + """Evaluates miracle, rhr, ss methods.""" + with file_open(work_path + "/config.json", "w") as f: + json.dump(config.to_dict(), f) + + start_time = time.time() + + delta = config.delta + alpha = config.alpha + # Get default values. 
+ k = config.k + n = config.n + epsilon_target = config.epsilon_target + + if config.vary == "k": + vary_space = config.k_space + print("k space = " + str(vary_space)) + elif config.vary == "n": + vary_space = config.n_space + print("n space = " + str(vary_space)) + elif config.vary == "eps": + vary_space = config.eps_space + print("eps space = " + str(vary_space)) + else: + raise ValueError("vary should be either be k, n or eps.") + + approx_miracle_error = np.zeros((config.num_itr, len(vary_space))) + miracle_error = np.zeros((config.num_itr, len(vary_space))) + modified_miracle_error = np.zeros((config.num_itr, len(vary_space))) + rhr_error = np.zeros((config.num_itr, len(vary_space))) + ss_error = np.zeros((config.num_itr, len(vary_space))) + + for step, vary_parameter in enumerate(vary_space): + if config.vary == "k": + k = vary_parameter + coding_cost = config.coding_cost + elif config.vary == "n": + n = vary_parameter + coding_cost = config.coding_cost + elif config.vary == "eps": + epsilon_target = vary_parameter + coding_cost = config.coding_cost_multiplier * epsilon_target / np.log(2) + coding_cost += config.t + coding_cost = max(int(np.ceil(coding_cost)), 8) + print("epsilon target = " + str(epsilon_target)) + print("n = " + str(n)) + print("k = %d" % k) + print("coding cost = %d" % coding_cost) + + if config.run_modified_miracle: + eta = epsilon_target / 2.0 + print("eta = " + str(eta)) + print("alpha = " + str(alpha)) + + for itr in range(config.num_itr): + print("itr = %d" % itr) + if config.distribution == "geometric": + lbd = config.lbd_geometric + prob = generate_geometric_distribution(k,lbd) + elif config.distribution == "zipf": + degree = config.degree_zipf + prob = generate_zipf_distribution(k,degree) + elif config.distribution == "uniform": + prob = generate_uniform_distribution(k) + else: + raise ValueError( + "distribution should be either be geometric, zipf, uniform.") + x = np.random.choice(k, n, p=prob) + + if config.run_miracle: + 
x_miracle = np.zeros((k,n)) + for i in range(n): + if config.encoding_type == "fast": + x_miracle[:,i] = miracle.encode_decode_miracle_fast(i+itr*n, x[i], + k, epsilon_target/2, 2**coding_cost) + else: + _, _, index = miracle.encoder(i+itr*n, x[i], k, epsilon_target/2, + 2**coding_cost) + x_miracle[:,i] = miracle.decoder(i+itr*n, index, k, epsilon_target/2, + 2**coding_cost) + prob_miracle = unbias.unbias_miracle(k, epsilon_target/2, 2**coding_cost, + x_miracle.T, n, normalization = 1) + miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + in zip(prob, prob_miracle)], ord=1) + + if config.run_modified_miracle: + x_modified_miracle = np.zeros((k, n)) + for i in range(n): + if config.encoding_type == "fast": + x_modified_miracle[:,i] = miracle.encode_decode_modified_miracle_fast( + i+itr*n, x[i], k, alpha*epsilon_target, 2**coding_cost) + else: + _, pi, _ = miracle.encoder(i+itr*n, x[i], k, alpha*epsilon_target, + 2**coding_cost) + expected_beta = np.ceil(k/(np.exp(epsilon_target)+1))/k + pi_all = modify_pi.modify_pi(pi, eta, epsilon_target, + (np.exp(epsilon_target/2))/(1+expected_beta*(np.exp(epsilon_target)-1))) + index = np.random.choice(2**coding_cost, 1, p=pi_all[-1])[0] + x_modified_miracle[:,i] = miracle.decoder(i+itr*n, index, k, + alpha*epsilon_target, 2**coding_cost) + prob_modified_miracle = unbias.unbias_modified_miracle(k, alpha*epsilon_target, + 2**coding_cost, x_modified_miracle.T, n, normalization = 1) + modified_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + in zip(prob, prob_modified_miracle)], ord=1) + + if config.run_approx_miracle: + x_approx_miracle = np.zeros((k,n)) + approx_coding_cost = int(np.ceil( + config.approx_coding_cost_multiplier*epsilon_target/np.log(2) + config.approx_t)) + epsilon_approx = miracle.get_approx_epsilon(epsilon_target, k, + 2**approx_coding_cost, delta) + for i in range(n): + if config.encoding_type == "fast": + x_approx_miracle[:,i] = 
miracle.encode_decode_miracle_fast(i+itr*n, x[i], + k, epsilon_approx, 2**coding_cost) + else: + _, _, index = miracle.encoder(i+itr*n, x[i], k, epsilon_approx, 2**coding_cost) + x_approx_miracle[:,i] = miracle.decoder(i+itr*n, index, k, epsilon_approx, + 2**coding_cost) + prob_approx_miracle = unbias.unbias_miracle(k, epsilon_approx, 2**coding_cost, + x_approx_miracle.T, n, normalization = 1) + approx_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + in zip(prob, prob_approx_miracle)], ord=1) + + if config.run_ss: + x_ss = ss.encode_string_fast(k, epsilon_target, x) + prob_ss = ss.decode_string(k, epsilon_target, x_ss, n, normalization = 1) + ss_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + in zip(prob, prob_ss)], ord=1) + + if config.run_rhr: + x_rhr = rhr.encode_string(k, epsilon_target, coding_cost, x) + prob_rhr = rhr.decode_string_fast(k, epsilon_target, coding_cost, x_rhr, + normalization = 1) # estimate the original underlying distribution + rhr_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + in zip(prob, prob_rhr)], ord=1) + + if config.run_approx_miracle: + print("approx miracle error: " + + str(approx_miracle_error[:, step])) + if config.run_miracle: + print("miracle error: " + str(miracle_error[:, step])) + if config.run_modified_miracle: + print("modified miracle error: " + + str(modified_miracle_error[:, step])) + if config.run_ss: + print("ss error: " + str(ss_error[:, step])) + if config.run_rhr: + print("rhr error: " + str(rhr_error[:, step])) + print(time.time() - start_time) + + print("--------------") + if config.run_approx_miracle: + print("approx miracle error:") + print(np.mean(approx_miracle_error, axis=0)) + if config.run_miracle: + print("miracle error:") + print(np.mean(miracle_error, axis=0)) + if config.run_modified_miracle: + print("modified miracle error:") + print(np.mean(modified_miracle_error, axis=0)) + if config.run_ss: + print("ss error:") + print(np.mean(ss_error, 
axis=0)) + if config.run_rhr: + print("rhr error:") + print(np.mean(rhr_error, axis=0)) + + plt.figure(figsize=((8, 5)), dpi=80) + plt.axes((.15, .2, .83, .75)) + if config.run_approx_miracle: + plt.errorbar( + vary_space, + np.mean(approx_miracle_error, axis=0), + yerr=np.std(approx_miracle_error, axis=0)/np.sqrt(config.num_itr), + linewidth = 3.0, + label="Approx MRC") + if config.run_miracle: + plt.errorbar( + vary_space, + np.mean(miracle_error, axis=0), + yerr=np.std(miracle_error, axis=0)/np.sqrt(config.num_itr), + linewidth = 3.0, + label="MRC") + if config.run_modified_miracle: + plt.errorbar( + vary_space, + np.mean(modified_miracle_error, axis=0), + yerr=np.std(modified_miracle_error, axis=0)/np.sqrt(config.num_itr), + linewidth = 3.0, + label="MMRC") + if config.run_ss: + plt.errorbar( + vary_space, + np.mean(ss_error, axis=0), + yerr=np.std(ss_error, axis=0)/np.sqrt(config.num_itr), + ls='--', + linewidth = 3.0, + label="Subset Selection") + if config.run_rhr: + plt.errorbar( + vary_space, + np.mean(rhr_error, axis=0), + yerr=np.std(rhr_error, axis=0)/np.sqrt(config.num_itr), + ls='--', + linewidth = 3.0, + label="RHR") + plt.xticks(fontsize=28) + plt.yticks(fontsize=28) + plt.ylabel("$\ell_{2}$ error", fontsize=28) + if config.vary == "k": + plt.xlabel(r"$d$", fontsize=28) + plt.xticks([200,400,600,800,1000]) + plt.yticks([0.2,0.4,0.6,0.8]) + plt.legend(fontsize=24, loc='upper left') + elif config.vary == "n": + plt.xlabel(r"$n$", fontsize=28) + plt.ticklabel_format(style='sci', axis='x', scilimits=(3,3)) + plt.gca().xaxis.get_offset_text().set_fontsize(16) + plt.yticks([0.3,0.4,0.5,0.6,0.7]) + plt.legend(fontsize=24, loc='upper right') + elif config.vary == "eps": + plt.xlabel(r"$\varepsilon$", fontsize=28) + plt.yticks([0.3,0.6,0.9,1.2,1.5]) + plt.legend(fontsize=24, loc='upper right') + + with file_open(work_path + "/rcc_dp_ss_comparison.png", "wb") as f: + plt.savefig(f, format="png") + + if config.run_approx_miracle: + with file_open(work_path + "/approx_miracle_error.csv", "w") as f: + 
np.savetxt(f, approx_miracle_error, delimiter=",") + + if config.run_miracle: + with file_open(work_path + "/miracle_error.csv", "w") as f: + np.savetxt(f, miracle_error, delimiter=",") + + if config.run_modified_miracle: + with file_open(work_path + "/modified_miracle_error.csv", "w") as f: + np.savetxt(f, modified_miracle_error, delimiter=",") + + if config.run_ss: + with file_open(work_path + "/ss_error.csv", "w") as f: + np.savetxt(f, ss_error, delimiter=",") + + if config.run_rhr: + with file_open(work_path + "/rhr_error.csv", "w") as f: + np.savetxt(f, rhr_error, delimiter=",") \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/experiment_coding_cost.py b/rcc_dp/frequency_estimation/experiment_coding_cost.py new file mode 100644 index 00000000..41804c56 --- /dev/null +++ b/rcc_dp/frequency_estimation/experiment_coding_cost.py @@ -0,0 +1,198 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Experiment definitions (i.e., evaluation of miracle, rhr, subset selection +methods when the coding cost is varied).""" + +import json +import math +import time +import matplotlib +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['text.usetex'] = True +import matplotlib.pyplot as plt +import matplotlib.lines as mlines +import numpy as np +from rcc_dp.frequency_estimation import miracle +from rcc_dp.frequency_estimation import modify_pi +from rcc_dp.frequency_estimation import rhr +from rcc_dp.frequency_estimation import ss +from rcc_dp.frequency_estimation import unbias + + +def generate_geometric_distribution(k,lbd): + elements = range(0,k) + prob = [(1-lbd)*math.pow(lbd,x)/(1-math.pow(lbd,k)) for x in elements] + return prob + + +def generate_uniform_distribution(k): + raw_distribution = [1] * k + sum_raw = sum(raw_distribution) + prob = [float(y)/float(sum_raw) for y in raw_distribution] + return prob + + +def generate_zipf_distribution(k,degree): + raw_distribution = [1/(float(i)**(degree)) for i in range(1,k+1)] + sum_raw = sum(raw_distribution) + prob = [float(y)/float(sum_raw) for y in raw_distribution] + return prob + + +def evaluate(work_path, config, file_open=open): + """Evaluates miracle, rhr, ss methods.""" + with file_open(work_path + "/config.json", "w") as f: + json.dump(config.to_dict(), f) + + start_time = time.time() + + delta = config.delta + alpha = config.alpha + # Get default values. 
+ k = config.k + n = config.n + epsilon_target = config.epsilon_target + + vary_space = config.cc_space + + print("coding space = " + str(vary_space)) + + modified_miracle_error = np.zeros((config.num_itr, len(vary_space))) + rhr_error = np.zeros((config.num_itr, 1)) + ss_error = np.zeros((config.num_itr, 1)) + + rhr_coding_cost = epsilon_target + + for itr in range(config.num_itr): + print("itr = %d" % itr) + print("epsilon target = " + str(epsilon_target)) + print("n = " + str(n)) + print("k = %d" % k) + + if config.run_modified_miracle: + eta = epsilon_target / 2.0 + print("eta = " + str(eta)) + print("alpha = " + str(alpha)) + + if config.distribution == "geometric": + lbd = config.lbd_geometric + prob = generate_geometric_distribution(k,lbd) + elif config.distribution == "zipf": + degree = config.degree_zipf + prob = generate_zipf_distribution(k,degree) + elif config.distribution == "uniform": + prob = generate_uniform_distribution(k) + else: + raise ValueError( + "distribution should be either be geometric, zipf, uniform.") + x = np.random.choice(k, n, p=prob) + + + if config.run_ss: + x_ss = ss.encode_string_fast(k, epsilon_target, x) + prob_ss = ss.decode_string(k, epsilon_target, x_ss, n, normalization = 1) + ss_error[itr, 0] = np.linalg.norm([p_i - phat_i for p_i, phat_i + in zip(prob, prob_ss)], ord=1) + + if config.run_rhr: + x_rhr = rhr.encode_string(k, epsilon_target, rhr_coding_cost, x) + prob_rhr = rhr.decode_string_fast(k, epsilon_target, rhr_coding_cost, x_rhr, + normalization = 1) # estimate the original underlying distribution + rhr_error[itr, 0] = np.linalg.norm([p_i - phat_i for p_i, phat_i + in zip(prob, prob_rhr)], ord=1) + + for step, vary_parameter in enumerate(vary_space): + coding_cost = vary_parameter + print("coding cost = %d" % coding_cost) + + if config.run_modified_miracle: + x_modified_miracle = np.zeros((k, n)) + for i in range(n): + if config.encoding_type == "fast": + x_modified_miracle[:,i] = miracle.encode_decode_modified_miracle_fast( + i+itr*n, 
x[i], k, alpha*epsilon_target, 2**coding_cost) + else: + _, pi, _ = miracle.encoder(i+itr*n, x[i], k, alpha*epsilon_target, + 2**coding_cost) + expected_beta = np.ceil(k/(np.exp(epsilon_target)+1))/k + pi_all = modify_pi.modify_pi(pi, eta, epsilon_target, + (np.exp(epsilon_target/2))/(1+expected_beta*(np.exp(epsilon_target)-1))) + index = np.random.choice(2**coding_cost, 1, p=pi_all[-1])[0] + x_modified_miracle[:,i] = miracle.decoder(i+itr*n, index, k, + alpha*epsilon_target, 2**coding_cost) + prob_modified_miracle = unbias.unbias_modified_miracle(k, alpha*epsilon_target, + 2**coding_cost, x_modified_miracle.T, n, normalization = 1) + modified_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + in zip(prob, prob_modified_miracle)], ord=1) + + print(time.time() - start_time) + + print("--------------") + if config.run_modified_miracle: + print("modified miracle error:") + print(np.mean(modified_miracle_error, axis=0)) + if config.run_ss: + print("ss error:") + print(np.mean(ss_error, axis=0)) + if config.run_rhr: + print("rhr error:") + print(np.mean(rhr_error, axis=0)) + + plt.figure(figsize=(10, 8), dpi=80) + if config.run_modified_miracle: + plt.errorbar( + vary_space, + np.mean(modified_miracle_error, axis=0), + yerr=np.std(modified_miracle_error, axis=0), + linewidth = 3.0, + label="MMRC") + if config.run_ss: + line1 = plt.errorbar(vary_space, + [np.mean(ss_error, axis=0)[0]]*len(vary_space), + yerr = [np.std(ss_error, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), + ls='--', + linewidth = 3.0, + label="Subset Selection") + if config.run_rhr: + line2 = plt.errorbar(vary_space, + [np.mean(rhr_error, axis=0)[0]]*len(vary_space), + yerr = [np.std(rhr_error, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), + ls='--', + linewidth = 3.0, + label="RHR") + plt.xticks(fontsize=28) + plt.yticks(fontsize=28) + plt.ylabel("$\ell_{2}$ error", fontsize=28) + plt.xlabel(r"$\#$ bits", fontsize=28) + plt.yticks([0.4,0.5,0.6,0.7,0.8,0.9]) + 
plt.legend(fontsize=24, loc='upper right') + + with file_open(work_path + "/rcc_dp_ss_mse_vs_coding_cost.png", "wb") as f: + plt.savefig(f, format="png") + + with file_open(work_path + "/time.txt", "w") as f: + np.savetxt(f, np.array(time.time() - start_time).reshape(-1, 1)) + + if config.run_modified_miracle: + with file_open(work_path + "/modified_miracle_error.csv", "w") as f: + np.savetxt(f, modified_miracle_error, delimiter=",") + + if config.run_ss: + with file_open(work_path + "/ss_error.csv", "w") as f: + np.savetxt(f, ss_error, delimiter=",") + + if config.run_rhr: + with file_open(work_path + "/rhr_error.csv", "w") as f: + np.savetxt(f, rhr_error, delimiter=",") \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/experiment_test.py b/rcc_dp/frequency_estimation/experiment_test.py new file mode 100644 index 00000000..af731c32 --- /dev/null +++ b/rcc_dp/frequency_estimation/experiment_test.py @@ -0,0 +1,41 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for experiment definitions.""" + +from absl.testing import absltest +from rcc_dp.frequency_estimation import config as defaults +from rcc_dp.frequency_estimation import experiment +from rcc_dp.frequency_estimation import experiment_coding_cost + + +class ExperimentTest(absltest.TestCase): + + def test_evaluate_does_not_fail(self): + work_path = self.create_tempdir().full_path + config = defaults.get_config() + config.num_itr = 1 + config.k = 100 + config.n = 200 + if config.vary == "cc": + config.epsilon_target = 1 + config.cc_space = [4] + experiment_coding_cost.evaluate(work_path, config) + else: + config.eps_space = [1] + config.t = 2 + experiment.evaluate(work_path, config) + + +if __name__ == "__main__": + absltest.main() \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/miracle.py b/rcc_dp/frequency_estimation/miracle.py new file mode 100644 index 00000000..5f748b54 --- /dev/null +++ b/rcc_dp/frequency_estimation/miracle.py @@ -0,0 +1,177 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Minimal random coding (likelihood decoder) definitions. + +This is the code to be used to simulate the minimum random coding algorithm +(tailored to subset selection). The algorithm was introduced by Havasi et al. +in "Minimal Random Code Learning: Getting Bits Back from Compressed Model +Parameters" - https://arxiv.org/pdf/1810.00440.pdf. 
+ +For brevity, we may refer to it as 'MIRACLE', although technically this refers +to the complete model compression pipeline of Havasi et al. +""" + +import numpy as np +import scipy.special as sc + +def encoder(seed, x, k, epsilon, number_candidates): + """This is the encoder used by the miracle algorithm. + + Args: + seed: The random seed to be used by the encoder. + x: The user input data. + k: The dimension of the one-hot encoded x. + epsilon: The privacy parameter epsilon. + number_candidates: The number of candidates to be sampled. + + Returns: + z: The set of candidates sampled at the encoder. + index: The index sampled by the encoder. + pi: The distribution over the candidates for the given input data x. + """ + d = int(np.ceil(k/(np.exp(epsilon)+1))) + + c1 = np.exp(epsilon)/(sc.comb(k-1, d-1)* np.exp(epsilon) + sc.comb(k-1, d)) + c2 = 1/(sc.comb(k-1, d-1)* np.exp(epsilon) + sc.comb(k-1, d)) + + # Proposal distribution is chosen to be uniform on all d-hot encoded vectors. + z = np.zeros((k, number_candidates)) + z[:d, :] = 1 + rs = np.random.RandomState(seed) + for i in range(number_candidates): + rs.shuffle(z[:, i]) + + pi = np.where(z[x, :] == 1, c1, c2) + pi /= np.sum(pi) + index = np.random.choice(number_candidates, 1, p=pi)[0] + return z, pi, index + + +def decoder(seed, index, k, epsilon, number_candidates): + """This is the decoder used by the miracle algorithm. + + Args: + seed: The random seed to be used by the decoder (this seed should be the + same as the one used by the encoder). + index: The index transmitted by the encoder. + k: The dimension of the data. + epsilon: The privacy parameter epsilon. + number_candidates: The number of candidates to be sampled. + + Returns: + z_k: The candidate corresponding to the index (This is the candidate that + is distributed according to the conditional distribution of privunit). + """ + d = int(np.ceil(k/(np.exp(epsilon)+1))) + + # Proposal distribution should be the same as the one used by the encoder. 
+ z = np.zeros((k, number_candidates)) + z[:d, :] = 1 + rs = np.random.RandomState(seed) + for i in range(number_candidates): + rs.shuffle(z[:, i]) + z_k = z[:, index] + return z_k + + +def get_approx_epsilon(epsilon_target, k, number_candidates, delta): + """Get the effective epsilon for miracle with approxmiate-DP guarantees. + + Args: + epsilon_target: The privacy guarantee we desire. + k: The number of dimensions. + number_candidates: The number of candidates. + delta: The delta in the differential privacy guarantee. + + Returns: + epsilon_approx: The resulting epsilon that this version of miracle ends + up using. + """ + epsilon_search_space = np.linspace(0,epsilon_target,100) + epsilon_search_space = epsilon_search_space[:-1] + epsilon_approx = 0 + # Find the largest epsilon for SS so that MIRACLE meets epsilon_target + for _, epsilon in enumerate(epsilon_search_space): + d = int(np.ceil(k/(np.exp(epsilon)+1))) + cupper = (k*np.exp(epsilon))/(k-d+d*np.exp(epsilon)) + clower = k/(k-d+d*np.exp(epsilon)) + t = np.abs(cupper-clower)*np.sqrt((np.log(2/delta))/(2*number_candidates)) + if -1 < t < 1: + if epsilon + np.log(1+t) - np.log(1-t) <= epsilon_target: + epsilon_approx = epsilon + return epsilon_approx + + +def encode_decode_miracle_fast(seed, x, k, epsilon, number_candidates): + """A fast implementation of the miracle protocol -- instead of + generating number_candidates samples, generate one sample with + true expectation. 
+ """ + d = int(np.ceil(k/(np.exp(epsilon)+1))) + num_cand_in_cap = np.random.binomial(number_candidates, d/k, size=None) + pi_in = np.exp(epsilon)/(num_cand_in_cap*np.exp(epsilon) + + (number_candidates-num_cand_in_cap)) + prob_sample_from_cap = num_cand_in_cap*pi_in + + if np.random.uniform(0,1) <= prob_sample_from_cap: + # Generate a sample uniformly from the cap + z = [1]*(d-1)+[0]*(k-d) + rs = np.random.RandomState(seed) + rs.shuffle(z) + z = z[:x] + [1] + z[x:] + else: + # Generate a sample uniformly from outside the cap + z = [1]*(d)+[0]*(k-d-1) + rs = np.random.RandomState(seed) + rs.shuffle(z) + z = z[:x] + [0] + z[x:] + return np.array(z) + + +def encode_decode_modified_miracle_fast(seed, x, k, epsilon, number_candidates): + """A fast implementation of the modified miracle protocol -- instead of + generating number_candidates samples, generate one sample with + true expectation. + """ + d = int(np.ceil(k/(np.exp(epsilon)+1))) + c1 = np.exp(epsilon)/(sc.comb(k-1,d-1)* np.exp(epsilon) + sc.comb(k-1,d)) + c2 = 1/(sc.comb(k-1,d-1)* np.exp(epsilon) + sc.comb(k-1,d)) + + num_cand_in_cap = np.random.binomial(number_candidates, d/k, size=None) + + beta = num_cand_in_cap/number_candidates + expected_beta = np.ceil(k/(np.exp(epsilon)+1))/k + + pi_in = c1/(num_cand_in_cap*c1+(number_candidates-num_cand_in_cap)*c2) + tilde_pi_in = pi_in*(1+beta*(np.exp(epsilon)-1))/((1 + + expected_beta*(np.exp(epsilon)-1))) + if beta > expected_beta: + tilde_pi_in = tilde_pi_in*(beta+expected_beta*(np.exp(epsilon) + - 1))/(beta*np.exp(epsilon)) + + prob_sample_from_cap = num_cand_in_cap*tilde_pi_in + + if np.random.uniform(0,1) <= prob_sample_from_cap: + # Generate a sample uniformly from the cap + z = [1]*(d-1)+[0]*(k-d) + rs = np.random.RandomState(seed) + rs.shuffle(z) + z = z[:x] + [1] + z[x:] + else: + # Generate a sample uniformly from outside the cap + z = [1]*(d)+[0]*(k-d-1) + rs = np.random.RandomState(seed) + rs.shuffle(z) + z = z[:x] + [0] + z[x:] + return np.array(z) \ 
No newline at end of file diff --git a/rcc_dp/frequency_estimation/modify_pi.py b/rcc_dp/frequency_estimation/modify_pi.py new file mode 100644 index 00000000..6e55865f --- /dev/null +++ b/rcc_dp/frequency_estimation/modify_pi.py @@ -0,0 +1,151 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Code to modify the distribution pi to tilde_pi. + +The distribution tilde_pi (which is equal to pi_all[-1]) is 2 * eta-DP. +""" + +import numpy as np + + +def modify_pi(pi, eta, epsilon, multiplicative_factor): + """This function modifies the distribution pi to make it 2eta-LDP. + + The function essentially ensures that the new distribution lies between the + upper threshold `exp(eta) * multiplicative_factor * p` and the lower threshold + `exp(-eta) * multiplicative_factor * p`. It first checks if the distribution + already lies inside the thresholds. If not, it trades as mass beyond the upper + threshold with mass beyond the lower threshold. In other words, it ensures + that at least one of the constraint is satisfied (the one which is violated + less severely). This is done with the help of the helper function + `trade_mass`. Next, it iteratively enforces the constraint that is still + violated and renormalizes the distribution. This is done with the help of the + helper function `normalize`. + + Args: + pi: The input distribution to be modified + eta: The privacy parameter that is half times the desired privacy guarantee. 
+ epsilon: The privacy parameter epsilon. + multiplicative_factor: The factor by which the uniform distribution over the + candidates is scaled with. + + Returns: + pi_all: The container containing how the distrbution pi evolved from pi to + tilde_pi (which is equal to pi_all[-1]). Further, tilde_pi is 2eta-LDP. + """ + if eta < epsilon / 2: + raise ValueError('eta should be larger than epsilon/2.') + + number_candidates = len(pi) + p = np.zeros(number_candidates) + 1.0 / number_candidates + p = p * multiplicative_factor + # The container used to track changes in the distribution. + pi_all = [pi] + + # Calculate how much mass lies above the upper threshold. + mass_above = np.sum(np.maximum(pi - np.exp(eta) * p, 0)) + # Calculate how much mass lies below the lower threshold. + mass_below = np.sum(np.maximum(np.exp(-eta) * p - pi, 0)) + + if mass_above == 0 and mass_below == 0: + return pi_all + elif mass_above > mass_below: + # Since the lower threshold is violated less severely, correct all + # probabilities which are too low. + below = pi < np.exp(-eta) * p + pi_new = pi.copy() + pi_new[below] = np.exp(-eta) * p[below] + + # Sort by distance from upper threshold (biggest offenders first). The + # indices above the upper threshold are first in order. + indices = np.argsort(np.exp(eta) * p - pi_all[-1]) + + # Trade mass above the upper threshold against mass below the lower + # threshold as much as possible. + budget = mass_below + for i in indices: + # Correct probability above threshold as much as possible. + diff = pi_new[i] - np.exp(eta) * p[i] + if diff > budget: + pi_new[i] -= budget + break + elif diff > 0: + pi_new[i] -= diff + budget -= diff + pi_all.append(pi_new.copy()) + + # Now, pi_new disobeys at-most one constraint i.e., some mass is either + # above the upper threshold or all the mass is between the thresholds. + + # Calculate the remaining mass above the upper threshold. 
+ mass_above = np.sum(np.maximum(pi_new - np.exp(eta) * p, 0)) + + while mass_above > 0: + # Correct probabilities above the upper threshold. + above = pi_new >= np.exp(eta) * p + pi_new[above] = np.exp(eta) * p[above] + + # Renormalize distribution. + not_above = ~above + pi_new[not_above] += mass_above * pi[not_above] / np.sum(pi[not_above]) + pi_all.append(pi_new.copy()) + + # Calculate the remaining mass above the upper threshold. + mass_above = np.sum(np.maximum(pi_new - np.exp(eta) * p, 0)) + + else: + # Since the upper threshold is violated less severely, correct all + # probabilities which are too high. + above = pi > np.exp(eta) * p + pi_new = pi.copy() + pi_new[above] = np.exp(eta) * p[above] + + # Sort by distance from lower threshold (biggest offenders first). The + # indices below the lower threshold are first in order. + indices = np.argsort(pi_all[-1] - np.exp(-eta) * p) + + # Trade mass below the lower threshold against mass above the upper + # threshold as much as possible. + budget = mass_above + for i in indices: + # Correct probability below threshold as much as possible. + diff = np.exp(-eta) * p[i] - pi_new[i] + if diff > budget: + pi_new[i] += budget + break + elif diff > 0: + pi_new[i] += diff + budget -= diff + pi_all.append(pi_new.copy()) + + # Now, pi_new disobeys at-most one constraint i.e., some mass is either + # below the lower threshold or all the mass is between the thresholds. + + # Calculate the remaining mass below the lower threshold. + mass_below = np.sum(np.maximum(np.exp(-eta) * p - pi_new, 0)) + + while mass_below > 0: + # Correct probabilities below the lower threshold. + below = pi_new <= np.exp(-eta) * p + pi_new[below] = np.exp(-eta) * p[below] + + # Renormalize distribution. + not_below = ~below + pi_new[not_below] -= mass_below * pi[not_below] / np.sum(pi[not_below]) + pi_all.append(pi_new.copy()) + + # Calculate the remaining mass below the lower threshold. 
+ mass_below = np.sum(np.maximum(np.exp(-eta) * p - pi_new, 0)) + + return pi_all \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/modify_pi_test.py b/rcc_dp/frequency_estimation/modify_pi_test.py new file mode 100644 index 00000000..bc8d7491 --- /dev/null +++ b/rcc_dp/frequency_estimation/modify_pi_test.py @@ -0,0 +1,82 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for modify pi.""" + +from absl.testing import absltest +import numpy as np +from rcc_dp.frequency_estimation import get_parameters +from rcc_dp.frequency_estimation import miracle +from rcc_dp.frequency_estimation import modify_pi + + +class ModifyPiTest(absltest.TestCase): + + def test_tilde_pi_is_a_distribution(self): + """Test whether every distribution generated by modify_all sums to 1.""" + budget = 0.5 + for d in [100, 200]: + for number_candidates in [2**4, 2**5]: + for epsilon in [2, 3]: + x = np.random.normal(0, 1, (d, 1)) + x = np.divide(x, np.linalg.norm(x, axis=0).reshape(1, -1)) + c1, c2, _, gamma = get_parameters.get_parameters_unbiased_miracle( + epsilon, d, number_candidates, budget) + _, _, pi = miracle.encoder(0, x[:, 0], number_candidates, c1, c2, + gamma) + eta = epsilon / 2 + pi_all = modify_pi.modify_pi(pi, eta, epsilon, + c1 / (np.exp(epsilon / 2))) + self.assertLessEqual(len(pi_all), 3) + for distribution in pi_all: + self.assertLessEqual(np.abs(np.sum(distribution)-1), 0.0001) + + def test_tilde_pi_is_private(self): + 
"""Test whether tilde pi satisfies the DP constraint.""" + budget = 0.5 + for d in [100, 200]: + for number_candidates in [2**4, 2**5]: + for epsilon in [2, 3]: + x = np.random.normal(0, 1, (d, 1)) + x = np.divide(x, np.linalg.norm(x, axis=0).reshape(1, -1)) + c1, c2, _, gamma = get_parameters.get_parameters_unbiased_miracle( + epsilon, d, number_candidates, budget) + _, _, pi = miracle.encoder(0, x[:, 0], number_candidates, c1, c2, + gamma) + eta = epsilon / 2 + pi_all = modify_pi.modify_pi(pi, eta, epsilon, + c1 / (np.exp(epsilon / 2))) + self.assertLessEqual(np.max(pi_all[-1]), c1 / number_candidates) + self.assertLessEqual(c1 * np.exp(-epsilon) / number_candidates, + np.min(pi_all[-1]) + 1e-6) + + def test_convergence(self): + """Test whether modify_pi converges in at-most 3 iterations as expected.""" + budget = 0.5 + for d in [100, 200]: + for number_candidates in [2**4, 2**5]: + for epsilon in [2, 3]: + x = np.random.normal(0, 1, (d, 1)) + x = np.divide(x, np.linalg.norm(x, axis=0).reshape(1, -1)) + c1, c2, _, gamma = get_parameters.get_parameters_unbiased_miracle( + epsilon, d, number_candidates, budget) + _, _, pi = miracle.encoder(0, x[:, 0], number_candidates, c1, c2, + gamma) + eta = epsilon / 2 + pi_all = modify_pi.modify_pi(pi, eta, epsilon, + c1 / (np.exp(epsilon / 2))) + self.assertLessEqual(len(pi_all), 3) + + +if __name__ == "__main__": + absltest.main() \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/rhr.py b/rcc_dp/frequency_estimation/rhr.py new file mode 100644 index 00000000..dad7a0df --- /dev/null +++ b/rcc_dp/frequency_estimation/rhr.py @@ -0,0 +1,141 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""RHR definitions. + +This is the code to be used to simulate RHR. + +RHR was introduced by Chen et al in "Breaking the Communication-Privacy- +Accuracy Trilemma" - https://arxiv.org/pdf/2007.11707.pdf +""" + +import math +import numpy as np +from rcc_dp.frequency_estimation import ss + + +def encode_string(dim, epsilon, comm, x): + """An implementation of the RHR protocol to privatize x.""" + n = len(x) + + # Pad dim to the power of 2. + padded_d = int(math.pow(2, math.ceil(math.log(dim, 2)))) + # Calculate the effective communication cost + # i.e., min(comm, log(e^\epsilon)). + eff_comm = min(comm, math.ceil(epsilon*math.log(math.e, 2)), + math.ceil(math.log(dim, 2))+1) + # Set the block size. + block_size = int(padded_d/math.pow(2, eff_comm-1)) + + loc_sign = np.zeros(n) + for idx in range(n): + j = int(idx % block_size) + x_individual = int(x[idx]) + loc_sign[idx] = 2*int(x_individual/block_size) + if get_hadamard_entry(padded_d, j, x_individual) == -1: + loc_sign[idx] = loc_sign[idx] + 1 + + z = rr_encode_string(int(math.pow(2, eff_comm)), epsilon, + np.array(loc_sign)) + return z + + +def rr_encode_string(alphabet_size, epsilon, samples): + """An implementation of the RR protocol to privatize x.""" + n = len(samples) + # Start by setting private_samples = samples. + private_samples_rr = np.copy(samples) + + # Determine which samples need to be noised (i.e., flipped). 
+ flip = np.random.random_sample(n) < (alphabet_size + - 1)/(math.exp(epsilon) + alphabet_size - 1) + flip_samples = samples[flip] + + # Select new samples uniformly at random to replace the original ones. + rand_samples = np.random.randint(0, alphabet_size - 1, len(flip_samples)) + + # Shift the samples if needed to avoid sampling the orginal samples. + rand_samples[rand_samples >= flip_samples] += 1 + + # Replace the original samples by the randomly selected ones. + private_samples_rr[flip] = rand_samples + return private_samples_rr + + +def decode_string_fast(dim, epsilon, comm, z, normalization = 1): + """Learn the original distribution from the privatized strings when + the input is a matrix consisting of all the bit vectors. + """ + l = len(z) + # Pad dim to the power of 2. + padded_d = int(math.pow(2, math.ceil(math.log(dim,2)))) + # Calculate the effective communication cost + # i.e., min(comm, log(e^\epsilon)). + eff_comm = min(comm, math.ceil(epsilon*math.log(math.e, 2)), + math.ceil(math.log(dim, 2))+1) + # Set the block size. + block_size = int(padded_d/math.pow(2, eff_comm-1)) + n = int(l/block_size)*block_size + + group_list = np.array(z[:n]).reshape(int(n/block_size), block_size).T + + # Create histograms to specify the empirical distributions of each group. + histograms = np.zeros((block_size, int(padded_d/block_size))) + for g_idx in range(block_size): + g_count,g_axis = np.histogram(group_list[g_idx], + range(int(math.pow(2, eff_comm))+1)) + histograms[g_idx] = g_count[::2] - g_count[1::2] + histograms[g_idx] = histograms[g_idx] * (math.exp(epsilon) + + math.pow(2, eff_comm)-1)/(math.exp(epsilon)-1)*(block_size/n) + + # Obtain estimator of q. 
+ q = np.zeros((block_size, int(padded_d/block_size))) + for j in range(block_size): + q[j, :] = fast_inverse_hadamard_transform(int(padded_d/block_size), + histograms[j, :]) + + q = q.reshape((padded_d, ), order = 'F') + + # Perform inverse Hadamard transform to get p + p_estimate = fast_inverse_hadamard_transform(padded_d, q)/padded_d + p_estimate = p_estimate[:dim] + + if normalization == 0: + # Clip and normalize. + p_estimate = ss.probability_normalize(p_estimate) + if normalization == 1: + # Project on the simplex. + p_estimate = ss.probability_project_simplex(p_estimate) + return p_estimate + + +def get_hadamard_entry(d, x, y): + """Get (H_d)_{x,y}.""" + z = x & y + z_bit = bin(z)[2:].zfill(int(math.log(d, 2))) + check = 0 + for i in range(0, int(math.log(d, 2))): + check = check^int(z_bit[i]) + return (-1)**check + + +def fast_inverse_hadamard_transform(k, dist): + """ Performs inverse Hadamard transform.""" + if k == 1: + return dist + dist1 = dist[0 : k//2] + dist2 = dist[k//2 : k] + trans1 = fast_inverse_hadamard_transform(k//2, dist1) + trans2 = fast_inverse_hadamard_transform(k//2, dist2) + trans = np.concatenate((trans1+ trans2, trans1 - trans2)) + return trans \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/ss.py b/rcc_dp/frequency_estimation/ss.py new file mode 100644 index 00000000..e3763741 --- /dev/null +++ b/rcc_dp/frequency_estimation/ss.py @@ -0,0 +1,82 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Subset selection definitions. + +This is the code to be used to simulate the subset selection algorithm. + +The subset selection algorithm was introduced by Ye et al in +"Optimal Schemes for Discrete Distribution Estimation under +Locally Differential Privacy" - https://arxiv.org/pdf/1702.00610.pdf. +""" + +import numpy as np + + +def probability_project_simplex(p): + k = len(p) # Infer the size of the alphabet. + p_sorted = np.sort(p) + p_sorted[:] = p_sorted[::-1] + p_sorted_cumsum = np.cumsum(p_sorted) + i = 1 + while i < k: + if p_sorted[i] + (1.0 / (i + 1)) * (1 - p_sorted_cumsum[i]) < 0: + break + i += 1 + lmd = (1.0 / i) * (1 - p_sorted_cumsum[i - 1]) + return np.maximum(p + lmd, 0) + + +def probability_normalize(p): + p = np.maximum(p,0) # Map it to be positive. + norm = np.sum(p) + p = np.true_divide(p,norm) # Ensure the l_1 norm is one. + return p + + +def encode_string_fast(k, epsilon, x): + """A fast implementation of the subset selection protocol -- instead + of selecting exactly d ones, set each bit to be one independently + with true expectation. + """ + d = int(np.ceil(k/(np.exp(epsilon)+1))) + p = d*np.exp(epsilon)/(d*np.exp(epsilon)+k-d) + q = (d-p)/(k-1) + + n = len(x) + z = np.zeros((n, k)) + flip = np.random.random_sample((n, k)) + + for i in range(n): + z[i,x[i]] = np.logical_or(0,flip[i,x[i]] < p) + return np.logical_or(z, flip < q) + +def decode_string(k, epsilon, z, length, normalization = 1): + """Learn the original distribution from the privatized strings when + the input is a matrix consisting of all the bit vectors. + """ + d = int(np.ceil(k/(np.exp(epsilon)+1))) + + temp1 = ((k-1)*np.exp(epsilon) + +1.0*(k-1)*(k-d)/d)/((k-d)*(np.exp(epsilon)-1)) + temp2 = ((d-1)*np.exp(epsilon)+k-d) / (1.0*(k-d)*(np.exp(epsilon)-1)) + + p_estimate = (1.0*np.sum(z, axis=0)*temp1/length)-temp2 + + if normalization == 0: + # Clip and normalize. 
+ p_estimate = probability_normalize(p_estimate) + if normalization == 1: + # Project on the simplex. + p_estimate = probability_project_simplex(p_estimate) + return p_estimate \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/unbias.py b/rcc_dp/frequency_estimation/unbias.py new file mode 100644 index 00000000..b6b199aa --- /dev/null +++ b/rcc_dp/frequency_estimation/unbias.py @@ -0,0 +1,112 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Code to un-bias miracle & modified miracle for ss. + +Please note that this unbiasing techniques are specific to ss + miracle +and are not general strategies for miracle. The un-biasing happens by +modifying the corresponding m and b parameters. +""" + +import numpy as np +from scipy import stats +import scipy.special as sc +from rcc_dp.frequency_estimation import ss + + +def unbias_miracle(k, epsilon, number_candidates, z, n, normalization = 1): + """Get the unbiased estimate for miracle. + + Args: + k: The dimension of the one-hot encoded x. + epsilon: The privacy parameter epsilon. + number_candidates: The number of candidates to be sampled. + z: The privatized data. + n: The number of users. 
+ normalization: Indicator whether to clip and normalize or + project on the simplex + + Returns: + p_estimate: The unbiased estimate for miracle + """ + d = int(np.ceil(k/(np.exp(epsilon)+1))) + c1 = np.exp(epsilon)/(sc.comb(k-1,d-1)* np.exp(epsilon) + sc.comb(k-1,d)) + c2 = 1/(sc.comb(k-1,d-1)* np.exp(epsilon) + sc.comb(k-1,d)) + beta = np.array(range(number_candidates+1))/number_candidates + # The probability with which you choose a candidate inside the cap. + pi_in = 1/number_candidates*(c1/(beta*c1+(1-beta)*c2)) + expectation = np.sum(stats.binom.pmf(range(number_candidates+1), + number_candidates, d/k)*range(number_candidates+1)*pi_in) + b_hat = (d - expectation)/(k-1) + m_hat = k*expectation/(k-1) - d/(k-1) + + p_estimate = (1.0*np.sum(z, axis=0)/(n*m_hat))-b_hat/m_hat + + if normalization == 0: + # Clip and normalize. + p_estimate = ss.probability_normalize(p_estimate) + elif normalization == 1: + # Project on the simplex. + p_estimate = ss.probability_project_simplex(p_estimate) + return p_estimate + + +def unbias_modified_miracle(k, epsilon, number_candidates, z, n, + normalization = 1): + """Get the unbiased estimate for modified miracle. + + Args: + k: The dimension of the one-hot encoded x. + epsilon: The privacy parameter epsilon. + number_candidates: The number of candidates to be sampled. + z: The privatized data. + n: The number of users. + normalization: Indicator whether to clip and normalize or + project on the simplex + + Returns: + p_estimate: The unbiased estimate for modified miracle + """ + expected_beta = np.ceil(k/(np.exp(epsilon)+1))/k + d = int(np.ceil(k/(np.exp(epsilon)+1))) + c1 = np.exp(epsilon)/(sc.comb(k-1,d-1)* np.exp(epsilon) + sc.comb(k-1,d)) + c2 = 1/(sc.comb(k-1,d-1)* np.exp(epsilon) + sc.comb(k-1,d)) + # The fraction of candidates that lie inside the cap. 
+ beta = np.array(range(number_candidates + 1)) / number_candidates + + pi_in = 1 / number_candidates * (c1 / (beta * c1 + (1 - beta) * c2)) + tilde_pi_in_1 = ( + pi_in * (1 + beta * (np.exp(epsilon) - 1)) / (1 + expected_beta * + (np.exp(epsilon) - 1))) + tilde_pi_in_2 = ( + tilde_pi_in_1 * (beta + expected_beta * (np.exp(epsilon) - 1)) / + (beta * np.exp(epsilon))) + tilde_pi_in_2[0,] = 0 + indicator = beta <= expected_beta + # The probability with which you choose a candidate inside the cap. + tilde_pi_in = indicator * tilde_pi_in_1 + (1 - indicator) * tilde_pi_in_2 + expectation = np.sum( + stats.binom.pmf(range(number_candidates + 1), number_candidates, + d / k) * range(number_candidates + 1) * tilde_pi_in) + b_tilde = (d - expectation)/(k-1) + m_tilde = k*expectation/(k-1) - d/(k-1) + + p_estimate = (1.0*np.sum(z, axis=0)/(n*m_tilde))-b_tilde/m_tilde + + if normalization == 0: + # Clip and normalize. + p_estimate = ss.probability_normalize(p_estimate) + elif normalization == 1: + # Project on the simplex. + p_estimate = ss.probability_project_simplex(p_estimate) + return p_estimate \ No newline at end of file diff --git a/rcc_dp/BUILD b/rcc_dp/mean_estimation/BUILD similarity index 100% rename from rcc_dp/BUILD rename to rcc_dp/mean_estimation/BUILD diff --git a/rcc_dp/config.py b/rcc_dp/mean_estimation/config.py similarity index 88% rename from rcc_dp/config.py rename to rcc_dp/mean_estimation/config.py index 772f4ae7..a40d033e 100644 --- a/rcc_dp/config.py +++ b/rcc_dp/mean_estimation/config.py @@ -24,14 +24,14 @@ def get_config(): # can take one of "biased_data", "unbiased_data", "same_data". data="biased_data", # Flags to indicate which methods to compare. - run_approx_miracle=True, + run_approx_miracle=False, run_miracle=False, - run_modified_miracle=False, + run_modified_miracle=True, run_privunit=True, run_sqkr=True, # Common parameters. 
num_itr=1, - coding_cost=8, + coding_cost=11, coding_cost_multiplier=1, approx_coding_cost_multiplier=3, approx_t=6, @@ -40,7 +40,8 @@ def get_config(): budget=0.5, alpha=1.0, # Variation. - vary="eps", # Can take one of "d", "n", "eps". + vary="eps", # Can take one of "cc", "d", "n", "eps". + cc_space=[6, 8, 10, 12], d_space=[200, 400, 600, 800, 1000], n_space=[2000, 4000, 6000, 8000, 10000], eps_space=list(range(1, 9)), @@ -48,7 +49,7 @@ def get_config(): n=5000, d=500, t=2, - epsilon_target=4, + epsilon_target=6, ) config = config_dict.ConfigDict(config) config.lock() # Prevent addition of new fields. diff --git a/rcc_dp/experiment.py b/rcc_dp/mean_estimation/experiment.py similarity index 86% rename from rcc_dp/experiment.py rename to rcc_dp/mean_estimation/experiment.py index a8866ded..4dd401b7 100644 --- a/rcc_dp/experiment.py +++ b/rcc_dp/mean_estimation/experiment.py @@ -11,19 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Experiment definitions (i.e., evaluation of miracle, sqkr, privunit methods).""" +"""Experiment definitions (i.e., evaluation of miracle, sqkr, privunit methods +when either the data dimension d, the number of users n, or the privacy +parameter epsilon is varied).""" import json import math import time +import matplotlib +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['text.usetex'] = True import matplotlib.pyplot as plt +import matplotlib.lines as mlines import numpy as np from scipy import stats -from rcc_dp import get_parameters -from rcc_dp import miracle -from rcc_dp import modify_pi -from rcc_dp import privunit -from rcc_dp import sqkr +from rcc_dp.mean_estimation import get_parameters +from rcc_dp.mean_estimation import miracle +from rcc_dp.mean_estimation import modify_pi +from rcc_dp.mean_estimation import privunit +from rcc_dp.mean_estimation import sqkr def evaluate(work_path, config, file_open=open): @@ -196,54 +203,64 @@ def evaluate(work_path, config, file_open=open): print("sqkr mse:") print(np.mean(sqkr_mse, axis=0)) - plt.figure(figsize=(10, 8), dpi=80) + plt.figure(figsize=((8, 5)), dpi=80) + plt.axes((.15, .2, .83, .75)) if config.run_approx_miracle: plt.errorbar( vary_space, np.mean(approx_miracle_mse, axis=0), yerr=np.std(approx_miracle_mse, axis=0) / np.sqrt(config.num_itr), - label="Approx-DP Miracle") + linewidth = 3.0, + label="MRC") if config.run_miracle: plt.errorbar( vary_space, np.mean(miracle_mse, axis=0), yerr=np.std(miracle_mse, axis=0) / np.sqrt(config.num_itr), - label="Miracle") + linewidth = 3.0, + label="MRC") if config.run_modified_miracle: plt.errorbar( vary_space, np.mean(modified_miracle_mse, axis=0), yerr=np.std(modified_miracle_mse, axis=0) / np.sqrt(config.num_itr), - label="Modified Miracle") + linewidth = 3.0, + label="MMRC") if config.run_privunit: plt.errorbar( vary_space, np.mean(privunit_mse, axis=0), yerr=np.std(privunit_mse, axis=0) / 
np.sqrt(config.num_itr), - label="PrivUnit") + ls='--', + linewidth = 3.0, + label="PrivUnit$_{2}$") if config.run_sqkr: plt.errorbar( vary_space, np.mean(sqkr_mse, axis=0), yerr=np.std(sqkr_mse, axis=0) / np.sqrt(config.num_itr), + ls='--', + linewidth = 3.0, label="SQKR") - plt.legend(fontsize=18) - plt.xticks(fontsize=18) - plt.yticks(fontsize=18) - plt.ylabel("mse", fontsize=18) + plt.legend(fontsize=24) + plt.xticks(fontsize=28) + plt.yticks(fontsize=28) + plt.ylabel("$\ell_{2}$ error", fontsize=28) if config.vary == "d": - plt.xlabel(r"$d$", fontsize=18) - plt.title( - "n = " + str(n) + ", " + r"$\epsilon = $" + str(epsilon_target), - fontsize=18) + plt.xlabel(r"$d$", fontsize=28) + plt.xticks([200,400,600,800,1000]) + plt.yticks([0.00,0.04,0.08,0.12,0.16]) + plt.legend(fontsize=24, loc='upper left') elif config.vary == "n": - plt.xlabel(r"$n$", fontsize=18) - plt.title( - "d = " + str(d) + ", " + r"$\epsilon = $" + str(epsilon_target), - fontsize=18) + plt.xlabel(r"$n$", fontsize=28) + plt.ticklabel_format(style='sci', axis='x', scilimits=(3,3)) + plt.gca().xaxis.get_offset_text().set_fontsize(16) + plt.yticks([0.00,0.05,0.10,0.15,0.20]) + plt.legend(fontsize=24, loc='upper right') elif config.vary == "eps": - plt.xlabel(r"$\epsilon$", fontsize=18) - plt.title("d = " + str(d) + ", $n = $" + str(n), fontsize=18) + plt.xlabel(r"$\varepsilon$", fontsize=28) + plt.yticks([0.3,0.6,0.9,1.2,1.5]) + plt.legend(fontsize=24, loc='upper right') with file_open(work_path + "/rcc_dp_comparison.png", "wb") as f: plt.savefig(f, format="png") diff --git a/rcc_dp/mean_estimation/experiment_coding_cost.py b/rcc_dp/mean_estimation/experiment_coding_cost.py new file mode 100644 index 00000000..21dceb28 --- /dev/null +++ b/rcc_dp/mean_estimation/experiment_coding_cost.py @@ -0,0 +1,183 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Experiment definitions (i.e., evaluation of miracle, sqkr, privunit methods +when the coding cost is varied).""" + +import json +import math +import time +import matplotlib +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['text.usetex'] = True +import matplotlib.pyplot as plt +import matplotlib.lines as mlines +import numpy as np +from scipy import stats +from rcc_dp.mean_estimation import get_parameters +from rcc_dp.mean_estimation import miracle +from rcc_dp.mean_estimation import modify_pi +from rcc_dp.mean_estimation import privunit +from rcc_dp.mean_estimation import sqkr + + +def evaluate(work_path, config, file_open=open): + """Evaluates miracle, sqkr, privunit methods.""" + with file_open(work_path + "/config.json", "w") as f: + json.dump(config.to_dict(), f) + + start_time = time.time() + + delta = config.delta + budget = config.budget + alpha = config.alpha + # Get default values. 
+  d = config.d
+  n = config.n
+  epsilon_target = config.epsilon_target
+
+  vary_space = config.cc_space
+
+  print("coding space = " + str(vary_space))
+
+  modified_miracle_mse = np.zeros((config.num_itr, len(vary_space)))
+  sqkr_mse = np.zeros((config.num_itr, 1))
+  privunit_mse = np.zeros((config.num_itr, 1))
+
+  sqkr_coding_cost = epsilon_target
+
+  for itr in range(config.num_itr):
+    print("itr = %d" % itr)
+    print("epsilon target = " + str(epsilon_target))
+    print("n = " + str(n))
+    print("d = %d" % d)
+    if config.run_modified_miracle:
+      eta = epsilon_target / 2.0
+      print("eta = " + str(eta))
+      print("alpha = " + str(alpha))
+
+    if config.data == "unbiased_data":
+      x = np.random.normal(0, 1, (d, n))
+      x /= np.linalg.norm(x, axis=0)
+    elif config.data == "biased_data":
+      x = np.zeros((d, n))
+      x[:, 0::2] = np.random.normal(10, 1, (d, (n + 1) // 2))
+      x[:, 1::2] = np.random.normal(1, 1, (d, n // 2))
+      x /= np.linalg.norm(x, axis=0)
+    elif config.data == "same_data":
+      x = np.random.normal(0, 1, (d, 1))
+      x /= np.linalg.norm(x, axis=0)
+      x = np.repeat(x, n, axis=1)
+    else:
+      raise ValueError(
+          "data should be either be biased_data, unbiased_data, same_data.")
+
+    if config.run_privunit:
+      x_privunit, _ = privunit.apply_privunit(x, epsilon_target, budget)
+      x_privunit = np.mean(np.array(x_privunit), axis=1, keepdims=True)
+      privunit_mse[itr, 0] = np.linalg.norm(
+          np.mean(x, axis=1, keepdims=True) - x_privunit)**2
+
+    if config.run_sqkr:
+      # Generate a random tight frame satisfying UP -- for sqkr.
+      frame = 2**int(math.ceil(math.log(d, 2)) + 1)
+      u = stats.ortho_group.rvs(dim=frame).T[:, 0:d]
+
+      k_equiv = min(epsilon_target, sqkr_coding_cost)
+      [_, _, q_perturb] = sqkr.kashin_encode(u, x, k_equiv, epsilon_target)
+      x_kashin = sqkr.kashin_decode(u, k_equiv, epsilon_target, q_perturb)
+      sqkr_mse[itr, 0] = np.linalg.norm(
+          np.mean(x, axis=1, keepdims=True) - x_kashin)**2
+
+    for step, vary_parameter in enumerate(vary_space):
+      coding_cost = vary_parameter
+      print("coding cost = %d" % coding_cost)
+
+      if config.run_modified_miracle:
+        x_modified_miracle = np.zeros((d, n))
+        c1, c2, m, gamma = (
+            get_parameters.get_parameters_unbiased_modified_miracle(
+                alpha * epsilon_target, d, 2**coding_cost, budget))
+        for i in range(n):
+          _, _, pi = miracle.encoder(i + itr * n, x[:, i], 2**coding_cost, c1,
+                                     c2, gamma)
+          pi_all = modify_pi.modify_pi(pi, eta, epsilon_target,
+                                       c1 / (np.exp(epsilon_target / 2)))
+          k = np.random.choice(2**coding_cost, 1, p=pi_all[-1])[0]
+          z_k = miracle.decoder(i + itr * n, k, d, 2**coding_cost)
+          x_modified_miracle[:, i] = z_k / m
+        x_modified_miracle = np.mean(x_modified_miracle, axis=1, keepdims=True)
+        modified_miracle_mse[itr, step] = np.linalg.norm(
+            np.mean(x, axis=1, keepdims=True) - x_modified_miracle)**2
+
+  print(time.time() - start_time)
+
+  print("--------------")
+  if config.run_modified_miracle:
+    print("modified miracle mse:")
+    print(np.mean(modified_miracle_mse, axis=0))
+  if config.run_privunit:
+    print("privunit mse:")
+    print(np.mean(privunit_mse, axis=0))
+  if config.run_sqkr:
+    print("sqkr mse:")
+    print(np.mean(sqkr_mse, axis=0))
+
+  plt.figure(figsize=((8, 5)), dpi=80)
+  plt.axes((.15, .2, .83, .75))
+  if config.run_modified_miracle:
+    plt.errorbar(
+        vary_space,
+        np.mean(modified_miracle_mse, axis=0),
+        yerr=np.std(modified_miracle_mse, axis=0)/np.sqrt(config.num_itr),
+        linewidth = 3.0,
+        label="MMRC")
+  if config.run_privunit:
+    line1 = plt.errorbar(vary_space,
+        [np.mean(privunit_mse, axis=0)[0]]*len(vary_space),
+        yerr =
[np.std(privunit_mse, axis=0)[0]/np.sqrt(num_itr)]*len(vary_space), + ls='--', + linewidth = 3.0, + label="PrivUnit$_{2}$") + if config.run_sqkr: + line2 = plt.errorbar(vary_space, + [np.mean(sqkr_mse, axis=0)[0]]*len(vary_space), + yerr = [np.std(sqkr_mse, axis=0)[0]/np.sqrt(num_itr)]*len(vary_space), + ls='--', + linewidth = 3.0, + label="SQKR") + plt.xticks(fontsize=28) + plt.yticks(fontsize=28) + plt.ylabel("$\ell_{2}$ error", fontsize=28) + plt.xlabel(r"$\#$ bits", fontsize=28) + plt.yticks([0.04,0.05,0.06,0.07,0.08]) + plt.legend(fontsize=24, loc='center right') + + with file_open(work_path + "/rcc_dp_mse_vs_coding_cost.png", "wb") as f: + plt.savefig(f, format="png") + + with file_open(work_path + "/time.txt", "w") as f: + np.savetxt(f, np.array(time.time() - start_time).reshape(-1, 1)) + + if config.run_modified_miracle: + with file_open(work_path + "/modified_miracle_mse.csv", "w") as f: + np.savetxt(f, modified_miracle_mse, delimiter=",") + + if config.run_privunit: + with file_open(work_path + "/privunit_mse.csv", "w") as f: + np.savetxt(f, privunit_mse, delimiter=",") + + if config.run_sqkr: + with file_open(work_path + "/sqkr_mse.csv", "w") as f: + np.savetxt(f, sqkr_mse, delimiter=",") \ No newline at end of file diff --git a/rcc_dp/experiment_test.py b/rcc_dp/mean_estimation/experiment_test.py similarity index 67% rename from rcc_dp/experiment_test.py rename to rcc_dp/mean_estimation/experiment_test.py index a03bc032..ac71d55d 100644 --- a/rcc_dp/experiment_test.py +++ b/rcc_dp/mean_estimation/experiment_test.py @@ -14,8 +14,9 @@ """Tests for experiment definitions.""" from absl.testing import absltest -from rcc_dp import config as defaults -from rcc_dp import experiment +from rcc_dp.mean_estimation import config as defaults +from rcc_dp.mean_estimation import experiment +from rcc_dp.mean_estimation import experiment_coding_cost class ExperimentTest(absltest.TestCase): @@ -24,12 +25,17 @@ def test_evaluate_does_not_fail(self): work_path = 
self.create_tempdir().full_path config = defaults.get_config() config.num_itr = 1 - config.eps_space = [1] - config.t = 2 config.d = 100 config.n = 200 - experiment.evaluate(work_path, config) + if config.vary == "cc": + config.epsilon_target = 1 + config.cc_space = [4] + experiment_coding_cost.evaluate(work_path, config) + else: + config.eps_space = [1] + config.t = 2 + experiment.evaluate(work_path, config) if __name__ == "__main__": - absltest.main() + absltest.main() \ No newline at end of file diff --git a/rcc_dp/get_parameters.py b/rcc_dp/mean_estimation/get_parameters.py similarity index 97% rename from rcc_dp/get_parameters.py rename to rcc_dp/mean_estimation/get_parameters.py index 4ddc5c67..bba1d220 100644 --- a/rcc_dp/get_parameters.py +++ b/rcc_dp/mean_estimation/get_parameters.py @@ -14,8 +14,8 @@ """Code to obtain parameters of various miracle methods.""" import numpy as np -from rcc_dp import optimize_unbias -from rcc_dp import privunit +from rcc_dp.mean_estimation import optimize_unbias +from rcc_dp.mean_estimation import privunit def get_parameters_unbiased_approx_miracle(epsilon_target, d, diff --git a/rcc_dp/get_parameters_test.py b/rcc_dp/mean_estimation/get_parameters_test.py similarity index 96% rename from rcc_dp/get_parameters_test.py rename to rcc_dp/mean_estimation/get_parameters_test.py index 00c2c4ec..b5e0db8b 100644 --- a/rcc_dp/get_parameters_test.py +++ b/rcc_dp/mean_estimation/get_parameters_test.py @@ -15,9 +15,9 @@ from absl.testing import absltest import numpy as np -from rcc_dp import get_parameters -from rcc_dp import miracle -from rcc_dp import modify_pi +from rcc_dp.mean_estimation import get_parameters +from rcc_dp.mean_estimation import miracle +from rcc_dp.mean_estimation import modify_pi class GetParametersTest(absltest.TestCase): diff --git a/rcc_dp/miracle.py b/rcc_dp/mean_estimation/miracle.py similarity index 100% rename from rcc_dp/miracle.py rename to rcc_dp/mean_estimation/miracle.py diff --git 
a/rcc_dp/miracle_test.py b/rcc_dp/mean_estimation/miracle_test.py similarity index 94% rename from rcc_dp/miracle_test.py rename to rcc_dp/mean_estimation/miracle_test.py index 9f0f65a8..c2710bf0 100644 --- a/rcc_dp/miracle_test.py +++ b/rcc_dp/mean_estimation/miracle_test.py @@ -15,8 +15,8 @@ from absl.testing import absltest import numpy as np -from rcc_dp import get_parameters -from rcc_dp import miracle +from rcc_dp.mean_estimation import get_parameters +from rcc_dp.mean_estimation import miracle class MiracleTest(absltest.TestCase): diff --git a/rcc_dp/modify_pi.py b/rcc_dp/mean_estimation/modify_pi.py similarity index 100% rename from rcc_dp/modify_pi.py rename to rcc_dp/mean_estimation/modify_pi.py diff --git a/rcc_dp/modify_pi_test.py b/rcc_dp/mean_estimation/modify_pi_test.py similarity index 96% rename from rcc_dp/modify_pi_test.py rename to rcc_dp/mean_estimation/modify_pi_test.py index 75e39bcc..963c71e5 100644 --- a/rcc_dp/modify_pi_test.py +++ b/rcc_dp/mean_estimation/modify_pi_test.py @@ -15,9 +15,9 @@ from absl.testing import absltest import numpy as np -from rcc_dp import get_parameters -from rcc_dp import miracle -from rcc_dp import modify_pi +from rcc_dp.mean_estimation import get_parameters +from rcc_dp.mean_estimation import miracle +from rcc_dp.mean_estimation import modify_pi class ModifyPiTest(absltest.TestCase): diff --git a/rcc_dp/optimize_unbias.py b/rcc_dp/mean_estimation/optimize_unbias.py similarity index 99% rename from rcc_dp/optimize_unbias.py rename to rcc_dp/mean_estimation/optimize_unbias.py index 9cf0fa41..c70625d8 100644 --- a/rcc_dp/optimize_unbias.py +++ b/rcc_dp/mean_estimation/optimize_unbias.py @@ -28,7 +28,7 @@ import numpy as np from scipy import stats -from rcc_dp import privunit +from rcc_dp.mean_estimation import privunit def get_unbiased_p_hat(number_candidates, c1, c2, p): diff --git a/rcc_dp/privunit.py b/rcc_dp/mean_estimation/privunit.py similarity index 100% rename from rcc_dp/privunit.py rename to 
rcc_dp/mean_estimation/privunit.py diff --git a/rcc_dp/privunit_test.py b/rcc_dp/mean_estimation/privunit_test.py similarity index 98% rename from rcc_dp/privunit_test.py rename to rcc_dp/mean_estimation/privunit_test.py index 5e5296de..821d2604 100644 --- a/rcc_dp/privunit_test.py +++ b/rcc_dp/mean_estimation/privunit_test.py @@ -15,7 +15,7 @@ from absl.testing import absltest import numpy as np -from rcc_dp import privunit +from rcc_dp.mean_estimation import privunit class PrivunitTest(absltest.TestCase): diff --git a/rcc_dp/sqkr.py b/rcc_dp/mean_estimation/sqkr.py similarity index 100% rename from rcc_dp/sqkr.py rename to rcc_dp/mean_estimation/sqkr.py diff --git a/rcc_dp/sqkr_test.py b/rcc_dp/mean_estimation/sqkr_test.py similarity index 98% rename from rcc_dp/sqkr_test.py rename to rcc_dp/mean_estimation/sqkr_test.py index e806647d..8545eae6 100644 --- a/rcc_dp/sqkr_test.py +++ b/rcc_dp/mean_estimation/sqkr_test.py @@ -17,7 +17,7 @@ from absl.testing import parameterized import numpy as np from scipy import stats -from rcc_dp import sqkr +from rcc_dp.mean_estimation import sqkr class SqkrTest(parameterized.TestCase): From 78a8657187ee79021e99d473b4181a12aa1f62df Mon Sep 17 00:00:00 2001 From: Abhin Shah Date: Tue, 22 Feb 2022 19:52:18 -0500 Subject: [PATCH 2/4] Updating readme and a parameter for freq estimation --- rcc_dp/README.md | 155 +++++++++++++++++++++++++- rcc_dp/frequency_estimation/config.py | 2 +- 2 files changed, 154 insertions(+), 3 deletions(-) diff --git a/rcc_dp/README.md b/rcc_dp/README.md index 864c6836..9d5ffc3b 100644 --- a/rcc_dp/README.md +++ b/rcc_dp/README.md @@ -1,3 +1,154 @@ -# RCC/DP project +# Source code for "Optimal Compression of Locally Differentially Private Mechanisms" -This is work in progress. Links to relevant publications will be added later. 
+Reference: Abhin Shah, Wei-Ning Chen, Johannes Balle, Peter Kairouz, Lucas Theis, +"Optimal Compression of Locally Differentially Private Mechanisms," +The 25th International Conference on Artificial Intelligence and Statistics (AISTATS), 2022 + +Contact: abhin@mit.edu, jballe@google.com + +Arxiv: [https://arxiv.org/pdf/2111.00092.pdf](https://arxiv.org/pdf/2111.00092.pdf) + +### Dependencies: + +In order to successfully execute the code, the following libraries must be installed: + +Python --- json, math, time, matplotlib, numpy, scipy, [absl](https://github.com/abseil/abseil-py), [ml_collections](https://github.com/google/ml_collections) + +### Wrapper functions: + +This repository contains the code for (a) mean estimation and (b) frequency estimation. To run the code, a wrapper function needs to be written. For example, to run the mean estimation code, the following could be used: + +``` +from mean_estimation import config as defaults +from mean_estimation import experiment +from mean_estimation import experiment_coding_cost + +def main(): + config = defaults.get_config() + experiment.evaluate('path-to-the-mean-estimation-code', config) + +if __name__ == "__main__": + main() +``` + +Similarly, to run the frequency estimation code, the following could be used: + +``` +from frequency_estimation import config as defaults +from frequency_estimation import experiment +from frequency_estimation import experiment_coding_cost + +def main(): + config = defaults.get_config() + experiment.evaluate('path-to-the-frequency-estimation-code', config) + +if __name__ == "__main__": + main() +``` + +### Reproducing the figures + +1. To reproduce Figure 1(Top), make the following changes in mean_estimation/config.py: +``` +num_itr = 10 +vary = "cc" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment_coding_cost.evaluate('path-to-the-mean-estimation-code', config) +``` +2. 
To reproduce Figure 1(Bottom), make the following changes in mean_estimation/config.py: +``` +num_itr = 10 +vary = "eps" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment.evaluate('path-to-the-mean-estimation-code', config) +``` +3. To reproduce Figure 2(Top), make the following changes in frequency_estimation/config.py: +``` +num_itr = 10 +vary = "cc" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment_coding_cost.evaluate('path-to-the-frequency-estimation-code', config) +``` +4. To reproduce Figure 2(Bottom), make the following changes in frequency_estimation/config.py: +``` +num_itr = 10 +vary = "eps" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment.evaluate('path-to-the-frequency-estimation-code', config) +``` +5. To reproduce Figure 3, make the following changes in mean_estimation/config.py: +``` +run_approx_miracle=True, +run_modified_miracle=False, +num_itr = 10 +vary = "eps" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment.evaluate('path-to-the-mean-estimation-code', config) +``` +6. To reproduce Figure 4(Left), make the following changes in mean_estimation/config.py: +``` +num_itr = 10 +vary = "d" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment.evaluate('path-to-the-mean-estimation-code', config) +``` +7. To reproduce Figure 4(Right), make the following changes in mean_estimation/config.py: +``` +num_itr = 10 +vary = "n" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment.evaluate('path-to-the-mean-estimation-code', config) +``` +8. 
To reproduce Figure 5, make the following changes in frequency_estimation/config.py: +``` +run_approx_miracle=True, +run_modified_miracle=False, +num_itr = 10 +vary = "eps" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment.evaluate('path-to-the-frequency-estimation-code', config) +``` +9. To reproduce Figure 6(Left), make the following changes in frequency_estimation/config.py: +``` +num_itr = 10 +vary = "d" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment.evaluate('path-to-the-frequency-estimation-code', config) +``` +10. To reproduce Figure 6(Right), make the following changes in frequency_estimation/config.py: +``` +num_itr = 10 +vary = "n" +``` +and add the following commands in the main function of the wrapper: +``` +config = defaults.get_config() +experiment.evaluate('path-to-the-frequency-estimation-code', config) +``` \ No newline at end of file diff --git a/rcc_dp/frequency_estimation/config.py b/rcc_dp/frequency_estimation/config.py index e6b89dd7..4969de7d 100644 --- a/rcc_dp/frequency_estimation/config.py +++ b/rcc_dp/frequency_estimation/config.py @@ -43,7 +43,7 @@ def get_config(): alpha=1.0, # Variation. vary="eps", # Can take one of "cc", "k", "n", "eps". 
- cc_space=[6, 8, 10, 12], + cc_space=[6, 8, 10, 12, 14], k_space=[200, 400, 600, 800, 1000], n_space=[2000, 4000, 6000, 8000, 10000], eps_space=list(range(1, 9)), From 8a8263dc4ece1a4b46180b721e77afa47c33ad7d Mon Sep 17 00:00:00 2001 From: Abhin Shah Date: Tue, 22 Feb 2022 21:13:52 -0500 Subject: [PATCH 3/4] Fixing pylint issues --- rcc_dp/frequency_estimation/config.py | 2 +- rcc_dp/frequency_estimation/experiment.py | 83 ++++++++++--------- .../experiment_coding_cost.py | 70 ++++++++-------- .../frequency_estimation/experiment_test.py | 2 +- rcc_dp/frequency_estimation/miracle.py | 22 ++--- rcc_dp/frequency_estimation/rhr.py | 20 ++--- rcc_dp/frequency_estimation/ss.py | 10 ++- rcc_dp/frequency_estimation/unbias.py | 12 +-- rcc_dp/mean_estimation/experiment.py | 10 +-- .../mean_estimation/experiment_coding_cost.py | 61 +++++++------- 10 files changed, 149 insertions(+), 143 deletions(-) diff --git a/rcc_dp/frequency_estimation/config.py b/rcc_dp/frequency_estimation/config.py index 4969de7d..47e8fef4 100644 --- a/rcc_dp/frequency_estimation/config.py +++ b/rcc_dp/frequency_estimation/config.py @@ -55,4 +55,4 @@ def get_config(): ) config = config_dict.ConfigDict(config) config.lock() # Prevent addition of new fields. - return config \ No newline at end of file + return config diff --git a/rcc_dp/frequency_estimation/experiment.py b/rcc_dp/frequency_estimation/experiment.py index 3ab43d91..c1e3ac60 100644 --- a/rcc_dp/frequency_estimation/experiment.py +++ b/rcc_dp/frequency_estimation/experiment.py @@ -11,17 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Experiment definitions (i.e., evaluation of miracle, rhr, subset selection -methods when either the data dimension k, the number of users n, or the +"""Experiment definitions (i.e., evaluation of miracle, rhr, subset selection +methods when either the data dimension k, the number of users n, or the privacy parameter epsilon is varied).""" import json import math import time import matplotlib -matplotlib.rcParams['ps.useafm'] = True -matplotlib.rcParams['pdf.use14corefonts'] = True -matplotlib.rcParams['text.usetex'] = True import matplotlib.pyplot as plt import matplotlib.lines as mlines import numpy as np @@ -30,26 +27,32 @@ from rcc_dp.frequency_estimation import rhr from rcc_dp.frequency_estimation import ss from rcc_dp.frequency_estimation import unbias +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['text.usetex'] = True def generate_geometric_distribution(k,lbd): - elements = range(0,k) - prob = [(1-lbd)*math.pow(lbd,x)/(1-math.pow(lbd,k)) for x in elements] - return prob + """Generate the discrete geometric distribution.""" + elements = range(0,k) + prob = [(1-lbd)*math.pow(lbd,x)/(1-math.pow(lbd,k)) for x in elements] + return prob def generate_uniform_distribution(k): - raw_distribution = [1] * k - sum_raw = sum(raw_distribution) - prob = [float(y)/float(sum_raw) for y in raw_distribution] - return prob + """Generate the discrete uniform distribution.""" + raw_distribution = [1] * k + sum_raw = sum(raw_distribution) + prob = [float(y)/float(sum_raw) for y in raw_distribution] + return prob def generate_zipf_distribution(k,degree): - raw_distribution = [1/(float(i)**(degree)) for i in range(1,k+1)] - sum_raw = sum(raw_distribution) - prob = [float(y)/float(sum_raw) for y in raw_distribution] - return prob + """Generate the discrete zipf distribution.""" + raw_distribution = [1/(float(i)**(degree)) for i in range(1,k+1)] + sum_raw = sum(raw_distribution) + prob = [float(y)/float(sum_raw) for 
y in raw_distribution] + return prob def evaluate(work_path, config, file_open=open): @@ -125,16 +128,16 @@ def evaluate(work_path, config, file_open=open): x_miracle = np.zeros((k,n)) for i in range(n): if config.encoding_type == "fast": - x_miracle[:,i] = miracle.encode_decode_miracle_fast(i+itr*n, x[i], + x_miracle[:,i] = miracle.encode_decode_miracle_fast(i+itr*n, x[i], k, epsilon_target/2, 2**coding_cost) else: - _, _, index = miracle.encoder(i+itr*n, x[i], k, epsilon_target/2, + _, _, index = miracle.encoder(i+itr*n, x[i], k, epsilon_target/2, 2**coding_cost) - x_miracle[:,i] = miracle.decoder(i+itr*n, index, k, epsilon_target/2, + x_miracle[:,i] = miracle.decoder(i+itr*n, index, k, epsilon_target/2, 2**coding_cost) - prob_miracle = unbias.unbias_miracle(k, epsilon_target/2, 2**coding_cost, + prob_miracle = unbias.unbias_miracle(k, epsilon_target/2, 2**coding_cost, x_miracle.T, n, normalization = 1) - miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i in zip(prob, prob_miracle)], ord=1) if config.run_modified_miracle: @@ -144,49 +147,49 @@ def evaluate(work_path, config, file_open=open): x_modified_miracle[:,i] = miracle.encode_decode_modified_miracle_fast( i+itr*n, x[i], k, alpha*epsilon_target, 2**coding_cost) else: - _, pi, _ = miracle.encoder(i+itr*n, x[i], k, alpha*epsilon_target, + _, pi, _ = miracle.encoder(i+itr*n, x[i], k, alpha*epsilon_target, 2**coding_cost) expected_beta = np.ceil(k/(np.exp(epsilon_target)+1))/k - pi_all = modify_pi.modify_pi(pi, eta, epsilon_target, + pi_all = modify_pi.modify_pi(pi, eta, epsilon_target, (np.exp(epsilon_target/2))/(1+expected_beta*(np.exp(epsilon_target)-1))) index = np.random.choice(2**coding_cost, 1, p=pi_all[-1])[0] - x_modified_miracle[:,i] = miracle.decoder(i+itr*n, index, k, + x_modified_miracle[:,i] = miracle.decoder(i+itr*n, index, k, alpha*epsilon_target, 2**coding_cost) - prob_modified_miracle = 
unbias.unbias_modified_miracle(k, alpha*epsilon_target, + prob_modified_miracle = unbias.unbias_modified_miracle(k, alpha*epsilon_target, 2**coding_cost, x_modified_miracle.T, n, normalization = 1) - modified_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + modified_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i in zip(prob, prob_modified_miracle)], ord=1) if config.run_approx_miracle: x_approx_miracle = np.zeros((k,n)) approx_coding_cost = int(np.ceil( config.approx_coding_cost_multiplier*epsilon_target/np.log(2) + config.approx_t)) - epsilon_approx = miracle.get_approx_epsilon(epsilon_target, k, + epsilon_approx = miracle.get_approx_epsilon(epsilon_target, k, 2**approx_coding_cost, delta) for i in range(n): if config.encoding_type == "fast": - x_approx_miracle[:,i] = miracle.encode_decode_miracle_fast(i+itr*n, x[i], + x_approx_miracle[:,i] = miracle.encode_decode_miracle_fast(i+itr*n, x[i], k, epsilon_approx, 2**coding_cost) else: _, _, index = miracle.encoder(i+itr*n, x[i], k, epsilon_approx, 2**coding_cost) - x_approx_miracle[:,i] = miracle.decoder(i+itr*n, index, k, epsilon_approx, + x_approx_miracle[:,i] = miracle.decoder(i+itr*n, index, k, epsilon_approx, 2**coding_cost) - prob_approx_miracle = unbias.unbias_miracle(k, epsilon_approx, 2**coding_cost, + prob_approx_miracle = unbias.unbias_miracle(k, epsilon_approx, 2**coding_cost, x_approx_miracle.T, n, normalization = 1) - approx_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + approx_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i in zip(prob, prob_approx_miracle)], ord=1) if config.run_ss: x_ss = ss.encode_string_fast(k, epsilon_target, x) prob_ss = ss.decode_string(k, epsilon_target, x_ss, n, normalization = 1) - ss_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + ss_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i in zip(prob, prob_ss)], ord=1) if config.run_rhr: x_rhr = 
rhr.encode_string(k, epsilon_target, coding_cost, x) - prob_rhr = rhr.decode_string_fast(k, epsilon_target, coding_cost, x_rhr, + prob_rhr = rhr.decode_string_fast(k, epsilon_target, coding_cost, x_rhr, normalization = 1) # estimate the original underlying distribution - rhr_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + rhr_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i in zip(prob, prob_rhr)], ord=1) if config.run_approx_miracle: @@ -226,28 +229,28 @@ def evaluate(work_path, config, file_open=open): plt.errorbar( vary_space, np.mean(approx_miracle_error, axis=0), - yerr=np.std(approx_miracle_error, axis=0)/np.sqrt(num_itr), + yerr=np.std(approx_miracle_error, axis=0)/np.sqrt(config.num_itr), linewidth = 3.0, label="MRC") if config.run_miracle: plt.errorbar( vary_space, np.mean(miracle_error, axis=0), - yerr=np.std(miracle_error, axis=0)/np.sqrt(num_itr), + yerr=np.std(miracle_error, axis=0)/np.sqrt(config.num_itr), linewidth = 3.0, label="MRC") if config.run_modified_miracle: plt.errorbar( vary_space, np.mean(modified_miracle_error, axis=0), - yerr=np.std(modified_miracle_error, axis=0)/np.sqrt(num_itr), + yerr=np.std(modified_miracle_error, axis=0)/np.sqrt(config.num_itr), linewidth = 3.0, label="MMRC") if config.run_ss: plt.errorbar( vary_space, np.mean(ss_error, axis=0), - yerr=np.std(ss_error, axis=0)/np.sqrt(num_itr), + yerr=np.std(ss_error, axis=0)/np.sqrt(config.num_itr), ls='--', linewidth = 3.0, label="Subset Selection") @@ -255,7 +258,7 @@ def evaluate(work_path, config, file_open=open): plt.errorbar( vary_space, np.mean(rhr_error, axis=0), - yerr=np.std(rhr_error, axis=0)/np.sqrt(num_itr), + yerr=np.std(rhr_error, axis=0)/np.sqrt(config.num_itr), ls='--', linewidth = 3.0, label="RHR") @@ -299,4 +302,4 @@ def evaluate(work_path, config, file_open=open): if config.run_rhr: with file_open(work_path + "/rhr_error.csv", "w") as f: - np.savetxt(f, rhr_error, delimiter=",") \ No newline at end of file + np.savetxt(f, 
rhr_error, delimiter=",") diff --git a/rcc_dp/frequency_estimation/experiment_coding_cost.py b/rcc_dp/frequency_estimation/experiment_coding_cost.py index 41804c56..b017245b 100644 --- a/rcc_dp/frequency_estimation/experiment_coding_cost.py +++ b/rcc_dp/frequency_estimation/experiment_coding_cost.py @@ -11,16 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Experiment definitions (i.e., evaluation of miracle, rhr, subset selection +"""Experiment definitions (i.e., evaluation of miracle, rhr, subset selection methods when the coding cost is varied).""" import json import math import time import matplotlib -matplotlib.rcParams['ps.useafm'] = True -matplotlib.rcParams['pdf.use14corefonts'] = True -matplotlib.rcParams['text.usetex'] = True import matplotlib.pyplot as plt import matplotlib.lines as mlines import numpy as np @@ -29,26 +26,32 @@ from rcc.frequency_estimation import rhr from rcc.frequency_estimation import ss from rcc.frequency_estimation import unbias +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['text.usetex'] = True def generate_geometric_distribution(k,lbd): - elements = range(0,k) - prob = [(1-lbd)*math.pow(lbd,x)/(1-math.pow(lbd,k)) for x in elements] - return prob + """Generate the discrete geometric distribution.""" + elements = range(0,k) + prob = [(1-lbd)*math.pow(lbd,x)/(1-math.pow(lbd,k)) for x in elements] + return prob def generate_uniform_distribution(k): - raw_distribution = [1] * k - sum_raw = sum(raw_distribution) - prob = [float(y)/float(sum_raw) for y in raw_distribution] - return prob + """Generate the discrete uniform distribution.""" + raw_distribution = [1] * k + sum_raw = sum(raw_distribution) + prob = [float(y)/float(sum_raw) for y in raw_distribution] + return prob def generate_zipf_distribution(k,degree): - raw_distribution 
= [1/(float(i)**(degree)) for i in range(1,k+1)] - sum_raw = sum(raw_distribution) - prob = [float(y)/float(sum_raw) for y in raw_distribution] - return prob + """Generate the discrete zipf distribution.""" + raw_distribution = [1/(float(i)**(degree)) for i in range(1,k+1)] + sum_raw = sum(raw_distribution) + prob = [float(y)/float(sum_raw) for y in raw_distribution] + return prob def evaluate(work_path, config, file_open=open): @@ -58,7 +61,6 @@ def evaluate(work_path, config, file_open=open): start_time = time.time() - delta = config.delta alpha = config.alpha # Get default values. k = config.k @@ -75,7 +77,7 @@ def evaluate(work_path, config, file_open=open): rhr_coding_cost = epsilon_target - for itr in range(num_itr): + for itr in range(config.num_itr): print("itr = %d" % itr) print("epsilon target = " + str(epsilon_target)) print("n = " + str(n)) @@ -99,18 +101,18 @@ def evaluate(work_path, config, file_open=open): "distribution should be either be geometric, zipf, uniform.") x = np.random.choice(k, n, p=prob) - + if config.run_ss: x_ss = ss.encode_string_fast(k, epsilon_target, x) prob_ss = ss.decode_string(k, epsilon_target, x_ss, n, normalization = 1) - ss_error[itr, 0] = np.linalg.norm([p_i - phat_i for p_i, phat_i + ss_error[itr, 0] = np.linalg.norm([p_i - phat_i for p_i, phat_i in zip(prob, prob_ss)], ord=1) if config.run_rhr: - x_rhr = rhr.encode_string(k, epsilon_target, coding_cost, x) - prob_rhr = rhr.decode_string_fast(k, epsilon_target, coding_cost, x_rhr, + x_rhr = rhr.encode_string(k, epsilon_target, rhr_coding_cost, x) + prob_rhr = rhr.decode_string_fast(k, epsilon_target, rhr_coding_cost, x_rhr, normalization = 1) # estimate the original underlying distribution - rhr_error[itr, 0] = np.linalg.norm([p_i - phat_i for p_i, phat_i + rhr_error[itr, 0] = np.linalg.norm([p_i - phat_i for p_i, phat_i in zip(prob, prob_rhr)], ord=1) for step, vary_parameter in enumerate(vary_space): @@ -124,17 +126,17 @@ def evaluate(work_path, config, file_open=open): 
x_modified_miracle[:,i] = miracle.encode_decode_modified_miracle_fast( i+itr*n, x[i], k, alpha*epsilon_target, 2**coding_cost) else: - _, pi, _ = miracle.encoder(i+itr*n, x[i], k, alpha*epsilon_target, + _, pi, _ = miracle.encoder(i+itr*n, x[i], k, alpha*epsilon_target, 2**coding_cost) expected_beta = np.ceil(k/(np.exp(epsilon_target)+1))/k - pi_all = modify_pi.modify_pi(pi, eta, epsilon_target, + pi_all = modify_pi.modify_pi(pi, eta, epsilon_target, (np.exp(epsilon_target/2))/(1+expected_beta*(np.exp(epsilon_target)-1))) index = np.random.choice(2**coding_cost, 1, p=pi_all[-1])[0] - x_modified_miracle[:,i] = miracle.decoder(i+itr*n, index, k, + x_modified_miracle[:,i] = miracle.decoder(i+itr*n, index, k, alpha*epsilon_target, 2**coding_cost) - prob_modified_miracle = unbias.unbias_modified_miracle(k, alpha*epsilon_target, + prob_modified_miracle = unbias.unbias_modified_miracle(k, alpha*epsilon_target, 2**coding_cost, x_modified_miracle.T, n, normalization = 1) - modified_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i + modified_miracle_error[itr, step] = np.linalg.norm([p_i - phat_i for p_i, phat_i in zip(prob, prob_modified_miracle)], ord=1) print(time.time() - start_time) @@ -159,16 +161,16 @@ def evaluate(work_path, config, file_open=open): linewidth = 3.0, label="MMRC") if config.run_ss: - line1 = plt.errorbar(vary_space, - [np.mean(ss_error, axis=0)[0]]*len(vary_space), - yerr = [np.std(ss_error, axis=0)[0]/np.sqrt(num_itr)]*len(vary_space), + line1 = plt.errorbar(vary_space, + [np.mean(ss_error, axis=0)[0]]*len(vary_space), + yerr = [np.std(ss_error, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), ls='--', linewidth = 3.0, label="Subset Selection") if config.run_rhr: - line2 = plt.errorbar(vary_space, - [np.mean(rhr_error, axis=0)[0]]*len(vary_space), - yerr = [np.std(rhr_error, axis=0)[0]/np.sqrt(num_itr)]*len(vary_space), + line2 = plt.errorbar(vary_space, + [np.mean(rhr_error, axis=0)[0]]*len(vary_space), + yerr = 
[np.std(rhr_error, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), ls='--', linewidth = 3.0, label="RHR") @@ -195,4 +197,4 @@ def evaluate(work_path, config, file_open=open): if config.run_rhr: with file_open(work_path + "/rhr_error.csv", "w") as f: - np.savetxt(f, rhr_error, delimiter=",") \ No newline at end of file + np.savetxt(f, rhr_error, delimiter=",") diff --git a/rcc_dp/frequency_estimation/experiment_test.py b/rcc_dp/frequency_estimation/experiment_test.py index af731c32..32abd110 100644 --- a/rcc_dp/frequency_estimation/experiment_test.py +++ b/rcc_dp/frequency_estimation/experiment_test.py @@ -38,4 +38,4 @@ def test_evaluate_does_not_fail(self): if __name__ == "__main__": - absltest.main() \ No newline at end of file + absltest.main() diff --git a/rcc_dp/frequency_estimation/miracle.py b/rcc_dp/frequency_estimation/miracle.py index 5f748b54..84c9432c 100644 --- a/rcc_dp/frequency_estimation/miracle.py +++ b/rcc_dp/frequency_estimation/miracle.py @@ -13,9 +13,9 @@ # limitations under the License. """Minimal random coding (likelihood decoder) definitions. -This is the code to be used to simulate the minimum random coding algorithm -(tailored to subset selection). The algorithm was introduced by Havasi et al. -in "Minimal Random Code Learning: Getting Bits Back from Compressed Model +This is the code to be used to simulate the minimum random coding algorithm +(tailored to subset selection). The algorithm was introduced by Havasi et al. +in "Minimal Random Code Learning: Getting Bits Back from Compressed Model Parameters" - https://arxiv.org/pdf/1810.00440.pdf. 
For brevity, we may refer to it as 'MIRACLE', although technically this refers @@ -114,13 +114,13 @@ def get_approx_epsilon(epsilon_target, k, number_candidates, delta): def encode_decode_miracle_fast(seed, x, k, epsilon, number_candidates): - """A fast implementation of the miracle protocol -- instead of - generating number_candidates samples, generate one sample with + """A fast implementation of the miracle protocol -- instead of + generating number_candidates samples, generate one sample with true expectation. """ d = int(np.ceil(k/(np.exp(epsilon)+1))) num_cand_in_cap = np.random.binomial(number_candidates, d/k, size=None) - pi_in = np.exp(epsilon)/(num_cand_in_cap*np.exp(epsilon) + pi_in = np.exp(epsilon)/(num_cand_in_cap*np.exp(epsilon) + (number_candidates-num_cand_in_cap)) prob_sample_from_cap = num_cand_in_cap*pi_in @@ -140,8 +140,8 @@ def encode_decode_miracle_fast(seed, x, k, epsilon, number_candidates): def encode_decode_modified_miracle_fast(seed, x, k, epsilon, number_candidates): - """A fast implementation of the modified miracle protocol -- instead of - generating number_candidates samples, generate one sample with + """A fast implementation of the modified miracle protocol -- instead of + generating number_candidates samples, generate one sample with true expectation. 
""" d = int(np.ceil(k/(np.exp(epsilon)+1))) @@ -154,10 +154,10 @@ def encode_decode_modified_miracle_fast(seed, x, k, epsilon, number_candidates): expected_beta = np.ceil(k/(np.exp(epsilon)+1))/k pi_in = c1/(num_cand_in_cap*c1+(number_candidates-num_cand_in_cap)*c2) - tilde_pi_in = pi_in*(1+beta*(np.exp(epsilon)-1))/((1 + tilde_pi_in = pi_in*(1+beta*(np.exp(epsilon)-1))/((1 + expected_beta*(np.exp(epsilon)-1))) if beta > expected_beta: - tilde_pi_in = tilde_pi_in*(beta+expected_beta*(np.exp(epsilon) + tilde_pi_in = tilde_pi_in*(beta+expected_beta*(np.exp(epsilon) - 1))/(beta*np.exp(epsilon)) prob_sample_from_cap = num_cand_in_cap*tilde_pi_in @@ -174,4 +174,4 @@ def encode_decode_modified_miracle_fast(seed, x, k, epsilon, number_candidates): rs = np.random.RandomState(seed) rs.shuffle(z) z = z[:x] + [0] + z[x:] - return np.array(z) \ No newline at end of file + return np.array(z) diff --git a/rcc_dp/frequency_estimation/rhr.py b/rcc_dp/frequency_estimation/rhr.py index dad7a0df..8e021f5d 100644 --- a/rcc_dp/frequency_estimation/rhr.py +++ b/rcc_dp/frequency_estimation/rhr.py @@ -30,9 +30,9 @@ def encode_string(dim, epsilon, comm, x): # Pad dim to the power of 2. padded_d = int(math.pow(2, math.ceil(math.log(dim, 2)))) - # Calculate the effective communication cost + # Calculate the effective communication cost # i.e., min(comm, log(e^\epsilon)). - eff_comm = min(comm, math.ceil(epsilon*math.log(math.e, 2)), + eff_comm = min(comm, math.ceil(epsilon*math.log(math.e, 2)), math.ceil(math.log(dim, 2))+1) # Set the block size. 
block_size = int(padded_d/math.pow(2, eff_comm-1)) @@ -45,7 +45,7 @@ def encode_string(dim, epsilon, comm, x): if get_hadamard_entry(padded_d, j, x_individual) == -1: loc_sign[idx] = loc_sign[idx] + 1 - z = rr_encode_string(int(math.pow(2, eff_comm)), epsilon, + z = rr_encode_string(int(math.pow(2, eff_comm)), epsilon, np.array(loc_sign)) return z @@ -57,7 +57,7 @@ def rr_encode_string(alphabet_size, epsilon, samples): private_samples_rr = np.copy(samples) # Determine which samples need to be noised (i.e., flipped). - flip = np.random.random_sample(n) < (alphabet_size + flip = np.random.random_sample(n) < (alphabet_size - 1)/(math.exp(epsilon) + alphabet_size - 1) flip_samples = samples[flip] @@ -79,9 +79,9 @@ def decode_string_fast(dim, epsilon, comm, z, normalization = 1): l = len(z) # Pad dim to the power of 2. padded_d = int(math.pow(2, math.ceil(math.log(dim,2)))) - # Calculate the effective communication cost + # Calculate the effective communication cost # i.e., min(comm, log(e^\epsilon)). - eff_comm = min(comm, math.ceil(epsilon*math.log(math.e, 2)), + eff_comm = min(comm, math.ceil(epsilon*math.log(math.e, 2)), math.ceil(math.log(dim, 2))+1) # Set the block size. block_size = int(padded_d/math.pow(2, eff_comm-1)) @@ -92,16 +92,16 @@ def decode_string_fast(dim, epsilon, comm, z, normalization = 1): # Create histograms to specify the empirical distributions of each group. histograms = np.zeros((block_size, int(padded_d/block_size))) for g_idx in range(block_size): - g_count,g_axis = np.histogram(group_list[g_idx], + g_count, _ = np.histogram(group_list[g_idx], range(int(math.pow(2, eff_comm))+1)) histograms[g_idx] = g_count[::2] - g_count[1::2] - histograms[g_idx] = histograms[g_idx] * (math.exp(epsilon) + histograms[g_idx] = histograms[g_idx] * (math.exp(epsilon) + math.pow(2, eff_comm)-1)/(math.exp(epsilon)-1)*(block_size/n) # Obtain estimator of q. 
q = np.zeros((block_size, int(padded_d/block_size))) for j in range(block_size): - q[j, :] = fast_inverse_hadamard_transform(int(padded_d/block_size), + q[j, :] = fast_inverse_hadamard_transform(int(padded_d/block_size), histograms[j, :]) q = q.reshape((padded_d, ), order = 'F') @@ -138,4 +138,4 @@ def fast_inverse_hadamard_transform(k, dist): trans1 = fast_inverse_hadamard_transform(k//2, dist1) trans2 = fast_inverse_hadamard_transform(k//2, dist2) trans = np.concatenate((trans1+ trans2, trans1 - trans2)) - return trans \ No newline at end of file + return trans diff --git a/rcc_dp/frequency_estimation/ss.py b/rcc_dp/frequency_estimation/ss.py index e3763741..86ac8e6d 100644 --- a/rcc_dp/frequency_estimation/ss.py +++ b/rcc_dp/frequency_estimation/ss.py @@ -24,6 +24,7 @@ def probability_project_simplex(p): + """Project the probabilities on the simplex.""" k = len(p) # Infer the size of the alphabet. p_sorted = np.sort(p) p_sorted[:] = p_sorted[::-1] @@ -38,15 +39,16 @@ def probability_normalize(p): + """Normalize the probabilities so that they sum to 1.""" p = np.maximum(p,0) # Map it to be positive. norm = np.sum(p) - p = np.true_divide(p,norm) # Ensure the l_1 norm is one. + p = np.true_divide(p,norm) return p def encode_string_fast(k, epsilon, x): - """A fast implementation of the subset selection protocol -- instead - of selecting exactly d ones, set each bit to be one independently + """A fast implementation of the subset selection protocol -- instead + of selecting exactly d ones, set each bit to be one independently with true expectation. """ d = int(np.ceil(k/(np.exp(epsilon)+1))) @@ -79,4 +81,4 @@ def decode_string(k, epsilon, z, length, normalization = 1): if normalization == 1: # Project on the simplex.
p_estimate = probability_project_simplex(p_estimate) - return p_estimate \ No newline at end of file + return p_estimate diff --git a/rcc_dp/frequency_estimation/unbias.py b/rcc_dp/frequency_estimation/unbias.py index b6b199aa..f45b2b3a 100644 --- a/rcc_dp/frequency_estimation/unbias.py +++ b/rcc_dp/frequency_estimation/unbias.py @@ -33,7 +33,7 @@ def unbias_miracle(k, epsilon, number_candidates, z, n, normalization = 1): number_candidates: The number of candidates to be sampled. z: The privatized data. n: The number of users. - normalization: Indicator whether to clip and normalize or + normalization: Indicator whether to clip and normalize or project on the simplex Returns: @@ -45,7 +45,7 @@ def unbias_miracle(k, epsilon, number_candidates, z, n, normalization = 1): beta = np.array(range(number_candidates+1))/number_candidates # The probability with which you choose a candidate inside the cap. pi_in = 1/number_candidates*(c1/(beta*c1+(1-beta)*c2)) - expectation = np.sum(stats.binom.pmf(range(number_candidates+1), + expectation = np.sum(stats.binom.pmf(range(number_candidates+1), number_candidates, d/k)*range(number_candidates+1)*pi_in) b_hat = (d - expectation)/(k-1) m_hat = k*expectation/(k-1) - d/(k-1) @@ -61,7 +61,7 @@ def unbias_miracle(k, epsilon, number_candidates, z, n, normalization = 1): return p_estimate -def unbias_modified_miracle(k, epsilon, number_candidates, z, n, +def unbias_modified_miracle(k, epsilon, number_candidates, z, n, normalization = 1): """Get the unbiased estimate for modified miracle. @@ -71,7 +71,7 @@ def unbias_modified_miracle(k, epsilon, number_candidates, z, n, number_candidates: The number of candidates to be sampled. z: The privatized data. n: The number of users. 
- normalization: Indicator whether to clip and normalize or + normalization: Indicator whether to clip and normalize or project on the simplex Returns: @@ -96,7 +96,7 @@ def unbias_modified_miracle(k, epsilon, number_candidates, z, n, # The probability with which you choose a candidate inside the cap. tilde_pi_in = indicator * tilde_pi_in_1 + (1 - indicator) * tilde_pi_in_2 expectation = np.sum( - stats.binom.pmf(range(number_candidates + 1), number_candidates, + stats.binom.pmf(range(number_candidates + 1), number_candidates, d / k) * range(number_candidates + 1) * tilde_pi_in) b_tilde = (d - expectation)/(k-1) m_tilde = k*expectation/(k-1) - d/(k-1) @@ -109,4 +109,4 @@ def unbias_modified_miracle(k, epsilon, number_candidates, z, n, elif normalization == 1: # Project on the simplex. p_estimate = ss.probability_project_simplex(p_estimate) - return p_estimate \ No newline at end of file + return p_estimate diff --git a/rcc_dp/mean_estimation/experiment.py b/rcc_dp/mean_estimation/experiment.py index 4dd401b7..5d0c01e6 100644 --- a/rcc_dp/mean_estimation/experiment.py +++ b/rcc_dp/mean_estimation/experiment.py @@ -11,17 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Experiment definitions (i.e., evaluation of miracle, sqkr, privunit methods -when either the data dimension d, the number of users n, or the privacy +"""Experiment definitions (i.e., evaluation of miracle, sqkr, privunit methods +when either the data dimension d, the number of users n, or the privacy parameter epsilon is varied).""" import json import math import time import matplotlib -matplotlib.rcParams['ps.useafm'] = True -matplotlib.rcParams['pdf.use14corefonts'] = True -matplotlib.rcParams['text.usetex'] = True import matplotlib.pyplot as plt import matplotlib.lines as mlines import numpy as np @@ -31,6 +28,9 @@ from rcc_dp.mean_estimation import modify_pi from rcc_dp.mean_estimation import privunit from rcc_dp.mean_estimation import sqkr +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['text.usetex'] = True def evaluate(work_path, config, file_open=open): diff --git a/rcc_dp/mean_estimation/experiment_coding_cost.py b/rcc_dp/mean_estimation/experiment_coding_cost.py index 21dceb28..fc037ced 100644 --- a/rcc_dp/mean_estimation/experiment_coding_cost.py +++ b/rcc_dp/mean_estimation/experiment_coding_cost.py @@ -11,16 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Experiment definitions (i.e., evaluation of miracle, sqkr, privunit methods +"""Experiment definitions (i.e., evaluation of miracle, sqkr, privunit methods when the coding cost is varied).""" import json import math import time import matplotlib -matplotlib.rcParams['ps.useafm'] = True -matplotlib.rcParams['pdf.use14corefonts'] = True -matplotlib.rcParams['text.usetex'] = True import matplotlib.pyplot as plt import matplotlib.lines as mlines import numpy as np @@ -30,6 +27,9 @@ from rcc_dp.mean_estimation import modify_pi from rcc_dp.mean_estimation import privunit from rcc_dp.mean_estimation import sqkr +matplotlib.rcParams['ps.useafm'] = True +matplotlib.rcParams['pdf.use14corefonts'] = True +matplotlib.rcParams['text.usetex'] = True def evaluate(work_path, config, file_open=open): @@ -39,7 +39,6 @@ def evaluate(work_path, config, file_open=open): start_time = time.time() - delta = config.delta budget = config.budget alpha = config.alpha # Get default values. @@ -57,7 +56,7 @@ def evaluate(work_path, config, file_open=open): sqkr_coding_cost = epsilon_target - for itr in range(num_itr): + for itr in range(config.num_itr): print("itr = %d" % itr) print("epsilon target = " + str(epsilon_target)) print("n = " + str(n)) @@ -68,20 +67,20 @@ def evaluate(work_path, config, file_open=open): print("alpha = " + str(alpha)) if config.data == "unbiased_data": - x = np.random.normal(0, 1, (d, n)) - x /= np.linalg.norm(x, axis=0) - elif config.data == "biased_data": - x = np.zeros((d, n)) - x[:, 0::2] = np.random.normal(10, 1, (d, (n + 1) // 2)) - x[:, 1::2] = np.random.normal(1, 1, (d, n // 2)) - x /= np.linalg.norm(x, axis=0) - elif config.data == "same_data": - x = np.random.normal(0, 1, (d, 1)) - x /= np.linalg.norm(x, axis=0) - x = np.repeat(x, n, axis=1) - else: - raise ValueError( - "data should be either be biased_data, unbiased_data, same_data.") + x = np.random.normal(0, 1, (d, n)) + x /= np.linalg.norm(x, axis=0) + elif config.data == "biased_data": + x = 
np.zeros((d, n)) + x[:, 0::2] = np.random.normal(10, 1, (d, (n + 1) // 2)) + x[:, 1::2] = np.random.normal(1, 1, (d, n // 2)) + x /= np.linalg.norm(x, axis=0) + elif config.data == "same_data": + x = np.random.normal(0, 1, (d, 1)) + x /= np.linalg.norm(x, axis=0) + x = np.repeat(x, n, axis=1) + else: + raise ValueError( + "data should be either be biased_data, unbiased_data, same_data.") if config.run_privunit: x_privunit, _ = privunit.apply_privunit(x, epsilon_target, budget) @@ -102,7 +101,7 @@ def evaluate(work_path, config, file_open=open): for step, vary_parameter in enumerate(vary_space): coding_cost = vary_parameter - print("coding cost = %d" % coding_cost) + print("coding cost = %d" % coding_cost) if config.run_modified_miracle: x_modified_miracle = np.zeros((d, n)) @@ -124,9 +123,9 @@ def evaluate(work_path, config, file_open=open): print(time.time() - start_time) print("--------------") - if config.run_approx_miracle: + if config.run_modified_miracle: print("approx miracle mse:") - print(np.mean(approx_miracle_mse, axis=0)) + print(np.mean(modified_miracle_mse, axis=0)) if config.run_privunit: print("privunit mse:") print(np.mean(privunit_mse, axis=0)) @@ -140,20 +139,20 @@ def evaluate(work_path, config, file_open=open): plt.errorbar( vary_space, np.mean(modified_miracle_mse, axis=0), - yerr=np.std(modified_miracle_mse, axis=0)/np.sqrt(num_itr), + yerr=np.std(modified_miracle_mse, axis=0)/np.sqrt(config.num_itr), linewidth = 3.0, label="MMRC") if config.run_privunit: - line1 = plt.errorbar(vary_space, - [np.mean(privunit_mse, axis=0)[0]]*len(vary_space), - yerr = [np.std(privunit_mse, axis=0)[0]/np.sqrt(num_itr)]*len(vary_space), + line1 = plt.errorbar(vary_space, + [np.mean(privunit_mse, axis=0)[0]]*len(vary_space), + yerr = [np.std(privunit_mse, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), ls='--', linewidth = 3.0, label="PrivUnit$_{2}$") if config.run_sqkr: - line2 = plt.errorbar(vary_space, - [np.mean(sqkr_mse, axis=0)[0]]*len(vary_space), - 
yerr = [np.std(sqkr_mse, axis=0)[0]/np.sqrt(num_itr)]*len(vary_space), + line2 = plt.errorbar(vary_space, + [np.mean(sqkr_mse, axis=0)[0]]*len(vary_space), + yerr = [np.std(sqkr_mse, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), ls='--', linewidth = 3.0, label="SQKR") @@ -180,4 +179,4 @@ def evaluate(work_path, config, file_open=open): if config.run_sqkr: with file_open(work_path + "/sqkr_mse.csv", "w") as f: - np.savetxt(f, sqkr_mse, delimiter=",") \ No newline at end of file + np.savetxt(f, sqkr_mse, delimiter=",") From 441630e18063ccad5206837fbfeb330318f15235 Mon Sep 17 00:00:00 2001 From: Abhin Shah Date: Wed, 23 Feb 2022 20:43:31 -0500 Subject: [PATCH 4/4] Fixing more pylint issues --- .../experiment_coding_cost.py | 7 +++-- rcc_dp/frequency_estimation/miracle.py | 26 +++++++++++++++++-- rcc_dp/frequency_estimation/rhr.py | 7 +++-- rcc_dp/frequency_estimation/ss.py | 12 ++++----- .../mean_estimation/experiment_coding_cost.py | 7 +++-- 5 files changed, 38 insertions(+), 21 deletions(-) diff --git a/rcc_dp/frequency_estimation/experiment_coding_cost.py b/rcc_dp/frequency_estimation/experiment_coding_cost.py index b017245b..5f4dfbfa 100644 --- a/rcc_dp/frequency_estimation/experiment_coding_cost.py +++ b/rcc_dp/frequency_estimation/experiment_coding_cost.py @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Experiment definitions (i.e., evaluation of miracle, rhr, subset selection -methods when the coding cost is varied).""" +"""Evaluation of miracle, rhr, and subset selection when coding cost is varied.""" import json import math @@ -161,14 +160,14 @@ def evaluate(work_path, config, file_open=open): linewidth = 3.0, label="MMRC") if config.run_ss: - line1 = plt.errorbar(vary_space, + plt.errorbar(vary_space, [np.mean(ss_error, axis=0)[0]]*len(vary_space), yerr = [np.std(ss_error, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), ls='--', linewidth = 3.0, label="Subset Selection") if config.run_rhr: - line2 = plt.errorbar(vary_space, + plt.errorbar(vary_space, [np.mean(rhr_error, axis=0)[0]]*len(vary_space), yerr = [np.std(rhr_error, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), ls='--', diff --git a/rcc_dp/frequency_estimation/miracle.py b/rcc_dp/frequency_estimation/miracle.py index 84c9432c..89c8a648 100644 --- a/rcc_dp/frequency_estimation/miracle.py +++ b/rcc_dp/frequency_estimation/miracle.py @@ -117,6 +117,16 @@ def encode_decode_miracle_fast(seed, x, k, epsilon, number_candidates): """A fast implementation of the miracle protocol -- instead of generating number_candidates samples, generate one sample with true expectation. + + Args: + seed: The random seed to be used by the encoder. + x: The user input data. + k: The dimension of the one-hot encoded x. + epsilon: The privacy parameter epsilon. + number_candidates: The number of candidates to be sampled. + + Returns: + z: The candidate sampled at the decoder. 
""" d = int(np.ceil(k/(np.exp(epsilon)+1))) num_cand_in_cap = np.random.binomial(number_candidates, d/k, size=None) @@ -136,13 +146,24 @@ def encode_decode_miracle_fast(seed, x, k, epsilon, number_candidates): rs = np.random.RandomState(seed) rs.shuffle(z) z = z[:x] + [0] + z[x:] - return np.array(z) + z = np.array(z) + return z def encode_decode_modified_miracle_fast(seed, x, k, epsilon, number_candidates): """A fast implementation of the modified miracle protocol -- instead of generating number_candidates samples, generate one sample with true expectation. + + Args: + seed: The random seed to be used by the encoder. + x: The user input data. + k: The dimension of the one-hot encoded x. + epsilon: The privacy parameter epsilon. + number_candidates: The number of candidates to be sampled. + + Returns: + z: The candidate sampled at the decoder. """ d = int(np.ceil(k/(np.exp(epsilon)+1))) c1 = np.exp(epsilon)/(sc.comb(k-1,d-1)* np.exp(epsilon) + sc.comb(k-1,d)) @@ -174,4 +195,5 @@ def encode_decode_modified_miracle_fast(seed, x, k, epsilon, number_candidates): rs = np.random.RandomState(seed) rs.shuffle(z) z = z[:x] + [0] + z[x:] - return np.array(z) + z = np.array(z) + return z diff --git a/rcc_dp/frequency_estimation/rhr.py b/rcc_dp/frequency_estimation/rhr.py index 8e021f5d..f783f705 100644 --- a/rcc_dp/frequency_estimation/rhr.py +++ b/rcc_dp/frequency_estimation/rhr.py @@ -73,9 +73,7 @@ def rr_encode_string(alphabet_size, epsilon, samples): def decode_string_fast(dim, epsilon, comm, z, normalization = 1): - """Learn the original distribution from the privatized strings when - the input is a matrix consisting of all the bit vectors. - """ + """Learn the original distribution from the privatized strings faster.""" l = len(z) # Pad dim to the power of 2. 
padded_d = int(math.pow(2, math.ceil(math.log(dim,2)))) @@ -87,6 +85,7 @@ def decode_string_fast(dim, epsilon, comm, z, normalization = 1): block_size = int(padded_d/math.pow(2, eff_comm-1)) n = int(l/block_size)*block_size + # The input z is a matrix consisting of all the bit vectors. group_list = np.array(z[:n]).reshape(int(n/block_size), block_size).T # Create histograms to specify the empirical distributions of each group. @@ -130,7 +129,7 @@ def get_hadamard_entry(d, x, y): def fast_inverse_hadamard_transform(k, dist): - """ Performs inverse Hadamard transform.""" + """Performs inverse Hadamard transform.""" if k == 1: return dist dist1 = dist[0 : k//2] diff --git a/rcc_dp/frequency_estimation/ss.py b/rcc_dp/frequency_estimation/ss.py index 86ac8e6d..572f42ad 100644 --- a/rcc_dp/frequency_estimation/ss.py +++ b/rcc_dp/frequency_estimation/ss.py @@ -47,10 +47,7 @@ def probability_normalize(p): def encode_string_fast(k, epsilon, x): - """A fast implementation of the subset selection protocol -- instead - of selecting exactly d ones, set each bit to be one independently - with true expectation. - """ + """A fast implementation of the subset selection protocol.""" d = int(np.ceil(k/(np.exp(epsilon)+1))) p = d*np.exp(epsilon)/(d*np.exp(epsilon)+k-d) q = (d-p)/(k-1) @@ -59,20 +56,21 @@ def encode_string_fast(k, epsilon, x): z = np.zeros((n, k)) flip = np.random.random_sample((n, k)) + # Instead of selecting exactly d ones, set each bit to be one + # independently with true expectation. for i in range(n): z[i,x[i]] = np.logical_or(0,flip[i,x[i]] < p) return np.logical_or(z, flip < q) def decode_string(k, epsilon, z, length, normalization = 1): - """Learn the original distribution from the privatized strings when - the input is a matrix consisting of all the bit vectors. 
- """ + """Learn the original distribution from the privatized strings faster.""" d = int(np.ceil(k/(np.exp(epsilon)+1))) temp1 = ((k-1)*np.exp(epsilon) +1.0*(k-1)*(k-d)/d)/((k-d)*(np.exp(epsilon)-1)) temp2 = ((d-1)*np.exp(epsilon)+k-d) / (1.0*(k-d)*(np.exp(epsilon)-1)) + # The input z is a matrix consisting of all the bit vectors. p_estimate = (1.0*np.sum(z, axis=0)*temp1/length)-temp2 if normalization == 0: diff --git a/rcc_dp/mean_estimation/experiment_coding_cost.py b/rcc_dp/mean_estimation/experiment_coding_cost.py index fc037ced..c8bf74e2 100644 --- a/rcc_dp/mean_estimation/experiment_coding_cost.py +++ b/rcc_dp/mean_estimation/experiment_coding_cost.py @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Experiment definitions (i.e., evaluation of miracle, sqkr, privunit methods -when the coding cost is varied).""" +"""Evaluation of miracle, sqkr, and privunit when coding cost is varied.""" import json import math @@ -143,14 +142,14 @@ def evaluate(work_path, config, file_open=open): linewidth = 3.0, label="MMRC") if config.run_privunit: - line1 = plt.errorbar(vary_space, + plt.errorbar(vary_space, [np.mean(privunit_mse, axis=0)[0]]*len(vary_space), yerr = [np.std(privunit_mse, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), ls='--', linewidth = 3.0, label="PrivUnit$_{2}$") if config.run_sqkr: - line2 = plt.errorbar(vary_space, + plt.errorbar(vary_space, [np.mean(sqkr_mse, axis=0)[0]]*len(vary_space), yerr = [np.std(sqkr_mse, axis=0)[0]/np.sqrt(config.num_itr)]*len(vary_space), ls='--',