diff --git a/analysis/R/assoc.R b/analysis/R/assoc.R new file mode 100755 index 00000000..662ab1f6 --- /dev/null +++ b/analysis/R/assoc.R @@ -0,0 +1,185 @@ +#!/usr/bin/env Rscript +# +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Reads map files, report files, and RAPPOR parameters to run +# an EM algorithm to estimate joint distribution over two or more variables +# +# Usage: +# $ ./assoc.R --inp +# +# Input: JSON file with the following fields +# "maps" for map files of each var +# "reports" for a list of reports +# "counts" for 2 way marginal counts, individual marginal counts +# respectively +# "params" for params file with RAPPOR params +# "csv_out" for a file name into which results will be written +# as comma separated values +# +# Output: A table with joint distribution to stdout and csv file with results + +library("jsonlite") +library("optparse") + +options(stringsAsFactors = FALSE) + +if(!interactive()) { + option_list <- list( + make_option(c("--inp"), default = "inp.json", + help = "JSON file with inputs for assoc.R")) + opts <- parse_args(OptionParser(option_list = option_list)) +} + +source("analysis/R/decode2way.R") +source("analysis/R/encode.R") +source("analysis/R/decode.R") +source("analysis/R/simulation.R") +source("analysis/R/read_input.R") +source("analysis/R/association.R") +source("tests/gen_counts.R") +source("tests/compare_assoc.R") # For CombineMaps; it should be moved elsewhere + +TwoWayAlg <- function(inp) { + ptm <- proc.time() + params <- ReadParameterFile(inp$params) + # Ensure sufficient maps as required by number of vars + # Correct map from ReadMapFile() for assoc analysis + stopifnot(inp$numvars == length(inp$maps)) + map <- lapply(inp$maps, function(o) + CorrectMapForAssoc(ReadMapFile(o, params = params), + params = params)) + + # (2 way counts, marginal 1 counts, marginal 2 counts) + counts <- lapply(1:3, function(i) ReadCountsFile(inp$counts[[i]])) + + # TODO: account for different parameters across different variables + params2 <- params + params2$k <- (params$k ** 2) * 4 + + # Prune candidates + fit <- lapply(1:2, function(i) + Decode(counts[[i + 1]], + map[[i]]$rmap, + params, quick = FALSE)$fit) + + found_strings = list(fit[[1]][,"string"], fit[[2]][,"string"]) + + if (length(found_strings[[1]]) == 0 || length(found_strings[[2]]) == 0) { + stop("No strings found in 1-way marginal.") + } + + # Combine maps to feed into Decode2Way + # Prune first to found_strings from Decode on 1-way counts + pruned <- lapply(1:2, function(i) + lapply(map[[i]]$map, function(z) z[,found_strings[[i]], drop = FALSE])) + crmap <- CombineMaps(pruned[[1]], pruned[[2]])$crmap + marginal <- Decode2Way(counts[[1]], crmap, params2, fit = fit)$fit + + # Reconstruct 2-way table from marginals + ed <- matrix(0, nrow = length(found_strings[[1]]), ncol = length(found_strings[[2]])) + colnames(ed) <- found_strings[[2]] + rownames(ed) <- found_strings[[1]] + for (cols in found_strings[[2]]) { + for (rows in found_strings[[1]]) { + 
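+      # Joint candidates are keyed by the two strings joined with "x"
+      # (see paste(rows, cols, sep = "x") below); e.g. the cell for
+      # "google.com" and "ssl" is the row named "google.comxssl" in the
+      # Decode2Way fit.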
ed[rows, cols] <- marginal[paste(rows, cols, sep = "x"), "Estimate"] + } + } + ed[is.na(ed)] <- 0 + ed[ed<0] <- 0 + + time_taken <- proc.time() - ptm + print("Two Way Algorithm Results") + print(ed[order(-rowSums(ed)), order(-colSums(ed))]) + if(inp$time == TRUE) + print(time_taken) +} + +EMAlg <- function(inp) { + ptm <- proc.time() + params <- ReadParameterFile(inp$params) + # Ensure sufficient maps as required by number of vars + stopifnot(inp$numvars == length(inp$maps)) + # Correct map from ReadMapFile() for assoc analysis + map <- lapply(inp$maps, function(o) + CorrectMapForAssoc(LoadMapFile(o, params = params), + params = params)) + + # For BASIC only + m1 <- lapply(1:params$m, function(z) { + m <- sparseMatrix(c(1), c(2), dims = c(1, 2)) + colnames(m) <- c("FALSE", "TRUE") + m + }) + m2 <- sparseMatrix(1:params$m, rep(2, params$m)) + colnames(m2) <- colnames(m1[[1]]) + map[[2]]$map <- m1 + map[[2]]$rmap <- m2 + + # Reports must be of the format + # client name, cohort no, rappor bitstring 1, rappor bitstring 2, ... + reportsObj <- read.csv(inp$reports, + colClasses = c("character", "integer", + rep("character", inp$numvars)), + header = TRUE) + # Ignore the first column + reportsObj <- reportsObj[,-1] + + params = list(params, params) + params[[2]]$k = 1 + + # Parsing reportsObj + # ComputeDistributionEM allows for different sets of cohorts + # for each variable. Here, both sets of cohorts are identical + co <- as.list(reportsObj[1])[[1]] + co <- co + 1 # 1 indexing + cohorts <- rep(list(co), inp$numvars) + # Parse reports from reportObj cols 2, 3, ... + reports <- lapply(1:inp$numvars, function(x) as.list(reportsObj[x + 1])) + + # Split strings into bit arrays (as required by assoc analysis) + reports <- lapply(1:inp$numvars, function(i) { + # apply the following function to each of reports[[1]] and reports[[2]] + lapply(reports[[i]][[1]], function(x) { + # function splits strings and converts them to numeric values + # rev needed for endianness + rev(as.numeric(strsplit(x, split = "")[[1]])) + }) + }) + + joint_dist <- ComputeDistributionEM(reports, cohorts, map, + ignore_other = FALSE, + quick = TRUE, + params, marginals = NULL, + estimate_var = FALSE, + verbose = inp$time) + em <- joint_dist$fit + time_taken <- proc.time() - ptm + print("EM Algorithm Results") + print(em[order(-rowSums(em)), order(-colSums(em))]) + if(inp$time == TRUE) + print(time_taken) +} + +main <- function(opts) { + inp <- fromJSON(opts$inp) + TwoWayAlg(inp) + if(inp$also_em == TRUE) + EMAlg(inp) +} + +if(!interactive()) { + main(opts) +} \ No newline at end of file diff --git a/analysis/R/association.R b/analysis/R/association.R index 422fc634..328d3292 100644 --- a/analysis/R/association.R +++ b/analysis/R/association.R @@ -44,7 +44,7 @@ GetOtherProbs <- function(counts, map, marginal, params) { # Counts to remove from each cohort. top_counts <- ceiling(marginal$proportion * N / params$m) sum_top <- sum(top_counts) - candidate_map <- lapply(map, function(x) x[, candidate_strings]) + candidate_map <- lapply(map, function(x) x[, candidate_strings, drop = FALSE]) # Counts set by known strings without noise considerations. 
if (length(marginal) > 0) { @@ -63,6 +63,10 @@ GetOtherProbs <- function(counts, map, marginal, params) { pstar <- (1 - f / 2) * p + (f / 2) * q top_counts_cohort <- (sum_top - top_counts_cohort) * pstar + top_counts_cohort * qstar + + # Adjustment for basic rappor + if(nrow(top_counts_cohort) == 1) + top_counts_cohort <- t(top_counts_cohort) top_counts_cohort <- cbind(sum_top, top_counts_cohort) # Counts set by the "other" category. @@ -72,6 +76,9 @@ GetOtherProbs <- function(counts, map, marginal, params) { props_other[props_other > 1] <- 1 props_other[is.nan(props_other)] <- 0 props_other[is.infinite(props_other)] <- 0 + # Adjustmet for basic rappor + if(is.null(nrow(props_other))) + props_other <- t(props_other) as.list(as.data.frame(props_other)) } @@ -213,8 +220,15 @@ EM <- function(cond_prob, starting_pij = NULL, estimate_var = FALSE, if (nrow(pij[[1]]) > 0) { # Run EM for (i in 1:max_iter) { + if (i == 1) { + ptm_iter <- proc.time() + } pij[[i + 1]] <- UpdatePij(pij[[i]], cond_prob) dif <- max(abs(pij[[i + 1]] - pij[[i]])) + if (i == 1) { + PrintIfVerbose("ONE ITERATION", verbose) + PrintIfVerbose(proc.time() - ptm_iter, verbose) + } if (dif < epsilon) { break } @@ -283,9 +297,10 @@ UpdateJointConditional <- function(cond_report_dist, joint_conditional = NULL) { ComputeDistributionEM <- function(reports, report_cohorts, maps, ignore_other = FALSE, - params, + params, quick = FALSE, marginals = NULL, - estimate_var = FALSE) { + estimate_var = FALSE, + verbose = FALSE) { # Computes the distribution of num_variables variables, where # num_variables is chosen by the client, using the EM algorithm. # @@ -312,8 +327,10 @@ ComputeDistributionEM <- function(reports, report_cohorts, # Compute the counts for each variable and then do conditionals. joint_conditional = NULL found_strings <- list() + cd_for_reports <- list() for (j in (1:num_variables)) { + ptm <- proc.time() variable_report <- reports[[j]] variable_cohort <- report_cohorts[[j]] map <- maps[[j]] @@ -321,8 +338,12 @@ ComputeDistributionEM <- function(reports, report_cohorts, # Compute the probability of the "other" category variable_counts <- NULL if (is.null(marginals)) { - variable_counts <- ComputeCounts(variable_report, variable_cohort, params) - marginal <- Decode(variable_counts, map$rmap, params, quiet = TRUE)$fit + ptm2 <- proc.time() + variable_counts <- ComputeCounts(variable_report, variable_cohort, params[[j]]) + marginal <- Decode(variable_counts, map$rmap, params[[j]], quick, + quiet = TRUE)$fit + PrintIfVerbose("TIME IN MARGINALS", verbose) + PrintIfVerbose(proc.time() - ptm2, verbose) if (nrow(marginal) == 0) { return (NULL) } @@ -332,14 +353,14 @@ ComputeDistributionEM <- function(reports, report_cohorts, found_strings[[j]] <- marginal$string if (ignore_other) { - prob_other <- vector(mode = "list", length = params$m) + prob_other <- vector(mode = "list", length = params[[j]]$m) } else { if (is.null(variable_counts)) { variable_counts <- ComputeCounts(variable_report, variable_cohort, - params) + params[[j]]) } prob_other <- GetOtherProbs(variable_counts, map$map, marginal, - params) + params[[j]]) found_strings[[j]] <- c(found_strings[[j]], "Other") } @@ -348,7 +369,7 @@ ComputeDistributionEM <- function(reports, report_cohorts, idx <- variable_cohort[i] rep <- GetCondProb(variable_report[[i]], candidate_strings = rownames(marginal), - params = params, + params = params[[j]], map$map[[idx]], prob_other[[idx]]) rep @@ -356,14 +377,19 @@ ComputeDistributionEM <- function(reports, report_cohorts, # Update the 
joint conditional distribution of all variables joint_conditional <- UpdateJointConditional(cond_report_dist, - joint_conditional) + joint_conditional) + PrintIfVerbose("TIME IN COND_REPORT_DIST", verbose) + PrintIfVerbose(proc.time()-ptm, verbose) } + ptm <- proc.time() # Run expectation maximization to find joint distribution em <- EM(joint_conditional, epsilon = 10 ^ -6, verbose = FALSE, estimate_var = estimate_var) + PrintIfVerbose("TIME IN EM", verbose) + PrintIfVerbose(proc.time() - ptm, verbose) dimnames(em$est) <- found_strings + # Return results in a usable format list(fit = em$est, sd = em$sd, em = em) - } diff --git a/analysis/R/decode.R b/analysis/R/decode.R index fe314cd9..626274e2 100644 --- a/analysis/R/decode.R +++ b/analysis/R/decode.R @@ -74,9 +74,14 @@ EstimateBloomCounts <- function(params, obs_counts) { # Transform counts from absolute values to fractional, removing bias due to # variability of reporting between cohorts. - ests <- apply(ests, 1, function(x) x / obs_counts[,1]) - stds <- apply(variances^.5, 1, function(x) x / obs_counts[,1]) - + if (ncol(obs_counts) == 2) { + ests <- apply(t(ests), 1, function(x) x / obs_counts[,1]) + stds <- apply(t(variances^.5), 1, function(x) x / obs_counts[,1]) + } else { + ests <- apply((ests), 1, function(x) x / obs_counts[,1]) + stds <- apply((variances^.5), 1, function(x) x / obs_counts[,1]) + } + # Some estimates may be set to infinity, e.g. if f=1. We want to # account for this possibility, and set the corresponding counts # to 0. diff --git a/analysis/R/decode2way.R b/analysis/R/decode2way.R new file mode 100644 index 00000000..e8b546fa --- /dev/null +++ b/analysis/R/decode2way.R @@ -0,0 +1,174 @@ +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This library implements RAPPOR decoding algorithms for 2 way association. +# + +library(limSolve) +source("analysis/R/decode.R") + +EstimateBloomCounts2Way <- function(params, obs_counts) { + # Estimates original bloom filter counts of each pair of bits + # in the original bloom filters of each report + # + # Input: + # params: a list of RAPPOR parameters: + # k - size of a Bloom filter + # h - number of hash functions + # m - number of cohorts + # p - P(IRR = 1 | PRR = 0) + # q - P(IRR = 1 | PRR = 1) + # f - Proportion of bits in the Bloom filter that are set randomly + # to 0 or 1 regardless of the underlying true bit value + # obs_counts: a matrix of size m by (4k^2 + 1). Column one contains sample + # sizes for each cohort. 
Other counts indicate how many times each pair
+  #               of bit values {11, 10, 01, 00} was observed across the two
+  #               reports (in a "1st report"-major order)
+  #
+  # Output:
+  #   ests: a matrix of size m by 4k**2 with estimated counts
+  #   stds: currently, just a filler value of 100
+
+  p <- params$p
+  q <- params$q
+  f <- params$f
+  m <- params$m
+  k <- params$k
+
+  stopifnot(m == nrow(obs_counts), params$k + 1 == ncol(obs_counts))
+
+  p11 <- q * (1 - f/2) + p * f / 2  # probability of a true 1 reported as 1
+  p01 <- p * (1 - f/2) + q * f / 2  # probability of a true 0 reported as 1
+  p10 <- 1 - p11  # probability of a true 1 reported as 0
+  p00 <- 1 - p01  # probability of a true 0 reported as 0
+
+  # The NoiseMatrix describes the probability that input pairs of bits (rows,
+  # in the order {11, 10, 01, 00}) are mapped to reported pairs (columns, same
+  # order) due to noise added by RAPPOR
+  NoiseMatrix <- matrix(rep(0, 16), 4)
+  NoiseMatrix[1,] <- c(p11**2, p11*p10, p10*p11, p10**2)
+  NoiseMatrix[2,] <- c(p11*p01, p11*p00, p10*p01, p10*p00)
+  NoiseMatrix[3,] <- c(p01*p11, p01*p10, p00*p11, p00*p10)
+  NoiseMatrix[4,] <- c(p01**2, p00*p01, p01*p00, p00**2)
+  # Invert NoiseMatrix for estimator
+  InvNoiseMatrix <- t(solve(NoiseMatrix))
+
+  # Apply the inverse of NoiseMatrix to get an unbiased estimator for
+  # the number of times input pairs of bits were seen.
+  # Apply the matrix to 4 values at a time from obs_counts
+  ests <- apply(obs_counts, 1, function(x) {
+    N <- x[1]
+    inds <- seq(0, (k/4)-1)
+    v <- x[-1]
+    sapply(inds, function(i){
+      as.vector(InvNoiseMatrix %*% v[(i*4 + 1):((i+1)*4)])
+    })
+  })
+
+  # Transform counts from absolute values to fractional, removing bias due to
+  # variability of reporting between cohorts.
+  ests <- apply(ests, 1, function(x) x / obs_counts[,1])
+  # TODO: compute stddev in distribution induced by estimation
+  # stds <- apply(variances^.5, 1, function(x) x / obs_counts[,1])
+
+  # Some estimates may be set to infinity, e.g. if f=1. We want to
+  # account for this possibility, and set the corresponding counts
+  # to 0.
+ ests[abs(ests) == Inf] <- 0 + + list(estimates = ests, + stds = matrix(rep(100, length(ests[,1]) * length(ests[1,])), + length(ests[,1]))) +} + +# Implements lsei +FitDistribution2Way <- function(estimates_stds, map, + fit = NULL, + quiet = FALSE, + add_constraints = FALSE) { + X <- map + Y <- as.vector(t(estimates_stds$estimates)) + m <- dim(X)[1] + n <- dim(X)[2] + + G <- rbind2(Diagonal(n), rep(-1, n)) + H <- c(rep(0, n), -1) + if (add_constraints == TRUE) { + res <- AddConstraints(fit, X, Y, m, n, G, H) + lsei(A = res$X, B = res$Y, G = res$G, H = res$H, type = 2)$X + } else { + lsei(A = X, B = Y, G = G, H = H, type = 2)$X + } +} + +AddConstraints <- function(fit, X, Y, m, n, G, H) { + # Experimental code + # Computes the same output as FitDistribution by + # additionally throwing in constraints corresponding to + # 1-way marginals + # Requires non-NULL fit as input (with "proportion" containing marginal info) + + # Adding marginals constraints to X and Y + fstrs <- lapply(fit, function(x) x[,"string"]) # found strings + + Y <- c(Y, wt * t(fit[[1]]["proportion"]), wt * t(fit[[2]]["proportion"])) + + for (strs in fstrs[[1]]) { + indices <- which(colnames(map) %in% outer(strs, + fstrs[[2]], + function(x, y) paste(x, y, sep = "x"))) + vec <- rep(0, n) + vec[indices] <- wt + X <- rbind2(X, vec) + } + for (strs in fstrs[[2]]) { + indices <- which(colnames(map) %in% outer(fstrs[[1]], + strs, + function(x, y) paste(x, y, sep = "x"))) + vec <- rep(0, n) + vec[indices] <- wt + X <- rbind2(X, vec) + } + list(X = X, Y = Y, G = G, H = H) +} + +Decode2Way <- function(counts, map, params, fit = NULL) { + k <- params$k + p <- params$p + q <- params$q + f <- params$f + h <- params$h + m <- params$m + + S <- ncol(map) # total number of candidates + + N <- sum(counts[, 1]) + + filter_cohorts <- which(counts[, 1] != 0) # exclude cohorts with zero reports + + # stretch cohorts to bits + filter_bits <- as.vector( + t(matrix(1:nrow(map), nrow = m, byrow = TRUE)[filter_cohorts,])) + + es <- EstimateBloomCounts2Way(params, counts) + e <- list(estimates = es$estimates[filter_cohorts, , drop = FALSE], + stds = es$stds[filter_cohorts, , drop = FALSE]) + coefs <- FitDistribution2Way(e, map[filter_bits, , drop = FALSE], fit) + fit <- data.frame(String = colnames(map[filter_bits, , drop = FALSE]), + Estimate = matrix(coefs, ncol = 1), + SD = matrix(coefs, ncol = 1), + stringsAsFactors = FALSE) + rownames(fit) <- fit[,"String"] + list(fit = fit) +} diff --git a/analysis/R/read_input.R b/analysis/R/read_input.R index 95ea1b0d..051b35c4 100644 --- a/analysis/R/read_input.R +++ b/analysis/R/read_input.R @@ -101,6 +101,23 @@ ReadMapFile <- function(map_file, params = NULL, quote = "") { list(map = map, strs = strs, map_pos = map_pos) } +# This function processes the maps loaded using ReadMapFile +# Association analysis requires a map object with a map +# field that has the map split into cohorts and an rmap field +# that has all the cohorts combined +# Arguments: +# map = map object with cohorts as sparse matrix in +# object map$map +# This is the expected object from ReadMapFile +# params = data field with parameters +CorrectMapForAssoc <- function(map, params) { + map$rmap <- map$map + map$map <- lapply(1:params$m, function(i) + map$rmap[seq(from = ((i - 1) * params$k + 1), + length.out = params$k),]) + map +} + LoadMapFile <- function(map_file, params = NULL, quote = "") { # Reads the map file and creates an R binary .rda. If the .rda file already # exists, just loads that file. 
NOTE: It assumes the map file is diff --git a/analysis/tools/sum_bits_assoc.py b/analysis/tools/sum_bits_assoc.py new file mode 100755 index 00000000..8e01d669 --- /dev/null +++ b/analysis/tools/sum_bits_assoc.py @@ -0,0 +1,158 @@ +#!/usr/bin/python +# +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Read RAPPOR values of 2 variables from stdin. +Read parameters from parameter file and a prefix. + +Output counts of bloom filter bits set for each variable (1-way totals) +and counts of pairwise bits set (2-way totals) into files with suffixes +_marg1.csv, _marg2.csv, _2way.csv respectively. + +The file formats for each of the files are as follows: +_marg1.csv, _marg2.csv +Each row corresponds to a cohort with: +num reports, total count for bit 1, total count for bit 2, ... + +_2way.csv +Each row corresponds to a cohort +The first entry corresponds to total number of reports in that cohort +The next set of values indicate 2 way counts grouped 4 elements at a time: + the first 4 refer to information about bit 1 of irr1 and bit 1 of irr2 + the next 4 refer to information about bit 1 of irr1 and bit 2 of irr2 + ... + the next 4 refer to information about bit 1 of irr1 and bit k of irr2 + the next 4 refer to information about bit 2 of irr1 and bit 1 of irr2 + (pairwise information about tuples is stored in a "1st report"-major order) + ... + the last 4 refer to information about bit k of irr1 and bit k of irr2 + + for each 4-tuple, the values represents the counts for the pair of bits from + irr1 and irr2 having the value: + 11, 01, 10, and 00, respectively. 
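+
+For example (illustration only), with 2-bit reports irr1 = '10' and
+irr2 = '01' ("bit 1" below is the least significant bit, i.e. the last
+character of the string), a single report adds one count to each of the
+four groups:
+  (bit 1 of irr1, bit 1 of irr2) = (0, 1) -> the "01" slot of group 1
+  (bit 1 of irr1, bit 2 of irr2) = (0, 0) -> the "00" slot of group 2
+  (bit 2 of irr1, bit 1 of irr2) = (1, 1) -> the "11" slot of group 3
+  (bit 2 of irr1, bit 2 of irr2) = (1, 0) -> the "10" slot of group 4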
+ + See sum_bits_assoc_test.py for an example +""" + +import csv +import sys + +import rappor + + +def SumBits(params, stdin, f_2way, f_1, f_2): + csv_in = csv.reader(stdin) + csv_out_two_way = csv.writer(f_2way) + csv_out_1 = csv.writer(f_1) + csv_out_2 = csv.writer(f_2) +# csv_out_two_way = csv.writer(open(f_2way, "w")) +# csv_out_1 = csv.writer(open(f_1, "w")) +# csv_out_2 = csv.writer(open(f_2, "w")) + + num_cohorts = params.num_cohorts + num_bloombits = params.num_bloombits + + sums = [[0] * (4 * (num_bloombits ** 2)) for _ in xrange(num_cohorts)] + sums_1 = [[0] * num_bloombits for _ in xrange(num_cohorts)] + sums_2 = [[0] * num_bloombits for _ in xrange(num_cohorts)] + num_reports = [0] * num_cohorts + + for i, row in enumerate(csv_in): + try: + (_, cohort, irr_1, irr_2) = row + except ValueError: + raise RuntimeError('Error parsing row %r' % row) + + if i == 0: + continue # skip header + + cohort = int(cohort) + try: + num_reports[cohort] += 1 + except IndexError: + raise RuntimeError('Error indexing cohort number %d (num_cohorts is %d) \ + ' % (cohort, num_cohorts)) + + if not len(irr_1) == params.num_bloombits: + raise RuntimeError( + "Expected %d bits in report 1, got %r" % + (params.num_bloombits, len(irr_1))) + if not len(irr_2) == params.num_bloombits: + raise RuntimeError( + "Expected %d bits in report 2, got %r" % + (params.num_bloombits, len(irr_2))) + # "Unrolled" joint encoding of both reports + index_array = [[3, 1], [2, 0]] + for i, c in enumerate(irr_1): + for j, d in enumerate(irr_2): + index = 4 * ((num_bloombits - i - 1) * params.num_bloombits + + num_bloombits - j - 1) + try: + diff = index_array[int(c)][int(d)] + except IndexError: + raise RuntimeError('Invalid IRRs; digits should be 0/1') + sums[cohort][index + diff] += 1 + + for i, c in enumerate(irr_1): + bit_num = num_bloombits - i - 1 # e.g. char 0 = bit 15, char 15 = bit 0 + if c == '1': + sums_1[cohort][bit_num] += 1 + else: + if c != '0': + raise RuntimeError('Invalid IRRs; digits should be 0/1') + + for i, c in enumerate(irr_2): + bit_num = num_bloombits - i - 1 # e.g. char 0 = bit 15, char 15 = bit 0 + if c == '1': + sums_2[cohort][bit_num] += 1 + else: + if c != '0': + raise RuntimeError('Invalid IRRs; digits should be 0/1') + + for cohort in xrange(num_cohorts): + # First column is the total number of reports in the cohort. + row = [num_reports[cohort]] + sums[cohort] + csv_out_two_way.writerow(row) + row = [num_reports[cohort]] + sums_1[cohort] + csv_out_1.writerow(row) + row = [num_reports[cohort]] + sums_2[cohort] + csv_out_2.writerow(row) + + +def main(argv): + try: + filename = argv[1] + prefix = argv[2] + except IndexError: + raise RuntimeError('Usage: sum_bits.py ') + with open(filename) as f: + try: + params = rappor.Params.from_csv(f) + except rappor.Error as e: + raise RuntimeError(e) + + with open(prefix + "_2way.csv", "w") as f_2way: + with open(prefix + "_marg1.csv", "w") as f_1: + with open(prefix + "_marg2.csv", "w") as f_2: + SumBits(params, sys.stdin, f_2way, f_1, f_2) + + +if __name__ == '__main__': + try: + main(sys.argv) + except RuntimeError, e: + print >>sys.stderr, e.args[0] + sys.exit(1) diff --git a/analysis/tools/sum_bits_assoc_test.py b/analysis/tools/sum_bits_assoc_test.py new file mode 100755 index 00000000..e19b9fed --- /dev/null +++ b/analysis/tools/sum_bits_assoc_test.py @@ -0,0 +1,134 @@ +#!/usr/bin/python -S +# +# Copyright 2015 Google Inc. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +sum_bits_assoc_test.py: Tests for sum_bits_assoc.py +""" + +import cStringIO +import unittest + +import rappor +import sum_bits_assoc # module under test + + +# The header doesn't matter +CSV_IN = """\ +user_id,cohort,irr1,irr2 +5,1,0011,1010 +5,1,0011,1010 +5,1,0000,0000 +""" + +# ############################### +# EXPECTED_F_2WAY +# +# NOTE: bit order is reversed. +# First row is 65 zeroes because there are no reports with cohort 0 +expected_f_2way = """\ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,\ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\r +""" + +# Cohort 1 +# Total # of reports +expected_f_2way = expected_f_2way + "3," + +# Looking at LSBs of both irrs +# Total # of (11, 01, 10, 00) that appear +expected_f_2way = expected_f_2way + "0,0,2,1," + +# Report 1-major order. So looking at LSB of irr1 and 2nd LSB of irr2 +expected_f_2way = expected_f_2way + "2,0,0,1," + +# And so on ... +expected_f_2way = expected_f_2way + "0,0,2,1," +expected_f_2way = expected_f_2way + "2,0,0,1," + +# Now moving on to 2nd LSB of irr1 +expected_f_2way = expected_f_2way + ("0,0,2,1,2,0,0,1," * 2) + +# Now moving on to 3rd LSB of irr1 +# Note that for 3rd LSB of irr1 and LSB of irr2, there are three 00s +expected_f_2way = expected_f_2way + ("0,0,0,3,0,2,0,1," * 2) +# MSB of irr1 +expected_f_2way = expected_f_2way + "0,0,0,3,0,2,0,1," + "0,0,0,3,0,2,0,1\r\n" + +# EXPECTED_F_2WAY is a constant +EXPECTED_F_2WAY = expected_f_2way + +# end of EXPECTED_F_2WAY +# ############################### + +# NOTE: bit order is reversed. +EXPECTED_F_1 = """\ +0,0,0,0,0\r +3,2,2,0,0\r +""" + +# NOTE: bit order is reversed. 
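+# (Each row is: number of reports in the cohort, then one count per bloom
+# filter bit, least significant bit first.)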
+EXPECTED_F_2 = """\ +0,0,0,0,0\r +3,0,2,0,2\r +""" + +WRONG_IRR_BITS = """\ +user_id,cohort,irr1,irr2 +cli1,1,00123,11223 +""" + +WRONG_COHORT = """\ +user_id,cohort,irr1,irr2 +cli1,3,0011,0001 +""" + +class SumBitsAssocTest(unittest.TestCase): + + def setUp(self): + self.params = rappor.Params() + self.params.num_bloombits = 4 + self.params.num_cohorts = 2 + self.maxDiff = None + + def testSum(self): + stdin = cStringIO.StringIO(CSV_IN) + f_2way = cStringIO.StringIO() + f_1 = cStringIO.StringIO() + f_2 = cStringIO.StringIO() + + sum_bits_assoc.SumBits(self.params, stdin, f_2way, f_1, f_2) + self.assertMultiLineEqual(EXPECTED_F_1, f_1.getvalue()) + self.assertMultiLineEqual(EXPECTED_F_2, f_2.getvalue()) + self.assertMultiLineEqual(EXPECTED_F_2WAY, f_2way.getvalue()) + + def testErrors(self): + f_2way = cStringIO.StringIO() + f_1 = cStringIO.StringIO() + f_2 = cStringIO.StringIO() + + stdin = cStringIO.StringIO(WRONG_IRR_BITS) + self.assertRaises( + RuntimeError, sum_bits_assoc.SumBits, self.params, stdin, + f_2way, f_1, f_2) + + stdin = cStringIO.StringIO(WRONG_COHORT) + self.assertRaises( + RuntimeError, sum_bits_assoc.SumBits, self.params, stdin, + f_2way, f_1, f_2) + + +if __name__ == '__main__': + unittest.main() diff --git a/assoctest.sh b/assoctest.sh new file mode 100755 index 00000000..42e1799f --- /dev/null +++ b/assoctest.sh @@ -0,0 +1,325 @@ +#!/bin/bash +# +# Run and end-to-end association test in parallel. +# +# Usage: +# ./assoctest.sh + +# At the end, it will print an HTML summary. +# +# Three main functions are +# run [[ [ []]] - run tests matching in +# parallel, each times, additionally +# running the EM algorithm if = T +# +# run-seq [ [ []]] - ditto, except that tests are run +# sequentially +# +# run-all [ []] - run all tests, in parallel, +# each times +# +# Note: Patterns always start with a-. +# +# Examples: +# $ ./assoctest.sh run-seq a-toy # Sequential run, matches 2 cases +# $ ./assoctest.sh run-seq a-fizz 3 # Sequential, each test is run three +# times +# $ ./assoctest.sh run-all # Run all tests once +# $ ./assoctest.sh run-all 5 T # Run all tests five times with EM +# comparisons +# +# The argument is a regex in 'grep -E' format. (Detail: Don't +# use $ in the pattern, since it matches the whole spec line and not just the +# test case name.) The number of processors used in a parallel run is 5. +# +# fast_counts param inherited from regtest.sh, but currently not used + +set -o nounset +set -o pipefail +set -o errexit + +. util.sh + +readonly THIS_DIR=$(dirname $0) +readonly REPO_ROOT=$THIS_DIR +readonly CLIENT_DIR=$REPO_ROOT/client/python +readonly ASSOCTEST_DIR=_tmp/assoctest + +# All the Python tools need this +export PYTHONPATH=$CLIENT_DIR + +# Print true inputs into a file with selected prefix +print-true-inputs() { + local num_unique_values=$1 + local prefix=$2 + seq 1 $num_unique_values | awk '{print "'$prefix'" $1}' +} + +# Generate a single test case, specified by a line of the test spec. +# This is a helper function for _run_tests(). 
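+# Arguments are the fields of one spec line emitted by tests/assoctest_spec.py
+# (with the compare flag appended by _run-tests).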
+_setup-one-case() { + local test_case=$1 + + # Input parameters + local num_unique_values=$2 + local num_unique_values2=$3 + local num_clients=$4 + local num_extras=$5 + + # RAPPOR params + local num_bits=$6 + local num_hashes=$7 + local num_cohorts=$8 + local p=$9 + local q=${10} # need curly braces to get the 10th arg + local f=${11} + + banner 'Setting up parameters and candidate files for '$test_case + + local case_dir=$ASSOCTEST_DIR/$test_case + mkdir --verbose -p $case_dir + + # Save the "spec" + echo "$@" > $case_dir/spec.txt + + local params_path=$case_dir/case_params.csv + + echo 'k,h,m,p,q,f' > $params_path + echo "$num_bits,$num_hashes,$num_cohorts,$p,$q,$f" >> $params_path + + print-true-inputs $[num_unique_values+num_extras] \ + "str" > $case_dir/case_true_inputs1.txt + print-true-inputs $num_unique_values2 "opt" > $case_dir/case_true_inputs2.txt + + # Hash candidates + analysis/tools/hash_candidates.py \ + $params_path \ + < $case_dir/case_true_inputs1.txt \ + > $case_dir/case_map1.csv + + analysis/tools/hash_candidates.py \ + $params_path \ + < $case_dir/case_true_inputs2.txt \ + > $case_dir/case_map2.csv +} + +# Run a single test instance, specified by . +# This is a helper function for _run_tests(). +_run-one-instance() { + local test_case=$1 + local test_instance=$2 + + local case_dir=$ASSOCTEST_DIR/$test_case + + read -r case_name num_unique_values num_unique_values2 \ + num_clients num_extras \ + num_bits num_hashes num_cohorts p q f compare < $case_dir/spec.txt + + local instance_dir=$ASSOCTEST_DIR/$test_case/$test_instance + mkdir --verbose -p $instance_dir + + banner "Generating input" + + tests/gen_true_values_assoc.R $num_unique_values $num_unique_values2 \ + $num_clients $num_cohorts $instance_dir/case.csv + + banner "Running RAPPOR client" + tests/rappor_assoc_sim.py \ + --num-bits $num_bits \ + --num-hashes $num_hashes \ + --num-cohorts $num_cohorts \ + -p $p \ + -q $q \ + -f $f \ + < $instance_dir/case.csv \ + > "$instance_dir/case_reports.csv" + + analysis/tools/sum_bits_assoc.py \ + $case_dir/case_params.csv \ + "$instance_dir/case" \ + < $instance_dir/case_reports.csv + + + local out_dir=${instance_dir}_report + mkdir --verbose -p $out_dir + + # Currently, the summary file shows and aggregates timing of the inference + # engine, which excludes R's loading time and reading of the (possibly + # substantial) map file. Timing below is more inclusive. + TIMEFORMAT='Running analyze.R took %R seconds' + + # Setting up JSON file + json_file="{\ + \"maps\": [\"$case_dir/case_map1.csv\",\ + \"$case_dir/case_map2.csv\"],\ + \"reports\": \"$instance_dir/case_reports.csv\",\ + \"truefile\": \"$instance_dir/case.csv\",\ + \"outdir\": \"$out_dir\",\ + \"params\": \"$case_dir/case_params.csv\",\ + \"newalg\": \"false\",\ + \"numvars\": 2,\ + \"num\": $num_clients,\ + \"extras\": $num_extras,\ + \"varcandidates\": [$num_unique_values, $num_unique_values2],\ + \"verbose\": \"true\",\ + \"counts\": [\"$instance_dir/case_2way.csv\",\ + \"$instance_dir/case_marg1.csv\",\ + \"$instance_dir/case_marg2.csv\"]," + + # Adding EM comparison depending on $compare flag + if test $compare = F; then + json_file=$json_file"\"expt\": [\"external-counts\"]" + else + json_file=$json_file"\"expt\": [\"external-counts\", \ + \"external-reports-em\"]" + fi + json_file=$json_file"}" + echo $json_file > $instance_dir/analyze_inp.json + + time { + tests/compare_assoc.R --inp $instance_dir/analyze_inp.json + } +} + +# Like _run-once-case, but log to a file. 
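+# Wraps _run-one-instance; per-instance output is captured in
+# $ASSOCTEST_DIR/<test case>/<instance>_report/log.txt.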
+_run-one-instance-logged() { + local test_case_id=$1 + local test_case_run=$2 + + local log_dir=$ASSOCTEST_DIR/$test_case_id/${test_case_run}_report + mkdir --verbose -p $log_dir + + log "Started '$test_case_id' (instance $test_case_run) -- logging to $log_dir/log.txt" + _run-one-instance "$@" >$log_dir/log.txt 2>&1 \ + && log "Test case $test_case_id (instance $test_case_run) done" \ + || log "Test case $test_case_id (instance $test_case_run) failed" +} + +make-summary() { + local dir=$1 + local filename=${2:-results.html} + local instances=${3:-1} + + tests/make_summary_assoc.py $dir $instances > $dir/rows.html + + pushd $dir >/dev/null + + cat ../../tests/assoctest.html \ + | sed -e '/TABLE_ROWS/ r rows.html' \ + > $filename + + popd >/dev/null + + log "Wrote $dir/$filename" + log "URL: file://$PWD/$dir/$filename" +} + +test-error() { + local spec_regex=${1:-} + log "Some test cases failed" + if test -n "$spec_regex"; then + log "(Perhaps none matched pattern '$spec_regex')" + fi + # don't quit just yet + # exit 1 +} + +# Assuming the spec file, write a list of test case names (first column) with +# the instance ids (second column), where instance ids run from 1 to $1. +_setup-test-instances() { + local instances=$1 + + while read line; do + for i in $(seq 1 $instances); do + read case_name _ <<< $line # extract the first token + echo $case_name $i + done + done +} + +# Args: +# regexp: A pattern selecting the subset of tests to run +# instances: A number of times each test case is run +# parallel: Whether the tests are run in parallel (T/F) +# fast_counts: Whether counts are sampled directly (T/F) +# compare: Whether the tests run comparisons between EM and Marginal +# algorithms or not +# +_run-tests() { + local spec_regex=$1 # grep -E format on the spec + local instances=$2 + local parallel=$3 + local fast_counts=$4 + local $compare=$5 + + rm -r -f --verbose $ASSOCTEST_DIR + + mkdir --verbose -p $ASSOCTEST_DIR + + echo "PARAMS" + echo $spec_regex + echo $instances + echo $parallel + echo $fast_counts + echo $compare + + local func + local processors=1 + + if test $parallel = F; then + func=_run-one-instance-logged # output to the console + else + func=_run-one-instance-logged + processors=$(grep -c ^processor /proc/cpuinfo || echo 4) # POSIX-specific + if test $processors -gt 6; then # leave few CPUs for the OS + # Association tests take up a lot of memory; so restricted to a few + # processes at a time + processors=5 + else + processors=1 + fi + log "Running $processors parallel processes" + fi + + local cases_list=$ASSOCTEST_DIR/test-cases.txt + tests/assoctest_spec.py | grep -E $spec_regex | sed "s/$/ $compare/" > $cases_list + + # Generate parameters for all test cases. + cat $cases_list \ + | xargs -l -P $processors -- $0 _setup-one-case \ + || test-error + + log "Done generating parameters for all test cases" + + local instances_list=$ASSOCTEST_DIR/test-instances.txt + _setup-test-instances $instances $fast_counts < $cases_list > $instances_list + + cat $instances_list \ + | xargs -l -P $processors -- $0 $func || test-error + + log "Done running all test instances" + + make-summary $ASSOCTEST_DIR "results.html" $instances +} + +# Run tests sequentially +run-seq() { + local spec_regex=${1:-'^a-'} # grep -E format on the spec + local instances=${2:-1} + local compare=${3:-F} + + _run-tests $spec_regex $instances F T $compare +} + +# Run tests in parallel +run-all() { + local instances=${1:-1} + local compare=${2:-F} + + log "Running all tests. Can take a while." 
+ # a- for assoc tests + # F for sequential + _run-tests '^a-' $instances T T $compare +} + +"$@" diff --git a/experimental/assoc/analyze_assoc.R b/experimental/assoc/analyze_assoc.R new file mode 100755 index 00000000..100d204f --- /dev/null +++ b/experimental/assoc/analyze_assoc.R @@ -0,0 +1,184 @@ +#!/usr/bin/env Rscript +# +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Reads map files, report files, and RAPPOR parameters to run +# an EM algorithm to estimate joint distribution over two or more variables +# +# Usage: +# $ ./analyze_assoc.R -map1 map_1.csv -map2 map_2.csv \ +# -reports reports.csv \ +# Inputs: map1, map2, reports, params +# see how options are parsed below for more information +# Outputs: +# prints a table with estimated joint probability masses +# over candidate strings +# Ex. +# ssl nossl +# intel 0.1 0.3 +# google 0.5 0.1 + +############################################################################## +############################################################################## +############################################################################## +# D E P R E C A T E D +# Please use analyze_assoc_expt.R to run assoc analysis experiments +############################################################################## +############################################################################## +############################################################################## + +library("optparse") + +options(stringsAsFactors = FALSE) + +if(!interactive()) { + option_list <- list( + # Flags + make_option(c("--map1"), default = "map_1.csv", + help = "Hashed candidates for 1st variable"), + make_option(c("--map2"), default = "map_2.csv", + help = "Hashed candidates for 2nd variable"), + make_option(c("--map3"), default = "map_3.csv", + help = "Hashed candidates for 3rd variable"), + make_option(c("--reports", "-r"), default = "reports.csv", + help = "File with raw reports as "), + make_option(c("--truefile", "-t"), default = "truedist.csv", + help = "File with true distribution generated by assoc_sim.R"), + make_option(c("--outdir", "-o"), default = ".", + help = "File where the metrics go"), + make_option(c("--params", "-p"), default = "params.csv", + help = "Filename for RAPPOR parameters"), + make_option(c("--newalg", "-a"), default = FALSE, + help = "Flag to run new EM3 algorithm or not") + ) + opts <- parse_args(OptionParser(option_list = option_list)) +} + +source("analysis/R/encode.R") +source("analysis/R/decode.R") +source("analysis/R/simulation.R") +source("analysis/R/read_input.R") +source("analysis/R/association.R") + +# This function processes the maps loaded using ReadMapFile +# Association analysis requires a map object with a map +# field that has the map split into cohorts and an rmap field +# that has all the cohorts combined +# Arguments: +# map = map object with cohorts as sparse matrix in +# object map$map +# This is the expected object from ReadMapFile +# params = data field with 
parameters +# TODO(pseudorandom): move this functionality to ReadMapFile +ProcessMap <- function(map, params) { + map$rmap <- map$map + map$map <- lapply(1:params$m, function(i) + map$rmap[seq(from = ((i - 1) * params$k + 1), + length.out = params$k),]) + map +} + +main <- function(opts) { + ptm <- proc.time() + + params <- ReadParameterFile(opts$params) + opts_map <- list(opts$map1, opts$map2, opts$map3) + map <- lapply(opts_map, function(o) + ProcessMap(ReadMapFile(o, params = params), + params = params)) + # Reports must be of the format + # cohort no, rappor bitstring 1, rappor bitstring 2 + reportsObj <- read.csv(opts$reports, + colClasses = c("integer", "character", + "character", "character"), + header = FALSE) + + # Parsing reportsObj + # ComputeDistributionEM allows for different sets of cohorts + # for each variable. Here, both sets of cohorts are identical + co <- as.list(reportsObj[1])[[1]] + cohorts <- list(co, co, co) + # Parse reports from reportObj cols 2, 3, and 4 + reports <- lapply(1:3, function(x) as.list(reportsObj[x + 1])) + + # Split strings into bit arrays (as required by assoc analysis) + reports <- lapply(1:3, function(i) { + # apply the following function to each of reports[[1]] and reports[[2]] + lapply(reports[[i]][[1]], function(x) { + # function splits strings and converts them to numeric values + as.numeric(strsplit(x, split = "")[[1]]) + }) + }) + + joint_dist <- ComputeDistributionEM(reports, cohorts, map, + ignore_other = TRUE, + quick = TRUE, + params, marginals = NULL, + estimate_var = FALSE, + new_alg = opts$newalg) + + + td <- read.csv(file = opts$truefile) + ed <- joint_dist$orig$fit + if(length(reports) == 3) { + ed <- as.data.frame(ed) + } + + # We can see if chi-squared tests show different results + # for estimated vs real distribution + print("CHI-SQUARED") + td_chisq <- chisq.test(td) + ed_chisq <- chisq.test(ed) + print(td_chisq) + print(ed_chisq) + print(l1d(td, ed, "L1 DISTANCE")) + l1d_metric <- l1d(td, ed, "") + print("JOINT_DIST$FIT") + print(signif(ed[order(rowSums(ed)),], 4)) + td_metric <- td_chisq[1][[1]][[1]] + ed_metric <- ed_chisq[1][[1]][[1]] + + print("PROC.TIME") + time_taken <- proc.time() - ptm + print(time_taken) + + metrics <- list(td_chisq = td_metric, + ed_chisq = ed_metric, + tv = l1d_metric/2, + time = time_taken[1], + dim1 = dim(ed)[[2]], + dim2 = dim(ed)[[1]]) + + # Write metrics to metrics.csv + # Report l1 distance / 2 to be consistent with histogram analysis + filename <- file.path(opts$outdir, 'metrics.csv') + write.csv(metrics, file = filename, row.names = FALSE) +} + +# L1 distance = 1 - sum(min(df1|x, df2|x)) where +# df1|x / df2|x projects the distribution to the intersection x of the +# supports of df1 and df2 +l1d <- function(df1, df2, statement = "L1 DISTANCE") { + rowsi <- intersect(rownames(df1), rownames(df2)) + colsi <- intersect(colnames(df1), colnames(df2)) + print(statement) + 1 - sum(mapply(min, + unlist(as.data.frame(df1)[rowsi, colsi], use.names = FALSE), + unlist(as.data.frame(df2)[rowsi, colsi], use.names = FALSE))) +} + +if(!interactive()) { + main(opts) +} diff --git a/experimental/assoc/assoc_sim.R b/experimental/assoc/assoc_sim.R new file mode 100755 index 00000000..1b1726de --- /dev/null +++ b/experimental/assoc/assoc_sim.R @@ -0,0 +1,279 @@ +#!/usr/bin/env Rscript +# +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Simulates inputs on which association analysis can be run. +# Currently assoc_sim.R only supports 2 variables but can +# be easily extended to support more. +# +# Usage: +# $ ./assoc_sim.R -n 1000 +# Inputs: uvals, params, reports, map, num, unif +# see how options are parsed below for more information +# Outputs: +# reports.csv file containing reports +# map_{1, 2, ...}.csv file(s) containing maps of variables + +############################################################################## +############################################################################## +############################################################################## +# D E P R E C A T E D +# Please look at workflow to use analyze_assoc_expt.R and +# run gen_assoc_reports.R, rappor_assoc_sim.py, and sum_bits_assoc.py +# to generate inputs to association analysis +# (For more details, see _run-one-instance() in assoctest.sh) +############################################################################## +############################################################################## +############################################################################## + +library("optparse") + +options(stringsAsFactors = FALSE) + +if(!interactive()) { + option_list <- list( + make_option(c("--uvals", "-v"), + help = "Filename for list of values over which + distributions are simulated. The file is a list of + comma-separated strings each line of which refers + to a new variable."), + make_option(c("--params", "-p"), default = "params.csv", + help = "Filename for RAPPOR parameters"), + make_option(c("--reports", "-r"), default = "reports.csv", + help = "Filename for reports"), + make_option(c("--true", "-t"), default = "truedist.csv", + help = "Filename for the true distribution"), + make_option(c("--map", "-m"), default = "map", + help = "Filename *prefix* for map(s)"), + make_option(c("--num", "-n"), default = 1e05, + help = "Number of reports"), + make_option(c("--var1_num", "-z"), default = 100, + help = "Number of values for var1"), + make_option(c("--var2_num", "-y"), default = 20, + help = "Number of values for var2"), + make_option(c("--extras", "-e"), default = 1e05, + help = "How many spurious candidates does the 1st map have?"), + make_option(c("--distr", "-d"), default = "zipf3", + help = "Type of distribution. Choose between + {unif, poisson, poisson2, zipf2}"), + make_option(c("--prefix", "-x"), default = "./", + help = "Path to prefix all default files") + ) + opts <- parse_args(OptionParser(option_list = option_list)) +} + +apply_prefix <- function(path) { + paste(opts$prefix, path, sep = "") +} + +source("analysis/R/encode.R") +source("analysis/R/decode.R") +source("analysis/R/simulation.R") +source("analysis/R/read_input.R") +source("analysis/R/association.R") +source("tests/gen_counts.R") + +# Read unique values of reports from a csv file +# Inputs: filename. The file is expected to contain two rows of strings +# (one for each variable): +# "google.com", "apple.com", ... +# "ssl", "nossl", ... 
+# Returns: a list containing strings +GetUniqueValsFromFile <- function(filename) { + contents <- read.csv(filename, header = FALSE) + # Expect 2 rows of unique vals + if(nrow(contents) != 2) { + stop(paste("Unique vals file", filename, "expected to have + two rows of strings.")) + } + # Removes superfluous "" entries if the lists of unique values + # differ in length + strip_empty <- function(vec) { + vec[!vec %in% c("")] + } + list(var1 = strip_empty(as.vector(t(contents[1,]))), + var2 = strip_empty(as.vector(t(contents[2,])))) +} + +# Simulate correlated reports and write into reportsfile +# Inputs: N = number of reports +# uvals = list containing a list of unique values +# params = list with RAPPOR parameters +# distr = the type of distribution to use +# {unif, poisson, poisson2, zipfg} +# extras = whether map_1.csv has spurious candidates or not +# truefile = name of the file with true distribution +# var1_num = number of var1 candidates +# var2_num = number of var2 candidates +# *** FOR ASSOCTEST TEST SUITE, USE ONLY ZIPF2 / ZIPF3 *** +# mapfile = file to write maps into (with .csv suffixes) +# reportsfile = file to write reports into (with .csv suffix) +SimulateReports <- function(N, uvals, params, distr, extras, truefile, + var1_num, var2_num, + mapfile, reportsfile) { + # Compute true distribution + m <- params$m + + if (distr == "unif") { + # Draw uniformly from 1 to 10 + v1_samples <- as.integer(runif(N, 1, 10)) + + # Pr[var2 = N + 1 | var1 = N] = 0.5 + # Pr[var2 = N | var1 = N] = 0.5 + v2_samples <- v1_samples + sample(c(0, 1), N, replace = TRUE) + + } else if(distr == "poisson") { + # Draw from a Poisson random variable + v1_samples <- rpois(N, 1) + rep(1, N) + + # Pr[var2 = N + 1 | var1 = N] = 0.5 + # Pr[var2 = N | var1 = N] = 0.5 + v2_samples <- v1_samples + sample(c(0, 1), N, replace = TRUE) + } else if (distr == "poisson2") { + + v1_samples <- rpois(N, 1) + rep(1, N) + # supp(var2) = {1, 2} + # Pr[var2 = 1 | var1 = even] = 0.75 + # Pr[var2 = 1 | var1 = odd] = 0.25 + pr25 <- rbinom(N, 1, 0.25) + 1 + pr75 <- rbinom(N, 1, 0.75) + 1 + v2_samples <- rep(1, N) + v2_samples[v1_samples %% 2 == 0] <- pr25[v1_samples %% 2 == 0] + v2_samples[v1_samples %% 2 == 1] <- pr75[v1_samples %% 2 == 1] + } else if (distr == "zipf2" || distr == "zipf3") { + + # Zipfian over var1_num strings + partition <- RandomPartition(N, ComputePdf("zipf1.5", var1_num)) + v1_samples <- rep(1:var1_num, partition) # expand partition + # Shuffle values randomly (may take a few sec for > 10^8 inputs) + v1_samples <- sample(v1_samples) + + # supp(var2) = {1, 2, 3, ..., var2_num} + # We look at two zipfian distributions over supp(var2) + # D1 = zipfian distribution + # D2 = zipfian distr over {var2_num, ..., 4, 3, 2, 1} + # (i.e., D1 in reverse) + # var2 ~ D1 if var1 = even + # var2 ~ D2 if var1 = odd + d1 <- sample(rep(1:var2_num, + RandomPartition(N, ComputePdf("zipf1.5", var2_num)))) + d2 <- (var2_num:1)[d1] + v2_samples <- rep(1, N) + v3_samples <- rep(1, N) + v2_samples[v1_samples %% 2 == 0] <- d1[v1_samples %% 2 == 0] + v2_samples[v1_samples %% 2 == 1] <- d2[v1_samples %% 2 == 1] + if(distr == "zipf3") { + bool1 <- rbinom(N, 1, 0.25) + rep(1, N) + bool2 <- rbinom(N, 1, 0.75) + rep(1, N) + v3_samples[v1_samples %% 2 == 0] <- bool1[v1_samples %% 2 == 0] + v3_samples[v1_samples %% 2 == 1] <- bool2[v1_samples %% 2 == 1] + } + } + + if(distr == "zipf2") { + tmp_samples <- list(v1_samples, v2_samples) + } else if(distr == "zipf3") { + tmp_samples <- list(v1_samples, v2_samples, v3_samples) + } + + + # Function to 
pad strings to uval_vec if sample_vec has + # larger support than the number of strings in uval_vec + # For e.g., if samples have support {1, 2, 3, 4, ...} and uvals + # only have "value1", "value2", and "value3", samples now + # over support {"value1", "value2", "value3", "str4", ...} + PadStrings <- function(sample_vec, uval_vec) { + if (max(sample_vec) > length(uval_vec)) { + # Padding uvals to required length + len <- length(uval_vec) + max_of_samples <- max(sample_vec) + uval_vec[(len + 1):max_of_samples] <- apply( + as.matrix((len + 1):max_of_samples), + 1, + function(i) sprintf("str%d", i)) + } + uval_vec + } + + # Pad and update uvals + uvals <- lapply(1:2, function(i) PadStrings(tmp_samples[[i]], + uvals[[i]])) + + uvals[[3]] <- c("true", "false") + # Replace integers in tmp_samples with actual sample strings + samples <- lapply(1:3, function(i) uvals[[i]][tmp_samples[[i]]]) + + print("TRUE DISTR") + td <- table(samples)/sum(table(samples)) + td <- td[order(rowSums(td), decreasing = TRUE),,] + print(td) + write.table(td, file = truefile, sep = ",", col.names = TRUE, + row.names = TRUE, quote = FALSE) + # Randomly assign cohorts in each dimension + cohorts <- sample(1:m, N, replace = TRUE) + + # Create and write map into mapfile_1.csv and mapfile_2.csv + if (extras > 0) { + # spurious candidates for mapfile_1.csv + len <- length(uvals[[1]]) + as.numeric(extras) + uvals[[1]] <- PadStrings(len, uvals[[1]]) + } + map <- lapply(uvals, function(u) CreateMap(u, params)) + write.table(map[[1]]$map_pos, file = paste(mapfile, "_1.csv", sep = ""), + sep = ",", col.names = FALSE, na = "", quote = FALSE) + write.table(map[[2]]$map_pos, file = paste(mapfile, "_2.csv", sep = ""), + sep = ",", col.names = FALSE, na = "", quote = FALSE) + write.table(map[[3]]$map_pos, file = paste(mapfile, "_3.csv", sep = ""), + sep = ",", col.names = FALSE, na = "", quote = FALSE) + + # Write reports into a csv file + # Format: + # cohort, bloom filter var1, bloom filter var2 + reports <- lapply(1:3, function(i) + EncodeAll(samples[[i]], cohorts, map[[i]]$map, params)) + # Organize cohorts and reports into format + write_matrix <- cbind(as.matrix(cohorts), + as.matrix(lapply(reports[[1]], + function(x) paste(x, collapse = ""))), + as.matrix(lapply(reports[[2]], + function(x) paste(x, collapse = ""))), + as.matrix(lapply(reports[[3]], + function(x) paste(x, collapse = "")))) + write.table(write_matrix, file = reportsfile, quote = FALSE, + row.names = FALSE, col.names = FALSE, sep = ",") +} + +main <- function(opts) { + ptm <- proc.time() + + if(is.null(opts$uvals)) { + uvals = list(var1 = c("str1"), var2 = c("option1")) + } else { + uvals <- GetUniqueValsFromFile(apply_prefix(opts$uvals)) + } + params <- ReadParameterFile(apply_prefix(opts$params)) + SimulateReports(opts$num, uvals, params, opts$distr, # inuts + opts$extras, apply_prefix(opts$true), # inputs + opts$var1_num, opts$var2_num, # inputs + apply_prefix(opts$map), + apply_prefix(opts$reports)) # outputs + + print("PROC.TIME") + print(proc.time() - ptm) +} + +if(!interactive()) { + main(opts) +} diff --git a/experimental/assoc/assoc_sim_expt.R b/experimental/assoc/assoc_sim_expt.R new file mode 100755 index 00000000..5d3438ef --- /dev/null +++ b/experimental/assoc/assoc_sim_expt.R @@ -0,0 +1,250 @@ +#!/usr/bin/env Rscript +# +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Simulates inputs on which association analysis can be run. +# Currently assoc_sim.R only supports 2 variables but can +# be easily extended to support more. +# +# Usage: +# $ ./assoc_sim_expt.R --inp sim_inp.json +# Inputs: uvals, params, reports, map, num, unif +# see how options are parsed below for more information +# Outputs: +# reports.csv file containing reports +# map_{1, 2, ...}.csv file(s) containing maps of variables + +library("jsonlite") +library("optparse") + +options(stringsAsFactors = FALSE) + +if(!interactive()) { + option_list <- list( + make_option(c("--inp"), default = "assoc_inp.json", + help = "JSON file with inputs for assoc_sim_expt")) + opts <- parse_args(OptionParser(option_list = option_list)) + inp <- fromJSON(opts$inp) +} + +apply_prefix <- function(path) { + paste(inp$prefix, path, sep = "") +} + +source("analysis/R/encode.R") +source("analysis/R/decode.R") +source("analysis/R/simulation.R") +source("analysis/R/read_input.R") +source("analysis/R/association.R") +source("tests/gen_counts.R") + +# Read unique values of reports from a csv file +# Inputs: filename. The file is expected to contain two rows of strings +# (one for each variable): +# "google.com", "apple.com", ... +# "ssl", "nossl", ... +# Returns: a list containing strings +GetUniqueValsFromFile <- function(filename) { + contents <- read.csv(filename, header = FALSE) + # Expect 2 rows of unique vals + if(nrow(contents) != 2) { + stop(paste("Unique vals file", filename, "expected to have + two rows of strings.")) + } + # Removes superfluous "" entries if the lists of unique values + # differ in length + strip_empty <- function(vec) { + vec[!vec %in% c("")] + } + list(var1 = strip_empty(as.vector(t(contents[1,]))), + var2 = strip_empty(as.vector(t(contents[2,])))) +} + +# Simulate correlated reports and write into reportsfile +# Inputs: N = number of reports +# uvals = list containing a list of unique values +# params = list with RAPPOR parameters +# distr = the type of distribution to use +# {unif, poisson, poisson2, zipfg} +# extras = whether map_1.csv has spurious candidates or not +# truefile = name of the file with true distribution +# varcandidates = list of number of candidates for each var +# *** FOR ASSOCTEST TEST SUITE, USE ONLY ZIPF2 / ZIPF3 *** +# mapfile = file to write maps into (with .csv suffixes) +# reportsfile = file to write reports into (with .csv suffix) +SimulateReports <- function(N, uvals, params, distr, extras, truefile, + varcandidates, + mapfile, reportsfile) { + # Compute true distribution + m <- params$m + + if (distr == "unif") { + # Draw uniformly from 1 to 10 + v1_samples <- as.integer(runif(N, 1, 10)) + + # Pr[var2 = N + 1 | var1 = N] = 0.5 + # Pr[var2 = N | var1 = N] = 0.5 + v2_samples <- v1_samples + sample(c(0, 1), N, replace = TRUE) + + } else if(distr == "poisson") { + # Draw from a Poisson random variable + v1_samples <- rpois(N, 1) + rep(1, N) + + # Pr[var2 = N + 1 | var1 = N] = 0.5 + # Pr[var2 = N | var1 = N] = 0.5 + v2_samples <- v1_samples + sample(c(0, 1), N, replace = TRUE) + } else if (distr == "poisson2") { + + v1_samples <- rpois(N, 1) + 
rep(1, N) + # supp(var2) = {1, 2} + # Pr[var2 = 1 | var1 = even] = 0.75 + # Pr[var2 = 1 | var1 = odd] = 0.25 + pr25 <- rbinom(N, 1, 0.25) + 1 + pr75 <- rbinom(N, 1, 0.75) + 1 + v2_samples <- rep(1, N) + v2_samples[v1_samples %% 2 == 0] <- pr25[v1_samples %% 2 == 0] + v2_samples[v1_samples %% 2 == 1] <- pr75[v1_samples %% 2 == 1] + } else if (distr == "zipf2" || distr == "zipf3") { + + var1_num <- varcandidates[[1]] + var2_num <- varcandidates[[2]] + + # Zipfian over var1_num strings + partition <- RandomPartition(N, ComputePdf("zipf1.5", var1_num)) + v1_samples <- rep(1:var1_num, partition) # expand partition + # Shuffle values randomly (may take a few sec for > 10^8 inputs) + v1_samples <- sample(v1_samples) + + # supp(var2) = {1, 2, 3, ..., var2_num} + # We look at two zipfian distributions over supp(var2) + # D1 = zipfian distribution + # D2 = zipfian distr over {var2_num, ..., 4, 3, 2, 1} + # (i.e., D1 in reverse) + # var2 ~ D1 if var1 = even + # var2 ~ D2 if var1 = odd + d1 <- sample(rep(1:var2_num, + RandomPartition(N, ComputePdf("zipf1.5", var2_num)))) + d2 <- (var2_num:1)[d1] + v2_samples <- rep(1, N) + v3_samples <- rep(1, N) + v2_samples[v1_samples %% 2 == 0] <- d1[v1_samples %% 2 == 0] + v2_samples[v1_samples %% 2 == 1] <- d2[v1_samples %% 2 == 1] + if(distr == "zipf3") { + bool1 <- rbinom(N, 1, 0.25) + rep(1, N) + bool2 <- rbinom(N, 1, 0.75) + rep(1, N) + v3_samples[v1_samples %% 2 == 0] <- bool1[v1_samples %% 2 == 0] + v3_samples[v1_samples %% 2 == 1] <- bool2[v1_samples %% 2 == 1] + } + } + + if(length(varcandidates) == 3) { + tmp_samples <- list(v1_samples, v2_samples, v3_samples) + } else if (length(varcandidates) == 2) { + tmp_samples <- list(v1_samples, v2_samples) + } + + # Function to pad strings to uval_vec if sample_vec has + # larger support than the number of strings in uval_vec + # For e.g., if samples have support {1, 2, 3, 4, ...} and uvals + # only have "value1", "value2", and "value3", samples now + # over support {"value1", "value2", "value3", "str4", ...} + PadStrings <- function(sample_vec, uval_vec) { + if (max(sample_vec) > length(uval_vec)) { + # Padding uvals to required length + len <- length(uval_vec) + max_of_samples <- max(sample_vec) + uval_vec[(len + 1):max_of_samples] <- apply( + as.matrix((len + 1):max_of_samples), + 1, + function(i) sprintf("str%d", i)) + } + uval_vec + } + + # Pad and update uvals + uvals <- lapply(1:length(varcandidates), + function(i) PadStrings(tmp_samples[[i]], + uvals[[i]])) + # Replace integers in tmp_samples with actual sample strings + samples <- lapply(1:length(varcandidates), + function(i) uvals[[i]][tmp_samples[[i]]]) + + print("TRUE DISTR") + td <- table(samples)/sum(table(samples)) + if (length(varcandidates) == 2) { + td <- td[order(rowSums(td), decreasing = TRUE),] + } else { + td <- td[order(rowSums(td), decreasing = TRUE),,] + } + print(td) + write.table(td, file = truefile, sep = ",", col.names = TRUE, + row.names = TRUE, quote = FALSE) + # Randomly assign cohorts in each dimension + cohorts <- sample(1:m, N, replace = TRUE) + + # Create and write map into mapfile_1.csv and mapfile_2.csv + if (extras > 0) { + # spurious candidates for mapfile_1.csv + len <- length(uvals[[1]]) + as.numeric(extras) + uvals[[1]] <- PadStrings(len, uvals[[1]]) + } + map <- lapply(uvals, function(u) CreateMap(u, params)) + write.table(map[[1]]$map_pos, file = paste(mapfile, "_1.csv", sep = ""), + sep = ",", col.names = FALSE, na = "", quote = FALSE) + write.table(map[[2]]$map_pos, file = paste(mapfile, "_2.csv", sep = ""), + sep = 
",", col.names = FALSE, na = "", quote = FALSE) + if(length(varcandidates) == 3) { + write.table(map[[3]]$map_pos, file = paste(mapfile, "_3.csv", sep = ""), + sep = ",", col.names = FALSE, na = "", quote = FALSE) + } + + # Write reports into a csv file + # Format: + # cohort, bloom filter var1, bloom filter var2 + reports <- lapply(1:length(varcandidates), function(i) + EncodeAll(samples[[i]], cohorts, map[[i]]$map, params)) + # Organize cohorts and reports into format + write_matrix <- cbind(as.matrix(cohorts), + sapply(reports, + function(x) as.matrix(lapply(x, + function(z) paste(z, collapse = ""))))) + write.table(write_matrix, file = reportsfile, quote = FALSE, + row.names = FALSE, col.names = FALSE, sep = ",") +} + +main <- function(inp) { + ptm <- proc.time() + + if(is.null(inp$uvals)) { + # One off case. + # TODO(pseudorandom): More sensible defaults. + uvals = list(var1 = c("str1", "str2"), var2 = c("option1", "option2")) + } else { + uvals <- GetUniqueValsFromFile(apply_prefix(inp$uvals)) + } + params <- ReadParameterFile(apply_prefix(inp$params)) + SimulateReports(inp$num, uvals, params, inp$distr, # inuts + inp$extras, apply_prefix(inp$true), # inputs + inp$varcandidates, # inputs + apply_prefix(inp$map), + apply_prefix(inp$reports)) # outputs + + print("PROC.TIME") + print(proc.time() - ptm) +} + +if(!interactive()) { + main(inp) +} diff --git a/quick_assoc.sh b/quick_assoc.sh new file mode 100755 index 00000000..024e321e --- /dev/null +++ b/quick_assoc.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# +# Quick script to wrap assoc.R +# +# Usage: +# ./quick_assoc.sh [] +# +# For directory name $dir, quick_assoc.sh expects the following files: +# $dir/map1.csv -- map files +# $dir/map2.csv +# $dir/reports.csv -- these are the raw reports +# $dir/params.csv -- parameters file +# +# At the end, it will output results of the Two Way Algorithm and EM algorithm +# (if EM also is set to T) to stdout +# +# Examples: +# $ ./quick_assoc.sh . T + +readonly THIS_DIR=$(dirname $0) +readonly REPO_ROOT=$THIS_DIR +readonly CLIENT_DIR=$REPO_ROOT/client/python +readonly MAP_SUFFIX=map +readonly COUNT_SUFFIX=count + +# All the Python tools need this +export PYTHONPATH=$CLIENT_DIR + +_run-input() { + + # Read reports and compute two way counts + analysis/tools/sum_bits_assoc.py \ + $1/params.csv \ + "$1/$COUNT_SUFFIX" \ + < $1/reports.csv + + # Currently, the summary file shows and aggregates timing of the inference + # engine, which excludes R's loading time and reading of the (possibly + # substantial) map file. Timing below is more inclusive. 
+ TIMEFORMAT='Running analyze.R took %R seconds' + + # Setting up JSON file inp.json in current directory + json_file="{\ + \"time\": false, + \"maps\": [\"$1/${MAP_SUFFIX}1.csv\",\ + \"$1/${MAP_SUFFIX}2.csv\"],\ + \"reports\": \"$1/reports.csv\",\ + \"params\": \"$1/params.csv\",\ + \"numvars\": 2,\ + \"verbose\": \"false\",\ + \"counts\": [\"$1/${COUNT_SUFFIX}_2way.csv\",\ + \"${COUNT_SUFFIX}_marg1.csv\",\ + \"${COUNT_SUFFIX}_marg2.csv\"]," + + # Adding EM comparison depending on flag + if test $2 = T; then + json_file=$json_file"\"also_em\": true" + else + json_file=$json_file"\"also_em\": false" + fi + json_file=$json_file"}" + echo $json_file > inp.json + + time { + analysis/R/assoc.R --inp inp.json + } +} + +main() { + dir=$1 + also_em=${2:-F} + _run-input $dir $also_em +} + +main "$@" diff --git a/setup.sh b/setup.sh index 90b6537f..fbaaff41 100755 --- a/setup.sh +++ b/setup.sh @@ -30,7 +30,7 @@ r-packages() { # glmnet, limSolve: solvers for decode.R # RJSONIO: for analysis_tool.R sudo R -e \ - 'install.packages(c("glmnet", "optparse", "limSolve", "RUnit", "abind", "RJSONIO"), repos="http://cran.rstudio.com/")' + 'install.packages(c("glmnet", "optparse", "limSolve", "RUnit", "abind", "RJSONIO", "jsonlite"), repos="http://cran.rstudio.com/")' } # R 3.0.2 on Trusty is out of date with CRAN, so we need this workaround. diff --git a/test.sh b/test.sh index 37ef0f14..22df91f5 100755 --- a/test.sh +++ b/test.sh @@ -112,6 +112,8 @@ r-unit() { tests/gen_true_values_test.R + tests/gen_true_values_assoc_test.R + analysis/R/decode_test.R analysis/test/run_tests.R diff --git a/tests/analyze_assoc.R b/tests/analyze_assoc.R deleted file mode 100755 index 5d78806f..00000000 --- a/tests/analyze_assoc.R +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env Rscript -# -# Copyright 2015 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Reads map files, report files, and RAPPOR parameters to run -# an EM algorithm to estimate joint distribution over two or more variables -# -# Usage: -# $ ./analyze_assoc.R -map1 map_1.csv -map2 map_2.csv \ -# -reports reports.csv \ -# Inputs: map1, map2, reports, params -# see how options are parsed below for more information -# Outputs: -# prints a table with estimated joint probability masses -# over candidate strings -# Ex. 
-# ssl nossl -# intel 0.1 0.3 -# google 0.5 0.1 - -library("optparse") - -options(stringsAsFactors = FALSE) - -if(!interactive()) { - option_list <- list( - # Flags - make_option(c("--map1", "-m1"), default = "map_1.csv", - help = "Hashed candidates for 1st variable"), - make_option(c("--map2", "-m2"), default = "map_2.csv", - help = "Hashed candidates for 2nd variable"), - make_option(c("--reports", "-r"), default = "reports.csv", - help = "File with raw reports as "), - make_option(c("--params", "-p"), default = "params.csv", - help = "Filename for RAPPOR parameters") - ) - opts <- parse_args(OptionParser(option_list = option_list)) -} - -source("../analysis/R/encode.R") -source("../analysis/R/decode.R") -source("../analysis/R/simulation.R") -source("../analysis/R/read_input.R") -source("../analysis/R/association.R") - -# This function processes the maps loaded using ReadMapFile -# Association analysis requires a map object with a map -# field that has the map split into cohorts and an rmap field -# that has all the cohorts combined -# Arguments: -# map = map object with cohorts as sparse matrix in -# object map$map -# This is the expected object from ReadMapFile -# params = data field with parameters -# TODO(pseudorandom): move this functionality to ReadMapFile -ProcessMap <- function(map, params) { - map$rmap <- map$map - split_map <- function(i, map_struct) { - numbits <- params$k - indices <- which(as.matrix( - map_struct[((i - 1) * numbits + 1):(i * numbits),]) == TRUE, - arr.ind = TRUE) - sparseMatrix(indices[, "row"], indices[, "col"], - dims = c(numbits, max(indices[, "col"]))) - } - map$map <- lapply(1:params$m, function(i) split_map(i, map$rmap)) - map -} - -main <- function(opts) { - ptm <- proc.time() - - params <- ReadParameterFile(opts$params) - opts_map <- list(opts$map1, opts$map2) - map <- lapply(opts_map, function(o) - ProcessMap(ReadMapFile(o, params = params), - params = params)) - # Reports must be of the format - # cohort no, rappor bitstring 1, rappor bitstring 2 - reportsObj <- read.csv(opts$reports, - colClasses = c("integer", "character", "character"), - header = FALSE) - - # Parsing reportsObj - # ComputeDistributionEM allows for different sets of cohorts - # for each variable. Here, both sets of cohorts are identical - co <- as.list(reportsObj[1])[[1]] - cohorts <- list(co, co) - # Parse reports from reportObj cols 2 and 3 - reports <- lapply(1:2, function(x) as.list(reportsObj[x + 1])) - - # Split strings into bit arrays (as required by assoc analysis) - reports <- lapply(1:2, function(i) { - # apply the following function to each of reports[[1]] and reports[[2]] - lapply(reports[[i]][[1]], function(x) { - # function splits strings and converts them to numeric values - as.numeric(strsplit(x, split = "")[[1]]) - }) - }) - - joint_dist <- ComputeDistributionEM(reports, cohorts, map, - ignore_other = TRUE, - params, marginals = NULL, - estimate_var = FALSE) - # TODO(pseudorandom): Export the results to a file for further analysis - print("JOINT_DIST$FIT") - print(joint_dist$fit) - print("PROC.TIME") - print(proc.time() - ptm) -} - -if(!interactive()) { - main(opts) -} \ No newline at end of file diff --git a/tests/assoc_sim.R b/tests/assoc_sim.R deleted file mode 100755 index 3ff1e5df..00000000 --- a/tests/assoc_sim.R +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env Rscript -# -# Copyright 2015 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Simulates inputs on which association analysis can be run. -# Currently assoc_sim.R only supports 2 variables but can -# be easily extended to support more. -# -# Usage: -# $ ./assoc_sim.R -n 1000 -# Inputs: uvals, params, reports, map, num, unif -# see how options are parsed below for more information -# Outputs: -# reports.csv file containing reports -# map_{1, 2, ...}.csv file(s) containing maps of variables - -library("optparse") - -options(stringsAsFactors = FALSE) - -if(!interactive()) { - option_list <- list( - make_option(c("--uvals", "-v"), default = "uvals.csv", - help = "Filename for list of values over which - distributions are simulated. The file is a list of - comma-separated strings each line of which refers - to a new variable."), - make_option(c("--params", "-p"), default = "params.csv", - help = "Filename for RAPPOR parameters"), - make_option(c("--reports", "-r"), default = "reports.csv", - help = "Filename for reports"), - make_option(c("--map", "-m"), default = "map", - help = "Filename *prefix* for map(s)"), - make_option(c("--num", "-n"), default = 1e05, - help = "Number of reports"), - make_option(c("--unif", "-u"), default = FALSE, - help = "Run simulation with uniform distribution") - ) - opts <- parse_args(OptionParser(option_list = option_list)) -} - -source("../analysis/R/encode.R") -source("../analysis/R/decode.R") -source("../analysis/R/simulation.R") -source("../analysis/R/read_input.R") -source("../analysis/R/association.R") - -# Read unique values of reports from a csv file -# Inputs: filename. The file is expected to contain two rows of strings -# (one for each variable): -# "google.com", "apple.com", ... -# "ssl", "nossl", ... 
-# Returns: a list containing strings -GetUniqueValsFromFile <- function(filename) { - contents <- read.csv(filename, header = FALSE) - # Expect 2 rows of unique vals - if(nrow(contents) != 2) { - stop(paste("Unique vals file", filename, "expected to have - two rows of strings.")) - } - # Removes superfluous "" entries if the lists of unique values - # differ in length - strip_empty <- function(vec) { - vec[!vec %in% c("")] - } - list(var1 = strip_empty(as.vector(t(contents[1,]))), - var2 = strip_empty(as.vector(t(contents[2,])))) -} - -# Simulate correlated reports and write into reportsfile -# Inputs: N = number of reports -# uvals = list containing a list of unique values -# params = list with RAPPOR parameters -# unif = whether to replace poisson with uniform -# mapfile = file to write maps into (with .csv suffixes) -# reportsfile = file to write reports into (with .csv suffix) -SimulateReports <- function(N, uvals, params, unif, - mapfile, reportsfile) { - # Compute true distribution - m <- params$m - - if (unif) { - # Draw uniformly from 1 to 10 - v1_samples <- as.integer(runif(N, 1, 10)) - } else { - # Draw from a Poisson random variable - v1_samples <- rpois(N, 1) + rep(1, N) - } - - # Pr[var2 = N + 1 | var1 = N] = 0.5 - # Pr[var2 = N | var1 = N] = 0.5 - v2_samples <- v1_samples + sample(c(0, 1), N, replace = TRUE) - - tmp_samples <- list(v1_samples, v2_samples) - - # Function to pad strings to uval_vec if sample_vec has - # larger support than the number of strings in uval_vec - # For e.g., if samples have support {1, 2, 3, 4, ...} and uvals - # only have "value1", "value2", and "value3", samples now - # over support {"value1", "value2", "value3", "str4", ...} - PadStrings <- function(sample_vec, uval_vec) { - if (max(sample_vec) > length(uval_vec)) { - # Padding uvals to required length - len <- length(uval_vec) - max_of_samples <- max(sample_vec) - uval_vec[(len + 1):max_of_samples] <- apply( - as.matrix((len + 1):max_of_samples), - 1, - function(i) sprintf("str%d", i)) - } - uval_vec - } - - # Pad and update uvals - uvals <- lapply(1:2, function(i) PadStrings(tmp_samples[[i]], - uvals[[i]])) - - # Replace integers in tmp_samples with actual sample strings - samples <- lapply(1:2, function(i) uvals[[i]][tmp_samples[[i]]]) - - # Randomly assign cohorts in each dimension - cohorts <- sample(1:m, N, replace = TRUE) - - # Create and write map into mapfile_1.csv and mapfile_2.csv - map <- lapply(uvals, function(u) CreateMap(u, params)) - write.table(map[[1]]$map_pos, file = paste(mapfile, "_1.csv", sep = ""), - sep = ",", col.names = FALSE, na = "", quote = FALSE) - write.table(map[[2]]$map_pos, file = paste(mapfile, "_2.csv", sep = ""), - sep = ",", col.names = FALSE, na = "", quote = FALSE) - - # Write reports into a csv file - # Format: - # cohort, bloom filter var1, bloom filter var2 - reports <- lapply(1:2, function(i) - EncodeAll(samples[[i]], cohorts, map[[i]]$map, params)) - # Organize cohorts and reports into format - write_matrix <- cbind(as.matrix(cohorts), - as.matrix(lapply(reports[[1]], - function(x) paste(x, collapse = ""))), - as.matrix(lapply(reports[[2]], - function(x) paste(x, collapse = "")))) - write.table(write_matrix, file = reportsfile, quote = FALSE, - row.names = FALSE, col.names = FALSE, sep = ",") -} - -main <- function(opts) { - ptm <- proc.time() - - uvals <- GetUniqueValsFromFile(opts$uvals) - params <- ReadParameterFile(opts$params) - SimulateReports(opts$num, uvals, params, opts$unif, # inputs - opts$map, opts$reports) # outputs - - print("PROC.TIME") 
- print(proc.time() - ptm) -} - -if(!interactive()) { - main(opts) -} diff --git a/tests/assoctest.html new file mode 100644 index 00000000..e4b23875 --- /dev/null +++ b/tests/assoctest.html @@ -0,0 +1,98 @@ + RAPPOR assoctest.sh
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/assoctest_spec.py b/tests/assoctest_spec.py new file mode 100755 index 00000000..b6952730 --- /dev/null +++ b/tests/assoctest_spec.py @@ -0,0 +1,137 @@ +#!/usr/bin/python +"""Print a test spec on stdout. + +Each line has parameters for a test case. The assoctest.sh shell script reads +these lines and runs parallel processes. + +We use Python data structures so the test cases are easier to read and edit. +""" + +import optparse +import sys + +DISTRIBUTION_PARAMS_ASSOC = { + # name, num unique values 1, + # num unique values 2, num clients + 'tiny': (100, 2, int(1e03)), # test for insufficient data + 'small': (100, 10, int(1e04)), +# 'fizz-tiny': (100, 20, int(1e03)), +# 'fizz-tiny-bool': (100, 2, int(1e03)), +# 'fizz-small': (100, 20, int(1e04)), +# 'fizz-small-bool': (100, 2, int(1e04)), +# 'fizz': (100, 20, int(1e05)), +# 'fizz-large': (100, 50, int(1e05)), +# 'fizz-2large': (100, 50, int(5e05)), +# 'fizz-bool': (100, 2, int(1e05)), + 'medium': (1000, 10, int(1e05)), + 'medium2': (1000, 2, int(1e05)), + 'large': (10000, 10, int(1e06)), + 'large2': (10000, 2, int(1e06)), + 'largesquared': (int(1e04), 100, int(1e06)), + + # new test names for 2-way marginals + # includes testing for extras + 'fizz-tiny': (100, 20, int(1e03), int(1e04)), + 'fizz-tiny-bool': (100, 2, int(1e03), int(1e04)), + 'fizz-small': (100, 20, int(1e04), int(1e04)), + 'fizz-small-bool': (100, 2, int(1e04), int(1e04)), + 'fizz': (100, 20, int(1e05), int(1e04)), + 'fizz-bool': (100, 2, int(1e05), int(1e04)), + + 'toy': (5, 2, 1e04, 20), # for testing purposes only + 'compact-noextra-small': (40, 5, 1e04, 0), + 'loose-noextra-small': (100, 20, 1e04, 0), + 'compact-noextra-large': (40, 5, 1e06, 0), + 'loose-noextra-large': (100, 20, 1e06, 0), + 'compact-extra-small': (40, 5, int(1e04), int(1e04)), + 'loose-extra-small': (100, 20, int(1e04), int(1e04)), + 'compact-extra-large': (40, 5, int(1e06), int(1e04)), + 'loose-extra-large': (100, 20, int(1e06), int(1e04)), + 'compact-excess-small': (40, 5, int(1e04), int(1e05)), + 'loose-excess-small': (100, 20, int(1e04), int(1e05)), + 'compact-excess-large': (40, 5, int(1e06), int(1e05)), + 'loose-excess-large': (100, 20, int(1e06), int(1e05)), +} + +# 'k, h, m' as in params file. +BLOOMFILTER_PARAMS = { + '8x16': (8, 2, 16), # 16 cohorts, 8 bits each, 2 bits set in each + '8x32': (8, 2, 32), # 32 cohorts, 8 bits each, 2 bits set in each + '16x32': (16, 2, 32), # 32 cohorts, 16 bits each, 2 bits set in each + '8x128': (8, 2, 128), # 128 cohorts, 8 bits each, 2 bits set in each + '128x128': (128, 2, 128), # 8 cohorts, 128 bits each, 2 bits set in each +} + +# 'p, q, f' as in params file. +PRIVACY_PARAMS = { + 'eps_zero': (0, 0.99, 0), # testing purposes only! 
+ 'eps_1_1': (0.39, 0.61, 0.45), # eps_1 = 1, eps_inf = 5: + 'eps_1_5': (0.225, 0.775, 0.0), # eps_1 = 5, no eps_inf + 'eps_verysmall': (0.125, 0.875, 0.125), + 'eps_small': (0.125, 0.875, 0.5), + 'eps_chrome': (0.25, 0.75, 0.5), + 'uma_rappor_type': (0.50, 0.75, 0.5), +} + +# assoc test configuration -> +# (distribution params set, bloomfilter params set, +# privacy params set) +# The test config runs a test suite that is the cross product of all the above +# sets +ASSOC_TEST_CONFIG = { + 'distr': ( +# 'fizz-tiny', +# 'fizz-tiny-bool', +# 'fizz-small', +# 'fizz-small-bool', +# 'fizz', +# 'fizz-bool',), +# 'toy',), + 'compact-noextra-small',), +# 'loose-noextra-small',), +# 'compact-extra-small', +# 'loose-extra-small', +# 'compact-excess-small', +# 'loose-excess-small',), +# 'compact-noextra-large', +# 'loose-noextra-large', +# 'compact-extra-large', +# 'loose-extra-large', +# 'compact-excess-large', +# 'loose-excess-large'), + 'blooms': ( + '8x32',), +# '16x32',), + 'privacy': ( + 'eps_small', + 'eps_chrome',) +} + +# +# END TEST CONFIGURATION +# + +def main(argv): + rows = [] + test_case = [] + # Association tests + for distr in ASSOC_TEST_CONFIG['distr']: + for blooms in ASSOC_TEST_CONFIG['blooms']: + for privacy in ASSOC_TEST_CONFIG['privacy']: + print distr, blooms, privacy + test_name = 'a-{}-{}-{}'.format(distr, blooms, privacy) + params = (BLOOMFILTER_PARAMS[blooms] + + PRIVACY_PARAMS[privacy]) + test_case = (test_name,) + DISTRIBUTION_PARAMS_ASSOC[distr] + params + row_str = [str(element) for element in test_case] + rows.append(row_str) + + for row in rows: + print ' '.join(row) + +if __name__ == '__main__': + try: + main(sys.argv) + except RuntimeError, e: + print >>sys.stderr, 'FATAL: %s' % e + sys.exit(1) diff --git a/tests/compare_assoc.R b/tests/compare_assoc.R new file mode 100755 index 00000000..a8105662 --- /dev/null +++ b/tests/compare_assoc.R @@ -0,0 +1,549 @@ +#!/usr/bin/env Rscript +# +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Reads map files, report files, and RAPPOR parameters to run +# an EM algorithm to estimate joint distribution over two or more variables +# +# Usage: +# $ ./compare_assoc.R --inp +# +# Input file: +# Outputs: + +library("jsonlite") +library("optparse") + +options(stringsAsFactors = FALSE) + +if(!interactive()) { + option_list <- list( + make_option(c("--inp"), default = "analyze_inp.json", + help = "JSON file with inputs for analyze_assoc_expt")) + opts <- parse_args(OptionParser(option_list = option_list)) +} + +source("analysis/R/decode2way.R") +source("analysis/R/encode.R") +source("analysis/R/decode.R") +source("analysis/R/simulation.R") +source("analysis/R/read_input.R") +source("analysis/R/association.R") +source("tests/gen_counts.R") + +# Wrapper function to print strings only if verbose flag is passed in +PrintIfVerbose <- function(string, flag = FALSE) { + if(flag == TRUE) { + print(string) + } +} + +# TV distance = L1 distance / 2 = 1 - sum(min(df1|x, df2|x)) where +# df1|x / df2|x projects the distribution to the intersection x of the +# supports of df1 and df2 +TVDistance <- function(df1, df2, statement = "TV DISTANCE") { + rowsi <- intersect(rownames(df1), rownames(df2)) + colsi <- intersect(colnames(df1), colnames(df2)) + print(statement) + 1 - sum(mapply(min, + unlist(as.data.frame(df1[rowsi, colsi]), use.names = FALSE), + unlist(as.data.frame(df2[rowsi, colsi]), use.names = FALSE))) +} + +# Function to combine reports +# Currently assume 2-way marginals +CombineReports <- function(reports1, reports2) { + # Encoding (var1, var2) \in {(0, 0), (0, 1), (1, 0), (1, 1)} + two_bits <- list(c(0, 0, 0, 1), c(0, 1, 0, 0), c(0, 0, 1, 0), c(1, 0, 0, 0)) + OuterProd <- function(x, y) { + as.vector(outer(x, y, + function(z, t) z + 2 * t)) + } + # "report1-major" order + creports <- mapply(OuterProd, reports2, reports1, + SIMPLIFY = FALSE) + # Collapse counts to bit vector according to two_bits + lapply(creports, + function(x) as.vector(sapply(x, function(z) two_bits[[z+1]]))) +} + +# Given 2 lists of maps, maps1 and maps2, the function +# combines the maps by cohort and outputs both +# cohort-organized maps and flattened versions +CombineMaps <- function(maps1, maps2) { + # Combine maps + cmap <- mapply(CombineMapsInternal, maps1, maps2) + + # Flatten map + inds <- lapply(cmap, function(x) which(x, arr.ind = TRUE)) + for (i in seq(1, length(inds))) { + inds[[i]][, 1] <- inds[[i]][, 1] + (i-1) * dim(cmap[[1]])[1] + } + inds <- do.call("rbind", inds) + crmap <- sparseMatrix(inds[, 1], inds[, 2], dims = c( + nrow(cmap[[1]]) * length(cmap), + ncol(cmap[[1]]))) + colnames(crmap) <- colnames(cmap[[1]]) + list(cmap = cmap, crmap = crmap) +} + +# Function to combine maps +# Using map1-major order for both candidates and bits of the report +# to be consistent with how CombineReports works +# Currently assume 2-way marginals +CombineMapsInternal <- function(map1, map2) { + # Retrieve set indices and dimensions + rows1 <- which(map1, arr.ind = TRUE)[,1] + cols1 <- which(map1, arr.ind = TRUE)[,2] + length1 <- dim(map1)[[1]] + width1 <- dim(map1)[[2]] + rows2 <- which(map2, arr.ind = TRUE)[,1] + cols2 <- which(map2, arr.ind = TRUE)[,2] + length2 <- dim(map2)[[1]] + width2 <- dim(map2)[[2]] + + # Now process map1 + map1fn <- function(i, j) { + i1 <- seq(1, length2) + ((i-1) * length2) + j1 <- seq(1, width2) + ((j-1) * width2) + expand.grid(i1, j1) + } + map1indices <- do.call(rbind, + mapply(map1fn, rows1, cols1, SIMPLIFY = FALSE)) + map1_big <- sparseMatrix(map1indices[,"Var1"], + 
map1indices[,"Var2"], + dims = c(length1 * length2, + width1 * width2)) + colnames(map1_big) <- t(outer(colnames(map1), + colnames(map2), + function(x, y) paste(x, y, sep = "x"))) + + # Now process map2 + map2fn <- function(i, j) { + i2 <- i + (seq(0, length1 - 1) * length2) + j2 <- j + (seq(0, width1 - 1) * width2) + expand.grid(i2, j2) + } + map2indices <- do.call(rbind, + mapply(map2fn, rows2, cols2, SIMPLIFY = FALSE)) + map2_big <- sparseMatrix(map2indices[,"Var1"], + map2indices[,"Var2"], + dims = c(length1 * length2, + width1 * width2)) + colnames(map2_big) <- t(outer(colnames(map1), + colnames(map2), + function(x, y) paste(x, y, sep = "x"))) + + # Now collate two maps with entries in (1000, 0100, 0010, 0001) + # (m1&m2, !m1 & m2, m1 & !m2, !(m1 & m2)) respectively + findices <- which(map1_big & map2_big, arr.ind = TRUE) + # 1000 + findices[, 1] <- findices[, 1] * 4 - 3 + # 0100 + indices_0100 <- which((!map1_big) & map2_big, arr.ind = TRUE) + indices_0100[, 1] <- indices_0100[, 1] * 4 - 2 + findices <- rbind(findices, indices_0100) + # 0010 + indices_0010 <- which(map1_big & (!map2_big), arr.ind = TRUE) + indices_0010[, 1] <- indices_0010[, 1] * 4 - 1 + findices <- rbind(findices, indices_0010) + # 0001 + indices_0001 <- which((!map1_big) & (!map2_big), arr.ind = TRUE) + indices_0001[, 1] <- indices_0001[, 1] * 4 + findices <- rbind(findices, indices_0001) + sm <- sparseMatrix(findices[, 1], findices[, 2], + dims = c(4 * length1 * length2, + width1 * width2)) + colnames(sm) <- colnames(map1_big) + sm +} + +GenerateNoiseMatrix <- function(params) { + p <- params$p + q <- params$q + f <- params$f + m <- params$m + k <- params$k + + p11 <- q * (1 - f/2) + p * f / 2 # probability of a true 1 reported as 1 + p01 <- p * (1 - f/2) + q * f / 2 # probability of a true 0 reported as 1 + p10 <- 1 - p11 # probability of a true 1 reported as 0 + p00 <- 1 - p01 # probability of a true 0 reported as 0 + + NoiseMatrix <- matrix(rep(0, 16), 4) + NoiseMatrix[1,] <- c(p11**2, p11*p10, p10*p11, p10**2) + NoiseMatrix[2,] <- c(p11*p01, p11*p00, p10*p01, p10*p00) + NoiseMatrix[3,] <- c(p01*p11, p01*p10, p00*p11, p00*p01) + NoiseMatrix[4,] <- c(p01**2, p00*p01, p01*p00, p00**2) + + NoiseMatrix +} + +##################################################################### +## +## Direct simulation of reports WITHOUT simulated variance +## +## Inputs: inp object (from parsing JSON) with +## num - # of reports +## params - file containing RAPPOR params +## varcandidates - list containing # of candidates for each var +## numvars - # of vars (>=2 for association) +## extra - # of extra candidates for var 1 +## +## +## Outputs: Runs simulation of two-way association analysis by directly +## simulating the counts of one way and two way marginals +## +##################################################################### +DirectSimulationOfReports <- function(inp, verbose = FALSE) { + ptm <- proc.time() + params <- ReadParameterFile(inp$params) + strconstant <- c("string", "option") + N <- inp$num + n1 <- inp$varcandidates[[1]] + n2 <- inp$varcandidates[[2]] + + # Construct unique vals for each variable using strconstant + stopifnot(length(strconstant) == inp$numvars) + uvals <- lapply(1:inp$numvars, + function(i) { + apply(as.matrix(1:inp$varcandidates[[i]]), + 1, + function(z) sprintf("%s%d", strconstant[[i]], z)) + }) + + # Add extras if any + if(inp$extras > 0) { + uvals[[1]] <- c(uvals[[1]], apply(as.matrix(1:inp$extras), 1, + function(z) sprintf("%s%d", strconstant[[1]], z + n1))) + } + + # Compute map + map <- 
lapply(uvals, function(u) CreateMap(u, params)) + + # Trim maps to real # of candidates + # Use extras only for decoding + tmap <- lapply(map[[1]]$map, function(i) i[, 1:n1]) + crmap_trimmed <- CombineMaps(tmap, map[[2]]$map)$crmap + + # Sample values to compute partition + # Zipfian over n1 strings + v1_part <- RandomPartition(N, ComputePdf("zipf1.5", n1)) + # Zipfian over n2 strings for each of variable 1 + # Distr. are correlated as in assoc_sim.R + final_part <- as.vector(sapply(1:n1, + function(i) { + v2_part <- RandomPartition(v1_part[[i]], + ComputePdf("zipf1.5", n2)) + if (i %% 2 == 0) {v2_part} else {rev(v2_part)} + })) + + td <- matrix(final_part/sum(final_part), nrow = n1, ncol = n2, byrow = TRUE) + v2_part <- RandomPartition(N, apply(td, 2, sum)) + ow_parts <- list(v1_part, v2_part) + ow_parts[[1]] <- c(ow_parts[[1]], rep(0, inp$extra)) + + # -------------- + # Generate 1-way counts + ow_counts <- lapply(1:2, function(i) + GenerateCounts(params, map[[i]]$rmap, ow_parts[[i]], 1)) + found_strings <- lapply(1:2, function(i) + Decode(ow_counts[[i]], + map[[i]]$rmap, + params, quick = TRUE)$fit[,"string"]) + # -------------- + + rownames(td) <- uvals[[1]][1:n1] # Don't take into account extras + colnames(td) <- uvals[[2]] + PrintIfVerbose("TRUE DISTRIBUTION", verbose) + PrintIfVerbose(signif(td, 4), verbose) + cohorts <- as.matrix( + apply(as.data.frame(final_part), 1, + function(count) RandomPartition(count, rep(1, params$m)))) + expanded <- apply(cohorts, 2, function(vec) rep(vec, each = ((params$k)**2)*4)) + true_ones <- apply(expanded * crmap_trimmed, 1, sum) + + NoiseMatrix <- GenerateNoiseMatrix(params) + after_noise <- as.vector(sapply(1:(length(true_ones)/4), + function(x) + t(NoiseMatrix) %*% true_ones[((x-1)*4+1):(x*4)])) + counts <- cbind(apply(cohorts, 1, sum), + matrix(after_noise, + nrow = params$m, + ncol = 4 * (params$k**2), + byrow = TRUE)) + + params2 <- params + params2$k <- (params$k ** 2) * 4 + + # Combine maps to feed into Decode2Way + # Prune first to found_strings from Decode on 1-way counts + pruned <- lapply(1:2, function(i) + lapply(map[[i]]$map, function(z) z[,found_strings[[i]]])) + crmap <- CombineMaps(pruned[[1]], pruned[[2]])$crmap + marginal <- Decode2Way(counts, crmap, params2)$fit + + # Fill in estimated results with rows and cols from td + ed <- matrix(0, nrow = (n1+inp$extra), ncol = n2) + rownames(ed) <- uvals[[1]] + colnames(ed) <- uvals[[2]] + for (cols in colnames(td)) { + for (rows in rownames(td)) { + ed[rows, cols] <- marginal[paste(rows, cols, sep = "x"), "Estimate"] + } + } + ed[is.na(ed)] <- 0 + time_taken <- proc.time() - ptm + + PrintIfVerbose("2 WAY RESULTS", verbose) + PrintIfVerbose(signif(ed, 4), verbose) + PrintIfVerbose(TVDistance(td, ed, "TV DISTANCE 2 WAY ALGORITHM"), verbose) + PrintIfVerbose("PROC.TIME", verbose) + PrintIfVerbose(time_taken, verbose) + chisq_td <- chisq.test(td)[1][[1]][[1]] + chisq_ed <- chisq.test(ed)[1][[1]][[1]] + if(is.nan(chisq_ed)) { + chisq_ed <- 0 + } + if(is.nan(chisq_td)) { + chisq_td <- 0 + } + + metrics <- list( + td_chisq = chisq_td, + ed_chisq = chisq_ed, + tv = TVDistance(td, ed, ""), + time = time_taken[1], + dim1 = length(found_strings[[1]]), + dim2 = length(found_strings[[2]]) + ) + filename <- file.path(inp$outdir, 'metrics.csv') + write.csv(metrics, file = filename, row.names = FALSE) +} + +##################################################################### +## +## Externally provided counts (gen_assoc_counts.R and sum_assoc_reports.py) +## new_decode flag allows you to switch between 
two decode algorithm choices +## Note: Only for two way associations +## +## Inputs: inp object (from parsing JSON) with +## count files (2 way counts, individual marginal counts) +## map files (2 variables) +## params file with RAPPOR params +## +## Outputs: Runs simulation of two-way association analysis reading inputs +## from counts, maps, and params file. +##################################################################### +ExternalCounts <- function(inp, verbose = FALSE, metrics_filename = "metrics.csv") { + ptm <- proc.time() + params <- ReadParameterFile(inp$params) + # Ensure sufficient maps as required by number of vars + # Correct map from ReadMapFile() for assoc analysis + stopifnot(inp$numvars == length(inp$maps)) + map <- lapply(inp$maps, function(o) + CorrectMapForAssoc(ReadMapFile(o, params = params), + params = params)) + + # (2 way counts, marginal 1 counts, marginal 2 counts) + counts <- lapply(1:3, function(i) ReadCountsFile(inp$counts[[i]])) + + params2 <- params + params2$k <- (params$k ** 2) * 4 + + # Prune candidates + fit <- lapply(1:2, function(i) + Decode(counts[[i + 1]], + map[[i]]$rmap, + params, quick = FALSE)$fit) + + found_strings = list(fit[[1]][,"string"], fit[[2]][,"string"]) + + if (length(found_strings[[1]]) == 0 || length(found_strings[[2]]) == 0) { + PrintIfVerbose("FOUND_STRINGS", verbose) + PrintIfVerbose(found_strings, verbose) + stop("No strings found in 1-way marginal.") + } + + # Combine maps to feed into Decode2Way + # Prune first to found_strings from Decode on 1-way counts + pruned <- lapply(1:2, function(i) + lapply(map[[i]]$map, function(z) z[,found_strings[[i]], drop = FALSE])) + crmap <- CombineMaps(pruned[[1]], pruned[[2]])$crmap + marginal <- Decode2Way(counts[[1]], crmap, params2, fit = fit)$fit + td <- read.csv(file = inp$truefile, header = TRUE) + td <- table(td[,3:4]) + td <- td / sum(td) + ed <- td + for (cols in colnames(td)) { + for (rows in rownames(td)) { + ed[rows, cols] <- marginal[paste(rows, cols, sep = "x"), "Estimate"] + } + } + ed[is.na(ed)] <- 0 + ed[ed<0] <- 0 + + time_taken <- proc.time() - ptm + + PrintIfVerbose(TVDistance(td, ed, "TV DISTANCE 2 WAY"), verbose) + PrintIfVerbose("PROC.TIME", verbose) + PrintIfVerbose(time_taken, verbose) + chisq_td <- chisq.test(td)[1][[1]][[1]] + chisq_ed <- chisq.test(ed)[1][[1]][[1]] + if(is.nan(chisq_td)) { + chisq_td <- 0 + } + if(is.nan(chisq_ed)) { + chisq_ed <- 0 + } + + metrics <- list( + td_chisq = chisq_td, + ed_chisq = chisq_ed, + tv = TVDistance(td, ed, ""), + time = time_taken[1], + dim1 = length(found_strings[[1]]), + dim2 = length(found_strings[[2]]) + ) + + # Write metrics to metrics_filename (default: metrics.csv) + filename <- file.path(inp$outdir, metrics_filename) + write.csv(metrics, file = filename, row.names = FALSE) +} + +##################################################################### +## +## Externally provided reports +## EM ALGORITHM +## TODO: Also support 3 way association +## +## Inputs: +## +## Outputs: +## +##################################################################### +ExternalReportsEM <- function(inp, + verbose = FALSE, + metrics_filename = "metrics.csv") { + ptm <- proc.time() + params <- ReadParameterFile(inp$params) + # Ensure sufficient maps as required by number of vars + stopifnot(inp$numvars == length(inp$maps)) + # Correct map from ReadMapFile() for assoc analysis + map <- lapply(inp$maps, function(o) + CorrectMapForAssoc(ReadMapFile(o, params = params), + params = params)) + + # Reports must be of the format + # client name, 
cohort no, rappor bitstring 1, rappor bitstring 2, ... + reportsObj <- read.csv(inp$reports, + colClasses = c("character", "integer", + rep("character", inp$numvars)), + header = TRUE) + # Ignore the first column + reportsObj <- reportsObj[,-1] + + # Parsing reportsObj + # ComputeDistributionEM allows for different sets of cohorts + # for each variable. Here, both sets of cohorts are identical + co <- as.list(reportsObj[1])[[1]] + co <- co + 1 # 1 indexing + cohorts <- rep(list(co), inp$numvars) + # Parse reports from reportObj cols 2, 3, ... + reports <- lapply(1:inp$numvars, function(x) as.list(reportsObj[x + 1])) + + # Split strings into bit arrays (as required by assoc analysis) + reports <- lapply(1:inp$numvars, function(i) { + # apply the following function to each of reports[[1]] and reports[[2]] + lapply(reports[[i]][[1]], function(x) { + # function splits strings and converts them to numeric values + # rev needed for endianness + rev(as.numeric(strsplit(x, split = "")[[1]])) + }) + }) + + params = list(params, params) + joint_dist <- ComputeDistributionEM(reports, cohorts, map, + ignore_other = TRUE, + quick = TRUE, + params, marginals = NULL, + estimate_var = FALSE) + em <- joint_dist$fit + td <- read.csv(file = inp$truefile, header = FALSE) + td <- table(td[,3:4]) + td <- td / sum(td) + time_taken <- proc.time() - ptm + + PrintIfVerbose(TVDistance(td, em, "TV DISTANCE EM"), verbose) + PrintIfVerbose("PROC.TIME", verbose) + PrintIfVerbose(time_taken, verbose) + chisq_td <- chisq.test(td)[1][[1]][[1]] + chisq_ed <- chisq.test(em)[1][[1]][[1]] + if(is.nan(chisq_td)) { + chisq_td <- 0 + } + if(is.nan(chisq_ed)) { + chisq_ed <- 0 + } + + metrics <- list( + td_chisq = chisq_td, + ed_chisq = chisq_ed, + tv = TVDistance(td, em, ""), + time = time_taken[1], + dim1 = dim(em)[[1]], + dim2 = dim(em)[[2]] + ) + + # Write metrics to metrics_filename (default: metrics.csv) + filename <- file.path(inp$outdir, metrics_filename) + write.csv(metrics, file = filename, row.names = FALSE) +} + +main <- function(opts) { + inp <- fromJSON(opts$inp) + verbose_flag <- inp$verbose + # Choose from a set of experiments to run + # direct -> direct simulation of reports (without variances) + # external-counts -> externally supplied counts for 2 way and marginals + # external-reports -> externally supplied reports + + if("direct" %in% inp$expt) { + PrintIfVerbose("Running Experiment Direct", verbose_flag) + DirectSimulationOfReports(inp, verbose = verbose_flag) + } + if ("external-counts" %in% inp$expt) { + PrintIfVerbose("Running Experiment Ext Counts", verbose_flag) + if ("direct" %in% inp$expt) { + # external-counts expt is run to compare results + ExternalCounts(inp, verbose = verbose_flag, metrics_filename = "metrics_2.csv") + } else { + ExternalCounts(inp, verbose = verbose_flag) + } + } + if ("external-reports-em" %in% inp$expt) { + PrintIfVerbose("Running Experiment Ext Reports", verbose_flag) + if (("direct" %in% inp$expt)||("external-counts" %in% inp$expt)) { + # external-reports-em expt is run to compare results + ExternalReportsEM(inp, verbose = verbose_flag, metrics_filename = "metrics_2.csv") + } else { + ExternalReportsEM(inp, verbose = verbose_flag) + } + } +} + +if(!interactive()) { + main(opts) +} diff --git a/tests/gen_true_values_assoc.R b/tests/gen_true_values_assoc.R new file mode 100755 index 00000000..779fe398 --- /dev/null +++ b/tests/gen_true_values_assoc.R @@ -0,0 +1,99 @@ +#!/usr/bin/env Rscript +# +# Copyright 2015 Google Inc. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +source('tests/gen_counts.R') + +# Usage: +# +# $ ./gen_true_values_assoc.R 100 20 10000 foo.csv +# +# Inputs: +# size of the distribution's support for var 1 +# size of the distribution's support for var 2 +# number of clients +# name of the output file +# Output: +# csv file with reports sampled according to the specified distribution. + +GenerateTrueValuesAssoc <- function(n, N, num_cohorts) { + # Inputs: n, a list of supports for vars 1, 2 + # N, the number of reports/clients + # num_cohorts, the number of cohorts + # Output: tuples of values sampled according to a zipf x zipf distr + # with support n[[1]] and n[[2]] respectively + + # Sample values to compute partition + # Resulting distribution is a correlated zipf x zipf + # distribution over 2 variables + PartitionWithCorrelation <- function(size, support, index) { + part <- RandomPartition(size, ComputePdf("zipf1.5", support)) + if (index %% 2 == 0) {part} else {rev(part)} + } + + # Zipfian over n[[1]] strings + part <- RandomPartition(N, ComputePdf("zipf1.5", n[[1]])) + # Zipfian over n[[2]] strings for each of variable 1 + final_part <- as.vector(sapply(1:n[[1]], + function(i) PartitionWithCorrelation(part[[i]], n[[2]], i))) + + final_part <- matrix(final_part, nrow = n[[1]], byrow = TRUE) + rownames(final_part) <- sapply(1:n[[1]], function(x) paste("str", x, sep = "")) + colnames(final_part) <- sapply(1:n[[2]], function(x) paste("opt", x, sep = "")) + distr <- final_part/sum(final_part) + print("DISTRIBUTION") + print(distr) + + print('PARTITION') + print(final_part) + + # Expand partition + values <- list( + rep(1:n[[1]], rowSums(final_part)), + unlist(sapply(1:n[[1]], function(x) rep(1:n[[2]], final_part[x, ])))) + + stopifnot((length(values[[1]]) == N) & + (length(values[[2]]) == N)) + + # Shuffle values randomly (may take a few sec for > 10^8 inputs) + perm <- sample(N) + values <- list(values[[1]][perm], values[[2]][perm]) + cohorts <- rep(1:N) %% num_cohorts + list(cohorts = cohorts, values = values) +} + +main <- function(argv) { + n <- list(as.integer(argv[[1]]), as.integer(argv[[2]])) + N <- as.integer(argv[[3]]) + num_cohorts <- as.integer(argv[[4]]) + out_file <- argv[[5]] + + res <- GenerateTrueValuesAssoc(n, N, num_cohorts) + # Prepend with str and opt + reports <- list(sprintf("str%d", res$values[[1]]), + sprintf("opt%d", res$values[[2]])) + + + # Paste together client name, cohort input, report1, report2 + reports <- cbind(sprintf("cli%d", 1:N), + res$cohorts, reports[[1]], reports[[2]]) + colnames(reports) <- c("client", "cohort", "value1", "value2") + write.table(reports, file = out_file, row.names = FALSE, col.names = TRUE, + sep = ",", quote = FALSE) +} + +if (length(sys.frames()) == 0) { + main(commandArgs(TRUE)) +} diff --git a/tests/gen_true_values_assoc_test.R b/tests/gen_true_values_assoc_test.R new file mode 100755 index 00000000..ebef1e77 --- /dev/null +++ b/tests/gen_true_values_assoc_test.R @@ -0,0 +1,37 @@ +#!/usr/bin/Rscript +# +# gen_reports_test.R + 
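# For orientation (a sketch of the module under test, based on its code, not
# an additional check): GenerateTrueValuesAssoc(n, N, num_cohorts) returns a
# list with
#   $cohorts -- length-N vector of cohort labels in 0..(num_cohorts - 1)
#   $values  -- list of two length-N integer vectors, bounded by n[[1]], n[[2]]
# e.g. res <- GenerateTrueValuesAssoc(list(20, 5), 1000, 32) gives
# length(res$values[[1]]) == 1000 and max(res$values[[2]]) <= 5.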
+source('analysis/R/util.R') # Log() + +source('tests/gen_true_values_assoc.R') # module under test + +library(RUnit) + +TestGenerateTrueValuesAssoc <- function() { + # list for support of var1, var2, + # total number of reports + # num_cohorts + res <- GenerateTrueValuesAssoc(list(20, 5), 1000, 32) + # print(res$values) + + # 1000 reports + checkEquals(1000, length(res$values[[1]])) + + # support(var1) <= 20 + # support(var2) <= 5 + checkTrue(max(res$values[[1]]) <= 20) + checkTrue(max(res$values[[2]]) <= 5) + + # Ensure cohorts are filled up + checkEquals(32, length(unique(res$cohort))) + + # TODO: Add tests to confirm (w.h.p.?) that certain distribution aspects are + # as expected (such as the zipfian on marginals) +} + +TestAll <- function(){ + TestGenerateTrueValuesAssoc() +} + +TestAll() diff --git a/tests/make_summary_assoc.py b/tests/make_summary_assoc.py new file mode 100755 index 00000000..0f1af04d --- /dev/null +++ b/tests/make_summary_assoc.py @@ -0,0 +1,377 @@ +#!/usr/bin/python +"""Given a regtest result tree, prints an HTML summary on stdout. + +See HTML skeleton in tests/assoctest.html. +""" + +import os +import re +import sys + + +SUMMARY_ROW = """\ + + + + + + + + + + + + + + + + + + + + + + + + + +""" + +# Navigation and links to plot. +DETAILS = """\ +

+ Up
+ %(name)s files

+""" + + +def FormatFloat(x, percent): + """Formats a floating-point number.""" + if percent: + return '{:.1f}%'.format(x * 100.0) + else: + return '{:.3f}'.format(x) + + +def FormatMeanWithSem(m_std_error, percent=False): + """Formats an estimate with standard error.""" + if m_std_error is None: + return '' + m, std_error = m_std_error + if std_error is None: + return FormatFloat(m, percent) + else: + return '{}±{}'.format( + FormatFloat(m, percent), + FormatFloat(std_error, percent)) + + +def Mean(l): + """Computes the mean (average) for a list of numbers.""" + if l: + return float(sum(l)) / len(l) + else: + return None + + +def SampleVar(l): + """Computes the sample variance for a list of numbers.""" + if len(l) > 1: + mean = Mean(l) + var = sum([(x - mean) ** 2 for x in l]) / (len(l) - 1) + return var + else: + return None + + +def StandardErrorEstimate(l): + """Returns the standard error estimate for a list of numbers. + + For a singleton the standard error is assumed to be 10% of its value. + """ + if len(l) > 1: + return (SampleVar(l) / len(l)) ** .5 + elif l: + return l[0] / 10.0 + else: + return None + + +def MeanOfMeans(dict_of_lists): + """Returns the average of averages with the standard error of the estimate. + """ + means = [Mean(dict_of_lists[key]) for key in dict_of_lists + if dict_of_lists[key]] + if means: + # Compute variances of the estimate for each sublist. + se = [StandardErrorEstimate(dict_of_lists[key]) ** 2 for key + in dict_of_lists if dict_of_lists[key]] + return (Mean(means), # Mean over all sublists + sum(se) ** .5 / len(se)) # Standard deviation of the mean + else: + return None + + +def ParseSpecFile(spec_filename, empty = False): + #Parses the spec (parameters) file. + + with open(spec_filename) as s: + spec_row = s.readline().split() + + spec_in_html = ' '.join('' % cell for cell in spec_row[1:len(spec_row)-1]) + if empty == True: + spec_in_html = ' '.join('' for cell in spec_row[1:len(spec_row)-1]) + + return spec_in_html + + +def ExtractTime(log_filename): + """Extracts the elapsed time information from the log file. + + Returns: + Elapsed time (in seconds) or None in case of failure. + """ + if os.path.isfile(log_filename): + with open(log_filename) as log: + log_str = log.read() + # Matching a line output by analyze.R. + match = re.search(r'Inference took ([0-9.]+) seconds', log_str) + if match: + return float(match.group(1)) + return None + + +def ParseMetrics(metrics_file, log_file, italics = False): + """Processes the metrics file. + + Args: + report_dir: A directory name containing metrics.csv and log.txt. + num_additional: A number of bogus candidates added to the candidate list. + + Returns a pair: + - A dictionary of metrics (some can be []). + - An HTML-formatted portion of the report row. 
+ """ + with open(metrics_file) as m: + m.readline() + metrics_row = m.readline().split(',') + + (td_chisq, ed_chisq, l1d, rtime, d1, d2) = metrics_row + + td_chisq = float(td_chisq) + ed_chisq = float(ed_chisq) + + l1d = float(l1d) + rtime = float(rtime) + + elapsed_time = ExtractTime(log_file) + + metrics_row_str = [ + '%s' % d1, + '%s' % d2, + '%.3f' % l1d, + str(rtime), + ] + + metrics_row_dict = { + 'd1': [d1], + 'd2': [d2], + 'l1d': [l1d], + 'rtime': [rtime], + 'chisqdiff': [abs(td_chisq - ed_chisq)], + } + + # return metrics formatted as HTML table entries + if(italics == True): + return (metrics_row_dict, + ' '.join('' % cell for cell in metrics_row_str)) + else: + return (metrics_row_dict, + ' '.join('' % cell for cell in metrics_row_str)) + + +def FormatCell1(test_case, test_instance, metrics_file, log_file, plot_file, + link_to_plots): + """Outputs an HTML table entry for the first cell of the row. + + The row is filled if the metrics file exist. The first cell contains a link + that for short tables points to a plot file inline, for large tables to an + external file. + + If the metrics file is missing, the link points to the log file (if one + exists) + """ + relpath_report = '{}/{}_report'.format(test_case, test_instance) + if os.path.isfile(metrics_file): + external_file = plot_file + if link_to_plots: + link = '#{}_{}'.format(test_case, test_instance) # anchor + else: + link = os.path.join(relpath_report, 'dist.png') + else: # no results likely due to an error, puts a link to the log file + external_file = log_file + link = os.path.join(relpath_report, 'log.txt') + + if os.path.isfile(external_file): + return ''.format(link, test_case) + else: # if no file to link to + return ''.format(test_case) + + +def FormatSummaryRow(metrics_lists): + """Outputs an HTML-formatted summary row.""" + means_with_sem = {} # SEM - standard error of the mean + + for key in metrics_lists: + means_with_sem[key] = MeanOfMeans(metrics_lists[key]) + # If none of the lists is longer than one element, drop the SEM component. + if means_with_sem[key] and max([len(l) for l in metrics_lists[key]]) < 2: + means_with_sem[key] = [means_with_sem[key][0], None] + + summary = { + 'name': 'Means', + 'mean_l1d': FormatMeanWithSem(means_with_sem['l1d'], percent=False), + # 'mean_chisqdiff': FormatMeanWithSem(means_with_sem['chisqdiff'], percent=False), + 'mean_rtime': FormatMeanWithSem(means_with_sem['rtime']), + } + return SUMMARY_ROW % summary + + +def FormatPlots(base_dir, test_instances): + """Outputs HTML-formatted plots.""" + result = '' + for instance in test_instances: + # A test instance is identified by the test name and the test run. + test_case, test_instance, _ = instance.split(' ') + instance_dir = test_case + '/' + test_instance + '_report' + if os.path.isfile(os.path.join(base_dir, instance_dir, 'dist.png')): + result += DETAILS % {'anchor': test_case + '_' + test_instance, + 'name': '{} (instance {})'.format(test_case, + test_instance), + 'instance_dir': instance_dir} + return result + + +def main(argv): + base_dir = argv[1] + num_instances = int(argv[2]) + + # This file has the test case names, in the order that they should be + # displayed. + path = os.path.join(base_dir, 'test-instances.txt') + with open(path) as f: + test_instances = [line.strip() for line in f] + + # Metrics are assembled into a dictionary of dictionaries. The top-level + # key is the metric name ('tv', 'fpr', etc.), the second level key is + # the test case. These keys reference a list of floats, which can be empty. 
+ metrics = { + 'l1d': {}, # l1 distance + 'chisqdiff': {}, # abs diff in values for the chisq test between true + # distr and estimated distr. + 'rtime': {}, # R run time + } + + # If there are too many tests, the plots are not included in the results + # file. Instead, rows' names are links to the corresponding .png files. + include_plots = len(test_instances) < 20 + include_plots = False + l1d_list = [] + l1d_list2 = [] + + for instance in test_instances: + # A test instance is idenfied by the test name and the test run. + test_case, test_instance = instance.split(' ') + + spec_file = os.path.join(base_dir, test_case, 'spec.txt') + if not os.path.isfile(spec_file): + raise RuntimeError('{} is missing'.format(spec_file)) + + spec_html = ParseSpecFile(spec_file) + metrics_html = '' # will be filled in later on, if metrics exist + + report_dir = os.path.join(base_dir, test_case, test_instance + '_report') + + metrics_file = os.path.join(report_dir, 'metrics.csv') + log_file = os.path.join(report_dir, 'log.txt') + plot_file = os.path.join(report_dir, 'dist.png') + + cell1_html = FormatCell1(test_case, test_instance, metrics_file, log_file, + plot_file, include_plots) + + if(int(test_instance) == 1): + l1d_list = [] + l1d_list2 = [] + + if os.path.isfile(metrics_file): + # ParseMetrics outputs an HTML table row and also updates lists + metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file) + l1d_list += metrics_dict['l1d'] + # Update the metrics structure. Initialize dictionaries if necessary. + for m in metrics: + if not test_case in metrics[m]: + metrics[m][test_case] = metrics_dict[m] + else: + metrics[m][test_case] += metrics_dict[m] + + print '{}{}{}'.format(cell1_html, spec_html, metrics_html) + + # Printing metrics 2 if available + metrics_file = os.path.join(report_dir, 'metrics_2.csv') + if (os.path.isfile(metrics_file)): + metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file, + italics = True) + l1d_list2 += metrics_dict['l1d'] + print '{}{}'.format(ParseSpecFile(spec_file, empty = + True), metrics_html) + + # Print summary of test instances + if(int(test_instance) == num_instances): + row_str = ['', '', + '%.3f±%.3f' % (Mean(l1d_list), StandardErrorEstimate(l1d_list)), + '', + ] + print '{}{}'.format(ParseSpecFile(spec_file, empty = + True), ' '.join('' % cell for cell in + row_str)) + if (os.path.isfile(metrics_file)): + row_str2 = ['', '', + '%.3f±%.3f' % (Mean(l1d_list2), StandardErrorEstimate(l1d_list2)), + '', + ] + print '{}{}'.format(ParseSpecFile(spec_file, empty = + True), ' '.join('' % cell for cell in + row_str2)) + + print FormatSummaryRow(metrics) + + print '' + print '
+ Test Case | Input Params | RAPPOR Params | Result Metrics
+ Input Params: d1: orig. support(var1); d2: orig. support(var2); n: num reports; e: num extras
+ RAPPOR Params: k: report bits; h: hashes; m: cohorts; p, q, f: probabilities
+ Result Metrics: d1: dimension of var1 solutions; d2: dimension of var2 solutions; tv: tot. var. distance; rtime: R runtime
+ Summary row: %(name)s | %(mean_l1d)s | %(mean_rtime)s

' # vertical space + + # Plot links. + if include_plots: + print FormatPlots(base_dir, test_instances) + else: + print ('Too many tests to include plots. ' + 'Click links within rows for details.
') + + +if __name__ == '__main__': + try: + main(sys.argv) + except RuntimeError, e: + print >>sys.stderr, 'FATAL: %s' % e + sys.exit(1) diff --git a/tests/params.csv b/tests/params.csv deleted file mode 100644 index a2114c90..00000000 --- a/tests/params.csv +++ /dev/null @@ -1,2 +0,0 @@ -k, h, m, p, q, f -16, 2, 4, 0.1, 0.9, 0.2 diff --git a/tests/rappor_assoc_sim.py b/tests/rappor_assoc_sim.py new file mode 100755 index 00000000..b46c8436 --- /dev/null +++ b/tests/rappor_assoc_sim.py @@ -0,0 +1,161 @@ +#!/usr/bin/python +# +# Copyright 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tool to run RAPPOR on simulated client input. + +TODO: fill up + +Input columns: client,true_value +Ouput coumns: client,cohort,rappor +""" + +import csv +import collections +import optparse +import os +import random +import sys +import time + +import rappor # client library +try: + import fastrand +except ImportError: + print >>sys.stderr, ( + "Native fastrand module not imported; see README for speedups") + fastrand = None + + +def log(msg, *args): + if args: + msg = msg % args + print >>sys.stderr, msg + + +def CreateOptionsParser(): + p = optparse.OptionParser() + + p.add_option( + '--num-bits', type='int', metavar='INT', dest='num_bits', default=16, + help='Number of bloom filter bits.') + p.add_option( + '--num-hashes', type='int', metavar='INT', dest='num_hashes', default=2, + help='Number of hashes.') + p.add_option( + '--num-cohorts', type='int', metavar='INT', dest='num_cohorts', + default=64, help='Number of cohorts.') + + p.add_option( + '-p', type='float', metavar='FLOAT', dest='prob_p', default=1, + help='Probability p') + p.add_option( + '-q', type='float', metavar='FLOAT', dest='prob_q', default=1, + help='Probability q') + p.add_option( + '-f', type='float', metavar='FLOAT', dest='prob_f', default=1, + help='Probability f') + + choices = ['simple', 'fast'] + p.add_option( + '-r', type='choice', metavar='STR', + dest='random_mode', default='fast', choices=choices, + help='Random algorithm (%s)' % '|'.join(choices)) + + return p + +def main(argv): + (opts, argv) = CreateOptionsParser().parse_args(argv) + + # Copy flags into params + params = rappor.Params() + params.num_bloombits = opts.num_bits + params.num_hashes = opts.num_hashes + params.num_cohorts = opts.num_cohorts + params.prob_p = opts.prob_p + params.prob_q = opts.prob_q + params.prob_f = opts.prob_f + + if opts.random_mode == 'simple': + irr_rand = rappor.SimpleIrrRand(params) + elif opts.random_mode == 'fast': + if fastrand: + log('Using fastrand extension') + # NOTE: This doesn't take 'rand'. It's seeded in C with srand(). + irr_rand = fastrand.FastIrrRand(params) + else: + log('Warning: fastrand module not importable; see README for build ' + 'instructions. 
Falling back to simple randomness.') + irr_rand = rappor.SimpleIrrRand(params) + else: + raise AssertionError + # Other possible implementations: + # - random.SystemRandom (probably uses /dev/urandom on Linux) + # - HMAC-SHA256 with another secret? This could match C++ byte for byte. + # - or srand(0) might do it. + + csv_in = csv.reader(sys.stdin) + csv_out = csv.writer(sys.stdout) + + # NOTE: We can also modify the output to include + # bloombits and the prr for each variable. + # This is useful for debugging purposes + header = ('client', 'cohort', 'irr1', 'irr2') + csv_out.writerow(header) + + # TODO: It would be more instructive/efficient to construct an encoder + # instance up front per client, rather than one per row below. + start_time = time.time() + + for i, (client_str, cohort_str, true_value_1, + true_value_2) in enumerate(csv_in): + if i == 0: + if client_str != 'client': + raise RuntimeError('Expected client header, got %s' % client_str) + if cohort_str != 'cohort': + raise RuntimeError('Expected cohort header, got %s' % cohort_str) + if true_value_1 != 'value1': + raise RuntimeError('Expected value1 header, got %s' % value) + if true_value_2 != 'value2': + raise RuntimeError('Expected value2 header, got %s' % value) + continue # skip header row + + #if i == 30: # EARLY STOP + # break + + if i % 10000 == 0: + elapsed = time.time() - start_time + log('Processed %d inputs in %.2f seconds', i, elapsed) + + cohort = int(cohort_str) % params.num_cohorts + secret = client_str + e = rappor.Encoder(params, cohort, secret, irr_rand) + + # For testing purposes, call e._internal_encode() + irr_1 = e.encode(true_value_1) + irr_2 = e.encode(true_value_2) + + irr_1_str = rappor.bit_string(irr_1, params.num_bloombits) + irr_2_str = rappor.bit_string(irr_2, params.num_bloombits) + + out_row = (client_str, cohort, irr_1_str, irr_2_str) + csv_out.writerow(out_row) + + +if __name__ == "__main__": + try: + main(sys.argv) + except RuntimeError, e: + log('rappor_sim.py: FATAL: %s', e) diff --git a/tests/uvals.csv b/tests/uvals.csv index cebc17ec..18600571 100644 --- a/tests/uvals.csv +++ b/tests/uvals.csv @@ -1,2 +1,2 @@ -google.com,intel.com,yahoo.com -ssl,nossl +str1 +option1
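A minimal standalone R sketch of the pair encoding used by CombineReports() in tests/compare_assoc.R before Decode2Way(); the toy 2-bit reports r1 and r2 below are illustrative assumptions, not values from this change. Each (var1 bit, var2 bit) pair is mapped, in "report1-major" order, to a one-hot block of 4 bits -- (0,0) -> 0001, (0,1) -> 0100, (1,0) -> 0010, (1,1) -> 1000 -- so a k1-bit and a k2-bit report combine into a 4 * k1 * k2 bit vector that can be decoded like a single RAPPOR report.

two_bits <- list(c(0, 0, 0, 1), c(0, 1, 0, 0), c(0, 0, 1, 0), c(1, 0, 0, 0))
r1 <- c(1, 0)  # toy 2-bit report for var1
r2 <- c(0, 1)  # toy 2-bit report for var2
# value of each pair: var2 bit + 2 * var1 bit, with var2 varying fastest
pairs <- as.vector(outer(r2, r1, function(z, t) z + 2 * t))
# expand each pair value into its one-hot 4-bit block
combined <- as.vector(sapply(pairs, function(z) two_bits[[z + 1]]))
length(combined)  # 16 = 4 * k1 * k2, matching params2$k <- (params$k ** 2) * 4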