diff --git a/analysis/DESCRIPTION b/analysis/DESCRIPTION new file mode 100644 index 00000000..b438cad5 --- /dev/null +++ b/analysis/DESCRIPTION @@ -0,0 +1,10 @@ +Package: rappor +Title: R package for Rappor Analysis +Description: R functions used for analyzing Rappor data. +Author: Rappor Discuss +Maintainer: Rappor Discuss +Copyright: Copyright (C) 2014 Google, Inc. +Version: 0.1 +License: Apache License 2.0 +Url: https://github.com/google/rappor +Depends: Matrix, RUnit, glmnet, limSolve, parallel diff --git a/analysis/NAMESPACE b/analysis/NAMESPACE new file mode 100644 index 00000000..6635d4c6 --- /dev/null +++ b/analysis/NAMESPACE @@ -0,0 +1,6 @@ +import(Matrix) +import(RUnit) +import(glmnet) +import(limSolve) +import(parallel) +exportPattern("^[^\\.]") diff --git a/analysis/R/alternative.R b/analysis/R/alternative.R index 3f0e66d3..8cb6442f 100755 --- a/analysis/R/alternative.R +++ b/analysis/R/alternative.R @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -library(limSolve) -library(Matrix) - # The next two functions create a matrix (G) and a vector (H) encoding # linear inequality constraints that a solution vector (x) must satisfy: # G * x >= H @@ -80,4 +77,4 @@ ConstrainedLinModel <- function(X,Y) { names(coefs) <- colnames(X) coefs -} \ No newline at end of file +} diff --git a/analysis/R/association.R b/analysis/R/association.R index d1c7b5ee..7b2219cc 100755 --- a/analysis/R/association.R +++ b/analysis/R/association.R @@ -12,16 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -library(parallel) # mclapply - -source.rappor <- function(rel_path) { - abs_path <- paste0(Sys.getenv("RAPPOR_REPO", ""), rel_path) - source(abs_path) -} - -source.rappor("analysis/R/util.R") # for Log -source.rappor("analysis/R/decode.R") # for ComputeCounts - # # Tools used to estimate variable distributions of up to three variables # in RAPPOR. This contains the functions relevant to estimating joint diff --git a/analysis/R/decode.R b/analysis/R/decode.R index 7d83f2b9..4ee2a223 100755 --- a/analysis/R/decode.R +++ b/analysis/R/decode.R @@ -12,18 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# -# This library implements the RAPPOR marginal decoding algorithms using LASSO. - -library(glmnet) - -# So we don't have to change pwd -source.rappor <- function(rel_path) { - abs_path <- paste0(Sys.getenv("RAPPOR_REPO", ""), rel_path) - source(abs_path) -} - -source.rappor('analysis/R/alternative.R') +# Implements the RAPPOR marginal decoding algorithms using LASSO. EstimateBloomCounts <- function(params, obs_counts) { # Estimates the number of times each bit in each cohort was set in original diff --git a/analysis/R/decode_ngrams.R b/analysis/R/decode_ngrams.R index e2585cbd..bf67f5fa 100755 --- a/analysis/R/decode_ngrams.R +++ b/analysis/R/decode_ngrams.R @@ -49,7 +49,6 @@ FindPairwiseCandidates <- function(report_data, N, ngram_params, params) { .ComputeDist <- function(i, inds, cohorts, reports, maps, params, num_ngrams_collected) { - library(glmnet) ind <- inds[[i]] cohort_subset <- lapply(1:num_ngrams_collected, function(x) cohorts[ind]) diff --git a/analysis/R/ngrams_simulation.R b/analysis/R/ngrams_simulation.R index ca7ce49b..2bcd39b0 100755 --- a/analysis/R/ngrams_simulation.R +++ b/analysis/R/ngrams_simulation.R @@ -18,15 +18,6 @@ # dictionary of terms over which we want to learn a distribution. This # mostly contains functions that aid in the generation of synthetic data. -library(RUnit) -library(parallel) - -source("analysis/R/encode.R") -source("analysis/R/decode.R") -source("analysis/R/simulation.R") -source("analysis/R/association.R") -source("analysis/R/decode_ngrams.R") - # The alphabet is the set of all possible characters that will appear in a # string. Here we use the English alphabet, but one might want to include # numbers or punctuation marks. @@ -240,11 +231,6 @@ EstimateDictionaryTrial <- function(N, str_len, num_strs, # # Returns: # List with recovered and true marginals. - - # We call the needed libraries here in order to make them available when this - # function gets called by BorgApply. Otherwise, they do not get included. - library(glmnet) - library(parallel) sim <- SimulateNGrams(N, ngram_params, str_len, num_strs = num_strs, alphabet, params, distribution) diff --git a/analysis/R/read_input.R b/analysis/R/read_input.R index 47f8be5d..5f79d686 100755 --- a/analysis/R/read_input.R +++ b/analysis/R/read_input.R @@ -15,16 +15,6 @@ # # Read parameter, counts and map files. -library(Matrix) - -source.rappor <- function(rel_path) { - abs_path <- paste0(Sys.getenv("RAPPOR_REPO", ""), rel_path) - source(abs_path) -} - -source.rappor("analysis/R/util.R") # for Log - - ReadParameterFile <- function(params_file) { # Read parameter file. Format: # k, h, m, p, q, f diff --git a/analysis/R/simulation.R b/analysis/R/simulation.R index 251c5959..27b194dc 100755 --- a/analysis/R/simulation.R +++ b/analysis/R/simulation.R @@ -16,9 +16,6 @@ # RAPPOR simulation library. Contains code for encoding simulated data and # creating the map used to encode and decode reports. -library(glmnet) -library(parallel) # mclapply - SetOfStrings <- function(num_strings = 100) { # Generates a set of strings for simulation purposes. strs <- paste0("V_", as.character(1:num_strings)) diff --git a/bin/decode_assoc.R b/bin/decode_assoc.R index 58e35f25..f7993a0c 100755 --- a/bin/decode_assoc.R +++ b/bin/decode_assoc.R @@ -113,26 +113,12 @@ if (!interactive()) { opts <- ParseOptions() } -# -# Load libraries and source our own code. -# - -library(RJSONIO) # toJSON() - -# So we don't have to change pwd -source.rappor <- function(rel_path) { - abs_path <- paste0(Sys.getenv("RAPPOR_REPO", ""), rel_path) - source(abs_path) -} - -source.rappor("analysis/R/association.R") -source.rappor("analysis/R/fast_em.R") -source.rappor("analysis/R/read_input.R") -source.rappor("analysis/R/util.R") - options(stringsAsFactors = FALSE) options(max.print = 100) # So our structure() debug calls look better +library(RJSONIO) # toJSON() +library(rappor) + CreateAssocStringMap <- function(all_cohorts_map, params) { # Processes the maps loaded using ReadMapFile and turns it into something # that association.R can use. Namely, we want a map per cohort. diff --git a/bin/decode_dist.R b/bin/decode_dist.R index 5c83f741..05e219e2 100755 --- a/bin/decode_dist.R +++ b/bin/decode_dist.R @@ -56,25 +56,10 @@ if (!interactive()) { opts <- ParseOptions() } -# -# Load libraries and source our own code. -# +options(stringsAsFactors = FALSE) library(RJSONIO) - -# So we don't have to change pwd -source.rappor <- function(rel_path) { - abs_path <- paste0(Sys.getenv("RAPPOR_REPO", ""), rel_path) - source(abs_path) -} - -source.rappor("analysis/R/read_input.R") -source.rappor("analysis/R/decode.R") -source.rappor("analysis/R/util.R") - -source.rappor("analysis/R/alternative.R") - -options(stringsAsFactors = FALSE) +library(rappor) main <- function(opts) { diff --git a/build.sh b/build.sh index 44c5cd51..740bd634 100755 --- a/build.sh +++ b/build.sh @@ -111,10 +111,16 @@ cpp-client() { popd } +r-package() { + R CMD BUILD analysis/ + sudo R CMD INSTALL rappor_0.1.tar.gz +} + if test $# -eq 0 ; then cpp-client doc fastrand + r-package else "$@" fi diff --git a/analysis/R/association_test.R b/tests/RUnit/association_test.R similarity index 98% rename from analysis/R/association_test.R rename to tests/RUnit/association_test.R index 0cd24ce0..85abf081 100755 --- a/analysis/R/association_test.R +++ b/tests/RUnit/association_test.R @@ -15,12 +15,7 @@ # Authors: vpihur@google.com (Vasyl Pihur), fanti@google.com (Giulia Fanti) library(RUnit) -source("analysis/R/encode.R") -source("analysis/R/decode.R") -source("analysis/R/simulation.R") -source("analysis/R/association.R") -source("analysis/R/fast_em.R") -source("analysis/R/util.R") +library(rappor) SamplePopulations <- function(N, num_variables = 1, params, variable_opts) { diff --git a/analysis/R/decode_test.R b/tests/RUnit/decode_test.R similarity index 99% rename from analysis/R/decode_test.R rename to tests/RUnit/decode_test.R index 74c46ce4..83ef4373 100755 --- a/analysis/R/decode_test.R +++ b/tests/RUnit/decode_test.R @@ -15,10 +15,10 @@ library(RUnit) library(abind) - -source('analysis/R/decode.R') +library(rappor) source('tests/gen_counts.R') + L1Distance <- function(X, Y) { # Computes the L1 distance between two named vectors common <- intersect(names(X), names(Y)) diff --git a/analysis/R/run_tests.R b/tests/RUnit/run_tests.R similarity index 95% rename from analysis/R/run_tests.R rename to tests/RUnit/run_tests.R index 8a4692fa..9ffb50be 100755 --- a/analysis/R/run_tests.R +++ b/tests/RUnit/run_tests.R @@ -17,10 +17,8 @@ # # Run unit tests for RAPPOR R code. -library(RUnit) - run_tests <- function() { - dirs <- "analysis/R" # Run from root + dirs <- "tests/RUnit" # Run from root test_suite <- defineTestSuite("rappor", dirs, testFileRegexp = "_test.R$", testFuncRegexp = "^Test") stopifnot(isValidTestSuite(test_suite)) diff --git a/analysis/R/unknowns_test.R b/tests/RUnit/unknowns_test.R similarity index 95% rename from analysis/R/unknowns_test.R rename to tests/RUnit/unknowns_test.R index 5efd7383..9478d775 100755 --- a/analysis/R/unknowns_test.R +++ b/tests/RUnit/unknowns_test.R @@ -27,12 +27,8 @@ # Both tests generate their own data. library(parallel) -source("analysis/R/encode.R") -source("analysis/R/decode.R") -source("analysis/R/simulation.R") -source("analysis/R/association.R") -source("analysis/R/decode_ngrams.R") -source("analysis/R/ngrams_simulation.R") +library(rappor) + alphabet <- letters options(warn = -1) diff --git a/tests/analyze_assoc.R b/tests/analyze_assoc.R index 5d78806f..5c50231e 100755 --- a/tests/analyze_assoc.R +++ b/tests/analyze_assoc.R @@ -49,11 +49,7 @@ if(!interactive()) { opts <- parse_args(OptionParser(option_list = option_list)) } -source("../analysis/R/encode.R") -source("../analysis/R/decode.R") -source("../analysis/R/simulation.R") -source("../analysis/R/read_input.R") -source("../analysis/R/association.R") +library(rappor) # This function processes the maps loaded using ReadMapFile # Association analysis requires a map object with a map @@ -123,4 +119,4 @@ main <- function(opts) { if(!interactive()) { main(opts) -} \ No newline at end of file +} diff --git a/tests/assoc_sim.R b/tests/assoc_sim.R index 3ff1e5df..c967e8bd 100755 --- a/tests/assoc_sim.R +++ b/tests/assoc_sim.R @@ -49,13 +49,9 @@ if(!interactive()) { help = "Run simulation with uniform distribution") ) opts <- parse_args(OptionParser(option_list = option_list)) -} +} -source("../analysis/R/encode.R") -source("../analysis/R/decode.R") -source("../analysis/R/simulation.R") -source("../analysis/R/read_input.R") -source("../analysis/R/association.R") +library(rappor) # Read unique values of reports from a csv file # Inputs: filename. The file is expected to contain two rows of strings diff --git a/tests/compare_dist.R b/tests/compare_dist.R index eb6521d3..4a99a328 100755 --- a/tests/compare_dist.R +++ b/tests/compare_dist.R @@ -36,6 +36,7 @@ if (is_main) { positional_arguments = 3) # input and output } +library(rappor) library(ggplot2) # Use CairoPNG if available. Useful for headless R. @@ -47,12 +48,6 @@ if (library(Cairo, quietly = TRUE, logical.return = TRUE)) { cat('Using png\n') } -source("analysis/R/read_input.R") -source("analysis/R/decode.R") -source("analysis/R/util.R") - -source("analysis/R/alternative.R") # temporary - LoadContext <- function(prefix_case) { # Creates the context, filling it with privacy parameters # Arg: diff --git a/tests/gen_counts.R b/tests/gen_counts.R index 769677c4..3602fd57 100755 --- a/tests/gen_counts.R +++ b/tests/gen_counts.R @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -source('analysis/R/read_input.R') +library(rappor) RandomPartition <- function(total, weights) { # Outputs a random partition according to a specified distribution diff --git a/tests/gen_true_values_test.R b/tests/gen_true_values_test.R index e46d1e2b..894affed 100755 --- a/tests/gen_true_values_test.R +++ b/tests/gen_true_values_test.R @@ -2,7 +2,7 @@ # # gen_reports_test.R -source('analysis/R/util.R') # Log() +library(rappor) source('tests/gen_true_values.R') # module under test