diff --git a/docs/UNDERSTAND_REPO_AND_GSOC_TESTS.md b/docs/UNDERSTAND_REPO_AND_GSOC_TESTS.md new file mode 100644 index 0000000..9f04704 --- /dev/null +++ b/docs/UNDERSTAND_REPO_AND_GSOC_TESTS.md @@ -0,0 +1,80 @@ +# changepoint repo quick guide + +This repository is the CRAN `changepoint` package. It provides exact and approximate changepoint detection algorithms with R front-end APIs and C back-end compute kernels. + +## 1) How this repo is organized + +- `R/`: + - User entry points: `cpt.mean()`, `cpt.var()`, `cpt.meanvar()` in `R/cpt.R`, and `cpt.reg()` in `R/CptReg.R`. + - Algorithm wrappers: `PELT`, `BINSEG`, `SEGNEIGH` wrappers call into C where needed. +- `src/`: + - Core high-performance implementations for PELT/BinSeg/regression changepoint routines. +- `tests/testthat/`: + - Regression tests for mean/variance/meanvar/regression behavior and plots/examples. +- `man/`: + - `.Rd` documentation, including package-level and function examples. + +## 2) Mental model for `changepoint` + +At a high level: + +1. You call a front-end function (for example `cpt.mean(data, method="PELT")`). +2. Input checks + penalty logic are applied in R. +3. The chosen method dispatches to method-specific R wrappers. +4. Wrappers call C implementations for speed where appropriate. +5. Results are wrapped into S4 classes (`cpt`, `cpt.reg`, `cpt.range`) with plotting/summary methods. + +## 3) Where to read first + +1. `DESCRIPTION` and `NAMESPACE` for package scope and exports. +2. `R/cpt.R` for top-level method/test-stat dispatch. +3. `R/CptReg.R` for regression-changepoint flow. +4. `R/PELT_one_func_minseglen.R` + `src/PELT_one_func_minseglen.c` to see R-to-C integration. +5. `tests/testthat/` for expected behavior and edge cases. + +## 4) Easy test from GSoC spec + +Use `inst/gsoc/easy_envcpt_ar_tests.R` in this repo. 
It: + +- simulates a series with changing AR structure, +- uses non-exported EnvCpt internals via `getFromNamespace("cpt.reg", "EnvCpt")`, +- fits AR1 and AR2 changepoint models (without calling `envcpt()`), +- saves a plot to `inst/gsoc/easy_envcpt_ar_plots.png`. + +Run: + +```bash +Rscript inst/gsoc/easy_envcpt_ar_tests.R +``` + +## 5) Medium test in this repo + +Added: `tests/testthat/test-cptreg-edgecases.R` + +Coverage-focused edge cases include: +- invalid method handling, +- invalid `minseglen` handling, +- warning path when `minseglen` is auto-bumped, +- multi-dataset array input path, +- unsupported `CROPS` path in `cpt.reg`. + +Run only this file quickly: + +```bash +R -q -e "library(changepoint); testthat::test_file('tests/testthat/test-cptreg-edgecases.R')" +``` + +Note: a full local `testthat::test_local('.')` run may fail on systems missing local Fortran toolchain libs due to native package compilation requirements. + +## 6) Hard test package scaffold + +A separate package scaffold is created at: + +- `/Users/kaikaizhang/Documents/research/gsoc-envcpt-ar-wrapper` + +It contains: +- a wrapper function for AR1/AR2 with optional trend, +- input validation, +- unit tests, +- GitHub Actions `R-CMD-check`, +- covr + codecov workflow. 
diff --git a/inst/gsoc/SUBMISSION_RESULTS.md b/inst/gsoc/SUBMISSION_RESULTS.md new file mode 100644 index 0000000..a5fba47 --- /dev/null +++ b/inst/gsoc/SUBMISSION_RESULTS.md @@ -0,0 +1,55 @@ +# GSoC test results + +## Easy + +- Script: `inst/gsoc/easy_envcpt_ar_tests.R` +- Plot output: `inst/gsoc/easy_envcpt_ar_plots.png` +- Reproducible command: + - `Rscript inst/gsoc/easy_envcpt_ar_tests.R` +- Latest local result: + - AR1 changepoints: `200` + - AR2 changepoints: `199` +- GitHub artifacts: + - Script: `https://github.com/KaiKz/changepoint/blob/gsoc-tests-deliverables/inst/gsoc/easy_envcpt_ar_tests.R` + - Plot: `https://github.com/KaiKz/changepoint/blob/gsoc-tests-deliverables/inst/gsoc/easy_envcpt_ar_plots.png` + +## Medium + +- Added tests: + - `tests/testthat/test-cptreg-edgecases.R` +- Reproducible command: + - `R -q -e "library(changepoint); testthat::test_file('tests/testthat/test-cptreg-edgecases.R')"` +- Latest local result: + - test file passes locally +- GitHub artifacts: + - Test file: `https://github.com/KaiKz/changepoint/blob/gsoc-tests-deliverables/tests/testthat/test-cptreg-edgecases.R` + - Commit: `https://github.com/KaiKz/changepoint/commit/d92ae02` + - PR: `https://github.com/KaiKz/changepoint/pull/new/gsoc-tests-deliverables` + +## Hard + +- Package scaffold path: + - `/Users/kaikaizhang/Documents/research/gsoc-envcpt-ar-wrapper` +- Key function: + - `R/fit_ar_changepoint.R` +- Reproducible commands: + - `R -q -e "testthat::test_local('.', reporter='summary')"` + - `R -q -e "cov <- covr::package_coverage(path='.'); covr::percent_coverage(cov)"` +- Latest local result: + - tests pass + - coverage: `85.71%` +- GitHub artifacts: + - Repository: `https://github.com/KaiKz/gsoc-envcpt-ar-wrapper` + - Main function: `https://github.com/KaiKz/gsoc-envcpt-ar-wrapper/blob/main/R/fit_ar_changepoint.R` + - Commit: `https://github.com/KaiKz/gsoc-envcpt-ar-wrapper/commit/717a815` + - CI workflow: 
`https://github.com/KaiKz/gsoc-envcpt-ar-wrapper/actions/workflows/R-CMD-check.yaml`
+  - Coverage workflow: `https://github.com/KaiKz/gsoc-envcpt-ar-wrapper/actions/workflows/coverage.yaml`
+
+## Submission links
+
+- Medium fork URL: `https://github.com/KaiKz/changepoint`
+- Medium commit URL: `https://github.com/KaiKz/changepoint/commit/d92ae02`
+- Medium PR URL: `https://github.com/KaiKz/changepoint/pull/new/gsoc-tests-deliverables`
+- Hard package repository URL: `https://github.com/KaiKz/gsoc-envcpt-ar-wrapper`
+- Hard CI URL: `https://github.com/KaiKz/gsoc-envcpt-ar-wrapper/actions/workflows/R-CMD-check.yaml`
+- Hard coverage URL: `https://github.com/KaiKz/gsoc-envcpt-ar-wrapper/actions/workflows/coverage.yaml`
diff --git a/inst/gsoc/easy_envcpt_ar_plots.png b/inst/gsoc/easy_envcpt_ar_plots.png
new file mode 100644
index 0000000..953603f
Binary files /dev/null and b/inst/gsoc/easy_envcpt_ar_plots.png differ
diff --git a/inst/gsoc/easy_envcpt_ar_tests.R b/inst/gsoc/easy_envcpt_ar_tests.R
new file mode 100644
index 0000000..72daac2
--- /dev/null
+++ b/inst/gsoc/easy_envcpt_ar_tests.R
@@ -0,0 +1,84 @@
+set.seed(20260331)
+
+suppressPackageStartupMessages({
+  library(EnvCpt)
+})
+
+# Simulate one series made of three consecutive AR segments.
+#
+# Segments one and two are AR(1) with coefficients 0.2 and 0.75; segment three
+# is AR(2) with coefficients (0.6, -0.25). Every segment holds `n_per_segment`
+# observations and passes sd = 1 to arima.sim(), so the dependence structure
+# changes after n_per_segment and after 2 * n_per_segment observations.
+# Returns a plain numeric vector of length 3 * n_per_segment.
+generate_piecewise_ar <- function(n_per_segment = 200) {
+  ar_coefs <- list(0.2, 0.75, c(0.6, -0.25))
+  segments <- lapply(ar_coefs, function(phi) {
+    as.numeric(stats::arima.sim(
+      n = n_per_segment,
+      model = list(ar = phi),
+      sd = 1
+    ))
+  })
+  # lapply() runs in list order, so random draws are consumed per segment.
+  unlist(segments)
+}
+
+# Use EnvCpt non-exported function, as required by the GSoC easy test.
+#
+# Regress x[t] on an intercept, an optional linear trend and `order` lagged
+# values, then search for changepoints with cpt.reg() using PELT.
+#   x:          numeric series with more than 100 points and no NA values.
+#   order:      AR order, 1 or 2; only the first element is used.
+#   with_trend: single TRUE/FALSE; TRUE adds a time-index regressor.
+#   minseglen:  forwarded unchanged to cpt.reg().
+# Returns the value of the cpt.reg() call.
+fit_envcpt_nonexported_ar <- function(x, order = c(1L, 2L), with_trend = FALSE, minseglen = 20L) {
+  order <- as.integer(order[1])
+  stopifnot(is.numeric(x), length(x) > 100, !anyNA(x))
+  stopifnot(order %in% c(1L, 2L))
+  # An NA flag would pass the type/length checks and then break `if ()` below.
+  stopifnot(is.logical(with_trend), length(with_trend) == 1L, !is.na(with_trend))
+
+  cpt_reg <- utils::getFromNamespace("cpt.reg", "EnvCpt")
+
+  # embed() yields rows (x[t], x[t - 1], ..., x[t - order]); as.numeric() drops
+  # attributes such as a ts class, which embed() would otherwise reject.
+  lagged <- stats::embed(as.numeric(x), order + 1L)
+  n_obs <- nrow(lagged)
+  trend <- if (with_trend) seq_len(n_obs) else NULL
+
+  # Columns: response, intercept, optional trend, lag 1, ..., lag `order`.
+  # cbind() skips the NULL trend; deparse.level = 0 keeps columns unnamed.
+  design <- cbind(lagged[, 1], rep(1, n_obs), trend, lagged[, -1], deparse.level = 0)
+
+  cpt_reg(
+    data = design,
+    method = "PELT",
+    minseglen = minseglen,
+    dist = "Normal",
+    class = TRUE,
+    param.estimates = TRUE
+  )
+}
+
+series <- generate_piecewise_ar()
+
+fit_ar1 <- fit_envcpt_nonexported_ar(series, order = 1L, with_trend = FALSE)
+fit_ar2 <- fit_envcpt_nonexported_ar(series, order = 2L, with_trend = FALSE)
+
+# The output path is relative, so run this script from the repository root.
+png("inst/gsoc/easy_envcpt_ar_plots.png", width = 1600, height = 700, res = 130)
+par(mfrow = c(1, 2))
+plot(fit_ar1, main = "EnvCpt non-exported AR1 changepoints")
+plot(fit_ar2, main = "EnvCpt non-exported AR2 changepoints")
+dev.off()
+
+# Report the detected changepoint locations for both fits.
+cat("AR1 changepoints:\n")
+print(cpts(fit_ar1))
+cat("AR2 changepoints:\n")
+print(cpts(fit_ar2))
+
+cat("\nSession info:\n")
+print(sessionInfo())
diff --git a/tests/testthat/test-cptreg-edgecases.R b/tests/testthat/test-cptreg-edgecases.R
new file mode 100644
index 0000000..b511537
--- /dev/null
+++ b/tests/testthat/test-cptreg-edgecases.R
@@ -0,0 +1,57 @@
+context("cpt.reg edge-case tests")
+
+# Shared fixture: response with a mean shift (0 -> 2) at the midpoint, an
+# intercept column and two noise regressors, laid out as cbind(y, 1, x1, x2).
+set.seed(42)
+n <- 120
+x1 <- rnorm(n)
+x2 <- rnorm(n)
+y <- c(rnorm(n / 2, mean = 0), rnorm(n / 2, mean = 2))
+reg_data <- cbind(y, 1, x1, x2)
+
+test_that("unsupported method is rejected", {
+  expect_error(
+    cpt.reg(reg_data, method = "BinSeg"),
+    "Invalid method, must be AMOC or PELT"
+  )
+})
+
+test_that("invalid minseglen values are rejected", {
+  expect_error(
+    cpt.reg(reg_data, minseglen = 0),
+    "must be positive integer"
+  )
+  expect_error(
+    cpt.reg(reg_data, minseglen = 2.5),
+    "must be positive integer"
+  )
+})
+
+test_that("small minseglen is bumped to number of columns", {
+  expect_warning(
+    fit <- cpt.reg(reg_data, method = "AMOC", minseglen = 1),
+    "minseglen is too small"
+  )
+  expect_s4_class(fit, "cpt.reg")
+})
+
+test_that("multiple datasets return list of cpt.reg", {
+  reg_data_2 <- reg_data
+  reg_data_2[, 1] <- reg_data_2[, 1] + rnorm(nrow(reg_data_2), sd = 0.05)
+  # Stack both designs along the first dimension: (dataset, row, column).
+  arr <- array(NA_real_, dim = c(2, nrow(reg_data), ncol(reg_data)))
+  arr[1, , ] <- reg_data
+  arr[2, , ] <- reg_data_2
+  out <- cpt.reg(arr, method = "AMOC", minseglen = 5)
+  expect_type(out, "list")
+  expect_length(out, 2)
+  expect_s4_class(out[[1]], "cpt.reg")
+  expect_s4_class(out[[2]], "cpt.reg")
+})
+
+test_that("CROPS penalty remains unsupported for cpt.reg", {
+  expect_error(
+    cpt.reg(reg_data, penalty = "CROPS", pen.value = c(2, 4)),
+    "CROPS has not yet been implemented for cpt.reg"
+  )
+})