diff --git a/README.md b/README.md
index 5da46e4..a81e9e9 100644
--- a/README.md
+++ b/README.md
@@ -44,46 +44,78 @@ bash build.sh
 
 ## Usage
 
-`yamet` processes (covered CpG) DNA methylation report(s) (`-cell` argument), a reference file listing all CpG positions in a genome (`--reference`), and a bedfile specifying the genomic regions to calculate scores for (`--intervals`; e.g. promoters, genes, etc). Full CLI args:
+`yamet` processes (covered CpG) DNA methylation report(s), a reference file listing all CpG positions in a genome, and a bedfile specifying the genomic regions to calculate scores for (e.g. promoters, genes, etc). Full CLI args:
 
 ```text
-Usage:
-  yamet (-c <cell>... | <cell>...) \
-        -r <reference> \
-        -i <intervals> \
-        [OPTIONS]
-
-Required inputs:
-  -c --cell <cell>...                 One or more tab-separated methylation files,
-                                      OR provide them directly as positional arguments.
-  <cell>...                           (Positional alternative) Cell files (same format as above).
-  -r --reference <reference>          Reference file, tab-separated and sorted by chromosome/position.
-  -i --intervals <intervals>          BED file of intervals of interest.
-
-Optional output:
-  -d --det-out <file>                 Path to detailed output file.
-  -m --meth-out <file>                Path to average methylation output file.
-  -o --out <file>                     Path to simple output file.
-
-Resource options:
-  --cores <n>                         Number of cores for parallel parsing
-                                      [default: 0, implying program decides].
-  --chunk-size <size>                 Buffer size per file (e.g., 64K, 128M, 2G) [default: 64K].
-
-Optional input control:
-  --skip-header[=<n>]                 Skip <n> lines in all input files [default: 1].
-  --skip-header-cell[=<n>]            Skip <n> lines in cell files (overrides --skip-header).
-  --skip-header-reference[=<n>]       Skip <n> lines in reference file (overrides --skip-header).
-  --skip-header-intervals[=<n>]       Skip <n> lines in intervals file (overrides --skip-header).
-
-Verbose/debugging:
-  --print-intervals                   Print parsed intervals file.
-  --print-reference                   Print parsed reference file.
-  --print-sampens[=<true|false>]      Print computed sample entropies [default: true].
-
-Miscellaneous:
-  -h --help                           Show help message.
-  --version                           Show version information.
+yamet
+
+input:
+  -c [ --cytosine_report ] arg          Per-cell cytosine report file(s) 
+                                        (Bismark-like for covered cytosines). 
+                                        Synonyms: --cytosine_report, --cell, 
+                                        -c.
+                                        Tab-separated, sorted by chromosome and
+                                        position:
+                                          chr   pos   meth_reads   total_reads 
+                                          rate
+  -r [ --cytosine_locations ] arg       Genomic locations of all cytosines 
+                                        (typically CpGs). Synonyms: 
+                                        --cytosine_locations, --reference, -r.
+                                        Required to reconstruct contiguous CpG 
+                                        sequences.
+                                        Columns:
+                                          chr   pos
+  -i [ --regions ] arg                  BED file defining genomic regions where
+                                        entropies will be computed. Synonyms: 
+                                        --regions, --features, --target, 
+                                        --intervals, -i.
+                                        Columns:
+                                          chr   start   end
+  --skip-header-all [=arg(=1)]          Header lines to skip in all input files
+                                        (default: 0). Synonyms: 
+                                        --skip-header-all, --skip-header.
+  --skip-header-cytosine_report [=arg(=1)]
+                                        Header lines to skip in 
+                                        cytosine_report/cell files (default: 
+                                        0).
+  --skip-header-cytosine_locations [=arg(=1)]
+                                        Header lines to skip in 
+                                        cytosine_locations/reference file 
+                                        (default: 0).
+  --skip-header-regions [=arg(=1)]      Header lines to skip in 
+                                        regions/features/target/intervals file 
+                                        (default: 0).
+
+output:
+  -d [ --det-out ] arg                  (optional) path to detailed output file
+  -n [ --norm-det-out ] arg             (optional) path to detailed normalized 
+                                        output file
+  -m [ --meth-out ] arg                 (optional) path to average methylation 
+                                        output file
+  --all-meth [=arg(=true)] (=false)     If true, include all CpGs in 
+                                        methylation summaries, including those 
+                                        not used for template construction 
+                                        (default: false).
+  -o [ --out ] arg                      (optional) path to simple output file
+
+resource utilisation:
+  --cores arg (=0)                      number of cores used for simultaneously
+                                        parsing methylation files
+  --chunk-size arg (=64K)               size of the buffer (per file) used for 
+                                        reading data. Can be specified as a 
+                                        positive integer (bytes) or with a 
+                                        suffix: B, K, M, G.
+
+verbose:
+  --print-intervals                     print parsed intervals file
+  --print-reference                     print parsed reference file
+  --print-sampens [=arg(=true)] (=true) print computed sample entropies
+
+misc:
+  -h [ --help ]                         print help message
+  --version                             current version information
+
+
 ```
 
 ## Repository
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 832011d..311a818 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -34,7 +34,7 @@ rule all:
         # op.join("annotation", "mm10", "done.flag"),  ## this to download regions for Ecker's
         # op.join("data", "crc", "download.flag"),  # this to download CRC cytosine reports, caution will redownload them
         # op.join("data", "crc", "results",  "crc.html") , ## this requires the `download.flag` being present (and the cytosine reports; dependencies are not well captured)
-        # op.join(op.expanduser("~"), ".local", "yamet", "bin", "yamet")
+        op.join(op.expanduser("~"), ".local", "yamet", "bin", "yamet"),
         # op.join('scnmt_gastrulation_data', 'downloaded_data.tsv'), ## this is Argelaguet's gastrulation
         # expand(
         #     op.join(
@@ -58,12 +58,16 @@ rule all:
 
 rule install_yamet:
     """
-    Binary to goes to ~/.local/yamet; export path accordingly
+    Binary to goes to ~/.local/yamet; exported path accordingly
     """
     conda:
         "envs/yamet.yml"
     output:
         op.join(op.expanduser("~"), ".local", "yamet", "bin", "yamet")
+    priority:
+        100
+    threads:
+        workflow.cores
     log:
         op.join("logs", "build.log")
     shell:
diff --git a/workflow/rules/crc.smk b/workflow/rules/crc.smk
index 6b2d8a2..080327b 100644
--- a/workflow/rules/crc.smk
+++ b/workflow/rules/crc.smk
@@ -45,6 +45,8 @@ ANNOTATIONS = {
     "chip": ["H3K27me3", "H3K9me3", "H3K4me3"],
     "lad": ["laminb1"],
     "genes": ["genes"],
+    "lines": ["lines"],
+    "sines": ['sines'],
     "cpgIslandExt": ['cpgIslandExt'],
     "scna": ["crc01_nc_scna", "crc01_gain_scna", "crc01_lost_scna"]
 }
@@ -224,10 +226,12 @@ rule run_yamet_on_separate_features:
     output:
         simple_uncomp = temp(op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.out")),
         det_uncomp = temp(op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.det.out")),
+        norm_det_uncomp=temp(op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.norm.det.out")),
         meth_uncomp = temp(op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.meth.out")),
         simple=op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.out.gz"),
         det=op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.det.out.gz"),
-        meth=op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.meth.out.gz")
+        meth=op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.meth.out.gz"),
+        norm_det=op.join(CRC_OUTPUT, "{subcat}_{cat}_{patient}_{location}.norm.det.out.gz")
     log:
         op.join('logs', 'yamet_{subcat}_{cat}_{patient}_{location}.log')
     group:
@@ -246,7 +250,8 @@ rule run_yamet_on_separate_features:
          --print-sampens F \
          --out {output.simple_uncomp} \
          --det-out {output.det_uncomp} \
-         --meth-out {output.meth_uncomp} &> {log}
+         --meth-out {output.meth_uncomp} \
+         --norm-det-out {output.norm_det_uncomp} &> {log}
         
         gzip --keep -f {params.path}/{wildcards.subcat}_{wildcards.cat}_{wildcards.patient}_{wildcards.location}*out  &>> {log}
         """
@@ -267,6 +272,8 @@ rule render_crc_report:
         list_relevant_yamet_outputs()
     params:
         output_path=CRC_OUTPUT
+    threads:
+        round(workflow.cores/2)
     output:
         op.join(CRC, "results", "crc.html")
     log:
@@ -376,9 +383,11 @@ rule run_yamet_on_windows:
         simple_uncomp=temp(op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.out")),
         det_uncomp=temp(op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.det.out")),
         meth_uncomp=temp(op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.meth.out")),
+        norm_det_uncomp=temp(op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.norm.det.out")),        
         simple=op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.out.gz"),
         det=op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.det.out.gz"),
-        meth=op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.meth.out.gz")
+        meth=op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.meth.out.gz"),
+        norm_det=op.join(CRC_WINDOWS_OUTPUT, "{win_size}_{patient}_{location}.norm.det.out.gz")
     log:
         op.join('logs', 'yamet_{win_size}_{patient}_{location}.log')
     group:
@@ -397,7 +406,8 @@ rule run_yamet_on_windows:
          --print-sampens F \
          --out {output.simple_uncomp} \
          --det-out {output.det_uncomp} \
-         --meth-out {output.meth_uncomp} &> {log}
+         --meth-out {output.meth_uncomp} \
+         --norm-det-out {output.norm_det_uncomp} &> {log}
 
         gzip --keep -f {params.path}/{wildcards.win_size}_{wildcards.patient}_{wildcards.location}*out  &>> {log}
         """
@@ -416,9 +426,9 @@ rule get_scna_patient1_from_supplementary_data:
     conda:
         op.join("..", "envs", "r.yml")
     input:
-        op.join("src", "scna_hg19.xlsx"),
+        op.join(".", "src", "scna_hg19.xlsx"),
     output:
-        scna_bed = op.join(HG19_BASE, "patient_crc01_scna.scna.bed.gz")
+        scna_bed = op.join(HG19_BASE, "patient_crc01_scna.scna.bed.gz.pre")
     script:
         "src/parse_scna.R"
 
@@ -426,7 +436,7 @@ rule split_patient1_crc_in_kept_lost_gained:
     conda:
         op.join("..", "envs", "r.yml")
     input:
-        scna_bed = op.join(HG19_BASE, "patient_crc01_scna.scna.bed.gz"),
+        scna_bed = op.join(HG19_BASE, "patient_crc01_scna.scna.bed.gz.pre"),
         genome_sizes = op.join(HG19_BASE, "genome.sizes"),
     output:
         uncomp = temp(op.join(HG19_BASE, "crc01_scna.bed")),
diff --git a/workflow/rules/hg19.smk b/workflow/rules/hg19.smk
index 43b4800..85f36a0 100644
--- a/workflow/rules/hg19.smk
+++ b/workflow/rules/hg19.smk
@@ -43,6 +43,23 @@ rule build_genome_hg19_ref:
         "src/make_ref.sh"
 
 
+rule get_hg19_lines:
+    conda:
+        op.join("..", "envs", "processing.yml")
+    output:
+        op.join(HG19_BASE, "rmsk.lines.bed.gz"),
+    script:
+        "src/download_hg19_lines.sh"
+
+        
+rule get_hg19_sines:
+    conda:
+        op.join("..", "envs", "processing.yml")
+    output:
+        op.join(HG19_BASE, "rmsk.sines.bed.gz"),
+    script:
+        "src/download_hg19_sines.sh"
+        
 rule get_genes_hg19:
     conda:
         op.join("..", "envs", "processing.yml")
@@ -51,6 +68,16 @@ rule get_genes_hg19:
     script:
         "src/download_hg19_genes.sh"
 
+rule uncompress_repeats:
+    conda:
+        op.join("..", "envs", "processing.yml")
+    input:
+        op.join(HG19_BASE, "rmsk.{type}.bed.gz"),        
+    output:
+        op.join(HG19_BASE, "{type}.{type}.bed"),
+    shell:
+        "gunzip -c {input} > {output}"
+
 
 rule uncompress_hg19_genes:
     conda:
diff --git a/workflow/rules/mm10.smk b/workflow/rules/mm10.smk
index 2d730ed..7e09ec1 100644
--- a/workflow/rules/mm10.smk
+++ b/workflow/rules/mm10.smk
@@ -6,7 +6,7 @@ CHRS = [str(i) for i in range(1, 20)] + ["X", "Y"]
 MM10_BASE = "mm10"
 
 
-rule mm10_chr_ref:
+rule mm10_per_chr_ref:
     conda:
         op.join("..", "envs", "processing.yml")
     output:
@@ -18,7 +18,7 @@ rule mm10_chr_ref:
         "src/get_chr_ref.sh"
 
 
-rule mm10_ref:
+rule mm10_aggregate_ref:
     input:
         expand(op.join(MM10_BASE, "{chr}.{{meth_pat}}.ref"), chr=CHRS),
     params:
@@ -46,7 +46,24 @@ rule get_mm10_genes:
     script:
         "src/download_mm10_genes.sh"
 
+rule get_mm10_lines:
+    conda:
+        op.join("..", "envs", "processing.yml")
+    output:
+        op.join(MM10_BASE, "lines.bed.gz"),
+    script:
+        "src/download_hg19_lines.sh"
+
+        
+rule get_mm10_sines:
+    conda:
+        op.join("..", "envs", "processing.yml")
+    output:
+        op.join(MM10_BASE, "sines.bed.gz"),
+    script:
+        "src/download_mm10_sines.sh"
 
+        
 ENCODE_MAP = {
     "h3k4me3": "ENCFF160SCR",
     "h3k9me3": "ENCFF658QTP",
diff --git a/workflow/rules/src/crc.Rmd b/workflow/rules/src/crc.Rmd
index 4344c1d..2f3b503 100644
--- a/workflow/rules/src/crc.Rmd
+++ b/workflow/rules/src/crc.Rmd
@@ -49,6 +49,7 @@ opts_chunk$set(
 
 ```{r save_snmk_object_for_debugging, include = FALSE}
 saveRDS(file = 'snmk_object_features.rds', object = snakemake)
+## snakemake <- readRDS('../../snmk_object_features.rds')
 ```
 
 
@@ -58,11 +59,9 @@ sink(log)
 sink(log, type = "message")
 ```
 
-# Short (global) reports by annotation  {.tabset .tabset-pills}
 
-## Modelling
 
-Firstly, we read in the yamet output files and create a table storing the biopsy location, patient id and genomic segmentation.
+These are not det.out.gz, but only out.gz files.
 
 ```{r, import_short_reports}
 filepaths <- list.files(
@@ -71,9 +70,9 @@ filepaths <- list.files(
 )
 
 filepaths <- filepaths[!grepl("\\.det\\.out\\.gz$", filepaths)]
-filepaths <- filepaths[!grepl("\\.norm\\.out\\.gz$", filepaths)]
 filepaths <- filepaths[!grepl("\\.meth\\.out\\.gz$", filepaths)]
 
+
 regex <- "(.*)_(.*)_(.*)_(.*).out.gz"
 process_file <- function(filepath) {
   filename <- basename(filepath)
@@ -98,17 +97,6 @@ files$location <- factor(
 )
 ```
 
-Patient as random effect
-
-```{r, lmer}
-lmer.files <- lmer(
-  sampen ~ location * avg_meth + (1 | patient),
-  data = subset(files, annotation == "pmds")
-)
-summary(lmer.files)
-anova(lmer.files, type = 3)
-```
-
 ## Scatters by location and annotation
  
 ```{r}
@@ -262,169 +250,360 @@ ggplot(ranges, aes(color = location)) +
 
 ```
 
-## Others
+# After normalization - same plots (the yamet-way)
 
-```{r, fig.height = 10, fig.width = 10}
+That is, not by locus but by cell norm sampEns (that are calculated according to some regionset anyway)
 
-ggplot(ranges, aes(x = compartment, y = location, fill = sampen_mean)) +
-  geom_tile(color = "white") +
-  scale_fill_viridis_c() +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 90))
+```{r, import_short_reports_norm}
+filepaths <- list.files(
+  snakemake@params[["output_path"]],
+  pattern = "out\\.gz$", full.names = TRUE)
 
 
-ggplot(ranges, aes(x = compartment, y = location, fill = meth_mean)) +
-  geom_tile(color = "white") +
-  scale_fill_viridis_c() +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 90))
+filepaths <- filepaths[!grepl("\\.det\\.out\\.gz$", filepaths)]
+filepaths <- filepaths[!grepl("\\.norm\\.out\\.gz$", filepaths)]
+filepaths <- filepaths[!grepl("\\.meth\\.out\\.gz$", filepaths)]
+
+head(filepaths)
+regex <- "(.*)_(.*)_(.*)_(.*).out.gz"
+process_file <- function(filepath) {
+  filename <- basename(filepath)
+
+  dt <- read_tsv(filepath, na = "-1", show_col_types = FALSE)
+  dt <- dt[, c("sampen_norm", "avg_meth")]
+  colnames(dt) <- c('sampen', 'avg_meth') ## caution they're normalized though
+
+  dt$annotation <- gsub(regex, "\\1", filename)
+  dt$patient <- gsub(regex, "\\3", filename)
+  dt$location <- gsub(regex, "\\4", filename)
+
+  return(dt)
+}
+
+head(filepaths)
+
+files <- do.call(rbind, bplapply(filepaths, process_file, BPPARAM = param))
+
+files$annotation <- as.factor(files$annotation)
+files$patient <- as.factor(files$patient)
+files$location <- factor(
+  files$location,
+  levels = c("NC", "PT", "LN", "ML", "MP", "MO")
+)
+```
+
+## Scatters by location and annotation
+ 
+```{r}
+ggplot(
+  ## subset(files, patient == "CRC01"),
+  files,
+  aes(y = sampen, x = avg_meth, color = location, shape = patient)
+) +
+  geom_point(size = 0.9, alpha = 0.2) +
+  scale_color_brewer(palette = "Set1") +
+  scale_shape_manual(values = 1:nlevels(files$patient)) +
+  facet_wrap(~annotation) +
+  labs(
+    x = "average methylation",
+    y = "norm sample entropy"
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 2))) +
+  theme_ng()
+```
 
+## Scatters by location and annotation and patient
+ 
+```{r, fig.height = 15}
+ggplot(
+  ## subset(files, patient == "CRC01"),
+  files,
+  aes(y = sampen, x = avg_meth, color = location)
+) +
+  geom_point(size = 0.9, alpha = 0.2) +
+  scale_color_brewer(palette = "Set1") +
+  facet_grid(annotation ~ patient) +
+  labs(
+    x = "average methylation",
+    y = "norm sample entropy"
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 2))) +
+  theme_ng() +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
 ```
 
-# Full (local) reports for **downsampled** PMDs and CRC01
+## Scatters by location and annotation and patient (shorter)
+ 
+```{r, fig.height = 3.2}
+ggplot(
+  subset(files, annotation %in% c("laminb1", "H3K27me3", "H3K9me3")),
+  aes(y = sampen, x = avg_meth, color = location)
+) +
+  geom_point(size = 0.9, alpha = 0.2) +
+  scale_color_brewer(palette = "Set1") +
+  facet_grid(annotation ~ patient) +
+  labs(
+    x = "average methylation",
+    y = "norm sample entropy"
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 2))) +
+  theme_ng() +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
+```
 
-Caution downsampled to some rows only.
+## sampEn boxes by location and annotation - crc01
 
-```{r, import_log_reports}
-DOWNSAMPLING <- 5e4
+```{r, fig.height = 6, fig.width = 10}
+ggplot(
+  subset(files, patient == "CRC01"),
+  aes(y = sampen, x = annotation, color = location)
+) +
+  geom_boxplot(
+    outlier.size = 0.6,
+    outlier.alpha = 0.3
+  ) +
+  labs(y = "norm sample entropy") +
+  geom_jitter(
+    position = position_dodge(width = 0.75),
+    size = 0.6, alpha = 0.3
+  ) +
+  scale_color_brewer(palette = "Dark2") +
+  theme_ng() +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
+```
 
-annotation <- "pmds"
-patient <- "CRC01"
+## meth boxes by location and annotation - crc01
 
-local <- list()
-for (location in c("NC", "PT", "LN", "ML", "MP")) {
-  filepaths <- list.files(
-    snakemake@params[["output_path"]],
-    pattern = "\\.det\\.out\\.gz$", full.names = TRUE
-  )
-  filepaths <- grep(location, filepaths, value = TRUE)
-  filepaths <- grep(annotation, filepaths, value = TRUE)
-  filepaths <- grep(patient, filepaths, value = TRUE)
-
-  d <- bplapply(filepaths, \(filepath) {
-    dt <- read_tsv(
-      filepath,
-      na = "-1", n_max = DOWNSAMPLING, show_col_types = FALSE
-    )
-    colnames(dt) <- basename(colnames(dt))
-    dt
-  }, BPPARAM = param)
-
-  d2 <- bplapply(d, \(x) {
-    avg_sampen <- apply(
-      x[, grep(location, colnames(x))], 1, \(y) mean(y, na.rm = TRUE)
-    )
-    median_sampen <- apply(
-      x[, grep(location, colnames(x))], 1, \(y) median(y, na.rm = TRUE)
-    )
-    q25_sampen <- apply(
-      x[, grep(location, colnames(x))], 1, \(y) quantile(y, 0.25, na.rm = TRUE)
-    )
-    q75_sampen <- apply(
-      x[, grep(location, colnames(x))], 1, \(y) quantile(y, 0.75, na.rm = TRUE)
-    )
-    cbind(x, data.frame(
-      avg_sampen = avg_sampen, median_sampen = median_sampen,
-      q25_sampen = q25_sampen, q75_sampen = q75_sampen,
-      location = location,
-      patient = patient,
-      annotation = annotation
-    ))
-  }, BPPARAM = param)
-
-  local[[location]] <- d2[[1]]
-  rm(d, d2)
-}
+```{r, fig.height = 6, fig.width = 10}
+ggplot(
+  subset(files, patient == "CRC01"),
+  aes(y = avg_meth, x = annotation, color = location)
+) +
+  geom_boxplot(
+    outlier.size = 0.6,
+    outlier.alpha = 0.3
+  ) +
+  labs(y = "average methylation") +
+  geom_jitter(
+    position = position_dodge(width = 0.75),
+    size = 0.6, alpha = 0.3
+  ) +
+  scale_color_brewer(palette = "Dark2") +
+  theme_ng() +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
 ```
 
-## Scatters {.tabset .tabset-pills}
-
-### By meth NC
-
-```{r, fig.width = 10, fig.height = 10}
-pal <- viridis(10, alpha = 0.8)
-
-beta2m <- function(betas, epsilon = 1e-05) {
-  if (!is.numeric(betas)) {
-    stop("Non-numeric betas")
-  }
-  if (!(is.numeric(epsilon) && (!is.na(epsilon)))) {
-    stop("Epison not valid")
-  }
-  if (epsilon < 0 || epsilon >= 0.5) {
-    stop("Not 0 < epsilon < 0.5")
-  }
-  betas[betas < epsilon] <- epsilon
-  betas[betas > (1 - epsilon)] <- 1 - epsilon
-  return(log2(betas / (1 - betas)))
-}
+## Range plots - by compartment
 
-pmds <- data.frame(
-  shannon_NC = local$NC$shannon,
-  meth_NC = local$NC$avg_meth,
-  med_sampen_NC = local$NC$median_sampen,
-  shannon_PT = local$PT$shannon,
-  meth_PT = local$PT$avg_meth,
-  med_sampen_PT = local$PT$median_sampen
-)
-pmds$meth_change <- pmds$meth_PT - pmds$meth_NC
-pmds <- na.omit(pmds)
+```{r, fig.height = 10, fig.width = 10}
 
-pmds$by_meth_nc <- cut(pmds$meth_NC, breaks = 10)
-pmds$by_meth_pt <- cut(pmds$meth_PT, breaks = 10)
 
-plot(pmds[, grep("by", colnames(pmds), invert = TRUE)],
-  pch = ".", col = pal[as.numeric(pmds$by_meth_nc)], main = "by_meth_nc"
-)
+ranges <- as.data.frame(files %>%
+   filter(patient == "CRC01") %>%
+   group_by(compartment = annotation, location) %>%
+   summarise(
+     sampen_mean = mean(sampen, na.rm = TRUE),
+     sampen_min  = min(sampen, na.rm = TRUE),
+     sampen_max  = max(sampen, na.rm = TRUE),
+     meth_mean   = mean(avg_meth, na.rm = TRUE),
+     meth_min    = min(avg_meth, na.rm = TRUE),
+     meth_max    = max(avg_meth, na.rm = TRUE)))
+
+## ggplot(ranges, aes(x = meth_mean, y = sampen_mean, color = location)) +
+##   geom_point(shape = 3, size = 1.2) +  # "+" symbol
+##   geom_errorbar(aes(ymin = sampen_min, ymax = sampen_max), width = 0.3, alpha = 1, size = 1) +
+##   geom_errorbarh(aes(xmin = meth_min, xmax = meth_max), height = 0.3, alpha = 1, size = 0) +
+##   scale_color_brewer(palette = "Dark2") +
+##   labs(
+##     x = "DNA methylation",
+##     y = "Sample entropy"
+##   ) +
+##   facet_wrap(~ compartment) +
+##   theme_ng()
+
+# I think this is the correct one
+ggplot(ranges, aes(color = location)) +
+  ## horizontal segment: range of methylations for that location/compartment
+  geom_segment(
+    aes(x = meth_min, xend = meth_max, y = sampen_mean, yend = sampen_mean),
+    size = 0.6, alpha = 1
+  ) +
+  # vertical segment: range of sampens for that location/compartment
+  geom_segment(
+    aes(x = meth_mean, xend = meth_mean, y = sampen_min, yend = sampen_max),
+    size = 0.6, alpha = 1
+  ) +
+  # center point
+  geom_point(
+    aes(x = meth_mean, y = sampen_mean),
+    shape = 3, size = 1.2
+  ) +
+  scale_color_brewer(palette = "Dark2") +
+  labs(x = "DNA methylation", y = "Normalized sample entropy") +
+  facet_wrap(~ compartment) +
+  theme_ng()
+
 ```
 
-### By median sampen PT
+# Unnormalized Shannon's MEDIANS DOWNSAMPLED {.tabset .tabset-pills}
 
+This cannot be done on short `out.gz` files, but on detailed ones only. Median values, not all loci included!
 
-```{r, fig.width = 10, fig.height = 10}
-pmds$by_med_sampen_pt <- cut(pmds$med_sampen_PT, breaks = 10)
-plot(pmds[, grep("by", colnames(pmds), invert = TRUE)],
-  pch = ".", col = pal[as.numeric(pmds$by_med_sampen_pt)],
-  main = "by_med_sampen_PT"
+```{r, eval = TRUE}
+filepaths <- list.files(
+  snakemake@params[["output_path"]],
+  pattern = "det\\.out\\.gz$", full.names = TRUE
 )
+
+filepaths <- filepaths[!grepl("norm", filepaths)]
+
+head(filepaths)
+
+## todo extract Shannon's here instead
+
+regex <- "(.*)_(.*)_(.*)_(.*).det.out.gz"
+
+process_file <- function(filepath) {
+  filename <- basename(filepath)
+
+  tmp <- read_tsv(filepath, na = "-1", show_col_types = FALSE, n_max = 10e4) ## caution downsampling
+
+  tibble(
+    file = filename,
+    median_shannon   = median(tmp$shannon,   na.rm = TRUE),
+    median_avg_meth  = median(tmp$avg_meth,  na.rm = TRUE),
+    annotation = gsub(regex, "\\1", filename),
+    patient    = gsub(regex, "\\3", filename),
+    location   = gsub(regex, "\\4", filename)
+  )
+}
+
+files <- bplapply(filepaths, process_file, BPPARAM = param) |> bind_rows()
+
+files$annotation <- factor(files$annotation)
+files$patient    <- factor(files$patient)
+files$location   <- factor(files$location,
+                           levels = c("NC", "PT", "LN", "ML", "MP", "MO"))
+
 ```
 
-### By median sampen NC
+## Scatters by location and annotation
+ 
+```{r}
+ggplot(
+  files,
+  aes(y = median_shannon, x = median_avg_meth, color = location, shape = patient)
+) +
+  geom_point(size = 0.9, alpha = 0.2) +
+  scale_color_brewer(palette = "Set1") +
+  scale_shape_manual(values = 1:nlevels(files$patient)) +
+  facet_wrap(~annotation) +
+  labs(
+    x = "median average methylation",
+    y = "median Shannon entropy"
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 2))) +
+  theme_ng()
+```
 
-```{r, fig.width = 10, fig.height = 10, results = 'asis'}
-pmds$by_med_sampen_nc <- cut(pmds$med_sampen_NC, breaks = 10)
-plot(pmds[, grep("by", colnames(pmds), invert = TRUE)],
-  pch = ".", col = pal[as.numeric(pmds$by_med_sampen_nc)],
-  main = "by_med_sampen_nc"
-)
+## Scatters by location and annotation and patient
+ 
+```{r, fig.height = 15}
+ggplot(
+  files,
+  aes(y = median_shannon, x = median_avg_meth, color = location)
+) +
+  geom_point(size = 0.9, alpha = 0.2) +
+  ## scale_color_brewer(palette = "Set1") +
+  ## facet_grid(annotation ~ patient) +
+  labs(
+    x = "median average methylation",
+    y = "median Shannon entropy"
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 2))) +
+  theme_ng() +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
 ```
 
-### By methylation change
 
-```{r, fig.width = 10, fig.height = 10, results = 'asis'}
-pmds$by_meth_change <- cut(pmds$meth_change, breaks = 10)
-plot(pmds[, grep("by", colnames(pmds), invert = TRUE)],
-  pch = ".", col = pal[as.numeric(pmds$by_meth_change)],
-  main = "by_meth_change"
+
+# Meth-normalized Shannons (the yamet way) MEDIANS {.tabset .tabset-pills}
+
+This cannot be done on short `out.gz` files, but on detailed ones only. Median values, not all loci included!
+
+```{r, eval = TRUE}
+filepaths <- list.files(
+  snakemake@params[["output_path"]],
+  pattern = "norm\\.det\\.out\\.gz$", full.names = TRUE
 )
-```
 
-Add extra discretization.
+head(filepaths)
 
-```{r}
-pmds$by_sampen_pt <- cut(pmds$med_sampen_PT, breaks = 10)
-pmds$by_sampen_nc <- cut(pmds$med_sampen_NC, breaks = 10)
-pmds$by_shannon_pt <- cut(pmds$shannon_PT, breaks = 10)
-pmds$by_shannon_nc <- cut(pmds$shannon_NC, breaks = 10)
-```
+## todo extract Shannon's here instead
+
+regex <- "(.*)_(.*)_(.*)_(.*).norm.det.out.gz"
+
+process_file <- function(filepath) {
+  filename <- basename(filepath)
 
-# SCNA checks, by cytoband
+  tmp <- read_tsv(filepath, na = "-1", show_col_types = FALSE, n_max = 10e4) # caution downsampling
 
-Is this necessary? given we don't see much of a change https://github.com/imallona/yamet/blob/9156b817e04d9294024631dc25b528e191d913f4/workflow/rules/src/crc_deletions.Rmd#L96
+  tibble(
+    file = filename,
+    median_shannon   = median(tmp$shannon_norm,   na.rm = TRUE),
+    median_avg_meth  = median(tmp$avg_meth,  na.rm = TRUE),
+    annotation = gsub(regex, "\\1", filename),
+    patient    = gsub(regex, "\\3", filename),
+    location   = gsub(regex, "\\4", filename)
+  )
+}
 
+files <- bplapply(filepaths, process_file, BPPARAM = param) |> bind_rows()
 
+files$annotation <- factor(files$annotation)
+files$patient    <- factor(files$patient)
+files$location   <- factor(files$location,
+                           levels = c("NC", "PT", "LN", "ML", "MP", "MO"))
 
+```
 
+## Scatters by location and annotation
+ 
+```{r}
+ggplot(
+  files,
+  aes(y = median_shannon, x = median_avg_meth, color = location, shape = patient)
+) +
+  geom_point(size = 0.9, alpha = 0.2) +
+  scale_color_brewer(palette = "Set1") +
+  scale_shape_manual(values = 1:nlevels(files$patient)) +
+  facet_wrap(~annotation) +
+  labs(
+    x = "median average methylation",
+    y = "median norm Shannon entropy"
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 2))) +
+  theme_ng()
+```
+
+## Scatters by location and annotation and patient
+ 
+```{r, fig.height = 15}
+ggplot(
+  files,
+  aes(y = median_shannon, x = median_avg_meth, color = location)
+) +
+  geom_point(size = 0.9, alpha = 0.2) +
+  ## scale_color_brewer(palette = "Set1") +
+  ## facet_grid(annotation ~ patient) +
+  labs(
+    x = "median average methylation",
+    y = "median norm Shannon entropy"
+  ) +
+  guides(color = guide_legend(override.aes = list(size = 2))) +
+  theme_ng() +
+  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
+```
 
 
 
diff --git a/workflow/rules/src/download_hg19_lines.sh b/workflow/rules/src/download_hg19_lines.sh
new file mode 100644
index 0000000..a418bc6
--- /dev/null
+++ b/workflow/rules/src/download_hg19_lines.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+##
+## Downloads RepeatMasker (rmsk) for hg19 and extracts only SINE elements
+##
+
+mysql --user=genome --host=genome-mysql.cse.ucsc.edu -N -s -e \
+  'SELECT genoName, genoStart, genoEnd, repName, repClass, repFamily
+     FROM hg19.rmsk
+     WHERE repClass = "LINE";' |
+  awk 'BEGIN{OFS="\t"} {print $1, $2, $3, $4, $5, $6}' |
+  bedtools merge -i - |
+  gzip -c > ${snakemake_output[0]}
diff --git a/workflow/rules/src/download_hg19_sines.sh b/workflow/rules/src/download_hg19_sines.sh
new file mode 100644
index 0000000..fb8689b
--- /dev/null
+++ b/workflow/rules/src/download_hg19_sines.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+##
+## Downloads RepeatMasker (rmsk) for hg19 and extracts only SINE elements
+##
+
+mysql --user=genome --host=genome-mysql.cse.ucsc.edu -N -s -e \
+  'SELECT genoName, genoStart, genoEnd, repName, repClass, repFamily
+     FROM hg19.rmsk
+     WHERE repClass = "SINE";' |
+  awk 'BEGIN{OFS="\t"} {print $1, $2, $3, $4, $5, $6}' |
+  bedtools merge -i - |
+  gzip -c > ${snakemake_output[0]}
diff --git a/workflow/rules/src/download_mm10_lines.sh b/workflow/rules/src/download_mm10_lines.sh
new file mode 100644
index 0000000..8ea0e27
--- /dev/null
+++ b/workflow/rules/src/download_mm10_lines.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+##
+## Downloads RepeatMasker (rmsk) for mm10 and extracts only SINE elements
+##
+
+mysql --user=genome --host=genome-mysql.cse.ucsc.edu -N -s -e \
+  'SELECT genoName, genoStart, genoEnd, repName, repClass, repFamily
+     FROM mm10.rmsk
+     WHERE repClass = "LINE";' |
+  awk 'BEGIN{OFS="\t"} {print $1, $2, $3, $4, $5, $6}' |
+  bedtools merge -i - |
+  gzip -c > ${snakemake_output[0]}
diff --git a/workflow/rules/src/download_mm10_sines.sh b/workflow/rules/src/download_mm10_sines.sh
new file mode 100644
index 0000000..c618543
--- /dev/null
+++ b/workflow/rules/src/download_mm10_sines.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+##
+## Downloads RepeatMasker (rmsk) for mm10 and extracts only SINE elements
+##
+
+mysql --user=genome --host=genome-mysql.cse.ucsc.edu -N -s -e \
+  'SELECT genoName, genoStart, genoEnd, repName, repClass, repFamily
+     FROM mm10.rmsk
+     WHERE repClass = "SINE";' |
+  awk 'BEGIN{OFS="\t"} {print $1, $2, $3, $4, $5, $6}' |
+  bedtools merge -i - |
+  gzip -c > ${snakemake_output[0]}