Skip to content

Error: grouping factors must have > 1 sampled level #187

@andrewsong777

Description

@andrewsong777

I am trying to write code, but I am getting this error. I have attached a csv of my data as well.

find_de_MAST_RE <- function(adata_){
    # create a MAST object
    sca <- SceToSingleCellAssay(adata_, class = "SingleCellAssay")
    print("Dimensions before subsetting:")
    print(dim(sca))
    print("")
    # keep genes that are expressed in more than 10% of all cells
    sca <- sca[freq(sca)>0.1,]
    print("Dimensions after subsetting:")
    print(dim(sca))
    print("")
    # add a column to the data which contains scaled number of genes that are expressed in each cell
    cdr2 <- colSums(assay(sca)>0)
    colData(sca)$ngeneson <- scale(cdr2)
    # store the columns that we are interested in as factors
    label <- factor(colData(sca)$condition)
    # set the reference level
    label <- relevel(label,"control")
    colData(sca)$label <- label
    #celltype <- factor(colData(sca)$auto_manual_merged_annotation)
    #colData(sca)$celltype <- celltype
    # same for donors (which we need to model random effects)
    replicate <- factor(colData(sca)$source)
    colData(sca)$replicate <- replicate
    # create a group per condition-celltype combination
    colData(sca)$group <- paste0(colData(adata_)$condition, ".", colData(adata_)$auto_manual_merged_annotation)
    colData(sca)$group <- factor(colData(sca)$group)
    
    #selected_cols <- c("group", "ngeneson", "replicate")
    #selected_data <- colData(sca)[, selected_cols]
    #print(selected_data)
    #write.csv(selected_data, file = "/project/data/nb24/Control_Dataset_Analysis/test.csv", row.names = FALSE)

    print(levels(colData(sca)$replicate))
    #print(table(colData(sca)$group, colData(sca)$ngeneson))

    # define and fit the model
    zlmCond <- zlm(formula =~ ngeneson + group + (1 | replicate), 
                   sca=sca, 
                   method='glmer', 
                   ebayes=F, 
                   strictConvergence=F,
                   fitArgsD=list(nAGQ = 0)) # to speed up calculations
    print("done")

    # perform likelihood-ratio test for the condition that we are interested in    
    summaryCond <- summary(zlmCond, doLRT='neuroblastoma.Bridge')
    # get the table with log-fold changes and p-values
    summaryDt <- summaryCond$datatable
    result <- merge(summaryDt[contrast=='neuroblastoma.Bridge' & component=='H',.(primerid, `Pr(>Chisq)`)], # p-values
                     summaryDt[contrast=='neuroblastoma.Bridge' & component=='logFC', .(primerid, coef)],
                     by='primerid') # logFC coefficients
    # MAST uses natural logarithm so we convert the coefficients to log2 base to be comparable to edgeR
    result[,coef:=result[,coef]/log(2)]
    # do multiple testing correction
    result[,FDR:=p.adjust(`Pr(>Chisq)`, 'fdr')]
    result = result[result$FDR<0.01,, drop=F]

    result <- stats::na.omit(as.data.frame(result))
    return(result)
}

test.csv

My error message: Error: grouping factors must have > 1 sampled level

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions