Question

"object '..sample_identity' not found " error in permutation_test with scProportionTest

0

Entering edit mode

2.6 years ago

camila_fullio • 0

Hi everyone,

I'm new here and I'm also quite new in the world of scRNA seq analysis so I apologize for the mistakes in format I will probably make.

I am analyzing some scRNA seq data, I have a dataset with 2 control animals and 2 mutants. I want to analyze if there is a significant difference in the proportion of cells between these two groups. I found a post correct way of analyzing cell proportions in singlecell data where rpolicastro presents a very useful R library he created. However, I tried to use it but I get this error and I don't know what else to try:

Error in `[.data.frame`(meta.data, get(sample_identity) %in% c(sample_1,  : object '..sample_identity' not found

And this is the traceback:

3.`[.data.frame`(meta.data, get(sample_identity) %in% c(sample_1, 
sample_2), c(..sample_identity, ..cluster_identity)) 

2.meta.data[get(sample_identity) %in% c(sample_1, sample_2), c(..sample_identity, 
..cluster_identity)] 

1. permutation_test(clustered.by.ref, cluster_identity = active.ident, 
sample_1 = starts_with("C"), sample_2 = starts_with("k"))

sample_identity is one of the arguments of the function so I don't understand why R cannot find it.

Thank you for your help!

scProportionTest r scRNAseq Seurat • 782 views

ADD COMMENT • link 2.5 years ago by camila_fullio • 0

0

Entering edit mode

Could you add your code?

ADD REPLY • link 2.6 years ago by ATpoint 82k

0

Entering edit mode

I used the following functions:

setClass(
  "sc_utils",
  representation(
    meta_data = "data.table",
    results = "list"
  ),
  prototype(
    meta_data = data.frame(),
    results = list()
  )
)


sc_utils <- function(seurat_object) {

  ## Pull meta-data out of Seurat object.
  metadata <- as.data.table(
    seurat_object[[]],
    keep.rownames = "cell_id"
  )




  ## Create new sc_utils object.
  sc_utils_obj <- new(
    "sc_utils",
    meta_data = metadata
  )

  return(sc_utils_obj)
}

sc_utils(clustered.by.ref)



permutation_test <- function(
  sc_utils_obj,
  cluster_identity = NA,
  sample_1 = NA,
  sample_2 = NA,
  sample_identity = "orig.ident",
  n_permutations = 1000
) {

  ## Prepare data.
  meta.data <- copy(sc_utils_obj@meta.data)

  meta.data <- meta.data[
    get(sample_identity) %in% c(sample_1, sample_2),
    c(..sample_identity, ..cluster_identity)
  ]

  setnames(
    meta.data,
    old = c(sample_identity, cluster_identity),
    new = c("condition", "clusters")
  )

  meta.data[, clusters := as.character(clusters)]
  cluster_cases <- unique(meta.data[["clusters"]])

  ## Get observed differences in fraction.
  obs_diff <- meta.data[, .(count = .N), by = .(samples, clusters)]
  obs_diff <- obs_diff[
    CJ(samples = samples, clusters = cluster_cases, unique = TRUE),
    on = .(samples, clusters)
  ][
    is.na(count), count := 0
  ][]
  obs_diff[, fraction := count / sum(count), by = samples]
  obs_diff <- dcast(obs_diff, clusters ~ samples, value.var = "fraction")
  obs_diff[, obs_log2FD := log2(get(sample_2)) - log2(get(sample_1))]

  ## Permutation test.
  perm_results <- matrix(NA, nrow(obs_diff), n_permutations)
  rownames(perm_results) <- sort(cluster_cases)

  for (i in seq_len(n_permutations)) {
    permuted <- copy(meta.data)
    permuted[["samples"]] <- sample(permuted[["samples"]])
    permuted <- permuted[, .(count = .N), by = .(samples, clusters)]
    permuted <- permuted[
      CJ(samples = samples, clusters = cluster_cases, unique = TRUE),
      on = .(samples, clusters)
    ][
      is.na(count), count := 0
    ][]
    permuted[, fraction := count / sum(count), by = samples]
    permuted <- dcast(permuted, clusters ~ samples, value.var = "fraction")
    permuted[, perm_log2FD := log2(get(sample_2)) - log2(get(sample_1))]

    perm_results[, i] <- permuted[["perm_log2FD"]]
  }

  increased <- rowSums(apply(perm_results, 2, function(x) obs_diff[["obs_log2FD"]] <= x))
  increased <- (increased + 1) / (n_permutations + 1)

  decreased <- rowSums(apply(perm_results, 2, function(x) obs_diff[["obs_log2FD"]] >= x))
  decreased <- (decreased + 1) / (n_permutations + 1)

  obs_diff[, pval := ifelse(obs_log2FD > 0, increased[.I], decreased[.I])]
  obs_diff[, FDR := p.adjust(pval, "fdr")]

  ## Boostrap log2FD CI.

  boot_results <- matrix(NA, nrow(obs_diff), n_permutations)
  rownames(boot_results) <- sort(cluster_cases)

  for (i in seq_len(n_permutations)) {
    booted <- copy(meta.data)
    booted[, clusters := sample(clusters, replace = TRUE), by = samples]
    booted <- booted[, .(count = .N), by = .(samples, clusters)]
    booted <- booted[
      CJ(samples = samples, clusters = cluster_cases, unique = TRUE),
      on = .(samples, clusters)
    ][
      is.na(count), count := 0
    ][]
    booted[, fraction := count / sum(count), by = samples]
    booted <- dcast(booted, clusters ~ samples, value.var = "fraction")
    booted[, boot_log2FD := log2(get(sample_2)) - log2(get(sample_1))]

    boot_results[, i] <- booted[["boot_log2FD"]]
  }

  boot_results[!is.finite(boot_results)] <- NA
  boot_mean <- rowMeans(boot_results, na.rm = TRUE)
  boot_ci <- t(apply(boot_results, 1, function(x) quantile(x, probs = c(0.025, 0.975), na.rm = TRUE)))
  boot_ci <- as.data.table(boot_ci)
  setnames(boot_ci, old = c(1, 2), new = c("boot_CI_2.5", "boot_CI_97.5"))

  obs_diff[, boot_mean_log2FD := boot_mean]
  obs_diff <- cbind(obs_diff, boot_ci)

  ## Store results and return object.
  sc_utils_obj@results$permutation <- obs_diff
  return(sc_utils_obj)
}

permutation <- permutation_test(clustered.by.ref, cluster_identity = active.ident,
                 sample_1 = starts_with("C"), sample_2 = starts_with("k"))

clustered.by.ref is a Seurat object with 4 samples, two are KOs (K376-3 and K376-7) and two controls (C376-5 and C376-6). The problem does not seem to be the "sample_1" and "sample_2" arguments as I tried using just two of the samples with the correct names and I still get the same error.

Thanks!!

ADD REPLY • link 2.5 years ago by camila_fullio • 0