batch correction: cds from seurat
0
0
Entering edit mode
20 days ago
sooni ▴ 20

Hello.

I want to integrate the 7 Seurat objects and then convert them to cds objects to perform batch correction. First of all, this is the code I ran before changing to cds.

donor1 <- Read10X_h5("GSM5723843_Donor1_raw_feature_bc_matrix.h5")
donor2 <- Read10X_h5("GSM5723844_Donor2_raw_feature_bc_matrix.h5")
donor3 <- Read10X_h5("GSM5723845_Donor3_raw_feature_bc_matrix.h5")
donor4 <- Read10X_h5("GSM5723846_Donor4_raw_feature_bc_matrix.h5")
donor5 <- Read10X_h5("GSM5723847_Donor5_raw_feature_bc_matrix.h5")
donor6 <- Read10X_h5("GSM5723848_Donor6_raw_feature_bc_matrix.h5")
donor7 <- Read10X_h5("GSM5723849_Donor7_raw_feature_bc_matrix.h5")

donor1 <- CreateSeuratObject(donor1, project = "Donor1", min.cells = 3,
                             min.features = 200)
donor2 <- CreateSeuratObject(donor2, project = "Donor2", min.cells = 3,
                             min.features = 200)
donor3 <- CreateSeuratObject(donor3, project = "Donor3", min.cells = 3,
                             min.features = 200)
donor4 <- CreateSeuratObject(donor4, project = "Donor4", min.cells = 3,
                             min.features = 200)
donor5 <- CreateSeuratObject(donor5, project = "Donor5", min.cells = 3,
                             min.features = 200)
donor6 <- CreateSeuratObject(donor6, project = "Donor6", min.cells = 3,
                             min.features = 200)
donor7 <- CreateSeuratObject(donor7, project = "Donor7", min.cells = 3,
                             min.features = 200)

donor1 <- PercentageFeatureSet(donor1, pattern = "^MT-", col.name = "percent.mt")
donor2 <- PercentageFeatureSet(donor2, pattern = "^MT-", col.name = "percent.mt")
donor3 <- PercentageFeatureSet(donor3, pattern = "^MT-", col.name = "percent.mt")
donor4 <- PercentageFeatureSet(donor4, pattern = "^MT-", col.name = "percent.mt")
donor5 <- PercentageFeatureSet(donor5, pattern = "^MT-", col.name = "percent.mt")
donor6 <- PercentageFeatureSet(donor6, pattern = "^MT-", col.name = "percent.mt")
donor7 <- PercentageFeatureSet(donor7, pattern = "^MT-", col.name = "percent.mt")

donors <- merge(donor1, y = c(donor2, donor3, donor4,
                              donor5, donor6, donor7))

VlnPlot(donors, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), 
        ncol = 3, pt.size = 0)

donors <- subset(donors, nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)

donors.list <- SplitObject(donors, split.by = "orig.ident")

donors.list <- lapply(X = donors.list, FUN = function(x) {
  x <- NormalizeData(x)
  x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

features <- SelectIntegrationFeatures(object.list = donors.list)
donors.anchors <- FindIntegrationAnchors(object.list = donors.list, 
                                         anchor.features = features)

saydonors.combined <- IntegrateData(anchorset = donors.anchors)

DefaultAssay(donors.combined) <- "integrated"

donors.combined <- ScaleData(donors.combined, verbose = FALSE)
donors.combined <- RunPCA(donors.combined, npcs = 30, verbose = FALSE)
donors.combined <- RunUMAP(donors.combined, reduction = "pca", dims = 1:30)
donors.combined <- FindNeighbors(donors.combined, reduction = "pca", dims = 1:30)

#### another way to convert seurat to cds ####
my.so <- donors.combined

# Project PC dimensions to whole data set
my.so <- ProjectDim(my.so, reduction = "pca")

expression_matrix <- my.so@assays$RNA@counts

cell_metadata <- my.so@meta.data
if (all.equal(colnames(expression_matrix), rownames(cell_metadata))) {
  print(sprintf("Cell identifiers match"))
} else {
  print(sprintf("Cell identifier mismatch - %i cells in expression matrix, %i cells in metadata",
                ncol(expression_matrix), nrow(cell_metadata)))
  print("If the counts are equal, sort differences will throw this error")
}

gene_annotation <- data.frame(gene_short_name = rownames(my.so@assays$RNA), 
                              row.names = rownames(my.so@assays$RNA))
if (all.equal(rownames(expression_matrix), rownames(gene_annotation))) {
  print(sprintf("Gene identifiers all match"))
} else {
  print(sprintf("Gene identifier mismatch - %i genes in expression matrix, %i gene in gene annotation",
                nrow(expression_matrix), nrow(gene_annotation)))
  print("If the counts are equal, sort differences will throw this error")
}

cell_metadata$Samples <- rep(NA)
cell_metadata <- cell_metadata %>%
  mutate(Samples = case_when(
    grepl("Donor1", orig.ident) ~ "Donor1",
    grepl("Donor2", orig.ident) ~ "Donor2",
    grepl("Donor3", orig.ident) ~ "Donor3",
    grepl("Donor4", orig.ident) ~ "Donor4",
    grepl("Donor5", orig.ident) ~ "Donor5",
    grepl("Donor6", orig.ident) ~ "Donor6",
    grepl("Donor7", orig.ident) ~ "Donor7",
    TRUE ~ NA_character_
  ))

my.cds <- new_cell_data_set(expression_matrix,
                            cell_metadata = cell_metadata,
                            gene_metadata = gene_annotation)

I would like to perform batch correction on the cds object created this way. The code that I batch corrected in the cds object is as follows.

my.cds <- preprocess_cds(my.cds, num_dim = 100)
reduce_my.cds <- reduce_dimension(my.cds)
plot_cells(reduce_my.cds, color_cells_by = "Samples", label_cell_groups = F,
           group_label_size = 5)

batch_my.cds <- align_cds(reduce_my.cds, num_dim = 100, alignment_group = "Samples")
batch_my.cds <- reduce_dimension(batch_my.cds)
plot_cells(batch_my.cds, color_cells_by = "Samples", label_cell_groups = F)

When UMAP is drawn after performing batch correction with the code above, the UMAP is drawn poorly. Each cluster is held together like a single lump.

enter image description here

Is my code wrong? If there is a site with useful code, please let me know.

Thank you for help!

batch-correction R seurat monocle3 cds • 261 views
ADD COMMENT

Login before adding your answer.

Traffic: 2589 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6