Question

scATAC-seq and scRNA-seq integration issue

0

Entering edit mode

3.1 years ago

inayat45shaikh ▴ 40

Hi All, I have scATAC-seq and scRNA-seq multi-OMICs dataset and I am trying to do integration analysis with the tutorial using vignette https://satijalab.org/seurat/articles/atacseq_integration_vignette.html But at co-embedding step when I am trying to visualize the integrated data, the plot seems not properly integrated (see below plot), as it labeled ATAC cells as “NA” in the UMAP. enter image description here

Here is the detail of my data

scRNA-seq: One cell line treated with four timepoints (DMSO, Treat1, Treat2, Treat3)
scATAC-seq: One cell line treated with same above timepoints (DMSO, Treat1, Treat2, Treat3)

What do you think went wrong with the integration? I have pasted script below for your reference

####### scATA-seq reading and preparation #######

counts <- Read10X_h5(filename = "../scATAC-seq/LNCaP_scATAC_HDF5/RES-B_filtered_peak_bc_matrix.h5")
metadata <- read.csv(
file = "atac_v1_pbmc_10k_singlecell.csv",
header = TRUE,
row.names = 1
)

chrom_assay <- CreateChromatinAssay(
counts = counts,
sep = c(":", "-"),
genome = 'hg19',
fragments = '../scATAC-seq/LNCaP_scATAC_HDF5/RESB_fragments.tsv.gz',
min.cells = 10,
min.features = 200
)

pbmc <- CreateSeuratObject(
counts = chrom_assay,
assay = "peaks",
project = "ATAC"
#meta.data = metadata
)

pbmc$dataset <- 'RESB_ATAC'
pbmc@meta.data
pbmc[['peaks']]

ATAC analysis add gene annotation information
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v75)
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "hg19"
Annotation(pbmc) <- annotations
saveRDS(pbmc, file = "RESB_ATAC.rds")
############################################################################# END #######################################

############## scATAC-seq merged dataset preparation ############

DMSO <- readRDS("DMSO_ATAC.rds")
ENZ48 <- readRDS("ENZ48_ATAC.rds")
RESA <- readRDS("RESA_ATAC.rds")
RESB <- readRDS("RESB_ATAC.rds")

merge all datasets, adding a cell ID to make sure cell names are unique
combined <- merge(
x = DMSO,
y = list(ENZ48, RESA, RESB),
add.cell.ids = c("DMSO.ATAC", "ENZ48.ATAC", "RESA.ATAC", "RESB.ATAC"),
merge.data = TRUE
)

pbmc.combined <- combined
combined[["peaks"]]

process the combined dataset
pbmc.combined <- FindTopFeatures(pbmc.combined, min.cutoff = 10)
pbmc.combined <- RunTFIDF(pbmc.combined)
pbmc.combined <- RunSVD(pbmc.combined)
pbmc.combined <- RunUMAP(pbmc.combined, reduction = "lsi", dims = 2:30)
p1 <- DimPlot(pbmc.combined, group.by = "dataset")
DimPlot(pbmc.combined, group.by = 'dataset', pt.size = 0.1)
saveRDS(pbmc.combined, file = "scATAC_DMSO-ENZ48-REASA-RESB.rds")
############################################################################# END #######################################

######################### scRNA-seq merged dataset preparation ##########################
DMSO.data <- Read10X(data.dir = "../LNCaP-DMSO/")
ENZ48.data <- Read10X(data.dir = "../LNCaP-ENZ48/")
RESA.data <- Read10X(data.dir = "../LNCaP-RESA/")
RESB.data <- Read10X(data.dir = "../LNCaP-RESB/")

DMSO <- CreateSeuratObject(counts = DMSO.data, min.cells = 3, min.features = 200, project = "RNA")
ENZ48 <- CreateSeuratObject(counts = ENZ48.data, min.cells = 3, min.features = 200, project = "RNA")
RESA <- CreateSeuratObject(counts = RESA.data, min.cells = 3, min.features = 200, project = "RNA")
RESB <- CreateSeuratObject(counts = RESB.data, min.cells = 3, min.features = 200, project = "RNA")

add information to identify dataset of origin
DMSO$dataset <- 'DMSO'
ENZ48$dataset <- 'ENZ48'
RESA$dataset <- 'RESA'
RESB$dataset <- 'RESB'

combined <- merge(
x = DMSO,
y = list(ENZ48, RESA, RESB),
add.cell.ids = c("DMSO", "ENZ48", "RESA", "RESB"),
merge.data = TRUE
)

combined@meta.data

split the dataset into a list of two seurat objects (stim and CTRL)
ifnb.list <- SplitObject(combined, split.by = "dataset")

normalize and identify variable features for each dataset independently
ifnb.list <- lapply(X = ifnb.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

select features that are repeatedly variable across datasets for integration
features <- SelectIntegrationFeatures(object.list = ifnb.list)
immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, anchor.features = features)

this command creates an 'integrated' data assay
immune.combined <- IntegrateData(anchorset = immune.anchors)

specify that we will perform downstream analysis on the corrected data note that the
original unmodified data still resides in the 'RNA' assay
DefaultAssay(immune.combined) <- "integrated"

Run the standard workflow for visualization and clustering
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)
immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:30)
immune.combined <- FindNeighbors(immune.combined, reduction = "pca", dims = 1:30)
immune.combined <- FindClusters(immune.combined, resolution = 0.4)

Visualization
p1 <- DimPlot(immune.combined, reduction = "umap", group.by = "dataset")
p2 <- DimPlot(immune.combined, reduction = "umap", label = TRUE, repel = TRUE)
p1 + p2
DimPlot(immune.combined, reduction = "umap", split.by = "dataset")

saveRDS(immune.combined, file = "scRNA-seq_DMSO-ENZ48-RESA-RESB.rds")

############################################################################# END #######################################

#################################### scATAC-seq and scRNA-seq integration #################################

load both modalities
pbmc.rna <- readRDS("scRNA-seq_DMSO-ENZ48-RESA-RESB.rds")
pbmc.atac <- readRDS("scATAC_DMSO-ENZ48-REASA-RESB.rds")
pbmc.rna@meta.data
pbmc.atac@meta.data

Perform standard analysis of each modality independently RNA analysis
pbmc.rna <- NormalizeData(pbmc.rna)
pbmc.rna <- FindVariableFeatures(pbmc.rna)
pbmc.rna <- ScaleData(pbmc.rna)
pbmc.rna <- RunPCA(pbmc.rna)
pbmc.rna <- RunUMAP(pbmc.rna, dims = 1:30)

ATAC analysis add gene annotation information
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v75)
seqlevelsStyle(annotations) <- "UCSC"
genome(annotations) <- "hg19"
Annotation(pbmc.atac) <- annotations

We exclude the first dimension as this is typically correlated with sequencing depth
pbmc.atac <- RunTFIDF(pbmc.atac)
pbmc.atac <- FindTopFeatures(pbmc.atac, min.cutoff = "q0")
pbmc.atac <- RunSVD(pbmc.atac)
pbmc.atac <- RunUMAP(pbmc.atac, reduction = "lsi", dims = 2:30, reduction.name = "umap.atac", reduction.key = "atacUMAP_")

p1 <- DimPlot(pbmc.rna, group.by = "seurat_clusters", label = TRUE) + NoLegend() + ggtitle("RNA")
p2 <- DimPlot(pbmc.atac, group.by = "orig.ident", label = FALSE) + NoLegend() + ggtitle("ATAC")
p1 + p2
pbmc.atac@meta.data

quantify gene activity
gene.activities <- GeneActivity(pbmc.atac, features = VariableFeatures(pbmc.rna))

add gene activities as a new assay
pbmc.atac[["ACTIVITY"]] <- CreateAssayObject(counts = gene.activities)

normalize gene activities
DefaultAssay(pbmc.atac) <- "ACTIVITY"
pbmc.atac <- NormalizeData(pbmc.atac)
pbmc.atac <- ScaleData(pbmc.atac, features = rownames(pbmc.atac))

Identify anchors
transfer.anchors <- FindTransferAnchors(reference = pbmc.rna, query = pbmc.atac, features = VariableFeatures(object = pbmc.rna),
reference.assay = "RNA", query.assay = "ACTIVITY", reduction = "cca")
celltype.predictions <- TransferData(anchorset = transfer.anchors, refdata = pbmc.rna$seurat_clusters,
weight.reduction = pbmc.atac[["lsi"]], dims = 2:30)

pbmc.atac <- AddMetaData(pbmc.atac, metadata = celltype.predictions)

pbmc.atac$annotation_correct <- pbmc.atac$predicted.id == pbmc.atac$seurat_clusters
p1 <- DimPlot(pbmc.atac, group.by = "predicted.id", label = TRUE) + NoLegend() + ggtitle("Predicted annotation")
p2 <- DimPlot(pbmc.atac, group.by = "seurat_clusters", label = TRUE) + NoLegend() + ggtitle("Ground-truth annotation")
p1 | p2

note that we restrict the imputation to variable genes from scRNA-seq, but could impute the
full transcriptome if we wanted to
genes.use <- VariableFeatures(pbmc.rna)
refdata <- GetAssayData(pbmc.rna, assay = "RNA", slot = "data")[genes.use, ]

refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells. imputation
(output) will contain an imputed scRNA-seq matrix for each of the ATAC cells
imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata, weight.reduction = pbmc.atac[["lsi"]],
dims = 2:30)
pbmc.atac[["RNA"]] <- imputation

coembed <- merge(x = pbmc.rna, y = pbmc.atac)

Finally, we run PCA and UMAP on this combined object, to visualize the co-embedding of both
datasets
coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
coembed <- RunUMAP(coembed, dims = 1:30)

DimPlot(coembed, group.by = c("orig.ident", "seurat_clusters"))

image

############################################################################# END ##########################################

scRNA-seq scATAC Integration • 1.1k views

ADD COMMENT • link updated 3.1 years ago by GenoMax 154k • written 3.1 years ago by inayat45shaikh ▴ 40