diff --git a/DESCRIPTION b/DESCRIPTION index 7c1e56b..1c237fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: Tempora Type: Package Title: Pathway-based Trajectory Inference for Time-series Single Cell RNA-Seq Data -Version: 0.1.0 +Version: 0.1.10 Author: Thinh Tran Maintainer: Thinh Tran Description: Tempora is a trajectory inference method that infers cell lineage maps from time-series scRNAseq data using pathway enrichment profiles of single cells and experimental time information. Tempora uses an information theoretic approach to build a trajectory at the cluster level based on the clusters’ pathway enrichment profiles, effectively connecting cell types and states across multiple time points. Taking advantage of the available time information, Tempora can infer the directions of all connections in a trajectory that go from early to late clusters. @@ -19,7 +19,7 @@ Imports: scales Encoding: UTF-8 LazyData: true -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.1 Suggests: knitr, rmarkdown, @@ -30,6 +30,7 @@ Collate: 'CalculatePWProfiles.R' 'IdentifyCellTypes.R' 'IdentifyVaryingPWs.R' + 'IdentifyVaryingPWsParallel.R' 'PlotVaryingPWs.R' 'dataAccess.R' 'TemporaObjClass.R' diff --git a/NAMESPACE b/NAMESPACE index a8392c5..f07b470 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(CalculatePWProfiles) export(CreateTemporaObject) export(IdentifyCellTypes) export(IdentifyVaryingPWs) +export(IdentifyVaryingPWsParallel) export(ImportSeuratObject) export(PlotTrajectory) export(PlotVaryingPWs) @@ -44,6 +45,7 @@ importFrom(igraph,E) importFrom(igraph,graph_from_data_frame) importFrom(igraph,layout_with_sugiyama) importFrom(igraph,plot.igraph) +importFrom(magrittr,'%>%') importFrom(methods,new) importFrom(methods,validObject) importFrom(mgcv,anova.gam) diff --git a/R/CalculatePWProfiles.R b/R/CalculatePWProfiles.R index 0354d12..be1de84 100644 --- a/R/CalculatePWProfiles.R +++ b/R/CalculatePWProfiles.R @@ -37,6 +37,12 @@ CalculatePWProfiles <- function(object, gmt_path, method="gsva", min.sz=5, max.s cat("\nCalculating cluster pathway enrichment profiles...\n") gsva_bycluster <- GSVA::gsva(as.matrix(exprMatrix_bycluster), pathwaygmt, method=method, min.sz=min.sz, max.sz=max.sz, parallel.sz=parallel.sz) + oldrownames =rownames(gsva_bycluster) + newrownames <- sub("%.*", "", oldrownames) + newrownames <- gsub("\\s*\\([^\\)]+\\)","",newrownames) + newrownames <- gsub("[[:punct:]]", "_", newrownames) + rownames(gsva_bycluster) <- newrownames + colnames(gsva_bycluster) <- colnames(exprMatrix_bycluster) object@cluster.pathways <- gsva_bycluster diff --git a/R/IdentifyVaryingPWs.R b/R/IdentifyVaryingPWs.R index 088e9ba..e6b0f3f 100644 --- a/R/IdentifyVaryingPWs.R +++ b/R/IdentifyVaryingPWs.R @@ -28,13 +28,20 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ significant_pathways <- c() for (i in 1:object@n.pcs){ genes_scaled <- scale(object@cluster.pathways.dr$rotation[,i]) - significant_pathways <- c(names(which(genes_scaled[,1] > 1.5 | genes_scaled[,1] < -1.5)), significant_pathways) + significant_pathways <- c(names(which(genes_scaled[,1] > 1.0 | genes_scaled[,1] < -1.0)), significant_pathways) } - pca_pathways <- sub("%.*", "", significant_pathways) - pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) - pca_pathways_cleaned <- gsub("[[:punct:]]", "", pca_pathways) - themes <- pca_pathways_cleaned + # pca_pathways <- sub("%.*", "", significant_pathways) + # pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) + # pca_pathways_cleaned <- gsub("[[:punct:]]", "_", pca_pathways) + # themes <- pca_pathways_cleaned + themes <- significant_pathways + + # oldrownames =rownames(gsva_bycluster) + # newrownames <- sub("%.*", "", oldrownames) + # newrownames <- gsub("\\s*\\([^\\)]+\\)","",newrownames) + # newrownames <- gsub("[[:punct:]]", "_", newrownames) + # rownames(gsva_bycluster) <- newrownames cat("Fitting GAM models...") @@ -45,12 +52,13 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ p_vals[[i]] <- 1 gams[[i]] <- NA next - } + } if (length(grep(themes[i], rownames(gsva_bycluster))) > 1){ plot_df <- data.frame(cluster=colnames(gsva_bycluster[grep(themes[i], rownames(gsva_bycluster)), ]), value=colMeans(gsva_bycluster[grep(themes[i], rownames(gsva_bycluster)), ], na.rm=T)) } else if (length(grep(themes[i], rownames(gsva_bycluster))) == 1){ plot_df <- data.frame(cluster=names(gsva_bycluster[grep(themes[i], rownames(gsva_bycluster)), ]), value=gsva_bycluster[grep(themes[i], rownames(gsva_bycluster)), ]) } plot_df$time <- object@cluster.metadata$Cluster_time_score + # gam: Generalized additive models with integrated smoothness estimation gams[[i]] <- mgcv::gam(value ~ s(time, k=3, bs='cr'), data=plot_df) temp_anova <- mgcv::anova.gam(gams[[i]]) p_vals[[i]] <- temp_anova$s.pv @@ -67,7 +75,7 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ cat("No temporally varying pathways detected. Please try running IdentifyVaryingPWs with a more relaxed p-value cutoff.") #eventhough the function was not successful return the object because in the vignette # this function call sets the original object to what is returned and if it is null - # you loose all the processing you have done until now. + # you loose all the processing you have done until now. return(object) } else { object@varying.pws <- varying_pathways diff --git a/R/IdentifyVaryingPWsParallel.R b/R/IdentifyVaryingPWsParallel.R new file mode 100644 index 0000000..499ee25 --- /dev/null +++ b/R/IdentifyVaryingPWsParallel.R @@ -0,0 +1,108 @@ + +# ---- +#' Calculate temporally changing pathways (parallel version) +#' +#' Identify the pathways that change over time by fitting a generalized additive model +#' @param object A Tempora object +#' @param pval_threshold P-value threshold to determine the significance of pathway enrichment over time. Default to 0.05. +#' @export +#' @importFrom mgcv gam anova.gam plot.gam +#' @importFrom methods new validObject +#' @importFrom stats p.adjust prcomp screeplot +#' @importFrom grDevices colorRampPalette +#' @importFrom graphics axis legend par points text +#' @importFrom reshape2 dcast +#' @examples \dontrun{tempora_data <- IdentifyVaryingPWsParallel(tempora_data, pval_threshold = 0.05)} +IdentifyVaryingPWsParallel <- function(object, pval_threshold=0.05){ + require(BiocParallel) + if (class(object)[1] != "Tempora"){ + stop("Not a valid Tempora object") + } + if (is.null(object@n.pcs)){ + stop("BuildTrajectory has not been run. See ?Tempora::BuildTrajectory for details") + } + if (is.null(object@cluster.pathways)){ + stop("CalculatePWProfiles has not been run. See ?Tempora::CalculatePWProfiles for details") + } + gsva_bycluster <- object@cluster.pathways + + significant_pathways <- c() + for (i in 1:object@n.pcs){ + genes_scaled <- scale(object@cluster.pathways.dr$rotation[,i]) + significant_pathways <- c(names(which(genes_scaled[,1] > 1.0 | genes_scaled[,1] < -1.0)), significant_pathways) + } + + # pca_pathways <- sub("%.*", "", significant_pathways) + # pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) + # pca_pathways_cleaned <- gsub("[[:punct:]]", "_", pca_pathways) + # themes <- pca_pathways_cleaned + themes <- significant_pathways + + # oldrownames =rownames(gsva_bycluster) + # newrownames <- sub("%.*", "", oldrownames) + # newrownames <- gsub("\\s*\\([^\\)]+\\)","",newrownames) + # newrownames <- gsub("[[:punct:]]", "_", newrownames) + # rownames(gsva_bycluster) <- newrownames + + if (DEBUG) cat("Fitting GAM models...") + + # system.time({ + p_vals <- gams <- list() + + func = function(idx, object, themes, gsva_bycluster) { + require(Tempora) + require(Matrix) + require(BiocGenerics) + # cat(file = stderr(), paste(idx, "\n")) + grp = BiocGenerics::grep(themes[idx], rownames(gsva_bycluster), ignore.case = T) + crit = length(grp) + if(crit == 0) { + return(list(1, NA)) + } + if (crit > 1){ + plot_df <- data.frame( + cluster=colnames(gsva_bycluster[grp, ]), + value=Matrix::colMeans(gsva_bycluster[grp, ], + na.rm=T)) + } else if (crit == 1){ + plot_df <- data.frame(cluster=names(gsva_bycluster[grp, ]), + value=gsva_bycluster[grp, ]) + } else { + #should not happen + return(list(1, NA)) + } + plot_df$time <- object@cluster.metadata$Cluster_time_score + gams <- mgcv::gam(value ~ s(time, k=3, bs='cr'), data=plot_df) + temp_anova <- mgcv::anova.gam(gams) + p_vals = temp_anova$s.pv + list(p_vals, gams) + } + + p_valsNew <- bplapply(1:length(themes), function(x) { func(x, object, themes, gsva_bycluster) }) + for (idx in 1:length(p_valsNew)){ + p_vals[[idx]] = p_valsNew[[idx]][[1]] + gams[[idx]] = p_valsNew[[idx]][[2]] + } + names(p_vals) <- names(gams) <- themes + + pval_threshold = pval_threshold + p_vals_adj <- p.adjust(unlist(p_vals[which(unlist(p_vals) > 0)]), method = "BH") + varying_pathways <- p_vals_adj[which(p_vals_adj < pval_threshold)] + varying_pathways <- varying_pathways[!duplicated(names(varying_pathways))] + + if (length(varying_pathways)==0){ + cat("No temporally varying pathways detected. Please try running IdentifyVaryingPWs with a more relaxed p-value cutoff.") + #eventhough the function was not successful return the object because in the vignette + # this function call sets the original object to what is returned and if it is null + # you loose all the processing you have done until now. + if (!is.null(getDefaultReactiveDomain())) { + showNotification("No temporally varying pathways detected.", id = "temporaIdentifyVaryingPWs2", duration = 20) + } + return(object) + } else { + object@varying.pws <- varying_pathways + object@gams <- gams + return(object) + } + # to = object +} diff --git a/R/PlotVaryingPWs.R b/R/PlotVaryingPWs.R index 309b6ea..bfdf37d 100644 --- a/R/PlotVaryingPWs.R +++ b/R/PlotVaryingPWs.R @@ -25,15 +25,14 @@ gams <- object@gams cat("\nPlotting time-dependent pathways...") -for (i in 1:length(varying_pathways)){ +for (i in order(varying_pathways)){ if (length(grep(names(varying_pathways)[i], rownames(gsva_bycluster))) > 1){ plot_df <- data.frame(cluster=colnames(gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ]), value=colMeans(gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ])) plot_df$time <- object@cluster.metadata$Cluster_time_score - } - else if (length(grep(names(varying_pathways)[i], rownames(gsva_bycluster))) == 1) { + } else if (length(grep(names(varying_pathways)[i], rownames(gsva_bycluster))) == 1) { plot_df <- data.frame(cluster=names(gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ]), value=gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ]) plot_df$time <- object@cluster.metadata$Cluster_time_score - } + } else {return(NULL)} id <- which(names(gams)==names(varying_pathways)[i]) mgcv::plot.gam(gams[[id[1]]], main = paste0(names(varying_pathways)[i]), xlab = "Inferred time", ylab="Pathway expression level", bty="l", cex.main = 1, xaxt = "n", shade= F, se=3, scheme=1) diff --git a/R/TemporaObjClass.R b/R/TemporaObjClass.R index 0bf2f3d..fe0c2b9 100644 --- a/R/TemporaObjClass.R +++ b/R/TemporaObjClass.R @@ -319,7 +319,8 @@ CreateTemporaObject <- function(exprMatrix, meta.data, timepoint_order, cluster_ if (!is.null(cluster_labels) & !is.null(cell_markers)){ clustmd$label <- paste0("Cluster ", paste(rownames(clustmd), cluster_labels, sep="-")) } else if (!is.null(cluster_labels)){ - clustmd$label <- paste0("Cluster ", paste(rownames(clustmd), cluster_labels, sep="-")) + # clustmd$label <- paste0("Cluster ", paste(rownames(clustmd), cluster_labels, sep="-")) + clustmd$label <- cluster_labels } else if (!is.null(cell_markers)) { cat("\nPerforming automated cluster labeling with GSVA...") cluster_number <- as.numeric(meta.data$Clusters) diff --git a/R/Visualize.R b/R/Visualize.R index f97e2c9..1d6d8bf 100644 --- a/R/Visualize.R +++ b/R/Visualize.R @@ -14,6 +14,7 @@ #' @importFrom stats p.adjust prcomp screeplot #' @importFrom scales rescale #' @importFrom reshape2 dcast +#' @importFrom magrittr '%>%' #' #' PlotTrajectory <- function(object, layout=NULL, ...){ @@ -29,20 +30,60 @@ PlotTrajectory <- function(object, layout=NULL, ...){ edge_graph <- igraph::graph_from_data_frame(d=object@trajectory, vertices = object@cluster.metadata, directed = T) if (is.null(layout)){ - l <- igraph::layout_with_sugiyama(edge_graph, layers = object@cluster.metadata$Cluster_time_score, maxiter = 1000) + l <- igraph::layout_with_sugiyama(edge_graph, + layers = object@cluster.metadata$Cluster_time_score, + hgap = 100, + maxiter = 1000) + layer = as.integer(object@cluster.metadata$Cluster_time_score) + layerOrder = order(object@cluster.metadata$Cluster_time_score, decreasing = T) + # l$layout[layerOrder,] + # max number of spacers (each layer is treated independantly) + spacers = l$layout[,2] %>% round() %>% table %>% max %>% -1 + # distribute along x axis + # the 100 most probably is related to hgap + l$layout[layerOrder,1] = l$layout[,2] %>% + round() %>% + table() %>% + lapply( FUN = function(x){((0:spacers)[1:x] + (spacers - x + 1)/2) * 100 }) %>% + unlist + + #l$layout[,2] <- 3-(rescale(object@cluster.metadata$Cluster_time_score, to=c(0,3))) if (length(levels(object@meta.data$Timepoints)) > 9){ colours <- colorRampPalette(RColorBrewer::brewer.pal(7, "YlOrRd")) - plot.igraph(edge_graph, ylim=c(-1,1), layout = l$layout, ylab = "Inferred time", vertex.shape = "pie", vertex.pie = lapply(1:nrow(object@cluster.metadata), function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), - vertex.pie.color=list(colours(length(levels(object@meta.data$Timepoints)))), pie.border=list(rep("white", 4)), vertex.frame.color="white", edge.arrow.size = 0.5, edge.width = 1.5, vertex.label.family="Arial", - vertex.label.color="black", edge.lty = E(edge_graph)$type, ...) + plot.igraph(edge_graph, ylim=c(-1,1), + layout = l$layout, + ylab = "Inferred time", + vertex.shape = "pie", + vertex.pie = lapply(1:nrow(object@cluster.metadata), + function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), + vertex.pie.color=list(colours(length(levels(object@meta.data$Timepoints)))), + pie.border=list(rep("white", 4)), + vertex.frame.color="white", + edge.arrow.size = 0.5, + edge.width = 1.5, + vertex.label.family="Arial", + vertex.label.color="black", + edge.lty = E(edge_graph)$type, + ...) axis(side=2, at=c(-1,1), labels=c("Late","Early"), las=1) legend("topleft", legend = levels(object@meta.data$Timepoints), fill=colours, bty = "n", border="black") } else { colours <- brewer.pal(length(levels(object@meta.data$Timepoints)), "YlOrRd") - plot.igraph(edge_graph, ylim=c(-1,1), ylab = "Inferred time", layout = l$layout, vertex.shape = "pie", vertex.pie = lapply(1:nrow(object@cluster.metadata), function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), - vertex.pie.color=list(colours), pie.border=list(rep("white", length(levels(object@meta.data$Timepoints)))), vertex.frame.color="white", - vertex.label.family="Arial", vertex.label.color="black", edge.lty = E(edge_graph)$type,...) + plot.igraph(edge_graph, + ylim=c(-1,1), + ylab = "Inferred time", + layout = l$layout, + vertex.shape = "pie", + vertex.pie = lapply(1:nrow(object@cluster.metadata), + function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), + vertex.pie.color=list(colours), + pie.border=list(rep("white", length(levels(object@meta.data$Timepoints)))), + vertex.frame.color="white", + # vertex.label.family="Arial", + vertex.label.color="black", + edge.lty = E(edge_graph)$type, + ...) legend("topleft", legend = levels(object@meta.data$Timepoints), fill=colours, bty = "n", border = "black") axis(side=2, at=c(-1,1), labels=c("Late","Early"), las=1) } @@ -51,7 +92,11 @@ PlotTrajectory <- function(object, layout=NULL, ...){ } else { if (length(levels(object@meta.data$Timepoints)) > 9){ colours <- colorRampPalette(RColorBrewer::brewer.pal(7, "YlOrRd")) - plot.igraph(edge_graph, ylim=c(-1,1), layout = layout, ylab = "Inferred time", vertex.shape = "pie", vertex.pie = lapply(1:nrow(object@cluster.metadata), function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), + plot.igraph(edge_graph, ylim=c(-1,1), + layout = layout, + ylab = "Inferred time", + vertex.shape = "pie", + vertex.pie = lapply(1:nrow(object@cluster.metadata), function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), vertex.pie.color=list(colours(length(levels(object@meta.data$Timepoints)))), pie.border=list(rep("white", 4)), vertex.frame.color="white", edge.arrow.size = 0.5, edge.width = 1.5, vertex.label.family="Arial", vertex.label.color="black", edge.lty = E(edge_graph)$type, ...) axis(side=2, at=c(-1,1), labels=c("Late","Early"), las=1) diff --git a/R/dataAccess.R b/R/dataAccess.R index 5d6cbe5..927dbb7 100644 --- a/R/dataAccess.R +++ b/R/dataAccess.R @@ -142,5 +142,5 @@ suppressMessages( suppressMessages( setMethod("getMD","SingleCellExperiment", - function(x) SingleCellExperiment::colData(x)) + function(x) data.frame(SingleCellExperiment::colData(x))) ) diff --git a/man/BuildTrajectory.Rd b/man/BuildTrajectory.Rd index 56a464c..40350c1 100644 --- a/man/BuildTrajectory.Rd +++ b/man/BuildTrajectory.Rd @@ -15,7 +15,6 @@ BuildTrajectory(object, n_pcs, difference_threshold = 0.01, loadings = 0.4) \item{loadings}{Threshold of PCA loadings for pathways to be used in trajectory construction. The higher the loading, the more the pathway contributes to a principal component. Default at 0.4.} } - \description{ Build the information-based cluster-cluster network using reduced-dimensionality pathway enrichment profiles of all clusters. This network connects related cell types and states across multiple time points. Taking advantage of the available time information, Tempora also infers the directions of all connections in a trajectory that go from early to late clusters. diff --git a/man/IdentifyVaryingPWsParallel.Rd b/man/IdentifyVaryingPWsParallel.Rd new file mode 100644 index 0000000..8fff046 --- /dev/null +++ b/man/IdentifyVaryingPWsParallel.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IdentifyVaryingPWsParallel.R +\name{IdentifyVaryingPWsParallel} +\alias{IdentifyVaryingPWsParallel} +\title{Calculate temporally changing pathways (parallel version)} +\usage{ +IdentifyVaryingPWsParallel(object, pval_threshold = 0.05) +} +\arguments{ +\item{object}{A Tempora object} + +\item{pval_threshold}{P-value threshold to determine the significance of pathway enrichment over time. Default to 0.05.} +} +\description{ +Identify the pathways that change over time by fitting a generalized additive model +} +\examples{ +\dontrun{tempora_data <- IdentifyVaryingPWsParallel(tempora_data, pval_threshold = 0.05)} +}