From 3a589dc2f78c8ef5ad7daaeb66982bae7bd1d17b Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Mon, 26 Apr 2021 11:20:41 +0200 Subject: [PATCH 01/11] parallel version of identify varying pathways. --- R/IdentifyVaryingPWs.R | 105 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/R/IdentifyVaryingPWs.R b/R/IdentifyVaryingPWs.R index 088e9ba..bd76823 100644 --- a/R/IdentifyVaryingPWs.R +++ b/R/IdentifyVaryingPWs.R @@ -45,7 +45,7 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ p_vals[[i]] <- 1 gams[[i]] <- NA next - } + } if (length(grep(themes[i], rownames(gsva_bycluster))) > 1){ plot_df <- data.frame(cluster=colnames(gsva_bycluster[grep(themes[i], rownames(gsva_bycluster)), ]), value=colMeans(gsva_bycluster[grep(themes[i], rownames(gsva_bycluster)), ], na.rm=T)) } else if (length(grep(themes[i], rownames(gsva_bycluster))) == 1){ @@ -67,7 +67,7 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ cat("No temporally varying pathways detected. Please try running IdentifyVaryingPWs with a more relaxed p-value cutoff.") #eventhough the function was not successful return the object because in the vignette # this function call sets the original object to what is returned and if it is null - # you loose all the processing you have done until now. + # you loose all the processing you have done until now. return(object) } else { object@varying.pws <- varying_pathways @@ -77,3 +77,104 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ } +# ---- +#' Calculate temporally changing pathways (parallel version) +#' +#' Identify the pathways that change over time by fitting a generalized additive model +#' @param object A Tempora object +#' @param pval_threshold P-value threshold to determine the significance of pathway enrichment over time. Default to 0.05. +#' @export +#' @importFrom mgcv gam anova.gam plot.gam +#' @importFrom methods new validObject +#' @importFrom stats p.adjust prcomp screeplot +#' @importFrom grDevices colorRampPalette +#' @importFrom graphics axis legend par points text +#' @importFrom reshape2 dcast +#' @examples \dontrun{tempora_data <- IdentifyVaryingPWsParallel(tempora_data, pval_threshold = 0.05)} +IdentifyVaryingPWsParallel <- function(object, pval_threshold=0.05){ + require(BiocParallel) + if (class(object)[1] != "Tempora"){ + stop("Not a valid Tempora object") + } + if (is.null(object@n.pcs)){ + stop("BuildTrajectory has not been run. See ?Tempora::BuildTrajectory for details") + } + if (is.null(object@cluster.pathways)){ + stop("CalculatePWProfiles has not been run. See ?Tempora::CalculatePWProfiles for details") + } + gsva_bycluster <- object@cluster.pathways + + significant_pathways <- c() + for (i in 1:object@n.pcs){ + genes_scaled <- scale(object@cluster.pathways.dr$rotation[,i]) + significant_pathways <- c(names(which(genes_scaled[,1] > 1.5 | genes_scaled[,1] < -1.5)), significant_pathways) + } + + pca_pathways <- sub("%.*", "", significant_pathways) + pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) + pca_pathways_cleaned <- gsub("[[:punct:]]", "", pca_pathways) + themes <- pca_pathways_cleaned + + if (DEBUG) cat("Fitting GAM models...") + + # system.time({ + p_vals <- gams <- list() + + func = function(idx, object, themes, gsva_bycluster) { + require(Tempora) + require(Matrix) + require(BiocGenerics) + # cat(file = stderr(), paste(idx, "\n")) + grp = BiocGenerics::grep(themes[idx], rownames(gsva_bycluster), ignore.case = T) + crit = length(grp) + if(crit == 0) { + return(list(1, NA)) + } + if (crit > 1){ + plot_df <- data.frame( + cluster=colnames(gsva_bycluster[grp, ]), + value=Matrix::colMeans(gsva_bycluster[grp, ], + na.rm=T)) + } else if (crit == 1){ + plot_df <- data.frame(cluster=names(gsva_bycluster[grp, ]), + value=gsva_bycluster[grp, ]) + } else { + #should not happen + return(list(1, NA)) + } + plot_df$time <- object@cluster.metadata$Cluster_time_score + gams <- mgcv::gam(value ~ s(time, k=3, bs='cr'), data=plot_df) + temp_anova <- mgcv::anova.gam(gams) + p_vals = temp_anova$s.pv + list(p_vals, gams) + } + + p_valsNew <- bplapply(1:length(themes), function(x) { func(x, object, themes, gsva_bycluster) }) + for (idx in 1:length(p_valsNew)){ + p_vals[[idx]] = p_valsNew[[idx]][[1]] + gams[[idx]] = p_valsNew[[idx]][[2]] + } + names(p_vals) <- names(gams) <- themes + + pval_threshold = pval_threshold + p_vals_adj <- p.adjust(unlist(p_vals[which(unlist(p_vals) > 0)]), method = "BH") + varying_pathways <- p_vals_adj[which(p_vals_adj < pval_threshold)] + varying_pathways <- varying_pathways[!duplicated(names(varying_pathways))] + + if (length(varying_pathways)==0){ + cat("No temporally varying pathways detected. Please try running IdentifyVaryingPWs with a more relaxed p-value cutoff.") + #eventhough the function was not successful return the object because in the vignette + # this function call sets the original object to what is returned and if it is null + # you loose all the processing you have done until now. + if (!is.null(getDefaultReactiveDomain())) { + showNotification("No temporally varying pathways detected.", id = "temporaIdentifyVaryingPWs2", duration = 20) + } + return(object) + } else { + object@varying.pws <- varying_pathways + object@gams <- gams + return(object) + } + # to = object +} + From 5a5a24b9c2227443c3c2539252d2ab572f51ac34 Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Mon, 26 Apr 2021 11:20:55 +0200 Subject: [PATCH 02/11] ordered plots --- R/PlotVaryingPWs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/PlotVaryingPWs.R b/R/PlotVaryingPWs.R index 309b6ea..6d29d0e 100644 --- a/R/PlotVaryingPWs.R +++ b/R/PlotVaryingPWs.R @@ -25,7 +25,7 @@ gams <- object@gams cat("\nPlotting time-dependent pathways...") -for (i in 1:length(varying_pathways)){ +for (i in order(varying_pathways)){ if (length(grep(names(varying_pathways)[i], rownames(gsva_bycluster))) > 1){ plot_df <- data.frame(cluster=colnames(gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ]), value=colMeans(gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ])) plot_df$time <- object@cluster.metadata$Cluster_time_score From e0953487172d7edcb039d55be45da25b6a13becc Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Mon, 26 Apr 2021 11:21:12 +0200 Subject: [PATCH 03/11] shorter labels --- R/TemporaObjClass.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/TemporaObjClass.R b/R/TemporaObjClass.R index 0bf2f3d..fe0c2b9 100644 --- a/R/TemporaObjClass.R +++ b/R/TemporaObjClass.R @@ -319,7 +319,8 @@ CreateTemporaObject <- function(exprMatrix, meta.data, timepoint_order, cluster_ if (!is.null(cluster_labels) & !is.null(cell_markers)){ clustmd$label <- paste0("Cluster ", paste(rownames(clustmd), cluster_labels, sep="-")) } else if (!is.null(cluster_labels)){ - clustmd$label <- paste0("Cluster ", paste(rownames(clustmd), cluster_labels, sep="-")) + # clustmd$label <- paste0("Cluster ", paste(rownames(clustmd), cluster_labels, sep="-")) + clustmd$label <- cluster_labels } else if (!is.null(cell_markers)) { cat("\nPerforming automated cluster labeling with GSVA...") cluster_number <- as.numeric(meta.data$Clusters) From ef57496b2d6cacc314bd1a3a6b21275440c19c68 Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Mon, 26 Apr 2021 11:21:28 +0200 Subject: [PATCH 04/11] import %>% --- R/Visualize.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/Visualize.R b/R/Visualize.R index f97e2c9..2722ddf 100644 --- a/R/Visualize.R +++ b/R/Visualize.R @@ -14,6 +14,7 @@ #' @importFrom stats p.adjust prcomp screeplot #' @importFrom scales rescale #' @importFrom reshape2 dcast +#' @importFrom magrittr '%>%' #' #' PlotTrajectory <- function(object, layout=NULL, ...){ From 63b81014b30c64aadfdf57615dbc8bb289d46427 Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Mon, 26 Apr 2021 11:25:59 +0200 Subject: [PATCH 05/11] optimized sugiyama plot. --- R/Visualize.R | 60 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/R/Visualize.R b/R/Visualize.R index 2722ddf..1d6d8bf 100644 --- a/R/Visualize.R +++ b/R/Visualize.R @@ -30,20 +30,60 @@ PlotTrajectory <- function(object, layout=NULL, ...){ edge_graph <- igraph::graph_from_data_frame(d=object@trajectory, vertices = object@cluster.metadata, directed = T) if (is.null(layout)){ - l <- igraph::layout_with_sugiyama(edge_graph, layers = object@cluster.metadata$Cluster_time_score, maxiter = 1000) + l <- igraph::layout_with_sugiyama(edge_graph, + layers = object@cluster.metadata$Cluster_time_score, + hgap = 100, + maxiter = 1000) + layer = as.integer(object@cluster.metadata$Cluster_time_score) + layerOrder = order(object@cluster.metadata$Cluster_time_score, decreasing = T) + # l$layout[layerOrder,] + # max number of spacers (each layer is treated independantly) + spacers = l$layout[,2] %>% round() %>% table %>% max %>% -1 + # distribute along x axis + # the 100 most probably is related to hgap + l$layout[layerOrder,1] = l$layout[,2] %>% + round() %>% + table() %>% + lapply( FUN = function(x){((0:spacers)[1:x] + (spacers - x + 1)/2) * 100 }) %>% + unlist + + #l$layout[,2] <- 3-(rescale(object@cluster.metadata$Cluster_time_score, to=c(0,3))) if (length(levels(object@meta.data$Timepoints)) > 9){ colours <- colorRampPalette(RColorBrewer::brewer.pal(7, "YlOrRd")) - plot.igraph(edge_graph, ylim=c(-1,1), layout = l$layout, ylab = "Inferred time", vertex.shape = "pie", vertex.pie = lapply(1:nrow(object@cluster.metadata), function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), - vertex.pie.color=list(colours(length(levels(object@meta.data$Timepoints)))), pie.border=list(rep("white", 4)), vertex.frame.color="white", edge.arrow.size = 0.5, edge.width = 1.5, vertex.label.family="Arial", - vertex.label.color="black", edge.lty = E(edge_graph)$type, ...) + plot.igraph(edge_graph, ylim=c(-1,1), + layout = l$layout, + ylab = "Inferred time", + vertex.shape = "pie", + vertex.pie = lapply(1:nrow(object@cluster.metadata), + function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), + vertex.pie.color=list(colours(length(levels(object@meta.data$Timepoints)))), + pie.border=list(rep("white", 4)), + vertex.frame.color="white", + edge.arrow.size = 0.5, + edge.width = 1.5, + vertex.label.family="Arial", + vertex.label.color="black", + edge.lty = E(edge_graph)$type, + ...) axis(side=2, at=c(-1,1), labels=c("Late","Early"), las=1) legend("topleft", legend = levels(object@meta.data$Timepoints), fill=colours, bty = "n", border="black") } else { colours <- brewer.pal(length(levels(object@meta.data$Timepoints)), "YlOrRd") - plot.igraph(edge_graph, ylim=c(-1,1), ylab = "Inferred time", layout = l$layout, vertex.shape = "pie", vertex.pie = lapply(1:nrow(object@cluster.metadata), function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), - vertex.pie.color=list(colours), pie.border=list(rep("white", length(levels(object@meta.data$Timepoints)))), vertex.frame.color="white", - vertex.label.family="Arial", vertex.label.color="black", edge.lty = E(edge_graph)$type,...) + plot.igraph(edge_graph, + ylim=c(-1,1), + ylab = "Inferred time", + layout = l$layout, + vertex.shape = "pie", + vertex.pie = lapply(1:nrow(object@cluster.metadata), + function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), + vertex.pie.color=list(colours), + pie.border=list(rep("white", length(levels(object@meta.data$Timepoints)))), + vertex.frame.color="white", + # vertex.label.family="Arial", + vertex.label.color="black", + edge.lty = E(edge_graph)$type, + ...) legend("topleft", legend = levels(object@meta.data$Timepoints), fill=colours, bty = "n", border = "black") axis(side=2, at=c(-1,1), labels=c("Late","Early"), las=1) } @@ -52,7 +92,11 @@ PlotTrajectory <- function(object, layout=NULL, ...){ } else { if (length(levels(object@meta.data$Timepoints)) > 9){ colours <- colorRampPalette(RColorBrewer::brewer.pal(7, "YlOrRd")) - plot.igraph(edge_graph, ylim=c(-1,1), layout = layout, ylab = "Inferred time", vertex.shape = "pie", vertex.pie = lapply(1:nrow(object@cluster.metadata), function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), + plot.igraph(edge_graph, ylim=c(-1,1), + layout = layout, + ylab = "Inferred time", + vertex.shape = "pie", + vertex.pie = lapply(1:nrow(object@cluster.metadata), function(x) as.numeric(object@cluster.metadata[x,2:((length(levels(object@meta.data$Timepoints)))+1)])), vertex.pie.color=list(colours(length(levels(object@meta.data$Timepoints)))), pie.border=list(rep("white", 4)), vertex.frame.color="white", edge.arrow.size = 0.5, edge.width = 1.5, vertex.label.family="Arial", vertex.label.color="black", edge.lty = E(edge_graph)$type, ...) axis(side=2, at=c(-1,1), labels=c("Late","Early"), las=1) From e8305b7f1e6f405a23e75637e1380f650e3ee2cc Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Mon, 26 Apr 2021 11:30:53 +0200 Subject: [PATCH 06/11] SingleCellExperiment compatibility. --- R/dataAccess.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/dataAccess.R b/R/dataAccess.R index 5d6cbe5..927dbb7 100644 --- a/R/dataAccess.R +++ b/R/dataAccess.R @@ -142,5 +142,5 @@ suppressMessages( suppressMessages( setMethod("getMD","SingleCellExperiment", - function(x) SingleCellExperiment::colData(x)) + function(x) data.frame(SingleCellExperiment::colData(x))) ) From 23fc96c29f3abffdb4951f9813a6389aaa8fa842 Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Mon, 26 Apr 2021 11:31:03 +0200 Subject: [PATCH 07/11] version bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7c1e56b..d01afc9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: Tempora Type: Package Title: Pathway-based Trajectory Inference for Time-series Single Cell RNA-Seq Data -Version: 0.1.0 +Version: 0.1.08 Author: Thinh Tran Maintainer: Thinh Tran Description: Tempora is a trajectory inference method that infers cell lineage maps from time-series scRNAseq data using pathway enrichment profiles of single cells and experimental time information. Tempora uses an information theoretic approach to build a trajectory at the cluster level based on the clusters’ pathway enrichment profiles, effectively connecting cell types and states across multiple time points. Taking advantage of the available time information, Tempora can infer the directions of all connections in a trajectory that go from early to late clusters. From 11d86d3fd697f90e92b0c646894cffeb7b7355fc Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Mon, 10 May 2021 12:35:01 +0200 Subject: [PATCH 08/11] bj improvements --- DESCRIPTION | 3 +- NAMESPACE | 2 + R/IdentifyVaryingPWs.R | 101 ------------------------------ R/IdentifyVaryingPWsParallel.R | 101 ++++++++++++++++++++++++++++++ man/BuildTrajectory.Rd | 1 - man/IdentifyVaryingPWsParallel.Rd | 19 ++++++ 6 files changed, 124 insertions(+), 103 deletions(-) create mode 100644 R/IdentifyVaryingPWsParallel.R create mode 100644 man/IdentifyVaryingPWsParallel.Rd diff --git a/DESCRIPTION b/DESCRIPTION index d01afc9..3f46671 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,7 @@ Imports: scales Encoding: UTF-8 LazyData: true -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.1 Suggests: knitr, rmarkdown, @@ -30,6 +30,7 @@ Collate: 'CalculatePWProfiles.R' 'IdentifyCellTypes.R' 'IdentifyVaryingPWs.R' + 'IdentifyVaryingPWsParallel.R' 'PlotVaryingPWs.R' 'dataAccess.R' 'TemporaObjClass.R' diff --git a/NAMESPACE b/NAMESPACE index a8392c5..f07b470 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(CalculatePWProfiles) export(CreateTemporaObject) export(IdentifyCellTypes) export(IdentifyVaryingPWs) +export(IdentifyVaryingPWsParallel) export(ImportSeuratObject) export(PlotTrajectory) export(PlotVaryingPWs) @@ -44,6 +45,7 @@ importFrom(igraph,E) importFrom(igraph,graph_from_data_frame) importFrom(igraph,layout_with_sugiyama) importFrom(igraph,plot.igraph) +importFrom(magrittr,'%>%') importFrom(methods,new) importFrom(methods,validObject) importFrom(mgcv,anova.gam) diff --git a/R/IdentifyVaryingPWs.R b/R/IdentifyVaryingPWs.R index bd76823..ceaec74 100644 --- a/R/IdentifyVaryingPWs.R +++ b/R/IdentifyVaryingPWs.R @@ -77,104 +77,3 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ } -# ---- -#' Calculate temporally changing pathways (parallel version) -#' -#' Identify the pathways that change over time by fitting a generalized additive model -#' @param object A Tempora object -#' @param pval_threshold P-value threshold to determine the significance of pathway enrichment over time. Default to 0.05. -#' @export -#' @importFrom mgcv gam anova.gam plot.gam -#' @importFrom methods new validObject -#' @importFrom stats p.adjust prcomp screeplot -#' @importFrom grDevices colorRampPalette -#' @importFrom graphics axis legend par points text -#' @importFrom reshape2 dcast -#' @examples \dontrun{tempora_data <- IdentifyVaryingPWsParallel(tempora_data, pval_threshold = 0.05)} -IdentifyVaryingPWsParallel <- function(object, pval_threshold=0.05){ - require(BiocParallel) - if (class(object)[1] != "Tempora"){ - stop("Not a valid Tempora object") - } - if (is.null(object@n.pcs)){ - stop("BuildTrajectory has not been run. See ?Tempora::BuildTrajectory for details") - } - if (is.null(object@cluster.pathways)){ - stop("CalculatePWProfiles has not been run. See ?Tempora::CalculatePWProfiles for details") - } - gsva_bycluster <- object@cluster.pathways - - significant_pathways <- c() - for (i in 1:object@n.pcs){ - genes_scaled <- scale(object@cluster.pathways.dr$rotation[,i]) - significant_pathways <- c(names(which(genes_scaled[,1] > 1.5 | genes_scaled[,1] < -1.5)), significant_pathways) - } - - pca_pathways <- sub("%.*", "", significant_pathways) - pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) - pca_pathways_cleaned <- gsub("[[:punct:]]", "", pca_pathways) - themes <- pca_pathways_cleaned - - if (DEBUG) cat("Fitting GAM models...") - - # system.time({ - p_vals <- gams <- list() - - func = function(idx, object, themes, gsva_bycluster) { - require(Tempora) - require(Matrix) - require(BiocGenerics) - # cat(file = stderr(), paste(idx, "\n")) - grp = BiocGenerics::grep(themes[idx], rownames(gsva_bycluster), ignore.case = T) - crit = length(grp) - if(crit == 0) { - return(list(1, NA)) - } - if (crit > 1){ - plot_df <- data.frame( - cluster=colnames(gsva_bycluster[grp, ]), - value=Matrix::colMeans(gsva_bycluster[grp, ], - na.rm=T)) - } else if (crit == 1){ - plot_df <- data.frame(cluster=names(gsva_bycluster[grp, ]), - value=gsva_bycluster[grp, ]) - } else { - #should not happen - return(list(1, NA)) - } - plot_df$time <- object@cluster.metadata$Cluster_time_score - gams <- mgcv::gam(value ~ s(time, k=3, bs='cr'), data=plot_df) - temp_anova <- mgcv::anova.gam(gams) - p_vals = temp_anova$s.pv - list(p_vals, gams) - } - - p_valsNew <- bplapply(1:length(themes), function(x) { func(x, object, themes, gsva_bycluster) }) - for (idx in 1:length(p_valsNew)){ - p_vals[[idx]] = p_valsNew[[idx]][[1]] - gams[[idx]] = p_valsNew[[idx]][[2]] - } - names(p_vals) <- names(gams) <- themes - - pval_threshold = pval_threshold - p_vals_adj <- p.adjust(unlist(p_vals[which(unlist(p_vals) > 0)]), method = "BH") - varying_pathways <- p_vals_adj[which(p_vals_adj < pval_threshold)] - varying_pathways <- varying_pathways[!duplicated(names(varying_pathways))] - - if (length(varying_pathways)==0){ - cat("No temporally varying pathways detected. Please try running IdentifyVaryingPWs with a more relaxed p-value cutoff.") - #eventhough the function was not successful return the object because in the vignette - # this function call sets the original object to what is returned and if it is null - # you loose all the processing you have done until now. - if (!is.null(getDefaultReactiveDomain())) { - showNotification("No temporally varying pathways detected.", id = "temporaIdentifyVaryingPWs2", duration = 20) - } - return(object) - } else { - object@varying.pws <- varying_pathways - object@gams <- gams - return(object) - } - # to = object -} - diff --git a/R/IdentifyVaryingPWsParallel.R b/R/IdentifyVaryingPWsParallel.R new file mode 100644 index 0000000..b1868a1 --- /dev/null +++ b/R/IdentifyVaryingPWsParallel.R @@ -0,0 +1,101 @@ + +# ---- +#' Calculate temporally changing pathways (parallel version) +#' +#' Identify the pathways that change over time by fitting a generalized additive model +#' @param object A Tempora object +#' @param pval_threshold P-value threshold to determine the significance of pathway enrichment over time. Default to 0.05. +#' @export +#' @importFrom mgcv gam anova.gam plot.gam +#' @importFrom methods new validObject +#' @importFrom stats p.adjust prcomp screeplot +#' @importFrom grDevices colorRampPalette +#' @importFrom graphics axis legend par points text +#' @importFrom reshape2 dcast +#' @examples \dontrun{tempora_data <- IdentifyVaryingPWsParallel(tempora_data, pval_threshold = 0.05)} +IdentifyVaryingPWsParallel <- function(object, pval_threshold=0.05){ + require(BiocParallel) + if (class(object)[1] != "Tempora"){ + stop("Not a valid Tempora object") + } + if (is.null(object@n.pcs)){ + stop("BuildTrajectory has not been run. See ?Tempora::BuildTrajectory for details") + } + if (is.null(object@cluster.pathways)){ + stop("CalculatePWProfiles has not been run. See ?Tempora::CalculatePWProfiles for details") + } + gsva_bycluster <- object@cluster.pathways + + significant_pathways <- c() + for (i in 1:object@n.pcs){ + genes_scaled <- scale(object@cluster.pathways.dr$rotation[,i]) + significant_pathways <- c(names(which(genes_scaled[,1] > 1.5 | genes_scaled[,1] < -1.5)), significant_pathways) + } + + pca_pathways <- sub("%.*", "", significant_pathways) + pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) + pca_pathways_cleaned <- gsub("[[:punct:]]", "", pca_pathways) + themes <- pca_pathways_cleaned + + if (DEBUG) cat("Fitting GAM models...") + + # system.time({ + p_vals <- gams <- list() + + func = function(idx, object, themes, gsva_bycluster) { + require(Tempora) + require(Matrix) + require(BiocGenerics) + # cat(file = stderr(), paste(idx, "\n")) + grp = BiocGenerics::grep(themes[idx], rownames(gsva_bycluster), ignore.case = T) + crit = length(grp) + if(crit == 0) { + return(list(1, NA)) + } + if (crit > 1){ + plot_df <- data.frame( + cluster=colnames(gsva_bycluster[grp, ]), + value=Matrix::colMeans(gsva_bycluster[grp, ], + na.rm=T)) + } else if (crit == 1){ + plot_df <- data.frame(cluster=names(gsva_bycluster[grp, ]), + value=gsva_bycluster[grp, ]) + } else { + #should not happen + return(list(1, NA)) + } + plot_df$time <- object@cluster.metadata$Cluster_time_score + gams <- mgcv::gam(value ~ s(time, k=3, bs='cr'), data=plot_df) + temp_anova <- mgcv::anova.gam(gams) + p_vals = temp_anova$s.pv + list(p_vals, gams) + } + + p_valsNew <- bplapply(1:length(themes), function(x) { func(x, object, themes, gsva_bycluster) }) + for (idx in 1:length(p_valsNew)){ + p_vals[[idx]] = p_valsNew[[idx]][[1]] + gams[[idx]] = p_valsNew[[idx]][[2]] + } + names(p_vals) <- names(gams) <- themes + + pval_threshold = pval_threshold + p_vals_adj <- p.adjust(unlist(p_vals[which(unlist(p_vals) > 0)]), method = "BH") + varying_pathways <- p_vals_adj[which(p_vals_adj < pval_threshold)] + varying_pathways <- varying_pathways[!duplicated(names(varying_pathways))] + + if (length(varying_pathways)==0){ + cat("No temporally varying pathways detected. Please try running IdentifyVaryingPWs with a more relaxed p-value cutoff.") + #eventhough the function was not successful return the object because in the vignette + # this function call sets the original object to what is returned and if it is null + # you loose all the processing you have done until now. + if (!is.null(getDefaultReactiveDomain())) { + showNotification("No temporally varying pathways detected.", id = "temporaIdentifyVaryingPWs2", duration = 20) + } + return(object) + } else { + object@varying.pws <- varying_pathways + object@gams <- gams + return(object) + } + # to = object +} diff --git a/man/BuildTrajectory.Rd b/man/BuildTrajectory.Rd index 56a464c..40350c1 100644 --- a/man/BuildTrajectory.Rd +++ b/man/BuildTrajectory.Rd @@ -15,7 +15,6 @@ BuildTrajectory(object, n_pcs, difference_threshold = 0.01, loadings = 0.4) \item{loadings}{Threshold of PCA loadings for pathways to be used in trajectory construction. The higher the loading, the more the pathway contributes to a principal component. Default at 0.4.} } - \description{ Build the information-based cluster-cluster network using reduced-dimensionality pathway enrichment profiles of all clusters. This network connects related cell types and states across multiple time points. Taking advantage of the available time information, Tempora also infers the directions of all connections in a trajectory that go from early to late clusters. diff --git a/man/IdentifyVaryingPWsParallel.Rd b/man/IdentifyVaryingPWsParallel.Rd new file mode 100644 index 0000000..8fff046 --- /dev/null +++ b/man/IdentifyVaryingPWsParallel.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IdentifyVaryingPWsParallel.R +\name{IdentifyVaryingPWsParallel} +\alias{IdentifyVaryingPWsParallel} +\title{Calculate temporally changing pathways (parallel version)} +\usage{ +IdentifyVaryingPWsParallel(object, pval_threshold = 0.05) +} +\arguments{ +\item{object}{A Tempora object} + +\item{pval_threshold}{P-value threshold to determine the significance of pathway enrichment over time. Default to 0.05.} +} +\description{ +Identify the pathways that change over time by fitting a generalized additive model +} +\examples{ +\dontrun{tempora_data <- IdentifyVaryingPWsParallel(tempora_data, pval_threshold = 0.05)} +} From 10e30e3ab4409d48f4eb7189470cebf430c1af93 Mon Sep 17 00:00:00 2001 From: baj12 Date: Tue, 11 May 2021 14:21:57 +0200 Subject: [PATCH 09/11] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3f46671..bbc5de4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: Tempora Type: Package Title: Pathway-based Trajectory Inference for Time-series Single Cell RNA-Seq Data -Version: 0.1.08 +Version: 0.1.09 Author: Thinh Tran Maintainer: Thinh Tran Description: Tempora is a trajectory inference method that infers cell lineage maps from time-series scRNAseq data using pathway enrichment profiles of single cells and experimental time information. Tempora uses an information theoretic approach to build a trajectory at the cluster level based on the clusters’ pathway enrichment profiles, effectively connecting cell types and states across multiple time points. Taking advantage of the available time information, Tempora can infer the directions of all connections in a trajectory that go from early to late clusters. From 38b8c9b92dd0094cfd71f9cc2bddd6a1f8547800 Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Tue, 18 May 2021 15:34:12 +0200 Subject: [PATCH 10/11] test --- R/CalculatePWProfiles.R | 6 ++++++ R/IdentifyVaryingPWs.R | 18 +++++++++++++----- R/IdentifyVaryingPWsParallel.R | 17 ++++++++++++----- R/PlotVaryingPWs.R | 5 ++--- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/R/CalculatePWProfiles.R b/R/CalculatePWProfiles.R index 0354d12..be1de84 100644 --- a/R/CalculatePWProfiles.R +++ b/R/CalculatePWProfiles.R @@ -37,6 +37,12 @@ CalculatePWProfiles <- function(object, gmt_path, method="gsva", min.sz=5, max.s cat("\nCalculating cluster pathway enrichment profiles...\n") gsva_bycluster <- GSVA::gsva(as.matrix(exprMatrix_bycluster), pathwaygmt, method=method, min.sz=min.sz, max.sz=max.sz, parallel.sz=parallel.sz) + oldrownames =rownames(gsva_bycluster) + newrownames <- sub("%.*", "", oldrownames) + newrownames <- gsub("\\s*\\([^\\)]+\\)","",newrownames) + newrownames <- gsub("[[:punct:]]", "_", newrownames) + rownames(gsva_bycluster) <- newrownames + colnames(gsva_bycluster) <- colnames(exprMatrix_bycluster) object@cluster.pathways <- gsva_bycluster diff --git a/R/IdentifyVaryingPWs.R b/R/IdentifyVaryingPWs.R index ceaec74..e6b0f3f 100644 --- a/R/IdentifyVaryingPWs.R +++ b/R/IdentifyVaryingPWs.R @@ -28,13 +28,20 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ significant_pathways <- c() for (i in 1:object@n.pcs){ genes_scaled <- scale(object@cluster.pathways.dr$rotation[,i]) - significant_pathways <- c(names(which(genes_scaled[,1] > 1.5 | genes_scaled[,1] < -1.5)), significant_pathways) + significant_pathways <- c(names(which(genes_scaled[,1] > 1.0 | genes_scaled[,1] < -1.0)), significant_pathways) } - pca_pathways <- sub("%.*", "", significant_pathways) - pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) - pca_pathways_cleaned <- gsub("[[:punct:]]", "", pca_pathways) - themes <- pca_pathways_cleaned + # pca_pathways <- sub("%.*", "", significant_pathways) + # pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) + # pca_pathways_cleaned <- gsub("[[:punct:]]", "_", pca_pathways) + # themes <- pca_pathways_cleaned + themes <- significant_pathways + + # oldrownames =rownames(gsva_bycluster) + # newrownames <- sub("%.*", "", oldrownames) + # newrownames <- gsub("\\s*\\([^\\)]+\\)","",newrownames) + # newrownames <- gsub("[[:punct:]]", "_", newrownames) + # rownames(gsva_bycluster) <- newrownames cat("Fitting GAM models...") @@ -51,6 +58,7 @@ IdentifyVaryingPWs <- function(object, pval_threshold=0.05){ } else if (length(grep(themes[i], rownames(gsva_bycluster))) == 1){ plot_df <- data.frame(cluster=names(gsva_bycluster[grep(themes[i], rownames(gsva_bycluster)), ]), value=gsva_bycluster[grep(themes[i], rownames(gsva_bycluster)), ]) } plot_df$time <- object@cluster.metadata$Cluster_time_score + # gam: Generalized additive models with integrated smoothness estimation gams[[i]] <- mgcv::gam(value ~ s(time, k=3, bs='cr'), data=plot_df) temp_anova <- mgcv::anova.gam(gams[[i]]) p_vals[[i]] <- temp_anova$s.pv diff --git a/R/IdentifyVaryingPWsParallel.R b/R/IdentifyVaryingPWsParallel.R index b1868a1..499ee25 100644 --- a/R/IdentifyVaryingPWsParallel.R +++ b/R/IdentifyVaryingPWsParallel.R @@ -29,13 +29,20 @@ IdentifyVaryingPWsParallel <- function(object, pval_threshold=0.05){ significant_pathways <- c() for (i in 1:object@n.pcs){ genes_scaled <- scale(object@cluster.pathways.dr$rotation[,i]) - significant_pathways <- c(names(which(genes_scaled[,1] > 1.5 | genes_scaled[,1] < -1.5)), significant_pathways) + significant_pathways <- c(names(which(genes_scaled[,1] > 1.0 | genes_scaled[,1] < -1.0)), significant_pathways) } - pca_pathways <- sub("%.*", "", significant_pathways) - pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) - pca_pathways_cleaned <- gsub("[[:punct:]]", "", pca_pathways) - themes <- pca_pathways_cleaned + # pca_pathways <- sub("%.*", "", significant_pathways) + # pca_pathways <- gsub("\\s*\\([^\\)]+\\)","",pca_pathways) + # pca_pathways_cleaned <- gsub("[[:punct:]]", "_", pca_pathways) + # themes <- pca_pathways_cleaned + themes <- significant_pathways + + # oldrownames =rownames(gsva_bycluster) + # newrownames <- sub("%.*", "", oldrownames) + # newrownames <- gsub("\\s*\\([^\\)]+\\)","",newrownames) + # newrownames <- gsub("[[:punct:]]", "_", newrownames) + # rownames(gsva_bycluster) <- newrownames if (DEBUG) cat("Fitting GAM models...") diff --git a/R/PlotVaryingPWs.R b/R/PlotVaryingPWs.R index 6d29d0e..bfdf37d 100644 --- a/R/PlotVaryingPWs.R +++ b/R/PlotVaryingPWs.R @@ -29,11 +29,10 @@ for (i in order(varying_pathways)){ if (length(grep(names(varying_pathways)[i], rownames(gsva_bycluster))) > 1){ plot_df <- data.frame(cluster=colnames(gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ]), value=colMeans(gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ])) plot_df$time <- object@cluster.metadata$Cluster_time_score - } - else if (length(grep(names(varying_pathways)[i], rownames(gsva_bycluster))) == 1) { + } else if (length(grep(names(varying_pathways)[i], rownames(gsva_bycluster))) == 1) { plot_df <- data.frame(cluster=names(gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ]), value=gsva_bycluster[grep(names(varying_pathways)[i], rownames(gsva_bycluster)), ]) plot_df$time <- object@cluster.metadata$Cluster_time_score - } + } else {return(NULL)} id <- which(names(gams)==names(varying_pathways)[i]) mgcv::plot.gam(gams[[id[1]]], main = paste0(names(varying_pathways)[i]), xlab = "Inferred time", ylab="Pathway expression level", bty="l", cex.main = 1, xaxt = "n", shade= F, se=3, scheme=1) From 9bf0141fa0eb816b7cce326271d7addb209064e5 Mon Sep 17 00:00:00 2001 From: Bernd Jagla Date: Tue, 18 May 2021 15:35:08 +0200 Subject: [PATCH 11/11] bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bbc5de4..1c237fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: Tempora Type: Package Title: Pathway-based Trajectory Inference for Time-series Single Cell RNA-Seq Data -Version: 0.1.09 +Version: 0.1.10 Author: Thinh Tran Maintainer: Thinh Tran Description: Tempora is a trajectory inference method that infers cell lineage maps from time-series scRNAseq data using pathway enrichment profiles of single cells and experimental time information. Tempora uses an information theoretic approach to build a trajectory at the cluster level based on the clusters’ pathway enrichment profiles, effectively connecting cell types and states across multiple time points. Taking advantage of the available time information, Tempora can infer the directions of all connections in a trajectory that go from early to late clusters.