Add check for underscores in cell type names

csangara · csangara · commit 50f400181c01 · 2024-05-29T16:25:22.000+02:00
diff --git a/R/prioritization.R b/R/prioritization.R
@@ -125,14 +125,26 @@ get_exprs_avg = function(seurat_obj, celltype_colname,
 
   # Subset seurat object
   if (!is.null(condition_oi)) {
-    seurat_obj = seurat_obj[,seurat_obj[[condition_colname]] == condition_oi]
+    seurat_obj <- seurat_obj[,seurat_obj[[condition_colname]] == condition_oi]
   }
 
-  seurat_obj <- NormalizeData(seurat_obj, verbose = FALSE)
+  celltypes <- unique(seurat_obj[[celltype_colname, drop=TRUE]])
+
   avg_celltype <- AverageExpression(seurat_obj, group.by = celltype_colname, assays = assay_oi, ...) %>%
                     .[[assay_oi]] %>% data.frame(check.names=FALSE) %>% rownames_to_column("gene") %>%
                     pivot_longer(!gene, names_to = "cluster_id", values_to = "avg_expr")
 
+  # If any celltypes had an underscore in their name
+  if (any(grepl("_", celltypes))){
+    # Map the new names and the original names
+    # This is so it works in the case the original name also already has a hyphen in it
+    mapping <- data.frame(orig_name = sort(celltypes), cluster_id = sort(unique(avg_celltype$cluster_id)))
+    avg_celltype <- avg_celltype %>% left_join(mapping, by = "cluster_id") %>%
+                    dplyr::select(gene, cluster_id = orig_name, avg_expr)
+
+
+  }
+
   return (avg_celltype)
 
 }
diff --git a/tests/testthat/test-prioritization.R b/tests/testthat/test-prioritization.R
@@ -172,13 +172,22 @@ test_that("Prioritization scheme works", {
                                      condition_colname = "aggregate", condition_oi = condition_oi,
                                      features = feature_list)
 
-      # Calculate condition specificity - only for datasets with two conditions!
-      condition_markers <- FindMarkers(object = seurat_obj_test, ident.1 = condition_oi, ident.2 = condition_reference,
+    # Test cell type names conversion for Seurat object
+    # Replace space with underscore
+    seurat_obj_test$celltype2 <- gsub(" ", "_", seurat_obj_test$celltype)
+    new_celltypes <- suppressWarnings(get_exprs_avg(seurat_obj_test, "celltype2") %>% pull(cluster_id) %>% unique())
+    expect_equal(new_celltypes, sort(unique(seurat_obj_test$celltype2)))
+
+    # Replace CD8 T with CD8_T-test & replace Mono with Mono-test
+    seurat_obj_test$celltype2 <- gsub("CD8 T", "CD8_T-test", seurat_obj_test$celltype) %>% gsub("Mono", "Mono-test", .)
+    new_celltypes <- suppressWarnings(get_exprs_avg(seurat_obj_test, "celltype2") %>% pull(cluster_id) %>% unique())
+    expect_equal(new_celltypes, sort(unique(seurat_obj_test$celltype2)))
+
+    # Calculate condition specificity - only for datasets with two conditions!
+    condition_markers <- FindMarkers(object = seurat_obj_test, ident.1 = condition_oi, ident.2 = condition_reference,
                                        group.by = "aggregate", min.pct = 0, logfc.threshold = 0,
                                        features = feature_list) %>% rownames_to_column("gene")
 
-    # TODO: TESTS FOR PROCESS_TABLE_TO_IC
-
     # Combine DE of senders and receivers -> used for prioritization
     processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network,
                                             senders_oi = sender_celltypes, receivers_oi = receiver)