Skip to content

Commit a38bd23

Browse files
committed
clean up unused functions 2
1 parent 95981df commit a38bd23

File tree

105 files changed

+8600
-82
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+8600
-82
lines changed

NAMESPACE

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ export(pos_neg_marker)
5353
export(pos_neg_select)
5454
export(ref_feature_select)
5555
export(ref_marker_select)
56-
export(remove_background)
5756
export(reverse_marker_matrix)
5857
export(run_gsea)
5958
export(seurat_meta)

R/common_dplyr.R

Lines changed: 384 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,384 @@
1+
#' get best calls for each cluster
2+
#'
3+
#' @param cor_mat input similarity matrix
4+
#' @param metadata input metadata with tsne or umap coordinates and cluster ids
5+
#' @param cluster_col metadata column, can be cluster or cellid
6+
#' @param collapse_to_cluster if a column name is provided, takes the most
7+
#' frequent call of entire cluster to color in plot
8+
#' @param threshold minimum correlation coefficent cutoff for calling clusters
9+
#' @param rename_prefix prefix to add to type and r column names
10+
#' @param carry_r whether to include threshold in unassigned names
11+
#' @return dataframe of cluster, new ident, and r info
12+
#' @examples
13+
#' res <- clustify(
14+
#' input = pbmc_matrix_small,
15+
#' metadata = pbmc_meta,
16+
#' cluster_col = "classified",
17+
#' ref_mat = cbmc_ref
18+
#' )
19+
#'
20+
#' cor_to_call(res)
21+
#' @export
22+
cor_to_call <- function(cor_mat,
23+
metadata = NULL,
24+
cluster_col = "cluster",
25+
collapse_to_cluster = FALSE,
26+
threshold = 0,
27+
rename_prefix = NULL,
28+
carry_r = FALSE) {
29+
correlation_matrix <- cor_mat
30+
if (threshold == "auto") {
31+
threshold <- round(0.75 * max(correlation_matrix), 2)
32+
message(paste0("using threshold of ", threshold))
33+
}
34+
correlation_matrix[is.na(correlation_matrix)] <- 0
35+
df_temp <-
36+
tibble::as_tibble(correlation_matrix, rownames = cluster_col)
37+
df_temp <- tidyr::gather(
38+
df_temp,
39+
key = !!dplyr::sym("type"),
40+
value = !!dplyr::sym("r"), -!!cluster_col
41+
)
42+
43+
if (carry_r) {
44+
df_temp[["type"]][df_temp$r < threshold] <-
45+
paste0("r<", threshold, ", unassigned")
46+
} else {
47+
df_temp[["type"]][df_temp$r < threshold] <- "unassigned"
48+
}
49+
50+
df_temp <-
51+
dplyr::top_n(dplyr::group_by_at(df_temp, 1), 1, !!dplyr::sym("r"))
52+
if (nrow(df_temp) != nrow(correlation_matrix)) {
53+
clash <- dplyr::summarize(dplyr::group_by_at(df_temp, 1), n = n())
54+
clash <- dplyr::filter(clash, n > 1)
55+
clash <- dplyr::pull(clash, 1)
56+
df_temp[lapply(
57+
df_temp[, 1],
58+
FUN = function(x) {
59+
x %in% clash
60+
}
61+
)[[1]], 2] <-
62+
paste0(df_temp[["type"]][lapply(
63+
df_temp[, 1],
64+
FUN = function(x) {
65+
x %in% clash
66+
}
67+
)[[1]]], "-CLASH!")
68+
df_temp2 <- df_temp
69+
df_temp_full <-
70+
dplyr::distinct_at(df_temp,
71+
vars(-!!dplyr::sym("type")),
72+
.keep_all = TRUE)
73+
} else {
74+
df_temp_full <- df_temp
75+
}
76+
77+
if (collapse_to_cluster != FALSE) {
78+
if (!(cluster_col %in% colnames(metadata))) {
79+
metadata <- tibble::as_tibble(metadata, rownames = "rn")
80+
}
81+
df_temp_full <-
82+
collapse_to_cluster(
83+
df_temp_full,
84+
metadata = metadata,
85+
cluster_col = cluster_col,
86+
threshold = threshold
87+
)
88+
}
89+
90+
if (!is.null(rename_prefix)) {
91+
if (collapse_to_cluster) {
92+
eval(parse(
93+
text = paste0(
94+
"df_temp_full <- dplyr::rename(df_temp_full, ",
95+
paste0(rename_prefix, "_type"),
96+
" = type, ",
97+
paste0(rename_prefix, "_sum"),
98+
" = sum, ",
99+
paste0(rename_prefix, "_n"),
100+
" = n)"
101+
)
102+
))
103+
} else {
104+
eval(parse(
105+
text = paste0(
106+
"df_temp_full <- dplyr::rename(df_temp_full, ",
107+
paste0(rename_prefix, "_type"),
108+
" = type, ",
109+
paste0(rename_prefix, "_r"),
110+
" = r)"
111+
)
112+
))
113+
}
114+
}
115+
df_temp_full
116+
}
117+
118+
#' Insert called ident results into metadata
119+
#'
120+
#' @param res dataframe of idents, such as output of cor_to_call
121+
#' @param metadata input metadata with tsne or umap coordinates and cluster ids
122+
#' @param cluster_col metadata column, can be cluster or cellid
123+
#' @param per_cell whether the res dataframe is listed per cell
124+
#' @param rename_prefix prefix to add to type and r column names
125+
#' @return new metadata with added columns
126+
#' @examples
127+
#' \donttest{
128+
#' res <- clustify(
129+
#' input = pbmc_matrix_small,
130+
#' metadata = pbmc_meta,
131+
#' cluster_col = "classified",
132+
#' ref_mat = cbmc_ref
133+
#' )
134+
#'
135+
#' res2 <- cor_to_call(res, cluster_col = "classified")
136+
#'
137+
#' call_to_metadata(
138+
#' res = res2,
139+
#' metadata = pbmc_meta,
140+
#' cluster_col = "classified",
141+
#' rename_prefix = "assigned"
142+
#' )
143+
#' }
144+
#' @export
145+
call_to_metadata <- function(res,
146+
metadata,
147+
cluster_col,
148+
per_cell = FALSE,
149+
rename_prefix = NULL) {
150+
temp_col_id <- get_unique_column(metadata, "rn")
151+
152+
df_temp <- res
153+
if (!is.null(rename_prefix)) {
154+
eval(parse(
155+
text = paste0(
156+
"df_temp <- dplyr::rename(df_temp, ",
157+
paste0(rename_prefix, "_type"),
158+
" = type, ",
159+
paste0(rename_prefix, "_r"),
160+
" = r)"
161+
)
162+
))
163+
}
164+
165+
if (per_cell == FALSE) {
166+
if (!(cluster_col %in% colnames(metadata))) {
167+
stop("cluster_col is not a column of metadata",
168+
call. = FALSE)
169+
}
170+
171+
if (!(cluster_col %in% colnames(res))) {
172+
stop("cluster_col is not a column ",
173+
"of called cell type dataframe",
174+
call. = FALSE
175+
)
176+
}
177+
178+
if (!(all(unique(df_temp[[cluster_col]]) %in%
179+
unique(metadata[[cluster_col]])))) {
180+
stop("cluster_col from clustify step and",
181+
"joining to metadata step are not the same",
182+
call. = FALSE
183+
)
184+
}
185+
186+
df_temp_full <-
187+
suppressWarnings(
188+
dplyr::left_join(
189+
tibble::rownames_to_column(
190+
metadata,
191+
temp_col_id
192+
),
193+
df_temp,
194+
by = cluster_col,
195+
suffix = c("", ".clustify")
196+
)
197+
)
198+
199+
df_temp_full <- tibble::column_to_rownames(
200+
df_temp_full,
201+
temp_col_id
202+
)
203+
} else {
204+
colnames(df_temp)[1] <- cluster_col
205+
names(cluster_col) <- temp_col_id
206+
207+
df_temp_full <-
208+
suppressWarnings(
209+
dplyr::left_join(
210+
tibble::rownames_to_column(
211+
metadata,
212+
temp_col_id
213+
),
214+
df_temp,
215+
by = cluster_col,
216+
suffix = c("", ".clustify")
217+
)
218+
)
219+
220+
df_temp_full <-
221+
tibble::column_to_rownames(df_temp_full,
222+
temp_col_id)
223+
}
224+
df_temp_full
225+
}
226+
227+
#' From per-cell calls, take highest freq call in each cluster
228+
#'
229+
#' @param res dataframe of idents, such as output of cor_to_call
230+
#' @param metadata input metadata with tsne or umap coordinates and cluster ids
231+
#' @param cluster_col metadata column for cluster
232+
#' @param threshold minimum correlation coefficent cutoff for calling clusters
233+
#' @return new metadata with added columns
234+
#' @examples
235+
#' res <- clustify(
236+
#' input = pbmc_matrix_small,
237+
#' metadata = pbmc_meta,
238+
#' cluster_col = "classified",
239+
#' ref_mat = cbmc_ref,
240+
#' per_cell = TRUE
241+
#' )
242+
#'
243+
#' res2 <- cor_to_call(res)
244+
#'
245+
#' collapse_to_cluster(
246+
#' res2,
247+
#' metadata = pbmc_meta,
248+
#' cluster_col = "classified",
249+
#' threshold = 0
250+
#' )
251+
#' @export
252+
collapse_to_cluster <- function(res,
253+
metadata,
254+
cluster_col,
255+
threshold = 0) {
256+
res_temp <- res
257+
colnames(res_temp)[1] <- "rn"
258+
df_temp_full <- as.data.frame(res_temp)
259+
df_temp_full <-
260+
dplyr::mutate(df_temp_full,
261+
cluster = metadata[[cluster_col]])
262+
df_temp_full2 <-
263+
dplyr::group_by(df_temp_full,
264+
!!dplyr::sym("type"),
265+
!!dplyr::sym("cluster"))
266+
df_temp_full2 <-
267+
dplyr::summarize(df_temp_full2,
268+
sum = sum(!!dplyr::sym("r")),
269+
n = n()
270+
)
271+
df_temp_full2 <-
272+
dplyr::group_by(df_temp_full2,
273+
!!dplyr::sym("cluster"))
274+
df_temp_full2 <-
275+
dplyr::arrange(df_temp_full2,
276+
desc(n),
277+
desc(sum))
278+
df_temp_full2 <-
279+
dplyr::filter(df_temp_full2,
280+
!!dplyr::sym("type") != paste0("r<",
281+
threshold,
282+
", unassigned"))
283+
df_temp_full2 <- dplyr::slice(df_temp_full2, 1)
284+
df_temp_full2 <-
285+
dplyr::rename(df_temp_full2,
286+
!!cluster_col := cluster)
287+
dplyr::select(df_temp_full2, 2, 1,
288+
tidyr::everything())
289+
}
290+
291+
#' get ranked calls for each cluster
292+
#'
293+
#' @param cor_mat input similarity matrix
294+
#' @param metadata input metadata with tsne or umap coordinates
295+
#' and cluster ids
296+
#' @param cluster_col metadata column, can be cluster or cellid
297+
#' @param collapse_to_cluster if a column name is provided, takes the most
298+
#' frequent call of entire cluster to color in plot
299+
#' @param threshold minimum correlation coefficent cutoff for calling clusters
300+
#' @param rename_prefix prefix to add to type and r column names
301+
#' @param top_n the number of ranks to keep, the rest will be set to 100
302+
#' @return dataframe of cluster, new ident, and r info
303+
#' @examples
304+
#' res <- clustify(
305+
#' input = pbmc_matrix_small,
306+
#' metadata = pbmc_meta,
307+
#' cluster_col = "classified",
308+
#' ref_mat = cbmc_ref
309+
#' )
310+
#'
311+
#' cor_to_call_rank(res, threshold = "auto")
312+
#' @export
313+
cor_to_call_rank <- function(cor_mat,
314+
metadata = NULL,
315+
cluster_col = "cluster",
316+
collapse_to_cluster = FALSE,
317+
threshold = 0,
318+
rename_prefix = NULL,
319+
top_n = NULL) {
320+
correlation_matrix <- cor_mat
321+
if (threshold == "auto") {
322+
threshold <- round(0.75 * max(correlation_matrix), 2)
323+
message(paste0("using threshold of ", threshold))
324+
}
325+
df_temp <- tibble::as_tibble(correlation_matrix,
326+
rownames = cluster_col
327+
)
328+
df_temp <-
329+
tidyr::gather(
330+
df_temp,
331+
key = !!dplyr::sym("type"),
332+
value = !!dplyr::sym("r"), -!!cluster_col
333+
)
334+
df_temp <-
335+
dplyr::mutate(dplyr::group_by_at(df_temp, 1),
336+
rank = dplyr::dense_rank(desc(!!dplyr::sym("r"))))
337+
df_temp[["rank"]][df_temp$r < threshold] <- 100
338+
if (!(is.null(top_n))) {
339+
df_temp <- dplyr::filter(df_temp, rank <= top_n)
340+
}
341+
df_temp_full <- df_temp
342+
if (!is.null(rename_prefix)) {
343+
eval(parse(
344+
text = paste0(
345+
"df_temp_full <- dplyr::rename(df_temp_full, ",
346+
paste0(rename_prefix, "_type"),
347+
" = type, ",
348+
paste0(rename_prefix, "_r"),
349+
" = r)"
350+
)
351+
))
352+
}
353+
df_temp_full
354+
}
355+
356+
#' get concensus calls for a list of cor calls
357+
#'
358+
#' @param list_of_res list of call dataframes from cor_to_call_rank
359+
#' @return dataframe of cluster, new ident, and mean rank
360+
#' @examples
361+
#' res <- clustify(
362+
#' input = pbmc_matrix_small,
363+
#' metadata = pbmc_meta,
364+
#' cluster_col = "classified",
365+
#' ref_mat = cbmc_ref
366+
#' )
367+
#'
368+
#' res2 <- cor_to_call_rank(res, threshold = "auto")
369+
#' res3 <- cor_to_call_rank(res)
370+
#' call_consensus(list(res2, res3))
371+
#' @export
372+
call_consensus <- function(list_of_res) {
373+
374+
res <- do.call("rbind", list_of_res)
375+
df_temp <- dplyr::group_by_at(res, c(1, 2))
376+
df_temp <- dplyr::summarize_at(df_temp, 2, mean)
377+
df_temp <- dplyr::top_n(df_temp, -1)
378+
df_temp <- dplyr::group_by_at(df_temp, c(1, 3))
379+
df_temp <-
380+
dplyr::summarize_at(df_temp, 1, function(x) {
381+
stringr::str_c(x, collapse = "__")
382+
})
383+
df_temp <- dplyr::select(df_temp, c(1, 3, 2))
384+
}

R/common_dplyr.R.REMOVED.git-id

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)