Created
January 2, 2023 15:11
-
-
Save lgatto/e66befc2331d6cb616b2e9811f831edc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(scp) | |
## Read the data table | |
x <- read.delim("data/02ng/Task2-SearchTask/AllQuantifiedPeptides.tsv") |> | |
select(-18) |> ## column 18 is all NAs | |
janitor::clean_names() |> | |
rename_with(~ gsub("intensity_ex_auto_", "int_", .x, fixed = TRUE)) |> | |
rename_with(~ gsub("detection_type_ex_auto_", "det_", .x, fixed = TRUE)) | |
## topN aggregating function | |
topN <- function(x, n = 3L) { | |
## Keep n rows with highest sum if there are > n rows available | |
if (nrow(x) > n) { | |
o <- order(rowSums(x, na.rm = TRUE), | |
decreasing = TRUE, na.last = TRUE) | |
x <- x[head(o, n), ] | |
} | |
## Aggregate by sum on top N features | |
colSums(x, na.rm = TRUE) | |
} | |
## Create a QFeatures object, replace 0s by NA and aggregate peptides | |
## into proteins using sums, medians and sums on top 3. | |
qf <- readQFeatures(x, fnames = 1, | |
ecol = grep("^int", names(x)), | |
name = "peptides") |> | |
zeroIsNA(i = "peptides") |> | |
aggregateFeatures(i = "peptides", | |
fcol = "protein_groups", | |
name = "sums", | |
fun = colSums, | |
na.rm = TRUE) |> | |
aggregateFeatures(i = "peptides", | |
fcol = "protein_groups", | |
name = "medians", | |
fun = colMedians, | |
na.rm = TRUE) |> | |
aggregateFeatures(i = "peptides", | |
fcol = "protein_groups", | |
name = "top3", | |
fun = topN) | |
## Visualise peptide and protein intensities for three proteins of | |
## interest | |
qf |> | |
filterFeatures(~!is.na(gene_names)) |> | |
filterFeatures(~gene_names %in% c("ACO2", "RRS1", "PGM1")) |> | |
longFormat(rowvars = "gene_names") |> | |
as_tibble() |> | |
ggplot(aes(x = colname, y = value, colour = rowname)) + | |
geom_point() + | |
geom_line(aes(group = rowname)) + | |
facet_grid(factor(gene_names, levels = c("ACO2", "RRS1", "PGM1")) ~ | |
factor(assay, levels = c("peptides", "medians", "sums", "top3"))) + | |
theme(axis.text.x = element_text(angle = 90)) + | |
theme(legend.position="none") | |
## sessionInfo() | |
## R version 4.2.2 Patched (2022-11-02 r83238) | |
## Platform: x86_64-pc-linux-gnu (64-bit) | |
## Running under: Manjaro Linux | |
## Matrix products: default | |
## BLAS: /usr/lib/libblas.so.3.11.0 | |
## LAPACK: /usr/lib/liblapack.so.3.11.0 | |
## locale: | |
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C | |
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 | |
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 | |
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C | |
## [9] LC_ADDRESS=C LC_TELEPHONE=C | |
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C | |
## attached base packages: | |
## [1] stats4 stats graphics grDevices utils datasets methods | |
## [8] base | |
## other attached packages: | |
## [1] scp_1.8.0 QFeatures_1.9.0 | |
## [3] MultiAssayExperiment_1.24.0 SummarizedExperiment_1.28.0 | |
## [5] Biobase_2.58.0 GenomicRanges_1.50.1 | |
## [7] GenomeInfoDb_1.34.4 IRanges_2.32.0 | |
## [9] S4Vectors_0.36.1 BiocGenerics_0.44.0 | |
## [11] MatrixGenerics_1.10.0 matrixStats_0.63.0 | |
## [13] forcats_0.5.2 stringr_1.5.0 | |
## [15] dplyr_1.0.10 purrr_0.3.5 | |
## [17] readr_2.1.3 tidyr_1.2.1 | |
## [19] tibble_3.1.8 ggplot2_3.4.0 | |
## [21] tidyverse_1.3.2 | |
## loaded via a namespace (and not attached): | |
## [1] ProtGenerics_1.30.0 bitops_1.0-7 | |
## [3] fs_1.5.2 lubridate_1.9.0 | |
## [5] bit64_4.0.5 httr_1.4.4 | |
## [7] tools_4.2.2 backports_1.4.1 | |
## [9] utf8_1.2.2 R6_2.5.1 | |
## [11] DBI_1.1.3 lazyeval_0.2.2 | |
## [13] colorspace_2.0-3 withr_2.5.0 | |
## [15] tidyselect_1.2.0 bit_4.0.5 | |
## [17] compiler_4.2.2 textshaping_0.3.6 | |
## [19] cli_3.4.1 rvest_1.0.3 | |
## [21] xml2_1.3.3 DelayedArray_0.24.0 | |
## [23] labeling_0.4.2 scales_1.2.1 | |
## [25] systemfonts_1.0.4 XVector_0.38.0 | |
## [27] pkgconfig_2.0.3 dbplyr_2.2.1 | |
## [29] rlang_1.0.6 readxl_1.4.1 | |
## [31] generics_0.1.3 farver_2.1.1 | |
## [33] jsonlite_1.8.4 vroom_1.6.0 | |
## [35] googlesheets4_1.0.1 RCurl_1.98-1.9 | |
## [37] magrittr_2.0.3 GenomeInfoDbData_1.2.9 | |
## [39] Matrix_1.5-3 munsell_0.5.0 | |
## [41] fansi_1.0.3 MsCoreUtils_1.11.1 | |
## [43] lifecycle_1.0.3 stringi_1.7.8 | |
## [45] snakecase_0.11.0 MASS_7.3-58.1 | |
## [47] zlibbioc_1.44.0 grid_4.2.2 | |
## [49] parallel_4.2.2 crayon_1.5.2 | |
## [51] lattice_0.20-45 haven_2.5.1 | |
## [53] hms_1.1.2 pillar_1.8.1 | |
## [55] igraph_1.3.5 reprex_2.0.2 | |
## [57] glue_1.6.2 modelr_0.1.10 | |
## [59] vctrs_0.5.1 tzdb_0.3.0 | |
## [61] cellranger_1.1.0 gtable_0.3.1 | |
## [63] clue_0.3-63 assertthat_0.2.1 | |
## [65] BiocBaseUtils_1.0.0 janitor_2.1.0 | |
## [67] broom_1.0.1 AnnotationFilter_1.22.0 | |
## [69] ragg_1.2.4 googledrive_2.0.0 | |
## [71] gargle_1.2.1 SingleCellExperiment_1.20.0 | |
## [73] cluster_2.1.4 timechange_0.1.1 | |
## [75] ellipsis_0.3.2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The results of the script can be seen here