1) References

  • The R2 database link (R2 2023):

https://hgserver1.amc.nl/cgi-bin/r2/main.cgi

2) Load data-frame

2.1) Data-frame downloaded from the R2 website, in the data folder

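The file downloaded from R2 contains “#” symbols, which need to be removed before loading: by default read.table() treats “#” as a comment character and ignores the rest of the line after it.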

# Read the raw R2 export unchanged. read.table() treats "#" as a comment
# character by default, so everything from a "#" to the end of that line is
# ignored in this first read.
r2_gse62564 <- read.table("../data/ps_avgpres_gse62564geo498_seqcnb1_box1687888611-datagrabber-.txt")
class(r2_gse62564)
# Deliberately not saved: "../data/r2_gse62564.rds" should only ever hold the
# table read from the "#"-stripped file. Saving this partial table to the same
# path would leave a wrong cache behind whenever the second read fails.

After the “#” symbols are removed (file saved with the “_modified” suffix), the data-frame can be read correctly and saved.

# Read the "_modified" copy of the R2 export and cache the resulting table as
# an RDS file so later sections can load it without re-parsing the text file.
r2_gse62564 <- read.table(
  file = "../data/ps_avgpres_gse62564geo498_seqcnb1_box1687888611-datagrabber_modified.txt"
)
class(r2_gse62564)
saveRDS(object = r2_gse62564, file = "../data/r2_gse62564.rds")

2.2) Load rds object and calculate how long it takes R to load the object

# Load the cached table and report how long readRDS() took.
start_time <- Sys.time()
r2_gse62564 <- readRDS(file = "../data/r2_gse62564.rds")
end_time <- Sys.time()
# Subtracting two date-times is a difftime() call; spell it out.
difftime(end_time, start_time)
## Time difference of 4.952882 secs

2.3) Visualize the dataframe

# Show the first 20 rows, then the table size as (rows, columns).
utils::head(r2_gse62564, n = 20L)
dim(r2_gse62564)
## [1] 24961   500

3) Process dataframe

3.1) Metadata

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Keep the first 17 rows (the header row plus the metadata rows) as metadata.
## This copy is taken before V2 is dropped, so it still contains that column.
metadata_gse62564 <- r2_gse62564[1:17, ]
## View(metadata_gse62564)
head(metadata_gse62564, n = 20)
## View data-frame
head(r2_gse62564, n = 20)
## Remove the second column, which is repeated
r2_gse62564 <- dplyr::select(r2_gse62564, -V2)
head(r2_gse62564, n = 20)
## Remove all metadata information, keep only gene counts (row 1 is kept for now
## because it supplies the column names below).
## NOTE(review): rows 2:18 are dropped here, but only rows 1:17 were stored as
## metadata above, so row 18 ends up in neither table -- confirm this is intended.
r2_gse62564 <- r2_gse62564[-c(2:18), ]

## Give names to columns using row 1. The row is flattened to a plain character
## vector first, so the names do not depend on how a one-row data frame happens
## to be coerced.
colnames(r2_gse62564) <- vapply(
  r2_gse62564[1, ], as.character, character(1),
  USE.NAMES = FALSE
)

## Remove row 1, which now only duplicates the column names
r2_gse62564 <- r2_gse62564[-1, ]
head(r2_gse62564, n = 20)

3.2) More processing

library(maditr)
## Warning: package 'maditr' was built under R version 4.1.2
## 
## To select columns from data: columns(mtcars, mpg, vs:carb)
## 
## Attaching package: 'maditr'
## The following objects are masked from 'package:dplyr':
## 
##     between, coalesce, first, last
## Remove duplicated rows: keep the first row found for each `H:hugo` value
## (%>% is already provided by dplyr, which is attached above)
r2_gse62564_distinct <- r2_gse62564 %>%
  distinct(`H:hugo`, .keep_all = TRUE)
## Inspect the de-duplicated table (not the untouched input)
head(r2_gse62564_distinct, n = 20)
## Give names to rows, then drop the column that supplied them
rownames(r2_gse62564_distinct) <- r2_gse62564_distinct$`H:hugo`
r2_gse62564_distinct <- dplyr::select(r2_gse62564_distinct, -`H:hugo`)
head(r2_gse62564_distinct, n = 20)
## Upper-case the column names
names(r2_gse62564_distinct) <- toupper(names(r2_gse62564_distinct))
## Convert every character column to numeric; across() replaces the superseded
## mutate_if()
r2_gse62564_distinct <- r2_gse62564_distinct %>%
  mutate(across(where(is.character), as.numeric))
## Matrix form (used as GSVA input) and a data-frame copy of it
r2_gse62564_matrix <- as.matrix(r2_gse62564_distinct)
r2_gse62564_df <- as.data.frame(r2_gse62564_matrix)

4) Calculate GSVA using Gene Sets

In this part, we use the GSVA library to compute gene-set (phenotype) scores and add them to the data-frame.

4.1) Gene and GSVA data-frame

4.1.2) Prepare GSVA DF

# install.packages("BiocManager")
# library(BiocManager)
# BiocManager::install(c("GSVA", "GSEABase"))
library(GSVA)
library(GSEABase)
#####################################################
###### 2) Load gene set and Construct GSVA data-frame
#####################################################

# Read the gene-set collection (GMT file) that will be scored with GSVA.
cfDNA_PCA_gene_list <- getGmt(con = "../data/cfDNA_genes_PCA.txt")
## Warning in readLines(con, ...): incomplete final line found on
## '../data/cfDNA_genes_PCA.txt'
## Warning in getGmt("../data/cfDNA_genes_PCA.txt"): 321 record(s) contain
## duplicate ids: ADRN_Gene_List_373, ADRN_Gronigen, ...,
## WP_GLYCOLYSIS_AND_GLUCONEOGENESIS_7, WP_GLYCOLYSIS_IN_SENESCENCE_Genes_4
###################################
###### 3) Construct GSVA data-frame
###################################

# Needs to re-run this step if there are new gene sets
# Score every gene set against the expression matrix. min.sz = 1 and
# max.sz = Inf mean no gene set is dropped because of its size.
r2_gse62564_GSVA <- gsva(
  expr = r2_gse62564_matrix,
  gset.idx.list = cfDNA_PCA_gene_list,
  min.sz = 1,
  max.sz = Inf,
  verbose = TRUE
)
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning in .filterFeatures(expr, method): 164 genes with constant expression
## values throuhgout the samples.
## Warning in .filterFeatures(expr, method): Since argument method!="ssgsea",
## genes with constant expression values are discarded.
## Warning in .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
## : Some gene sets have size one. Consider setting 'min.sz > 1'.
## Estimating GSVA scores for 312 gene sets.
## Estimating ECDFs with Gaussian kernels
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
########################################
###### 4) Merge Gene Expression and GSVA
########################################

# Stack the GSVA score rows on top of the gene-expression rows. rbind() on
# matrices lines columns up by position only, so first check that both carry
# the same sample columns in the same order; otherwise samples could be mixed
# silently.
stopifnot(identical(colnames(r2_gse62564_GSVA), colnames(r2_gse62564_matrix)))
r2_gse62564_GSVA_genes <- rbind(r2_gse62564_GSVA, r2_gse62564_matrix)

4.1.3) Include Metadata Based on R2 information

gse62564_metadata <- metadata_gse62564
# Repeat row with IDs to confirm matching later: the copy of row 1 is appended
# as a new last row (row 18 for the 17-row metadata table)
gse62564_metadata[nrow(gse62564_metadata) + 1, ] <- gse62564_metadata[1, ]
# Remove first column of metadata DF
gse62564_metadata <- gse62564_metadata[, -1]
# Make all IDs in row 1 uppercase, leaving the first remaining column as is.
# The column range follows the table width instead of a hard-coded 2:499.
id_cols <- seq_len(ncol(gse62564_metadata))[-1]
gse62564_metadata[1, id_cols] <- toupper(
  unlist(gse62564_metadata[1, id_cols], use.names = FALSE)
)
# Make row 1, column names
library(janitor)
## Warning: package 'janitor' was built under R version 4.1.2
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# Promote the values in row 1 to column names (janitor::row_to_names).
gse62564_metadata <- row_to_names(gse62564_metadata, row_number = 1)
# Make column as row names in metadata
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## Warning: package 'lubridate' was built under R version 4.1.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.2     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%()   masks IRanges::%within%()
## ✖ maditr::between()       masks dplyr::between()
## ✖ stringr::boundary()     masks graph::boundary()
## ✖ maditr::coalesce()      masks dplyr::coalesce()
## ✖ IRanges::collapse()     masks dplyr::collapse()
## ✖ readr::cols()           masks maditr::cols()
## ✖ Biobase::combine()      masks BiocGenerics::combine(), dplyr::combine()
## ✖ IRanges::desc()         masks dplyr::desc()
## ✖ tidyr::expand()         masks S4Vectors::expand()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ S4Vectors::first()      masks maditr::first(), dplyr::first()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ maditr::last()          masks dplyr::last()
## ✖ ggplot2::Position()     masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()         masks IRanges::reduce()
## ✖ S4Vectors::rename()     masks dplyr::rename()
## ✖ lubridate::second()     masks S4Vectors::second()
## ✖ lubridate::second<-()   masks S4Vectors::second<-()
## ✖ AnnotationDbi::select() masks dplyr::select()
## ✖ IRanges::slice()        masks dplyr::slice()
## ✖ purrr::transpose()      masks maditr::transpose()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Move the "probeset" column into the row names (existing row names are
# cleared first).
gse62564_metadata <- gse62564_metadata %>%
  remove_rownames() %>%
  column_to_rownames(var = "probeset")
# Stack the metadata rows above the GSVA + expression rows, then transpose so
# the former columns become rows.
# NOTE(review): t() goes through a matrix, so with character metadata rows the
# resulting table presumably holds only character columns -- confirm that
# downstream code converts the numeric fields back.
r2_gse62564_GSVA_Metadata <- rbind(gse62564_metadata, r2_gse62564_GSVA_genes)
r2_gse62564_GSVA_Metadata <- as.data.frame(t(r2_gse62564_GSVA_Metadata))
# Create the output folder if it is missing, so the save cannot fail at the very
# end of the run.
dir.create("../results", showWarnings = FALSE, recursive = TRUE)
saveRDS(r2_gse62564_GSVA_Metadata, file = "../results/r2_gse62564_GSVA_Metadata.rds")

7) References

R2. 2023. R2: Genomics Analysis and Visualization Platform. http://r2.amc.nl.

8) Session Info

## Print the R version, platform and attached/loaded package versions for
## this run, so the analysis environment is recorded alongside the results
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] lubridate_1.9.2      forcats_1.0.0        stringr_1.5.0       
##  [4] purrr_1.0.1          readr_2.1.4          tidyr_1.3.0         
##  [7] tibble_3.2.1         ggplot2_3.4.2        tidyverse_2.0.0     
## [10] janitor_2.2.0        GSEABase_1.54.0      graph_1.70.0        
## [13] annotate_1.70.0      XML_3.99-0.14        AnnotationDbi_1.54.1
## [16] IRanges_2.26.0       S4Vectors_0.30.2     Biobase_2.52.0      
## [19] BiocGenerics_0.38.0  GSVA_1.44.2          maditr_0.8.3        
## [22] dplyr_1.1.2         
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7                matrixStats_1.0.0          
##  [3] bit64_4.0.5                 httr_1.4.6                 
##  [5] GenomeInfoDb_1.30.0         tools_4.1.1                
##  [7] bslib_0.5.0                 utf8_1.2.3                 
##  [9] R6_2.5.1                    irlba_2.3.5.1              
## [11] HDF5Array_1.20.0            DBI_1.1.3                  
## [13] colorspace_2.1-0            rhdf5filters_1.4.0         
## [15] withr_2.5.0                 tidyselect_1.2.0           
## [17] bit_4.0.5                   compiler_4.1.1             
## [19] cli_3.6.1                   DelayedArray_0.18.0        
## [21] sass_0.4.6                  scales_1.2.1               
## [23] digest_0.6.32               rmarkdown_2.22             
## [25] XVector_0.32.0              pkgconfig_2.0.3            
## [27] htmltools_0.5.5             sparseMatrixStats_1.4.2    
## [29] MatrixGenerics_1.4.3        fastmap_1.1.1              
## [31] rlang_1.1.1                 rstudioapi_0.14            
## [33] RSQLite_2.3.1               DelayedMatrixStats_1.14.3  
## [35] jquerylib_0.1.4             generics_0.1.3             
## [37] jsonlite_1.8.7              BiocParallel_1.26.2        
## [39] RCurl_1.98-1.12             magrittr_2.0.3             
## [41] BiocSingular_1.8.1          GenomeInfoDbData_1.2.6     
## [43] Matrix_1.5-1                Rcpp_1.0.10                
## [45] munsell_0.5.0               Rhdf5lib_1.14.2            
## [47] fansi_1.0.4                 lifecycle_1.0.3            
## [49] stringi_1.7.12              yaml_2.3.7                 
## [51] snakecase_0.11.0            SummarizedExperiment_1.22.0
## [53] zlibbioc_1.38.0             rhdf5_2.36.0               
## [55] grid_4.1.1                  blob_1.2.4                 
## [57] crayon_1.5.2                lattice_0.21-8             
## [59] Biostrings_2.60.2           beachmat_2.8.1             
## [61] hms_1.1.3                   KEGGREST_1.32.0            
## [63] knitr_1.43                  pillar_1.9.0               
## [65] GenomicRanges_1.44.0        ScaledMatrix_1.0.0         
## [67] glue_1.6.2                  evaluate_0.21              
## [69] data.table_1.14.8           png_0.1-8                  
## [71] vctrs_0.6.3                 tzdb_0.4.0                 
## [73] gtable_0.3.3                cachem_1.0.8               
## [75] xfun_0.39                   rsvd_1.0.5                 
## [77] xtable_1.8-4                SingleCellExperiment_1.14.1
## [79] memoise_2.0.1               timechange_0.2.0