Data retrieval

Get the latest version of the 293T PPI network and the HCT116 PPI network:

# Fetch the 293T network (version 3.0) and the HCT116 network (version 1.0).
bp.293t <- BioPlex::getBioPlex(version = "3.0", cell.line = "293T")
## Using cached version from 2022-06-21 11:49:51
bp.hct116 <- BioPlex::getBioPlex(version = "1.0", cell.line = "HCT116")
## Using cached version from 2022-06-21 12:14:36

and turn each network into a graph object:

# Convert both PPI tables to graph objects, then display each graph in turn.
bp.gr <- BioPlex::bioplex2graph(bp.293t)
hct.gr <- BioPlex::bioplex2graph(bp.hct116)

bp.gr
## A graphNEL graph with directed edges
## Number of Nodes = 13689 
## Number of Edges = 115868
hct.gr
## A graphNEL graph with directed edges
## Number of Nodes = 10024 
## Number of Edges = 70421

Annotate PFAM domains to the node metadata:

Connect to AnnotationHub:

# Create the AnnotationHub object that is queried for the human OrgDb below.
ah <- AnnotationHub::AnnotationHub()

OrgDb package for human:

# Query the hub for human OrgDb records, take the first hit, and show it.
orgdb <- AnnotationHub::query(ah, c("orgDb", "Homo sapiens"))[[1]]
orgdb
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
## | DBSCHEMA: HUMAN_DB
## | ORGANISM: Homo sapiens
## | SPECIES: Human
## | EGSOURCEDATE: 2022-Mar17
## | EGSOURCENAME: Entrez Gene
## | EGSOURCEURL: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA
## | CENTRALID: EG
## | TAXID: 9606
## | GOSOURCENAME: Gene Ontology
## | GOSOURCEURL: http://current.geneontology.org/ontology/go-basic.obo
## | GOSOURCEDATE: 2022-03-10
## | GOEGSOURCEDATE: 2022-Mar17
## | GOEGSOURCENAME: Entrez Gene
## | GOEGSOURCEURL: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA
## | KEGGSOURCENAME: KEGG GENOME
## | KEGGSOURCEURL: ftp://ftp.genome.jp/pub/kegg/genomes
## | KEGGSOURCEDATE: 2011-Mar15
## | GPSOURCENAME: UCSC Genome Bioinformatics (Homo sapiens)
## | GPSOURCEURL: 
## | GPSOURCEDATE: 2022-Nov23
## | ENSOURCEDATE: 2021-Dec21
## | ENSOURCENAME: Ensembl
## | ENSOURCEURL: ftp://ftp.ensembl.org/pub/current_fasta
## | UPSOURCENAME: Uniprot
## | UPSOURCEURL: http://www.UniProt.org/
## | UPSOURCEDATE: Fri Apr  1 14:42:16 2022
# List the key types this OrgDb supports; "PFAM" is among them in the output.
AnnotationDbi::keytypes(orgdb)
##  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT"  "ENSEMBLTRANS"
##  [6] "ENTREZID"     "ENZYME"       "EVIDENCE"     "EVIDENCEALL"  "GENENAME"    
## [11] "GENETYPE"     "GO"           "GOALL"        "IPI"          "MAP"         
## [16] "OMIM"         "ONTOLOGY"     "ONTOLOGYALL"  "PATH"         "PFAM"        
## [21] "PMID"         "PROSITE"      "REFSEQ"       "SYMBOL"       "UCSCKG"      
## [26] "UNIPROT"
# Annotate PFAM domains onto both graphs using the OrgDb; each graph object
# is replaced by its annotated version.
bp.gr <- BioPlex::annotatePFAM(bp.gr, orgdb)
hct.gr <- BioPlex::annotatePFAM(hct.gr, orgdb)

Domain-domain association analysis

# Run the domain-domain association test on the 293T graph (timed), then
# show the first six rows of the result.
system.time(res.bp <- BioPlexAnalysis::testDomainAssociation(bp.gr))
##    user  system elapsed 
## 104.525  13.414 125.921
head(res.bp, n = 6L)
##            PFAM1   PFAM2 FREQ          PVAL      ADJ.PVAL
## 17502626 PF00227 PF10584  100 2.153754e-220 8.179526e-216
## 973147   PF00227 PF00227  114 1.040375e-204 1.975569e-200
## 3221307  PF00071 PF00996   97 3.705095e-196 4.690403e-192
## 5552981  PF02023 PF02023  208 6.333035e-194 6.012900e-190
## 5552053  PF00096 PF02023  389 1.857595e-176 1.410955e-172
## 14006382 PF00028 PF08266  238 6.427670e-171 4.068501e-167

# Same test and preview for the HCT116 graph.
system.time(res.hct <- BioPlexAnalysis::testDomainAssociation(hct.gr))
##    user  system elapsed 
##  49.149   3.100  52.620
head(res.hct, n = 6L)
##            PFAM1   PFAM2 FREQ          PVAL      ADJ.PVAL
## 13871254 PF00227 PF10584   92 1.627363e-177 3.689720e-173
## 2735815  PF00071 PF00996   87 9.079702e-177 1.029320e-172
## 16453714 PF01352 PF14634  114 3.490617e-174 2.638092e-170
## 831958   PF00227 PF00227  105 3.452324e-166 1.956863e-162
## 2192905  PF00735 PF00735   50 3.355670e-144 1.521662e-140
## 3543907  PF00643 PF01352  128 2.347637e-143 8.871329e-140

Get human-readable descriptions for the PFAM domain IDs:

# Attach PFAM.db and flatten its PFAMDE map into a single vector
# (presumably named by PFAM ID, since it is indexed by ID further down).
library(PFAM.db)
## 
id2de <- unlist(as.list(PFAMDE))

What are the top interacting domain pairs for the 293T network:

# Attach a human-readable description to each PFAM ID of the 293T results.
#
# The ID columns are converted with as.character() before the lookup so that
# `id2de` is indexed by name. Indexing with a factor uses its integer codes
# instead of its labels; the previously recorded output shows that symptom:
# the same ID (PF00227) is labelled "Chitinase class I" in this table but
# "Ets-domain" in the HCT116 table. IDs without an entry in `id2de` yield NA.
res.bp$DOMAIN1 <- unname(id2de[as.character(res.bp$PFAM1)])
res.bp$DOMAIN2 <- unname(id2de[as.character(res.bp$PFAM2)])
head(res.bp)
# NOTE(review): the DOMAIN1/DOMAIN2 values in the output recorded below were
# produced before this fix; re-render the document to refresh them.
##            PFAM1   PFAM2 FREQ          PVAL      ADJ.PVAL
## 17502626 PF00227 PF10584  100 2.153754e-220 8.179526e-216
## 973147   PF00227 PF00227  114 1.040375e-204 1.975569e-200
## 3221307  PF00071 PF00996   97 3.705095e-196 4.690403e-192
## 5552981  PF02023 PF02023  208 6.333035e-194 6.012900e-190
## 5552053  PF00096 PF02023  389 1.857595e-176 1.410955e-172
## 14006382 PF00028 PF08266  238 6.427670e-171 4.068501e-167
##                               DOMAIN1
## 17502626            Chitinase class I
## 973147              Chitinase class I
## 3221307                    LNR domain
## 5552981     Transthyretin-like family
## 5552053     Sushi repeat (SCR repeat)
## 14006382 Eukaryotic aspartyl protease
##                                                  DOMAIN2
## 17502626                                     PCRF domain
## 973147                                 Chitinase class I
## 3221307          Fes/CIP4, and EFC/F-BAR homology domain
## 5552981                        Transthyretin-like family
## 5552053                        Transthyretin-like family
## 14006382 AIR synthase related protein, C-terminal domain

What are the top interacting domain pairs for the HCT116 network:

# Attach a human-readable description to each PFAM ID of the HCT116 results.
#
# The ID columns are converted with as.character() before the lookup so that
# `id2de` is indexed by name. Indexing with a factor uses its integer codes
# instead of its labels; the previously recorded output shows that symptom:
# the same ID (PF00227) is labelled "Ets-domain" in this table but
# "Chitinase class I" in the 293T table. IDs without an entry in `id2de`
# yield NA.
res.hct$DOMAIN1 <- unname(id2de[as.character(res.hct$PFAM1)])
res.hct$DOMAIN2 <- unname(id2de[as.character(res.hct$PFAM2)])
head(res.hct)
# NOTE(review): the DOMAIN1/DOMAIN2 values in the output recorded below were
# produced before this fix; re-render the document to refresh them.
##            PFAM1   PFAM2 FREQ          PVAL      ADJ.PVAL
## 13871254 PF00227 PF10584   92 1.627363e-177 3.689720e-173
## 2735815  PF00071 PF00996   87 9.079702e-177 1.029320e-172
## 16453714 PF01352 PF14634  114 3.490617e-174 2.638092e-170
## 831958   PF00227 PF00227  105 3.452324e-166 1.956863e-162
## 2192905  PF00735 PF00735   50 3.355670e-144 1.521662e-140
## 3543907  PF00643 PF01352  128 2.347637e-143 8.871329e-140
##                                      DOMAIN1
## 13871254                          Ets-domain
## 2735815           Myosin head (motor domain)
## 16453714 D-alanyl-D-alanine carboxypeptidase
## 831958                            Ets-domain
## 2192905              DNA polymerase family A
## 3543907     Phycobilisome Linker polypeptide
##                                         DOMAIN2
## 13871254 Elongation factor Tu C-terminal domain
## 2735815                 TonB dependent receptor
## 16453714           Hemocyanin, all-alpha domain
## 831958                               Ets-domain
## 2192905                 DNA polymerase family A
## 3543907     D-alanyl-D-alanine carboxypeptidase

Visualization

# Alluvial plot of the domain pairs formed by the PFAM IDs that occur in the
# leading rows of the HCT116 results.
#
# `top10` holds every PFAM ID found in the first eight rows of `res.hct`
# (note: the name says 10, but eight rows are used, as in the original code).
# head() is used instead of 1:8 so that a table with fewer than eight rows
# does not produce NA rows. The ID columns are converted with as.character()
# before being combined, because c() on factors returns integer codes on
# older R versions, which would make the %in% filter below match nothing.
# NOTE(review): assumes ggplot2 and ggalluvial are attached earlier in the
# document (not visible in this section) -- confirm.
top10 <- unique(c(as.character(head(res.hct$PFAM1, 8)),
                  as.character(head(res.hct$PFAM2, 8))))

# Keep only the pairs whose two domains both belong to that ID set.
ind <- res.hct$PFAM1 %in% top10 & res.hct$PFAM2 %in% top10
pldf <- res.hct[ind, ]

# One flow per domain pair, with height given by the FREQ column.
ggplot(pldf, aes(y = FREQ, axis1 = PFAM1, axis2 = PFAM2)) +
    geom_alluvium(aes(fill = PFAM1)) +
    scale_x_discrete(limits = c("PFAM1", "PFAM2"), expand = c(.05, .05)) +
    geom_stratum(width = 1/12, fill = "lightgrey", color = "darkgrey") +
    geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
    ylab("#PPIs") +
    theme_bw() + theme(legend.position = "none")