Setup

Map CORUM complex subunits, given as UniProt IDs, to PDB structures via SIFTS:

# Download the SIFTS UniProt-to-PDB table, read it into `df`, and delete the
# local copy afterwards.
url <- "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/csv/uniprot_pdb.csv.gz"
dest <- basename(url)
# The file is gzip-compressed, so request a binary transfer explicitly.
download.file(url, destfile = dest, mode = "wb")
# Read from `dest` instead of repeating the file name. The first line is
# skipped; the column names (SP_PRIMARY, PDB) come from the line after it.
# stringsAsFactors = FALSE keeps both columns as character on R < 4.0 too,
# which the later strsplit() on df$PDB requires.
df <- read.csv(dest, skip = 1, stringsAsFactors = FALSE)
file.remove(dest)
## [1] TRUE
head(df)
##   SP_PRIMARY                                                              PDB
## 1     A0A003                                                             6kv9
## 2 A0A009I821                               7m4x;7m4y;7ryg;7m4w;7ryf;7ryh;7m4z
## 3 A0A009L7S8                               7m4y;7m4z;7ryg;7m4x;7ryh;7ryf;7m4w
## 4 A0A009QSN8                                              6v3a;6v39;6v3d;6v3b
## 5     A0A010 5b01;6j8v;5b02;5gww;5b0i;5b00;5b0m;5b03;6j8w;5b0k;5b0l;5gwv;5b0j
## 6     A0A011                                         3vkd;3vkc;3vka;3vkb;3vk5

Turn the table into a mapping from UniProt ID to PDB IDs:

# Split each semicolon-separated PDB field into a character vector and key the
# resulting list by the UniProt accession found in the same row.
unip2pdb <- setNames(strsplit(df$PDB, ";"), df$SP_PRIMARY)
head(unip2pdb)
## $A0A003
## [1] "6kv9"
## 
## $A0A009I821
## [1] "7m4x" "7m4y" "7ryg" "7m4w" "7ryf" "7ryh" "7m4z"
## 
## $A0A009L7S8
## [1] "7m4y" "7m4z" "7ryg" "7m4x" "7ryh" "7ryf" "7m4w"
## 
## $A0A009QSN8
## [1] "6v3a" "6v39" "6v3d" "6v3b"
## 
## $A0A010
##  [1] "5b01" "6j8v" "5b02" "5gww" "5b0i" "5b00" "5b0m" "5b03" "6j8w" "5b0k"
## [11] "5b0l" "5gwv" "5b0j"
## 
## $A0A011
## [1] "3vkd" "3vkc" "3vka" "3vkb" "3vk5"

Get the PDB structure for the TFIIH core complex (PDB ID: 6nmi)

# PDB identifier of the structure to load.
pdb.id <- "6nmi"
# Obtain the entry through bio3d; the returned value is handed to read.pdb()
# as its `file` argument (presumably the path of the downloaded file -- confirm).
pdb.file <- bio3d::get.pdb(pdb.id)
# Parse the file into a bio3d "pdb" object. The call is stored in the object,
# so keep this expression as-is if the printed summary should stay the same.
pdb <- bio3d::read.pdb(pdb.file)
# Print the object's summary.
pdb
## 
##  Call:  bio3d::read.pdb(file = pdb.file)
## 
##    Total Models#: 1
##      Total Atoms#: 24379,  XYZs#: 73137  Chains#: 8  (values: A B C D E F G H)
## 
##      Protein Atoms#: 23015  (residues/Calpha atoms#: 2908)
##      Nucleic acid Atoms#: 0  (residues/phosphate atoms#: 0)
## 
##      Non-protein/nucleic Atoms#: 1364  (residues: 277)
##      Non-protein/nucleic resid values: [ SF4 (1), UNK (270), ZN (6) ]
## 
##    Protein sequence:
##       PQEAVPSAAGKQVDESGTKVDEYGAKDYRLQMPLKDDHTSRPLWVAPDGHIFLEAFSPVY
##       KYAQDFLVAIAEPVCRPTHVHEYKLTAYSLYAAVSVGLQTSDITEYLRKLSKTGVPDGIM
##       QFIKLCTVSYGKVKLVLKHNRYFVESCHPDVIQHLLQDPVIRECRLRNSEQTVSFEVKQE
##       MIEELQKRCIHLEYPLLAEYDFRNDSVNPDINIDLKPTAVLRPYQ...<cut>...QLEM
## 
## + attr: atom, xyz, seqres, helix, sheet,
##         calpha, remark, call
# Show the internal structure of the parsed object (a list of components).
str(pdb)
## List of 8
##  $ atom  :'data.frame':  24379 obs. of  16 variables:
##   ..$ type  : chr [1:24379] "ATOM" "ATOM" "ATOM" "ATOM" ...
##   ..$ eleno : int [1:24379] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ elety : chr [1:24379] "N" "CA" "C" "O" ...
##   ..$ alt   : chr [1:24379] NA NA NA NA ...
##   ..$ resid : chr [1:24379] "PRO" "PRO" "PRO" "PRO" ...
##   ..$ chain : chr [1:24379] "A" "A" "A" "A" ...
##   ..$ resno : int [1:24379] 34 34 34 34 34 34 34 35 35 35 ...
##   ..$ insert: chr [1:24379] NA NA NA NA ...
##   ..$ x     : num [1:24379] 177 177 175 175 177 ...
##   ..$ y     : num [1:24379] 185 185 185 186 186 ...
##   ..$ z     : num [1:24379] 131 133 133 133 133 ...
##   ..$ o     : num [1:24379] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ b     : num [1:24379] 60.6 60.6 60.6 60.6 60.6 ...
##   ..$ segid : chr [1:24379] NA NA NA NA ...
##   ..$ elesy : chr [1:24379] "N" "C" "C" "O" ...
##   ..$ charge: chr [1:24379] NA NA NA NA ...
##  $ xyz   : 'xyz' num [1, 1:73137] 177 185 131 177 185 ...
##  $ seqres: Named chr [1:3477] "PRO" "GLN" "GLU" "ALA" ...
##   ..- attr(*, "names")= chr [1:3477] "A" "A" "A" "A" ...
##  $ helix :List of 4
##   ..$ start: Named num [1:128] 37 92 119 132 149 181 191 249 273 276 ...
##   .. ..- attr(*, "names")= chr [1:128] "" "" "" "" ...
##   ..$ end  : Named num [1:128] 42 104 130 142 161 190 198 262 275 287 ...
##   .. ..- attr(*, "names")= chr [1:128] "" "" "" "" ...
##   ..$ chain: chr [1:128] "A" "A" "A" "A" ...
##   ..$ type : chr [1:128] "1" "1" "1" "1" ...
##  $ sheet :List of 4
##   ..$ start: Named num [1:80] 59 333 342 314 307 76 83 113 105 268 ...
##   .. ..- attr(*, "names")= chr [1:80] "" "" "" "" ...
##   ..$ end  : Named num [1:80] 60 337 345 317 310 78 87 117 108 272 ...
##   .. ..- attr(*, "names")= chr [1:80] "" "" "" "" ...
##   ..$ chain: chr [1:80] "A" "D" "D" "D" ...
##   ..$ sense: chr [1:80] "0" "1" "-1" "-1" ...
##  $ calpha: logi [1:24379] FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ remark:List of 1
##   ..$ biomat:List of 4
##   .. ..$ num   : int 1
##   .. ..$ chain :List of 1
##   .. .. ..$ : chr [1:8] "A" "B" "C" "D" ...
##   .. ..$ mat   :List of 1
##   .. .. ..$ :List of 1
##   .. .. .. ..$ A B C D E F G H: num [1:3, 1:4] 1 0 0 0 1 0 0 0 1 0 ...
##   .. ..$ method: chr "AUTHOR"
##  $ call  : language bio3d::read.pdb(file = pdb.file)
##  - attr(*, "class")= chr [1:2] "pdb" "sse"

Assign a color to each chain:

# Chain identifiers as recorded in the biological-assembly ("biomat") remark
# of the parsed structure, flattened into a single character vector.
chains <- unlist(pdb[["remark"]][["biomat"]][["chain"]])
chains
## [1] "A" "B" "C" "D" "E" "F" "G" "H"
nr.chains <- length(chains)
# chain.colors <- ggpubr::get_palette("ucscgb", nr.chains)
# Size the palette from the number of chains instead of a hard-coded 8.
# brewer.pal() serves between 3 and 9 colors for "Set1", so clamp the request
# to that range and interpolate additional colors when there are more chains.
n.base <- min(max(nr.chains, 3L), 9L)
chain.colors <- RColorBrewer::brewer.pal(n.base, "Set1")
if (nr.chains > n.base) {
  chain.colors <- grDevices::colorRampPalette(chain.colors)(nr.chains)
}
# Keep exactly one color per chain and use lower-case hex codes.
chain.colors <- tolower(head(chain.colors, nr.chains))

Visualize the structure:

# Set up the initial viewer
viewer <- r3dmol(
  #viewer_spec = m_viewer_spec(
  #  cartoonQuality = 10,
  #  lowerZoomLimit = 10,
  #  upperZoomLimit = 300
  #),
  id = "",
  elementId = "demo"
) %>%
  # Add model to scene
  m_add_model(data = m_bio3d(pdb), format = "pdb") %>%
  # Zoom to encompass the whole scene
  m_zoom_to() %>%
  # Set style of structures (default color, overridden per chain below)
  m_set_style(style = m_style_cartoon(color = "#00cc96"))
# Color every chain individually. Select by the actual chain identifier taken
# from `chains` rather than by LETTERS[i], which is only correct when the
# chains happen to be named A, B, C, ... in order.
for (i in seq_along(chains)) {
  # Recycle the palette if there are more chains than colors.
  color.idx <- (i - 1L) %% length(chain.colors) + 1L
  viewer <- m_set_style(
    viewer,
    sel = m_sel(chain = chains[i]),
    style = m_style_cartoon(color = chain.colors[color.idx])
  )
}
# Rotate the scene by 90 degrees around the y axis and start spinning
viewer %>% m_rotate(angle = 90, axis = "y") %>% m_spin()