PDB.Rmd
Map CORUM complex subunits, given as UniProt IDs, to PDB structures via SIFTS:
# Download the SIFTS UniProt-to-PDB flat file, read it into a data frame,
# and remove the local copy afterwards.
url <- "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/csv/uniprot_pdb.csv.gz"
dest <- basename(url)
# The file is gzip-compressed, so request a binary transfer explicitly.
download.file(url, destfile = dest, mode = "wb")
# Read from `dest` so the local path is defined in exactly one place.
# skip = 1 drops the first line of the file, which precedes the column header.
df <- read.csv(dest, skip = 1)
file.remove(dest)
## [1] TRUE
# Preview the first six rows of the UniProt-to-PDB table.
utils::head(df, n = 6L)
## SP_PRIMARY PDB
## 1 A0A003 6kv9
## 2 A0A009I821 7m4x;7m4y;7ryg;7m4w;7ryf;7ryh;7m4z
## 3 A0A009L7S8 7m4y;7m4z;7ryg;7m4x;7ryh;7ryf;7m4w
## 4 A0A009QSN8 6v3a;6v39;6v3d;6v3b
## 5 A0A010 5b01;6j8v;5b02;5gww;5b0i;5b00;5b0m;5b03;6j8w;5b0k;5b0l;5gwv;5b0j
## 6 A0A011 3vkd;3vkc;3vka;3vkb;3vk5
Turn the table into a mapping from each UniProt ID to its PDB IDs:
## $A0A003
## [1] "6kv9"
##
## $A0A009I821
## [1] "7m4x" "7m4y" "7ryg" "7m4w" "7ryf" "7ryh" "7m4z"
##
## $A0A009L7S8
## [1] "7m4y" "7m4z" "7ryg" "7m4x" "7ryh" "7ryf" "7m4w"
##
## $A0A009QSN8
## [1] "6v3a" "6v39" "6v3d" "6v3b"
##
## $A0A010
## [1] "5b01" "6j8v" "5b02" "5gww" "5b0i" "5b00" "5b0m" "5b03" "6j8w" "5b0k"
## [11] "5b0l" "5gwv" "5b0j"
##
## $A0A011
## [1] "3vkd" "3vkc" "3vka" "3vkb" "3vk5"
Get the PDB structure for the TFIIH core complex (PDB ID: 6nmi)
##
## Call: bio3d::read.pdb(file = pdb.file)
##
## Total Models#: 1
## Total Atoms#: 24379, XYZs#: 73137 Chains#: 8 (values: A B C D E F G H)
##
## Protein Atoms#: 23015 (residues/Calpha atoms#: 2908)
## Nucleic acid Atoms#: 0 (residues/phosphate atoms#: 0)
##
## Non-protein/nucleic Atoms#: 1364 (residues: 277)
## Non-protein/nucleic resid values: [ SF4 (1), UNK (270), ZN (6) ]
##
## Protein sequence:
## PQEAVPSAAGKQVDESGTKVDEYGAKDYRLQMPLKDDHTSRPLWVAPDGHIFLEAFSPVY
## KYAQDFLVAIAEPVCRPTHVHEYKLTAYSLYAAVSVGLQTSDITEYLRKLSKTGVPDGIM
## QFIKLCTVSYGKVKLVLKHNRYFVESCHPDVIQHLLQDPVIRECRLRNSEQTVSFEVKQE
## MIEELQKRCIHLEYPLLAEYDFRNDSVNPDINIDLKPTAVLRPYQ...<cut>...QLEM
##
## + attr: atom, xyz, seqres, helix, sheet,
## calpha, remark, call
# Show the internal structure of the parsed PDB object.
utils::str(object = pdb)
## List of 8
## $ atom :'data.frame': 24379 obs. of 16 variables:
## ..$ type : chr [1:24379] "ATOM" "ATOM" "ATOM" "ATOM" ...
## ..$ eleno : int [1:24379] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ elety : chr [1:24379] "N" "CA" "C" "O" ...
## ..$ alt : chr [1:24379] NA NA NA NA ...
## ..$ resid : chr [1:24379] "PRO" "PRO" "PRO" "PRO" ...
## ..$ chain : chr [1:24379] "A" "A" "A" "A" ...
## ..$ resno : int [1:24379] 34 34 34 34 34 34 34 35 35 35 ...
## ..$ insert: chr [1:24379] NA NA NA NA ...
## ..$ x : num [1:24379] 177 177 175 175 177 ...
## ..$ y : num [1:24379] 185 185 185 186 186 ...
## ..$ z : num [1:24379] 131 133 133 133 133 ...
## ..$ o : num [1:24379] 1 1 1 1 1 1 1 1 1 1 ...
## ..$ b : num [1:24379] 60.6 60.6 60.6 60.6 60.6 ...
## ..$ segid : chr [1:24379] NA NA NA NA ...
## ..$ elesy : chr [1:24379] "N" "C" "C" "O" ...
## ..$ charge: chr [1:24379] NA NA NA NA ...
## $ xyz : 'xyz' num [1, 1:73137] 177 185 131 177 185 ...
## $ seqres: Named chr [1:3477] "PRO" "GLN" "GLU" "ALA" ...
## ..- attr(*, "names")= chr [1:3477] "A" "A" "A" "A" ...
## $ helix :List of 4
## ..$ start: Named num [1:128] 37 92 119 132 149 181 191 249 273 276 ...
## .. ..- attr(*, "names")= chr [1:128] "" "" "" "" ...
## ..$ end : Named num [1:128] 42 104 130 142 161 190 198 262 275 287 ...
## .. ..- attr(*, "names")= chr [1:128] "" "" "" "" ...
## ..$ chain: chr [1:128] "A" "A" "A" "A" ...
## ..$ type : chr [1:128] "1" "1" "1" "1" ...
## $ sheet :List of 4
## ..$ start: Named num [1:80] 59 333 342 314 307 76 83 113 105 268 ...
## .. ..- attr(*, "names")= chr [1:80] "" "" "" "" ...
## ..$ end : Named num [1:80] 60 337 345 317 310 78 87 117 108 272 ...
## .. ..- attr(*, "names")= chr [1:80] "" "" "" "" ...
## ..$ chain: chr [1:80] "A" "D" "D" "D" ...
## ..$ sense: chr [1:80] "0" "1" "-1" "-1" ...
## $ calpha: logi [1:24379] FALSE TRUE FALSE FALSE FALSE FALSE ...
## $ remark:List of 1
## ..$ biomat:List of 4
## .. ..$ num : int 1
## .. ..$ chain :List of 1
## .. .. ..$ : chr [1:8] "A" "B" "C" "D" ...
## .. ..$ mat :List of 1
## .. .. ..$ :List of 1
## .. .. .. ..$ A B C D E F G H: num [1:3, 1:4] 1 0 0 0 1 0 0 0 1 0 ...
## .. ..$ method: chr "AUTHOR"
## $ call : language bio3d::read.pdb(file = pdb.file)
## - attr(*, "class")= chr [1:2] "pdb" "sse"
Assign a color to each chain:
# Collect the chain identifiers stored under remark -> biomat -> chain of the
# parsed structure into a flat character vector, then display them.
chains <- unlist(pdb[["remark"]][["biomat"]][["chain"]])
print(chains)
## [1] "A" "B" "C" "D" "E" "F" "G" "H"
# One color per chain, taken from the ColorBrewer "Set1" palette.
nr.chains <- length(chains)
# chain.colors <- ggpubr::get_palette("ucscgb", nr.chains)
# Size the palette from the number of chains instead of a fixed 8.
# "Set1" provides between 3 and 9 colors, so clamp the request to that range.
n.palette <- min(max(nr.chains, 3L), 9L)
chain.colors <- RColorBrewer::brewer.pal(n.palette, "Set1")
# Truncate (fewer than 3 chains) or recycle (more than 9 chains) so that there
# is exactly one color per chain and indexing never yields NA.
chain.colors <- rep_len(chain.colors, nr.chains)
# Lower-case the hex codes before they are passed to the viewer.
chain.colors <- tolower(chain.colors)
Visualize the structure:
# Set up the initial viewer: load the structure, frame it, and apply a
# default cartoon style.
viewer <- r3dmol(
  # Optional viewer settings, currently disabled:
  # viewer_spec = m_viewer_spec(
  #   cartoonQuality = 10,
  #   lowerZoomLimit = 10,
  #   upperZoomLimit = 300
  # ),
  id = "",
  elementId = "demo"
) %>%
  # Add model to scene
  m_add_model(data = m_bio3d(pdb), format = "pdb") %>%
  # Zoom to encompass the whole scene
  m_zoom_to() %>%
  # Set a default style for all structures; the loop below then assigns a
  # color to each chain
  m_set_style(style = m_style_cartoon(color = "#00cc96"))

# Color each chain individually. Select by the actual chain identifier rather
# than by LETTERS[i], which is only correct when the chains happen to be named
# "A", "B", "C", ... in order.
for (i in seq_along(chains)) {
  viewer <- m_set_style(
    viewer,
    sel = m_sel(chain = chains[i]),
    style = m_style_cartoon(color = chain.colors[i])
  )
}

# Rotate the scene by 90 degrees around the y axis and start spinning.
viewer %>%
  m_rotate(angle = 90, axis = "y") %>%
  m_spin()