Pinder system#
from pathlib import Path
from pinder.core import PinderSystem, get_index
Example usage of the Pinder index API is shown below. For more detailed usage examples, check the pinder-index notebook.
index = get_index()
hetero_test_apo = index.query(
    '(uniprot_L != uniprot_R) and split == "test" and (apo_R and apo_L)'
)
hetero_test_apo.reset_index(drop=True, inplace=True)
hetero_test_apo
| | split | id | pdb_id | cluster_id | cluster_id_R | cluster_id_L | pinder_s | pinder_xl | pinder_af2 | uniprot_R | ... | apo_L | apo_R_quality | apo_L_quality | chain1_neff | chain2_neff | chain_R | chain_L | contains_antibody | contains_antigen | contains_enzyme |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | test | 3k1i__D1_O25709--3k1i__A1_O25448 | 3k1i | cluster_26031_5179 | cluster_26031 | cluster_5179 | True | True | False | O25709 | ... | True | high | high | 12.351562 | 514.000000 | D1 | A1 | False | False | False | 
| 1 | test | 6qta__A1_G0SHE6--6qta__B1_G0SC29 | 6qta | cluster_11327_11328 | cluster_11327 | cluster_11328 | False | True | False | G0SHE6 | ... | True | high | high | 115.750000 | 689.500000 | A1 | B1 | False | False | False | 
| 2 | test | 3vf0__B1_Q8IY67--3vf0__A2_P18206 | 3vf0 | cluster_5612_993 | cluster_993 | cluster_5612 | True | True | False | Q8IY67 | ... | True | high | high | 251.125000 | 35.531250 | B1 | A2 | False | False | False | 
| 3 | test | 4aye__D1_Q9JXV4--4aye__A1_P08603 | 4aye | cluster_3949_4866 | cluster_3949 | cluster_4866 | True | True | False | Q9JXV4 | ... | True | high | high | 14.546875 | 310.000000 | D1 | A1 | False | False | False | 
| 4 | test | 2w8b__A1_P0A855--2w8b__H1_P0A912 | 2w8b | cluster_15535_1924 | cluster_15535 | cluster_1924 | True | True | False | P0A855 | ... | True | high | high | 308.250000 | 1150.000000 | A1 | H1 | False | False | False | 
| 5 | test | 5y4r__A1_O87131--5y4r__B1_Q9HVI1 | 5y4r | cluster_8825_8826 | cluster_8825 | cluster_8826 | True | True | False | O87131 | ... | True | high | high | 610.000000 | 144.875000 | A1 | B1 | False | False | True | 
| 6 | test | 3egv__A1_Q84BQ9--3egv__B1_Q5SLP6 | 3egv | cluster_33015_371 | cluster_33015 | cluster_371 | True | True | False | Q84BQ9 | ... | True | high | high | 806.500000 | 637.000000 | A1 | B1 | False | False | True | 
| 7 | test | 6wjc__A1_P11229--6wjc__B1_Q8QGR0 | 6wjc | cluster_1057_1356 | cluster_1057 | cluster_1356 | True | True | False | P11229 | ... | True | high | high | 462.750000 | 518.500000 | A1 | B1 | False | False | True | 
| 8 | test | 6tx3__B1_Q9NWY4--6tx3__A1_Q9UGN5 | 6tx3 | cluster_11866_335 | cluster_11866 | cluster_335 | True | True | False | Q9NWY4 | ... | True | high | high | 59.468750 | 213.000000 | B1 | A1 | False | False | True | 
| 9 | test | 2grx__A1_P06971--2grx__C1_P02929 | 2grx | cluster_12107_8897 | cluster_12107 | cluster_8897 | True | True | False | P06971 | ... | True | high | high | 288.000000 | 562.500000 | A1 | C1 | False | False | False | 
| 10 | test | 3kbu__A1_P11277--3kbu__D1_P16157 | 3kbu | cluster_16732_8658 | cluster_8658 | cluster_16732 | True | True | False | P11277 | ... | True | high | high | 229.625000 | 210.625000 | A1 | D1 | False | False | False | 
| 11 | test | 2j0t__A1_P03956--2j0t__D1_P01033 | 2j0t | cluster_939_940 | cluster_939 | cluster_940 | True | True | False | P03956 | ... | True | high | high | 612.500000 | 68.312500 | A1 | D1 | False | False | True | 
| 12 | test | 8a60__A1_P06971--8a60__B1_Q38162 | 8a60 | cluster_12107_26846 | cluster_12107 | cluster_26846 | False | True | True | P06971 | ... | True | high | high | 288.000000 | 2.734375 | A1 | B1 | False | False | False | 
| 13 | test | 4je4__A1_Q06124--4je4__B1_P02751 | 4je4 | cluster_1465_1605 | cluster_1465 | cluster_1605 | True | True | False | Q06124 | ... | True | high | high | 611.000000 | 75.062500 | A1 | B1 | False | False | False | 
| 14 | test | 4uae__A1_O00629--4uae__B1_P31345 | 4uae | cluster_10331_1373 | cluster_1373 | cluster_10331 | False | True | False | O00629 | ... | True | high | high | 240.750000 | 3.058594 | A1 | B1 | False | False | False | 
| 15 | test | 3k9m__A1_P07858--3k9m__B1_P01040 | 3k9m | cluster_4628_6704 | cluster_6704 | cluster_4628 | True | True | False | P07858 | ... | True | high | high | 770.000000 | 429.750000 | A1 | B1 | False | False | True | 
| 16 | test | 2wo2__A1_P54764--2wo2__B1_P52799 | 2wo2 | cluster_130_8064 | cluster_130 | cluster_8064 | True | True | False | P54764 | ... | True | high | high | 567.500000 | 104.562500 | A1 | B1 | False | False | True | 
| 17 | test | 5dob__A1_P16794--5dob__B1_P16791 | 5dob | cluster_12247_23692 | cluster_12247 | cluster_23692 | False | True | False | P16794 | ... | True | high | high | 3.312500 | 3.855469 | A1 | B1 | False | False | False | 
| 18 | test | 8i2e__A1_O34841--8i2e__B1_P54421 | 8i2e | cluster_11087_12465 | cluster_12465 | cluster_11087 | True | True | True | O34841 | ... | True | high | high | 9.031250 | 865.000000 | A1 | B1 | False | False | True | 
| 19 | test | 1zlh__A1_P00730--1zlh__B1_Q5EPH2 | 1zlh | cluster_2416_8594 | cluster_2416 | cluster_8594 | True | True | False | P00730 | ... | True | high | high | 541.000000 | 637.000000 | A1 | B1 | False | False | True | 
| 20 | test | 6yev__C1_P0A744--6yev__A1_P0AA25 | 6yev | cluster_4231_621 | cluster_4231 | cluster_621 | True | True | False | P0A744 | ... | True | high | high | 1099.000000 | 1440.000000 | C1 | A1 | False | False | True | 
| 21 | test | 1dtd__A1_P48052--1dtd__B1_P81511 | 1dtd | cluster_2416_9476 | cluster_2416 | cluster_9476 | True | True | False | P48052 | ... | True | high | high | 581.000000 | 8.781250 | A1 | B1 | False | False | True | 
| 22 | test | 7fn1__B1_P32357--7fn1__A1_P33334 | 7fn1 | cluster_635_7263 | cluster_7263 | cluster_635 | True | True | False | P32357 | ... | True | high | high | 8.070312 | 69.062500 | B1 | A1 | False | False | False | 
| 23 | test | 1yu6__A1_P00780--1yu6__C1_P68390 | 1yu6 | cluster_1952_5091 | cluster_1952 | cluster_5091 | True | True | False | P00780 | ... | True | high | high | 1000.000000 | 704.000000 | A1 | C1 | False | False | True | 
| 24 | test | 6s8v__B1_P08195--6s8v__A1_P80188 | 6s8v | cluster_19347_5331 | cluster_19347 | cluster_5331 | True | True | False | P08195 | ... | True | high | high | 511.000000 | 34.812500 | B1 | A1 | False | False | False | 
| 25 | test | 5ja2__A1_P11454--5ja2__B1_Q9I169 | 5ja2 | cluster_4489_7234 | cluster_7234 | cluster_4489 | True | True | False | P11454 | ... | True | high | high | 641.500000 | 545.500000 | A1 | B1 | False | False | True | 
| 26 | test | 6f3z__A1_P0ADC3--6f3z__B1_P61316 | 6f3z | cluster_12985_22189 | cluster_22189 | cluster_12985 | True | True | False | P0ADC3 | ... | True | high | high | 402.500000 | 368.000000 | A1 | B1 | False | False | False | 
| 27 | test | 8gt0__A1_Q8I6U4--8gt0__B1_P01040 | 8gt0 | cluster_2085_4628 | cluster_2085 | cluster_4628 | True | True | False | Q8I6U4 | ... | True | high | high | 651.000000 | 429.750000 | A1 | B1 | False | False | True | 
| 28 | test | 2gkv__A1_P00777--2gkv__B1_P68390 | 2gkv | cluster_5091_5268 | cluster_5268 | cluster_5091 | True | True | False | P00777 | ... | True | high | high | 409.000000 | 704.000000 | A1 | B1 | False | False | True | 
| 29 | test | 6m4v__A1_P0AEX9--6m4v__B1_P62942 | 6m4v | cluster_1772_409 | cluster_409 | cluster_1772 | True | True | False | P0AEX9 | ... | True | high | high | 171.250000 | 1408.000000 | A1 | B1 | False | False | True | 
| 30 | test | 4us1__B1_Q07889--4us1__A1_P01112 | 4us1 | cluster_10806_437 | cluster_10806 | cluster_437 | True | True | False | Q07889 | ... | True | high | high | 183.375000 | 946.500000 | B1 | A1 | False | False | False | 
| 31 | test | 1tec__A1_P04072--1tec__B1_P01051 | 1tec | cluster_1952_387 | cluster_1952 | cluster_387 | True | True | False | P04072 | ... | True | high | high | 1017.500000 | 133.750000 | A1 | B1 | False | False | True | 
| 32 | test | 1zhh__A1_P54300--1zhh__B1_P54302 | 1zhh | cluster_3962_8641 | cluster_3962 | cluster_8641 | True | True | False | P54300 | ... | True | high | high | 23.531250 | 524.500000 | A1 | B1 | False | False | False | 
| 33 | test | 5n47__B1_P02751--5n47__A1_P80188 | 5n47 | cluster_1605_5331 | cluster_1605 | cluster_5331 | True | True | False | P02751 | ... | True | high | high | 75.062500 | 34.812500 | B1 | A1 | False | False | False | 
| 34 | test | 7b80__A1_G3I8R9--7b80__B1_Q9BVA6 | 7b80 | cluster_1039_643 | cluster_643 | cluster_1039 | True | True | False | G3I8R9 | ... | True | high | high | 1069.000000 | 526.500000 | A1 | B1 | False | False | True | 
35 rows × 34 columns
pinder_id = list(hetero_test_apo.id)[2]
pinder_id
'3vf0__B1_Q8IY67--3vf0__A2_P18206'
PinderSystem API - base class representing the Structures in a pinder entry#
# Simplest interface - get a single pinder system
ps = PinderSystem(pinder_id)
ps
2024-11-15 12:15:04,493 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=15
PinderSystem(
entry = IndexEntry(
    (
        'split',
        'test',
    ),
    (
        'id',
        '3vf0__B1_Q8IY67--3vf0__A2_P18206',
    ),
    (
        'pdb_id',
        '3vf0',
    ),
    (
        'cluster_id',
        'cluster_5612_993',
    ),
    (
        'cluster_id_R',
        'cluster_993',
    ),
    (
        'cluster_id_L',
        'cluster_5612',
    ),
    (
        'pinder_s',
        True,
    ),
    (
        'pinder_xl',
        True,
    ),
    (
        'pinder_af2',
        False,
    ),
    (
        'uniprot_R',
        'Q8IY67',
    ),
    (
        'uniprot_L',
        'P18206',
    ),
    (
        'holo_R_pdb',
        '3vf0__B1_Q8IY67-R.pdb',
    ),
    (
        'holo_L_pdb',
        '3vf0__A2_P18206-L.pdb',
    ),
    (
        'predicted_R_pdb',
        'af__Q8IY67.pdb',
    ),
    (
        'predicted_L_pdb',
        'af__P18206.pdb',
    ),
    (
        'apo_R_pdb',
        '3smz__A1_Q8IY67.pdb',
    ),
    (
        'apo_L_pdb',
        '5l0h__A1_P18206.pdb',
    ),
    (
        'apo_R_pdbs',
        '3smz__A1_Q8IY67.pdb',
    ),
    (
        'apo_L_pdbs',
        '5l0h__A1_P18206.pdb;5l0f__A1_P18206.pdb;5l0i__A1_P18206.pdb',
    ),
    (
        'holo_R',
        True,
    ),
    (
        'holo_L',
        True,
    ),
    (
        'predicted_R',
        True,
    ),
    (
        'predicted_L',
        True,
    ),
    (
        'apo_R',
        True,
    ),
    (
        'apo_L',
        True,
    ),
    (
        'apo_R_quality',
        'high',
    ),
    (
        'apo_L_quality',
        'high',
    ),
    (
        'chain1_neff',
        251.125,
    ),
    (
        'chain2_neff',
        35.53125,
    ),
    (
        'chain_R',
        'B1',
    ),
    (
        'chain_L',
        'A2',
    ),
    (
        'contains_antibody',
        False,
    ),
    (
        'contains_antigen',
        False,
    ),
    (
        'contains_enzyme',
        False,
    ),
)
native=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3vf0__B1_Q8IY67--3vf0__A2_P18206.pdb,
    uniprot_map=None,
    pinder_id='3vf0__B1_Q8IY67--3vf0__A2_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (3584,),
    pdb_engine='fastpdb',
)
holo_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__B1_Q8IY67-R.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__B1_Q8IY67-R.parquet,
    pinder_id='3vf0__B1_Q8IY67-R',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (2204,),
    pdb_engine='fastpdb',
)
holo_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,
    pinder_id='3vf0__A2_P18206-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1380,),
    pdb_engine='fastpdb',
)
apo_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3smz__A1_Q8IY67.parquet,
    pinder_id='3smz__A1_Q8IY67',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (2183,),
    pdb_engine='fastpdb',
)
apo_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,
    pinder_id='5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1341,),
    pdb_engine='fastpdb',
)
pred_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8IY67.pdb,
    uniprot_map=None,
    pinder_id='af__Q8IY67',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (4495,),
    pdb_engine='fastpdb',
)
pred_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P18206.pdb,
    uniprot_map=None,
    pinder_id='af__P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (8664,),
    pdb_engine='fastpdb',
)
)
holo_L, holo_R = ps.holo_ligand, ps.holo_receptor
pred_L, pred_R = ps.pred_ligand, ps.pred_receptor
apo_L, apo_R = ps.apo_ligand, ps.apo_receptor
holo_L
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,
    pinder_id='3vf0__A2_P18206-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1380,),
    pdb_engine='fastpdb',
)
Classify system difficulty based on the degree of conformational shift between the unbound and bound states#
ps.unbound_difficulty("apo")
{'Fnat': 0.5421686746987951,
 'Fnonnat': 0.3076923076923077,
 'common_contacts': 45,
 'differing_contacts': 20,
 'bound_contacts': 83,
 'unbound_contacts': 65,
 'fnonnat_R': 0.2857142857142857,
 'fnonnat_L': 0.0,
 'fnat_R': 0.5882352941176471,
 'fnat_L': 0.7692307692307693,
 'difficulty': 'Rigid-body',
 'I-RMSD': 1.125529,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 34,
 'holo_ligand_interface_res': 26,
 'apo_receptor_interface_res': 28,
 'apo_ligand_interface_res': 20,
 'L-RMSD': 0.94191533,
 'R-RMSD': 1.4988925,
 'unbound_id': '3smz__A1_Q8IY67--5l0h__A1_P18206',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'apo'}
ps.unbound_difficulty("predicted")
{'Fnat': 0.5662650602409639,
 'Fnonnat': 0.9225700164744646,
 'common_contacts': 47,
 'differing_contacts': 560,
 'bound_contacts': 83,
 'unbound_contacts': 607,
 'fnonnat_R': 0.8435374149659864,
 'fnonnat_L': 0.8670520231213873,
 'fnat_R': 0.6764705882352942,
 'fnat_L': 0.8846153846153846,
 'difficulty': 'Difficult',
 'I-RMSD': 3.4248848,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 34,
 'holo_ligand_interface_res': 26,
 'apo_receptor_interface_res': 147,
 'apo_ligand_interface_res': 173,
 'L-RMSD': 2.1201644,
 'R-RMSD': 0.62853533,
 'unbound_id': 'af__Q8IY67--af__P18206',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'predicted'}
Illustrating utilities available in Structure instances#
holo_L.filter("atom_name", mask=["CA"])
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (283, 14),
    pinder_id='3vf0__A2_P18206-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (178,),
    pdb_engine='fastpdb',
)
apo_L.filter("atom_name", mask=["CA"])
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (176, 14),
    pinder_id='5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (173,),
    pdb_engine='fastpdb',
)
You can also filter “in place” rather than returning a copy (à la pandas)#
apo_L.filter("atom_name", mask=["CA"], copy=False)
(
    ps.apo_ligand.filter("atom_name", mask=["CA"]),
    ps.holo_ligand.filter("atom_name", mask=["CA"])
)
(Structure(
     filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
     uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (176, 14),
     pinder_id='5l0h__A1_P18206',
     atom_array=<class 'biotite.structure.AtomArray'> with shape (173,),
     pdb_engine='fastpdb',
 ),
 Structure(
     filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
     uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (283, 14),
     pinder_id='3vf0__A2_P18206-L',
     atom_array=<class 'biotite.structure.AtomArray'> with shape (178,),
     pdb_engine='fastpdb',
 ))
Create masked unbound complex aligned to bound for apo#
apo_complex = ps.create_apo_complex()
apo_complex
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (460, 14),
    pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (2355,),
    pdb_engine='fastpdb',
)
apo_complex.dataframe
| | chain_id | res_name | res_code | res_id | atom_name | b_factor | ins_code | hetero | element | x | y | z |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R | HIS | H | 1 | N | 0.0 | | False | N | 69.377380 | 105.086411 | -51.232784 |
| 1 | R | HIS | H | 1 | CA | 0.0 | | False | C | 69.471100 | 103.738152 | -50.680832 |
| 2 | R | HIS | H | 1 | C | 0.0 | | False | C | 70.264641 | 103.716263 | -49.368168 |
| 3 | R | HIS | H | 1 | O | 0.0 | | False | O | 70.223633 | 102.714134 | -48.653946 |
| 4 | R | HIS | H | 1 | CB | 0.0 | | False | C | 70.093376 | 102.773239 | -51.701912 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2350 | L | ARG | R | 170 | CA | 0.0 | | False | C | 45.438423 | 86.089317 | -54.533417 |
| 2351 | L | LYS | K | 171 | CA | 0.0 | | False | C | 44.941048 | 83.492149 | -57.280712 |
| 2352 | L | THR | T | 172 | CA | 0.0 | | False | C | 46.387222 | 85.555809 | -60.138439 |
| 2353 | L | PRO | P | 173 | CA | 0.0 | | False | C | 49.873409 | 84.154137 | -59.526482 |
| 2354 | L | TRP | W | 174 | CA | 0.0 | | False | C | 50.158199 | 80.451759 | -60.405190 |
2355 rows × 12 columns
What’s going on under the hood#
ps = PinderSystem(pinder_id)
apo_L, apo_R = ps.apo_ligand, ps.apo_receptor
# After getting the "in common" masked structures, they can be superimposed
apo_R, holo_R = apo_R.align_common_sequence(ps.aligned_holo_R)
apo_L, holo_L = apo_L.align_common_sequence(ps.aligned_holo_L)
# Rmsd after superposition (without outlier removal) is stored in `rms`
R_super, rms, _ = apo_R.superimpose(holo_R)
L_super, rms, _ = apo_L.superimpose(holo_L)
L_super
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,
    pinder_id='5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1327,),
    pdb_engine='fastpdb',
)
# Now we can create the complexes using Structure.__add__ methods
apo_binary = R_super + L_super
holo_binary = holo_R + holo_L
apo_binary
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (460, 14),
    pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (3510,),
    pdb_engine='fastpdb',
)
# Alternatively, there are utilities for creating the masked apo and predicted complexes
pred_complex = ps.create_pred_complex()
pred_complex
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8IY67--af__P18206.pdb,
    uniprot_map=None,
    pinder_id='af__Q8IY67--af__P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (3566,),
    pdb_engine='fastpdb',
)
masked_complex_dir = Path("./").absolute() / "unbound_complexes"
masked_complex_dir.mkdir(exist_ok=True, parents=True)
masked_complex_dir
PosixPath('/home/runner/work/pinder/pinder/docs/unbound_complexes')
# If the output PDB filepath is omitted, the structure will be written to Structure.filepath, which may overwrite an existing file.
# In this case, omitting it would write a new file whose name is composed of the added complex filepaths.
pred_complex.to_pdb(masked_complex_dir / "pred_complex.pdb")
apo_complex.to_pdb(masked_complex_dir / "apo_complex.pdb")
(masked_complex_dir / "pred_complex.pdb").unlink()
(masked_complex_dir / "apo_complex.pdb").unlink()
Structures have resolved_pdb2uniprot and resolved_uniprot2pdb properties#
They return dicts of resolved residue numbers mapped from PDB numbering to UniProt numbering, and vice versa.
The full mapping is available in Structure.uniprot_mapping.
To get only the resolved mapping, access the Structure.resolved_mapping attribute.
apo_L.resolved_pdb2uniprot
{2: 960,
 3: 961,
 4: 962,
 5: 963,
 6: 964,
 7: 965,
 8: 966,
 9: 967,
 10: 968,
 11: 969,
 12: 970,
 13: 971,
 14: 972,
 15: 973,
 16: 974,
 18: 976,
 19: 977,
 20: 978,
 21: 979,
 22: 980,
 23: 981,
 24: 982,
 25: 983,
 26: 984,
 27: 985,
 28: 986,
 29: 987,
 30: 988,
 31: 989,
 32: 990,
 33: 991,
 34: 992,
 35: 993,
 36: 994,
 37: 995,
 38: 996,
 39: 997,
 40: 998,
 41: 999,
 42: 1000,
 43: 1001,
 44: 1002,
 45: 1003,
 46: 1004,
 47: 1005,
 48: 1006,
 49: 1007,
 50: 1008,
 51: 1009,
 52: 1010,
 53: 1011,
 54: 1012,
 55: 1013,
 56: 1014,
 57: 1015,
 58: 1016,
 59: 1017,
 60: 1018,
 61: 1019,
 62: 1020,
 63: 1021,
 64: 1022,
 65: 1023,
 66: 1024,
 67: 1025,
 68: 1026,
 69: 1027,
 70: 1028,
 71: 1029,
 72: 1030,
 73: 1031,
 74: 1032,
 75: 1033,
 76: 1034,
 77: 1035,
 78: 1036,
 79: 1037,
 80: 1038,
 81: 1039,
 82: 1040,
 83: 1041,
 84: 1042,
 85: 1043,
 86: 1044,
 87: 1045,
 88: 1046,
 89: 1047,
 90: 1048,
 91: 1049,
 92: 1050,
 93: 1051,
 94: 1052,
 95: 1053,
 96: 1054,
 97: 1055,
 98: 1056,
 99: 1057,
 100: 1058,
 101: 1059,
 102: 1060,
 103: 1061,
 104: 1062,
 105: 1063,
 106: 1064,
 107: 1065,
 108: 1066,
 109: 1067,
 110: 1068,
 111: 1069,
 112: 1070,
 113: 1071,
 114: 1072,
 115: 1073,
 116: 1074,
 117: 1075,
 118: 1076,
 119: 1077,
 120: 1078,
 121: 1079,
 122: 1080,
 123: 1081,
 124: 1082,
 125: 1083,
 126: 1084,
 127: 1085,
 128: 1086,
 129: 1087,
 130: 1088,
 131: 1089,
 132: 1090,
 133: 1091,
 134: 1092,
 135: 1093,
 136: 1094,
 137: 1095,
 138: 1096,
 139: 1097,
 140: 1098,
 141: 1099,
 142: 1100,
 143: 1101,
 144: 1102,
 145: 1103,
 146: 1104,
 147: 1105,
 148: 1106,
 149: 1107,
 150: 1108,
 151: 1109,
 152: 1110,
 153: 1111,
 154: 1112,
 155: 1113,
 156: 1114,
 157: 1115,
 158: 1116,
 159: 1117,
 160: 1118,
 161: 1119,
 162: 1120,
 163: 1121,
 164: 1122,
 165: 1123,
 166: 1124,
 167: 1125,
 168: 1126,
 169: 1127,
 170: 1128,
 171: 1129,
 172: 1130,
 173: 1131,
 174: 1132}
apo_complex.resolved_mapping
| | entry_id | entity_id | asym_id | pdb_strand_id | resi | resi_pdb | resi_auth | resn | one_letter_code_can | resolved | one_letter_code_uniprot | resi_uniprot | uniprot_acc | chain |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | 1 | A | A | 3 | 39 | 39.0 | LEU | L | 1 | L | 39 | NaN | A1 | 
| 1 | NaN | 1 | A | A | 4 | 40 | 40.0 | ASP | D | 1 | D | 40 | NaN | A1 | 
| 2 | NaN | 1 | A | A | 5 | 41 | 41.0 | PRO | P | 1 | P | 41 | NaN | A1 | 
| 3 | NaN | 1 | A | A | 6 | 42 | 42.0 | GLU | E | 1 | E | 42 | NaN | A1 | 
| 4 | NaN | 1 | A | A | 7 | 43 | 43.0 | GLU | E | 1 | E | 43 | NaN | A1 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 446 | NaN | 1 | A | A | 170 | 1128 | 1128.0 | ARG | R | 1 | R | 1128 | NaN | A1 | 
| 447 | NaN | 1 | A | A | 171 | 1129 | 1129.0 | LYS | K | 1 | K | 1129 | NaN | A1 | 
| 448 | NaN | 1 | A | A | 172 | 1130 | 1130.0 | THR | T | 1 | T | 1130 | NaN | A1 | 
| 449 | NaN | 1 | A | A | 173 | 1131 | 1131.0 | PRO | P | 1 | P | 1131 | NaN | A1 | 
| 450 | NaN | 1 | A | A | 174 | 1132 | 1132.0 | TRP | W | 1 | W | 1132 | NaN | A1 | 
451 rows × 14 columns
Case with multiple available apo structures#
pinder_id = "1ldt__A1_P00761--1ldt__B1_P80424"
ps_canon = PinderSystem(pinder_id)
ps_canon.entry.apo_R_alt
2024-11-15 12:15:08,889 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=43
['1s6f__A1_P00761.pdb',
 '1s85__A1_P00761.pdb',
 '1s84__A1_P00761.pdb',
 '2a32__A1_P00761.pdb',
 '1s6h__A1_P00761.pdb',
 '1s5s__A1_P00761.pdb',
 '1fni__A1_P00761.pdb',
 '1s81__A1_P00761.pdb',
 '1fmg__A1_P00761.pdb',
 '1qqu__A1_P00761.pdb',
 '2a31__A1_P00761.pdb',
 '1fn6__A1_P00761.pdb',
 '1s83__A1_P00761.pdb']
ps_canon.entry.apo_L_alt
['2kmq__A1_P80424.pdb', '2kmp__A1_P80424.pdb', '2kmr__A1_P80424.pdb']
Specify 2kmr as apo ligand and 1fmg as apo receptor#
Note: the default apo_receptor and apo_ligand are determined based on the selected canonical apo monomer.
The canonical monomers were selected based on their sequence overlap and difficulty metrics, calculated in pinder.eval.dockq.unbound.
ps = PinderSystem(pinder_id, apo_ligand_pdb_code="2kmr", apo_receptor_pdb_code="1fmg")
ps
PinderSystem(
entry = IndexEntry(
    (
        'split',
        'train',
    ),
    (
        'id',
        '1ldt__A1_P00761--1ldt__B1_P80424',
    ),
    (
        'pdb_id',
        '1ldt',
    ),
    (
        'cluster_id',
        'cluster_312_5091',
    ),
    (
        'cluster_id_R',
        'cluster_312',
    ),
    (
        'cluster_id_L',
        'cluster_5091',
    ),
    (
        'pinder_s',
        False,
    ),
    (
        'pinder_xl',
        False,
    ),
    (
        'pinder_af2',
        False,
    ),
    (
        'uniprot_R',
        'P00761',
    ),
    (
        'uniprot_L',
        'P80424',
    ),
    (
        'holo_R_pdb',
        '1ldt__A1_P00761-R.pdb',
    ),
    (
        'holo_L_pdb',
        '1ldt__B1_P80424-L.pdb',
    ),
    (
        'predicted_R_pdb',
        'af__P00761.pdb',
    ),
    (
        'predicted_L_pdb',
        'af__P80424.pdb',
    ),
    (
        'apo_R_pdb',
        '1s82__A1_P00761.pdb',
    ),
    (
        'apo_L_pdb',
        '2kmo__A1_P80424.pdb',
    ),
    (
        'apo_R_pdbs',
        '1s82__A1_P00761.pdb;1s6f__A1_P00761.pdb;1s85__A1_P00761.pdb;1s84__A1_P00761.pdb;2a32__A1_P00761.pdb;1s6h__A1_P00761.pdb;1s5s__A1_P00761.pdb;1fni__A1_P00761.pdb;1s81__A1_P00761.pdb;1fmg__A1_P00761.pdb;1qqu__A1_P00761.pdb;2a31__A1_P00761.pdb;1fn6__A1_P00761.pdb;1s83__A1_P00761.pdb',
    ),
    (
        'apo_L_pdbs',
        '2kmo__A1_P80424.pdb;2kmq__A1_P80424.pdb;2kmp__A1_P80424.pdb;2kmr__A1_P80424.pdb',
    ),
    (
        'holo_R',
        True,
    ),
    (
        'holo_L',
        True,
    ),
    (
        'predicted_R',
        True,
    ),
    (
        'predicted_L',
        True,
    ),
    (
        'apo_R',
        True,
    ),
    (
        'apo_L',
        True,
    ),
    (
        'apo_R_quality',
        'high',
    ),
    (
        'apo_L_quality',
        'high',
    ),
    (
        'chain1_neff',
        997.0,
    ),
    (
        'chain2_neff',
        2220.0,
    ),
    (
        'chain_R',
        'A1',
    ),
    (
        'chain_L',
        'B1',
    ),
    (
        'contains_antibody',
        False,
    ),
    (
        'contains_antigen',
        False,
    ),
    (
        'contains_enzyme',
        True,
    ),
)
native=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__A1_P00761--1ldt__B1_P80424.pdb,
    uniprot_map=None,
    pinder_id='1ldt__A1_P00761--1ldt__B1_P80424',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1992,),
    pdb_engine='fastpdb',
)
holo_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__A1_P00761-R.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1ldt__A1_P00761-R.parquet,
    pinder_id='1ldt__A1_P00761-R',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1666,),
    pdb_engine='fastpdb',
)
holo_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__B1_P80424-L.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1ldt__B1_P80424-L.parquet,
    pinder_id='1ldt__B1_P80424-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (326,),
    pdb_engine='fastpdb',
)
apo_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1fmg__A1_P00761.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1fmg__A1_P00761.parquet,
    pinder_id='1fmg__A1_P00761',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1642,),
    pdb_engine='fastpdb',
)
apo_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/2kmr__A1_P80424.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/2kmr__A1_P80424.parquet,
    pinder_id='2kmr__A1_P80424',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (630,),
    pdb_engine='fastpdb',
)
pred_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P00761.pdb,
    uniprot_map=None,
    pinder_id='af__P00761',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1708,),
    pdb_engine='fastpdb',
)
pred_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P80424.pdb,
    uniprot_map=None,
    pinder_id='af__P80424',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (326,),
    pdb_engine='fastpdb',
)
)
Classify system difficulty based on the degree of conformational shift between the unbound and bound states#
ps.unbound_difficulty("apo")
{'Fnat': 0.847457627118644,
 'Fnonnat': 0.5,
 'common_contacts': 50,
 'differing_contacts': 50,
 'bound_contacts': 59,
 'unbound_contacts': 100,
 'fnonnat_R': 0.4318181818181818,
 'fnonnat_L': 0.35,
 'fnat_R': 0.8620689655172413,
 'fnat_L': 1.0,
 'difficulty': 'Difficult',
 'I-RMSD': 2.3008883,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 29,
 'holo_ligand_interface_res': 13,
 'apo_receptor_interface_res': 44,
 'apo_ligand_interface_res': 20,
 'L-RMSD': 9.371291,
 'R-RMSD': 0.47199312,
 'unbound_id': '1fmg__A1_P00761--2kmr__A1_P80424',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'apo'}
ps.unbound_difficulty("predicted")
{'Fnat': 0.847457627118644,
 'Fnonnat': 0.05660377358490566,
 'common_contacts': 50,
 'differing_contacts': 3,
 'bound_contacts': 59,
 'unbound_contacts': 53,
 'fnonnat_R': 0.07692307692307693,
 'fnonnat_L': 0.0,
 'fnat_R': 0.8275862068965517,
 'fnat_L': 0.9230769230769231,
 'difficulty': 'Rigid-body',
 'I-RMSD': 1.252618,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 29,
 'holo_ligand_interface_res': 13,
 'apo_receptor_interface_res': 26,
 'apo_ligand_interface_res': 12,
 'L-RMSD': 3.9255776,
 'R-RMSD': 0.40761372,
 'unbound_id': 'af__P00761--af__P80424',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'predicted'}
A bunch of other features of the Structure class are illustrated below#
apo_L.atom_array[0:10]
array([
	Atom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="N", element="N", b_factor=0.0),
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="C", element="C", b_factor=0.0),
	Atom(np.array([36.532, 16.338, 57.297], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="O", element="O", b_factor=0.0),
	Atom(np.array([37.509, 14.339, 59.666], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CB", element="C", b_factor=0.0),
	Atom(np.array([38.703, 14.01 , 60.548], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CG", element="C", b_factor=0.0),
	Atom(np.array([38.916, 12.517, 60.706], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CD", element="C", b_factor=0.0),
	Atom(np.array([37.981, 11.727, 60.572], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="OE1", element="O", b_factor=0.0),
	Atom(np.array([40.151, 12.123, 60.994], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="NE2", element="N", b_factor=0.0),
	Atom(np.array([34.852, 15.989, 58.853], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="N", element="N", b_factor=0.0)
])
apo_L.coords[0:10]
array([[36.778, 16.379, 60.821],
       [37.219, 15.836, 59.542],
       [36.18 , 16.079, 58.452],
       [36.532, 16.338, 57.297],
       [37.509, 14.339, 59.666],
       [38.703, 14.01 , 60.548],
       [38.916, 12.517, 60.706],
       [37.981, 11.727, 60.572],
       [40.151, 12.123, 60.994],
       [34.852, 15.989, 58.853]], dtype=float32)
apo_L.residue_names
['ALA',
 'ARG',
 'ASN',
 'ASP',
 'CYS',
 'GLN',
 'GLU',
 'GLY',
 'HIS',
 'ILE',
 'LEU',
 'LYS',
 'MET',
 'PHE',
 'PRO',
 'SER',
 'THR',
 'TRP',
 'VAL']
apo_L.sequence
'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'
apo_L.atom_array.res_name
array(['GLN', 'GLN', 'GLN', ..., 'TRP', 'TRP', 'TRP'], dtype='<U5')
apo_L.atom_array[apo_L.backbone_mask][0:10]
array([
	Atom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="N", element="N", b_factor=0.0),
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="C", element="C", b_factor=0.0),
	Atom(np.array([34.852, 15.989, 58.853], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="N", element="N", b_factor=0.0),
	Atom(np.array([33.88 , 16.232, 57.771], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([33.954, 17.664, 57.252], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="C", element="C", b_factor=0.0),
	Atom(np.array([33.798, 17.833, 55.943], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="N", element="N", b_factor=0.0),
	Atom(np.array([33.848, 19.155, 55.33 ], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([32.629, 19.402, 54.448], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="C", element="C", b_factor=0.0),
	Atom(np.array([32.136, 20.636, 54.462], dtype=float32), chain_id="L", res_id=5, ins_code="", res_name="ASN", hetero=False, atom_name="N", element="N", b_factor=0.0)
])
apo_L.atom_array[apo_L.calpha_mask][0:10]
array([
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([33.88 , 16.232, 57.771], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([33.848, 19.155, 55.33 ], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([30.979, 21.004, 53.668], dtype=float32), chain_id="L", res_id=5, ins_code="", res_name="ASN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([31.94 , 20.151, 50.049], dtype=float32), chain_id="L", res_id=6, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([30.278, 23.116, 48.249], dtype=float32), chain_id="L", res_id=7, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([32.112, 25.664, 50.491], dtype=float32), chain_id="L", res_id=8, ins_code="", res_name="ILE", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([35.441, 23.864, 49.936], dtype=float32), chain_id="L", res_id=9, ins_code="", res_name="LEU", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([34.772, 23.807, 46.168], dtype=float32), chain_id="L", res_id=10, ins_code="", res_name="ALA", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([33.923, 27.56 , 46.154], dtype=float32), chain_id="L", res_id=11, ins_code="", res_name="ALA", hetero=False, atom_name="CA", element="C", b_factor=0.0)
])
apo_L.chain_sequence
{'L': ['Q',
  'P',
  'V',
  'N',
  'Q',
  'P',
  'I',
  'L',
  'A',
  'A',
  'A',
  'Q',
  'S',
  'L',
  'H',
  'E',
  'A',
  'T',
  'K',
  'W',
  'S',
  'S',
  'K',
  'G',
  'N',
  'D',
  'I',
  'I',
  'A',
  'A',
  'A',
  'K',
  'R',
  'M',
  'A',
  'L',
  'L',
  'M',
  'A',
  'E',
  'M',
  'S',
  'R',
  'L',
  'V',
  'R',
  'G',
  'G',
  'S',
  'G',
  'T',
  'K',
  'R',
  'A',
  'L',
  'I',
  'Q',
  'C',
  'A',
  'K',
  'D',
  'I',
  'A',
  'K',
  'A',
  'S',
  'D',
  'E',
  'V',
  'T',
  'R',
  'L',
  'A',
  'K',
  'E',
  'V',
  'A',
  'K',
  'Q',
  'C',
  'T',
  'D',
  'K',
  'R',
  'I',
  'R',
  'T',
  'N',
  'L',
  'L',
  'Q',
  'V',
  'C',
  'E',
  'R',
  'I',
  'P',
  'T',
  'I',
  'S',
  'T',
  'Q',
  'L',
  'K',
  'I',
  'L',
  'S',
  'T',
  'V',
  'K',
  'A',
  'T',
  'M',
  'L',
  'G',
  'R',
  'T',
  'N',
  'I',
  'S',
  'D',
  'E',
  'E',
  'S',
  'E',
  'Q',
  'A',
  'T',
  'E',
  'M',
  'L',
  'V',
  'H',
  'N',
  'A',
  'Q',
  'N',
  'L',
  'M',
  'Q',
  'S',
  'V',
  'K',
  'E',
  'T',
  'V',
  'R',
  'E',
  'A',
  'E',
  'A',
  'A',
  'S',
  'I',
  'K',
  'I',
  'R',
  'T',
  'D',
  'A',
  'G',
  'F',
  'T',
  'L',
  'R',
  'W',
  'V',
  'R',
  'K',
  'T',
  'P',
  'W']}
apo_complex.chain_sequence
{'L': ['Q',
  'P',
  'V',
  'N',
  'Q',
  'P',
  'I',
  'L',
  'A',
  'A',
  'A',
  'Q',
  'S',
  'L',
  'H',
  'E',
  'A',
  'T',
  'K',
  'W',
  'S',
  'S',
  'K',
  'G',
  'N',
  'D',
  'I',
  'I',
  'A',
  'A',
  'A',
  'K',
  'R',
  'M',
  'A',
  'L',
  'L',
  'M',
  'A',
  'E',
  'M',
  'S',
  'R',
  'L',
  'V',
  'R',
  'G',
  'G',
  'S',
  'G',
  'T',
  'K',
  'R',
  'A',
  'L',
  'I',
  'Q',
  'C',
  'A',
  'K',
  'D',
  'I',
  'A',
  'K',
  'A',
  'S',
  'D',
  'E',
  'V',
  'T',
  'R',
  'L',
  'A',
  'K',
  'E',
  'V',
  'A',
  'K',
  'Q',
  'C',
  'T',
  'D',
  'K',
  'R',
  'I',
  'R',
  'T',
  'N',
  'L',
  'L',
  'Q',
  'V',
  'C',
  'E',
  'R',
  'I',
  'P',
  'T',
  'I',
  'S',
  'T',
  'Q',
  'L',
  'K',
  'I',
  'L',
  'S',
  'T',
  'V',
  'K',
  'A',
  'T',
  'M',
  'L',
  'G',
  'R',
  'T',
  'N',
  'I',
  'S',
  'D',
  'E',
  'E',
  'S',
  'E',
  'Q',
  'A',
  'T',
  'E',
  'M',
  'L',
  'V',
  'H',
  'N',
  'A',
  'Q',
  'N',
  'L',
  'M',
  'Q',
  'S',
  'V',
  'K',
  'E',
  'T',
  'V',
  'R',
  'E',
  'A',
  'E',
  'A',
  'A',
  'S',
  'I',
  'K',
  'I',
  'R',
  'T',
  'D',
  'A',
  'G',
  'F',
  'T',
  'L',
  'R',
  'W',
  'V',
  'R',
  'K',
  'T',
  'P',
  'W'],
 'R': ['H',
  'M',
  'L',
  'D',
  'P',
  'E',
  'E',
  'I',
  'R',
  'K',
  'R',
  'L',
  'E',
  'H',
  'T',
  'E',
  'R',
  'Q',
  'F',
  'R',
  'N',
  'R',
  'R',
  'K',
  'I',
  'L',
  'I',
  'R',
  'G',
  'L',
  'P',
  'G',
  'D',
  'V',
  'T',
  'N',
  'Q',
  'E',
  'V',
  'H',
  'D',
  'L',
  'L',
  'S',
  'D',
  'Y',
  'E',
  'L',
  'K',
  'Y',
  'C',
  'F',
  'V',
  'D',
  'K',
  'Y',
  'K',
  'G',
  'T',
  'A',
  'F',
  'V',
  'T',
  'L',
  'L',
  'N',
  'G',
  'E',
  'Q',
  'A',
  'E',
  'A',
  'A',
  'I',
  'N',
  'A',
  'F',
  'H',
  'Q',
  'S',
  'R',
  'L',
  'R',
  'E',
  'R',
  'E',
  'L',
  'S',
  'V',
  'Q',
  'L',
  'Q',
  'P',
  'T',
  'D',
  'A',
  'L',
  'L',
  'C',
  'V',
  'A',
  'N',
  'L',
  'P',
  'P',
  'S',
  'L',
  'T',
  'Q',
  'Q',
  'Q',
  'F',
  'E',
  'E',
  'L',
  'V',
  'R',
  'P',
  'F',
  'G',
  'S',
  'L',
  'E',
  'R',
  'C',
  'F',
  'L',
  'V',
  'Y',
  'S',
  'E',
  'R',
  'T',
  'G',
  'Q',
  'S',
  'K',
  'G',
  'Y',
  'G',
  'F',
  'A',
  'E',
  'Y',
  'M',
  'K',
  'K',
  'D',
  'S',
  'A',
  'A',
  'R',
  'A',
  'K',
  'S',
  'D',
  'L',
  'L',
  'G',
  'K',
  'P',
  'L',
  'G',
  'P',
  'R',
  'T',
  'L',
  'Y',
  'V',
  'H',
  'W',
  'T',
  'D',
  'A',
  'G',
  'Q',
  'L',
  'T',
  'P',
  'A',
  'L',
  'L',
  'H',
  'S',
  'R',
  'C',
  'L',
  'C',
  'V',
  'D',
  'R',
  'L',
  'P',
  'P',
  'G',
  'F',
  'N',
  'D',
  'V',
  'D',
  'A',
  'L',
  'C',
  'R',
  'A',
  'L',
  'S',
  'A',
  'V',
  'H',
  'S',
  'P',
  'T',
  'F',
  'C',
  'Q',
  'L',
  'A',
  'C',
  'G',
  'Q',
  'D',
  'G',
  'Q',
  'L',
  'K',
  'G',
  'F',
  'A',
  'V',
  'L',
  'E',
  'Y',
  'E',
  'T',
  'A',
  'E',
  'M',
  'A',
  'E',
  'E',
  'A',
  'Q',
  'Q',
  'Q',
  'A',
  'D',
  'G',
  'L',
  'S',
  'L',
  'G',
  'G',
  'S',
  'H',
  'L',
  'R',
  'V',
  'S',
  'F',
  'C',
  'A',
  'P',
  'G',
  'P',
  'P',
  'G',
  'R',
  'S',
  'M',
  'L',
  'A',
  'A',
  'L',
  'I',
  'A',
  'A',
  'Q',
  'A',
  'T']}
apo_L.sequence
'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'