Pinder system#
from pathlib import Path
from pinder.core import PinderSystem, get_index
Example usage of the Pinder index API is shown below. For more detailed usage examples, check the pinder-index
notebook.
index = get_index()
hetero_test_apo = index.query(
'(uniprot_L != uniprot_R) and split == "test" and (apo_R and apo_L)'
)
hetero_test_apo.reset_index(drop=True, inplace=True)
hetero_test_apo
split | id | pdb_id | cluster_id | cluster_id_R | cluster_id_L | pinder_s | pinder_xl | pinder_af2 | uniprot_R | ... | apo_L | apo_R_quality | apo_L_quality | chain1_neff | chain2_neff | chain_R | chain_L | contains_antibody | contains_antigen | contains_enzyme | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | test | 3k1i__D1_O25709--3k1i__A1_O25448 | 3k1i | cluster_26031_5179 | cluster_26031 | cluster_5179 | True | True | False | O25709 | ... | True | high | high | 12.351562 | 514.000000 | D1 | A1 | False | False | False |
1 | test | 6qta__A1_G0SHE6--6qta__B1_G0SC29 | 6qta | cluster_11327_11328 | cluster_11327 | cluster_11328 | False | True | False | G0SHE6 | ... | True | high | high | 115.750000 | 689.500000 | A1 | B1 | False | False | False |
2 | test | 3vf0__B1_Q8IY67--3vf0__A2_P18206 | 3vf0 | cluster_5612_993 | cluster_993 | cluster_5612 | True | True | False | Q8IY67 | ... | True | high | high | 251.125000 | 35.531250 | B1 | A2 | False | False | False |
3 | test | 4aye__D1_Q9JXV4--4aye__A1_P08603 | 4aye | cluster_3949_4866 | cluster_3949 | cluster_4866 | True | True | False | Q9JXV4 | ... | True | high | high | 14.546875 | 310.000000 | D1 | A1 | False | False | False |
4 | test | 2w8b__A1_P0A855--2w8b__H1_P0A912 | 2w8b | cluster_15535_1924 | cluster_15535 | cluster_1924 | True | True | False | P0A855 | ... | True | high | high | 308.250000 | 1150.000000 | A1 | H1 | False | False | False |
5 | test | 5y4r__A1_O87131--5y4r__B1_Q9HVI1 | 5y4r | cluster_8825_8826 | cluster_8825 | cluster_8826 | True | True | False | O87131 | ... | True | high | high | 610.000000 | 144.875000 | A1 | B1 | False | False | True |
6 | test | 3egv__A1_Q84BQ9--3egv__B1_Q5SLP6 | 3egv | cluster_33015_371 | cluster_33015 | cluster_371 | True | True | False | Q84BQ9 | ... | True | high | high | 806.500000 | 637.000000 | A1 | B1 | False | False | True |
7 | test | 6wjc__A1_P11229--6wjc__B1_Q8QGR0 | 6wjc | cluster_1057_1356 | cluster_1057 | cluster_1356 | True | True | False | P11229 | ... | True | high | high | 462.750000 | 518.500000 | A1 | B1 | False | False | True |
8 | test | 6tx3__B1_Q9NWY4--6tx3__A1_Q9UGN5 | 6tx3 | cluster_11866_335 | cluster_11866 | cluster_335 | True | True | False | Q9NWY4 | ... | True | high | high | 59.468750 | 213.000000 | B1 | A1 | False | False | True |
9 | test | 2grx__A1_P06971--2grx__C1_P02929 | 2grx | cluster_12107_8897 | cluster_12107 | cluster_8897 | True | True | False | P06971 | ... | True | high | high | 288.000000 | 562.500000 | A1 | C1 | False | False | False |
10 | test | 3kbu__A1_P11277--3kbu__D1_P16157 | 3kbu | cluster_16732_8658 | cluster_8658 | cluster_16732 | True | True | False | P11277 | ... | True | high | high | 229.625000 | 210.625000 | A1 | D1 | False | False | False |
11 | test | 2j0t__A1_P03956--2j0t__D1_P01033 | 2j0t | cluster_939_940 | cluster_939 | cluster_940 | True | True | False | P03956 | ... | True | high | high | 612.500000 | 68.312500 | A1 | D1 | False | False | True |
12 | test | 8a60__A1_P06971--8a60__B1_Q38162 | 8a60 | cluster_12107_26846 | cluster_12107 | cluster_26846 | False | True | True | P06971 | ... | True | high | high | 288.000000 | 2.734375 | A1 | B1 | False | False | False |
13 | test | 4je4__A1_Q06124--4je4__B1_P02751 | 4je4 | cluster_1465_1605 | cluster_1465 | cluster_1605 | True | True | False | Q06124 | ... | True | high | high | 611.000000 | 75.062500 | A1 | B1 | False | False | False |
14 | test | 4uae__A1_O00629--4uae__B1_P31345 | 4uae | cluster_10331_1373 | cluster_1373 | cluster_10331 | False | True | False | O00629 | ... | True | high | high | 240.750000 | 3.058594 | A1 | B1 | False | False | False |
15 | test | 3k9m__A1_P07858--3k9m__B1_P01040 | 3k9m | cluster_4628_6704 | cluster_6704 | cluster_4628 | True | True | False | P07858 | ... | True | high | high | 770.000000 | 429.750000 | A1 | B1 | False | False | True |
16 | test | 2wo2__A1_P54764--2wo2__B1_P52799 | 2wo2 | cluster_130_8064 | cluster_130 | cluster_8064 | True | True | False | P54764 | ... | True | high | high | 567.500000 | 104.562500 | A1 | B1 | False | False | True |
17 | test | 5dob__A1_P16794--5dob__B1_P16791 | 5dob | cluster_12247_23692 | cluster_12247 | cluster_23692 | False | True | False | P16794 | ... | True | high | high | 3.312500 | 3.855469 | A1 | B1 | False | False | False |
18 | test | 8i2e__A1_O34841--8i2e__B1_P54421 | 8i2e | cluster_11087_12465 | cluster_12465 | cluster_11087 | True | True | True | O34841 | ... | True | high | high | 9.031250 | 865.000000 | A1 | B1 | False | False | True |
19 | test | 1zlh__A1_P00730--1zlh__B1_Q5EPH2 | 1zlh | cluster_2416_8594 | cluster_2416 | cluster_8594 | True | True | False | P00730 | ... | True | high | high | 541.000000 | 637.000000 | A1 | B1 | False | False | True |
20 | test | 6yev__C1_P0A744--6yev__A1_P0AA25 | 6yev | cluster_4231_621 | cluster_4231 | cluster_621 | True | True | False | P0A744 | ... | True | high | high | 1099.000000 | 1440.000000 | C1 | A1 | False | False | True |
21 | test | 1dtd__A1_P48052--1dtd__B1_P81511 | 1dtd | cluster_2416_9476 | cluster_2416 | cluster_9476 | True | True | False | P48052 | ... | True | high | high | 581.000000 | 8.781250 | A1 | B1 | False | False | True |
22 | test | 7fn1__B1_P32357--7fn1__A1_P33334 | 7fn1 | cluster_635_7263 | cluster_7263 | cluster_635 | True | True | False | P32357 | ... | True | high | high | 8.070312 | 69.062500 | B1 | A1 | False | False | False |
23 | test | 1yu6__A1_P00780--1yu6__C1_P68390 | 1yu6 | cluster_1952_5091 | cluster_1952 | cluster_5091 | True | True | False | P00780 | ... | True | high | high | 1000.000000 | 704.000000 | A1 | C1 | False | False | True |
24 | test | 6s8v__B1_P08195--6s8v__A1_P80188 | 6s8v | cluster_19347_5331 | cluster_19347 | cluster_5331 | True | True | False | P08195 | ... | True | high | high | 511.000000 | 34.812500 | B1 | A1 | False | False | False |
25 | test | 5ja2__A1_P11454--5ja2__B1_Q9I169 | 5ja2 | cluster_4489_7234 | cluster_7234 | cluster_4489 | True | True | False | P11454 | ... | True | high | high | 641.500000 | 545.500000 | A1 | B1 | False | False | True |
26 | test | 6f3z__A1_P0ADC3--6f3z__B1_P61316 | 6f3z | cluster_12985_22189 | cluster_22189 | cluster_12985 | True | True | False | P0ADC3 | ... | True | high | high | 402.500000 | 368.000000 | A1 | B1 | False | False | False |
27 | test | 8gt0__A1_Q8I6U4--8gt0__B1_P01040 | 8gt0 | cluster_2085_4628 | cluster_2085 | cluster_4628 | True | True | False | Q8I6U4 | ... | True | high | high | 651.000000 | 429.750000 | A1 | B1 | False | False | True |
28 | test | 2gkv__A1_P00777--2gkv__B1_P68390 | 2gkv | cluster_5091_5268 | cluster_5268 | cluster_5091 | True | True | False | P00777 | ... | True | high | high | 409.000000 | 704.000000 | A1 | B1 | False | False | True |
29 | test | 6m4v__A1_P0AEX9--6m4v__B1_P62942 | 6m4v | cluster_1772_409 | cluster_409 | cluster_1772 | True | True | False | P0AEX9 | ... | True | high | high | 171.250000 | 1408.000000 | A1 | B1 | False | False | True |
30 | test | 4us1__B1_Q07889--4us1__A1_P01112 | 4us1 | cluster_10806_437 | cluster_10806 | cluster_437 | True | True | False | Q07889 | ... | True | high | high | 183.375000 | 946.500000 | B1 | A1 | False | False | False |
31 | test | 1tec__A1_P04072--1tec__B1_P01051 | 1tec | cluster_1952_387 | cluster_1952 | cluster_387 | True | True | False | P04072 | ... | True | high | high | 1017.500000 | 133.750000 | A1 | B1 | False | False | True |
32 | test | 1zhh__A1_P54300--1zhh__B1_P54302 | 1zhh | cluster_3962_8641 | cluster_3962 | cluster_8641 | True | True | False | P54300 | ... | True | high | high | 23.531250 | 524.500000 | A1 | B1 | False | False | False |
33 | test | 5n47__B1_P02751--5n47__A1_P80188 | 5n47 | cluster_1605_5331 | cluster_1605 | cluster_5331 | True | True | False | P02751 | ... | True | high | high | 75.062500 | 34.812500 | B1 | A1 | False | False | False |
34 | test | 7b80__A1_G3I8R9--7b80__B1_Q9BVA6 | 7b80 | cluster_1039_643 | cluster_643 | cluster_1039 | True | True | False | G3I8R9 | ... | True | high | high | 1069.000000 | 526.500000 | A1 | B1 | False | False | True |
35 rows × 34 columns
pinder_id = list(hetero_test_apo.id)[2]
pinder_id
'3vf0__B1_Q8IY67--3vf0__A2_P18206'
PinderSystem API - base class representing the Structure objects in a pinder entry#
# Simplest interface - get a single pinder system
ps = PinderSystem(pinder_id)
ps
2024-11-15 12:15:04,493 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=15
PinderSystem(
entry = IndexEntry(
(
'split',
'test',
),
(
'id',
'3vf0__B1_Q8IY67--3vf0__A2_P18206',
),
(
'pdb_id',
'3vf0',
),
(
'cluster_id',
'cluster_5612_993',
),
(
'cluster_id_R',
'cluster_993',
),
(
'cluster_id_L',
'cluster_5612',
),
(
'pinder_s',
True,
),
(
'pinder_xl',
True,
),
(
'pinder_af2',
False,
),
(
'uniprot_R',
'Q8IY67',
),
(
'uniprot_L',
'P18206',
),
(
'holo_R_pdb',
'3vf0__B1_Q8IY67-R.pdb',
),
(
'holo_L_pdb',
'3vf0__A2_P18206-L.pdb',
),
(
'predicted_R_pdb',
'af__Q8IY67.pdb',
),
(
'predicted_L_pdb',
'af__P18206.pdb',
),
(
'apo_R_pdb',
'3smz__A1_Q8IY67.pdb',
),
(
'apo_L_pdb',
'5l0h__A1_P18206.pdb',
),
(
'apo_R_pdbs',
'3smz__A1_Q8IY67.pdb',
),
(
'apo_L_pdbs',
'5l0h__A1_P18206.pdb;5l0f__A1_P18206.pdb;5l0i__A1_P18206.pdb',
),
(
'holo_R',
True,
),
(
'holo_L',
True,
),
(
'predicted_R',
True,
),
(
'predicted_L',
True,
),
(
'apo_R',
True,
),
(
'apo_L',
True,
),
(
'apo_R_quality',
'high',
),
(
'apo_L_quality',
'high',
),
(
'chain1_neff',
251.125,
),
(
'chain2_neff',
35.53125,
),
(
'chain_R',
'B1',
),
(
'chain_L',
'A2',
),
(
'contains_antibody',
False,
),
(
'contains_antigen',
False,
),
(
'contains_enzyme',
False,
),
)
native=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3vf0__B1_Q8IY67--3vf0__A2_P18206.pdb,
uniprot_map=None,
pinder_id='3vf0__B1_Q8IY67--3vf0__A2_P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (3584,),
pdb_engine='fastpdb',
)
holo_receptor=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__B1_Q8IY67-R.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__B1_Q8IY67-R.parquet,
pinder_id='3vf0__B1_Q8IY67-R',
atom_array=<class 'biotite.structure.AtomArray'> with shape (2204,),
pdb_engine='fastpdb',
)
holo_ligand=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,
pinder_id='3vf0__A2_P18206-L',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1380,),
pdb_engine='fastpdb',
)
apo_receptor=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3smz__A1_Q8IY67.parquet,
pinder_id='3smz__A1_Q8IY67',
atom_array=<class 'biotite.structure.AtomArray'> with shape (2183,),
pdb_engine='fastpdb',
)
apo_ligand=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,
pinder_id='5l0h__A1_P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1341,),
pdb_engine='fastpdb',
)
pred_receptor=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8IY67.pdb,
uniprot_map=None,
pinder_id='af__Q8IY67',
atom_array=<class 'biotite.structure.AtomArray'> with shape (4495,),
pdb_engine='fastpdb',
)
pred_ligand=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P18206.pdb,
uniprot_map=None,
pinder_id='af__P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (8664,),
pdb_engine='fastpdb',
)
)
holo_L, holo_R = ps.holo_ligand, ps.holo_receptor
pred_L, pred_R = ps.pred_ligand, ps.pred_receptor
apo_L, apo_R = ps.apo_ligand, ps.apo_receptor
holo_L
Structure(
filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,
pinder_id='3vf0__A2_P18206-L',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1380,),
pdb_engine='fastpdb',
)
Classify system difficulty based on the degree of conformational shift between the unbound and bound states#
ps.unbound_difficulty("apo")
{'Fnat': 0.5421686746987951,
'Fnonnat': 0.3076923076923077,
'common_contacts': 45,
'differing_contacts': 20,
'bound_contacts': 83,
'unbound_contacts': 65,
'fnonnat_R': 0.2857142857142857,
'fnonnat_L': 0.0,
'fnat_R': 0.5882352941176471,
'fnat_L': 0.7692307692307693,
'difficulty': 'Rigid-body',
'I-RMSD': 1.125529,
'matched_interface_chains': 2,
'holo_receptor_interface_res': 34,
'holo_ligand_interface_res': 26,
'apo_receptor_interface_res': 28,
'apo_ligand_interface_res': 20,
'L-RMSD': 0.94191533,
'R-RMSD': 1.4988925,
'unbound_id': '3smz__A1_Q8IY67--5l0h__A1_P18206',
'unbound_body': 'receptor_ligand',
'monomer_name': 'apo'}
ps.unbound_difficulty("predicted")
{'Fnat': 0.5662650602409639,
'Fnonnat': 0.9225700164744646,
'common_contacts': 47,
'differing_contacts': 560,
'bound_contacts': 83,
'unbound_contacts': 607,
'fnonnat_R': 0.8435374149659864,
'fnonnat_L': 0.8670520231213873,
'fnat_R': 0.6764705882352942,
'fnat_L': 0.8846153846153846,
'difficulty': 'Difficult',
'I-RMSD': 3.4248848,
'matched_interface_chains': 2,
'holo_receptor_interface_res': 34,
'holo_ligand_interface_res': 26,
'apo_receptor_interface_res': 147,
'apo_ligand_interface_res': 173,
'L-RMSD': 2.1201644,
'R-RMSD': 0.62853533,
'unbound_id': 'af__Q8IY67--af__P18206',
'unbound_body': 'receptor_ligand',
'monomer_name': 'predicted'}
Illustrating utilities available in Structure
instances#
holo_L.filter("atom_name", mask=["CA"])
Structure(
filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (283, 14),
pinder_id='3vf0__A2_P18206-L',
atom_array=<class 'biotite.structure.AtomArray'> with shape (178,),
pdb_engine='fastpdb',
)
apo_L.filter("atom_name", mask=["CA"])
Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (176, 14),
pinder_id='5l0h__A1_P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (173,),
pdb_engine='fastpdb',
)
Can also filter “in place” rather than returning a copy (a la pandas)#
apo_L.filter("atom_name", mask=["CA"], copy=False)
(
ps.apo_ligand.filter("atom_name", mask=["CA"]),
ps.holo_ligand.filter("atom_name", mask=["CA"])
)
(Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (176, 14),
pinder_id='5l0h__A1_P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (173,),
pdb_engine='fastpdb',
),
Structure(
filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (283, 14),
pinder_id='3vf0__A2_P18206-L',
atom_array=<class 'biotite.structure.AtomArray'> with shape (178,),
pdb_engine='fastpdb',
))
Create masked unbound complex aligned to bound for apo#
apo_complex = ps.create_apo_complex()
apo_complex
Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,
uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (460, 14),
pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (2355,),
pdb_engine='fastpdb',
)
apo_complex.dataframe
chain_id | res_name | res_code | res_id | atom_name | b_factor | ins_code | hetero | element | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | R | HIS | H | 1 | N | 0.0 | False | N | 69.377380 | 105.086411 | -51.232784 | |
1 | R | HIS | H | 1 | CA | 0.0 | False | C | 69.471100 | 103.738152 | -50.680832 | |
2 | R | HIS | H | 1 | C | 0.0 | False | C | 70.264641 | 103.716263 | -49.368168 | |
3 | R | HIS | H | 1 | O | 0.0 | False | O | 70.223633 | 102.714134 | -48.653946 | |
4 | R | HIS | H | 1 | CB | 0.0 | False | C | 70.093376 | 102.773239 | -51.701912 | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2350 | L | ARG | R | 170 | CA | 0.0 | False | C | 45.438423 | 86.089317 | -54.533417 | |
2351 | L | LYS | K | 171 | CA | 0.0 | False | C | 44.941048 | 83.492149 | -57.280712 | |
2352 | L | THR | T | 172 | CA | 0.0 | False | C | 46.387222 | 85.555809 | -60.138439 | |
2353 | L | PRO | P | 173 | CA | 0.0 | False | C | 49.873409 | 84.154137 | -59.526482 | |
2354 | L | TRP | W | 174 | CA | 0.0 | False | C | 50.158199 | 80.451759 | -60.405190 |
2355 rows × 12 columns
What’s going on under the hood#
ps = PinderSystem(pinder_id)
apo_L, apo_R = ps.apo_ligand, ps.apo_receptor
# After getting the "in common" masked structures, they can be superimposed
apo_R, holo_R = apo_R.align_common_sequence(ps.aligned_holo_R)
apo_L, holo_L = apo_L.align_common_sequence(ps.aligned_holo_L)
# Rmsd after superposition (without outlier removal) is stored in `rms`
R_super, rms, _ = apo_R.superimpose(holo_R)
L_super, rms, _ = apo_L.superimpose(holo_L)
L_super
Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,
pinder_id='5l0h__A1_P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1327,),
pdb_engine='fastpdb',
)
# Now we can create the complexes using Structure.__add__ methods
apo_binary = R_super + L_super
holo_binary = holo_R + holo_L
apo_binary
Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,
uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (460, 14),
pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (3510,),
pdb_engine='fastpdb',
)
# Alternatively, there exist utils for creating the masked apo and predicted complex
pred_complex = ps.create_pred_complex()
pred_complex
Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8IY67--af__P18206.pdb,
uniprot_map=None,
pinder_id='af__Q8IY67--af__P18206',
atom_array=<class 'biotite.structure.AtomArray'> with shape (3566,),
pdb_engine='fastpdb',
)
masked_complex_dir = Path("./").absolute() / "unbound_complexes"
masked_complex_dir.mkdir(exist_ok=True, parents=True)
masked_complex_dir
PosixPath('/home/runner/work/pinder/pinder/docs/unbound_complexes')
# If the output PDB filepath is omitted, the structure will be written to Structure.filepath, which may overwrite
# an existing file. In this case, omitting it would write a new file named after the combined filepaths of the added complexes.
pred_complex.to_pdb(masked_complex_dir / "pred_complex.pdb")
apo_complex.to_pdb(masked_complex_dir / "apo_complex.pdb")
(masked_complex_dir / "pred_complex.pdb").unlink()
(masked_complex_dir / "apo_complex.pdb").unlink()
Structures have resolved_pdb2uniprot
and resolved_uniprot2pdb
properties#
They return dicts of resolved residue numbers mapped from pdb numbering to uniprot numbering, and vice versa.
The full mapping is available in Structure.uniprot_mapping.
To get only the resolved mapping, access the Structure.resolved_mapping
attribute.
apo_L.resolved_pdb2uniprot
{2: 960,
3: 961,
4: 962,
5: 963,
6: 964,
7: 965,
8: 966,
9: 967,
10: 968,
11: 969,
12: 970,
13: 971,
14: 972,
15: 973,
16: 974,
18: 976,
19: 977,
20: 978,
21: 979,
22: 980,
23: 981,
24: 982,
25: 983,
26: 984,
27: 985,
28: 986,
29: 987,
30: 988,
31: 989,
32: 990,
33: 991,
34: 992,
35: 993,
36: 994,
37: 995,
38: 996,
39: 997,
40: 998,
41: 999,
42: 1000,
43: 1001,
44: 1002,
45: 1003,
46: 1004,
47: 1005,
48: 1006,
49: 1007,
50: 1008,
51: 1009,
52: 1010,
53: 1011,
54: 1012,
55: 1013,
56: 1014,
57: 1015,
58: 1016,
59: 1017,
60: 1018,
61: 1019,
62: 1020,
63: 1021,
64: 1022,
65: 1023,
66: 1024,
67: 1025,
68: 1026,
69: 1027,
70: 1028,
71: 1029,
72: 1030,
73: 1031,
74: 1032,
75: 1033,
76: 1034,
77: 1035,
78: 1036,
79: 1037,
80: 1038,
81: 1039,
82: 1040,
83: 1041,
84: 1042,
85: 1043,
86: 1044,
87: 1045,
88: 1046,
89: 1047,
90: 1048,
91: 1049,
92: 1050,
93: 1051,
94: 1052,
95: 1053,
96: 1054,
97: 1055,
98: 1056,
99: 1057,
100: 1058,
101: 1059,
102: 1060,
103: 1061,
104: 1062,
105: 1063,
106: 1064,
107: 1065,
108: 1066,
109: 1067,
110: 1068,
111: 1069,
112: 1070,
113: 1071,
114: 1072,
115: 1073,
116: 1074,
117: 1075,
118: 1076,
119: 1077,
120: 1078,
121: 1079,
122: 1080,
123: 1081,
124: 1082,
125: 1083,
126: 1084,
127: 1085,
128: 1086,
129: 1087,
130: 1088,
131: 1089,
132: 1090,
133: 1091,
134: 1092,
135: 1093,
136: 1094,
137: 1095,
138: 1096,
139: 1097,
140: 1098,
141: 1099,
142: 1100,
143: 1101,
144: 1102,
145: 1103,
146: 1104,
147: 1105,
148: 1106,
149: 1107,
150: 1108,
151: 1109,
152: 1110,
153: 1111,
154: 1112,
155: 1113,
156: 1114,
157: 1115,
158: 1116,
159: 1117,
160: 1118,
161: 1119,
162: 1120,
163: 1121,
164: 1122,
165: 1123,
166: 1124,
167: 1125,
168: 1126,
169: 1127,
170: 1128,
171: 1129,
172: 1130,
173: 1131,
174: 1132}
apo_complex.resolved_mapping
entry_id | entity_id | asym_id | pdb_strand_id | resi | resi_pdb | resi_auth | resn | one_letter_code_can | resolved | one_letter_code_uniprot | resi_uniprot | uniprot_acc | chain | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | 1 | A | A | 3 | 39 | 39.0 | LEU | L | 1 | L | 39 | NaN | A1 |
1 | NaN | 1 | A | A | 4 | 40 | 40.0 | ASP | D | 1 | D | 40 | NaN | A1 |
2 | NaN | 1 | A | A | 5 | 41 | 41.0 | PRO | P | 1 | P | 41 | NaN | A1 |
3 | NaN | 1 | A | A | 6 | 42 | 42.0 | GLU | E | 1 | E | 42 | NaN | A1 |
4 | NaN | 1 | A | A | 7 | 43 | 43.0 | GLU | E | 1 | E | 43 | NaN | A1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
446 | NaN | 1 | A | A | 170 | 1128 | 1128.0 | ARG | R | 1 | R | 1128 | NaN | A1 |
447 | NaN | 1 | A | A | 171 | 1129 | 1129.0 | LYS | K | 1 | K | 1129 | NaN | A1 |
448 | NaN | 1 | A | A | 172 | 1130 | 1130.0 | THR | T | 1 | T | 1130 | NaN | A1 |
449 | NaN | 1 | A | A | 173 | 1131 | 1131.0 | PRO | P | 1 | P | 1131 | NaN | A1 |
450 | NaN | 1 | A | A | 174 | 1132 | 1132.0 | TRP | W | 1 | W | 1132 | NaN | A1 |
451 rows × 14 columns
Case with multiple available apo structures#
pinder_id = "1ldt__A1_P00761--1ldt__B1_P80424"
ps_canon = PinderSystem(pinder_id)
ps_canon.entry.apo_R_alt
2024-11-15 12:15:08,889 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=43
['1s6f__A1_P00761.pdb',
'1s85__A1_P00761.pdb',
'1s84__A1_P00761.pdb',
'2a32__A1_P00761.pdb',
'1s6h__A1_P00761.pdb',
'1s5s__A1_P00761.pdb',
'1fni__A1_P00761.pdb',
'1s81__A1_P00761.pdb',
'1fmg__A1_P00761.pdb',
'1qqu__A1_P00761.pdb',
'2a31__A1_P00761.pdb',
'1fn6__A1_P00761.pdb',
'1s83__A1_P00761.pdb']
ps_canon.entry.apo_L_alt
['2kmq__A1_P80424.pdb', '2kmp__A1_P80424.pdb', '2kmr__A1_P80424.pdb']
Specify 2kmr as apo ligand and 1fmg as apo receptor#
Note: the default apo_receptor
and apo_ligand
are determined based on the selected canonical apo monomer.
The canonical monomers were selected based on their sequence overlap and difficulty metrics calculated in pinder.eval.dockq.unbound
ps = PinderSystem(pinder_id, apo_ligand_pdb_code="2kmr", apo_receptor_pdb_code="1fmg")
ps
PinderSystem(
entry = IndexEntry(
(
'split',
'train',
),
(
'id',
'1ldt__A1_P00761--1ldt__B1_P80424',
),
(
'pdb_id',
'1ldt',
),
(
'cluster_id',
'cluster_312_5091',
),
(
'cluster_id_R',
'cluster_312',
),
(
'cluster_id_L',
'cluster_5091',
),
(
'pinder_s',
False,
),
(
'pinder_xl',
False,
),
(
'pinder_af2',
False,
),
(
'uniprot_R',
'P00761',
),
(
'uniprot_L',
'P80424',
),
(
'holo_R_pdb',
'1ldt__A1_P00761-R.pdb',
),
(
'holo_L_pdb',
'1ldt__B1_P80424-L.pdb',
),
(
'predicted_R_pdb',
'af__P00761.pdb',
),
(
'predicted_L_pdb',
'af__P80424.pdb',
),
(
'apo_R_pdb',
'1s82__A1_P00761.pdb',
),
(
'apo_L_pdb',
'2kmo__A1_P80424.pdb',
),
(
'apo_R_pdbs',
'1s82__A1_P00761.pdb;1s6f__A1_P00761.pdb;1s85__A1_P00761.pdb;1s84__A1_P00761.pdb;2a32__A1_P00761.pdb;1s6h__A1_P00761.pdb;1s5s__A1_P00761.pdb;1fni__A1_P00761.pdb;1s81__A1_P00761.pdb;1fmg__A1_P00761.pdb;1qqu__A1_P00761.pdb;2a31__A1_P00761.pdb;1fn6__A1_P00761.pdb;1s83__A1_P00761.pdb',
),
(
'apo_L_pdbs',
'2kmo__A1_P80424.pdb;2kmq__A1_P80424.pdb;2kmp__A1_P80424.pdb;2kmr__A1_P80424.pdb',
),
(
'holo_R',
True,
),
(
'holo_L',
True,
),
(
'predicted_R',
True,
),
(
'predicted_L',
True,
),
(
'apo_R',
True,
),
(
'apo_L',
True,
),
(
'apo_R_quality',
'high',
),
(
'apo_L_quality',
'high',
),
(
'chain1_neff',
997.0,
),
(
'chain2_neff',
2220.0,
),
(
'chain_R',
'A1',
),
(
'chain_L',
'B1',
),
(
'contains_antibody',
False,
),
(
'contains_antigen',
False,
),
(
'contains_enzyme',
True,
),
)
native=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__A1_P00761--1ldt__B1_P80424.pdb,
uniprot_map=None,
pinder_id='1ldt__A1_P00761--1ldt__B1_P80424',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1992,),
pdb_engine='fastpdb',
)
holo_receptor=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__A1_P00761-R.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1ldt__A1_P00761-R.parquet,
pinder_id='1ldt__A1_P00761-R',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1666,),
pdb_engine='fastpdb',
)
holo_ligand=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__B1_P80424-L.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1ldt__B1_P80424-L.parquet,
pinder_id='1ldt__B1_P80424-L',
atom_array=<class 'biotite.structure.AtomArray'> with shape (326,),
pdb_engine='fastpdb',
)
apo_receptor=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1fmg__A1_P00761.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1fmg__A1_P00761.parquet,
pinder_id='1fmg__A1_P00761',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1642,),
pdb_engine='fastpdb',
)
apo_ligand=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/2kmr__A1_P80424.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/2kmr__A1_P80424.parquet,
pinder_id='2kmr__A1_P80424',
atom_array=<class 'biotite.structure.AtomArray'> with shape (630,),
pdb_engine='fastpdb',
)
pred_receptor=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P00761.pdb,
uniprot_map=None,
pinder_id='af__P00761',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1708,),
pdb_engine='fastpdb',
)
pred_ligand=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P80424.pdb,
uniprot_map=None,
pinder_id='af__P80424',
atom_array=<class 'biotite.structure.AtomArray'> with shape (326,),
pdb_engine='fastpdb',
)
)
Classify system difficulty based on the degree of conformational shift between the unbound and bound states#
ps.unbound_difficulty("apo")
{'Fnat': 0.847457627118644,
'Fnonnat': 0.5,
'common_contacts': 50,
'differing_contacts': 50,
'bound_contacts': 59,
'unbound_contacts': 100,
'fnonnat_R': 0.4318181818181818,
'fnonnat_L': 0.35,
'fnat_R': 0.8620689655172413,
'fnat_L': 1.0,
'difficulty': 'Difficult',
'I-RMSD': 2.3008883,
'matched_interface_chains': 2,
'holo_receptor_interface_res': 29,
'holo_ligand_interface_res': 13,
'apo_receptor_interface_res': 44,
'apo_ligand_interface_res': 20,
'L-RMSD': 9.371291,
'R-RMSD': 0.47199312,
'unbound_id': '1fmg__A1_P00761--2kmr__A1_P80424',
'unbound_body': 'receptor_ligand',
'monomer_name': 'apo'}
ps.unbound_difficulty("predicted")
{'Fnat': 0.847457627118644,
'Fnonnat': 0.05660377358490566,
'common_contacts': 50,
'differing_contacts': 3,
'bound_contacts': 59,
'unbound_contacts': 53,
'fnonnat_R': 0.07692307692307693,
'fnonnat_L': 0.0,
'fnat_R': 0.8275862068965517,
'fnat_L': 0.9230769230769231,
'difficulty': 'Rigid-body',
'I-RMSD': 1.252618,
'matched_interface_chains': 2,
'holo_receptor_interface_res': 29,
'holo_ligand_interface_res': 13,
'apo_receptor_interface_res': 26,
'apo_ligand_interface_res': 12,
'L-RMSD': 3.9255776,
'R-RMSD': 0.40761372,
'unbound_id': 'af__P00761--af__P80424',
'unbound_body': 'receptor_ligand',
'monomer_name': 'predicted'}
Several other features of the Structure class are illustrated below
apo_L.atom_array[0:10]
array([
Atom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="N", element="N", b_factor=0.0),
Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="C", element="C", b_factor=0.0),
Atom(np.array([36.532, 16.338, 57.297], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="O", element="O", b_factor=0.0),
Atom(np.array([37.509, 14.339, 59.666], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CB", element="C", b_factor=0.0),
Atom(np.array([38.703, 14.01 , 60.548], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CG", element="C", b_factor=0.0),
Atom(np.array([38.916, 12.517, 60.706], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CD", element="C", b_factor=0.0),
Atom(np.array([37.981, 11.727, 60.572], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="OE1", element="O", b_factor=0.0),
Atom(np.array([40.151, 12.123, 60.994], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="NE2", element="N", b_factor=0.0),
Atom(np.array([34.852, 15.989, 58.853], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="N", element="N", b_factor=0.0)
])
apo_L.coords[0:10]
array([[36.778, 16.379, 60.821],
[37.219, 15.836, 59.542],
[36.18 , 16.079, 58.452],
[36.532, 16.338, 57.297],
[37.509, 14.339, 59.666],
[38.703, 14.01 , 60.548],
[38.916, 12.517, 60.706],
[37.981, 11.727, 60.572],
[40.151, 12.123, 60.994],
[34.852, 15.989, 58.853]], dtype=float32)
apo_L.residue_names
['ALA',
'ARG',
'ASN',
'ASP',
'CYS',
'GLN',
'GLU',
'GLY',
'HIS',
'ILE',
'LEU',
'LYS',
'MET',
'PHE',
'PRO',
'SER',
'THR',
'TRP',
'VAL']
apo_L.sequence
'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'
apo_L.atom_array.res_name
array(['GLN', 'GLN', 'GLN', ..., 'TRP', 'TRP', 'TRP'], dtype='<U5')
apo_L.atom_array[apo_L.backbone_mask][0:10]
array([
Atom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="N", element="N", b_factor=0.0),
Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="C", element="C", b_factor=0.0),
Atom(np.array([34.852, 15.989, 58.853], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="N", element="N", b_factor=0.0),
Atom(np.array([33.88 , 16.232, 57.771], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([33.954, 17.664, 57.252], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="C", element="C", b_factor=0.0),
Atom(np.array([33.798, 17.833, 55.943], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="N", element="N", b_factor=0.0),
Atom(np.array([33.848, 19.155, 55.33 ], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([32.629, 19.402, 54.448], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="C", element="C", b_factor=0.0),
Atom(np.array([32.136, 20.636, 54.462], dtype=float32), chain_id="L", res_id=5, ins_code="", res_name="ASN", hetero=False, atom_name="N", element="N", b_factor=0.0)
])
apo_L.atom_array[apo_L.calpha_mask][0:10]
array([
Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([33.88 , 16.232, 57.771], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([33.848, 19.155, 55.33 ], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([30.979, 21.004, 53.668], dtype=float32), chain_id="L", res_id=5, ins_code="", res_name="ASN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([31.94 , 20.151, 50.049], dtype=float32), chain_id="L", res_id=6, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([30.278, 23.116, 48.249], dtype=float32), chain_id="L", res_id=7, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([32.112, 25.664, 50.491], dtype=float32), chain_id="L", res_id=8, ins_code="", res_name="ILE", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([35.441, 23.864, 49.936], dtype=float32), chain_id="L", res_id=9, ins_code="", res_name="LEU", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([34.772, 23.807, 46.168], dtype=float32), chain_id="L", res_id=10, ins_code="", res_name="ALA", hetero=False, atom_name="CA", element="C", b_factor=0.0),
Atom(np.array([33.923, 27.56 , 46.154], dtype=float32), chain_id="L", res_id=11, ins_code="", res_name="ALA", hetero=False, atom_name="CA", element="C", b_factor=0.0)
])
apo_L.chain_sequence
{'L': ['Q',
'P',
'V',
'N',
'Q',
'P',
'I',
'L',
'A',
'A',
'A',
'Q',
'S',
'L',
'H',
'E',
'A',
'T',
'K',
'W',
'S',
'S',
'K',
'G',
'N',
'D',
'I',
'I',
'A',
'A',
'A',
'K',
'R',
'M',
'A',
'L',
'L',
'M',
'A',
'E',
'M',
'S',
'R',
'L',
'V',
'R',
'G',
'G',
'S',
'G',
'T',
'K',
'R',
'A',
'L',
'I',
'Q',
'C',
'A',
'K',
'D',
'I',
'A',
'K',
'A',
'S',
'D',
'E',
'V',
'T',
'R',
'L',
'A',
'K',
'E',
'V',
'A',
'K',
'Q',
'C',
'T',
'D',
'K',
'R',
'I',
'R',
'T',
'N',
'L',
'L',
'Q',
'V',
'C',
'E',
'R',
'I',
'P',
'T',
'I',
'S',
'T',
'Q',
'L',
'K',
'I',
'L',
'S',
'T',
'V',
'K',
'A',
'T',
'M',
'L',
'G',
'R',
'T',
'N',
'I',
'S',
'D',
'E',
'E',
'S',
'E',
'Q',
'A',
'T',
'E',
'M',
'L',
'V',
'H',
'N',
'A',
'Q',
'N',
'L',
'M',
'Q',
'S',
'V',
'K',
'E',
'T',
'V',
'R',
'E',
'A',
'E',
'A',
'A',
'S',
'I',
'K',
'I',
'R',
'T',
'D',
'A',
'G',
'F',
'T',
'L',
'R',
'W',
'V',
'R',
'K',
'T',
'P',
'W']}
apo_complex.chain_sequence
{'L': ['Q',
'P',
'V',
'N',
'Q',
'P',
'I',
'L',
'A',
'A',
'A',
'Q',
'S',
'L',
'H',
'E',
'A',
'T',
'K',
'W',
'S',
'S',
'K',
'G',
'N',
'D',
'I',
'I',
'A',
'A',
'A',
'K',
'R',
'M',
'A',
'L',
'L',
'M',
'A',
'E',
'M',
'S',
'R',
'L',
'V',
'R',
'G',
'G',
'S',
'G',
'T',
'K',
'R',
'A',
'L',
'I',
'Q',
'C',
'A',
'K',
'D',
'I',
'A',
'K',
'A',
'S',
'D',
'E',
'V',
'T',
'R',
'L',
'A',
'K',
'E',
'V',
'A',
'K',
'Q',
'C',
'T',
'D',
'K',
'R',
'I',
'R',
'T',
'N',
'L',
'L',
'Q',
'V',
'C',
'E',
'R',
'I',
'P',
'T',
'I',
'S',
'T',
'Q',
'L',
'K',
'I',
'L',
'S',
'T',
'V',
'K',
'A',
'T',
'M',
'L',
'G',
'R',
'T',
'N',
'I',
'S',
'D',
'E',
'E',
'S',
'E',
'Q',
'A',
'T',
'E',
'M',
'L',
'V',
'H',
'N',
'A',
'Q',
'N',
'L',
'M',
'Q',
'S',
'V',
'K',
'E',
'T',
'V',
'R',
'E',
'A',
'E',
'A',
'A',
'S',
'I',
'K',
'I',
'R',
'T',
'D',
'A',
'G',
'F',
'T',
'L',
'R',
'W',
'V',
'R',
'K',
'T',
'P',
'W'],
'R': ['H',
'M',
'L',
'D',
'P',
'E',
'E',
'I',
'R',
'K',
'R',
'L',
'E',
'H',
'T',
'E',
'R',
'Q',
'F',
'R',
'N',
'R',
'R',
'K',
'I',
'L',
'I',
'R',
'G',
'L',
'P',
'G',
'D',
'V',
'T',
'N',
'Q',
'E',
'V',
'H',
'D',
'L',
'L',
'S',
'D',
'Y',
'E',
'L',
'K',
'Y',
'C',
'F',
'V',
'D',
'K',
'Y',
'K',
'G',
'T',
'A',
'F',
'V',
'T',
'L',
'L',
'N',
'G',
'E',
'Q',
'A',
'E',
'A',
'A',
'I',
'N',
'A',
'F',
'H',
'Q',
'S',
'R',
'L',
'R',
'E',
'R',
'E',
'L',
'S',
'V',
'Q',
'L',
'Q',
'P',
'T',
'D',
'A',
'L',
'L',
'C',
'V',
'A',
'N',
'L',
'P',
'P',
'S',
'L',
'T',
'Q',
'Q',
'Q',
'F',
'E',
'E',
'L',
'V',
'R',
'P',
'F',
'G',
'S',
'L',
'E',
'R',
'C',
'F',
'L',
'V',
'Y',
'S',
'E',
'R',
'T',
'G',
'Q',
'S',
'K',
'G',
'Y',
'G',
'F',
'A',
'E',
'Y',
'M',
'K',
'K',
'D',
'S',
'A',
'A',
'R',
'A',
'K',
'S',
'D',
'L',
'L',
'G',
'K',
'P',
'L',
'G',
'P',
'R',
'T',
'L',
'Y',
'V',
'H',
'W',
'T',
'D',
'A',
'G',
'Q',
'L',
'T',
'P',
'A',
'L',
'L',
'H',
'S',
'R',
'C',
'L',
'C',
'V',
'D',
'R',
'L',
'P',
'P',
'G',
'F',
'N',
'D',
'V',
'D',
'A',
'L',
'C',
'R',
'A',
'L',
'S',
'A',
'V',
'H',
'S',
'P',
'T',
'F',
'C',
'Q',
'L',
'A',
'C',
'G',
'Q',
'D',
'G',
'Q',
'L',
'K',
'G',
'F',
'A',
'V',
'L',
'E',
'Y',
'E',
'T',
'A',
'E',
'M',
'A',
'E',
'E',
'A',
'Q',
'Q',
'Q',
'A',
'D',
'G',
'L',
'S',
'L',
'G',
'G',
'S',
'H',
'L',
'R',
'V',
'S',
'F',
'C',
'A',
'P',
'G',
'P',
'P',
'G',
'R',
'S',
'M',
'L',
'A',
'A',
'L',
'I',
'A',
'A',
'Q',
'A',
'T']}
apo_L.sequence
'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'