Pinder system#

from pathlib import Path

from pinder.core import PinderSystem, get_index

An example of using the Pinder index API is shown below. For more detailed usage examples, check the pinder-index notebook.

index = get_index()
hetero_test_apo = index.query(
    '(uniprot_L != uniprot_R) and split == "test" and (apo_R and apo_L)'
)
hetero_test_apo.reset_index(drop=True, inplace=True)
hetero_test_apo
split id pdb_id cluster_id cluster_id_R cluster_id_L pinder_s pinder_xl pinder_af2 uniprot_R ... apo_L apo_R_quality apo_L_quality chain1_neff chain2_neff chain_R chain_L contains_antibody contains_antigen contains_enzyme
0 test 3k1i__D1_O25709--3k1i__A1_O25448 3k1i cluster_26031_5179 cluster_26031 cluster_5179 True True False O25709 ... True high high 12.351562 514.000000 D1 A1 False False False
1 test 6qta__A1_G0SHE6--6qta__B1_G0SC29 6qta cluster_11327_11328 cluster_11327 cluster_11328 False True False G0SHE6 ... True high high 115.750000 689.500000 A1 B1 False False False
2 test 3vf0__B1_Q8IY67--3vf0__A2_P18206 3vf0 cluster_5612_993 cluster_993 cluster_5612 True True False Q8IY67 ... True high high 251.125000 35.531250 B1 A2 False False False
3 test 4aye__D1_Q9JXV4--4aye__A1_P08603 4aye cluster_3949_4866 cluster_3949 cluster_4866 True True False Q9JXV4 ... True high high 14.546875 310.000000 D1 A1 False False False
4 test 2w8b__A1_P0A855--2w8b__H1_P0A912 2w8b cluster_15535_1924 cluster_15535 cluster_1924 True True False P0A855 ... True high high 308.250000 1150.000000 A1 H1 False False False
5 test 5y4r__A1_O87131--5y4r__B1_Q9HVI1 5y4r cluster_8825_8826 cluster_8825 cluster_8826 True True False O87131 ... True high high 610.000000 144.875000 A1 B1 False False True
6 test 3egv__A1_Q84BQ9--3egv__B1_Q5SLP6 3egv cluster_33015_371 cluster_33015 cluster_371 True True False Q84BQ9 ... True high high 806.500000 637.000000 A1 B1 False False True
7 test 6wjc__A1_P11229--6wjc__B1_Q8QGR0 6wjc cluster_1057_1356 cluster_1057 cluster_1356 True True False P11229 ... True high high 462.750000 518.500000 A1 B1 False False True
8 test 6tx3__B1_Q9NWY4--6tx3__A1_Q9UGN5 6tx3 cluster_11866_335 cluster_11866 cluster_335 True True False Q9NWY4 ... True high high 59.468750 213.000000 B1 A1 False False True
9 test 2grx__A1_P06971--2grx__C1_P02929 2grx cluster_12107_8897 cluster_12107 cluster_8897 True True False P06971 ... True high high 288.000000 562.500000 A1 C1 False False False
10 test 3kbu__A1_P11277--3kbu__D1_P16157 3kbu cluster_16732_8658 cluster_8658 cluster_16732 True True False P11277 ... True high high 229.625000 210.625000 A1 D1 False False False
11 test 2j0t__A1_P03956--2j0t__D1_P01033 2j0t cluster_939_940 cluster_939 cluster_940 True True False P03956 ... True high high 612.500000 68.312500 A1 D1 False False True
12 test 8a60__A1_P06971--8a60__B1_Q38162 8a60 cluster_12107_26846 cluster_12107 cluster_26846 False True True P06971 ... True high high 288.000000 2.734375 A1 B1 False False False
13 test 4je4__A1_Q06124--4je4__B1_P02751 4je4 cluster_1465_1605 cluster_1465 cluster_1605 True True False Q06124 ... True high high 611.000000 75.062500 A1 B1 False False False
14 test 4uae__A1_O00629--4uae__B1_P31345 4uae cluster_10331_1373 cluster_1373 cluster_10331 False True False O00629 ... True high high 240.750000 3.058594 A1 B1 False False False
15 test 3k9m__A1_P07858--3k9m__B1_P01040 3k9m cluster_4628_6704 cluster_6704 cluster_4628 True True False P07858 ... True high high 770.000000 429.750000 A1 B1 False False True
16 test 2wo2__A1_P54764--2wo2__B1_P52799 2wo2 cluster_130_8064 cluster_130 cluster_8064 True True False P54764 ... True high high 567.500000 104.562500 A1 B1 False False True
17 test 5dob__A1_P16794--5dob__B1_P16791 5dob cluster_12247_23692 cluster_12247 cluster_23692 False True False P16794 ... True high high 3.312500 3.855469 A1 B1 False False False
18 test 8i2e__A1_O34841--8i2e__B1_P54421 8i2e cluster_11087_12465 cluster_12465 cluster_11087 True True True O34841 ... True high high 9.031250 865.000000 A1 B1 False False True
19 test 1zlh__A1_P00730--1zlh__B1_Q5EPH2 1zlh cluster_2416_8594 cluster_2416 cluster_8594 True True False P00730 ... True high high 541.000000 637.000000 A1 B1 False False True
20 test 6yev__C1_P0A744--6yev__A1_P0AA25 6yev cluster_4231_621 cluster_4231 cluster_621 True True False P0A744 ... True high high 1099.000000 1440.000000 C1 A1 False False True
21 test 1dtd__A1_P48052--1dtd__B1_P81511 1dtd cluster_2416_9476 cluster_2416 cluster_9476 True True False P48052 ... True high high 581.000000 8.781250 A1 B1 False False True
22 test 7fn1__B1_P32357--7fn1__A1_P33334 7fn1 cluster_635_7263 cluster_7263 cluster_635 True True False P32357 ... True high high 8.070312 69.062500 B1 A1 False False False
23 test 1yu6__A1_P00780--1yu6__C1_P68390 1yu6 cluster_1952_5091 cluster_1952 cluster_5091 True True False P00780 ... True high high 1000.000000 704.000000 A1 C1 False False True
24 test 6s8v__B1_P08195--6s8v__A1_P80188 6s8v cluster_19347_5331 cluster_19347 cluster_5331 True True False P08195 ... True high high 511.000000 34.812500 B1 A1 False False False
25 test 5ja2__A1_P11454--5ja2__B1_Q9I169 5ja2 cluster_4489_7234 cluster_7234 cluster_4489 True True False P11454 ... True high high 641.500000 545.500000 A1 B1 False False True
26 test 6f3z__A1_P0ADC3--6f3z__B1_P61316 6f3z cluster_12985_22189 cluster_22189 cluster_12985 True True False P0ADC3 ... True high high 402.500000 368.000000 A1 B1 False False False
27 test 8gt0__A1_Q8I6U4--8gt0__B1_P01040 8gt0 cluster_2085_4628 cluster_2085 cluster_4628 True True False Q8I6U4 ... True high high 651.000000 429.750000 A1 B1 False False True
28 test 2gkv__A1_P00777--2gkv__B1_P68390 2gkv cluster_5091_5268 cluster_5268 cluster_5091 True True False P00777 ... True high high 409.000000 704.000000 A1 B1 False False True
29 test 6m4v__A1_P0AEX9--6m4v__B1_P62942 6m4v cluster_1772_409 cluster_409 cluster_1772 True True False P0AEX9 ... True high high 171.250000 1408.000000 A1 B1 False False True
30 test 4us1__B1_Q07889--4us1__A1_P01112 4us1 cluster_10806_437 cluster_10806 cluster_437 True True False Q07889 ... True high high 183.375000 946.500000 B1 A1 False False False
31 test 1tec__A1_P04072--1tec__B1_P01051 1tec cluster_1952_387 cluster_1952 cluster_387 True True False P04072 ... True high high 1017.500000 133.750000 A1 B1 False False True
32 test 1zhh__A1_P54300--1zhh__B1_P54302 1zhh cluster_3962_8641 cluster_3962 cluster_8641 True True False P54300 ... True high high 23.531250 524.500000 A1 B1 False False False
33 test 5n47__B1_P02751--5n47__A1_P80188 5n47 cluster_1605_5331 cluster_1605 cluster_5331 True True False P02751 ... True high high 75.062500 34.812500 B1 A1 False False False
34 test 7b80__A1_G3I8R9--7b80__B1_Q9BVA6 7b80 cluster_1039_643 cluster_643 cluster_1039 True True False G3I8R9 ... True high high 1069.000000 526.500000 A1 B1 False False True

35 rows × 34 columns

pinder_id = list(hetero_test_apo.id)[2]
pinder_id
'3vf0__B1_Q8IY67--3vf0__A2_P18206'

PinderSystem API - base class representing the Structures in a pinder entry#

# Simplest interface - get a single pinder system
ps = PinderSystem(pinder_id)
ps
2024-11-15 12:15:04,493 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=15
PinderSystem(
entry = IndexEntry(
    (
        'split',
        'test',
    ),
    (
        'id',
        '3vf0__B1_Q8IY67--3vf0__A2_P18206',
    ),
    (
        'pdb_id',
        '3vf0',
    ),
    (
        'cluster_id',
        'cluster_5612_993',
    ),
    (
        'cluster_id_R',
        'cluster_993',
    ),
    (
        'cluster_id_L',
        'cluster_5612',
    ),
    (
        'pinder_s',
        True,
    ),
    (
        'pinder_xl',
        True,
    ),
    (
        'pinder_af2',
        False,
    ),
    (
        'uniprot_R',
        'Q8IY67',
    ),
    (
        'uniprot_L',
        'P18206',
    ),
    (
        'holo_R_pdb',
        '3vf0__B1_Q8IY67-R.pdb',
    ),
    (
        'holo_L_pdb',
        '3vf0__A2_P18206-L.pdb',
    ),
    (
        'predicted_R_pdb',
        'af__Q8IY67.pdb',
    ),
    (
        'predicted_L_pdb',
        'af__P18206.pdb',
    ),
    (
        'apo_R_pdb',
        '3smz__A1_Q8IY67.pdb',
    ),
    (
        'apo_L_pdb',
        '5l0h__A1_P18206.pdb',
    ),
    (
        'apo_R_pdbs',
        '3smz__A1_Q8IY67.pdb',
    ),
    (
        'apo_L_pdbs',
        '5l0h__A1_P18206.pdb;5l0f__A1_P18206.pdb;5l0i__A1_P18206.pdb',
    ),
    (
        'holo_R',
        True,
    ),
    (
        'holo_L',
        True,
    ),
    (
        'predicted_R',
        True,
    ),
    (
        'predicted_L',
        True,
    ),
    (
        'apo_R',
        True,
    ),
    (
        'apo_L',
        True,
    ),
    (
        'apo_R_quality',
        'high',
    ),
    (
        'apo_L_quality',
        'high',
    ),
    (
        'chain1_neff',
        251.125,
    ),
    (
        'chain2_neff',
        35.53125,
    ),
    (
        'chain_R',
        'B1',
    ),
    (
        'chain_L',
        'A2',
    ),
    (
        'contains_antibody',
        False,
    ),
    (
        'contains_antigen',
        False,
    ),
    (
        'contains_enzyme',
        False,
    ),
)
native=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3vf0__B1_Q8IY67--3vf0__A2_P18206.pdb,
    uniprot_map=None,
    pinder_id='3vf0__B1_Q8IY67--3vf0__A2_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (3584,),
    pdb_engine='fastpdb',
)
holo_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__B1_Q8IY67-R.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__B1_Q8IY67-R.parquet,
    pinder_id='3vf0__B1_Q8IY67-R',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (2204,),
    pdb_engine='fastpdb',
)
holo_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,
    pinder_id='3vf0__A2_P18206-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1380,),
    pdb_engine='fastpdb',
)
apo_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3smz__A1_Q8IY67.parquet,
    pinder_id='3smz__A1_Q8IY67',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (2183,),
    pdb_engine='fastpdb',
)
apo_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,
    pinder_id='5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1341,),
    pdb_engine='fastpdb',
)
pred_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8IY67.pdb,
    uniprot_map=None,
    pinder_id='af__Q8IY67',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (4495,),
    pdb_engine='fastpdb',
)
pred_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P18206.pdb,
    uniprot_map=None,
    pinder_id='af__P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (8664,),
    pdb_engine='fastpdb',
)
)
holo_L, holo_R = ps.holo_ligand, ps.holo_receptor
pred_L, pred_R = ps.pred_ligand, ps.pred_receptor
apo_L, apo_R = ps.apo_ligand, ps.apo_receptor

holo_L
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,
    pinder_id='3vf0__A2_P18206-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1380,),
    pdb_engine='fastpdb',
)

Classify system difficulty based on the degree of conformational shift between the unbound and bound structures#

ps.unbound_difficulty("apo")
{'Fnat': 0.5421686746987951,
 'Fnonnat': 0.3076923076923077,
 'common_contacts': 45,
 'differing_contacts': 20,
 'bound_contacts': 83,
 'unbound_contacts': 65,
 'fnonnat_R': 0.2857142857142857,
 'fnonnat_L': 0.0,
 'fnat_R': 0.5882352941176471,
 'fnat_L': 0.7692307692307693,
 'difficulty': 'Rigid-body',
 'I-RMSD': 1.125529,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 34,
 'holo_ligand_interface_res': 26,
 'apo_receptor_interface_res': 28,
 'apo_ligand_interface_res': 20,
 'L-RMSD': 0.94191533,
 'R-RMSD': 1.4988925,
 'unbound_id': '3smz__A1_Q8IY67--5l0h__A1_P18206',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'apo'}
ps.unbound_difficulty("predicted")
{'Fnat': 0.5662650602409639,
 'Fnonnat': 0.9225700164744646,
 'common_contacts': 47,
 'differing_contacts': 560,
 'bound_contacts': 83,
 'unbound_contacts': 607,
 'fnonnat_R': 0.8435374149659864,
 'fnonnat_L': 0.8670520231213873,
 'fnat_R': 0.6764705882352942,
 'fnat_L': 0.8846153846153846,
 'difficulty': 'Difficult',
 'I-RMSD': 3.4248848,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 34,
 'holo_ligand_interface_res': 26,
 'apo_receptor_interface_res': 147,
 'apo_ligand_interface_res': 173,
 'L-RMSD': 2.1201644,
 'R-RMSD': 0.62853533,
 'unbound_id': 'af__Q8IY67--af__P18206',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'predicted'}

Illustrating utilities available in Structure instances#

holo_L.filter("atom_name", mask=["CA"])
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (283, 14),
    pinder_id='3vf0__A2_P18206-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (178,),
    pdb_engine='fastpdb',
)
apo_L.filter("atom_name", mask=["CA"])
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (176, 14),
    pinder_id='5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (173,),
    pdb_engine='fastpdb',
)

You can also filter “in place” rather than returning a copy (a la pandas)#

apo_L.filter("atom_name", mask=["CA"], copy=False)
(
    ps.apo_ligand.filter("atom_name", mask=["CA"]),
    ps.holo_ligand.filter("atom_name", mask=["CA"])
)
(Structure(
     filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
     uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (176, 14),
     pinder_id='5l0h__A1_P18206',
     atom_array=<class 'biotite.structure.AtomArray'> with shape (173,),
     pdb_engine='fastpdb',
 ),
 Structure(
     filepath=/home/runner/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,
     uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (283, 14),
     pinder_id='3vf0__A2_P18206-L',
     atom_array=<class 'biotite.structure.AtomArray'> with shape (178,),
     pdb_engine='fastpdb',
 ))

Create masked unbound complex aligned to bound for apo#

apo_complex = ps.create_apo_complex()
apo_complex
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (460, 14),
    pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (2355,),
    pdb_engine='fastpdb',
)
apo_complex.dataframe
chain_id res_name res_code res_id atom_name b_factor ins_code hetero element x y z
0 R HIS H 1 N 0.0 False N 69.377380 105.086411 -51.232784
1 R HIS H 1 CA 0.0 False C 69.471100 103.738152 -50.680832
2 R HIS H 1 C 0.0 False C 70.264641 103.716263 -49.368168
3 R HIS H 1 O 0.0 False O 70.223633 102.714134 -48.653946
4 R HIS H 1 CB 0.0 False C 70.093376 102.773239 -51.701912
... ... ... ... ... ... ... ... ... ... ... ... ...
2350 L ARG R 170 CA 0.0 False C 45.438423 86.089317 -54.533417
2351 L LYS K 171 CA 0.0 False C 44.941048 83.492149 -57.280712
2352 L THR T 172 CA 0.0 False C 46.387222 85.555809 -60.138439
2353 L PRO P 173 CA 0.0 False C 49.873409 84.154137 -59.526482
2354 L TRP W 174 CA 0.0 False C 50.158199 80.451759 -60.405190

2355 rows × 12 columns

What’s going on under the hood#

ps = PinderSystem(pinder_id)
apo_L, apo_R = ps.apo_ligand, ps.apo_receptor

# After getting the "in common" masked structures, they can be superimposed
apo_R, holo_R = apo_R.align_common_sequence(ps.aligned_holo_R)
apo_L, holo_L = apo_L.align_common_sequence(ps.aligned_holo_L)

# RMSD after superposition (without outlier removal) is returned as `rms`
# (note: the second call below overwrites the value from the first)
R_super, rms, _ = apo_R.superimpose(holo_R)
L_super, rms, _ = apo_L.superimpose(holo_L)
L_super
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,
    pinder_id='5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1327,),
    pdb_engine='fastpdb',
)
# Now we can create the complexes using Structure.__add__ methods
apo_binary = R_super + L_super
holo_binary = holo_R + holo_L

apo_binary
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,
    uniprot_map=<class 'pandas.core.frame.DataFrame'> with shape (460, 14),
    pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (3510,),
    pdb_engine='fastpdb',
)
# Alternatively, there exist utils for creating the masked apo and predicted complex
pred_complex = ps.create_pred_complex()
pred_complex
Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8IY67--af__P18206.pdb,
    uniprot_map=None,
    pinder_id='af__Q8IY67--af__P18206',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (3566,),
    pdb_engine='fastpdb',
)
masked_complex_dir = Path("./").absolute() / "unbound_complexes"
masked_complex_dir.mkdir(exist_ok=True, parents=True)

masked_complex_dir
PosixPath('/home/runner/work/pinder/pinder/docs/unbound_complexes')
# If the output PDB filepath is omitted, the structure will be written to Structure.filepath, which may overwrite the file already at that path.
# In this case, omitting it would write a new file whose name is composed of the added complex filepaths.
pred_complex.to_pdb(masked_complex_dir / "pred_complex.pdb")
apo_complex.to_pdb(masked_complex_dir / "apo_complex.pdb")
(masked_complex_dir / "pred_complex.pdb").unlink()
(masked_complex_dir / "apo_complex.pdb").unlink()

Structures have resolved_pdb2uniprot and resolved_uniprot2pdb properties#

They return dicts of resolved residue numbers mapped from PDB numbering to UniProt numbering, and vice versa.

The full mapping is available in Structure.uniprot_mapping. To get only the resolved mapping, access the Structure.resolved_mapping attribute.

apo_L.resolved_pdb2uniprot
{2: 960,
 3: 961,
 4: 962,
 5: 963,
 6: 964,
 7: 965,
 8: 966,
 9: 967,
 10: 968,
 11: 969,
 12: 970,
 13: 971,
 14: 972,
 15: 973,
 16: 974,
 18: 976,
 19: 977,
 20: 978,
 21: 979,
 22: 980,
 23: 981,
 24: 982,
 25: 983,
 26: 984,
 27: 985,
 28: 986,
 29: 987,
 30: 988,
 31: 989,
 32: 990,
 33: 991,
 34: 992,
 35: 993,
 36: 994,
 37: 995,
 38: 996,
 39: 997,
 40: 998,
 41: 999,
 42: 1000,
 43: 1001,
 44: 1002,
 45: 1003,
 46: 1004,
 47: 1005,
 48: 1006,
 49: 1007,
 50: 1008,
 51: 1009,
 52: 1010,
 53: 1011,
 54: 1012,
 55: 1013,
 56: 1014,
 57: 1015,
 58: 1016,
 59: 1017,
 60: 1018,
 61: 1019,
 62: 1020,
 63: 1021,
 64: 1022,
 65: 1023,
 66: 1024,
 67: 1025,
 68: 1026,
 69: 1027,
 70: 1028,
 71: 1029,
 72: 1030,
 73: 1031,
 74: 1032,
 75: 1033,
 76: 1034,
 77: 1035,
 78: 1036,
 79: 1037,
 80: 1038,
 81: 1039,
 82: 1040,
 83: 1041,
 84: 1042,
 85: 1043,
 86: 1044,
 87: 1045,
 88: 1046,
 89: 1047,
 90: 1048,
 91: 1049,
 92: 1050,
 93: 1051,
 94: 1052,
 95: 1053,
 96: 1054,
 97: 1055,
 98: 1056,
 99: 1057,
 100: 1058,
 101: 1059,
 102: 1060,
 103: 1061,
 104: 1062,
 105: 1063,
 106: 1064,
 107: 1065,
 108: 1066,
 109: 1067,
 110: 1068,
 111: 1069,
 112: 1070,
 113: 1071,
 114: 1072,
 115: 1073,
 116: 1074,
 117: 1075,
 118: 1076,
 119: 1077,
 120: 1078,
 121: 1079,
 122: 1080,
 123: 1081,
 124: 1082,
 125: 1083,
 126: 1084,
 127: 1085,
 128: 1086,
 129: 1087,
 130: 1088,
 131: 1089,
 132: 1090,
 133: 1091,
 134: 1092,
 135: 1093,
 136: 1094,
 137: 1095,
 138: 1096,
 139: 1097,
 140: 1098,
 141: 1099,
 142: 1100,
 143: 1101,
 144: 1102,
 145: 1103,
 146: 1104,
 147: 1105,
 148: 1106,
 149: 1107,
 150: 1108,
 151: 1109,
 152: 1110,
 153: 1111,
 154: 1112,
 155: 1113,
 156: 1114,
 157: 1115,
 158: 1116,
 159: 1117,
 160: 1118,
 161: 1119,
 162: 1120,
 163: 1121,
 164: 1122,
 165: 1123,
 166: 1124,
 167: 1125,
 168: 1126,
 169: 1127,
 170: 1128,
 171: 1129,
 172: 1130,
 173: 1131,
 174: 1132}
apo_complex.resolved_mapping
entry_id entity_id asym_id pdb_strand_id resi resi_pdb resi_auth resn one_letter_code_can resolved one_letter_code_uniprot resi_uniprot uniprot_acc chain
0 NaN 1 A A 3 39 39.0 LEU L 1 L 39 NaN A1
1 NaN 1 A A 4 40 40.0 ASP D 1 D 40 NaN A1
2 NaN 1 A A 5 41 41.0 PRO P 1 P 41 NaN A1
3 NaN 1 A A 6 42 42.0 GLU E 1 E 42 NaN A1
4 NaN 1 A A 7 43 43.0 GLU E 1 E 43 NaN A1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
446 NaN 1 A A 170 1128 1128.0 ARG R 1 R 1128 NaN A1
447 NaN 1 A A 171 1129 1129.0 LYS K 1 K 1129 NaN A1
448 NaN 1 A A 172 1130 1130.0 THR T 1 T 1130 NaN A1
449 NaN 1 A A 173 1131 1131.0 PRO P 1 P 1131 NaN A1
450 NaN 1 A A 174 1132 1132.0 TRP W 1 W 1132 NaN A1

451 rows × 14 columns

Case with multiple available apo structures#

pinder_id = "1ldt__A1_P00761--1ldt__B1_P80424"
ps_canon = PinderSystem(pinder_id)
ps_canon.entry.apo_R_alt
2024-11-15 12:15:08,889 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=43
['1s6f__A1_P00761.pdb',
 '1s85__A1_P00761.pdb',
 '1s84__A1_P00761.pdb',
 '2a32__A1_P00761.pdb',
 '1s6h__A1_P00761.pdb',
 '1s5s__A1_P00761.pdb',
 '1fni__A1_P00761.pdb',
 '1s81__A1_P00761.pdb',
 '1fmg__A1_P00761.pdb',
 '1qqu__A1_P00761.pdb',
 '2a31__A1_P00761.pdb',
 '1fn6__A1_P00761.pdb',
 '1s83__A1_P00761.pdb']
ps_canon.entry.apo_L_alt
['2kmq__A1_P80424.pdb', '2kmp__A1_P80424.pdb', '2kmr__A1_P80424.pdb']

Specify 2kmr as apo ligand and 1fmg as apo receptor#

Note: the default apo_receptor and apo_ligand are determined based on the selected canonical apo monomer.

The canonical monomers were selected based on their sequence overlap and difficulty metrics calculated in pinder.eval.dockq.unbound.

ps = PinderSystem(pinder_id, apo_ligand_pdb_code="2kmr", apo_receptor_pdb_code="1fmg")
ps
PinderSystem(
entry = IndexEntry(
    (
        'split',
        'train',
    ),
    (
        'id',
        '1ldt__A1_P00761--1ldt__B1_P80424',
    ),
    (
        'pdb_id',
        '1ldt',
    ),
    (
        'cluster_id',
        'cluster_312_5091',
    ),
    (
        'cluster_id_R',
        'cluster_312',
    ),
    (
        'cluster_id_L',
        'cluster_5091',
    ),
    (
        'pinder_s',
        False,
    ),
    (
        'pinder_xl',
        False,
    ),
    (
        'pinder_af2',
        False,
    ),
    (
        'uniprot_R',
        'P00761',
    ),
    (
        'uniprot_L',
        'P80424',
    ),
    (
        'holo_R_pdb',
        '1ldt__A1_P00761-R.pdb',
    ),
    (
        'holo_L_pdb',
        '1ldt__B1_P80424-L.pdb',
    ),
    (
        'predicted_R_pdb',
        'af__P00761.pdb',
    ),
    (
        'predicted_L_pdb',
        'af__P80424.pdb',
    ),
    (
        'apo_R_pdb',
        '1s82__A1_P00761.pdb',
    ),
    (
        'apo_L_pdb',
        '2kmo__A1_P80424.pdb',
    ),
    (
        'apo_R_pdbs',
        '1s82__A1_P00761.pdb;1s6f__A1_P00761.pdb;1s85__A1_P00761.pdb;1s84__A1_P00761.pdb;2a32__A1_P00761.pdb;1s6h__A1_P00761.pdb;1s5s__A1_P00761.pdb;1fni__A1_P00761.pdb;1s81__A1_P00761.pdb;1fmg__A1_P00761.pdb;1qqu__A1_P00761.pdb;2a31__A1_P00761.pdb;1fn6__A1_P00761.pdb;1s83__A1_P00761.pdb',
    ),
    (
        'apo_L_pdbs',
        '2kmo__A1_P80424.pdb;2kmq__A1_P80424.pdb;2kmp__A1_P80424.pdb;2kmr__A1_P80424.pdb',
    ),
    (
        'holo_R',
        True,
    ),
    (
        'holo_L',
        True,
    ),
    (
        'predicted_R',
        True,
    ),
    (
        'predicted_L',
        True,
    ),
    (
        'apo_R',
        True,
    ),
    (
        'apo_L',
        True,
    ),
    (
        'apo_R_quality',
        'high',
    ),
    (
        'apo_L_quality',
        'high',
    ),
    (
        'chain1_neff',
        997.0,
    ),
    (
        'chain2_neff',
        2220.0,
    ),
    (
        'chain_R',
        'A1',
    ),
    (
        'chain_L',
        'B1',
    ),
    (
        'contains_antibody',
        False,
    ),
    (
        'contains_antigen',
        False,
    ),
    (
        'contains_enzyme',
        True,
    ),
)
native=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__A1_P00761--1ldt__B1_P80424.pdb,
    uniprot_map=None,
    pinder_id='1ldt__A1_P00761--1ldt__B1_P80424',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1992,),
    pdb_engine='fastpdb',
)
holo_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__A1_P00761-R.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1ldt__A1_P00761-R.parquet,
    pinder_id='1ldt__A1_P00761-R',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1666,),
    pdb_engine='fastpdb',
)
holo_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1ldt__B1_P80424-L.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1ldt__B1_P80424-L.parquet,
    pinder_id='1ldt__B1_P80424-L',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (326,),
    pdb_engine='fastpdb',
)
apo_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1fmg__A1_P00761.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1fmg__A1_P00761.parquet,
    pinder_id='1fmg__A1_P00761',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1642,),
    pdb_engine='fastpdb',
)
apo_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/2kmr__A1_P80424.pdb,
    uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/2kmr__A1_P80424.parquet,
    pinder_id='2kmr__A1_P80424',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (630,),
    pdb_engine='fastpdb',
)
pred_receptor=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P00761.pdb,
    uniprot_map=None,
    pinder_id='af__P00761',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (1708,),
    pdb_engine='fastpdb',
)
pred_ligand=Structure(
    filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__P80424.pdb,
    uniprot_map=None,
    pinder_id='af__P80424',
    atom_array=<class 'biotite.structure.AtomArray'> with shape (326,),
    pdb_engine='fastpdb',
)
)

Classify system difficulty based on the degree of conformational shift between the unbound and bound structures#

ps.unbound_difficulty("apo")
{'Fnat': 0.847457627118644,
 'Fnonnat': 0.5,
 'common_contacts': 50,
 'differing_contacts': 50,
 'bound_contacts': 59,
 'unbound_contacts': 100,
 'fnonnat_R': 0.4318181818181818,
 'fnonnat_L': 0.35,
 'fnat_R': 0.8620689655172413,
 'fnat_L': 1.0,
 'difficulty': 'Difficult',
 'I-RMSD': 2.3008883,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 29,
 'holo_ligand_interface_res': 13,
 'apo_receptor_interface_res': 44,
 'apo_ligand_interface_res': 20,
 'L-RMSD': 9.371291,
 'R-RMSD': 0.47199312,
 'unbound_id': '1fmg__A1_P00761--2kmr__A1_P80424',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'apo'}
ps.unbound_difficulty("predicted")
{'Fnat': 0.847457627118644,
 'Fnonnat': 0.05660377358490566,
 'common_contacts': 50,
 'differing_contacts': 3,
 'bound_contacts': 59,
 'unbound_contacts': 53,
 'fnonnat_R': 0.07692307692307693,
 'fnonnat_L': 0.0,
 'fnat_R': 0.8275862068965517,
 'fnat_L': 0.9230769230769231,
 'difficulty': 'Rigid-body',
 'I-RMSD': 1.252618,
 'matched_interface_chains': 2,
 'holo_receptor_interface_res': 29,
 'holo_ligand_interface_res': 13,
 'apo_receptor_interface_res': 26,
 'apo_ligand_interface_res': 12,
 'L-RMSD': 3.9255776,
 'R-RMSD': 0.40761372,
 'unbound_id': 'af__P00761--af__P80424',
 'unbound_body': 'receptor_ligand',
 'monomer_name': 'predicted'}

A bunch of other features of the Structure class are illustrated below#

apo_L.atom_array[0:10]
array([
	Atom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="N", element="N", b_factor=0.0),
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="C", element="C", b_factor=0.0),
	Atom(np.array([36.532, 16.338, 57.297], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="O", element="O", b_factor=0.0),
	Atom(np.array([37.509, 14.339, 59.666], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CB", element="C", b_factor=0.0),
	Atom(np.array([38.703, 14.01 , 60.548], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CG", element="C", b_factor=0.0),
	Atom(np.array([38.916, 12.517, 60.706], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CD", element="C", b_factor=0.0),
	Atom(np.array([37.981, 11.727, 60.572], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="OE1", element="O", b_factor=0.0),
	Atom(np.array([40.151, 12.123, 60.994], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="NE2", element="N", b_factor=0.0),
	Atom(np.array([34.852, 15.989, 58.853], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="N", element="N", b_factor=0.0)
])
apo_L.coords[0:10]
array([[36.778, 16.379, 60.821],
       [37.219, 15.836, 59.542],
       [36.18 , 16.079, 58.452],
       [36.532, 16.338, 57.297],
       [37.509, 14.339, 59.666],
       [38.703, 14.01 , 60.548],
       [38.916, 12.517, 60.706],
       [37.981, 11.727, 60.572],
       [40.151, 12.123, 60.994],
       [34.852, 15.989, 58.853]], dtype=float32)
apo_L.residue_names
['ALA',
 'ARG',
 'ASN',
 'ASP',
 'CYS',
 'GLN',
 'GLU',
 'GLY',
 'HIS',
 'ILE',
 'LEU',
 'LYS',
 'MET',
 'PHE',
 'PRO',
 'SER',
 'THR',
 'TRP',
 'VAL']
apo_L.sequence
'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'
apo_L.atom_array.res_name
array(['GLN', 'GLN', 'GLN', ..., 'TRP', 'TRP', 'TRP'], dtype='<U5')
apo_L.atom_array[apo_L.backbone_mask][0:10]
array([
	Atom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="N", element="N", b_factor=0.0),
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="C", element="C", b_factor=0.0),
	Atom(np.array([34.852, 15.989, 58.853], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="N", element="N", b_factor=0.0),
	Atom(np.array([33.88 , 16.232, 57.771], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([33.954, 17.664, 57.252], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="C", element="C", b_factor=0.0),
	Atom(np.array([33.798, 17.833, 55.943], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="N", element="N", b_factor=0.0),
	Atom(np.array([33.848, 19.155, 55.33 ], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([32.629, 19.402, 54.448], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="C", element="C", b_factor=0.0),
	Atom(np.array([32.136, 20.636, 54.462], dtype=float32), chain_id="L", res_id=5, ins_code="", res_name="ASN", hetero=False, atom_name="N", element="N", b_factor=0.0)
])
apo_L.atom_array[apo_L.calpha_mask][0:10]
array([
	Atom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id="L", res_id=2, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([33.88 , 16.232, 57.771], dtype=float32), chain_id="L", res_id=3, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([33.848, 19.155, 55.33 ], dtype=float32), chain_id="L", res_id=4, ins_code="", res_name="VAL", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([30.979, 21.004, 53.668], dtype=float32), chain_id="L", res_id=5, ins_code="", res_name="ASN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([31.94 , 20.151, 50.049], dtype=float32), chain_id="L", res_id=6, ins_code="", res_name="GLN", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([30.278, 23.116, 48.249], dtype=float32), chain_id="L", res_id=7, ins_code="", res_name="PRO", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([32.112, 25.664, 50.491], dtype=float32), chain_id="L", res_id=8, ins_code="", res_name="ILE", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([35.441, 23.864, 49.936], dtype=float32), chain_id="L", res_id=9, ins_code="", res_name="LEU", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([34.772, 23.807, 46.168], dtype=float32), chain_id="L", res_id=10, ins_code="", res_name="ALA", hetero=False, atom_name="CA", element="C", b_factor=0.0),
	Atom(np.array([33.923, 27.56 , 46.154], dtype=float32), chain_id="L", res_id=11, ins_code="", res_name="ALA", hetero=False, atom_name="CA", element="C", b_factor=0.0)
])
apo_L.chain_sequence
{'L': ['Q',
  'P',
  'V',
  'N',
  'Q',
  'P',
  'I',
  'L',
  'A',
  'A',
  'A',
  'Q',
  'S',
  'L',
  'H',
  'E',
  'A',
  'T',
  'K',
  'W',
  'S',
  'S',
  'K',
  'G',
  'N',
  'D',
  'I',
  'I',
  'A',
  'A',
  'A',
  'K',
  'R',
  'M',
  'A',
  'L',
  'L',
  'M',
  'A',
  'E',
  'M',
  'S',
  'R',
  'L',
  'V',
  'R',
  'G',
  'G',
  'S',
  'G',
  'T',
  'K',
  'R',
  'A',
  'L',
  'I',
  'Q',
  'C',
  'A',
  'K',
  'D',
  'I',
  'A',
  'K',
  'A',
  'S',
  'D',
  'E',
  'V',
  'T',
  'R',
  'L',
  'A',
  'K',
  'E',
  'V',
  'A',
  'K',
  'Q',
  'C',
  'T',
  'D',
  'K',
  'R',
  'I',
  'R',
  'T',
  'N',
  'L',
  'L',
  'Q',
  'V',
  'C',
  'E',
  'R',
  'I',
  'P',
  'T',
  'I',
  'S',
  'T',
  'Q',
  'L',
  'K',
  'I',
  'L',
  'S',
  'T',
  'V',
  'K',
  'A',
  'T',
  'M',
  'L',
  'G',
  'R',
  'T',
  'N',
  'I',
  'S',
  'D',
  'E',
  'E',
  'S',
  'E',
  'Q',
  'A',
  'T',
  'E',
  'M',
  'L',
  'V',
  'H',
  'N',
  'A',
  'Q',
  'N',
  'L',
  'M',
  'Q',
  'S',
  'V',
  'K',
  'E',
  'T',
  'V',
  'R',
  'E',
  'A',
  'E',
  'A',
  'A',
  'S',
  'I',
  'K',
  'I',
  'R',
  'T',
  'D',
  'A',
  'G',
  'F',
  'T',
  'L',
  'R',
  'W',
  'V',
  'R',
  'K',
  'T',
  'P',
  'W']}
apo_complex.chain_sequence
{'L': ['Q',
  'P',
  'V',
  'N',
  'Q',
  'P',
  'I',
  'L',
  'A',
  'A',
  'A',
  'Q',
  'S',
  'L',
  'H',
  'E',
  'A',
  'T',
  'K',
  'W',
  'S',
  'S',
  'K',
  'G',
  'N',
  'D',
  'I',
  'I',
  'A',
  'A',
  'A',
  'K',
  'R',
  'M',
  'A',
  'L',
  'L',
  'M',
  'A',
  'E',
  'M',
  'S',
  'R',
  'L',
  'V',
  'R',
  'G',
  'G',
  'S',
  'G',
  'T',
  'K',
  'R',
  'A',
  'L',
  'I',
  'Q',
  'C',
  'A',
  'K',
  'D',
  'I',
  'A',
  'K',
  'A',
  'S',
  'D',
  'E',
  'V',
  'T',
  'R',
  'L',
  'A',
  'K',
  'E',
  'V',
  'A',
  'K',
  'Q',
  'C',
  'T',
  'D',
  'K',
  'R',
  'I',
  'R',
  'T',
  'N',
  'L',
  'L',
  'Q',
  'V',
  'C',
  'E',
  'R',
  'I',
  'P',
  'T',
  'I',
  'S',
  'T',
  'Q',
  'L',
  'K',
  'I',
  'L',
  'S',
  'T',
  'V',
  'K',
  'A',
  'T',
  'M',
  'L',
  'G',
  'R',
  'T',
  'N',
  'I',
  'S',
  'D',
  'E',
  'E',
  'S',
  'E',
  'Q',
  'A',
  'T',
  'E',
  'M',
  'L',
  'V',
  'H',
  'N',
  'A',
  'Q',
  'N',
  'L',
  'M',
  'Q',
  'S',
  'V',
  'K',
  'E',
  'T',
  'V',
  'R',
  'E',
  'A',
  'E',
  'A',
  'A',
  'S',
  'I',
  'K',
  'I',
  'R',
  'T',
  'D',
  'A',
  'G',
  'F',
  'T',
  'L',
  'R',
  'W',
  'V',
  'R',
  'K',
  'T',
  'P',
  'W'],
 'R': ['H',
  'M',
  'L',
  'D',
  'P',
  'E',
  'E',
  'I',
  'R',
  'K',
  'R',
  'L',
  'E',
  'H',
  'T',
  'E',
  'R',
  'Q',
  'F',
  'R',
  'N',
  'R',
  'R',
  'K',
  'I',
  'L',
  'I',
  'R',
  'G',
  'L',
  'P',
  'G',
  'D',
  'V',
  'T',
  'N',
  'Q',
  'E',
  'V',
  'H',
  'D',
  'L',
  'L',
  'S',
  'D',
  'Y',
  'E',
  'L',
  'K',
  'Y',
  'C',
  'F',
  'V',
  'D',
  'K',
  'Y',
  'K',
  'G',
  'T',
  'A',
  'F',
  'V',
  'T',
  'L',
  'L',
  'N',
  'G',
  'E',
  'Q',
  'A',
  'E',
  'A',
  'A',
  'I',
  'N',
  'A',
  'F',
  'H',
  'Q',
  'S',
  'R',
  'L',
  'R',
  'E',
  'R',
  'E',
  'L',
  'S',
  'V',
  'Q',
  'L',
  'Q',
  'P',
  'T',
  'D',
  'A',
  'L',
  'L',
  'C',
  'V',
  'A',
  'N',
  'L',
  'P',
  'P',
  'S',
  'L',
  'T',
  'Q',
  'Q',
  'Q',
  'F',
  'E',
  'E',
  'L',
  'V',
  'R',
  'P',
  'F',
  'G',
  'S',
  'L',
  'E',
  'R',
  'C',
  'F',
  'L',
  'V',
  'Y',
  'S',
  'E',
  'R',
  'T',
  'G',
  'Q',
  'S',
  'K',
  'G',
  'Y',
  'G',
  'F',
  'A',
  'E',
  'Y',
  'M',
  'K',
  'K',
  'D',
  'S',
  'A',
  'A',
  'R',
  'A',
  'K',
  'S',
  'D',
  'L',
  'L',
  'G',
  'K',
  'P',
  'L',
  'G',
  'P',
  'R',
  'T',
  'L',
  'Y',
  'V',
  'H',
  'W',
  'T',
  'D',
  'A',
  'G',
  'Q',
  'L',
  'T',
  'P',
  'A',
  'L',
  'L',
  'H',
  'S',
  'R',
  'C',
  'L',
  'C',
  'V',
  'D',
  'R',
  'L',
  'P',
  'P',
  'G',
  'F',
  'N',
  'D',
  'V',
  'D',
  'A',
  'L',
  'C',
  'R',
  'A',
  'L',
  'S',
  'A',
  'V',
  'H',
  'S',
  'P',
  'T',
  'F',
  'C',
  'Q',
  'L',
  'A',
  'C',
  'G',
  'Q',
  'D',
  'G',
  'Q',
  'L',
  'K',
  'G',
  'F',
  'A',
  'V',
  'L',
  'E',
  'Y',
  'E',
  'T',
  'A',
  'E',
  'M',
  'A',
  'E',
  'E',
  'A',
  'Q',
  'Q',
  'Q',
  'A',
  'D',
  'G',
  'L',
  'S',
  'L',
  'G',
  'G',
  'S',
  'H',
  'L',
  'R',
  'V',
  'S',
  'F',
  'C',
  'A',
  'P',
  'G',
  'P',
  'P',
  'G',
  'R',
  'S',
  'M',
  'L',
  'A',
  'A',
  'L',
  'I',
  'A',
  'A',
  'Q',
  'A',
  'T']}
apo_L.sequence
'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'