Pinder index#
Download the dataset#
NOTE: the default location for the dataset is ~/.local/share/pinder/<release version>.
To use a different location, set the PINDER_BASE_DIR environment variable.
The base dir is the fully qualified path up to, but not including, the
<release version> directory.
For instance, you could run:
export PINDER_BASE_DIR=~/my-custom-location-for-pinder/pinder
You can always check the current location of the dataset like so:
from pinder.core import get_pinder_location
get_pinder_location()
from pinder.core import get_pinder_location
get_pinder_location()
PosixPath('/home/runner/.local/share/pinder/2024-02')
To download the complete dataset run the following#
from pinder.core import download_dataset
# download_dataset()
Alternatively, use the CLI script pinder_download#
pinder_download --help
usage: Download latest pinder dataset to disk [-h] [--pinder_base_dir PINDER_BASE_DIR] [--pinder_release PINDER_RELEASE] [--skip_inflation]
optional arguments:
-h, --help show this help message and exit
--pinder_base_dir PINDER_BASE_DIR
specify a non-default pinder base directory
--pinder_release PINDER_RELEASE
specify a pinder dataset version
--skip_inflation if passed, will only download the compressed archives without unpacking
The full dataset should look like this#
~/.local/share/pinder/<release version>/
pdbs/
csvs/
index.csv.gz
Pinder metadata API#
from pinder.core import get_metadata
metadata = get_metadata()
metadata
| | id | entry_id | method | date | release_date | resolution | label | probability | chain1_id | chain2_id | ... | interface_atom_gaps_4A | missing_interface_residues_4A | interface_atom_gaps_8A | missing_interface_residues_8A | entity_id_R | entity_id_L | pdb_strand_id_R | pdb_strand_id_L | ECOD_names_R | ECOD_names_L |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7rzb__A1_A0A229LVN5--7rzb__A2_A0A229LVN5 | 7rzb | X-RAY DIFFRACTION | 2021-08-27 | 2022-04-13 | 1.599609 | BIO | 0.576172 | R | L | ... | 0 | 0 | 0 | 0 | 1 | 1 | A | A | PF06491 | PF06491 |
1 | 3t2l__A1_Q5LE95--3t2l__A2_Q5LE95 | 3t2l | X-RAY DIFFRACTION | 2011-07-22 | 2011-08-10 | 2.330078 | BIO | 0.983887 | R | L | ... | 0 | 0 | 0 | 0 | 1 | 1 | A | A | F_UNCLASSIFIED,PF13149 | F_UNCLASSIFIED,PF13149 |
2 | 6ikj__A1_Q9I4L6--6ikj__B1_Q9I4L6 | 6ikj | X-RAY DIFFRACTION | 2018-10-16 | 2019-03-13 | 1.759766 | BIO | 0.543945 | R | L | ... | 0 | 0 | 0 | 0 | 1 | 1 | A | B | PF00691 | PF00691 |
3 | 8iyi__A1_Q6CVU4--8iyi__B1_Q6CVU4 | 8iyi | X-RAY DIFFRACTION | 2023-04-05 | 2023-06-28 | 1.900391 | BIO | 0.992188 | R | L | ... | 0 | 0 | 0 | 0 | 1 | 1 | A | B | PF17284,PF01564 | PF17284,PF01564 |
4 | 3uws__B1_A7A9N3--3uws__A1_A7A9N3 | 3uws | X-RAY DIFFRACTION | 2011-12-02 | 2012-06-13 | 1.700195 | BIO | 0.996094 | R | L | ... | 0 | 0 | 0 | 0 | 2 | 1 | B | A | ||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2319559 | 6hbg__C28_Q8V635--6hbg__C35_Q8V635 | 6hbg | ELECTRON MICROSCOPY | 2018-08-10 | 2019-03-20 | 3.160156 | BIO | 0.512207 | R | L | ... | 0 | 0 | 0 | 0 | 3 | 3 | C | C | PF00073 | PF00073 |
2319560 | 6hbg__A11_Q8V635--6hbg__A54_Q8V635 | 6hbg | ELECTRON MICROSCOPY | 2018-08-10 | 2019-03-20 | 3.160156 | | 0.000000 | R | L | ... | 0 | 0 | 0 | 0 | 1 | 1 | A | A | PF00073 | PF00073 |
2319561 | 6hbg__C15_Q8V635--6hbg__D4_Q8V635 | 6hbg | ELECTRON MICROSCOPY | 2018-08-10 | 2019-03-20 | 3.160156 | XTAL | 0.491943 | R | L | ... | 0 | 0 | 0 | 0 | 3 | 4 | C | D | PF00073 | PF02226 |
2319562 | 6hbg__C33_Q8V635--6hbg__D52_Q8V635 | 6hbg | ELECTRON MICROSCOPY | 2018-08-10 | 2019-03-20 | 3.160156 | XTAL | 0.491943 | R | L | ... | 0 | 0 | 0 | 0 | 3 | 4 | C | D | PF00073 | PF02226 |
2319563 | 6rwh__A1_P31947--6rwh__A2_P31947 | 6rwh | X-RAY DIFFRACTION | 2019-06-05 | 2020-06-17 | 1.679688 | BIO | 0.835938 | R | L | ... | 4 | 0 | 64 | 0 | 1 | 1 | A | A | PF00244 | PF00244 |
2319564 rows × 51 columns
Pinder index API#
from pinder.core import PinderSystem, get_index
index = get_index()
index
| | split | id | pdb_id | cluster_id | cluster_id_R | cluster_id_L | pinder_s | pinder_xl | pinder_af2 | uniprot_R | ... | apo_L | apo_R_quality | apo_L_quality | chain1_neff | chain2_neff | chain_R | chain_L | contains_antibody | contains_antigen | contains_enzyme |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | test | 7rzb__A1_A0A229LVN5--7rzb__A2_A0A229LVN5 | 7rzb | cluster_16129_16129 | cluster_16129 | cluster_16129 | False | True | True | A0A229LVN5 | ... | False | | | 287.000000 | 287.000000 | A1 | A2 | False | False | False |
1 | test | 3t2l__A1_Q5LE95--3t2l__A2_Q5LE95 | 3t2l | cluster_30933_30933 | cluster_30933 | cluster_30933 | False | True | False | Q5LE95 | ... | False | | | 7.175781 | 7.175781 | A1 | A2 | False | False | False |
2 | test | 6ikj__A1_Q9I4L6--6ikj__B1_Q9I4L6 | 6ikj | cluster_1924_1924 | cluster_1924 | cluster_1924 | False | True | False | Q9I4L6 | ... | True | high | high | 845.000000 | 845.000000 | A1 | B1 | False | False | False |
3 | test | 8iyi__A1_Q6CVU4--8iyi__B1_Q6CVU4 | 8iyi | cluster_142_142 | cluster_142 | cluster_142 | False | True | False | Q6CVU4 | ... | False | | | 525.000000 | 525.000000 | A1 | B1 | False | False | False |
4 | test | 3uws__B1_A7A9N3--3uws__A1_A7A9N3 | 3uws | cluster_21030_21031 | cluster_21030 | cluster_21031 | False | True | False | A7A9N3 | ... | False | | | 147.375000 | 147.375000 | B1 | A1 | False | False | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2319559 | invalid | 6hbg__C28_Q8V635--6hbg__C35_Q8V635 | 6hbg | cluster_-1_-1 | cluster_-1 | cluster_-1 | False | False | False | Q8V635 | ... | False | | | 37.656250 | 37.656250 | C28 | C35 | False | False | True |
2319560 | invalid | 6hbg__A11_Q8V635--6hbg__A54_Q8V635 | 6hbg | cluster_-1_-1 | cluster_-1 | cluster_-1 | False | False | False | Q8V635 | ... | False | | | 37.656250 | 37.656250 | A11 | A54 | False | False | True |
2319561 | invalid | 6hbg__C15_Q8V635--6hbg__D4_Q8V635 | 6hbg | cluster_-1_p | cluster_p | cluster_-1 | False | False | False | Q8V635 | ... | False | | | 37.656250 | 37.656250 | C15 | D4 | False | False | True |
2319562 | invalid | 6hbg__C33_Q8V635--6hbg__D52_Q8V635 | 6hbg | cluster_-1_p | cluster_p | cluster_-1 | False | False | False | Q8V635 | ... | False | | | 37.656250 | 37.656250 | C33 | D52 | False | False | True |
2319563 | invalid | 6rwh__A1_P31947--6rwh__A2_P31947 | 6rwh | cluster_2_2 | cluster_2 | cluster_2 | False | False | False | P31947 | ... | False | 457.750000 | 457.750000 | A1 | A2 | False | False | False |
2319564 rows × 34 columns
How to get subsets of data from the index#
# Example: I want all of pinder_af2 - apo
af2_apo = index.query(
'pinder_af2 == True and apo_R and apo_L'
).reset_index(drop=True)
af2_apo
| | split | id | pdb_id | cluster_id | cluster_id_R | cluster_id_L | pinder_s | pinder_xl | pinder_af2 | uniprot_R | ... | apo_L | apo_R_quality | apo_L_quality | chain1_neff | chain2_neff | chain_R | chain_L | contains_antibody | contains_antigen | contains_enzyme |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | test | 7ubk__B1_P03047--7ubk__A1_P03047 | 7ubk | cluster_31523_31523 | cluster_31523 | cluster_31523 | False | True | True | P03047 | ... | True | high | high | 81.750000 | 81.750000 | B1 | A1 | False | False | False |
1 | test | 7zj1__B1_P55265--7zj1__A1_P55265 | 7zj1 | cluster_1209_1209 | cluster_1209 | cluster_1209 | False | True | True | P55265 | ... | True | high | high | 174.250000 | 174.250000 | B1 | A1 | False | False | True |
2 | test | 7zjc__A1_P0DV83--7zjc__A2_P0DV83 | 7zjc | cluster_26441_26441 | cluster_26441 | cluster_26441 | False | True | True | P0DV83 | ... | True | high | high | 6.925781 | 6.925781 | A1 | A2 | False | False | False |
3 | test | 7ztw__A1_Q66578--7ztw__B1_Q66578 | 7ztw | cluster_22347_22347 | cluster_22347 | cluster_22347 | False | True | True | Q66578 | ... | True | high | high | 13.382812 | 13.382812 | A1 | B1 | False | False | False |
4 | test | 8ard__A1_Q64331--8ard__A2_Q64331 | 8ard | cluster_34280_34280 | cluster_34280 | cluster_34280 | False | True | True | Q64331 | ... | True | high | high | 444.750000 | 444.750000 | A1 | A2 | False | False | False |
5 | test | 7wbt__A1_Q288C4--7wbt__B1_Q288C4 | 7wbt | cluster_7371_7371 | cluster_7371 | cluster_7371 | True | True | True | Q288C4 | ... | True | high | high | 169.500000 | 169.500000 | A1 | B1 | False | False | False |
6 | test | 7z7o__A1_A0A1S4NYF2--7z7o__C1_A0A1S4NYF2 | 7z7o | cluster_91_91 | cluster_91 | cluster_91 | False | True | True | A0A1S4NYF2 | ... | True | high | high | 10.632812 | 10.632812 | A1 | C1 | False | False | False |
7 | test | 8pte__A1_P00698--8pte__A2_P00698 | 8pte | cluster_274_274 | cluster_274 | cluster_274 | False | True | True | P00698 | ... | True | high | high | 254.375000 | 254.375000 | A1 | A2 | False | False | True |
8 | test | 7yka__B1_Q9Y3D6--7yka__A1_Q9Y3D6 | 7yka | cluster_13298_13298 | cluster_13298 | cluster_13298 | False | True | True | Q9Y3D6 | ... | True | high | high | 132.875000 | 132.875000 | B1 | A1 | False | False | False |
9 | test | 7ykv__B1_Q58241--7ykv__A1_Q58241 | 7ykv | cluster_5358_5358 | cluster_5358 | cluster_5358 | False | True | True | Q58241 | ... | True | high | high | 171.625000 | 171.625000 | B1 | A1 | False | False | False |
10 | test | 8a60__A1_P06971--8a60__B1_Q38162 | 8a60 | cluster_12107_26846 | cluster_12107 | cluster_26846 | False | True | True | P06971 | ... | True | high | high | 288.000000 | 2.734375 | A1 | B1 | False | False | False |
11 | test | 7t5y__A1_P0A7E1--7t5y__B1_P0A7E1 | 7t5y | cluster_7939_7939 | cluster_7939 | cluster_7939 | False | True | True | P0A7E1 | ... | True | high | high | 735.500000 | 735.500000 | A1 | B1 | False | False | True |
12 | test | 8oru__A1_O93732--8oru__B1_O93732 | 8oru | cluster_14951_14951 | cluster_14951 | cluster_14951 | False | True | True | O93732 | ... | True | high | high | 103.187500 | 103.187500 | A1 | B1 | False | False | True |
13 | test | 7z6m__A1_A0A0H3LM39--7z6m__A2_A0A0H3LM39 | 7z6m | cluster_5362_5362 | cluster_5362 | cluster_5362 | False | True | True | A0A0H3LM39 | ... | True | high | high | 533.000000 | 533.000000 | A1 | A2 | False | False | False |
14 | test | 8cnx__A1_Q68T42--8cnx__B1_Q68T42 | 8cnx | cluster_609_609 | cluster_609 | cluster_609 | False | True | True | Q68T42 | ... | True | high | high | 45.000000 | 45.000000 | A1 | B1 | False | False | True |
15 | test | 7wwo__B1_Q5SH57--7wwo__A1_Q5SH57 | 7wwo | cluster_5115_5115 | cluster_5115 | cluster_5115 | False | True | True | Q5SH57 | ... | True | high | high | 2.939453 | 2.939453 | B1 | A1 | False | False | False |
16 | test | 8d0m__A1_P28907--8d0m__A2_P28907 | 8d0m | cluster_2975_2975 | cluster_2975 | cluster_2975 | True | True | True | P28907 | ... | True | high | high | 48.312500 | 48.312500 | A1 | A2 | False | False | True |
17 | test | 8avu__A1_Q8GPI4--8avu__A2_Q8GPI4 | 8avu | cluster_29712_29712 | cluster_29712 | cluster_29712 | False | True | True | Q8GPI4 | ... | True | high | high | 11.882812 | 11.882812 | A1 | A2 | False | False | False |
18 | test | 8i2e__A1_O34841--8i2e__B1_P54421 | 8i2e | cluster_11087_12465 | cluster_12465 | cluster_11087 | True | True | True | O34841 | ... | True | high | high | 9.031250 | 865.000000 | A1 | B1 | False | False | True |
19 | test | 8aeu__A1_Q00987--8aeu__A2_Q00987 | 8aeu | cluster_1537_1537 | cluster_1537 | cluster_1537 | False | True | True | Q00987 | ... | True | high | high | 350.750000 | 350.750000 | A1 | A2 | False | False | True |
20 | test | 7vso__A1_P02945--7vso__A2_P02945 | 7vso | cluster_1035_1035 | cluster_1035 | cluster_1035 | False | True | True | P02945 | ... | True | high | high | 267.250000 | 267.250000 | A1 | A2 | False | False | False |
21 | test | 7yuj__B1_Q9BYM8--7yuj__A1_Q9BYM8 | 7yuj | cluster_19439_19439 | cluster_19439 | cluster_19439 | True | True | True | Q9BYM8 | ... | True | high | high | 266.750000 | 266.750000 | B1 | A1 | False | False | True |
22 | test | 8pvm__A1_P29166--8pvm__B1_P29166 | 8pvm | cluster_6440_6440 | cluster_6440 | cluster_6440 | False | True | True | P29166 | ... | True | high | high | 392.500000 | 392.500000 | A1 | B1 | False | False | True |
23 | test | 7yo8__A1_P60520--7yo8__A2_P60520 | 7yo8 | cluster_1022_1022 | cluster_1022 | cluster_1022 | False | True | True | P60520 | ... | True | high | high | 385.500000 | 385.500000 | A1 | A2 | False | False | False |
24 | test | 7y51__A1_Q8RBF4--7y51__A2_Q8RBF4 | 7y51 | cluster_711_711 | cluster_711 | cluster_711 | False | True | True | Q8RBF4 | ... | True | high | high | 954.500000 | 954.500000 | A1 | A2 | False | False | False |
25 | test | 7tvh__B1_Q9I2Q1--7tvh__A1_Q9I2Q1 | 7tvh | cluster_8106_8106 | cluster_8106 | cluster_8106 | False | True | True | Q9I2Q1 | ... | True | high | high | 209.125000 | 209.125000 | B1 | A1 | False | False | True |
26 | test | 8bwv__D2_A0A482M8M0--8bwv__A1_A0A482M8M0 | 8bwv | cluster_27613_27613 | cluster_27613 | cluster_27613 | False | True | True | A0A482M8M0 | ... | True | high | high | 395.000000 | 395.000000 | D2 | A1 | False | False | False |
27 | test | 7t91__A1_P08151--7t91__B1_P08151 | 7t91 | cluster_1801_1801 | cluster_1801 | cluster_1801 | False | True | True | P08151 | ... | True | high | high | 321.250000 | 321.250000 | A1 | B1 | False | False | False |
28 | test | 7zoo__B1_A0A979GQH9--7zoo__A1_A0A979GQH9 | 7zoo | cluster_32633_32633 | cluster_32633 | cluster_32633 | False | True | True | A0A979GQH9 | ... | True | high | high | 419.250000 | 419.250000 | B1 | A1 | False | False | False |
29 | test | 7x4b__A1_A0A2D0TCG3--7x4b__B1_A0A2D0TCG3 | 7x4b | cluster_7334_7334 | cluster_7334 | cluster_7334 | False | True | True | A0A2D0TCG3 | ... | True | high | high | 9.453125 | 9.453125 | A1 | B1 | False | False | False |
30 rows × 34 columns
Finding the existing local filepaths for systems (without re-writing them per system)#
from pinder.core import get_systems
local_paths = {}
for system in get_systems(list(af2_apo.id)):
local_paths[system.entry.id] = system.filepaths
0%| | 0/30 [00:00<?, ?it/s]
2024-11-15 12:04:22,514 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=13
3%|▎ | 1/30 [00:02<01:16, 2.63s/it]
2024-11-15 12:04:24,637 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
7%|▋ | 2/30 [00:03<00:48, 1.73s/it]
2024-11-15 12:04:25,729 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=9
10%|█ | 3/30 [00:05<00:43, 1.62s/it]
2024-11-15 12:04:27,213 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=13
13%|█▎ | 4/30 [00:06<00:43, 1.67s/it]
2024-11-15 12:04:28,954 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
17%|█▋ | 5/30 [00:08<00:42, 1.71s/it]
2024-11-15 12:04:30,741 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
20%|██ | 6/30 [00:10<00:44, 1.84s/it]
2024-11-15 12:04:32,829 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=19
23%|██▎ | 7/30 [00:13<00:48, 2.11s/it]
2024-11-15 12:04:35,709 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=4323
27%|██▋ | 8/30 [03:56<26:33, 72.42s/it]
2024-11-15 12:08:18,457 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=15
30%|███ | 9/30 [03:57<17:32, 50.11s/it]
2024-11-15 12:08:19,516 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
33%|███▎ | 10/30 [03:58<11:40, 35.01s/it]
2024-11-15 12:08:20,722 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=24
37%|███▋ | 11/30 [04:00<07:54, 24.99s/it]
2024-11-15 12:08:22,987 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
40%|████ | 12/30 [04:02<05:19, 17.73s/it]
2024-11-15 12:08:24,113 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
43%|████▎ | 13/30 [04:03<03:35, 12.70s/it]
2024-11-15 12:08:25,221 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=19
47%|████▋ | 14/30 [04:04<02:28, 9.29s/it]
2024-11-15 12:08:26,655 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=421
50%|█████ | 15/30 [04:27<03:18, 13.25s/it]
2024-11-15 12:08:49,076 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
53%|█████▎ | 16/30 [04:27<02:13, 9.53s/it]
2024-11-15 12:08:49,961 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=163
57%|█████▋ | 17/30 [04:37<02:02, 9.46s/it]
2024-11-15 12:08:59,257 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=19
60%|██████ | 18/30 [04:38<01:23, 6.93s/it]
2024-11-15 12:09:00,302 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
63%|██████▎ | 19/30 [04:38<00:55, 5.05s/it]
2024-11-15 12:09:00,979 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=287
67%|██████▋ | 20/30 [04:51<01:13, 7.36s/it]
2024-11-15 12:09:13,717 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=187
70%|███████ | 21/30 [05:02<01:14, 8.33s/it]
2024-11-15 12:09:24,302 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
73%|███████▎ | 22/30 [05:03<00:48, 6.09s/it]
2024-11-15 12:09:25,160 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=127
77%|███████▋ | 23/30 [05:11<00:47, 6.80s/it]
2024-11-15 12:09:33,636 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=15
80%|████████ | 24/30 [05:12<00:30, 5.08s/it]
2024-11-15 12:09:34,700 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
83%|████████▎ | 25/30 [05:13<00:19, 3.86s/it]
2024-11-15 12:09:35,707 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=43
87%|████████▋ | 26/30 [05:16<00:13, 3.44s/it]
2024-11-15 12:09:38,181 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=15
90%|█████████ | 27/30 [05:17<00:08, 2.70s/it]
2024-11-15 12:09:39,142 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
93%|█████████▎| 28/30 [05:18<00:04, 2.17s/it]
2024-11-15 12:09:40,080 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=17
97%|█████████▋| 29/30 [05:19<00:01, 1.84s/it]
2024-11-15 12:09:41,151 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=11
100%|██████████| 30/30 [05:19<00:00, 1.43s/it]
100%|██████████| 30/30 [05:19<00:00, 10.65s/it]
It should be cached, no need to download if you re-run#
local_paths = {}
for system in get_systems(list(af2_apo.id)):
local_paths[system.entry.id] = system.filepaths
local_paths
0%| | 0/30 [00:00<?, ?it/s]
7%|▋ | 2/30 [00:00<00:01, 15.29it/s]
13%|█▎ | 4/30 [00:00<00:01, 17.17it/s]
20%|██ | 6/30 [00:00<00:01, 13.80it/s]
27%|██▋ | 8/30 [00:00<00:01, 12.90it/s]
33%|███▎ | 10/30 [00:00<00:01, 13.72it/s]
40%|████ | 12/30 [00:00<00:01, 13.94it/s]
47%|████▋ | 14/30 [00:01<00:01, 13.59it/s]
53%|█████▎ | 16/30 [00:01<00:00, 14.54it/s]
60%|██████ | 18/30 [00:01<00:00, 15.30it/s]
67%|██████▋ | 20/30 [00:01<00:00, 15.60it/s]
73%|███████▎ | 22/30 [00:01<00:00, 15.66it/s]
80%|████████ | 24/30 [00:01<00:00, 14.99it/s]
87%|████████▋ | 26/30 [00:01<00:00, 15.56it/s]
93%|█████████▎| 28/30 [00:01<00:00, 15.74it/s]
100%|██████████| 30/30 [00:01<00:00, 16.28it/s]
100%|██████████| 30/30 [00:01<00:00, 15.05it/s]
{'7ubk__B1_P03047--7ubk__A1_P03047': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7ubk__B1_P03047-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7ubk__A1_P03047-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/4mo1__A1_P03047.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/4mo1__A1_P03047.pdb'),
'pred_receptor': None,
'pred_ligand': None},
'7zj1__B1_P55265--7zj1__A1_P55265': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7zj1__B1_P55265-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7zj1__A1_P55265-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2mdr__A1_P55265.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2mdr__A1_P55265.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P55265.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P55265.pdb')},
'7zjc__A1_P0DV83--7zjc__A2_P0DV83': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7zjc__A1_P0DV83-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7zjc__A2_P0DV83-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7qdv__A1_P0DV83.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7qdv__A1_P0DV83.pdb'),
'pred_receptor': None,
'pred_ligand': None},
'7ztw__A1_Q66578--7ztw__B1_Q66578': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7ztw__A1_Q66578-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7ztw__B1_Q66578-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7zu3__A1_Q66578.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7zu4__A1_Q66578.pdb'),
'pred_receptor': None,
'pred_ligand': None},
'8ard__A1_Q64331--8ard__A2_Q64331': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8ard__A1_Q64331-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8ard__A2_Q64331-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2ld3__A1_Q64331.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2ld3__A1_Q64331.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q64331.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q64331.pdb')},
'7wbt__A1_Q288C4--7wbt__B1_Q288C4': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7wbt__A1_Q288C4-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7wbt__B1_Q288C4-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7wbu__A1_Q288C4.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7wbu__A1_Q288C4.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q288C4.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q288C4.pdb')},
'7z7o__A1_A0A1S4NYF2--7z7o__C1_A0A1S4NYF2': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7z7o__A1_A0A1S4NYF2-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7z7o__C1_A0A1S4NYF2-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7z7p__A1_A0A1S4NYF2.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7z7p__A1_A0A1S4NYF2.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__A0A1S4NYF2.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__A0A1S4NYF2.pdb')},
'8pte__A1_P00698--8pte__A2_P00698': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8pte__A1_P00698-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8pte__A2_P00698-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2q0m__A1_P00698.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2q0m__A1_P00698.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P00698.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P00698.pdb')},
'7yka__B1_Q9Y3D6--7yka__A1_Q9Y3D6': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7yka__B1_Q9Y3D6-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7yka__A1_Q9Y3D6-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/1nzn__A1_Q9Y3D6.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/1nzn__A1_Q9Y3D6.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q9Y3D6.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q9Y3D6.pdb')},
'7ykv__B1_Q58241--7ykv__A1_Q58241': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7ykv__B1_Q58241-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7ykv__A1_Q58241-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/6tvv__A1_Q58241.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/6tvv__A1_Q58241.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q58241.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q58241.pdb')},
'8a60__A1_P06971--8a60__B1_Q38162': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8a60__A1_P06971-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8a60__B1_Q38162-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2fcp__A1_P06971.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7qjf__A1_Q38162.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P06971.pdb'),
'pred_ligand': None},
'7t5y__A1_P0A7E1--7t5y__B1_P0A7E1': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7t5y__A1_P0A7E1-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7t5y__B1_P0A7E1-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/1f76__A1_P0A7E1.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/1f76__A1_P0A7E1.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P0A7E1.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P0A7E1.pdb')},
'8oru__A1_O93732--8oru__B1_O93732': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8oru__A1_O93732-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8oru__B1_O93732-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8ork__A1_O93732.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8ork__A1_O93732.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__O93732.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__O93732.pdb')},
'7z6m__A1_A0A0H3LM39--7z6m__A2_A0A0H3LM39': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7z6m__A1_A0A0H3LM39-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7z6m__A2_A0A0H3LM39-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/5tsb__A1_A0A0H3LM39.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/5tsb__A1_A0A0H3LM39.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__A0A0H3LM39.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__A0A0H3LM39.pdb')},
'8cnx__A1_Q68T42--8cnx__B1_Q68T42': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8cnx__A1_Q68T42-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8cnx__B1_Q68T42-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7goq__A1_Q68T42.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7gqn__A1_Q68T42.pdb'),
'pred_receptor': None,
'pred_ligand': None},
'7wwo__B1_Q5SH57--7wwo__A1_Q5SH57': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7wwo__B1_Q5SH57-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7wwo__A1_Q5SH57-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7wrk__A1_Q5SH57.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7wrk__A1_Q5SH57.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q5SH57.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q5SH57.pdb')},
'8d0m__A1_P28907--8d0m__A2_P28907': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8d0m__A1_P28907-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8d0m__A2_P28907-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8p8c__A1_P28907.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8p8c__A1_P28907.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P28907.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P28907.pdb')},
'8avu__A1_Q8GPI4--8avu__A2_Q8GPI4': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8avu__A1_Q8GPI4-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8avu__A2_Q8GPI4-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8avs__A1_Q8GPI4.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8avs__A1_Q8GPI4.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8GPI4.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8GPI4.pdb')},
'8i2e__A1_O34841--8i2e__B1_P54421': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8i2e__A1_O34841-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8i2e__B1_P54421-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2rsx__A1_O34841.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8i2d__A1_P54421.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__O34841.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P54421.pdb')},
'8aeu__A1_Q00987--8aeu__A2_Q00987': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8aeu__A1_Q00987-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8aeu__A2_Q00987-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/6kzu__A1_Q00987.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/6kzu__A1_Q00987.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q00987.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q00987.pdb')},
'7vso__A1_P02945--7vso__A2_P02945': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7vso__A1_P02945-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7vso__A2_P02945-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/5b34__A1_P02945.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/5b34__A1_P02945.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P02945.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P02945.pdb')},
'7yuj__B1_Q9BYM8--7yuj__A1_Q9BYM8': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7yuj__B1_Q9BYM8-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7yuj__A1_Q9BYM8-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8bvl__A1_Q9BYM8.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8bvl__A1_Q9BYM8.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q9BYM8.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q9BYM8.pdb')},
'8pvm__A1_P29166--8pvm__B1_P29166': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8pvm__A1_P29166-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8pvm__B1_P29166-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8aio__A1_P29166.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/5byq__A1_P29166.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P29166.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P29166.pdb')},
'7yo8__A1_P60520--7yo8__A2_P60520': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7yo8__A1_P60520-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7yo8__A2_P60520-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7lk3__A1_P60520.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7lk3__A1_P60520.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P60520.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P60520.pdb')},
'7y51__A1_Q8RBF4--7y51__A2_Q8RBF4': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7y51__A1_Q8RBF4-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7y51__A2_Q8RBF4-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7fbw__A1_Q8RBF4.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7fbw__A1_Q8RBF4.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8RBF4.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q8RBF4.pdb')},
'7tvh__B1_Q9I2Q1--7tvh__A1_Q9I2Q1': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7tvh__B1_Q9I2Q1-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7tvh__A1_Q9I2Q1-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/4fgd__A1_Q9I2Q1.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/4f4m__A1_Q9I2Q1.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q9I2Q1.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__Q9I2Q1.pdb')},
'8bwv__D2_A0A482M8M0--8bwv__A1_A0A482M8M0': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8bwv__D2_A0A482M8M0-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/8bwv__A1_A0A482M8M0-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8bbz__A1_A0A482M8M0.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/8bhu__A1_A0A482M8M0.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__A0A482M8M0.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__A0A482M8M0.pdb')},
'7t91__A1_P08151--7t91__B1_P08151': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7t91__A1_P08151-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7t91__B1_P08151-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2gli__C1_P08151.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/2gli__C1_P08151.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P08151.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__P08151.pdb')},
'7zoo__B1_A0A979GQH9--7zoo__A1_A0A979GQH9': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7zoo__B1_A0A979GQH9-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7zoo__A1_A0A979GQH9-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7zoh__A1_A0A979GQH9.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7zoh__A1_A0A979GQH9.pdb'),
'pred_receptor': None,
'pred_ligand': None},
'7x4b__A1_A0A2D0TCG3--7x4b__B1_A0A2D0TCG3': {'holo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7x4b__A1_A0A2D0TCG3-R.pdb'),
'holo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/test_set_pdbs/7x4b__B1_A0A2D0TCG3-L.pdb'),
'apo_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7x31__A1_A0A2D0TCG3.pdb'),
'apo_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/7x31__A1_A0A2D0TCG3.pdb'),
'pred_receptor': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__A0A2D0TCG3.pdb'),
'pred_ligand': PosixPath('/home/runner/.local/share/pinder/2024-02/pdbs/af__A0A2D0TCG3.pdb')}}
Pinder filters#
from pinder.core.loader import filters

# Example system that is pushed through the filter pipeline below.
pinder_id = "1df0__A1_Q07009--1df0__B1_Q64537"
dimer = PinderSystem(entry=pinder_id)

# Base filters are used as predicates: a system stays in the pipeline only
# when the filter returns a truthy value for it.
base_filters = [
    filters.FilterByMissingHolo(),
    filters.FilterSubByContacts(min_contacts=5, radius=10.0, calpha_only=True),
    filters.FilterByHoloElongation(max_var_contribution=0.92),
    filters.FilterDetachedHolo(radius=12, max_components=2),
]

# Sub filters are used as transforms: each one is called on a system and its
# return value replaces that system in the pipeline.
sub_filters = [
    filters.FilterSubByAtomTypes(min_atom_types=4),
    filters.FilterByHoloOverlap(min_overlap=5),
    filters.FilterByHoloSeqIdentity(min_sequence_identity=0.8),
    filters.FilterSubLengths(min_length=0, max_length=1000),
    filters.FilterSubRmsds(rmsd_cutoff=7.5),
    filters.FilterByElongation(max_var_contribution=0.92),
    filters.FilterDetachedSub(radius=12, max_components=2),
]

dimers = [dimer]

# NOTE: map()/filter() capture the callable at the moment each stage is
# created. A generator expression written inside these loops would look up
# the loop variable only when it is finally consumed, by which time it refers
# to the *last* filter in the list -- so every stage would silently apply the
# same filter and the others would never run.
for sub_filter in sub_filters:
    dimers = map(sub_filter, dimers)
for base_filter in base_filters:
    dimers = filter(base_filter, dimers)

dimers  # still lazy: no filter runs until this iterator is consumed
2024-11-15 12:09:43,643 | pinder.core.utils.cloud:375 | INFO : Gsutil process_many=download_to_filename, threads=4, items=7
<generator object <genexpr> at 0x7ff54f6061f0>
# Materializing the lazy pipeline is what actually runs the filters; index 0
# picks the first system that made it through.
list(dimers)[0]
PinderSystem(
entry = IndexEntry(
(
'split',
'invalid',
),
(
'id',
'1df0__A1_Q07009--1df0__B1_Q64537',
),
(
'pdb_id',
'1df0',
),
(
'cluster_id',
'cluster_1030_1030',
),
(
'cluster_id_R',
'cluster_1030',
),
(
'cluster_id_L',
'cluster_1030',
),
(
'pinder_s',
False,
),
(
'pinder_xl',
False,
),
(
'pinder_af2',
False,
),
(
'uniprot_R',
'Q07009',
),
(
'uniprot_L',
'Q64537',
),
(
'holo_R_pdb',
'1df0__A1_Q07009-R.pdb',
),
(
'holo_L_pdb',
'1df0__B1_Q64537-L.pdb',
),
(
'predicted_R_pdb',
'af__Q07009.pdb',
),
(
'predicted_L_pdb',
'af__Q64537.pdb',
),
(
'apo_R_pdb',
'',
),
(
'apo_L_pdb',
'',
),
(
'apo_R_pdbs',
'',
),
(
'apo_L_pdbs',
'',
),
(
'holo_R',
True,
),
(
'holo_L',
True,
),
(
'predicted_R',
True,
),
(
'predicted_L',
True,
),
(
'apo_R',
False,
),
(
'apo_L',
False,
),
(
'apo_R_quality',
'',
),
(
'apo_L_quality',
'',
),
(
'chain1_neff',
492.25,
),
(
'chain2_neff',
528.0,
),
(
'chain_R',
'A1',
),
(
'chain_L',
'B1',
),
(
'contains_antibody',
False,
),
(
'contains_antigen',
False,
),
(
'contains_enzyme',
True,
),
)
native=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1df0__A1_Q07009--1df0__B1_Q64537.pdb,
uniprot_map=None,
pinder_id='1df0__A1_Q07009--1df0__B1_Q64537',
atom_array=<class 'biotite.structure.AtomArray'> with shape (6391,),
pdb_engine='fastpdb',
)
holo_receptor=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1df0__A1_Q07009-R.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1df0__A1_Q07009-R.parquet,
pinder_id='1df0__A1_Q07009-R',
atom_array=<class 'biotite.structure.AtomArray'> with shape (4964,),
pdb_engine='fastpdb',
)
holo_ligand=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/1df0__B1_Q64537-L.pdb,
uniprot_map=/home/runner/.local/share/pinder/2024-02/mappings/1df0__B1_Q64537-L.parquet,
pinder_id='1df0__B1_Q64537-L',
atom_array=<class 'biotite.structure.AtomArray'> with shape (1427,),
pdb_engine='fastpdb',
)
apo_receptor=None
apo_ligand=None
pred_receptor=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__Q07009.pdb,
uniprot_map=None,
pinder_id='af__Q07009',
atom_array=<class 'biotite.structure.AtomArray'> with shape (5631,),
pdb_engine='fastpdb',
)
pred_ligand=Structure(
filepath=/home/runner/.local/share/pinder/2024-02/pdbs/af__Q64537.pdb,
uniprot_map=None,
pinder_id='af__Q64537',
atom_array=<class 'biotite.structure.AtomArray'> with shape (2006,),
pdb_engine='fastpdb',
)
)