{ "cells": [ { "cell_type": "markdown", "id": "4293a2f3-c374-46a3-9c6b-e953a5982130", "metadata": {}, "source": [ "# Pinder system" ] }, { "cell_type": "code", "execution_count": 1, "id": "656e3d46-72b2-42fd-b61c-f13d1a113bce", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "from pinder.core import PinderSystem, get_index\n" ] }, { "cell_type": "markdown", "id": "00427ee6-105a-4901-b647-52d667cf9eac", "metadata": {}, "source": [ "Example usage of the Pinder index API is shown below. For more detailed usage examples, check the `pinder-index` notebook." ] }, { "cell_type": "code", "execution_count": 2, "id": "71223020-07c6-4a55-ba37-2343e7b4c400", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
splitidpdb_idcluster_idcluster_id_Rcluster_id_Lpinder_spinder_xlpinder_af2uniprot_R...apo_Lapo_R_qualityapo_L_qualitychain1_neffchain2_neffchain_Rchain_Lcontains_antibodycontains_antigencontains_enzyme
0test3k1i__D1_O25709--3k1i__A1_O254483k1icluster_26031_5179cluster_26031cluster_5179TrueTrueFalseO25709...Truehighhigh12.351562514.000000D1A1FalseFalseFalse
1test6qta__A1_G0SHE6--6qta__B1_G0SC296qtacluster_11327_11328cluster_11327cluster_11328FalseTrueFalseG0SHE6...Truehighhigh115.750000689.500000A1B1FalseFalseFalse
2test3vf0__B1_Q8IY67--3vf0__A2_P182063vf0cluster_5612_993cluster_993cluster_5612TrueTrueFalseQ8IY67...Truehighhigh251.12500035.531250B1A2FalseFalseFalse
3test4aye__D1_Q9JXV4--4aye__A1_P086034ayecluster_3949_4866cluster_3949cluster_4866TrueTrueFalseQ9JXV4...Truehighhigh14.546875310.000000D1A1FalseFalseFalse
4test2w8b__A1_P0A855--2w8b__H1_P0A9122w8bcluster_15535_1924cluster_15535cluster_1924TrueTrueFalseP0A855...Truehighhigh308.2500001150.000000A1H1FalseFalseFalse
5test5y4r__A1_O87131--5y4r__B1_Q9HVI15y4rcluster_8825_8826cluster_8825cluster_8826TrueTrueFalseO87131...Truehighhigh610.000000144.875000A1B1FalseFalseTrue
6test3egv__A1_Q84BQ9--3egv__B1_Q5SLP63egvcluster_33015_371cluster_33015cluster_371TrueTrueFalseQ84BQ9...Truehighhigh806.500000637.000000A1B1FalseFalseTrue
7test6wjc__A1_P11229--6wjc__B1_Q8QGR06wjccluster_1057_1356cluster_1057cluster_1356TrueTrueFalseP11229...Truehighhigh462.750000518.500000A1B1FalseFalseTrue
8test6tx3__B1_Q9NWY4--6tx3__A1_Q9UGN56tx3cluster_11866_335cluster_11866cluster_335TrueTrueFalseQ9NWY4...Truehighhigh59.468750213.000000B1A1FalseFalseTrue
9test2grx__A1_P06971--2grx__C1_P029292grxcluster_12107_8897cluster_12107cluster_8897TrueTrueFalseP06971...Truehighhigh288.000000562.500000A1C1FalseFalseFalse
10test3kbu__A1_P11277--3kbu__D1_P161573kbucluster_16732_8658cluster_8658cluster_16732TrueTrueFalseP11277...Truehighhigh229.625000210.625000A1D1FalseFalseFalse
11test2j0t__A1_P03956--2j0t__D1_P010332j0tcluster_939_940cluster_939cluster_940TrueTrueFalseP03956...Truehighhigh612.50000068.312500A1D1FalseFalseTrue
12test8a60__A1_P06971--8a60__B1_Q381628a60cluster_12107_26846cluster_12107cluster_26846FalseTrueTrueP06971...Truehighhigh288.0000002.734375A1B1FalseFalseFalse
13test4je4__A1_Q06124--4je4__B1_P027514je4cluster_1465_1605cluster_1465cluster_1605TrueTrueFalseQ06124...Truehighhigh611.00000075.062500A1B1FalseFalseFalse
14test4uae__A1_O00629--4uae__B1_P313454uaecluster_10331_1373cluster_1373cluster_10331FalseTrueFalseO00629...Truehighhigh240.7500003.058594A1B1FalseFalseFalse
15test3k9m__A1_P07858--3k9m__B1_P010403k9mcluster_4628_6704cluster_6704cluster_4628TrueTrueFalseP07858...Truehighhigh770.000000429.750000A1B1FalseFalseTrue
16test2wo2__A1_P54764--2wo2__B1_P527992wo2cluster_130_8064cluster_130cluster_8064TrueTrueFalseP54764...Truehighhigh567.500000104.562500A1B1FalseFalseTrue
17test5dob__A1_P16794--5dob__B1_P167915dobcluster_12247_23692cluster_12247cluster_23692FalseTrueFalseP16794...Truehighhigh3.3125003.855469A1B1FalseFalseFalse
18test8i2e__A1_O34841--8i2e__B1_P544218i2ecluster_11087_12465cluster_12465cluster_11087TrueTrueTrueO34841...Truehighhigh9.031250865.000000A1B1FalseFalseTrue
19test1zlh__A1_P00730--1zlh__B1_Q5EPH21zlhcluster_2416_8594cluster_2416cluster_8594TrueTrueFalseP00730...Truehighhigh541.000000637.000000A1B1FalseFalseTrue
20test6yev__C1_P0A744--6yev__A1_P0AA256yevcluster_4231_621cluster_4231cluster_621TrueTrueFalseP0A744...Truehighhigh1099.0000001440.000000C1A1FalseFalseTrue
21test1dtd__A1_P48052--1dtd__B1_P815111dtdcluster_2416_9476cluster_2416cluster_9476TrueTrueFalseP48052...Truehighhigh581.0000008.781250A1B1FalseFalseTrue
22test7fn1__B1_P32357--7fn1__A1_P333347fn1cluster_635_7263cluster_7263cluster_635TrueTrueFalseP32357...Truehighhigh8.07031269.062500B1A1FalseFalseFalse
23test1yu6__A1_P00780--1yu6__C1_P683901yu6cluster_1952_5091cluster_1952cluster_5091TrueTrueFalseP00780...Truehighhigh1000.000000704.000000A1C1FalseFalseTrue
24test6s8v__B1_P08195--6s8v__A1_P801886s8vcluster_19347_5331cluster_19347cluster_5331TrueTrueFalseP08195...Truehighhigh511.00000034.812500B1A1FalseFalseFalse
25test5ja2__A1_P11454--5ja2__B1_Q9I1695ja2cluster_4489_7234cluster_7234cluster_4489TrueTrueFalseP11454...Truehighhigh641.500000545.500000A1B1FalseFalseTrue
26test6f3z__A1_P0ADC3--6f3z__B1_P613166f3zcluster_12985_22189cluster_22189cluster_12985TrueTrueFalseP0ADC3...Truehighhigh402.500000368.000000A1B1FalseFalseFalse
27test8gt0__A1_Q8I6U4--8gt0__B1_P010408gt0cluster_2085_4628cluster_2085cluster_4628TrueTrueFalseQ8I6U4...Truehighhigh651.000000429.750000A1B1FalseFalseTrue
28test2gkv__A1_P00777--2gkv__B1_P683902gkvcluster_5091_5268cluster_5268cluster_5091TrueTrueFalseP00777...Truehighhigh409.000000704.000000A1B1FalseFalseTrue
29test6m4v__A1_P0AEX9--6m4v__B1_P629426m4vcluster_1772_409cluster_409cluster_1772TrueTrueFalseP0AEX9...Truehighhigh171.2500001408.000000A1B1FalseFalseTrue
30test4us1__B1_Q07889--4us1__A1_P011124us1cluster_10806_437cluster_10806cluster_437TrueTrueFalseQ07889...Truehighhigh183.375000946.500000B1A1FalseFalseFalse
31test1tec__A1_P04072--1tec__B1_P010511teccluster_1952_387cluster_1952cluster_387TrueTrueFalseP04072...Truehighhigh1017.500000133.750000A1B1FalseFalseTrue
32test1zhh__A1_P54300--1zhh__B1_P543021zhhcluster_3962_8641cluster_3962cluster_8641TrueTrueFalseP54300...Truehighhigh23.531250524.500000A1B1FalseFalseFalse
33test5n47__B1_P02751--5n47__A1_P801885n47cluster_1605_5331cluster_1605cluster_5331TrueTrueFalseP02751...Truehighhigh75.06250034.812500B1A1FalseFalseFalse
34test7b80__A1_G3I8R9--7b80__B1_Q9BVA67b80cluster_1039_643cluster_643cluster_1039TrueTrueFalseG3I8R9...Truehighhigh1069.000000526.500000A1B1FalseFalseTrue
\n", "

35 rows × 34 columns

\n", "
" ], "text/plain": [ " split id pdb_id cluster_id \\\n", "0 test 3k1i__D1_O25709--3k1i__A1_O25448 3k1i cluster_26031_5179 \n", "1 test 6qta__A1_G0SHE6--6qta__B1_G0SC29 6qta cluster_11327_11328 \n", "2 test 3vf0__B1_Q8IY67--3vf0__A2_P18206 3vf0 cluster_5612_993 \n", "3 test 4aye__D1_Q9JXV4--4aye__A1_P08603 4aye cluster_3949_4866 \n", "4 test 2w8b__A1_P0A855--2w8b__H1_P0A912 2w8b cluster_15535_1924 \n", "5 test 5y4r__A1_O87131--5y4r__B1_Q9HVI1 5y4r cluster_8825_8826 \n", "6 test 3egv__A1_Q84BQ9--3egv__B1_Q5SLP6 3egv cluster_33015_371 \n", "7 test 6wjc__A1_P11229--6wjc__B1_Q8QGR0 6wjc cluster_1057_1356 \n", "8 test 6tx3__B1_Q9NWY4--6tx3__A1_Q9UGN5 6tx3 cluster_11866_335 \n", "9 test 2grx__A1_P06971--2grx__C1_P02929 2grx cluster_12107_8897 \n", "10 test 3kbu__A1_P11277--3kbu__D1_P16157 3kbu cluster_16732_8658 \n", "11 test 2j0t__A1_P03956--2j0t__D1_P01033 2j0t cluster_939_940 \n", "12 test 8a60__A1_P06971--8a60__B1_Q38162 8a60 cluster_12107_26846 \n", "13 test 4je4__A1_Q06124--4je4__B1_P02751 4je4 cluster_1465_1605 \n", "14 test 4uae__A1_O00629--4uae__B1_P31345 4uae cluster_10331_1373 \n", "15 test 3k9m__A1_P07858--3k9m__B1_P01040 3k9m cluster_4628_6704 \n", "16 test 2wo2__A1_P54764--2wo2__B1_P52799 2wo2 cluster_130_8064 \n", "17 test 5dob__A1_P16794--5dob__B1_P16791 5dob cluster_12247_23692 \n", "18 test 8i2e__A1_O34841--8i2e__B1_P54421 8i2e cluster_11087_12465 \n", "19 test 1zlh__A1_P00730--1zlh__B1_Q5EPH2 1zlh cluster_2416_8594 \n", "20 test 6yev__C1_P0A744--6yev__A1_P0AA25 6yev cluster_4231_621 \n", "21 test 1dtd__A1_P48052--1dtd__B1_P81511 1dtd cluster_2416_9476 \n", "22 test 7fn1__B1_P32357--7fn1__A1_P33334 7fn1 cluster_635_7263 \n", "23 test 1yu6__A1_P00780--1yu6__C1_P68390 1yu6 cluster_1952_5091 \n", "24 test 6s8v__B1_P08195--6s8v__A1_P80188 6s8v cluster_19347_5331 \n", "25 test 5ja2__A1_P11454--5ja2__B1_Q9I169 5ja2 cluster_4489_7234 \n", "26 test 6f3z__A1_P0ADC3--6f3z__B1_P61316 6f3z cluster_12985_22189 \n", "27 test 8gt0__A1_Q8I6U4--8gt0__B1_P01040 8gt0 
cluster_2085_4628 \n", "28 test 2gkv__A1_P00777--2gkv__B1_P68390 2gkv cluster_5091_5268 \n", "29 test 6m4v__A1_P0AEX9--6m4v__B1_P62942 6m4v cluster_1772_409 \n", "30 test 4us1__B1_Q07889--4us1__A1_P01112 4us1 cluster_10806_437 \n", "31 test 1tec__A1_P04072--1tec__B1_P01051 1tec cluster_1952_387 \n", "32 test 1zhh__A1_P54300--1zhh__B1_P54302 1zhh cluster_3962_8641 \n", "33 test 5n47__B1_P02751--5n47__A1_P80188 5n47 cluster_1605_5331 \n", "34 test 7b80__A1_G3I8R9--7b80__B1_Q9BVA6 7b80 cluster_1039_643 \n", "\n", " cluster_id_R cluster_id_L pinder_s pinder_xl pinder_af2 uniprot_R \\\n", "0 cluster_26031 cluster_5179 True True False O25709 \n", "1 cluster_11327 cluster_11328 False True False G0SHE6 \n", "2 cluster_993 cluster_5612 True True False Q8IY67 \n", "3 cluster_3949 cluster_4866 True True False Q9JXV4 \n", "4 cluster_15535 cluster_1924 True True False P0A855 \n", "5 cluster_8825 cluster_8826 True True False O87131 \n", "6 cluster_33015 cluster_371 True True False Q84BQ9 \n", "7 cluster_1057 cluster_1356 True True False P11229 \n", "8 cluster_11866 cluster_335 True True False Q9NWY4 \n", "9 cluster_12107 cluster_8897 True True False P06971 \n", "10 cluster_8658 cluster_16732 True True False P11277 \n", "11 cluster_939 cluster_940 True True False P03956 \n", "12 cluster_12107 cluster_26846 False True True P06971 \n", "13 cluster_1465 cluster_1605 True True False Q06124 \n", "14 cluster_1373 cluster_10331 False True False O00629 \n", "15 cluster_6704 cluster_4628 True True False P07858 \n", "16 cluster_130 cluster_8064 True True False P54764 \n", "17 cluster_12247 cluster_23692 False True False P16794 \n", "18 cluster_12465 cluster_11087 True True True O34841 \n", "19 cluster_2416 cluster_8594 True True False P00730 \n", "20 cluster_4231 cluster_621 True True False P0A744 \n", "21 cluster_2416 cluster_9476 True True False P48052 \n", "22 cluster_7263 cluster_635 True True False P32357 \n", "23 cluster_1952 cluster_5091 True True False P00780 \n", "24 cluster_19347 
cluster_5331 True True False P08195 \n", "25 cluster_7234 cluster_4489 True True False P11454 \n", "26 cluster_22189 cluster_12985 True True False P0ADC3 \n", "27 cluster_2085 cluster_4628 True True False Q8I6U4 \n", "28 cluster_5268 cluster_5091 True True False P00777 \n", "29 cluster_409 cluster_1772 True True False P0AEX9 \n", "30 cluster_10806 cluster_437 True True False Q07889 \n", "31 cluster_1952 cluster_387 True True False P04072 \n", "32 cluster_3962 cluster_8641 True True False P54300 \n", "33 cluster_1605 cluster_5331 True True False P02751 \n", "34 cluster_643 cluster_1039 True True False G3I8R9 \n", "\n", " ... apo_L apo_R_quality apo_L_quality chain1_neff chain2_neff chain_R \\\n", "0 ... True high high 12.351562 514.000000 D1 \n", "1 ... True high high 115.750000 689.500000 A1 \n", "2 ... True high high 251.125000 35.531250 B1 \n", "3 ... True high high 14.546875 310.000000 D1 \n", "4 ... True high high 308.250000 1150.000000 A1 \n", "5 ... True high high 610.000000 144.875000 A1 \n", "6 ... True high high 806.500000 637.000000 A1 \n", "7 ... True high high 462.750000 518.500000 A1 \n", "8 ... True high high 59.468750 213.000000 B1 \n", "9 ... True high high 288.000000 562.500000 A1 \n", "10 ... True high high 229.625000 210.625000 A1 \n", "11 ... True high high 612.500000 68.312500 A1 \n", "12 ... True high high 288.000000 2.734375 A1 \n", "13 ... True high high 611.000000 75.062500 A1 \n", "14 ... True high high 240.750000 3.058594 A1 \n", "15 ... True high high 770.000000 429.750000 A1 \n", "16 ... True high high 567.500000 104.562500 A1 \n", "17 ... True high high 3.312500 3.855469 A1 \n", "18 ... True high high 9.031250 865.000000 A1 \n", "19 ... True high high 541.000000 637.000000 A1 \n", "20 ... True high high 1099.000000 1440.000000 C1 \n", "21 ... True high high 581.000000 8.781250 A1 \n", "22 ... True high high 8.070312 69.062500 B1 \n", "23 ... True high high 1000.000000 704.000000 A1 \n", "24 ... 
True high high 511.000000 34.812500 B1 \n", "25 ... True high high 641.500000 545.500000 A1 \n", "26 ... True high high 402.500000 368.000000 A1 \n", "27 ... True high high 651.000000 429.750000 A1 \n", "28 ... True high high 409.000000 704.000000 A1 \n", "29 ... True high high 171.250000 1408.000000 A1 \n", "30 ... True high high 183.375000 946.500000 B1 \n", "31 ... True high high 1017.500000 133.750000 A1 \n", "32 ... True high high 23.531250 524.500000 A1 \n", "33 ... True high high 75.062500 34.812500 B1 \n", "34 ... True high high 1069.000000 526.500000 A1 \n", "\n", " chain_L contains_antibody contains_antigen contains_enzyme \n", "0 A1 False False False \n", "1 B1 False False False \n", "2 A2 False False False \n", "3 A1 False False False \n", "4 H1 False False False \n", "5 B1 False False True \n", "6 B1 False False True \n", "7 B1 False False True \n", "8 A1 False False True \n", "9 C1 False False False \n", "10 D1 False False False \n", "11 D1 False False True \n", "12 B1 False False False \n", "13 B1 False False False \n", "14 B1 False False False \n", "15 B1 False False True \n", "16 B1 False False True \n", "17 B1 False False False \n", "18 B1 False False True \n", "19 B1 False False True \n", "20 A1 False False True \n", "21 B1 False False True \n", "22 A1 False False False \n", "23 C1 False False True \n", "24 A1 False False False \n", "25 B1 False False True \n", "26 B1 False False False \n", "27 B1 False False True \n", "28 B1 False False True \n", "29 B1 False False True \n", "30 A1 False False False \n", "31 B1 False False True \n", "32 B1 False False False \n", "33 A1 False False False \n", "34 B1 False False True \n", "\n", "[35 rows x 34 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "index = get_index()\n", "hetero_test_apo = index.query(\n", " '(uniprot_L != uniprot_R) and split == \"test\" and (apo_R and apo_L)'\n", ")\n", "hetero_test_apo.reset_index(drop=True, inplace=True)\n", 
"hetero_test_apo\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "aa97b738-8a64-4e43-b1dc-17b226c470fb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'3vf0__B1_Q8IY67--3vf0__A2_P18206'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pinder_id = list(hetero_test_apo.id)[2]\n", "pinder_id" ] }, { "cell_type": "markdown", "id": "1b68d224-b8f2-4037-ad74-439f5345e64e", "metadata": {}, "source": [ "## PinderSystem API - base class representing `Structure`'s in a pinder entry" ] }, { "cell_type": "code", "execution_count": 4, "id": "cf0386fa-aa33-49b5-b0a8-02c5c2b24e37", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PinderSystem(\n", "entry = IndexEntry(\n", " (\n", " 'split',\n", " 'test',\n", " ),\n", " (\n", " 'id',\n", " '3vf0__B1_Q8IY67--3vf0__A2_P18206',\n", " ),\n", " (\n", " 'pdb_id',\n", " '3vf0',\n", " ),\n", " (\n", " 'cluster_id',\n", " 'cluster_5612_993',\n", " ),\n", " (\n", " 'cluster_id_R',\n", " 'cluster_993',\n", " ),\n", " (\n", " 'cluster_id_L',\n", " 'cluster_5612',\n", " ),\n", " (\n", " 'pinder_s',\n", " True,\n", " ),\n", " (\n", " 'pinder_xl',\n", " True,\n", " ),\n", " (\n", " 'pinder_af2',\n", " False,\n", " ),\n", " (\n", " 'uniprot_R',\n", " 'Q8IY67',\n", " ),\n", " (\n", " 'uniprot_L',\n", " 'P18206',\n", " ),\n", " (\n", " 'holo_R_pdb',\n", " '3vf0__B1_Q8IY67-R.pdb',\n", " ),\n", " (\n", " 'holo_L_pdb',\n", " '3vf0__A2_P18206-L.pdb',\n", " ),\n", " (\n", " 'predicted_R_pdb',\n", " 'af__Q8IY67.pdb',\n", " ),\n", " (\n", " 'predicted_L_pdb',\n", " 'af__P18206.pdb',\n", " ),\n", " (\n", " 'apo_R_pdb',\n", " '3smz__A1_Q8IY67.pdb',\n", " ),\n", " (\n", " 'apo_L_pdb',\n", " '5l0h__A1_P18206.pdb',\n", " ),\n", " (\n", " 'apo_R_pdbs',\n", " '3smz__A1_Q8IY67.pdb',\n", " ),\n", " (\n", " 'apo_L_pdbs',\n", " '5l0h__A1_P18206.pdb;5l0f__A1_P18206.pdb;5l0i__A1_P18206.pdb',\n", " ),\n", " (\n", " 'holo_R',\n", " True,\n", " ),\n", " (\n", " 'holo_L',\n", " True,\n", " ),\n", 
" (\n", " 'predicted_R',\n", " True,\n", " ),\n", " (\n", " 'predicted_L',\n", " True,\n", " ),\n", " (\n", " 'apo_R',\n", " True,\n", " ),\n", " (\n", " 'apo_L',\n", " True,\n", " ),\n", " (\n", " 'apo_R_quality',\n", " 'high',\n", " ),\n", " (\n", " 'apo_L_quality',\n", " 'high',\n", " ),\n", " (\n", " 'chain1_neff',\n", " 251.125,\n", " ),\n", " (\n", " 'chain2_neff',\n", " 35.53125,\n", " ),\n", " (\n", " 'chain_R',\n", " 'B1',\n", " ),\n", " (\n", " 'chain_L',\n", " 'A2',\n", " ),\n", " (\n", " 'contains_antibody',\n", " False,\n", " ),\n", " (\n", " 'contains_antigen',\n", " False,\n", " ),\n", " (\n", " 'contains_enzyme',\n", " False,\n", " ),\n", ")\n", "native=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/3vf0__B1_Q8IY67--3vf0__A2_P18206.pdb,\n", " uniprot_map=None,\n", " pinder_id='3vf0__B1_Q8IY67--3vf0__A2_P18206',\n", " atom_array= with shape (3584,),\n", ")\n", "holo_receptor=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/test_set_pdbs/3vf0__B1_Q8IY67-R.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/3vf0__B1_Q8IY67-R.parquet,\n", " pinder_id='3vf0__B1_Q8IY67-R',\n", " atom_array= with shape (2204,),\n", ")\n", "holo_ligand=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,\n", " pinder_id='3vf0__A2_P18206-L',\n", " atom_array= with shape (1380,),\n", ")\n", "apo_receptor=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/3smz__A1_Q8IY67.parquet,\n", " pinder_id='3smz__A1_Q8IY67',\n", " atom_array= with shape (2183,),\n", ")\n", "apo_ligand=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,\n", " 
uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,\n", " pinder_id='5l0h__A1_P18206',\n", " atom_array= with shape (1341,),\n", ")\n", "pred_receptor=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/af__Q8IY67.pdb,\n", " uniprot_map=None,\n", " pinder_id='af__Q8IY67',\n", " atom_array= with shape (4495,),\n", ")\n", "pred_ligand=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/af__P18206.pdb,\n", " uniprot_map=None,\n", " pinder_id='af__P18206',\n", " atom_array= with shape (8664,),\n", ")\n", ")" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Simplest interface - get a single pinder system\n", "ps = PinderSystem(pinder_id)\n", "ps\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "8d0ce8c7-8679-49cb-a47b-0c07a787d9b7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/3vf0__A2_P18206-L.parquet,\n", " pinder_id='3vf0__A2_P18206-L',\n", " atom_array= with shape (1380,),\n", ")" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "holo_L, holo_R = ps.holo_ligand, ps.holo_receptor\n", "pred_L, pred_R = ps.pred_ligand, ps.pred_receptor\n", "apo_L, apo_R = ps.apo_ligand, ps.apo_receptor\n", "\n", "holo_L\n" ] }, { "cell_type": "markdown", "id": "dcb458c6-2411-4530-8d1b-49432f6a9710", "metadata": {}, "source": [ "## Classify system difficulty based on the degree of conformational shift between unbound and bound states" ] }, { "cell_type": "code", "execution_count": 6, "id": "23e418c0-e9a9-4e6f-a368-f28fe552ca3e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Fnat': 0.5421686746987951,\n", " 'Fnonnat': 0.3076923076923077,\n", " 'common_contacts': 45,\n", " 'differing_contacts': 20,\n", 
" 'bound_contacts': 83,\n", " 'unbound_contacts': 65,\n", " 'fnonnat_R': 0.2857142857142857,\n", " 'fnonnat_L': 0.0,\n", " 'fnat_R': 0.5882352941176471,\n", " 'fnat_L': 0.7692307692307693,\n", " 'difficulty': 'Rigid-body',\n", " 'I-RMSD': 1.1255295,\n", " 'matched_interface_chains': 2,\n", " 'holo_receptor_interface_res': 34,\n", " 'holo_ligand_interface_res': 26,\n", " 'apo_receptor_interface_res': 28,\n", " 'apo_ligand_interface_res': 20,\n", " 'L-RMSD': 0.94191545,\n", " 'R-RMSD': 1.4988927,\n", " 'unbound_id': '3smz__A1_Q8IY67--5l0h__A1_P18206',\n", " 'unbound_body': 'receptor_ligand',\n", " 'monomer_name': 'apo'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ps.unbound_difficulty(\"apo\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "66e74ce5-7bd1-4d3c-9949-ac684f58709f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Fnat': 0.5662650602409639,\n", " 'Fnonnat': 0.9225700164744646,\n", " 'common_contacts': 47,\n", " 'differing_contacts': 560,\n", " 'bound_contacts': 83,\n", " 'unbound_contacts': 607,\n", " 'fnonnat_R': 0.8435374149659864,\n", " 'fnonnat_L': 0.8670520231213873,\n", " 'fnat_R': 0.6764705882352942,\n", " 'fnat_L': 0.8846153846153846,\n", " 'difficulty': 'Difficult',\n", " 'I-RMSD': 3.424884,\n", " 'matched_interface_chains': 2,\n", " 'holo_receptor_interface_res': 34,\n", " 'holo_ligand_interface_res': 26,\n", " 'apo_receptor_interface_res': 147,\n", " 'apo_ligand_interface_res': 173,\n", " 'L-RMSD': 2.1201644,\n", " 'R-RMSD': 0.6285352,\n", " 'unbound_id': 'af__Q8IY67--af__P18206',\n", " 'unbound_body': 'receptor_ligand',\n", " 'monomer_name': 'predicted'}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ps.unbound_difficulty(\"predicted\")" ] }, { "cell_type": "markdown", "id": "10dd61ad-941b-4299-b264-32cd626a9c36", "metadata": {}, "source": [ "## Illustrating utilities available in `Structure` instances" ] }, { "cell_type": 
"code", "execution_count": 8, "id": "229cc534-be79-4a94-9019-ab441f402eb0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,\n", " uniprot_map= with shape (283, 14),\n", " pinder_id='3vf0__A2_P18206-L',\n", " atom_array= with shape (178,),\n", ")" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "holo_L.filter(\"atom_name\", mask=[\"CA\"])\n" ] }, { "cell_type": "code", "execution_count": 9, "id": "89a84179-26bb-4a10-86df-febe88266968", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,\n", " uniprot_map= with shape (176, 14),\n", " pinder_id='5l0h__A1_P18206',\n", " atom_array= with shape (173,),\n", ")" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_L.filter(\"atom_name\", mask=[\"CA\"])\n" ] }, { "cell_type": "markdown", "id": "2dc21c5c-acf5-4049-adc4-143515d7f04a", "metadata": {}, "source": [ "## Can also filter \"in place\" rather than returning a copy (a la pandas)" ] }, { "cell_type": "code", "execution_count": 10, "id": "268518de-a794-4290-8398-4ec4e2cc402b", "metadata": {}, "outputs": [], "source": [ "apo_L.filter(\"atom_name\", mask=[\"CA\"], copy=False)" ] }, { "cell_type": "code", "execution_count": 11, "id": "dcac0379-5c72-4312-9592-2deff39f5bfa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,\n", " uniprot_map= with shape (176, 14),\n", " pinder_id='5l0h__A1_P18206',\n", " atom_array= with shape (173,),\n", " ),\n", " Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/test_set_pdbs/3vf0__A2_P18206-L.pdb,\n", " uniprot_map= with shape (283, 14),\n", " pinder_id='3vf0__A2_P18206-L',\n", " atom_array= with 
shape (178,),\n", " ))" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(\n", " ps.apo_ligand.filter(\"atom_name\", mask=[\"CA\"]),\n", " ps.holo_ligand.filter(\"atom_name\", mask=[\"CA\"])\n", ")\n" ] }, { "cell_type": "markdown", "id": "d0c49149-8a88-44d8-9d67-f88a85e6e808", "metadata": {}, "source": [ "## Create a masked apo (unbound) complex aligned to the bound (holo) structure" ] }, { "cell_type": "code", "execution_count": 12, "id": "fdda523e-0a75-4e56-9b01-9923a361a876", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,\n", " uniprot_map= with shape (460, 14),\n", " pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',\n", " atom_array= with shape (2355,),\n", ")" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_complex = ps.create_apo_complex()\n", "apo_complex\n" ] }, { "cell_type": "code", "execution_count": 13, "id": "149e2564-41a5-4150-b427-56be5a063d11", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
chain_idres_nameres_coderes_idatom_nameb_factorins_codeheteroelementxyz
0RHISH1N0.0FalseN69.377380105.086411-51.232784
1RHISH1CA0.0FalseC69.471100103.738152-50.680832
2RHISH1C0.0FalseC70.264641103.716263-49.368172
3RHISH1O0.0FalseO70.223633102.714134-48.653946
4RHISH1CB0.0FalseC70.093376102.773239-51.701916
.......................................
2350LARGR170CA0.0FalseC45.43841986.089317-54.533417
2351LLYSK171CA0.0FalseC44.94104883.492149-57.280712
2352LTHRT172CA0.0FalseC46.38722285.555809-60.138443
2353LPROP173CA0.0FalseC49.87340584.154137-59.526482
2354LTRPW174CA0.0FalseC50.15819580.451759-60.405190
\n", "

2355 rows × 12 columns

\n", "
" ], "text/plain": [ " chain_id res_name res_code res_id atom_name b_factor ins_code hetero \\\n", "0 R HIS H 1 N 0.0 False \n", "1 R HIS H 1 CA 0.0 False \n", "2 R HIS H 1 C 0.0 False \n", "3 R HIS H 1 O 0.0 False \n", "4 R HIS H 1 CB 0.0 False \n", "... ... ... ... ... ... ... ... ... \n", "2350 L ARG R 170 CA 0.0 False \n", "2351 L LYS K 171 CA 0.0 False \n", "2352 L THR T 172 CA 0.0 False \n", "2353 L PRO P 173 CA 0.0 False \n", "2354 L TRP W 174 CA 0.0 False \n", "\n", " element x y z \n", "0 N 69.377380 105.086411 -51.232784 \n", "1 C 69.471100 103.738152 -50.680832 \n", "2 C 70.264641 103.716263 -49.368172 \n", "3 O 70.223633 102.714134 -48.653946 \n", "4 C 70.093376 102.773239 -51.701916 \n", "... ... ... ... ... \n", "2350 C 45.438419 86.089317 -54.533417 \n", "2351 C 44.941048 83.492149 -57.280712 \n", "2352 C 46.387222 85.555809 -60.138443 \n", "2353 C 49.873405 84.154137 -59.526482 \n", "2354 C 50.158195 80.451759 -60.405190 \n", "\n", "[2355 rows x 12 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_complex.dataframe" ] }, { "cell_type": "markdown", "id": "928a07a4-4275-404e-a602-909b2aa0594d", "metadata": {}, "source": [ "### What's going on under the hood " ] }, { "cell_type": "code", "execution_count": 14, "id": "d6ededf3-e1e1-4a07-92c8-a07b03adc8ca", "metadata": {}, "outputs": [], "source": [ "ps = PinderSystem(pinder_id)\n", "apo_L, apo_R = ps.apo_ligand, ps.apo_receptor\n", "\n", "# After getting the \"in common\" masked structures, they can be superimposed\n", "apo_R, holo_R = apo_R.align_common_sequence(ps.aligned_holo_R)\n", "apo_L, holo_L = apo_L.align_common_sequence(ps.aligned_holo_L)\n", "\n", "# Rmsd after superposition (without outlier removal) is stored in `rms`\n", "R_super, rms, _ = apo_R.superimpose(holo_R)\n", "L_super, rms, _ = apo_L.superimpose(holo_L)" ] }, { "cell_type": "code", "execution_count": 15, "id": "959e92f4-6f46-4d28-8fad-f47fac3027a2", "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/5l0h__A1_P18206.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/5l0h__A1_P18206.parquet,\n", " pinder_id='5l0h__A1_P18206',\n", " atom_array= with shape (1327,),\n", ")" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "L_super" ] }, { "cell_type": "code", "execution_count": 16, "id": "fa0b8939-e06f-41f0-b634-dc51e6ede82d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/3smz__A1_Q8IY67--5l0h__A1_P18206.pdb,\n", " uniprot_map= with shape (460, 14),\n", " pinder_id='3smz__A1_Q8IY67--5l0h__A1_P18206',\n", " atom_array= with shape (3510,),\n", ")" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Now we can create the complexes using Structure.__add__ methods\n", "apo_binary = R_super + L_super\n", "holo_binary = holo_R + holo_L\n", "\n", "apo_binary" ] }, { "cell_type": "code", "execution_count": 17, "id": "05f62afe-c744-4d63-9409-2697e932b260", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/af__Q8IY67--af__P18206.pdb,\n", " uniprot_map=None,\n", " pinder_id='af__Q8IY67--af__P18206',\n", " atom_array= with shape (3566,),\n", ")" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Alternatively, there exist utils for creating the masked apo and predicted complex\n", "pred_complex = ps.create_pred_complex()\n", "pred_complex\n" ] }, { "cell_type": "code", "execution_count": 18, "id": "5999c1ff-34d6-45ea-8f17-d8ed0e7481f3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PosixPath('/Users/danielkovtun/dev/pinder_dataset/examples/unbound_complexes')" ] }, "execution_count": 18, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "masked_complex_dir = Path(\"./\").absolute() / \"unbound_complexes\"\n", "masked_complex_dir.mkdir(exist_ok=True, parents=True)\n", "\n", "masked_complex_dir\n", "\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "01e7bfa3-ae28-484c-b61a-786d23b439d3", "metadata": {}, "outputs": [], "source": [ "# If the output PDB filepath is omitted, the structure will be written to Structure.filepath, which may overwrite\n", "# In this case, it would be a new file composed of the added complex filepaths if we omit\n", "pred_complex.to_pdb(masked_complex_dir / \"pred_complex.pdb\")\n", "apo_complex.to_pdb(masked_complex_dir / \"apo_complex.pdb\")\n" ] }, { "cell_type": "code", "execution_count": 20, "id": "f0ed813b-2697-49d1-a48c-1676eb50500e", "metadata": {}, "outputs": [], "source": [ "(masked_complex_dir / \"pred_complex.pdb\").unlink()\n", "(masked_complex_dir / \"apo_complex.pdb\").unlink()" ] }, { "cell_type": "markdown", "id": "cc62ffbc-1e1e-4dfe-97b4-037ead864b0b", "metadata": {}, "source": [ "## Structures have `resolved_pdb2uniprot` and `resolved_uniprot2pdb` properties\n", "\n", "They return dicts of resolved residue numbers mapped from pdb numbering to uniprot numbering, and vice versa.\n", "\n", "\n", "The full mapping is available in `Structure.uniprot_mapping`.\n", "To get only the _resolved_ mapping, access the `Structure.resolved_mapping` attribute." 
] }, { "cell_type": "code", "execution_count": 21, "id": "a8b29e88-f33e-4cd8-838a-15e8bd9e3511", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{2: 960,\n", " 3: 961,\n", " 4: 962,\n", " 5: 963,\n", " 6: 964,\n", " 7: 965,\n", " 8: 966,\n", " 9: 967,\n", " 10: 968,\n", " 11: 969,\n", " 12: 970,\n", " 13: 971,\n", " 14: 972,\n", " 15: 973,\n", " 16: 974,\n", " 18: 976,\n", " 19: 977,\n", " 20: 978,\n", " 21: 979,\n", " 22: 980,\n", " 23: 981,\n", " 24: 982,\n", " 25: 983,\n", " 26: 984,\n", " 27: 985,\n", " 28: 986,\n", " 29: 987,\n", " 30: 988,\n", " 31: 989,\n", " 32: 990,\n", " 33: 991,\n", " 34: 992,\n", " 35: 993,\n", " 36: 994,\n", " 37: 995,\n", " 38: 996,\n", " 39: 997,\n", " 40: 998,\n", " 41: 999,\n", " 42: 1000,\n", " 43: 1001,\n", " 44: 1002,\n", " 45: 1003,\n", " 46: 1004,\n", " 47: 1005,\n", " 48: 1006,\n", " 49: 1007,\n", " 50: 1008,\n", " 51: 1009,\n", " 52: 1010,\n", " 53: 1011,\n", " 54: 1012,\n", " 55: 1013,\n", " 56: 1014,\n", " 57: 1015,\n", " 58: 1016,\n", " 59: 1017,\n", " 60: 1018,\n", " 61: 1019,\n", " 62: 1020,\n", " 63: 1021,\n", " 64: 1022,\n", " 65: 1023,\n", " 66: 1024,\n", " 67: 1025,\n", " 68: 1026,\n", " 69: 1027,\n", " 70: 1028,\n", " 71: 1029,\n", " 72: 1030,\n", " 73: 1031,\n", " 74: 1032,\n", " 75: 1033,\n", " 76: 1034,\n", " 77: 1035,\n", " 78: 1036,\n", " 79: 1037,\n", " 80: 1038,\n", " 81: 1039,\n", " 82: 1040,\n", " 83: 1041,\n", " 84: 1042,\n", " 85: 1043,\n", " 86: 1044,\n", " 87: 1045,\n", " 88: 1046,\n", " 89: 1047,\n", " 90: 1048,\n", " 91: 1049,\n", " 92: 1050,\n", " 93: 1051,\n", " 94: 1052,\n", " 95: 1053,\n", " 96: 1054,\n", " 97: 1055,\n", " 98: 1056,\n", " 99: 1057,\n", " 100: 1058,\n", " 101: 1059,\n", " 102: 1060,\n", " 103: 1061,\n", " 104: 1062,\n", " 105: 1063,\n", " 106: 1064,\n", " 107: 1065,\n", " 108: 1066,\n", " 109: 1067,\n", " 110: 1068,\n", " 111: 1069,\n", " 112: 1070,\n", " 113: 1071,\n", " 114: 1072,\n", " 115: 1073,\n", " 116: 1074,\n", " 117: 1075,\n", " 118: 1076,\n", 
" 119: 1077,\n", " 120: 1078,\n", " 121: 1079,\n", " 122: 1080,\n", " 123: 1081,\n", " 124: 1082,\n", " 125: 1083,\n", " 126: 1084,\n", " 127: 1085,\n", " 128: 1086,\n", " 129: 1087,\n", " 130: 1088,\n", " 131: 1089,\n", " 132: 1090,\n", " 133: 1091,\n", " 134: 1092,\n", " 135: 1093,\n", " 136: 1094,\n", " 137: 1095,\n", " 138: 1096,\n", " 139: 1097,\n", " 140: 1098,\n", " 141: 1099,\n", " 142: 1100,\n", " 143: 1101,\n", " 144: 1102,\n", " 145: 1103,\n", " 146: 1104,\n", " 147: 1105,\n", " 148: 1106,\n", " 149: 1107,\n", " 150: 1108,\n", " 151: 1109,\n", " 152: 1110,\n", " 153: 1111,\n", " 154: 1112,\n", " 155: 1113,\n", " 156: 1114,\n", " 157: 1115,\n", " 158: 1116,\n", " 159: 1117,\n", " 160: 1118,\n", " 161: 1119,\n", " 162: 1120,\n", " 163: 1121,\n", " 164: 1122,\n", " 165: 1123,\n", " 166: 1124,\n", " 167: 1125,\n", " 168: 1126,\n", " 169: 1127,\n", " 170: 1128,\n", " 171: 1129,\n", " 172: 1130,\n", " 173: 1131,\n", " 174: 1132}" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_L.resolved_pdb2uniprot" ] }, { "cell_type": "code", "execution_count": 22, "id": "1d6cfd6c-7793-494b-806c-bd623972baf6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entry_identity_idasym_idpdb_strand_idresiresi_pdbresi_authresnone_letter_code_canresolvedone_letter_code_uniprotresi_uniprotuniprot_accchain
0NaN1AA33939.0LEUL1L39NaNA1
1NaN1AA44040.0ASPD1D40NaNA1
2NaN1AA54141.0PROP1P41NaNA1
3NaN1AA64242.0GLUE1E42NaNA1
4NaN1AA74343.0GLUE1E43NaNA1
.............................................
446NaN1AA17011281128.0ARGR1R1128NaNA1
447NaN1AA17111291129.0LYSK1K1129NaNA1
448NaN1AA17211301130.0THRT1T1130NaNA1
449NaN1AA17311311131.0PROP1P1131NaNA1
450NaN1AA17411321132.0TRPW1W1132NaNA1
\n", "

451 rows × 14 columns

\n", "
" ], "text/plain": [ " entry_id entity_id asym_id pdb_strand_id resi resi_pdb resi_auth resn \\\n", "0 NaN 1 A A 3 39 39.0 LEU \n", "1 NaN 1 A A 4 40 40.0 ASP \n", "2 NaN 1 A A 5 41 41.0 PRO \n", "3 NaN 1 A A 6 42 42.0 GLU \n", "4 NaN 1 A A 7 43 43.0 GLU \n", ".. ... ... ... ... ... ... ... ... \n", "446 NaN 1 A A 170 1128 1128.0 ARG \n", "447 NaN 1 A A 171 1129 1129.0 LYS \n", "448 NaN 1 A A 172 1130 1130.0 THR \n", "449 NaN 1 A A 173 1131 1131.0 PRO \n", "450 NaN 1 A A 174 1132 1132.0 TRP \n", "\n", " one_letter_code_can resolved one_letter_code_uniprot resi_uniprot \\\n", "0 L 1 L 39 \n", "1 D 1 D 40 \n", "2 P 1 P 41 \n", "3 E 1 E 42 \n", "4 E 1 E 43 \n", ".. ... ... ... ... \n", "446 R 1 R 1128 \n", "447 K 1 K 1129 \n", "448 T 1 T 1130 \n", "449 P 1 P 1131 \n", "450 W 1 W 1132 \n", "\n", " uniprot_acc chain \n", "0 NaN A1 \n", "1 NaN A1 \n", "2 NaN A1 \n", "3 NaN A1 \n", "4 NaN A1 \n", ".. ... ... \n", "446 NaN A1 \n", "447 NaN A1 \n", "448 NaN A1 \n", "449 NaN A1 \n", "450 NaN A1 \n", "\n", "[451 rows x 14 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_complex.resolved_mapping" ] }, { "cell_type": "markdown", "id": "0edb008f-38c6-4f77-9e94-267f39cf82ac", "metadata": {}, "source": [ "## Case with multiple available apo structures" ] }, { "cell_type": "code", "execution_count": 23, "id": "d2301212-4876-4ea5-b893-1cfd2e0b085b", "metadata": {}, "outputs": [], "source": [ "pinder_id = \"1ldt__A1_P00761--1ldt__B1_P80424\"\n" ] }, { "cell_type": "code", "execution_count": 24, "id": "55b4653e-116e-46d0-816c-f86b725c67b3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['1s6f__A1_P00761.pdb',\n", " '1s85__A1_P00761.pdb',\n", " '1s84__A1_P00761.pdb',\n", " '2a32__A1_P00761.pdb',\n", " '1s6h__A1_P00761.pdb',\n", " '1s5s__A1_P00761.pdb',\n", " '1fni__A1_P00761.pdb',\n", " '1s81__A1_P00761.pdb',\n", " '1fmg__A1_P00761.pdb',\n", " '1qqu__A1_P00761.pdb',\n", " '2a31__A1_P00761.pdb',\n", " 
'1fn6__A1_P00761.pdb',\n", " '1s83__A1_P00761.pdb']" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ps_canon = PinderSystem(pinder_id)\n", "ps_canon.entry.apo_R_alt\n", "\n" ] }, { "cell_type": "code", "execution_count": 25, "id": "05069c0f-bfd3-4c76-8556-c5e828dd8b0a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['2kmq__A1_P80424.pdb', '2kmp__A1_P80424.pdb', '2kmr__A1_P80424.pdb']" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ps_canon.entry.apo_L_alt" ] }, { "cell_type": "markdown", "id": "c44ca222-1521-4d3e-9047-6c685bcfdbcb", "metadata": {}, "source": [ "### Specify 2kmr as apo ligand and 1fmg as apo receptor\n", "Note: the default `apo_receptor` and `apo_ligand` are determined based on the selected canonical apo monomer.\n", "\n", "The canonical monomers were selected based on their sequence overlap and difficulty metrics calculated in `pinder.eval.dockq.unbound`" ] }, { "cell_type": "code", "execution_count": 26, "id": "6be6b83c-01cc-4885-8ab8-490ff662d8e0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PinderSystem(\n", "entry = IndexEntry(\n", " (\n", " 'split',\n", " 'train',\n", " ),\n", " (\n", " 'id',\n", " '1ldt__A1_P00761--1ldt__B1_P80424',\n", " ),\n", " (\n", " 'pdb_id',\n", " '1ldt',\n", " ),\n", " (\n", " 'cluster_id',\n", " 'cluster_312_5091',\n", " ),\n", " (\n", " 'cluster_id_R',\n", " 'cluster_312',\n", " ),\n", " (\n", " 'cluster_id_L',\n", " 'cluster_5091',\n", " ),\n", " (\n", " 'pinder_s',\n", " False,\n", " ),\n", " (\n", " 'pinder_xl',\n", " False,\n", " ),\n", " (\n", " 'pinder_af2',\n", " False,\n", " ),\n", " (\n", " 'uniprot_R',\n", " 'P00761',\n", " ),\n", " (\n", " 'uniprot_L',\n", " 'P80424',\n", " ),\n", " (\n", " 'holo_R_pdb',\n", " '1ldt__A1_P00761-R.pdb',\n", " ),\n", " (\n", " 'holo_L_pdb',\n", " '1ldt__B1_P80424-L.pdb',\n", " ),\n", " (\n", " 'predicted_R_pdb',\n", " 'af__P00761.pdb',\n", " ),\n", " (\n", 
" 'predicted_L_pdb',\n", " 'af__P80424.pdb',\n", " ),\n", " (\n", " 'apo_R_pdb',\n", " '1s82__A1_P00761.pdb',\n", " ),\n", " (\n", " 'apo_L_pdb',\n", " '2kmo__A1_P80424.pdb',\n", " ),\n", " (\n", " 'apo_R_pdbs',\n", " '1s82__A1_P00761.pdb;1s6f__A1_P00761.pdb;1s85__A1_P00761.pdb;1s84__A1_P00761.pdb;2a32__A1_P00761.pdb;1s6h__A1_P00761.pdb;1s5s__A1_P00761.pdb;1fni__A1_P00761.pdb;1s81__A1_P00761.pdb;1fmg__A1_P00761.pdb;1qqu__A1_P00761.pdb;2a31__A1_P00761.pdb;1fn6__A1_P00761.pdb;1s83__A1_P00761.pdb',\n", " ),\n", " (\n", " 'apo_L_pdbs',\n", " '2kmo__A1_P80424.pdb;2kmq__A1_P80424.pdb;2kmp__A1_P80424.pdb;2kmr__A1_P80424.pdb',\n", " ),\n", " (\n", " 'holo_R',\n", " True,\n", " ),\n", " (\n", " 'holo_L',\n", " True,\n", " ),\n", " (\n", " 'predicted_R',\n", " True,\n", " ),\n", " (\n", " 'predicted_L',\n", " True,\n", " ),\n", " (\n", " 'apo_R',\n", " True,\n", " ),\n", " (\n", " 'apo_L',\n", " True,\n", " ),\n", " (\n", " 'apo_R_quality',\n", " 'high',\n", " ),\n", " (\n", " 'apo_L_quality',\n", " 'high',\n", " ),\n", " (\n", " 'chain1_neff',\n", " 997.0,\n", " ),\n", " (\n", " 'chain2_neff',\n", " 2220.0,\n", " ),\n", " (\n", " 'chain_R',\n", " 'A1',\n", " ),\n", " (\n", " 'chain_L',\n", " 'B1',\n", " ),\n", " (\n", " 'contains_antibody',\n", " False,\n", " ),\n", " (\n", " 'contains_antigen',\n", " False,\n", " ),\n", " (\n", " 'contains_enzyme',\n", " True,\n", " ),\n", ")\n", "native=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/1ldt__A1_P00761--1ldt__B1_P80424.pdb,\n", " uniprot_map=None,\n", " pinder_id='1ldt__A1_P00761--1ldt__B1_P80424',\n", " atom_array= with shape (1992,),\n", ")\n", "holo_receptor=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/1ldt__A1_P00761-R.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/1ldt__A1_P00761-R.parquet,\n", " pinder_id='1ldt__A1_P00761-R',\n", " atom_array= with shape (1666,),\n", ")\n", "holo_ligand=Structure(\n", " 
filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/1ldt__B1_P80424-L.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/1ldt__B1_P80424-L.parquet,\n", " pinder_id='1ldt__B1_P80424-L',\n", " atom_array= with shape (326,),\n", ")\n", "apo_receptor=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/1fmg__A1_P00761.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/1fmg__A1_P00761.parquet,\n", " pinder_id='1fmg__A1_P00761',\n", " atom_array= with shape (1642,),\n", ")\n", "apo_ligand=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/2kmr__A1_P80424.pdb,\n", " uniprot_map=/Users/danielkovtun/.local/share/pinder/2024-02/mappings/2kmr__A1_P80424.parquet,\n", " pinder_id='2kmr__A1_P80424',\n", " atom_array= with shape (630,),\n", ")\n", "pred_receptor=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/af__P00761.pdb,\n", " uniprot_map=None,\n", " pinder_id='af__P00761',\n", " atom_array= with shape (1708,),\n", ")\n", "pred_ligand=Structure(\n", " filepath=/Users/danielkovtun/.local/share/pinder/2024-02/pdbs/af__P80424.pdb,\n", " uniprot_map=None,\n", " pinder_id='af__P80424',\n", " atom_array= with shape (326,),\n", ")\n", ")" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "ps = PinderSystem(pinder_id, apo_ligand_pdb_code=\"2kmr\", apo_receptor_pdb_code=\"1fmg\")\n", "ps" ] }, { "cell_type": "markdown", "id": "50e786a7-7677-4096-9193-5a174b8a5e55", "metadata": {}, "source": [ "## Classify system difficulty based on degree of conformational shift in unbound and bound " ] }, { "cell_type": "code", "execution_count": 27, "id": "83514691-cf42-4657-8c3f-d927c366f7a5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Fnat': 0.847457627118644,\n", " 'Fnonnat': 0.5,\n", " 'common_contacts': 50,\n", " 'differing_contacts': 50,\n", " 'bound_contacts': 59,\n", " 
'unbound_contacts': 100,\n", " 'fnonnat_R': 0.4318181818181818,\n", " 'fnonnat_L': 0.35,\n", " 'fnat_R': 0.8620689655172413,\n", " 'fnat_L': 1.0,\n", " 'difficulty': 'Difficult',\n", " 'I-RMSD': 2.3008885,\n", " 'matched_interface_chains': 2,\n", " 'holo_receptor_interface_res': 29,\n", " 'holo_ligand_interface_res': 13,\n", " 'apo_receptor_interface_res': 44,\n", " 'apo_ligand_interface_res': 20,\n", " 'L-RMSD': 9.371291,\n", " 'R-RMSD': 0.47199318,\n", " 'unbound_id': '1fmg__A1_P00761--2kmr__A1_P80424',\n", " 'unbound_body': 'receptor_ligand',\n", " 'monomer_name': 'apo'}" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ps.unbound_difficulty(\"apo\")" ] }, { "cell_type": "code", "execution_count": 28, "id": "e99b4542-e837-4cf1-b744-492002a5defb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Fnat': 0.847457627118644,\n", " 'Fnonnat': 0.05660377358490566,\n", " 'common_contacts': 50,\n", " 'differing_contacts': 3,\n", " 'bound_contacts': 59,\n", " 'unbound_contacts': 53,\n", " 'fnonnat_R': 0.07692307692307693,\n", " 'fnonnat_L': 0.0,\n", " 'fnat_R': 0.8275862068965517,\n", " 'fnat_L': 0.9230769230769231,\n", " 'difficulty': 'Rigid-body',\n", " 'I-RMSD': 1.2526181,\n", " 'matched_interface_chains': 2,\n", " 'holo_receptor_interface_res': 29,\n", " 'holo_ligand_interface_res': 13,\n", " 'apo_receptor_interface_res': 26,\n", " 'apo_ligand_interface_res': 12,\n", " 'L-RMSD': 3.9255776,\n", " 'R-RMSD': 0.40761378,\n", " 'unbound_id': 'af__P00761--af__P80424',\n", " 'unbound_body': 'receptor_ligand',\n", " 'monomer_name': 'predicted'}" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ps.unbound_difficulty(\"predicted\")" ] }, { "cell_type": "code", "execution_count": null, "id": "dacfed74-f0be-4241-9f7a-6c90af78b55e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "495ad596-6761-40c7-a3e4-57334d7a1279", "metadata": {}, "source": [ "## 
A bunch of other features of the `Structure` class are illustrated below" ] }, { "cell_type": "code", "execution_count": 29, "id": "b0b908bd-b924-4fe1-9d71-a3cd30986441", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([\n", "\tAtom(np.array([36.778, 16.379, 60.821], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"N\", element=\"N\"),\n", "\tAtom(np.array([37.219, 15.836, 59.542], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"CA\", element=\"C\"),\n", "\tAtom(np.array([36.18 , 16.079, 58.452], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"C\", element=\"C\"),\n", "\tAtom(np.array([36.532, 16.338, 57.297], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"O\", element=\"O\"),\n", "\tAtom(np.array([37.509, 14.339, 59.666], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"CB\", element=\"C\"),\n", "\tAtom(np.array([38.703, 14.01 , 60.548], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"CG\", element=\"C\"),\n", "\tAtom(np.array([38.916, 12.517, 60.706], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"CD\", element=\"C\"),\n", "\tAtom(np.array([37.981, 11.727, 60.572], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"OE1\", element=\"O\"),\n", "\tAtom(np.array([40.151, 12.123, 60.994], dtype=float32), chain_id=\"L\", res_id=2, ins_code=\"\", res_name=\"GLN\", hetero=False, atom_name=\"NE2\", element=\"N\"),\n", "\tAtom(np.array([34.852, 15.989, 58.853], dtype=float32), chain_id=\"L\", res_id=3, ins_code=\"\", res_name=\"PRO\", hetero=False, atom_name=\"N\", element=\"N\")\n", "])" ] }, "execution_count": 29, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "apo_L.atom_array[0:10]" ] }, { "cell_type": "code", "execution_count": 30, "id": "e86a6e9e-f39b-457d-81b5-9457d0dc67cb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[36.778, 16.379, 60.821],\n", " [37.219, 15.836, 59.542],\n", " [36.18 , 16.079, 58.452],\n", " [36.532, 16.338, 57.297],\n", " [37.509, 14.339, 59.666],\n", " [38.703, 14.01 , 60.548],\n", " [38.916, 12.517, 60.706],\n", " [37.981, 11.727, 60.572],\n", " [40.151, 12.123, 60.994],\n", " [34.852, 15.989, 58.853]], dtype=float32)" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_L.coords[0:10]" ] }, { "cell_type": "code", "execution_count": 31, "id": "aa468cf0-0057-4edf-9242-e5472a022139", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['ALA',\n", " 'ARG',\n", " 'ASN',\n", " 'ASP',\n", " 'CYS',\n", " 'GLN',\n", " 'GLU',\n", " 'GLY',\n", " 'HIS',\n", " 'ILE',\n", " 'LEU',\n", " 'LYS',\n", " 'MET',\n", " 'PHE',\n", " 'PRO',\n", " 'SER',\n", " 'THR',\n", " 'TRP',\n", " 'VAL']" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_L.residue_names" ] }, { "cell_type": "code", "execution_count": 32, "id": "149f8376-6956-4c04-a209-6c49c03b5a4d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'QPVNQPILAAAQSLHEATKWSSKGNDIIAAAKRMALLMAEMSRLVRGGSGTKRALIQCAKDIAKASDEVTRLAKEVAKQCTDKRIRTNLLQVCERIPTISTQLKILSTVKATMLGRTNISDEESEQATEMLVHNAQNLMQSVKETVREAEAASIKIRTDAGFTLRWVRKTPW'" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "apo_L.sequence" ] }, { "cell_type": "code", "execution_count": 33, "id": "ddf7502e-9bd4-4e21-8d2f-8f8459cf3318", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['GLN', 'GLN', 'GLN', ..., 'TRP', 'TRP', 'TRP'], dtype='