Source code for pinder.core.structure.surgery

from __future__ import annotations

import biotite.structure as struc
import numpy as np
from numpy.typing import NDArray
from biotite.structure.atoms import AtomArray, AtomArrayStack
from pinder.core.utils import setup_logger
from pinder.core.structure.atoms import apply_mask, get_seq_aligned_structures
from pinder.core.structure.models import ChainConfig

log = setup_logger(__name__)



[docs]
def remove_annotations(
    structure: AtomArray | AtomArrayStack,
    categories: list[str] = ["element", "ins_code"],
) -> AtomArray | AtomArrayStack:
    """Overwrite the given annotation categories with placeholder values.

    Every atom receives ``0.0`` for the ``"b_factor"`` category and an empty
    string for any other category. The structure is modified in place via
    ``set_annotation`` and the same object is returned.

    Args:
        structure: Array or stack whose annotations should be blanked.
        categories: Names of the annotation categories to overwrite.

    Returns:
        The input ``structure``, with the listed annotations replaced.
    """
    # The atom dimension is read from axis 1 of a stack's shape and from
    # axis 0 of a single array's shape.
    atom_axis = 1 if isinstance(structure, AtomArrayStack) else 0

    for category in categories:
        placeholder: float | str = ""
        if category == "b_factor":
            placeholder = 0.0
        blank: NDArray[np.double | np.str_] = np.repeat(
            placeholder, structure.shape[atom_axis]
        )
        structure.set_annotation(category, blank)
    return structure




[docs]
def fix_annotation_mismatch(
    ref: AtomArray,
    decoys: AtomArrayStack,
    categories: list[str] = ["element", "ins_code", "b_factor"],
) -> tuple[AtomArray, AtomArrayStack]:
    """Blank out annotation categories that differ between ref and decoys.

    Each annotation category present on ``ref`` is compared against the same
    category on ``decoys``. Any difference is logged at debug level; if the
    differing category is also listed in ``categories`` it is overwritten on
    both structures through ``remove_annotations``.

    Args:
        ref: Reference structure whose annotation categories are iterated.
        decoys: Stack compared against the reference.
        categories: Categories that may be blanked when they differ.

    Returns:
        The ``(ref, decoys)`` pair after any differing categories were blanked.
    """
    for category in ref.get_annotation_categories():
        same = np.array_equal(
            ref.get_annotation(category), decoys.get_annotation(category)
        )
        if same:
            continue

        # The difference is reported even for categories that are left as-is.
        log.debug(f"Decoy and ref have differing {category} categories!")
        if category in categories:
            decoys = remove_annotations(decoys, categories=[category])
            ref = remove_annotations(ref, categories=[category])
    return ref, decoys




[docs]
def fix_mismatched_atoms(
    native: AtomArray, decoy_stack: AtomArrayStack, max_atom_delta: int
) -> tuple[AtomArray, AtomArrayStack]:
    """Reduce native and decoy stack to the atoms they have in common.

    When the ``res_id`` annotations of both inputs are already equal, they are
    returned unchanged. Otherwise both are passed through
    ``get_seq_aligned_structures`` and then filtered to their intersection.
    If more than ``max_atom_delta`` native atoms are absent from that
    intersection, annotations are first blanked with ``remove_annotations``
    (using its default categories), the intersection is recomputed, and a
    warning reports how many atoms are kept.

    Args:
        native: Reference structure.
        decoy_stack: Stack of models to reconcile with the reference.
        max_atom_delta: Largest number of native atoms that may be missing
            from the intersection before annotations are blanked.

    Returns:
        The ``(native, decoy_stack)`` pair restricted to their common atoms.
    """
    if np.array_equal(decoy_stack.res_id, native.res_id):
        # Same shape and same res_id values: nothing to reconcile.
        return native, decoy_stack

    log.debug("Detected mismatch between native and decoy stack!")
    # Align by sequence so that the decoy numbering follows the native.
    log.debug("Attempting sequence-based structural alignment")
    native, decoy_stack = get_seq_aligned_structures(native, decoy_stack)

    shared = native[struc.filter_intersection(native, decoy_stack)]
    n_native = native.shape[0]
    n_missing = n_native - shared.shape[0]

    if n_missing > max_atom_delta:
        log.debug(
            f"Large atom mismatch detected between native and models: {n_missing} atoms."
        )
        log.debug("Attempting to fix mismatch")

        # Differing annotations (e.g. missing elements) can shrink the
        # intersection, so blank them and compute it again.
        native = remove_annotations(native)
        decoy_stack = remove_annotations(decoy_stack)
        shared = native[struc.filter_intersection(native, decoy_stack)]
        n_shared = shared.shape[0]
        log.warning(
            "Caution: results will only represent the atoms in common! "
            f"keeping {n_shared} / {n_native} atoms in common"
        )

    native = shared.copy()
    keep = struc.filter_intersection(decoy_stack, native)
    decoy_stack = decoy_stack[..., keep]
    return native, decoy_stack




[docs]
def set_canonical_chain_order(
    structure: AtomArray | AtomArrayStack | list[AtomArray],
    chains: ChainConfig,
    subject: str,
) -> AtomArray | AtomArrayStack | list[AtomArray]:
    """Reorder atoms so that receptor chains come before ligand chains.

    The chain identifiers are read from the ``{subject}_receptor`` and
    ``{subject}_ligand`` attributes of ``chains``. Atoms whose ``chain_id``
    is in neither group are not part of the result.

    Args:
        structure: A single array/stack, or a list of arrays. A list is
            updated in place, element by element, and returned.
        chains: Object providing the ``{subject}_ligand`` and
            ``{subject}_receptor`` attributes.
        subject: Prefix used to select the attributes on ``chains``.

    Returns:
        The receptor atoms followed by the ligand atoms, in the same
        container form as the input.
    """
    ligand_ids = getattr(chains, f"{subject}_ligand")
    receptor_ids = getattr(chains, f"{subject}_receptor")

    if not isinstance(structure, list):
        receptor_sel = np.isin(structure.chain_id, receptor_ids)
        ligand_sel = np.isin(structure.chain_id, ligand_ids)
        receptor = apply_mask(structure, receptor_sel)
        ligand = apply_mask(structure, ligand_sel)
        return receptor + ligand

    for idx, entry in enumerate(structure):
        receptor_sel = np.isin(entry.chain_id, receptor_ids)
        ligand_sel = np.isin(entry.chain_id, ligand_ids)
        receptor = entry[receptor_sel].copy()
        ligand = entry[ligand_sel].copy()
        # Replace the list element with the reordered copy.
        structure[idx] = receptor + ligand
    return structure




[docs]
def remove_duplicate_calpha(atoms: AtomArray) -> AtomArray:
    """Keep only the first atom seen for each (chain_id, res_id) pair.

    Atoms are visited in order; an atom whose chain and residue number were
    already encountered is dropped and a warning is logged for it.
    NOTE(review): the name suggests the input is expected to contain only
    C-alpha atoms -- confirm against callers.

    Args:
        atoms: Atoms to de-duplicate. May be empty.

    Returns:
        A copy of ``atoms`` without the later duplicates.
    """
    keep: list[bool] = []
    seen: set[str] = set()
    for at in atoms:
        at_id = f"{at.chain_id}-{at.res_id}"
        is_first = at_id not in seen
        if is_first:
            seen.add(at_id)
        else:
            log.warning(f"{at_id} is duplicated!")
        keep.append(is_first)
    # Force a boolean dtype: an empty list would otherwise become a float64
    # array, which is rejected as an index and made empty input raise.
    return atoms[np.array(keep, dtype=bool)].copy()