Source code for modelarchive.modelcif.fix_af3

"""ModelCIF files generated by AlphaFold 3 deviate from the official ModelCIF
definition dictionary in specific cases. In particular, for homomeric
assemblies, each molecular entity copy is written as a separate entity in the
CIF document, instead of defining a single entity referenced multiple times.
This module provides functionality to correct the deviations.
"""

from . import access
from . import edit


def _is_null(value): # pragma: no cover
    """Borrowed from gemmi."""
    # ToDo: This may become a public function in the future.
    return len(value) == 1 and value[0] in ("?", ".")


def _char_table(c):
    """Borrowed from gemmi."""
    # fmt: off
    table = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        2, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0
    ] + [0] * 128
    # fmt: on
    return table[ord(c) % 256]


def _quote(v):  # pragma: no cover
    """Quote ``v`` for use as a CIF value, if quoting is needed.

    Borrowed from gemmi, but double quotes are preferred above single quotes
    for AF.

    Args:
        v (str): Value to be written.

    Returns:
        str: ``v`` unchanged if it is non-empty, not a null marker and made
        only of class 1 characters. Otherwise ``v`` wrapped in double
        quotes, in single quotes if it contains a double quote, or in ";"
        delimiters (with a newline before the closing ";") if it contains a
        newline or both kinds of quotes.
    """
    only_plain_chars = all(_char_table(ch) == 1 for ch in v)
    if only_plain_chars and len(v) > 0 and not _is_null(v):
        return v

    # Pick the delimiter; ";" is the fallback when neither quote fits.
    if "\n" in v:
        delimiter = ";"
    elif '"' not in v:
        delimiter = '"'
    elif "'" not in v:
        delimiter = "'"
    else:
        delimiter = ";"

    if delimiter == ";":
        # The closing ";" has to go on a line of its own.
        return delimiter + v + "\n" + delimiter
    return delimiter + v + delimiter


def fix_model_name(block, mdl_rank):
    """Normalise ``_ma_model_list.model_name`` for given rank.

    AlphaFold 3 sets ``_ma_model_list.model_name`` to "Top ranked model" for
    all models, regardless of their rank. This function rewrites the value
    such that only ``mdl_rank == 1`` is labelled "Top ranked model". All other
    ranks are renamed to "#<``mdl_rank``> ranked model".

    Examples:

    >>> from gemmi import cif
    >>> from modelarchive.modelcif import fix_af3
    >>> # get sample CIF data
    >>> cif_data = '''data_test
    ... _ma_model_list.data_id 1
    ... _ma_model_list.model_name "Top ranked model"
    ... _ma_model_list.model_type "Ab initio model"
    ... _ma_model_list.ordinal_id 1
    ... '''
    >>> block = cif.read_string(cif_data).sole_block()
    >>> fix_af3.fix_model_name(block, 2)
    >>> print(block.as_string())
    data_test
    _ma_model_list.data_id 1
    _ma_model_list.model_name "#2 ranked model"
    _ma_model_list.model_type "Ab initio model"
    _ma_model_list.ordinal_id 1
    <BLANKLINE>
    >>> fix_af3.fix_model_name(block, 1)
    >>> print(block.as_string())
    data_test
    _ma_model_list.data_id 1
    _ma_model_list.model_name "Top ranked model"
    _ma_model_list.model_type "Ab initio model"
    _ma_model_list.ordinal_id 1
    <BLANKLINE>

    Args:
        block (|gemmicifBlock|): CIF block to operate on.
        mdl_rank (int): Rank of the AlphaFold 3 model. If ``mdl_rank == 1``,
            the name is set to "Top ranked model".

    Returns:
        None

    Raises:
        edit.NotFoundItemError: If ``_ma_model_list.model_name`` can not be
            found in ``block``.
        RuntimeError: If the ``_ma_model_list`` category contains more than
            one row.
    """
    # Only rank 1 keeps the AlphaFold 3 default label.
    new_name = (
        "Top ranked model" if mdl_rank == 1 else f"#{mdl_rank} ranked model"
    )

    model_list = access.get_table(
        block, "_ma_model_list", items=["model_name"]
    )
    if model_list is None:
        raise edit.NotFoundItemError(
            msg="File is missing _ma_model_list.model_name, single model "
            "required"
        )
    if len(model_list) != 1:
        raise RuntimeError("File must have a single model in _ma_model_list.")

    # The value is stored in its quoted CIF form.
    model_list[0]["model_name"] = _quote(new_name)
# LocalWords: CIF homomeric mdl gemmi cif modelarchive modelcif af BLANKLINE
# LocalWords: Args gemmicifBlock NotFoundItemError RuntimeError