# Source code for modelarchive.modelcif.edit

"""Functionality to extend and modify ModelCIF files."""

from gemmi import cif

from .. import _utils
from . import access


class NotFoundError(RuntimeError):
    """General exception for 'things' that can not be found.

    If ``msg`` is omitted, generates the message
    "<SUBJECT> '<VALUE>' does not exist.".

    Args:
        subject (str): The 'thing' that can not be found, used in the
            generated message.
        value (str): The name of what can not be found, used in the generated
            message.
        msg (str): Optional alternative error message.
    """

    def __init__(self, subject, value, msg=None):
        if msg is None:
            msg = f"{subject} '{value}' does not exist."
        super().__init__(msg)


class NotFoundCategoryError(NotFoundError):
    """Exception if a category can not be found.

    This exception should be raised when a function expects a specific
    category to exist in the corresponding |gemmicifBlock|, but the category
    cannot be retrieved.

    Args:
        category (str): Name of the category that could not be found.
        msg (str): Optional alternative error message.
    """

    def __init__(self, category=None, msg=None):
        super().__init__("Category", category, msg)


class NotFoundItemError(NotFoundError):
    """Exception if an item can not be found.

    This exception should be raised when a function expects a specific item
    to exist in the corresponding CIF category, but the item cannot be
    retrieved.

    Args:
        item (str): Name of the item that could not be found. Use as
            "<CATEGORY>.<ITEM>" for clarity.
        msg (str): Optional alternative error message.
    """

    # Derives from NotFoundError (itself a RuntimeError) so that the
    # subject/value/msg arguments are turned into a message instead of being
    # stored as a raw argument tuple.
    def __init__(self, item=None, msg=None):
        super().__init__("Item", item, msg)
def _get_idx_for_placement(plcmnt, block):
    """Translate a relative placement string into an item index of ``block``.

    ``plcmnt`` has the form ``[after|before]:<CATEGORY>``; the direction is
    matched case-insensitively. Returns ``-1`` (after emitting a warning) if
    the reference category is not present in ``block``. Raises ``ValueError``
    if the string has no ``:`` or the direction is unknown.
    """
    direction, colon, anchor = plcmnt.partition(":")
    if not colon:
        # ToDo: turn into own exception if needed
        raise ValueError(
            f"Couldn't split placement string '{plcmnt}', maybe the "
            "':' is missing. Placement string needs to be of form "
            "'[after|before]:<CATEGORY>'."
        )

    anchor_table = access.get_table(block, anchor)
    if anchor_table is None:
        _utils.warn_msg(
            f"Category '{anchor}' for relative placement not found. "
            "Skipping relocation."
        )
        return -1

    keyword = direction.upper()
    if keyword == "BEFORE":
        # position of the first item of the reference category
        return block.get_index(anchor_table.tags[0])
    if keyword == "AFTER":
        # position behind the last item of the reference category, unless
        # that item already is the last one of the block
        last_of_anchor = block.get_index(anchor_table.tags[-1])
        last_of_block = sum(1 for _ in block) - 1
        if last_of_anchor < last_of_block:
            return last_of_anchor + 1
        return last_of_anchor

    # ToDo: turn into own exception if needed
    raise ValueError(
        f"Relative placement string '{direction}' not recognised. "
        "Valid directions are 'after' and 'before'."
    )
class MoveIdxToFarError(RuntimeError):
    """Exception if repositioning exceeds the size of document-category-list.

    Primarily used by :func:`move_category`, on the attempt to move a
    category to a position that does not exist within the corresponding
    |gemmicifBlock|. For example, if the |gemmicifBlock| object contains 10
    categories, trying to move a category to position 15 will fail and should
    raise this exception.

    Args:
        category (str): Name of the category that could not be moved.
        idx (int): Target position to which the category was to be moved.
    """

    def __init__(self, category, idx):
        message = (
            f"Cannot move '{category}' to position '{idx}', exceeds range."
        )
        super().__init__(message)
def move_category(block, cat, idx):
    """Move a category to a new position in a |gemmicifBlock|.

    By design, ModelCIF files are not intended to be read or edited manually.
    Instead, dedicated applications should handle the format, providing
    functionality to view and modify the data. However, at `ModelArchive`_ we
    occasionally need to open ModelCIF files in an editor to inspect specific
    details. In such cases, it is helpful to have related categories grouped
    together, reducing the need to jump back and forth between different
    categories. This asks for a function to reposition categories within a
    ModelCIF file.

    :func:`move_category` takes category ``cat`` and moves it to position
    ``idx`` in the CIF block ``block``. The parameter ``idx`` is somewhat
    special: it can be just an integer index, specifying the exact position
    to move ``cat`` to. That comes in handy placing categories at the
    beginning (``idx=0``) or at the end (``idx=-1``) of ``block``. However,
    specifying an absolute index is often less useful in practice, as
    categories are typically organised relative to related categories. For
    this purpose, ``idx`` provides a special syntax:
    ``[after|before]:<CATEGORY>``. For example, if you want to put category
    ``_ma_qa_metric`` in front of category ``_ma_qa_metric_local``, you can
    use ``idx="before:_ma_qa_metric_local"`` for ``cat=_ma_qa_metric``.
    If ``idx`` is ``None``, nothing is moved.

    Examples:
        >>> from gemmi import cif
        >>> from modelarchive.modelcif import edit
        >>> # get sample CIF data
        >>> cif_data = '''data_test
        ... _ma_qa_metric.id 1
        ... _ma_qa_metric.description test_score
        ... loop_
        ... _ma_qa_metric_local.ordinal_id
        ... _ma_qa_metric_local.metric_value
        ... _ma_qa_metric_local.metric_id
        ... 1 1.0 1
        ... 2 1.5 1
        ... '''
        >>> block = cif.read_string(cif_data).sole_block()
        >>> # move _ma_qa_metric_local to BEFORE _ma_qa_metric
        >>> edit.move_category(
        ...     block,
        ...     "_ma_qa_metric_local",
        ...     "before:_ma_qa_metric",
        ... )
        >>> print(block.as_string())
        data_test
        loop_
        _ma_qa_metric_local.ordinal_id
        _ma_qa_metric_local.metric_value
        _ma_qa_metric_local.metric_id
        1 1.0 1
        2 1.5 1
        <BLANKLINE>
        _ma_qa_metric.id 1
        _ma_qa_metric.description test_score
        <BLANKLINE>
        >>> # move _ma_qa_metric to the front
        >>> edit.move_category(block, "_ma_qa_metric", 0)
        >>> print(block.as_string())
        data_test
        _ma_qa_metric.id 1
        _ma_qa_metric.description test_score
        <BLANKLINE>
        loop_
        _ma_qa_metric_local.ordinal_id
        _ma_qa_metric_local.metric_value
        _ma_qa_metric_local.metric_id
        1 1.0 1
        2 1.5 1
        <BLANKLINE>

    Args:
        block (|gemmicifBlock|): CIF block to operate on.
        cat (str): Name of the CIF category to be moved.
        idx (int|str): Position to move ``cat`` to. This can be an integer
            for exact positioning, or a string of form
            ``[after|before]:<CATEGORY>`` for relative positioning. In
            relative positioning, ``<CATEGORY>`` specifies the name of the
            category before or after which ``cat`` will be placed.

    Returns:
        None

    Raises:
        NotFoundCategoryError: If ``cat`` can not be found in ``block``.
        MoveIdxToFarError: If the target position is outside ``block``. For
            example, if ``block`` contains 10 categories, trying to move a
            category to position 15 will raise this error.
    """
    # No target given: leave the block untouched.
    if idx is None:
        return

    # Relative placements are resolved to an absolute item index first.
    target = _get_idx_for_placement(idx, block) if isinstance(idx, str) else idx

    source = access.get_table(block, cat)
    if source is None:
        raise NotFoundCategoryError(cat)

    # Name-value pairs are separate items in the block and move one by one.
    if source.loop is None:
        _move_pairs(block, cat, target, source)
        return

    # A loop is a single item in the block, one move is enough.
    try:
        block.move_item(block.get_index(source.tags[0]), target)
    except RuntimeError as err:
        if str(err) != "move_item: new_pos out of range":
            raise
        raise MoveIdxToFarError(cat, target) from None
def _move_pairs(block, cat, idx, table):
    """Move all items of a name-value pair category to position ``idx``.

    Every tag of ``table`` is relocated with its own ``block.move_item()``
    call. A "new_pos out of range" error from the block is re-raised as
    ``MoveIdxToFarError`` for ``cat``.
    """
    # Work on a copy of the tag list, the block's item list is modified while
    # looping. For a negative idx the tags go in their original order, for
    # all other targets they are processed back to front.
    pending = list(table.tags)
    if idx >= 0:
        pending.reverse()

    for offset, tag in enumerate(pending):
        src = block.get_index(tag)
        # adapt the target when moving towards the end of the block, as in
        # moving before/ after a category located further down
        dst = idx - offset if idx > src else idx
        try:
            block.move_item(src, dst)
        except RuntimeError as err:
            if str(err) != "move_item: new_pos out of range":
                raise
            raise MoveIdxToFarError(cat, idx) from None
def make_res_per_chain_counter(asym_id_item):
    """Returns a stateful callback function counting residues per chain.

    :func:`make_res_per_chain_counter` returns a function that can be used as
    ``callback`` in :func:`add_column`. The returned callback assigns
    consecutive residue numbers within each chain of a table, starting at 1.
    When the chain identifier changes between two rows while iterating over
    the table, the counter is reset to 1.

    Examples:
        >>> # Add item "ndb_seq_num" to category "_pdbx_nonpoly_scheme"
        >>> # Reminder: "ndb_seq_num" -> column, "_pdbx_nonpoly_scheme" -> table
        >>> from gemmi import cif
        >>> from modelarchive.modelcif import edit
        >>> cif_data = '''data_test
        ... loop_
        ... _pdbx_nonpoly_scheme.asym_id
        ... _pdbx_nonpoly_scheme.auth_seq_num
        ... _pdbx_nonpoly_scheme.entity_id
        ... _pdbx_nonpoly_scheme.mon_id
        ... _pdbx_nonpoly_scheme.pdb_seq_num
        ... C 1 3 ATP 1
        ... D 1 4 HEM 1
        ... E 1 5 HOH 1
        ... E 2 5 HOH 2
        ... '''
        >>> block = cif.read_string(cif_data).sole_block()
        >>> # Using make_res_per_chain_counter() in add_column() will add a
        >>> # column to the loop_ and populate it with values:
        >>> edit.add_column(
        ...     block,
        ...     "_pdbx_nonpoly_scheme",
        ...     "ndb_seq_num",
        ...     edit.make_res_per_chain_counter("asym_id"),  # CALLBACK
        ...     pos=5,
        ... )
        >>> print(block.as_string())
        data_test
        loop_
        _pdbx_nonpoly_scheme.asym_id
        _pdbx_nonpoly_scheme.auth_seq_num
        _pdbx_nonpoly_scheme.entity_id
        _pdbx_nonpoly_scheme.mon_id
        _pdbx_nonpoly_scheme.ndb_seq_num
        _pdbx_nonpoly_scheme.pdb_seq_num
        C 1 3 ATP 1 1
        D 1 4 HEM 1 1
        E 1 5 HOH 1 1
        E 2 5 HOH 2 2
        <BLANKLINE>
        >>> # "ndb_seq_num" is inserted as fifth column. The ATP in chain C
        >>> # ("asym_id") gets "ndb_seq_num" 1 and the HEM in chain D also gets
        >>> # "ndb_seq_num" 1. But the HOH, both live in chain E together, get
        >>> # "ndb_seq_num" 1 and 2. So for each chain, counting starts at 1
        >>> # and per compound in a chain, the counter is increased by 1.

    Args:
        asym_id_item (str): Item name hosting the chain name.

    Returns:
        Callable[[:class:`gemmi.cif.Table.Row`], int]: Callback function
        usable as ``callback`` in :func:`add_column`.

    Note:
        This function may be outsourced to a supporting module, if
        :mod:`~modelarchive.modelcif.edit` gets to big.
    """
    # Shared between calls of the callback: the chain seen in the previous
    # row and the number handed out for it.
    state = {"chain": None, "count": 0}

    def callback(row):
        chain = row[asym_id_item]
        if state["chain"] != chain:
            # different chain than in the previous row, restart counting
            state["count"] = 1
        else:
            state["count"] += 1
        state["chain"] = chain
        return state["count"]

    return callback
def _add_column(cat_itm, pos, table, block):
    """Add a new item to a category, name-value pairs or loop.

    The new item is created with the placeholder value ``"?"``.

    Args:
        cat_itm (str): Full name of the new item, "<CATEGORY>.<ITEM>".
        pos (int): 1-based position of the new item inside the category,
            ``-1`` appends it after the last existing item.
        table: Table of the category as fetched *before* adding the item;
            its ``loop`` attribute being ``None`` marks name-value pairs.
        block: CIF block holding the category.
    """
    if table.loop is None:
        # Create the pair, then move it to the requested place relative to
        # the first item of the category.
        block.set_pair(cat_itm, "?")
        if pos == -1:
            # append: right behind the last item the category had so far
            pos = len(table.tags)
        else:
            # turn the 1-based position into a 0-based offset
            pos -= 1
        block.move_item(
            block.get_index(cat_itm),
            block.get_index(table.tags[0]) + pos,
        )
    else:
        # -1 is passed on unchanged as "append", everything else is turned
        # from a 1-based into a 0-based column index
        if pos != -1:
            pos -= 1
        table.loop.add_columns([cat_itm], value="?", pos=pos)
def add_column(
    block,
    category,
    item,
    callback,
    pos=-1,
    # mod_cat_itms=None,
    raw=False,
):
    # No clue how to reduce no. of arguments, so allow it
    # pylint: disable=too-many-arguments,too-many-positional-arguments
    """Extend a category with a new item and populate it using a callback.

    Thinking of ModelCIF categories as tables, this function adds a new
    column (item) to a table that already exists in ``block``. A ``callback``
    function, to be provided, is executed with each row to compute the value
    for the new column. This avoids having a static list to fetch the values
    from. :func:`make_res_per_chain_counter()` is an example of a stateful
    implementation of a working callback. The callback has to be of form
    ``function(row)`` and return the value to be set for the ``item`` in the
    given ``row``.

    If ``item`` already exists in ``category``, a warning is issued and
    nothing is changed.

    Examples:
        >>> # Add "ndb_seq_num" to "_pdbx_nonpoly_scheme" including values
        >>> # Reminder: "ndb_seq_num" -> column, "_pdbx_nonpoly_scheme" -> table
        >>> from gemmi import cif
        >>> from modelarchive.modelcif import edit
        >>> cif_data = '''data_test
        ... loop_
        ... _pdbx_nonpoly_scheme.asym_id
        ... _pdbx_nonpoly_scheme.entity_id
        ... _pdbx_nonpoly_scheme.mon_id
        ... _pdbx_nonpoly_scheme.pdb_seq_num
        ... C 1 ATP 1
        ... D 2 HEM 1
        ... E 3 HOH 1
        ... E 3 HOH 2
        ... '''
        >>> block = cif.read_string(cif_data).sole_block()
        >>> edit.add_column(
        ...     block,
        ...     "_pdbx_nonpoly_scheme",
        ...     "ndb_seq_num",
        ...     edit.make_res_per_chain_counter("asym_id"),
        ...     pos=-1,
        ... )
        >>> print(block.as_string())
        data_test
        loop_
        _pdbx_nonpoly_scheme.asym_id
        _pdbx_nonpoly_scheme.entity_id
        _pdbx_nonpoly_scheme.mon_id
        _pdbx_nonpoly_scheme.pdb_seq_num
        _pdbx_nonpoly_scheme.ndb_seq_num
        C 1 ATP 1 1
        D 2 HEM 1 1
        E 3 HOH 1 1
        E 3 HOH 2 2
        <BLANKLINE>
        >>> # "ndb_seq_num" was appended as last column according to pos=-1

    Args:
        block (|gemmicifBlock|): block holding the categories of the CIF
            document.
        category (str): The CIF category (table) to add the item to.
        item (str): The item (column) to be added.
        callback (Callable[[:class:`gemmi.cif.Table.Row`], Any]): Function to
            be executed to compute values for each row of the new column.
            The returned value is converted with :func:`str` before it is
            stored.
        pos (int): Position to insert the column at. Default is at the end
            (-1). Inserting at the beginning requires ``pos=1``.
        raw (bool): Force to not quote strings containing white-spaces.

    Returns:
        None

    Raises:
        NotFoundCategoryError: If ``category`` can not be found in ``block``.
    """
    # fetch original data
    table = access.get_table(block, category)
    if table is None:
        raise NotFoundCategoryError(category)

    # init mod_cat_itms if needed
    # mod_cat_itms = _add_or_init_mod_cat_itms(mod_cat_itms, category)

    # check if column exists
    try:
        table.find_column(f"{category}.{item}")
    except RuntimeError:
        # If we get an exception, the item does not exist and we add it
        pass
    else:
        # if we do *NOT* get an exception, the item already exists
        _utils.warn_msg(f"'{category}.{item}' already exists, not updated.")
        # if len(mod_cat_itms[category]) == 0:
        #     del mod_cat_itms[category]
        # return mod_cat_itms
        return

    # add column, independent of category being a loop or name-value pairs
    _add_column(f"{category}.{item}", pos, table, block)

    # Re-fetch table: easier to work with but is not updated after adding a
    # column. Instead, the last column goes missing. That means if a column is
    # inserted in the middle of the table, the last column from the original
    # table becomes invisible.
    table = access.get_table(block, category)

    # add values
    for row in table:
        val = str(callback(row))
        if " " in val and not raw:
            val = cif.quote(val)
        row[item] = val
# (tail of add_column(), kept for the planned revision annotation)
# # Register item for revision annotation
# if item not in mod_cat_itms[category]:
#     mod_cat_itms[category].add(item)
# return mod_cat_itms


def _add_loop(block, cat, itms, mod_cat_itms, raw):
    """Create category ``cat`` as a loop - supporter for `add_category()`.

    Hands ``itms`` over to ``block.set_mmcif_category()`` and registers every
    item name in the ``mod_cat_itms`` record of ``cat``.
    """
    block.set_mmcif_category(cat, itms, raw=raw)
    mod_cat_itms[cat].update(itms.keys())


def _add_or_init_mod_cat_itms(mod_cat_itms, category):
    """Make sure the record of modified items has an entry for ``category``.

    Creates a fresh record if ``mod_cat_itms`` is ``None``, otherwise the
    given dictionary is extended in place (if needed) and returned.
    """
    if mod_cat_itms is None:
        return {category: set()}
    mod_cat_itms.setdefault(category, set())
    return mod_cat_itms


def _add_pairs(block, cat, itms, mod_cat_itms, raw):
    """Create category ``cat`` as named pairs - supporter for `add_category()`.

    Values given as list contribute their first element only. Every item
    name is registered in the ``mod_cat_itms`` record of ``cat``.
    """
    # payload for the set_pairs() call
    payload = {
        name: (value[0] if isinstance(value, list) else value)
        for name, value in itms.items()
    }
    block.set_pairs(f"{cat}.", payload, raw=raw)
    mod_cat_itms[cat].update(itms.keys())
def _drop_if_unmodified(mod_cat_itms, category):
    """Remove ``category`` from the record if no item was registered for it."""
    if not mod_cat_itms[category]:
        del mod_cat_itms[category]
    return mod_cat_itms


def add_category(
    block,
    category,
    item_data,
    index=None,
    mod_cat_itms=None,
    raw=False,
):
    # No clue how to reduce no. of arguments, so allow it
    # pylint: disable=too-many-positional-arguments,too-many-arguments
    """Introduce a new category to a |gemmicifblock| and populate it.

    Add ``category`` to ``block`` using data from ``item_data``.
    ``item_data`` is a dictionary with the CIF item names as keys and values
    as values to the items. On single values, named-pairs will be created, on
    lists with more than one value, a loop will be created. Whether pairs or
    a loop is created is decided by looking at the value of the first item
    in ``item_data``.

    ``index`` can be used to place the category at a certain position. Use an
    integer for a specific place in the category list or a string of form
    ``[after|before]:<CATEGORY>`` for relative positioning.

    If ``category`` already exists in ``block`` or ``item_data`` is empty, a
    warning is issued and ``block`` is left unchanged.

    Examples:
        >>> from gemmi import cif
        >>> from modelarchive.modelcif import edit
        >>> # start with an empty CIF document
        >>> cif_data = '''data_test
        ... '''
        >>> block = cif.read_string(cif_data).sole_block()
        >>> # lets add entities
        >>> _ = edit.add_category(
        ...     block,
        ...     "_entity",
        ...     {
        ...         "id": [1, 2, 3],
        ...         "type": ["polymer", "non-polymer", "water"],
        ...     }
        ... )
        >>> print(block.as_string())
        data_test
        loop_
        _entity.id
        _entity.type
        1 polymer
        2 non-polymer
        3 water
        <BLANKLINE>
        >>> # lets add an "_entry" ID before the entities
        >>> _ = edit.add_category(
        ...     block, "_entry", {"id": "1FOO"}, index="before:_entity"
        ... )
        >>> print(block.as_string())
        data_test
        _entry.id 1FOO
        <BLANKLINE>
        loop_
        _entity.id
        _entity.type
        1 polymer
        2 non-polymer
        3 water
        <BLANKLINE>

    Args:
        block (|gemmicifblock|): CIF data block holding the categories of the
            CIF document.
        category (str): Name of the new category to be created.
        item_data (dict[str, list[Any]|Any]): Attributes and values to be
            added to the new category. Dictionary with item names as keys.
            Values are either a list of values or a single value. If a single
            value is provided (or a list containing only one element), a
            named key-value pair is created instead of a loop.
        index (int|str): Placement of the new category within ``block``. This
            can be an integer for exact positioning, or a string of form
            ``[after|before]:<CATEGORY>`` for relative positioning. In
            relative positioning, ``<CATEGORY>`` specifies the name of the
            category before or after which ``category`` will be placed.
        mod_cat_itms (dict[str, set]): A record of what has been modified.
            Dictionary of category assigned a set of items changed. Items
            which already have the value of the update, are not recorded.
            This is meant for the revision history, most likely you can
            ignore it. A dictionary passed in is updated in place.
        raw (bool): Force to not quote strings containing white-spaces.

    Returns:
        dict[str, set]: A record of what has been modified. To be used with a
        revision history, most likely you can ignore it.

    Raises:
        MoveIdxToFarError: If the target position is outside ``block``. For
            example, if ``block`` contains 10 categories, trying to create a
            category at position 15 will raise this error.
    """
    mod_cat_itms = _add_or_init_mod_cat_itms(mod_cat_itms, category)

    # Existing categories are never overwritten.
    if access.get_table(block, category) is not None:
        _utils.warn_msg(
            f"Category '{category}' already exists, will not be added."
        )
        return _drop_if_unmodified(mod_cat_itms, category)

    # Without items there is nothing to create.
    if not item_data:
        _utils.warn_msg(f"No items provided, not adding category '{category}'.")
        return _drop_if_unmodified(mod_cat_itms, category)

    # figure out if we got pair or loop: only a list with a length other
    # than 1 in the first item makes a loop
    first_value = next(iter(item_data.values()))
    if isinstance(first_value, list) and len(first_value) != 1:
        _add_loop(block, category, item_data, mod_cat_itms, raw)
    else:
        _add_pairs(block, category, item_data, mod_cat_itms, raw)

    # place the new category, a no-op if index is None
    move_category(block, category, index)

    return _drop_if_unmodified(mod_cat_itms, category)
# LocalWords: gemmicifBlock func idx CIF qa str ValueError ModelArchive ndb # LocalWords: MoveIdxToFarError Args num pdbx nonpoly gemmi cif modelarchive # LocalWords: modelcif asym auth mon pdb HOH pos BLANKLINE bool itms msg # LocalWords: NotFoundCategoryError gemmicifblock