"""Functionality to extend and modify ModelCIF files."""
from gemmi import cif
from .. import _utils
from . import access
[docs]
class NotFoundError(RuntimeError):
    """General exception for 'things' that can not be found.

    If ``msg`` is omitted (``None``), a message of the form
    "<SUBJECT> '<VALUE>' does not exist." is generated.

    Args:
        subject (str): The 'thing' that can not be found, used in the
            generated message.
        value (str): The name of what can not be found, used in the generated
            message.
        msg (str): Optional alternative error message.
    """

    # The constructor must be called ``__init__``; under any other name it is
    # never invoked and the raw argument tuple would end up as the message.
    def __init__(self, subject=None, value=None, msg=None):
        if msg is None:
            msg = f"{subject} '{value}' does not exist."
        super().__init__(msg)
[docs]
class NotFoundCategoryError(NotFoundError):
    """Raised when an expected CIF category is missing.

    Use this exception when a function relies on a specific category being
    present in the corresponding |gemmicifBlock|, but the category cannot be
    retrieved.

    Args:
        category (str): Name of the category that could not be found.
        msg (str): Optional alternative error message.
    """

    def __init__(self, category=None, msg=None):
        # The subject is fixed, only the category name and the optional
        # message are forwarded from the caller.
        subject = "Category"
        super().__init__(subject, category, msg)
[docs]
class NotFoundItemError(NotFoundError):
    """Exception if an item can not be found.

    This exception should be raised when a function expects a specific
    item to exist in the corresponding CIF category, but the item cannot be
    retrieved.

    Args:
        item (str): Name of the item that could not be found. Use as
            "<CATEGORY>.<ITEM>" for clarity.
        msg (str): Optional alternative error message.
    """

    # Derives from NotFoundError (itself a RuntimeError) because the
    # constructor call below uses the (subject, value, msg) signature of that
    # class.
    def __init__(self, item=None, msg=None):
        super().__init__("Item", item, msg)
def _get_idx_for_placement(plcmnt, block):
"""Turn a relative placement into an index in a block."""
try:
pos, cat = plcmnt.split(":", maxsplit=1)
except ValueError:
# ToDo: turn into own exception if needed
raise ValueError(
f"Couldn't split placement string '{plcmnt}', maybe the "
+ "':' is missing. Placement string needs to be of form "
+ "'[after|before]:<CATEGORY>'."
) from None
table = access.get_table(block, cat)
if table is None:
_utils.warn_msg(
f"Category '{cat}' for relative placement not found. "
+ "Skipping relocation."
)
return -1
# get idx of first or last
if pos.upper() == "AFTER":
idx = block.get_index(table.tags[-1])
if idx < sum(1 for _ in block) - 1:
idx += 1
return idx
if pos.upper() == "BEFORE":
return block.get_index(table.tags[0])
# ToDo: turn into own exception if needed
raise ValueError(
f"Relative placement string '{pos}' not recognised. "
+ "Valid directions are 'after' and 'before'."
)
[docs]
class MoveIdxToFarError(RuntimeError):
    """Raised when a category is moved beyond the document-category-list.

    Mainly used by :func:`move_category` when the requested target position
    does not exist within the corresponding |gemmicifBlock|. For example, a
    |gemmicifBlock| object holding 10 categories has no position 15, so
    trying to move a category there fails and should raise this exception.

    Args:
        category (str): Name of the category that could not be moved.
        idx (int): Target position to which the category was to be moved.
    """

    def __init__(self, category, idx):
        message = (
            f"Cannot move '{category}' to position '{idx}', exceeds range."
        )
        super().__init__(message)
[docs]
def move_category(block, cat, idx):
    """Relocate a category inside a |gemmicifBlock|.

    ModelCIF files are, by design, not meant to be read or edited by hand;
    dedicated applications are supposed to present and modify the data.
    Still, at `ModelArchive`_ we occasionally open ModelCIF files in an
    editor to look up specific details. Having related categories next to
    each other then saves a lot of jumping back and forth, which is what this
    function is for.

    :func:`move_category` moves category ``cat`` to position ``idx`` of the
    CIF block ``block``. ``idx`` comes in two flavours. As an integer it is
    the exact target position, handy for placing a category at the beginning
    (``idx=0``) or at the end (``idx=-1``) of ``block``. As categories are
    usually organised relative to related categories, ``idx`` may also be a
    string of form ``[after|before]:<CATEGORY>``. For example, to put
    category ``_ma_qa_metric`` in front of category ``_ma_qa_metric_local``,
    use ``idx="before:_ma_qa_metric_local"`` with ``cat=_ma_qa_metric``.
    With ``idx=None`` nothing is moved.

    Examples:
        >>> from gemmi import cif
        >>> from modelarchive.modelcif import edit
        >>> # get sample CIF data
        >>> cif_data = '''data_test
        ... _ma_qa_metric.id 1
        ... _ma_qa_metric.description test_score
        ... loop_
        ... _ma_qa_metric_local.ordinal_id
        ... _ma_qa_metric_local.metric_value
        ... _ma_qa_metric_local.metric_id
        ... 1 1.0 1
        ... 2 1.5 1
        ... '''
        >>> block = cif.read_string(cif_data).sole_block()
        >>> # move _ma_qa_metric_local to BEFORE _ma_qa_metric
        >>> edit.move_category(
        ...     block,
        ...     "_ma_qa_metric_local",
        ...     "before:_ma_qa_metric",
        ... )
        >>> print(block.as_string())
        data_test
        loop_
        _ma_qa_metric_local.ordinal_id
        _ma_qa_metric_local.metric_value
        _ma_qa_metric_local.metric_id
        1 1.0 1
        2 1.5 1
        <BLANKLINE>
        _ma_qa_metric.id 1
        _ma_qa_metric.description test_score
        <BLANKLINE>
        >>> # move _ma_qa_metric to the front
        >>> edit.move_category(block, "_ma_qa_metric", 0)
        >>> print(block.as_string())
        data_test
        _ma_qa_metric.id 1
        _ma_qa_metric.description test_score
        <BLANKLINE>
        loop_
        _ma_qa_metric_local.ordinal_id
        _ma_qa_metric_local.metric_value
        _ma_qa_metric_local.metric_id
        1 1.0 1
        2 1.5 1
        <BLANKLINE>

    Args:
        block (|gemmicifBlock|): CIF block to operate on.
        cat (str): Name of the CIF category to be moved.
        idx (int|str): Position to move ``cat`` to. This can be an integer for
            exact positioning, or a string of form
            ``[after|before]:<CATEGORY>`` for relative positioning. In
            relative positioning, ``<CATEGORY>`` specifies the name of the
            category before or after which ``cat`` will be placed.

    Returns:
        None

    Raises:
        NotFoundCategoryError: If ``cat`` can not be found in ``block``.
        MoveIdxToFarError: If the target position is outside ``block``. For
            example, if ``block`` contains 10 categories, trying to move a
            category to position 15 will raise this error.
    """
    if idx is None:
        # No target position requested, leave the block untouched.
        return

    # Resolve relative placement strings into an absolute index.
    if isinstance(idx, str):
        idx = _get_idx_for_placement(idx, block)

    table = access.get_table(block, cat)
    if table is None:
        raise NotFoundCategoryError(cat)

    if table.loop is None:
        # Name-value pairs are moved item by item.
        _move_pairs(block, cat, idx, table)
        return

    # A loop is addressed through the index of its first tag.
    try:
        block.move_item(block.get_index(table.tags[0]), idx)
    except RuntimeError as err:
        if str(err) != "move_item: new_pos out of range":
            raise
        raise MoveIdxToFarError(cat, idx) from None
def _move_pairs(block, cat, idx, table):
    """Move all items of a name-value pair category to position ``idx``.

    Args:
        block: CIF block holding the items.
        cat (str): Category name, only used for the error message.
        idx (int): Target position inside ``block``.
        table: Table object of the category, providing ``tags``.

    Raises:
        MoveIdxToFarError: If ``block.move_item`` reports the new position as
            out of range.
    """
    # Work on a copy of the tag list, the underlying gemmi.cif item list is
    # modified while looping. For non-negative targets the tags are processed
    # back to front.
    tags = list(table.tags)
    if idx >= 0:
        tags.reverse()

    for offset, tag in enumerate(tags):
        src = block.get_index(tag)
        # adapt the destination when the item currently sits in front of the
        # target position (moving before/ after a category)
        dst = idx - offset if idx > src else idx
        try:
            block.move_item(src, dst)
        except RuntimeError as err:
            if str(err) != "move_item: new_pos out of range":
                raise
            raise MoveIdxToFarError(cat, idx) from None
[docs]
def make_res_per_chain_counter(asym_id_item):
    """Create a stateful callback numbering residues per chain.

    The function returned by :func:`make_res_per_chain_counter` is meant to
    be used as ``callback`` in :func:`add_column`.

    While iterating over the rows of a table, the callback hands out
    consecutive residue numbers within each chain, starting at 1. Whenever
    the chain identifier differs from the one of the previous row, counting
    restarts at 1.

    Examples:
        >>> # Add item "ndb_seq_num" to category "_pdbx_nonpoly_scheme"
        >>> # Reminder: "ndb_seq_num" -> column, "_pdbx_nonpoly_scheme" -> table
        >>> from gemmi import cif
        >>> from modelarchive.modelcif import edit
        >>> cif_data = '''data_test
        ... loop_
        ... _pdbx_nonpoly_scheme.asym_id
        ... _pdbx_nonpoly_scheme.auth_seq_num
        ... _pdbx_nonpoly_scheme.entity_id
        ... _pdbx_nonpoly_scheme.mon_id
        ... _pdbx_nonpoly_scheme.pdb_seq_num
        ... C 1 3 ATP 1
        ... D 1 4 HEM 1
        ... E 1 5 HOH 1
        ... E 2 5 HOH 2
        ... '''
        >>> block = cif.read_string(cif_data).sole_block()
        >>> # Using make_res_per_chain_counter() in add_column() will add a
        >>> # column to the loop_ and populate it with values:
        >>> edit.add_column(
        ...     block,
        ...     "_pdbx_nonpoly_scheme",
        ...     "ndb_seq_num",
        ...     edit.make_res_per_chain_counter("asym_id"), # CALLBACK
        ...     pos=5,
        ... )
        >>> print(block.as_string())
        data_test
        loop_
        _pdbx_nonpoly_scheme.asym_id
        _pdbx_nonpoly_scheme.auth_seq_num
        _pdbx_nonpoly_scheme.entity_id
        _pdbx_nonpoly_scheme.mon_id
        _pdbx_nonpoly_scheme.ndb_seq_num
        _pdbx_nonpoly_scheme.pdb_seq_num
        C 1 3 ATP 1 1
        D 1 4 HEM 1 1
        E 1 5 HOH 1 1
        E 2 5 HOH 2 2
        <BLANKLINE>
        >>> # "ndb_seq_num" is inserted as fifth column. The ATP in chain C
        >>> # ("asym_id") gets "ndb_seq_num" 1 and the HEM in chain D also gets
        >>> # "ndb_seq_num" 1. But the HOH, both live in chain E together, get
        >>> # "ndb_seq_num" 1 and 2. So for each chain, counting starts at 1
        >>> # and per compound in a chain, the counter is increased by 1.

    Args:
        asym_id_item (str): Item name hosting the chain name.

    Returns:
        Callable[[:class:`gemmi.cif.Table.Row`], int]: Callback function usable
        as ``callback`` in :func:`add_column`.

    Note:
        This function may be outsourced to a supporting module, if
        :mod:`~modelarchive.modelcif.edit` gets too big.
    """
    # Shared between calls of the callback: chain of the previous row and the
    # number handed out last.
    state = {"chain": None, "number": 0}

    def callback(row):
        previous = state["chain"]
        current = row[asym_id_item]
        if previous != current:
            # new chain, restart counting
            state["number"] = 1
        else:
            state["number"] += 1
        state["chain"] = current
        return state["number"]

    return callback
def _add_column(cat_itm, pos, table, block):
    """Add a new item, filled with "?", to a loop or name-value category.

    Args:
        cat_itm (str): Full name of the new item, "<CATEGORY>.<ITEM>".
        pos (int): 1-based position of the new item within the category,
            -1 appends it behind the existing items.
        table: Table object of the category, fetched before the item is
            added.
        block: CIF block holding the category.
    """
    if table.loop is None:
        # Count the existing items before the block is modified.
        n_tags = len(table.tags)
        # NOTE(review): assumes set_pair() puts a new pair at the end of the
        # block, from where it is moved into the category - confirm in the
        # gemmi docs.
        block.set_pair(cat_itm, "?")
        # Offset relative to the first item of the category: right behind the
        # last existing item for pos=-1, otherwise pos turned 0-based.
        offset = n_tags if pos == -1 else pos - 1
        block.move_item(
            block.get_index(cat_itm),
            block.get_index(table.tags[0]) + offset,
        )
    else:
        # add_columns() gets a 0-based position, -1 is passed on unchanged.
        if pos != -1:
            pos -= 1
        table.loop.add_columns([cat_itm], value="?", pos=pos)
[docs]
def add_column(
    block,
    category,
    item,
    callback,
    pos=-1,
    # mod_cat_itms=None,
    raw=False,
):
    # No clue how to reduce no. of arguments, so allow it
    # pylint: disable=too-many-arguments,too-many-positional-arguments
    """Extend a category with a new item and populate it using a callback.

    Thinking of ModelCIF categories as tables, this function adds a new column
    (item) to a table that already exists in ``block``. A ``callback``
    function, to be provided, is executed with each row to compute the value
    for the new column. This avoids having a static list to fetch the values
    from.

    :func:`make_res_per_chain_counter()` is an example of a stateful
    implementation of a working callback.

    The callback has to be of form ``function(row)`` and return the value to be
    set for the ``item`` in the given ``row``. The returned value is converted
    with :class:`str` before it is stored. If ``item`` already exists in
    ``category``, a warning is issued and nothing is changed.

    Examples:
        >>> # Add "ndb_seq_num" to "_pdbx_nonpoly_scheme" including values
        >>> # Reminder: "ndb_seq_num" -> column, "_pdbx_nonpoly_scheme" -> table
        >>> from gemmi import cif
        >>> from modelarchive.modelcif import edit
        >>> cif_data = '''data_test
        ... loop_
        ... _pdbx_nonpoly_scheme.asym_id
        ... _pdbx_nonpoly_scheme.entity_id
        ... _pdbx_nonpoly_scheme.mon_id
        ... _pdbx_nonpoly_scheme.pdb_seq_num
        ... C 1 ATP 1
        ... D 2 HEM 1
        ... E 3 HOH 1
        ... E 3 HOH 2
        ... '''
        >>> block = cif.read_string(cif_data).sole_block()
        >>> edit.add_column(
        ...     block,
        ...     "_pdbx_nonpoly_scheme",
        ...     "ndb_seq_num",
        ...     edit.make_res_per_chain_counter("asym_id"),
        ...     pos=-1,
        ... )
        >>> print(block.as_string())
        data_test
        loop_
        _pdbx_nonpoly_scheme.asym_id
        _pdbx_nonpoly_scheme.entity_id
        _pdbx_nonpoly_scheme.mon_id
        _pdbx_nonpoly_scheme.pdb_seq_num
        _pdbx_nonpoly_scheme.ndb_seq_num
        C 1 ATP 1 1
        D 2 HEM 1 1
        E 3 HOH 1 1
        E 3 HOH 2 2
        <BLANKLINE>
        >>> # "ndb_seq_num" was appended as last column according to pos=-1

    Args:
        block (|gemmicifBlock|): block holding the categories of the CIF
            document.
        category (str): The CIF category (table) to add the item to.
        item (str): The item (column) to be added.
        callback (Callable[[:class:`gemmi.cif.Table.Row`], int]): Function to be
            executed to compute values for each row of the new column.
        pos (int): Position to insert the column at. Default is at the end (-1).
            Inserting at the beginning requires ``pos=1``.
        raw (bool): Force to not quote strings containing white-spaces.

    Returns:
        None

    Raises:
        NotFoundCategoryError: If ``category`` can not be found in ``block``.
    """
    # fetch original data
    table = access.get_table(block, category)
    if table is None:
        raise NotFoundCategoryError(category)
    # init mod_cat_itms if needed
    # mod_cat_itms = _add_or_init_mod_cat_itms(mod_cat_itms, category)

    # check if column exists
    try:
        table.find_column(f"{category}.{item}")
    except RuntimeError:
        # If we get an exception, the item does not exist and we add it
        pass
    else:
        # if we do *NOT* get an exception, the item already exists
        _utils.warn_msg(f"'{category}.{item}' already exists, not updated.")
        # if len(mod_cat_itms[category]) == 0:
        #     del mod_cat_itms[category]
        # return mod_cat_itms
        return

    # add column, independent of whether category is loop or name-value pairs
    _add_column(f"{category}.{item}", pos, table, block)

    # Re-fetch table: easier to work with but is not updated after adding a
    # column. Instead, the last column goes missing. That means if a column is
    # inserted in the middle of the table, the last column from the original
    # table becomes invisible.
    table = access.get_table(block, category)

    # add values
    for row in table:
        val = str(callback(row))
        # Values containing blanks need quoting unless the caller opted out.
        if " " in val and not raw:
            val = cif.quote(val)
        row[item] = val
    # # Register item for revision annotation
    # if item not in mod_cat_itms[category]:
    #     mod_cat_itms[category].add(item)
    # return mod_cat_itms
def _add_loop(block, cat, itms, mod_cat_itms, raw):
    """Write ``itms`` as category ``cat`` - supporter for `add_category()`.

    The data is handed to ``block.set_mmcif_category``; afterwards every item
    name is registered for ``cat`` in ``mod_cat_itms``.
    """
    block.set_mmcif_category(cat, itms, raw=raw)
    # record each new item as modified
    for name in itms:
        mod_cat_itms[cat].add(name)
def _add_or_init_mod_cat_itms(mod_cat_itms, category):
"""Init modified categories/ items counter or add the category."""
if mod_cat_itms is None:
mod_cat_itms = {category: set()}
else:
if category not in mod_cat_itms:
mod_cat_itms[category] = set()
return mod_cat_itms
def _add_pairs(block, cat, itms, mod_cat_itms, raw):
    """Write ``itms`` as named pairs - supporter for `add_category()`.

    Values provided as list contribute their first element only. After the
    pairs have been handed to ``block.set_pairs``, every item name is
    registered for ``cat`` in ``mod_cat_itms``.
    """
    # payload for the set_pairs() call: one plain value per item
    payload = {
        name: value[0] if isinstance(value, list) else value
        for name, value in itms.items()
    }
    block.set_pairs(f"{cat}.", payload, raw=raw)
    # record each new item as modified
    for name in itms:
        mod_cat_itms[cat].add(name)
[docs]
def add_category(
    block,
    category,
    item_data,
    index=None,
    mod_cat_itms=None,
    raw=False,
):
    # No clue how to reduce no. of arguments, so allow it
    # pylint: disable=too-many-positional-arguments,too-many-arguments
    """Introduce a new category to a |gemmicifblock| and populate it.

    Add ``category`` to ``block`` using data from ``item_data``. ``item_data``
    is a dictionary with the CIF item names as keys and values as values to the
    items. On single values, named-pairs will be created, on lists with more
    than one value, a loop will be created. Whether pairs or a loop are
    written is decided by looking at the first value in ``item_data``.
    ``index`` can be used to place the category at a certain position. Use an
    integer for a specific place in the category list or a string of form
    ``[after|before]:<CATEGORY>`` for relative positioning.

    If ``category`` already exists or ``item_data`` is empty, a warning is
    issued and ``block`` stays untouched.

    Examples:
        >>> from gemmi import cif
        >>> from modelarchive.modelcif import edit
        >>> # start with an empty CIF document
        >>> cif_data = '''data_test
        ... '''
        >>> block = cif.read_string(cif_data).sole_block()
        >>> # lets add entities
        >>> _ = edit.add_category(
        ...     block,
        ...     "_entity",
        ...     {
        ...         "id": [1, 2, 3],
        ...         "type": ["polymer", "non-polymer", "water"],
        ...     }
        ... )
        >>> print(block.as_string())
        data_test
        loop_
        _entity.id
        _entity.type
        1 polymer
        2 non-polymer
        3 water
        <BLANKLINE>
        >>> # lets add an "_entry" ID before the entities
        >>> _ = edit.add_category(
        ...     block, "_entry", {"id": "1FOO"}, index="before:_entity"
        ... )
        >>> print(block.as_string())
        data_test
        _entry.id 1FOO
        <BLANKLINE>
        loop_
        _entity.id
        _entity.type
        1 polymer
        2 non-polymer
        3 water
        <BLANKLINE>

    Args:
        block (|gemmicifblock|): CIF data block holding the categories of the
            CIF document.
        category (str): Name of the new category to be created.
        item_data (dict[str, list[Any]|Any]): Attributes and values to be
            added to the new category. Dictionary with item names as keys.
            Values are either a list of values or a single value. If a single
            value is provided (or a list containing only one element), a named
            key-value pair is created instead of a loop.
        index (int|str): Placement of the new category within ``block``. This
            can be an integer for exact positioning, or a string of form
            ``[after|before]:<CATEGORY>`` for relative positioning. In relative
            positioning, ``<CATEGORY>`` specifies the name of the category
            before or after which ``category`` will be placed.
        mod_cat_itms (dict[str, set]): A record of what has been modified.
            Dictionary of category assigned a set of items changed. Items which
            already have the value of the update, are not recorded. This is
            meant for the revision history, most likely you can ignore it.
        raw (bool): Force to not quote strings containing white-spaces.

    Returns:
        dict[str, set]: A record of what has been modified. To be used with a
        revision history, most likely you can ignore it.

    Raises:
        MoveIdxToFarError: If the target position is outside ``block``. For
            example, if ``block`` contains 10 categories, trying to create a
            category at position 15 will raise this error.
    """
    mod_cat_itms = _add_or_init_mod_cat_itms(mod_cat_itms, category)

    if access.get_table(block, category) is not None:
        _utils.warn_msg(
            f"Category '{category}' already exists, will not be added."
        )
        return _drop_unmodified_category(mod_cat_itms, category)

    if not item_data:
        _utils.warn_msg(f"No items provided, not adding category '{category}'.")
        return _drop_unmodified_category(mod_cat_itms, category)

    # figure out if we got pair or loop: only a list that does not hold
    # exactly one element makes a loop
    vals = next(iter(item_data.values()))
    if isinstance(vals, list) and len(vals) != 1:
        _add_loop(block, category, item_data, mod_cat_itms, raw)
    else:
        _add_pairs(block, category, item_data, mod_cat_itms, raw)

    move_category(block, category, index)

    return _drop_unmodified_category(mod_cat_itms, category)


def _drop_unmodified_category(mod_cat_itms, category):
    """Remove ``category`` from the record if no item was registered for it."""
    if not mod_cat_itms[category]:
        del mod_cat_itms[category]
    return mod_cat_itms
# LocalWords: gemmicifBlock func idx CIF qa str ValueError ModelArchive ndb
# LocalWords: MoveIdxToFarError Args num pdbx nonpoly gemmi cif modelarchive
# LocalWords: modelcif asym auth mon pdb HOH pos BLANKLINE bool itms msg
# LocalWords: NotFoundCategoryError gemmicifblock