Source code for modelarchive.tools.maxit

"""`MAXIT`_ from `RCSB`_ converts coordinate files in PDB legacy format to CIF
and CIF files to mmCIF. This module also adds functionality to turn a PDB file
into a (minimalist) ModelCIF file. But don't get too excited - none of the
functionality will turn a PDB file into a fully annotated ModelCIF file. It just
makes sure the starting point is of valid CIF syntax. Extra data still need to be
added...

`MAXIT`_ is not bundled with this module. The source code can be downloaded
`here <https://sw-tools.rcsb.org/apps/MAXIT/source.html>`_.
`Installation instructions <https://sw-tools.rcsb.org/apps/MAXIT/README-source>`_
are available, and here is a TL;DR on how to compile on macOS and most Linux
distributions:

.. code-block:: bash

   # cd into the unpacked source directory first
   export RCSBROOT=$(pwd)
   make
   make binary
   # binaries are found in bin/
   # RCSBROOT needs to point at data/ when running maxit
"""

# Copyright (c) 2026, SIB - Swiss Institute of Bioinformatics and
#                     Biozentrum - University of Basel
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os
import subprocess
import tempfile

import gemmi

# The environment variable is consulted once, when this module is imported.
# run_maxit() looks this module-level name up on every call.
MAXIT_BINARY = os.environ.get("MAXIT_BINARY", "maxit")
"""Path to the maxit binary, defaults to ``maxit`` from ``$PATH``.

Can be overridden by setting the ``MAXIT_BINARY`` environment variable
before import, or by assigning directly in a function call.
"""


def run_maxit(infile, outfile, mode, logfile=None):
    """Invoke the MAXIT binary once, without checks or cleanup.

    The command line is assembled from the arguments and executed with
    :func:`subprocess.run` using ``check=True``.

    Args:
        infile (~pathlib.Path | str): Input file. Either PDB legacy format
            or CIF.
        outfile (~pathlib.Path | str): Output file.
        mode (str): MAXIT operation mode. Use ``"1"`` for PDB to CIF,
            ``"2"`` for CIF to PDB, ``"8"`` for CIF to mmCIF.
        logfile (~pathlib.Path | str, optional): File for MAXIT log
            messages. When omitted, no ``-log`` option is passed.

    Returns:
        subprocess.CompletedProcess: Result of the MAXIT run.

    Raises:
        subprocess.CalledProcessError: If MAXIT exits with a non-zero
            status.
    """
    io_args = [
        "-input",
        os.fspath(infile),
        "-output",
        os.fspath(outfile),
        "-o",
        mode,
    ]
    # The log option is only appended when a log file was requested.
    log_args = [] if logfile is None else ["-log", os.fspath(logfile)]
    return subprocess.run([MAXIT_BINARY, *io_args, *log_args], check=True)
def run_maxit_log2list(infile, outfile, mode):
    """Run MAXIT and hand back its log as a list of lines.

    MAXIT is pointed at a temporary log file which is read back after the
    run and removed when the function returns. A non-zero MAXIT exit is not
    raised but reported through the returned exit status.

    Args:
        infile (~pathlib.Path | str): Input file. Either PDB legacy format
            or CIF.
        outfile (~pathlib.Path | str): Output file.
        mode (str): MAXIT operation mode. Use ``"1"`` for PDB to CIF,
            ``"2"`` for CIF to PDB, ``"8"`` for CIF to mmCIF.

    Returns:
        tuple[list[str], int]: A tuple of the log file content as a list of
        whitespace-stripped strings and the MAXIT exit status.
    """
    # Note: `delete_on_close=False` (Python >= 3.12) would be more performant
    # but we support Python >= 3.10.
    with tempfile.NamedTemporaryFile(mode="r") as log_fh:
        try:
            status = run_maxit(
                infile, outfile, mode, logfile=log_fh.name
            ).returncode
        except subprocess.CalledProcessError as err:
            # check=True turns a failing run into an exception; keep the
            # exit status and still collect whatever was logged.
            status = err.returncode
        messages = [raw.strip() for raw in log_fh]
    return (messages, status)
def _format2format(infile, outfile, mode):
    """Run a single MAXIT conversion for the convenience converters.

    Args:
        infile (~pathlib.Path | str): Input file.
        outfile (~pathlib.Path | str): Output file.
        mode (str): MAXIT operation mode, passed through unchanged.

    Returns:
        list[str]: Empty list on success. On failure, the MAXIT log lines;
        if MAXIT failed without writing any log, a single line stating the
        exit status.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """
    if "RCSBROOT" not in os.environ:
        raise RuntimeError("RCSBROOT environment variable is not set.")

    log, extstts = run_maxit_log2list(infile, outfile, mode)
    if extstts == 0:
        return []
    # Callers interpret an empty list as success, so a failed run must never
    # return one - even when MAXIT did not write anything to its log.
    return log or [f"MAXIT exited with status {extstts} without log output."]
def pdb2cif(infile, outfile):
    """Translate a PDB legacy format file into CIF with MAXIT.

    Log messages are only handed back if the conversion fails.

    Args:
        infile (~pathlib.Path | str): Input file in PDB legacy format.
        outfile (~pathlib.Path | str): Output CIF file.

    Returns:
        list[str]: MAXIT log messages on failure, empty list on success.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """
    # MAXIT mode "1": PDB to CIF.
    return _format2format(infile, outfile, mode="1")
def cif2mmcif(infile, outfile):
    """Translate a CIF file into mmCIF with MAXIT.

    Log messages are only handed back if the conversion fails.

    Args:
        infile (~pathlib.Path | str): Input CIF file.
        outfile (~pathlib.Path | str): Output mmCIF file.

    Returns:
        list[str]: MAXIT log messages on failure, empty list on success.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """
    # MAXIT mode "8": CIF to mmCIF.
    return _format2format(infile, outfile, mode="8")
def pdb2mmcif(infile, outfile):
    """Turn a PDB legacy format file into mmCIF with two MAXIT runs.

    The first run converts PDB to CIF and writes to ``outfile``; the second
    run converts that file to mmCIF, again writing to ``outfile``. Log
    messages are only handed back if a step fails, and the second step is
    skipped when the first one fails.

    Args:
        infile (~pathlib.Path | str): Input file in PDB legacy format.
        outfile (~pathlib.Path | str): Output mmCIF file.

    Returns:
        list[str]: MAXIT log messages on failure, empty list on success.
        On failure, the first element indicates which conversion step
        failed.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """
    # (converter, its input file, headline to prepend when it fails)
    steps = (
        (pdb2cif, infile, "PDB to CIF conversion failed"),
        (cif2mmcif, outfile, "CIF to mmCIF conversion failed"),
    )
    for convert, source, headline in steps:
        log = convert(source, outfile)
        if log:
            log.insert(0, headline)
            return log
    return log
def fixing_pdb2mmcif(pdb_as_string, outfile):
    """Convert PDB legacy format text to mmCIF, repairing known issues first.

    Chains with an empty name are split into their subchains, each of which
    is added as a new chain with a generated name; the unnamed chain is then
    removed. If anything was repaired, the fixed structure is written to
    ``outfile`` as CIF and converted in place with :func:`cif2mmcif`.
    Otherwise the untouched input text is written to ``outfile`` and
    converted in place with :func:`pdb2mmcif`. Log messages are only handed
    back if the conversion fails.

    Args:
        pdb_as_string (str): Input file content in PDB legacy format.
        outfile (~pathlib.Path | str): Output mmCIF file.

    Returns:
        list[str]: MAXIT log messages on failure, empty list on success.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """

    def _get_cname(idxs):
        """Advance the counter ``idxs`` in place and return its chain name.

        ``idxs`` holds one alphabet position per character of the name.
        Starting from ``[-1]`` the names run ``A``..``Z``, ``0``..``9``,
        ``a``..``z``, then ``AA``, ``AB`` and so on.
        """
        alphabet = (
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "0123456789"
            "abcdefghijklmnopqrstuvwxyz"
        )
        base = len(alphabet)
        idxs[-1] += 1
        if idxs[-1] == base:
            # Last position overflowed: carry towards the front. The extra
            # increment on the overflowing position is harmless because that
            # position is reset to 0 right away.
            for pos in reversed(range(len(idxs))):
                idxs[pos] += 1
                if idxs[pos] < base:
                    break
                idxs[pos] = 0
                if pos == 0:
                    # Every position wrapped around, the name gets longer.
                    idxs.append(0)
        return "".join(alphabet[i] for i in idxs)

    # fix PDB input
    structure = gemmi.read_pdb_string(pdb_as_string)
    structure.setup_entities()
    structure.assign_label_seq_id()
    renamed = False
    for model in structure:
        # Generated names start over for every model.
        # NOTE(review): generated names are not compared against chain names
        # already present in the model - confirm that clashes cannot occur.
        counter = [-1]
        for chain in model:
            if len(chain.name) != 0:
                continue
            for subchain in chain.subchains():
                replacement = gemmi.Chain(_get_cname(counter))
                replacement.append_residues(subchain)
                model.add_chain(replacement)
            model.remove_chain("")
            renamed = True

    if not renamed:
        # nothing to fix, convert the original text to mmCIF
        with open(outfile, "w", encoding="ascii") as ofh:
            ofh.write(pdb_as_string)
        return pdb2mmcif(outfile, outfile)

    # input underwent fixing, needs to be written as new input file
    structure.make_mmcif_document().write_file(os.fspath(outfile))
    return cif2mmcif(outfile, outfile)
def main():
    """Entry point for the ``ma-maxit`` command line tool.

    Reads ``--input``, ``--output`` and ``--mode`` from the command line.
    For mode ``ma`` the input is read as PDB text and converted with
    :func:`fixing_pdb2mmcif`; any other mode is passed unchanged to MAXIT
    via :func:`run_maxit_log2list`.

    The collected log lines are printed to stdout on success and to stderr
    on failure. The function always terminates through :func:`sys.exit`:
    with ``1`` if the input file does not exist, with the MAXIT exit status
    for numeric modes, and with ``1``/``0`` for mode ``ma`` depending on
    whether log messages were returned.
    """
    # For main functions we allow bad imports
    # pylint: disable=import-outside-toplevel
    from pathlib import Path
    import argparse
    import sys

    def _parse_command_line():
        """Get arguments."""
        parser = argparse.ArgumentParser(
            description="Run RCSB MAXIT from Python "
            + "(https://sw-tools.rcsb.org/apps/MAXIT/index.html)",
        )
        parser.add_argument(
            "--input",
            help="Input/ source file",
            metavar="<INPUTFILE>",
            required=True,
            type=str,
        )
        parser.add_argument(
            "--output",
            help="Output/ destination file",
            metavar="<OUTPUTFILE>",
            required=True,
            type=str,
        )
        # The mode is handed to MAXIT verbatim, so the help text has to list
        # MAXIT's own codes: CIF to mmCIF is "8", as used by cif2mmcif().
        parser.add_argument(
            "--mode",
            "-o",
            dest="mode",
            help="Mode, 1: PDB to CIF, 2: CIF to PDB, 8: CIF to mmCIF, "
            + "ma: PDB to mmCIF with some fixes",
            metavar="<NUM>",
            required=True,
            type=str,
        )
        opts = parser.parse_args()

        if not Path(opts.input).is_file():
            print(f"No file '{opts.input}' found.", file=sys.stderr)
            sys.exit(1)

        return opts

    def _main():
        """Run as script."""
        opts = _parse_command_line()
        if opts.mode != "ma":
            log, extstts = run_maxit_log2list(
                opts.input, opts.output, opts.mode
            )
        else:
            with open(opts.input, "r", encoding="ascii") as lfh:
                pdblines = lfh.read()
            log = fixing_pdb2mmcif(pdblines, opts.output)
            # fixing_pdb2mmcif() only returns messages when it failed.
            extstts = 1 if log else 0

        ostream = sys.stdout if extstts == 0 else sys.stderr
        for line in log:
            print(line, file=ostream)

        sys.exit(extstts)

    _main()