"""`MAXIT`_ from `RCSB`_ converts coordinate files in PDB legacy format to CIF
and CIF files to mmCIF. This module also adds functionality to turn a PDB file
into a (minimalist) ModelCIF file. But don't get too excited - none of the
functionality will turn a PDB file into a fully annotated ModelCIF file. It just
makes sure the starting point is of valid CIF syntax. Extra data still need to be
added...
`MAXIT`_ is not bundled with this module. The source code can be downloaded
`here <https://sw-tools.rcsb.org/apps/MAXIT/source.html>`_.
`Installation instructions <https://sw-tools.rcsb.org/apps/MAXIT/README-source>`_
are available, and here is a TL;DR on how to compile on macOS and most Linux
distributions:
.. code-block:: bash
# cd into the unpacked source directory first
export RCSBROOT=$(pwd)
make
make binary
# binaries are found in bin/
# RCSBROOT needs to point at data/ when running maxit
"""
# Copyright (c) 2026, SIB - Swiss Institute of Bioinformatics and
# Biozentrum - University of Basel
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import os
import subprocess
import tempfile
import gemmi
MAXIT_BINARY = os.getenv("MAXIT_BINARY", "maxit")
"""Path to the maxit binary, ``maxit`` (looked up in ``$PATH``) by default.

Set the ``MAXIT_BINARY`` environment variable before importing this module to
pick a different executable, or assign a new value to this module attribute
before calling one of the functions that run MAXIT.
"""
[docs]
def run_maxit(infile, outfile, mode, logfile=None):
    """Invoke the MAXIT binary once, exactly as requested.

    Nothing is validated, no mode is chosen automatically and no files are
    cleaned up afterwards. The executable is read from ``MAXIT_BINARY`` at
    call time.

    Args:
        infile (~pathlib.Path | str): File to read, either PDB legacy format
            or CIF.
        outfile (~pathlib.Path | str): File to write.
        mode (str): MAXIT operation mode, passed on to the ``-o`` option.
            Use ``"1"`` for PDB to CIF, ``"2"`` for CIF to PDB, ``"8"`` for
            CIF to mmCIF.
        logfile (~pathlib.Path | str, optional): File for MAXIT log messages.
            When omitted, no ``-log`` option is passed.

    Returns:
        subprocess.CompletedProcess: Result of the MAXIT run.

    Raises:
        subprocess.CalledProcessError: If MAXIT exits with a non-zero status
            (the process is run with ``check=True``).
    """
    argv = [MAXIT_BINARY]
    argv += ["-input", os.fspath(infile)]
    argv += ["-output", os.fspath(outfile)]
    argv += ["-o", mode]
    # The log option is only appended on request.
    if logfile is not None:
        argv += ["-log", os.fspath(logfile)]
    return subprocess.run(argv, check=True)
[docs]
def run_maxit_log2list(infile, outfile, mode):
    """Run MAXIT and hand back its log messages together with the exit code.

    MAXIT is pointed at a named temporary file for logging. After the run,
    that file is read back line by line, with surrounding whitespace
    stripped. A failing MAXIT run does not raise; its exit status is
    returned instead.

    Args:
        infile (~pathlib.Path | str): Input file. Either PDB legacy format or
            CIF.
        outfile (~pathlib.Path | str): Output file.
        mode (str): MAXIT operation mode. Use ``"1"`` for PDB to CIF, ``"2"``
            for CIF to PDB, ``"8"`` for CIF to mmCIF.

    Returns:
        tuple[list[str], int]: A tuple of the log file content as a list
        of strings and the MAXIT exit status.
    """
    # Note: `delete_on_close=False` (Python >= 3.12) would be more performant
    # but we support Python >= 3.10.
    with tempfile.NamedTemporaryFile(mode="r") as log_fh:
        try:
            finished = run_maxit(infile, outfile, mode, logfile=log_fh.name)
        except subprocess.CalledProcessError as failure:
            # A non-zero exit is reported through the return value.
            status = failure.returncode
        else:
            status = finished.returncode
        # Read inside the ``with`` block, while the temporary file is open.
        messages = [entry.strip() for entry in log_fh]
    return (messages, status)
def _format2format(infile, outfile, mode):
    """Shared implementation of the convenience converters.

    Refuses to run without ``RCSBROOT`` in the environment, then runs MAXIT
    in the given ``mode``. The collected log messages are returned only if
    MAXIT exited with a non-zero status; otherwise an empty list is returned.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """
    if "RCSBROOT" not in os.environ:
        raise RuntimeError("RCSBROOT environment variable is not set.")
    messages, status = run_maxit_log2list(infile, outfile, mode)
    return messages if status != 0 else []
[docs]
def pdb2cif(infile, outfile):
    """Turn a PDB legacy format file into a CIF file with MAXIT.

    Log messages are only handed back if the conversion fails.

    Args:
        infile (~pathlib.Path | str): Input file in PDB legacy format.
        outfile (~pathlib.Path | str): Output CIF file.

    Returns:
        list[str]: MAXIT log messages on failure, empty list on success.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """
    # MAXIT operation mode "1": PDB to CIF.
    pdb_to_cif = "1"
    return _format2format(infile, outfile, pdb_to_cif)
[docs]
def cif2mmcif(infile, outfile):
    """Turn a CIF file into an mmCIF file with MAXIT.

    Log messages are only handed back if the conversion fails.

    Args:
        infile (~pathlib.Path | str): Input CIF file.
        outfile (~pathlib.Path | str): Output mmCIF file.

    Returns:
        list[str]: MAXIT log messages on failure, empty list on success.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """
    # MAXIT operation mode "8": CIF to mmCIF.
    cif_to_mmcif = "8"
    return _format2format(infile, outfile, cif_to_mmcif)
[docs]
def pdb2mmcif(infile, outfile):
    """Turn a PDB legacy format file into an mmCIF file with MAXIT.

    Two MAXIT runs are chained: PDB to CIF first, then CIF to mmCIF. The
    intermediate CIF is written to ``outfile`` and converted in place by the
    second run. Log messages are only handed back if a step fails, and the
    second step is skipped when the first one fails.

    Args:
        infile (~pathlib.Path | str): Input file in PDB legacy format.
        outfile (~pathlib.Path | str): Output mmCIF file.

    Returns:
        list[str]: MAXIT log messages on failure, empty list on success.
        On failure, the first element indicates which conversion step
        failed.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """
    # (converter, file it reads, headline to prepend to the log on failure)
    steps = (
        (pdb2cif, infile, "PDB to CIF conversion failed"),
        (cif2mmcif, outfile, "CIF to mmCIF conversion failed"),
    )
    for convert, source, headline in steps:
        messages = convert(source, outfile)
        if messages:
            messages.insert(0, headline)
            return messages
    # Both steps succeeded; this is the empty log of the last step.
    return messages
[docs]
def fixing_pdb2mmcif(pdb_as_string, outfile):
    """Convert PDB legacy format content to mmCIF, repairing known issues.

    The content is parsed with gemmi. Every chain with an empty name is
    replaced by newly named chains, one per subchain. If anything was
    renamed, the repaired structure is written to ``outfile`` as an mmCIF
    document and converted in place with :func:`cif2mmcif`. Otherwise the
    input string is written to ``outfile`` unchanged and converted in place
    with :func:`pdb2mmcif`. Log messages are only returned upon failure.

    Args:
        pdb_as_string (str): Input file content in PDB legacy format.
        outfile (~pathlib.Path | str): Output mmCIF file.

    Returns:
        list[str]: MAXIT log messages on failure, empty list on success.

    Raises:
        RuntimeError: If ``RCSBROOT`` environment variable is not set.
    """

    def _get_cname(idxs):
        """Advance the counter ``idxs`` in place and return the next name.

        ``idxs`` holds one index into the chain name alphabet per character
        of the name; a fresh counter starts as ``[-1]``. Names run through
        ``A``-``Z``, ``0``-``9``, ``a``-``z`` and then grow by one character
        (``AA``, ``AB``, ...).
        """
        alphabet = (
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            + "0123456789"
            + "abcdefghijklmnopqrstuvwxyz"
        )
        base = len(alphabet)
        idxs[-1] += 1
        if idxs[-1] == base:
            # Last position ran past the alphabet: carry over, right to left.
            for pos in reversed(range(len(idxs))):
                idxs[pos] += 1
                if idxs[pos] < base:
                    break
                idxs[pos] = 0
                if pos == 0:
                    # All positions overflowed, the name gets one char longer.
                    idxs.append(0)
        return "".join(alphabet[i] for i in idxs)

    # fix PDB input
    structure = gemmi.read_pdb_string(pdb_as_string)
    structure.setup_entities()
    structure.assign_label_seq_id()
    renamed = False
    for model in structure:
        # Chain naming restarts for every model.
        counter = [-1]
        # NOTE(review): chains are added to and removed from ``model`` while
        # it is being iterated - confirm gemmi tolerates this.
        # NOTE(review): generated names are not checked against chain names
        # already present in the model - confirm collisions cannot occur.
        for chain in model:
            if chain.name:
                continue
            for subchain in chain.subchains():
                replacement = gemmi.Chain(_get_cname(counter))
                replacement.append_residues(subchain)
                model.add_chain(replacement)
            model.remove_chain("")
            renamed = True

    # input underwent fixing, needs to be written as new input file
    if renamed:
        structure.make_mmcif_document().write_file(os.fspath(outfile))
        return cif2mmcif(outfile, outfile)

    # convert to mmCIF
    with open(outfile, "w", encoding="ascii") as ofh:
        ofh.write(pdb_as_string)
    return pdb2mmcif(outfile, outfile)
[docs]
def main():
    """Entry point for the ``ma-maxit`` command line tool.

    Parses ``--input``, ``--output`` and ``--mode`` from the command line.
    For mode ``ma`` the input is read as PDB legacy format and converted
    with :func:`fixing_pdb2mmcif`; any other mode is handed to MAXIT as is
    via :func:`run_maxit_log2list`. Log messages go to stdout if the exit
    status is 0, to stderr otherwise.

    The process always ends through :func:`sys.exit`: with status 1 if the
    input file does not exist or the ``ma`` conversion returned log
    messages, with the MAXIT exit status for all other modes.
    """
    # For main functions we allow bad imports
    # pylint: disable=import-outside-toplevel
    from pathlib import Path
    import argparse
    import sys

    def _parse_command_line():
        """Get arguments."""
        parser = argparse.ArgumentParser(
            description="Run RCSB MAXIT from Python "
            + "(https://sw-tools.rcsb.org/apps/MAXIT/index.html)",
        )
        parser.add_argument(
            "--input",
            help="Input/ source file",
            metavar="<INPUTFILE>",
            required=True,
            type=str,
        )
        parser.add_argument(
            "--output",
            help="Output/ destination file",
            metavar="<OUTPUTFILE>",
            required=True,
            type=str,
        )
        parser.add_argument(
            "--mode",
            "-o",
            dest="mode",
            # CIF to mmCIF is MAXIT mode 8 (the mode cif2mmcif() uses); the
            # help text previously advertised mode 3 for it.
            help="Mode, 1: PDB to CIF, 2: CIF to PDB, 8: CIF to mmCIF, "
            + "ma: PDB to mmCIF with some fixes",
            metavar="<NUM>",
            required=True,
            type=str,
        )
        opts = parser.parse_args()
        if not Path(opts.input).is_file():
            print(f"No file '{opts.input}' found.", file=sys.stderr)
            sys.exit(1)
        return opts

    def _main():
        """Run as script."""
        opts = _parse_command_line()
        if opts.mode != "ma":
            # Plain MAXIT run, the mode is passed through untouched.
            log, extstts = run_maxit_log2list(
                opts.input, opts.output, opts.mode
            )
        else:
            with open(opts.input, "r", encoding="ascii") as lfh:
                pdblines = lfh.read()
            log = fixing_pdb2mmcif(pdblines, opts.output)
            # The converter only returns log messages upon failure.
            extstts = 1 if log else 0
        ostream = sys.stdout if extstts == 0 else sys.stderr
        for line in log:
            print(line, file=ostream)
        sys.exit(extstts)

    _main()