Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 141 additions & 110 deletions src/subscript/restartthinner/restartthinner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,22 @@

import argparse
import datetime
import glob
import logging
import os
import shutil
import sys
import subprocess
import tempfile
from collections.abc import Iterator
from contextlib import contextmanager
from pathlib import Path

import numpy
import pandas
import numpy as np
import pandas as pd
from resdata.resfile import ResdataFile

from subscript import __version__
from subscript import __version__, getLogger

logger = getLogger(__name__)

DESCRIPTION = """
Slice a subset of restart-dates from an E100 Restart file (UNRST)
Expand All @@ -28,97 +32,110 @@


def find_resdata_app(toolname: str) -> str:
    """Locate a resdata command line tool on the PATH.

    The resdata tools have been installed with varying suffixes over time,
    so the candidates ``<toolname>.x``, ``<toolname>.c.x``,
    ``<toolname>.cpp.x`` and finally the bare ``<toolname>`` are tried in
    that order, and the first one found wins.

    Args:
        toolname: Base name of the tool (e.g., 'rd_unpack').

    Returns:
        Path to the first matching executable, as reported by
        ``shutil.which``.

    Raises:
        OSError: If none of the candidate names can be found in PATH.
    """
    # Order matters: the ".x" suffix is preferred, the bare name is the
    # last resort.
    for ext in (".x", ".c.x", ".cpp.x", ""):
        if path := shutil.which(toolname + ext):
            return path
    raise OSError(f"{toolname} not found in PATH")


def date_slicer(
    slicedates: list[pd.Timestamp],
    restartdates: list[datetime.datetime],
    restartindices: list[int],
) -> list[int]:
    """Make a list of report indices that match the input slicedates.

    For every requested slice date, the restart date closest in time is
    located and the report index at the same position in ``restartindices``
    is selected. If two restart dates are equally close, the one that comes
    first in ``restartdates`` is chosen.

    Args:
        slicedates: Dates for which a restart is wanted.
        restartdates: Dates of the restarts that are available.
        restartindices: Report indices, positionally paired with
            ``restartdates``.

    Returns:
        One report index per entry in ``slicedates``, in the same order.
        The list may contain duplicates.

    Raises:
        ValueError: If slice dates are requested but ``restartdates`` is
            empty.
    """
    if slicedates and not restartdates:
        raise ValueError("No restart dates to select from")

    # Convert once, outside the loop over slice dates.
    available = [pd.Timestamp(restartdate) for restartdate in restartdates]

    selected = []
    for slicedate in slicedates:
        target = pd.Timestamp(slicedate)
        # Compare exact absolute time differences. Truncating to whole days
        # via ``.days`` floors negative differences, which makes a restart
        # shortly after the target look further away than it is.
        distances = [abs(target - restartdate) for restartdate in available]
        selected.append(restartindices[distances.index(min(distances))])
    return selected


def rd_repacker(rstfilename: str, slicerstindices: list, quiet: bool) -> None:
"""
Wrapper for ecl_unpack.x and ecl_pack.x utilities. These
utilities are from resdata.
@contextmanager
def _working_directory(path: Path) -> Iterator[None]:
    """Run the body of a ``with`` statement with ``path`` as working directory.

    The working directory in effect on entry is restored on exit, also when
    the body raises an exception.

    Args:
        path: Directory to change into.
    """
    original_cwd = Path.cwd()
    # Change directory before entering the try block, so that the restore
    # in "finally" only runs when the change actually took place.
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(original_cwd)

First unpack the UNRST file, then delete the dates we don't want, then
pack the remaining files into a new UNRST file.

This function will change working directory to the
location of the UNRST file, dump temporary files in there, and
modify the original filename.
"""
out = " >/dev/null" if quiet else ""
# Error early if resdata tools are not available
try:
find_resdata_app("rd_unpack")
find_resdata_app("rd_pack")
except OSError:
sys.exit(
"ERROR: rd_unpack.x and/or rd_pack.x not found.\n"
"These tools are required and must be installed separately"
)

# Take special care if the UNRST file we get in is not in current directory
cwd = os.getcwd()
rstfilepath = Path(rstfilename).parent
tempdir = None
def rd_repacker(rstfilename: str, slicerstindices: list[int], quiet: bool) -> None:
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The parameter name "slicerstindices" is hard to parse: with no separators it can be read as "slicer-st-indices" rather than the intended "slice-rst-indices" (the restart indices to keep). Consider renaming it to "sliced_rst_indices" for clarity.

Copilot uses AI. Check for mistakes.
"""Repack a UNRST file keeping only selected restart indices.

try:
os.chdir(Path(rstfilename).parent)
tempdir = tempfile.mkdtemp(dir=".")
os.rename(
os.path.basename(rstfilename),
os.path.join(tempdir, os.path.basename(rstfilename)),
)
os.chdir(tempdir)
os.system(
find_resdata_app("rd_unpack") + " " + os.path.basename(rstfilename) + out
)
unpackedfiles = glob.glob("*.X*")
for file in unpackedfiles:
if int(file.split(".X")[1]) not in slicerstindices:
os.remove(file)
os.system(find_resdata_app("rd_pack") + " *.X*" + out)
# We are inside the tmp directory, move file one step up:
os.rename(
os.path.join(os.getcwd(), os.path.basename(rstfilename)),
os.path.join(os.getcwd(), "../", os.path.basename(rstfilename)),
)
finally:
os.chdir(cwd)
if tempdir is not None:
shutil.rmtree(rstfilepath / tempdir)
Uses rd_unpack and rd_pack utilities from resdata to unpack the UNRST file,
remove unwanted dates, and repack into a new UNRST file.

Args:
rstfilename: Path to the UNRST file.
slicerstindices: List of restart indices to keep.
quiet: If True, suppress subprocess output.

Raises:
OSError: If rd_unpack or rd_pack tools are not found.
"""
rd_unpack = find_resdata_app("rd_unpack")
rd_pack = find_resdata_app("rd_pack")

rstpath = Path(rstfilename)
rstdir = rstpath.parent or Path(".")
rstname = rstpath.name

with _working_directory(rstdir):
tempdir = Path(tempfile.mkdtemp(dir="."))
try:
# Move UNRST into temp directory and work there
shutil.move(rstname, tempdir / rstname)

with _working_directory(tempdir):
subprocess.run(
[rd_unpack, rstname],
capture_output=quiet,
check=True,
)

for file in Path(".").glob("*.X*"):
index = int(file.suffix.lstrip(".X"))
if index not in slicerstindices:
file.unlink()

remaining_files = sorted(Path(".").glob("*.X*"))
subprocess.run(
[rd_pack, *[str(f) for f in remaining_files]],
capture_output=quiet,
check=True,
)
Comment on lines +103 to +119
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The subprocess.run calls use capture_output=quiet which will be either True or False. When True, both stdout and stderr are captured and discarded. When False, they are not captured. However, this means error messages from the subprocess will be hidden when quiet=True, making debugging difficult if the subprocess fails. Consider using stdout=subprocess.DEVNULL if quiet else None and always showing stderr for better error visibility, or at least including stderr in the exception message when check=True fails.

Copilot uses AI. Check for mistakes.

# Move result back up
shutil.move(rstname, f"../{rstname}")
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Building the path with the f-string f"../{rstname}" is less robust and less portable than using Path operations. Consider using Path("..") / rstname instead, which is also consistent with the rest of the codebase, where Path objects are used.

Suggested change
shutil.move(rstname, f"../{rstname}")
shutil.move(rstname, Path("..") / rstname)

Copilot uses AI. Check for mistakes.
finally:
shutil.rmtree(tempdir)
Comment on lines +96 to +124
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If an error occurs in rd_repacker after moving the original file into the temp directory but before successfully moving it back, the original file could be lost when the temp directory is cleaned up in the finally block. This is especially problematic when keep=False. Consider creating a temporary backup inside rd_repacker or ensuring the original file is only moved after successful processing, using a different approach like working on a copy.

Copilot uses AI. Check for mistakes.


def get_restart_indices(rstfilename: str) -> list[int]:
"""Extract a list of restart indices for a filename.

Args:
rstfilename: Path to the UNRST file.

def get_restart_indices(rstfilename: str) -> list:
"""Extract a list of RST indices for a filename"""
Returns:
List of restart report indices.

Raises:
FileNotFoundError: If the file does not exist.
"""
if Path(rstfilename).exists():
# This function segfaults if file does not exist
return ResdataFile.file_report_list(str(rstfilename))
Expand All @@ -132,8 +149,14 @@ def restartthinner(
dryrun: bool = True,
keep: bool = False,
) -> None:
"""
Thin an existing UNRST file to selected number of restarts.
"""Thin an existing UNRST file to selected number of restarts.

Args:
filename: Path to the UNRST file.
numberofslices: Number of restart dates to keep.
quiet: If True, suppress informational output.
dryrun: If True, only show what would be done without modifying files.
keep: If True, keep original file with .orig suffix.
"""
rst = ResdataFile(filename)
restart_indices = get_restart_indices(filename)
Expand All @@ -142,41 +165,39 @@ def restartthinner(
]

if numberofslices > 1:
slicedates = pandas.DatetimeIndex(
numpy.linspace(
pandas.Timestamp(restart_dates[0]).value,
pandas.Timestamp(restart_dates[-1]).value,
slicedates = pd.DatetimeIndex(
np.linspace(
pd.Timestamp(restart_dates[0]).value,
pd.Timestamp(restart_dates[-1]).value,
int(numberofslices),
)
).to_list()
else:
slicedates = [restart_dates[-1]] # Only return last date if only one is wanted

slicerstindices = list(
date_slicer(slicedates, restart_dates, restart_indices).values()
)
slicerstindices.sort()
slicerstindices = list(set(slicerstindices)) # uniquify
slicerstindices = date_slicer(slicedates, restart_dates, restart_indices)
slicerstindices = sorted(set(slicerstindices)) # uniquify
Comment on lines 167 to +179
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When numberofslices equals the number of existing restart points, the date_slicer logic may still try to calculate evenly-spaced dates which could theoretically select duplicates. The sorted(set(...)) on line 179 handles this, but it means the user might get fewer restart points than requested. Consider adding a check or warning when numberofslices >= len(restart_indices) to inform the user that all restart points are being kept.

Copilot uses AI. Check for mistakes.

if not quiet:
print("Selected restarts:")
print("-----------------------")
logger.info("Selected restarts:")
logger.info("-----------------------")
for idx, rstidx in enumerate(restart_indices):
slicepresent = "X" if rstidx in slicerstindices else ""
print(
f"{rstidx:4d} "
f"{datetime.date.strftime(restart_dates[idx], '%Y-%m-%d')} "
f"{slicepresent}"
logger.info(
"%4d %s %s",
rstidx,
datetime.date.strftime(restart_dates[idx], "%Y-%m-%d"),
slicepresent,
)
print("-----------------------")
logger.info("-----------------------")

if not dryrun:
if keep:
backupname = filename + ".orig"
if not quiet:
print(f"Info: Backing up {filename} to {backupname}")
logger.info("Backing up %s to %s", filename, backupname)
shutil.copyfile(filename, backupname)
rd_repacker(filename, slicerstindices, quiet)
print(f"Written to {filename}")
logger.info("Written to %s", filename)


def get_parser() -> argparse.ArgumentParser:
Expand All @@ -186,7 +207,11 @@ def get_parser() -> argparse.ArgumentParser:
)
parser.add_argument("UNRST", help="Name of UNRST file")
parser.add_argument(
"-n", "--restarts", type=int, help="Number of restart dates wanted", default=0
"-n",
"--restarts",
type=int,
help="Number of restart dates wanted",
required=True,
)
parser.add_argument(
"-d",
Expand Down Expand Up @@ -218,13 +243,19 @@ def get_parser() -> argparse.ArgumentParser:


def main() -> None:
    """Endpoint for command line script.

    Parses the command line arguments, rejects a non-positive number of
    restarts and a file name ending in ".DATA" through ``parser.error``,
    raises the logger level to WARNING when quiet mode is requested, and
    then hands the arguments over to ``restartthinner``.
    """
    parser = get_parser()
    args = parser.parse_args()

    # Report invalid input through argparse so the message is shown
    # together with the usage text.
    if args.restarts <= 0:
        parser.error("Number of restarts must be a positive number")
    if args.UNRST.endswith(".DATA"):
        parser.error("Provide the UNRST file, not the DATA file")
    if args.quiet:
        logger.setLevel(logging.WARNING)

    restartthinner(args.UNRST, args.restarts, args.quiet, args.dryrun, args.keep)


if __name__ == "__main__":
    main()
Loading
Loading