Source code for cases.moose_grid.etl.data_sources.csv_source
"""CSVProbeSource: reads MOOSE CSV line-probe output files.
CSV files produced by MOOSE VectorPostprocessors follow the naming pattern:
{sim_prefix}_out_{probe_name}_{timestep:04d}.csv
All CSVs belonging to the same simulation run share the same {sim_prefix}.
Each file holds a column-per-field table (TKE, TKED, id, pressure,
vel_x, vel_y, x, y, z, ...) with one row per sample point along the probe.
This helper is called by ExodusDataSource.read_file() — it is not a
DataSource subclass because it does not manage its own file list.
"""
import logging
import re
from pathlib import Path
import numpy as np
logger = logging.getLogger(__name__)
# Filename pattern: {prefix}_out_{probe_name}_{timestep:04d}.csv
# ``prefix`` is greedy and ``probe`` is lazy, so the trailing run of digits is
# always captured as the time-step index ``ts``.
_PROBE_PATTERN = re.compile(r"^(?P<prefix>.+)_out_(?P<probe>.+?)_(?P<ts>\d+)\.csv$")


def find_probe_files(sim_prefix: str, data_dir: Path) -> dict[str, list[Path]]:
    """Find all CSV probe files that belong to one simulation run.

    A file belongs to the run when the ``prefix`` captured by
    ``_PROBE_PATTERN`` equals ``sim_prefix`` once a single trailing ``_out``
    has been removed from both. The comparison is an exact match: substring
    matching would also collect the files of any other run in ``data_dir``
    whose name merely contains this run's name (``case`` vs. ``case2``).

    Args:
        sim_prefix: Stem of the run's Exodus file without extension, e.g.
            ``'lid-driven-segregated_out'``. The trailing ``_out`` may be
            present or absent.
        data_dir: Directory searched (non-recursively) for ``*.csv`` files.

    Returns:
        Mapping from probe name to the CSV paths of that probe, ordered by
        ascending time-step index. Empty when no file matches.
    """
    sim_root = sim_prefix.removesuffix("_out")

    # probe name -> [(time-step index, path), ...]; the index is parsed once
    # here so the regex does not have to be re-run while sorting.
    indexed: dict[str, list[tuple[int, Path]]] = {}
    # Scanning in sorted filename order keeps the probe ordering of the
    # returned dict deterministic.
    for csv_path in sorted(data_dir.glob("*.csv")):
        match = _PROBE_PATTERN.match(csv_path.name)
        if match is None:
            continue
        if match.group("prefix").removesuffix("_out") != sim_root:
            continue
        entry = (int(match.group("ts")), csv_path)
        indexed.setdefault(match.group("probe"), []).append(entry)

    # Order numerically by time step; equal indices fall back to path order.
    return {
        probe: [path for _, path in sorted(entries)]
        for probe, entries in indexed.items()
    }
[docs]
class CSVProbeSource:
"""Reads and aggregates MOOSE CSV line-probe files for one simulation run."""
def __init__(self, data_dir: str | Path):
self.data_dir = Path(data_dir)
[docs]
def read_all(self, sim_prefix: str) -> tuple[dict[str, np.ndarray], list[str]]:
"""Read all probe CSVs for a simulation run.
Returns:
A pair ``(probe_data, probe_columns)``:
- ``probe_data``: dict mapping ``probe_name`` to a numpy array
of shape ``[Np, C]`` where ``Np`` is the number of sample
points and ``C`` the number of columns. When multiple time
steps are found, data from the last time step is used
(steady-state typical).
- ``probe_columns``: ordered list of column names shared
across probes.
"""
probe_files = find_probe_files(sim_prefix, self.data_dir)
if not probe_files:
logger.warning(
"No CSV probe files found for sim_prefix='%s' in %s",
sim_prefix,
self.data_dir,
)
return {}, []
probe_data: dict[str, np.ndarray] = {}
probe_columns: list[str] = []
for probe_name, file_list in probe_files.items():
# Use the last time step file (typically steady state)
csv_path = file_list[-1]
try:
arr, columns = read_csv(csv_path)
probe_data[probe_name] = arr
if not probe_columns:
probe_columns = columns
except Exception as exc:
logger.error("Failed to read probe '%s' from %s: %s", probe_name, csv_path, exc)
return probe_data, probe_columns
[docs]
def read_csv(path: Path) -> tuple[np.ndarray, list[str]]:
    """Read a MOOSE output CSV file into a numpy array.

    The first line is taken as the comma-separated header; every following
    non-blank line is parsed as a row of floats. The file is opened once.

    Args:
        path: CSV file to read.

    Returns:
        arr : [Np, C] float32 array. Always 2-D: a single-row file gives
            ``(1, C)``, a single-column file gives ``(Np, 1)`` and a file
            with a header but no data rows gives ``(0, C)``.
        columns : list of column name strings

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a data row cannot be parsed as floats.
    """
    with open(path, encoding="utf-8") as fh:
        header = fh.readline().strip()
        data_lines = [line for line in fh if line.strip()]

    columns = [c.strip() for c in header.split(",")]

    if not data_lines:
        # Header-only file: report zero sample points but keep the column
        # count so the result still has the documented [Np, C] layout.
        return np.empty((0, len(columns)), dtype=np.float32), columns

    # ndmin=2 makes loadtxt keep rows on axis 0 in the degenerate cases, so a
    # single column is not mistaken for a single row.
    arr = np.loadtxt(data_lines, delimiter=",", dtype=np.float32, ndmin=2)
    if arr.size == 0:
        # Only comment lines were present; same contract as a header-only file.
        arr = np.empty((0, len(columns)), dtype=np.float32)
    return arr, columns