# Source code for cases.moose_grid.etl.data_sources.csv_source

"""CSVProbeSource: reads MOOSE CSV line-probe output files.

CSV files produced by MOOSE VectorPostprocessors follow the naming pattern:
    {sim_prefix}_out_{probe_name}_{timestep:04d}.csv

All CSVs belonging to the same simulation run share the same {sim_prefix}.
Each file holds a column-per-field table (TKE, TKED, id, pressure,
vel_x, vel_y, x, y, z, ...) with one row per sample point along the probe.

This helper is called by ExodusDataSource.read_file() — it is not a
DataSource subclass because it does not manage its own file list.
"""

import logging
import re
from pathlib import Path

import numpy as np

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Filename pattern: {prefix}_out_{probe_name}_{timestep:04d}.csv
# The literal "_out_" separator is consumed by the pattern itself, so the
# captured ``prefix`` group does NOT include the trailing "_out".
_PROBE_PATTERN = re.compile(r"^(?P<prefix>.+)_out_(?P<probe>.+?)_(?P<ts>\d+)\.csv$")


def find_probe_files(sim_prefix: str, data_dir: Path) -> dict[str, list[Path]]:
    """Find all CSV probe files that belong to a simulation run.

    A CSV file is attributed to the run when its name matches
    ``_PROBE_PATTERN`` and its captured prefix and ``sim_prefix`` are related
    by substring containment (in either direction), either as given or after
    a trailing ``'_out'`` has been removed from both.

    Note:
        The containment test is a deliberately lenient heuristic: a run named
        ``'case1'`` will also pick up files of a run named ``'case10'`` that
        live in the same directory.

    Args:
        sim_prefix: Stem of the Exodus file, without extension (for example
                    ``'lid-driven-segregated_out'``). The trailing ``'_out'``
                    does not need to be stripped by the caller.
        data_dir:   Directory to search (non-recursively) for CSV files.

    Returns:
        Mapping from probe name to the list of CSV file paths for that probe,
        ordered by ascending numeric time step (one entry per time step).
        Files with equal time steps keep their path-sorted order.
    """
    # Only a *trailing* '_out' is the output suffix. Removing every
    # occurrence (as ``str.replace`` would) turns e.g. 'flow_outlet' into
    # 'flowlet' and makes unrelated runs look identical.
    sim_root = sim_prefix.removesuffix("_out")

    # probe name -> [(time step, path), ...]; the time step is parsed once
    # here so the sort below does not have to re-run the regex.
    timed: dict[str, list[tuple[int, Path]]] = {}
    for csv_path in sorted(data_dir.glob("*.csv")):
        m = _PROBE_PATTERN.match(csv_path.name)
        if m is None:
            continue

        file_prefix = m.group("prefix")
        # Heuristic: accept if either name is a substring of the other ...
        if sim_prefix not in file_prefix and file_prefix not in sim_prefix:
            # ... otherwise retry with the trailing '_out' removed from both.
            file_root = file_prefix.removesuffix("_out")
            if sim_root not in file_root and file_root not in sim_root:
                continue

        timed.setdefault(m.group("probe"), []).append((int(m.group("ts")), csv_path))

    # Order each probe's files by numeric time step. The sort is stable, so
    # ties keep the path order established by ``sorted(glob)`` above.
    probes: dict[str, list[Path]] = {}
    for probe_name, entries in timed.items():
        entries.sort(key=lambda entry: entry[0])
        probes[probe_name] = [path for _, path in entries]

    return probes




# [docs]  (Sphinx viewcode link residue; not part of the module)
class CSVProbeSource:
    """Reads and aggregates MOOSE CSV line-probe files for one simulation run."""

    def __init__(self, data_dir: str | Path):
        self.data_dir = Path(data_dir)


[docs]
    def read_all(self, sim_prefix: str) -> tuple[dict[str, np.ndarray], list[str]]:
        """Read all probe CSVs for a simulation run.

        Returns:
            A pair ``(probe_data, probe_columns)``:

            - ``probe_data``: dict mapping ``probe_name`` to a numpy array
              of shape ``[Np, C]`` where ``Np`` is the number of sample
              points and ``C`` the number of columns. When multiple time
              steps are found, data from the last time step is used
              (steady-state typical).
            - ``probe_columns``: ordered list of column names shared
              across probes.
        """
        probe_files = find_probe_files(sim_prefix, self.data_dir)

        if not probe_files:
            logger.warning(
                "No CSV probe files found for sim_prefix='%s' in %s",
                sim_prefix,
                self.data_dir,
            )
            return {}, []

        probe_data: dict[str, np.ndarray] = {}
        probe_columns: list[str] = []

        for probe_name, file_list in probe_files.items():
            # Use the last time step file (typically steady state)
            csv_path = file_list[-1]
            try:
                arr, columns = read_csv(csv_path)
                probe_data[probe_name] = arr
                if not probe_columns:
                    probe_columns = columns
            except Exception as exc:
                logger.error("Failed to read probe '%s' from %s: %s", probe_name, csv_path, exc)

        return probe_data, probe_columns





# [docs]  (Sphinx viewcode link residue; not part of the module)
def read_csv(path: Path) -> tuple[np.ndarray, list[str]]:
    """Read a MOOSE output CSV file into a numpy array.

    The first line is taken as the header; every following non-blank line is
    parsed as one row of comma-separated floats.

    Args:
        path: CSV file to read.

    Returns:
        arr     : [Np, C] float32 array, always 2-D. A file without data
                  rows yields an empty array of shape [0, C].
        columns : list of column name strings (whitespace-stripped)

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a data row cannot be parsed as floats.
    """
    # Read the file once, with an explicit encoding so the result does not
    # depend on the platform's locale.
    with open(path, encoding="utf-8") as fh:
        header = fh.readline().strip()
        rows = [line for line in fh if line.strip()]
    columns = [c.strip() for c in header.split(",")]

    if not rows:
        # Header-only file: return a well-formed empty table rather than
        # letting loadtxt warn and produce a shapeless array.
        return np.empty((0, len(columns)), dtype=np.float32), columns

    # ndmin=2 keeps rows on axis 0 for both single-row and single-column
    # files; reshaping a 1-D result by hand cannot tell the two apart.
    arr = np.loadtxt(rows, delimiter=",", dtype=np.float32, ndmin=2)

    return arr, columns