# Source code for cases.alpha_d.transforms
"""Case-specific target transforms for the alpha-D surrogate.
The generic ``TabularPairDataset`` accepts a ``target_transform`` callable
that rewrites encoded targets before the dataset materialises tensors.
This module provides the alpha-D closed-form residual transform, which
optionally applies local-velocity normalisation first and then subtracts
the closed-form baseline:
encoded_truth_lv = LV_norm(encoded_truth) if requested
encoded_residual = encoded_truth_lv − encoded_baseline_lv
where ``encoded_baseline_lv`` is the per-station alpha-D baseline encoded with
the same target convention as the truth, including LV-normalisation when it
was applied (see ``cases.alpha_d.physics``).
A transform returns ``(transformed_y, extras)`` where ``extras`` is a dict
of well-known extras the dataset stashes on ``self``. Recognised keys:
* ``baseline_encoded`` — ``ndarray`` of the encoded baseline, stashed at
``dataset._baseline_encoded`` so metrics / plotting / the Δp integral
can re-add it at decode boundaries.
* ``local_velocity_normalization`` — ``bool`` indicating whether LV-norm
was actually applied (the dataset propagates this onto
``dataset.local_velocity_normalization``).
"""
from __future__ import annotations
from typing import Any
import numpy as np
from cases.alpha_d.physics.baseline import BaselineGeometry, alpha_d_baseline_profile
from cases.alpha_d.physics.targets import (
alpha_d_bulk_to_values,
convert_alpha_d_values_between_bases,
is_alpha_d_target,
)
# Physical defaults for alpha-D zarrs that pre-date the ETL's case-metadata
# additions. Reads from ``meta.attrs`` fall back to these via ``.get``.
#
# ``_geom_get`` indexes this table unconditionally, so every key it is asked
# for must be present here. The keys match the ``BaselineGeometry`` keyword
# arguments that ``alpha_d_residual_transform`` fills from case metadata;
# ``n_stations`` is absent because it is taken from the per-case row count.
#
# NOTE(review): ``Re`` / ``Dr`` / ``Lr`` fall back to 0.0 -- presumably an
# "unknown" sentinel; confirm the closed-form baseline tolerates zeros.
_ALPHA_D_GEOMETRY_DEFAULTS: dict[str, float] = {
    "Re": 0.0,
    "Dr": 0.0,
    "Lr": 0.0,
    "D_big": 0.2,
    "outer_height_m": 1.0,
    "buffer_diams": 1.0,
    "rho": 1.0,
    "V_bulk": 1.0,
}
def _geom_get(cm: dict, key: str) -> float:
    """Return the geometry value for *key* from case metadata as a float.

    Falls back to ``_ALPHA_D_GEOMETRY_DEFAULTS[key]`` when *cm* has no entry
    for *key*. The default is looked up eagerly, so a *key* that is missing
    from the defaults table raises ``KeyError`` even if *cm* provides it.
    """
    fallback = _ALPHA_D_GEOMETRY_DEFAULTS[key]
    value = cm.get(key, fallback)
    return float(value)
# [docs]
def alpha_d_residual_transform(
    full_y: np.ndarray,
    full_x: np.ndarray,
    *,
    target_names: list[str],
    feature_names: list[str],
    case_meta_list: list[dict],
    rows_per_case: list[int],
    local_velocity_normalization: bool = False,
) -> tuple[np.ndarray, dict[str, Any]]:
    """Optionally LV-normalise and subtract the closed-form alpha-D baseline.

    No-op (returns ``(full_y, {})``, with ``full_y`` being the very same
    object that was passed in) when the dataset cannot satisfy the
    prerequisites: ``z_hat`` / ``d_local_over_D`` features missing, or no
    alpha-D-shaped column in ``target_names``.

    Args:
        full_y: 2-D array of encoded targets, one column per entry of
            ``target_names``. Never mutated; a copy is taken before any
            in-place rewrite.
        full_x: 2-D array of features, one column per entry of
            ``feature_names``, row-aligned with ``full_y``.
        target_names: Column names of ``full_y``. Only columns accepted by
            ``is_alpha_d_target`` are transformed.
        feature_names: Column names of ``full_x``.
        case_meta_list: Per-case metadata dicts, indexed in step with
            ``rows_per_case``. Missing geometry keys fall back to the
            module defaults via ``_geom_get``.
        rows_per_case: Number of consecutive rows belonging to each case,
            in row order. Must sum to ``full_y.shape[0]``.
        local_velocity_normalization: When true, alpha-D columns are
            converted to the LV-normalised basis before the baseline is
            subtracted, and the baseline is encoded in that same basis.

    Returns:
        ``(transformed_y, extras)``. ``transformed_y`` is a ``float32``
        array shaped like ``full_y`` holding truth minus baseline; columns
        that are not alpha-D targets have a zero baseline and so are only
        cast. ``extras`` carries ``"baseline_encoded"`` (``float32``, same
        shape) and ``"local_velocity_normalization"`` (whether LV-norm was
        applied).

    Raises:
        ValueError: If ``rows_per_case`` does not sum to the number of rows
            in ``full_y`` (checked only once the no-op prerequisites pass).
        IndexError: If ``case_meta_list`` is shorter than ``rows_per_case``.
    """
    # Prerequisite 1: both geometry features must be present.
    try:
        z_hat_col = feature_names.index("z_hat")
        d_over_D_col = feature_names.index("d_local_over_D")
    except ValueError:
        return full_y, {}

    # Prerequisite 2: at least one alpha-D-shaped target. Resolve the
    # matching columns once instead of re-testing every name per case.
    alpha_d_cols = [
        (j, tgt_name)
        for j, tgt_name in enumerate(target_names)
        if is_alpha_d_target(tgt_name)
    ]
    if not alpha_d_cols:
        return full_y, {}

    # The per-case slicing below trusts ``rows_per_case``. NumPy clips
    # out-of-range slices and rows past the last case would silently keep a
    # zero baseline, so reject an inconsistent partition up front.
    n_total_rows = full_y.shape[0]
    n_partitioned_rows = sum(rows_per_case)
    if n_partitioned_rows != n_total_rows:
        raise ValueError(
            f"rows_per_case sums to {n_partitioned_rows} rows but full_y "
            f"has {n_total_rows} rows"
        )

    d_over_D = full_x[:, d_over_D_col].astype(np.float64)
    z_hat_all = full_x[:, z_hat_col].astype(np.float64)

    # Step 1 (optional): LV-normalise the alpha-D-shaped truth columns so
    # the residual subtraction below operates in LV-normalised space.
    applied_lv_norm = False
    if local_velocity_normalization:
        # Work on a copy so the caller's array is left untouched.
        full_y = full_y.copy()
        for j, tgt_name in alpha_d_cols:
            full_y[:, j] = convert_alpha_d_values_between_bases(
                full_y[:, j].astype(np.float64),
                target_name=tgt_name,
                d_over_D=d_over_D,
                from_local_velocity_normalization=False,
                to_local_velocity_normalization=True,
            ).astype(np.float32)
        applied_lv_norm = True

    # Step 2: build the closed-form baseline in the same encoded space.
    # Columns that are not alpha-D targets keep a zero baseline.
    baseline_encoded = np.zeros_like(full_y, dtype=np.float64)
    row_offset = 0
    for case_idx, n_rows in enumerate(rows_per_case):
        cm = case_meta_list[case_idx]
        end = row_offset + n_rows
        geom = BaselineGeometry(
            Re=_geom_get(cm, "Re"),
            Dr=_geom_get(cm, "Dr"),
            Lr=_geom_get(cm, "Lr"),
            D_big=_geom_get(cm, "D_big"),
            outer_height_m=_geom_get(cm, "outer_height_m"),
            buffer_diams=_geom_get(cm, "buffer_diams"),
            rho=_geom_get(cm, "rho"),
            V_bulk=_geom_get(cm, "V_bulk"),
            n_stations=int(n_rows),
        )
        baseline_bulk = alpha_d_baseline_profile(z_hat_all[row_offset:end], geom)
        d_local = d_over_D[row_offset:end]
        for j, tgt_name in alpha_d_cols:
            baseline_encoded[row_offset:end, j] = alpha_d_bulk_to_values(
                baseline_bulk,
                target_name=tgt_name,
                d_over_D=d_local,
                local_velocity_normalization=applied_lv_norm,
            )
        row_offset = end

    # Subtract in float64, then hand float32 arrays back to the dataset.
    transformed_y = (full_y.astype(np.float64) - baseline_encoded).astype(np.float32)
    extras: dict[str, Any] = {
        "baseline_encoded": baseline_encoded.astype(np.float32),
        "local_velocity_normalization": applied_lv_norm,
    }
    return transformed_y, extras