Source code for cuperiod.core.columns

"""Frictionless column mapping for heterogeneous light-curve tables.

Astronomers' light curves carry the same three physical quantities — time,
brightness, brightness error — under wildly different column names (``HJD`` vs
``BJD_TDB`` vs ``time``; ``Vmag`` vs ``flux`` vs ``m``; ``e_mag`` vs ``dmag`` vs
``flux_err``). :class:`ColumnMap` removes that friction: the common names are
auto-detected, and any column can be pinned explicitly. Detection is
case-insensitive and ordered, so the most specific/standard name wins.

The brightness *domain* (magnitude vs flux) is inferred from the matched value
column when possible (``flux`` → flux, ``mag`` → magnitude) and can always be set
explicitly. Methods that are naturally flux-based (BLS, TLS) convert internally.

The built-in detection lists cover the standard light-curve products of the major
time-domain surveys — ASAS-SN, ASAS-3, ATLAS, CRTS/CSS, ZTF, Pan-STARRS, TESS, Kepler,
Gaia, LSST, and MACHO — so a downloaded table usually needs no column hints. Error
resolution is *domain-aware*: a flux value is paired with a flux error and a magnitude
value with a magnitude error, which disambiguates surveys (e.g. ATLAS) that expose both
``m``/``dm`` and ``uJy``/``duJy`` in one table. Headerless products (OGLE ``.dat``) have
no column names to detect — pass the data as a ``(time, value, error)`` tuple or with an
explicit :class:`ColumnMap`.
"""

from __future__ import annotations

from dataclasses import dataclass
from enum import StrEnum
from typing import Final

from cuperiod.core.errors import ColumnResolutionError



[docs]
class Domain(StrEnum):
    """Brightness domain of a light curve's value column."""

    MAGNITUDE = "magnitude"
    FLUX = "flux"



#: Time-column name candidates, most-specific first. Barycentric/heliocentric
#: corrected times precede their uncorrected counterparts (``hjd`` before ``jd``,
#: ``hmjd`` before ``mjd``) so a table carrying both picks the better one.
#: Covers JD/HJD/BJD (ASAS, ASAS-SN, OGLE, Gaia), MJD (ATLAS, CRTS, ZTF, Pan-STARRS),
#: BTJD (TESS), BKJD (Kepler), ``midpointMjdTai`` (LSST), and ``obsTime`` (Pan-STARRS).
TIME_NAMES: Final[tuple[str, ...]] = (
    "bjd_tdb",
    "bjd",
    "hjd",
    "btjd",
    "bkjd",
    # Heliocentric MJD must be tried before plain MJD, otherwise a table that
    # carries both would resolve to the uncorrected column.
    "hmjd",
    "mjd",
    "midpointmjdtai",
    "midpointtai",
    "obstime",
    "jd",
    "time",
    "date",
    "t",
)

#: Names recognised as a magnitude value column, in priority order: generic
#: spellings, then per-band names (``Rmag``/``Bmag`` are MACHO red/blue), then
#: ``psfMag`` (Pan-STARRS/LSST), ``phot_mag`` and ASAS-3 ``MAG_0`` (first
#: aperture), and finally the bare ``m``.
MAG_NAMES: Final[tuple[str, ...]] = (
    ("mag", "magnitude")
    + ("vmag", "gmag", "rmag", "bmag", "imag")
    + ("psfmag", "phot_mag", "mag_0")
    + ("m",)  # single-letter name is tried last
)

#: Names recognised as a flux value column, in priority order:
#: ``PDCSAP_FLUX``/``KSPSAP_FLUX``/``SAP_FLUX`` (TESS/Kepler), ``psfFlux``
#: spellings (Pan-STARRS/LSST), generic flux names, then ``uJy``/``mJy``
#: (ATLAS, ASAS-SN) and ``fnu``. The bare ``f`` comes last so that ATLAS's
#: ``uJy`` flux column wins over its ``F`` filter column.
FLUX_NAMES: Final[tuple[str, ...]] = (
    ("pdcsap_flux", "kspsap_flux", "sap_flux")
    + ("psfflux", "psflux", "psfdiffflux")
    + ("norm_flux", "rel_flux", "flux")
    + ("ujy", "mjy", "fnu")
    + ("f",)  # must stay after "ujy"
)

#: Error-column spellings paired with a magnitude value column, in priority
#: order. Includes ``magerr`` (ZTF, CRTS, ASAS-SN), ``dm`` (ATLAS) and ``MER_0``
#: (ASAS-3) alongside the common ``mag_err``/``e_mag``/``dmag`` forms.
MAG_ERR_NAMES: Final[tuple[str, ...]] = (
    ("mag_err", "magnitude_err", "mag_error", "magerr")
    + ("e_mag", "merr", "dmag")
    + ("dm", "mer_0")
)

#: Error-column spellings paired with a flux value column, in priority order:
#: generic ``flux_err``/``flux_error`` (the latter is Gaia's), the
#: ``*SAP_FLUX_ERR`` family (TESS/Kepler), ``psfFluxErr`` spellings
#: (Pan-STARRS/LSST), then ``e_flux`` and ``duJy`` (ATLAS).
FLUX_ERR_NAMES: Final[tuple[str, ...]] = (
    ("flux_err", "flux_error")
    + ("sap_flux_err", "pdcsap_flux_err", "kspsap_flux_err")
    + ("psfflux_err", "psffluxerr", "psfluxerr")
    + ("psfdiffflux_err", "psfdifffluxerr")
    + ("e_flux", "dujy")
)

#: Error spellings that do not imply a brightness domain. They are consulted
#: only after the domain-specific lists have been tried.
GENERIC_ERR_NAMES: Final[tuple[str, ...]] = ("err", "error", "sigma")

#: Every known error spelling: magnitude errors, then flux errors, then the
#: domain-neutral ones. Actual resolution is domain-aware (see
#: :meth:`ColumnMap.resolve`): a flux value pairs with a flux error and a
#: magnitude value with a magnitude error.
ERR_NAMES: Final[tuple[str, ...]] = (
    *MAG_ERR_NAMES,
    *FLUX_ERR_NAMES,
    *GENERIC_ERR_NAMES,
)

#: Names recognised as a band/filter column (consulted only by multi-band
#: ingestion), in priority order. Includes ``band`` (Gaia/LSST), ``filtercode``
#: (ZTF), ``filterID`` (Pan-STARRS) and ``fid`` (ZTF alerts).
BAND_NAMES: Final[tuple[str, ...]] = (
    ("band", "filter")
    + ("filtercode", "filterid", "filtername")
    + ("phot_filter", "passband", "fid")
)


@dataclass(frozen=True)
class ResolvedColumns:
    """Outcome of resolving a :class:`ColumnMap` against a concrete table.

    Instances are immutable. ``time`` and ``value`` always name a column;
    ``error`` and ``band`` are ``None`` when no such column was resolved.
    """

    # Name of the time column.
    time: str
    # Name of the brightness (magnitude or flux) column.
    value: str
    # Name of the uncertainty column, if any.
    error: str | None
    # Name of the band/filter column, if any.
    band: str | None
    # Brightness domain associated with ``value``.
    domain: Domain


def _match(role: str, explicit: str | None, candidates: tuple[str, ...],
           available: list[str], lower_to_actual: dict[str, str]) -> str | None:
    """Resolve one role to an actual column name, or ``None`` if optional & absent.

    An explicit override is honored verbatim (raising if absent); otherwise the
    first case-insensitive candidate present in ``available`` wins.
    """
    if explicit is not None:
        if explicit in available:
            return explicit
        if explicit.lower() in lower_to_actual:
            return lower_to_actual[explicit.lower()]
        raise ColumnResolutionError(
            f"{role} column {explicit!r} not found; available columns: {available}"
        )
    for cand in candidates:
        if cand in lower_to_actual:
            return lower_to_actual[cand]
    return None


def infer_domain(value_column: str) -> Domain | None:
    """Infer the brightness domain from a value column's name, or ``None``.

    Matching is case-insensitive. Flux-like tokens are checked before the
    magnitude token, so a name containing both is classified as flux.

    Parameters
    ----------
    value_column : str
        The resolved value-column name.

    Returns
    -------
    Domain or None
        :attr:`Domain.FLUX` if the name looks flux-like, :attr:`Domain.MAGNITUDE`
        if magnitude-like, otherwise ``None`` (caller should default or require an
        explicit domain).
    """
    low = value_column.lower()
    # The bare single-letter names carry no recognisable token, so they are
    # matched exactly. ``f`` is the last-resort entry of FLUX_NAMES just as ``m``
    # is of MAG_NAMES; without this an auto-detected ``f`` column would fall
    # through to ``None`` and be treated as a magnitude by the caller's default.
    if low == "f":
        return Domain.FLUX
    if low == "m":
        return Domain.MAGNITUDE
    flux_tokens = ("flux", "fnu", "ujy", "mjy", "njy", "nmgy", "jansky")
    if any(tok in low for tok in flux_tokens):
        return Domain.FLUX
    # Covers ``mag``, ``Vmag``, ``psfMag``, ``MAG_0``, ... in one substring test.
    if "mag" in low:
        return Domain.MAGNITUDE
    return None



[docs]
@dataclass(frozen=True)
class ColumnMap:
    """Selects the time/value/error/band columns of a light-curve table.

    Any field left ``None`` is auto-detected from the table's column names using
    the package detection lists (:data:`TIME_NAMES`, :data:`MAG_NAMES`,
    :data:`FLUX_NAMES`, :data:`ERR_NAMES`, :data:`BAND_NAMES`); a non-``None`` field
    pins that column explicitly. Matching is case-insensitive.

    Parameters
    ----------
    time, value, error, band : str, optional
        Explicit column names. ``time`` and ``value`` are required (by detection or
        override); ``error`` and ``band`` are optional.

    Examples
    --------
    >>> ColumnMap(time="HJD", value="Vmag", error="e_Vmag")
    ColumnMap(time='HJD', value='Vmag', error='e_Vmag', band=None)
    """

    time: str | None = None
    value: str | None = None
    error: str | None = None
    band: str | None = None

    def resolve(
        self, columns: list[str], *, domain: Domain | None = None
    ) -> ResolvedColumns:
        """Resolve against a table's actual column names.

        Parameters
        ----------
        columns : list of str
            The table's column names.
        domain : Domain, optional
            Explicit domain override; the equivalent string (``"flux"`` /
            ``"magnitude"``) is accepted too. When the value column is
            auto-detected, names of the requested domain are tried first, so a
            table exposing both a magnitude and a flux column yields the one that
            matches ``domain``. If ``None``, flux names are tried first and the
            domain is inferred from the resolved value-column name, defaulting to
            :attr:`Domain.MAGNITUDE`.

        Returns
        -------
        ResolvedColumns

        Raises
        ------
        ColumnResolutionError
            If the time or value column cannot be resolved, or if an explicitly
            pinned column is not present in ``columns``.
        ValueError
            If ``domain`` is neither a :class:`Domain` member nor one of its
            string values.
        """
        available = list(columns)
        lower_to_actual: dict[str, str] = {}
        for col in available:  # first spelling wins on case collisions
            lower_to_actual.setdefault(col.lower(), col)

        # Normalise the override so a plain string behaves exactly like the enum
        # member in the identity checks below (a falsy value means "not given").
        explicit_domain = Domain(domain) if domain else None

        # Only auto-detect the value when it is not pinned. An explicit magnitude
        # domain flips the default flux-first order so that the detected value
        # agrees with the requested domain whenever the table offers a choice.
        value_candidates: tuple[str, ...]
        if self.value is not None:
            value_candidates = ()
        elif explicit_domain is Domain.MAGNITUDE:
            value_candidates = MAG_NAMES + FLUX_NAMES
        else:
            value_candidates = FLUX_NAMES + MAG_NAMES

        time = _match("time", self.time, TIME_NAMES, available, lower_to_actual)
        value = _match(
            "value", self.value, value_candidates, available, lower_to_actual
        )
        band = _match("band", self.band, BAND_NAMES, available, lower_to_actual)

        if time is None:
            raise ColumnResolutionError(
                "could not resolve a time column; pass "
                "ColumnMap(time=...) explicitly. Available columns: "
                f"{available}"
            )
        if value is None:
            raise ColumnResolutionError(
                "could not resolve a value (magnitude/flux) column; pass "
                "ColumnMap(value=...) explicitly. Available columns: "
                f"{available}"
            )
        resolved_domain = explicit_domain or infer_domain(value) or Domain.MAGNITUDE
        # Resolve the error for the *same* measurement as the value: first try names
        # derived from the value column (so ``PDCSAP_FLUX`` pairs with
        # ``PDCSAP_FLUX_ERR`` and ``uJy`` with ``duJy``), then the spellings for the
        # value's domain, then domain-neutral spellings. This keeps a flux value with a
        # flux error and a magnitude value with a magnitude error.
        v = value.lower()
        derived = (f"{v}_err", f"{v}err", f"{v}_error", f"{v}error", f"e_{v}", f"d{v}")
        domain_err = FLUX_ERR_NAMES if resolved_domain is Domain.FLUX else MAG_ERR_NAMES
        err_candidates = derived + domain_err + GENERIC_ERR_NAMES
        error = _match("error", self.error, err_candidates, available, lower_to_actual)
        return ResolvedColumns(
            time=time, value=value, error=error, band=band, domain=resolved_domain
        )




__all__ = [
    "BAND_NAMES",
    "ERR_NAMES",
    "FLUX_ERR_NAMES",
    "FLUX_NAMES",
    "GENERIC_ERR_NAMES",
    "MAG_ERR_NAMES",
    "MAG_NAMES",
    "TIME_NAMES",
    "ColumnMap",
    "Domain",
    "ResolvedColumns",
    "infer_domain",
]