Source code for cuperiod.core.columns

"""Frictionless column mapping for heterogeneous light-curve tables.

Astronomers' light curves carry the same three physical quantities — time,
brightness, brightness error — under wildly different column names (``HJD`` vs
``BJD_TDB`` vs ``time``; ``Vmag`` vs ``flux`` vs ``m``; ``e_mag`` vs ``dmag`` vs
``flux_err``). :class:`ColumnMap` removes that friction: the common names are
auto-detected, and any column can be pinned explicitly. Detection is
case-insensitive and ordered, so the most specific/standard name wins.

The brightness *domain* (magnitude vs flux) is inferred from the matched value
column when possible (``flux`` → flux, ``mag`` → magnitude) and can always be set
explicitly. Methods that are naturally flux-based (BLS, TLS) convert internally.

The built-in detection lists cover the standard light-curve products of the major
time-domain surveys — ASAS-SN, ASAS-3, ATLAS, CRTS/CSS, ZTF, Pan-STARRS, TESS, Kepler,
Gaia, LSST, and MACHO — so a downloaded table usually needs no column hints. Error
resolution is *domain-aware*: a flux value is paired with a flux error and a magnitude
value with a magnitude error, which disambiguates surveys (e.g. ATLAS) that expose both
``m``/``dm`` and ``uJy``/``duJy`` in one table. Headerless products (OGLE ``.dat``) have
no column names to detect — pass the data as a ``(time, value, error)`` tuple or with an
explicit :class:`ColumnMap`.
"""

from __future__ import annotations

from dataclasses import dataclass
from enum import StrEnum
from typing import Final

from cuperiod.core.errors import ColumnResolutionError


[docs] class Domain(StrEnum): """Brightness domain of a light curve's value column.""" MAGNITUDE = "magnitude" FLUX = "flux"
#: Time-column name candidates, most-specific first. Barycentric/heliocentric #: corrected times precede plain JD so a table carrying both picks the better one. #: Covers JD/HJD/BJD (ASAS, ASAS-SN, OGLE, Gaia), MJD (ATLAS, CRTS, ZTF, Pan-STARRS), #: BTJD (TESS), BKJD (Kepler), ``midpointMjdTai`` (LSST), and ``obsTime`` (Pan-STARRS). TIME_NAMES: Final[tuple[str, ...]] = ( "bjd_tdb", "bjd", "hjd", "btjd", "bkjd", "mjd", "hmjd", "midpointmjdtai", "midpointtai", "obstime", "jd", "time", "date", "t", ) #: Magnitude value-column candidates. Includes ASAS-3 ``MAG_0`` (first aperture), #: ``psfMag`` (Pan-STARRS/LSST), and ``Rmag``/``Bmag`` (MACHO red/blue). MAG_NAMES: Final[tuple[str, ...]] = ( "mag", "magnitude", "vmag", "gmag", "rmag", "bmag", "imag", "psfmag", "phot_mag", "mag_0", "m", ) #: Flux value-column candidates. Includes ``PDCSAP_FLUX``/``SAP_FLUX``/``KSPSAP_FLUX`` #: (TESS/Kepler), ``psfFlux`` (Pan-STARRS/LSST), and ``uJy``/``mJy`` (ATLAS, ASAS-SN). #: ``uJy`` precedes the bare ``f`` so ATLAS's flux column wins over its ``F`` filter. FLUX_NAMES: Final[tuple[str, ...]] = ( "pdcsap_flux", "kspsap_flux", "sap_flux", "psfflux", "psflux", "psfdiffflux", "norm_flux", "rel_flux", "flux", "ujy", "mjy", "fnu", "f", ) #: Magnitude-error candidates (used when the value column is a magnitude): ``magerr`` #: (ZTF, CRTS, ASAS-SN), ``dm`` (ATLAS), ``MER_0`` (ASAS-3), ``e_mag``, ``dmag``, ... MAG_ERR_NAMES: Final[tuple[str, ...]] = ( "mag_err", "magnitude_err", "mag_error", "magerr", "e_mag", "merr", "dmag", "dm", "mer_0", ) #: Flux-error candidates (used when the value column is a flux): ``flux_error`` (Gaia), #: ``*SAP_FLUX_ERR`` (TESS/Kepler), ``psfFluxErr`` (Pan-STARRS/LSST), ``duJy`` (ATLAS). 
FLUX_ERR_NAMES: Final[tuple[str, ...]] = ( "flux_err", "flux_error", "sap_flux_err", "pdcsap_flux_err", "kspsap_flux_err", "psfflux_err", "psffluxerr", "psfluxerr", "psfdiffflux_err", "psfdifffluxerr", "e_flux", "dujy", ) #: Domain-neutral error spellings, tried after the domain-specific lists. GENERIC_ERR_NAMES: Final[tuple[str, ...]] = ( "err", "error", "sigma", ) #: All error spellings. Resolution is domain-aware (see :meth:`ColumnMap.resolve`): a #: flux value pairs with a flux error and a magnitude value with a magnitude error. ERR_NAMES: Final[tuple[str, ...]] = MAG_ERR_NAMES + FLUX_ERR_NAMES + GENERIC_ERR_NAMES #: Band/filter-column candidates (used only by multi-band ingestion): ``filtercode`` #: (ZTF), ``filterID`` (Pan-STARRS), ``band`` (Gaia/LSST), ``fid`` (ZTF alerts). BAND_NAMES: Final[tuple[str, ...]] = ( "band", "filter", "filtercode", "filterid", "filtername", "phot_filter", "passband", "fid", ) @dataclass(frozen=True) class ResolvedColumns: """Concrete column names + domain, the result of resolving a :class:`ColumnMap`.""" time: str value: str error: str | None band: str | None domain: Domain def _match(role: str, explicit: str | None, candidates: tuple[str, ...], available: list[str], lower_to_actual: dict[str, str]) -> str | None: """Resolve one role to an actual column name, or ``None`` if optional & absent. An explicit override is honored verbatim (raising if absent); otherwise the first case-insensitive candidate present in ``available`` wins. """ if explicit is not None: if explicit in available: return explicit if explicit.lower() in lower_to_actual: return lower_to_actual[explicit.lower()] raise ColumnResolutionError( f"{role} column {explicit!r} not found; available columns: {available}" ) for cand in candidates: if cand in lower_to_actual: return lower_to_actual[cand] return None def infer_domain(value_column: str) -> Domain | None: """Infer the brightness domain from a value column's name, or ``None``. 
Parameters ---------- value_column : str The resolved value-column name. Returns ------- Domain or None :attr:`Domain.FLUX` if the name looks flux-like, :attr:`Domain.MAGNITUDE` if magnitude-like, otherwise ``None`` (caller should default or require an explicit domain). """ low = value_column.lower() flux_tokens = ("flux", "fnu", "ujy", "mjy", "njy", "nmgy", "jansky") if any(tok in low for tok in flux_tokens): return Domain.FLUX if "mag" in low or low in {"m", "vmag", "gmag", "rmag", "bmag", "imag"}: return Domain.MAGNITUDE return None
@dataclass(frozen=True)
class ColumnMap:
    """Selects the time/value/error/band columns of a light-curve table.

    Any field left ``None`` is auto-detected from the table's column names using
    the package detection lists (:data:`TIME_NAMES`, :data:`MAG_NAMES`,
    :data:`FLUX_NAMES`, :data:`ERR_NAMES`, :data:`BAND_NAMES`); a non-``None`` field
    pins that column explicitly. Matching is case-insensitive.

    Parameters
    ----------
    time, value, error, band : str, optional
        Explicit column names. ``time`` and ``value`` are required (by detection
        or override); ``error`` and ``band`` are optional.

    Examples
    --------
    >>> ColumnMap(time="HJD", value="Vmag", error="e_Vmag")
    ColumnMap(time='HJD', value='Vmag', error='e_Vmag', band=None)
    """

    time: str | None = None
    value: str | None = None
    error: str | None = None
    band: str | None = None

    def resolve(
        self, columns: list[str], *, domain: Domain | None = None
    ) -> ResolvedColumns:
        """Resolve against a table's actual column names.

        Parameters
        ----------
        columns : list of str
            The table's column names.
        domain : Domain, optional
            Explicit domain override. The equivalent strings (``"flux"``,
            ``"magnitude"``) are accepted and normalised to :class:`Domain`. If
            ``None``, the domain is inferred from the resolved value-column name,
            defaulting to :attr:`Domain.MAGNITUDE`.

        Returns
        -------
        ResolvedColumns

        Raises
        ------
        ColumnResolutionError
            If the time or value column cannot be resolved, or an explicitly
            pinned column is not in ``columns``.
        ValueError
            If ``domain`` is given but is not a valid :class:`Domain` value.
        """
        available = list(columns)
        lower_to_actual: dict[str, str] = {}
        for col in available:
            # first spelling wins on case collisions
            lower_to_actual.setdefault(col.lower(), col)

        # A pinned value column bypasses auto-detection entirely. Flux names are
        # tried before magnitude names when detecting.
        value_candidates = (FLUX_NAMES + MAG_NAMES) if self.value is None else ()

        time = _match("time", self.time, TIME_NAMES, available, lower_to_actual)
        value = _match(
            "value", self.value, value_candidates, available, lower_to_actual
        )
        band = _match("band", self.band, BAND_NAMES, available, lower_to_actual)

        if time is None:
            raise ColumnResolutionError(
                "could not resolve a time column; pass "
                "ColumnMap(time=...) explicitly. Available columns: "
                f"{available}"
            )
        if value is None:
            raise ColumnResolutionError(
                "could not resolve a value (magnitude/flux) column; pass "
                "ColumnMap(value=...) explicitly. Available columns: "
                f"{available}"
            )

        # Normalise an explicit domain to the enum member: a plain "flux" string
        # compares equal to Domain.FLUX but would fail the identity test below and
        # silently select the magnitude error spellings. A falsy ``domain`` means
        # "not supplied".
        explicit_domain = Domain(domain) if domain else None
        resolved_domain = explicit_domain or infer_domain(value) or Domain.MAGNITUDE

        # Resolve the error for the *same* measurement as the value: first try names
        # derived from the value column (so ``PDCSAP_FLUX`` pairs with
        # ``PDCSAP_FLUX_ERR`` and ``uJy`` with ``duJy``), then the spellings for the
        # value's domain, then domain-neutral spellings. This keeps a flux value with a
        # flux error and a magnitude value with a magnitude error.
        v = value.lower()
        derived = (
            f"{v}_err",
            f"{v}err",
            f"{v}_error",
            f"{v}error",
            f"e_{v}",
            f"d{v}",
        )
        domain_err = (
            FLUX_ERR_NAMES if resolved_domain is Domain.FLUX else MAG_ERR_NAMES
        )
        err_candidates = derived + domain_err + GENERIC_ERR_NAMES
        error = _match("error", self.error, err_candidates, available, lower_to_actual)

        return ResolvedColumns(
            time=time, value=value, error=error, band=band, domain=resolved_domain
        )
# Public API of this module: the detection tables, the domain enum, and the
# column-mapping types. ``_match`` is deliberately private.
__all__ = [
    "BAND_NAMES",
    "ERR_NAMES",
    "FLUX_ERR_NAMES",
    "FLUX_NAMES",
    "GENERIC_ERR_NAMES",
    "MAG_ERR_NAMES",
    "MAG_NAMES",
    "TIME_NAMES",
    "ColumnMap",
    "Domain",
    "ResolvedColumns",
    "infer_domain",
]