tanat.criterion package#

Subpackages#

Submodules#

tanat.criterion.base module#

Criterion base: abstract class, level enum, and core exceptions.

class tanat.criterion.base.Criterion(settings: Any = None)[source]#

Bases: SettingsMixin, Registrable, ABC

Abstract base for all filtering criteria.

Subclasses must declare LEVELS and implement the three _*_impl hooks. All public methods enforce compatibility and delegate to those hooks.

LEVELS: ClassVar[frozenset[CriterionLevel]][source]#

Declare which levels this criterion supports.

ensure_compatible(level: CriterionLevel) → None[source]#

Raise CriterionLevelError if level not in LEVELS.

filter_entities(target: Sequence | SequencePool, *, inplace: bool = False, verbose: bool = True)[source]#

Return a filtered view at entity level.

Parameters:
  • target – A Sequence or SequencePool.

  • inplace – If True, modify target in place.

  • verbose – If True, emit a one-line report.

Returns:

Filtered target (or target itself when inplace=True).

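Raises:
  • TypeError – If target is not a Sequence or SequencePool.

  • CriterionLevelError – If the criterion does not support the ENTITY level.

match(target: Sequence | Trajectory) → bool[source]#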

Return True if target satisfies this criterion.

Parameters:

target – A Sequence or Trajectory.

Returns:

True when target matches.

Raises:
  • TypeError – If target is not a Sequence or Trajectory.

  • CriterionLevelError – If the criterion is incompatible with target’s level.

which_ids(pool: SequencePool | TrajectoryPool, *, verbose: bool = False) → set[source]#

Return the set of IDs in pool that satisfy this criterion.

Parameters:
  • pool – A SequencePool or TrajectoryPool.

  • verbose – If True, emit a one-line report.

Returns:

Set of matching IDs.

Raises:
  • TypeError – If pool is not a supported pool type.

  • CriterionLevelError – If the criterion is incompatible with the pool’s level.

exception tanat.criterion.base.CriterionError[source]#

Bases: TanaTException

Raised when a criterion expression is invalid or fails schema probing.

class tanat.criterion.base.CriterionLevel(*values)[source]#

Bases: Enum

Compatibility level for a criterion.

ENTITY = 1[source]#
SEQUENCE = 2[source]#
TRAJECTORY = 3[source]#
exception tanat.criterion.base.CriterionLevelError[source]#

Bases: TanaTException

Raised when a criterion is applied at an incompatible level.

Module contents#

Criterion module: filtering primitives for sequences, entities, and trajectories.

class tanat.criterion.Criterion(settings: Any = None)[source]#

Bases: SettingsMixin, Registrable, ABC

Abstract base for all filtering criteria.

Subclasses must declare LEVELS and implement the three _*_impl hooks. All public methods enforce compatibility and delegate to those hooks.

LEVELS: ClassVar[frozenset[CriterionLevel]][source]#

Declare which levels this criterion supports.

ensure_compatible(level: CriterionLevel) → None[source]#

Raise CriterionLevelError if level not in LEVELS.

filter_entities(target: Sequence | SequencePool, *, inplace: bool = False, verbose: bool = True)[source]#

Return a filtered view at entity level.

Parameters:
  • target – A Sequence or SequencePool.

  • inplace – If True, modify target in place.

  • verbose – If True, emit a one-line report.

Returns:

Filtered target (or target itself when inplace=True).

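Raises:
  • TypeError – If target is not a Sequence or SequencePool.

  • CriterionLevelError – If the criterion does not support the ENTITY level.

match(target: Sequence | Trajectory) → bool[source]#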

Return True if target satisfies this criterion.

Parameters:

target – A Sequence or Trajectory.

Returns:

True when target matches.

Raises:
  • TypeError – If target is not a Sequence or Trajectory.

  • CriterionLevelError – If the criterion is incompatible with target’s level.

which_ids(pool: SequencePool | TrajectoryPool, *, verbose: bool = False) → set[source]#

Return the set of IDs in pool that satisfy this criterion.

Parameters:
  • pool – A SequencePool or TrajectoryPool.

  • verbose – If True, emit a one-line report.

Returns:

Set of matching IDs.

Raises:
  • TypeError – If pool is not a supported pool type.

  • CriterionLevelError – If the criterion is incompatible with the pool’s level.

exception tanat.criterion.CriterionError[source]#

Bases: TanaTException

Raised when a criterion expression is invalid or fails schema probing.

class tanat.criterion.CriterionLevel(*values)[source]#

Bases: Enum

Compatibility level for a criterion.

ENTITY = 1[source]#
SEQUENCE = 2[source]#
TRAJECTORY = 3[source]#
exception tanat.criterion.CriterionLevelError[source]#

Bases: TanaTException

Raised when a criterion is applied at an incompatible level.

class tanat.criterion.EntityCriterion(query: Expr)[source]#

Bases: Criterion

Filter entities or select sequences using a Polars expression.

Supported levels: ENTITY, SEQUENCE.

Example:

# entity-level pruning (keep only rows where diag_type == "DP")
pool2 = pool.filter_entities(EntityCriterion(query=pl.col("diag_type") == "DP"))

# sequence selection: IDs that have at least one such row
ids = pool.which(EntityCriterion(query=pl.col("diag_type") == "DP"))

# single sequence match
ok = seq.match(EntityCriterion(query=pl.col("diag_type") == "DP"))
LEVELS: ClassVar[frozenset[CriterionLevel]] = frozenset({CriterionLevel.ENTITY, CriterionLevel.SEQUENCE})[source]#

Declare which levels this criterion supports.

SETTINGS_CLASS[source]#

alias of EntityCriterionSettings

__init__(query: Expr) → None[source]#
class tanat.criterion.LengthCriterion(*, gt: int | None = None, ge: int | None = None, lt: int | None = None, le: int | None = None)[source]#

Bases: Criterion

Select sequences by their number of entities (rows).

Supported levels: SEQUENCE.

Example:

# sequences with more than 5 entities
ids = pool.which(LengthCriterion(gt=5))
pool2 = pool.subset(ids)

# a single sequence
ok = seq.match(LengthCriterion(ge=3, lt=20))
LEVELS: ClassVar[frozenset[CriterionLevel]] = frozenset({CriterionLevel.SEQUENCE})[source]#

Declare which levels this criterion supports.

SETTINGS_CLASS[source]#

alias of LengthCriterionSettings

__init__(*, gt: int | None = None, ge: int | None = None, lt: int | None = None, le: int | None = None) → None[source]#
class tanat.criterion.PatternCriterion(feature: str, pattern: str | list[str], present: bool = True, regex: bool = True, case_sensitive: bool = True)[source]#

Bases: Criterion

Filter entities or sequences by an ordered pattern of string values.

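A sequence matches when its entities (in temporal order) contain the given pattern in order: pattern element k must appear after element k-1. Consecutive pattern elements must match adjacent entities unless they are separated by ANY (any rows in between) or WILDCARD (exactly one entity in between), as shown in the examples below.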

Supported levels: ENTITY, SEQUENCE.

Entity level (filter_entities):
  • present=True: keeps only the rows that are “witnesses” of the greedy first match. Sequences without a complete match → 0 rows.

  • present=False: keeps all rows that are not witnesses (rows that don’t participate in the pattern). Sequences without a complete match → all their rows are kept.

Sequence level (which, match):

Keeps (or excludes, with present=False) whole sequences based on whether the ordered pattern is found.

Example:

# IDs where "A" appears directly before "B" (adjacent)
ids = pool.which(PatternCriterion(feature="code", pattern=["A", "B"]))

# Entity pruning: keep only the matched witness rows
pool2 = pool.filter_entities(
    PatternCriterion(feature="code", pattern=["A", "B"])
)

# Free gap: A before B with any rows in between
ids = pool.which(
    PatternCriterion(feature="code", pattern=["A", ANY, "B"])
)

# Exactly one element between A and B
ids = pool.which(
    PatternCriterion(feature="code", pattern=["A", WILDCARD, "B"])
)

# Single-element pattern: at least one row matching "ICU"
ids = pool.which(PatternCriterion(feature="label", pattern="ICU"))

# Exclusion: sequences that never contain adjacent A→B
ids = pool.which(
    PatternCriterion(feature="code", pattern=["A", "B"], present=False)
)

# Literal, case-insensitive
ids = pool.which(
    PatternCriterion(feature="code", pattern="icu", regex=False, case_sensitive=False)
)

# Single-sequence match
ok = seq.match(PatternCriterion(feature="code", pattern=["A", "B"]))
LEVELS: ClassVar[frozenset[CriterionLevel]] = frozenset({CriterionLevel.ENTITY, CriterionLevel.SEQUENCE})[source]#

Declare which levels this criterion supports.

SETTINGS_CLASS[source]#

alias of PatternCriterionSettings

__init__(feature: str, pattern: str | list[str], present: bool = True, regex: bool = True, case_sensitive: bool = True) → None[source]#
class tanat.criterion.RankCriterion(*, first: int | None = None, last: int | None = None, start: int | None = None, end: int | None = None, step: int | None = None, ranks: list[int] | int | None = None, relative: bool = False)[source]#

Bases: Criterion

Select entities by their positional rank within a sequence.

Supported levels: ENTITY only.

Entities are numbered 0-based within each sequence in their natural store order. Negative indices use Python-style semantics (from the end).

Example:

# keep the first 3 entities
pool2 = pool.filter_entities(RankCriterion(first=3))

# keep all except the last 2 entities
pool2 = pool.filter_entities(RankCriterion(first=-2))

# keep the last 2 entities
pool2 = pool.filter_entities(RankCriterion(last=2))

# keep all except the first 3 entities
pool2 = pool.filter_entities(RankCriterion(last=-3))

# keep entities at ranks 2, 3, 4 (Python slice semantics)
pool2 = pool.filter_entities(RankCriterion(start=2, end=5))

# keep from rank 5 up to (but excluding) the 2nd-from-end entity
pool2 = pool.filter_entities(RankCriterion(start=5, end=-2))

# keep every other entity
pool2 = pool.filter_entities(RankCriterion(step=2))

# keep specific ranks (first and last)
pool2 = pool.filter_entities(RankCriterion(ranks=[0, -1]))

# keep the 3 entities centered on T0 (requires set_t0())
pool2 = pool.filter_entities(RankCriterion(start=-1, end=2, relative=True))
LEVELS: ClassVar[frozenset[CriterionLevel]] = frozenset({CriterionLevel.ENTITY})[source]#

Declare which levels this criterion supports.

SETTINGS_CLASS[source]#

alias of RankCriterionSettings

__init__(*, first: int | None = None, last: int | None = None, start: int | None = None, end: int | None = None, step: int | None = None, ranks: list[int] | int | None = None, relative: bool = False) → None[source]#
class tanat.criterion.StaticCriterion(query: Expr)[source]#

Bases: Criterion

Select sequences or trajectories using a static-feature expression.

Supported levels: SEQUENCE, TRAJECTORY.

Example:

# sequence pool: keep IDs where age > 50
ids = seq_pool.which(StaticCriterion(query=pl.col("age") > 50))
seq_pool2 = seq_pool.subset(ids)

# trajectory pool
ids = traj_pool.which(StaticCriterion(query=pl.col("group") == "A"))
traj_pool2 = traj_pool.subset(ids)

# single match
ok = seq.match(StaticCriterion(query=pl.col("age") > 50))
ok = traj.match(StaticCriterion(query=pl.col("group") == "A"))
LEVELS: ClassVar[frozenset[CriterionLevel]] = frozenset({CriterionLevel.SEQUENCE, CriterionLevel.TRAJECTORY})[source]#

Declare which levels this criterion supports.

SETTINGS_CLASS[source]#

alias of StaticCriterionSettings

__init__(query: Expr) → None[source]#
class tanat.criterion.TimeCriterion(*, start_ge: datetime | date | int | float | None = None, start_le: datetime | date | int | float | None = None, end_ge: datetime | date | int | float | None = None, end_le: datetime | date | int | float | None = None, duration_within: bool = False, all_entities: bool = False)[source]#

Bases: Criterion

Filter entities or select sequences by temporal position.

Supported levels: ENTITY, SEQUENCE.

Example:

import datetime as dt
from tanat.criterion import TimeCriterion

t0 = dt.datetime(2020, 1, 1)
t1 = dt.datetime(2021, 1, 1)

# entity pruning: keep rows whose start time is in [t0, t1]
pool2 = pool.filter_entities(TimeCriterion(start_ge=t0, start_le=t1))

# entity pruning: interval must be fully contained in [t0, t1]
pool3 = pool.filter_entities(
    TimeCriterion(start_ge=t0, end_le=t1, duration_within=True)
)

# sequence selection: IDs with at least one row in the window (default)
ids = pool.which(TimeCriterion(start_ge=t0))

# sequence selection: IDs where ALL rows are in the window
ids = pool.which(TimeCriterion(start_ge=t0, end_le=t1, all_entities=True))

# match
ok = seq.match(TimeCriterion(start_le=t1))
LEVELS: ClassVar[frozenset[CriterionLevel]] = frozenset({CriterionLevel.ENTITY, CriterionLevel.SEQUENCE})[source]#

Declare which levels this criterion supports.

SETTINGS_CLASS[source]#

alias of TimeCriterionSettings

__init__(*, start_ge: datetime | date | int | float | None = None, start_le: datetime | date | int | float | None = None, end_ge: datetime | date | int | float | None = None, end_le: datetime | date | int | float | None = None, duration_within: bool = False, all_entities: bool = False) → None[source]#