Source code for sklearn_nominal.backend.conditions

from __future__ import annotations

import abc
from multiprocessing import Value
from typing import Any

import numpy as np
import pandas as pd

from sklearn_nominal.backend import ColumnID, InputSample


class Condition(abc.ABC):
    """
    A condition to filter rows of a :class:`Dataset`.

    A condition is bound to a ``column`` identifier and is evaluated by
    calling it on a sample. Concrete subclasses must implement
    ``__call__`` and ``short_description``.
    """

    def __init__(self, column: ColumnID):
        super().__init__()
        self.column = column

    @abc.abstractmethod
    def __call__(self, x: InputSample) -> bool:
        """
        Evaluate this condition on ``x``.
        """
        pass

    @abc.abstractmethod
    def short_description(self) -> str:
        """
        Returns a short description of this Condition for displaying purposes
        """
        pass

    def na_to_false(self, s: bool | Any | pd.Series | np.ndarray):
        """
        Coerce the result of a comparison to booleans, mapping missing
        values to ``False``.

        * ``pd.Series``: missing entries are filled with ``False`` and the
          series is cast to ``bool``.
        * ``np.ndarray``: NaN entries become ``False`` and the array is cast
          to ``bool``.
        * ``bool`` / ``np.bool_`` scalars are returned unchanged.
        * any other scalar is treated as "not satisfied" and yields ``False``.
        """
        if isinstance(s, pd.Series):
            return s.fillna(False).astype(bool)
        if isinstance(s, np.ndarray):
            if s.dtype == object:
                # np.nan_to_num only rewrites inexact (float/complex) dtypes
                # and hands object arrays back untouched, so a NaN stored in
                # an object array would turn into True (bool(nan)) and pd.NA
                # would raise on the cast. Mask missing entries explicitly.
                s = np.where(pd.isna(s), False, s)
            return np.nan_to_num(s, nan=False).astype(bool)
        if isinstance(s, (bool, np.bool_)):
            return s
        return False

    def is_similar(self, c: Condition):
        """
        returns True if the condition uses the same attributes and values
        as this condition.

        This base implementation does nothing and returns ``None``;
        subclasses provide the actual comparison.
        """
        pass
class ValueCondition(Condition):
    """
    Condition satisfied when ``x[column]`` equals a fixed ``value``.
    """

    def __init__(self, column: ColumnID, value):
        super().__init__(column)
        self.value = value

    def __call__(self, x: InputSample):
        # Missing values never satisfy the condition (see na_to_false).
        return self.na_to_false(x[self.column] == self.value)

    def __repr__(self):
        return f"{self.column}={self.value}"

    def short_description(self):
        return f"{self.value}"

    def is_similar(self, c: Condition):
        # Similar means "tests the same column"; the value may differ.
        if isinstance(c, ValueCondition):
            return c.column == self.column
        return False

    def __eq__(self, x):
        if not isinstance(x, ValueCondition):
            return False
        return self.value == x.value and self.column == x.column

    def __hash__(self):
        return hash((self.column, self.value))


class RangeCondition(Condition):
    """
    Threshold condition on a column: ``x[column] <= value`` when ``less``
    is true, ``x[column] > value`` otherwise.
    """

    def __init__(self, column: ColumnID, value: float, less: bool):
        super().__init__(column)
        self.value = value
        self.less = less

    @classmethod
    def make(cls, column, value):
        """
        Returns the two complementary conditions for a threshold, in the
        order ``[<= value, > value]``.
        """
        return [RangeCondition(column, value, t) for t in [True, False]]

    def __call__(self, x: InputSample):
        if self.less:
            return self.na_to_false(x[self.column] <= self.value)
        return self.na_to_false(x[self.column] > self.value)

    def __repr__(self):
        op = "<=" if self.less else ">"
        return f"{self.column} {op} {self.value:.4g}"

    def short_description(self):
        op = "<=" if self.less else ">"
        return f"{op} {self.value:.4g}"

    def is_similar(self, c: Condition):
        # Similar means same column and same direction; the threshold
        # value may differ.
        if isinstance(c, RangeCondition):
            return c.column == self.column and c.less == self.less
        return False

    def __eq__(self, x):
        if not isinstance(x, RangeCondition):
            return False
        return self.value == x.value and self.column == x.column and self.less == x.less

    def __hash__(self):
        return hash((self.column, self.value, self.less))


class AndCondition(Condition):
    """
    Conjunction of several conditions.

    Its ``column`` is the comma-joined string of the children's columns.
    """

    def __init__(self, conditions: list[Condition]):
        column = ",".join([str(c.column) for c in conditions])
        super().__init__(column)
        self.conditions = conditions

    def short_description(self):
        descriptions = ",".join(c.short_description() for c in self.conditions)
        return f"And({descriptions})"

    def __call__(self, x: InputSample | pd.DataFrame):
        """
        Combines the children's results with ``&``.

        Returns ``None`` when there are no child conditions.
        """
        result = None
        for c in self.conditions:
            c_result = c(x)
            result = c_result if result is None else result & c_result
        return result

    def __repr__(self):
        return " AND ".join(f"({c})" for c in self.conditions)

    def is_similar(self, c: Condition):
        if not isinstance(c, AndCondition):
            return False
        # must have the same number of conditions
        if len(c.conditions) != len(self.conditions):
            return False
        # every child condition must be similar to a condition in c
        return all(
            any(a.is_similar(child) for a in c.conditions)
            for child in self.conditions
        )

    def __eq__(self, x):
        """
        Order-insensitive equality: both conjunctions have the same number
        of children and each child of one is equal to a child of the other.
        """
        if not isinstance(x, AndCondition):
            return False
        if len(x.conditions) != len(self.conditions):
            return False
        # Check both directions so that equality is symmetric even when a
        # list contains duplicates ([a, a] must not equal [a, b]).
        mine_in_theirs = all(
            any(a == child for a in x.conditions) for child in self.conditions
        )
        theirs_in_mine = all(
            any(a == child for a in self.conditions) for child in x.conditions
        )
        return mine_in_theirs and theirs_in_mine

    def __hash__(self):
        # A list is unhashable, and __eq__ ignores order, so hash the set of
        # children: equal conjunctions contain the same distinct children.
        return hash(frozenset(self.conditions))


class TrueCondition(Condition):
    """
    Condition that is always satisfied. Its ``column`` is the empty string.
    """

    def __init__(self):
        super().__init__("")

    def __call__(self, x: InputSample | pd.DataFrame):
        # For a DataFrame return one True per row, aligned on its index.
        if isinstance(x, pd.DataFrame):
            return pd.Series([True] * len(x), index=x.index)
        return True

    def short_description(self):
        return "True"

    def __repr__(self):
        return self.short_description()

    def is_similar(self, c: Condition):
        return isinstance(c, TrueCondition)

    def __eq__(self, x):
        return isinstance(x, TrueCondition)

    def __hash__(self):
        return 1


class NotCondition(Condition):
    """
    Negation of another condition; shares the wrapped condition's column.
    """

    def __init__(self, condition: Condition):
        super().__init__(condition.column)
        self.condition = condition

    def __call__(self, x: InputSample | pd.DataFrame):
        result = self.condition(x)
        if isinstance(result, bool):
            # ``~`` on a Python bool is integer inversion: ~True == -2 and
            # ~False == -1, both truthy. Use logical negation for scalars.
            return not result
        # Element-wise negation for Series / ndarray / np.bool_ results.
        return ~result

    def short_description(self):
        return f"NOT {self.condition.short_description()}"

    def __repr__(self):
        return self.short_description()

    def is_similar(self, c: Condition):
        if isinstance(c, NotCondition):
            return self.condition.is_similar(c.condition)
        return False

    def __eq__(self, x):
        if not isinstance(x, NotCondition):
            return False
        return self.condition == x.condition

    def __hash__(self):
        return hash((1, self.condition))