Source code for sklearn_nominal.sklearn.rule_oner

import numpy as np
import pandas as pd
from scipy.odr import Output
from sklearn.base import BaseEstimator
from sklearn.utils import compute_class_weight

from sklearn_nominal.backend import Input
from sklearn_nominal.backend.core import Dataset
from sklearn_nominal.backend.factory import DEFAULT_BACKEND
from sklearn_nominal.rules.oner import OneR
from sklearn_nominal.shared.target_error import TargetError
from sklearn_nominal.sklearn.nominal_model import NominalClassifier, NominalRegressor


class OneRClassifier(NominalClassifier, BaseEstimator):
    """A OneR classifier, equivalent to a TreeClassifier with a depth of 1.

    The equivalent tree consists only of a root and its children.

    [1] Holte, Robert C. "Very simple classification rules perform well on
    most commonly used datasets." Machine learning 11.1 (1993): 63-90.

    Args:
        criterion (str, optional): The function to measure the quality of a
            split. Supported criteria are "gini" for the Gini impurity and
            "log_loss" and "entropy" both for the Shannon information gain.
            Defaults to "entropy".
        backend (str, optional): The backend to use for computations.
            Defaults to DEFAULT_BACKEND.
        class_weight (dict or "balanced", optional): Weights associated with
            classes in the form ``{class_label: weight}``. If None, all
            classes are assumed to have weight one. Defaults to None.

    Attributes:
        classes_ (ndarray of shape (n_classes,)): The classes labels.
        n_classes_ (int): The number of classes.
        n_features_in_ (int): Number of features seen during :term:`fit`.
        feature_names_in_ (ndarray of shape (n_features_in_,)): Names of
            features seen during :term:`fit`. Defined only when `X` has
            feature names that are all strings.
        n_outputs_ (int): The number of outputs when ``fit`` is performed.
        model_ (RuleModel): The underlying model object.

    See Also:
        TreeClassifier: A decision tree classifier with nominal support.
        NaiveBayesClassifier: A naive Bayes classifier with nominal support.
        CN2Classifier: A CN2 classifier with nominal support.
        ZeroRClassifier: A ZeroR classifier with nominal support.
        PRISMClassifier: A PRISM classifier with nominal support.

    Examples:
        >>> from sklearn.datasets import fetch_openml
        >>> df = fetch_openml("credit-g", version=2).frame
        >>> x, y = df.iloc[:, 0:-1], df.iloc[:, -1]
        >>>
        >>> from sklearn_nominal import OneRClassifier
        >>> model = OneRClassifier()
        >>> model.fit(x, y)
        >>>
        >>> from sklearn.metrics import accuracy_score
        >>> y_pred = model.predict(x)
        >>> print(accuracy_score(y, y_pred))
        0.787
    """

    def __init__(self, criterion="entropy", backend=DEFAULT_BACKEND, class_weight=None):
        """Initializes the OneRClassifier.

        Args:
            criterion (str): The function to measure the quality of a split.
                Supported criteria are "gini" for the Gini impurity and
                "log_loss" and "entropy" both for the Shannon information
                gain. Defaults to "entropy".
            backend (str): The backend to use for computations.
                Defaults to DEFAULT_BACKEND.
            class_weight (dict or "balanced", optional): Weights associated
                with classes in the form ``{class_label: weight}``. If None,
                all classes are assumed to have weight one. Defaults to None.
        """
        super().__init__(backend=backend, class_weight=class_weight)
        # Stored unmodified under the parameter's own name; it is only read
        # later, in ``make_model``.
        self.criterion = criterion

    def __sklearn_tags__(self):
        """Returns the scikit-learn tags for the estimator.

        Starts from the tags provided by the parent classes and sets the
        classifier ``poor_score`` tag to True.

        Returns:
            Tags: The scikit-learn tags.
        """
        tags = super().__sklearn_tags__()
        tags.classifier_tags.poor_score = True
        return tags

    def make_model(self, d: Dataset, class_weight: np.ndarray):
        """Creates the OneR trainer for the model.

        Args:
            d (Dataset): The dataset to train on. Not used by this method.
            class_weight (np.ndarray): The weights for each class.

        Returns:
            OneR: The OneR trainer instance.
        """
        # The error function is built from the configured criterion and the
        # class weights, then handed to the trainer.
        error = self.build_error(self.criterion, class_weight)
        return OneR(error_function=error)

    def fit(self, x, y):
        """Fit the OneR model according to the given training data.

        The OneR (One Rule) algorithm identifies the single best feature
        (the "One Rule") that minimizes the classification error on the
        training data. It creates a simple decision rule based on this
        feature alone.

        Args:
            x (pd.DataFrame or np.ndarray): The training input samples.
            y (np.ndarray): The target values (class labels) as integers or
                strings.

        Returns:
            self: Returns the instance itself.
        """
        # Pure delegation to the parent implementation; the override exists
        # to carry estimator-specific documentation.
        return super().fit(x, y)

    def predict(self, x):
        """Perform classification on an array of test vectors X.

        Predicts the target class based on the single best feature
        identified during :meth:`fit`. If a value in the test set was not
        seen in the training set for that feature, the default (majority)
        class is predicted.

        Args:
            x (pd.DataFrame or np.ndarray): The input samples.

        Returns:
            np.ndarray: Predicted target values for X.
        """
        return super().predict(x)

    def predict_proba(self, x):
        """Return probability estimates for the test data X.

        The probability estimates are derived from the class distribution of
        the training samples that matched the same value of the "One Rule"
        feature.

        Args:
            x (pd.DataFrame or np.ndarray): The input samples.

        Returns:
            np.ndarray: Returns the probability of the sample for each class
            in the model.
        """
        return super().predict_proba(x)
class OneRRegressor(NominalRegressor, BaseEstimator):
    """A OneR regressor, equivalent to a TreeRegressor with a depth of 1.

    [1] Holte, Robert C. "Very simple classification rules perform well on
    most commonly used datasets." Machine learning 11.1 (1993): 63-90.

    Args:
        criterion (str, optional): The function to measure the error of a
            split. Supported criteria are currently only "std", for standard
            deviation (equivalent to root MSE). Defaults to "std".
        backend (str, optional): The backend to use for computations.
            Defaults to DEFAULT_BACKEND.

    Attributes:
        n_features_in_ (int): Number of features seen during :term:`fit`.
        feature_names_in_ (ndarray of shape (n_features_in_,)): Names of
            features seen during :term:`fit`. Defined only when `X` has
            feature names that are all strings.
        n_outputs_ (int): The number of outputs when ``fit`` is performed.
        model_ (RuleModel): The underlying model object.

    See Also:
        TreeRegressor: A decision tree regressor with nominal support.
        CN2Regressor: A CN2 regressor with nominal support.
        ZeroRRegressor: A ZeroR regressor with nominal support.

    Examples:
        >>> from sklearn_nominal import OneRRegressor, read_golf_regression_dataset
        >>> # NOTE: ``url`` is not defined in this snippet; set it to the
        >>> # location of the golf regression dataset before running.
        >>> x, y = read_golf_regression_dataset(url)
        >>> model = OneRRegressor()
        >>> from sklearn.metrics import mean_absolute_error
        >>> model.fit(x, y)
        >>> y_pred = model.predict(x)
        >>> print(f"{mean_absolute_error(y, y_pred):.2f}")
        0.07
    """

    def __init__(self, criterion="std", backend=DEFAULT_BACKEND):
        """Initializes the OneRRegressor.

        Args:
            criterion (str): The function to measure the error of a split.
                Supported criteria are currently only "std", for standard
                deviation (equivalent to root MSE). Defaults to "std".
            backend (str): The backend to use for computations.
                Defaults to DEFAULT_BACKEND.
        """
        super().__init__(backend=backend)
        # Stored unmodified under the parameter's own name; it is only read
        # later, in ``make_model``.
        self.criterion = criterion

    def __sklearn_tags__(self):
        """Returns the scikit-learn tags for the estimator.

        Starts from the tags provided by the parent classes and sets the
        regressor ``poor_score`` tag to True.

        Returns:
            Tags: The scikit-learn tags.
        """
        tags = super().__sklearn_tags__()
        tags.regressor_tags.poor_score = True
        return tags

    def make_model(self, d: Dataset):
        """Creates the OneR trainer for the model.

        Args:
            d (Dataset): The dataset to train on. Not used by this method.

        Returns:
            OneR: The OneR trainer instance.
        """
        # The error function is built from the configured criterion, then
        # handed to the trainer.
        error = self.build_error(self.criterion)
        return OneR(error_function=error)

    def fit(self, x, y):
        """Fit the OneR model according to the given training data.

        The OneR algorithm identifies the single best feature that minimizes
        the regression error (e.g., standard deviation) on the training
        data.

        Args:
            x (pd.DataFrame or np.ndarray): The training input samples.
            y (np.ndarray): The target values (real numbers).

        Returns:
            self: Returns the instance itself.
        """
        # Pure delegation to the parent implementation; the override exists
        # to carry estimator-specific documentation.
        return super().fit(x, y)

    def predict(self, x):
        """Predict regression value for X.

        Predicts the target value based on the single best feature
        identified during :meth:`fit`. Typically, this is the mean target
        value of training samples matching the same feature value.

        Args:
            x (pd.DataFrame or np.ndarray): The input samples.

        Returns:
            np.ndarray: Predicted target values for X.
        """
        return super().predict(x)