# Source code for sklearn_nominal.sklearn.naive_bayes

import numpy as np
from sklearn.base import BaseEstimator

from sklearn_nominal.backend.core import Dataset
from sklearn_nominal.backend.factory import DEFAULT_BACKEND
from sklearn_nominal.bayes.model import NaiveBayes
from sklearn_nominal.bayes.trainer import NaiveBayesTrainer
from sklearn_nominal.sklearn.nominal_model import NominalClassifier


class NaiveBayesClassifier(NominalClassifier, BaseEstimator):
    """A Naive Bayes classifier supporting nominal attributes.

    A NaiveBayesClassifier that mimics `scikit-learn`'s
    :class:`sklearn.naive_bayes.GaussianNB` but adds support for nominal
    attributes with categorical distributions.

    Args:
        smoothing (float, optional): The Laplace smoothing factor for
            categorical distributions. This value will be added to the count
            of each value to generate a smoothed categorical distribution.
            The default value, 0.0, indicates no smoothing.
        backend (str, optional): The backend to use for computations.
            Defaults to ``DEFAULT_BACKEND`` (documented upstream as
            "pandas").
        class_weight (dict or "balanced", optional): Weights associated with
            classes in the form ``{class_label: weight}``. If None, all
            classes are assumed to have weight one. The "balanced" mode uses
            the values of y to automatically adjust weights inversely
            proportional to class frequencies. Defaults to None.

    Attributes:
        classes_ (ndarray of shape (n_classes,)): The classes labels.
        n_classes_ (int): The number of classes.
        n_features_in_ (int): Number of features seen during :term:`fit`.
        feature_names_in_ (ndarray of shape (n_features_in_,)): Names of
            features seen during :term:`fit`. Defined only when `X` has
            feature names that are all strings.
        n_outputs_ (int): The number of outputs when ``fit`` is performed.
        model_ (NaiveBayes): The underlying NaiveBayes that actually holds
            the distribution parameters and can perform inference.

    See Also:
        TreeClassifier: A decision tree classifier.

    Notes:
        The :meth:`predict` method operates using the :func:`numpy.argmax`
        function on the outputs of :meth:`predict_proba`. This means that in
        case the highest predicted probabilities are tied, the classifier
        will predict the tied class with the lowest index in
        :term:`classes_`.

    Examples:
        >>> from sklearn.datasets import fetch_openml
        >>> df = fetch_openml("credit-g", version=2).frame
        >>> x, y = df.iloc[:, 0:-1], df.iloc[:, -1]

        >>> from sklearn_nominal import NaiveBayesClassifier
        >>> model = NaiveBayesClassifier(smoothing=0.01)
        >>> model = model.fit(x, y)

        >>> from sklearn.metrics import accuracy_score
        >>> y_pred = model.predict(x)
        >>> print(accuracy_score(y, y_pred))
        0.787
    """

    def __sklearn_tags__(self):
        """Returns the scikit-learn tags for the estimator.

        The tags are those of the parent classes, with
        ``classifier_tags.poor_score`` switched on.

        Returns:
            Tags: The scikit-learn tags.
        """
        tags = super().__sklearn_tags__()
        tags.classifier_tags.poor_score = True
        return tags

    def __init__(self, smoothing=0.0, backend=DEFAULT_BACKEND, class_weight=None):
        """Initializes the NaiveBayesClassifier.

        Args:
            smoothing (float): The Laplace smoothing factor for categorical
                distributions. This value will be added to the count of each
                value to generate a smoothed categorical distribution. The
                default value, 0.0, indicates no smoothing.
            backend (str): The backend to use for computations. Defaults to
                ``DEFAULT_BACKEND`` (documented upstream as "pandas").
            class_weight (dict or "balanced", optional): Weights associated
                with classes in the form ``{class_label: weight}``. If None,
                all classes are assumed to have weight one. The "balanced"
                mode uses the values of y to automatically adjust weights
                inversely proportional to class frequencies. Defaults to
                None.
        """
        super().__init__(backend=backend, class_weight=class_weight)
        # Stored unmodified under the parameter's own name, as scikit-learn's
        # get_params/set_params/clone machinery expects.
        self.smoothing = smoothing

    def make_model(self, d: Dataset, class_weight: np.ndarray):
        """Creates the NaiveBayesTrainer for the model.

        Args:
            d (Dataset): The dataset to train on. It is not used when
                building the trainer here.
            class_weight (np.ndarray): The weights for each class.

        Returns:
            NaiveBayesTrainer: The trainer instance for Naive Bayes,
            configured with ``class_weight`` and ``self.smoothing``.
        """
        return NaiveBayesTrainer(class_weight, smoothing=self.smoothing)

    def fit(self, x, y):
        """Fit the Naive Bayes model according to the given training data.

        This algorithm calculates the prior probabilities of each class and
        the conditional probabilities of each feature given the class. For
        nominal features, categorical distributions are estimated (with
        Laplace smoothing if requested). For numeric features, Gaussian
        distributions are typically assumed.

        The work is delegated to the parent class; this override exists to
        carry estimator-specific documentation.

        Args:
            x (pd.DataFrame or np.ndarray): The training input samples.
            y (np.ndarray): The target values (class labels) as integers or
                strings.

        Returns:
            self: Returns the instance itself.
        """
        return super().fit(x, y)

    def predict(self, x):
        """Perform classification on an array of test vectors X.

        Predictions are made using Bayes' Theorem by multiplying the prior
        class probability with the conditional probabilities of all feature
        values given the class, and selecting the class with the highest
        posterior probability. Ties are resolved by choosing the class with
        the lowest index in :term:`classes_`.

        The work is delegated to the parent class; this override exists to
        carry estimator-specific documentation.

        Args:
            x (pd.DataFrame or np.ndarray): The input samples.

        Returns:
            np.ndarray: Predicted target values for X.
        """
        return super().predict(x)

    def predict_proba(self, x):
        """Return probability estimates for the test data X.

        Probabilities are calculated by normalizing the posterior
        probabilities obtained via Bayes' Theorem for each class across all
        samples.

        The work is delegated to the parent class; this override exists to
        carry estimator-specific documentation.

        Args:
            x (pd.DataFrame or np.ndarray): The input samples.

        Returns:
            np.ndarray: Returns the probability of the sample for each class
            in the model.
        """
        return super().predict_proba(x)

    def plot_distributions(
        self,
        class_names: list[str] | None = None,
        feature_names: list[str] | None = None,
        n_cols: int = 4,
    ) -> None:
        """Plot the distributions held by the fitted model.

        Verifies that the estimator is fitted and then delegates to
        ``model_.plot_distributions``, forwarding the arguments positionally
        and unchanged.

        Args:
            class_names (list[str], optional): Forwarded to the underlying
                model; presumably display names for the classes. Defaults to
                None.
            feature_names (list[str], optional): Forwarded to the underlying
                model; presumably display names for the features. Defaults
                to None.
            n_cols (int): Forwarded to the underlying model; presumably the
                number of columns in the plot grid (confirm against
                ``NaiveBayes.plot_distributions``). Defaults to 4.

        Returns:
            None: Any value returned by the underlying model's plotting
            method is discarded.

        Raises:
            Exception: Whatever ``check_is_fitted`` raises when the
                estimator has not been fitted yet (presumably scikit-learn's
                ``NotFittedError``; confirm in ``NominalClassifier``).
        """
        self.check_is_fitted()
        model: NaiveBayes = self.model_
        model.plot_distributions(class_names, feature_names, n_cols)

    def explain(self, x, class_names: list[str] | None = None):
        """Explain the model's output for the given samples.

        Verifies that the estimator is fitted and then delegates to
        ``model_.explain``.

        Args:
            x: The input samples, forwarded unchanged to the underlying
                model.
            class_names (list[str], optional): Forwarded to the underlying
                model; presumably display names for the classes. Defaults to
                None.

        Returns:
            The value returned by ``NaiveBayes.explain`` for ``x``.

        Raises:
            Exception: Whatever ``check_is_fitted`` raises when the
                estimator has not been fitted yet (presumably scikit-learn's
                ``NotFittedError``; confirm in ``NominalClassifier``).
        """
        self.check_is_fitted()
        return self.model_.explain(x, class_names)