import numpy as np

from .base import LinearClassifierMixin
from ..feature_selection.selector_mixin import SelectorMixin
from ..svm.base import BaseLibLinear
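# Illustrative sketch (not part of the original module, and the helper name
# is hypothetical): a standalone version of the in-place logistic sigmoid
# that predict_proba below relies on. It computes 1. / (1. + np.exp(-scores))
# without allocating temporary arrays, assuming `scores` is a float ndarray
# that may be overwritten.
def _inplace_sigmoid_example(scores):
    """Overwrite `scores` with its logistic sigmoid and return it."""
    scores *= -1                   # scores <- -scores
    np.exp(scores, scores)         # scores <- exp(-scores)
    scores += 1                    # scores <- 1 + exp(-scores)
    np.reciprocal(scores, scores)  # scores <- 1 / (1 + exp(-scores))
    return scores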
class LogisticRegression(BaseLibLinear, LinearClassifierMixin,
                         SelectorMixin):
    """Logistic Regression (aka logit, MaxEnt) classifier.

    In the multiclass case, the training algorithm uses a one-vs.-all (OvA)
    scheme, rather than the "true" multinomial LR.

    This class implements L1 and L2 regularized logistic regression using
    the `liblinear` library. It can handle both dense and sparse input. Use
    C-ordered arrays or CSR matrices containing 64-bit floats for optimal
    performance; any other input format will be converted (and copied).

    Parameters
    ----------
    penalty : string, 'l1' or 'l2'
        Used to specify the norm used in the penalization.

    dual : boolean
        Dual or primal formulation. Dual formulation is only implemented
        for the l2 penalty. Prefer dual=False when n_samples > n_features.

    C : float, optional (default=1.0)
        Inverse of regularization strength; smaller values specify
        stronger regularization.

    fit_intercept : bool, default: True
        Specifies if a constant (a.k.a. bias or intercept) should be
        added to the decision function.

    intercept_scaling : float, default: 1
        When self.fit_intercept is True, the instance vector x becomes
        [x, self.intercept_scaling], i.e. a "synthetic" feature with a
        constant value equal to intercept_scaling is appended to the
        instance vector. The intercept becomes
        intercept_scaling * synthetic feature weight.

        Note! the synthetic feature weight is subject to l1/l2
        regularization like all other features. To lessen the effect of
        regularization on the synthetic feature weight (and therefore on
        the intercept), intercept_scaling has to be increased.

    class_weight : {dict, 'auto'}, optional
        Set the parameter C of class i to class_weight[i]*C. If not
        given, all classes are supposed to have weight one. The 'auto'
        mode uses the values of y to automatically adjust weights
        inversely proportional to class frequencies.

    tol : float, optional
        Tolerance for stopping criteria.

    Attributes
    ----------
    `coef_` : array, shape = [n_classes-1, n_features]
        Coefficient of the features in the decision function.

        `coef_` is a readonly property derived from `raw_coef_` that
        follows the internal memory layout of liblinear.

    `intercept_` : array, shape = [n_classes-1]
        Intercept (a.k.a. bias) added to the decision function.
        It is available only when the parameter fit_intercept is set
        to True.

    See also
    --------
    LinearSVC

    Notes
    -----
    The underlying C implementation uses a random number generator to
    select features when fitting the model. It is thus not uncommon to
    have slightly different results for the same input data. If that
    happens, try with a smaller tol parameter.

    References
    ----------
    LIBLINEAR -- A Library for Large Linear Classification
        http://www.csie.ntu.edu.tw/~cjlin/liblinear/

    Hsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). Dual coordinate
    descent methods for logistic regression and maximum entropy models.
    Machine Learning 85(1-2):41-75.
        http://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf
    """

    def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0,
                 fit_intercept=True, intercept_scaling=1, class_weight=None,
                 random_state=None):
        super(LogisticRegression, self).__init__(
            penalty=penalty, dual=dual, loss='lr', tol=tol, C=C,
            fit_intercept=fit_intercept,
            intercept_scaling=intercept_scaling, class_weight=class_weight,
            random_state=random_state)

    def predict_proba(self, X):
        """Probability estimates.

        The returned estimates for all classes are ordered by the
        label of classes.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        T : array-like, shape = [n_samples, n_classes]
            Returns the probability of the sample for each class in the
            model, where classes are ordered as they are in
            ``self.classes_``.
        """
        # 1. / (1. + np.exp(-scores)), computed in-place
        prob = self.decision_function(X)
        prob *= -1
        np.exp(prob, prob)
        prob += 1
        np.reciprocal(prob, prob)
        if prob.ndim == 1:
            # Binary case: prob holds the probability of the positive
            # class; stack the complement alongside it.
            return np.vstack([1 - prob, prob]).T
        else:
            # OvR normalization, not softmax, like liblinear's
            # predict_probability: rescale the per-class sigmoid outputs
            # so that each row sums to one.
            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
            return prob

    def predict_log_proba(self, X):
        """Log of probability estimates.

        The returned estimates for all classes are ordered by the
        label of classes.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        T : array-like, shape = [n_samples, n_classes]
            Returns the log-probability of the sample for each class in
            the model, where classes are ordered as they are in
            ``self.classes_``.
        """
        return np.log(self.predict_proba(X))
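

if __name__ == '__main__':
    # Minimal usage sketch (illustrative only; the toy data below is
    # hypothetical). Assumes this module is importable as part of a built
    # scikit-learn tree, since it relies on the liblinear extension.
    X = np.array([[0., 0.], [1., 1.], [2., 2.], [3., 3.]])
    y = np.array([0, 0, 1, 1])
    clf = LogisticRegression(penalty='l2', C=1.0).fit(X, y)
    # Each row of the result sums to one; columns follow clf.classes_.
    print(clf.predict_proba(X))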