# Authors: Andreas Mueller
# License: Simplified BSD

import numpy as np


def compute_class_weight(class_weight, classes, y):
    """Estimate class weights for unbalanced datasets.

    Parameters
    ----------
    class_weight : dict, 'auto' or None
        If 'auto', class weights will be given inverse proportional
        to the frequency of the class in the data.
        If a dictionary is given, keys are classes and values
        are corresponding class weights; classes that are not listed
        keep a weight of 1.
        If None (or an empty container) is given, the class weights
        will be uniform.

    classes : array-like, shape=(n_classes,)
        Sorted classes occurring in the data, as given by
        ``np.unique(y_org)`` with ``y_org`` the original class labels.
        The dictionary lookup relies on this ordering.

    y : array-like, shape=(n_samples,)
        Class per sample. Only used when ``class_weight`` is 'auto',
        where each entry of ``classes`` is compared against ``y`` to
        count the samples of that class.

    Returns
    -------
    class_weight_vect : ndarray, shape=(n_classes,)
        Array with class_weight_vect[i] the weight for i-th class
        (as determined by sorting).

    Raises
    ------
    ValueError
        If ``class_weight`` is neither a dict, 'auto' nor None, or if
        a key of the dictionary is not present in ``classes``.
    """
    # Accept plain lists as well as arrays for ``classes``.
    classes = np.asarray(classes)
    n_classes = classes.shape[0]

    # Only call len() on objects that support it, so that invalid inputs
    # reach the ValueError below instead of failing with a TypeError.
    is_empty = hasattr(class_weight, '__len__') and len(class_weight) == 0
    if class_weight is None or is_empty:
        # uniform class weights
        return np.ones(n_classes, dtype=np.float64, order='C')

    if isinstance(class_weight, dict):
        # user-defined dictionary; unspecified classes keep weight 1
        weight = np.ones(n_classes, dtype=np.float64, order='C')
        for c in class_weight:
            i = np.searchsorted(classes, c)
            # searchsorted returns n_classes when ``c`` sorts after every
            # known class, so check the bound before indexing.
            if i >= n_classes or classes[i] != c:
                # %s (not %d) so that non-numeric labels can be reported.
                raise ValueError("Class label %s not present." % (c,))
            weight[i] = class_weight[c]
        return weight

    if class_weight == 'auto':
        # anti-proportional to the number of samples in the class
        y = np.asarray(y)  # make ``y == label`` elementwise for list input
        weight = np.array([1.0 / np.sum(y == label) for label in classes],
                          dtype=np.float64, order='C')
        # rescale so that the weights sum to n_classes
        weight *= n_classes / np.sum(weight)
        return weight

    raise ValueError("class_weight must be dict, 'auto', or None,"
                     " got: %r" % (class_weight,))