import {formatPrettyString} from "src/utils/formatting.js";
import {VariableTypes} from "src/utils/tableData.js";
import {DISTRIBUTIONS} from "src/components/distributions/index.jsx";

/**
 * String identifiers of the regression model kinds known to this module.
 *
 * The object is frozen so the identifiers cannot be reassigned, added to or
 * removed at runtime.
 */
export const RegressionModel = Object.freeze({
    LINEAR: "linear",
    DECISION_TREE: "decision_tree",
    RANDOM_FOREST: "random_forest",
    MULTI_LAYER_PERCEPTRON: "multi_layer_perceptron"
});

/**
 * One option record per RegressionModel member, in declaration order:
 * `key` is the member name, `value` its identifier string and `name` the
 * result of passing the member name through formatPrettyString.
 */
export const modelOptions = Object.entries(RegressionModel).map(([key, value]) => {
    const name = formatPrettyString(key);
    return {key, value, name};
});


/**
 * String identifiers of the feature transformations referenced by this module.
 *
 * The object is frozen so the identifiers, which are compared by strict
 * equality elsewhere in this file, cannot be changed at runtime.
 */
export const Transformations = Object.freeze({
    PASSTHROUGH: "passthrough",
    STANDARD: "standard",
    LOG_NORMAL: "log_normal",
    MIN_MAX: "min_max",
    ONE_HOT: "one_hot"
});

/**
 * Short, human-readable description of each transformation, keyed by the
 * transformation identifier from Transformations.
 *
 * Every member of Transformations has an entry, including ONE_HOT, so a
 * lookup by any transformation identifier yields a string.
 */
export const TransformationReasoning = {
    [Transformations.STANDARD]: "Standardizes data to zero mean and unit variance, ideal for normally distributed data",
    [Transformations.LOG_NORMAL]: "Helps handle right-skewed distributions and compress large ranges",
    [Transformations.MIN_MAX]: "Scales features to a fixed range, preserving zero values and distribution shape",
    [Transformations.PASSTHROUGH]: "No transformation applied, use when distribution is already suitable",
    [Transformations.ONE_HOT]: "Encodes each category as its own binary indicator column, use for categorical features"
};

/**
 * One option record per Transformations member, in declaration order:
 * `key` is the member name, `value` its identifier string and `name` the
 * result of passing the member name through formatPrettyString.
 */
export const transformationOptions = Object.entries(Transformations).map(([key, value]) => {
    const name = formatPrettyString(key);
    return {key, value, name};
});


/**
 * Chooses the transformation to suggest for a continuous feature from the
 * distribution that fits it best.
 *
 * @param {*} bestDistributionFit - Value compared (strictly) against the
 *     members of DISTRIBUTIONS.
 * @returns {string} The matching Transformations identifier, or
 *     Transformations.PASSTHROUGH when no listed distribution matches.
 */
export function getRecommendedContinuousTransformation(bestDistributionFit) {
    // Ordered (distribution, transformation) pairs; the first strict match wins.
    const recommendations = [
        // Already normal: standardize.
        [DISTRIBUTIONS.norm, Transformations.STANDARD],
        // Log-normal: log transformation.
        [DISTRIBUTIONS.lognorm, Transformations.LOG_NORMAL],
        // Uniform: min-max scaling.
        [DISTRIBUTIONS.uniform, Transformations.MIN_MAX],
        // Exponential: standard scaling.
        [DISTRIBUTIONS.expon, Transformations.STANDARD],
        // Gamma (right-skewed): log transformation.
        [DISTRIBUTIONS.gamma, Transformations.LOG_NORMAL],
        // Pareto (heavy-tailed): log transformation.
        [DISTRIBUTIONS.pareto, Transformations.LOG_NORMAL]
    ];

    const match = recommendations.find(([distribution]) => distribution === bestDistributionFit);

    // Unknown or unmatched distribution: leave the feature untouched.
    return match ? match[1] : Transformations.PASSTHROUGH;
}


/**
 * Recommends a transformation for a column based on its variable type.
 *
 * @param {?Object} column - Column descriptor; only `variableType` is read.
 * @param {*} bestDistributionFit - Forwarded to
 *     getRecommendedContinuousTransformation for continuous columns.
 * @returns {?string} PASSTHROUGH when `column` is falsy, ONE_HOT for
 *     categorical columns, the continuous recommendation for continuous
 *     columns, and `null` for any other variable type.
 */
export function getRecommendedTransformation(column, bestDistributionFit) {
    if (column) {
        switch (column.variableType) {
        case VariableTypes.CATEGORICAL:
            return Transformations.ONE_HOT;

        case VariableTypes.CONTINUOUS:
            return getRecommendedContinuousTransformation(bestDistributionFit);

        default:
            // Variable type not covered by any recommendation.
            return null;
        }
    }

    // No column to inspect.
    return Transformations.PASSTHROUGH;
}

/**
 * Returns a human-readable explanation of why a transformation is suggested
 * for a feature with the given best-fit distribution.
 *
 * The PASSTHROUGH explanation does not depend on the distribution, so it is
 * returned even when `bestFitDistribution` is missing. That is exactly the
 * combination produced when no known distribution matched the feature.
 *
 * @param {string} transformation - A Transformations identifier.
 * @param {*} bestFitDistribution - Value compared (strictly) against the
 *     members of DISTRIBUTIONS.
 * @returns {string} The explanation text; a generic message when a required
 *     argument is missing or the combination is not covered.
 */
export function getTransformationReasoning(transformation, bestFitDistribution) {
    const missingParametersMessage = "No specific reasoning available due to missing parameters.";

    if (!transformation) {
        return missingParametersMessage;
    }

    // Distribution-independent: must be checked before the distribution guard.
    if (transformation === Transformations.PASSTHROUGH) {
        // eslint-disable-next-line max-len
        return "No specific transformation is recommended. This could be because the feature's distribution is already suitable for the machine learning algorithm, or additional domain knowledge is required to make a more informed decision.";
    }

    // Every remaining explanation is specific to a distribution.
    if (!bestFitDistribution) {
        return missingParametersMessage;
    }

    if (transformation === Transformations.STANDARD && bestFitDistribution === DISTRIBUTIONS.norm) {
        // eslint-disable-next-line max-len
        return "Standardization is recommended for normally distributed data to center the feature around zero with unit variance. This transformation helps many machine learning algorithms converge faster and perform more consistently.";
    }

    if (transformation === Transformations.LOG_NORMAL && bestFitDistribution === DISTRIBUTIONS.lognorm) {
        // eslint-disable-next-line max-len
        return "Log transformation is ideal for log-normally distributed data. It helps to symmetrize the distribution, reduce right-skewness, and compress the range of extreme values, making the data more suitable for linear models.";
    }

    if (transformation === Transformations.LOG_NORMAL && bestFitDistribution === DISTRIBUTIONS.gamma) {
        // eslint-disable-next-line max-len
        return "Log transformation is particularly effective for gamma-distributed features. Gamma distributions are typically right-skewed, and the log transformation helps to make the distribution more symmetric and closer to a normal distribution.";
    }

    if (transformation === Transformations.LOG_NORMAL && bestFitDistribution === DISTRIBUTIONS.pareto) {
        // eslint-disable-next-line max-len
        return "For Pareto-distributed data, log transformation is crucial. Pareto distributions have heavy right tails, and log transformation helps to reduce the impact of extreme values and make the distribution more manageable for machine learning algorithms.";
    }

    if (transformation === Transformations.MIN_MAX && bestFitDistribution === DISTRIBUTIONS.uniform) {
        // eslint-disable-next-line max-len
        return "Min-Max scaling is recommended for uniformly distributed data. It preserves the shape of the original distribution while scaling all features to a fixed range (typically [0, 1]), which can be beneficial for algorithms sensitive to feature scales.";
    }

    if (transformation === Transformations.STANDARD && bestFitDistribution === DISTRIBUTIONS.expon) {
        // eslint-disable-next-line max-len
        return "Standardization is suggested for exponentially distributed features. This transformation helps to center the data and reduce the impact of the exponential distribution's characteristic right-skew.";
    }

    // Fallback for any unhandled combinations
    return "No specific reasoning could be determined for the given transformation and distribution combination.";
}
