import React from 'react'

// Can be accessed at locale.t('model.<key>')

export default {
  tooltip: {
    // Definition of a "model". The JSX is identical to the `models` entry further
    // down in this same `tooltip` object.
    about: (
      <span>
        A <b>model</b> uses historical data to learn patterns and uncover relationships
        between features and an outcome (predictor) for a given population. A model
        applies the patterns it's learned to current data to make predictions.
      </span>
    ),
    // Definition of "algorithm" in the machine-learning sense.
    algorithm: (
      <span>
        An <b>algorithm</b>, in machine learning, is a method used to learn from data and
        produce predictions.
      </span>
    ),
    // Explains the Average Precision Score and why it is used instead of AUPR.
    // The trailing link opens the scikit-learn reference in a new tab.
    aupr: (
      <span>
        <b>Average Precision Score (Avg. Prec.)</b> summarizes the tradeoff between
        precision and recall across different probability thresholds. Precision is the
        proportion of cases identified as positive that were actual positives. Recall is
        the proportion of actual positive cases identified correctly. This is used instead
        of Area Under the Precision-Recall (AUPR) Curve as it is less prone to
        interpolation errors at low sample sizes. Reference:{' '}
        <a
          href="https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html"
          target="_blank"
          rel="noopener noreferrer"
        >
          sklearn
        </a>
      </span>
    ),
    // Overview of what bias and fairness analysis assesses, and the validation
    // decision that follows the review.
    biasAndFairness: (
      <span>
        <b>Bias and fairness</b> analysis enables you to assess whether a machine learning
        model uses a representative sample, is impacted by label bias, or may result in
        the unfair distribution of resources. After reviewing each metric, you can
        indicate the model has been validated, and whether it is biased or unbiased.
      </span>
    ),
    biasAndFairnessMissingTestRun: ({ name }) =>
      `${name} is missing a test run. Bias and fairness analysis is not available without a test run.`,
    // Plain-string copy for bias and fairness analysis: a prompt to analyze the
    // model, followed by the two "has been run" messages (validated / not yet
    // validated).
    biasAndFairnessAnalyze: 'Analyze bias and fairness for this model',
    biasAndFairnessViewValidated:
      'Bias and fairness analysis has been run and has been validated',
    biasAndFairnessView:
      'Bias and fairness analysis has been run, but the model has not been validated yet',
    biasAndFairnessState: ({ user, time, state }) =>
      `Validated - ${state} by ${user} at ${time}`,
    // Definition of "Prediction Percentile".
    biasPredictionPercentile: (
      <span>
        The <b>Prediction Percentile</b> is the percentile of the prediction value based
        on the model run population.
      </span>
    ),
    // Describes what the Cross Validation graph plots.
    crossValidation: (
      <span>
        The <b>Cross Validation</b> graph plots the ROC AUC metric averaged across the
        cross-validation folds for each iteration of the hyperparameter optimization.
      </span>
    ),
    // Describes the data adapter snapshot and what it pins for a model run.
    dataAdapterSnapshot: (
      <span>
        The <b>data adapter snapshot</b> used for train and test runs of the model. The
        data adapter snapshot determines the data adapter, entity, and terminology
        versions used in a model run.
      </span>
    ),
    // Explains the per-iteration hyperparameter optimization metric.
    // Note: the copy hard-codes "5 cross-validation splits".
    hyperparameterOptimizationIteration: (
      <span>
        A model performance metric (e.g. ROC AUC for binary classification models)
        averaged across the cross-validation folds for each iteration of the
        hyperparameter optimization. Hyperparameters are specific parameters fed into a
        model. The training and validation performance numbers are the mean and standard
        deviation of the performance on the 5 cross-validation splits.
      </span>
    ),
    // Definition of Mean Absolute Error.
    mae: (
      <span>
        <b>Mean Absolute Error (MAE)</b> is the mean of the absolute difference between
        the forecasted value and the actual value. MAE tells us how big of an error we can
        expect from the forecast on average. In general, a lower MAE is better than a
        higher one.
      </span>
    ),
    // Definition of Mean Absolute Percentage Error, expressed in terms of MAE.
    mape: (
      <span>
        <b>Mean Absolute Percentage Error (MAPE)</b> is the Mean Absolute Error (MAE)
        expressed as a percentage of the population average.
      </span>
    ),
    // Definition of a "model". The JSX is identical to the `about` entry at the top
    // of this same `tooltip` object.
    models: (
      <span>
        A <b>model</b> uses historical data to learn patterns and uncover relationships
        between features and an outcome (predictor) for a given population. A model
        applies the patterns it's learned to current data to make predictions.
      </span>
    ),
    population: ({ type = 'training and test' } = {}) => (
      <span>
        The <b>population</b> used for {type.toLowerCase()} model runs. The population
        will be split so that test data is not used for training.
      </span>
    ),
    // Explains the 80% / 20% train/test population split and the grouped,
    // stratified sampling described in the copy. The two <br /> tags separate
    // the summary from the detailed explanation.
    populationSplit: (
      <span>
        In this option, 80% of individuals in the population are included in the training
        model run, and 20% are included in the test model run.
        <br />
        <br />
        Rather than a fully random 80% / 20% split, this approach uses an intelligent
        grouped, stratified sampling approach. Stratified sampling ensures that the
        proportion of positive events is the same in the train and test set as it is in
        the overall sample. Grouped sampling splits the data based on the primary entity,
        regardless of the entity used for the population. Data for a single patient will
        never be split between the train and test groups, a prime cause of data leakage.
        As an example, in a readmissions model, if a patient has multiple admissions, all
        admissions for that patient will be grouped into either the test or the training
        set.
      </span>
    ),
    populationAsOfDate: ({ type }) => {
      const copyMap = {
        View: 'for the example population displayed in the selection steps funnel.',
        'Train/Test': 'for the population used in train and test model runs.',
        Train: 'for the population used in the training model run.',
        Test: 'for the population used in the test model run. This must be the same as the As of Date used for the training model run.'
      }

      return (
        <span>
          The <b>As of Date</b> {copyMap[type] || copyMap['Train/Test']}
        </span>
      )
    },
    // Definition of "factor" and how it relates to a feature.
    factor: (
      <span>
        A <b>factor</b> is a term used to describe a feature once it has been used in a
        model and has a weight assigned to it. A feature is a variable relating to the
        population used in a model.
      </span>
    ),
    // Definition of "Prevalence".
    prevalence: (
      <span>
        <b>Prevalence</b> indicates the proportion of the population in which the outcome
        occurs.
      </span>
    ),
    // Definition of Root Mean Squared Error.
    rmse: (
      <span>
        <b>Root Mean Squared Error (RMSE)</b> is the square root of the average squared
        errors. It measures how concentrated the data is around the line of best fit. In
        general, a lower RMSE is better than a higher one.
      </span>
    ),
    // Definition of ROC AUC for binary classification models.
    rocAuc: (
      <span>
        The <b>Receiver Operating Characteristic (ROC) Area Under the Curve (AUC)</b> is a
        model performance metric for binary classification models. ROC AUC represents the
        degree of separability between the two classes that the model is predicting on.
        ROC AUC is the numeric value for area under the ROC Curve - a curve that plots the
        True Positive Rate (TPR) against the False Positive Rate (FPR). A higher AUC
        indicates better model performance.
      </span>
    ),
    // Definition of R-Squared for regression models. The copy mentions both a
    // 0 – 100% scale and a 0 to 1 range, and notes the value can be negative.
    rSquared: (
      <span>
        <b>R-Squared</b> is a goodness-of-fit measure for regression models. R-squared
        measures the strength of the relationship between your model and the outcome on a
        0 – 100% scale. R-squared can be between 0 and 1 where values closer to 0
        represent a poor fit while values closer to 1 represent a perfect fit. In certain
        cases of a poor fit, the computed R-squared can be negative.
      </span>
    ),
    setSize: ({ stage }) =>
      `The number of rows in the ${stage === 'train' ? 'training' : stage} data set.`,
    // Definition of Group Benefit Equality (GBE), with a worked example at .8 and
    // the "perfectly fair" value of 1.
    biasGroupBenefitEquality: (
      <span>
        <b>Group Benefit Equality</b> is the percent (%) of resources directed to a
        population subgroup relative to their need. For example, at .8, the subgroup is
        getting 80% of the resources they need. In a perfectly fair model, a subgroup gets
        100% of the resources they need, and the GBE value is 1.
      </span>
    )
  },
  inline: {
    // Analysis subgroups: title, description and helper copy for choosing the
    // feature used to segment the population.
    biasAnalysisSubgroupsTitle: 'Analysis Subgroups',
    biasAnalysisSubgroupsDescription: (
      <span>
        Select a feature by which to segment the population for analysis.
        <br /> Common subgroups for bias and fairness analysis include race, gender, age,
        and disability status.
      </span>
    ),
    biasAnalysisSubgroupsHelper:
      'This feature must be of data type string or boolean, and it must be a single feature (feature generators and reference data cannot be selected).',
    // Minimum subgroup size: the description says smaller subgroups are grouped
    // into an "All Other Subgroups" category.
    biasMinSubgroupSizeTitle: 'Minimum Subgroup Size',
    biasMinSubgroupSizeDescription: (
      <span>
        Define the minimum percent of the population required for a subgroup to be
        displayed independently in bias and fairness analysis charts. <br />
        Subgroups that make up a smaller percent of the population will be grouped into an
        "All Other Subgroups" category.
      </span>
    ),
    // Alternative outcome (label): title, description and helper copy.
    biasAlternativeOutcomeTitle: 'Alternative Outcome (Label)',
    biasAlternativeOutcomeDescription:
      'Select a feature to evaluate as an alternative outcome, or label.',
    biasAlternativeOutcomeHelper:
      'This feature must be of data type integer, double, or long, and it must be a single feature (feature generators and reference data cannot be selected).',
    // Intervention threshold: the description ties this value to the Group Benefit
    // Equality (GBE) calculation.
    biasInterventionThresholdTitle: 'Intervention Threshold',
    biasInterventionThresholdDescription: (
      <span>
        Define the percent of the population that will receive the intervention when this
        model is used in production.
        <br /> This threshold is used to calculate Group Benefit Equality (GBE), in order
        to assess fair distribution of intervention resources to analysis subgroups.
      </span>
    ),
    // Heading copy for the representative-sample assessment.
    biasDistributionLabel: 'Assess Whether the Model Uses a Representative Sample',
    // Each chart below has four entries: Label, LoadingLabel, Description and
    // LoadingDescription.
    // Analysis Subgroup Distribution. Its Label is a <div> with a nested <span>,
    // unlike the other chart labels, which are plain strings.
    biasSubgroupDistributionLabel: (
      <div>
        Analysis Subgroup Distribution{' '}
        <span>
          Default population for trial, staged, and production model runs are up to date.
        </span>
      </div>
    ),
    biasSubgroupDistributionLoadingLabel:
      'Analysis Subgroup Distribution chart is generating. This may take a few minutes.',
    biasSubgroupDistributionDescription: (
      <span>
        The population used for training/testing should be representative of the
        population used to make predictions. <br />
        Meaning, the subgroup distribution should be similar among the two populations.
      </span>
    ),
    biasSubgroupDistributionLoadingDescription: 'Analysis Subgroup Distribution',
    // Outcome Distribution chart copy.
    biasOutcomeDistributionLabel: 'Outcome Distribution',
    biasOutcomeDistributionLoadingLabel:
      'Outcome Distribution chart is generating. This may take a few minutes.',
    biasOutcomeDistributionDescription: (
      <span>
        The outcome distribution may vary by analysis subgroup; understanding these
        differences can be useful in assessing whether the distribution of resources is
        fair.
        <br /> If the outcome distribution is strongly skewed, it's especially important
        to evaluate label bias closely.
      </span>
    ),
    biasOutcomeDistributionLoadingDescription: 'Outcome Distribution',
    // Diagnose Subgroup Validity chart copy.
    biasSubgroupValidityLabel: 'Diagnose Subgroup Validity',
    biasSubgroupValidityLoadingLabel:
      'Diagnose Subgroup Validity chart is generating. This may take a few minutes.',
    biasSubgroupValidityDescription: (
      <span>
        The mean outcome value for the test population subgroups in each prediction
        percentile decile should be similar. <br />
        In this chart, overlapping lines means the model is well-calibrated for each
        subgroup.
      </span>
    ),
    biasSubgroupValidityLoadingDescription: 'Diagnose Subgroup Validity',
    // Label bias chart copy. The description ends with an explicit {' '}, which
    // renders a trailing space after the final sentence.
    biasLabelBiasLabel: 'Evaluate Potential for Label Bias',
    biasLabelBiasLoadingLabel:
      'Potential Bias chart is generating. This may take a few minutes.',
    biasLabelBiasDescription: (
      <span>
        The mean alternative outcome value for the test population subgroups in each
        prediction percentile decile should be similar. <br />
        In this chart, overlapping lines means the model is well-calibrated for each
        subgroup.{' '}
      </span>
    ),
    biasLabelBiasLoadingDescription: 'Evaluate potential for label bias',
    // Fairness chart copy; the description explains fairness in terms of GBE.
    biasFairnessLabel: 'Evaluate Fairness',
    biasFairnessLoadingLabel:
      'Fairness chart is generating. This may take a few minutes.',
    biasFairnessDescription: (
      <span>
        Healthcare models should be evaluated to ensure fair distribution of services. The
        predicted versus actual outcome should be similar for each subgroup. <br />
        This is measured using Group Benefit Equality (GBE). A GBE of one means the
        predicted versus actual outcomes for each subgroup are equal.
        <br />
        This ensures the proportion of services distributed to each subgroup based on
        predictions aligns with actual needs.
      </span>
    ),
    biasFairnessLoadingDescription: 'Evaluate fairness',
    // Instruction asking the reviewer to record a pass/fail assessment.
    biasValidationInstructions:
      'Based on your assessment of the analysis above, indicate whether this model has passed or failed your assessment of bias and fairness.',
    /**
     * Builds the sentence describing the validation status of the analysis.
     *
     * @param {Object} params
     * @param {*} params.user - Interpolated after "validated by".
     * @param {*} params.time - Interpolated after "at".
     * @param {*} params.state - Exactly 'Unvalidated' selects the
     *   "has not been validated" sentence; any other value selects the
     *   "was validated by" sentence.
     * @returns {string}
     */
    biasAndFairnessValidationState: ({ user, time, state }) => {
      if (state === 'Unvalidated') {
        return 'This model has not been validated.'
      }

      return `This bias and fairness analysis was validated by ${user} at ${time}.`
    }
  }
}
