Source code for flood_forecast.evaluator

"""
Author: Isaac Godfried
Description:
    This module contains functions for evaluating models. The basic logic flow is as follows:
    1. `evaluate_model` is called from `trainer.py` at the end of training. It calls `infer_on_torch_model` which does the actual inference. # noqa
    2. `infer_on_torch_model` calls `generate_predictions` which calls `generate_decoded_predictions` or `generate_predictions_non_decoded` depending on whether the model uses a decoder or not.
    3. `generate_decoded_predictions` calls `decoding_functions` which calls `greedy_decode` or `beam_decode` depending on the decoder function specified in the config file.
    4. The returned value from `generate_decoded_predictions` is then used to calculate the evaluation metrics in `run_evaluation`.
    5. `run_evaluation` returns the evaluation metrics to `evaluate_model` which returns them to `trainer.py`.
"""
from datetime import datetime
from typing import Callable, Dict, List, Tuple, Type, Union

import numpy as np
import pandas as pd
import sklearn.metrics
import torch

from flood_forecast.explain_model_output import (
    deep_explain_model_heatmap,
    deep_explain_model_summary_plot,
)
from flood_forecast.model_dict_function import decoding_functions
from flood_forecast.custom.custom_opt import MASELoss, GaussianLoss
from flood_forecast.preprocessing.pytorch_loaders import CSVTestLoader, TemporalTestLoader, SeriesIDTestLoader
from flood_forecast.time_model import TimeSeriesModel
from flood_forecast.utils import flatten_list_function
from flood_forecast.temporal_decoding import decoding_function



[docs]
def stream_baseline(
    river_flow_df: pd.DataFrame, forecast_column: str, hours_forecast=336
) -> Tuple[pd.DataFrame, float]:
    """Function to compute the baseline MSE by using the mean value from the train data."""
    total_length = len(river_flow_df.index)
    train_river_data = river_flow_df[: total_length - hours_forecast]
    test_river_data = river_flow_df[total_length - hours_forecast:]
    mean_value = train_river_data[[forecast_column]].median()[0]
    test_river_data["predicted_baseline"] = mean_value
    mse_baseline = sklearn.metrics.mean_squared_error(
        test_river_data[forecast_column], test_river_data["predicted_baseline"]
    )
    return test_river_data, round(mse_baseline, ndigits=3)




[docs]
def get_model_r2_score(
    river_flow_df: pd.DataFrame,
    model_evaluate_function: Callable,
    forecast_column: str,
    hours_forecast=336,
):
    """model_evaluate_function should call any necessary preprocessing."""
    test_river_data, baseline_mse = stream_baseline(river_flow_df, forecast_column)




[docs]
def get_r2_value(model_mse, baseline_mse):
    return 1 - model_mse / baseline_mse




[docs]
def get_value(the_path: str) -> None:
    df = pd.read_csv(the_path)
    res = stream_baseline(df, "cfs", 336)
    print(get_r2_value(0.120, res[1]))




[docs]
def evaluate_model(
    model: Type[TimeSeriesModel],
    model_type: str,
    target_col: List[str],
    evaluation_metrics: List,
    inference_params: Dict,
    eval_log: Dict,
) -> Tuple[Dict, pd.DataFrame, int, pd.DataFrame]:
    """A function to evaluate a model. Called automatically at end of training. Can be imported for continuing to evaluate
    a model in other places as well.

    .. highlight:: python
    .. code-block:: python

        from flood_forecast.evaluator import evaluate_model
        forecast_model = PyTorchForecast(config_file)
        e_log, df_train_test, f_idx, df_preds = evaluate_model(forecast_model, "PyTorch", ["cfs"], ["MSE", "MAPE"], {})
        print(e_log) # {"MSE":0.2, "MAPE":0.1}
        print(df_train_test) # will print a pandas dataframe
        ...
    '''
    """
    if model_type == "PyTorch":
        (
            df_train_and_test,
            end_tensor,
            forecast_history,
            forecast_start_idx,
            test_data,
            df_predictions,
            # df_prediction_samples_std_dev,
        ) = infer_on_torch_model(model, **inference_params)
        if model.params["dataset_params"]["class"] == "SeriesIDLoader":
            print(end_tensor[0].shape)
            print("forecast_history", forecast_history)
            eval_logs = []
            i = 0
            print(df_train_and_test)
            for end_tenso in end_tensor:
                eval_log = run_evaluation(model, df_train_and_test[i], forecast_history, target_col, end_tenso)
                eval_logs.append(eval_log)
                i += 1
            return eval_logs, df_train_and_test, forecast_start_idx, df_predictions
        g_loss = False
        end_tensor_0 = None
        probablistic = True if "probabilistic" in inference_params else False
        if isinstance(end_tensor, tuple) and not probablistic:
            end_tensor_0 = end_tensor[1]
            end_tensor = end_tensor[0]
            g_loss = True
        if test_data.scale:
            print("Un-transforming data")
            if probablistic:
                print('probabilistic running on infer_on_torch_model')
                end_tensor_mean = test_data.inverse_scale(end_tensor[0].detach().reshape(-1, 1))
                end_tensor_list = flatten_list_function(end_tensor_mean.numpy().tolist())
                end_tensor_mean = end_tensor_mean.squeeze(1)
            else:
                if "n_targets" in model.params:
                    if model.params["model_name"] == "Informer":
                        end_tensor = end_tensor[:, :, 0:model.params["n_targets"]]
                    end_tensor = test_data.inverse_scale(end_tensor.detach())
                else:
                    end_tensor = test_data.inverse_scale(end_tensor.detach().reshape(-1, 1))
                end_tensor_list = flatten_list_function(end_tensor.numpy().tolist())
                end_tensor = end_tensor.squeeze(1)  # Removing extra dim from reshape?
            history_length = model.params["dataset_params"]["forecast_history"]
            if "n_targets" in model.params:
                df_train_and_test.loc[df_train_and_test.index[history_length:],
                                      "preds"] = end_tensor[:, 0].numpy().tolist()
                for i, target in enumerate(target_col):
                    df_train_and_test["pred_" + target] = 0
                    df_train_and_test.loc[df_train_and_test.index[history_length:],
                                          "pred_" + target] = end_tensor[:, i].numpy().tolist()
            else:
                df_train_and_test.loc[df_train_and_test.index[history_length:], "preds"] = end_tensor_list
                df_train_and_test["pred_" + target_col[0]] = 0
                df_train_and_test.loc[df_train_and_test.index[history_length:],
                                      "pred_" + target_col[0]] = end_tensor_list
        print("Current historical dataframe ")
        print(df_train_and_test)
        eval_log = run_evaluation(model, df_train_and_test, forecast_history, target_col, end_tensor, g_loss, eval_log,
                                  end_tensor_0)
    # Explain model behaviour using shap
    if "probabilistic" in inference_params:
        print("Probabilistic explainability currently not supported.")
    elif "n_targets" in model.params:
        print("Multitask forecasting support coming soon")
    elif g_loss:
        print("SHAP not yet supported for these models with multiple outputs")
    else:
        deep_explain_model_summary_plot(
            model, test_data, inference_params["datetime_start"]
        )
        deep_explain_model_heatmap(model, test_data, inference_params["datetime_start"])

    return eval_log, df_train_and_test, forecast_start_idx, df_predictions




[docs]
def run_evaluation(model, df_train_and_test, forecast_history, target_col, end_tensor, g_loss=False, eval_log={},
                   end_tensor_0=None) -> Dict:
    inference_params = model.params["inference_params"]
    for evaluation_metric in model.crit:
        idx = 0
        for target in target_col:
            labels = torch.from_numpy(df_train_and_test[target][forecast_history:].to_numpy())
            if labels.shape[0] == 0:
                print("No labels to evaluate")
                continue
            evaluation_metric_function = evaluation_metric
            if "probabilistic" in inference_params:
                s = evaluation_metric_function(
                    torch.distributions.Normal(end_tensor[0], end_tensor[1][0]),
                    labels,
                )
            elif isinstance(evaluation_metric_function, MASELoss):
                s = evaluation_metric_function(
                    labels,
                    end_tensor,
                    torch.from_numpy(
                        df_train_and_test[target][:forecast_history].to_numpy()
                    )
                )
            elif g_loss:
                g = GaussianLoss(end_tensor.unsqueeze(1), end_tensor_0.unsqueeze(1))
                s = g(labels.unsqueeze(1))

            else:
                if "n_targets" in model.params:
                    s = evaluation_metric_function(
                        labels,
                        end_tensor[:, idx],
                    )
                else:
                    s = evaluation_metric_function(
                        labels,
                        end_tensor,
                    )
            idx += 1
            eval_log[target + "_" + evaluation_metric.__class__.__name__] = s

    return eval_log




[docs]
def infer_on_torch_model(
    model,
    test_csv_path: str = None,
    datetime_start: datetime = datetime(2018, 9, 22, 0),
    hours_to_forecast: int = 336,
    decoder_params=None,
    dataset_params: Dict = {},
    num_prediction_samples: int = None,
    probabilistic: bool = False,
    criterion_params: Dict = None
) -> Tuple[pd.DataFrame, torch.Tensor, int, int, CSVTestLoader, List[pd.DataFrame]]:
    """Function to handle both test evaluation and inference on a test data-frame.

    :param model: The time series model present in the model zoo
    :param test_csv_path: The path to the test data-frame
    :return:
        df: df including training and test data
        end_tensor: the final tensor after the model has finished predictions
        history_length: num rows to use in training
        forecast_start_idx: row index to start forecasting
        test_data: CSVTestLoader instance
        df_prediction_samples: has same index as df, and num cols equal to num_prediction_samples
        or no columns if num_prediction_samples is None
    :rtype: tuple()
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if isinstance(datetime_start, str):
        datetime_start = datetime.strptime(datetime_start, "%Y-%m-%d")
    multi_params = 1
    if "n_targets" in model.params:
        multi_params = model.params["n_targets"]
    print("This model is currently forecasting for: " + str(multi_params) + " targets")
    history_length = model.params["dataset_params"]["forecast_history"]
    forecast_length = model.params["dataset_params"]["forecast_length"]
    sort_column2 = None
    #
    # If the test dataframe is nonbe use default one supplied in params
    if test_csv_path is None:
        csv_test_loader = model.test_data
    elif model.params["dataset_params"]["class"] == "TemporalLoader":
        input_dict = {
            "df_path": test_csv_path,
            "forecast_total": hours_to_forecast,
            "kwargs": dataset_params
        }
        test_idx = None
        if "label_len" in model.params["model_params"]:
            test_idx = model.params["model_params"]["label_len"] - model.params["dataset_params"]["forecast_length"]
        csv_test_loader = TemporalTestLoader(model.params["dataset_params"]["temporal_feats"], input_dict, test_idx)
    elif model.params["dataset_params"]["class"] == "SeriesIDLoader":
        print("forecas thour")
        print(hours_to_forecast)
        print("CSVSeriesIDLoader not yet supported for inference, but is coming very soon.")
        print(dataset_params)
        series_id_col = dataset_params.pop("series_id_col")
        return_method = dataset_params.pop("return_method")
        dataset_params["file_path"] = test_csv_path
        # dataset_params["scaling"] = model.params["dataset_params"]["scaler"]
        # do stufF
        csv_series_id_loader = SeriesIDTestLoader(series_id_col, dataset_params, return_method, hours_to_forecast, True)
        # data is a list of tuples (history, df_train_and_test, forecast_start_idx)
        # returns data, end_tenor_arr, model.params["dataset_params"]["forecast_history"], forecast_start_idx,
        # csv_series_id_loader, []
        vals = handle_evaluation_series_loader(csv_series_id_loader, model, device, hours_to_forecast, datetime_start)
        df_train_and_test_arr = []
        end_tensor_arr = []
        forecast_start_idx_arr = []
        df_prediction_arr_1 = []

        for i in range(0, len(vals[0])):
            df_train_and_test, end_tensor, history_length, forecast_start_idx, csv_test_loader, df_prediction = handle_later_ev(model, vals[0][i][1], vals[1][i], model.params, csv_series_id_loader, multi_params, vals[0][i][2], vals[0][i][0], datetime_start=datetime_start)  # noqa
            df_train_and_test_arr.append(df_train_and_test)
            end_tensor_arr.append(end_tensor)
            forecast_start_idx_arr.append(forecast_start_idx)
            df_prediction_arr_1.append(df_prediction)
        return df_train_and_test_arr, end_tensor_arr, history_length, forecast_start_idx_arr, csv_test_loader, df_prediction_arr_1  # noqa
    else:
        csv_test_loader = CSVTestLoader(
            test_csv_path,
            hours_to_forecast,
            **dataset_params,
            sort_column_clone=sort_column2,
            interpolate=dataset_params["interpolate_param"]
        )
    # TODO move bottom to
    model.model.eval()
    targ = False
    if model.params["dataset_params"]["class"] == "TemporalLoader":
        history, targ, df_train_and_test, forecast_start_idx = csv_test_loader.get_from_start_date(datetime_start)
    else:
        (
            history,
            df_train_and_test,
            forecast_start_idx,
        ) = csv_test_loader.get_from_start_date(datetime_start)
    end_tensor = generate_predictions(
        model,
        df_train_and_test,
        csv_test_loader,
        history,
        device,
        forecast_start_idx,
        forecast_length,
        hours_to_forecast,
        decoder_params,
        multi_params=multi_params,
        targs=targ
    )
    return handle_later_ev(model, df_train_and_test, end_tensor, model.params, csv_test_loader, multi_params,
                           forecast_start_idx, history, datetime_start)




[docs]
def handle_later_ev(model, df_train_and_test, end_tensor, params, csv_test_loader, multi_params, forecast_start_idx,
                    history, datetime_start):
    targ = False
    decoder_params = None
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("These are the params " + str(params))
    if "decoder_params" in params["inference_params"]:
        decoder_params = params["inference_params"]["decoder_params"]
    history_length = params["dataset_params"]["forecast_history"]
    forecast_length = params["dataset_params"]["forecast_length"]
    hours_to_forecast = params["inference_params"]["hours_to_forecast"]
    num_prediction_samples = params["inference_params"].get("num_prediction_samples")
    df_train_and_test["preds"] = 0
    if decoder_params is not None:
        if "probabilistic" in decoder_params:
            df_train_and_test.loc[df_train_and_test.index[history_length:], "preds"] = end_tensor[0].numpy().tolist()
            df_train_and_test["std_dev"] = 0
            print('end_tensor[1][0].numpy().tolist()', end_tensor[1][0].numpy().tolist())
            try:
                df_train_and_test.loc[df_train_and_test.index[history_length:],
                                      "std_dev"] = end_tensor[1][0].numpy().tolist()
            except Exception as e:
                df_train_and_test.loc[df_train_and_test.index[history_length:],
                                      "std_dev"] = [x[0] for x in end_tensor[1][0].numpy().tolist()]
                print(e)
    else:
        df_train_and_test.loc[df_train_and_test.index[history_length:], "preds"] = end_tensor.numpy().tolist()
    df_prediction_arr = []
    df_prediction_samples = pd.DataFrame(index=df_train_and_test.index)
    # df_prediction_samples_std_dev = pd.DataFrame(index=df_train_and_test.index)
    if num_prediction_samples is not None:
        model.model.train()  # sets mode to train so the dropout layers will be touched
        assert num_prediction_samples > 0
        if csv_test_loader.__class__.__name__ == "SeriesIDTestLoader":
            raise NotImplementedError("SeriesIDTestLoader not yet supported for predictions.")
        if model.params["dataset_params"]["class"] == "TemporalLoader":
            history, targ, df_train_and_test, forecast_start_idx = csv_test_loader.get_from_start_date(datetime_start)
        prediction_samples = generate_prediction_samples(
            model,
            df_train_and_test,
            csv_test_loader,
            history,
            device,
            forecast_start_idx,
            forecast_length,
            hours_to_forecast,
            decoder_params,
            num_prediction_samples,
            multi_params=multi_params,
            targs=targ
        )
        df_prediction_samples = pd.DataFrame(
            index=df_train_and_test.index,
            columns=list(range(num_prediction_samples)),
            dtype="float",
        )
        num_samples = model.params["inference_params"].get("num_prediction_samples")
        df_prediction_arr = handle_ci_multi(prediction_samples, csv_test_loader, multi_params,
                                            df_prediction_samples, decoder_params, history_length, num_samples)
    return (
        df_train_and_test,
        end_tensor,
        history_length,
        forecast_start_idx,
        csv_test_loader,
        df_prediction_arr,
        # df_prediction_samples_std_dev
    )




[docs]
def handle_evaluation_series_loader(csv_series_id_loader: SeriesIDTestLoader, model, device,
                                    hours_to_forecast: int, datetime_start) -> Tuple[List[pd.DataFrame], List]:
    data = csv_series_id_loader.get_from_start_date_all(datetime_start)
    end_tenor_arr = []
    for i in range(0, len(data)):
        history, df_train_and_test, forecast_start_idx = data[i]
        print("values below here")
        print(history.shape)
        print(df_train_and_test.columns)
        print(forecast_start_idx)
        end_tensor = generate_predictions(
            model,
            df_train_and_test,
            csv_series_id_loader.csv_test_loaders[i],
            history,
            device,
            forecast_start_idx,
            model.params["dataset_params"]["forecast_length"],
            hours_to_forecast,
            decoder_params=model.params["inference_params"]["decoder_params"],
            multi_params=1
        )
        end_tenor_arr.append(end_tensor)
    return data, end_tenor_arr, model.params["dataset_params"]["forecast_history"], forecast_start_idx, csv_series_id_loader, []  # noqa




[docs]
def handle_ci_multi(prediction_samples: torch.Tensor, csv_test_loader: CSVTestLoader, multi_params: int,
                    df_pred, decoder_param: bool, history_length: int, num_samples: int) -> List[pd.DataFrame]:
    """Handles the CI confidence interval.

    :param prediction_samples: The number of predictions to generate
    :type prediction_samples: torch.Tensor
    :param csv_test_loader: The test loader genreated in the previous
    :type csv_test_loader: CSVTestLoader
    :param multi_params: [description]
    :type multi_params: int
    :param df_pred: The pandas dataframe of the returned prediction
    :type df_pred: [type]
    :param decoder_param: [description]
    :type decoder_param: bool
    :param history_length: The number of historical time-steps
    :type history_length: int
    :param num_samples: The number of samples to generate (i.e. larger ci)
    :type num_samples: int
    :raises ValueError: [description]
    :raises ValueError: [descriptsion]
    :return: Returns an array with different CI predictions
    :rtype: List[pd.DataFrame]
    """
    df_prediction_arr = []
    if decoder_param is not None:
        if "probabilistic" in decoder_param:
            prediction_samples = prediction_samples[0]
        if multi_params == 1:
            predict = csv_test_loader.inverse_scale(prediction_samples).numpy()
            prediction_samples = predict
            df_pred.iloc[history_length:] = prediction_samples
            df_prediction_arr.append(df_pred)
        else:
            print(prediction_samples.shape)
            for i in range(0, num_samples):
                tra = prediction_samples[:, :, 0, i]
                prediction_samples[:, :, 0, i] = csv_test_loader.inverse_scale(tra.transpose(1, 0)).transpose(1, 0)
                if i > 0:
                    if np.equal(tra, prediction_samples[:, :, 0, i - 1]).all():
                        print("WARNING model values are the same. Try varying dropout or other mechanism")
            for i in range(0, multi_params):
                if i > 0:
                    if np.equal(prediction_samples[i, :, 0, :], prediction_samples[i - 1, :, 0, :]).all():
                        raise ValueError("Something is wrong data for the targets is equal")
                df_pred.iloc[history_length:] = prediction_samples[i, :, 0, :]
                df_prediction_arr.append(df_pred.copy())
    else:
        df_pred.iloc[history_length:] = prediction_samples
        df_prediction_arr.append(df_pred)
    if len(df_prediction_arr) < 1:
        raise ValueError("Error length of the prediction array must be one or greater")
    return df_prediction_arr




[docs]
def generate_predictions(
    model: Type[TimeSeriesModel],
    df: pd.DataFrame,
    test_data: CSVTestLoader,
    history: torch.Tensor,
    device: torch.device,
    forecast_start_idx: int,
    forecast_length: int,
    hours_to_forecast: int,
    decoder_params: Dict,
    targs=False,
    multi_params: int = 1
) -> torch.Tensor:
    """A function to generate the actual model prediction.

    :param model: A PyTorchForecast
    :type model: Type[TimeSeriesModel]
    :param df: The main dataframe containing data
    :type df: pd.DataFrame
    :param test_data: The test data loader
    :type test_data: CSVTestLoader
    :param history: The forecast historical data
    :type history: torch.Tensor
    :param device: The device usually cpu or cuda
    :type device: torch.device
    :param forecast_start_idx: The index you want the forecast to begin
    :type forecast_start_idx: int
    :param forecast_length: The length of the forecast the model outputs per forward pass
    :type forecast_length: int
    :param hours_to_forecast: The number of time_steps to forecast in future
    :type hours_to_forecast: int
    :param decoder_params: The parameters the decoder function takes..
    :type decoder_params: Dict
    :param multi_params: n_targets, defaults to 1
    :type multi_params: int, optional
    :return: The forecasted values for the time-series in a tensor
    :rtype: torch.Tensor
    """
    if targs or model.params["dataset_params"]["class"] == "TemporalLoader":
        history_dim = history
    else:
        history_dim = history.unsqueeze(0).to(model.device)
    if decoder_params is None:
        end_tensor = generate_predictions_non_decoded(
            model, df, test_data, history_dim, forecast_length, hours_to_forecast,
        )
    else:
        # model, src, max_seq_len, real_target, output_len=1, unsqueeze_dim=1
        # hours_to_forecast 336
        # greedy_decode(model, src, sequence_size, targ, src, device=device)[:, :, 0]
        # greedy_decode(model, src:torch.Tensor, max_len:int,
        # real_target:torch.Tensor, start_symbol:torch.Tensor
        # unsqueeze_dim=1, device='cpu')
        end_tensor = generate_decoded_predictions(
            model,
            test_data,
            forecast_start_idx,
            device,
            history_dim,
            hours_to_forecast,
            decoder_params,
            multi_targets=multi_params,
            targs=targs
        )
    return end_tensor




[docs]
def generate_predictions_non_decoded(
    model: Type[TimeSeriesModel],
    df: pd.DataFrame,
    test_data: CSVTestLoader,
    history_dim: torch.Tensor,
    forecast_length: int,
    hours_to_forecast: int,
) -> torch.Tensor:
    """Generates predictions for the models that do not use a decoder.

    :param model: A PyTorchForecast
    :type model: Type[TimeSeriesModel]
    :param df: [description]
    :type df: pd.DataFrame
    :param test_data: [description]
    :type test_data: CSVTestLoader
    :param history_dim: [description]
    :type history_dim: torch.Tensor
    :param forecast_length: [description]
    :type forecast_length: int
    :param hours_to_forecast: [description]
    :type hours_to_forecast: int
    :return: [description]
    :rtype: torch.Tensor
    """
    full_history = [history_dim]
    all_tensor = []
    if test_data.use_real_precip:
        precip_cols = test_data.convert_real_batches("precip", df[forecast_length:])
    if test_data.use_real_temp:
        temp_cols = test_data.convert_real_batches("temp", df[forecast_length:])
    for i in range(0, int(np.ceil(hours_to_forecast / forecast_length).item())):
        output = model.model(full_history[i].to(model.device))
        all_tensor.append(output.view(-1))
        if i == int(np.ceil(hours_to_forecast / forecast_length).item()) - 1:
            break
        rel_cols = model.params["dataset_params"]["relevant_cols"]
        if test_data.use_real_precip and test_data.use_real_temp:
            # Order here should match order of original tensor... But what is the best way todo that...?
            # Hmm right now this will create a bug if for some reason the order [precip, temp, output]
            intial_numpy = (
                torch.stack(
                    [
                        output.view(-1).float().to(model.device),
                        precip_cols[i].float().to(model.device),
                        temp_cols[i].float().to(model.device),
                    ]
                )
                .to("cpu")
                .detach()
                .numpy()
            )
            temp_df = pd.DataFrame(intial_numpy.T, columns=rel_cols)
            revised_np = temp_df[rel_cols].to_numpy()
            full_history.append(
                torch.from_numpy(revised_np).to(model.device).unsqueeze(0)
            )
    remainder = forecast_length - hours_to_forecast % forecast_length
    if remainder != forecast_length:
        # Subtract remainder from array
        end_tensor = torch.cat(all_tensor, axis=0).to("cpu").detach()[:-remainder]
    else:
        end_tensor = torch.cat(all_tensor, axis=0).to("cpu").detach()
    return end_tensor




[docs]
def generate_decoded_predictions(
    model: Type[TimeSeriesModel],
    test_data: CSVTestLoader,
    forecast_start_idx: int,
    device: torch.device,
    history_dim: torch.Tensor,
    hours_to_forecast: int,
    decoder_params: Dict,
    multi_targets: int = 1,
    targs: Union[bool, torch.Tensor] = False
) -> torch.Tensor:
    probabilistic = False
    scaler = None
    if test_data.no_scale:
        scaler = test_data
    if decoder_params is not None:
        if "probabilistic" in decoder_params:
            probabilistic = True

        real_target_tensor = (
            torch.from_numpy(test_data.df[forecast_start_idx:].to_numpy())
            .to(device)
            .unsqueeze(0)
            .to(model.device)
        )
        if targs:
            src = history_dim
            src0 = src[0]
            trg = targs
            if "label_len" not in model.params["model_params"]:
                decoder_seq_len = model.params["dataset_params"]["forecast_length"]
            else:
                decoder_seq_len = model.params["model_params"]["label_len"]
            end_tensor = decoding_function(model.model, src0, trg[1], model.params["dataset_params"]["forecast_length"],
                                           src[1], trg[0], 1, decoder_seq_len, hours_to_forecast, device)
        else:
            end_tensor = decoding_functions[decoder_params["decoder_function"]](
                model.model,
                history_dim,
                hours_to_forecast,
                real_target_tensor,
                decoder_params["unsqueeze_dim"],
                output_len=model.params["dataset_params"]["forecast_length"],
                multi_targets=multi_targets,
                device=model.device,
                probabilistic=probabilistic,
                scaler=scaler
            )
        if probabilistic:
            end_tensor_mean = end_tensor[0][:, :, 0].view(-1).to("cpu").detach()
            return end_tensor_mean, end_tensor[1]
        elif isinstance(end_tensor, tuple):
            e = end_tensor[0][:, :, 0].view(-1).to("cpu").detach(), end_tensor[1][:, :, 0].view(-1).to("cpu").detach()
            return e
        if multi_targets == 1:
            end_tensor = end_tensor[:, :, 0].view(-1)
    return end_tensor.to("cpu").detach()




[docs]
def generate_prediction_samples(
    model: Type[TimeSeriesModel],
    df: pd.DataFrame,
    test_data: CSVTestLoader,
    history: torch.Tensor,
    device: torch.device,
    forecast_start_idx: int,
    forecast_length: int,
    hours_to_forecast: int,
    decoder_params: Dict,
    num_prediction_samples: int,
    multi_params=1,
    targs=False
) -> np.ndarray:
    """Generates."""
    pred_samples = []
    std_dev_samples = []
    probabilistic = False
    if decoder_params is not None:
        if "probabilistic" in decoder_params:
            probabilistic = True

    for _ in range(num_prediction_samples):
        end_tensor = generate_predictions(
            model,
            df,
            test_data,
            history,
            device,
            forecast_start_idx,
            forecast_length,
            hours_to_forecast,
            decoder_params,
            multi_params=multi_params,
            targs=targs
        )

        if probabilistic:
            pred_samples.append(end_tensor[0].numpy())
            std_dev_samples.append(end_tensor[1].numpy())
        else:
            pred_samples.append(end_tensor.numpy())
    if probabilistic:
        return np.array(pred_samples).T, np.array(std_dev_samples).T
    else:
        return np.array(pred_samples).T  # each column is 1 array of predictions