Source code for flood_forecast.preprocessing.preprocess_da_rnn

import numpy as np
import pandas as pd
from typing import List
from flood_forecast.da_rnn.custom_types import TrainData



[docs]
def format_data(dat, targ_column: List[str]) -> TrainData:
    # Test numpy conversion
    proc_dat = dat.to_numpy()
    mask = np.ones(proc_dat.shape[1], dtype=bool)
    dat_cols = list(dat.columns)
    for col_name in targ_column:
        mask[dat_cols.index(col_name)] = False
    feats = proc_dat[:, mask].astype(float)
    targs = proc_dat[:, ~mask].astype(float)
    return TrainData(feats, targs)




[docs]
def make_data(
    csv_path: str,
    target_col: List[str],
    test_length: int,
    relevant_cols=[
        "cfs",
        "temp",
        "precip"]) -> TrainData:
    """Returns full preprocessed data.

    Does not split train/test that must be done later.
    """
    final_df = pd.read_csv(csv_path)
    print(final_df.shape[0])
    if len(target_col) > 1:
        # Restrict target columns to height and cfs. Alternatively could replace this with loop
        height_df = final_df[[target_col[0], target_col[1], 'precip', 'temp']]
        height_df.columns = [target_col[0], target_col[1], 'precip', 'temp']
    else:
        height_df = final_df[[target_col[0]] + relevant_cols]
    preprocessed_data2 = format_data(height_df, target_col)
    return preprocessed_data2