Source code for flood_forecast.preprocessing.interpolate_preprocess

import pandas as pd
from typing import List


[docs] def fix_timezones(df: pd.DataFrame) -> pd.DataFrame: """ Basic function to fix initil data bug related to NaN values in non-eastern-time zones due to UTC conversion. """ the_count = df[0:2]['cfs'].isna().sum() return df[the_count:]
[docs] def interpolate_missing_values(df: pd.DataFrame) -> pd.DataFrame: """ Function to fill missing values with nearest value. Should be run only after splitting on the NaN chunks. """ df = fix_timezones(df) df['cfs1'] = df['cfs'].interpolate(method='nearest').ffill().bfill() df['precip'] = df['p01m'].interpolate(method='nearest').ffill().bfill() df['temp'] = df['tmpf'].interpolate(method='nearest').ffill().bfill() return df
[docs] def forward_back_generic(df: pd.DataFrame, relevant_columns: List) -> pd.DataFrame: """ Function to fill missing values with nearest value (forward first) """ for col in relevant_columns: df[col] = df[col].interpolate(method='nearest').ffill().bfill() return df
[docs] def back_forward_generic(df: pd.DataFrame, relevant_columns: List[str]) -> pd.DataFrame: """ Function to fill missing values with nearest values (backward first) """ for col in relevant_columns: df[col] = df[col].interpolate(method='nearest').bfill().ffill() return df