Source code for flood_forecast.preprocessing.interpolate_preprocess

import pandas as pd


[docs]def fix_timezones(csv_path: str) -> pd.DataFrame: """ Basic function to fix intial data bug related to NaN values in non-eastern-time zones due to UTC conversion. """ df = pd.read_csv(csv_path) the_count = df[0:2]['cfs'].isna().sum() return df[the_count:]
[docs]def split_on_na_chunks(df: pd.DataFrame) -> None: pass
[docs]def interpolate_missing_values(df: pd.DataFrame) -> pd.DataFrame: """ Function to fill missing values with nearest value. Should be run only after splitting on the NaN chunks. """ df['cfs1'] = df['cfs'].interpolate(method='nearest').ffill().bfill() df['precip'] = df['p01m'].interpolate(method='nearest').ffill().bfill() df['temp'] = df['tmpf'].interpolate(method='nearest').ffill().bfill() return df