Source code for energy_balance.netcdf.soil_quality_control

__author__ = 'Elle Smith'
__date__ = '09 Aug 2021'
__contact__ = 'eleanor.smith@stfc.ac.uk'

import numpy as np
import pandas as pd
import os
import glob

from .quality_control import QualityControl

from energy_balance import CONFIG

class SoilQualityControl(QualityControl):
    """
    Quality control for soil moisture, soil temperature and soil heat flux data.

    The column names for each group of variables are read from the 'soil'
    section of CONFIG when the class is defined.
    """

    soil_moisture_headers = CONFIG['soil']['soil_moisture_headers']
    soil_temperature_headers = CONFIG['soil']['soil_temperature_headers']
    soil_heat_flux_headers = CONFIG['soil']['soil_heat_flux_headers']
    # Every data column that gets a matching '<name>_qc' column in self._qc.
    headers = soil_moisture_headers + soil_temperature_headers + soil_heat_flux_headers

    @staticmethod
    def _read_csvs(directory, file_template, date):
        """
        Read every csv file matching a dated file name pattern and stack the rows.

        :param directory: directory that contains the input files.
        :param file_template: file name (may contain glob wildcards) with a
                              '{date}' placeholder.
        :param date: value substituted for '{date}' in file_template.
        :returns: one dataframe holding the rows of all matching files, with a
                  fresh integer index.
        :raises ValueError: from pd.concat when no file matches the pattern.
        """
        pattern = os.path.join(directory, file_template.format(date=date))
        return pd.concat(
            [pd.read_csv(path) for path in glob.glob(pattern)],
            ignore_index=True,
        )

    def create_dataframes(self):
        """
        SoilQualityControl specific implementation to create pandas dataframe
        from input csvs and empty QC dataframe other than column names.
        Sets self._df and self._qc

        :raises FileNotFoundError: if a ValueError occurs while loading the
                                   input csvs (e.g. no file matches the date).
        """
        date = self.prepare_date(CONFIG['soil']['input_date_format'])
        input_file_path = os.path.expanduser(CONFIG['soil']['input_file_path'])

        soil_moisture_file = CONFIG['soil']['soil_moisture_file']
        soil_temperature_file = CONFIG['soil']['soil_temperature_file']
        soil_heat_flux_file = CONFIG['soil']['soil_heat_flux_file']

        try:
            df_soilmoisture = self._read_csvs(input_file_path, soil_moisture_file, date)
            df_soiltemp = self._read_csvs(input_file_path, soil_temperature_file, date)
            df_soilheatflux = self._read_csvs(input_file_path, soil_heat_flux_file, date)
        except ValueError as err:
            message = f"No files found for {date}, skipping"
            print(message)
            # Keep the exception type callers expect, but carry the message and
            # the original error so the real cause is not lost.
            raise FileNotFoundError(message) from err

        # all data needed is selected using column headers
        moisture = df_soilmoisture[[self.dt_header] + self.soil_moisture_headers]
        temperature = df_soiltemp[[self.dt_header] + self.soil_temperature_headers]
        heat_flux = df_soilheatflux[[self.dt_header] + self.soil_heat_flux_headers]

        # Join the three tables on the shared date/time column.
        self._df = moisture.merge(temperature, on=self.dt_header).merge(
            heat_flux, on=self.dt_header
        )

        # QC dataframe starts with column names only, no rows.
        self._qc = pd.DataFrame(columns=[h + '_qc' for h in self.headers])

    def qc_variables(self):
        """
        SoilQualityControl specific implementation to set QC conditions and
        flags and record in QC dataframe.

        For every column a list of boolean conditions and a parallel list of
        flag values is passed to self.apply_qc (not defined in this class,
        presumably inherited from QualityControl).
        """
        # soil temperature
        max_temp = CONFIG['soil']['max_expected_temp']
        min_temp = CONFIG['soil']['min_expected_temp']

        for col in self.soil_temperature_headers:
            temp_conditions = [
                np.isnan(self._df[col]),
                self._df[col] < -35,
                self._df[col] > 50,
                self._df[col] < min_temp,
                self._df[col] > max_temp,
            ]
            temp_choices = [2, 2, 2, 3, 3]
            self.apply_qc(temp_conditions, temp_choices, col)

        # soil heat flux, depends on temperature
        # zip pairs the columns by position and stops at the shorter list.
        for col_shf, col_temp in zip(self.soil_heat_flux_headers, self.soil_temperature_headers):
            shf_conditions = [
                np.isnan(self._df[col_shf]),
                np.isnan(self._df[col_temp]),
                self._df[col_temp] < -30,
                self._df[col_temp] > 70,
            ]
            shf_choices = [2, 2, 2, 2]
            self.apply_qc(shf_conditions, shf_choices, col_shf)

        # soil water potential
        for col in self.soil_moisture_headers:
            # NOTE(review): if apply_qc picks the first matching condition
            # (e.g. via np.select), the '> 200' entry can never be selected
            # because '> 80' precedes it - confirm the intended order/flags.
            swp_conditions = [
                np.isnan(self._df[col]),
                self._df[col] > 80,
                self._df[col] > 200,
                self._df[col] < 0,
            ]
            swp_choices = [3, 2, 3, 3]
            self.apply_qc(swp_conditions, swp_choices, col)