Source code for energy_balance.scripts.download_data_by_date

#!/usr/bin/env python

__author__ = 'Elle Smith'
__date__ = '23 Jul 2021'
__contact__ = 'eleanor.smith@stfc.ac.uk'


import argparse
import os
import pandas as pd
from datetime import datetime, timedelta
from pycampbellcr1000 import CR1000
from energy_balance import CONFIG
import time
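
# The CONFIG object is expected to provide the keys used below. The example
# values are an illustrative assumption, not taken from this package's
# configuration:
#
#   CONFIG['common']['logger_url']       e.g. 'tcp:192.168.1.110:6785'
#   CONFIG['common']['logger_csv_path']  e.g. '~/logger_csvs'
#   CONFIG['common']['logger_tables']    e.g. ['Housekeeping', 'GPS_datetime',
#                                              'SoilTemperature', 'SoilMoisture',
#                                              'SoilHeatFlux', 'Radiation']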


def arg_parse():
    parser = argparse.ArgumentParser()

    parser.add_argument('-s', '--start-date',
                        type=str,
                        required=True,
                        help="The start date to extract data for, in the format YYYY-MM-DD.")

    parser.add_argument('-e', '--end-date',
                        type=str,
                        required=False,
                        help="The end date to extract data for, in the format YYYY-MM-DD. "
                             "Defaults to the start date, so data is downloaded for one day only.")
                        
    return parser.parse_args()


def get_data(url, start_date, end_date, dir_path):
    """
    Extract data from the Campbell data logger for each specified table and
    save it to a daily csv file for each day between the specified dates.
    Default tables are: Housekeeping, GPS_datetime, SoilTemperature,
    SoilMoisture, SoilHeatFlux and Radiation.

    :param url: (str) URL for connection with logger in format 'tcp:iphost:port' or 'serial:/dev/ttyUSB0:19200:8N1'
    :param start_date: (datetime.datetime) The start date from which to collect data.
    :param end_date: (datetime.datetime) The end date after which to stop collecting data. (The end date will be included in the data.)
    :param dir_path: (str) The path to the top level directory in which to create the csv files and folders.

    :returns: None
    """
    device = CR1000.from_url(url)
    # device.list_tables():
    # ['Status', 'Housekeeping', 'GPS_datetime', 'SoilTemperature', 'SoilMoisture', 'SoilHeatFlux', 'Radiation', 'DataTableInfo', 'Public']
    tables = CONFIG['common']['logger_tables']

    while start_date <= end_date:
        for table in tables:
            # last timestamp of the day being processed
            end_of_day = start_date + timedelta(hours=23, minutes=59, seconds=59, microseconds=59)

            csv_dirs = os.path.join(dir_path, table)
            csv_name = f"{table}_{start_date.strftime('%Y-%m-%d')}.csv"
            csv_path = os.path.join(csv_dirs, csv_name)

            # create the directory for this table's csv files
            if not os.path.exists(csv_dirs):
                os.makedirs(csv_dirs)

            # if the file doesn't exist - make it
            if not os.path.isfile(csv_path):
                open(csv_path, 'w').close()

            # open the csv file
            try:
                df = pd.read_csv(csv_path)
            except pd.errors.EmptyDataError:
                get_data_from_range(device, table, csv_path, start_date, end_of_day, header=True)
                continue

            if df.empty:
                get_data_from_range(device, table, csv_path, start_date, end_of_day, header=False)
            else:
                # if the file already has data in it, update the existing data from the latest entry onwards
                # get the latest date before updating - add a microsecond so the last record is not duplicated
                latest = datetime.strptime(df['Datetime'].iloc[-1], "%Y-%m-%d %H:%M:%S") + timedelta(microseconds=1)
                get_data_from_range(device, table, csv_path, latest, end_of_day, header=False)

            print(f"Completed for {table} for {start_date.strftime('%Y-%m-%d')}")
            time.sleep(3)

        start_date = start_date + timedelta(1)
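
# Example usage of get_data (a minimal sketch; the URL and output directory
# are illustrative assumptions):
#
#   from datetime import datetime
#   get_data('tcp:192.168.1.110:6785',
#            datetime(2021, 7, 1), datetime(2021, 7, 2),
#            '/tmp/logger_csvs')
#
# This would write one csv per table per day, e.g.
# /tmp/logger_csvs/Radiation/Radiation_2021-07-01.csv and
# /tmp/logger_csvs/Radiation/Radiation_2021-07-02.csv.
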
def get_data_from_range(device, table, csv_path, start, end, header):
    """
    Gets the range of data specified by the start and end dates and appends it
    to the csv file at the path specified.

    :param device: (pycampbellcr1000.CR1000) The device object for the connection with the logger.
    :param table: (str) The name of the table on the logger from which the data is being extracted.
    :param csv_path: (str) The path to the csv file to back fill with the data.
    :param start: (datetime.datetime) The start datetime from which to collect data.
    :param end: (datetime.datetime) The end datetime after which to stop collecting data. (end will be included in the data.)
    :param header: (bool) Whether to write the column header line before the data.

    :returns: None
    """
    data = device.get_data(table, start, end)
    content = data.to_csv(header=header)

    with open(csv_path, "a") as f:
        f.write(content)
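
# Example usage of get_data_from_range (a sketch, assuming a reachable logger
# with a 'Radiation' table; the URL and csv path are illustrative):
#
#   from datetime import datetime
#   device = CR1000.from_url('serial:/dev/ttyUSB0:19200:8N1')
#   get_data_from_range(device, 'Radiation', '/tmp/Radiation_2021-07-01.csv',
#                       datetime(2021, 7, 1),
#                       datetime(2021, 7, 1, 23, 59, 59),
#                       header=True)
#
# header=True is only wanted on the first write to an empty file; later
# appends to the same file pass header=False so the column names are not
# repeated mid-file.
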
def main():
    args = arg_parse()

    start_date = datetime.strptime(args.start_date, "%Y-%m-%d")

    # if no end date, make it the same as the start date, so data is downloaded for one day only
    if args.end_date:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
        complete_stmnt = f'Data downloaded for range {args.start_date} {args.end_date}'
    else:
        end_date = start_date
        complete_stmnt = f'Data downloaded for {args.start_date}'

    url = CONFIG['common']['logger_url']
    dir_path = os.path.expanduser(CONFIG['common']['logger_csv_path'])

    get_data(url, start_date, end_date, dir_path)
    print(complete_stmnt)


if __name__ == '__main__':
    main()
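
# Command-line usage (run as a module; the dates are illustrative):
#
#   $ python -m energy_balance.scripts.download_data_by_date -s 2021-07-01 -e 2021-07-03
#   $ python -m energy_balance.scripts.download_data_by_date -s 2021-07-01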