Source code for pymepps.loader.datasets.tsdataset

#!/bin/env python
# -*- coding: utf-8 -*-
# """
# Created on 10.12.16
#
# Created for pymepps
#
# @author: Tobias Sebastian Finn, tobias.sebastian.finn@studium.uni-hamburg.de
#
#     Copyright (C) {2016}  {Tobias Sebastian Finn}
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
# """
# System modules
import logging

# External modules
import pandas as pd

# Internal modules
import pymepps
from .metdataset import MetDataset


logger = logging.getLogger(__name__)


class TSDataset(MetDataset):
    """
    TSDataset is a class for a pool of file handlers. Typically a
    time series dataset combines the files of a station, such that it
    is possible to select a variable and get a TSData instance. For
    memory reasons the data of a variable is only loaded if it is selected.

    Parameters
    ----------
    file_handlers : list of children of FileHandler or None
        The dataset is based on these files. The files should be
        either instances of NetCDFHandler or TextHandler. If file handlers
        is None then the dataset is used for conversion from SpatialData to
        TSData.
    data_origin : optional
        The data origin. This parameter is important to trace the data
        flow. If this is None, there is no data origin and this
        dataset will be the starting point of the data flow. Default is
        None.
    lonlat : tuple(float, float) or None
        The coordinates (longitude, latitude) where the data is valid. If
        this is None the coordinates will be set based on data_origin or
        based on the first file handler.
    processes : int, optional
        Forwarded unchanged to the parent class constructor. Presumably the
        number of processes used by the parent for reading the files
        (confirm against MetDataset). Default is 1.

    Methods
    -------
    select
        Method to select a variable.
    select_by_pattern
        Select the variables matching a pattern and merge them.
    data_merge
        Merge the data of several files into a single pandas object.
    """
    def __init__(self, file_handlers, data_origin=None, lonlat=None,
                 processes=1):
        super().__init__(file_handlers, data_origin, processes)
        # The constructor argument is called ``lonlat`` while the attribute
        # is called ``lon_lat``.
        self.lon_lat = lonlat

    def __str__(self):
        """
        Return the string of the parent class extended by a line with the
        coordinates of this dataset.
        """
        parent_str = super().__str__()
        return '{0:s}\nLonlat: {1}'.format(parent_str, self._get_lon_lat())

    def _get_lon_lat(self):
        """
        Determine the coordinates of this dataset.

        Explicitly set coordinates (``self.lon_lat``) have priority. If they
        are None and a data origin is set, the data origin is asked for its
        coordinates, otherwise the first file handler is asked. A failing
        lookup is logged at debug level and is not raised.

        Returns
        -------
        lon_lat : tuple(float, float) or None
            The coordinates or None if they could not be determined.
        """
        # Coordinates given at construction win over every derived value.
        if self.lon_lat is not None:
            return self.lon_lat
        if self.data_origin is not None:
            try:
                return self.data_origin.lon_lat()
            except Exception as e:
                # Best effort: the coordinates are informational only.
                logger.debug('Couldn\'t get lon/lat from data origin, due to '
                             '%s', e)
                return None
        try:
            # Also covers file_handlers being None or empty, because the
            # indexing error is caught below.
            return self.file_handlers[0].lon_lat()
        except Exception as e:
            logger.debug('Couldn\'t get lon/lat from first file handler, '
                         'due to %s', e)
            return None

    def _get_file_data(self, file, var_name, **kwargs):
        """
        Open the given file handler, read the time series of the variable
        and close the file handler again.

        Parameters
        ----------
        file : file handler
            Object with ``open``, ``close`` and ``get_timeseries`` methods.
        var_name : str
            The name of the variable which should be read.
        **kwargs
            Forwarded to ``file.get_timeseries``.

        Returns
        -------
        ts_data
            The return value of ``file.get_timeseries``.
        """
        # Opened outside of the try block, such that a handler which failed
        # to open is not closed.
        file.open()
        try:
            ts_data = file.get_timeseries(var_name, **kwargs)
        finally:
            file.close()
        return ts_data

    def _multi_select_var(self, data, var_name):
        """
        Generator which converts every element of data into a
        pandas.DataFrame with the variable name as only column label.

        Parameters
        ----------
        data : iterable
            Every element is passed to the pandas.DataFrame constructor and
            has to result in a frame with exactly one column.
        var_name : str
            The column label of the yielded frames.

        Yields
        ------
        df : pandas.DataFrame
            The converted element.
        """
        column_labels = ["{0:s}".format(var_name), ]
        for d in data:
            df = pd.DataFrame(d)
            df.columns = column_labels
            yield df

    def select_by_pattern(self, pattern, return_list=False, **kwargs):
        """
        Select the variables matching the pattern and merge them into a
        single pandas object.

        Parameters
        ----------
        pattern : str
            The pattern, which is forwarded to the parent implementation
            and which is used as name of the merged data.
        return_list : bool, optional
            Only kept for signature compatibility. The parent implementation
            is always called with ``return_list=True`` and the selected data
            is always merged.
        **kwargs
            Only kept for signature compatibility, they are not forwarded.

        Returns
        -------
        merged_data
            The return value of ``data_merge`` for the selected data.
        """
        selected = super().select_by_pattern(pattern, return_list=True)
        frames = [pd.DataFrame(entry) for entry in selected]
        return self.data_merge(frames, pattern)

    def data_merge(self, data, var_name):
        """
        Merge the given data into one pandas object and name it.

        Parameters
        ----------
        data : list, tuple, pandas.Series or pandas.DataFrame
            For a list or tuple the first element is updated with all
            following elements via its ``pp`` accessor (presumably
            registered by pymepps, confirm). A single Series or DataFrame
            is used as it is.
        var_name : str
            The name which is set as ``name`` attribute of the merged data.

        Returns
        -------
        merged_data
            The merged and named data.

        Raises
        ------
        IndexError
            If data is an empty list or tuple.
        TypeError
            If data has none of the supported types.
        """
        if isinstance(data, (list, tuple)):
            merged_data = data[0]
            merged_data = merged_data.pp.update(*data[1:])
        elif isinstance(data, (pd.Series, pd.DataFrame)):
            merged_data = data
        else:
            # Without this branch the name assignment below would fail with
            # an UnboundLocalError, which hides the real problem.
            raise TypeError(
                'The data has to be a list, a tuple, a pandas.Series or a '
                'pandas.DataFrame, got {0:s}'.format(type(data).__name__))
        merged_data.name = var_name
        return merged_data