Source code for pymepps.accessor.pandas

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#Created on 07.07.17
#
#Created for pymepps
#
#@author: Tobias Sebastian Finn, tobias.sebastian.finn@studium.uni-hamburg.de
#
#    Copyright (C) {2017}  {Tobias Sebastian Finn}
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# System modules
import logging
import abc
import json

# External modules
import pandas as pd

# Internal modules
from .base import MetData
from .utilities import register_dataframe_accessor, register_series_accessor


logger = logging.getLogger(__name__)


@register_series_accessor('pp')
@register_dataframe_accessor('pp')
class PandasAccessor(MetData):
    """
    An accessor to extend the pandas data structures. This could be used
    more actively in the future to add more post-processing specific
    features to pandas.
    """
    def __init__(self, data):
        super().__init__(data)
        self.lonlat = None
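    # A minimal usage sketch (an illustration, not part of the original
    # module), assuming that importing pymepps triggers the registration
    # above and that the accessor instance is cached per object, as with
    # pandas' own accessor machinery:
    #
    # >>> import pandas as pd
    # >>> import pymepps  # noqa: F401
    # >>> df = pd.DataFrame({'t2m': [280.1, 281.3]})
    # >>> df.pp.lonlat = (9.99, 53.55)  # attach a longitude/latitude pair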
    def update(self, *items):
        """
        Update the data with the given pandas objects. For duplicated
        columns and rows the last given value wins, while missing values
        are filled from the older data.
        """
        update_data = [self.data.copy(), ]
        for item in items:
            if isinstance(item, (pd.Series, pd.DataFrame)):
                update_data.append(item)
            else:
                raise TypeError(
                    'The given item {0} needs to be in a pandas conform '
                    'data type!'.format(item))
        concatenated_data = pd.concat(update_data, axis=1)
        # Resolve duplicated columns: keep the last occurrence and fill
        # its missing values from the older columns.
        dup_cols = concatenated_data.columns.duplicated(keep='last')
        columned_data = concatenated_data.loc[:, ~dup_cols].sort_index(axis=1)
        for name, val in concatenated_data.loc[:, dup_cols][::-1].items():
            columned_data[name] = columned_data[name].fillna(val)
        columned_data = columned_data.squeeze()
        # Resolve duplicated rows in the same last-wins manner.
        dup_rows = columned_data.index.duplicated(keep='last')
        updated_array = columned_data.loc[~dup_rows].sort_index(axis=0)
        for ind, val in concatenated_data.loc[dup_rows][::-1].T.items():
            updated_array.loc[ind] = updated_array.loc[ind].fillna(val)
        updated_array.pp.lonlat = self.lonlat
        return updated_array
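    # Sketch of the update semantics (values invented for illustration):
    # for a duplicated index the newer value wins, while gaps in the newer
    # data are filled from the older data, e.g.
    #
    # >>> old = pd.Series([1.0, 2.0], index=[0, 1], name='t2m')
    # >>> new = pd.Series([5.0], index=[1], name='t2m')
    # >>> old.pp.update(new)  # -> 1.0 at index 0, 5.0 at index 1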
    def save(self, save_path):
        """
        Save the data as a json file. The pandas to_json method is used
        to convert the data to json. If lonlat was given, it is saved
        under a lonlat key. Json is used instead of HDF5 due to possible
        corruption problems.

        Parameters
        ----------
        save_path: str
            Path where the json file should be saved.
        """
        save_dict = dict(
            data=self.data.to_json(orient='split', date_format='iso'),
            lonlat=self.lonlat)
        with open(save_path, mode='w+') as fp:
            json.dump(save_dict, fp)
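    # Sketch of the resulting file layout (the path is an arbitrary
    # example): the json file holds the serialized pandas object under a
    # 'data' key next to the 'lonlat' entry, e.g.
    #
    # >>> df.pp.save('t2m.json')
    # >>> sorted(json.load(open('t2m.json')).keys())
    # ['data', 'lonlat']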
    @staticmethod
    def load(load_path):
        """
        Load the given json file and return the loaded pandas object.
        The loader tries to locate the lonlat and the data keys within
        the json file. If these keys are not available, the loader tries
        to load the whole json file into pandas.

        Parameters
        ----------
        load_path: str or file-like
            Path to the json file which should be loaded. It is
            recommended to load only previously saved pandas objects.

        Returns
        -------
        load_data: pandas object
            The loaded pandas object.
        """
        if isinstance(load_path, str):
            fp = open(load_path, mode='r')
        elif hasattr(load_path, 'read'):
            fp = load_path
        else:
            raise TypeError('Path needs to be either a string '
                            'or an opened file!')
        json_str = fp.read()
        if not isinstance(json_str, str):
            json_str = json_str.decode()
        saved_json_instance = json.loads(json_str)
        fp.close()
        if saved_json_instance.get('lonlat') is not None:
            lonlat = tuple(saved_json_instance['lonlat'])
        else:
            lonlat = None
        if 'data' in saved_json_instance:
            pd_data_json = saved_json_instance['data']
        else:
            # Fall back to the raw json string, so that plain json files
            # without a 'data' key can be read by pandas directly.
            pd_data_json = json_str
        try:
            load_data = pd.read_json(pd_data_json, orient='split',
                                     typ='frame')
        except ValueError:
            load_data = pd.read_json(pd_data_json, orient='split',
                                     typ='series')
        if isinstance(load_data.index, pd.DatetimeIndex):
            load_data.index = load_data.index.tz_localize('UTC')
        load_data.pp.lonlat = lonlat
        return load_data
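

if __name__ == '__main__':
    # Hedged round-trip demonstration (not part of the original module):
    # save a small frame through the accessor and read it back. The file
    # name is an arbitrary example; the lonlat handling assumes the
    # accessor instance is cached per object.
    import os
    import tempfile

    demo = pd.DataFrame(
        {'t2m': [280.1, 281.3]},
        index=pd.to_datetime(['2017-07-07 00:00', '2017-07-07 01:00']))
    demo.pp.lonlat = (9.99, 53.55)
    demo_path = os.path.join(tempfile.mkdtemp(), 'demo.json')
    demo.pp.save(demo_path)
    loaded = PandasAccessor.load(demo_path)
    print(loaded.pp.lonlat)  # expected: (9.99, 53.55)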