# Source code for pymepps.loader.datasets.spatialdataset

#!/bin/env python
# -*- coding: utf-8 -*-
# """
# Created on 10.12.16
#
# Created for pymepps
#
# @author: Tobias Sebastian Finn, tobias.sebastian.finn@studium.uni-hamburg.de
#
#     Copyright (C) {2016}  {Tobias Sebastian Finn}
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
# """

# System modules
import logging
import getpass
import datetime as dt

# Internal modules
import pymepps
from pymepps.grid import GridBuilder
import pymepps.utilities.cdo_funcs as cdo
from .metdataset import MetDataset


logger = logging.getLogger(__name__)


class SpatialDataset(MetDataset):
    """
    SpatialDataset is a class for a pool of file handlers. Typically a
    spatial dataset combines the files of one model run, such that it is
    possible to select a variable and get a SpatialData instance. For memory
    reasons the data of a variable is only loaded if it is selected.

    Parameters
    ----------
    file_handlers : list of children of FileHandler or None
        The spatial dataset is based on these files. The files should be
        either instances of GribHandler or NetCDFHandler. If file handlers
        is None then the dataset is used for conversion from TSData to
        SpatialData.
    grid : str or Grid or None
        The grid describes the horizontal grid of the spatial data. The
        grid will be appended to every created SpatialData instance. If a
        str is given it will be checked if the str is a path to a
        cdo-conform grid file or a cdo-conform grid string. If this is an
        instance of a child of Grid it is assumed that the grid is already
        initialized and this grid will be used. If this is None the Grid
        will be automatically read from the first file handler. Default is
        None.
    data_origin : optional
        The data origin. This parameter is important to trace the data
        flow. If this is None, there is no data origin and this dataset
        will be the starting point of the data flow. Default is None.
    processes : int, optional
        This number of processes is used to calculate time-consuming
        functions. For time-consuming functions a progress bar is shown.
        If the number of processes is one the functions will be processed
        sequential. For more processes than one the multiprocessing module
        will be used. Default is 1.

    Methods
    -------
    select
        Method to select a variable.
    selnearest
        Method to select the nearest grid point for given coordinates.
    sellonlatbox
        Method to slice a box with the given coordinates.
    """
    def __init__(self, file_handlers, grid=None, data_origin=None,
                 processes=1):
        super().__init__(file_handlers, data_origin, processes)
        self.grid = grid

    def get_grid(self, var_name, data_array=None):
        """
        Method to get for given variable name a Grid instance. If the grid
        attribute is already a Grid instance this grid will be returned. If
        the grid attribute is a str instance, the str will be read from
        file or from the given grid str. If the grid attribute isn't set
        the grid instance will be the grid for the variable selected with
        the first corresponding file handler and cdo.

        The sources are tried in this order: the attributes of
        ``data_array``, the ``grid`` attribute of this dataset, and finally
        the cdo grid description of the first file of the variable.

        Parameters
        ----------
        var_name : str
            The variable name, which should be used to generate the grid.
        data_array : xarray.DataArray or None, optional
            If the data array is given the method will try to load the grid
            from the data array's attributes. If None the DataArray method
            will be skipped. Default is None.

        Returns
        -------
        grid : Instance of child of grid or None
            The returned grid. If the returned grid is None, the grid could
            not be read.
        """
        grid = self._get_grid_from_dataarray(data_array)
        if grid is None:
            if isinstance(self.grid, str):
                grid = self._get_grid_from_str(self.grid)
            elif hasattr(self.grid, 'get_coords'):
                # Duck-typed check: anything exposing ``get_coords`` is
                # used as an already initialized grid.
                grid = self.grid
        if grid is None:
            grid = self._get_grid_from_cdo(var_name)
        return grid

    @staticmethod
    def _get_grid_from_dataarray(data_array):
        """
        Try to build a grid from the ``ppgrid_`` prefixed attributes of the
        given data array.

        Parameters
        ----------
        data_array : xarray.DataArray or None
            The attributes of this array are searched for keys starting
            with ``ppgrid_``. The prefix is stripped before the attributes
            are passed to the GridBuilder.

        Returns
        -------
        grid : grid instance or None
            The built grid. None is returned if a KeyError, ValueError or
            AttributeError occurs, e.g. because ``data_array`` is None.
        """
        prefix = 'ppgrid_'
        prefix_len = len(prefix)
        try:
            grid_attrs = {
                key[prefix_len:]: value
                for key, value in data_array.attrs.items()
                if key[:prefix_len] == prefix
            }
            grid = GridBuilder(grid_attrs).build_grid()
        except (KeyError, ValueError, AttributeError):
            grid = None
        else:
            logger.info('Got the grid from the data array')
        return grid

    def _get_grid_from_cdo(self, var_name):
        """
        Try to build a grid from the cdo grid description of the first
        file registered for the given variable.

        Parameters
        ----------
        var_name : str
            The variable which is selected within the first file.

        Returns
        -------
        grid : grid instance or None
            The built grid. None is returned if an AttributeError occurs
            (a warning about missing cdos is logged in this case) or if
            the grid description could not be converted into a grid.
        """
        grid = None
        file = self.variables[var_name][0].file
        try:
            grid_str = cdo.griddes(
                input='-selvar,{0:s} {1:s}'.format(var_name, file))
            grid = self._get_grid_from_str(grid_str)
        except AttributeError:
            logger.warning('To load the grid description with the cdos you '
                           'need to install the cdos!')
        return grid

    def _get_grid_from_str(self, grid_str):
        """
        Build a grid from a path to a grid file or from a grid description.

        Parameters
        ----------
        grid_str : str
            First it is tried to open this value as file path. If this
            fails with an IOError or TypeError the value itself is used as
            grid description.

        Returns
        -------
        grid : grid instance or None
            The built grid. None is returned if the GridBuilder raises a
            KeyError or ValueError.
        """
        try:
            # The context manager closes the file even if reading fails.
            with open(grid_str, 'r') as grid_file:
                read_str = grid_file.read()
        except (IOError, TypeError):
            read_str = grid_str
        try:
            grid = GridBuilder(read_str).build_grid()
        except (KeyError, ValueError):
            grid = None
        return grid

    def _get_file_data(self, file, var_name, **kwargs):
        """
        Open the given file handler, read the messages of the variable and
        close the file handler again, also if reading fails.

        Parameters
        ----------
        file : file handler
            Needs to provide ``open``, ``get_messages`` and ``close``.
        var_name : str
            The variable name passed to ``get_messages``.
        **kwargs
            Additional keyword arguments passed to ``get_messages``.

        Returns
        -------
        data
            The return value of ``file.get_messages``.
        """
        file.open()
        try:
            data = file.get_messages(var_name, **kwargs)
        finally:
            file.close()
        return data

    def _multi_select_var(self, data, var_name):
        """
        Generator which adds a ``variable`` dimension with the variable
        name as single coordinate value to every given data item.

        Parameters
        ----------
        data : iterable
            Items providing ``expand_dims`` and ``assign_coords``.
        var_name : str
            The coordinate value of the new ``variable`` dimension.

        Yields
        ------
        The expanded data item.
        """
        for item in data:
            expanded = item.expand_dims('variable')
            yield expanded.assign_coords(variable=[var_name, ])

    def data_merge(self, data, var_name):
        """
        Method to merge instances of xarray.DataArray into a single
        xarray.DataArray. Also the grid is read and set to the
        xarray.DataArray.

        Parameters
        ----------
        data : list of xarray.DataArray
            The data list. The list has to contain at least one item.
        var_name : str
            The name of the variable which is selected within the data list.

        Returns
        -------
        merged_array : xarray.DataArray
            The merged DataArray with the grid coordinates and the
            extracted grid. If the grid could not extracted the grid is
            None and a DataArray without set grid is returned.

        Raises
        ------
        IndexError
            If ``data`` is an empty list.
        """
        grid = self.get_grid(var_name, data[0])
        merged_array = data[0]
        merged_array.pp.grid = grid
        if len(data) > 1:
            logger.debug('Number of data items: %d', len(data))
            merged_array = merged_array.pp.update(*data[1:])
        # Drop the serialized grid attributes from the merged array.
        loaded_attrs = {
            key: value for key, value in merged_array.attrs.items()
            if not key.startswith('ppgrid_')
        }
        # The name is stored in the attributes and written directly to
        # the private ``_name`` attribute of the array.
        loaded_attrs['name'] = var_name
        merged_array._name = var_name
        merged_array.attrs = loaded_attrs
        merged_array = merged_array.pp.set_grid(grid)
        return merged_array