Source code for pymepps.loader.filehandler.netcdfhandler

#!/bin/env python
# -*- coding: utf-8 -*-
# """
# Created on 14.12.16
#
# Created for pymepps
#
# @author: Tobias Sebastian Finn, tobias.sebastian.finn@studium.uni-hamburg.de
#
#     Copyright (C) {2016}  {Tobias Sebastian Finn}
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
# """
# System modules
import logging

# External modules
import xarray as xr
import numpy as np

# Internal modules
from .filehandler import FileHandler


logger = logging.getLogger(__name__)


def cube_to_series(cube, var_name):
    """
    Convert a cube into a pandas object which is named after the variable.

    The first dimension out of ``index``, ``time`` and ``validtime`` that is
    found within the dimensions of the cube is excluded from stacking. All
    remaining dimensions are stacked into a single new dimension ``col``.
    If no dimension remains, the cube is converted directly with
    ``to_series``.

    Parameters
    ----------
    cube : xr.DataArray
        The cube, which should be converted. Only ``dims``, ``stack``,
        ``to_pandas`` and ``to_series`` are used here.
    var_name : str
        This name is set as ``name`` attribute of the returned object.

    Returns
    -------
    data
        The result of ``to_pandas`` (stacked case) or ``to_series`` with
        ``name`` set to ``var_name``.
    """
    remaining_dims = list(cube.dims)
    # Only the first matching candidate is removed, the others stay within
    # the dimensions that are stacked.
    for candidate in ('index', 'time', 'validtime'):
        if candidate in remaining_dims:
            remaining_dims.remove(candidate)
            break
    if not remaining_dims:
        converted = cube.to_series()
    else:
        converted = cube.stack(col=remaining_dims).to_pandas()
    converted.name = var_name
    return converted
class NetCDFHandler(FileHandler):
    """
    File handler for NetCDF files, which are opened with xarray.

    The opened dataset is stored in ``self.ds`` and the file is read from
    ``self.file``. Both attributes are not defined within this class and
    are presumably set by the parent ``FileHandler`` -- verify there.
    """
    def _get_varnames(self):
        """
        Get the names of the data variables of the opened dataset.

        Returns
        -------
        var_names : list
            The keys of ``self.ds.data_vars``.
        """
        return list(self.ds.data_vars)

    def is_type(self):
        """
        Check if the file can be opened as NetCDF file.

        The file is opened and closed again. As a consequence the dataset
        is closed after this call, even if it was opened before.

        Returns
        -------
        bool
            True if the file could be opened and closed, False if an
            OSError was raised.
        """
        try:
            self.open()
            self.close()
        except OSError:
            return False
        return True

    def open(self):
        """
        Open the file with the netcdf4 engine of xarray, if there is no
        opened dataset yet.

        Returns
        -------
        self : NetCDFHandler
            This handler, such that calls can be chained.
        """
        if self.ds is None:
            self.ds = xr.open_dataset(self.file, engine='netcdf4')
        return self

    def close(self):
        """
        Close the opened dataset and reset ``self.ds`` to None. Nothing is
        done if there is no opened dataset.
        """
        if self.ds is not None:
            self.ds.close()
            self.ds = None

    @property
    def lon_lat(self):
        """
        Get the position stored within the dataset.

        The variables ``lat``, ``lon`` and ``zsl`` of the dataset are
        converted to float and returned as ``latitude``, ``longitude`` and
        ``altitude``. A variable is skipped if it is not available or if it
        cannot be converted into a single float.

        Returns
        -------
        attrs : dict(str, float)
            The found position entries. The dict is empty if none of the
            variables could be extracted.
        """
        attrs = {}
        position_variables = (
            ('latitude', 'lat'),
            ('longitude', 'lon'),
            ('altitude', 'zsl'),
        )
        for attr_name, ds_name in position_variables:
            try:
                attrs[attr_name] = float(getattr(self.ds, ds_name).values)
            # The exceptions have to be given as tuple. ``TypeError or
            # KeyError`` evaluates to ``TypeError`` only. Attribute-style
            # access to a missing variable raises an AttributeError, which
            # has to be caught too, else a missing variable is not skipped.
            except (TypeError, KeyError, AttributeError):
                continue
        return attrs

    def load_cube(self, var_name):
        """
        Method to load a variable from the netcdf file and return it as
        xr.DataArray.

        Values equal to the fill value are replaced in-place by NaN. The
        fill value is taken from the ``_FillValue`` attribute of the
        variable, else from ``missing_value``, else 9.96921e+36 is used.

        Parameters
        ----------
        var_name : str
            The variable name, which should be extracted.

        Returns
        -------
        variable : xr.DataArray
            The DataArray of the variable.
        """
        variable = self.ds[var_name]
        if hasattr(variable, '_FillValue'):
            fill_value = variable._FillValue
        elif hasattr(variable, 'missing_value'):
            fill_value = variable.missing_value
        else:
            # NOTE(review): looks like the default netCDF fill value for
            # floats -- confirm.
            fill_value = 9.96921e+36
        variable.values[variable.values == fill_value] = np.nan
        return variable

    def get_timeseries(self, var_name, **kwargs):
        """
        Method to get the time series from a NetCDF file. This is designed
        for measurement site data in netcdf format. At the moment this
        method is only tested for Wettermast Hamburg data!

        Parameters
        ----------
        var_name : str
            The variable name, which should be extracted.
        **kwargs
            Accepted, but not used by this method.

        Returns
        -------
        data
            The loaded variable converted by ``cube_to_series``. The name
            of the returned pandas object is set to ``var_name``.
        """
        cube = self.load_cube(var_name).load()
        return cube_to_series(cube, var_name)

    def get_messages(self, var_name, **kwargs):
        """
        Method to imitate the message-like behaviour of grib files.

        Parameters
        ----------
        var_name : str
            The variable name, which should be extracted.
        runtime : np.datetime64, optional
            If the dataset has no runtime this runtime is used. If the
            runtime is not set, the runtime will be inferred from file
            name.
        ensemble : int or str, optional
            If the dataset has no ensemble information this ensemble is
            used. If the ensemble is not set, the ensemble will be inferred
            from file name.
        sliced_coords : tuple(slice), optional
            If the cube should be sliced before it is loaded. This is
            helpful for large opendap requests. The slices are applied to
            the last dimensions. So (slice(1,2,1), slice(3,5,1)) means
            [..., 1:2, 3:5]. If it is not set all data is used.

        Returns
        -------
        cube : xr.DataArray
            The loaded variable with the dataset attributes added, as it is
            returned by ``cube.pp.normalize_coords``. The normalized cube
            presumably has the six coordinates (analysis, ensemble, time,
            level, y, x) -- verify against the ``pp`` accessor.
        """
        cube = self.load_cube(var_name)
        if 'sliced_coords' in kwargs:
            # The slicing is done before ``load`` such that only the
            # selected part is loaded.
            cube = cube[(...,) + kwargs['sliced_coords']]
        cube.attrs.update(self.ds.attrs)
        cube = cube.load()
        cube = cube.pp.normalize_coords(
            runtime=self._get_runtime(**kwargs),
            ensemble=self._get_ensemble(**kwargs),
            validtime=self._get_validtime(**kwargs)
        )
        return cube