Source code for pymepps.utilities.path_encoder

# -*- coding: utf-8 -*-
# """
# Created on 28.09.16
#
# Created for pymepps
#
# @author: Tobias Sebastian Finn, tobias.sebastian.finn@studium.uni-hamburg.de
#
#     Copyright (C) {2016}  {Tobias Sebastian Finn}
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
# """
# System modules
import logging
import re
import itertools
import datetime
from collections import Counter

# External modules

# Internal modules

logger = logging.getLogger(__name__)


[docs]class PathEncoder(object):
    def __init__(self, base_path, date=None, undet_numbers=None):
        """
        This template decode paths and can calculate paths, replace date, text
        and undetermined numbers commands.
        Commands:
            ${X}$: Replace with X
                X could be:
                    text(Y): text will be replaced by Y
                        (e.g. text(SP1,SP2,IP1) => [SP1, SP2, SP3])
                    date(Y): date will be replaced by specified date
                        with specific format Y
                        (e.g. date(%Y%m%d_%H) => 20160518_12)
                    unde(Y): unde stands for undetermined numbers and is
                        replaced by specified undet_numbers with a given
                        format Y. For possible format types look [1]
                        (e.g. dete(02d) => [00,01,02,03,04,05])
            @{X}@: Calculate X. It is possible to use numpy commands for the
                calculations. Be careful, the calculations are done with the
                eval() function!
                (e.g. @{1+4+6}@ => 11)
        [1] https://docs.python.org/3/library/string.html#formatstrings

        Parameters
        ----------
        base_path : str
            The path template which should be decoded. The path template is
            composed of the commands showed above.
        date : datetime.datetime, optional
            The path is valid for this date. For numerical weather models,
            this is usually the initialization date. If there is no valid date
            or it isn't needed this could be None. Default is None.
        numbers : list(int/float), optional
            If there are numbers within the path, which are given now given.
            For numerical weather model path, these are usually the model lead
            times. If there are no numbers or aren't needed this could be None.
            Default is None.
        """
        self.base_path = base_path
        self.undet_numbers = undet_numbers
        self.date = date
        self.replace_delimiters = ("${", "}$")
        self.calc_delimiters = ("@{", "}@")
        self.replace_methods = {
            "text": self._text,
            "date": self._date,
            "numb": self._unde}
        logger.debug('Url template: {0:s}\n'
                     'Undetermined numbers: {1:s}\n'
                     'Date: {2:s}'.format(
            str(self.base_path), str(undet_numbers), str(date)))

    def __repr__(self):
        return "{0:s}(template: {1:s})".format(self.__class__.__name__,
                                               self.base_path)

[docs]    def get_file_number(self):
        return len(self.get_encoded())

[docs]    def get_encoded(self):
        """
        Encode the path with given data.

        Returns
        -------
        list of str
            List with encoded paths.
        """
        url_list = self._replace_static(self.base_path)
        for k, url in enumerate(url_list):
            url_list[k] = self._calculation(url)
        logger.debug(str(url_list))
        return url_list

    def _replace_static(self, url_temp):
        start, end = self.replace_delimiters
        escaped = (re.escape(start), re.escape(end))
        regex = re.compile('%s(.*?)%s' % escaped, re.DOTALL)
        replaced_url = []
        splitted_url = regex.split(url_temp)
        logger.debug('Splitted {0:s} to {1:s}'.format(url_temp, str(splitted_url)))
        for i, part in enumerate(splitted_url):
            try:
                part = self.replace_methods[part[:4]](part[5:-1])
            except:
                part = [part, ]
            replaced_url.append(part)
        logger.debug(replaced_url)
        replaced_url = tuple(itertools.product(*replaced_url))
        logger.debug(replaced_url)
        cnt = Counter(splitted_url)
        splitted_url_same = [True if cnt[p]>1 else False for p in splitted_url]
        cleaned_url = []
        for url in replaced_url:
            cnt = Counter(url)
            url_same = [True if cnt[p] > 1 else False for p in url]
            if url_same == splitted_url_same:
                cleaned_url.append(url)
        combined_url = [''.join(u) for u in replaced_url]
        return combined_url

    def _calculation(self, url):
        """
        Methods calculations triggered by the calculate delimiters.
        Attention, this method uses eval() as method to calculate string type
        operations, so be careful!

        Args:
            url:

        Returns:

        """
        start, end = self.calc_delimiters
        escaped = (re.escape(start), re.escape(end))
        regex = re.compile('%s(.*?)%s' % escaped, re.DOTALL)
        replaced_url = []
        for i, part in enumerate(regex.split(url)):
            try:
                part = str(eval(part))
            except:
                part = part
            replaced_url.append(part)
        url = "".join(replaced_url)
        return url

    @staticmethod
    def _text(*args):
        """
        Split the first argument by comma and return it.
        Args:
            *args:

        Returns:
            text (list[str]): The first argument splitted by comma as list with
                strings.
        """
        text = args[0].split(",")
        return text

    def _date(self, date_format):
        """
        Method to evaluate an date regex (!init)
        in the url with given date.

        Args:
            date_format(str): The date format for the initialization regex.
                This date format is the same date format as datetime format.
        """
        if isinstance(self.date, datetime.datetime):
            return [self.date.strftime(date_format), ]
        else:
            logger.error('The date isn\'t set yet, but is called!')


    def _unde(self, fixed_format='03d'):
        """

        Args:
            fixed_format

        Returns:

        """
        if hasattr(self.undet_numbers, '__iter__') and \
                not isinstance(self.undet_numbers, str):
            leads = ['{number:fixed_format}'.format(
                        fixed_format=fixed_format, number=lead)
                     for lead in self.undet_numbers]
            return leads
        else:
            logger.error('The undetermined numbers aren\'t set yet,'
                         'but are called!')