Source code for vdat.command_interpreter.types

"""Define enumeration-like classes that map keys to key types and
to the functions that need to be called to deal with any of them.

It uses pkg_resources and entry points to make the framework extendible
"""

import itertools as it
import os
import re

from astropy.io import fits
import numpy as np
import pkg_resources
import pyhetdex.tools.files.file_tools as pyhft
import six

from vdat.command_interpreter import exceptions


def _load_entrypoints(group):
    """Collect and load every entry point registered under ``group``.

    Parameters
    ----------
    group : string
        name of the entry point group to scan

    Returns
    -------
    entry_points : dictionary
        key: name of the entry point; value: object obtained loading it.
        If two entry points share a name, the last one found wins.
    """
    return {entry.name: entry.load()
            for entry in pkg_resources.iter_entry_points(group)}
[docs]class _Types(object): """Base class for the types. It shouldn't be used directly. If a type ``loop`` exists, it can be accessed as ``instance.loop`` or ``instance['loop']`` Attributes ---------- known_types : list of strings list of types known by the code """ entry_point_group = None def __init__(self): self._map_types = {} # map a type with the function to call self._map_types.update(_load_entrypoints(self.entry_point_group)) def __contains__(self, item): """item in know_types""" return item in self._map_types def __getattr__(self, name): """Gets values from the internal types dictionary as class attributes""" try: return self._map_types[name] except KeyError: msg = "'{}' object has no attribute '{}'" raise AttributeError(msg.format(self.__class__.__name__, name)) def __getitem__(self, name): """Gets values from the internal types dictionary as if the class is a dictionary""" return self._map_types[name]
[docs] def known_types(self): """return the list of known types""" return list(self._map_types.keys())
class PrimaryTypes(_Types):
    """Types available for the primary keys.

    The type<-->function mapping is filled from the ``vdat.cit.primary``
    entry point group.
    """
    entry_point_group = 'vdat.cit.primary'
class KeywordTypes(_Types):
    """Types available for the non-primary keywords.

    The type<-->function mapping is filled from the ``vdat.cit.keyword``
    entry point group.
    """
    entry_point_group = 'vdat.cit.keyword'
class ExecuteTypes(_Types):
    """Types available to decide whether a command must be executed.

    The type<-->function mapping is filled from the ``vdat.cit.execute``
    entry point group.
    """
    entry_point_group = 'vdat.cit.execute'
def primary_template(target_dir, key_val):  # pragma: no cover
    """Signature that a handler of a primary keyword must follow.

    An implementation collects the files from ``target_dir`` following the
    instructions in ``key_val``, if any, and either ``yield`` the values or
    returns an iterable. This template itself does nothing.

    Parameters
    ----------
    target_dir : string
        directory in which the files must be collected
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    yield a string or iterable of strings

    Raises
    ------
    CIPrimaryError
        if something goes wrong when handling the primary key
    """
def keyword_template(primary, key_val):  # pragma: no cover
    """Signature that a handler of a non-primary keyword must follow.

    The value of a keyword is either statically stored in ``key_val`` or
    needs to be extracted from the value of the primary file(s). This
    template itself does nothing.

    Parameters
    ----------
    primary : string
        the value of one of the items returned by :func:`.primary_template`
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    string
        value to associate to the keyword

    Raises
    ------
    CIKeywordError
        if something goes wrong when handling the key
    """
def execute_template(primary, config):  # pragma: no cover
    """Signature that a handler of an execute type must follow.

    An implementation is called for each of the primary entries to decide
    whether the command has to be executed or skipped. This template itself
    does nothing.

    Parameters
    ----------
    primary : string
        the value of one of the items returned by :func:`.primary_template`
    config : dictionary
        configuration for the command

    Returns
    -------
    bool
        ``True``: the command is executed; ``False``: the command is skipped
    """


# implementation of the types

# primary types
def primary_plain(target_dir, key_val):
    """Collect the files in ``target_dir`` whose name matches the pattern
    stored in ``key_val['value']``.

    Parameters
    ----------
    target_dir : string
        directory in which the files must be collected
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    iterator
        yields file names matching the value recursively
    """
    # the leading "*/" lets the pattern match whatever directory precedes
    # the file name
    pattern = "*/" + key_val['value']
    return pyhft.scan_files(target_dir, matches=pattern)
def primary_loop(target_dir, key_val):
    """Run a nested loop over the values of the given keys and collect the
    files matching each combination.

    At each step of the loop the pattern in ``key_val['value']`` is filled
    using python `format string syntax
    <https://docs.python.org/3/library/string.html#format-string-syntax>`_
    and all the files matching it are collected. Steps that do not produce
    any file do not yield anything.

    Each entry of ``key_val['keys']`` is either a string of the form
    ``"start,stop,step"``, expanded with :func:`numpy.arange`, or an iterable
    of values used as it is.

    Parameters
    ----------
    target_dir : string
        directory in which the files must be collected
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    yields a string of space separated file names
    """
    # names of the loop variables and, in the same order, their values
    names, ranges = [], []
    for name, spec in six.iteritems(key_val['keys']):
        if isinstance(spec, six.string_types):
            start, stop, step = [_to_number(part) for part in spec.split(',')]
            spec = np.arange(start, stop, step)
        names.append(name)
        ranges.append(spec)

    # one iteration for each combination of the values
    for combination in it.product(*ranges):
        substitutions = dict(zip(names, combination))
        pattern = "*/" + key_val['value'].format(**substitutions)
        found = ' '.join(pyhft.scan_files(target_dir, matches=pattern))
        if not found:
            continue
        yield found
def primary_groupby(target_dir, key_val):
    """Group each file matching ``value`` with the file names derived from
    it.

    For every file matching the ``value`` entry, one extra file name is
    built for each element of ``replace``, substituting it for the regular
    expression in ``match``. The original name comes first in the group.

    Parameters
    ----------
    target_dir : string
        directory in which the files must be collected
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    yields a string of space separated file names
    """
    regex = re.compile(key_val['match'])
    glob_pattern = "*/" + key_val['value']
    for file_name in pyhft.scan_files(target_dir, matches=glob_pattern):
        derived = [regex.sub(repl, file_name) for repl in key_val['replace']]
        yield ' '.join([file_name] + derived)


# secondary types
def keyword_plain(_, key_val):
    """Return the static value stored in the keyword configuration.

    Parameters
    ----------
    _ : string
        primary file name(s); ignored
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    string
        value to associate to the keyword
    """
    static_value = key_val['value']
    return static_value
def keyword_header(primary, key_val):
    """Extract and parse a fits header keyword from the first file.

    Extract the ``value`` keyword from the header. If ``extract`` is in the
    configuration, its first two elements are used as regular expression
    pattern and replacement to build the returned string out of the
    extracted header value.

    Parameters
    ----------
    primary : string
        primary file name(s)
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    string
        value to associate to the keyword. Without ``extract`` the header
        value is returned as read from the file.
    """
    # get only one file name
    first_file = primary.split()[0]
    with open(first_file, mode='rb') as f:
        head_key = fits.getval(f, key_val["value"])
    if "extract" in key_val:
        pattern, repl = key_val['extract'][:2]
        # header cards can hold numbers or booleans, but re.sub accepts only
        # strings: convert them instead of failing with a TypeError
        if not isinstance(head_key, six.string_types):
            head_key = str(head_key)
        head_key = re.sub(pattern, repl, head_key)
    return head_key
def keyword_format(primary, key_val):
    """Create a new string formatting ``value`` according to the provided
    ``keys``.

    The keys are substituted using `format string syntax
    <https://docs.python.org/3/library/string.html#format-string-syntax>`_.
    The value of ``keys`` is a map between values to substitute in ``value``
    and keyword types used to extract them from the primary file name.
    Strings are interpreted as of type ``plain``.

    Parameters
    ----------
    primary : string
        primary file name(s)
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    string
        value to associate to the keyword

    Raises
    ------
    CIKeywordError
        if one of the keys is of type ``format`` or if a ``KeyError`` is
        raised while looking up or running the handler of one of the keys
    """
    # get only one file name
    first_file = primary.split()[0]

    # get the keys
    keys = {}
    types = KeywordTypes()
    for k, v in six.iteritems(key_val['keys']):
        if isinstance(v, six.string_types):
            v = {'type': 'plain', 'value': v}
        if v['type'] == 'format':
            msg = "'format' type is not valid for keywords in a 'format'."
            raise exceptions.CIKeywordError(msg)
        try:
            keys[k] = types[v['type']](first_file, v)
        except KeyError as e:
            # report the key that was not found; joining the characters of
            # ``k`` with commas, as done before, garbled the message
            msg = ("The keywords ({}) is unknown."
                   " Edit the command or the configuration to"
                   " sync the keys.".format(e.args[0]))
            six.raise_from(exceptions.CIKeywordError(msg), e)

    return key_val['value'].format(**keys)
def keyword_regex(primary, key_val):
    """Build a string from the primary file name via regular expression
    substitution.

    Only the first of the whitespace separated names in ``primary`` is used:
    the matches of ``key_val['match']`` in it are replaced by
    ``key_val['replace']``.

    Parameters
    ----------
    primary : string
        primary file name(s)
    key_val : dictionary
        configuration for the key handle

    Returns
    -------
    string
        string built from the primary file name
    """
    # get only one file name
    first_file = primary.split()[0]
    pattern, replacement = key_val['match'], key_val['replace']
    return re.sub(pattern, replacement, first_file)


# execute types
def execute_new_file(primary, config):
    """Tell whether the output built from a primary entry is still missing.

    A string is constructed from the primary entry using the keyword type
    named by ``subtype`` in the ``execute`` section of the configuration. If
    that string corresponds to something existing in the file system,
    ``False`` is returned.

    Parameters
    ----------
    primary : string
        the value of one of the items returned by :func:`.primary_template`
    config : dictionary
        configuration for the command (not for the type)

    Returns
    -------
    bool
        ``True``: if the output of the keyword handling does not exist

    Raises
    ------
    CIKeywordError
        if a ``KeyError`` is raised while looking up or running the keyword
        handler
    """
    # the configuration of the type lives in the 'execute' section
    execute_conf = config['execute']
    keyword_types = KeywordTypes()
    try:
        handler = keyword_types[execute_conf['subtype']]
        output_name = handler(primary, execute_conf)
    except KeyError as e:
        msg = ("The keywords ({}) is unknown."
               " Edit the command or the configuration to"
               " sync the keys.".format(e.args[0]))
        six.raise_from(exceptions.CIKeywordError(msg), e)
    else:
        return not os.path.exists(output_name)


# helper functions
[docs]def _to_number(string): """Convert the string to a number. It first tries to covert it to an int and then to a float. If it fails gives up. Parameters ---------- string : string string to convert Returns ------- int or float converted value Raises ------ ValueError if the conversion fails """ try: # let's see if it's integer return int(string) except ValueError: pass try: # and now if it's a float return float(string) except ValueError: msg = "The string '{}' cannot be converted to integer or float" raise ValueError(msg.format(string))