# Source code for jwst.associations.lib.constraint

"""Constraints
"""
import abc
import collections
from copy import deepcopy
from itertools import chain
import logging
import re
import typing

from .process_list import ListCategory, ProcessList
from .utilities import (
    evaluate,
    getattr_from_list,
    is_iterable
)
from ..pool import PoolRow

# Names exported by `from <this module> import *`.
# `SimpleConstraintABC` is intentionally absent from this list.
__all__ = [
    'AttrConstraint',
    'Constraint',
    'ConstraintTrue',
    'SimpleConstraint',
]

# Configure logging
# Only a NullHandler is attached here; this module installs no
# output handler of its own.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


class SimpleConstraintABC(abc.ABC):
    """Abstract base for leaf (non-composite) constraints

    Parameters
    ----------
    init : dict or None
        If given, its key:value pairs are copied directly onto the
        instance as attributes. When `init` is given, `**kwargs`
        are ignored.

    value : object or None
        Value that must be matched.

    name : str or None
        Optional name for the constraint.

    **kwargs : key:value pairs
        Extra attributes to set on the instance. Only used
        when `init` is None.

    Attributes
    ----------
    matched : bool
        Result of the last call to `check_and_set`.
        False until `check_and_set` has been called.
    """

    # Attributes to show in the string representation.
    _str_attrs = ('name', 'value')

    def __init__(self, init=None, value=None, name=None, **kwargs):
        # Explicitly defined attributes always exist, in this order.
        self.value = value
        self.name = name
        self.matched = False

        # Either the `init` mapping or the loose keywords are applied,
        # never both. Either may override the attributes set above.
        extra_attrs = kwargs if init is None else init
        self.__dict__.update(extra_attrs)

    @abc.abstractmethod
    def check_and_set(self, item):
        """Check and set the constraint

        The base implementation unconditionally marks the
        constraint as matched.

        Parameters
        ----------
        item : object
            The item to check. Unused by the base implementation.

        Returns
        -------
        success, reprocess : bool, [~jwst.associations.ProcessList[,...]]
            Returns 2-tuple of

                - True if check is successful.
                - List of `~jwst.associations.ProcessList`.
        """
        self.matched = True
        return True, []

    # Return type, informally:
    #   dict[str, list[typing.Union[SimpleConstraint, Constraint]]]
    @property
    def dup_names(self):
        """Return dictionary of constraints with duplicate names

        A leaf constraint cannot contain duplicates, so the mapping
        is always empty. Classes that hold a list of constraints
        are expected to override this.

        Returns
        -------
        dups : {str: [constraint[,...]][,...]}
            Mapping between each duplicated name
            and all the constraints that define that name.
        """
        return {}

    @property
    def id(self):
        """Return identifier for the constraint

        Returns
        -------
        id : str
            The identifier, of the form `ClassName:name`.
        """
        class_name = self.__class__.__name__
        return f'{class_name}:{self.name}'

    def copy(self):
        """Return a deep copy of this constraint"""
        return deepcopy(self)

    # Return type, informally: list[tuple[SimpleConstraint, typing.Any]]
    def get_all_attr(self, attribute: str):
        """Return the specified attribute paired with its owner

        Classes that hold a list of constraints are expected
        to override this to traverse their members.

        Parameters
        ----------
        attribute : str
            The attribute to retrieve

        Returns
        -------
        [(self, value)] : [(SimpleConstraint, object)]
            Single-element list holding the constraint and the value
            of the attribute. If the value is None, an empty
            list is returned.

        Raises
        ------
        AttributeError
            If the attribute does not exist on the constraint.
        """
        found = getattr(self, attribute)
        return [] if found is None else [(self, found)]

    # Make iterable to work with `Constraint`.
    # Since this is a leaf, iteration yields only the constraint itself.
    def __iter__(self):
        yield self

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'{class_name}({self.__dict__})'

    def __str__(self):
        shown = {}
        for attr_name in self._str_attrs:
            shown[attr_name] = getattr(self, attr_name)
        class_name = self.__class__.__name__
        return f'{class_name}({shown})'


class ConstraintTrue(SimpleConstraintABC):
    """Constraint that always matches"""

    def check_and_set(self, item):
        """Mark the constraint as matched, whatever `item` is

        Defers entirely to the base-class implementation.

        Returns
        -------
        success, reprocess : bool, list
            Whatever `SimpleConstraintABC.check_and_set` returns.
        """
        return super().check_and_set(item)
class SimpleConstraint(SimpleConstraintABC):
    """A basic constraint

    Parameters
    ----------
    init : dict
        dict where the key:value pairs define
        the following parameters

    value : object or None
        Value that must be matched.
        If None, any retrieved value will match.

    sources : func(item) or None
        Function taking `item` as argument used to
        retrieve a value to check against.
        If None, the item itself is used as the value.

    force_unique : bool
        If the constraint is satisfied, reset `value`
        to the value of the source.

    test : function
        The test function for the constraint.
        Takes two arguments:

            - the constraint's `value`
            - the value retrieved from the item by `sources`

        Returns a boolean.
        Default is `SimpleConstraint.eq`

    name : str or None
        Option name for constraint

    reprocess_on_match : bool
        Reprocess the item if the constraint is satisfied.

    reprocess_on_fail : bool
        Reprocess the item if the constraint is not satisfied.

    work_over : ListCategory.[BOTH, EXISTING, RULES]
        The condition on which this constraint should operate.

    reprocess_rules : [rule[,..]] or None
        List of rules to be applied to.
        If None, calling function will determine the ruleset.
        If empty, [], all rules will be used.

    Attributes
    ----------
    All `Parameters` are also `Attributes`

    Examples
    --------

    Create a constraint where the attribute `attr` of an object
    matches the value `my_value`:

    >>> c = SimpleConstraint(value='my_value')
    >>> print(c)
    SimpleConstraint({'name': None, 'value': 'my_value'})

    To check a constraint, call `check_and_set`. A successful match
    will return a tuple of `True` and a reprocess list.

    >>> item = 'my_value'
    >>> c.check_and_set(item)
    (True, [])

    If it doesn't match, `False` will be returned.

    >>> bad_item = 'not_my_value'
    >>> c.check_and_set(bad_item)
    (False, [])

    A `SimpleConstraint` can also be initialized by a `dict`
    of the relevant parameters:

    >>> init = {'value': 'my_value'}
    >>> c = SimpleConstraint(init)
    >>> print(c)
    SimpleConstraint({'name': None, 'value': 'my_value'})

    If the value to check is `None`, the `SimpleConstraint` will
    successfully match whatever object given. In addition, the
    constraint is modified in place so that its `value` is now set
    to the value retrieved from the object.

    >>> c = SimpleConstraint(value=None)
    >>> matched, reprocess = c.check_and_set(item)
    >>> print(c)
    SimpleConstraint({'name': None, 'value': 'my_value'})

    This behavior can be overridden by the `force_unique` parameter:

    >>> c = SimpleConstraint(value=None, force_unique=False)
    >>> matched, reprocess = c.check_and_set(item)
    >>> print(c)
    SimpleConstraint({'name': None, 'value': None})
    """

    def __init__(
            self,
            init=None,
            sources=None,
            force_unique=True,
            test=None,
            reprocess_on_match=False,
            reprocess_on_fail=False,
            work_over=ListCategory.BOTH,
            reprocess_rules=None,
            **kwargs
    ):

        # Defined attributes
        self.sources = sources
        self.force_unique = force_unique
        self.test = test
        self.reprocess_on_match = reprocess_on_match
        self.reprocess_on_fail = reprocess_on_fail
        self.work_over = work_over
        self.reprocess_rules = reprocess_rules
        super().__init__(init=init, **kwargs)

        # Give defaults some real meaning.
        # The attributes, not the parameters, are examined: the base
        # class may have just set them from the `init` dict, and those
        # values must not be overwritten by the defaults.
        if self.sources is None:
            self.sources = lambda item: item
        if self.test is None:
            self.test = self.eq

    def check_and_set(self, item):
        """Check and set the constraint

        Parameters
        ----------
        item : object
            The item to check. It is passed to `sources` to retrieve
            the value that is tested against the constraint's `value`.

        Returns
        -------
        success, reprocess : bool, [~jwst.associations.ProcessList[,...]]
            Returns 2-tuple of

                - True if check is successful.
                - List of `~jwst.associations.ProcessList`.
        """
        source_value = self.sources(item)

        # A constraint without a value matches anything.
        satisfied = True
        if self.value is not None:
            satisfied = self.test(self.value, source_value)
        self.matched = satisfied

        # On a match, optionally lock the constraint to the found value.
        if self.matched:
            if self.force_unique:
                self.value = source_value

        # Determine reprocessing
        reprocess = []
        if ((self.matched and self.reprocess_on_match) or
                (not self.matched and self.reprocess_on_fail)):
            reprocess.append(ProcessList(
                items=[item],
                work_over=self.work_over,
                rules=self.reprocess_rules,
                trigger_constraints=[self.id]
            ))

        return self.matched, reprocess

    def eq(self, value1, value2):
        """True if constraint.value and item are equal."""
        return value1 == value2
class AttrConstraint(SimpleConstraintABC):
    """Test attribute of an item

    Parameters
    ----------
    init : dict or None
        dict where the key:value pairs define
        the following parameters

    sources : [str[,...]]
        List of attributes to query

    value : str, function or None
        The value to check for. If None and
        `force_unique`, any value in the first
        available source will become the value.
        If function, the function takes no arguments
        and returns a string.

    evaluate : bool
        Evaluate the item's value before checking condition.

    force_reprocess : ListCategory.state or False
        Add item back onto the reprocess list using
        the specified `~jwst.associations.ProcessList` work over state.

    force_undefined : bool
        Invert the requirement on the sources: the constraint
        fails if any of the sources is found in the item and
        matches if none of them is found.

    force_unique : bool
        If the initial value is `None` or a list of possible values,
        the constraint will be modified to be the value first matched.

    invalid_values : [str[,...]]
        List of values that are invalid in an item.
        Will cause a non-match.

    name : str or None
        Name of the constraint.

    only_on_match : bool
        If `force_reprocess`, only do the reprocess
        if the entire constraint is satisfied.

    onlyif : function
        Boolean function that takes `item` as argument.
        If True, the rest of the condition is checked. Otherwise
        return as a matched condition

    required : bool
        One of the sources must exist. Otherwise,
        return as a matched constraint.

    Attributes
    ----------
    found_values : set(str[,...])
        Set of actual found values for this condition.
        Values are stored regex-escaped.

    matched : bool
        Last result of `check_and_set`
    """

    # Attributes to show in the string representation.
    _str_attrs = ('name', 'sources', 'value')

    def __init__(self,
                 init=None,
                 sources=None,
                 evaluate=False,
                 force_reprocess=False,
                 force_undefined=False,
                 force_unique=True,
                 invalid_values=None,
                 only_on_match=False,
                 onlyif=None,
                 required=True,
                 **kwargs):

        # Attributes
        self.sources = sources
        self.evaluate = evaluate
        self.force_reprocess = force_reprocess
        self.force_undefined = force_undefined
        self.force_unique = force_unique
        self.invalid_values = invalid_values
        self.only_on_match = only_on_match
        self.onlyif = onlyif
        self.required = required
        super().__init__(init=init, **kwargs)

        # Give some defaults real meaning.
        # The attributes, not the parameters, are examined: the base
        # class may have just set them from the `init` dict, and those
        # values must not be overwritten by the defaults.
        if self.invalid_values is None:
            self.invalid_values = []
        if self.onlyif is None:
            self.onlyif = lambda item: True

        # Haven't actually matched anything yet.
        self.found_values = set()
        self.matched = False

    def check_and_set(self, item):
        """Check and set constraints based on item

        Parameters
        ----------
        item : dict
            The item to check on.

        Returns
        -------
        success, reprocess : bool, [~jwst.associations.ProcessList[,...]]
            Returns 2-tuple of

                - True if check is successful.
                - List of `~jwst.associations.ProcessList`.
        """
        reprocess = []

        # Only perform check on specified `onlyif` condition
        if not self.onlyif(item):
            if self.force_reprocess:
                reprocess.append(
                    ProcessList(
                        items=[item],
                        work_over=self.force_reprocess,
                        only_on_match=self.only_on_match,
                        trigger_constraints=[self.id]
                    )
                )
            self.matched = True
            return self.matched, reprocess

        # Get the condition information.
        try:
            source, value = getattr_from_list(
                item,
                self.sources,
                invalid_values=self.invalid_values
            )
        except KeyError:
            # None of the sources is available in the item.
            if self.required and not self.force_undefined:
                self.matched = False
                return self.matched, reprocess
            else:
                self.matched = True
                return self.matched, reprocess
        else:
            # A source exists, but the constraint demands that none does.
            if self.force_undefined:
                self.matched = False
                return self.matched, reprocess

        evaled = value
        if self.evaluate:
            evaled = evaluate(value)

        # If the constraint has no value to check against, and given
        # value evaluates to a list, the item must be duplicated,
        # with each value from its list, and all the new items reprocessed.
        # Otherwise, the value is the value to set the constraint by.
        if self.value is None:
            if is_iterable(evaled):
                reprocess.append(
                    reprocess_multivalue(item, source, evaled, self)
                )
                self.matched = False
                return self.matched, reprocess
            value = str(evaled)

        # Else, the constraint does have a value. Check against it.
        else:
            if callable(self.value):
                match_value = self.value()
            else:
                match_value = self.value
            if not is_iterable(evaled):
                evaled = [evaled]
            for evaled_item in evaled:
                value = str(evaled_item)
                if meets_conditions(value, match_value):
                    break
            else:
                # The condition is not matched, leave now.
                self.matched = False
                return self.matched, reprocess

            # A match was found. If there is a list of potential values,
            # set them up for reprocessing.
            next_evaleds = [
                next_evaled
                for next_evaled in evaled
                if next_evaled != evaled_item
            ]
            if next_evaleds:
                reprocess.append(
                    reprocess_multivalue(item, source, next_evaleds, self)
                )

        # At this point, the constraint has passed.
        # Fix the conditions. The value is escaped because it will
        # be used as a regular expression on later checks.
        escaped_value = re.escape(value)
        self.found_values.add(escaped_value)
        if self.force_unique:
            self.value = escaped_value
            self.sources = [source]
            self.force_unique = False

        # If required to reprocess, add to the reprocess list.
        if self.force_reprocess:
            reprocess.append(
                ProcessList(
                    items=[item],
                    work_over=self.force_reprocess,
                    only_on_match=self.only_on_match,
                    trigger_constraints=[self.id]
                )
            )

        # That's all folks
        self.matched = True
        return self.matched, reprocess
class Constraint:
    """Constraint that is made up of SimpleConstraints

    Parameters
    ----------
    init : object or [object[,...]]
        A single object or list of objects where the
        objects are as follows.

        - SimpleConstraint or subclass
        - Constraint

        A list is used as-is, not copied. A `Constraint` is copied:
        its settings replace the named parameters below and its
        member constraints are deep-copied.

    reduce : function
        A reduction function with signature `x(item, constraints)`
        where `constraints` is the list of member constraints.
        Returns a 2-tuple of the boolean state of the members and
        a list of reprocess lists.
        Default value is `Constraint.all`

    name : str or None
        Optional name for constraint.

    reprocess_on_match : bool
        Reprocess the item if the constraint is satisfied.

    reprocess_on_fail : bool
        Reprocess the item if the constraint is not satisfied.

    work_over : ListCategory.[BOTH, EXISTING, RULES]
        The condition on which this constraint should operate.

    reprocess_rules : [rule[,..]] or None
        List of rules to be applied to.
        If None, calling function will determine the ruleset.
        If empty, [], all rules will be used.

    Raises
    ------
    TypeError
        If `init` is not one of the accepted types.

    Attributes
    ----------
    constraints : [Constraint[,...]]
        List of `Constraint` or `SimpleConstraint` that
        make this constraint.

    matched : bool
        Result of the last `check_and_set`

    reduce : function
        The reduction function, as described under `Parameters`.
        Predefined functions are:

        - `all`: True if all components return True
        - `any`: True if any component returns True
        - `notall`: True if not all components return True
        - `notany`: True if no component returns True

    Notes
    -----
    Named constraints can be accessed directly through indexing:

    >>> c = Constraint(SimpleConstraint(name='simple', value='a_value'))
    >>> c['simple']  # doctest: +SKIP
    SimpleConstraint({'sources': <function SimpleConstraint.__init__.<locals>.<lambda> at 0x7f8be05f5730>, 'force_unique': True, 'test': <bound method SimpleConstraint.eq of SimpleConstraint({...})>, 'reprocess_on_match': False, 'reprocess_on_fail': False, 'work_over': 1, 'reprocess_rules': None, 'value': 'a_value', 'name': 'simple', 'matched': False})
    """

    def __init__(
            self,
            init=None,
            reduce=None,
            name=None,
            reprocess_on_match=False,
            reprocess_on_fail=False,
            work_over=ListCategory.BOTH,
            reprocess_rules=None
    ):
        self.constraints = []

        # Initialize from named parameters
        self.reduce = reduce
        self.name = name
        self.reprocess_on_match = reprocess_on_match
        self.reprocess_on_fail = reprocess_on_fail
        self.work_over = work_over
        self.reprocess_rules = reprocess_rules

        # Initialize from a structure.
        if init is None:
            pass
        elif isinstance(init, list):
            self.constraints = init
        elif isinstance(init, Constraint):
            self.reduce = init.reduce
            self.name = init.name
            self.reprocess_on_match = init.reprocess_on_match
            self.reprocess_on_fail = init.reprocess_on_fail
            self.work_over = init.work_over
            self.reprocess_rules = init.reprocess_rules
            self.constraints = deepcopy(init.constraints)
        elif isinstance(init, SimpleConstraintABC):
            self.constraints = [init]
        else:
            raise TypeError(
                'Invalid initialization value type {}.'
                '\nValid types are `SimpleConstraint`, `Constraint`,'
                '\nor subclass.'.format(type(init))
            )

        # Give some defaults real meaning.
        self.matched = False
        if self.reduce is None:
            self.reduce = self.all

    @property
    def dup_names(self):
        """Return dictionary of constraints with duplicate names

        The whole tree of constraints, this one included,
        is searched for names that are used more than once.

        Returns
        -------
        dups : {str: [constraint[,...]][,...]}
            Returns a mapping between the duplicated name
            and all the constraints that define that name.
            Empty if there are no duplicates, including when
            no constraint has a name at all.
        """
        # Each entry is a (constraint, name) pair; unnamed constraints
        # are not included. The pairs are walked directly rather than
        # unzipped because unzipping an empty list cannot be unpacked.
        attrs = self.get_all_attr('name')
        counts = collections.Counter(name for _, name in attrs)

        result = collections.defaultdict(list)
        for constraint, name in attrs:
            if counts[name] > 1:
                result[name].append(constraint)

        # Turn off the defaultdict factory.
        result.default_factory = None
        return result

    @property
    def id(self):
        """Return identifier for the constraint

        Returns
        -------
        id : str
            The identifier, of the form `ClassName:name`.
        """
        return f'{self.__class__.__name__}:{self.name}'

    def append(self, constraint):
        """Append a new constraint"""
        self.constraints.append(constraint)

    def check_and_set(self, item, work_over=ListCategory.BOTH):
        """Check and set the constraint

        Parameters
        ----------
        item : object
            The item to check against the member constraints.

        work_over : ListCategory.[BOTH, EXISTING, RULES]
            The category currently being worked on. If it is neither
            this constraint's `work_over` nor `ListCategory.BOTH`,
            the check is skipped and reported as a failure,
            leaving `matched` untouched.

        Returns
        -------
        success, reprocess : bool, [~jwst.associations.ProcessList[,...]]
            Returns 2-tuple of

                - success : True if check is successful.
                - List of `~jwst.associations.ProcessList`.
        """
        if work_over not in (self.work_over, ListCategory.BOTH):
            return False, []

        # Do we have positive?
        # `reprocess` is a list of lists, one per contributing member.
        self.matched, reprocess = self.reduce(item, self.constraints)

        # Determine reprocessing
        if ((self.matched and self.reprocess_on_match) or
                (not self.matched and self.reprocess_on_fail)):
            reprocess.append([ProcessList(
                items=[item],
                work_over=self.work_over,
                rules=self.reprocess_rules,
                trigger_constraints=[self.id]
            )])

        # Flatten the list of lists into a single list.
        return self.matched, list(chain.from_iterable(reprocess))

    def copy(self):
        """Return a deep copy of this constraint"""
        return deepcopy(self)

    def get_all_attr(self, attribute: str):
        """Return the specified attribute from the whole constraint tree

        Parameters
        ----------
        attribute : str
            The attribute to retrieve

        Returns
        -------
        result : [(SimpleConstraint or Constraint, object)[,...]]
            List of (constraint, value) pairs, starting with this
            constraint and followed by its members, in order.
            Constraints whose value is None are left out.

        Raises
        ------
        AttributeError
            If the attribute is not found.
        """
        result = []
        value = getattr(self, attribute)
        if value is not None:
            result = [(self, value)]
        for constraint in self.constraints:
            result.extend(constraint.get_all_attr(attribute))

        return result

    @staticmethod
    def all(item, constraints):
        """Return positive only if all results are positive.

        Returns
        -------
        match, to_reprocess : bool, [[ProcessList[,...]][,...]]
            The overall result and a list of the reprocess lists
            of the constraints that contributed to it.
        """

        # If there are no constraints, there is nothing to match.
        # Result is false.
        if len(constraints) == 0:
            return False, []

        # Find all negatives. Note first negative
        # that requires reprocessing and how many
        # negatives do not.
        all_match = True
        negative_reprocess = None
        to_reprocess = []
        for constraint in constraints:
            match, reprocess = constraint.check_and_set(item)
            if match:
                if all_match:
                    to_reprocess.append(reprocess)
            else:
                all_match = False

                # If not match and no reprocessing, then fail
                # completely. However, if there is reprocessing, take
                # the first one. Continue to check to ensure
                # there is no further complete fail.
                if len(reprocess) == 0:
                    negative_reprocess = None
                    break
                elif negative_reprocess is None:
                    negative_reprocess = [reprocess]

        if not all_match:
            if negative_reprocess is not None:
                to_reprocess = negative_reprocess
            else:
                to_reprocess = []

        return all_match, to_reprocess

    @staticmethod
    def any(item, constraints):
        """Return the first successful constraint.

        Returns
        -------
        match, to_reprocess : bool, [[ProcessList[,...]][,...]]
            On success, only the reprocess list of the first matching
            constraint is returned. Otherwise, the reprocess lists
            of all the constraints are returned.
        """

        # If there are no constraints, there is nothing to match.
        # Result is false.
        if len(constraints) == 0:
            return False, []

        to_reprocess = []
        for constraint in constraints:
            match, reprocess = constraint.check_and_set(item)
            if match:
                to_reprocess = [reprocess]
                break
            to_reprocess.append(reprocess)

        return match, to_reprocess

    @staticmethod
    def notany(item, constraints):
        """True if none of the constraints match"""
        match, to_reprocess = Constraint.any(item, constraints)
        return not match, to_reprocess

    @staticmethod
    def notall(item, constraints):
        """True if not all of the constraints match"""
        match, to_reprocess = Constraint.all(item, constraints)
        return not match, to_reprocess

    def __delitem__(self, key):
        """Not implemented"""
        raise NotImplementedError('Cannot delete a constraint by index.')

    # Make iterable
    # Iteration flattens the tree: every member is itself iterable,
    # with leaf constraints yielding themselves.
    def __iter__(self):
        yield from chain.from_iterable(self.constraints)

    # Index implementation
    def __getitem__(self, key):
        """Retrieve a named constraint

        Members are searched depth-first, in order.

        Raises
        ------
        KeyError
            If no constraint with the given name is found.
        """
        for constraint in self.constraints:
            name = getattr(constraint, 'name', None)
            if name is not None and name == key:
                return constraint
            try:
                # Leaf constraints do not support indexing;
                # nested `Constraint` objects do.
                found = constraint[key]
            except (KeyError, TypeError):
                pass
            else:
                return found
        raise KeyError('Constraint {} not found'.format(key))

    def __repr__(self):
        result = '{}(name={}).{}([{}])'.format(
            self.__class__.__name__,
            str(getattr(self, 'name', None)),
            str(self.reduce.__name__),
            ''.join([
                repr(constraint)
                for constraint in self.constraints
            ])
        )
        return result

    def __setitem__(self, key, value):
        """Not implemented"""
        raise NotImplementedError('Cannot set constraints by index.')

    def __str__(self):
        # Only named leaf constraints are shown, one per line.
        result = '\n'.join([
            str(constraint)
            for constraint in self
            if constraint.name is not None
        ])
        return result
# ---------
# Utilities
# ---------
def meets_conditions(value, conditions):
    """Check whether value meets any of the provided conditions

    Parameters
    ----------
    value : str
        The value to check.

    conditions : regex or [regex[,...]]
        Regular expression, or list of them, to match against.
        Matching ignores case.

    Returns
    -------
    True if any condition is met.
    """
    candidates = conditions if is_iterable(conditions) else [conditions]

    # Each condition is wrapped in start/end anchors before matching.
    # NOTE(review): the anchors are not grouped around the condition,
    # so with a top-level alternation such as 'a|b' only the last
    # alternative is bound by the trailing '$'.
    return any(
        re.match('^' + candidate + '$', value, flags=re.IGNORECASE)
        for candidate in candidates
    )


def reprocess_multivalue(item, source, values, constraint):
    """Reprocess items that have a list of values

    One copy of the item is made for each value, with the
    multi-valued attribute replaced by that single value as a string.

    Parameters
    ----------
    item : dict
        The item.

    source : str
        The attribute which has the multi-values.

    values : list
        The list of values

    constraint : Constraint
        The constraint which is triggering the reprocessing.

    Returns
    -------
    process_list : ProcessList
        The process list to put on the reprocess queue
    """
    def single_valued_copy(single_value):
        duplicate = PoolRow(item)
        duplicate[source] = str(single_value)
        return duplicate

    duplicates = [single_valued_copy(single_value) for single_value in values]
    return ProcessList(
        items=duplicates,
        trigger_constraints=[constraint.id]
    )