Source code for gatelfdata.featurenumeric

"""Module for the FeatureNumeric class"""

import sys
import logging

# Module-level logging setup: records at INFO and above are written to
# stderr, prefixed with timestamp, logger name and level.
formatter = logging.Formatter(
    fmt='%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
streamhandler = logging.StreamHandler(stream=sys.stderr)
streamhandler.setFormatter(formatter)

logger = logging.getLogger(__name__)
logger.addHandler(streamhandler)
logger.setLevel(logging.INFO)


class FeatureNumeric(object):
    """A numeric feature whose values can optionally be normalized.

    The instance keeps the statistics it needs for normalization (mean,
    variance, min, max) and a default normalizer. Calling the instance
    passes a value (or a list of values) through, normalizing it only when
    a normalization is explicitly requested.
    """

    def __init__(self, fname, attrinfo, featurestats):
        """For now, we do not do anything fancy for numeric features.

        :param fname: name of the feature, used in ``str()``/``repr()``.
        :param attrinfo: attribute information; stored as-is and not
            interpreted by this class.
        :param featurestats: mapping that must provide the keys ``"mean"``,
            ``"variance"``, ``"min"`` and ``"max"``.
        :raises KeyError: if one of the required statistics is missing.
        """
        self.fname = fname
        self.attrinfo = attrinfo
        self.featurestats = featurestats
        # Store the values we need for normalization.
        self.mean = self.featurestats["mean"]
        self.var = self.featurestats["variance"]
        self.min = self.featurestats["min"]
        self.max = self.featurestats["max"]
        self.range = (self.max - self.min)
        # NOTE(review): this reads the "max" statistic, so n always equals
        # self.max. Presumably a count was intended - confirm which key the
        # statistics actually provide before changing it. The attribute is
        # not used anywhere else in this class.
        self.n = self.featurestats["max"]
        # The normalizer is either None or a function f(value) that returns
        # the normalized value. Mean/variance normalization is the default.
        self.normalizer = self.normalize_meanvar

    def set_normalization(self, normalize=None):
        """Set the default normalizer used by :meth:`normalize`.

        :param normalize: either one of the strings ``'meanvar'`` or
            ``'minmax'``, or a function that takes and returns a float
            (including the normalization methods of this class). Any
            non-string value is stored unchanged, so ``None`` disables the
            default normalization.
        :raises ValueError: if a string is given that does not name a known
            normalization method.
        """
        if isinstance(normalize, str):
            self.normalizer = self._normalizer4str(normalize)
        else:
            self.normalizer = normalize

    def normalize_meanvar(self, value):
        """Return ``(value - mean) / variance``.

        The value is returned unchanged if the variance is not positive.
        """
        # NOTE(review): divides by the variance, not the standard deviation;
        # kept as-is because changing it would alter every normalized value.
        if self.var > 0.0:
            return (value - self.mean) / self.var
        return value

    def normalize_minmax(self, value):
        """Return ``(value - min) / (max - min)``.

        The value is returned unchanged if the range is not positive.
        """
        if self.range > 0.0:
            return (value - self.min) / self.range
        return value

    def _normalizer4str(self, name):
        """Map a normalization method name to the bound method.

        :param name: ``'minmax'`` or ``'meanvar'``.
        :raises ValueError: for any other name.
        """
        if name == 'minmax':
            return self.normalize_minmax
        if name == 'meanvar':
            return self.normalize_meanvar
        # ValueError is a subclass of Exception, so callers that catch the
        # generic Exception keep working.
        raise ValueError("Not a known normalization method: %s" % name)

    def normalize(self, value, normalize=None):
        """Normalize a single value.

        :param value: the value to normalize.
        :param normalize: selects the normalization to apply:

            * ``None``: use the currently set default normalizer, if any;
            * ``False`` or any other non-None false value: explicitly do
              not normalize, overriding the default;
            * a string: the normalization method of that name;
            * any other true value: called as a function on the value.
        :return: the normalized value, or ``value`` itself if no
            normalization applies.
        :raises ValueError: if a string does not name a known method.
        """
        if normalize:
            if isinstance(normalize, str):
                return self._normalizer4str(normalize)(value)
            return normalize(value)
        if normalize is None and self.normalizer:
            return self.normalizer(value)
        # Either normalization was explicitly switched off, or the default
        # was requested but none is set: hand back the value unchanged
        # rather than falling off the end and returning None.
        return value

    def __call__(self, valueorlist, normalize=None):
        """Pass through a value or list of values, optionally normalized.

        :param valueorlist: a single value (should be a float) or a list of
            values.
        :param normalize: if true, each value is passed to
            :meth:`normalize` with this setting. If false - including the
            default ``None`` - the input is returned unchanged, so the
            default normalizer is not applied here.
        :return: the original or normalized value or list of values.
        """
        if not normalize:
            return valueorlist
        if isinstance(valueorlist, list):
            return [self.normalize(val, normalize=normalize)
                    for val in valueorlist]
        return self.normalize(valueorlist, normalize=normalize)

    def type_converted(self):
        """Return the name of the type after conversion: ``"float"``."""
        return "float"

    def type_original(self):
        """Return the name of the original feature type: ``"numeric"``."""
        return "numeric"

    def __str__(self):
        return "FeatureNumeric(name=%s)" % self.fname

    def __repr__(self):
        return "FeatureNumeric(name=%r)" % self.fname