2014-02-23

Wanted ListStatisticsCalculator Transformer

> Community: Calculate statistics from a list
Although statistics for list elements can already be calculated by combining the ListExploder and the StatisticsCalculator, a dedicated transformer that works directly on a list attribute would be more convenient.
-----
# Prototype of the ListStatisticsCalculator with Python Script (PythonCaller)
# _count, _mode, _histogram{} are calculated for all elements,
# others are calculated for numeric elements only.
# Partially different from the regular StatisticsCalculator.
import fmeobjects, math

def calculateListStatistics(feature):
    """Calculate statistics for the '_list{}' attribute and store them on the feature.

    The feature only needs to provide getAttribute(name) and
    setAttribute(name, value). If '_list{}' is not a Python list, the
    feature is left untouched.

    Attributes written for all elements:
        _count, _numeric_count, _histogram{i}.value, _histogram{i}.count,
        _mode (only if a non-None mode exists).
    Attributes written only when at least one element is numeric:
        _min, _max, _range, _median, _sum, _mean,
        _stddev (sample standard deviation; needs two or more numeric elements).

    Elements that can be converted with float() are counted in the histogram
    by their float value; all other elements are counted by their original
    value.
    """
    elements = feature.getAttribute('_list{}')
    if not isinstance(elements, list):
        return

    # Collect numeric elements and count occurrences of every element.
    numeric, histogram = [], {}
    for element in elements:
        try:
            key = float(element)
        except (TypeError, ValueError, OverflowError):
            # Not convertible to a number: count it under its original value.
            key = element
        else:
            numeric.append(key)
        histogram[key] = histogram.get(key, 0) + 1
    feature.setAttribute('_count', len(elements))
    feature.setAttribute('_numeric_count', len(numeric))

    # Write the histogram and find the mode. On ties, the value that comes
    # first in the dictionary's iteration order wins (strict '<' below).
    mode, mode_count = None, 0
    for i, (value, count) in enumerate(histogram.items()):
        feature.setAttribute('_histogram{%d}.value' % i, value)
        feature.setAttribute('_histogram{%d}.count' % i, count)
        if mode_count < count:
            mode, mode_count = value, count
    if mode is not None:
        feature.setAttribute('_mode', mode)

    # Remaining statistics are defined for numeric elements only.
    if not numeric:
        return

    numeric.sort()
    n = len(numeric)
    feature.setAttribute('_min', numeric[0])
    feature.setAttribute('_max', numeric[-1])
    feature.setAttribute('_range', numeric[-1] - numeric[0])

    # Median. Floor division keeps the index an int on both Python 2 and 3;
    # true division would yield a float index and raise TypeError on Python 3.
    mid = n // 2
    if n % 2:
        median = numeric[mid]
    else:
        median = 0.5 * (numeric[mid - 1] + numeric[mid])
    feature.setAttribute('_median', median)

    # Sum and mean (elements are floats, so the division is a float division).
    total = sum(numeric)
    mean = total / n
    feature.setAttribute('_sum', total)
    feature.setAttribute('_mean', mean)

    # Sample standard deviation (n - 1 denominator), undefined for one value.
    if 1 < n:
        squared_dev = sum((x - mean) ** 2 for x in numeric)
        feature.setAttribute('_stddev', math.sqrt(squared_dev / (n - 1)))
-----

=====
2014-09-20: See also the ListStatisticsCalculator in FME Store / Pragmatica.
=====

No comments:

Post a Comment