Report a bug
If you spot a problem with this page, click here to create a GitHub issue.
Improve this page
Quickly fork, edit online, and submit a pull request for this page. Requires a signed-in GitHub account. This works well for small changes. If you'd like to make larger changes you may want to consider using a local clone.

mir.stat.transform

This module contains algorithms for transforming data that are useful in statistical applications.
Authors:
John Michael Hall, Ilya Yaroshenko
template sweep(alias fun, string op)
For each e of the input, applies e op m where m is the result of fun and op is an operation, such as "+", "-", "*", or "/". For instance, if op = "-", then this function computes e - m for each e of the input and where m is the result of applying fun to the input. Overloads are provided to directly provide m to the function, rather than calculate it using fun.
Parameters:
fun function used to sweep
op operation
Returns:
The input with e op m applied to each element e
See Also:
center
auto sweep(Iterator, size_t N, SliceKind kind)(Slice!(Iterator, N, kind) slice);

auto sweep(T)(T[] array);

auto sweep(T)(T withAsSlice)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
template sweep(string op)
Parameters:
op operation
Examples:
Sweep vector
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;

static double f(T)(T x) {
    return 3.5;
}

auto x = [1.0, 2, 3, 4, 5, 6].sliced;
assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
Examples:
Sweep dynamic array
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;

static double f(T)(T x) {
    return 3.5;
}

auto x = [1.0, 2, 3, 4, 5, 6];
assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
Examples:
Sweep matrix
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice: fuse;

static double f(T)(T x) {
    return 3.5;
}

auto x = [
    [1.0, 2, 3],
    [4.0, 5, 6]
].fuse;

auto y0 = [
    [-2.5, -1.5, -0.5],
    [ 0.5,  1.5,  2.5]
];

auto y1 = [
    [4.5, 5.5, 6.5],
    [7.5, 8.5, 9.5]
];

assert(x.sweep!(f, "-").all!approxEqual(y0));
assert(x.sweep!"-"(3.5).all!approxEqual(y0));
assert(x.sweep!(f, "+").all!approxEqual(y1));
Examples:
Column sweep matrix
import mir.algorithm.iteration: all, equal;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;
import mir.ndslice.topology: alongDim, byDim, map;

static double f(T)(T x) {
    return 0.5 * (x[0] +x[1]);
}

auto x = [
    [20.0, 100.0, 2000.0],
    [10.0,   5.0,    2.0]
].fuse;

auto result = [
    [ 5.0,  47.5,  999],
    [-5.0, -47.5, -999]
].fuse;

// Use byDim with map to sweep mean of row/column.
auto xSweepByDim = x.byDim!1.map!(sweep!(f, "-"));
auto resultByDim = result.byDim!1;
assert(xSweepByDim.equal!(equal!approxEqual)(resultByDim));

auto xSweepAlongDim = x.alongDim!0.map!(sweep!(f, "-"));
auto resultAlongDim = result.alongDim!0;
assert(xSweepAlongDim.equal!(equal!approxEqual)(resultAlongDim));
Examples:
Can also pass arguments to sweep function
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;

static double f(T)(T x, double a) {
    return a;
}

static double g(double a, T)(T x) {
    return a;
}

auto x = [1.0, 2, 3, 4, 5, 6].sliced;
assert(x.sweep!(a => f(a, 3.5), "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(a => f(a, 3.5), "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
assert(x.sweep!(a => g!3.5(a), "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(a => g!3.5(a), "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
Examples:
Sweep withAsSlice
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.rc.array: RCArray;

static double f(T)(T x) {
    return 3.5;
}

auto x = RCArray!double(6);
foreach(i, ref e; x)
    e = i + 1;

assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
auto sweep(Iterator, size_t N, SliceKind kind, T)(Slice!(Iterator, N, kind) slice, T m);

auto sweep(T)(T[] array, T m);

auto sweep(T, U)(T withAsSlice, U m)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
T m value to pass to vmap
template scale(alias centralTendency = mean!(Summation.appropriate), alias dispersion = standardDeviation!(VarianceAlgo.online, Summation.appropriate))
Scales the input. By default, the input is first centered using the mean of the input. A custom function may also be provided using centralTendency. The centered input is then divided by the sample standard deviation of the input. A custom function may also be provided using dispersion. Overloads are also provided to scale with variables m and d, which correspond to the results of centralTendency and dispersion. This function is equivalent to center when passing d = 1.
Parameters:
centralTendency function used to center input, default is mean
dispersion function used to scale (divide) the centered input, default is standardDeviation
Returns:
The scaled result
auto scale(Iterator, size_t N, SliceKind kind)(Slice!(Iterator, N, kind) slice);

auto scale(T)(T[] array);

auto scale(T)(T withAsSlice)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
auto scale(Iterator, size_t N, SliceKind kind, T, U)(Slice!(Iterator, N, kind) slice, T m, U d);

auto scale(T, U)(T[] array, T m, U d);

auto scale(T, U, V)(T withAsSlice, U m, V d)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
T m value to subtract from slice
U d value to divide slice by
Examples:
Scale vector
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.math.stat: mean, gmean, hmean, median, standardDeviation;
import mir.ndslice.slice: sliced;

auto x = [1.0, 2, 3, 4, 5, 6].sliced;

assert(x.scale.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
assert(x.scale(3.5, 1.87083).all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));

// Can scale using different `centralTendency` functions
assert(x.scale!hmean.all!approxEqual([-0.774512, -0.23999, 0.294533, 0.829055, 1.363578, 1.898100]));
assert(x.scale!gmean.all!approxEqual([-1.065728, -0.531206, 0.003317, 0.537839, 1.072362, 1.606884]));
assert(x.scale!median.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));

// Can scale using different `centralTendency` and `dispersion` functions
assert(x.scale!(mean, a => a.standardDeviation(true)).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
assert(x.scale!(hmean, a => a.standardDeviation(true)).all!approxEqual([-0.848436, -0.262896, 0.322645, 0.908185, 1.493725, 2.079265]));
assert(x.scale!(gmean, a => a.standardDeviation(true)).all!approxEqual([-1.167447, -0.581907, 0.003633, 0.589173, 1.174713, 1.760253]));
assert(x.scale!(median, a => a.standardDeviation(true)).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
Examples:
Scale dynamic array
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;

auto x = [1.0, 2, 3, 4, 5, 6];
assert(x.scale.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
assert(x.scale(3.5, 1.87083).all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
Examples:
Scale matrix
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice: fuse;

auto x = [
    [1.0, 2, 3], 
    [4.0, 5, 6]
].fuse;

assert(x.scale.all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
assert(x.scale(3.5, 1.87083).all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
Examples:
Column scale matrix
import mir.algorithm.iteration: all, equal;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;
import mir.ndslice.topology: alongDim, byDim, map;

auto x = [
    [20.0, 100.0, 2000.0],
    [10.0,   5.0,    2.0]
].fuse;

auto result = [
    [ 0.707107,  0.707107,  0.707107],
    [-0.707107, -0.707107, -0.707107]
].fuse;

// Use byDim with map to scale by row/column.
auto xScaleByDim = x.byDim!1.map!scale;
auto resultByDim = result.byDim!1;
assert(xScaleByDim.equal!(equal!approxEqual)(resultByDim));

auto xScaleAlongDim = x.alongDim!0.map!scale;
auto resultAlongDim = result.alongDim!0;
assert(xScaleAlongDim.equal!(equal!approxEqual)(resultAlongDim));
Examples:
Can also pass arguments to mean and standardDeviation functions used by scale
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.math.stat: mean, standardDeviation;
import mir.ndslice.slice: sliced;

//Set sum algorithm
auto a = [1, 1e100, 1, -1e100];

auto x = a.sliced * 10_000;

auto result = [6.123724e-101, 1.224745, 6.123724e-101, -1.224745].sliced;

assert(x.scale!(mean!"kbn", standardDeviation!("online", "kbn")).all!approxEqual(result));
assert(x.scale!(mean!"kb2", standardDeviation!("online", "kb2")).all!approxEqual(result));
assert(x.scale!(mean!"precise", standardDeviation!("online", "precise")).all!approxEqual(result));
template zscore(F, VarianceAlgo varianceAlgo = VarianceAlgo.online, Summation summation = Summation.appropriate)
Computes the Z-score of the input. The Z-score is computed by first calculating the mean and standard deviation of the input, by default in one pass, and then scaling the input using those values.
Returns:
The z-score of the input
auto zscore(Iterator, size_t N, SliceKind kind)(Slice!(Iterator, N, kind) slice, bool isPopulation = false);

auto zscore(T)(T[] array, bool isPopulation = false);

auto zscore(T)(T withAsSlice, bool isPopulation = false)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
bool isPopulation true if population standard deviation, false if sample (default)
template zscore(VarianceAlgo varianceAlgo = VarianceAlgo.online, Summation summation = Summation.appropriate)

template zscore(F, string varianceAlgo, string summation = "appropriate")

template zscore(string varianceAlgo, string summation = "appropriate")
Examples:
zscore vector
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;

auto x = [1.0, 2, 3, 4, 5, 6].sliced;

assert(x.zscore.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
assert(x.zscore(true).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
Examples:
zscore dynamic array
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;

auto x = [1.0, 2, 3, 4, 5, 6];
assert(x.zscore.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
assert(x.zscore(true).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
Examples:
zscore matrix
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;

auto x = [
    [1.0, 2, 3], 
    [4.0, 5, 6]
].fuse;

assert(x.zscore.all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
assert(x.zscore(true).all!approxEqual([[-1.46385, -0.87831, -0.29277], [0.29277, 0.87831, 1.46385]]));
Examples:
Column zscore matrix
import mir.algorithm.iteration: all, equal;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;
import mir.ndslice.topology: alongDim, byDim, map;

auto x = [
    [20.0, 100.0, 2000.0],
    [10.0,   5.0,    2.0]
].fuse;

auto result = [
    [ 0.707107,  0.707107,  0.707107],
    [-0.707107, -0.707107, -0.707107]
].fuse;

// Use byDim with map to scale by row/column.
auto xScaleByDim = x.byDim!1.map!zscore;
auto resultByDim = result.byDim!1;
assert(xScaleByDim.equal!(equal!approxEqual)(resultByDim));

auto xZscoreAlongDim = x.alongDim!0.map!zscore;
auto resultAlongDim = result.alongDim!0;
assert(xZscoreAlongDim.equal!(equal!approxEqual)(resultAlongDim));
Examples:
Can control how mean and standardDeviation are calculated and output type
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;
import mir.ndslice.topology: repeat;

//Set sum algorithm or output type
auto a = [1, 1e100, 1, -1e100];

auto x = a.sliced * 10_000;

auto result = [6.123724e-101, 1.224745, 6.123724e-101, -1.224745].sliced;

assert(x.zscore!("online", "kbn").all!approxEqual(result));
assert(x.zscore!("online", "kb2").all!approxEqual(result));
assert(x.zscore!("online", "precise").all!approxEqual(result));
assert(x.zscore!(double, "online", "precise").all!approxEqual(result));

auto y = [uint.max, uint.max / 2, uint.max / 3].sliced;
assert(y.zscore!ulong.all!approxEqual([1.120897, -0.320256, -0.800641]));
auto zscore(Iterator, size_t N, SliceKind kind)(Slice!(Iterator, N, kind) slice, bool isPopulation = false);

auto zscore(T)(T[] array, bool isPopulation = false);

auto zscore(T)(T withAsSlice, bool isPopulation = false)
if (hasAsSlice!T);
Parameters and return value are the same as for the zscore overloads documented above.