Report a bug
If you spot a problem with this page, click here to create a GitHub issue.
Improve this page
Quickly fork, edit online, and submit a pull request for this page. Requires a signed-in GitHub account. This works well for small changes. If you'd like to make larger changes you may want to consider using a local clone.

mir.stat.transform

This module contains algorithms for transforming data that are useful in statistical applications.
Authors:
John Michael Hall, Ilya Yaroshenko
template sweep(alias fun, string op)
For each e of the input, applies e op m where m is the result of fun and op is an operation, such as "+", "-", "*", or "/". For instance, if op = "-", then this function computes e - m for each e of the input and where m is the result of applying fun to the input. Overloads are provided to directly provide m to the function, rather than calculate it using fun.
Parameters:
fun function used to sweep
op operation
Returns:
The input with e op m applied to each element e
See Also:
center
auto sweep(Iterator, size_t N, SliceKind kind)(Slice!(Iterator, N, kind) slice);

auto sweep(T)(T[] array);

auto sweep(T)(T withAsSlice)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
template sweep(string op)
Parameters:
op operation
Examples:
Sweep vector
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;

static double f(T)(T x) {
    return 3.5;
}

auto x = [1.0, 2, 3, 4, 5, 6].sliced;
assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
Examples:
Sweep dynamic array
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;

static double f(T)(T x) {
    return 3.5;
}

auto x = [1.0, 2, 3, 4, 5, 6];
assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
Examples:
Sweep matrix
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice: fuse;

static double f(T)(T x) {
    return 3.5;
}

auto x = [
    [1.0, 2, 3],
    [4.0, 5, 6]
].fuse;

auto y0 = [
    [-2.5, -1.5, -0.5],
    [ 0.5,  1.5,  2.5]
];

auto y1 = [
    [4.5, 5.5, 6.5],
    [7.5, 8.5, 9.5]
];

assert(x.sweep!(f, "-").all!approxEqual(y0));
assert(x.sweep!"-"(3.5).all!approxEqual(y0));
assert(x.sweep!(f, "+").all!approxEqual(y1));
Examples:
Column sweep matrix
import mir.algorithm.iteration: all, equal;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;
import mir.ndslice.topology: alongDim, byDim, map;

static double f(T)(T x) {
    return 0.5 * (x[0] +x[1]);
}

auto x = [
    [20.0, 100.0, 2000.0],
    [10.0,   5.0,    2.0]
].fuse;

auto result = [
    [ 5.0,  47.5,  999],
    [-5.0, -47.5, -999]
].fuse;

// Use byDim with map to sweep mean of row/column.
auto xSweepByDim = x.byDim!1.map!(sweep!(f, "-"));
auto resultByDim = result.byDim!1;
assert(xSweepByDim.equal!(equal!approxEqual)(resultByDim));

auto xSweepAlongDim = x.alongDim!0.map!(sweep!(f, "-"));
auto resultAlongDim = result.alongDim!0;
assert(xSweepAlongDim.equal!(equal!approxEqual)(resultAlongDim));
Examples:
Can also pass arguments to sweep function
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;

static double f(T)(T x, double a) {
    return a;
}

static double g(double a, T)(T x) {
    return a;
}

auto x = [1.0, 2, 3, 4, 5, 6].sliced;
assert(x.sweep!(a => f(a, 3.5), "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(a => f(a, 3.5), "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
assert(x.sweep!(a => g!3.5(a), "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(a => g!3.5(a), "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
Examples:
Sweep withAsSlice
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.rc.array: RCArray;

static double f(T)(T x) {
    return 3.5;
}

auto x = RCArray!double(6);
foreach(i, ref e; x)
    e = i + 1;

assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
auto sweep(Iterator, size_t N, SliceKind kind, T)(Slice!(Iterator, N, kind) slice, T m);

auto sweep(T)(T[] array, T m);

auto sweep(T, U)(T withAsSlice, U m)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
T m value to pass to vmap
template scale(alias centralTendency = mean!(Summation.appropriate), alias dispersion = standardDeviation!(VarianceAlgo.online, Summation.appropriate))
Scales the input.
By default, the input is first centered using the mean of the input. A custom function may also be provided using centralTendency. The centered input is then divided by the sample standard deviation of the input. A custom function may also be provided using dispersion.
Overloads are also provided to scale with variables m and d, which correspond to the results of centralTendency and dispersion. This function is equivalent to center when passing d = 1.
Parameters:
centralTendency function used to center input, default is mean
dispersion function used to scale (divide) the centered input, default is standardDeviation
Returns:
The scaled result
auto scale(Iterator, size_t N, SliceKind kind)(Slice!(Iterator, N, kind) slice);

auto scale(T)(T[] array);

auto scale(T)(T withAsSlice)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
auto scale(Iterator, size_t N, SliceKind kind, T, U)(Slice!(Iterator, N, kind) slice, T m, U d);

auto scale(T, U)(T[] array, T m, U d);

auto scale(T, U, V)(T withAsSlice, U m, V d)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
T m value to subtract from slice
U d value to divide slice by
Examples:
Scale vector
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.math.stat: mean, gmean, hmean, median, standardDeviation;
import mir.ndslice.slice: sliced;

auto x = [1.0, 2, 3, 4, 5, 6].sliced;

assert(x.scale.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
assert(x.scale(3.5, 1.87083).all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));

// Can scale using different `centralTendency` functions
assert(x.scale!hmean.all!approxEqual([-0.774512, -0.23999, 0.294533, 0.829055, 1.363578, 1.898100]));
assert(x.scale!gmean.all!approxEqual([-1.065728, -0.531206, 0.003317, 0.537839, 1.072362, 1.606884]));
assert(x.scale!median.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));

// Can scale using different `centralTendency` and `dispersion` functions
assert(x.scale!(mean, a => a.standardDeviation(true)).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
assert(x.scale!(hmean, a => a.standardDeviation(true)).all!approxEqual([-0.848436, -0.262896, 0.322645, 0.908185, 1.493725, 2.079265]));
assert(x.scale!(gmean, a => a.standardDeviation(true)).all!approxEqual([-1.167447, -0.581907, 0.003633, 0.589173, 1.174713, 1.760253]));
assert(x.scale!(median, a => a.standardDeviation(true)).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
Examples:
Scale dynamic array
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;

auto x = [1.0, 2, 3, 4, 5, 6];
assert(x.scale.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
assert(x.scale(3.5, 1.87083).all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
Examples:
Scale matrix
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice: fuse;

auto x = [
    [1.0, 2, 3], 
    [4.0, 5, 6]
].fuse;

assert(x.scale.all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
assert(x.scale(3.5, 1.87083).all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
Examples:
Column scale matrix
import mir.algorithm.iteration: all, equal;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;
import mir.ndslice.topology: alongDim, byDim, map;

auto x = [
    [20.0, 100.0, 2000.0],
    [10.0,   5.0,    2.0]
].fuse;

auto result = [
    [ 0.707107,  0.707107,  0.707107],
    [-0.707107, -0.707107, -0.707107]
].fuse;

// Use byDim with map to scale by row/column.
auto xScaleByDim = x.byDim!1.map!scale;
auto resultByDim = result.byDim!1;
assert(xScaleByDim.equal!(equal!approxEqual)(resultByDim));

auto xScaleAlongDim = x.alongDim!0.map!scale;
auto resultAlongDim = result.alongDim!0;
assert(xScaleAlongDim.equal!(equal!approxEqual)(resultAlongDim));
Examples:
Can also pass arguments to mean and standardDeviation functions used by scale
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.math.stat: mean, standardDeviation;
import mir.ndslice.slice: sliced;

//Set sum algorithm
auto a = [1, 1e100, 1, -1e100];

auto x = a.sliced * 10_000;

auto result = [6.123724e-101, 1.224745, 6.123724e-101, -1.224745].sliced;

assert(x.scale!(mean!"kbn", standardDeviation!("online", "kbn")).all!approxEqual(result));
assert(x.scale!(mean!"kb2", standardDeviation!("online", "kb2")).all!approxEqual(result));
assert(x.scale!(mean!"precise", standardDeviation!("online", "precise")).all!approxEqual(result));
template zscore(F, VarianceAlgo varianceAlgo = VarianceAlgo.online, Summation summation = Summation.appropriate)

template zscore(VarianceAlgo varianceAlgo = VarianceAlgo.online, Summation summation = Summation.appropriate)

template zscore(F, string varianceAlgo, string summation = "appropriate")

template zscore(string varianceAlgo, string summation = "appropriate")
Computes the Z-score of the input.
The Z-score is computed by first calculating the mean and standard deviation of the input, by default in one pass, and then scaling the input using those values.
Parameters:
F controls type of output
varianceAlgo algorithm for calculating variance (default: VarianceAlgo.online)
summation algorithm for calculating sums (default: Summation.appropriate)
Returns:
The z-score of the input
Examples:
zscore vector
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;

auto x = [1.0, 2, 3, 4, 5, 6].sliced;

assert(x.zscore.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
assert(x.zscore(true).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
Examples:
zscore dynamic array
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;

auto x = [1.0, 2, 3, 4, 5, 6];
assert(x.zscore.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
assert(x.zscore(true).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
Examples:
zscore matrix
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;

auto x = [
    [1.0, 2, 3], 
    [4.0, 5, 6]
].fuse;

assert(x.zscore.all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
assert(x.zscore(true).all!approxEqual([[-1.46385, -0.87831, -0.29277], [0.29277, 0.87831, 1.46385]]));
Examples:
Column zscore matrix
import mir.algorithm.iteration: all, equal;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;
import mir.ndslice.topology: alongDim, byDim, map;

auto x = [
    [20.0, 100.0, 2000.0],
    [10.0,   5.0,    2.0]
].fuse;

auto result = [
    [ 0.707107,  0.707107,  0.707107],
    [-0.707107, -0.707107, -0.707107]
].fuse;

// Use byDim with map to scale by row/column.
auto xZScoreByDim = x.byDim!1.map!zscore;
auto resultByDim = result.byDim!1;
assert(xZScoreByDim.equal!(equal!approxEqual)(resultByDim));

auto xZScoreAlongDim = x.alongDim!0.map!zscore;
auto resultAlongDim = result.alongDim!0;
assert(xZScoreAlongDim.equal!(equal!approxEqual)(resultAlongDim));
Examples:
Can control how mean and standardDeviation are calculated and output type
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;
import mir.ndslice.topology: repeat;

//Set sum algorithm or output type
auto a = [1, 1e100, 1, -1e100];

auto x = a.sliced * 10_000;

auto result = [6.123724e-101, 1.224745, 6.123724e-101, -1.224745].sliced;

assert(x.zscore!("online", "kbn").all!approxEqual(result));
assert(x.zscore!("online", "kb2").all!approxEqual(result));
assert(x.zscore!("online", "precise").all!approxEqual(result));
assert(x.zscore!(double, "online", "precise").all!approxEqual(result));

auto y = [uint.max, uint.max / 2, uint.max / 3].sliced;
assert(y.zscore!ulong.all!approxEqual([1.120897, -0.320256, -0.800641]));
auto zscore(Iterator, size_t N, SliceKind kind)(Slice!(Iterator, N, kind) slice, bool isPopulation = false);

auto zscore(T)(T[] array, bool isPopulation = false);

auto zscore(T)(T withAsSlice, bool isPopulation = false)
if (hasAsSlice!T);
Parameters:
Slice!(Iterator, N, kind) slice slice
bool isPopulation true if population standard deviation, false if sample (default)
template robustScale(F, QuantileAlgo quantileAlgo = QuantileAlgo.type7, bool allowModifySlice = false)
Scales input using robust statistics.
This function centers the input using the median and then scales the data according to the quantile range defined by (low_quartile, 1 - low_quartile). By default, it uses the interquartile range, whereby low_quartile equals 0.25.
Parameters:
F controls type of output
quantileAlgo algorithm for calculating quantile (default: QuantileAlgo.type7)
allowModifySlice controls whether the input is modified in place, default is false
Returns:
The robust scaled input
auto robustScale(Iterator, size_t N, SliceKind kind, T)(Slice!(Iterator, N, kind) slice, T low_quartile = 0.25);
Parameters:
Slice!(Iterator, N, kind) slice slice
T low_quartile lower end of quartile range
auto robustScale(T)(T[] array, F low_quartile = cast(F)0.25);
Parameters:
T[] array array
F low_quartile lower end of quartile range
auto robustScale(T)(T withAsSlice, F low_quartile = cast(F)0.25)
if (hasAsSlice!T);
Parameters:
T withAsSlice input for which hasAsSlice is true
F low_quartile lower end of quartile range
template robustScale(QuantileAlgo quantileAlgo = QuantileAlgo.type7, bool allowModifySlice = false)

template robustScale(F, string quantileAlgo, bool allowModifySlice = false)

template robustScale(string quantileAlgo, bool allowModifySlice = false)

template robustScale(bool allowModifySlice)
Parameters:
quantileAlgo algorithm for calculating quantile (default: QuantileAlgo.type7)
allowModifySlice controls whether the input is modified in place, default is false
Examples:
robustScale vector
import mir.algorithm.iteration: all, findIndex;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;

static immutable input = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100];
auto x = input.dup.sliced;
auto y = x.robustScale;

assert(y.all!approxEqual([14.583333, 0.583333, -0.083333, 0.083333, 0.416667, -0.083333, 0.583333, -0.583333, -1.583333, -18.750000]));
assert(x.robustScale(0.15).all!approxEqual([8.02752, 0.321101, -0.0458716, 0.0458716, 0.229358, -0.0458716, 0.321101, -0.321101, -0.87156, -10.3211]));

// When allowModifySlice = true, this modifies both the original input and
// the order of the output
auto yCopy = y.idup;
auto z = x.robustScale!true;
size_t j;
foreach(i, ref e; input) {
    j = x.findIndex!(a => a == e);
    assert(z[j].approxEqual(yCopy[i]));
}
Examples:
robustScale dynamic array
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;

auto x = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100];
assert(x.robustScale.all!approxEqual([14.583333, 0.583333, -0.083333, 0.083333, 0.416667, -0.083333, 0.583333, -0.583333, -1.583333, -18.750000]));
Examples:
robustScale matrix
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;

auto x = [
    [100.0, 16, 12, 13,   15], 
    [ 12.0, 16,  9,  3, -100]
].fuse;

assert(x.robustScale.all!approxEqual([[14.583333, 0.583333, -0.083333, 0.083333, 0.416667], [-0.083333, 0.583333, -0.583333, -1.583333, -18.750000]]));
Examples:
Column robustScale matrix
import mir.algorithm.iteration: all, equal;
import mir.math.common: approxEqual;
import mir.ndslice.fuse: fuse;
import mir.ndslice.topology: alongDim, byDim, map;

auto x = [
    [100.0, 16, 12, 13,   15], 
    [ 12.0, 16,  9,  3, -100]
].fuse;

auto result = [
    [28.333333, 0.333333, -1.0, -0.666667,  0.0], 
    [ 0.333333, 0.777778,  0.0, -0.666667, -12.111111]
].fuse;

// Use byDim with map to scale by row/column.
auto xRobustScaleByDim = x.byDim!0.map!robustScale;
auto resultByDim = result.byDim!0;
assert(xRobustScaleByDim.equal!(equal!approxEqual)(resultByDim));

auto xRobustScaleAlongDim = x.alongDim!1.map!robustScale;
auto resultAlongDim = result.alongDim!1;
assert(xRobustScaleAlongDim.equal!(equal!approxEqual)(resultAlongDim));
Examples:
Can control QuantileAlgo and output type
import mir.algorithm.iteration: all;
import mir.math.common: approxEqual;
import mir.ndslice.slice: sliced;
import mir.ndslice.topology: repeat;

//Set `QuantileAlgo` algorithm or output type
auto x = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100].sliced;

assert(x.robustScale!("type9").all!approxEqual([11.864407, 0.474576, -0.0677966, 0.0677966, 0.338983, -0.0677966, 0.474576, -0.474576, -1.288136, -15.254237]));
assert(x.robustScale!("type1").all!approxEqual([12.500000, 0.500000, -0.0714286, 0.0714286, 0.357143, -0.0714286, 0.500000, -0.500000, -1.357143, -16.071429]));
assert(x.robustScale!(float, "type6").all!approxEqual([10.294118f, 0.411765f, -0.0588235f, 0.0588235f, 0.294118f, -0.0588235f, 0.411765f, -0.411765f, -1.117647f, -13.235294f]));

auto y = [uint.max, uint.max / 2, uint.max / 3].sliced;
assert(y.robustScale!"type1".all!approxEqual([0.75, 0, -0.25]));

auto z = [ulong.max, ulong.max / 2, ulong.max / 3].sliced;
assert(z.robustScale!(ulong, "type1").all!approxEqual([0.75, 0, -0.25]));
auto robustScale(Iterator, size_t N, SliceKind kind)(Slice!(Iterator, N, kind) slice, double low_quartile = 0.25);
Parameters:
Slice!(Iterator, N, kind) slice slice
double low_quartile lower end of quartile range
auto robustScale(T)(T[] array, double low_quartile = 0.25);
Parameters:
T[] array array
double low_quartile lower end of quartile range
auto robustScale(T)(T withAsSlice, double low_quartile = 0.25)
if (hasAsSlice!T);
Parameters:
T withAsSlice input for which hasAsSlice is true
double low_quartile lower end of quartile range