From bcbe7b453faa7741475a7a13a5e10f08cec5aaed Mon Sep 17 00:00:00 2001
From: Adrian Lanzafame
Date: Thu, 21 Mar 2019 16:34:58 +1000
Subject: [PATCH] refactor from big.Float to float64 and add prob tests

License: MIT
Signed-off-by: Adrian Lanzafame
---
 monitor/metrics/prob.go      |  45 ++---
 monitor/metrics/prob_test.go | 373 +++++++++++++++++++++++++++++++++++
 2 files changed, 390 insertions(+), 28 deletions(-)
 create mode 100644 monitor/metrics/prob_test.go

diff --git a/monitor/metrics/prob.go b/monitor/metrics/prob.go
index ead55b65..49cb9f91 100644
--- a/monitor/metrics/prob.go
+++ b/monitor/metrics/prob.go
@@ -24,63 +24,52 @@ package metrics
 
 import (
 	"math"
-	"math/big"
 )
 
 // Phi returns the φ-failure for the given value and distribution.
 func phi(v float64, d []int64) float64 {
 	u := mean(d)
 	o := standardDeviation(d)
-	cdf := cdf(u, o, big.NewFloat(v))
-	phi := -math.Log10(1 - cdf)
-	if math.IsInf(phi, 1) {
-		phi = 0
+	if phi := -math.Log10(1 - cdf(u, o, v)); !math.IsInf(phi, 1) {
+		return phi
 	}
-	return phi
+	return 0
 }
 
 // CDF returns the cumulative distribution function of the given
 // normal distribution, for the given value.
-func cdf(u, o, v *big.Float) float64 {
-	var a, b, c big.Float
-	c.Quo(b.Sub(v, u), a.Mul(o, big.NewFloat(math.Sqrt2)))
-	cf, _ := c.Float64()
-	cdf := ((1.0 / 2.0) * (1 + math.Erf(cf)))
-	return cdf
+func cdf(u, o, v float64) float64 {
+	return ((1.0 / 2.0) * (1 + math.Erf((v-u)/(o*math.Sqrt2))))
 }
 
 // Mean returns the mean of the given sample.
-func mean(values []int64) *big.Float {
+func mean(values []int64) float64 {
 	if len(values) == 0 {
-		return big.NewFloat(0.0)
+		return 0.0
 	}
 	var sum int64
 	for _, v := range values {
 		sum += v
 	}
-	var q big.Float
-	return q.Quo(big.NewFloat(float64(sum)), big.NewFloat(float64(len(values))))
+
+	return float64(sum) / float64(len(values))
 }
 
 // StandardDeviation returns the standard deviation of the given sample.
-func standardDeviation(v []int64) *big.Float {
-	var z big.Float
-	z.Sqrt(variance(v)).Float64()
-	return &z
+func standardDeviation(v []int64) float64 {
+	return math.Sqrt(variance(v))
 }
 
 // Variance returns the variance of the given sample.
-func variance(values []int64) *big.Float {
+func variance(values []int64) float64 {
 	if len(values) == 0 {
-		return big.NewFloat(0.0)
+		return 0.0
 	}
 	m := mean(values)
-	var sum, pwr, res big.Float
+	var sum float64
 	for _, v := range values {
-		d := big.NewFloat(float64(v))
-		d.Sub(d, m)
-		pwr.Mul(d, d)
-		sum.Add(&sum, &pwr)
+		d := float64(v) - m
+		sum += d * d
 	}
-	return res.Quo(&sum, big.NewFloat(float64(len(values))))
+	return sum / float64(len(values))
 }
diff --git a/monitor/metrics/prob_test.go b/monitor/metrics/prob_test.go
new file mode 100644
index 00000000..bc2b838f
--- /dev/null
+++ b/monitor/metrics/prob_test.go
@@ -0,0 +1,373 @@
+package metrics
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+	"time"
+)
+
+// NOTE: Test_phi and Test_cdf contain float64 want values that are 'precise';
+// they look like golden test data, but they ARE NOT. They have been calculated
+// using Wolfram Alpha.
+// The following four links provide examples of calculating the phi value:
+// - standardDeviation: https://www.wolframalpha.com/input/?i=population+standard+deviation+-2,+-4,+-4,+-4,+-5,+-5,+-7,+-9
+// - mean: https://www.wolframalpha.com/input/?i=mean+-2,+-4,+-4,+-4,+-5,+-5,+-7,+-9
+// - cdf: https://www.wolframalpha.com/input/?i=(((1.0+%2F+2.0)+*+(1+%2B+Erf((-4--5)%2F(2*Sqrt2)))))
+// - phi: https://www.wolframalpha.com/input/?i=-log10(1+-+0.691462461274013103637704610608337739883602175554577936)
+//
+// Output from each calculation needs to be copy-pasted over. Look at the phi
+// source code to understand where each variable should go in the cdf calculation.
+func Test_phi(t *testing.T) {
+	type args struct {
+		v float64
+		d []int64
+	}
+	tests := []struct {
+		name string
+		args args
+		want float64
+	}{
+		{
+			"zero values",
+			args{0, []int64{0}},
+			math.NaN(), // won't actually be used in comparison; see math.IsNaN() def
+		},
+		{
+			"increasing values",
+			args{
+				4,
+				[]int64{2, 4, 4, 4, 5, 5, 7, 9},
+			},
+			0.160231392277849,
+		},
+		{
+			"decreasing values",
+			args{
+				-4,
+				[]int64{-2, -4, -4, -4, -5, -5, -7, -9},
+			},
+			0.5106919892652407,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := phi(tt.args.v, tt.args.d)
+			if got != tt.want && !math.IsNaN(got) {
+				t.Errorf("phi() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_cdf(t *testing.T) {
+	type args struct {
+		values []int64
+		v      float64
+	}
+	tests := []struct {
+		name string
+		args args
+		want float64
+	}{
+		{
+			"zero values",
+			args{[]int64{0}, 0},
+			math.NaN(),
+		},
+		{
+			"increasing values",
+			args{
+				[]int64{2, 4, 4, 4, 5, 5, 7, 9},
+				4,
+			},
+			0.3085375387259869,
+		},
+		{
+			"decreasing values",
+			args{
+				[]int64{-2, -4, -4, -4, -5, -5, -7, -9},
+				-4,
+			},
+			0.6914624612740131,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			m := mean(tt.args.values)
+			sd := standardDeviation(tt.args.values)
+			got := cdf(m, sd, tt.args.v)
+			if got != tt.want && !math.IsNaN(got) {
+				t.Errorf("cdf() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_mean(t *testing.T) {
+	type args struct {
+		values []int64
+	}
+	tests := []struct {
+		name string
+		args args
+		want float64
+	}{
+		{
+			"zero values",
+			args{[]int64{}},
+			0,
+		},
+		{
+			"increasing values",
+			args{[]int64{2, 4, 4, 4, 5, 5, 7, 9}},
+			5,
+		},
+		{
+			"decreasing values",
+			args{[]int64{-2, -4, -4, -4, -5, -5, -7, -9}},
+			-5,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := mean(tt.args.values); got != tt.want {
+				t.Errorf("mean() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_standardDeviation(t *testing.T) {
+	type args struct {
+		v []int64
+	}
+	tests := []struct {
+		name string
+		args args
+		want float64
+	}{
+		{
+			"zero values",
+			args{[]int64{}},
+			0,
+		},
+		{
+			"increasing values",
+			args{[]int64{2, 4, 4, 4, 5, 5, 7, 9}},
+			2,
+		},
+		{
+			"decreasing values",
+			args{[]int64{-2, -4, -4, -4, -5, -5, -7, -9}},
+			2,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := standardDeviation(tt.args.v); got != tt.want {
+				t.Errorf("standardDeviation() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_variance(t *testing.T) {
+	type args struct {
+		values []int64
+	}
+	tests := []struct {
+		name string
+		args args
+		want float64
+	}{
+		{
+			"zero values",
+			args{[]int64{}},
+			0,
+		},
+		{
+			"increasing values",
+			args{[]int64{2, 4, 4, 4, 5, 5, 7, 9}},
+			4,
+		},
+		{
+			"decreasing values",
+			args{[]int64{-2, -4, -4, -4, -5, -5, -7, -9}},
+			4,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := variance(tt.args.values); got != tt.want {
+				t.Errorf("variance() = %.5f, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Benchmark_prob_phi(b *testing.B) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+	b.Run("distribution size 10", func(b *testing.B) {
+		d := makeRandSlice(10)
+		v := float64(r.Int63n(25))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			phi(v, d)
+		}
+	})
+
+	b.Run("distribution size 50", func(b *testing.B) {
+		d := makeRandSlice(50)
+		v := float64(r.Int63n(25))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			phi(v, d)
+		}
+	})
+
+	b.Run("distribution size 1000", func(b *testing.B) {
+		d := makeRandSlice(1000)
+		v := float64(r.Int63n(25))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			phi(v, d)
+		}
+	})
+}
+
+func Benchmark_prob_cdf(b *testing.B) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+	b.Run("distribution size 10", func(b *testing.B) {
+		d := makeRandSlice(10)
+		u := mean(d)
+		o := standardDeviation(d)
+		v := float64(r.Int63n(25))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			cdf(u, o, v)
+		}
+	})
+
+	b.Run("distribution size 50", func(b *testing.B) {
+		d := makeRandSlice(50)
+		u := mean(d)
+		o := standardDeviation(d)
+		v := float64(r.Int63n(25))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			cdf(u, o, v)
+		}
+	})
+
+	b.Run("distribution size 1000", func(b *testing.B) {
+		d := makeRandSlice(1000)
+		u := mean(d)
+		o := standardDeviation(d)
+		v := float64(r.Int63n(25))
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			cdf(u, o, v)
+		}
+	})
+}
+
+func Benchmark_prob_mean(b *testing.B) {
+	b.Run("distribution size 10", func(b *testing.B) {
+		d := makeRandSlice(10)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			mean(d)
+		}
+	})
+
+	b.Run("distribution size 50", func(b *testing.B) {
+		d := makeRandSlice(50)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			mean(d)
+		}
+	})
+
+	b.Run("distribution size 1000", func(b *testing.B) {
+		d := makeRandSlice(1000)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			mean(d)
+		}
+	})
+}
+
+func Benchmark_prob_standardDeviation(b *testing.B) {
+	b.Run("distribution size 10", func(b *testing.B) {
+		d := makeRandSlice(10)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			standardDeviation(d)
+		}
+	})
+
+	b.Run("distribution size 50", func(b *testing.B) {
+		d := makeRandSlice(50)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			standardDeviation(d)
+		}
+	})
+
+	b.Run("distribution size 1000", func(b *testing.B) {
+		d := makeRandSlice(1000)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			standardDeviation(d)
+		}
+	})
+}
+
+func Benchmark_prob_variance(b *testing.B) {
+	b.Run("distribution size 10", func(b *testing.B) {
+		d := makeRandSlice(10)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			variance(d)
+		}
+	})
+
+	b.Run("distribution size 50", func(b *testing.B) {
+		d := makeRandSlice(50)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			variance(d)
+		}
+	})
+
+	b.Run("distribution size 1000", func(b *testing.B) {
+		d := makeRandSlice(1000)
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			variance(d)
+		}
+	})
+}
+
+func makeRandSlice(size int) []int64 {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	s := make([]int64, size, size)
+
+	for i := 0; i < size; i++ {
+		s[i] = r.Int63n(25)
+	}
+	return s
+}
+
+func makeRandSliceFloat64(size int) []float64 {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	s := make([]float64, size, size)
+
+	for i := 0; i < size; i++ {
+		s[i] = float64(r.Int63n(25)) + r.Float64()
+	}
+	return s
+}
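
For reference, the calculation chain described in the NOTE in prob_test.go
(mean, then standardDeviation, then cdf, then phi) can be checked end to end
with plain float64 arithmetic. The standalone sketch below is illustrative
only and is not part of the patch; it inlines the helper logic rather than
calling the unexported prob.go functions, and it works through the
"decreasing values" test case, printing the same values used as wants above.

package main

import (
	"fmt"
	"math"
)

func main() {
	// "decreasing values" test case from prob_test.go.
	d := []int64{-2, -4, -4, -4, -5, -5, -7, -9}
	v := float64(-4)

	// Mean of the distribution.
	var sum int64
	for _, x := range d {
		sum += x
	}
	u := float64(sum) / float64(len(d)) // -5

	// Population variance and standard deviation.
	var sq float64
	for _, x := range d {
		diff := float64(x) - u
		sq += diff * diff
	}
	o := math.Sqrt(sq / float64(len(d))) // 2

	// CDF of the normal distribution N(u, o) at v, then the φ value,
	// mirroring the formulas used in prob.go.
	cdf := 0.5 * (1 + math.Erf((v-u)/(o*math.Sqrt2))) // ≈ 0.6914624612740131
	phi := -math.Log10(1 - cdf)                       // ≈ 0.5106919892652407

	fmt.Println(u, o, cdf, phi)
}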