refactor from big.Float to float64 and add prob tests
License: MIT Signed-off-by: Adrian Lanzafame <adrianlanzafame92@gmail.com>
This commit is contained in:
parent
e187b800cf
commit
bcbe7b453f
|
@ -24,63 +24,52 @@ package metrics
|
|||
|
||||
import (
|
||||
"math"
|
||||
"math/big"
|
||||
)
|
||||
|
||||
// Phi returns the φ-failure for the given value and distribution.
|
||||
func phi(v float64, d []int64) float64 {
|
||||
u := mean(d)
|
||||
o := standardDeviation(d)
|
||||
cdf := cdf(u, o, big.NewFloat(v))
|
||||
phi := -math.Log10(1 - cdf)
|
||||
if math.IsInf(phi, 1) {
|
||||
phi = 0
|
||||
if phi := -math.Log10(1 - cdf(u, o, v)); !math.IsInf(phi, 1) {
|
||||
return phi
|
||||
}
|
||||
return phi
|
||||
return 0
|
||||
}
|
||||
|
||||
// cdf returns the cumulative distribution function of the normal
// distribution with mean u and standard deviation o, evaluated at v:
//
//	0.5 * (1 + erf((v - u) / (o * sqrt(2))))
//
// A zero deviation is not special-cased: with o == 0 the argument of erf is
// ±Inf (giving 0 or 1) when v != u, and 0/0 (giving NaN) when v == u.
func cdf(u, o, v float64) float64 {
	return 0.5 * (1 + math.Erf((v-u)/(o*math.Sqrt2)))
}
|
||||
|
||||
// mean returns the arithmetic mean of the given sample, or 0 for an empty
// sample.
//
// The sum is accumulated in float64 rather than int64 so that large samples
// cannot silently wrap around; for sums below 2^53 the result is identical
// to exact integer summation.
func mean(values []int64) float64 {
	if len(values) == 0 {
		return 0.0
	}
	var sum float64
	for _, v := range values {
		sum += float64(v)
	}
	return sum / float64(len(values))
}
|
||||
|
||||
// StandardDeviation returns standard deviation of the given sample.
|
||||
func standardDeviation(v []int64) *big.Float {
|
||||
var z big.Float
|
||||
z.Sqrt(variance(v)).Float64()
|
||||
return &z
|
||||
func standardDeviation(v []int64) float64 {
|
||||
return math.Sqrt(variance(v))
|
||||
}
|
||||
|
||||
// Variance returns variance if the given sample.
|
||||
func variance(values []int64) *big.Float {
|
||||
func variance(values []int64) float64 {
|
||||
if len(values) == 0 {
|
||||
return big.NewFloat(0.0)
|
||||
return 0.0
|
||||
}
|
||||
m := mean(values)
|
||||
var sum, pwr, res big.Float
|
||||
var sum float64
|
||||
for _, v := range values {
|
||||
d := big.NewFloat(float64(v))
|
||||
d.Sub(d, m)
|
||||
pwr.Mul(d, d)
|
||||
sum.Add(&sum, &pwr)
|
||||
d := float64(v) - m
|
||||
sum += d * d
|
||||
}
|
||||
return res.Quo(&sum, big.NewFloat(float64(len(values))))
|
||||
return sum / float64(len(values))
|
||||
}
|
||||
|
|
373
monitor/metrics/prob_test.go
Normal file
373
monitor/metrics/prob_test.go
Normal file
|
@ -0,0 +1,373 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// NOTE: Test_phi and Test_cdf contain float64 want values that are 'precise',
|
||||
// they look like golden test data, they ARE NOT. They have been calculated
|
||||
// using Wolfram Alpha. The following four links provide examples of calculating
|
||||
// the phi value:
|
||||
// - standardDeviation: https://www.wolframalpha.com/input/?i=population+standard+deviation+-2,+-4,+-4,+-4,+-5,+-5,+-7,+-9
|
||||
// - mean: https://www.wolframalpha.com/input/?i=mean+-2,+-4,+-4,+-4,+-5,+-5,+-7,+-9
|
||||
// - cdf: https://www.wolframalpha.com/input/?i=(((1.0+%2F+2.0)+*+(1+%2B+Erf((-4--5)%2F(2*Sqrt2)))))
|
||||
// - phi: https://www.wolframalpha.com/input/?i=-log10(1+-+0.691462461274013103637704610608337739883602175554577936)
|
||||
//
|
||||
// Output from each calculation needs to be copy-pasted over. Look at the phi source code
|
||||
// to understand where each variable should go in the cdf calculation.
|
||||
func Test_phi(t *testing.T) {
|
||||
type args struct {
|
||||
v float64
|
||||
d []int64
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want float64
|
||||
}{
|
||||
{
|
||||
"zero values",
|
||||
args{0, []int64{0}},
|
||||
math.NaN(), // won't actually be used in comparison; see math.IsNaN() def
|
||||
},
|
||||
{
|
||||
"increasing values",
|
||||
args{
|
||||
4,
|
||||
[]int64{2, 4, 4, 4, 5, 5, 7, 9},
|
||||
},
|
||||
0.160231392277849,
|
||||
},
|
||||
{
|
||||
"decreasing values",
|
||||
args{
|
||||
-4,
|
||||
[]int64{-2, -4, -4, -4, -5, -5, -7, -9},
|
||||
},
|
||||
0.5106919892652407,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := phi(tt.args.v, tt.args.d)
|
||||
if got != tt.want && !math.IsNaN(got) {
|
||||
t.Errorf("phi() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_cdf(t *testing.T) {
|
||||
type args struct {
|
||||
values []int64
|
||||
v float64
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want float64
|
||||
}{
|
||||
{
|
||||
"zero values",
|
||||
args{[]int64{0}, 0},
|
||||
math.NaN(),
|
||||
},
|
||||
{
|
||||
"increasing values",
|
||||
args{
|
||||
[]int64{2, 4, 4, 4, 5, 5, 7, 9},
|
||||
4,
|
||||
},
|
||||
0.3085375387259869,
|
||||
},
|
||||
{
|
||||
"decreasing values",
|
||||
args{
|
||||
[]int64{-2, -4, -4, -4, -5, -5, -7, -9},
|
||||
-4,
|
||||
},
|
||||
0.6914624612740131,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
m := mean(tt.args.values)
|
||||
sd := standardDeviation(tt.args.values)
|
||||
got := cdf(m, sd, tt.args.v)
|
||||
if got != tt.want && !math.IsNaN(got) {
|
||||
t.Errorf("cdf() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_mean(t *testing.T) {
|
||||
type args struct {
|
||||
values []int64
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want float64
|
||||
}{
|
||||
{
|
||||
"zero values",
|
||||
args{[]int64{}},
|
||||
0,
|
||||
},
|
||||
{
|
||||
"increasing values",
|
||||
args{[]int64{2, 4, 4, 4, 5, 5, 7, 9}},
|
||||
5,
|
||||
},
|
||||
{
|
||||
"decreasing values",
|
||||
args{[]int64{-2, -4, -4, -4, -5, -5, -7, -9}},
|
||||
-5,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := mean(tt.args.values); got != tt.want {
|
||||
t.Errorf("mean() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_standardDeviation(t *testing.T) {
|
||||
type args struct {
|
||||
v []int64
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want float64
|
||||
}{
|
||||
{
|
||||
"zero values",
|
||||
args{[]int64{}},
|
||||
0,
|
||||
},
|
||||
{
|
||||
"increasing values",
|
||||
args{[]int64{2, 4, 4, 4, 5, 5, 7, 9}},
|
||||
2,
|
||||
},
|
||||
{
|
||||
"decreasing values",
|
||||
args{[]int64{-2, -4, -4, -4, -5, -5, -7, -9}},
|
||||
2,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := standardDeviation(tt.args.v); got != tt.want {
|
||||
t.Errorf("standardDeviation() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_variance(t *testing.T) {
|
||||
type args struct {
|
||||
values []int64
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want float64
|
||||
}{
|
||||
{
|
||||
"zero values",
|
||||
args{[]int64{}},
|
||||
0,
|
||||
},
|
||||
{
|
||||
"increasing values",
|
||||
args{[]int64{2, 4, 4, 4, 5, 5, 7, 9}},
|
||||
4,
|
||||
},
|
||||
{
|
||||
"decreasing values",
|
||||
args{[]int64{-2, -4, -4, -4, -5, -5, -7, -9}},
|
||||
4,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := variance(tt.args.values); got != tt.want {
|
||||
t.Errorf("variance() = %.5f, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_prob_phi(b *testing.B) {
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
|
||||
b.Run("distribution size 10", func(b *testing.B) {
|
||||
d := makeRandSlice(10)
|
||||
v := float64(r.Int63n(25))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
phi(v, d)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 50", func(b *testing.B) {
|
||||
d := makeRandSlice(50)
|
||||
v := float64(r.Int63n(25))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
phi(v, d)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 1000", func(b *testing.B) {
|
||||
d := makeRandSlice(1000)
|
||||
v := float64(r.Int63n(25))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
phi(v, d)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Benchmark_prob_cdf(b *testing.B) {
|
||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
|
||||
b.Run("distribution size 10", func(b *testing.B) {
|
||||
d := makeRandSlice(10)
|
||||
u := mean(d)
|
||||
o := standardDeviation(d)
|
||||
v := float64(r.Int63n(25))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
cdf(u, o, v)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 50", func(b *testing.B) {
|
||||
d := makeRandSlice(50)
|
||||
u := mean(d)
|
||||
o := standardDeviation(d)
|
||||
v := float64(r.Int63n(25))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
cdf(u, o, v)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 1000", func(b *testing.B) {
|
||||
d := makeRandSlice(1000)
|
||||
u := mean(d)
|
||||
o := standardDeviation(d)
|
||||
v := float64(r.Int63n(25))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
cdf(u, o, v)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Benchmark_prob_mean(b *testing.B) {
|
||||
b.Run("distribution size 10", func(b *testing.B) {
|
||||
d := makeRandSlice(10)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
mean(d)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 50", func(b *testing.B) {
|
||||
d := makeRandSlice(50)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
mean(d)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 1000", func(b *testing.B) {
|
||||
d := makeRandSlice(1000)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
mean(d)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Benchmark_prob_standardDeviation(b *testing.B) {
|
||||
b.Run("distribution size 10", func(b *testing.B) {
|
||||
d := makeRandSlice(10)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
standardDeviation(d)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 50", func(b *testing.B) {
|
||||
d := makeRandSlice(50)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
standardDeviation(d)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 1000", func(b *testing.B) {
|
||||
d := makeRandSlice(1000)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
standardDeviation(d)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Benchmark_prob_variance(b *testing.B) {
|
||||
b.Run("distribution size 10", func(b *testing.B) {
|
||||
d := makeRandSlice(10)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
variance(d)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 50", func(b *testing.B) {
|
||||
d := makeRandSlice(50)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
variance(d)
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("distribution size 1000", func(b *testing.B) {
|
||||
d := makeRandSlice(1000)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
variance(d)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// makeRandSlice returns a slice of size pseudo-random values, each in
// [0, 25), drawn from a generator seeded with the current time.
//
// Every element is filled, including the last one.
func makeRandSlice(size int) []int64 {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	s := make([]int64, size)

	for i := range s {
		s[i] = r.Int63n(25)
	}
	return s
}
|
||||
|
||||
// makeRandSliceFloat64 returns a slice of size pseudo-random values, each
// the sum of an integer in [0, 25) and a fraction in [0, 1), drawn from a
// generator seeded with the current time.
//
// Every element is filled, including the last one.
func makeRandSliceFloat64(size int) []float64 {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	s := make([]float64, size)

	for i := range s {
		s[i] = float64(r.Int63n(25)) + r.Float64()
	}
	return s
}
|
Loading…
Reference in New Issue
Block a user