refactor from big.Float to float64 and add prob tests

License: MIT
Signed-off-by: Adrian Lanzafame <adrianlanzafame92@gmail.com>
This commit is contained in:
Adrian Lanzafame 2019-03-21 16:34:58 +10:00
parent e187b800cf
commit bcbe7b453f
No known key found for this signature in database
GPG Key ID: 87E40C5D62EAE192
2 changed files with 390 additions and 28 deletions

View File

@ -24,63 +24,52 @@ package metrics
import (
"math"
"math/big"
)
// phi returns the φ-failure value of v against the sample distribution d:
// -log10(1 - cdf(u, o, v)), where u is the mean of d and o is its standard
// deviation. A result of +Inf is reported as 0 instead.
func phi(v float64, d []int64) float64 {
u := mean(d)
o := standardDeviation(d)
cdf := cdf(u, o, big.NewFloat(v))
phi := -math.Log10(1 - cdf)
if math.IsInf(phi, 1) {
phi = 0
if phi := -math.Log10(1 - cdf(u, o, v)); !math.IsInf(phi, 1) {
return phi
}
return phi
return 0
}
// cdf returns the value at v of the cumulative distribution function of the
// normal distribution with mean u and standard deviation o, computed as
// 0.5 * (1 + erf((v - u) / (o * sqrt(2)))).
func cdf(u, o, v *big.Float) float64 {
var a, b, c big.Float
c.Quo(b.Sub(v, u), a.Mul(o, big.NewFloat(math.Sqrt2)))
cf, _ := c.Float64()
cdf := ((1.0 / 2.0) * (1 + math.Erf(cf)))
return cdf
func cdf(u, o, v float64) float64 {
return ((1.0 / 2.0) * (1 + math.Erf((v-u)/(o*math.Sqrt2))))
}
// mean returns the arithmetic mean of the given sample, or 0 when the sample
// is empty.
func mean(values []int64) *big.Float {
func mean(values []int64) float64 {
if len(values) == 0 {
return big.NewFloat(0.0)
return 0.0
}
var sum int64
for _, v := range values {
sum += v
}
var q big.Float
return q.Quo(big.NewFloat(float64(sum)), big.NewFloat(float64(len(values))))
return float64(sum) / float64(len(values))
}
// standardDeviation returns the standard deviation of the given sample,
// computed as the square root of variance(v).
func standardDeviation(v []int64) *big.Float {
var z big.Float
z.Sqrt(variance(v)).Float64()
return &z
func standardDeviation(v []int64) float64 {
return math.Sqrt(variance(v))
}
// variance returns the variance of the given sample: the sum of the squared
// deviations from mean(values), divided by len(values). An empty sample
// yields 0.
func variance(values []int64) *big.Float {
func variance(values []int64) float64 {
if len(values) == 0 {
return big.NewFloat(0.0)
return 0.0
}
m := mean(values)
var sum, pwr, res big.Float
var sum float64
for _, v := range values {
d := big.NewFloat(float64(v))
d.Sub(d, m)
pwr.Mul(d, d)
sum.Add(&sum, &pwr)
d := float64(v) - m
sum += d * d
}
return res.Quo(&sum, big.NewFloat(float64(len(values))))
return sum / float64(len(values))
}

View File

@ -0,0 +1,373 @@
package metrics
import (
"math"
"math/rand"
"testing"
"time"
)
// NOTE: The float64 want values in Test_phi and Test_cdf look 'precise', like
// golden test data, but they ARE NOT. They have been calculated using Wolfram
// Alpha. The following four links provide examples of calculating each value
// that feeds into phi:
// - standardDeviation: https://www.wolframalpha.com/input/?i=population+standard+deviation+-2,+-4,+-4,+-4,+-5,+-5,+-7,+-9
// - mean: https://www.wolframalpha.com/input/?i=mean+-2,+-4,+-4,+-4,+-5,+-5,+-7,+-9
// - cdf: https://www.wolframalpha.com/input/?i=(((1.0+%2F+2.0)+*+(1+%2B+Erf((-4--5)%2F(2*Sqrt2)))))
// - phi: https://www.wolframalpha.com/input/?i=-log10(1+-+0.691462461274013103637704610608337739883602175554577936)
//
// The output of each calculation needs to be copy-pasted into the next one.
// Look at the phi source code to understand where each variable should go in
// the cdf calculation.
//
// Test_phi checks phi against those values. A want of NaN means the result
// itself must be NaN; any other want must be matched to within a small
// absolute tolerance, and a NaN result is then a failure.
func Test_phi(t *testing.T) {
	// tolerance absorbs last-digit floating-point differences (for example
	// between platforms) without hiding real regressions.
	const tolerance = 1e-12

	type args struct {
		v float64
		d []int64
	}
	tests := []struct {
		name string
		args args
		want float64
	}{
		{
			"zero values",
			args{0, []int64{0}},
			math.NaN(), // NaN never compares equal; it is checked with math.IsNaN below
		},
		{
			"increasing values",
			args{
				4,
				[]int64{2, 4, 4, 4, 5, 5, 7, 9},
			},
			0.160231392277849,
		},
		{
			"decreasing values",
			args{
				-4,
				[]int64{-2, -4, -4, -4, -5, -5, -7, -9},
			},
			0.5106919892652407,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := phi(tt.args.v, tt.args.d)

			var ok bool
			if math.IsNaN(tt.want) {
				// Only a NaN result satisfies a NaN expectation.
				ok = math.IsNaN(got)
			} else {
				// A NaN got makes both comparisons false, so it is reported
				// instead of being silently accepted.
				ok = got == tt.want || math.Abs(got-tt.want) <= tolerance
			}
			if !ok {
				t.Errorf("phi() = %v, want %v", got, tt.want)
			}
		})
	}
}
// Test_cdf checks cdf using the mean and standard deviation of each sample.
// A want of NaN means the result itself must be NaN; any other want must be
// matched to within a small absolute tolerance, and a NaN result is then a
// failure.
func Test_cdf(t *testing.T) {
	// tolerance absorbs last-digit floating-point differences (for example
	// between platforms) without hiding real regressions.
	const tolerance = 1e-12

	type args struct {
		values []int64
		v      float64
	}
	tests := []struct {
		name string
		args args
		want float64
	}{
		{
			"zero values",
			args{[]int64{0}, 0},
			math.NaN(), // NaN never compares equal; it is checked with math.IsNaN below
		},
		{
			"increasing values",
			args{
				[]int64{2, 4, 4, 4, 5, 5, 7, 9},
				4,
			},
			0.3085375387259869,
		},
		{
			"decreasing values",
			args{
				[]int64{-2, -4, -4, -4, -5, -5, -7, -9},
				-4,
			},
			0.6914624612740131,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			m := mean(tt.args.values)
			sd := standardDeviation(tt.args.values)
			got := cdf(m, sd, tt.args.v)

			var ok bool
			if math.IsNaN(tt.want) {
				// Only a NaN result satisfies a NaN expectation.
				ok = math.IsNaN(got)
			} else {
				// A NaN got makes both comparisons false, so it is reported
				// instead of being silently accepted.
				ok = got == tt.want || math.Abs(got-tt.want) <= tolerance
			}
			if !ok {
				t.Errorf("cdf() = %v, want %v", got, tt.want)
			}
		})
	}
}
// Test_mean verifies mean for an empty sample and for two eight-element
// samples, one positive and one negative.
func Test_mean(t *testing.T) {
	cases := []struct {
		name   string
		sample []int64
		want   float64
	}{
		{name: "zero values", sample: []int64{}, want: 0},
		{name: "increasing values", sample: []int64{2, 4, 4, 4, 5, 5, 7, 9}, want: 5},
		{name: "decreasing values", sample: []int64{-2, -4, -4, -4, -5, -5, -7, -9}, want: -5},
	}
	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			got := mean(tc.sample)
			if got == tc.want {
				return
			}
			t.Errorf("mean() = %v, want %v", got, tc.want)
		})
	}
}
// Test_standardDeviation verifies standardDeviation for an empty sample and
// for two eight-element samples whose expected result is 2.
func Test_standardDeviation(t *testing.T) {
	type testCase struct {
		name   string
		sample []int64
		want   float64
	}
	cases := []testCase{
		{"zero values", []int64{}, 0},
		{"increasing values", []int64{2, 4, 4, 4, 5, 5, 7, 9}, 2},
		{"decreasing values", []int64{-2, -4, -4, -4, -5, -5, -7, -9}, 2},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := standardDeviation(tc.sample)
			if got != tc.want {
				t.Errorf("standardDeviation() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_variance verifies variance for an empty sample and for two
// eight-element samples whose expected result is 4.
func Test_variance(t *testing.T) {
	cases := []struct {
		name   string
		sample []int64
		want   float64
	}{
		{name: "zero values", sample: []int64{}, want: 0},
		{name: "increasing values", sample: []int64{2, 4, 4, 4, 5, 5, 7, 9}, want: 4},
		{name: "decreasing values", sample: []int64{-2, -4, -4, -4, -5, -5, -7, -9}, want: 4},
	}
	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			got := variance(tc.sample)
			if got == tc.want {
				return
			}
			t.Errorf("variance() = %.5f, want %v", got, tc.want)
		})
	}
}
// Benchmark_prob_phi times phi over random distributions of 10, 50 and 1000
// samples. The distribution and the probed value are drawn before the timer
// is reset, so only the phi calls are measured.
func Benchmark_prob_phi(b *testing.B) {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	benches := []struct {
		name string
		size int
	}{
		{"distribution size 10", 10},
		{"distribution size 50", 50},
		{"distribution size 1000", 1000},
	}
	for _, bc := range benches {
		bc := bc
		b.Run(bc.name, func(b *testing.B) {
			dist := makeRandSlice(bc.size)
			value := float64(rng.Int63n(25))
			b.ResetTimer()
			for n := b.N; n > 0; n-- {
				phi(value, dist)
			}
		})
	}
}
// Benchmark_prob_cdf times cdf using the mean and standard deviation of
// random distributions of 10, 50 and 1000 samples. All inputs are prepared
// before the timer is reset, so only the cdf calls are measured.
func Benchmark_prob_cdf(b *testing.B) {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	benches := []struct {
		name string
		size int
	}{
		{"distribution size 10", 10},
		{"distribution size 50", 50},
		{"distribution size 1000", 1000},
	}
	for _, bc := range benches {
		bc := bc
		b.Run(bc.name, func(b *testing.B) {
			dist := makeRandSlice(bc.size)
			mu := mean(dist)
			sigma := standardDeviation(dist)
			value := float64(rng.Int63n(25))
			b.ResetTimer()
			for n := b.N; n > 0; n-- {
				cdf(mu, sigma, value)
			}
		})
	}
}
// Benchmark_prob_mean times mean over random distributions of 10, 50 and
// 1000 samples. Each distribution is built before the timer is reset.
func Benchmark_prob_mean(b *testing.B) {
	benches := []struct {
		name string
		size int
	}{
		{"distribution size 10", 10},
		{"distribution size 50", 50},
		{"distribution size 1000", 1000},
	}
	for _, bc := range benches {
		bc := bc
		b.Run(bc.name, func(b *testing.B) {
			dist := makeRandSlice(bc.size)
			b.ResetTimer()
			for n := b.N; n > 0; n-- {
				mean(dist)
			}
		})
	}
}
// Benchmark_prob_standardDeviation times standardDeviation over random
// distributions of 10, 50 and 1000 samples. Each distribution is built
// before the timer is reset.
func Benchmark_prob_standardDeviation(b *testing.B) {
	benches := []struct {
		name string
		size int
	}{
		{"distribution size 10", 10},
		{"distribution size 50", 50},
		{"distribution size 1000", 1000},
	}
	for _, bc := range benches {
		bc := bc
		b.Run(bc.name, func(b *testing.B) {
			dist := makeRandSlice(bc.size)
			b.ResetTimer()
			for n := b.N; n > 0; n-- {
				standardDeviation(dist)
			}
		})
	}
}
// Benchmark_prob_variance times variance over random distributions of 10,
// 50 and 1000 samples. Each distribution is built before the timer is reset.
func Benchmark_prob_variance(b *testing.B) {
	benches := []struct {
		name string
		size int
	}{
		{"distribution size 10", 10},
		{"distribution size 50", 50},
		{"distribution size 1000", 1000},
	}
	for _, bc := range benches {
		bc := bc
		b.Run(bc.name, func(b *testing.B) {
			dist := makeRandSlice(bc.size)
			b.ResetTimer()
			for n := b.N; n > 0; n-- {
				variance(dist)
			}
		})
	}
}
// makeRandSlice returns a slice of length size in which every element is a
// pseudo-random integer in [0, 25). The generator is seeded from the current
// time, so the contents differ between calls.
func makeRandSlice(size int) []int64 {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	s := make([]int64, size)
	// Range over the whole slice so that the final element is randomised too,
	// rather than being left at its zero value.
	for i := range s {
		s[i] = r.Int63n(25)
	}
	return s
}
// makeRandSliceFloat64 returns a slice of length size in which every element
// is a pseudo-random float64 in [0, 25): a random integer part in [0, 25)
// plus a random fraction in [0, 1). The generator is seeded from the current
// time, so the contents differ between calls.
func makeRandSliceFloat64(size int) []float64 {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	s := make([]float64, size)
	// Range over the whole slice so that the final element is randomised too,
	// rather than being left at its zero value.
	for i := range s {
		s[i] = float64(r.Int63n(25)) + r.Float64()
	}
	return s
}