From eae4329cb3e99f7689f622fa685baeb52195444b Mon Sep 17 00:00:00 2001 From: Adrian Lanzafame Date: Wed, 17 Apr 2019 16:39:00 +1000 Subject: [PATCH] address pr feedback License: MIT Signed-off-by: Adrian Lanzafame --- cluster.go | 14 +- config_test.go | 2 +- go.mod | 1 + go.sum | 18 +++ monitor/metrics/checker.go | 22 ++- monitor/metrics/checker_test.go | 269 ++++++++++++++++++++++++++++++- monitor/metrics/prob.go | 2 +- monitor/pubsubmon/config_test.go | 2 +- 8 files changed, 314 insertions(+), 16 deletions(-) diff --git a/cluster.go b/cluster.go index a62e8ce5..ed7e790e 100644 --- a/cluster.go +++ b/cluster.go @@ -300,9 +300,9 @@ func (c *Cluster) alertsHandler() { case alrt := <-c.monitor.Alerts(): list := c.state.List(c.ctx) for _, pin := range list { - if len(pin.Allocations) < 1 && containsPeer(pin.Allocations, alrt.Peer) { - logger.Error("a pin with only one allocation cannot be repinned") - logger.Error("to make repinning possible, pin with a replication factor of 2+") + if len(pin.Allocations) == 1 && containsPeer(pin.Allocations, alrt.Peer) { + logger.Warning("a pin with only one allocation cannot be repinned") + logger.Warning("to make repinning possible, pin with a replication factor of 2+") continue } if c.shouldPeerRepinCid(alrt.Peer, pin) { @@ -319,13 +319,13 @@ func (c *Cluster) alertsHandler() { // return true. func (c *Cluster) shouldPeerRepinCid(failed peer.ID, pin *api.Pin) bool { if containsPeer(pin.Allocations, failed) && containsPeer(pin.Allocations, c.id) { - allocs := sort.StringSlice(api.PeersToStrings(pin.Allocations)) - allocs.Sort() - if allocs[0] == c.id.String() { + allocs := peer.IDSlice(pin.Allocations) + sort.Sort(allocs) + if allocs[0] == c.id { return true } - if allocs[1] == c.id.String() && allocs[0] == failed.String() { + if allocs[1] == c.id && allocs[0] == failed { return true } } diff --git a/config_test.go b/config_test.go index 33b3e3c8..9e5a14bd 100644 --- a/config_test.go +++ b/config_test.go @@ -103,7 +103,7 @@ var testingTrackerCfg = []byte(` var testingMonCfg = []byte(`{ "check_interval": "300ms", - "failure_threshold": 1 + "failure_threshold": 1 }`) var testingDiskInfCfg = []byte(`{ diff --git a/go.mod b/go.mod index 008f2af3..f1a21214 100644 --- a/go.mod +++ b/go.mod @@ -88,5 +88,6 @@ require ( golang.org/x/sys v0.0.0-20190416152802-12500544f89f // indirect golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373 // indirect gonum.org/v1/gonum v0.0.0-20190321072728-ca4d35bc590a + gonum.org/v1/plot v0.0.0-20190410204940-3a5f52653745 google.golang.org/grpc v1.19.1 // indirect ) diff --git a/go.sum b/go.sum index f5bc3fba..1487d4f6 100644 --- a/go.sum +++ b/go.sum @@ -14,6 +14,8 @@ github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMx github.com/Stebalien/go-bitfield v0.0.0-20180330043415-076a62f9ce6e h1:2Z+EBRrOJsA3psnUPcEWMIH2EIga1xHflQcr/EZslx8= github.com/Stebalien/go-bitfield v0.0.0-20180330043415-076a62f9ce6e/go.mod h1:3oM7gXIttpYDAJXpVNnSCiUMYBLIZ6cb1t+Ip982MRo= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= +github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af h1:wVe6/Ea46ZMeNkQjjBW6xcqyQA/j5e0D6GytH95g0gQ= +github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/apache/thrift v0.12.0 h1:pODnxUFNcjP9UTLZGTdeh+j16A8lJbRvD3rOtrk/7bs= @@ -69,6 +71,8 @@ github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1 github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= github.com/facebookgo/atomicfile v0.0.0-20151019160806-2de1f203e7d5/go.mod h1:JpoxHjuQauoxiFMl1ie8Xc/7TfLuMZ5eOCONd1sUBHg= github.com/fd/go-nat v1.0.0/go.mod h1:BTBu/CKvMmOMUPkKVef1pngt2WFH/lg7E6yQnulfp6E= +github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90 h1:WXb3TSNmHp2vHoCroCIB1foO/yQ36swABL8aOVeDpgg= +github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -81,6 +85,8 @@ github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7a github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E= @@ -255,6 +261,8 @@ github.com/jtolds/gls v4.2.1+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVY github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5 h1:PJr+ZMXIecYc1Ey2zucXdR73SMBtgjPgwa31099IMv0= +github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/kelseyhightower/envconfig v1.3.0 h1:IvRS4f2VcIQy6j4ORGIf9145T/AsUB+oY8LyvN8BXNM= github.com/kelseyhightower/envconfig v1.3.0/go.mod h1:cccZRl6mQpaq41TPp5QxidR+Sa3axMbJDNb//FQX6Gg= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= @@ -567,9 +575,13 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90Pveol golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190417170229-92d88b081a49 h1:mE9V9RMa141kxdQR3pfZM3mkg0MPyw+FOPpnciBXkbE= golang.org/x/crypto v0.0.0-20190417170229-92d88b081a49/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= +golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2 h1:y102fOLFqhV41b+4GPiJoa0k/x+pJcEi2/HB1Y5T6fU= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81 h1:00VmoueYNlNz/aHIilyyQz/MHSqGoWJzpFv/HW8xpzI= +golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20181217174547-8f45f776aaf1/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -624,6 +636,7 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2 h1:z99zHgr7hKfrUcX/KsoJk5FJfjTceCKIp96+biqP4To= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181219222714-6e267b5cc78e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -636,10 +649,13 @@ golang.org/x/xerrors v0.0.0-20190212162355-a5947ffaace3 h1:P6iTFmrTQqWrqLZPX1VMz golang.org/x/xerrors v0.0.0-20190212162355-a5947ffaace3/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373 h1:PPwnA7z1Pjf7XYaBP9GL1VAMZmcIWyFz7QCMSIIa3Bg= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.0.0-20190321072728-ca4d35bc590a h1:XffIu/i+IJIC+M8WoBEmJm8N/YYbA8Pvh748YgzU7kI= gonum.org/v1/gonum v0.0.0-20190321072728-ca4d35bc590a/go.mod h1:2ltnJ7xHfj0zHS40VVPYEAAMTa3ZGguvHGBSJeRWqE0= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= +gonum.org/v1/plot v0.0.0-20190410204940-3a5f52653745 h1:Xaq5xR1I2KM/MWp1vwZxOosUPa1U8wtNN8zRbVko0ZY= +gonum.org/v1/plot v0.0.0-20190410204940-3a5f52653745/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= google.golang.org/api v0.0.0-20181220000619-583d854617af h1:iQMS7JKv/0w/iiWf1M49Cg3dmOkBoBZT5KheqPDpaac= google.golang.org/api v0.0.0-20181220000619-583d854617af/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.3.1 h1:oJra/lMfmtm13/rgY/8i3MzjFWYXvQIAKjQ3HqofMk8= @@ -674,3 +690,5 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20180920025451-e3ad64cb4ed3/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/monitor/metrics/checker.go b/monitor/metrics/checker.go index c2580a97..fe07de5d 100644 --- a/monitor/metrics/checker.go +++ b/monitor/metrics/checker.go @@ -25,7 +25,11 @@ type Checker struct { } // NewChecker creates a Checker using the given -// MetricsStore. +// MetricsStore. The threshold value indicates when a +// monitored component should be considered to have failed. +// The greater the threshold value the more leniency is granted. +// +// A value between 2.0 and 4.0 is suggested for the threshold. func NewChecker(metrics *Store, threshold float64) *Checker { return &Checker{ alertCh: make(chan *api.Alert, AlertChannelCap), @@ -125,16 +129,24 @@ func (mc *Checker) Watch(ctx context.Context, peersF func(context.Context) ([]pe } } -// Failed returns if a peer has potentially failed. Peers -// that are not present in the metrics store will return +// Failed returns true if a peer has potentially failed. +// Peers that are not present in the metrics store will return // as failed. func (mc *Checker) Failed(pid peer.ID) bool { + _, _, _, result := mc.failed(pid) + return result +} + +// failed returns all the values involved in making the decision +// as to whether a peer has failed or not. This mainly for debugging +// purposes. +func (mc *Checker) failed(pid peer.ID) (float64, []float64, float64, bool) { latest := mc.metrics.PeerLatest("ping", pid) if latest == nil { - return true + return 0.0, nil, 0.0, true } v := time.Now().UnixNano() - latest.ReceivedAt dv := mc.metrics.Distribution("ping", pid) phiv := phi(float64(v), dv) - return phiv >= mc.threshold + return float64(v), dv, phiv, phiv >= mc.threshold } diff --git a/monitor/metrics/checker_test.go b/monitor/metrics/checker_test.go index 15abb164..4a62bef0 100644 --- a/monitor/metrics/checker_test.go +++ b/monitor/metrics/checker_test.go @@ -2,10 +2,16 @@ package metrics import ( "context" + "fmt" + "math" "testing" "time" peer "github.com/libp2p/go-libp2p-peer" + "gonum.org/v1/plot" + "gonum.org/v1/plot/plotter" + "gonum.org/v1/plot/plotutil" + "gonum.org/v1/plot/vg" "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/test" @@ -93,7 +99,6 @@ func TestChecker_Failed(t *testing.T) { } for i := 0; i < 10; i++ { metrics.Add(makePeerMetric(test.PeerID1, "1")) - time.Sleep(time.Duration(i) * time.Millisecond) got := checker.Failed(test.PeerID1) // the magic number 17 represents the point at which // the time between metrics addition has gotten @@ -102,10 +107,200 @@ func TestChecker_Failed(t *testing.T) { if i >= 17 && !got { t.Fatal("threshold should have been passed by now") } + time.Sleep(time.Duration(i) * time.Millisecond) } }) } +func TestThresholdValues(t *testing.T) { + t.Log("TestThresholdValues is useful for testing out different threshold values") + t.Log("It doesn't actually perform any 'tests', so it is skipped by default") + t.SkipNow() + + thresholds := []float64{0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.8, 0.9, 1.3, 1.4, 1.5, 1.8, 2.0, 3.0, 4.0, 5.0, 7.0, 10.0, 20.0} + + t.Run("linear threshold test", func(t *testing.T) { + dists := make([]timeseries, 0) + phivs := make([]timeseries, 0) + for _, v := range thresholds { + metrics := NewStore() + checker := NewChecker(metrics, v) + tsName := fmt.Sprintf("%f", v) + distTS := newTS(tsName) + phivTS := newTS(tsName) + + var failed bool + output := false + + check := func(i int) bool { + inputv, dist, phiv, got := checker.failed(test.PeerID1) + if output { + fmt.Println(i) + fmt.Printf("phiv: %f\n", phiv) + fmt.Printf("threshold: %f\n", v) + fmt.Printf("latest: %f\n", inputv) + fmt.Printf("distribution: %v\n", dist) + } + phivTS.record(phiv) + failed = got + if failed { + distTS.record(float64(i)) + } + return got + } + + // set baseline static distribution of ping intervals + for i := 0; i < 10; i++ { + check(i) + distTS.record(float64(10)) + metrics.Add(makePeerMetric(test.PeerID1, "1")) + time.Sleep(time.Duration(10) * time.Millisecond) + } + // start linearly increasing the interval values + for i := 10; i < 100 && !check(i); i++ { + distTS.record(float64(i)) + metrics.Add(makePeerMetric(test.PeerID1, "1")) + time.Sleep(time.Duration(i) * time.Millisecond) + } + + if failed { + dists = append(dists, *distTS) + phivs = append(phivs, *phivTS) + continue + } + dists = append(dists, *distTS) + phivs = append(phivs, *phivTS) + + // if a threshold has made it to this point, all greater + // thresholds will make it here so print the threshold and skip + t.Log("threshold that was not meet by linear increase: ", v) + break + } + plotDistsPhivs("linear_", dists, phivs) + }) + + t.Run("cubic threshold test", func(t *testing.T) { + dists := make([]timeseries, 0) + phivs := make([]timeseries, 0) + for _, v := range thresholds { + metrics := NewStore() + checker := NewChecker(metrics, v) + tsName := fmt.Sprintf("%f", v) + distTS := newTS(tsName) + phivTS := newTS(tsName) + + var failed bool + output := false + + check := func(i int) bool { + inputv, dist, phiv, got := checker.failed(test.PeerID1) + if output { + fmt.Println(i) + fmt.Printf("phiv: %f\n", phiv) + fmt.Printf("threshold: %f\n", v) + fmt.Printf("latest: %f\n", inputv) + fmt.Printf("distribution: %v\n", dist) + } + phivTS.record(phiv) + failed = got + if failed { + diff := math.Pow(float64(i), float64(i)) + distTS.record(diff) + } + return got + } + + // set baseline static distribution of ping intervals + for i := 0; i < 10; i++ { + check(i) + distTS.record(float64(8)) + metrics.Add(makePeerMetric(test.PeerID1, "1")) + time.Sleep(time.Duration(8) * time.Millisecond) + } + for i := 2; !check(i) && i < 20; i++ { + diff := math.Pow(float64(i), 3) + distTS.record(diff) + metrics.Add(makePeerMetric(test.PeerID1, "1")) + time.Sleep(time.Duration(diff) * time.Millisecond) + } + + if failed { + dists = append(dists, *distTS) + phivs = append(phivs, *phivTS) + continue + } + dists = append(dists, *distTS) + phivs = append(phivs, *phivTS) + + // if a threshold has made it to this point, all greater + // thresholds will make it here so print the threshold and skip + t.Log("threshold that was not meet by cubic increase: ", v) + break + } + plotDistsPhivs("cubic_", dists, phivs) + }) + + t.Run("14x threshold test", func(t *testing.T) { + dists := make([]timeseries, 0) + phivs := make([]timeseries, 0) + for _, v := range thresholds { + metrics := NewStore() + checker := NewChecker(metrics, v) + tsName := fmt.Sprintf("%f", v) + distTS := newTS(tsName) + phivTS := newTS(tsName) + + var failed bool + output := false + + check := func(i int) bool { + inputv, dist, phiv, got := checker.failed(test.PeerID1) + if output { + fmt.Println(i) + fmt.Printf("phiv: %f\n", phiv) + fmt.Printf("threshold: %f\n", v) + fmt.Printf("latest: %f\n", inputv) + fmt.Printf("distribution: %v\n", dist) + } + phivTS.record(phiv) + failed = got + if failed { + diff := i * 50 + distTS.record(float64(diff)) + } + return got + } + + for i := 0; i < 10; i++ { + check(i) + distTS.record(float64(i)) + metrics.Add(makePeerMetric(test.PeerID1, "1")) + time.Sleep(time.Duration(i) * time.Millisecond) + } + for i := 10; !check(i) && i < 30; i++ { + diff := i * 50 + distTS.record(float64(diff)) + metrics.Add(makePeerMetric(test.PeerID1, "1")) + time.Sleep(time.Duration(diff) * time.Millisecond) + } + + if failed { + dists = append(dists, *distTS) + phivs = append(phivs, *phivTS) + continue + } + dists = append(dists, *distTS) + phivs = append(phivs, *phivTS) + + // if a threshold has made it to this point, all greater + // thresholds will make it here so print the threshold and skip + t.Log("threshold that was not meet by 14x increase: ", v) + break + } + plotDistsPhivs("14x_", dists, phivs) + }) +} + func makePeerMetric(pid peer.ID, value string) *api.Metric { metr := &api.Metric{ Name: "ping", @@ -115,3 +310,75 @@ func makePeerMetric(pid peer.ID, value string) *api.Metric { } return metr } + +type timeseries struct { + name string + values []float64 +} + +func newTS(name string) *timeseries { + v := make([]float64, 0) + return ×eries{name: name, values: v} +} + +func (ts *timeseries) record(v float64) { + ts.values = append(ts.values, v) +} + +func (ts *timeseries) ToXYs() plotter.XYs { + pts := make(plotter.XYs, len(ts.values)) + for i, v := range ts.values { + pts[i].X = float64(i) + if math.IsInf(v, 0) { + pts[i].Y = -5 + } else { + pts[i].Y = v + } + } + return pts +} + +func toPlotLines(ts []timeseries) ([]string, [][]plot.Plotter) { + labels := make([]string, 0) + plotters := make([][]plot.Plotter, 0) + for i, t := range ts { + l, s, err := plotter.NewLinePoints(t.ToXYs()) + if err != nil { + panic(err) + } + l.Width = vg.Points(2) + l.Color = plotutil.Color(i) + l.Dashes = plotutil.Dashes(i) + s.Color = plotutil.Color(i) + s.Shape = plotutil.Shape(i) + + labels = append(labels, t.name) + plotters = append(plotters, []plot.Plotter{l, s}) + } + return labels, plotters +} + +func plotDistsPhivs(prefix string, dists, phivs []timeseries) { + plotTS(prefix+"dists", dists) + plotTS(prefix+"phivs", phivs) +} + +func plotTS(name string, ts []timeseries) { + p, err := plot.New() + if err != nil { + panic(err) + } + p.Title.Text = name + p.Add(plotter.NewGrid()) + + labels, plotters := toPlotLines(ts) + for i := range labels { + l, pts := plotters[i][0], plotters[i][1] + p.Add(l, pts) + p.Legend.Add(labels[i], l.(*plotter.Line), pts.(*plotter.Scatter)) + } + + if err := p.Save(20*vg.Inch, 15*vg.Inch, name+".png"); err != nil { + panic(err) + } +} diff --git a/monitor/metrics/prob.go b/monitor/metrics/prob.go index 8368bb7c..02d62ca7 100644 --- a/monitor/metrics/prob.go +++ b/monitor/metrics/prob.go @@ -28,7 +28,7 @@ import ( "gonum.org/v1/gonum/floats" ) -// Phi returns the φ-failure for the given value and distribution. +// phi returns the φ-failure for the given value and distribution. // Two edge cases that are dealt with in phi: // 1. phi == math.+Inf // 2. phi == math.NaN diff --git a/monitor/pubsubmon/config_test.go b/monitor/pubsubmon/config_test.go index cc0f476b..1dd017ab 100644 --- a/monitor/pubsubmon/config_test.go +++ b/monitor/pubsubmon/config_test.go @@ -10,7 +10,7 @@ import ( var cfgJSON = []byte(` { "check_interval": "15s", - "failure_threshold": 3.0 + "failure_threshold": 3.0 } `)