ipfs-cluster/monitor/metrics/checker_test.go
Hector Sanjuan 9b9d76f92d Pinset streaming and method type revamp
This commit introduces the new go-libp2p-gorpc streaming capabilities for
Cluster. The main aim is to work towards heavily reducing memory usage when
working with very large pinsets.

As a side-effect, it takes the chance to revamp all types for all public
methods so that pointers to what should be static objects are not used
anymore. This should heavily reduce heap allocations and GC activity.

The main change is that state.List now returns a channel from which to read
the pins, rather than pins being all loaded into a huge slice.

Things reading pins have been all updated to iterate on the channel rather
than on the slice. The full pinset is no longer fully loaded onto memory for
things that run regularly like StateSync().

Additionally, the /allocations endpoint of the rest API no longer returns an
array of pins, but rather streams json-encoded pin objects directly. This
change has extended to the restapi client (which puts pins into a channel as
they arrive) and to ipfs-cluster-ctl.

There are still pending improvements like StatusAll() calls which should also
stream responses, and especially BlockPut calls which should stream blocks
directly into IPFS on a single call.

These are coming up in future commits.
2022-03-19 03:02:55 +01:00

201 lines
4.1 KiB
Go

package metrics
import (
"context"
"testing"
"time"
"github.com/ipfs/ipfs-cluster/api"
"github.com/ipfs/ipfs-cluster/test"
peer "github.com/libp2p/go-libp2p-core/peer"
)
// TestChecker_CheckPeers exercises Checker.CheckPeers against a store holding
// a single "ping" metric for test.PeerID1 with a 2-second TTL.
//
// It asserts, in order:
//   - no alert is pending when the check runs right after the metric is added;
//   - an alert is pending when the check runs again after a 3-second sleep,
//     i.e. longer than the metric's TTL;
//   - checking test.PeerID2, for which nothing was stored, leaves no alert
//     pending.
//
// The error returned by every CheckPeers call is checked, so an unexpected
// failure is reported at the call that produced it.
func TestChecker_CheckPeers(t *testing.T) {
	t.Run("check with single metric", func(t *testing.T) {
		metrics := NewStore()
		checker := NewChecker(context.Background(), metrics)

		metr := api.Metric{
			Name:  "ping",
			Peer:  test.PeerID1,
			Value: "1",
			Valid: true,
		}
		metr.SetTTL(2 * time.Second)
		metrics.Add(metr)

		// Freshly added metric: the check must succeed and stay silent.
		if err := checker.CheckPeers([]peer.ID{test.PeerID1}); err != nil {
			t.Fatal(err)
		}
		// Non-blocking receive: only inspect what is already pending.
		select {
		case <-checker.Alerts():
			t.Error("there should not be an alert yet")
		default:
		}

		// Wait for longer than the 2-second TTL before checking again.
		time.Sleep(3 * time.Second)
		if err := checker.CheckPeers([]peer.ID{test.PeerID1}); err != nil {
			t.Fatal(err)
		}
		select {
		case <-checker.Alerts():
		default:
			t.Error("an alert should have been triggered")
		}

		// A peer that never had a metric stored must not produce alerts.
		if err := checker.CheckPeers([]peer.ID{test.PeerID2}); err != nil {
			t.Fatal(err)
		}
		select {
		case <-checker.Alerts():
			t.Error("there should not be alerts for different peer")
		default:
		}
	})
}
// TestChecker_CheckAll exercises Checker.CheckAll against a store holding a
// single "ping" metric for test.PeerID1 with a 2-second TTL.
//
// It asserts, in order:
//   - no alert is pending when the check runs right after the metric is added;
//   - an alert is pending when the check runs again after a 3-second sleep,
//     i.e. longer than the metric's TTL;
//   - a further CheckAll, after that alert has been consumed, leaves no new
//     alert pending.
//
// The error returned by every CheckAll call is checked, so an unexpected
// failure is reported at the call that produced it.
func TestChecker_CheckAll(t *testing.T) {
	t.Run("checkall with single metric", func(t *testing.T) {
		metrics := NewStore()
		checker := NewChecker(context.Background(), metrics)

		metr := api.Metric{
			Name:  "ping",
			Peer:  test.PeerID1,
			Value: "1",
			Valid: true,
		}
		metr.SetTTL(2 * time.Second)
		metrics.Add(metr)

		// Freshly added metric: the check must succeed and stay silent.
		if err := checker.CheckAll(); err != nil {
			t.Fatal(err)
		}
		// Non-blocking receive: only inspect what is already pending.
		select {
		case <-checker.Alerts():
			t.Error("there should not be an alert yet")
		default:
		}

		// Wait for longer than the 2-second TTL before checking again.
		time.Sleep(3 * time.Second)
		if err := checker.CheckAll(); err != nil {
			t.Fatal(err)
		}
		select {
		case <-checker.Alerts():
		default:
			t.Error("an alert should have been triggered")
		}

		// Checking once more must not raise the same alert again.
		// NOTE(review): presumably the alerted metric has been dropped from
		// the store by now (compare TestChecker_alert) — confirm in Checker.
		if err := checker.CheckAll(); err != nil {
			t.Fatal(err)
		}
		select {
		case <-checker.Alerts():
			t.Error("there should not be another alert after the first one")
		default:
		}
	})
}
// TestChecker_Watch runs Checker.Watch in a background goroutine and expects
// an alert to arrive on Alerts() before a one-second deadline expires.
//
// The store holds one "ping" metric for test.PeerID1 with a 100ms TTL, and
// Watch is given a peer-listing function that always reports just that peer,
// together with a 200ms interval.
func TestChecker_Watch(t *testing.T) {
	// One-second budget for the test; the same context is handed to Watch.
	deadline, stop := context.WithTimeout(context.Background(), time.Second)
	defer stop()

	store := NewStore()
	checker := NewChecker(context.Background(), store)

	var ping api.Metric
	ping.Name = "ping"
	ping.Peer = test.PeerID1
	ping.Value = "1"
	ping.Valid = true
	ping.SetTTL(100 * time.Millisecond)
	store.Add(ping)

	listPeers := func(context.Context) ([]peer.ID, error) {
		return []peer.ID{test.PeerID1}, nil
	}
	go checker.Watch(deadline, listPeers, 200*time.Millisecond)

	// Block until either an alert shows up or the deadline passes.
	select {
	case alrt := <-checker.Alerts():
		t.Log("received alert:", alrt)
	case <-deadline.Done():
		t.Fatal("should have received an alert")
	}
}
// TestChecker_Failed checks Checker.FailedMetric for a "ping" metric of
// test.PeerID1 created with a 100ms TTL: it must report false 50ms after the
// metric is added and true after a further 100ms sleep.
func TestChecker_Failed(t *testing.T) {
	t.Run("standard failure check", func(t *testing.T) {
		const ttl = 100 * time.Millisecond

		store := NewStore()
		checker := NewChecker(context.Background(), store)
		store.Add(makePeerMetric(test.PeerID1, "1", ttl))

		// Half the TTL has elapsed: not failed yet.
		time.Sleep(ttl / 2)
		if checker.FailedMetric("ping", test.PeerID1) {
			t.Error("should not have failed so soon")
		}

		// Another full TTL later the total wait exceeds the TTL.
		time.Sleep(ttl)
		if failed := checker.FailedMetric("ping", test.PeerID1); !failed {
			t.Error("should have failed")
		}
	})
}
// TestChecker_alert runs Checker.Watch for one second over a store holding a
// single "ping" metric (100ms TTL) for test.PeerID1 and counts the alerts
// received on Alerts() during that time.
//
// The test fails as soon as the count exceeds MaxAlertThreshold, and also
// fails if the one-second context ends without any alert having been seen.
func TestChecker_alert(t *testing.T) {
	t.Run("remove peer from store after alert", func(t *testing.T) {
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		defer cancel()

		store := NewStore()
		checker := NewChecker(ctx, store)

		ping := api.Metric{Name: "ping", Peer: test.PeerID1, Value: "1", Valid: true}
		ping.SetTTL(100 * time.Millisecond)
		store.Add(ping)

		listPeers := func(context.Context) ([]peer.ID, error) {
			return []peer.ID{test.PeerID1}, nil
		}
		go checker.Watch(ctx, listPeers, 200*time.Millisecond)

		// Drain alerts until the context ends, counting each one.
		received := 0
	collect:
		for {
			select {
			case alrt := <-checker.Alerts():
				t.Log("received alert:", alrt)
				if received++; received > MaxAlertThreshold {
					t.Fatalf("there should no more than %d alert", MaxAlertThreshold)
				}
			case <-ctx.Done():
				break collect
			}
		}

		if received < 1 {
			t.Fatal("should have received an alert")
		}
	})
}
// makePeerMetric builds a "ping" metric for pid that carries value, has its
// Valid flag set, and has ttl applied through SetTTL.
func makePeerMetric(pid peer.ID, value string, ttl time.Duration) api.Metric {
	var m api.Metric
	m.Name = "ping"
	m.Peer = pid
	m.Value = value
	m.Valid = true
	m.SetTTL(ttl)
	return m
}