ipfs-cluster/peer_monitor_test.go
Hector Sanjuan 6ee0f3bead Issue #45: Detect expired metrics and trigger re-pins
An initial, simple approach to this: the PeerMonitor checks its
metrics, compares them to the current set of peers, and puts
an alert on the alerts channel if the metrics for a peer have expired.

Cluster reads this channel looking for "ping" alerts. The leader
is in charge of triggering re-pins of all the CIDs allocated to
the affected peer.
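
As a rough illustration of that flow, the sketch below shows what such an
alert-consumption loop could look like. It is not the actual Cluster code:
Alert, isLeader and repinAllocations are hypothetical stand-ins for the real
types and methods.

// Hypothetical sketch of the alert-consumption loop described above.
package main

import "log"

// Alert is a simplified stand-in for the monitor's alert type.
type Alert struct {
	MetricName string
	Peer       string
}

// watchAlerts drains the alerts channel; only the leader acts on
// expired "ping" metrics, by re-pinning the peer's allocations.
func watchAlerts(alerts <-chan Alert, isLeader func() bool, repinAllocations func(peer string) error) {
	for alrt := range alerts {
		if alrt.MetricName != "ping" || !isLeader() {
			continue
		}
		if err := repinAllocations(alrt.Peer); err != nil {
			log.Printf("error re-pinning allocations of %s: %s", alrt.Peer, err)
		}
	}
}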

Also, metrics are now broadcast to the whole cluster instead of pushed only
to the leader. Since they are sent every few seconds, this should scale
acceptably. The main problem with pushing only to the leader was that if the
leader itself is the node going down, the new leader has no metrics for it
and will never trigger an alert. Working around that would require the
component to know that it is the leader, or Cluster to handle alerts in
complicated ways when leadership changes. Detecting leadership changes, or
letting a component know who the leader is, is another dependency on the
consensus algorithm that should be avoided. Therefore
we broadcast, for the moment.
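
A rough sketch of that broadcast idea (hypothetical names; sendMetric stands
in for the underlying RPC call, and pingMetric for the real metric type):

// Not the real code: illustrates sending the same metric to every
// cluster peer instead of pushing it only to the leader, so a newly
// elected leader already has the data it needs to detect expiry.
package main

import "log"

type pingMetric struct {
	Peer  string
	Value string
	TTL   int // seconds of validity before an alert may fire
}

func broadcastMetric(peers []string, m pingMetric, sendMetric func(to string, m pingMetric) error) {
	for _, to := range peers {
		if err := sendMetric(to, m); err != nil {
			// a failure towards one peer should not abort the broadcast
			log.Printf("could not send metric to %s: %s", to, err)
		}
	}
}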

License: MIT
Signed-off-by: Hector Sanjuan <hector@protocol.ai>
2017-03-02 14:59:45 +01:00


package ipfscluster

import (
	"fmt"
	"testing"
	"time"

	peer "github.com/libp2p/go-libp2p-peer"

	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/test"
)

var metricCounter = 0

func testPeerMonitor(t *testing.T) *StdPeerMonitor {
	cfg := testingConfig()
	mock := test.NewMockRPCClient(t)
	mon := NewStdPeerMonitor(cfg)
	mon.SetClient(mock)
	return mon
}

func newMetric(n string, p peer.ID) api.Metric {
	m := api.Metric{
		Name:  n,
		Peer:  p,
		Value: fmt.Sprintf("%d", metricCounter),
		Valid: true,
	}
	m.SetTTL(5)
	metricCounter++
	return m
}

func TestPeerMonitorShutdown(t *testing.T) {
	pm := testPeerMonitor(t)

	err := pm.Shutdown()
	if err != nil {
		t.Error(err)
	}

	// Shutdown should be idempotent.
	err = pm.Shutdown()
	if err != nil {
		t.Error(err)
	}
}

func TestPeerMonitorLogMetric(t *testing.T) {
	pm := testPeerMonitor(t)
	defer pm.Shutdown()
	metricCounter = 0

	// don't fill window
	pm.LogMetric(newMetric("test", test.TestPeerID1))
	pm.LogMetric(newMetric("test", test.TestPeerID2))
	pm.LogMetric(newMetric("test", test.TestPeerID3))

	// fill window
	pm.LogMetric(newMetric("test2", test.TestPeerID3))
	pm.LogMetric(newMetric("test2", test.TestPeerID3))
	pm.LogMetric(newMetric("test2", test.TestPeerID3))
	pm.LogMetric(newMetric("test2", test.TestPeerID3))

	lastMetrics := pm.LastMetrics("testbad")
	if len(lastMetrics) != 0 {
		t.Logf("%+v", lastMetrics)
		t.Error("metrics should be empty")
	}

	lastMetrics = pm.LastMetrics("test")
	if len(lastMetrics) != 3 {
		t.Error("metrics should correspond to 3 hosts")
	}

	for _, v := range lastMetrics {
		switch v.Peer {
		case test.TestPeerID1:
			if v.Value != "0" {
				t.Error("bad metric value")
			}
		case test.TestPeerID2:
			if v.Value != "1" {
				t.Error("bad metric value")
			}
		case test.TestPeerID3:
			if v.Value != "2" {
				t.Error("bad metric value")
			}
		default:
			t.Error("bad peer")
		}
	}

	lastMetrics = pm.LastMetrics("test2")
	if len(lastMetrics) != 1 {
		t.Fatal("should only be one metric")
	}
	if lastMetrics[0].Value != fmt.Sprintf("%d", metricCounter-1) {
		t.Error("metric is not last")
	}
}

func TestPeerMonitorAlerts(t *testing.T) {
	pm := testPeerMonitor(t)
	defer pm.Shutdown()

	mtr := newMetric("test", test.TestPeerID1)
	mtr.SetTTL(0)
	pm.LogMetric(mtr)
	time.Sleep(time.Second)
	timeout := time.NewTimer(time.Second * 5)

	// it should alert at least twice, as alerts re-occur
	for i := 0; i < 2; i++ {
		select {
		case <-timeout.C:
			t.Fatal("should have thrown an alert by now")
		case alrt := <-pm.Alerts():
			if alrt.MetricName != "test" {
				t.Error("Alert should be for test")
			}
			if alrt.Peer != test.TestPeerID1 {
				t.Error("Peer should be TestPeerID1")
			}
		}
	}
}