ipfs-cluster/monitor/basic/peer_monitor_test.go

package basic

import (
	"context"
	"fmt"
	"strconv"
	"sync"
	"testing"
	"time"

	libp2p "github.com/libp2p/go-libp2p"
	peer "github.com/libp2p/go-libp2p-peer"
	host "github.com/libp2p/go-libp2p-host"

	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/test"
)
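
// metricFactory produces test metrics whose Value is a counter that
// increases with every call to newMetric, so tests can check ordering.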
type metricFactory struct {
	l       sync.Mutex
	counter int
}

func newMetricFactory() *metricFactory {
	return &metricFactory{
		counter: 0,
	}
}
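
// newMetric returns a valid metric with the given name and peer, a
// 5-second TTL and the current counter value as its Value.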
func (mf *metricFactory) newMetric(n string, p peer.ID) api.Metric {
	mf.l.Lock()
	defer mf.l.Unlock()
	m := api.Metric{
		Name:  n,
		Peer:  p,
		Value: fmt.Sprintf("%d", mf.counter),
		Valid: true,
	}
	m.SetTTL(5 * time.Second)
	mf.counter++
	return m
}
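
// count returns how many metrics have been produced so far.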
func (mf *metricFactory) count() int {
	mf.l.Lock()
	defer mf.l.Unlock()
	return mf.counter
}
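
// testPeerMonitor returns a Monitor suitable for testing, without an
// associated libp2p host.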
func testPeerMonitor(t *testing.T) *Monitor {
	return testPeerMonitorWithHost(t, nil)
}
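
// testPeerMonitorWithHost creates a Monitor with a shortened
// CheckInterval and attaches a mock RPC client bound to the given host.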
func testPeerMonitorWithHost(t *testing.T, h host.Host) *Monitor {
	mock := test.NewMockRPCClientWithHost(t, h)
	cfg := &Config{}
	cfg.Default()
	cfg.CheckInterval = 2 * time.Second
	mon, err := NewMonitor(cfg)
	if err != nil {
		t.Fatal(err)
	}
	mon.SetClient(mock)
	return mon
}
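
// TestPeerMonitorShutdown checks that Shutdown can be called more than
// once without returning an error.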
func TestPeerMonitorShutdown(t *testing.T) {
	pm := testPeerMonitor(t)
	err := pm.Shutdown()
	if err != nil {
		t.Error(err)
	}
	err = pm.Shutdown()
	if err != nil {
		t.Error(err)
	}
}
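
// TestLogMetricConcurrent logs short-lived metrics from several
// goroutines and checks that LatestMetrics keeps returning a single
// "test" metric whose timestamp never goes backwards.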
func TestLogMetricConcurrent(t *testing.T) {
	pm := testPeerMonitor(t)
	defer pm.Shutdown()

	var wg sync.WaitGroup
	wg.Add(3)

	// Each goroutine inserts 25 metrics
	f := func() {
		defer wg.Done()
		for i := 0; i < 25; i++ {
			mt := api.Metric{
				Name:  "test",
				Peer:  test.TestPeerID1,
				Value: fmt.Sprintf("%d", time.Now().UnixNano()),
				Valid: true,
			}
			mt.SetTTL(150 * time.Millisecond)
			pm.LogMetric(mt)
			time.Sleep(75 * time.Millisecond)
		}
	}
	go f()
	go f()
	go f()

	// Wait for at least two metrics to be inserted
	time.Sleep(200 * time.Millisecond)

	last := time.Now().Add(-500 * time.Millisecond)
	for i := 0; i <= 20; i++ {
		lastMtrcs := pm.LatestMetrics("test")

		// There should always be exactly one valid "test" metric
		if len(lastMtrcs) != 1 {
			t.Error("no valid metrics", len(lastMtrcs), i)
			time.Sleep(75 * time.Millisecond)
			continue
		}

		n, err := strconv.Atoi(lastMtrcs[0].Value)
		if err != nil {
			t.Fatal(err)
		}

		// The timestamp of the current metric must not be older
		// than the timestamp of the previous one
		current := time.Unix(0, int64(n))
		if current.Before(last) {
			t.Errorf("expected newer metric: Current: %s, Last: %s", current, last)
		}
		last = current
		time.Sleep(75 * time.Millisecond)
	}
	wg.Wait()
}
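
// TestPeerMonitorLogMetric checks that LatestMetrics returns one metric
// per peer for a given metric name, and only the newest value per peer.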
func TestPeerMonitorLogMetric(t *testing.T) {
	pm := testPeerMonitor(t)
	defer pm.Shutdown()
	mf := newMetricFactory()

	// don't fill the window
	pm.LogMetric(mf.newMetric("test", test.TestPeerID1))
	pm.LogMetric(mf.newMetric("test", test.TestPeerID2))
	pm.LogMetric(mf.newMetric("test", test.TestPeerID3))

	// fill the window
	pm.LogMetric(mf.newMetric("test2", test.TestPeerID3))
	pm.LogMetric(mf.newMetric("test2", test.TestPeerID3))
	pm.LogMetric(mf.newMetric("test2", test.TestPeerID3))
	pm.LogMetric(mf.newMetric("test2", test.TestPeerID3))

	latestMetrics := pm.LatestMetrics("testbad")
	if len(latestMetrics) != 0 {
		t.Logf("%+v", latestMetrics)
		t.Error("metrics should be empty")
	}

	latestMetrics = pm.LatestMetrics("test")
	if len(latestMetrics) != 3 {
		t.Error("metrics should correspond to 3 hosts")
	}

	for _, v := range latestMetrics {
		switch v.Peer {
		case test.TestPeerID1:
			if v.Value != "0" {
				t.Error("bad metric value")
			}
		case test.TestPeerID2:
			if v.Value != "1" {
				t.Error("bad metric value")
			}
		case test.TestPeerID3:
			if v.Value != "2" {
				t.Error("bad metric value")
			}
		default:
			t.Error("bad peer")
		}
	}

	latestMetrics = pm.LatestMetrics("test2")
	if len(latestMetrics) != 1 {
		t.Fatal("should only be one metric")
	}
	if latestMetrics[0].Value != fmt.Sprintf("%d", mf.count()-1) {
		t.Error("metric is not the last one")
	}
}
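
// TestPeerMonitorPublishMetric broadcasts a metric using a real libp2p
// host; the peers reported by the mock RPC client are unreachable, so
// an error is expected.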
func TestPeerMonitorPublishMetric(t *testing.T) {
	h, err := libp2p.New(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	pm := testPeerMonitorWithHost(t, h)
	defer pm.Shutdown()
	defer h.Close()

	mf := newMetricFactory()
	metric := mf.newMetric("test", test.TestPeerID1)
	err = pm.PublishMetric(metric)

	// Note: the mock RPC client returns 3 consensus peers and we cannot
	// push to them, so an error is expected here and indicates that
	// things work as intended.
	if err == nil {
		t.Error("expected an error")
	}
}
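
// TestPeerMonitorAlerts logs an already-expired metric and expects
// repeated alerts for it.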
func TestPeerMonitorAlerts(t *testing.T) {
	pm := testPeerMonitor(t)
	defer pm.Shutdown()
	mf := newMetricFactory()

	mtr := mf.newMetric("test", test.TestPeerID1)
	mtr.SetTTL(0)
	pm.LogMetric(mtr)
	time.Sleep(time.Second)
	timeout := time.NewTimer(time.Second * 5)

	// It should alert at least twice, since alerts re-occur for
	// expired metrics.
	for i := 0; i < 2; i++ {
		select {
		case <-timeout.C:
			t.Fatal("should have thrown an alert by now")
		case alrt := <-pm.Alerts():
			if alrt.MetricName != "test" {
				t.Error("Alert should be for test")
			}
			if alrt.Peer != test.TestPeerID1 {
				t.Error("Peer should be TestPeerID1")
			}
		}
	}
}