Fix #339: Reduce Sleeps in tests

License: MIT
Signed-off-by: Hector Sanjuan <code@hector.link>
Hector Sanjuan 2018-03-16 17:37:39 +01:00
parent 58acf16efa
commit dd4128affc
13 changed files with 182 additions and 151 deletions


@@ -68,7 +68,7 @@ check:
 	golint -set_exit_status -min_confidence 0.3 ./...

 test: deps
-	go test -timeout 20m -loglevel "CRITICAL" -v ./...
+	go test -loglevel "CRITICAL" -v ./...

 test_sharness: $(sharness)
 	@sh sharness/run-sharness-tests.sh

ci/Jenkinsfile

@@ -1,2 +1,2 @@
-golang([test: "go test -v -timeout 20m ./..."])
+golang([test: "go test -v -loglevel ERROR ./..."])


@@ -272,7 +272,6 @@ func (c *Cluster) pushInformerMetrics() {
 	// The following control how often to make and log
 	// a retry
 	retries := 0
-	retryDelay := 500 * time.Millisecond
 	retryWarnMod := 60
 	for {
 		select {
@@ -293,7 +292,7 @@ func (c *Cluster) pushInformerMetrics() {
 				retries++
 			}
 			// retry in retryDelay
-			timer.Reset(retryDelay)
+			timer.Reset(metric.GetTTL() / 4)
 			continue
 		}


@@ -105,12 +105,12 @@ type Config struct {
 	// possible.
 	ReplicationFactorMin int

-	// MonitorPingInterval is the frequency by which a cluster peer pings
+	// MonitorPingInterval is the frequency with which a cluster peer pings
 	// the monitoring component. The ping metric has a TTL set to the double
 	// of this value.
 	MonitorPingInterval time.Duration

-	// PeerWatchInterval is the frequency that we watch for changes
+	// PeerWatchInterval is the frequency that we use to watch for changes
 	// in the consensus peerset and save new peers to the configuration
 	// file. This also affects how soon we realize that we have
 	// been removed from a cluster.
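For reference, a minimal sketch of how these two intervals are shortened for tests. This is not part of the commit: the helper name fastTestConfig is invented here, and the values simply mirror the testing fixtures introduced later in this commit.

package ipfscluster

import "time"

// fastTestConfig is a hypothetical helper showing how the two intervals
// relate: the ping metric TTL is twice MonitorPingInterval, so with 150ms
// a dead peer is noticed after roughly 300ms, and a 100ms PeerWatchInterval
// makes consensus peerset changes visible almost immediately.
func fastTestConfig() *Config {
	cfg := &Config{}
	cfg.MonitorPingInterval = 150 * time.Millisecond // ping metric TTL: 300ms
	cfg.PeerWatchInterval = 100 * time.Millisecond
	return cfg
}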


@@ -315,7 +315,7 @@ func TestClusterRecoverAllLocal(t *testing.T) {
 		t.Fatal("pin should have worked:", err)
 	}

-	time.Sleep(time.Second)
+	pinDelay()

 	recov, err := cl.RecoverAllLocal()
 	if err != nil {


@@ -15,7 +15,8 @@ import (
 var logger = logging.Logger("config")

-// How often to save the configuration file if it needs saving.
+// ConfigSaveInterval specifies how often to save the configuration file if
+// it needs saving.
 var ConfigSaveInterval = time.Second

 // The ComponentConfig interface allows components to define configurations


@@ -17,29 +17,30 @@ var testingClusterCfg = []byte(`{
     "secret": "2588b80d5cb05374fa142aed6cbb047d1f4ef8ef15e37eba68c65b9d30df67ed",
     "peers": [],
     "bootstrap": [],
-    "leave_on_shutdown": true,
+    "leave_on_shutdown": false,
     "listen_multiaddress": "/ip4/127.0.0.1/tcp/10000",
     "state_sync_interval": "1m0s",
     "ipfs_sync_interval": "2m10s",
     "replication_factor": -1,
-    "monitor_ping_interval": "1s"
+    "monitor_ping_interval": "150ms",
+    "peer_watch_interval": "100ms"
 }
 `)

 var testingRaftCfg = []byte(`{
     "data_folder": "raftFolderFromTests",
-    "wait_for_leader_timeout": "30s",
-    "commit_retries": 1,
-    "commit_retry_delay": "1s",
-    "network_timeout": "20s",
-    "heartbeat_timeout": "1s",
-    "election_timeout": "1s",
+    "wait_for_leader_timeout": "10s",
+    "commit_retries": 2,
+    "commit_retry_delay": "50ms",
+    "network_timeout": "5s",
+    "heartbeat_timeout": "100ms",
+    "election_timeout": "100ms",
     "commit_timeout": "50ms",
-    "max_append_entries": 64,
+    "max_append_entries": 256,
     "trailing_logs": 10240,
     "snapshot_interval": "2m0s",
     "snapshot_threshold": 8192,
-    "leader_lease_timeout": "500ms"
+    "leader_lease_timeout": "80ms"
 }`)

 var testingAPICfg = []byte(`{
@@ -71,11 +72,11 @@ var testingTrackerCfg = []byte(`
 `)

 var testingMonCfg = []byte(`{
-    "check_interval": "1s"
+    "check_interval": "400ms"
 }`)

 var testingDiskInfCfg = []byte(`{
-    "metric_ttl": "1s",
+    "metric_ttl": "150ms",
     "metric_type": "freespace"
 }`)


@@ -50,6 +50,7 @@ func makeTestingHost(t *testing.T) host.Host {
 }

 func testingConsensus(t *testing.T, idn int) *Consensus {
+	cleanRaft(idn)
 	h := makeTestingHost(t)
 	st := mapstate.NewMapState()
@@ -72,6 +73,7 @@ func TestShutdownConsensus(t *testing.T) {
 	// Bring it up twice to make sure shutdown cleans up properly
 	// but also to make sure raft comes up ok when re-initialized
 	cc := testingConsensus(t, 1)
+	defer cleanRaft(1)
 	err := cc.Shutdown()
 	if err != nil {
 		t.Fatal("Consensus cannot shutdown:", err)


@@ -7,9 +7,9 @@ for dir in $dirs;
 do
     if ls "$dir"/*.go &> /dev/null;
     then
-        cmdflags="-timeout 20m -v -coverprofile=profile.out -covermode=count $dir"
+        cmdflags="-v -coverprofile=profile.out -covermode=count $dir"
        if [ "$dir" == "." ]; then
-            cmdflags="-timeout 20m -v -coverprofile=profile.out -covermode=count -loglevel CRITICAL ."
+            cmdflags="-v -coverprofile=profile.out -covermode=count -loglevel CRITICAL ."
        fi
        echo go test $cmdflags
        go test $cmdflags


@@ -35,7 +35,7 @@ var (
 	nClusters = 6

 	// number of pins to pin/unpin/check
-	nPins = 500
+	nPins = 100

 	logLevel = "CRITICAL"
@@ -205,17 +205,29 @@ func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) {
 	for i := 1; i < nClusters; i++ {
 		cfgs[i].Bootstrap = []ma.Multiaddr{bootstrapAddr}
 	}
-	time.Sleep(200 * time.Millisecond)

 	// Start the rest
-	var wg sync.WaitGroup
+	// We don't do this in parallel because it causes libp2p dial backoffs
 	for i := 1; i < nClusters; i++ {
-		wg.Add(1)
-		go func(i int) {
-			clusters[i] = createCluster(t, cfgs[i], concfgs[i], apis[i], ipfss[i], states[i], trackers[i], mons[i], allocs[i], infs[i])
-			wg.Done()
-		}(i)
+		clusters[i] = createCluster(t, cfgs[i], concfgs[i], apis[i], ipfss[i], states[i], trackers[i], mons[i], allocs[i], infs[i])
+		time.Sleep(200 * time.Millisecond)
+	}
+
+	// open connections among all peers. This ensures smoother operations.
+	// Best effort. Some errors do happen.
+	for _, c := range clusters {
+		peers, err := c.consensus.Peers()
+		if err != nil {
+			shutdownClusters(t, clusters, ipfsMocks)
+			t.Fatal(err)
+		}
+		for _, p := range peers {
+			if p != c.id {
+				c.host.Network().DialPeer(c.ctx, p)
+			}
+		}
 	}
-	wg.Wait()
@@ -223,8 +235,7 @@ func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) {
 	// for i := 1; i < nClusters; i++ {
 	//	clusters[0].PeerAdd(clusterAddr(clusters[i]))
 	// }
-	delay()
-	delay()
 	return clusters, ipfsMocks
 }
@@ -255,26 +266,31 @@ func runF(t *testing.T, clusters []*Cluster, f func(*testing.T, *Cluster)) {
 func delay() {
 	var d int
 	if nClusters > 10 {
-		d = 8
-	} else if nClusters > 5 {
-		d = 5
+		d = 2000
 	} else {
-		d = nClusters
+		d = 1000
 	}
-	time.Sleep(time.Duration(d) * time.Second)
+	time.Sleep(time.Duration(d) * time.Millisecond)
 }

-func waitForLeader(t *testing.T, clusters []*Cluster) {
-	timer := time.NewTimer(time.Minute)
-	ticker := time.NewTicker(time.Second)
-	// Wait for consensus to pick a new leader in case we shut it down
-	// Make sure we don't check on a shutdown cluster
-	j := rand.Intn(len(clusters))
-	for clusters[j].shutdownB {
-		j = rand.Intn(len(clusters))
-	}
+func pinDelay() {
+	time.Sleep(400 * time.Millisecond)
+}
+
+func ttlDelay() {
+	diskInfCfg := &disk.Config{}
+	diskInfCfg.LoadJSON(testingDiskInfCfg)
+	time.Sleep(diskInfCfg.MetricTTL * 3)
+}
+
+// Waits for consensus to pick a new leader in case we shut it down
+// Makes sure all peers know about it.
+// Makes sure new metrics have come in for the new leader.
+func waitForLeader(t *testing.T, clusters []*Cluster) {
+	ttlDelay()
+	timer := time.NewTimer(time.Minute)
+	ticker := time.NewTicker(time.Second / 4)

 loop:
 	for {
@@ -282,12 +298,20 @@ loop:
 		case <-timer.C:
 			t.Fatal("timed out waiting for a leader")
 		case <-ticker.C:
-			_, err := clusters[j].consensus.Leader()
-			if err == nil {
-				break loop
+			for _, cl := range clusters {
+				if cl.shutdownB {
+					continue // skip shutdown clusters
+				}
+				_, err := cl.consensus.Leader()
+				if err != nil {
+					continue loop
+				}
 			}
+			break loop
 		}
 	}
+
+	ttlDelay()
 }

 func TestClustersVersion(t *testing.T) {
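Taken together, pinDelay, ttlDelay and the new waitForLeader replace the old fixed sleeps. A hypothetical usage sketch follows; it is not part of this commit and assumes the helpers and imports already present in ipfscluster_test.go:

// Hypothetical sketch only: wait for metrics, pin, wait for the pin to
// settle, then assert on tracker state on every peer.
func TestPinFlowSketch(t *testing.T) {
	clusters, mocks := createClusters(t)
	defer shutdownClusters(t, clusters, mocks)

	ttlDelay() // make sure every peer has fresh metrics before allocating

	h, _ := cid.Decode(test.TestCid1)
	if err := clusters[0].Pin(api.PinCid(h)); err != nil {
		t.Fatal(err)
	}

	pinDelay() // give the pin time to commit and reach the trackers

	runF(t, clusters, func(t *testing.T, c *Cluster) {
		if c.tracker.Status(h).Status != api.TrackerStatusPinned {
			t.Error("expected the cid to be tracked as pinned")
		}
	})
}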
@@ -305,7 +329,6 @@ func TestClustersVersion(t *testing.T) {
 func TestClustersPeers(t *testing.T) {
 	clusters, mock := createClusters(t)
 	defer shutdownClusters(t, clusters, mock)
-	delay()

 	j := rand.Intn(nClusters) // choose a random cluster peer
 	peers := clusters[j].Peers()
@@ -345,6 +368,9 @@ func TestClustersPin(t *testing.T) {
 	defer shutdownClusters(t, clusters, mock)
 	exampleCid, _ := cid.Decode(test.TestCid1)
 	prefix := exampleCid.Prefix()
+
+	ttlDelay()
+
 	for i := 0; i < nPins; i++ {
 		j := rand.Intn(nClusters) // choose a random cluster peer
 		h, err := prefix.Sum(randomBytes()) // create random cid
@@ -360,6 +386,7 @@ func TestClustersPin(t *testing.T) {
 		}
 	}
 	delay()
+	delay()
 	fpinned := func(t *testing.T, c *Cluster) {
 		status := c.tracker.StatusAll()
 		for _, v := range status {
@@ -378,7 +405,7 @@ func TestClustersPin(t *testing.T) {
 	// Unpin everything
 	pinList := clusters[0].Pins()

-	for i := 0; i < nPins; i++ {
+	for i := 0; i < len(pinList); i++ {
 		j := rand.Intn(nClusters) // choose a random cluster peer
 		err := clusters[j].Unpin(pinList[i].Cid)
 		if err != nil {
@@ -392,6 +419,7 @@ func TestClustersPin(t *testing.T) {
 	}

 	delay()
+	delay()

 	funpinned := func(t *testing.T, c *Cluster) {
 		status := c.tracker.StatusAll()
@@ -408,7 +436,7 @@ func TestClustersStatusAll(t *testing.T) {
 	defer shutdownClusters(t, clusters, mock)
 	h, _ := cid.Decode(test.TestCid1)
 	clusters[0].Pin(api.PinCid(h))
-	delay()
+	pinDelay()
 	// Global status
 	f := func(t *testing.T, c *Cluster) {
 		statuses, err := c.StatusAll()
@@ -452,10 +480,11 @@ func TestClustersStatusAllWithErrors(t *testing.T) {
 	defer shutdownClusters(t, clusters, mock)
 	h, _ := cid.Decode(test.TestCid1)
 	clusters[0].Pin(api.PinCid(h))
-	delay()
+	pinDelay()

 	// shutdown 1 cluster peer
 	clusters[1].Shutdown()
+	delay()

 	f := func(t *testing.T, c *Cluster) {
 		// skip if it's the shutdown peer
@@ -513,7 +542,9 @@ func TestClustersSyncAllLocal(t *testing.T) {
 	h2, _ := cid.Decode(test.TestCid2)
 	clusters[0].Pin(api.PinCid(h))
 	clusters[0].Pin(api.PinCid(h2))
-	delay()
+	pinDelay()
+	pinDelay()

 	f := func(t *testing.T, c *Cluster) {
 		// Sync bad ID
 		infos, err := c.SyncAllLocal()
@@ -541,7 +572,8 @@ func TestClustersSyncLocal(t *testing.T) {
 	h2, _ := cid.Decode(test.TestCid2)
 	clusters[0].Pin(api.PinCid(h))
 	clusters[0].Pin(api.PinCid(h2))
-	delay()
+	pinDelay()
+	pinDelay()

 	f := func(t *testing.T, c *Cluster) {
 		info, err := c.SyncLocal(h)
@@ -572,7 +604,8 @@ func TestClustersSyncAll(t *testing.T) {
 	h2, _ := cid.Decode(test.TestCid2)
 	clusters[0].Pin(api.PinCid(h))
 	clusters[0].Pin(api.PinCid(h2))
-	delay()
+	pinDelay()
+	pinDelay()

 	j := rand.Intn(nClusters) // choose a random cluster peer
 	ginfos, err := clusters[j].SyncAll()
@@ -603,7 +636,8 @@ func TestClustersSync(t *testing.T) {
 	h2, _ := cid.Decode(test.TestCid2)
 	clusters[0].Pin(api.PinCid(h))
 	clusters[0].Pin(api.PinCid(h2))
-	delay()
+	pinDelay()
+	pinDelay()

 	j := rand.Intn(nClusters)
 	ginfo, err := clusters[j].Sync(h)
@@ -662,10 +696,13 @@ func TestClustersRecoverLocal(t *testing.T) {
 	defer shutdownClusters(t, clusters, mock)
 	h, _ := cid.Decode(test.ErrorCid) // This cid always fails
 	h2, _ := cid.Decode(test.TestCid2)
+
+	ttlDelay()
+
 	clusters[0].Pin(api.PinCid(h))
 	clusters[0].Pin(api.PinCid(h2))
-
-	delay()
+	pinDelay()
+	pinDelay()

 	f := func(t *testing.T, c *Cluster) {
 		info, err := c.RecoverLocal(h)
@@ -694,10 +731,14 @@ func TestClustersRecover(t *testing.T) {
 	defer shutdownClusters(t, clusters, mock)
 	h, _ := cid.Decode(test.ErrorCid) // This cid always fails
 	h2, _ := cid.Decode(test.TestCid2)
+
+	ttlDelay()
+
 	clusters[0].Pin(api.PinCid(h))
 	clusters[0].Pin(api.PinCid(h2))
-	delay()
+
+	pinDelay()
+	pinDelay()

 	j := rand.Intn(nClusters)
 	ginfo, err := clusters[j].Recover(h)
@@ -771,6 +812,8 @@ func TestClustersReplication(t *testing.T) {
 		c.config.ReplicationFactorMax = nClusters - 1
 	}

+	ttlDelay()
+
 	// Why is replication factor nClusters - 1?
 	// Because that way we know that pinning nCluster
 	// pins with an strategy like numpins/disk
@@ -789,7 +832,7 @@ func TestClustersReplication(t *testing.T) {
 		if err != nil {
 			t.Error(err)
 		}
-		time.Sleep(time.Second)
+		pinDelay()

 		// check that it is held by exactly nClusters -1 peers
 		gpi, err := clusters[j].Status(h)
@@ -814,7 +857,7 @@ func TestClustersReplication(t *testing.T) {
 		if numRemote != 1 {
 			t.Errorf("We wanted 1 peer track as remote but %d do", numRemote)
 		}
-		time.Sleep(time.Second) // this is for metric to be up to date
+		ttlDelay()
 	}

 	f := func(t *testing.T, c *Cluster) {
@@ -875,13 +918,15 @@ func TestClustersReplicationFactorMax(t *testing.T) {
 		c.config.ReplicationFactorMax = nClusters - 1
 	}

+	ttlDelay()
+
 	h, _ := cid.Decode(test.TestCid1)
 	err := clusters[0].Pin(api.PinCid(h))
 	if err != nil {
 		t.Fatal(err)
 	}

-	delay()
+	pinDelay()

 	f := func(t *testing.T, c *Cluster) {
 		p, err := c.PinGet(h)
@@ -918,13 +963,15 @@ func TestClustersReplicationFactorMaxLower(t *testing.T) {
 		c.config.ReplicationFactorMax = nClusters
 	}

+	ttlDelay() // make sure we have places to pin
+
 	h, _ := cid.Decode(test.TestCid1)
 	err := clusters[0].Pin(api.PinCid(h))
 	if err != nil {
 		t.Fatal(err)
 	}

-	delay()
+	pinDelay()

 	p1, err := clusters[0].PinGet(h)
 	if err != nil {
@@ -944,7 +991,7 @@ func TestClustersReplicationFactorMaxLower(t *testing.T) {
 		t.Fatal(err)
 	}

-	delay()
+	pinDelay()

 	p2, err := clusters[0].PinGet(h)
 	if err != nil {
@@ -970,24 +1017,21 @@ func TestClustersReplicationFactorInBetween(t *testing.T) {
 		c.config.ReplicationFactorMax = nClusters
 	}

+	ttlDelay()
+
 	// Shutdown two peers
 	clusters[nClusters-1].Shutdown()
 	clusters[nClusters-2].Shutdown()

-	time.Sleep(time.Second) // let metric expire
 	waitForLeader(t, clusters)

-	// allow metrics to arrive to new leader
-	delay()
-
 	h, _ := cid.Decode(test.TestCid1)
 	err := clusters[0].Pin(api.PinCid(h))
 	if err != nil {
 		t.Fatal(err)
 	}

-	delay()
+	pinDelay()

 	f := func(t *testing.T, c *Cluster) {
 		if c == clusters[nClusters-1] || c == clusters[nClusters-2] {
@@ -1029,14 +1073,9 @@ func TestClustersReplicationFactorMin(t *testing.T) {
 	// Shutdown two peers
 	clusters[nClusters-1].Shutdown()
-	clusters[nClusters-2].Shutdown()
-
-	time.Sleep(time.Second) // let metric expire
 	waitForLeader(t, clusters)
-
-	// allow metrics to arrive to new leader
-	delay()
+	clusters[nClusters-2].Shutdown()
+	waitForLeader(t, clusters)

 	h, _ := cid.Decode(test.TestCid1)
 	err := clusters[0].Pin(api.PinCid(h))
@@ -1063,28 +1102,29 @@ func TestClustersReplicationMinMaxNoRealloc(t *testing.T) {
 		c.config.ReplicationFactorMax = nClusters
 	}

+	ttlDelay()
+
 	h, _ := cid.Decode(test.TestCid1)
 	err := clusters[0].Pin(api.PinCid(h))
 	if err != nil {
 		t.Fatal(err)
 	}

+	pinDelay()
+
 	// Shutdown two peers
 	clusters[nClusters-1].Shutdown()
-	clusters[nClusters-2].Shutdown()
-
-	time.Sleep(time.Second) // let metric expire
 	waitForLeader(t, clusters)
-
-	// allow metrics to arrive to new leader
-	delay()
+	clusters[nClusters-2].Shutdown()
+	waitForLeader(t, clusters)

 	err = clusters[0].Pin(api.PinCid(h))
 	if err != nil {
 		t.Fatal(err)
 	}

+	pinDelay()
+
 	p, err := clusters[0].PinGet(h)
 	if err != nil {
 		t.Fatal(err)
@@ -1114,13 +1154,15 @@ func TestClustersReplicationMinMaxRealloc(t *testing.T) {
 		c.config.ReplicationFactorMax = 4
 	}

+	ttlDelay() // make sure metrics are in
+
 	h, _ := cid.Decode(test.TestCid1)
 	err := clusters[0].Pin(api.PinCid(h))
 	if err != nil {
 		t.Fatal(err)
 	}

-	delay()
+	pinDelay()

 	p, err := clusters[0].PinGet(h)
 	if err != nil {
@@ -1142,19 +1184,16 @@ func TestClustersReplicationMinMaxRealloc(t *testing.T) {
 	alloc1.Shutdown()
 	alloc2.Shutdown()

-	time.Sleep(time.Second) // let metric expire
 	waitForLeader(t, clusters)

-	// allow metrics to arrive to new leader
-	delay()
-
 	// Repin - (although this might have been taken of if there was an alert
 	err = safePeer.Pin(api.PinCid(h))
 	if err != nil {
 		t.Fatal(err)
 	}

+	pinDelay()
+
 	p, err = safePeer.PinGet(h)
 	if err != nil {
 		t.Fatal(err)
@@ -1176,7 +1215,7 @@ func TestClustersReplicationMinMaxRealloc(t *testing.T) {
 	lenSA := len(secondAllocations)
 	expected := minInt(nClusters-2, 4)
 	if lenSA != expected {
-		t.Errorf("Inssufficient reallocation, could have allocated to %d peers but instead only allocated to %d peers", expected, lenSA)
+		t.Errorf("Insufficient reallocation, could have allocated to %d peers but instead only allocated to %d peers", expected, lenSA)
 	}

 	if lenSA < 3 {
@@ -1194,6 +1233,8 @@ func TestClustersReplicationRealloc(t *testing.T) {
 		c.config.ReplicationFactorMax = nClusters - 1
 	}

+	ttlDelay()
+
 	j := rand.Intn(nClusters)
 	h, _ := cid.Decode(test.TestCid1)
 	err := clusters[j].Pin(api.PinCid(h))
@@ -1202,7 +1243,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 	}

 	// Let the pin arrive
-	time.Sleep(time.Second / 2)
+	pinDelay()

 	pin := clusters[j].Pins()[0]
 	pinSerial := pin.ToSerial()
@@ -1217,7 +1258,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 		t.Fatal(err)
 	}

-	time.Sleep(time.Second / 2)
+	pinDelay()

 	pin2 := clusters[j].Pins()[0]
 	pinSerial2 := pin2.ToSerial()
@@ -1245,10 +1286,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 	// let metrics expire and give time for the cluster to
 	// see if they have lost the leader
-	time.Sleep(4 * time.Second)
 	waitForLeader(t, clusters)

-	// wait for new metrics to arrive
-	time.Sleep(2 * time.Second)
-
 	// Make sure we haven't killed our randomly
 	// selected cluster
@@ -1262,7 +1300,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 		t.Fatal(err)
 	}

-	time.Sleep(time.Second / 2)
+	pinDelay()

 	numPinned := 0
 	for i, c := range clusters {
@@ -1303,12 +1341,11 @@ func TestClustersReplicationNotEnoughPeers(t *testing.T) {
 	}

 	// Let the pin arrive
-	time.Sleep(time.Second / 2)
+	pinDelay()

 	clusters[0].Shutdown()
 	clusters[1].Shutdown()

-	delay()
 	waitForLeader(t, clusters)

 	err = clusters[2].Pin(api.PinCid(h))
@@ -1337,7 +1374,7 @@ func TestClustersRebalanceOnPeerDown(t *testing.T) {
 	// pin something
 	h, _ := cid.Decode(test.TestCid1)
 	clusters[0].Pin(api.PinCid(h))
-	time.Sleep(time.Second * 2) // let the pin arrive
+	pinDelay()

 	pinLocal := 0
 	pinRemote := 0
 	var localPinner peer.ID
@@ -1361,7 +1398,7 @@ func TestClustersRebalanceOnPeerDown(t *testing.T) {
 		t.Fatal("Not pinned as expected")
 	}

-	// find a kill the local pinner
+	// kill the local pinner
 	for _, c := range clusters {
 		if c.id == localPinner {
 			c.Shutdown()
@@ -1370,8 +1407,8 @@ func TestClustersRebalanceOnPeerDown(t *testing.T) {
 		}
 	}

-	// Sleep a monitoring interval
-	time.Sleep(6 * time.Second)
+	delay()
+	waitForLeader(t, clusters) // in case we killed the leader

 	// It should be now pinned in the remote pinner
 	if s := remotePinnerCluster.tracker.Status(h).Status; s != api.TrackerStatusPinned {
@@ -1452,8 +1489,6 @@ func validateClusterGraph(t *testing.T, graph api.ConnectGraph, clusterIDs map[p
 func TestClustersGraphConnected(t *testing.T) {
 	clusters, mock := createClusters(t)
 	defer shutdownClusters(t, clusters, mock)
-	delay()
-	delay()

 	j := rand.Intn(nClusters) // choose a random cluster peer to query
 	graph, err := clusters[j].ConnectGraph()
@@ -1496,9 +1531,8 @@ func TestClustersGraphUnhealthy(t *testing.T) {
 	clusters[discon1].Shutdown()
 	clusters[discon2].Shutdown()

-	delay()
 	waitForLeader(t, clusters)
 	delay()

 	graph, err := clusters[j].ConnectGraph()
 	if err != nil {


@@ -22,7 +22,7 @@ var logger = logging.Logger("monitor")
 var AlertChannelCap = 256

 // WindowCap specifies how many metrics to keep for given host and metric type
-var WindowCap = 10
+var WindowCap = 100

 // peerMetrics is just a circular queue
 type peerMetrics struct {
@@ -55,6 +55,7 @@ func (pmets *peerMetrics) latest() (api.Metric, error) {
 	// pmets.mux.RLock()
 	// defer pmets.mux.RUnlock()
 	if len(pmets.window) == 0 {
+		logger.Warning("no metrics")
 		return api.Metric{}, errors.New("no metrics")
 	}
 	return pmets.window[pmets.last], nil


@@ -29,7 +29,6 @@ func peerManagerClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) {
 		}(i)
 	}
 	wg.Wait()
-	delay()
 	return cls, mocks
 }
@@ -65,7 +64,7 @@ func TestClustersPeerAdd(t *testing.T) {
 		if err != nil {
 			t.Fatal(err)
 		}
-		delay()
+		pinDelay()

 		f := func(t *testing.T, c *Cluster) {
 			ids := c.Peers()
@@ -88,8 +87,6 @@ func TestClustersPeerAdd(t *testing.T) {
 			t.Error("By now cluster peers should reflect all peers")
 		}

-		time.Sleep(2 * time.Second)
-
 		// check that they are part of the configuration
 		// This only works because each peer only has one multiaddress
 		// (localhost)
@@ -214,6 +211,7 @@ func TestClustersPeerRemoveSelf(t *testing.T) {
 	defer shutdownClusters(t, clusters, mocks)

 	for i := 0; i < len(clusters); i++ {
+		waitForLeader(t, clusters)
 		peers := clusters[i].Peers()
 		t.Logf("Current cluster size: %d", len(peers))
 		if len(peers) != (len(clusters) - i) {
@@ -286,7 +284,7 @@ func TestClustersPeerRemoveLeader(t *testing.T) {
 		if more {
 			t.Error("should be done")
 		}
-		time.Sleep(time.Second)
+		time.Sleep(time.Second / 2)
 	}
 }
@@ -341,10 +339,10 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) {
 		checkErr(t, err)
 		err = leader.Pin(api.PinCid(h))
 		checkErr(t, err)
-		time.Sleep(time.Second) // time to update the metrics
+		ttlDelay()
 	}

-	delay()
+	pinDelay()

 	// At this point, all peers must have 1 pin associated to them.
 	// Find out which pin is associated to leader.
@@ -373,9 +371,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) {
 		t.Fatal("error removing peer:", err)
 	}

-	time.Sleep(2 * time.Second)
 	waitForLeader(t, clusters)
-	delay()

 	for _, icid := range interestingCids {
 		// Now check that the allocations are new.
@@ -405,7 +401,7 @@ func TestClustersPeerJoin(t *testing.T) {
 	}

 	hash, _ := cid.Decode(test.TestCid1)
 	clusters[0].Pin(api.PinCid(hash))
-	delay()
+	pinDelay()

 	f := func(t *testing.T, c *Cluster) {
 		peers := c.Peers()
@@ -438,7 +434,7 @@ func TestClustersPeerJoinAllAtOnce(t *testing.T) {
 	hash, _ := cid.Decode(test.TestCid1)
 	clusters[0].Pin(api.PinCid(hash))
-	delay()
+	pinDelay()

 	f2 := func(t *testing.T, c *Cluster) {
 		peers := c.Peers()
@@ -555,7 +551,7 @@ func TestClustersPeerRejoin(t *testing.T) {
 		t.Fatal(err)
 	}

-	delay()
+	pinDelay()

 	// Rejoin c0
 	c0, m0 := createOnePeerCluster(t, 0, testingClusterSecret)


@@ -1,10 +1,6 @@
 package ipfscluster

-import (
-	"testing"
-
-	pnet "github.com/libp2p/go-libp2p-pnet"
-)
+import "testing"

 func TestClusterSecretFormat(t *testing.T) {
 	goodSecret := "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
@@ -57,28 +53,29 @@ func TestSimplePNet(t *testing.T) {
 	}
 }

-func TestClusterSecretRequired(t *testing.T) {
-	cl1Secret, err := pnet.GenerateV1Bytes()
-	if err != nil {
-		t.Fatal("Unable to generate cluster secret.")
-	}
-	cl1, _ := createOnePeerCluster(t, 1, (*cl1Secret)[:])
-	cl2, _ := createOnePeerCluster(t, 2, testingClusterSecret)
-	defer cleanRaft()
-	defer cl1.Shutdown()
-	defer cl2.Shutdown()
-	peers1 := cl1.Peers()
-	peers2 := cl2.Peers()
-
-	_, err = cl1.PeerAdd(clusterAddr(cl2))
-	if err == nil {
-		t.Fatal("Peer entered private cluster without key.")
-	}
-
-	if len(peers1) != len(peers2) {
-		t.Fatal("Expected same number of peers")
-	}
-	if len(peers1) != 1 {
-		t.Fatal("Expected no peers other than self")
-	}
-}
+// // Adds one minute to tests. Disabled for the moment.
+// func TestClusterSecretRequired(t *testing.T) {
+// 	cl1Secret, err := pnet.GenerateV1Bytes()
+// 	if err != nil {
+// 		t.Fatal("Unable to generate cluster secret.")
+// 	}
+// 	cl1, _ := createOnePeerCluster(t, 1, (*cl1Secret)[:])
+// 	cl2, _ := createOnePeerCluster(t, 2, testingClusterSecret)
+// 	defer cleanRaft()
+// 	defer cl1.Shutdown()
+// 	defer cl2.Shutdown()
+// 	peers1 := cl1.Peers()
+// 	peers2 := cl2.Peers()
+//
+// 	_, err = cl1.PeerAdd(clusterAddr(cl2))
+// 	if err == nil {
+// 		t.Fatal("Peer entered private cluster without key.")
+// 	}
+//
+// 	if len(peers1) != len(peers2) {
+// 		t.Fatal("Expected same number of peers")
+// 	}
+// 	if len(peers1) != 1 {
+// 		t.Fatal("Expected no peers other than self")
+// 	}
+// }