From 99217ea84e3dd67911ba52c3ad907e9e4b51d741 Mon Sep 17 00:00:00 2001
From: ZenGround0 <wtd37@cornell.edu>
Date: Mon, 3 Jul 2017 09:49:25 -0700
Subject: [PATCH 1/5] Travis sharness (#110)

* Added travis sharness build

* ipfs-cluster-ctl help text width overflow fixed

* Expect success from fixed sharness test

* First travis-sharness PR touch up
Undoes gx rewrite of cluster-ctl dependencies
Undoess linebreaking of usage descriptions
Changes test to check for lines in excess of 120 chars instead of 80
run-sharness script now correctly tracks exit codes and exits with error
---
 .travis.yml                          |  1 +
 sharness/run-sharness-tests.sh       |  9 +++++++++
 sharness/t0010-ctl-basic-commands.sh | 10 +++++-----
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index eda3f75d..a7ca47b3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,6 +7,7 @@ install:
 - make deps
 script:
 - make service && make ctl && ./coverage.sh
+- make test_sharness && make clean_sharness
 env:
   global:
     secure: M3K3y9+D933tCda7+blW3qqVV8fA6PBDRdJoQvmQc1f0XYbWinJ+bAziFp6diKkF8sMQ+cPwLMONYJuaNT2h7/PkG+sIwF0PuUo5VVCbhGmSDrn2qOjmSnfawNs8wW31f44FQA8ICka1EFZcihohoIMf0e5xZ0tXA9jqw+ngPJiRnv4zyzC3r6t4JMAZcbS9w4KTYpIev5Yj72eCvk6lGjadSVCDVXo2sVs27tNt+BSgtMXiH6Sv8GLOnN2kFspGITgivHgB/jtU6QVtFXB+cbBJJAs3lUYnzmQZ5INecbjweYll07ilwFiCVNCX67+L15gpymKGJbQggloIGyTWrAOa2TMaB/bvblzwwQZ8wE5P3Rss5L0TFkUAcdU+3BUHM+TwV4e8F9x10v1PjgWNBRJQzd1sjKKgGUBCeyCY7VeYDKn9AXI5llISgY/AAfCZwm2cbckMHZZJciMjm+U3Q1FCF+rfhlvUcMG1VEj8r9cGpmWIRjFYVm0NmpUDDNjlC3/lUfTCOOJJyM254EUw63XxabbK6EtDN1yQe8kYRcXH//2rtEwgtMBgqHVY+OOkekzGz8Ra3EBkh6jXrAQL3zKu/GwRlK7/a1OU5MQ7dWcTjbx1AQ6Zfyjg5bZ+idqPgMbqM9Zn2+OaSby8HEEXS0QeZVooDVf/6wdYO4MQ/0A=
diff --git a/sharness/run-sharness-tests.sh b/sharness/run-sharness-tests.sh
index aa513c87..29779904 100755
--- a/sharness/run-sharness-tests.sh
+++ b/sharness/run-sharness-tests.sh
@@ -2,10 +2,12 @@
 
 # Run tests
 cd "$(dirname "$0")"
+exit_codes=()
 for i in t*.sh;
 do
     echo "*** $i ***"
     ./$i
+    exit_codes+=($?)
 done
 
 # Aggregate Results
@@ -13,3 +15,10 @@ echo "Aggregating..."
 for f in test-results/*.counts; do
     echo "$f";
 done | bash lib/sharness/aggregate-results.sh
+
+# If any test fails error on exit
+for exit_code in "${exit_codes[@]}"; do
+    if [[ $exit_code != 0 ]]; then
+        exit 1
+    fi
+done
diff --git a/sharness/t0010-ctl-basic-commands.sh b/sharness/t0010-ctl-basic-commands.sh
index 5d954e20..662d0221 100755
--- a/sharness/t0010-ctl-basic-commands.sh
+++ b/sharness/t0010-ctl-basic-commands.sh
@@ -21,11 +21,11 @@ test_expect_success "cluster-ctl help commands succeed" '
     ipfs-cluster-ctl help
 '
 
-test_expect_failure "cluster-ctl help has 80 char limits" '
+test_expect_success "cluster-ctl help has 120 char limits" '
     ipfs-cluster-ctl --help >help.txt &&
     test_when_finished "rm help.txt" &&
     LENGTH="$(cat help.txt | awk '"'"'{print length }'"'"' | sort -nr | head -n 1)" &&
-    [ ! "$LENGTH" -gt 80 ]
+    [ ! "$LENGTH" -gt 120 ]
 '
 
 test_expect_success "cluster-ctl help output looks good" '
@@ -47,15 +47,15 @@ test_expect_success "cluster-ctl commands output looks good" '
     egrep -q "ipfs-cluster-ctl commands" commands.txt
 '
 
-test_expect_success "All cluster-ctl command docs are 80 columns or less" '
+test_expect_success "All cluster-ctl command docs are 120 columns or less" '
     export failure="0" &&
     ipfs-cluster-ctl commands | awk "NF" >commands.txt &&
     test_when_finished "rm commands.txt" &&
     while read cmd
     do
         LENGTH="$($cmd --help | awk "{ print length }" | sort -nr | head -n 1)"
-        [ "$LENGTH" -gt 80 ] &&
-            { echo "$cmd" help text is longer than 79 chars "($LENGTH)"; export failure="1"; }
+        [ "$LENGTH" -gt 120 ] &&
+            { echo "$cmd" help text is longer than 119 chars "($LENGTH)"; export failure="1"; }
     done <commands.txt
 
     if [ $failure -eq "1" ]; then

From 4ea1777050c82479ea11cf4b3c42925c3e277890 Mon Sep 17 00:00:00 2001
From: Hector Sanjuan <hector@protocol.ai>
Date: Mon, 3 Jul 2017 20:02:27 +0200
Subject: [PATCH 2/5] Fix sharness tests

Use docker to run ipfs
Improve intialization of daemons
Fix a bunch of tests
Improve run script
Make sure everything is shell-compatible (remove bash syntax)
Fit to run in travis
---
 .travis.yml                                  |  7 ++
 sharness/lib/test-lib.sh                     | 83 +++++++++++++++-----
 sharness/run-sharness-tests.sh               | 27 ++++---
 sharness/t0010-ctl-basic-commands.sh         |  4 +-
 sharness/t0020-service-basic-commands.sh     | 12 ++-
 sharness/t0025-ctl-status-report-commands.sh | 19 ++---
 sharness/t0030-ctl-pin.sh                    | 12 +--
 7 files changed, 106 insertions(+), 58 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a7ca47b3..e411dc8b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,12 +1,19 @@
 language: go
 go:
 - '1.8'
+services:
+  - docker
+before_install:
+  - docker pull ipfs/go-ipfs
+  - sudo apt-get update
+  - sudo apt-get install -y jq curl
 install:
 - go get golang.org/x/tools/cmd/cover
 - go get github.com/mattn/goveralls
 - make deps
 script:
 - make service && make ctl && ./coverage.sh
+- make install
 - make test_sharness && make clean_sharness
 env:
   global:
diff --git a/sharness/lib/test-lib.sh b/sharness/lib/test-lib.sh
index da28350a..06d23baf 100755
--- a/sharness/lib/test-lib.sh
+++ b/sharness/lib/test-lib.sh
@@ -5,51 +5,92 @@
 
 SHARNESS_LIB="lib/sharness/sharness.sh"
 
+# Daemons output will be redirected to...
+IPFS_OUTPUT="/dev/null" # change for debugging
+# IPFS_OUTPUT="/dev/stderr" # change for debugging
+
 . "$SHARNESS_LIB" || {
     echo >&2 "Cannot source: $SHARNESS_LIB"
     echo >&2 "Please check Sharness installation."
     exit 1
 }
 
-if [ $(command -v jq) ]; then
+which jq >/dev/null 2>&1
+if [ $? -eq 0 ]; then
     test_set_prereq JQ
 fi
 
 # Set prereqs
 test_ipfs_init() {
-    ipfs help | egrep -q -i "^Usage" &&
-    IPFS_TEMP_DIR=`mktemp -d ipfs-XXXXX` && # Store in TEMP_DIR for safer delete
-    export IPFS_PATH=$IPFS_TEMP_DIR &&
-    ipfs init &&
-    eval 'ipfs daemon & export IPFS_D_PID=`echo $!`' && # Esoteric, but gets correct value of $!
-    sleep 2 &&
+    which docker >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        echo "Docker not found"
+        exit 1
+    fi
+    if docker ps --format '{{.Names}}' | egrep -q '^ipfs$'; then
+        echo "ipfs container already running"
+    else
+        docker run --name ipfs -d -p 127.0.0.1:5001:5001 ipfs/go-ipfs > /dev/null 2>&1
+        if [ $? -ne 0 ]; then
+            echo "Error running go-ipfs in docker."
+            exit 1
+        fi
+        sleep 6
+    fi
     test_set_prereq IPFS
 }
 
+test_ipfs_running() {
+    if curl -s "localhost:5001/api/v0/version" > /dev/null; then
+        test_set_prereq IPFS
+    else
+        echo "IPFS is not running"
+        exit 1
+    fi
+}
+
 test_cluster_init() {
-    ipfs-cluster-service help | egrep -q -i "^Usage" &&
-    CLUSTER_TEMP_DIR=`mktemp -d cluster-XXXXX` &&
-    ipfs-cluster-service -f --config $CLUSTER_TEMP_DIR init &&
-    eval 'ipfs-cluster-service --config $CLUSTER_TEMP_DIR & export CLUSTER_D_PID=`echo $!`' &&
-    sleep 2 &&
+    which ipfs-cluster-service >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        echo "ipfs-cluster-service not found"
+        exit 1
+    fi
+    which ipfs-cluster-ctl >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        echo "ipfs-cluster-ctl not found"
+        exit 1
+    fi
+    export CLUSTER_TEMP_DIR=`mktemp -d cluster-XXXXX`
+    ipfs-cluster-service -f --config "$CLUSTER_TEMP_DIR" init --gen-secret >"$IPFS_OUTPUT" 2>&1
+    if [ $? -ne 0 ]; then
+        echo "error initializing ipfs cluster"
+        exit 1
+    fi
+    ipfs-cluster-service --config "$CLUSTER_TEMP_DIR" >"$IPFS_OUTPUT" 2>&1 &
+    export CLUSTER_D_PID=$!
+    sleep 5
     test_set_prereq CLUSTER
 }
 
 test_cluster_config() {
-    CLUSTER_CONFIG_PATH=$CLUSTER_TEMP_DIR"/service.json" &&
-    CLUSTER_CONFIG_ID=`jq --raw-output ".id" $CLUSTER_CONFIG_PATH` &&
-    CLUSTER_CONFIG_PK=`jq --raw-output ".private_key" $CLUSTER_CONFIG_PATH` &&
-    [ $CLUSTER_CONFIG_ID != null ] &&
-    [ $CLUSTER_CONFIG_PK != null ]
+    export CLUSTER_CONFIG_PATH="${CLUSTER_TEMP_DIR}/service.json"
+    export CLUSTER_CONFIG_ID=`jq --raw-output ".id" $CLUSTER_CONFIG_PATH`
+    export CLUSTER_CONFIG_PK=`jq --raw-output ".private_key" $CLUSTER_CONFIG_PATH`
+    [ "$CLUSTER_CONFIG_ID" != "null" ] && [ "$CLUSTER_CONFIG_PK" != "null" ]
+}
+
+cluster_id() {
+    echo "$CLUSTER_CONFIG_ID"
 }
 
 # Cleanup functions
 test_clean_ipfs(){
-    kill -1 $IPFS_D_PID &&
-    rm -rf $IPFS_TEMP_DIR    # Remove temp_dir not path in case this is called before init
+    docker kill ipfs
+    docker rm ipfs
+    sleep 1
 }
 
 test_clean_cluster(){
-    kill -1 $CLUSTER_D_PID &&
-    rm -rf $CLUSTER_TEMP_DIR
+    kill -1 "$CLUSTER_D_PID"
+    rm -rf "$CLUSTER_TEMP_DIR"
 }
diff --git a/sharness/run-sharness-tests.sh b/sharness/run-sharness-tests.sh
index 29779904..df8405f0 100755
--- a/sharness/run-sharness-tests.sh
+++ b/sharness/run-sharness-tests.sh
@@ -1,13 +1,17 @@
-#!/bin/sh
+#!/bin/bash
 
 # Run tests
 cd "$(dirname "$0")"
-exit_codes=()
-for i in t*.sh;
+statuses=0
+for i in t0*.sh;
 do
     echo "*** $i ***"
     ./$i
-    exit_codes+=($?)
+    status=$?
+    statuses=$((statuses + $status))
+    if [ $status -ne 0 ]; then
+       echo "Test $i failed"
+    fi
 done
 
 # Aggregate Results
@@ -16,9 +20,12 @@ for f in test-results/*.counts; do
     echo "$f";
 done | bash lib/sharness/aggregate-results.sh
 
-# If any test fails error on exit
-for exit_code in "${exit_codes[@]}"; do
-    if [[ $exit_code != 0 ]]; then
-        exit 1
-    fi
-done
+# Cleanup results
+rm -rf test-results
+
+# Exit with error if any test has failed
+if [ $statuses -gt 0 ]; then
+    echo $statuses
+    exit 1
+fi
+exit 0
diff --git a/sharness/t0010-ctl-basic-commands.sh b/sharness/t0010-ctl-basic-commands.sh
index 662d0221..fb31b719 100755
--- a/sharness/t0010-ctl-basic-commands.sh
+++ b/sharness/t0010-ctl-basic-commands.sh
@@ -33,10 +33,8 @@ test_expect_success "cluster-ctl help output looks good" '
 '
 
 test_expect_success "cluster-ctl commands output looks good" '
-    ipfs-cluster-ctl commands | awk "NF" >commands.txt &&
+    ipfs-cluster-ctl commands > commands.txt &&
     test_when_finished "rm commands.txt" &&
-    numCmds=`cat commands.txt | wc -l` &&
-    [ $numCmds -eq "8" ] &&
     egrep -q "ipfs-cluster-ctl id" commands.txt &&
     egrep -q "ipfs-cluster-ctl peers" commands.txt &&
     egrep -q "ipfs-cluster-ctl pin" commands.txt &&
diff --git a/sharness/t0020-service-basic-commands.sh b/sharness/t0020-service-basic-commands.sh
index f4657d5f..48ea9fd6 100755
--- a/sharness/t0020-service-basic-commands.sh
+++ b/sharness/t0020-service-basic-commands.sh
@@ -5,7 +5,9 @@ test_description="Test service startup and init functionality"
 . lib/test-lib.sh
 
 test_ipfs_init
+cleanup test_clean_ipfs
 test_cluster_init
+cleanup test_clean_cluster
 
 test_expect_success "prerequisites" '
     test_have_prereq IPFS &&
@@ -21,13 +23,9 @@ test_expect_success "custer-service help output looks good" '
 '
 
 test_expect_success "cluster-service --version succeeds and matches ctl" '
-    export SERV_VERSION=`ipfs-cluster-service --version` &&
-    export CTL_VERSION=`ipfs-cluster-ctl --version` &&
-    sv=($SERV_VERSION) &&
-    cv=($CTL_VERSION) &&
-    [ "${sv[2]}" = "${cv[2]}" ]
+    export SERV_VERSION=`ipfs-cluster-service --version | grep -Po "\d+\.\d+\.\d+"`
+    export CTL_VERSION=`ipfs-cluster-ctl --version | grep -Po "\d+\.\d+\.\d+"`
+    [ "$SERV_VERSION" = "$CTL_VERSION" ]
 '
 
-cleanup test_clean_cluster
-cleanup test_clean_ipfs
 test_done
diff --git a/sharness/t0025-ctl-status-report-commands.sh b/sharness/t0025-ctl-status-report-commands.sh
index d75a82d4..1c9cc2c3 100755
--- a/sharness/t0025-ctl-status-report-commands.sh
+++ b/sharness/t0025-ctl-status-report-commands.sh
@@ -5,19 +5,18 @@ test_description="Test ctl's status reporting functionality.  Test errors on inc
 . lib/test-lib.sh
 
 test_ipfs_init
+cleanup test_clean_ipfs
 test_cluster_init
+cleanup test_clean_cluster
 
 test_expect_success IPFS,CLUSTER,JQ "cluster-ctl can read id" '
-    test_cluster_config &&
-    ipfs-cluster-ctl id | egrep -q -i "$CLUSTER_CONFIG_ID"
+    id=`cluster_id`
+    test_cluster_config && ipfs-cluster-ctl id | egrep -q "$id"
 '
 
 test_expect_success IPFS,CLUSTER "cluster-ctl list 0 peers" '
-    export PEER_OUT=`ipfs-cluster-ctl peers ls` &&
-    sorted_peer_out=$(printf "%s\n" $PEER_OUT | sort -u) &&
-    export SELF_OUT=`ipfs-cluster-ctl id` &&
-    sorted_self_out=$(printf "%s\n" $SELF_OUT | sort -u) &&
-    [ "$sorted_peer_out" = "$sorted_self_out" ]
+    peer_length=`ipfs-cluster-ctl --enc=json peers ls | jq ". | length"`
+    [ $peer_length -eq 1 ]
 '
 
 test_expect_success IPFS,CLUSTER "cluster-ctl add need peer id" '
@@ -56,10 +55,8 @@ test_expect_success IPFS,CLUSTER "pin ls succeeds" '
     ipfs-cluster-ctl pin ls
 '
 
-test_expect_success IPFS,CLUSTER "pin ls on invalid CID succeeds" '
-    ipfs-cluster-ctl pin ls XXXinvalid-CIDXXX
+test_expect_success IPFS,CLUSTER "pin ls on invalid CID fails" '
+    test_must_fail ipfs-cluster-ctl pin ls XXXinvalid-CIDXXX
 '
 
-cleanup test_clean_cluster
-cleanup test_clean_ipfs
 test_done
diff --git a/sharness/t0030-ctl-pin.sh b/sharness/t0030-ctl-pin.sh
index 612544c3..5ac56981 100755
--- a/sharness/t0030-ctl-pin.sh
+++ b/sharness/t0030-ctl-pin.sh
@@ -2,26 +2,26 @@
 
 test_description="Test cluster-ctl's pinning and unpinning functionality"
 
-
 . lib/test-lib.sh
 
 test_ipfs_init
+cleanup test_clean_ipfs
 test_cluster_init
+cleanup test_clean_cluster
+
 
 test_expect_success IPFS,CLUSTER "pin data to cluster with ctl" '
-    cid=$(echo "test" | ipfs add -q) &&
-    echo $cid &&
-    ipfs-cluster-ctl pin add "$cid" &&
+    cid=`docker exec ipfs bash -c "echo test | ipfs add -q"`
+    ipfs-cluster-ctl pin add "$cid" &> test4 &&
     ipfs-cluster-ctl pin ls "$cid" | grep -q "$cid" &&
     ipfs-cluster-ctl status "$cid" | grep -q -i "PINNED"
 '
 
 test_expect_success IPFS,CLUSTER "unpin data from cluster with ctl" '
+    cid=`ipfs-cluster-ctl --enc=json pin ls | jq -r ".[] | .cid" | head -1`
     ipfs-cluster-ctl pin rm "$cid" &&
     !(ipfs-cluster-ctl pin ls "$cid" | grep -q "$cid") &&
     ipfs-cluster-ctl status "$cid" | grep -q -i "UNPINNED"
 '
 
-cleanup test_clean_cluster
-cleanup test_clean_ipfs
 test_done

From 8d3c72b7663bdf8712a0b9301a55482f876b179d Mon Sep 17 00:00:00 2001
From: Hector Sanjuan <hector@protocol.ai>
Date: Thu, 20 Jul 2017 13:18:46 +0200
Subject: [PATCH 3/5] Fix tests: Make metric broadcasting async

When a peer is down, metric broadcasting hangs, and no more
ticks are sent for a while.
---
 .travis.yml                   |  1 +
 Makefile                      |  6 ++++
 cluster.go                    | 65 +++++++++++++++++++++--------------
 ipfscluster_test.go           |  4 ++-
 monitor/basic/peer_monitor.go |  3 +-
 5 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index e411dc8b..b3d07a16 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,6 +12,7 @@ install:
 - go get github.com/mattn/goveralls
 - make deps
 script:
+#- make test_problem
 - make service && make ctl && ./coverage.sh
 - make install
 - make test_sharness && make clean_sharness
diff --git a/Makefile b/Makefile
index 08c48c44..528dc613 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,9 @@ gx-go_bin=$(deptools)/$(gx-go)
 bin_env=$(shell go env GOHOSTOS)-$(shell go env GOHOSTARCH)
 sharness = sharness/lib/sharness
 
+# For debugging
+problematic_test = TestClustersReplicationRealloc
+
 export PATH := $(deptools):$(PATH)
 
 all: service ctl
@@ -63,6 +66,9 @@ test: deps
 test_sharness: $(sharness)
 	@sh sharness/run-sharness-tests.sh
 
+test_problem: deps
+	go test -tags debug -v -run $(problematic_test)
+
 $(sharness):
 	@echo "Downloading sharness"
 	@wget -q -O sharness/lib/sharness.tar.gz http://github.com/chriscool/sharness/archive/master.tar.gz
diff --git a/cluster.go b/cluster.go
index d033e712..9c50c980 100644
--- a/cluster.go
+++ b/cluster.go
@@ -10,10 +10,11 @@ import (
 	"sync"
 	"time"
 
+	pnet "github.com/libp2p/go-libp2p-pnet"
+
 	"github.com/ipfs/ipfs-cluster/api"
 	"github.com/ipfs/ipfs-cluster/consensus/raft"
 	"github.com/ipfs/ipfs-cluster/state"
-	pnet "github.com/libp2p/go-libp2p-pnet"
 
 	rpc "github.com/hsanjuan/go-libp2p-gorpc"
 	cid "github.com/ipfs/go-cid"
@@ -217,31 +218,41 @@ func (c *Cluster) broadcastMetric(m api.Metric) error {
 		return err
 	}
 
-	if leader == c.id {
-		// Leader needs to broadcast its metric to everyone
-		// in case it goes down (new leader will have to detect this node went down)
-		errs := c.multiRPC(peers,
-			"Cluster",
-			"PeerMonitorLogMetric",
-			m,
-			copyEmptyStructToIfaces(make([]struct{}, len(peers), len(peers))))
-		for i, e := range errs {
-			if e != nil {
-				logger.Errorf("error pushing metric to %s: %s", peers[i].Pretty(), e)
-			}
-		}
-	} else {
-		// non-leaders just need to forward their metrics to the leader
-		err := c.rpcClient.Call(leader,
-			"Cluster", "PeerMonitorLogMetric",
-			m, &struct{}{})
-		if err != nil {
-			logger.Error(err)
-			return err
-		}
-	}
+	// If a peer is down, the rpc call will get locked. Therefore,
+	// we need to do it async. This way we keep broadcasting
+	// even if someone is down. Eventually those requests will
+	// timeout in libp2p and the errors logged.
+	go func() {
+		if leader == c.id {
+			// Leader needs to broadcast its metric to everyone
+			// in case it goes down (new leader will have to detect this node went down)
+			logger.Debugf("Leader %s about to broadcast metric %s to %s. Expires: %s", c.id, m.Name, peers, m.Expire)
 
-	logger.Debugf("sent metric %s", m.Name)
+			errs := c.multiRPC(peers,
+				"Cluster",
+				"PeerMonitorLogMetric",
+				m,
+				copyEmptyStructToIfaces(make([]struct{}, len(peers), len(peers))))
+			for i, e := range errs {
+				if e != nil {
+					logger.Errorf("error pushing metric to %s: %s", peers[i].Pretty(), e)
+				}
+			}
+			logger.Debugf("Leader %s broadcasted metric %s to %s. Expires: %s", c.id, m.Name, peers, m.Expire)
+		} else {
+			// non-leaders just need to forward their metrics to the leader
+			logger.Debugf("Peer %s about to send metric %s to %s. Expires: %s", c.id, m.Name, leader, m.Expire)
+
+			err := c.rpcClient.Call(leader,
+				"Cluster", "PeerMonitorLogMetric",
+				m, &struct{}{})
+			if err != nil {
+				logger.Error(err)
+			}
+			logger.Debugf("Peer %s sent metric %s to %s. Expires: %s", c.id, m.Name, leader, m.Expire)
+
+		}
+	}()
 	return nil
 }
 
@@ -272,6 +283,7 @@ func (c *Cluster) pushInformerMetrics() {
 		timer.Stop() // no need to drain C if we are here
 		timer.Reset(metric.GetTTL() / 2)
 	}
+	logger.Debugf("Peer %s. Finished pushInformerMetrics", c.id)
 }
 
 func (c *Cluster) pushPingMetrics() {
@@ -291,6 +303,7 @@ func (c *Cluster) pushPingMetrics() {
 		case <-ticker.C:
 		}
 	}
+	logger.Debugf("Peer %s. Finished pushPingMetrics", c.id)
 }
 
 // read the alerts channel from the monitor and triggers repins
@@ -303,7 +316,7 @@ func (c *Cluster) alertsHandler() {
 			// only the leader handles alerts
 			leader, err := c.consensus.Leader()
 			if err == nil && leader == c.id {
-				logger.Warningf("Received alert for %s in %s", alrt.MetricName, alrt.Peer.Pretty())
+				logger.Warningf("Peer %s received alert for %s in %s", c.id, alrt.MetricName, alrt.Peer.Pretty())
 				switch alrt.MetricName {
 				case "ping":
 					c.repinFromPeer(alrt.Peer)
diff --git a/ipfscluster_test.go b/ipfscluster_test.go
index 6c42b3c2..dbdd10a7 100644
--- a/ipfscluster_test.go
+++ b/ipfscluster_test.go
@@ -865,6 +865,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 		if pinfo.Status == api.TrackerStatusPinned {
 			//t.Logf("Killing %s", c.id.Pretty())
 			killedClusterIndex = i
+			t.Logf("Shutting down %s", c.ID().ID)
 			c.Shutdown()
 			break
 		}
@@ -873,8 +874,9 @@ func TestClustersReplicationRealloc(t *testing.T) {
 	// let metrics expire and give time for the cluster to
 	// see if they have lost the leader
 	time.Sleep(4 * time.Second)
-
 	waitForLeader(t, clusters)
+	// wait for new metrics to arrive
+	delay()
 
 	// Make sure we haven't killed our randomly
 	// selected cluster
diff --git a/monitor/basic/peer_monitor.go b/monitor/basic/peer_monitor.go
index 5362f544..fc7462a0 100644
--- a/monitor/basic/peer_monitor.go
+++ b/monitor/basic/peer_monitor.go
@@ -178,7 +178,7 @@ func (mon *StdPeerMonitor) LogMetric(m api.Metric) {
 		mbyp[peer] = pmets
 	}
 
-	logger.Debugf("logged '%s' metric from '%s'", name, peer)
+	logger.Debugf("logged '%s' metric from '%s'. Expires on %s", name, peer, m.Expire)
 	pmets.add(m)
 }
 
@@ -308,6 +308,7 @@ func (mon *StdPeerMonitor) checkMetrics(peers []peer.ID, metricName string) {
 		}
 		// send alert if metric is expired (but was valid at some point)
 		if last.Valid && last.Expired() {
+			logger.Debugf("Metric %s from peer %s expired at %s", metricName, p, last.Expire)
 			mon.sendAlert(p, metricName)
 		}
 	}

From 92b265c19db2e70bceb14c1240fdbb5b31da7b5f Mon Sep 17 00:00:00 2001
From: Hector Sanjuan <hector@protocol.ai>
Date: Fri, 21 Jul 2017 23:45:22 +0200
Subject: [PATCH 4/5] Revert "Safer delays for Realloc test"

This reverts commit 22ec3577934f3598d99f06597e8f09c3e5089b7b.

License: MIT
Signed-off-by: Hector Sanjuan <hector@protocol.ai>
---
 ipfscluster_test.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ipfscluster_test.go b/ipfscluster_test.go
index dbdd10a7..5e17b52f 100644
--- a/ipfscluster_test.go
+++ b/ipfscluster_test.go
@@ -830,7 +830,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 	}
 
 	// Let the pin arrive
-	delay()
+	time.Sleep(time.Second / 2)
 
 	pin := clusters[j].Pins()[0]
 	pinSerial := pin.ToSerial()
@@ -845,7 +845,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	time.Sleep(time.Second)
+	time.Sleep(time.Second / 2)
 
 	pin2 := clusters[j].Pins()[0]
 	pinSerial2 := pin2.ToSerial()
@@ -876,7 +876,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 	time.Sleep(4 * time.Second)
 	waitForLeader(t, clusters)
 	// wait for new metrics to arrive
-	delay()
+	time.Sleep(2 * time.Second)
 
 	// Make sure we haven't killed our randomly
 	// selected cluster
@@ -890,7 +890,7 @@ func TestClustersReplicationRealloc(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	time.Sleep(time.Second)
+	time.Sleep(time.Second / 2)
 
 	numPinned := 0
 	for i, c := range clusters {

From 1815d38f0e14769ef0b5e13d744010e2533b1636 Mon Sep 17 00:00:00 2001
From: Hector Sanjuan <hector@protocol.ai>
Date: Fri, 21 Jul 2017 23:45:59 +0200
Subject: [PATCH 5/5] Revert "increase delays after creation"

This reverts commit 5ed5a43432881f5f2197b9b74dedcba48d52e74b.
---
 ipfscluster_test.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ipfscluster_test.go b/ipfscluster_test.go
index 5e17b52f..52775f0f 100644
--- a/ipfscluster_test.go
+++ b/ipfscluster_test.go
@@ -178,7 +178,6 @@ func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) {
 	// 	clusters[0].PeerAdd(clusterAddr(clusters[i]))
 	// }
 	delay()
-	delay()
 	return clusters, ipfsMocks
 }
 
@@ -766,7 +765,7 @@ func TestClustersReplication(t *testing.T) {
 		if numRemote != 1 {
 			t.Errorf("We wanted 1 peer track as remote but %d do", numRemote)
 		}
-		time.Sleep(time.Second) // this is for metric to be up to date
+		time.Sleep(time.Second / 2) // this is for metric to be up to date
 	}
 
 	f := func(t *testing.T, c *Cluster) {