ipfs-cluster/ipfscluster_test.go

package ipfscluster

import (
	"context"
	"errors"
	"flag"
	"fmt"
	"math/rand"
	"mime/multipart"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/ipfs/ipfs-cluster/allocator/balanced"
	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/api/rest"
	"github.com/ipfs/ipfs-cluster/consensus/crdt"
	"github.com/ipfs/ipfs-cluster/consensus/raft"
	"github.com/ipfs/ipfs-cluster/datastore/badger"
	"github.com/ipfs/ipfs-cluster/datastore/inmem"
	"github.com/ipfs/ipfs-cluster/datastore/leveldb"
	"github.com/ipfs/ipfs-cluster/informer/disk"
	"github.com/ipfs/ipfs-cluster/ipfsconn/ipfshttp"
	"github.com/ipfs/ipfs-cluster/monitor/pubsubmon"
	"github.com/ipfs/ipfs-cluster/observations"
	"github.com/ipfs/ipfs-cluster/pintracker/stateless"
	"github.com/ipfs/ipfs-cluster/state"
	"github.com/ipfs/ipfs-cluster/test"
	"github.com/ipfs/ipfs-cluster/version"

	ds "github.com/ipfs/go-datastore"
	libp2p "github.com/libp2p/go-libp2p"
	crypto "github.com/libp2p/go-libp2p-core/crypto"
	host "github.com/libp2p/go-libp2p-core/host"
	peer "github.com/libp2p/go-libp2p-core/peer"
	peerstore "github.com/libp2p/go-libp2p-core/peerstore"
	dht "github.com/libp2p/go-libp2p-kad-dht"
	dual "github.com/libp2p/go-libp2p-kad-dht/dual"
	pubsub "github.com/libp2p/go-libp2p-pubsub"
	routedhost "github.com/libp2p/go-libp2p/p2p/host/routed"
	ma "github.com/multiformats/go-multiaddr"
)

var (
	// number of clusters to create
	nClusters = 5

	// number of pins to pin/unpin/check
	nPins = 100

	logLevel               = "FATAL"
	customLogLvlFacilities = logFacilities{}

	consensus = "crdt"
	datastore = "badger"

	ttlDelayTime = 2 * time.Second // set on Main to diskInf.MetricTTL
	testsFolder  = "clusterTestsFolder"

	// When testing with fixed ports...
	// clusterPort   = 10000
	// apiPort       = 10100
	// ipfsProxyPort = 10200
)

type logFacilities []string

// String is the method to format the flag's value, part of the flag.Value interface.
func (lg *logFacilities) String() string {
	return fmt.Sprint(*lg)
}

// Set is the method to set the flag value, part of the flag.Value interface.
func (lg *logFacilities) Set(value string) error {
	if len(*lg) > 0 {
		return errors.New("logFacilities flag already set")
	}
	for _, lf := range strings.Split(value, ",") {
		*lg = append(*lg, lf)
	}
	return nil
}

// TestMain runs test initialization. Since Go1.13 we cannot run this on init()
// as flag.Parse() does not work well there
// (see https://golang.org/src/testing/testing.go#L211)
func TestMain(m *testing.M) {
	rand.Seed(time.Now().UnixNano())
	ReadyTimeout = 11 * time.Second

	// GossipSub needs to heartbeat to discover newly connected hosts
	// This speeds things up a little.
	pubsub.GossipSubHeartbeatInterval = 50 * time.Millisecond

	flag.Var(&customLogLvlFacilities, "logfacs", "use -logLevel for only the following log facilities; comma-separated")
	flag.StringVar(&logLevel, "loglevel", logLevel, "default log level for tests")
	flag.IntVar(&nClusters, "nclusters", nClusters, "number of clusters to use")
	flag.IntVar(&nPins, "npins", nPins, "number of pins to pin/unpin/check")
	flag.StringVar(&consensus, "consensus", consensus, "consensus implementation")
	flag.StringVar(&datastore, "datastore", datastore, "datastore backend")
	flag.Parse()

	if len(customLogLvlFacilities) <= 0 {
		for f := range LoggingFacilities {
			SetFacilityLogLevel(f, logLevel)
		}

		for f := range LoggingFacilitiesExtra {
			SetFacilityLogLevel(f, logLevel)
		}
	}

	for _, f := range customLogLvlFacilities {
		if _, ok := LoggingFacilities[f]; ok {
			SetFacilityLogLevel(f, logLevel)
			continue
		}
		if _, ok := LoggingFacilitiesExtra[f]; ok {
			SetFacilityLogLevel(f, logLevel)
			continue
		}
	}

	diskInfCfg := &disk.Config{}
	diskInfCfg.LoadJSON(testingDiskInfCfg)
	ttlDelayTime = diskInfCfg.MetricTTL * 2

	os.Exit(m.Run())
}

func randomBytes() []byte {
	bs := make([]byte, 64)
	for i := 0; i < len(bs); i++ {
		b := byte(rand.Int())
		bs[i] = b
	}
	return bs
}

func createComponents(
	t *testing.T,
	host host.Host,
	pubsub *pubsub.PubSub,
	dht *dual.DHT,
	i int,
	staging bool,
) (
	*Config,
	ds.Datastore,
	Consensus,
	[]API,
	IPFSConnector,
	PinTracker,
	PeerMonitor,
	PinAllocator,
	Informer,
	Tracer,
	*test.IpfsMock,
) {
	ctx := context.Background()
	mock := test.NewIpfsMock(t)

	//apiAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", apiPort+i))
	// Bind on port 0
	apiAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
	// Bind on Port 0
	// proxyAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", ipfsProxyPort+i))
	proxyAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
	nodeAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/%s/tcp/%d", mock.Addr, mock.Port))

	peername := fmt.Sprintf("peer_%d", i)

	ident, clusterCfg, apiCfg, ipfsproxyCfg, ipfshttpCfg, badgerCfg, levelDBCfg, raftCfg, crdtCfg, statelesstrackerCfg, psmonCfg, allocBalancedCfg, diskInfCfg, tracingCfg := testingConfigs()

	ident.ID = host.ID()
	ident.PrivateKey = host.Peerstore().PrivKey(host.ID())
	clusterCfg.Peername = peername
	clusterCfg.LeaveOnShutdown = false
	clusterCfg.SetBaseDir(filepath.Join(testsFolder, host.ID().Pretty()))

	apiCfg.HTTPListenAddr = []ma.Multiaddr{apiAddr}

	ipfsproxyCfg.ListenAddr = []ma.Multiaddr{proxyAddr}
	ipfsproxyCfg.NodeAddr = nodeAddr

	ipfshttpCfg.NodeAddr = nodeAddr

	raftCfg.DataFolder = filepath.Join(testsFolder, host.ID().Pretty())

	badgerCfg.Folder = filepath.Join(testsFolder, host.ID().Pretty(), "badger")
	levelDBCfg.Folder = filepath.Join(testsFolder, host.ID().Pretty(), "leveldb")

	api, err := rest.NewAPI(ctx, apiCfg)
	if err != nil {
		t.Fatal(err)
	}

	ipfsProxy, err := rest.NewAPI(ctx, apiCfg)
	if err != nil {
		t.Fatal(err)
	}

	ipfs, err := ipfshttp.NewConnector(ipfshttpCfg)
	if err != nil {
		t.Fatal(err)
	}

	alloc, err := balanced.New(allocBalancedCfg)
	if err != nil {
		t.Fatal(err)
	}
	inf, err := disk.NewInformer(diskInfCfg)
	if err != nil {
		t.Fatal(err)
	}

	store := makeStore(t, badgerCfg, levelDBCfg)
	cons := makeConsensus(t, store, host, pubsub, dht, raftCfg, staging, crdtCfg)
	tracker := stateless.New(statelesstrackerCfg, ident.ID, clusterCfg.Peername, cons.State)

	var peersF func(context.Context) ([]peer.ID, error)
	if consensus == "raft" {
		peersF = cons.Peers
	}
	mon, err := pubsubmon.New(ctx, psmonCfg, pubsub, peersF)
	if err != nil {
		t.Fatal(err)
	}
	tracingCfg.ServiceName = peername
	tracer, err := observations.SetupTracing(tracingCfg)
	if err != nil {
		t.Fatal(err)
	}

	return clusterCfg, store, cons, []API{api, ipfsProxy}, ipfs, tracker, mon, alloc, inf, tracer, mock
}

func makeStore(t *testing.T, badgerCfg *badger.Config, levelDBCfg *leveldb.Config) ds.Datastore {
	switch consensus {
	case "crdt":
		if datastore == "badger" {
			dstr, err := badger.New(badgerCfg)
			if err != nil {
				t.Fatal(err)
			}
			return dstr
		}
		dstr, err := leveldb.New(levelDBCfg)
		if err != nil {
			t.Fatal(err)
		}
		return dstr
	default:
		return inmem.New()
	}
}

func makeConsensus(t *testing.T, store ds.Datastore, h host.Host, psub *pubsub.PubSub, dht *dual.DHT, raftCfg *raft.Config, staging bool, crdtCfg *crdt.Config) Consensus {
	switch consensus {
	case "raft":
		raftCon, err := raft.NewConsensus(h, raftCfg, store, staging)
		if err != nil {
			t.Fatal(err)
		}
		return raftCon
	case "crdt":
		crdtCon, err := crdt.New(h, dht, psub, crdtCfg, store)
		if err != nil {
			t.Fatal(err)
		}
		return crdtCon
	default:
		panic("bad consensus")
	}
}

func createCluster(t *testing.T, host host.Host, dht *dual.DHT, clusterCfg *Config, store ds.Datastore, consensus Consensus, apis []API, ipfs IPFSConnector, tracker PinTracker, mon PeerMonitor, alloc PinAllocator, inf Informer, tracer Tracer) *Cluster {
	cl, err := NewCluster(context.Background(), host, dht, clusterCfg, store, consensus, apis, ipfs, tracker, mon, alloc, []Informer{inf}, tracer)
	if err != nil {
		t.Fatal(err)
	}
	return cl
}

func createOnePeerCluster(t *testing.T, nth int, clusterSecret []byte) (*Cluster, *test.IpfsMock) {
	hosts, pubsubs, dhts := createHosts(t, clusterSecret, 1)
	clusterCfg, store, consensus, api, ipfs, tracker, mon, alloc, inf, tracer, mock := createComponents(t, hosts[0], pubsubs[0], dhts[0], nth, false)
	cl := createCluster(t, hosts[0], dhts[0], clusterCfg, store, consensus, api, ipfs, tracker, mon, alloc, inf, tracer)
	<-cl.Ready()
	return cl, mock
}

func createHosts(t *testing.T, clusterSecret []byte, nClusters int) ([]host.Host, []*pubsub.PubSub, []*dual.DHT) {
	hosts := make([]host.Host, nClusters)
	pubsubs := make([]*pubsub.PubSub, nClusters)
	dhts := make([]*dual.DHT, nClusters)

	tcpaddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
	quicAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/udp/0/quic")
	for i := range hosts {
		priv, _, err := crypto.GenerateKeyPair(crypto.RSA, 2048)
		if err != nil {
			t.Fatal(err)
		}

		h, p, d := createHost(t, priv, clusterSecret, []ma.Multiaddr{quicAddr, tcpaddr})
		hosts[i] = h
		dhts[i] = d
		pubsubs[i] = p
	}

	return hosts, pubsubs, dhts
}

func createHost(t *testing.T, priv crypto.PrivKey, clusterSecret []byte, listen []ma.Multiaddr) (host.Host, *pubsub.PubSub, *dual.DHT) {
	ctx := context.Background()

	h, err := newHost(ctx, clusterSecret, priv, libp2p.ListenAddrs(listen...))
	if err != nil {
		t.Fatal(err)
	}

	// DHT needs to be created BEFORE connecting the peers
	d, err := newTestDHT(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	// Pubsub needs to be created BEFORE connecting the peers,
	// otherwise they are not picked up.
	psub, err := newPubSub(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	return routedhost.Wrap(h, d), psub, d
}

func newTestDHT(ctx context.Context, h host.Host) (*dual.DHT, error) {
	return newDHT(ctx, h, nil,
		dual.DHTOption(dht.RoutingTableRefreshPeriod(600*time.Millisecond)),
		dual.DHTOption(dht.RoutingTableRefreshQueryTimeout(300*time.Millisecond)),
	)
}

func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) {
	ctx := context.Background()
	os.RemoveAll(testsFolder)
	cfgs := make([]*Config, nClusters)
	stores := make([]ds.Datastore, nClusters)
	cons := make([]Consensus, nClusters)
	apis := make([][]API, nClusters)
	ipfss := make([]IPFSConnector, nClusters)
	trackers := make([]PinTracker, nClusters)
	mons := make([]PeerMonitor, nClusters)
	allocs := make([]PinAllocator, nClusters)
	infs := make([]Informer, nClusters)
	tracers := make([]Tracer, nClusters)
	ipfsMocks := make([]*test.IpfsMock, nClusters)

	clusters := make([]*Cluster, nClusters)

	// Uncomment when testing with fixed ports
	// clusterPeers := make([]ma.Multiaddr, nClusters, nClusters)

	hosts, pubsubs, dhts := createHosts(t, testingClusterSecret, nClusters)

	for i := 0; i < nClusters; i++ {
		// staging = true for all except first (i==0)
		cfgs[i], stores[i], cons[i], apis[i], ipfss[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i], ipfsMocks[i] = createComponents(t, hosts[i], pubsubs[i], dhts[i], i, i != 0)
	}

	// Start first node
	clusters[0] = createCluster(t, hosts[0], dhts[0], cfgs[0], stores[0], cons[0], apis[0], ipfss[0], trackers[0], mons[0], allocs[0], infs[0], tracers[0])
	<-clusters[0].Ready()
	bootstrapAddr := clusterAddr(clusters[0])

	// Start the rest and join
	for i := 1; i < nClusters; i++ {
		clusters[i] = createCluster(t, hosts[i], dhts[i], cfgs[i], stores[i], cons[i], apis[i], ipfss[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i])
		err := clusters[i].Join(ctx, bootstrapAddr)
		if err != nil {
			logger.Error(err)
			t.Fatal(err)
		}
		<-clusters[i].Ready()
	}

	// connect all hosts
	for _, h := range hosts {
		for _, h2 := range hosts {
			if h.ID() != h2.ID() {
				h.Peerstore().AddAddrs(h2.ID(), h2.Addrs(), peerstore.PermanentAddrTTL)
				_, err := h.Network().DialPeer(ctx, h2.ID())
				if err != nil {
					t.Log(err)
				}
			}

		}
	}

	waitForLeader(t, clusters)
	waitForClustersHealthy(t, clusters)

	return clusters, ipfsMocks
}

func shutdownClusters(t *testing.T, clusters []*Cluster, m []*test.IpfsMock) {
	for i, c := range clusters {
		shutdownCluster(t, c, m[i])
	}
	os.RemoveAll(testsFolder)
}

func shutdownCluster(t *testing.T, c *Cluster, m *test.IpfsMock) {
	err := c.Shutdown(context.Background())
	if err != nil {
		t.Error(err)
	}
	c.dht.Close()
	c.host.Close()
	c.datastore.Close()
	m.Close()
}

func collectGlobalPinInfos(t *testing.T, out <-chan api.GlobalPinInfo, timeout time.Duration) []api.GlobalPinInfo {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	var gpis []api.GlobalPinInfo
	for {
		select {
		case <-ctx.Done():
			t.Error(ctx.Err())
			return gpis
		case gpi, ok := <-out:
			if !ok {
				return gpis
			}
			gpis = append(gpis, gpi)
		}
	}
}

func collectPinInfos(t *testing.T, out <-chan api.PinInfo) []api.PinInfo {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	var pis []api.PinInfo
	for {
		select {
		case <-ctx.Done():
			t.Error(ctx.Err())
			return pis
		case pi, ok := <-out:
			if !ok {
				return pis
			}
			pis = append(pis, pi)
		}
	}
}

func runF(t *testing.T, clusters []*Cluster, f func(*testing.T, *Cluster)) {
	t.Helper()
	var wg sync.WaitGroup
	for _, c := range clusters {
		wg.Add(1)
		go func(c *Cluster) {
			defer wg.Done()
			f(t, c)
		}(c)

	}
	wg.Wait()
}

//////////////////////////////////////
// Delay and wait functions
//
// Delays are used in tests to wait for certain events to happen:
//   * ttlDelay() waits for metrics to arrive. If you pin something
//     and your next operation depends on updated metrics, you need to wait
//   * pinDelay() accounts for the time necessary to pin something and for the new
//     log entry to be visible in all cluster peers
//   * delay just sleeps a second or two.
//   * waitForLeader functions make sure there is a raft leader, for example,
//     after killing the leader.
//
// The values for delays are a result of testing and adjusting so tests pass
// in travis, jenkins etc., taking into account the values used in the
// testing configuration (config_test.go).
func delay() {
	var d int
	if nClusters > 10 {
		d = 3000
	} else {
		d = 2000
	}
	time.Sleep(time.Duration(d) * time.Millisecond)
}

func pinDelay() {
	time.Sleep(800 * time.Millisecond)
}

func ttlDelay() {
	time.Sleep(ttlDelayTime)
}

// Like waitForLeader but letting metrics expire before waiting, and
// waiting for new metrics to arrive afterwards.
func waitForLeaderAndMetrics(t *testing.T, clusters []*Cluster) {
	ttlDelay()
	waitForLeader(t, clusters)
	ttlDelay()
}

// Makes sure there is a leader and everyone knows about it.
func waitForLeader(t *testing.T, clusters []*Cluster) {
	if consensus == "crdt" {
		return // yai
	}
	ctx := context.Background()
	timer := time.NewTimer(time.Minute)
	ticker := time.NewTicker(100 * time.Millisecond)

loop:
	for {
		select {
		case <-timer.C:
			t.Fatal("timed out waiting for a leader")
		case <-ticker.C:
			for _, cl := range clusters {
				if cl.shutdownB {
					continue // skip shutdown clusters
				}
				_, err := cl.consensus.Leader(ctx)
				if err != nil {
					continue loop
				}
			}
			break loop
		}
	}
}

func waitForClustersHealthy(t *testing.T, clusters []*Cluster) {
	t.Helper()
	if len(clusters) == 0 {
		return
	}

	timer := time.NewTimer(15 * time.Second)
	for {
		ttlDelay()
		metrics := clusters[0].monitor.LatestMetrics(context.Background(), clusters[0].informers[0].Name())
		healthy := 0
		for _, m := range metrics {
			if !m.Expired() {
				healthy++
			}
		}
		if len(clusters) == healthy {
			return
		}

		select {
		case <-timer.C:
			t.Fatal("timed out waiting for clusters to be healthy")
		default:
		}
	}
}

/////////////////////////////////////////

func TestClustersVersion(t *testing.T) {
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	f := func(t *testing.T, c *Cluster) {
		v := c.Version()
		if v != version.Version.String() {
			t.Error("Bad version")
		}
	}
	runF(t, clusters, f)
}

func TestClustersPeers(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	delay()

	j := rand.Intn(nClusters) // choose a random cluster peer

	out := make(chan api.ID, len(clusters))
	clusters[j].Peers(ctx, out)

	if len(out) != nClusters {
		t.Fatal("expected as many peers as clusters")
	}

	clusterIDMap := make(map[peer.ID]api.ID)
	peerIDMap := make(map[peer.ID]api.ID)

	for _, c := range clusters {
		id := c.ID(ctx)
		clusterIDMap[id.ID] = id
	}

	for p := range out {
		if p.Error != "" {
			t.Error(p.ID, p.Error)
			continue
		}
		peerIDMap[p.ID] = p
	}

	for k, id := range clusterIDMap {
		id2, ok := peerIDMap[k]
		if !ok {
			t.Fatal("expected id in both maps")
		}
		//if !crypto.KeyEqual(id.PublicKey, id2.PublicKey) {
		//	t.Error("expected same public key")
		//}
		if id.IPFS.ID != id2.IPFS.ID {
			t.Error("expected same ipfs daemon ID")
		}
	}
}

func TestClustersPin(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	prefix := test.Cid1.Prefix()

	ttlDelay()

	for i := 0; i < nPins; i++ {
		j := rand.Intn(nClusters)           // choose a random cluster peer
		h, err := prefix.Sum(randomBytes()) // create random cid
		if err != nil {
			t.Fatal(err)
		}
		_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
		if err != nil {
			t.Errorf("error pinning %s: %s", h, err)
		}
		// // Test re-pin
		// err = clusters[j].Pin(ctx, api.PinCid(h))
		// if err != nil {
		// 	t.Errorf("error repinning %s: %s", h, err)
		// }
	}
	switch consensus {
	case "crdt":
		time.Sleep(10 * time.Second)
	default:
		delay()
	}
	fpinned := func(t *testing.T, c *Cluster) {
		out := make(chan api.PinInfo, 10)

		go func() {
			err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
			if err != nil {
				t.Error(err)
			}
		}()

		status := collectPinInfos(t, out)

		for _, v := range status {
			if v.Status != api.TrackerStatusPinned {
				t.Errorf("%s should have been pinned but it is %s", v.Cid, v.Status)
			}
		}
		if l := len(status); l != nPins {
			t.Errorf("Pinned %d out of %d requests", l, nPins)
		}
	}
	runF(t, clusters, fpinned)

	// Unpin everything
	pinList, err := clusters[0].pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(pinList) != nPins {
		t.Fatalf("pin list has %d but pinned %d", len(pinList), nPins)
	}

	for i := 0; i < len(pinList); i++ {
		// test re-unpin fails
		j := rand.Intn(nClusters) // choose a random cluster peer
		_, err := clusters[j].Unpin(ctx, pinList[i].Cid)
		if err != nil {
			t.Errorf("error unpinning %s: %s", pinList[i].Cid, err)
		}
	}

	switch consensus {
	case "crdt":
		time.Sleep(10 * time.Second)
	default:
		delay()
	}

	for i := 0; i < len(pinList); i++ {
		j := rand.Intn(nClusters) // choose a random cluster peer
		_, err := clusters[j].Unpin(ctx, pinList[i].Cid)
		if err == nil {
			t.Errorf("expected error re-unpinning %s", pinList[i].Cid)
		}
	}

	delay()

	funpinned := func(t *testing.T, c *Cluster) {
		out := make(chan api.PinInfo)
		go func() {
			err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
			if err != nil {
				t.Error(err)
			}
		}()

		status := collectPinInfos(t, out)
		for _, v := range status {
			t.Errorf("%s should have been unpinned but it is %s", v.Cid, v.Status)
		}
	}
	runF(t, clusters, funpinned)
}

func TestClustersPinUpdate(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	prefix := test.Cid1.Prefix()

	ttlDelay()

	h, _ := prefix.Sum(randomBytes())  // create random cid
	h2, _ := prefix.Sum(randomBytes()) // create random cid

	_, err := clusters[0].PinUpdate(ctx, h, h2, api.PinOptions{})
	if err == nil || err != state.ErrNotFound {
		t.Fatal("pin update should fail when from is not pinned")
	}

	_, err = clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Errorf("error pinning %s: %s", h, err)
	}

	pinDelay()
	expiry := time.Now().AddDate(1, 0, 0)
	opts2 := api.PinOptions{
		UserAllocations: []peer.ID{clusters[0].host.ID()}, // should not be used
		PinUpdate:       h,
		Name:            "new name",
		ExpireAt:        expiry,
	}

	_, err = clusters[0].Pin(ctx, h2, opts2) // should call PinUpdate
	if err != nil {
		t.Errorf("error pin-updating %s: %s", h2, err)
	}

	pinDelay()

	f := func(t *testing.T, c *Cluster) {
		pinget, err := c.PinGet(ctx, h2)
		if err != nil {
			t.Fatal(err)
		}

		if len(pinget.Allocations) != 0 {
			t.Error("new pin should be allocated everywhere like pin1")
		}

		if pinget.MaxDepth != -1 {
			t.Error("updated pin should be recursive like pin1")
		}
		// We compare Unix seconds because our protobuf serde will have
		// lost any sub-second precision.
		if pinget.ExpireAt.Unix() != expiry.Unix() {
			t.Errorf("Expiry didn't match. Expected: %s. Got: %s", expiry, pinget.ExpireAt)
		}

		if pinget.Name != "new name" {
			t.Error("name should be kept")
		}
	}
	runF(t, clusters, f)
}

func TestClustersPinDirect(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	prefix := test.Cid1.Prefix()

	ttlDelay()

	h, _ := prefix.Sum(randomBytes()) // create random cid

	_, err := clusters[0].Pin(ctx, h, api.PinOptions{Mode: api.PinModeDirect})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	f := func(t *testing.T, c *Cluster, mode api.PinMode) {
		pinget, err := c.PinGet(ctx, h)
		if err != nil {
			t.Fatal(err)
		}

		if pinget.Mode != mode {
			t.Error("pin should be pinned in direct mode")
		}

		if pinget.MaxDepth != mode.ToPinDepth() {
			t.Errorf("pin should have max-depth %d but has %d", mode.ToPinDepth(), pinget.MaxDepth)
		}

		pInfo := c.StatusLocal(ctx, h)
		if pInfo.Error != "" {
			t.Error(pInfo.Error)
		}
		if pInfo.Status != api.TrackerStatusPinned {
			t.Error(pInfo.Error)
			t.Error("the status should show the hash as pinned")
		}
	}

	runF(t, clusters, func(t *testing.T, c *Cluster) {
		f(t, c, api.PinModeDirect)
	})

	// Convert into a recursive mode
	_, err = clusters[0].Pin(ctx, h, api.PinOptions{Mode: api.PinModeRecursive})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	runF(t, clusters, func(t *testing.T, c *Cluster) {
		f(t, c, api.PinModeRecursive)
	})

	// This should fail as we cannot convert back to direct
	_, err = clusters[0].Pin(ctx, h, api.PinOptions{Mode: api.PinModeDirect})
	if err == nil {
		t.Error("a recursive pin cannot be converted back to direct pin")
	}
}

func TestClustersStatusAll(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	h := test.Cid1
	clusters[0].Pin(ctx, h, api.PinOptions{Name: "test"})
	pinDelay()
	// Global status
	f := func(t *testing.T, c *Cluster) {
		out := make(chan api.GlobalPinInfo, 10)
		go func() {
			err := c.StatusAll(ctx, api.TrackerStatusUndefined, out)
			if err != nil {
				t.Error(err)
			}
		}()

		statuses := collectGlobalPinInfos(t, out, 5*time.Second)
		if len(statuses) != 1 {
			t.Fatal("bad status. Expected one item")
		}
		if !statuses[0].Cid.Equals(h) {
			t.Error("bad cid in status")
		}

		if statuses[0].Name != "test" {
			t.Error("globalPinInfo should have the name")
		}

		info := statuses[0].PeerMap
		if len(info) != nClusters {
			t.Error("bad info in status")
		}

		for _, pi := range info {
			if pi.IPFS != test.PeerID1 {
				t.Error("ipfs not set in pin status")
			}
		}

		pid := peer.Encode(c.host.ID())
		if info[pid].Status != api.TrackerStatusPinned {
			t.Error("the hash should have been pinned")
		}

		status, err := c.Status(ctx, h)
		if err != nil {
			t.Error(err)
		}

		pinfo, ok := status.PeerMap[pid]
		if !ok {
			t.Fatal("Host not in status")
		}

		if pinfo.Status != api.TrackerStatusPinned {
			t.Error(pinfo.Error)
			t.Error("the status should show the hash as pinned")
		}
	}
	runF(t, clusters, f)
}

func TestClustersStatusAllWithErrors(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	h := test.Cid1
	clusters[0].Pin(ctx, h, api.PinOptions{Name: "test"})
	pinDelay()

	// shutdown 1 cluster peer
	clusters[1].Shutdown(ctx)
	clusters[1].host.Close()
	delay()

	f := func(t *testing.T, c *Cluster) {
		// skip if it's the shutdown peer
		if c.ID(ctx).ID == clusters[1].ID(ctx).ID {
			return
		}

		out := make(chan api.GlobalPinInfo, 10)
		go func() {
			err := c.StatusAll(ctx, api.TrackerStatusUndefined, out)
			if err != nil {
				t.Error(err)
			}
		}()

		statuses := collectGlobalPinInfos(t, out, 5*time.Second)

		if len(statuses) != 1 {
			t.Fatal("bad status. Expected one item")
		}

		if !statuses[0].Cid.Equals(h) {
			t.Error("wrong Cid in globalPinInfo")
		}

		if statuses[0].Name != "test" {
			t.Error("wrong Name in globalPinInfo")
		}

		// Raft and CRDT behave differently here
		switch consensus {
		case "raft":
			// Raft will have all statuses with one of them
			// being in ERROR because the peer is off

			stts := statuses[0]
			if len(stts.PeerMap) != nClusters {
				t.Error("bad number of peers in status")
			}

			pid := peer.Encode(clusters[1].id)
			errst := stts.PeerMap[pid]

			if errst.Status != api.TrackerStatusClusterError {
				t.Error("erroring status should be set to ClusterError:", errst.Status)
			}
			if errst.PeerName != "peer_1" {
				t.Error("peername should have been set in the erroring peer too from the cache")
			}

			if errst.IPFS != test.PeerID1 {
				t.Error("IPFS ID should have been set in the erroring peer too from the cache")
			}

			// now check with Cid status
			status, err := c.Status(ctx, h)
			if err != nil {
				t.Error(err)
			}

			pinfo := status.PeerMap[pid]

			if pinfo.Status != api.TrackerStatusClusterError {
				t.Error("erroring status should be ClusterError:", pinfo.Status)
			}

			if pinfo.PeerName != "peer_1" {
				t.Error("peername should have been set in the erroring peer too from the cache")
			}

			if pinfo.IPFS != test.PeerID1 {
				t.Error("IPFS ID should have been set in the erroring peer too from the cache")
			}
		case "crdt":
			// CRDT will not have contacted the offline peer because
			// its metric expired and therefore is not in the
			// peerset.
			if len(statuses[0].PeerMap) != nClusters-1 {
				t.Error("expected a different number of statuses")
			}
		default:
			t.Fatal("bad consensus")

		}

	}
	runF(t, clusters, f)
}

func TestClustersRecoverLocal(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	h := test.ErrorCid // This cid always fails
	h2 := test.Cid2

	ttlDelay()

	clusters[0].Pin(ctx, h, api.PinOptions{})
	clusters[0].Pin(ctx, h2, api.PinOptions{})
	pinDelay()
	pinDelay()

	f := func(t *testing.T, c *Cluster) {
		_, err := c.RecoverLocal(ctx, h)
		if err != nil {
			t.Fatal(err)
		}
		// Wait for queue to be processed
		delay()

		info := c.StatusLocal(ctx, h)
		if info.Status != api.TrackerStatusPinError {
			t.Errorf("element is %s and not PinError", info.Status)
		}

		// Recover good ID
		info, _ = c.RecoverLocal(ctx, h2)
		if info.Status != api.TrackerStatusPinned {
			t.Error("element should be in Pinned state")
		}
	}
	// Test Local syncs
	runF(t, clusters, f)
}

func TestClustersRecover(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	h := test.ErrorCid // This cid always fails
	h2 := test.Cid2

	ttlDelay()

	clusters[0].Pin(ctx, h, api.PinOptions{})
	clusters[0].Pin(ctx, h2, api.PinOptions{})

	pinDelay()
	pinDelay()

	j := rand.Intn(nClusters)
	ginfo, err := clusters[j].Recover(ctx, h)
	if err != nil {
		// we always attempt to return a valid response
		// with errors contained in GlobalPinInfo
		t.Fatal("did not expect an error")
	}
	if len(ginfo.PeerMap) != nClusters {
		t.Error("number of peers do not match")
	}
	// Wait for queue to be processed
	delay()

	ginfo, err = clusters[j].Status(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	pinfo, ok := ginfo.PeerMap[peer.Encode(clusters[j].host.ID())]
	if !ok {
		t.Fatal("should have info for this host")
	}
	if pinfo.Error == "" {
		t.Error("pinInfo error should not be empty")
	}

	for _, c := range clusters {
		inf, ok := ginfo.PeerMap[peer.Encode(c.host.ID())]
		if !ok {
			t.Fatal("GlobalPinInfo should not be empty for this host")
		}

		if inf.Status != api.TrackerStatusPinError {
			t.Logf("%+v", inf)
			t.Error("should be PinError in all peers")
		}
	}

	// Test with a good Cid
	j = rand.Intn(nClusters)
	ginfo, err = clusters[j].Recover(ctx, h2)
	if err != nil {
		t.Fatal(err)
	}
	if !ginfo.Cid.Equals(h2) {
		t.Error("GlobalPinInfo should be for testrCid2")
	}
	if len(ginfo.PeerMap) != nClusters {
		t.Error("number of peers do not match")
	}

	for _, c := range clusters {
		inf, ok := ginfo.PeerMap[peer.Encode(c.host.ID())]
		if !ok {
			t.Fatal("GlobalPinInfo should have this cluster")
		}
		if inf.Status != api.TrackerStatusPinned {
			t.Error("the GlobalPinInfo should show Pinned in all peers")
		}
	}
}

func TestClustersRecoverAll(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	h1 := test.Cid1
	hError := test.ErrorCid

	ttlDelay()

	clusters[0].Pin(ctx, h1, api.PinOptions{})
	clusters[0].Pin(ctx, hError, api.PinOptions{})

	pinDelay()

	out := make(chan api.GlobalPinInfo)
	go func() {
		err := clusters[rand.Intn(nClusters)].RecoverAll(ctx, out)
		if err != nil {
			t.Error(err)
		}
	}()

	gInfos := collectGlobalPinInfos(t, out, 5*time.Second)

	if len(gInfos) != 1 {
		t.Error("expected one items")
	}

	for _, gInfo := range gInfos {
		if len(gInfo.PeerMap) != nClusters {
			t.Error("number of peers do not match")
		}
	}
}

func TestClustersShutdown(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	f := func(t *testing.T, c *Cluster) {
		err := c.Shutdown(ctx)
		if err != nil {
			t.Error("should be able to shutdown cleanly")
		}
	}
	// Shutdown 3 times
	runF(t, clusters, f)
	runF(t, clusters, f)
	runF(t, clusters, f)
}

func TestClustersReplicationOverall(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	// Why is replication factor nClusters - 1?
	// Because that way we know that pinning nCluster
	// pins with an strategy like numpins/disk
	// will result in each peer holding locally exactly
	// nCluster pins.

	prefix := test.Cid1.Prefix()

	for i := 0; i < nClusters; i++ {
		// Pick a random cluster and hash
		j := rand.Intn(nClusters)           // choose a random cluster peer
		h, err := prefix.Sum(randomBytes()) // create random cid
		if err != nil {
			t.Fatal(err)
		}
		_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
		if err != nil {
			t.Error(err)
		}
		pinDelay()

		// check that it is held by exactly nClusters - 1 peers
		gpi, err := clusters[j].Status(ctx, h)
		if err != nil {
			t.Fatal(err)
		}

		numLocal := 0
		numRemote := 0
		for _, v := range gpi.PeerMap {
			if v.Status == api.TrackerStatusPinned {
				numLocal++
			} else if v.Status == api.TrackerStatusRemote {
				numRemote++
			}
		}
		if numLocal != nClusters-1 {
			t.Errorf(
				"We wanted replication %d but it's only %d",
				nClusters-1,
				numLocal,
			)
		}

		if numRemote != 1 {
			t.Errorf("We wanted 1 peer track as remote but %d do", numRemote)
		}
		ttlDelay()
	}

	f := func(t *testing.T, c *Cluster) {
		// confirm that the pintracker state matches the current global state
		out := make(chan api.PinInfo, 100)

		go func() {
			err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
			if err != nil {
				t.Error(err)
			}
		}()
		pinfos := collectPinInfos(t, out)
		if len(pinfos) != nClusters {
			t.Error("Pinfos does not have the expected pins")
		}

		numRemote := 0
		numLocal := 0
		for _, pi := range pinfos {
			switch pi.Status {
			case api.TrackerStatusPinned:
				numLocal++

			case api.TrackerStatusRemote:
				numRemote++
			}
		}
		if numLocal != nClusters-1 {
			t.Errorf("%s: Expected %d local pins but got %d", c.id.String(), nClusters-1, numLocal)
		}

		if numRemote != 1 {
			t.Errorf("%s: Expected 1 remote pin but got %d", c.id.String(), numRemote)
		}

		outPins := make(chan api.Pin)
		go func() {
			err := c.Pins(ctx, outPins)
			if err != nil {
				t.Error(err)
			}
		}()
		for pin := range outPins {
			allocs := pin.Allocations
			if len(allocs) != nClusters-1 {
				t.Errorf("Allocations are [%s]", allocs)
			}
			for _, a := range allocs {
				if a == c.id {
					pinfo := c.tracker.Status(ctx, pin.Cid)
					if pinfo.Status != api.TrackerStatusPinned {
						t.Errorf("Peer %s was allocated but it is not pinning cid", c.id)
					}
				}
			}
		}
	}

	runF(t, clusters, f)
}

// This test checks that we pin with ReplicationFactorMax when
// we can
func TestClustersReplicationFactorMax(t *testing.T) {
	ctx := context.Background()
	if nClusters < 3 {
		t.Skip("Need at least 3 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	ttlDelay()

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	f := func(t *testing.T, c *Cluster) {
		p, err := c.PinGet(ctx, h)
		if err != nil {
			t.Fatal(err)
		}

		if len(p.Allocations) != nClusters-1 {
			t.Error("should have pinned nClusters - 1 allocations")
		}

		if p.ReplicationFactorMin != 1 {
			t.Error("rplMin should be 1")
		}

		if p.ReplicationFactorMax != nClusters-1 {
			t.Error("rplMax should be nClusters-1")
		}
	}
	runF(t, clusters, f)
}

// This tests checks that repinning something that is overpinned
// removes some allocations
func TestClustersReplicationFactorMaxLower(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 1
		c.config.ReplicationFactorMax = nClusters
	}

	ttlDelay() // make sure we have places to pin

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p1, err := clusters[0].PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	if len(p1.Allocations) != nClusters {
		t.Fatal("allocations should be nClusters")
	}

	opts := api.PinOptions{
		ReplicationFactorMin: 1,
		ReplicationFactorMax: 2,
	}
	_, err = clusters[0].Pin(ctx, h, opts)
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p2, err := clusters[0].PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	if len(p2.Allocations) != 2 {
		t.Fatal("allocations should have been reduced to 2")
	}
}

// This test checks that when not all nodes are available,
// we pin in as many as we can aiming for ReplicationFactorMax
func TestClustersReplicationFactorInBetween(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 1
		c.config.ReplicationFactorMax = nClusters
	}

	ttlDelay()

	// Shutdown two peers
	clusters[nClusters-1].Shutdown(ctx)
	clusters[nClusters-2].Shutdown(ctx)

	waitForLeaderAndMetrics(t, clusters)

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	f := func(t *testing.T, c *Cluster) {
		if c == clusters[nClusters-1] || c == clusters[nClusters-2] {
			return
		}
		p, err := c.PinGet(ctx, h)
		if err != nil {
			t.Fatal(err)
		}

		if len(p.Allocations) != nClusters-2 {
			t.Error("should have pinned nClusters-2 allocations")
		}

		if p.ReplicationFactorMin != 1 {
			t.Error("rplMin should be 1")
		}

		if p.ReplicationFactorMax != nClusters {
			t.Error("rplMax should be nClusters")
		}
	}
	runF(t, clusters, f)
}

// This test checks that we do not pin something for which
// we cannot reach ReplicationFactorMin
func TestClustersReplicationFactorMin(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters
	}

	// Shutdown two peers
	clusters[nClusters-1].Shutdown(ctx)
	waitForLeaderAndMetrics(t, clusters)
	clusters[nClusters-2].Shutdown(ctx)
	waitForLeaderAndMetrics(t, clusters)

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err == nil {
		t.Error("Pin should have failed as rplMin cannot be satisfied")
	}
	t.Log(err)
	if !strings.Contains(err.Error(), "not enough peers to allocate CID") {
		t.Fatal(err)
	}
}

// This tests checks that repinning something that has becomed
// underpinned actually changes nothing if it's sufficiently pinned
func TestClustersReplicationMinMaxNoRealloc(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 1
		c.config.ReplicationFactorMax = nClusters
	}

	ttlDelay()

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	// Shutdown two peers
	clusters[nClusters-1].Shutdown(ctx)
	waitForLeaderAndMetrics(t, clusters)
	clusters[nClusters-2].Shutdown(ctx)
	waitForLeaderAndMetrics(t, clusters)

	_, err = clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p, err := clusters[0].PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	if len(p.Allocations) != nClusters {
		t.Error("allocations should still be nCluster even if not all available")
	}

	if p.ReplicationFactorMax != nClusters {
		t.Error("rplMax should have not changed")
	}
}

// This test checks that repinning something that has becomed
// underpinned does re-allocations when it's not sufficiently
// pinned anymore.
// FIXME: The manual repin only works if the pin options changed.
func TestClustersReplicationMinMaxRealloc(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 3
		c.config.ReplicationFactorMax = 4
	}

	ttlDelay() // make sure metrics are in

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{
		Name: "a",
	})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p, err := clusters[0].PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	firstAllocations := p.Allocations

	peerIDMap := make(map[peer.ID]*Cluster)
	for _, a := range clusters {
		peerIDMap[a.id] = a
	}

	// kill two of the allocations
	// Only the first allocated peer (or the second if the first is
	// alerting) will automatically repin.
	alloc1 := peerIDMap[firstAllocations[1]]
	alloc2 := peerIDMap[firstAllocations[2]]
	safePeer := peerIDMap[firstAllocations[0]]

	alloc1.Shutdown(ctx)
	alloc2.Shutdown(ctx)

	waitForLeaderAndMetrics(t, clusters)

	// Repin - (although this should have been taken of as alerts
	// happen for the shutdown nodes. We force re-allocation by
	// changing the name.
	_, err = safePeer.Pin(ctx, h, api.PinOptions{
		Name: "b",
	})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p, err = safePeer.PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	secondAllocations := p.Allocations

	strings1 := api.PeersToStrings(firstAllocations)
	strings2 := api.PeersToStrings(secondAllocations)
	sort.Strings(strings1)
	sort.Strings(strings2)
	t.Logf("Allocs1: %s", strings1)
	t.Logf("Allocs2: %s", strings2)

	if fmt.Sprintf("%s", strings1) == fmt.Sprintf("%s", strings2) {
		t.Error("allocations should have changed")
	}

	lenSA := len(secondAllocations)
	expected := minInt(nClusters-2, 4)
	if lenSA != expected {
		t.Errorf("Insufficient reallocation, could have allocated to %d peers but instead only allocated to %d peers", expected, lenSA)
	}

	if lenSA < 3 {
		t.Error("allocations should be more than rplMin")
	}
}

// In this test we check that repinning something
// when a node has gone down will re-assign the pin
func TestClustersReplicationRealloc(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	ttlDelay()

	j := rand.Intn(nClusters)
	h := test.Cid1
	_, err := clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	// Let the pin arrive
	pinDelay()

	pinList, err := clusters[j].pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}
	pin := pinList[0]
	allocs := sort.StringSlice(api.PeersToStrings(pin.Allocations))
	allocs.Sort()
	allocsStr := fmt.Sprintf("%s", allocs)

	// Re-pin should work and be allocated to the same
	// nodes
	_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	pinList2, err := clusters[j].pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}
	pin2 := pinList2[0]
	allocs2 := sort.StringSlice(api.PeersToStrings(pin2.Allocations))
	allocs2.Sort()
	allocsStr2 := fmt.Sprintf("%s", allocs2)
	if allocsStr != allocsStr2 {
		t.Fatal("allocations changed without reason")
	}
	//t.Log(allocsStr)
	//t.Log(allocsStr2)

	var killedClusterIndex int
	// find someone that pinned it and kill that cluster
	for i, c := range clusters {
		pinfo := c.tracker.Status(ctx, h)
		if pinfo.Status == api.TrackerStatusPinned {
			//t.Logf("Killing %s", c.id.Pretty())
			killedClusterIndex = i
			t.Logf("Shutting down %s", c.ID(ctx).ID)
			c.Shutdown(ctx)
			break
		}
	}

	// let metrics expire and give time for the cluster to
	// see if they have lost the leader
	waitForLeaderAndMetrics(t, clusters)

	// Make sure we haven't killed our randomly
	// selected cluster
	for j == killedClusterIndex {
		j = rand.Intn(nClusters)
	}

	// now pin should succeed
	_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	numPinned := 0
	for i, c := range clusters {
		if i == killedClusterIndex {
			continue
		}
		pinfo := c.tracker.Status(ctx, h)
		if pinfo.Status == api.TrackerStatusPinned {
			//t.Log(pinfo.Peer.Pretty())
			numPinned++
		}
	}

	if numPinned != nClusters-1 {
		t.Error("pin should have been correctly re-assigned")
	}
}

// In this test we try to pin something when there are not
// as many available peers a we need. It's like before, except
// more peers are killed.
func TestClustersReplicationNotEnoughPeers(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	ttlDelay()

	j := rand.Intn(nClusters)
	_, err := clusters[j].Pin(ctx, test.Cid1, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	// Let the pin arrive
	pinDelay()

	clusters[0].Shutdown(ctx)
	clusters[1].Shutdown(ctx)

	waitForLeaderAndMetrics(t, clusters)

	_, err = clusters[2].Pin(ctx, test.Cid2, api.PinOptions{})
	if err == nil {
		t.Fatal("expected an error")
	}
	if !strings.Contains(err.Error(), "not enough peers to allocate") {
		t.Error("different error than expected")
		t.Error(err)
	}
	//t.Log(err)
}

func TestClustersRebalanceOnPeerDown(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	// pin something
	h := test.Cid1
	clusters[0].Pin(ctx, h, api.PinOptions{})
	pinDelay()
	pinLocal := 0
	pinRemote := 0
	var localPinner string
	var remotePinner string
	var remotePinnerCluster *Cluster

	status, _ := clusters[0].Status(ctx, h)

	// check it was correctly pinned
	for p, pinfo := range status.PeerMap {
		if pinfo.Status == api.TrackerStatusPinned {
			pinLocal++
			localPinner = p
		} else if pinfo.Status == api.TrackerStatusRemote {
			pinRemote++
			remotePinner = p
		}
	}

	if pinLocal != nClusters-1 || pinRemote != 1 {
		t.Fatal("Not pinned as expected")
	}

	// kill the local pinner
	for _, c := range clusters {
		clid := peer.Encode(c.id)
		if clid == localPinner {
			c.Shutdown(ctx)
		} else if clid == remotePinner {
			remotePinnerCluster = c
		}
	}

	delay()
	waitForLeaderAndMetrics(t, clusters) // in case we killed the leader

	// It should be now pinned in the remote pinner
	if s := remotePinnerCluster.tracker.Status(ctx, h).Status; s != api.TrackerStatusPinned {
		t.Errorf("it should be pinned and is %s", s)
	}
}

// Helper function for verifying cluster graph. Will only pass if exactly the
// peers in clusterIDs are fully connected to each other and the expected ipfs
// mock connectivity exists. Cluster peers not in clusterIDs are assumed to
// be disconnected and the graph should reflect this
func validateClusterGraph(t *testing.T, graph api.ConnectGraph, clusterIDs map[string]struct{}, peerNum int) {
	// Check that all cluster peers see each other as peers
	for id1, peers := range graph.ClusterLinks {
		if _, ok := clusterIDs[id1]; !ok {
			if len(peers) != 0 {
				t.Errorf("disconnected peer %s is still connected in graph", id1)
			}
			continue
		}
		t.Logf("id: %s, peers: %v\n", id1, peers)
		if len(peers) > len(clusterIDs)-1 {
			t.Errorf("More peers recorded in graph than expected")
		}
		// Make lookup index for peers connected to id1
		peerIndex := make(map[string]struct{})
		for _, p := range peers {
			peerIndex[peer.Encode(p)] = struct{}{}
		}
		for id2 := range clusterIDs {
			if _, ok := peerIndex[id2]; id1 != id2 && !ok {
				t.Errorf("Expected graph to see peer %s connected to peer %s", id1, id2)
			}
		}
	}
	if len(graph.ClusterLinks) != peerNum {
		t.Errorf("Unexpected number of cluster nodes in graph")
	}

	// Check that all cluster peers are recorded as nodes in the graph
	for id := range clusterIDs {
		if _, ok := graph.ClusterLinks[id]; !ok {
			t.Errorf("Expected graph to record peer %s as a node", id)
		}
	}

	if len(graph.ClusterTrustLinks) != peerNum {
		t.Errorf("Unexpected number of trust links in graph")
	}

	// Check that the mocked ipfs swarm is recorded
	if len(graph.IPFSLinks) != 1 {
		t.Error("Expected exactly one ipfs peer for all cluster nodes, the mocked peer")
	}
	links, ok := graph.IPFSLinks[peer.Encode(test.PeerID1)]
	if !ok {
		t.Error("Expected the mocked ipfs peer to be a node in the graph")
	} else {
		if len(links) != 2 || links[0] != test.PeerID4 ||
			links[1] != test.PeerID5 {
			t.Error("Swarm peers of mocked ipfs are not those expected")
		}
	}

	// Check that the cluster to ipfs connections are all recorded
	for id := range clusterIDs {
		if ipfsID, ok := graph.ClustertoIPFS[id]; !ok {
			t.Errorf("Expected graph to record peer %s's ipfs connection", id)
		} else {
			if ipfsID != test.PeerID1 {
				t.Errorf("Unexpected error %s", ipfsID)
			}
		}
	}
	if len(graph.ClustertoIPFS) > len(clusterIDs) {
		t.Error("More cluster to ipfs links recorded in graph than expected")
	}
}

// In this test we get a cluster graph report from a random peer in a healthy
// fully connected cluster and verify that it is formed as expected.
func TestClustersGraphConnected(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	ttlDelay()

	j := rand.Intn(nClusters) // choose a random cluster peer to query
	graph, err := clusters[j].ConnectGraph()
	if err != nil {
		t.Fatal(err)
	}

	clusterIDs := make(map[string]struct{})
	for _, c := range clusters {
		id := peer.Encode(c.ID(ctx).ID)
		clusterIDs[id] = struct{}{}
	}
	validateClusterGraph(t, graph, clusterIDs, nClusters)
}

// Similar to the previous test we get a cluster graph report from a peer.
// However now 2 peers have been shutdown and so we do not expect to see
// them in the graph
func TestClustersGraphUnhealthy(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	j := rand.Intn(nClusters) // choose a random cluster peer to query
	// chose the clusters to shutdown
	discon1 := -1
	discon2 := -1
	for i := range clusters {
		if i != j {
			if discon1 == -1 {
				discon1 = i
			} else {
				discon2 = i
				break
			}
		}
	}

	clusters[discon1].Shutdown(ctx)
	clusters[discon1].host.Close()
	clusters[discon2].Shutdown(ctx)
	clusters[discon2].host.Close()

	waitForLeaderAndMetrics(t, clusters)

	graph, err := clusters[j].ConnectGraph()
	if err != nil {
		t.Fatal(err)
	}

	clusterIDs := make(map[string]struct{})
	for i, c := range clusters {
		if i == discon1 || i == discon2 {
			continue
		}
		id := peer.Encode(c.ID(ctx).ID)
		clusterIDs[id] = struct{}{}
	}
	peerNum := nClusters
	switch consensus {
	case "crdt":
		peerNum = nClusters - 2
	}

	validateClusterGraph(t, graph, clusterIDs, peerNum)
}

// Check that the pin is not re-assigned when a node
// that has disabled repinning goes down.
func TestClustersDisabledRepinning(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
		c.config.DisableRepinning = true
	}

	ttlDelay()

	j := rand.Intn(nClusters)
	h := test.Cid1
	_, err := clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	// Let the pin arrive
	pinDelay()

	var killedClusterIndex int
	// find someone that pinned it and kill that cluster
	for i, c := range clusters {
		pinfo := c.tracker.Status(ctx, h)
		if pinfo.Status == api.TrackerStatusPinned {
			killedClusterIndex = i
			t.Logf("Shutting down %s", c.ID(ctx).ID)
			c.Shutdown(ctx)
			break
		}
	}

	// let metrics expire and give time for the cluster to
	// see if they have lost the leader
	waitForLeaderAndMetrics(t, clusters)

	// Make sure we haven't killed our randomly
	// selected cluster
	for j == killedClusterIndex {
		j = rand.Intn(nClusters)
	}

	numPinned := 0
	for i, c := range clusters {
		if i == killedClusterIndex {
			continue
		}
		pinfo := c.tracker.Status(ctx, h)
		if pinfo.Status == api.TrackerStatusPinned {
			//t.Log(pinfo.Peer.Pretty())
			numPinned++
		}
	}

	if numPinned != nClusters-2 {
		t.Errorf("expected %d replicas for pin, got %d", nClusters-2, numPinned)
	}
}

func TestRepoGC(t *testing.T) {
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	f := func(t *testing.T, c *Cluster) {
		gRepoGC, err := c.RepoGC(context.Background())
		if err != nil {
			t.Fatal("gc should have worked:", err)
		}

		if gRepoGC.PeerMap == nil {
			t.Fatal("expected a non-nil peer map")
		}

		if len(gRepoGC.PeerMap) != nClusters {
			t.Errorf("expected repo gc information for %d peer", nClusters)
		}
		for _, repoGC := range gRepoGC.PeerMap {
			testRepoGC(t, repoGC)
		}
	}

	runF(t, clusters, f)
}

func TestClustersFollowerMode(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	_, err := clusters[0].Pin(ctx, test.Cid1, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}
	_, err = clusters[0].Pin(ctx, test.ErrorCid, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	// Let the pins arrive
	pinDelay()

	// Set Cluster1 to follower mode
	clusters[1].config.FollowerMode = true

	t.Run("follower cannot pin", func(t *testing.T) {
		_, err := clusters[1].PinPath(ctx, "/ipfs/"+test.Cid2.String(), api.PinOptions{})
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
		_, err = clusters[1].Pin(ctx, test.Cid2, api.PinOptions{})
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
	})

	t.Run("follower cannot unpin", func(t *testing.T) {
		_, err := clusters[1].UnpinPath(ctx, "/ipfs/"+test.Cid1.String())
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
		_, err = clusters[1].Unpin(ctx, test.Cid1)
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
	})

	t.Run("follower cannot add", func(t *testing.T) {
		sth := test.NewShardingTestHelper()
		defer sth.Clean(t)
		params := api.DefaultAddParams()
		params.Shard = false
		params.Name = "testlocal"
		mfr, closer := sth.GetTreeMultiReader(t)
		defer closer.Close()
		r := multipart.NewReader(mfr, mfr.Boundary())
		_, err = clusters[1].AddFile(r, params)
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
	})

	t.Run("follower status itself only", func(t *testing.T) {
		gpi, err := clusters[1].Status(ctx, test.Cid1)
		if err != nil {
			t.Error("status should work")
		}
		if len(gpi.PeerMap) != 1 {
			t.Fatal("globalPinInfo should only have one peer")
		}
	})
}

func TestClusterPinsWithExpiration(t *testing.T) {
	ctx := context.Background()

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	ttlDelay()

	cl := clusters[rand.Intn(nClusters)] // choose a random cluster peer to query

	c := test.Cid1
	expireIn := 1 * time.Second
	opts := api.PinOptions{
		ExpireAt: time.Now().Add(expireIn),
	}
	_, err := cl.Pin(ctx, c, opts)
	if err != nil {
		t.Fatal("pin should have worked:", err)
	}

	pinDelay()

	pins, err := cl.pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if len(pins) != 1 {
		t.Error("pin should be part of the state")
	}

	// wait till expiry time
	time.Sleep(expireIn)

	// manually call state sync on all peers, so we don't have to wait till
	// state sync interval
	for _, c := range clusters {
		err = c.StateSync(ctx)
		if err != nil {
			t.Error(err)
		}
	}

	pinDelay()

	// state sync should have unpinned expired pin
	pins, err = cl.pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if len(pins) != 0 {
		t.Error("pin should not be part of the state")
	}
}

func TestClusterAlerts(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	if len(clusters) < 2 {
		t.Skip("need at least 2 nodes for this test")
	}

	ttlDelay()

	for _, c := range clusters[1:] {
		c.Shutdown(ctx)
	}

	ttlDelay()

	alerts := clusters[0].Alerts()
	if len(alerts) == 0 {
		t.Error("expected at least one alert")
	}
}