1d98538411
This commit fixes #810 and adds block streaming to the final destinations when adding, which should bring major performance gains when adding data to clusters.

Before, every time the cluster issued a block it was broadcast individually to all destinations (a new libp2p stream), where it was block/put to IPFS (one block/put HTTP roundtrip per block). Now, blocks are streamed all the way from the adder module to the ipfs daemon: each block, as it arrives, becomes a single part in a multipart block/put request.

Before, the block broadcast had to wait for all destinations to finish before processing the next block. Now, buffers allow some destinations to be faster than others while sending and receiving blocks.

Before, if a block put request failed to be broadcast everywhere, an error was raised at that moment. Now, we keep streaming until the end and only then report any errors; the operation succeeds as long as at least one stream finished successfully. Errors while block/putting to IPFS do not abort streams. Instead, subsequent blocks are retried with a new request, although the method returns an error when the stream finishes if there were errors at any point.
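To make the streaming idea concrete, here is a minimal, self-contained Go sketch (standard library only) of blocks arriving on a channel being written as individual parts of one long-lived multipart block/put request, instead of one HTTP roundtrip per block. The streamBlocks helper, the endpoint URL and the "data" field name are illustrative assumptions, not the actual cluster code.

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
)

// streamBlocks (hypothetical helper) sends every block arriving on the
// channel as one part of a single multipart request, rather than issuing
// a separate HTTP request per block.
func streamBlocks(url string, blocks <-chan []byte) error {
	pr, pw := io.Pipe()
	mpw := multipart.NewWriter(pw)

	// Producer: turn each incoming block into a multipart part as it arrives.
	go func() {
		var err error
		for block := range blocks {
			part, perr := mpw.CreateFormFile("data", "block")
			if perr != nil {
				err = perr
				break
			}
			if _, perr := io.Copy(part, bytes.NewReader(block)); perr != nil {
				err = perr
				break
			}
		}
		if err == nil {
			err = mpw.Close() // write the final multipart boundary
		}
		pw.CloseWithError(err) // a nil err closes the pipe normally
	}()

	// Consumer: a single POST whose body is produced while blocks stream in.
	req, err := http.NewRequest(http.MethodPost, url, pr)
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", mpw.FormDataContentType())
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("block/put failed: %s", resp.Status)
	}
	return nil
}

In the real code one such stream would be kept per destination peer, so a slow or failing peer does not hold back the others.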
299 lines
7.5 KiB
Go
package ipfscluster

// This file has tests for Add* using multiple cluster peers.

import (
	"context"
	"mime/multipart"
	"sync"
	"testing"
	"time"

	files "github.com/ipfs/go-ipfs-files"
	"github.com/ipfs/ipfs-cluster/adder"
	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/test"
	peer "github.com/libp2p/go-libp2p-core/peer"
)

func TestAdd(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	sth := test.NewShardingTestHelper()
	defer sth.Clean(t)

	waitForLeaderAndMetrics(t, clusters)

	t.Run("default", func(t *testing.T) {
		params := api.DefaultAddParams()
		params.Shard = false
		params.Name = "testlocal"
		mfr, closer := sth.GetTreeMultiReader(t)
		defer closer.Close()
		r := multipart.NewReader(mfr, mfr.Boundary())
		ci, err := clusters[0].AddFile(context.Background(), r, params)
		if err != nil {
			t.Fatal(err)
		}
		if ci.String() != test.ShardingDirBalancedRootCID {
			t.Fatal("unexpected root CID for local add")
		}

		// We need to sleep a lot because it takes time to
		// catch up on a first/single pin on crdts
		time.Sleep(10 * time.Second)

		f := func(t *testing.T, c *Cluster) {
			pin := c.StatusLocal(ctx, ci)
			if pin.Error != "" {
				t.Error(pin.Error)
			}
			if pin.Status != api.TrackerStatusPinned {
				t.Error("item should be pinned and is", pin.Status)
			}
		}

		runF(t, clusters, f)
	})

	t.Run("local_one_allocation", func(t *testing.T) {
		params := api.DefaultAddParams()
		params.Shard = false
		params.Name = "testlocal"
		params.ReplicationFactorMin = 1
		params.ReplicationFactorMax = 1
		params.Local = true
		mfr, closer := sth.GetTreeMultiReader(t)
		defer closer.Close()
		r := multipart.NewReader(mfr, mfr.Boundary())
		ci, err := clusters[2].AddFile(context.Background(), r, params)
		if err != nil {
			t.Fatal(err)
		}
		if ci.String() != test.ShardingDirBalancedRootCID {
			t.Fatal("unexpected root CID for local add")
		}

		// We need to sleep a lot because it takes time to
		// catch up on a first/single pin on crdts
		time.Sleep(10 * time.Second)

		f := func(t *testing.T, c *Cluster) {
			pin := c.StatusLocal(ctx, ci)
			if pin.Error != "" {
				t.Error(pin.Error)
			}
			switch c.id {
			case clusters[2].id:
				if pin.Status != api.TrackerStatusPinned {
					t.Error("item should be pinned and is", pin.Status)
				}
			default:
				if pin.Status != api.TrackerStatusRemote {
					t.Errorf("item should only be allocated to cluster2")
				}
			}
		}

		runF(t, clusters, f)
	})
}

func TestAddWithUserAllocations(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	sth := test.NewShardingTestHelper()
	defer sth.Clean(t)

	waitForLeaderAndMetrics(t, clusters)

	t.Run("local", func(t *testing.T) {
		params := api.DefaultAddParams()
		params.ReplicationFactorMin = 2
		params.ReplicationFactorMax = 2
		params.UserAllocations = []peer.ID{clusters[0].id, clusters[1].id}
		params.Shard = false
		params.Name = "testlocal"
		mfr, closer := sth.GetTreeMultiReader(t)
		defer closer.Close()
		r := multipart.NewReader(mfr, mfr.Boundary())
		ci, err := clusters[0].AddFile(context.Background(), r, params)
		if err != nil {
			t.Fatal(err)
		}

		pinDelay()

		f := func(t *testing.T, c *Cluster) {
			if c == clusters[0] || c == clusters[1] {
				pin := c.StatusLocal(ctx, ci)
				if pin.Error != "" {
					t.Error(pin.Error)
				}
				if pin.Status != api.TrackerStatusPinned {
					t.Error("item should be pinned and is", pin.Status)
				}
			} else {
				pin := c.StatusLocal(ctx, ci)
				if pin.Status != api.TrackerStatusRemote {
					t.Error("expected tracker status remote")
				}
			}
		}

		runF(t, clusters, f)
	})
}

func TestAddPeerDown(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	sth := test.NewShardingTestHelper()
	defer sth.Clean(t)
	err := clusters[0].Shutdown(ctx)
	if err != nil {
		t.Fatal(err)
	}

	waitForLeaderAndMetrics(t, clusters)

	t.Run("local", func(t *testing.T) {
		params := api.DefaultAddParams()
		params.Shard = false
		params.Name = "testlocal"
		mfr, closer := sth.GetTreeMultiReader(t)
		defer closer.Close()
		r := multipart.NewReader(mfr, mfr.Boundary())
		ci, err := clusters[1].AddFile(context.Background(), r, params)
		if err != nil {
			t.Fatal(err)
		}
		if ci.String() != test.ShardingDirBalancedRootCID {
			t.Fatal("unexpected root CID for local add")
		}

		// We need to sleep a lot because it takes time to
		// catch up on a first/single pin on crdts
		time.Sleep(10 * time.Second)

		f := func(t *testing.T, c *Cluster) {
			if c.id == clusters[0].id {
				return
			}
			pin := c.StatusLocal(ctx, ci)
			if pin.Error != "" {
				t.Error(pin.Error)
			}
			if pin.Status != api.TrackerStatusPinned {
				t.Error("item should be pinned")
			}
		}

		runF(t, clusters, f)
	})
}

func TestAddOnePeerFails(t *testing.T) {
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	sth := test.NewShardingTestHelper()
	defer sth.Clean(t)

	waitForLeaderAndMetrics(t, clusters)

	t.Run("local", func(t *testing.T) {
		params := api.DefaultAddParams()
		params.Shard = false
		params.Name = "testlocal"
		lg, closer := sth.GetRandFileReader(t, 100000) // 100 MB
		defer closer.Close()

		mr := files.NewMultiFileReader(lg, true)
		r := multipart.NewReader(mr, mr.Boundary())

		var wg sync.WaitGroup
		wg.Add(1)
		go func() {
			defer wg.Done()
			_, err := clusters[0].AddFile(context.Background(), r, params)
			if err != nil {
				t.Error(err)
			}
		}()

		// Disconnect one cluster peer (the last). Things should keep working.
		// It is important that we close the hosts, otherwise the RPC
		// servers keep working along with BlockPuts.
		time.Sleep(100 * time.Millisecond)
		c := clusters[nClusters-1]
		c.Shutdown(context.Background())
		c.dht.Close()
		c.host.Close()
		wg.Wait()
	})
}

func TestAddAllPeersFail(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	sth := test.NewShardingTestHelper()
	defer sth.Clean(t)

	waitForLeaderAndMetrics(t, clusters)

	t.Run("local", func(t *testing.T) {
		// Prevent the added content from being allocated to cluster 0,
		// as it is already going to have something pinned.
		_, err := clusters[0].Pin(ctx, test.Cid1, api.PinOptions{
			ReplicationFactorMin: 1,
			ReplicationFactorMax: 1,
			UserAllocations:      []peer.ID{clusters[0].host.ID()},
		})
		if err != nil {
			t.Fatal(err)
		}

		ttlDelay()

		params := api.DefaultAddParams()
		params.Shard = false
		params.Name = "testlocal"
		// Allocate to every peer except 0 (which already has a pin)
		params.PinOptions.ReplicationFactorMax = nClusters - 1
		params.PinOptions.ReplicationFactorMin = nClusters - 1

		lg, closer := sth.GetRandFileReader(t, 100000) // 100 MB
		defer closer.Close()
		mr := files.NewMultiFileReader(lg, true)
		r := multipart.NewReader(mr, mr.Boundary())

		// var cid cid.Cid
		var wg sync.WaitGroup
		wg.Add(1)
		go func() {
			defer wg.Done()
			_, err := clusters[0].AddFile(context.Background(), r, params)
			if err != adder.ErrBlockAdder {
				t.Error("expected ErrBlockAdder. Got: ", err)
			}
		}()

		time.Sleep(100 * time.Millisecond)

		// Shut down all clusters except 0 to see the right error.
		// It is important that we shut down the hosts, otherwise
		// the RPC servers keep working along with BlockPuts.
		// Note that this kills raft.
		runF(t, clusters[1:], func(t *testing.T, c *Cluster) {
			c.Shutdown(ctx)
			c.dht.Close()
			c.host.Close()
		})
		wg.Wait()
	})
}