ipfs-cluster/add_test.go
Hector Sanjuan 1d98538411 Adders: stream blocks to destinations
This commit fixes #810 and adds block streaming to the final destinations when
adding. This should bring major performance gains when adding data to clusters.

Before, every time cluster issued a block, it was broadcast individually to
all destinations (a new libp2p stream), where it was block/put to IPFS (a
single block/put HTTP round trip per block).

Now, blocks are streamed all the way from the adder module to the IPFS daemon:
every block, as it arrives, becomes a single part in a multipart block/put
request.

Before, the block broadcast needed to wait for all destinations to finish
before processing the next block. Now, buffers allow some destinations to be
faster than others while sending and receiving blocks (see the illustrative
fan-out sketch below).

Before, if a block/put request failed to be broadcast everywhere, an error
was raised at that moment.

Now, we keep streaming until the end and only then report any errors. The
operation succeeds as long as at least one stream finishes successfully.

Errors while block/putting to IPFS do not abort streams. Instead, subsequent
blocks are retried with a new request, although the method will return an
error when the stream finishes if there were errors at any point.
2022-03-24 17:24:58 +01:00
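The buffering described above can be pictured as a generic fan-out: each destination gets its own buffered channel, so a momentarily slow peer does not immediately stall the others. The function below is only an illustrative sketch of that idea (the names fanOut, bufSize and nDests are made up here), not the actual ipfs-cluster implementation.

// fanOut is an illustrative sketch only: it copies incoming blocks to
// per-destination buffered channels so destinations can make progress at
// different speeds. It is not the actual ipfs-cluster code.
func fanOut(blocks <-chan []byte, bufSize, nDests int) []<-chan []byte {
    outs := make([]chan []byte, nDests)
    ro := make([]<-chan []byte, nDests)
    for i := range outs {
        outs[i] = make(chan []byte, bufSize) // buffer absorbs speed differences
        ro[i] = outs[i]
    }
    go func() {
        for b := range blocks {
            for _, o := range outs {
                o <- b // only blocks when a destination's buffer is full
            }
        }
        for _, o := range outs {
            close(o)
        }
    }()
    return ro
}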

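The tests in this file exercise that path from the client side: content is wrapped in a multipart body and handed to Cluster.AddFile on one of the peers. A minimal sketch of that call pattern, assuming an existing *Cluster value c and a context ctx (the file name and content are arbitrary), looks like this:

// Sketch of the AddFile call pattern used throughout the tests below
// (c and ctx are assumed to exist; error handling shortened).
dir := files.NewMapDirectory(map[string]files.Node{
    "file.txt": files.NewBytesFile([]byte("hello")),
})
mfr := files.NewMultiFileReader(dir, true)    // multipart body
r := multipart.NewReader(mfr, mfr.Boundary()) // what AddFile consumes
params := api.DefaultAddParams()
ci, err := c.AddFile(ctx, r, params) // ci is the root CID of the added content
if err != nil {
    // handle the error
}
_ = ci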

package ipfscluster

// This file has tests for Add* using multiple cluster peers.

import (
    "context"
    "mime/multipart"
    "sync"
    "testing"
    "time"

    files "github.com/ipfs/go-ipfs-files"

    "github.com/ipfs/ipfs-cluster/adder"
    "github.com/ipfs/ipfs-cluster/api"
    "github.com/ipfs/ipfs-cluster/test"

    peer "github.com/libp2p/go-libp2p-core/peer"
)

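// TestAdd adds a test directory with the default parameters and with a
// single local allocation, and checks that the expected peers end up
// pinning the resulting root CID.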
func TestAdd(t *testing.T) {
    ctx := context.Background()
    clusters, mock := createClusters(t)
    defer shutdownClusters(t, clusters, mock)
    sth := test.NewShardingTestHelper()
    defer sth.Clean(t)

    waitForLeaderAndMetrics(t, clusters)

    t.Run("default", func(t *testing.T) {
        params := api.DefaultAddParams()
        params.Shard = false
        params.Name = "testlocal"
        mfr, closer := sth.GetTreeMultiReader(t)
        defer closer.Close()
        r := multipart.NewReader(mfr, mfr.Boundary())

        ci, err := clusters[0].AddFile(context.Background(), r, params)
        if err != nil {
            t.Fatal(err)
        }
        if ci.String() != test.ShardingDirBalancedRootCID {
            t.Fatal("unexpected root CID for local add")
        }

        // We need to sleep a lot because it takes time to
        // catch up on a first/single pin on crdts
        time.Sleep(10 * time.Second)

        f := func(t *testing.T, c *Cluster) {
            pin := c.StatusLocal(ctx, ci)
            if pin.Error != "" {
                t.Error(pin.Error)
            }
            if pin.Status != api.TrackerStatusPinned {
                t.Error("item should be pinned and is", pin.Status)
            }
        }
        runF(t, clusters, f)
    })

    t.Run("local_one_allocation", func(t *testing.T) {
        params := api.DefaultAddParams()
        params.Shard = false
        params.Name = "testlocal"
        params.ReplicationFactorMin = 1
        params.ReplicationFactorMax = 1
        params.Local = true
        mfr, closer := sth.GetTreeMultiReader(t)
        defer closer.Close()
        r := multipart.NewReader(mfr, mfr.Boundary())

        ci, err := clusters[2].AddFile(context.Background(), r, params)
        if err != nil {
            t.Fatal(err)
        }
        if ci.String() != test.ShardingDirBalancedRootCID {
            t.Fatal("unexpected root CID for local add")
        }

        // We need to sleep a lot because it takes time to
        // catch up on a first/single pin on crdts
        time.Sleep(10 * time.Second)

        f := func(t *testing.T, c *Cluster) {
            pin := c.StatusLocal(ctx, ci)
            if pin.Error != "" {
                t.Error(pin.Error)
            }
            switch c.id {
            case clusters[2].id:
                if pin.Status != api.TrackerStatusPinned {
                    t.Error("item should be pinned and is", pin.Status)
                }
            default:
                if pin.Status != api.TrackerStatusRemote {
                    t.Errorf("item should only be allocated to cluster2")
                }
            }
        }
        runF(t, clusters, f)
    })
}

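// TestAddWithUserAllocations verifies that content added with explicit
// user allocations is pinned on the allocated peers and tracked as
// remote everywhere else.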
func TestAddWithUserAllocations(t *testing.T) {
    ctx := context.Background()
    clusters, mock := createClusters(t)
    defer shutdownClusters(t, clusters, mock)
    sth := test.NewShardingTestHelper()
    defer sth.Clean(t)

    waitForLeaderAndMetrics(t, clusters)

    t.Run("local", func(t *testing.T) {
        params := api.DefaultAddParams()
        params.ReplicationFactorMin = 2
        params.ReplicationFactorMax = 2
        params.UserAllocations = []peer.ID{clusters[0].id, clusters[1].id}
        params.Shard = false
        params.Name = "testlocal"
        mfr, closer := sth.GetTreeMultiReader(t)
        defer closer.Close()
        r := multipart.NewReader(mfr, mfr.Boundary())

        ci, err := clusters[0].AddFile(context.Background(), r, params)
        if err != nil {
            t.Fatal(err)
        }

        pinDelay()

        f := func(t *testing.T, c *Cluster) {
            if c == clusters[0] || c == clusters[1] {
                pin := c.StatusLocal(ctx, ci)
                if pin.Error != "" {
                    t.Error(pin.Error)
                }
                if pin.Status != api.TrackerStatusPinned {
                    t.Error("item should be pinned and is", pin.Status)
                }
            } else {
                pin := c.StatusLocal(ctx, ci)
                if pin.Status != api.TrackerStatusRemote {
                    t.Error("expected tracker status remote")
                }
            }
        }
        runF(t, clusters, f)
    })
}

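// TestAddPeerDown verifies that adding content still succeeds when one of
// the cluster peers has been shut down beforehand.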
func TestAddPeerDown(t *testing.T) {
    ctx := context.Background()
    clusters, mock := createClusters(t)
    defer shutdownClusters(t, clusters, mock)
    sth := test.NewShardingTestHelper()
    defer sth.Clean(t)

    err := clusters[0].Shutdown(ctx)
    if err != nil {
        t.Fatal(err)
    }

    waitForLeaderAndMetrics(t, clusters)

    t.Run("local", func(t *testing.T) {
        params := api.DefaultAddParams()
        params.Shard = false
        params.Name = "testlocal"
        mfr, closer := sth.GetTreeMultiReader(t)
        defer closer.Close()
        r := multipart.NewReader(mfr, mfr.Boundary())

        ci, err := clusters[1].AddFile(context.Background(), r, params)
        if err != nil {
            t.Fatal(err)
        }
        if ci.String() != test.ShardingDirBalancedRootCID {
            t.Fatal("unexpected root CID for local add")
        }

        // We need to sleep a lot because it takes time to
        // catch up on a first/single pin on crdts
        time.Sleep(10 * time.Second)

        f := func(t *testing.T, c *Cluster) {
            if c.id == clusters[0].id {
                return
            }
            pin := c.StatusLocal(ctx, ci)
            if pin.Error != "" {
                t.Error(pin.Error)
            }
            if pin.Status != api.TrackerStatusPinned {
                t.Error("item should be pinned")
            }
        }
        runF(t, clusters, f)
    })
}

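// TestAddOnePeerFails verifies that an ongoing add keeps working when one
// of the destination peers disappears mid-stream.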
func TestAddOnePeerFails(t *testing.T) {
    clusters, mock := createClusters(t)
    defer shutdownClusters(t, clusters, mock)
    sth := test.NewShardingTestHelper()
    defer sth.Clean(t)

    waitForLeaderAndMetrics(t, clusters)

    t.Run("local", func(t *testing.T) {
        params := api.DefaultAddParams()
        params.Shard = false
        params.Name = "testlocal"
        lg, closer := sth.GetRandFileReader(t, 100000) // 100 MB
        defer closer.Close()
        mr := files.NewMultiFileReader(lg, true)
        r := multipart.NewReader(mr, mr.Boundary())

        var wg sync.WaitGroup
        wg.Add(1)
        go func() {
            defer wg.Done()
            _, err := clusters[0].AddFile(context.Background(), r, params)
            if err != nil {
                t.Error(err)
            }
        }()

        // Disconnect 1 cluster (the last). Things should keep working.
        // Important that we close the hosts, otherwise the RPC
        // Servers keep working along with BlockPuts.
        time.Sleep(100 * time.Millisecond)
        c := clusters[nClusters-1]
        c.Shutdown(context.Background())
        c.dht.Close()
        c.host.Close()
        wg.Wait()
    })
}

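// TestAddAllPeersFail verifies that adding fails with adder.ErrBlockAdder
// when none of the allocated peers can receive blocks.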
func TestAddAllPeersFail(t *testing.T) {
    ctx := context.Background()
    clusters, mock := createClusters(t)
    defer shutdownClusters(t, clusters, mock)
    sth := test.NewShardingTestHelper()
    defer sth.Clean(t)

    waitForLeaderAndMetrics(t, clusters)

    t.Run("local", func(t *testing.T) {
        // Prevent the added content from being allocated to cluster 0,
        // as it is already going to have something.
        _, err := clusters[0].Pin(ctx, test.Cid1, api.PinOptions{
            ReplicationFactorMin: 1,
            ReplicationFactorMax: 1,
            UserAllocations:      []peer.ID{clusters[0].host.ID()},
        })
        if err != nil {
            t.Fatal(err)
        }

        ttlDelay()

        params := api.DefaultAddParams()
        params.Shard = false
        params.Name = "testlocal"
        // Allocate to every peer except 0 (which already has a pin).
        params.PinOptions.ReplicationFactorMax = nClusters - 1
        params.PinOptions.ReplicationFactorMin = nClusters - 1

        lg, closer := sth.GetRandFileReader(t, 100000) // 100 MB
        defer closer.Close()
        mr := files.NewMultiFileReader(lg, true)
        r := multipart.NewReader(mr, mr.Boundary())

        var wg sync.WaitGroup
        wg.Add(1)
        go func() {
            defer wg.Done()
            _, err := clusters[0].AddFile(context.Background(), r, params)
            if err != adder.ErrBlockAdder {
                t.Error("expected ErrBlockAdder. Got: ", err)
            }
        }()

        time.Sleep(100 * time.Millisecond)

        // Shut down all clusters except 0 to see the right error.
        // Important that we shut down the hosts, otherwise
        // the RPC Servers keep working along with BlockPuts.
        // Note that this kills raft.
        runF(t, clusters[1:], func(t *testing.T, c *Cluster) {
            c.Shutdown(ctx)
            c.dht.Close()
            c.host.Close()
        })
        wg.Wait()
    })
}