package ipfscluster

import (
	"context"
	"errors"
	"flag"
	"fmt"
	"math/rand"
	"mime/multipart"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/ipfs/ipfs-cluster/allocator/balanced"
	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/api/rest"
	"github.com/ipfs/ipfs-cluster/consensus/crdt"
	"github.com/ipfs/ipfs-cluster/consensus/raft"
	"github.com/ipfs/ipfs-cluster/datastore/badger"
	"github.com/ipfs/ipfs-cluster/datastore/inmem"
	"github.com/ipfs/ipfs-cluster/datastore/leveldb"
	"github.com/ipfs/ipfs-cluster/informer/disk"
	"github.com/ipfs/ipfs-cluster/ipfsconn/ipfshttp"
	"github.com/ipfs/ipfs-cluster/monitor/pubsubmon"
	"github.com/ipfs/ipfs-cluster/observations"
	"github.com/ipfs/ipfs-cluster/pintracker/stateless"
	"github.com/ipfs/ipfs-cluster/state"
	"github.com/ipfs/ipfs-cluster/test"
	"github.com/ipfs/ipfs-cluster/version"

	ds "github.com/ipfs/go-datastore"
	libp2p "github.com/libp2p/go-libp2p"
	crypto "github.com/libp2p/go-libp2p-core/crypto"
	host "github.com/libp2p/go-libp2p-core/host"
	peer "github.com/libp2p/go-libp2p-core/peer"
	peerstore "github.com/libp2p/go-libp2p-core/peerstore"
	dht "github.com/libp2p/go-libp2p-kad-dht"
	dual "github.com/libp2p/go-libp2p-kad-dht/dual"
	pubsub "github.com/libp2p/go-libp2p-pubsub"
	routedhost "github.com/libp2p/go-libp2p/p2p/host/routed"
	ma "github.com/multiformats/go-multiaddr"
)

var (
	// number of clusters to create
	nClusters = 5

	// number of pins to pin/unpin/check
	nPins = 100

	logLevel               = "FATAL"
	customLogLvlFacilities = logFacilities{}

	consensus = "crdt"
	datastore = "badger"

	ttlDelayTime = 2 * time.Second // reset in TestMain to 2x diskInf.MetricTTL
	testsFolder  = "clusterTestsFolder"

	// When testing with fixed ports...
	// clusterPort = 10000
	// apiPort = 10100
	// ipfsProxyPort = 10200
)
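
// The defaults above can be overridden on the command line when running the
// tests. An illustrative invocation (these flags are registered in TestMain
// below; the facility names are examples):
//
//	go test -v -loglevel INFO -logfacs cluster,raft -nclusters 3 -consensus raft -datastore leveldb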
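
// logFacilities implements the flag.Value interface to collect a
// comma-separated list of log facilities from the command line.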
type logFacilities []string

// String is the method to format the flag's value, part of the flag.Value interface.
func (lg *logFacilities) String() string {
	return fmt.Sprint(*lg)
}

// Set is the method to set the flag value, part of the flag.Value interface.
func (lg *logFacilities) Set(value string) error {
	if len(*lg) > 0 {
		return errors.New("logFacilities flag already set")
	}
	for _, lf := range strings.Split(value, ",") {
		*lg = append(*lg, lf)
	}
	return nil
}

// TestMain runs test initialization. Since Go1.13 we cannot run this on init()
// as flag.Parse() does not work well there
// (see https://golang.org/src/testing/testing.go#L211).
func TestMain(m *testing.M) {
	rand.Seed(time.Now().UnixNano())
	ReadyTimeout = 11 * time.Second

	// GossipSub needs to heartbeat to discover newly connected hosts.
	// This speeds things up a little.
	pubsub.GossipSubHeartbeatInterval = 50 * time.Millisecond

	flag.Var(&customLogLvlFacilities, "logfacs", "use -loglevel for only the following log facilities; comma-separated")
	flag.StringVar(&logLevel, "loglevel", logLevel, "default log level for tests")
	flag.IntVar(&nClusters, "nclusters", nClusters, "number of clusters to use")
	flag.IntVar(&nPins, "npins", nPins, "number of pins to pin/unpin/check")
	flag.StringVar(&consensus, "consensus", consensus, "consensus implementation")
	flag.StringVar(&datastore, "datastore", datastore, "datastore backend")
	flag.Parse()

	if len(customLogLvlFacilities) <= 0 {
		for f := range LoggingFacilities {
			SetFacilityLogLevel(f, logLevel)
		}

		for f := range LoggingFacilitiesExtra {
			SetFacilityLogLevel(f, logLevel)
		}
	}

	for _, f := range customLogLvlFacilities {
		if _, ok := LoggingFacilities[f]; ok {
			SetFacilityLogLevel(f, logLevel)
			continue
		}
		if _, ok := LoggingFacilitiesExtra[f]; ok {
			SetFacilityLogLevel(f, logLevel)
			continue
		}
	}

	diskInfCfg := &disk.Config{}
	diskInfCfg.LoadJSON(testingDiskInfCfg)
	ttlDelayTime = diskInfCfg.MetricTTL * 2

	os.Exit(m.Run())
}
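
// randomBytes returns a slice of 64 pseudo-random bytes.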
func randomBytes() []byte {
	bs := make([]byte, 64)
	for i := 0; i < len(bs); i++ {
		b := byte(rand.Int())
		bs[i] = b
	}
	return bs
}
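
// createComponents creates every component needed to assemble a test Cluster
// around the given host: configuration, datastore, consensus, APIs, IPFS
// connector, pin tracker, monitor, allocator, informer and tracer, together
// with the IpfsMock that stands in for the IPFS daemon.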
func createComponents(
	t *testing.T,
	host host.Host,
	pubsub *pubsub.PubSub,
	dht *dual.DHT,
	i int,
	staging bool,
) (
	*Config,
	ds.Datastore,
	Consensus,
	[]API,
	IPFSConnector,
	PinTracker,
	PeerMonitor,
	PinAllocator,
	Informer,
	Tracer,
	*test.IpfsMock,
) {
	ctx := context.Background()
	mock := test.NewIpfsMock(t)

	// apiAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", apiPort+i))
	// Bind on port 0
	apiAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
	// Bind on port 0
	// proxyAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", ipfsProxyPort+i))
	proxyAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
	nodeAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/%s/tcp/%d", mock.Addr, mock.Port))

	peername := fmt.Sprintf("peer_%d", i)

	ident, clusterCfg, apiCfg, ipfsproxyCfg, ipfshttpCfg, badgerCfg, levelDBCfg, raftCfg, crdtCfg, statelesstrackerCfg, psmonCfg, allocBalancedCfg, diskInfCfg, tracingCfg := testingConfigs()

	ident.ID = host.ID()
	ident.PrivateKey = host.Peerstore().PrivKey(host.ID())
	clusterCfg.Peername = peername
	clusterCfg.LeaveOnShutdown = false
	clusterCfg.SetBaseDir(filepath.Join(testsFolder, host.ID().Pretty()))

	apiCfg.HTTPListenAddr = []ma.Multiaddr{apiAddr}

	ipfsproxyCfg.ListenAddr = []ma.Multiaddr{proxyAddr}
	ipfsproxyCfg.NodeAddr = nodeAddr

	ipfshttpCfg.NodeAddr = nodeAddr

	raftCfg.DataFolder = filepath.Join(testsFolder, host.ID().Pretty())

	badgerCfg.Folder = filepath.Join(testsFolder, host.ID().Pretty(), "badger")
	levelDBCfg.Folder = filepath.Join(testsFolder, host.ID().Pretty(), "leveldb")

	api, err := rest.NewAPI(ctx, apiCfg)
	if err != nil {
		t.Fatal(err)
	}

	ipfsProxy, err := rest.NewAPI(ctx, apiCfg)
	if err != nil {
		t.Fatal(err)
	}

	ipfs, err := ipfshttp.NewConnector(ipfshttpCfg)
	if err != nil {
		t.Fatal(err)
	}

	alloc, err := balanced.New(allocBalancedCfg)
	if err != nil {
		t.Fatal(err)
	}

	inf, err := disk.NewInformer(diskInfCfg)
	if err != nil {
		t.Fatal(err)
	}

	store := makeStore(t, badgerCfg, levelDBCfg)
	cons := makeConsensus(t, store, host, pubsub, dht, raftCfg, staging, crdtCfg)
	tracker := stateless.New(statelesstrackerCfg, ident.ID, clusterCfg.Peername, cons.State)

	var peersF func(context.Context) ([]peer.ID, error)
	if consensus == "raft" {
		peersF = cons.Peers
	}
	mon, err := pubsubmon.New(ctx, psmonCfg, pubsub, peersF)
	if err != nil {
		t.Fatal(err)
	}

	tracingCfg.ServiceName = peername
	tracer, err := observations.SetupTracing(tracingCfg)
	if err != nil {
		t.Fatal(err)
	}

	return clusterCfg, store, cons, []API{api, ipfsProxy}, ipfs, tracker, mon, alloc, inf, tracer, mock
}
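
// makeStore returns the datastore backend selected by the -datastore flag
// (badger or leveldb) when the crdt consensus is being tested, and an
// in-memory datastore otherwise.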
func makeStore(t *testing.T, badgerCfg *badger.Config, levelDBCfg *leveldb.Config) ds.Datastore {
	switch consensus {
	case "crdt":
		if datastore == "badger" {
			dstr, err := badger.New(badgerCfg)
			if err != nil {
				t.Fatal(err)
			}
			return dstr
		}
		dstr, err := leveldb.New(levelDBCfg)
		if err != nil {
			t.Fatal(err)
		}
		return dstr
	default:
		return inmem.New()
	}
}
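
// makeConsensus builds the consensus component selected by the -consensus
// flag: raft or crdt.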
func makeConsensus(t *testing.T, store ds.Datastore, h host.Host, psub *pubsub.PubSub, dht *dual.DHT, raftCfg *raft.Config, staging bool, crdtCfg *crdt.Config) Consensus {
	switch consensus {
	case "raft":
		raftCon, err := raft.NewConsensus(h, raftCfg, store, staging)
		if err != nil {
			t.Fatal(err)
		}
		return raftCon
	case "crdt":
		crdtCon, err := crdt.New(h, dht, psub, crdtCfg, store)
		if err != nil {
			t.Fatal(err)
		}
		return crdtCon
	default:
		panic("bad consensus")
	}
}
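
// createCluster assembles a Cluster from pre-built components, failing the
// test if NewCluster returns an error.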
func createCluster(t *testing.T, host host.Host, dht *dual.DHT, clusterCfg *Config, store ds.Datastore, consensus Consensus, apis []API, ipfs IPFSConnector, tracker PinTracker, mon PeerMonitor, alloc PinAllocator, inf Informer, tracer Tracer) *Cluster {
	cl, err := NewCluster(context.Background(), host, dht, clusterCfg, store, consensus, apis, ipfs, tracker, mon, alloc, []Informer{inf}, tracer)
	if err != nil {
		t.Fatal(err)
	}
	return cl
}
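
// createOnePeerCluster creates a fully-working single-peer cluster together
// with the IpfsMock backing it, and blocks until the peer reports ready.
// Callers are expected to shut the cluster down and close the mock when done.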
func createOnePeerCluster(t *testing.T, nth int, clusterSecret []byte) (*Cluster, *test.IpfsMock) {
	hosts, pubsubs, dhts := createHosts(t, clusterSecret, 1)
	clusterCfg, store, consensus, api, ipfs, tracker, mon, alloc, inf, tracer, mock := createComponents(t, hosts[0], pubsubs[0], dhts[0], nth, false)
	cl := createCluster(t, hosts[0], dhts[0], clusterCfg, store, consensus, api, ipfs, tracker, mon, alloc, inf, tracer)
	<-cl.Ready()
	return cl, mock
}
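
// createHosts creates a libp2p host, pubsub instance and DHT for each of the
// nClusters test peers, listening on TCP and QUIC on random local ports.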
func createHosts(t *testing.T, clusterSecret []byte, nClusters int) ([]host.Host, []*pubsub.PubSub, []*dual.DHT) {
	hosts := make([]host.Host, nClusters)
	pubsubs := make([]*pubsub.PubSub, nClusters)
	dhts := make([]*dual.DHT, nClusters)

	tcpaddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
	quicAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/udp/0/quic")
	for i := range hosts {
		priv, _, err := crypto.GenerateKeyPair(crypto.RSA, 2048)
		if err != nil {
			t.Fatal(err)
		}

		h, p, d := createHost(t, priv, clusterSecret, []ma.Multiaddr{quicAddr, tcpaddr})
		hosts[i] = h
		dhts[i] = d
		pubsubs[i] = p
	}

	return hosts, pubsubs, dhts
}
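
// createHost builds a single libp2p host from the given private key and
// listen addresses, plus its DHT and pubsub instances, returning the host
// wrapped with the DHT for routing.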
func createHost(t *testing.T, priv crypto.PrivKey, clusterSecret []byte, listen []ma.Multiaddr) (host.Host, *pubsub.PubSub, *dual.DHT) {
	ctx := context.Background()

	h, err := newHost(ctx, clusterSecret, priv, libp2p.ListenAddrs(listen...))
	if err != nil {
		t.Fatal(err)
	}

	// The DHT needs to be created BEFORE connecting the peers.
	d, err := newTestDHT(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	// Pubsub needs to be created BEFORE connecting the peers,
	// otherwise they are not picked up.
	psub, err := newPubSub(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	return routedhost.Wrap(h, d), psub, d
}
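
// newTestDHT creates a dual DHT with very short routing table refresh
// intervals so that test peers discover each other quickly.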
func newTestDHT(ctx context.Context, h host.Host) (*dual.DHT, error) {
	return newDHT(ctx, h, nil,
		dual.DHTOption(dht.RoutingTableRefreshPeriod(600*time.Millisecond)),
		dual.DHTOption(dht.RoutingTableRefreshQueryTimeout(300*time.Millisecond)),
	)
}
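
// createClusters creates the components for nClusters peers (all but the
// first in staging mode) and boots the cluster peers.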
|
|
|
|
|
2017-02-09 15:29:17 +00:00
|
|
|
func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) {
|
2018-06-27 04:03:15 +00:00
|
|
|
ctx := context.Background()
|
2019-02-20 14:24:25 +00:00
|
|
|
os.RemoveAll(testsFolder)
|
2020-04-14 17:58:00 +00:00
|
|
|
cfgs := make([]*Config, nClusters)
|
|
|
|
stores := make([]ds.Datastore, nClusters)
|
|
|
|
cons := make([]Consensus, nClusters)
|
|
|
|
apis := make([][]API, nClusters)
|
|
|
|
ipfss := make([]IPFSConnector, nClusters)
|
|
|
|
trackers := make([]PinTracker, nClusters)
|
|
|
|
mons := make([]PeerMonitor, nClusters)
|
|
|
|
allocs := make([]PinAllocator, nClusters)
|
|
|
|
infs := make([]Informer, nClusters)
|
|
|
|
tracers := make([]Tracer, nClusters)
|
|
|
|
ipfsMocks := make([]*test.IpfsMock, nClusters)
|
|
|
|
|
|
|
|
clusters := make([]*Cluster, nClusters)
|
2017-01-30 12:12:25 +00:00
|
|
|
|
2018-01-25 22:34:06 +00:00
|
|
|
// Uncomment when testing with fixed ports
|
|
|
|
// clusterPeers := make([]ma.Multiaddr, nClusters, nClusters)
|
|
|
|
|
2019-02-20 14:24:25 +00:00
|
|
|
hosts, pubsubs, dhts := createHosts(t, testingClusterSecret, nClusters)
|
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
for i := 0; i < nClusters; i++ {
|
Feat: emancipate Consensus from the Cluster component
This commit promotes the Consensus component (and Raft) to become a fully
independent thing like other components, passed to NewCluster during
initialization. Cluster (main component) no longer creates the consensus
layer internally. This has triggered a number of breaking changes
that I will explain below.
Motivation: Future work will require the possibility of running Cluster
with a consensus layer that is not Raft. The "consensus" layer is in charge
of maintaining two things:
* The current cluster peerset, as required by the implementation
* The current cluster pinset (shared state)
While the pinset maintenance has always been in the consensus layer, the
peerset maintenance was handled by the main component (starting by the "peers"
key in the configuration) AND the Raft component (internally)
and this generated lots of confusion: if the user edited the peers in the
configuration they would be greeted with an error.
The bootstrap process (adding a peer to an existing cluster) and configuration
key also complicated many things, since the main component handled it, but only
when the consensus was initialized and in single-peer mode.
In all this we also mixed the peerstore (the list of peer addresses in the
libp2p host) with the peerset, when they need not be linked.
By initializing the consensus layer before calling NewCluster, all the
difficulties in maintaining the current implementation in the same way
have come to light. Thus, the following changes have been introduced:
* Remove "peers" and "bootstrap" keys from the configuration: we no longer
edit or save the configuration files. This was a very bad practice, requiring
write permissions by the process to the file containing the private key and
additionally making things like Puppet deployments of cluster difficult as
the configuration would mutate from its initial version. It also removes all
the maintenance associated with making sure "peers" and "bootstrap" had
correct values when peers were bootstrapped or removed. A loud and detailed
error message has been added when starting cluster with an old config, along
with instructions on how to move forward.
* Introduce a PeerstoreFile ("peerstore") which stores peer addresses: in
ipfs, the peerstore is not persisted because it can be re-built from the
network bootstrappers and the DHT. Cluster should probably also allow
discoverability of peer addresses (when not bootstrapping, as in that case
we have them), but in the meantime, we will read and persist the peerstore
addresses for cluster peers in this file, separate from the configuration.
Note that dns multiaddresses are now fully supported and no IPs are saved
when we have DNS multiaddresses for a peer.
* The former "peer_manager" code is now a pstoremgr module, providing utilities
to parse, add, list and generally maintain the libp2p host peerstore, including
operations on the PeerstoreFile. This "pstoremgr" can now also be extended to
perform address autodiscovery and other things independently from Cluster.
* Create and initialize Raft outside of the main Cluster component: since we
can now launch Raft independently from Cluster, we have more degrees of
freedom. A new "staging" option when creating the object allows a raft peer to
be launched in Staging mode, waiting to be added to a running consensus, and
thus not electing itself as leader as it did before. This additionally allows
us to track when the peer has become a Voter, which only happens when it has
caught up with the state, something that was wonky previously.
* The raft configuration now includes an InitPeerset key, which allows
providing a peerset for new peers and which is ignored when staging==true. The
whole Raft initialization code is much cleaner and more robust now.
* Cluster peer bootstrapping is now an ipfs-cluster-service feature. The
--bootstrap flag works as before (additionally allowing a comma-separated list
of entries). What bootstrap does is initialize Raft with staging == true,
and then call Join in the main cluster component. Only when the Raft peer
transitions to Voter does consensus become ready, and cluster become Ready.
This is cleaner, works better and is less complex than before (which supported
both flags and config values). We also automatically back up and clean the
state whenever we are bootstrapping.
* ipfs-cluster-service no longer runs the daemon. Starting cluster now
requires "ipfs-cluster-service daemon". The daemon-specific flags (bootstrap,
alloc) are now flags of the daemon subcommand. Here we mimic ipfs ("ipfs"
does not start the daemon but prints help) and pave the way for merging both
service and ctl in the future.
While this brings some breaking changes, it significantly reduces the
complexity of the configuration, the code and, most importantly, the
documentation. It should now be easier to explain to users the right way to
launch a cluster peer, and more difficult to make mistakes.
As a side effect, the PR also:
* Fixes #381 - peers with dynamic addresses
* Fixes #371 - peers should be Raft configuration option
* Fixes #378 - waitForUpdates may return before state fully synced
* Fixes #235 - config option shadowing (no cfg saves, no need to shadow)
License: MIT
Signed-off-by: Hector Sanjuan <code@hector.link>
2018-04-28 22:22:23 +00:00
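In summary, the bootstrap flow that the fixture code below implements (a
condensed sketch; createComponents, createCluster and clusterAddr are test
helpers from this file, not the production API, and staging is true for every
peer except the first) looks like this:

	// First peer boots alone (staging=false) and becomes Ready by itself.
	clusters[0] = createCluster(t, hosts[0], dhts[0], cfgs[0], stores[0], cons[0], apis[0], ipfss[0], trackers[0], mons[0], allocs[0], infs[0], tracers[0])
	<-clusters[0].Ready()

	// Remaining peers start in staging mode and Join the first one; they
	// only become Ready once their Raft peer has transitioned to Voter.
	bootstrapAddr := clusterAddr(clusters[0])
	for i := 1; i < nClusters; i++ {
		clusters[i] = createCluster(t, hosts[i], dhts[i], cfgs[i], stores[i], cons[i], apis[i], ipfss[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i])
		if err := clusters[i].Join(ctx, bootstrapAddr); err != nil {
			t.Fatal(err)
		}
		<-clusters[i].Ready()
	}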
|
|
|
// staging = true for all except first (i==0)
|
2019-02-20 14:24:25 +00:00
|
|
|
cfgs[i], stores[i], cons[i], apis[i], ipfss[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i], ipfsMocks[i] = createComponents(t, hosts[i], pubsubs[i], dhts[i], i, i != 0)
|
2018-03-29 18:18:15 +00:00
|
|
|
}
|
|
|
|
|
2018-01-16 19:57:54 +00:00
|
|
|
// Start first node
|
2019-02-20 14:24:25 +00:00
|
|
|
clusters[0] = createCluster(t, hosts[0], dhts[0], cfgs[0], stores[0], cons[0], apis[0], ipfss[0], trackers[0], mons[0], allocs[0], infs[0], tracers[0])
|
Feat: emancipate Consensus from the Cluster component
2018-04-28 22:22:23 +00:00
|
|
|
<-clusters[0].Ready()
|
2018-10-25 12:55:01 +00:00
|
|
|
bootstrapAddr := clusterAddr(clusters[0])
|
2019-11-07 19:37:03 +00:00
|
|
|
|
Feat: emancipate Consensus from the Cluster component
2018-04-28 22:22:23 +00:00
|
|
|
// Start the rest and join
|
2018-01-16 19:57:54 +00:00
|
|
|
for i := 1; i < nClusters; i++ {
|
2019-02-20 14:24:25 +00:00
|
|
|
clusters[i] = createCluster(t, hosts[i], dhts[i], cfgs[i], stores[i], cons[i], apis[i], ipfss[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i])
|
2018-06-27 04:03:15 +00:00
|
|
|
err := clusters[i].Join(ctx, bootstrapAddr)
|
Feat: emancipate Consensus from the Cluster component
2018-04-28 22:22:23 +00:00
|
|
|
if err != nil {
|
|
|
|
logger.Error(err)
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
<-clusters[i].Ready()
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
2019-02-20 14:24:25 +00:00
|
|
|
|
|
|
|
// connect all hosts
|
|
|
|
for _, h := range hosts {
|
|
|
|
for _, h2 := range hosts {
|
|
|
|
if h.ID() != h2.ID() {
|
|
|
|
h.Peerstore().AddAddrs(h2.ID(), h2.Addrs(), peerstore.PermanentAddrTTL)
|
|
|
|
_, err := h.Network().DialPeer(ctx, h2.ID())
|
|
|
|
if err != nil {
|
|
|
|
t.Log(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-29 18:18:15 +00:00
|
|
|
waitForLeader(t, clusters)
|
2018-08-15 10:30:00 +00:00
|
|
|
waitForClustersHealthy(t, clusters)
|
2017-02-02 22:52:06 +00:00
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
return clusters, ipfsMocks
|
|
|
|
}
|
|
|
|
|
2017-02-09 15:29:17 +00:00
|
|
|
func shutdownClusters(t *testing.T, clusters []*Cluster, m []*test.IpfsMock) {
|
2016-12-21 13:30:54 +00:00
|
|
|
for i, c := range clusters {
|
2019-12-23 07:12:38 +00:00
|
|
|
shutdownCluster(t, c, m[i])
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
2019-02-20 14:24:25 +00:00
|
|
|
os.RemoveAll(testsFolder)
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
|
|
|
|
2019-12-23 07:12:38 +00:00
|
|
|
func shutdownCluster(t *testing.T, c *Cluster, m *test.IpfsMock) {
|
|
|
|
err := c.Shutdown(context.Background())
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
c.dht.Close()
|
|
|
|
c.host.Close()
|
2021-07-06 10:28:03 +00:00
|
|
|
c.datastore.Close()
|
2019-12-23 07:12:38 +00:00
|
|
|
m.Close()
|
|
|
|
}
|
|
|
|
|
2022-03-22 09:56:16 +00:00
|
|
|
func collectGlobalPinInfos(t *testing.T, out <-chan api.GlobalPinInfo, timeout time.Duration) []api.GlobalPinInfo {
|
|
|
|
t.Helper()
|
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
var gpis []api.GlobalPinInfo
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
t.Error(ctx.Err())
|
|
|
|
return gpis
|
|
|
|
case gpi, ok := <-out:
|
|
|
|
if !ok {
|
|
|
|
return gpis
|
|
|
|
}
|
|
|
|
gpis = append(gpis, gpi)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func collectPinInfos(t *testing.T, out <-chan api.PinInfo) []api.PinInfo {
|
|
|
|
t.Helper()
|
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
var pis []api.PinInfo
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
t.Error(ctx.Err())
|
|
|
|
return pis
|
|
|
|
case pi, ok := <-out:
|
|
|
|
if !ok {
|
|
|
|
return pis
|
|
|
|
}
|
|
|
|
pis = append(pis, pi)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
func runF(t *testing.T, clusters []*Cluster, f func(*testing.T, *Cluster)) {
|
2020-04-23 16:28:16 +00:00
|
|
|
t.Helper()
|
2016-12-21 13:30:54 +00:00
|
|
|
var wg sync.WaitGroup
|
|
|
|
for _, c := range clusters {
|
|
|
|
wg.Add(1)
|
2016-12-21 19:46:00 +00:00
|
|
|
go func(c *Cluster) {
|
2016-12-21 13:30:54 +00:00
|
|
|
defer wg.Done()
|
|
|
|
f(t, c)
|
2016-12-21 19:46:00 +00:00
|
|
|
}(c)
|
2016-12-21 13:30:54 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
|
2018-03-29 20:31:11 +00:00
|
|
|
//////////////////////////////////////
|
|
|
|
// Delay and wait functions
|
|
|
|
//
|
|
|
|
// Delays are used in tests to wait for certain events to happen:
|
|
|
|
// * ttlDelay() waits for metrics to arrive. If you pin something
|
|
|
|
// and your next operation depends on updated metrics, you need to wait
|
|
|
|
// * pinDelay() accounts for the time necessary to pin something and for the new
|
|
|
|
// log entry to be visible in all cluster peers
|
|
|
|
// * delay just sleeps a second or two.
|
|
|
|
// * waitForLeader functions make sure there is a raft leader, for example,
|
|
|
|
// after killing the leader.
|
|
|
|
//
|
|
|
|
// The values for delays are a result of testing and adjusting so tests pass
|
|
|
|
// in travis, jenkins etc., taking into account the values used in the
|
|
|
|
// testing configuration (config_test.go).
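//
// A typical sequence in the tests below (condensed sketch;
// assertPinnedEverywhere is a placeholder name for the per-peer
// status checks done in each test, not a real helper):
//
//	clusters[j].Pin(ctx, h, api.PinOptions{})  // pin on a random peer
//	pinDelay()                                 // wait for the pin to propagate
//	runF(t, clusters, assertPinnedEverywhere)  // check status on every peer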
|
2016-12-21 13:30:54 +00:00
|
|
|
func delay() {
|
2017-02-02 22:52:06 +00:00
|
|
|
var d int
|
|
|
|
if nClusters > 10 {
|
2018-05-08 12:39:45 +00:00
|
|
|
d = 3000
|
2017-02-02 22:52:06 +00:00
|
|
|
} else {
|
2018-05-08 12:39:45 +00:00
|
|
|
d = 2000
|
2017-02-02 22:52:06 +00:00
|
|
|
}
|
2018-03-16 16:37:39 +00:00
|
|
|
time.Sleep(time.Duration(d) * time.Millisecond)
|
|
|
|
}
|
|
|
|
|
|
|
|
func pinDelay() {
|
2018-08-15 10:30:00 +00:00
|
|
|
time.Sleep(800 * time.Millisecond)
|
2018-03-16 16:37:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func ttlDelay() {
|
2019-11-07 19:37:03 +00:00
|
|
|
time.Sleep(ttlDelayTime)
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
|
|
|
|
2018-03-29 20:31:11 +00:00
|
|
|
// Like waitForLeader but letting metrics expire before waiting, and
|
|
|
|
// waiting for new metrics to arrive afterwards.
|
|
|
|
func waitForLeaderAndMetrics(t *testing.T, clusters []*Cluster) {
|
2018-03-16 16:37:39 +00:00
|
|
|
ttlDelay()
|
2018-03-29 20:31:11 +00:00
|
|
|
waitForLeader(t, clusters)
|
2018-03-29 18:18:15 +00:00
|
|
|
ttlDelay()
|
|
|
|
}
|
2017-03-09 13:44:14 +00:00
|
|
|
|
2018-03-29 20:31:11 +00:00
|
|
|
// Makes sure there is a leader and everyone knows about it.
|
|
|
|
func waitForLeader(t *testing.T, clusters []*Cluster) {
|
2019-02-20 14:24:25 +00:00
|
|
|
if consensus == "crdt" {
|
|
|
|
return // nothing to do: crdt mode has no leader
|
|
|
|
}
|
2018-06-27 04:03:15 +00:00
|
|
|
ctx := context.Background()
|
2018-03-16 16:37:39 +00:00
|
|
|
timer := time.NewTimer(time.Minute)
|
2018-03-29 18:18:15 +00:00
|
|
|
ticker := time.NewTicker(100 * time.Millisecond)
defer timer.Stop()
defer ticker.Stop()
|
2017-03-09 13:44:14 +00:00
|
|
|
|
|
|
|
loop:
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-timer.C:
|
|
|
|
t.Fatal("timed out waiting for a leader")
|
|
|
|
case <-ticker.C:
|
2018-03-16 16:37:39 +00:00
|
|
|
for _, cl := range clusters {
|
|
|
|
if cl.shutdownB {
|
|
|
|
continue // skip shutdown clusters
|
|
|
|
}
|
2018-06-27 04:03:15 +00:00
|
|
|
_, err := cl.consensus.Leader(ctx)
|
2018-03-16 16:37:39 +00:00
|
|
|
if err != nil {
|
|
|
|
continue loop
|
|
|
|
}
|
2017-03-09 13:44:14 +00:00
|
|
|
}
|
2018-03-16 16:37:39 +00:00
|
|
|
break loop
|
2017-03-09 13:44:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-16 12:11:56 +00:00
|
|
|
func waitForClustersHealthy(t *testing.T, clusters []*Cluster) {
|
|
|
|
t.Helper()
|
|
|
|
if len(clusters) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
timer := time.NewTimer(15 * time.Second)
defer timer.Stop()
|
|
|
|
for {
|
|
|
|
ttlDelay()
|
2019-12-05 14:08:43 +00:00
|
|
|
metrics := clusters[0].monitor.LatestMetrics(context.Background(), clusters[0].informers[0].Name())
|
2019-05-16 12:11:56 +00:00
|
|
|
healthy := 0
|
|
|
|
for _, m := range metrics {
|
|
|
|
if !m.Expired() {
|
|
|
|
healthy++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(clusters) == healthy {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-timer.C:
|
|
|
|
t.Fatal("timed out waiting for clusters to be healthy")
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-29 20:31:11 +00:00
|
|
|
/////////////////////////////////////////
|
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
func TestClustersVersion(t *testing.T) {
|
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
|
|
|
f := func(t *testing.T, c *Cluster) {
|
|
|
|
v := c.Version()
|
2018-12-18 14:44:11 +00:00
|
|
|
if v != version.Version.String() {
|
2016-12-21 13:30:54 +00:00
|
|
|
t.Error("Bad version")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
runF(t, clusters, f)
|
|
|
|
}
|
|
|
|
|
2017-01-26 18:59:31 +00:00
|
|
|
func TestClustersPeers(t *testing.T) {
|
2018-06-27 04:03:15 +00:00
|
|
|
ctx := context.Background()
|
2017-01-26 18:59:31 +00:00
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
|
|
|
|
2019-02-20 14:24:25 +00:00
|
|
|
delay()
|
|
|
|
|
2017-01-26 18:59:31 +00:00
|
|
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
2018-01-16 19:57:54 +00:00
|
|
|
|
2022-03-23 00:27:57 +00:00
|
|
|
out := make(chan api.ID, len(clusters))
|
|
|
|
clusters[j].Peers(ctx, out)
|
|
|
|
|
|
|
|
if len(out) != nClusters {
|
2017-01-26 18:59:31 +00:00
|
|
|
t.Fatal("expected as many peers as clusters")
|
|
|
|
}
|
|
|
|
|
2022-03-19 01:52:46 +00:00
|
|
|
clusterIDMap := make(map[peer.ID]api.ID)
|
|
|
|
peerIDMap := make(map[peer.ID]api.ID)
|
2017-01-26 18:59:31 +00:00
|
|
|
|
|
|
|
for _, c := range clusters {
|
2018-06-27 04:03:15 +00:00
|
|
|
id := c.ID(ctx)
|
2017-01-26 18:59:31 +00:00
|
|
|
clusterIDMap[id.ID] = id
|
|
|
|
}
|
|
|
|
|
2022-03-23 00:27:57 +00:00
|
|
|
for p := range out {
|
2019-02-20 14:24:25 +00:00
|
|
|
if p.Error != "" {
|
|
|
|
t.Error(p.ID, p.Error)
|
|
|
|
continue
|
|
|
|
}
|
2017-01-26 18:59:31 +00:00
|
|
|
peerIDMap[p.ID] = p
|
|
|
|
}
|
|
|
|
|
|
|
|
for k, id := range clusterIDMap {
|
|
|
|
id2, ok := peerIDMap[k]
|
|
|
|
if !ok {
|
|
|
|
t.Fatal("expected id in both maps")
|
|
|
|
}
|
2017-02-08 17:04:08 +00:00
|
|
|
//if !crypto.KeyEqual(id.PublicKey, id2.PublicKey) {
|
|
|
|
// t.Error("expected same public key")
|
|
|
|
//}
|
2017-01-26 18:59:31 +00:00
|
|
|
if id.IPFS.ID != id2.IPFS.ID {
|
|
|
|
t.Error("expected same ipfs daemon ID")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
func TestClustersPin(t *testing.T) {
|
2018-06-27 04:03:15 +00:00
|
|
|
ctx := context.Background()
|
2016-12-21 13:30:54 +00:00
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
2019-02-27 20:19:10 +00:00
|
|
|
prefix := test.Cid1.Prefix()
|
2018-03-16 16:37:39 +00:00
|
|
|
|
|
|
|
ttlDelay()
|
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
for i := 0; i < nPins; i++ {
|
2017-01-26 18:59:31 +00:00
|
|
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
2016-12-21 13:30:54 +00:00
|
|
|
h, err := prefix.Sum(randomBytes()) // create random cid
|
2019-10-31 20:51:13 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
Improve pin/unpin method signatures (#843)
* Improve pin/unpin method signatures:
This changes the following Cluster Go API methods:
* -> Cluster.Pin(ctx, cid, options) (pin, error)
* -> Cluster.Unpin(ctx, cid) (pin, error)
* -> Cluster.PinPath(ctx, path, opts) (pin, error)
Pin and Unpin now return the pinned object.
The signature of the methods now matches that of the API Client, is clearer as
to what options the user can set, and is aligned with PinPath and UnpinPath,
which already returned pin objects.
The REST API now returns the Pinned/Unpinned object rather than 204-Accepted.
This was necessary for a cleaner pin/update approach, which I'm working on in
another branch.
Most of the changes here are updating tests to the new signatures
* Adapt load-balancing client to new Pin/Unpin signatures
* cluster.go: Fix typo
Co-Authored-By: Kishan Sagathiya <kishansagathiya@gmail.com>
* cluster.go: Fix typo
Co-Authored-By: Kishan Sagathiya <kishansagathiya@gmail.com>
2019-07-22 13:39:11 +00:00
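Condensed from the tests below, the new signatures read like this (h is a
random cid created with prefix.Sum; the first return value is the pinned
object, which these tests mostly discard):

	pin, err := clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Errorf("error pinning %s: %s", h, err)
	}
	_ = pin // the Pin that was committed, including the applied options

	if _, err := clusters[j].Unpin(ctx, h); err != nil {
		t.Errorf("error unpinning %s: %s", h, err)
	}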
|
|
|
_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
|
2016-12-21 13:30:54 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Errorf("error pinning %s: %s", h, err)
|
|
|
|
}
|
2019-02-20 14:24:25 +00:00
|
|
|
// // Test re-pin
|
|
|
|
// err = clusters[j].Pin(ctx, api.PinCid(h))
|
|
|
|
// if err != nil {
|
|
|
|
// t.Errorf("error repinning %s: %s", h, err)
|
|
|
|
// }
|
|
|
|
}
|
|
|
|
switch consensus {
|
|
|
|
case "crdt":
|
2022-03-22 09:56:16 +00:00
|
|
|
time.Sleep(10 * time.Second)
|
2019-02-20 14:24:25 +00:00
|
|
|
default:
|
|
|
|
delay()
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
|
|
|
fpinned := func(t *testing.T, c *Cluster) {
|
2022-03-22 09:56:16 +00:00
|
|
|
out := make(chan api.PinInfo, 10)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
status := collectPinInfos(t, out)
|
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
for _, v := range status {
|
2017-02-08 17:04:08 +00:00
|
|
|
if v.Status != api.TrackerStatusPinned {
|
2018-05-11 17:38:40 +00:00
|
|
|
t.Errorf("%s should have been pinned but it is %s", v.Cid, v.Status)
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if l := len(status); l != nPins {
|
|
|
|
t.Errorf("Pinned %d out of %d requests", l, nPins)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
runF(t, clusters, fpinned)
|
|
|
|
|
|
|
|
// Unpin everything
|
2022-03-22 09:56:16 +00:00
|
|
|
pinList, err := clusters[0].pinsSlice(ctx)
|
2019-02-20 14:24:25 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(pinList) != nPins {
|
|
|
|
t.Fatalf("pin list has %d but pinned %d", len(pinList), nPins)
|
|
|
|
}
|
2016-12-21 13:30:54 +00:00
|
|
|
|
2018-03-16 16:37:39 +00:00
|
|
|
for i := 0; i < len(pinList); i++ {
|
2018-03-18 19:29:02 +00:00
|
|
|
// unpin all pins (re-unpin failure is tested below)
|
2017-01-26 18:59:31 +00:00
|
|
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
Improve pin/unpin method signatures (#843)
2019-07-22 13:39:11 +00:00
|
|
|
_, err := clusters[j].Unpin(ctx, pinList[i].Cid)
|
2016-12-21 13:30:54 +00:00
|
|
|
if err != nil {
|
2017-02-15 14:16:34 +00:00
|
|
|
t.Errorf("error unpinning %s: %s", pinList[i].Cid, err)
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
2018-03-18 19:29:02 +00:00
|
|
|
}
|
2019-10-31 20:51:13 +00:00
|
|
|
|
|
|
|
switch consensus {
|
|
|
|
case "crdt":
|
2022-03-22 09:56:16 +00:00
|
|
|
time.Sleep(10 * time.Second)
|
2019-10-31 20:51:13 +00:00
|
|
|
default:
|
|
|
|
delay()
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:24:25 +00:00
|
|
|
for i := 0; i < len(pinList); i++ {
|
2018-03-18 19:29:02 +00:00
|
|
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
Improve pin/unpin method signatures (#843)
2019-07-22 13:39:11 +00:00
|
|
|
_, err := clusters[j].Unpin(ctx, pinList[i].Cid)
|
2018-03-18 19:29:02 +00:00
|
|
|
if err == nil {
|
2019-02-20 14:24:25 +00:00
|
|
|
t.Errorf("expected error re-unpinning %s", pinList[i].Cid)
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
|
|
|
}
|
2018-03-18 19:29:02 +00:00
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
delay()
|
2019-10-31 20:51:13 +00:00
|
|
|
|
2016-12-21 13:30:54 +00:00
|
|
|
funpinned := func(t *testing.T, c *Cluster) {
|
2022-03-22 09:56:16 +00:00
|
|
|
out := make(chan api.PinInfo)
|
|
|
|
go func() {
|
|
|
|
err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
status := collectPinInfos(t, out)
|
2018-05-11 17:38:40 +00:00
|
|
|
for _, v := range status {
|
|
|
|
t.Errorf("%s should have been unpinned but it is %s", v.Cid, v.Status)
|
2016-12-21 13:30:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
runF(t, clusters, funpinned)
|
|
|
|
}
|
|
|
|
|
2019-07-12 14:40:29 +00:00
|
|
|
func TestClustersPinUpdate(t *testing.T) {
|
|
|
|
ctx := context.Background()
|
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
|
|
|
prefix := test.Cid1.Prefix()
|
|
|
|
|
|
|
|
ttlDelay()
|
|
|
|
|
2020-04-14 17:58:00 +00:00
|
|
|
h, _ := prefix.Sum(randomBytes()) // create random cid
|
|
|
|
h2, _ := prefix.Sum(randomBytes()) // create random cid
|
2019-07-12 14:40:29 +00:00
|
|
|
|
2020-04-14 17:58:00 +00:00
|
|
|
_, err := clusters[0].PinUpdate(ctx, h, h2, api.PinOptions{})
|
2019-07-12 14:40:29 +00:00
|
|
|
if err == nil || err != state.ErrNotFound {
|
|
|
|
t.Fatal("pin update should fail when from is not pinned")
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = clusters[0].Pin(ctx, h, api.PinOptions{})
|
|
|
|
if err != nil {
|
|
|
|
t.Errorf("error pinning %s: %s", h, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
pinDelay()
|
2020-04-11 06:41:48 +00:00
|
|
|
expiry := time.Now().AddDate(1, 0, 0)
|
2019-07-12 14:40:29 +00:00
|
|
|
opts2 := api.PinOptions{
|
|
|
|
UserAllocations: []peer.ID{clusters[0].host.ID()}, // should not be used
|
|
|
|
PinUpdate: h,
|
|
|
|
Name: "new name",
|
2020-04-11 06:41:48 +00:00
|
|
|
ExpireAt: expiry,
|
2019-07-12 14:40:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
_, err = clusters[0].Pin(ctx, h2, opts2) // should call PinUpdate
|
|
|
|
if err != nil {
|
|
|
|
t.Errorf("error pin-updating %s: %s", h2, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
pinDelay()
|
|
|
|
|
|
|
|
f := func(t *testing.T, c *Cluster) {
|
|
|
|
pinget, err := c.PinGet(ctx, h2)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(pinget.Allocations) != 0 {
|
|
|
|
t.Error("new pin should be allocated everywhere like pin1")
|
|
|
|
}
|
|
|
|
|
|
|
|
if pinget.MaxDepth != -1 {
|
|
|
|
t.Error("updated pin should be recursive like pin1")
|
|
|
|
}
|
2020-04-16 09:02:33 +00:00
|
|
|
// We compare Unix seconds because our protobuf serde will have
|
|
|
|
// lost any sub-second precision.
|
|
|
|
if pinget.ExpireAt.Unix() != expiry.Unix() {
|
|
|
|
t.Errorf("Expiry didn't match. Expected: %s. Got: %s", expiry, pinget.ExpireAt)
|
2020-04-11 06:41:48 +00:00
|
|
|
}
|
2019-07-12 14:40:29 +00:00
|
|
|
|
|
|
|
if pinget.Name != "new name" {
|
|
|
|
t.Error("name should be kept")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
runF(t, clusters, f)
|
2020-04-23 16:05:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestClustersPinDirect(t *testing.T) {
|
|
|
|
ctx := context.Background()
|
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
|
|
|
prefix := test.Cid1.Prefix()
|
2019-07-12 14:40:29 +00:00
|
|
|
|
2020-04-23 16:05:05 +00:00
|
|
|
ttlDelay()
|
|
|
|
|
|
|
|
h, _ := prefix.Sum(randomBytes()) // create random cid
|
|
|
|
|
|
|
|
_, err := clusters[0].Pin(ctx, h, api.PinOptions{Mode: api.PinModeDirect})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
pinDelay()
|
|
|
|
|
2020-04-23 16:28:16 +00:00
|
|
|
f := func(t *testing.T, c *Cluster, mode api.PinMode) {
|
2020-04-23 16:05:05 +00:00
|
|
|
pinget, err := c.PinGet(ctx, h)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
2020-04-23 16:28:16 +00:00
|
|
|
if pinget.Mode != mode {
|
2020-04-23 16:05:05 +00:00
|
|
|
t.Error("pin should be pinned in direct mode")
|
|
|
|
}
|
|
|
|
|
2020-04-23 16:28:16 +00:00
|
|
|
if pinget.MaxDepth != mode.ToPinDepth() {
|
|
|
|
t.Errorf("pin should have max-depth %d but has %d", mode.ToPinDepth(), pinget.MaxDepth)
|
2020-04-23 16:05:05 +00:00
|
|
|
}
|
2019-07-12 14:40:29 +00:00
|
|
|
|
2020-04-23 16:05:05 +00:00
|
|
|
pInfo := c.StatusLocal(ctx, h)
|
|
|
|
if pInfo.Error != "" {
|
|
|
|
t.Error(pInfo.Error)
|
|
|
|
}
|
|
|
|
if pInfo.Status != api.TrackerStatusPinned {
|
|
|
|
t.Error(pInfo.Error)
|
|
|
|
t.Error("the status should show the hash as pinned")
|
|
|
|
}
|
|
|
|
}
|
2020-04-23 16:28:16 +00:00
|
|
|
|
|
|
|
runF(t, clusters, func(t *testing.T, c *Cluster) {
|
|
|
|
f(t, c, api.PinModeDirect)
|
|
|
|
})
|
|
|
|
|
|
|
|
// Convert into a recursive mode
|
|
|
|
_, err = clusters[0].Pin(ctx, h, api.PinOptions{Mode: api.PinModeRecursive})
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
pinDelay()
|
|
|
|
|
|
|
|
runF(t, clusters, func(t *testing.T, c *Cluster) {
|
|
|
|
f(t, c, api.PinModeRecursive)
|
|
|
|
})
|
|
|
|
|
|
|
|
// This should fail as we cannot convert back to direct
|
|
|
|
_, err = clusters[0].Pin(ctx, h, api.PinOptions{Mode: api.PinModeDirect})
|
|
|
|
if err == nil {
|
|
|
|
t.Error("a recursive pin cannot be converted back to direct pin")
|
|
|
|
}
|
2019-07-12 14:40:29 +00:00
|
|
|
}
|
|
|
|
|
2017-01-25 18:38:23 +00:00
|
|
|
func TestClustersStatusAll(t *testing.T) {
|
2018-06-27 04:03:15 +00:00
|
|
|
ctx := context.Background()
|
2016-12-21 13:30:54 +00:00
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
2019-02-27 20:19:10 +00:00
|
|
|
h := test.Cid1
|
2020-05-15 22:32:28 +00:00
|
|
|
clusters[0].Pin(ctx, h, api.PinOptions{Name: "test"})
|
2018-03-16 16:37:39 +00:00
|
|
|
pinDelay()
|
2016-12-21 13:30:54 +00:00
|
|
|
// Global status
|
|
|
|
f := func(t *testing.T, c *Cluster) {
|
2022-03-22 09:56:16 +00:00
|
|
|
out := make(chan api.GlobalPinInfo, 10)
|
|
|
|
go func() {
|
|
|
|
err := c.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
statuses := collectGlobalPinInfos(t, out, 5*time.Second)
|
2017-06-21 12:16:28 +00:00
|
|
|
if len(statuses) != 1 {
|
2016-12-21 13:30:54 +00:00
|
|
|
t.Fatal("bad status. Expected one item")
|
|
|
|
}
|
2019-02-27 20:09:31 +00:00
|
|
|
if !statuses[0].Cid.Equals(h) {
|
2016-12-21 13:30:54 +00:00
|
|
|
t.Error("bad cid in status")
|
|
|
|
}
|
2020-05-15 22:32:28 +00:00
|
|
|
|
|
|
|
if statuses[0].Name != "test" {
|
|
|
|
t.Error("globalPinInfo should have the name")
|
|
|
|
}
|
|
|
|
|
2017-01-25 17:07:19 +00:00
|
|
|
info := statuses[0].PeerMap
|
2016-12-21 13:30:54 +00:00
|
|
|
if len(info) != nClusters {
|
|
|
|
t.Error("bad info in status")
|
|
|
|
}
|
|
|
|
|
2022-01-28 17:21:11 +00:00
|
|
|
for _, pi := range info {
|
|
|
|
if pi.IPFS != test.PeerID1 {
|
|
|
|
t.Error("ipfs not set in pin status")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-14 17:58:00 +00:00
|
|
|
pid := peer.Encode(c.host.ID())
|
2019-02-27 17:04:35 +00:00
|
|
|
if info[pid].Status != api.TrackerStatusPinned {
|
2016-12-21 13:30:54 +00:00
|
|
|
t.Error("the hash should have been pinned")
|
|
|
|
}
|
|
|
|
|
2018-06-27 04:03:15 +00:00
|
|
|
status, err := c.Status(ctx, h)
|
2016-12-21 13:30:54 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
|
2019-02-27 17:04:35 +00:00
|
|
|
pinfo, ok := status.PeerMap[pid]
|
2016-12-21 13:30:54 +00:00
|
|
|
if !ok {
|
|
|
|
t.Fatal("Host not in status")
|
|
|
|
}
|
|
|
|
|
2017-02-08 17:04:08 +00:00
|
|
|
if pinfo.Status != api.TrackerStatusPinned {
|
2019-02-27 17:04:35 +00:00
|
|
|
t.Error(pinfo.Error)
|
2016-12-21 13:30:54 +00:00
|
|
|
t.Error("the status should show the hash as pinned")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
runF(t, clusters, f)
|
|
|
|
}
|
|
|
|
|
2017-06-21 12:16:28 +00:00
|
|
|
func TestClustersStatusAllWithErrors(t *testing.T) {
|
2018-06-27 04:03:15 +00:00
|
|
|
ctx := context.Background()
|
2017-06-21 12:16:28 +00:00
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
2019-02-27 20:19:10 +00:00
|
|
|
h := test.Cid1
|
2020-05-15 22:32:28 +00:00
|
|
|
clusters[0].Pin(ctx, h, api.PinOptions{Name: "test"})
|
2018-03-16 16:37:39 +00:00
|
|
|
pinDelay()
|
2017-06-21 12:16:28 +00:00
|
|
|
|
|
|
|
// shutdown 1 cluster peer
|
2018-06-27 04:03:15 +00:00
|
|
|
clusters[1].Shutdown(ctx)
|
2019-05-24 22:53:43 +00:00
|
|
|
clusters[1].host.Close()
|
2018-03-16 16:37:39 +00:00
|
|
|
delay()
|
2017-06-21 12:16:28 +00:00
|
|
|
|
|
|
|
f := func(t *testing.T, c *Cluster) {
|
|
|
|
// skip if it's the shutdown peer
|
2018-06-27 04:03:15 +00:00
|
|
|
if c.ID(ctx).ID == clusters[1].ID(ctx).ID {
|
2017-06-21 12:16:28 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2022-03-22 09:56:16 +00:00
|
|
|
out := make(chan api.GlobalPinInfo, 10)
|
|
|
|
go func() {
|
|
|
|
err := c.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
statuses := collectGlobalPinInfos(t, out, 5*time.Second)
|
|
|
|
|
2017-06-21 12:16:28 +00:00
|
|
|
if len(statuses) != 1 {
|
|
|
|
t.Fatal("bad status. Expected one item")
|
|
|
|
}
|
|
|
|
|
2020-05-15 22:32:28 +00:00
|
|
|
if !statuses[0].Cid.Equals(h) {
|
|
|
|
t.Error("wrong Cid in globalPinInfo")
|
|
|
|
}
|
|
|
|
|
|
|
|
if statuses[0].Name != "test" {
|
|
|
|
t.Error("wrong Name in globalPinInfo")
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:24:25 +00:00
|
|
|
// Raft and CRDT behave differently here
|
|
|
|
switch consensus {
|
|
|
|
case "raft":
|
|
|
|
// Raft will have all statuses with one of them
|
|
|
|
// being in ERROR because the peer is off
|
2017-06-21 12:16:28 +00:00
|
|
|
|
2019-02-20 14:24:25 +00:00
|
|
|
stts := statuses[0]
|
|
|
|
if len(stts.PeerMap) != nClusters {
|
|
|
|
t.Error("bad number of peers in status")
|
|
|
|
}
|
2017-06-21 12:16:28 +00:00
|
|
|
|
2020-04-14 17:58:00 +00:00
|
|
|
pid := peer.Encode(clusters[1].id)
|
2019-02-20 14:24:25 +00:00
|
|
|
errst := stts.PeerMap[pid]
|
2017-06-21 12:16:28 +00:00
|
|
|
|
2019-02-20 14:24:25 +00:00
|
|
|
if errst.Status != api.TrackerStatusClusterError {
|
2019-05-24 22:53:43 +00:00
|
|
|
t.Error("erroring status should be set to ClusterError:", errst.Status)
|
2019-02-20 14:24:25 +00:00
|
|
|
}
|
2022-01-28 17:21:11 +00:00
|
|
|
if errst.PeerName != "peer_1" {
|
|
|
|
t.Error("peername should have been set in the erroring peer too from the cache")
|
|
|
|
}
|
|
|
|
|
|
|
|
if errst.IPFS != test.PeerID1 {
|
|
|
|
t.Error("IPFS ID should have been set in the erroring peer too from the cache")
|
|
|
|
}
|
2017-06-21 12:16:28 +00:00
|
|
|
|
2019-02-20 14:24:25 +00:00
|
|
|
// now check with Cid status
|
|
|
|
status, err := c.Status(ctx, h)
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
2017-06-21 12:16:28 +00:00
|
|
|
|
2019-02-20 14:24:25 +00:00
|
|
|
pinfo := status.PeerMap[pid]
|
|
|
|
|
|
|
|
if pinfo.Status != api.TrackerStatusClusterError {
|
2019-05-24 22:53:43 +00:00
|
|
|
t.Error("erroring status should be ClusterError:", pinfo.Status)
|
2019-02-20 14:24:25 +00:00
|
|
|
}
|
2022-01-28 17:21:11 +00:00
|
|
|
|
2022-01-28 18:31:13 +00:00
|
|
|
if pinfo.PeerName != "peer_1" {
|
|
|
|
t.Error("peername should have been set in the erroring peer too from the cache")
|
|
|
|
}
|
2022-01-28 17:21:11 +00:00
|
|
|
|
2022-01-28 18:31:13 +00:00
|
|
|
if pinfo.IPFS != test.PeerID1 {
|
|
|
|
t.Error("IPFS ID should have been set in the erroring peer too from the cache")
|
|
|
|
}
|
2019-02-20 14:24:25 +00:00
|
|
|
case "crdt":
|
|
|
|
// CRDT will not have contacted the offline peer because
|
|
|
|
// its metric expired and therefore is not in the
|
|
|
|
// peerset.
|
|
|
|
if len(statuses[0].PeerMap) != nClusters-1 {
|
|
|
|
t.Error("expected a different number of statuses")
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
t.Fatal("bad consensus")
|
2017-06-21 12:16:28 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
runF(t, clusters, f)
|
|
|
|
}
|
|
|
|
|
2017-01-25 18:38:23 +00:00
|
|
|
func TestClustersRecoverLocal(t *testing.T) {
|
2018-06-27 04:03:15 +00:00
|
|
|
ctx := context.Background()
|
2017-01-25 18:38:23 +00:00
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
2019-02-27 20:09:31 +00:00
|
|
|
h := test.ErrorCid // This cid always fails
|
2019-02-27 20:19:10 +00:00
|
|
|
h2 := test.Cid2
|
2018-03-16 16:37:39 +00:00
|
|
|
|
|
|
|
ttlDelay()
|
|
|
|
|
Improve pin/unpin method signatures (#843)
2019-07-22 13:39:11 +00:00
|
|
|
clusters[0].Pin(ctx, h, api.PinOptions{})
|
|
|
|
clusters[0].Pin(ctx, h2, api.PinOptions{})
|
2018-03-16 16:37:39 +00:00
|
|
|
pinDelay()
|
|
|
|
pinDelay()
|
2017-01-25 18:38:23 +00:00
|
|
|
|
|
|
|
f := func(t *testing.T, c *Cluster) {
|
2020-04-14 17:58:00 +00:00
|
|
|
_, err := c.RecoverLocal(ctx, h)
|
Fix: maptracker race issues
This commit attempts to fix race issues in the maptracker since the
introduction of the OperationTracker.
There were two main problems:
* Duplicate tracking of the state both in the state map and the opTracker
* Non-atomicity of operations, with different threads being able to affect
other threads' operations.
A test performing random Track/Untracks on the same Cid quickly showed
that items would sometimes stay as pin_queued or unpin_queued. That happened
because operations could be cancelled under the hood by a different request,
while leaving the map status untouched.
It was not simple to deal with these issues without a refactoring.
First, the state map has been removed, and the operation tracker now provides
status information for any Cid. This implies that the tracker keeps all
operations and operations have a `PhaseDone`. There's also a
new `OperationRemote` type.
Secondly, operations are only created in the tracker and can only be removed
by their creators (they can be overwritten by other operations though).
Operations cannot be accessed directly and modifications are limited to setting
Error for PhaseDone operations.
After created, *Operations are queued in the pinWorker queues which handle any
status updates. This means, that, even when an operation has been removed from
the tracker, status updates will not interfere with any other newer operations.
In the maptracker, only the Unpin worker Cleans operations once processed. A
successful unpin is the only way that a delete() happens in the tracker map.
Otherwise, operations stay there until a newer operation for the Cid arrives
and 1) cancels the existing one 2) takes its place. The tracker refuses to
create a new operation if a similar "ongoing" operation of the same type
exists.
The final change is that Recover and RecoverAll() are now async and play by the
same rules as Track() and Untrack(), queueing the items to be recovered.
Note: for stateless pintracker, the tracker will need to Clean() operation
of type OperationPin as well, and complement the Status reported
by the tracker with those coming from IPFS.
License: MIT
Signed-off-by: Hector Sanjuan <code@hector.link>
2018-05-25 16:32:10 +00:00
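The practical consequence for the tests below is that a recover request only
queues the item, so status is asserted after waiting for the queue to be
processed (condensed sketch):

	if _, err := c.RecoverLocal(ctx, h); err != nil {
		t.Fatal(err)
	}
	delay() // wait for the recover queue to be processed
	info := c.StatusLocal(ctx, h) // now reflects the recovery attempt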
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
2017-01-25 18:38:23 +00:00
|
|
|
}
|
Fix: maptracker race issues
2018-05-25 16:32:10 +00:00
|
|
|
// Wait for queue to be processed
|
|
|
|
delay()
|
|
|
|
|
2020-04-14 17:58:00 +00:00
|
|
|
info := c.StatusLocal(ctx, h)
|
2017-02-08 17:04:08 +00:00
|
|
|
if info.Status != api.TrackerStatusPinError {
|
2017-01-25 18:38:23 +00:00
|
|
|
t.Errorf("element is %s and not PinError", info.Status)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Recover good ID
|
2020-04-14 17:58:00 +00:00
|
|
|
info, _ = c.RecoverLocal(ctx, h2)
|
2017-02-08 17:04:08 +00:00
|
|
|
if info.Status != api.TrackerStatusPinned {
|
2017-01-25 18:38:23 +00:00
|
|
|
t.Error("element should be in Pinned state")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Test local recovery on all peers
|
|
|
|
runF(t, clusters, f)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestClustersRecover(t *testing.T) {
|
2018-06-27 04:03:15 +00:00
|
|
|
ctx := context.Background()
|
2017-01-25 18:38:23 +00:00
|
|
|
clusters, mock := createClusters(t)
|
|
|
|
defer shutdownClusters(t, clusters, mock)
|
2019-02-27 20:09:31 +00:00
|
|
|
h := test.ErrorCid // This cid always fails
|
2019-02-27 20:19:10 +00:00
|
|
|
h2 := test.Cid2
|
2018-03-16 16:37:39 +00:00
|
|
|
|
|
|
|
ttlDelay()
|
|
|
|
|
Improve pin/unpin method signatures (#843)
2019-07-22 13:39:11 +00:00
|
|
|
clusters[0].Pin(ctx, h, api.PinOptions{})
|
|
|
|
clusters[0].Pin(ctx, h2, api.PinOptions{})
|
2017-01-25 18:38:23 +00:00
|
|
|
|
2018-03-16 16:37:39 +00:00
|
|
|
pinDelay()
|
|
|
|
pinDelay()
|
2017-01-25 18:38:23 +00:00
|
|
|
|
|
|
|
j := rand.Intn(nClusters)
|
2019-09-02 03:42:43 +00:00
|
|
|
ginfo, err := clusters[j].Recover(ctx, h)
|
2017-01-25 18:38:23 +00:00
|
|
|
if err != nil {
|
|
|
|
// we always attempt to return a valid response
|
|
|
|
// with errors contained in GlobalPinInfo
|
|
|
|
t.Fatal("did not expect an error")
|
|
|
|
}
|
2019-09-02 03:42:43 +00:00
|
|
|
if len(ginfo.PeerMap) != nClusters {
|
|
|
|
t.Error("number of peers do not match")
|
|
|
|
}
|
Fix: maptracker race issues
2018-05-25 16:32:10 +00:00
	// Wait for queue to be processed
	delay()

	ginfo, err = clusters[j].Status(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	pinfo, ok := ginfo.PeerMap[peer.Encode(clusters[j].host.ID())]
	if !ok {
		t.Fatal("should have info for this host")
	}
	if pinfo.Error == "" {
		t.Error("pinInfo error should not be empty")
	}

	for _, c := range clusters {
		inf, ok := ginfo.PeerMap[peer.Encode(c.host.ID())]
		if !ok {
			t.Fatal("GlobalPinInfo should not be empty for this host")
		}

		if inf.Status != api.TrackerStatusPinError {
			t.Logf("%+v", inf)
			t.Error("should be PinError in all peers")
		}
	}

	// Test with a good Cid
	j = rand.Intn(nClusters)
	ginfo, err = clusters[j].Recover(ctx, h2)
	if err != nil {
		t.Fatal(err)
	}
	if !ginfo.Cid.Equals(h2) {
		t.Error("GlobalPinInfo should be for test.Cid2")
	}
	if len(ginfo.PeerMap) != nClusters {
		t.Error("number of peers does not match")
	}

	for _, c := range clusters {
		inf, ok := ginfo.PeerMap[peer.Encode(c.host.ID())]
		if !ok {
			t.Fatal("GlobalPinInfo should have this cluster")
		}
		if inf.Status != api.TrackerStatusPinned {
			t.Error("the GlobalPinInfo should show Pinned in all peers")
		}
	}
}
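
The operation-tracker pattern described in the maptracker commit above can be
sketched roughly as follows. This is a minimal, hypothetical illustration, not
the actual ipfs-cluster optracker package: the Phase, OpType, Operation and
Tracker names and method signatures here are invented for the example.

package main

import (
	"context"
	"fmt"
	"sync"
)

// Phase mirrors the idea of operation phases (queued, in progress, done).
type Phase int

const (
	PhaseQueued Phase = iota
	PhaseInProgress
	PhaseDone
)

type OpType int

const (
	OpPin OpType = iota
	OpUnpin
)

// Operation is created by the tracker and carries its own context so a
// newer operation for the same CID can cancel it under the hood.
type Operation struct {
	Cid    string
	Type   OpType
	ctx    context.Context
	cancel context.CancelFunc
	mu     sync.Mutex
	phase  Phase
}

func (op *Operation) SetPhase(p Phase) {
	op.mu.Lock()
	defer op.mu.Unlock()
	op.phase = p
}

// Tracker holds at most one operation per CID. A new operation replaces
// (and cancels) the existing one unless a similar ongoing one exists.
type Tracker struct {
	mu  sync.Mutex
	ops map[string]*Operation
}

func NewTracker() *Tracker {
	return &Tracker{ops: make(map[string]*Operation)}
}

// TrackNewOperation refuses to create a new operation when a similar
// ongoing operation of the same type exists; otherwise it cancels and
// overwrites the old one.
func (t *Tracker) TrackNewOperation(ctx context.Context, cid string, typ OpType) (*Operation, bool) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if old, ok := t.ops[cid]; ok {
		if old.Type == typ && old.phase != PhaseDone {
			return nil, false // similar ongoing operation: refuse
		}
		old.cancel() // cancel the old operation under the hood
	}
	opCtx, cancel := context.WithCancel(ctx)
	op := &Operation{Cid: cid, Type: typ, ctx: opCtx, cancel: cancel, phase: PhaseQueued}
	t.ops[cid] = op
	return op, true
}

// Clean removes an operation, but only if it is still the one stored:
// a worker cannot delete an operation that was already replaced.
func (t *Tracker) Clean(op *Operation) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.ops[op.Cid] == op {
		delete(t.ops, op.Cid)
	}
}

func main() {
	tr := NewTracker()
	op, _ := tr.TrackNewOperation(context.Background(), "Qm...", OpPin)
	op.SetPhase(PhaseInProgress)
	if _, ok := tr.TrackNewOperation(context.Background(), "Qm...", OpPin); !ok {
		fmt.Println("refused: an ongoing pin operation already exists")
	}
}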

func TestClustersRecoverAll(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	h1 := test.Cid1
	hError := test.ErrorCid

	ttlDelay()

	clusters[0].Pin(ctx, h1, api.PinOptions{})
	clusters[0].Pin(ctx, hError, api.PinOptions{})

	pinDelay()

	out := make(chan api.GlobalPinInfo)
	go func() {
		err := clusters[rand.Intn(nClusters)].RecoverAll(ctx, out)
		if err != nil {
			t.Error(err)
		}
	}()

	gInfos := collectGlobalPinInfos(t, out, 5*time.Second)

	if len(gInfos) != 1 {
		t.Error("expected one item")
	}

	for _, gInfo := range gInfos {
		if len(gInfo.PeerMap) != nClusters {
			t.Error("number of peers does not match")
		}
	}
}

func TestClustersShutdown(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	f := func(t *testing.T, c *Cluster) {
		err := c.Shutdown(ctx)
		if err != nil {
			t.Error("should be able to shutdown cleanly")
		}
	}
	// Shutdown 3 times
	runF(t, clusters, f)
	runF(t, clusters, f)
	runF(t, clusters, f)
}

func TestClustersReplicationOverall(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	// Why is the replication factor nClusters - 1?
	// Because that way we know that pinning nClusters
	// pins with a strategy like numpins/disk
	// will result in each peer holding locally exactly
	// nClusters - 1 pins (and tracking one as remote).
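	// For example (worked arithmetic, matching the assertions below):
	// nClusters pins x (nClusters-1) allocations each = nClusters*(nClusters-1)
	// copies, spread over nClusters peers, i.e. nClusters-1 pins held
	// locally per peer, with the remaining pin tracked as remote.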

	prefix := test.Cid1.Prefix()

	for i := 0; i < nClusters; i++ {
		// Pick a random cluster and hash
		j := rand.Intn(nClusters)           // choose a random cluster peer
		h, err := prefix.Sum(randomBytes()) // create random cid
		if err != nil {
			t.Fatal(err)
		}
Improve pin/unpin method signatures (#843)

* Improve pin/unpin method signatures. These change the following
Cluster Go API methods:

  * -> Cluster.Pin(ctx, cid, options) (pin, error)
  * -> Cluster.Unpin(ctx, cid) (pin, error)
  * -> Cluster.PinPath(ctx, path, opts) (pin, error)

Pin and Unpin now return the pinned object.

The signature of the methods now matches that of the API Client, is clearer
as to what options the user can set, and is aligned with PinPath and
UnpinPath, which already returned pins. (A short usage sketch follows this
test.)

The REST API now returns the Pinned/Unpinned object rather than 204-Accepted.
This was necessary for a cleaner pin/update approach, which I'm working on in
another branch.

Most of the changes here are updating tests to the new signatures.

* Adapt load-balancing client to new Pin/Unpin signatures
* cluster.go: Fix typo

Co-Authored-By: Kishan Sagathiya <kishansagathiya@gmail.com>
		_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
		if err != nil {
			t.Error(err)
		}
		pinDelay()

		// check that it is held by exactly nClusters - 1 peers
		gpi, err := clusters[j].Status(ctx, h)
		if err != nil {
			t.Fatal(err)
		}

		numLocal := 0
		numRemote := 0
		for _, v := range gpi.PeerMap {
			if v.Status == api.TrackerStatusPinned {
				numLocal++
			} else if v.Status == api.TrackerStatusRemote {
				numRemote++
			}
		}
		if numLocal != nClusters-1 {
			t.Errorf(
				"We wanted replication %d but it's only %d",
				nClusters-1,
				numLocal,
			)
		}

		if numRemote != 1 {
			t.Errorf("We wanted 1 peer tracking as remote but %d do", numRemote)
		}
		ttlDelay()
	}

	f := func(t *testing.T, c *Cluster) {
		// confirm that the pintracker state matches the current global state
		out := make(chan api.PinInfo, 100)

		go func() {
			err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
			if err != nil {
				t.Error(err)
			}
		}()
		pinfos := collectPinInfos(t, out)
		if len(pinfos) != nClusters {
			t.Error("Pinfos does not have the expected pins")
		}

		numRemote := 0
		numLocal := 0
		for _, pi := range pinfos {
			switch pi.Status {
			case api.TrackerStatusPinned:
				numLocal++
			case api.TrackerStatusRemote:
				numRemote++
			}
		}
		if numLocal != nClusters-1 {
			t.Errorf("%s: Expected %d local pins but got %d", c.id.String(), nClusters-1, numLocal)
		}

		if numRemote != 1 {
			t.Errorf("%s: Expected 1 remote pin but got %d", c.id.String(), numRemote)
		}

		outPins := make(chan api.Pin)
		go func() {
			err := c.Pins(ctx, outPins)
			if err != nil {
				t.Error(err)
			}
		}()
		for pin := range outPins {
			allocs := pin.Allocations
			if len(allocs) != nClusters-1 {
				t.Errorf("Allocations are [%s]", allocs)
			}
			for _, a := range allocs {
				if a == c.id {
					pinfo := c.tracker.Status(ctx, pin.Cid)
					if pinfo.Status != api.TrackerStatusPinned {
						t.Errorf("Peer %s was allocated but it is not pinning cid", c.id)
					}
				}
			}
		}
	}

	runF(t, clusters, f)
}
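
As described in the pin/unpin signatures commit above, Pin now returns the
resulting pin object, so callers can inspect the chosen allocations directly
instead of issuing a follow-up PinGet. A minimal sketch under that assumption
(the helper name ExamplePinReturnsObject is hypothetical, not part of this
file):

func ExamplePinReturnsObject(t *testing.T, clusters []*Cluster) {
	ctx := context.Background()
	// Pin returns the pinned object together with the error, matching
	// PinPath/UnpinPath and the API client.
	pin, err := clusters[0].Pin(ctx, test.Cid1, api.PinOptions{
		ReplicationFactorMin: 1,
		ReplicationFactorMax: 2,
		Name:                 "example",
	})
	if err != nil {
		t.Fatal(err)
	}
	// The returned pin already carries the allocations decided by cluster.
	t.Logf("pinned %s on %d peers", pin.Cid, len(pin.Allocations))
}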

// This test checks that we pin with ReplicationFactorMax when
// we can
func TestClustersReplicationFactorMax(t *testing.T) {
	ctx := context.Background()
	if nClusters < 3 {
		t.Skip("Need at least 3 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	ttlDelay()

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	f := func(t *testing.T, c *Cluster) {
		p, err := c.PinGet(ctx, h)
		if err != nil {
			t.Fatal(err)
		}

		if len(p.Allocations) != nClusters-1 {
			t.Error("should have pinned nClusters - 1 allocations")
		}

		if p.ReplicationFactorMin != 1 {
			t.Error("rplMin should be 1")
		}

		if p.ReplicationFactorMax != nClusters-1 {
			t.Error("rplMax should be nClusters-1")
		}
	}
	runF(t, clusters, f)
}

// This test checks that repinning something that is overpinned
// removes some allocations
func TestClustersReplicationFactorMaxLower(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 1
		c.config.ReplicationFactorMax = nClusters
	}

	ttlDelay() // make sure we have places to pin

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p1, err := clusters[0].PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	if len(p1.Allocations) != nClusters {
		t.Fatal("allocations should be nClusters")
	}

	opts := api.PinOptions{
		ReplicationFactorMin: 1,
		ReplicationFactorMax: 2,
	}
	_, err = clusters[0].Pin(ctx, h, opts)
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p2, err := clusters[0].PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	if len(p2.Allocations) != 2 {
		t.Fatal("allocations should have been reduced to 2")
	}
}

// This test checks that when not all nodes are available,
// we pin to as many as we can, aiming for ReplicationFactorMax
func TestClustersReplicationFactorInBetween(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 1
		c.config.ReplicationFactorMax = nClusters
	}

	ttlDelay()

	// Shutdown two peers
	clusters[nClusters-1].Shutdown(ctx)
	clusters[nClusters-2].Shutdown(ctx)

	waitForLeaderAndMetrics(t, clusters)

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	f := func(t *testing.T, c *Cluster) {
		if c == clusters[nClusters-1] || c == clusters[nClusters-2] {
			return
		}
		p, err := c.PinGet(ctx, h)
		if err != nil {
			t.Fatal(err)
		}

		if len(p.Allocations) != nClusters-2 {
			t.Error("should have pinned nClusters-2 allocations")
		}

		if p.ReplicationFactorMin != 1 {
			t.Error("rplMin should be 1")
		}

		if p.ReplicationFactorMax != nClusters {
			t.Error("rplMax should be nClusters")
		}
	}
	runF(t, clusters, f)
}

// This test checks that we do not pin something for which
// we cannot reach ReplicationFactorMin
func TestClustersReplicationFactorMin(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters
	}

	// Shutdown two peers
	clusters[nClusters-1].Shutdown(ctx)
	waitForLeaderAndMetrics(t, clusters)
	clusters[nClusters-2].Shutdown(ctx)
	waitForLeaderAndMetrics(t, clusters)

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err == nil {
		t.Error("Pin should have failed as rplMin cannot be satisfied")
	}
	t.Log(err)
	if !strings.Contains(err.Error(), "not enough peers to allocate CID") {
		t.Fatal(err)
	}
}

// This test checks that repinning something that has become
// underpinned actually changes nothing if it is still sufficiently pinned
func TestClustersReplicationMinMaxNoRealloc(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 1
		c.config.ReplicationFactorMax = nClusters
	}

	ttlDelay()

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	// Shutdown two peers
	clusters[nClusters-1].Shutdown(ctx)
	waitForLeaderAndMetrics(t, clusters)
	clusters[nClusters-2].Shutdown(ctx)
	waitForLeaderAndMetrics(t, clusters)

	_, err = clusters[0].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p, err := clusters[0].PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	if len(p.Allocations) != nClusters {
		t.Error("allocations should still be nClusters even if not all available")
	}

	if p.ReplicationFactorMax != nClusters {
		t.Error("rplMax should not have changed")
	}
}

// This test checks that repinning something that has become
// underpinned does re-allocations when it's not sufficiently
// pinned anymore.
// FIXME: The manual repin only works if the pin options changed.
func TestClustersReplicationMinMaxRealloc(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = 3
		c.config.ReplicationFactorMax = 4
	}

	ttlDelay() // make sure metrics are in

	h := test.Cid1
	_, err := clusters[0].Pin(ctx, h, api.PinOptions{
		Name: "a",
	})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p, err := clusters[0].PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	firstAllocations := p.Allocations

	peerIDMap := make(map[peer.ID]*Cluster)
	for _, a := range clusters {
		peerIDMap[a.id] = a
	}

	// kill two of the allocations
	// Only the first allocated peer (or the second if the first is
	// alerting) will automatically repin.
	alloc1 := peerIDMap[firstAllocations[1]]
	alloc2 := peerIDMap[firstAllocations[2]]
	safePeer := peerIDMap[firstAllocations[0]]

	alloc1.Shutdown(ctx)
	alloc2.Shutdown(ctx)

	waitForLeaderAndMetrics(t, clusters)

	// Repin (although this should have been taken care of automatically,
	// as alerts happen for the shutdown nodes). We force re-allocation by
	// changing the name.
	_, err = safePeer.Pin(ctx, h, api.PinOptions{
		Name: "b",
	})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	p, err = safePeer.PinGet(ctx, h)
	if err != nil {
		t.Fatal(err)
	}

	secondAllocations := p.Allocations

	strings1 := api.PeersToStrings(firstAllocations)
	strings2 := api.PeersToStrings(secondAllocations)
	sort.Strings(strings1)
	sort.Strings(strings2)
	t.Logf("Allocs1: %s", strings1)
	t.Logf("Allocs2: %s", strings2)

	if fmt.Sprintf("%s", strings1) == fmt.Sprintf("%s", strings2) {
		t.Error("allocations should have changed")
	}

	lenSA := len(secondAllocations)
	expected := minInt(nClusters-2, 4)
	if lenSA != expected {
		t.Errorf("Insufficient reallocation, could have allocated to %d peers but instead only allocated to %d peers", expected, lenSA)
	}

	if lenSA < 3 {
		t.Error("allocations should be more than rplMin")
	}
}

// In this test we check that repinning something
// when a node has gone down will re-assign the pin
func TestClustersReplicationRealloc(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	ttlDelay()

	j := rand.Intn(nClusters)
	h := test.Cid1
	_, err := clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	// Let the pin arrive
	pinDelay()

	pinList, err := clusters[j].pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}
	pin := pinList[0]
	allocs := sort.StringSlice(api.PeersToStrings(pin.Allocations))
	allocs.Sort()
	allocsStr := fmt.Sprintf("%s", allocs)

	// Re-pin should work and be allocated to the same
	// nodes
	_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	pinList2, err := clusters[j].pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}
	pin2 := pinList2[0]
	allocs2 := sort.StringSlice(api.PeersToStrings(pin2.Allocations))
	allocs2.Sort()
	allocsStr2 := fmt.Sprintf("%s", allocs2)
	if allocsStr != allocsStr2 {
		t.Fatal("allocations changed without reason")
	}
	//t.Log(allocsStr)
	//t.Log(allocsStr2)

	var killedClusterIndex int
	// find someone that pinned it and kill that cluster
	for i, c := range clusters {
		pinfo := c.tracker.Status(ctx, h)
		if pinfo.Status == api.TrackerStatusPinned {
			//t.Logf("Killing %s", c.id.Pretty())
			killedClusterIndex = i
			t.Logf("Shutting down %s", c.ID(ctx).ID)
			c.Shutdown(ctx)
			break
		}
	}

	// let metrics expire and give time for the cluster to
	// see if they have lost the leader
	waitForLeaderAndMetrics(t, clusters)

	// Make sure we haven't killed our randomly
	// selected cluster
	for j == killedClusterIndex {
		j = rand.Intn(nClusters)
	}

	// now pin should succeed
	_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	pinDelay()

	numPinned := 0
	for i, c := range clusters {
		if i == killedClusterIndex {
			continue
		}
		pinfo := c.tracker.Status(ctx, h)
		if pinfo.Status == api.TrackerStatusPinned {
			//t.Log(pinfo.Peer.Pretty())
			numPinned++
		}
	}

	if numPinned != nClusters-1 {
		t.Error("pin should have been correctly re-assigned")
	}
}

// In this test we try to pin something when there are not
// as many available peers as we need. It's like before, except
// more peers are killed.
func TestClustersReplicationNotEnoughPeers(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	ttlDelay()

	j := rand.Intn(nClusters)
	_, err := clusters[j].Pin(ctx, test.Cid1, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	// Let the pin arrive
	pinDelay()

	clusters[0].Shutdown(ctx)
	clusters[1].Shutdown(ctx)

	waitForLeaderAndMetrics(t, clusters)

	_, err = clusters[2].Pin(ctx, test.Cid2, api.PinOptions{})
	if err == nil {
		t.Fatal("expected an error")
	}
	if !strings.Contains(err.Error(), "not enough peers to allocate") {
		t.Error("different error than expected")
		t.Error(err)
	}
	//t.Log(err)
}

func TestClustersRebalanceOnPeerDown(t *testing.T) {
	ctx := context.Background()
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
	}

	// pin something
	h := test.Cid1
	clusters[0].Pin(ctx, h, api.PinOptions{})
	pinDelay()
	pinLocal := 0
	pinRemote := 0
	var localPinner string
	var remotePinner string
	var remotePinnerCluster *Cluster

	status, _ := clusters[0].Status(ctx, h)

	// check it was correctly pinned
	for p, pinfo := range status.PeerMap {
		if pinfo.Status == api.TrackerStatusPinned {
			pinLocal++
			localPinner = p
		} else if pinfo.Status == api.TrackerStatusRemote {
			pinRemote++
			remotePinner = p
		}
	}

	if pinLocal != nClusters-1 || pinRemote != 1 {
		t.Fatal("Not pinned as expected")
	}

	// kill the local pinner
	for _, c := range clusters {
		clid := peer.Encode(c.id)
		if clid == localPinner {
			c.Shutdown(ctx)
		} else if clid == remotePinner {
			remotePinnerCluster = c
		}
	}

	delay()
	waitForLeaderAndMetrics(t, clusters) // in case we killed the leader

	// It should now be pinned in the remote pinner
	if s := remotePinnerCluster.tracker.Status(ctx, h).Status; s != api.TrackerStatusPinned {
		t.Errorf("it should be pinned and is %s", s)
	}
}

// Helper function for verifying the cluster graph. Will only pass if exactly
// the peers in clusterIDs are fully connected to each other and the expected
// ipfs mock connectivity exists. Cluster peers not in clusterIDs are assumed
// to be disconnected and the graph should reflect this.
func validateClusterGraph(t *testing.T, graph api.ConnectGraph, clusterIDs map[string]struct{}, peerNum int) {
	// Check that all cluster peers see each other as peers
	for id1, peers := range graph.ClusterLinks {
		if _, ok := clusterIDs[id1]; !ok {
			if len(peers) != 0 {
				t.Errorf("disconnected peer %s is still connected in graph", id1)
			}
			continue
		}
		t.Logf("id: %s, peers: %v\n", id1, peers)
		if len(peers) > len(clusterIDs)-1 {
			t.Errorf("More peers recorded in graph than expected")
		}
		// Make a lookup index for peers connected to id1
		peerIndex := make(map[string]struct{})
		for _, p := range peers {
			peerIndex[peer.Encode(p)] = struct{}{}
		}
		for id2 := range clusterIDs {
			if _, ok := peerIndex[id2]; id1 != id2 && !ok {
				t.Errorf("Expected graph to see peer %s connected to peer %s", id1, id2)
			}
		}
	}
	if len(graph.ClusterLinks) != peerNum {
		t.Errorf("Unexpected number of cluster nodes in graph")
	}

	// Check that all cluster peers are recorded as nodes in the graph
	for id := range clusterIDs {
		if _, ok := graph.ClusterLinks[id]; !ok {
			t.Errorf("Expected graph to record peer %s as a node", id)
		}
	}

	if len(graph.ClusterTrustLinks) != peerNum {
		t.Errorf("Unexpected number of trust links in graph")
	}

	// Check that the mocked ipfs swarm is recorded
	if len(graph.IPFSLinks) != 1 {
		t.Error("Expected exactly one ipfs peer for all cluster nodes, the mocked peer")
	}
	links, ok := graph.IPFSLinks[peer.Encode(test.PeerID1)]
	if !ok {
		t.Error("Expected the mocked ipfs peer to be a node in the graph")
	} else {
		if len(links) != 2 || links[0] != test.PeerID4 ||
			links[1] != test.PeerID5 {
			t.Error("Swarm peers of mocked ipfs are not those expected")
		}
	}

	// Check that the cluster to ipfs connections are all recorded
	for id := range clusterIDs {
		if ipfsID, ok := graph.ClustertoIPFS[id]; !ok {
			t.Errorf("Expected graph to record peer %s's ipfs connection", id)
		} else {
			if ipfsID != test.PeerID1 {
				t.Errorf("Unexpected ipfs peer %s", ipfsID)
			}
		}
	}
	if len(graph.ClustertoIPFS) > len(clusterIDs) {
		t.Error("More cluster to ipfs links recorded in graph than expected")
	}
}

// In this test we get a cluster graph report from a random peer in a healthy
// fully connected cluster and verify that it is formed as expected.
func TestClustersGraphConnected(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	ttlDelay()

	j := rand.Intn(nClusters) // choose a random cluster peer to query
	graph, err := clusters[j].ConnectGraph()
	if err != nil {
		t.Fatal(err)
	}

	clusterIDs := make(map[string]struct{})
	for _, c := range clusters {
		id := peer.Encode(c.ID(ctx).ID)
		clusterIDs[id] = struct{}{}
	}
	validateClusterGraph(t, graph, clusterIDs, nClusters)
}

// Similar to the previous test we get a cluster graph report from a peer.
// However now 2 peers have been shut down and so we do not expect to see
// them in the graph.
func TestClustersGraphUnhealthy(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	if nClusters < 5 {
		t.Skip("Need at least 5 peers")
	}

	j := rand.Intn(nClusters) // choose a random cluster peer to query
	// choose the clusters to shutdown
	discon1 := -1
	discon2 := -1
	for i := range clusters {
		if i != j {
			if discon1 == -1 {
				discon1 = i
			} else {
				discon2 = i
				break
			}
		}
	}

	clusters[discon1].Shutdown(ctx)
	clusters[discon1].host.Close()
	clusters[discon2].Shutdown(ctx)
	clusters[discon2].host.Close()

	waitForLeaderAndMetrics(t, clusters)

	graph, err := clusters[j].ConnectGraph()
	if err != nil {
		t.Fatal(err)
	}

	clusterIDs := make(map[string]struct{})
	for i, c := range clusters {
		if i == discon1 || i == discon2 {
			continue
		}
		id := peer.Encode(c.ID(ctx).ID)
		clusterIDs[id] = struct{}{}
	}
	peerNum := nClusters
	switch consensus {
	case "crdt":
		peerNum = nClusters - 2
	}

	validateClusterGraph(t, graph, clusterIDs, peerNum)
}

// Check that the pin is not re-assigned when a node
// that has disabled repinning goes down.
func TestClustersDisabledRepinning(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	for _, c := range clusters {
		c.config.ReplicationFactorMin = nClusters - 1
		c.config.ReplicationFactorMax = nClusters - 1
		c.config.DisableRepinning = true
	}

	ttlDelay()

	j := rand.Intn(nClusters)
	h := test.Cid1
	_, err := clusters[j].Pin(ctx, h, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	// Let the pin arrive
	pinDelay()

	var killedClusterIndex int
	// find someone that pinned it and kill that cluster
	for i, c := range clusters {
		pinfo := c.tracker.Status(ctx, h)
		if pinfo.Status == api.TrackerStatusPinned {
			killedClusterIndex = i
			t.Logf("Shutting down %s", c.ID(ctx).ID)
			c.Shutdown(ctx)
			break
		}
	}

	// let metrics expire and give time for the cluster to
	// see if they have lost the leader
	waitForLeaderAndMetrics(t, clusters)

	// Make sure we haven't killed our randomly
	// selected cluster
	for j == killedClusterIndex {
		j = rand.Intn(nClusters)
	}

	numPinned := 0
	for i, c := range clusters {
		if i == killedClusterIndex {
			continue
		}
		pinfo := c.tracker.Status(ctx, h)
		if pinfo.Status == api.TrackerStatusPinned {
			//t.Log(pinfo.Peer.Pretty())
			numPinned++
		}
	}

	if numPinned != nClusters-2 {
		t.Errorf("expected %d replicas for pin, got %d", nClusters-2, numPinned)
	}
}

func TestRepoGC(t *testing.T) {
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)
	f := func(t *testing.T, c *Cluster) {
		gRepoGC, err := c.RepoGC(context.Background())
		if err != nil {
			t.Fatal("gc should have worked:", err)
		}

		if gRepoGC.PeerMap == nil {
			t.Fatal("expected a non-nil peer map")
		}

		if len(gRepoGC.PeerMap) != nClusters {
			t.Errorf("expected repo gc information for %d peers", nClusters)
		}
		for _, repoGC := range gRepoGC.PeerMap {
			testRepoGC(t, repoGC)
		}
	}

	runF(t, clusters, f)
}

func TestClustersFollowerMode(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	_, err := clusters[0].Pin(ctx, test.Cid1, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}
	_, err = clusters[0].Pin(ctx, test.ErrorCid, api.PinOptions{})
	if err != nil {
		t.Fatal(err)
	}

	// Let the pins arrive
	pinDelay()

	// Set Cluster1 to follower mode
	clusters[1].config.FollowerMode = true

	t.Run("follower cannot pin", func(t *testing.T) {
		_, err := clusters[1].PinPath(ctx, "/ipfs/"+test.Cid2.String(), api.PinOptions{})
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
		_, err = clusters[1].Pin(ctx, test.Cid2, api.PinOptions{})
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
	})

	t.Run("follower cannot unpin", func(t *testing.T) {
		_, err := clusters[1].UnpinPath(ctx, "/ipfs/"+test.Cid1.String())
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
		_, err = clusters[1].Unpin(ctx, test.Cid1)
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
	})

	t.Run("follower cannot add", func(t *testing.T) {
		sth := test.NewShardingTestHelper()
		defer sth.Clean(t)
		params := api.DefaultAddParams()
		params.Shard = false
		params.Name = "testlocal"
		mfr, closer := sth.GetTreeMultiReader(t)
		defer closer.Close()
		r := multipart.NewReader(mfr, mfr.Boundary())
Adders: stream blocks to destinations

This commit fixes #810 and adds block streaming to the final destinations when
adding. This should bring major performance gains when adding data to
clusters.

Before, every time cluster issued a block, it was broadcast individually to
all destinations (a new libp2p stream), where it was block/put to IPFS (a
single block/put http roundtrip per block).

Now, blocks are streamed all the way from the adder module to the ipfs daemon,
by making every block, as it arrives, a single part in a multipart block/put
request. (A minimal sketch of this streaming pattern follows this test.)

Before, block-broadcast needed to wait for all destinations to finish in order
to process the next block. Now, buffers allow some destinations to be faster
than others while sending and receiving blocks.

Before, if a block put request failed to be broadcast everywhere, an error
would happen at that moment. Now, we keep streaming until the end and only
then report any errors. The operation succeeds as long as at least one stream
finished successfully. Errors block/putting to IPFS will not abort streams.
Instead, subsequent blocks are retried with a new request, although the method
will return an error when the stream finishes if there were errors at any
point.
		_, err = clusters[1].AddFile(ctx, r, params)
		if err != errFollowerMode {
			t.Error("expected follower mode error")
		}
	})

	t.Run("follower status itself only", func(t *testing.T) {
		gpi, err := clusters[1].Status(ctx, test.Cid1)
		if err != nil {
			t.Error("status should work")
		}
		if len(gpi.PeerMap) != 1 {
			t.Fatal("globalPinInfo should only have one peer")
		}
	})
}
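
The block-streaming approach described in the adders commit above boils down
to feeding blocks into a multipart writer connected to a single long-lived
HTTP request, instead of issuing one request per block. The following is a
minimal, hypothetical sketch of that pattern; the streamBlocks helper, the
"file" field name and the endpoint URL used in main are invented for
illustration and do not reflect the actual ipfs-cluster adder code.

package main

import (
	"context"
	"io"
	"mime/multipart"
	"net/http"
)

// streamBlocks sends every block arriving on the channel as one part of a
// single multipart HTTP request, so connection setup happens only once.
func streamBlocks(ctx context.Context, url string, blocks <-chan []byte) error {
	pr, pw := io.Pipe()
	mpw := multipart.NewWriter(pw)

	go func() {
		var werr error
		for b := range blocks {
			// Each block, as it arrives, becomes its own part in the stream.
			part, err := mpw.CreateFormFile("file", "block")
			if err != nil {
				werr = err
				break
			}
			if _, err := part.Write(b); err != nil {
				werr = err
				break
			}
		}
		mpw.Close()
		pw.CloseWithError(werr) // propagate any write error to the reader side
	}()

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, pr)
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", mpw.FormDataContentType())

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	_, err = io.Copy(io.Discard, resp.Body)
	return err
}

func main() {
	blocks := make(chan []byte, 2)
	blocks <- []byte("block-1")
	blocks <- []byte("block-2")
	close(blocks)
	// Placeholder endpoint; point this at a real block/put API.
	_ = streamBlocks(context.Background(), "http://127.0.0.1:5001/api/v0/block/put", blocks)
}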

func TestClusterPinsWithExpiration(t *testing.T) {
	ctx := context.Background()

	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	ttlDelay()

	cl := clusters[rand.Intn(nClusters)] // choose a random cluster peer to query

	c := test.Cid1
	expireIn := 1 * time.Second
	opts := api.PinOptions{
		ExpireAt: time.Now().Add(expireIn),
	}
	_, err := cl.Pin(ctx, c, opts)
	if err != nil {
		t.Fatal("pin should have worked:", err)
	}

	pinDelay()

	pins, err := cl.pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if len(pins) != 1 {
		t.Error("pin should be part of the state")
	}

	// wait until the expiry time
	time.Sleep(expireIn)

	// manually call state sync on all peers, so we don't have to wait for
	// the state sync interval
	for _, c := range clusters {
		err = c.StateSync(ctx)
		if err != nil {
			t.Error(err)
		}
	}

	pinDelay()

	// state sync should have unpinned the expired pin
	pins, err = cl.pinsSlice(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if len(pins) != 0 {
		t.Error("pin should not be part of the state")
	}
}

func TestClusterAlerts(t *testing.T) {
	ctx := context.Background()
	clusters, mock := createClusters(t)
	defer shutdownClusters(t, clusters, mock)

	if len(clusters) < 2 {
		t.Skip("need at least 2 nodes for this test")
	}

	ttlDelay()

	for _, c := range clusters[1:] {
		c.Shutdown(ctx)
	}

	ttlDelay()

	alerts := clusters[0].Alerts()
	if len(alerts) == 0 {
		t.Error("expected at least one alert")
	}
}