// Package raft implements a Consensus component for IPFS Cluster which uses
// Raft (go-libp2p-raft).
package raft

import (
	"context"
	"errors"
	"fmt"
	"sort"
	"sync"
	"time"

	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/state"

	rpc "github.com/hsanjuan/go-libp2p-gorpc"
	logging "github.com/ipfs/go-log"
	consensus "github.com/libp2p/go-libp2p-consensus"
	host "github.com/libp2p/go-libp2p-host"
	peer "github.com/libp2p/go-libp2p-peer"
	libp2praft "github.com/libp2p/go-libp2p-raft"
	ma "github.com/multiformats/go-multiaddr"
)

var logger = logging.Logger("consensus")

// Consensus handles the work of keeping a shared-state between
// the peers of an IPFS Cluster, as well as modifying that state and
// applying any updates in a thread-safe manner.
type Consensus struct {
	ctx    context.Context
	cancel func()
	config *Config

	host host.Host

	consensus consensus.OpLogConsensus
	actor     consensus.Actor
	baseOp    *LogOp
	raft      *raftWrapper

	rpcClient *rpc.Client
	rpcReady  chan struct{}
	readyCh   chan struct{}

	shutdownLock sync.Mutex
	shutdown     bool
}

// NewConsensus builds a new ClusterConsensus component. The state
// is used to initialize the Consensus system, so any information in it
// is discarded.
func NewConsensus(clusterPeers []peer.ID, host host.Host, cfg *Config, state state.State) (*Consensus, error) {
	err := cfg.Validate()
	if err != nil {
		return nil, err
	}

	baseOp := &LogOp{}

	logger.Infof("starting Consensus and waiting for a leader...")
	consensus := libp2praft.NewOpLog(state, baseOp)
	raft, err := newRaftWrapper(clusterPeers, host, cfg, consensus.FSM())
	if err != nil {
		logger.Error("error creating raft: ", err)
		return nil, err
	}
	actor := libp2praft.NewActor(raft.raft)
	consensus.SetActor(actor)

	ctx, cancel := context.WithCancel(context.Background())

	cc := &Consensus{
		ctx:       ctx,
		cancel:    cancel,
		config:    cfg,
		host:      host,
		consensus: consensus,
		actor:     actor,
		baseOp:    baseOp,
		raft:      raft,
		rpcReady:  make(chan struct{}, 1),
		readyCh:   make(chan struct{}, 1),
	}

	baseOp.consensus = cc

	go cc.finishBootstrap()
	return cc, nil
}

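// Illustrative lifecycle sketch (the identifiers peers, h, cfg, st, client
// and pin are assumed to be provided by the caller):
//
//	cc, err := NewConsensus(peers, h, cfg, st)
//	if err != nil {
//		// handle error
//	}
//	cc.SetClient(client) // unblocks the bootstrap goroutine
//	<-cc.Ready()         // wait until the shared state is up to date
//	err = cc.LogPin(pin) // commit operations to the shared state
//	cc.Shutdown()
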
// WaitForSync waits for a leader and for the state to be up to date, then returns.
func (cc *Consensus) WaitForSync() error {
	leaderCtx, cancel := context.WithTimeout(
		cc.ctx,
		cc.config.WaitForLeaderTimeout)
	defer cancel()
	_, err := cc.raft.WaitForLeader(leaderCtx)
	if err != nil {
		return errors.New("error waiting for leader: " + err.Error())
	}
	err = cc.raft.WaitForUpdates(cc.ctx)
	if err != nil {
		return errors.New("error waiting for consensus updates: " + err.Error())
	}
	return nil
}

// waits until there is a consensus leader and syncs the state
// to the tracker
func (cc *Consensus) finishBootstrap() {
	err := cc.WaitForSync()
	if err != nil {
		return
	}
	logger.Info("Consensus state is up to date")

	// While rpc is not ready we cannot perform a sync
	if cc.rpcClient == nil {
		select {
		case <-cc.ctx.Done():
			return
		case <-cc.rpcReady:
		}
	}

	st, err := cc.State()
	_ = st
	// only check sync if we have a state
	// avoid error on new running clusters
	if err != nil {
		logger.Debug("skipping state sync: ", err)
	} else {
		var pInfoSerial []api.PinInfoSerial
		// StateSync is launched asynchronously; we do not wait for
		// its result before signalling readiness below.
		cc.rpcClient.Go(
			"",
			"Cluster",
			"StateSync",
			struct{}{},
			&pInfoSerial,
			nil)
	}
	cc.readyCh <- struct{}{}
	logger.Debug("consensus ready")
}

// Shutdown stops the component so it will not process any
// more updates. The underlying consensus is permanently
// shut down, along with the libp2p transport.
func (cc *Consensus) Shutdown() error {
	cc.shutdownLock.Lock()
	defer cc.shutdownLock.Unlock()

	if cc.shutdown {
		logger.Debug("already shutdown")
		return nil
	}

	logger.Info("stopping Consensus component")

	// Raft Shutdown
	err := cc.raft.Shutdown()
	if err != nil {
		logger.Error(err)
	}

	if cc.config.hostShutdown {
		cc.host.Close()
	}

	cc.shutdown = true
	cc.cancel()
	close(cc.rpcReady)
	return nil
}

// SetClient makes the component ready to perform RPC requests
func (cc *Consensus) SetClient(c *rpc.Client) {
	cc.rpcClient = c
	cc.rpcReady <- struct{}{}
}

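// The send on rpcReady above unblocks finishBootstrap, which waits for an
// RPC client to be available before triggering the initial state sync.
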
// Ready returns a channel which is signaled when the Consensus
// algorithm has finished bootstrapping and is ready to use
func (cc *Consensus) Ready() <-chan struct{} {
	return cc.readyCh
}

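// readyCh is buffered with capacity 1 and receives at most one value (sent
// by finishBootstrap once bootstrapping completes), so only a single receive
// on the channel returned by Ready() will ever succeed.
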
// op builds a LogOp for the given pin and operation type.
func (cc *Consensus) op(pin api.Pin, t LogOpType) *LogOp {
	return &LogOp{
		Cid:  pin.ToSerial(),
		Type: t,
	}
}

// redirectToLeader returns true if the operation was redirected to the leader.
// Note that if the leader just disappeared, the rpc call will fail because we
// have not yet heard that it is gone.
func (cc *Consensus) redirectToLeader(method string, arg interface{}) (bool, error) {
	var finalErr error

	// Retry redirects
	for i := 0; i <= cc.config.CommitRetries; i++ {
		logger.Debugf("redirect try %d", i)
		leader, err := cc.Leader()

		// No leader, wait for one
		if err != nil {
			logger.Warning("there seems to be no leader. Waiting for one")
			rctx, cancel := context.WithTimeout(
				cc.ctx,
				cc.config.WaitForLeaderTimeout)
			defer cancel()
			pidstr, err := cc.raft.WaitForLeader(rctx)

			// means we timed out waiting for a leader
			// we don't retry in this case
			if err != nil {
				return false, fmt.Errorf("timed out waiting for leader: %s", err)
			}
			leader, err = peer.IDB58Decode(pidstr)
			if err != nil {
				return false, err
			}
		}

		// We are the leader. Do not redirect
		if leader == cc.host.ID() {
			return false, nil
		}

		logger.Debugf("redirecting %s to leader: %s", method, leader.Pretty())
		finalErr = cc.rpcClient.Call(
			leader,
			"Cluster",
			method,
			arg,
			&struct{}{})
		if finalErr != nil {
			logger.Error(finalErr)
			logger.Error("retrying to redirect request to leader")
			time.Sleep(2 * cc.config.RaftConfig.HeartbeatTimeout)
			continue
		}
		break
	}

	// We tried to redirect, but something happened
	return true, finalErr
}

// commit submits a cc.consensus commit. It retries upon failures.
func (cc *Consensus) commit(op *LogOp, rpcOp string, redirectArg interface{}) error {
	var finalErr error
	for i := 0; i <= cc.config.CommitRetries; i++ {
		logger.Debugf("attempt #%d: committing %+v", i, op)

		// this means we are retrying
		if finalErr != nil {
			logger.Errorf("retrying upon failed commit (retry %d): %s ",
				i, finalErr)
		}

		// try to send it to the leader
		// redirectToLeader has its own retry loop. If this fails
		// we're done here.
		ok, err := cc.redirectToLeader(rpcOp, redirectArg)
		if err != nil || ok {
			return err
		}

		// Being here means we are the LEADER. We can commit.

		// now commit the changes to our state
		cc.shutdownLock.Lock() // do not shut down while committing
		_, finalErr = cc.consensus.CommitOp(op)
		cc.shutdownLock.Unlock()
		if finalErr != nil {
			goto RETRY
		}

		switch op.Type {
		case LogOpPin:
			logger.Infof("pin committed to global state: %s", op.Cid.Cid)
		case LogOpUnpin:
			logger.Infof("unpin committed to global state: %s", op.Cid.Cid)
		}
		break

	RETRY:
		time.Sleep(cc.config.CommitRetryDelay)
	}
	return finalErr
}

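// A note on the flow above: redirectToLeader returns (true, nil) when the
// operation was successfully forwarded to the leader, in which case there is
// nothing left to do here, and (false, nil) when this peer is the leader and
// must commit locally. Redirection errors abort the commit immediately,
// while local CommitOp failures are retried up to CommitRetries times with
// CommitRetryDelay between attempts.
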
// LogPin submits a Cid to the shared state of the cluster. It will forward
// the operation to the leader if this is not it.
func (cc *Consensus) LogPin(pin api.Pin) error {
	op := cc.op(pin, LogOpPin)
	err := cc.commit(op, "ConsensusLogPin", pin.ToSerial())
	if err != nil {
		return err
	}
	return nil
}

// LogUnpin removes a Cid from the shared state of the cluster.
func (cc *Consensus) LogUnpin(pin api.Pin) error {
	op := cc.op(pin, LogOpUnpin)
	err := cc.commit(op, "ConsensusLogUnpin", pin.ToSerial())
	if err != nil {
		return err
	}
	return nil
}

// AddPeer adds a new peer to participate in this consensus. It will
// forward the operation to the leader if this is not it.
func (cc *Consensus) AddPeer(pid peer.ID) error {
	var finalErr error
	for i := 0; i <= cc.config.CommitRetries; i++ {
		logger.Debugf("attempt #%d: AddPeer %s", i, pid.Pretty())
		if finalErr != nil {
			logger.Errorf("retrying to add peer. Attempt #%d failed: %s", i, finalErr)
		}
		ok, err := cc.redirectToLeader("ConsensusAddPeer", pid)
		if err != nil || ok {
			return err
		}
		// Being here means we are the leader and can commit
		cc.shutdownLock.Lock() // do not shut down while committing
		finalErr = cc.raft.AddPeer(peer.IDB58Encode(pid))
		cc.shutdownLock.Unlock()
		if finalErr != nil {
			time.Sleep(cc.config.CommitRetryDelay)
			continue
		}
		logger.Infof("peer added to Raft: %s", pid.Pretty())
		break
	}
	return finalErr
}

// RmPeer removes a peer from this consensus. It will
// forward the operation to the leader if this is not it.
func (cc *Consensus) RmPeer(pid peer.ID) error {
	var finalErr error
	for i := 0; i <= cc.config.CommitRetries; i++ {
		logger.Debugf("attempt #%d: RmPeer %s", i, pid.Pretty())
		if finalErr != nil {
			logger.Errorf("retrying to remove peer. Attempt #%d failed: %s", i, finalErr)
		}
		ok, err := cc.redirectToLeader("ConsensusRmPeer", pid)
		if err != nil || ok {
			return err
		}
		// Being here means we are the leader and can commit
		cc.shutdownLock.Lock() // do not shut down while committing
		finalErr = cc.raft.RemovePeer(peer.IDB58Encode(pid))
		cc.shutdownLock.Unlock()
		if finalErr != nil {
			time.Sleep(cc.config.CommitRetryDelay)
			continue
		}
		logger.Infof("peer removed from Raft: %s", pid.Pretty())
		break
	}
	return finalErr
}

// State retrieves the current consensus State. It may error
// if no State has been agreed upon or the state is not
// consistent. The returned State is the last agreed-upon
// State known by this node.
func (cc *Consensus) State() (state.State, error) {
	st, err := cc.consensus.GetLogHead()
	if err != nil {
		return nil, err
	}
	state, ok := st.(state.State)
	if !ok {
		return nil, errors.New("wrong state type")
	}
	return state, nil
}

// Leader returns the peerID of the Leader of the
// cluster. It returns an error when there is no leader.
func (cc *Consensus) Leader() (peer.ID, error) {
	// Note the hard dependency on raft here...
	raftactor := cc.actor.(*libp2praft.Actor)
	return raftactor.Leader()
}

// Clean removes all raft data from disk. On the next start, this peer
// will be bootstrapped as if it were a brand new peer.
func (cc *Consensus) Clean() error {
	if !cc.shutdown {
		return errors.New("consensus component is not shutdown")
	}

	err := cc.raft.Clean()
	if err != nil {
		return err
	}
	logger.Info("consensus data cleaned")
	return nil
}

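// Illustrative teardown sketch: Clean only succeeds after Shutdown has
// completed.
//
//	cc.Shutdown()
//	cc.Clean()
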
// Rollback replaces the current agreed-upon
// state with the state provided. Only the consensus leader
// can perform this operation.
func (cc *Consensus) Rollback(state state.State) error {
	// This is unused. It *might* be used for upgrades.
	// There is rather untested magic in libp2p-raft's FSM()
	// to make this possible.
	return cc.consensus.Rollback(state)
}

// Peers returns the current list of peers in the consensus.
// The list will be sorted alphabetically.
func (cc *Consensus) Peers() ([]peer.ID, error) {
	if cc.shutdown { // things hang a lot in this case
		return nil, errors.New("consensus is shutdown")
	}
	peers := []peer.ID{}
	raftPeers, err := cc.raft.Peers()
	if err != nil {
		return nil, fmt.Errorf("cannot retrieve list of peers: %s", err)
	}

	sort.Strings(raftPeers)

	for _, p := range raftPeers {
		id, err := peer.IDB58Decode(p)
		if err != nil {
			panic("could not decode peer")
		}
		peers = append(peers, id)
	}
	return peers, nil
}

// parsePIDFromMultiaddr extracts the peer ID string from the /ipfs/
// component of a multiaddress. It panics when no peer ID is present.
func parsePIDFromMultiaddr(addr ma.Multiaddr) string {
	pidstr, err := addr.ValueForProtocol(ma.P_IPFS)
	if err != nil {
		panic("peer badly encoded")
	}
	return pidstr
}