ipfs-cluster/consensus.go
Hector Sanjuan 6c18c02106 Issue #10: peers/add and peers/rm feature + tests
This commit adds PeerAdd() and PeerRemove() endpoints, CLI support and
tests. Peer management is a delicate issue because of how the underlying
consensus works and the number of places that need to track such peers.

When adding a peer, the procedure is as follows (a rough Go sketch follows the list):

* Try to open a connection to the new peer and abort if it is not reachable.
* Broadcast a PeerManagerAddPeer operation which tells all cluster members
to add the new peer. The Raft leader will add it to Raft's peerset and
the multiaddress will be saved in the ClusterPeers configuration key.
* If the above fails because some cluster node is not responding,
broadcast a PeerRemove() and try to undo any damage.
* If the broadcast succeeds, send our ClusterPeers to the new peer, along with
the local multiaddress we are using in the connection opened in the
first step (that is, the multiaddress through which the other peer can reach us).
* The new peer updates its configuration with the new list and joins
the consensus.
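
The flow can be sketched in Go roughly as below. This is a hypothetical
outline only: peerAdder, ConnectTo, Broadcast and SendClusterPeers are
stand-in names for illustration, not part of the actual API.

    // Hypothetical stand-ins for the pieces involved in PeerAdd.
    type peerAdder interface {
        ConnectTo(addr string) (localAddr string, err error) // step 1
        Broadcast(op string, addr string) error              // steps 2-3
        SendClusterPeers(addr, localAddr string) error       // step 4
    }

    func peerAddSketch(c peerAdder, addr string) error {
        // 1. Abort early if the new peer is not reachable.
        localAddr, err := c.ConnectTo(addr)
        if err != nil {
            return err
        }
        // 2. Tell every member to add the peer; the Raft leader also
        // updates Raft's peerset and saves the multiaddress.
        if err := c.Broadcast("PeerManagerAddPeer", addr); err != nil {
            // 3. Some member did not respond: broadcast the removal
            // to try to undo any damage.
            c.Broadcast("PeerManagerRmPeer", addr)
            return err
        }
        // 4. Hand the new peer our peer list plus the address through
        // which it can reach us; it then joins the consensus.
        return c.SendClusterPeers(addr, localAddr)
    }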

License: MIT
Signed-off-by: Hector Sanjuan <hector@protocol.ai>
2017-02-02 13:51:49 +01:00

447 lines
10 KiB
Go

package ipfscluster

import (
	"context"
	"errors"
	"strings"
	"sync"
	"time"

	rpc "github.com/hsanjuan/go-libp2p-gorpc"
	cid "github.com/ipfs/go-cid"
	consensus "github.com/libp2p/go-libp2p-consensus"
	host "github.com/libp2p/go-libp2p-host"
	peer "github.com/libp2p/go-libp2p-peer"
	libp2praft "github.com/libp2p/go-libp2p-raft"
)

const (
	maxSnapshots   = 5
	raftSingleMode = true
)

// Type of pin operation
const (
	LogOpPin = iota + 1
	LogOpUnpin
)

// LeaderTimeout specifies how long to wait during initialization
// before failing for not having a leader.
var LeaderTimeout = 120 * time.Second

type clusterLogOpType int

// clusterLogOp represents an operation for the OpLogConsensus system.
// It implements the consensus.Op interface.
type clusterLogOp struct {
	Cid       string
	Type      clusterLogOpType
	ctx       context.Context
	rpcClient *rpc.Client
}

// ApplyTo applies the operation to the State
func (op *clusterLogOp) ApplyTo(cstate consensus.State) (consensus.State, error) {
	state, ok := cstate.(State)
	var err error
	if !ok {
		// Should never be here
		panic("received unexpected state type")
	}
	c, err := cid.Decode(op.Cid)
	if err != nil {
		// Should never be here
		panic("could not decode a CID we ourselves encoded")
	}
	switch op.Type {
	case LogOpPin:
		err = state.AddPin(c) // assign, not declare: ROLLBACK reports this err
		if err != nil {
			goto ROLLBACK
		}
		// Async, we let the PinTracker take care of any problems
		op.rpcClient.Go("",
			"Cluster",
			"Track",
			NewCidArg(c),
			&struct{}{},
			nil)
	case LogOpUnpin:
		err = state.RmPin(c)
		if err != nil {
			goto ROLLBACK
		}
		// Async, we let the PinTracker take care of any problems
		op.rpcClient.Go("",
			"Cluster",
			"Untrack",
			NewCidArg(c),
			&struct{}{},
			nil)
	default:
		logger.Error("unknown clusterLogOp type. Ignoring")
	}
	return state, nil

ROLLBACK:
	// We failed to apply the operation to the state and therefore
	// we need to request that the cluster roll back to the previous
	// state. This operation can only be performed by the cluster leader.
	logger.Error("Rollbacks are not implemented")
	return nil, errors.New("a rollback may be necessary. Reason: " + err.Error())
}

// Consensus handles the work of keeping a shared-state between
// the peers of an IPFS Cluster, as well as modifying that state and
// applying any updates in a thread-safe manner.
type Consensus struct {
	ctx          context.Context
	cfg          *Config
	host         host.Host
	consensus    consensus.OpLogConsensus
	actor        consensus.Actor
	baseOp       *clusterLogOp
	p2pRaft      *libp2pRaftWrap
	rpcClient    *rpc.Client
	rpcReady     chan struct{}
	readyCh      chan struct{}
	shutdownLock sync.Mutex
	shutdown     bool
	shutdownCh   chan struct{}
	wg           sync.WaitGroup
}

// NewConsensus builds a new ClusterConsensus component. The state
// is used to initialize the Consensus system, so any information in it
// is discarded.
func NewConsensus(cfg *Config, host host.Host, state State) (*Consensus, error) {
	ctx := context.Background()
	op := &clusterLogOp{
		ctx: context.Background(),
	}
	cc := &Consensus{
		ctx:        ctx,
		cfg:        cfg,
		host:       host,
		baseOp:     op,
		shutdownCh: make(chan struct{}, 1),
		rpcReady:   make(chan struct{}, 1),
		readyCh:    make(chan struct{}, 1),
	}
	logger.Infof("starting Consensus and waiting for a leader...")
	con, actor, wrapper, err := makeLibp2pRaft(cc.cfg,
		cc.host, state, cc.baseOp)
	if err != nil {
		return nil, err
	}
	con.SetActor(actor)
	cc.actor = actor
	cc.consensus = con
	cc.p2pRaft = wrapper
	cc.run()
	return cc, nil
}
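
// A typical lifecycle for this component might look as follows
// (hypothetical caller code, shown only as a sketch):
//
//	cc, err := NewConsensus(cfg, host, state)
//	if err != nil {
//		return err
//	}
//	cc.SetClient(rpcClient) // unblocks the state sync in run()
//	<-cc.Ready()            // wait until bootstrap has finished
//	defer cc.Shutdown()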

func (cc *Consensus) run() {
	cc.wg.Add(1)
	go func() {
		defer cc.wg.Done()
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()
		cc.ctx = ctx
		cc.baseOp.ctx = ctx

		leader, err := cc.waitForLeader()
		if err != nil {
			return
		}
		logger.Infof("Consensus leader found (%s). Syncing state...", leader.Pretty())
		cc.waitForUpdates()
		logger.Info("Consensus state is up to date")

		// While rpc is not ready we cannot perform a sync
		<-cc.rpcReady

		var pInfo []PinInfo
		_, err = cc.State()
		// Only check sync if we have a state; this avoids
		// errors on newly-started clusters.
		if err != nil {
			logger.Debug("skipping state sync: ", err)
		} else {
			cc.rpcClient.Go(
				"",
				"Cluster",
				"StateSync",
				struct{}{},
				&pInfo,
				nil)
		}
		cc.readyCh <- struct{}{}
		logger.Debug("consensus ready")
		<-cc.shutdownCh
	}()
}

// waits until there is a raft leader
func (cc *Consensus) waitForLeader() (peer.ID, error) {
	// Wait for a leader
	leader := peer.ID("")
	var err error
	rounds := 0
	for {
		select {
		case <-cc.ctx.Done():
			return "", errors.New("shutdown")
		default:
			if rounds%20 == 0 { // every 10 seconds
				logger.Info("Consensus is waiting for a leader...")
			}
			rounds++
			time.Sleep(500 * time.Millisecond)
			leader, err = cc.Leader()
			if err == nil && leader != "" {
				return leader, nil
			}
		}
	}
}

// waits until the appliedIndex is the same as the lastIndex
func (cc *Consensus) waitForUpdates() {
	// Wait for state catch up
	logger.Debug("consensus state is catching up")
	time.Sleep(time.Second)
	for {
		select {
		case <-cc.ctx.Done():
			return
		default:
			lai := cc.p2pRaft.raft.AppliedIndex()
			li := cc.p2pRaft.raft.LastIndex()
			logger.Debugf("current Raft index: %d/%d",
				lai, li)
			if lai == li {
				return
			}
			time.Sleep(500 * time.Millisecond)
		}
	}
}

// Raft stores peer add/rm operations in its log. setPeers
// forcefully sets Raft's peerset to the peers known by the cluster.
func (cc *Consensus) setPeers() {
	logger.Debug("forcefully setting Raft peers to known set")
	var peersStr []string
	var peers []peer.ID
	err := cc.rpcClient.Call("",
		"Cluster",
		"PeerManagerPeers",
		struct{}{},
		&peers)
	if err != nil {
		logger.Error(err)
		return
	}
	for _, p := range peers {
		peersStr = append(peersStr, p.Pretty())
	}
	cc.p2pRaft.raft.SetPeers(peersStr)
}

// Shutdown stops the component so it will not process any
// more updates. The underlying consensus is permanently
// shut down, along with the libp2p transport.
func (cc *Consensus) Shutdown() error {
	cc.shutdownLock.Lock()
	defer cc.shutdownLock.Unlock()

	if cc.shutdown {
		logger.Debug("already shutdown")
		return nil
	}

	logger.Info("stopping Consensus component")
	close(cc.rpcReady)
	cc.shutdownCh <- struct{}{}

	// Raft shutdown
	errMsgs := ""
	f := cc.p2pRaft.raft.Snapshot()
	err := f.Error()
	if err != nil && !strings.Contains(err.Error(), "Nothing new to snapshot") {
		errMsgs += "could not take snapshot: " + err.Error() + ".\n"
	}
	f = cc.p2pRaft.raft.Shutdown()
	err = f.Error()
	if err != nil {
		errMsgs += "could not shutdown raft: " + err.Error() + ".\n"
	}
	err = cc.p2pRaft.boltdb.Close() // important!
	if err != nil {
		errMsgs += "could not close boltdb: " + err.Error() + ".\n"
	}
	if errMsgs != "" {
		errMsgs += "Consensus shutdown unsuccessful"
		logger.Error(errMsgs)
		return errors.New(errMsgs)
	}
	cc.wg.Wait()
	cc.shutdown = true
	return nil
}

// SetClient makes the component ready to perform RPC requests
func (cc *Consensus) SetClient(c *rpc.Client) {
	cc.rpcClient = c
	cc.baseOp.rpcClient = c
	cc.rpcReady <- struct{}{}
}

// Ready returns a channel which is signaled when the Consensus
// algorithm has finished bootstrapping and is ready to use
func (cc *Consensus) Ready() <-chan struct{} {
	return cc.readyCh
}
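
// Callers that do not want to block forever on Ready() can pair it
// with LeaderTimeout (hypothetical usage; this component does not
// enforce the timeout itself):
//
//	select {
//	case <-cc.Ready():
//		// consensus has bootstrapped
//	case <-time.After(LeaderTimeout):
//		// give up waiting for a leader
//	}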

func (cc *Consensus) op(c *cid.Cid, t clusterLogOpType) *clusterLogOp {
	return &clusterLogOp{
		Cid:  c.String(),
		Type: t,
	}
}

// returns true if the operation was redirected to the leader
func (cc *Consensus) redirectToLeader(method string, arg interface{}) (bool, error) {
	leader, err := cc.Leader()
	if err != nil {
		return false, err
	}
	if leader == cc.host.ID() {
		return false, nil
	}
	err = cc.rpcClient.Call(
		leader,
		"Cluster",
		method,
		arg,
		&struct{}{})
	return true, err
}

// LogPin submits a Cid to the shared state of the cluster. It forwards
// the operation to the leader if this peer is not the leader.
func (cc *Consensus) LogPin(c *cid.Cid) error {
	redirected, err := cc.redirectToLeader("ConsensusLogPin", NewCidArg(c))
	if err != nil || redirected {
		return err
	}

	// It seems WE are the leader.
	// Create pin operation for the log
	op := cc.op(c, LogOpPin)
	_, err = cc.consensus.CommitOp(op)
	if err != nil {
		// This means the op did not make it to the log
		return err
	}
	logger.Infof("pin committed to global state: %s", c)
	return nil
}

// LogUnpin removes a Cid from the shared state of the cluster.
func (cc *Consensus) LogUnpin(c *cid.Cid) error {
	redirected, err := cc.redirectToLeader("ConsensusLogUnpin", NewCidArg(c))
	if err != nil || redirected {
		return err
	}

	// It seems WE are the leader.
	// Create unpin operation for the log
	op := cc.op(c, LogOpUnpin)
	_, err = cc.consensus.CommitOp(op)
	if err != nil {
		return err
	}
	logger.Infof("unpin committed to global state: %s", c)
	return nil
}

// AddPeer attempts to add a peer to the consensus.
func (cc *Consensus) AddPeer(p peer.ID) error {
	//redirected, err := cc.redirectToLeader("ConsensusAddPeer", p)
	//if err != nil || redirected {
	//	return err
	//}

	// We are the leader
	future := cc.p2pRaft.raft.AddPeer(peer.IDB58Encode(p))
	return future.Error()
}

// RemovePeer attempts to remove a peer from the consensus.
func (cc *Consensus) RemovePeer(p peer.ID) error {
	//redirected, err := cc.redirectToLeader("ConsensusRmPeer", p)
	//if err != nil || redirected {
	//	return err
	//}

	future := cc.p2pRaft.raft.RemovePeer(peer.IDB58Encode(p))
	return future.Error()
}

// State retrieves the current consensus State. It may error
// if no State has been agreed upon or the state is not
// consistent. The returned State is the last agreed-upon
// State known by this node.
func (cc *Consensus) State() (State, error) {
	st, err := cc.consensus.GetLogHead()
	if err != nil {
		return nil, err
	}
	state, ok := st.(State)
	if !ok {
		return nil, errors.New("wrong state type")
	}
	return state, nil
}

// Leader returns the peerID of the Leader of the
// cluster. It returns an error when there is no leader.
func (cc *Consensus) Leader() (peer.ID, error) {
	// FIXME: Hashicorp Raft specific
	raftactor := cc.actor.(*libp2praft.Actor)
	return raftactor.Leader()
}

// Rollback replaces the current agreed-upon
// state with the state provided. Only the consensus leader
// can perform this operation.
func (cc *Consensus) Rollback(state State) error {
	return cc.consensus.Rollback(state)
}