2017-03-10 16:24:25 +00:00
|
|
|
package raft
|
2016-12-02 18:33:39 +00:00
|
|
|
|
|
|
|
import (
|
2017-02-01 17:16:09 +00:00
|
|
|
"context"
|
|
|
|
"errors"
|
2017-11-28 22:45:10 +00:00
|
|
|
"io"
|
2017-07-12 14:51:32 +00:00
|
|
|
"os"
|
2016-12-02 18:33:39 +00:00
|
|
|
"path/filepath"
|
2017-02-01 17:16:09 +00:00
|
|
|
"time"
|
2016-12-02 18:33:39 +00:00
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
hraft "github.com/hashicorp/raft"
|
2017-01-25 11:20:49 +00:00
|
|
|
raftboltdb "github.com/hashicorp/raft-boltdb"
|
2016-12-16 11:40:28 +00:00
|
|
|
host "github.com/libp2p/go-libp2p-host"
|
|
|
|
peer "github.com/libp2p/go-libp2p-peer"
|
2017-10-23 11:46:37 +00:00
|
|
|
p2praft "github.com/libp2p/go-libp2p-raft"
|
2017-11-28 22:45:10 +00:00
|
|
|
|
|
|
|
"github.com/ipfs/ipfs-cluster/state"
|
2016-12-02 18:33:39 +00:00
|
|
|
)
|
|
|
|
|
2017-10-31 10:20:14 +00:00
|
|
|
// errBadRaftState is returned when the consensus component cannot start
// because the cluster peers do not match the raft peers.
var errBadRaftState = errors.New("cluster peers do not match raft peers")

// errWaitingForSelf is returned when we are waiting for ourselves to depart
// the peer set, which won't happen.
var errWaitingForSelf = errors.New("waiting for ourselves to depart")
|
2017-10-30 13:16:44 +00:00
|
|
|
|
2017-02-01 17:16:09 +00:00
|
|
|
// RaftMaxSnapshots indicates how many snapshots to keep in the consensus data
// folder.
// TODO: Maybe include this in Config. Not sure how useful it is to touch
// this anyways.
var RaftMaxSnapshots = 5

// RaftLogCacheSize is the maximum number of logs to cache in-memory.
// This is used to reduce disk I/O for the recently committed entries.
var RaftLogCacheSize = 512

// Are we compiled on a 64-bit architecture?
// https://groups.google.com/forum/#!topic/golang-nuts/vAckmhUMAdQ
// This is used below because raft Observers panic on 32-bit.
const sixtyfour = uint64(^uint(0)) == ^uint64(0)

// How long we wait for updates during shutdown before snapshotting
var waitForUpdatesShutdownTimeout = 5 * time.Second

// How often to poll the raft applied index while waiting for updates.
var waitForUpdatesInterval = 100 * time.Millisecond

// How many times to retry snapshotting when shutting down
var maxShutdownSnapshotRetries = 5
|
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
// raftWrapper performs all Raft-specific operations which are needed by
// Cluster but are not fulfilled by the consensus interface. It should contain
// most of the Raft-related stuff so it can be easily replaced in the future,
// if need be.
type raftWrapper struct {
	raft          *hraft.Raft             // the running Raft instance
	dataFolder    string                  // where the raft log, db and snapshots live
	srvConfig     hraft.Configuration     // the server configuration this peer booted with
	transport     *hraft.NetworkTransport // libp2p-backed Raft transport
	snapshotStore hraft.SnapshotStore
	logStore      hraft.LogStore   // log-cache wrapper around boltdb (see newRaftWrapper)
	stableStore   hraft.StableStore // boltdb
	boltdb        *raftboltdb.BoltStore // kept so it can be closed on Shutdown
}
|
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
// newRaftWrapper launches a go-libp2p-raft consensus peer: it prepares the
// data folder, wires the libp2p transport, the snapshot store and the
// BoltDB-backed log/stable stores, bootstraps the cluster when no previous
// raft state exists, and finally verifies that any existing raft peerset
// matches the configured cluster peers (failing with errBadRaftState when
// it does not).
func newRaftWrapper(peers []peer.ID, host host.Host, cfg *Config, fsm hraft.FSM) (*raftWrapper, error) {
	// Set correct LocalID
	cfg.RaftConfig.LocalID = hraft.ServerID(peer.IDB58Encode(host.ID()))

	// Prepare data folder
	dataFolder, err := makeDataFolder(cfg.BaseDir, cfg.DataFolder)
	if err != nil {
		return nil, err
	}
	srvCfg := makeServerConf(peers)

	logger.Debug("creating libp2p Raft transport")
	transport, err := p2praft.NewLibp2pTransport(host, cfg.NetworkTimeout)
	if err != nil {
		return nil, err
	}

	var log hraft.LogStore
	var stable hraft.StableStore
	var snap hraft.SnapshotStore

	logger.Debug("creating raft snapshot store")
	snapstore, err := hraft.NewFileSnapshotStoreWithLogger(
		dataFolder, RaftMaxSnapshots, raftStdLogger)
	if err != nil {
		return nil, err
	}

	logger.Debug("creating BoltDB store")
	store, err := raftboltdb.NewBoltStore(
		filepath.Join(dataFolder, "raft.db"))
	if err != nil {
		return nil, err
	}

	// wraps the store in a LogCache to improve performance.
	// See consul/agent/consul/server.go
	cacheStore, err := hraft.NewLogCache(RaftLogCacheSize, store)
	if err != nil {
		return nil, err
	}

	// BoltDB serves as both the stable store and (via the log cache)
	// the log store.
	stable = store
	log = cacheStore
	snap = snapstore

	logger.Debug("checking for existing raft states")
	hasState, err := hraft.HasExistingState(log, stable, snap)
	if err != nil {
		return nil, err
	}
	if !hasState {
		logger.Info("initializing raft cluster")
		err := hraft.BootstrapCluster(cfg.RaftConfig,
			log, stable, snap, transport, srvCfg)
		if err != nil {
			logger.Error("bootstrapping cluster: ", err)
			return nil, err
		}
	} else {
		logger.Debug("raft cluster is already initialized")
	}

	logger.Debug("creating Raft")
	r, err := hraft.NewRaft(cfg.RaftConfig,
		fsm, log, stable, snap, transport)
	if err != nil {
		logger.Error("initializing raft: ", err)
		return nil, err
	}

	raftW := &raftWrapper{
		raft:          r,
		dataFolder:    dataFolder,
		srvConfig:     srvCfg,
		transport:     transport,
		snapshotStore: snap,
		logStore:      log,
		stableStore:   stable,
		boltdb:        store,
	}

	// Handle existing, different configuration
	if hasState {
		cf := r.GetConfiguration()
		if err := cf.Error(); err != nil {
			return nil, err
		}
		currentCfg := cf.Configuration()
		added, removed := diffConfigurations(srvCfg, currentCfg)
		if len(added)+len(removed) > 0 {
			// A mismatched peerset is unrecoverable here: shut raft
			// down and tell the operator how to fix it.
			raftW.Shutdown()
			logger.Error("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
			logger.Error("Raft peers do not match cluster peers from the configuration.")
			logger.Error("This likely indicates that this peer has left the cluster and/or")
			logger.Error("has a dirty state. Clean the raft state for this peer")
			logger.Errorf("(%s)", dataFolder)
			logger.Error("bootstrap it to a working cluster.")
			logger.Error("Raft peers:")
			for _, s := range currentCfg.Servers {
				logger.Errorf(" - %s", s.ID)
			}
			logger.Error("Cluster configuration peers:")
			for _, s := range srvCfg.Servers {
				logger.Errorf(" - %s", s.ID)
			}
			logger.Errorf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
			return nil, errBadRaftState
		}
	}

	return raftW, nil
}
|
2017-02-01 17:16:09 +00:00
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
// returns the folder path after creating it.
|
|
|
|
// if folder is empty, it uses baseDir+Default.
|
|
|
|
func makeDataFolder(baseDir, folder string) (string, error) {
|
|
|
|
if folder == "" {
|
|
|
|
folder = filepath.Join(baseDir, DefaultDataSubFolder)
|
|
|
|
}
|
|
|
|
|
|
|
|
err := os.MkdirAll(folder, 0700)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
return folder, nil
|
2017-02-01 17:16:09 +00:00
|
|
|
}
|
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
// create Raft servers configuration
|
|
|
|
func makeServerConf(peers []peer.ID) hraft.Configuration {
|
|
|
|
sm := make(map[string]struct{})
|
|
|
|
|
|
|
|
servers := make([]hraft.Server, 0)
|
|
|
|
for _, pid := range peers {
|
|
|
|
p := peer.IDB58Encode(pid)
|
|
|
|
_, ok := sm[p]
|
|
|
|
if !ok { // avoid dups
|
|
|
|
sm[p] = struct{}{}
|
|
|
|
servers = append(servers, hraft.Server{
|
|
|
|
Suffrage: hraft.Voter,
|
|
|
|
ID: hraft.ServerID(p),
|
|
|
|
Address: hraft.ServerAddress(p),
|
|
|
|
})
|
2017-02-01 17:16:09 +00:00
|
|
|
}
|
|
|
|
}
|
2017-10-23 11:46:37 +00:00
|
|
|
return hraft.Configuration{
|
|
|
|
Servers: servers,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// diffConfigurations returns the serverIDs added and removed from
|
|
|
|
// c2 in relation to c1.
|
|
|
|
func diffConfigurations(
|
|
|
|
c1, c2 hraft.Configuration) (added, removed []hraft.ServerID) {
|
|
|
|
m1 := make(map[hraft.ServerID]struct{})
|
|
|
|
m2 := make(map[hraft.ServerID]struct{})
|
|
|
|
added = make([]hraft.ServerID, 0)
|
|
|
|
removed = make([]hraft.ServerID, 0)
|
|
|
|
for _, s := range c1.Servers {
|
|
|
|
m1[s.ID] = struct{}{}
|
|
|
|
}
|
|
|
|
for _, s := range c2.Servers {
|
|
|
|
m2[s.ID] = struct{}{}
|
|
|
|
}
|
2017-12-06 12:45:35 +00:00
|
|
|
for k := range m1 {
|
2017-10-23 11:46:37 +00:00
|
|
|
_, ok := m2[k]
|
|
|
|
if !ok {
|
|
|
|
removed = append(removed, k)
|
|
|
|
}
|
|
|
|
}
|
2017-12-06 12:45:35 +00:00
|
|
|
for k := range m2 {
|
2017-10-23 11:46:37 +00:00
|
|
|
_, ok := m1[k]
|
|
|
|
if !ok {
|
|
|
|
added = append(added, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// WaitForLeader holds until Raft says we have a leader, returning the
// leader's address. Returns an error if ctx is cancelled first.
func (rw *raftWrapper) WaitForLeader(ctx context.Context) (string, error) {
	obsCh := make(chan hraft.Observation, 1)
	if sixtyfour { // 32-bit systems don't support observers
		observer := hraft.NewObserver(obsCh, false, nil)
		rw.raft.RegisterObserver(observer)
		defer rw.raft.DeregisterObserver(observer)
	}
	// Poll every half second; observations are currently drained but not
	// acted upon (see issue link below), so the ticker does the real work.
	ticker := time.NewTicker(time.Second / 2)
	for {
		select {
		case obs := <-obsCh:
			_ = obs
			// See https://github.com/hashicorp/raft/issues/254
			// switch obs.Data.(type) {
			// case hraft.LeaderObservation:
			// 	lObs := obs.Data.(hraft.LeaderObservation)
			// 	logger.Infof("Raft Leader elected: %s",
			// 		lObs.Leader)
			// 	return string(lObs.Leader), nil
			// }
		case <-ticker.C:
			if l := rw.raft.Leader(); l != "" {
				logger.Debug("waitForleaderTimer")
				logger.Infof("Current Raft Leader: %s", l)
				ticker.Stop()
				return string(l), nil
			}
		case <-ctx.Done():
			return "", ctx.Err()
		}
	}
}
|
|
|
|
|
|
|
|
// WaitForUpdates holds until Raft has synced to the last index in the log
|
2017-10-23 11:46:37 +00:00
|
|
|
func (rw *raftWrapper) WaitForUpdates(ctx context.Context) error {
|
2017-12-12 16:47:21 +00:00
|
|
|
logger.Debug("Raft state is catching up to the latest known version. Please wait...")
|
2017-02-01 17:16:09 +00:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2017-02-02 22:52:06 +00:00
|
|
|
return ctx.Err()
|
2017-02-01 17:16:09 +00:00
|
|
|
default:
|
2017-10-23 11:46:37 +00:00
|
|
|
lai := rw.raft.AppliedIndex()
|
|
|
|
li := rw.raft.LastIndex()
|
2017-02-01 17:16:09 +00:00
|
|
|
logger.Debugf("current Raft index: %d/%d",
|
|
|
|
lai, li)
|
|
|
|
if lai == li {
|
2017-02-02 22:52:06 +00:00
|
|
|
return nil
|
2017-02-01 17:16:09 +00:00
|
|
|
}
|
2018-01-08 11:47:53 +00:00
|
|
|
time.Sleep(waitForUpdatesInterval)
|
2017-02-01 17:16:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-31 10:20:14 +00:00
|
|
|
func (rw *raftWrapper) WaitForPeer(ctx context.Context, pid string, depart bool) error {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
default:
|
|
|
|
peers, err := rw.Peers()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(peers) == 1 && pid == peers[0] && depart {
|
|
|
|
return errWaitingForSelf
|
|
|
|
}
|
|
|
|
|
|
|
|
found := find(peers, pid)
|
|
|
|
|
|
|
|
// departing
|
|
|
|
if depart && !found {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// joining
|
|
|
|
if !depart && found {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
time.Sleep(50 * time.Millisecond)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-01 17:16:09 +00:00
|
|
|
// Snapshot tells Raft to take a snapshot.
|
2017-10-23 11:46:37 +00:00
|
|
|
func (rw *raftWrapper) Snapshot() error {
|
|
|
|
future := rw.raft.Snapshot()
|
2017-02-01 17:16:09 +00:00
|
|
|
err := future.Error()
|
2017-10-23 11:46:37 +00:00
|
|
|
if err != nil && err.Error() != hraft.ErrNothingNewToSnapshot.Error() {
|
|
|
|
return err
|
2017-02-01 17:16:09 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-01-08 11:47:53 +00:00
|
|
|
// snapshotOnShutdown attempts to take a snapshot before a shutdown.
|
|
|
|
// Snapshotting might fail if the raft applied index is not the last index.
|
|
|
|
// This waits for the updates and tries to take a snapshot when the
|
|
|
|
// applied index is up to date.
|
|
|
|
// It will retry if the snapshot still fails, in case more updates have arrived.
|
|
|
|
// If waiting for updates times-out, it will not try anymore, since something
|
|
|
|
// is wrong. This is a best-effort solution as there is no way to tell Raft
|
|
|
|
// to stop processing entries because we want to take a snapshot before
|
|
|
|
// shutting down.
|
|
|
|
func (rw *raftWrapper) snapshotOnShutdown() error {
|
|
|
|
var err error
|
|
|
|
for i := 0; i < maxShutdownSnapshotRetries; i++ {
|
|
|
|
done := false
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), waitForUpdatesShutdownTimeout)
|
|
|
|
err := rw.WaitForUpdates(ctx)
|
|
|
|
cancel()
|
|
|
|
if err != nil {
|
|
|
|
logger.Warning("timed out waiting for state updates before shutdown. Snapshotting may fail")
|
|
|
|
done = true // let's not wait for updates again
|
|
|
|
}
|
|
|
|
|
|
|
|
err = rw.Snapshot()
|
|
|
|
if err != nil {
|
|
|
|
err = errors.New("could not snapshot raft: " + err.Error())
|
|
|
|
} else {
|
|
|
|
err = nil
|
|
|
|
done = true
|
|
|
|
}
|
|
|
|
|
|
|
|
if done {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
logger.Warningf("retrying to snapshot (%d/%d)...", i+1, maxShutdownSnapshotRetries)
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-02-01 17:16:09 +00:00
|
|
|
// Shutdown shutdown Raft and closes the BoltDB.
|
2017-10-23 11:46:37 +00:00
|
|
|
func (rw *raftWrapper) Shutdown() error {
|
2017-02-01 17:16:09 +00:00
|
|
|
errMsgs := ""
|
2018-01-08 11:47:53 +00:00
|
|
|
|
|
|
|
err := rw.snapshotOnShutdown()
|
2017-11-28 22:45:10 +00:00
|
|
|
if err != nil {
|
2018-01-08 11:47:53 +00:00
|
|
|
errMsgs += err.Error() + ".\n"
|
2017-11-28 22:45:10 +00:00
|
|
|
}
|
2018-01-08 11:47:53 +00:00
|
|
|
|
2017-11-28 22:45:10 +00:00
|
|
|
future := rw.raft.Shutdown()
|
|
|
|
err = future.Error()
|
2017-02-01 17:16:09 +00:00
|
|
|
if err != nil {
|
|
|
|
errMsgs += "could not shutdown raft: " + err.Error() + ".\n"
|
|
|
|
}
|
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
err = rw.boltdb.Close() // important!
|
2017-02-01 17:16:09 +00:00
|
|
|
if err != nil {
|
|
|
|
errMsgs += "could not close boltdb: " + err.Error()
|
|
|
|
}
|
2017-02-02 22:52:06 +00:00
|
|
|
|
2017-02-01 17:16:09 +00:00
|
|
|
if errMsgs != "" {
|
|
|
|
return errors.New(errMsgs)
|
|
|
|
}
|
2017-02-02 22:52:06 +00:00
|
|
|
|
2017-02-01 17:16:09 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// AddPeer adds a peer to Raft
|
2017-10-23 11:46:37 +00:00
|
|
|
func (rw *raftWrapper) AddPeer(peer string) error {
|
|
|
|
// Check that we don't have it to not waste
|
|
|
|
// log entries if so.
|
|
|
|
peers, err := rw.Peers()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if find(peers, peer) {
|
|
|
|
logger.Infof("%s is already a raft peer", peer)
|
2017-02-02 22:52:06 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
future := rw.raft.AddVoter(
|
|
|
|
hraft.ServerID(peer),
|
|
|
|
hraft.ServerAddress(peer),
|
|
|
|
0,
|
|
|
|
0) // TODO: Extra cfg value?
|
|
|
|
err = future.Error()
|
2017-02-02 22:52:06 +00:00
|
|
|
if err != nil {
|
|
|
|
logger.Error("raft cannot add peer: ", err)
|
|
|
|
}
|
2017-02-01 17:16:09 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// RemovePeer removes a peer from Raft
|
2017-10-23 11:46:37 +00:00
|
|
|
func (rw *raftWrapper) RemovePeer(peer string) error {
|
|
|
|
// Check that we have it to not waste
|
|
|
|
// log entries if we don't.
|
|
|
|
peers, err := rw.Peers()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if !find(peers, peer) {
|
|
|
|
logger.Infof("%s is not among raft peers", peer)
|
2017-02-02 22:52:06 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
if len(peers) == 1 && peers[0] == peer {
|
|
|
|
return errors.New("cannot remove ourselves from a 1-peer cluster")
|
|
|
|
}
|
|
|
|
|
2017-10-30 11:45:08 +00:00
|
|
|
rmFuture := rw.raft.RemoveServer(
|
2017-10-23 11:46:37 +00:00
|
|
|
hraft.ServerID(peer),
|
|
|
|
0,
|
|
|
|
0) // TODO: Extra cfg value?
|
2017-10-30 11:45:08 +00:00
|
|
|
err = rmFuture.Error()
|
2017-02-02 22:52:06 +00:00
|
|
|
if err != nil {
|
|
|
|
logger.Error("raft cannot remove peer: ", err)
|
2017-10-30 11:45:08 +00:00
|
|
|
return err
|
2017-02-02 22:52:06 +00:00
|
|
|
}
|
2017-10-30 11:45:08 +00:00
|
|
|
|
|
|
|
return nil
|
2017-02-01 17:16:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Leader returns the address of Raft's current leader. It may be an empty
// string if there is no leader or it is unknown.
func (rw *raftWrapper) Leader() string {
	return string(rw.raft.Leader())
}
|
2017-02-02 22:52:06 +00:00
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
func (rw *raftWrapper) Peers() ([]string, error) {
|
|
|
|
ids := make([]string, 0)
|
|
|
|
|
|
|
|
configFuture := rw.raft.GetConfiguration()
|
|
|
|
if err := configFuture.Error(); err != nil {
|
|
|
|
return nil, err
|
2017-02-02 22:52:06 +00:00
|
|
|
}
|
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
for _, server := range configFuture.Configuration().Servers {
|
|
|
|
ids = append(ids, string(server.ID))
|
|
|
|
}
|
|
|
|
|
|
|
|
return ids, nil
|
|
|
|
}
|
|
|
|
|
2017-11-28 22:45:10 +00:00
|
|
|
// latestSnapshot looks for the most recent raft snapshot stored at the
|
|
|
|
// provided basedir. It returns a boolean indicating if any snapshot is
|
|
|
|
// readable, the snapshot's metadata, and a reader to the snapshot's bytes
|
|
|
|
func latestSnapshot(raftDataFolder string) (*hraft.SnapshotMeta, io.ReadCloser, error) {
|
|
|
|
store, err := hraft.NewFileSnapshotStore(raftDataFolder, RaftMaxSnapshots, nil)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
snapMetas, err := store.List()
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
if len(snapMetas) == 0 { // no error if snapshot isn't found
|
|
|
|
return nil, nil, nil
|
|
|
|
}
|
|
|
|
meta, r, err := store.Open(snapMetas[0].ID)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
return meta, r, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// LastStateRaw returns the bytes of the last snapshot stored, its metadata,
|
|
|
|
// and a flag indicating whether any snapshot was found.
|
|
|
|
func LastStateRaw(cfg *Config) (io.Reader, bool, error) {
|
|
|
|
// Read most recent snapshot
|
|
|
|
dataFolder, err := makeDataFolder(cfg.BaseDir, cfg.DataFolder)
|
|
|
|
if err != nil {
|
|
|
|
return nil, false, err
|
|
|
|
}
|
|
|
|
meta, r, err := latestSnapshot(dataFolder)
|
|
|
|
if err != nil {
|
|
|
|
return nil, false, err
|
|
|
|
}
|
|
|
|
if meta == nil { // no snapshots could be read
|
|
|
|
return nil, false, nil
|
|
|
|
}
|
|
|
|
return r, true, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SnapshotSave saves the provided state to a snapshot in the
|
|
|
|
// raft data path. Old raft data is backed up and replaced
|
2018-01-25 21:47:12 +00:00
|
|
|
// by the new snapshot. pids contains the config-specified
|
|
|
|
// peer ids to include in the snapshot metadata if no snapshot exists
|
|
|
|
// from which to copy the raft metadata
|
|
|
|
func SnapshotSave(cfg *Config, newState state.State, pids []peer.ID) error {
|
2017-11-28 22:45:10 +00:00
|
|
|
newStateBytes, err := p2praft.EncodeSnapshot(newState)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
dataFolder, err := makeDataFolder(cfg.BaseDir, cfg.DataFolder)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
meta, _, err := latestSnapshot(dataFolder)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// make a new raft snapshot
|
|
|
|
var raftSnapVersion hraft.SnapshotVersion
|
|
|
|
raftSnapVersion = 1 // As of hraft v1.0.0 this is always 1
|
|
|
|
configIndex := uint64(1)
|
|
|
|
var raftIndex uint64
|
|
|
|
var raftTerm uint64
|
|
|
|
var srvCfg hraft.Configuration
|
|
|
|
if meta != nil {
|
|
|
|
raftIndex = meta.Index
|
|
|
|
raftTerm = meta.Term
|
|
|
|
srvCfg = meta.Configuration
|
2017-12-19 17:05:32 +00:00
|
|
|
CleanupRaft(dataFolder)
|
2017-11-28 22:45:10 +00:00
|
|
|
} else {
|
2018-01-25 21:47:12 +00:00
|
|
|
// Begin the log after the index of a fresh start so that
|
|
|
|
// the snapshot's state propagate's during bootstrap
|
|
|
|
raftIndex = uint64(2)
|
2017-11-28 22:45:10 +00:00
|
|
|
raftTerm = uint64(1)
|
2018-01-25 21:47:12 +00:00
|
|
|
srvCfg = makeServerConf(pids)
|
2017-11-28 22:45:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
snapshotStore, err := hraft.NewFileSnapshotStoreWithLogger(dataFolder, RaftMaxSnapshots, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
_, dummyTransport := hraft.NewInmemTransport("")
|
|
|
|
|
|
|
|
sink, err := snapshotStore.Create(raftSnapVersion, raftIndex, raftTerm, srvCfg, configIndex, dummyTransport)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = sink.Write(newStateBytes)
|
|
|
|
if err != nil {
|
|
|
|
sink.Cancel()
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
err = sink.Close()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-12-19 17:05:32 +00:00
|
|
|
// CleanupRaft moves the current data folder to a backup location
|
|
|
|
func CleanupRaft(dataFolder string) error {
|
2017-11-28 22:45:10 +00:00
|
|
|
dbh := newDataBackupHelper(dataFolder)
|
2017-11-13 17:18:52 +00:00
|
|
|
err := dbh.makeBackup()
|
|
|
|
if err != nil {
|
|
|
|
logger.Warning(err)
|
|
|
|
logger.Warning("the state could not be cleaned properly")
|
|
|
|
logger.Warning("manual intervention may be needed before starting cluster again")
|
|
|
|
}
|
|
|
|
return nil
|
2017-11-01 12:25:28 +00:00
|
|
|
}
|
|
|
|
|
2017-11-28 22:45:10 +00:00
|
|
|
// Clean backs up and removes this raft's data folder.
// Only call when Raft is shutdown.
func (rw *raftWrapper) Clean() error {
	return CleanupRaft(rw.dataFolder)
}
|
|
|
|
|
2017-10-23 11:46:37 +00:00
|
|
|
// find reports whether elem is present in s.
func find(s []string, elem string) bool {
	found := false
	for i := 0; i < len(s) && !found; i++ {
		found = s[i] == elem
	}
	return found
}
|