ipfs-cluster/peer_manager.go
Hector Sanjuan 34fdc329fc Fix #24: Auto-join and auto-leave operations for Cluster
This is the third implementation attempt. This time, rather than
broadcasting PeerAdd/Join requests to the whole cluster, we use the
consensus log to broadcast new peers joining.

This makes it easier to recover from errors and to know exactly who
is a member of the cluster and who is not. The consensus is, after all,
meant to agree on things, and the list of cluster peers is something
everyone has to agree on.
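
To make this concrete, here is a minimal, self-contained sketch (toy types
and names, not the actual ipfs-cluster or Raft API) of a peer set that is
maintained purely by applying consensus log entries in order:

// Illustrative only: hypothetical types, not the actual ipfs-cluster API.
package main

import "fmt"

// peerSetState is a toy replicated state: the agreed-upon peer set.
type peerSetState struct {
	peers map[string]string // peer ID -> multiaddress
}

// logOp is a toy consensus log entry. Every node applies the same
// ordered sequence of these, so every node derives the same peer set.
type logOp struct {
	op   string // "peer_add" or "peer_rm"
	id   string
	addr string
}

// apply mutates the state exactly as the log entry dictates.
func (s *peerSetState) apply(e logOp) {
	switch e.op {
	case "peer_add":
		s.peers[e.id] = e.addr
	case "peer_rm":
		delete(s.peers, e.id)
	}
}

func main() {
	s := &peerSetState{peers: make(map[string]string)}
	log := []logOp{
		{op: "peer_add", id: "QmPeerA", addr: "/ip4/10.0.0.1/tcp/9096"},
		{op: "peer_add", id: "QmPeerB", addr: "/ip4/10.0.0.2/tcp/9096"},
		{op: "peer_rm", id: "QmPeerA"},
	}
	for _, e := range log {
		s.apply(e)
	}
	fmt.Println(s.peers) // map[QmPeerB:/ip4/10.0.0.2/tcp/9096]
}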

Raft itself uses a special log operation to maintain the peer set.

The tests are almost unchanged from the previous attempts, so behaviour
should be the same, except that it does not seem possible to bootstrap a
bunch of nodes at the same time using different bootstrap nodes; it works
when they all use the same one. I'm not sure this worked before either,
but the code is simpler than recursively contacting peers, and scales
better for larger clusters.

Nodes have to be careful about joining clusters while keeping the state
from a different cluster (disjoint logs). This may cause problems with
Raft.

License: MIT
Signed-off-by: Hector Sanjuan <hector@protocol.ai>
2017-02-07 18:46:09 +01:00

140 lines
2.7 KiB
Go

package ipfscluster

import (
	"sync"
	"time"

	peer "github.com/libp2p/go-libp2p-peer"
	peerstore "github.com/libp2p/go-libp2p-peerstore"
	ma "github.com/multiformats/go-multiaddr"
)

// peerManager is our own local peerstore
type peerManager struct {
	cluster *Cluster
	ps      peerstore.Peerstore
	self    peer.ID
	peermap map[peer.ID]ma.Multiaddr
	m       sync.RWMutex
}
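
// newPeerManager creates a peerManager for the given Cluster, wired to
// the host's peerstore, and resets the peer map so it only contains
// this peer.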
func newPeerManager(c *Cluster) *peerManager {
	pm := &peerManager{
		cluster: c,
		ps:      c.host.Peerstore(),
		self:    c.host.ID(),
	}
	pm.resetPeers()
	return pm
}
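
// addPeer stores the peer's address in the libp2p peerstore with a
// permanent TTL and records it in the peer map, logging when the peer
// was not previously known.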
func (pm *peerManager) addPeer(addr ma.Multiaddr) error {
	logger.Debugf("adding peer %s", addr)
	pid, decapAddr, err := multiaddrSplit(addr)
	if err != nil {
		return err
	}
	pm.ps.AddAddr(pid, decapAddr, peerstore.PermanentAddrTTL)
	if !pm.isPeer(pid) {
		logger.Infof("new Cluster peer %s", addr.String())
	}
	pm.m.Lock()
	pm.peermap[pid] = addr
	pm.m.Unlock()
	return nil
}
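
// rmPeer removes a peer from the peer map. If the removed peer is
// ourselves and selfShutdown is set, the consensus component and then
// the whole Cluster are shut down asynchronously.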
func (pm *peerManager) rmPeer(pid peer.ID, selfShutdown bool) error {
	logger.Debugf("removing peer %s", pid.Pretty())
	if pm.isPeer(pid) {
		logger.Infof("removing Cluster peer %s", pid.Pretty())
	}
	pm.m.Lock()
	delete(pm.peermap, pid)
	pm.m.Unlock()
	// It's ourselves. This is not very graceful
	if pid == pm.self && selfShutdown {
		logger.Warning("this peer has been removed from the Cluster and will shutdown itself in 5 seconds")
		defer func() {
			go func() {
				time.Sleep(1 * time.Second)
				pm.cluster.consensus.Shutdown()
				pm.resetPeers()
				time.Sleep(4 * time.Second)
				pm.cluster.Shutdown()
			}()
		}()
	}
	return nil
}
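
// savePeers persists the current list of cluster peer addresses
// (excluding ourselves) to the configuration file.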
func (pm *peerManager) savePeers() {
	pm.cluster.config.ClusterPeers = pm.peersAddrs()
	pm.cluster.config.Save("")
}
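
// resetPeers empties the peer map, leaving only this peer and its
// configured cluster address.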
func (pm *peerManager) resetPeers() {
	pm.m.Lock()
	pm.peermap = make(map[peer.ID]ma.Multiaddr)
	pm.peermap[pm.self] = pm.cluster.config.ClusterAddr
	pm.m.Unlock()
}
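
// isPeer reports whether the given peer ID is a member of the cluster.
// This peer always counts as a member.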
func (pm *peerManager) isPeer(p peer.ID) bool {
	if p == pm.self {
		return true
	}
	pm.m.RLock()
	_, ok := pm.peermap[p]
	pm.m.RUnlock()
	return ok
}

// peers returns the IDs of all cluster peers, including ourselves.
func (pm *peerManager) peers() []peer.ID {
	pm.m.RLock()
	defer pm.m.RUnlock()
	var peers []peer.ID
	for k := range pm.peermap {
		peers = append(peers, k)
	}
	return peers
}

// peersAddrs returns the multiaddresses of the cluster peers, NOT
// including ourselves.
func (pm *peerManager) peersAddrs() []ma.Multiaddr {
	pm.m.RLock()
	defer pm.m.RUnlock()
	var addrs []ma.Multiaddr
	for k, addr := range pm.peermap {
		if k != pm.self {
			addrs = append(addrs, addr)
		}
	}
	return addrs
}

// func (pm *peerManager) addFromConfig(cfg *Config) error {
// 	return pm.addFromMultiaddrs(cfg.ClusterPeers)
// }
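
// addFromMultiaddrs adds every address in the list as a cluster peer,
// returning on the first error encountered.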
func (pm *peerManager) addFromMultiaddrs(addrs []ma.Multiaddr) error {
	for _, m := range addrs {
		err := pm.addPeer(m)
		if err != nil {
			logger.Error(err)
			return err
		}
	}
	return nil
}