Fix #787: reBootstrap regularly
This should fix a problem pointed out in #787 about a peer not being able to recover in CRDT mode after a router restart (when we lose all connections to all peers). We attempt to re-open connections to bootstrap peers regularly.
This commit is contained in:
parent
e665704962
commit
2d5b31ad6a
28
cluster.go
28
cluster.go
|
@ -40,6 +40,7 @@ var ReadyTimeout = 30 * time.Second
|
||||||
const (
|
const (
|
||||||
pingMetricName = "ping"
|
pingMetricName = "ping"
|
||||||
bootstrapCount = 3
|
bootstrapCount = 3
|
||||||
|
reBootstrapInterval = 30 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// Cluster is the main IPFS cluster component. It provides
|
// Cluster is the main IPFS cluster component. It provides
|
||||||
|
@ -405,6 +406,7 @@ func (c *Cluster) shouldPeerRepinCid(failed peer.ID, pin *api.Pin) bool {
|
||||||
// detects that we have been removed from the peerset, it shuts down this peer.
|
// detects that we have been removed from the peerset, it shuts down this peer.
|
||||||
func (c *Cluster) watchPeers() {
|
func (c *Cluster) watchPeers() {
|
||||||
ticker := time.NewTicker(c.config.PeerWatchInterval)
|
ticker := time.NewTicker(c.config.PeerWatchInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
@ -437,6 +439,26 @@ func (c *Cluster) watchPeers() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// reBootstrap regularly attempts to bootstrap (re-connect to peers from the
|
||||||
|
// peerstore). This should ensure that we auto-recover from situations in
|
||||||
|
// which the network was completely gone and we lost all peers.
|
||||||
|
func (c *Cluster) reBootstrap() {
|
||||||
|
ticker := time.NewTicker(reBootstrapInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-c.ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
connected := c.peerManager.Bootstrap(bootstrapCount)
|
||||||
|
for _, p := range connected {
|
||||||
|
logger.Infof("reconnected to %s", p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// find all Cids pinned to a given peer and triggers re-pins on them.
|
// find all Cids pinned to a given peer and triggers re-pins on them.
|
||||||
func (c *Cluster) repinFromPeer(ctx context.Context, p peer.ID) {
|
func (c *Cluster) repinFromPeer(ctx context.Context, p peer.ID) {
|
||||||
ctx, span := trace.StartSpan(ctx, "cluster/repinFromPeer")
|
ctx, span := trace.StartSpan(ctx, "cluster/repinFromPeer")
|
||||||
|
@ -498,6 +520,12 @@ func (c *Cluster) run() {
|
||||||
defer c.wg.Done()
|
defer c.wg.Done()
|
||||||
c.alertsHandler()
|
c.alertsHandler()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
c.wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer c.wg.Done()
|
||||||
|
c.reBootstrap()
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Cluster) ready(timeout time.Duration) {
|
func (c *Cluster) ready(timeout time.Duration) {
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -27,7 +27,6 @@ require (
|
||||||
github.com/ipfs/go-ds-crdt v0.0.14
|
github.com/ipfs/go-ds-crdt v0.0.14
|
||||||
github.com/ipfs/go-fs-lock v0.0.1
|
github.com/ipfs/go-fs-lock v0.0.1
|
||||||
github.com/ipfs/go-ipfs-api v0.0.1
|
github.com/ipfs/go-ipfs-api v0.0.1
|
||||||
github.com/ipfs/go-ipfs-blockstore v0.0.1
|
|
||||||
github.com/ipfs/go-ipfs-chunker v0.0.1
|
github.com/ipfs/go-ipfs-chunker v0.0.1
|
||||||
github.com/ipfs/go-ipfs-ds-help v0.0.1
|
github.com/ipfs/go-ipfs-ds-help v0.0.1
|
||||||
github.com/ipfs/go-ipfs-files v0.0.3
|
github.com/ipfs/go-ipfs-files v0.0.3
|
||||||
|
@ -50,6 +49,7 @@ require (
|
||||||
github.com/libp2p/go-libp2p-host v0.0.3
|
github.com/libp2p/go-libp2p-host v0.0.3
|
||||||
github.com/libp2p/go-libp2p-interface-pnet v0.0.1
|
github.com/libp2p/go-libp2p-interface-pnet v0.0.1
|
||||||
github.com/libp2p/go-libp2p-kad-dht v0.0.14
|
github.com/libp2p/go-libp2p-kad-dht v0.0.14
|
||||||
|
github.com/libp2p/go-libp2p-net v0.0.2
|
||||||
github.com/libp2p/go-libp2p-peer v0.2.0
|
github.com/libp2p/go-libp2p-peer v0.2.0
|
||||||
github.com/libp2p/go-libp2p-peerstore v0.1.0
|
github.com/libp2p/go-libp2p-peerstore v0.1.0
|
||||||
github.com/libp2p/go-libp2p-pnet v0.0.1
|
github.com/libp2p/go-libp2p-pnet v0.0.1
|
||||||
|
|
|
@ -39,6 +39,7 @@ var LoggingFacilities = map[string]string{
|
||||||
"localdags": "INFO",
|
"localdags": "INFO",
|
||||||
"adder": "INFO",
|
"adder": "INFO",
|
||||||
"optracker": "INFO",
|
"optracker": "INFO",
|
||||||
|
"pstoremgr": "INFO",
|
||||||
}
|
}
|
||||||
|
|
||||||
// LoggingFacilitiesExtra provides logging identifiers
|
// LoggingFacilitiesExtra provides logging identifiers
|
||||||
|
|
|
@ -16,6 +16,7 @@ import (
|
||||||
|
|
||||||
logging "github.com/ipfs/go-log"
|
logging "github.com/ipfs/go-log"
|
||||||
host "github.com/libp2p/go-libp2p-host"
|
host "github.com/libp2p/go-libp2p-host"
|
||||||
|
net "github.com/libp2p/go-libp2p-net"
|
||||||
peer "github.com/libp2p/go-libp2p-peer"
|
peer "github.com/libp2p/go-libp2p-peer"
|
||||||
peerstore "github.com/libp2p/go-libp2p-peerstore"
|
peerstore "github.com/libp2p/go-libp2p-peerstore"
|
||||||
ma "github.com/multiformats/go-multiaddr"
|
ma "github.com/multiformats/go-multiaddr"
|
||||||
|
@ -279,6 +280,13 @@ func (pm *Manager) Bootstrap(count int) []peer.ID {
|
||||||
ctx, cancel := context.WithTimeout(pm.ctx, ConnectTimeout)
|
ctx, cancel := context.WithTimeout(pm.ctx, ConnectTimeout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
|
if pm.host.Network().Connectedness(pinfo.ID) == net.Connected {
|
||||||
|
// We are connected, assume success and do not try
|
||||||
|
// to re-connect
|
||||||
|
totalConns++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
logger.Infof("connecting to %s", pinfo.ID)
|
logger.Infof("connecting to %s", pinfo.ID)
|
||||||
err := pm.host.Connect(ctx, pinfo)
|
err := pm.host.Connect(ctx, pinfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -286,6 +294,7 @@ func (pm *Manager) Bootstrap(count int) []peer.ID {
|
||||||
pm.SetPriority(pinfo.ID, 9999)
|
pm.SetPriority(pinfo.ID, 9999)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
logger.Infof("connected to %s", pinfo.ID)
|
||||||
totalConns++
|
totalConns++
|
||||||
success = append(success, pinfo.ID)
|
success = append(success, pinfo.ID)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user