Fix #787: reBootstrap regularly
This should fix a problem pointed out in #787 about a peer not being able to recover in CRDT mode after a router restart (when we lose all connections to all peers). We now attempt to re-open connections to bootstrap peers regularly.
This commit is contained in:
parent
e665704962
commit
2d5b31ad6a
28
cluster.go
28
cluster.go
|
@ -40,6 +40,7 @@ var ReadyTimeout = 30 * time.Second
|
|||
const (
|
||||
pingMetricName = "ping"
|
||||
bootstrapCount = 3
|
||||
reBootstrapInterval = 30 * time.Second
|
||||
)
|
||||
|
||||
// Cluster is the main IPFS cluster component. It provides
|
||||
|
@ -405,6 +406,7 @@ func (c *Cluster) shouldPeerRepinCid(failed peer.ID, pin *api.Pin) bool {
|
|||
// detects that we have been removed from the peerset, it shuts down this peer.
|
||||
func (c *Cluster) watchPeers() {
|
||||
ticker := time.NewTicker(c.config.PeerWatchInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
|
@ -437,6 +439,26 @@ func (c *Cluster) watchPeers() {
|
|||
}
|
||||
}
|
||||
|
||||
// reBootstrap regularly attempts to bootstrap (re-connect to peers from the
|
||||
// peerstore). This should ensure that we auto-recover from situations in
|
||||
// which the network was completely gone and we lost all peers.
|
||||
func (c *Cluster) reBootstrap() {
|
||||
ticker := time.NewTicker(reBootstrapInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-c.ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
connected := c.peerManager.Bootstrap(bootstrapCount)
|
||||
for _, p := range connected {
|
||||
logger.Infof("reconnected to %s", p)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finds all Cids pinned to a given peer and triggers re-pins on them.
|
||||
func (c *Cluster) repinFromPeer(ctx context.Context, p peer.ID) {
|
||||
ctx, span := trace.StartSpan(ctx, "cluster/repinFromPeer")
|
||||
|
@ -498,6 +520,12 @@ func (c *Cluster) run() {
|
|||
defer c.wg.Done()
|
||||
c.alertsHandler()
|
||||
}()
|
||||
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
defer c.wg.Done()
|
||||
c.reBootstrap()
|
||||
}()
|
||||
}
|
||||
|
||||
func (c *Cluster) ready(timeout time.Duration) {
|
||||
|
|
2
go.mod
2
go.mod
|
@ -27,7 +27,6 @@ require (
|
|||
github.com/ipfs/go-ds-crdt v0.0.14
|
||||
github.com/ipfs/go-fs-lock v0.0.1
|
||||
github.com/ipfs/go-ipfs-api v0.0.1
|
||||
github.com/ipfs/go-ipfs-blockstore v0.0.1
|
||||
github.com/ipfs/go-ipfs-chunker v0.0.1
|
||||
github.com/ipfs/go-ipfs-ds-help v0.0.1
|
||||
github.com/ipfs/go-ipfs-files v0.0.3
|
||||
|
@ -50,6 +49,7 @@ require (
|
|||
github.com/libp2p/go-libp2p-host v0.0.3
|
||||
github.com/libp2p/go-libp2p-interface-pnet v0.0.1
|
||||
github.com/libp2p/go-libp2p-kad-dht v0.0.14
|
||||
github.com/libp2p/go-libp2p-net v0.0.2
|
||||
github.com/libp2p/go-libp2p-peer v0.2.0
|
||||
github.com/libp2p/go-libp2p-peerstore v0.1.0
|
||||
github.com/libp2p/go-libp2p-pnet v0.0.1
|
||||
|
|
|
@ -39,6 +39,7 @@ var LoggingFacilities = map[string]string{
|
|||
"localdags": "INFO",
|
||||
"adder": "INFO",
|
||||
"optracker": "INFO",
|
||||
"pstoremgr": "INFO",
|
||||
}
|
||||
|
||||
// LoggingFacilitiesExtra provides logging identifiers
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
|
||||
logging "github.com/ipfs/go-log"
|
||||
host "github.com/libp2p/go-libp2p-host"
|
||||
net "github.com/libp2p/go-libp2p-net"
|
||||
peer "github.com/libp2p/go-libp2p-peer"
|
||||
peerstore "github.com/libp2p/go-libp2p-peerstore"
|
||||
ma "github.com/multiformats/go-multiaddr"
|
||||
|
@ -279,6 +280,13 @@ func (pm *Manager) Bootstrap(count int) []peer.ID {
|
|||
ctx, cancel := context.WithTimeout(pm.ctx, ConnectTimeout)
|
||||
defer cancel()
|
||||
|
||||
if pm.host.Network().Connectedness(pinfo.ID) == net.Connected {
|
||||
// We are connected, assume success and do not try
|
||||
// to re-connect
|
||||
totalConns++
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Infof("connecting to %s", pinfo.ID)
|
||||
err := pm.host.Connect(ctx, pinfo)
|
||||
if err != nil {
|
||||
|
@ -286,6 +294,7 @@ func (pm *Manager) Bootstrap(count int) []peer.ID {
|
|||
pm.SetPriority(pinfo.ID, 9999)
|
||||
continue
|
||||
}
|
||||
logger.Infof("connected to %s", pinfo.ID)
|
||||
totalConns++
|
||||
success = append(success, pinfo.ID)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user