From 2d5b31ad6a94efcdcca33210cdf674f4939f7f2b Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Sun, 9 Jun 2019 13:26:18 +0200 Subject: [PATCH] Fix #787: reBootstrap regularly This should fix a problem pointed out in #787 about a peer not being able to recover in CRDT mode after a router re-start (when we lose all connections to all peers). We attempt to re-open connections to bootstrap peers regularly. --- cluster.go | 32 ++++++++++++++++++++++++++++++-- go.mod | 2 +- logging.go | 1 + pstoremgr/pstoremgr.go | 9 +++++++++ 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/cluster.go b/cluster.go index 3e9c1037..1c9596b0 100644 --- a/cluster.go +++ b/cluster.go @@ -38,8 +38,9 @@ import ( var ReadyTimeout = 30 * time.Second const ( - pingMetricName = "ping" - bootstrapCount = 3 + pingMetricName = "ping" + bootstrapCount = 3 + reBootstrapInterval = 30 * time.Second ) // Cluster is the main IPFS cluster component. It provides @@ -405,6 +406,7 @@ func (c *Cluster) shouldPeerRepinCid(failed peer.ID, pin *api.Pin) bool { // detects that we have been removed from the peerset, it shuts down this peer. func (c *Cluster) watchPeers() { ticker := time.NewTicker(c.config.PeerWatchInterval) + defer ticker.Stop() for { select { @@ -437,6 +439,26 @@ func (c *Cluster) watchPeers() { } } +// reBootstrap regularly attempts to bootstrap (re-connect to peers from the +// peerstore). This should ensure that we auto-recover from situations in +// which the network was completely gone and we lost all peers. +func (c *Cluster) reBootstrap() { + ticker := time.NewTicker(reBootstrapInterval) + defer ticker.Stop() + + for { + select { + case <-c.ctx.Done(): + return + case <-ticker.C: + connected := c.peerManager.Bootstrap(bootstrapCount) + for _, p := range connected { + logger.Infof("reconnected to %s", p) + } + } + } +} + // find all Cids pinned to a given peer and triggers re-pins on them. 
func (c *Cluster) repinFromPeer(ctx context.Context, p peer.ID) { ctx, span := trace.StartSpan(ctx, "cluster/repinFromPeer") @@ -498,6 +520,12 @@ func (c *Cluster) run() { defer c.wg.Done() c.alertsHandler() }() + + c.wg.Add(1) + go func() { + defer c.wg.Done() + c.reBootstrap() + }() } func (c *Cluster) ready(timeout time.Duration) { diff --git a/go.mod b/go.mod index 15391d45..26cb8787 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,6 @@ require ( github.com/ipfs/go-ds-crdt v0.0.14 github.com/ipfs/go-fs-lock v0.0.1 github.com/ipfs/go-ipfs-api v0.0.1 - github.com/ipfs/go-ipfs-blockstore v0.0.1 github.com/ipfs/go-ipfs-chunker v0.0.1 github.com/ipfs/go-ipfs-ds-help v0.0.1 github.com/ipfs/go-ipfs-files v0.0.3 @@ -50,6 +49,7 @@ require ( github.com/libp2p/go-libp2p-host v0.0.3 github.com/libp2p/go-libp2p-interface-pnet v0.0.1 github.com/libp2p/go-libp2p-kad-dht v0.0.14 + github.com/libp2p/go-libp2p-net v0.0.2 github.com/libp2p/go-libp2p-peer v0.2.0 github.com/libp2p/go-libp2p-peerstore v0.1.0 github.com/libp2p/go-libp2p-pnet v0.0.1 diff --git a/logging.go b/logging.go index b11e0cdb..53b058a0 100644 --- a/logging.go +++ b/logging.go @@ -39,6 +39,7 @@ var LoggingFacilities = map[string]string{ "localdags": "INFO", "adder": "INFO", "optracker": "INFO", + "pstoremgr": "INFO", } // LoggingFacilitiesExtra provides logging identifiers diff --git a/pstoremgr/pstoremgr.go b/pstoremgr/pstoremgr.go index 3b986eb0..c88273be 100644 --- a/pstoremgr/pstoremgr.go +++ b/pstoremgr/pstoremgr.go @@ -16,6 +16,7 @@ import ( logging "github.com/ipfs/go-log" host "github.com/libp2p/go-libp2p-host" + net "github.com/libp2p/go-libp2p-net" peer "github.com/libp2p/go-libp2p-peer" peerstore "github.com/libp2p/go-libp2p-peerstore" ma "github.com/multiformats/go-multiaddr" @@ -279,6 +280,13 @@ func (pm *Manager) Bootstrap(count int) []peer.ID { ctx, cancel := context.WithTimeout(pm.ctx, ConnectTimeout) defer cancel() + if pm.host.Network().Connectedness(pinfo.ID) == net.Connected { + // We are 
connected, assume success and do not try + // to re-connect + totalConns++ + continue + } + logger.Infof("connecting to %s", pinfo.ID) err := pm.host.Connect(ctx, pinfo) if err != nil { @@ -286,6 +294,7 @@ func (pm *Manager) Bootstrap(count int) []peer.ID { pm.SetPriority(pinfo.ID, 9999) continue } + logger.Infof("connected to %s", pinfo.ID) totalConns++ success = append(success, pinfo.ID) }