ipfs-cluster/connect_graph.go
Adrian Lanzafame 3b3f786d68
add opencensus tracing and metrics
This commit adds support for OpenCensus tracing
and metrics collection. This required adding
context.Context propagation throughout the cluster
codebase, and in particular, to the ipfscluster
component interfaces.
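
As a rough sketch of what that change looks like (the component
interface and method names below are illustrative, not the exact
ipfs-cluster definitions), a component method gains a leading
context.Context parameter so a trace span can be started from the
caller's context:

package example

import (
	"context"

	"go.opencensus.io/trace"
)

// Pinner stands in for one of the cluster component interfaces;
// the name and method are illustrative only.
type Pinner interface {
	// Before the change: Pin(cid string) error
	// After the change, the caller's context is threaded through.
	Pin(ctx context.Context, cid string) error
}

type pinner struct{}

func (p *pinner) Pin(ctx context.Context, cid string) error {
	// The new span becomes a child of whatever span travels in ctx,
	// so traces can cross component (and RPC/HTTP) boundaries.
	ctx, span := trace.StartSpan(ctx, "pinner/Pin")
	defer span.End()
	_ = ctx // pass ctx down to any further calls made here
	return nil
}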

Tracing propagates across RPC and HTTP boundaries.
The current default tracing backend is Jaeger.
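
For reference, wiring up a Jaeger exporter with OpenCensus typically
looks roughly like the sketch below; the agent endpoint and service
name are placeholder values, not ipfs-cluster's actual configuration:

package example

import (
	"log"

	"contrib.go.opencensus.io/exporter/jaeger"
	"go.opencensus.io/trace"
)

func setupJaeger() {
	// Placeholder endpoint and service name; real values come from
	// the cluster's tracing configuration.
	je, err := jaeger.NewExporter(jaeger.Options{
		AgentEndpoint: "localhost:6831",
		Process:       jaeger.Process{ServiceName: "cluster-daemon"},
	})
	if err != nil {
		log.Fatalf("failed to create Jaeger exporter: %s", err)
	}
	trace.RegisterExporter(je)
}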

The metrics collection currently exports the metrics
exposed by the OpenCensus HTTP plugin, as well as the
pprof metrics, to a Prometheus endpoint for scraping.
The current default metrics backend is Prometheus.
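
A minimal sketch of that wiring, assuming the standard OpenCensus
Prometheus exporter and net/http/pprof (the port and namespace below
are placeholders, not the project's defaults):

package example

import (
	"log"
	"net/http"
	_ "net/http/pprof" // registers pprof handlers on the default mux

	"contrib.go.opencensus.io/exporter/prometheus"
	"go.opencensus.io/plugin/ochttp"
	"go.opencensus.io/stats/view"
)

func serveMetrics() {
	pe, err := prometheus.NewExporter(prometheus.Options{Namespace: "cluster"})
	if err != nil {
		log.Fatalf("failed to create Prometheus exporter: %s", err)
	}
	view.RegisterExporter(pe)

	// Export the views provided by the OpenCensus HTTP plugin.
	if err := view.Register(ochttp.DefaultServerViews...); err != nil {
		log.Fatalf("failed to register ochttp views: %s", err)
	}

	// The exporter doubles as an http.Handler serving the scrape endpoint.
	http.Handle("/metrics", pe)
	log.Fatal(http.ListenAndServe(":8888", nil))
}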

Metrics are exposed by default because their overhead
is low; they can be turned off if desired. Tracing is
off by default because it has a much higher performance
overhead, though the extent of the hit can be reduced
by using smaller sampling rates.
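
For example, a probability sampler keeps tracing overhead down by
recording only a fraction of requests (the 1% rate here is just an
illustration, not the project's default):

package example

import "go.opencensus.io/trace"

func applySampling() {
	// Record roughly 1 in 100 traces instead of every request.
	trace.ApplyConfig(trace.Config{
		DefaultSampler: trace.ProbabilitySampler(0.01),
	})
}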

License: MIT
Signed-off-by: Adrian Lanzafame <adrianlanzafame92@gmail.com>
2019-02-04 18:53:21 +10:00

package ipfscluster

import (
	peer "github.com/libp2p/go-libp2p-peer"
	"go.opencensus.io/trace"

	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/rpcutil"
)

// ConnectGraph returns a description of which cluster peers and ipfs
// daemons are connected to each other.
func (c *Cluster) ConnectGraph() (api.ConnectGraph, error) {
	ctx, span := trace.StartSpan(c.ctx, "cluster/ConnectGraph")
	defer span.End()

	cg := api.ConnectGraph{
		IPFSLinks:     make(map[peer.ID][]peer.ID),
		ClusterLinks:  make(map[peer.ID][]peer.ID),
		ClustertoIPFS: make(map[peer.ID]peer.ID),
	}
	members, err := c.consensus.Peers(ctx)
	if err != nil {
		return cg, err
	}

	peersSerials := make([][]api.IDSerial, len(members), len(members))

	ctxs, cancels := rpcutil.CtxsWithCancel(ctx, len(members))
	defer rpcutil.MultiCancel(cancels)

	errs := c.rpcClient.MultiCall(
		ctxs,
		members,
		"Cluster",
		"Peers",
		struct{}{},
		rpcutil.CopyIDSerialSliceToIfaces(peersSerials),
	)

	for i, err := range errs {
		p := members[i]
		cg.ClusterLinks[p] = make([]peer.ID, 0)
		if err != nil { // Only setting cluster connections when no error occurs
			logger.Debugf("RPC error reaching cluster peer %s: %s", p.Pretty(), err.Error())
			continue
		}

		selfConnection, pID := c.recordClusterLinks(&cg, p, peersSerials[i])

		// IPFS connections
		if !selfConnection {
			logger.Warningf("cluster peer %s not its own peer. No ipfs info", p.Pretty())
			continue
		}
		c.recordIPFSLinks(&cg, pID)
	}

	return cg, nil
}

func (c *Cluster) recordClusterLinks(cg *api.ConnectGraph, p peer.ID, sPeers []api.IDSerial) (bool, api.ID) {
	selfConnection := false
	var pID api.ID
	for _, sID := range sPeers {
		id := sID.ToID()
		if id.Error != "" {
			logger.Debugf("Peer %s errored connecting to its peer %s", p.Pretty(), id.ID.Pretty())
			continue
		}
		if id.ID == p {
			selfConnection = true
			pID = id
		} else {
			cg.ClusterLinks[p] = append(cg.ClusterLinks[p], id.ID)
		}
	}
	return selfConnection, pID
}

func (c *Cluster) recordIPFSLinks(cg *api.ConnectGraph, pID api.ID) {
	ipfsID := pID.IPFS.ID
	if pID.IPFS.Error != "" { // Only setting ipfs connections when no error occurs
		logger.Warningf("ipfs id: %s has error: %s. Skipping swarm connections", ipfsID.Pretty(), pID.IPFS.Error)
		return
	}

	if _, ok := cg.IPFSLinks[pID.ID]; ok {
		logger.Warningf("ipfs id: %s already recorded, one ipfs daemon in use by multiple cluster peers", ipfsID.Pretty())
	}

	cg.ClustertoIPFS[pID.ID] = ipfsID
	cg.IPFSLinks[ipfsID] = make([]peer.ID, 0)

	var swarmPeersS api.SwarmPeersSerial
	err := c.rpcClient.Call(
		pID.ID,
		"Cluster",
		"IPFSSwarmPeers",
		struct{}{},
		&swarmPeersS,
	)
	if err != nil {
		return
	}

	swarmPeers := swarmPeersS.ToSwarmPeers()
	cg.IPFSLinks[ipfsID] = swarmPeers
}
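
For illustration, a hypothetical consumer of the returned graph could
walk its three maps as sketched below; printConnectGraph is made up for
this example and is not part of the package:

// printConnectGraph is an illustrative helper, not part of ipfs-cluster.
func printConnectGraph(cg api.ConnectGraph) {
	for clusterPeer, ipfsPeer := range cg.ClustertoIPFS {
		logger.Infof("cluster peer %s runs ipfs daemon %s", clusterPeer.Pretty(), ipfsPeer.Pretty())
	}
	for p, conns := range cg.ClusterLinks {
		logger.Infof("cluster peer %s sees %d cluster connections", p.Pretty(), len(conns))
	}
	for ipfsPeer, swarm := range cg.IPFSLinks {
		logger.Infof("ipfs daemon %s has %d swarm peers", ipfsPeer.Pretty(), len(swarm))
	}
}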