diff --git a/add_test.go b/add_test.go index 39002e82..e856583d 100644 --- a/add_test.go +++ b/add_test.go @@ -3,6 +3,7 @@ package ipfscluster // This files has tests for Add* using multiple cluster peers. import ( + "context" "mime/multipart" "testing" @@ -11,6 +12,7 @@ import ( ) func TestAdd(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) sth := test.NewShardingTestHelper() @@ -34,7 +36,7 @@ func TestAdd(t *testing.T) { pinDelay() f := func(t *testing.T, c *Cluster) { - pin := c.StatusLocal(ci) + pin := c.StatusLocal(ctx, ci) if pin.Error != "" { t.Error(pin.Error) } @@ -48,12 +50,13 @@ func TestAdd(t *testing.T) { } func TestAddPeerDown(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) sth := test.NewShardingTestHelper() defer sth.Clean(t) - err := clusters[0].Shutdown() + err := clusters[0].Shutdown(ctx) if err != nil { t.Fatal(err) } @@ -82,7 +85,7 @@ func TestAddPeerDown(t *testing.T) { return } - pin := c.StatusLocal(ci) + pin := c.StatusLocal(ctx, ci) if pin.Error != "" { t.Error(pin.Error) } diff --git a/adder/sharding/dag_service_test.go b/adder/sharding/dag_service_test.go index c6ad83bc..3ffb3348 100644 --- a/adder/sharding/dag_service_test.go +++ b/adder/sharding/dag_service_test.go @@ -44,7 +44,7 @@ func (rpcs *testRPC) BlockAllocate(ctx context.Context, in api.PinSerial, out *[ return nil } -func (rpcs *testRPC) PinGet(c cid.Cid) (api.Pin, error) { +func (rpcs *testRPC) PinGet(ctx context.Context, c cid.Cid) (api.Pin, error) { pI, ok := rpcs.pins.Load(c.String()) if !ok { return api.Pin{}, errors.New("not found") @@ -52,7 +52,7 @@ func (rpcs *testRPC) PinGet(c cid.Cid) (api.Pin, error) { return pI.(api.PinSerial).ToPin(), nil } -func (rpcs *testRPC) BlockGet(c cid.Cid) ([]byte, error) { +func (rpcs *testRPC) BlockGet(ctx context.Context, c cid.Cid) ([]byte, error) { bI, ok := rpcs.blocks.Load(c.String()) if !ok { return nil, errors.New("not found") diff --git a/adder/sharding/verify.go b/adder/sharding/verify.go index dc731775..e6afe2b6 100644 --- a/adder/sharding/verify.go +++ b/adder/sharding/verify.go @@ -1,6 +1,7 @@ package sharding import ( + "context" "fmt" "testing" @@ -12,20 +13,21 @@ import ( // MockPinStore is used in VerifyShards type MockPinStore interface { // Gets a pin - PinGet(cid.Cid) (api.Pin, error) + PinGet(context.Context, cid.Cid) (api.Pin, error) } // MockBlockStore is used in VerifyShards type MockBlockStore interface { // Gets a block - BlockGet(cid.Cid) ([]byte, error) + BlockGet(context.Context, cid.Cid) ([]byte, error) } // VerifyShards checks that a sharded CID has been correctly formed and stored. // This is a helper function for testing. It returns a map with all the blocks // from all shards. 
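MockPinStore and MockBlockStore above now take a context.Context as their first argument, matching the testRPC changes in dag_service_test.go. A minimal sketch of an in-memory store satisfying both updated interfaces; the memStore type and its fields are illustrative, not part of the patch:

package mocks

import (
	"context"
	"errors"

	"github.com/ipfs/ipfs-cluster/api"

	cid "github.com/ipfs/go-cid"
)

// memStore is a hypothetical in-memory pin/block store usable with
// VerifyShards: it implements both context-aware interfaces above.
type memStore struct {
	pins   map[string]api.Pin
	blocks map[string][]byte
}

// PinGet returns the stored pin for a CID. The context is accepted to
// satisfy the updated MockPinStore signature and is not used further here.
func (m *memStore) PinGet(ctx context.Context, c cid.Cid) (api.Pin, error) {
	p, ok := m.pins[c.String()]
	if !ok {
		return api.Pin{}, errors.New("not found")
	}
	return p, nil
}

// BlockGet returns the raw block bytes stored for a CID.
func (m *memStore) BlockGet(ctx context.Context, c cid.Cid) ([]byte, error) {
	b, ok := m.blocks[c.String()]
	if !ok {
		return nil, errors.New("not found")
	}
	return b, nil
}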
func VerifyShards(t *testing.T, rootCid cid.Cid, pins MockPinStore, ipfs MockBlockStore, expectedShards int) (map[string]struct{}, error) { - metaPin, err := pins.PinGet(rootCid) + ctx := context.Background() + metaPin, err := pins.PinGet(ctx, rootCid) if err != nil { return nil, fmt.Errorf("meta pin was not pinned: %s", err) } @@ -34,7 +36,7 @@ func VerifyShards(t *testing.T, rootCid cid.Cid, pins MockPinStore, ipfs MockBlo return nil, fmt.Errorf("bad MetaPin type") } - clusterPin, err := pins.PinGet(metaPin.Reference) + clusterPin, err := pins.PinGet(ctx, metaPin.Reference) if err != nil { return nil, fmt.Errorf("cluster pin was not pinned: %s", err) } @@ -46,7 +48,7 @@ func VerifyShards(t *testing.T, rootCid cid.Cid, pins MockPinStore, ipfs MockBlo return nil, fmt.Errorf("clusterDAG should reference the MetaPin") } - clusterDAGBlock, err := ipfs.BlockGet(clusterPin.Cid) + clusterDAGBlock, err := ipfs.BlockGet(ctx, clusterPin.Cid) if err != nil { return nil, fmt.Errorf("cluster pin was not stored: %s", err) } @@ -70,7 +72,7 @@ func VerifyShards(t *testing.T, rootCid cid.Cid, pins MockPinStore, ipfs MockBlo return nil, err } - shardPin, err := pins.PinGet(sh.Cid) + shardPin, err := pins.PinGet(ctx, sh.Cid) if err != nil { return nil, fmt.Errorf("shard was not pinned: %s %s", sh.Cid, err) } @@ -80,7 +82,7 @@ func VerifyShards(t *testing.T, rootCid cid.Cid, pins MockPinStore, ipfs MockBlo } ref = shardPin.Cid - shardBlock, err := ipfs.BlockGet(shardPin.Cid) + shardBlock, err := ipfs.BlockGet(ctx, shardPin.Cid) if err != nil { return nil, fmt.Errorf("shard block was not stored: %s", err) } diff --git a/allocate.go b/allocate.go index ba4b7f33..88d4f231 100644 --- a/allocate.go +++ b/allocate.go @@ -1,11 +1,13 @@ package ipfscluster import ( + "context" "errors" "fmt" cid "github.com/ipfs/go-cid" peer "github.com/libp2p/go-libp2p-peer" + "go.opencensus.io/trace" "github.com/ipfs/ipfs-cluster/api" ) @@ -44,7 +46,10 @@ import ( // into account if the given CID was previously in a "pin everywhere" mode, // and will consider such Pins as currently unallocated ones, providing // new allocations as available. 
-func (c *Cluster) allocate(hash cid.Cid, rplMin, rplMax int, blacklist []peer.ID, prioritylist []peer.ID) ([]peer.ID, error) { +func (c *Cluster) allocate(ctx context.Context, hash cid.Cid, rplMin, rplMax int, blacklist []peer.ID, prioritylist []peer.ID) ([]peer.ID, error) { + ctx, span := trace.StartSpan(ctx, "cluster/allocate") + defer span.End() + if (rplMin + rplMax) == 0 { return nil, fmt.Errorf("bad replication factors: %d/%d", rplMin, rplMax) } @@ -54,9 +59,9 @@ func (c *Cluster) allocate(hash cid.Cid, rplMin, rplMax int, blacklist []peer.ID } // Figure out who is holding the CID - currentPin, _ := c.PinGet(hash) + currentPin, _ := c.PinGet(ctx, hash) currentAllocs := currentPin.Allocations - metrics := c.monitor.LatestMetrics(c.informer.Name()) + metrics := c.monitor.LatestMetrics(ctx, c.informer.Name()) currentMetrics := make(map[peer.ID]api.Metric) candidatesMetrics := make(map[peer.ID]api.Metric) @@ -80,6 +85,7 @@ func (c *Cluster) allocate(hash cid.Cid, rplMin, rplMax int, blacklist []peer.ID } newAllocs, err := c.obtainAllocations( + ctx, hash, rplMin, rplMax, @@ -114,12 +120,15 @@ func allocationError(hash cid.Cid, needed, wanted int, candidatesValid []peer.ID } func (c *Cluster) obtainAllocations( + ctx context.Context, hash cid.Cid, rplMin, rplMax int, currentValidMetrics map[peer.ID]api.Metric, candidatesMetrics map[peer.ID]api.Metric, priorityMetrics map[peer.ID]api.Metric, ) ([]peer.ID, error) { + ctx, span := trace.StartSpan(ctx, "cluster/obtainAllocations") + defer span.End() // The list of peers in current validAllocations := make([]peer.ID, 0, len(currentValidMetrics)) @@ -167,6 +176,7 @@ func (c *Cluster) obtainAllocations( // the allocator returns a list of peers ordered by priority finalAllocs, err := c.allocator.Allocate( + ctx, hash, currentValidMetrics, candidatesMetrics, diff --git a/allocator/ascendalloc/ascendalloc.go b/allocator/ascendalloc/ascendalloc.go index 3b4be58a..3514be18 100644 --- a/allocator/ascendalloc/ascendalloc.go +++ b/allocator/ascendalloc/ascendalloc.go @@ -5,6 +5,8 @@ package ascendalloc import ( + "context" + "github.com/ipfs/ipfs-cluster/allocator/util" "github.com/ipfs/ipfs-cluster/api" @@ -28,14 +30,17 @@ func NewAllocator() AscendAllocator { func (alloc AscendAllocator) SetClient(c *rpc.Client) {} // Shutdown does nothing in this allocator -func (alloc AscendAllocator) Shutdown() error { return nil } +func (alloc AscendAllocator) Shutdown(_ context.Context) error { return nil } // Allocate returns where to allocate a pin request based on metrics which // carry a numeric value such as "used disk". We do not pay attention to // the metrics of the currently allocated peers and we just sort the // candidates based on their metric values (smallest to largest). 
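The ctx, span := trace.StartSpan(...) / defer span.End() pair added to allocate and obtainAllocations is the instrumentation pattern repeated throughout this change. A generic sketch of that pattern, with arbitrary span names, showing how the returned context makes nested calls appear as child spans:

package example

import (
	"context"

	"go.opencensus.io/trace"
)

// doWork opens a span named after the component and method, and hands the
// derived context to callees so their spans nest under this one.
func doWork(ctx context.Context) error {
	ctx, span := trace.StartSpan(ctx, "example/doWork")
	defer span.End()

	return nestedWork(ctx)
}

// nestedWork becomes a child span of example/doWork when called with the
// context returned by the parent's StartSpan.
func nestedWork(ctx context.Context) error {
	_, span := trace.StartSpan(ctx, "example/nestedWork")
	defer span.End()
	return nil
}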
-func (alloc AscendAllocator) Allocate(c cid.Cid, current, - candidates, priority map[peer.ID]api.Metric) ([]peer.ID, error) { +func (alloc AscendAllocator) Allocate( + ctx context.Context, + c cid.Cid, + current, candidates, priority map[peer.ID]api.Metric, +) ([]peer.ID, error) { // sort our metrics first := util.SortNumeric(priority, false) last := util.SortNumeric(candidates, false) diff --git a/allocator/ascendalloc/ascendalloc_test.go b/allocator/ascendalloc/ascendalloc_test.go index 5d0fd646..defdf353 100644 --- a/allocator/ascendalloc/ascendalloc_test.go +++ b/allocator/ascendalloc/ascendalloc_test.go @@ -1,6 +1,7 @@ package ascendalloc import ( + "context" "testing" "time" @@ -96,10 +97,11 @@ var testCases = []testcase{ } func Test(t *testing.T) { + ctx := context.Background() alloc := &AscendAllocator{} for i, tc := range testCases { t.Logf("Test case %d", i) - res, err := alloc.Allocate(testCid, tc.current, tc.candidates, nil) + res, err := alloc.Allocate(ctx, testCid, tc.current, tc.candidates, nil) if err != nil { t.Fatal(err) } diff --git a/allocator/descendalloc/descendalloc.go b/allocator/descendalloc/descendalloc.go index f4264b85..1c762e10 100644 --- a/allocator/descendalloc/descendalloc.go +++ b/allocator/descendalloc/descendalloc.go @@ -5,6 +5,8 @@ package descendalloc import ( + "context" + "github.com/ipfs/ipfs-cluster/allocator/util" "github.com/ipfs/ipfs-cluster/api" @@ -28,13 +30,13 @@ func NewAllocator() DescendAllocator { func (alloc DescendAllocator) SetClient(c *rpc.Client) {} // Shutdown does nothing in this allocator -func (alloc DescendAllocator) Shutdown() error { return nil } +func (alloc DescendAllocator) Shutdown(_ context.Context) error { return nil } // Allocate returns where to allocate a pin request based on metrics which // carry a numeric value such as "used disk". We do not pay attention to // the metrics of the currently allocated peers and we just sort the // candidates based on their metric values (largest to smallest). -func (alloc DescendAllocator) Allocate(c cid.Cid, current, candidates, priority map[peer.ID]api.Metric) ([]peer.ID, error) { +func (alloc DescendAllocator) Allocate(ctx context.Context, c cid.Cid, current, candidates, priority map[peer.ID]api.Metric) ([]peer.ID, error) { // sort our metrics first := util.SortNumeric(priority, true) last := util.SortNumeric(candidates, true) diff --git a/allocator/descendalloc/descendalloc_test.go b/allocator/descendalloc/descendalloc_test.go index 2552d1f6..2615ffc9 100644 --- a/allocator/descendalloc/descendalloc_test.go +++ b/allocator/descendalloc/descendalloc_test.go @@ -1,6 +1,7 @@ package descendalloc import ( + "context" "testing" "time" @@ -96,10 +97,11 @@ var testCases = []testcase{ } func Test(t *testing.T) { + ctx := context.Background() alloc := &DescendAllocator{} for i, tc := range testCases { t.Logf("Test case %d", i) - res, err := alloc.Allocate(testCid, tc.current, tc.candidates, nil) + res, err := alloc.Allocate(ctx, testCid, tc.current, tc.candidates, nil) if err != nil { t.Fatal(err) } diff --git a/api/ipfsproxy/config.go b/api/ipfsproxy/config.go index 5df8ee4f..0d1618cf 100644 --- a/api/ipfsproxy/config.go +++ b/api/ipfsproxy/config.go @@ -74,6 +74,9 @@ type Config struct { // Establishes how long we should remember extracted headers before we // refresh them with a new request. 0 means always. ExtractHeadersTTL time.Duration + + // Tracing flag used to skip tracing specific paths when not enabled. 
+ Tracing bool } type jsonConfig struct { diff --git a/api/ipfsproxy/ipfsproxy.go b/api/ipfsproxy/ipfsproxy.go index 36c21103..89a31196 100644 --- a/api/ipfsproxy/ipfsproxy.go +++ b/api/ipfsproxy/ipfsproxy.go @@ -13,6 +13,10 @@ import ( "sync" "time" + "go.opencensus.io/plugin/ochttp" + "go.opencensus.io/plugin/ochttp/propagation/tracecontext" + "go.opencensus.io/trace" + "github.com/ipfs/ipfs-cluster/adder/adderutils" "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/rpcutil" @@ -127,13 +131,28 @@ func New(cfg *Config) (*Server, error) { return nil, err } + var handler http.Handler router := mux.NewRouter() + handler = router + + if cfg.Tracing { + handler = &ochttp.Handler{ + IsPublicEndpoint: true, + Propagation: &tracecontext.HTTPFormat{}, + Handler: router, + StartOptions: trace.StartOptions{SpanKind: trace.SpanKindServer}, + FormatSpanName: func(req *http.Request) string { + return "proxy:" + req.Host + ":" + req.URL.Path + ":" + req.Method + }, + } + } + s := &http.Server{ ReadTimeout: cfg.ReadTimeout, WriteTimeout: cfg.WriteTimeout, ReadHeaderTimeout: cfg.ReadHeaderTimeout, IdleTimeout: cfg.IdleTimeout, - Handler: router, + Handler: handler, } // See: https://github.com/ipfs/go-ipfs/issues/5168 @@ -216,7 +235,7 @@ func (proxy *Server) SetClient(c *rpc.Client) { // Shutdown stops any listeners and stops the component from taking // any requests. -func (proxy *Server) Shutdown() error { +func (proxy *Server) Shutdown(ctx context.Context) error { proxy.shutdownLock.Lock() defer proxy.shutdownLock.Unlock() diff --git a/api/ipfsproxy/ipfsproxy_test.go b/api/ipfsproxy/ipfsproxy_test.go index 54bf6ad9..ac91c668 100644 --- a/api/ipfsproxy/ipfsproxy_test.go +++ b/api/ipfsproxy/ipfsproxy_test.go @@ -1,6 +1,7 @@ package ipfsproxy import ( + "context" "encoding/json" "fmt" "io/ioutil" @@ -47,9 +48,10 @@ func testIPFSProxy(t *testing.T) (*Server, *test.IpfsMock) { } func TestIPFSProxyVersion(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) res, err := http.Post(fmt.Sprintf("%s/version", proxyURL(proxy)), "", nil) if err != nil { @@ -76,9 +78,10 @@ func TestIPFSProxyVersion(t *testing.T) { } func TestIPFSProxyPin(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) type args struct { urlPath string @@ -178,9 +181,10 @@ func TestIPFSProxyPin(t *testing.T) { } func TestIPFSProxyUnpin(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) type args struct { urlPath string @@ -280,9 +284,10 @@ func TestIPFSProxyUnpin(t *testing.T) { } func TestIPFSProxyPinLs(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) t.Run("pin/ls query arg", func(t *testing.T) { res, err := http.Post(fmt.Sprintf("%s/pin/ls?arg=%s", proxyURL(proxy), test.TestCid1), "", nil) @@ -367,9 +372,10 @@ func TestIPFSProxyPinLs(t *testing.T) { } func TestProxyRepoStat(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) res, err := http.Post(fmt.Sprintf("%s/repo/stat", proxyURL(proxy)), "", nil) if err != nil { t.Fatal(err) @@ -396,9 +402,10 @@ func TestProxyRepoStat(t *testing.T) { } func TestProxyAdd(t *testing.T) { + ctx := 
context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) type testcase struct { query string @@ -468,9 +475,10 @@ func TestProxyAdd(t *testing.T) { } func TestProxyAddError(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) res, err := http.Post(fmt.Sprintf("%s/add?recursive=true", proxyURL(proxy)), "", nil) if err != nil { t.Fatal(err) @@ -482,9 +490,10 @@ func TestProxyAddError(t *testing.T) { } func TestProxyError(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) res, err := http.Post(fmt.Sprintf("%s/bad/command", proxyURL(proxy)), "", nil) if err != nil { @@ -502,12 +511,13 @@ func proxyURL(c *Server) string { } func TestIPFSProxy(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) defer mock.Close() - if err := proxy.Shutdown(); err != nil { + if err := proxy.Shutdown(ctx); err != nil { t.Error("expected a clean shutdown") } - if err := proxy.Shutdown(); err != nil { + if err := proxy.Shutdown(ctx); err != nil { t.Error("expected a second clean shutdown") } } @@ -521,10 +531,11 @@ func mustParseURL(rawurl string) *url.URL { } func TestHeaderExtraction(t *testing.T) { + ctx := context.Background() proxy, mock := testIPFSProxy(t) proxy.config.ExtractHeadersTTL = time.Second defer mock.Close() - defer proxy.Shutdown() + defer proxy.Shutdown(ctx) req, err := http.NewRequest("POST", fmt.Sprintf("%s/pin/ls", proxyURL(proxy)), nil) if err != nil { diff --git a/api/rest/client/client.go b/api/rest/client/client.go index 97967cbf..bc0eb243 100644 --- a/api/rest/client/client.go +++ b/api/rest/client/client.go @@ -21,6 +21,10 @@ import ( ma "github.com/multiformats/go-multiaddr" madns "github.com/multiformats/go-multiaddr-dns" manet "github.com/multiformats/go-multiaddr-net" + + "go.opencensus.io/plugin/ochttp" + "go.opencensus.io/plugin/ochttp/propagation/tracecontext" + "go.opencensus.io/trace" ) // Configuration defaults @@ -37,76 +41,79 @@ var loggingFacility = "apiclient" var logger = logging.Logger(loggingFacility) // Client interface defines the interface to be used by API clients to -// interact with the ipfs-cluster-service +// interact with the ipfs-cluster-service. All methods take a +// context.Context as their first parameter, this allows for +// timing out and cancelling of requests as well as recording +// metrics and tracing of requests through the API. type Client interface { // ID returns information about the cluster Peer. - ID() (api.ID, error) + ID(context.Context) (api.ID, error) // Peers requests ID information for all cluster peers. - Peers() ([]api.ID, error) + Peers(context.Context) ([]api.ID, error) // PeerAdd adds a new peer to the cluster. - PeerAdd(pid peer.ID) (api.ID, error) + PeerAdd(ctx context.Context, pid peer.ID) (api.ID, error) // PeerRm removes a current peer from the cluster - PeerRm(pid peer.ID) error + PeerRm(ctx context.Context, pid peer.ID) error // Add imports files to the cluster from the given paths. - Add(paths []string, params *api.AddParams, out chan<- *api.AddedOutput) error + Add(ctx context.Context, paths []string, params *api.AddParams, out chan<- *api.AddedOutput) error // AddMultiFile imports new files from a MultiFileReader. 
- AddMultiFile(multiFileR *files.MultiFileReader, params *api.AddParams, out chan<- *api.AddedOutput) error + AddMultiFile(ctx context.Context, multiFileR *files.MultiFileReader, params *api.AddParams, out chan<- *api.AddedOutput) error // Pin tracks a Cid with the given replication factor and a name for // human-friendliness. - Pin(ci cid.Cid, replicationFactorMin, replicationFactorMax int, name string) error + Pin(ctx context.Context, ci cid.Cid, replicationFactorMin, replicationFactorMax int, name string) error // Unpin untracks a Cid from cluster. - Unpin(ci cid.Cid) error + Unpin(ctx context.Context, ci cid.Cid) error // Allocations returns the consensus state listing all tracked items // and the peers that should be pinning them. - Allocations(filter api.PinType) ([]api.Pin, error) + Allocations(ctx context.Context, filter api.PinType) ([]api.Pin, error) // Allocation returns the current allocations for a given Cid. - Allocation(ci cid.Cid) (api.Pin, error) + Allocation(ctx context.Context, ci cid.Cid) (api.Pin, error) // Status returns the current ipfs state for a given Cid. If local is true, // the information affects only the current peer, otherwise the information // is fetched from all cluster peers. - Status(ci cid.Cid, local bool) (api.GlobalPinInfo, error) + Status(ctx context.Context, ci cid.Cid, local bool) (api.GlobalPinInfo, error) // StatusAll gathers Status() for all tracked items. - StatusAll(filter api.TrackerStatus, local bool) ([]api.GlobalPinInfo, error) + StatusAll(ctx context.Context, filter api.TrackerStatus, local bool) ([]api.GlobalPinInfo, error) // Sync makes sure the state of a Cid corresponds to the state reported // by the ipfs daemon, and returns it. If local is true, this operation // only happens on the current peer, otherwise it happens on every // cluster peer. - Sync(ci cid.Cid, local bool) (api.GlobalPinInfo, error) + Sync(ctx context.Context, ci cid.Cid, local bool) (api.GlobalPinInfo, error) // SyncAll triggers Sync() operations for all tracked items. It only // returns informations for items that were de-synced or have an error // state. If local is true, the operation is limited to the current // peer. Otherwise it happens on every cluster peer. - SyncAll(local bool) ([]api.GlobalPinInfo, error) + SyncAll(ctx context.Context, local bool) ([]api.GlobalPinInfo, error) // Recover retriggers pin or unpin ipfs operations for a Cid in error // state. If local is true, the operation is limited to the current // peer, otherwise it happens on every cluster peer. - Recover(ci cid.Cid, local bool) (api.GlobalPinInfo, error) + Recover(ctx context.Context, ci cid.Cid, local bool) (api.GlobalPinInfo, error) // RecoverAll triggers Recover() operations on all tracked items. If // local is true, the operation is limited to the current peer. // Otherwise, it happens everywhere. - RecoverAll(local bool) ([]api.GlobalPinInfo, error) + RecoverAll(ctx context.Context, local bool) ([]api.GlobalPinInfo, error) // Version returns the ipfs-cluster peer's version. - Version() (api.Version, error) + Version(context.Context) (api.Version, error) // IPFS returns an instance of go-ipfs-api's Shell, pointing to a // Cluster's IPFS proxy endpoint. - IPFS() *shell.Shell + IPFS(context.Context) *shell.Shell // GetConnectGraph returns an ipfs-cluster connection graph. 
The // serialized version, strings instead of pids, is returned - GetConnectGraph() (api.ConnectGraphSerial, error) + GetConnectGraph(context.Context) (api.ConnectGraphSerial, error) // Metrics returns a map with the latest metrics of matching name // for the current cluster peers. - Metrics(name string) ([]api.Metric, error) + Metrics(ctx context.Context, name string) ([]api.Metric, error) } // Config allows to configure the parameters to connect @@ -290,8 +297,14 @@ func (c *defaultClient) setupHTTPClient() error { } c.client = &http.Client{ - Transport: c.transport, - Timeout: c.config.Timeout, + Transport: &ochttp.Transport{ + Base: c.transport, + Propagation: &tracecontext.HTTPFormat{}, + StartOptions: trace.StartOptions{SpanKind: trace.SpanKindClient}, + FormatSpanName: func(req *http.Request) string { return req.Host + ":" + req.URL.Path + ":" + req.Method }, + NewClientTrace: ochttp.NewSpanAnnotatingClientTrace, + }, + Timeout: c.config.Timeout, } return nil } @@ -328,7 +341,7 @@ func (c *defaultClient) setupProxy() error { // configured ProxyAddr (or to the default Cluster's IPFS proxy port). // It re-uses this Client's HTTP client, thus will be constrained by // the same configurations affecting it (timeouts...). -func (c *defaultClient) IPFS() *shell.Shell { +func (c *defaultClient) IPFS(ctx context.Context) *shell.Shell { return shell.NewShellWithClient(c.config.ProxyAddr.String(), c.client) } diff --git a/api/rest/client/client_test.go b/api/rest/client/client_test.go index 6ae1ddf6..b41d7f3b 100644 --- a/api/rest/client/client_test.go +++ b/api/rest/client/client_test.go @@ -16,6 +16,7 @@ import ( ) func testAPI(t *testing.T) *rest.API { + ctx := context.Background() //logging.SetDebugLogging() apiMAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0") @@ -37,7 +38,7 @@ func testAPI(t *testing.T) *rest.API { t.Fatal(err) } - rest, err := rest.NewAPIWithHost(cfg, h) + rest, err := rest.NewAPIWithHost(ctx, cfg, h) if err != nil { t.Fatal("should be able to create a new Api: ", err) } @@ -47,7 +48,8 @@ func testAPI(t *testing.T) *rest.API { } func shutdown(a *rest.API) { - a.Shutdown() + ctx := context.Background() + a.Shutdown(ctx) a.Host().Close() } @@ -264,6 +266,7 @@ func TestProxyAddress(t *testing.T) { } func TestIPFS(t *testing.T) { + ctx := context.Background() ipfsMock := test.NewIpfsMock() defer ipfsMock.Close() @@ -284,7 +287,7 @@ func TestIPFS(t *testing.T) { t.Fatal(err) } dc := c.(*defaultClient) - ipfs := dc.IPFS() + ipfs := dc.IPFS(ctx) err = ipfs.Pin(test.TestCid1) if err != nil { diff --git a/api/rest/client/methods.go b/api/rest/client/methods.go index 15300715..ef76a91d 100644 --- a/api/rest/client/methods.go +++ b/api/rest/client/methods.go @@ -13,6 +13,8 @@ import ( "strings" "time" + "go.opencensus.io/trace" + "github.com/ipfs/ipfs-cluster/api" cid "github.com/ipfs/go-cid" @@ -21,16 +23,22 @@ import ( ) // ID returns information about the cluster Peer. -func (c *defaultClient) ID() (api.ID, error) { +func (c *defaultClient) ID(ctx context.Context) (api.ID, error) { + ctx, span := trace.StartSpan(ctx, "client/ID") + defer span.End() + var id api.IDSerial - err := c.do("GET", "/id", nil, nil, &id) + err := c.do(ctx, "GET", "/id", nil, nil, &id) return id.ToID(), err } // Peers requests ID information for all cluster peers. 
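With every Client method now taking a context and the HTTP transport wrapped in ochttp, callers control timeouts and cancellation per request and get client spans automatically. A usage sketch against a peer's REST API; the address, CID string and replication factors are placeholders:

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/ipfs/ipfs-cluster/api/rest/client"

	cid "github.com/ipfs/go-cid"
	ma "github.com/multiformats/go-multiaddr"
)

func main() {
	// Placeholder address for a cluster peer's REST API.
	addr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/9094")
	c, err := client.NewDefaultClient(&client.Config{APIAddr: addr})
	if err != nil {
		log.Fatal(err)
	}

	// The context now bounds the whole call: cancelling it or hitting the
	// deadline aborts the underlying (ochttp-instrumented) HTTP request.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	ci, err := cid.Decode("QmP63DkAFEnDYNjDYBpyNDfttu1fvUw99x1brscPzpqmmq") // any valid CID
	if err != nil {
		log.Fatal(err)
	}
	if err := c.Pin(ctx, ci, 2, 3, "example pin"); err != nil {
		log.Fatal(err)
	}

	gpi, err := c.Status(ctx, ci, false)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(gpi.Cid)
}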
-func (c *defaultClient) Peers() ([]api.ID, error) { +func (c *defaultClient) Peers(ctx context.Context) ([]api.ID, error) { + ctx, span := trace.StartSpan(ctx, "client/Peers") + defer span.End() + var ids []api.IDSerial - err := c.do("GET", "/peers", nil, nil, &ids) + err := c.do(ctx, "GET", "/peers", nil, nil, &ids) result := make([]api.ID, len(ids)) for i, id := range ids { result[i] = id.ToID() @@ -43,7 +51,10 @@ type peerAddBody struct { } // PeerAdd adds a new peer to the cluster. -func (c *defaultClient) PeerAdd(pid peer.ID) (api.ID, error) { +func (c *defaultClient) PeerAdd(ctx context.Context, pid peer.ID) (api.ID, error) { + ctx, span := trace.StartSpan(ctx, "client/PeerAdd") + defer span.End() + pidStr := peer.IDB58Encode(pid) body := peerAddBody{pidStr} @@ -52,20 +63,27 @@ func (c *defaultClient) PeerAdd(pid peer.ID) (api.ID, error) { enc.Encode(body) var id api.IDSerial - err := c.do("POST", "/peers", nil, &buf, &id) + err := c.do(ctx, "POST", "/peers", nil, &buf, &id) return id.ToID(), err } // PeerRm removes a current peer from the cluster -func (c *defaultClient) PeerRm(id peer.ID) error { - return c.do("DELETE", fmt.Sprintf("/peers/%s", id.Pretty()), nil, nil, nil) +func (c *defaultClient) PeerRm(ctx context.Context, id peer.ID) error { + ctx, span := trace.StartSpan(ctx, "client/PeerRm") + defer span.End() + + return c.do(ctx, "DELETE", fmt.Sprintf("/peers/%s", id.Pretty()), nil, nil, nil) } // Pin tracks a Cid with the given replication factor and a name for // human-friendliness. -func (c *defaultClient) Pin(ci cid.Cid, replicationFactorMin, replicationFactorMax int, name string) error { +func (c *defaultClient) Pin(ctx context.Context, ci cid.Cid, replicationFactorMin, replicationFactorMax int, name string) error { + ctx, span := trace.StartSpan(ctx, "client/Pin") + defer span.End() + escName := url.QueryEscape(name) err := c.do( + ctx, "POST", fmt.Sprintf( "/pins/%s?replication-min=%d&replication-max=%d&name=%s", @@ -82,13 +100,18 @@ func (c *defaultClient) Pin(ci cid.Cid, replicationFactorMin, replicationFactorM } // Unpin untracks a Cid from cluster. -func (c *defaultClient) Unpin(ci cid.Cid) error { - return c.do("DELETE", fmt.Sprintf("/pins/%s", ci.String()), nil, nil, nil) +func (c *defaultClient) Unpin(ctx context.Context, ci cid.Cid) error { + ctx, span := trace.StartSpan(ctx, "client/Unpin") + defer span.End() + return c.do(ctx, "DELETE", fmt.Sprintf("/pins/%s", ci.String()), nil, nil, nil) } // Allocations returns the consensus state listing all tracked items and // the peers that should be pinning them. -func (c *defaultClient) Allocations(filter api.PinType) ([]api.Pin, error) { +func (c *defaultClient) Allocations(ctx context.Context, filter api.PinType) ([]api.Pin, error) { + ctx, span := trace.StartSpan(ctx, "client/Allocations") + defer span.End() + var pins []api.PinSerial types := []api.PinType{ @@ -111,7 +134,7 @@ func (c *defaultClient) Allocations(filter api.PinType) ([]api.Pin, error) { } f := url.QueryEscape(strings.Join(strFilter, ",")) - err := c.do("GET", fmt.Sprintf("/allocations?filter=%s", f), nil, nil, &pins) + err := c.do(ctx, "GET", fmt.Sprintf("/allocations?filter=%s", f), nil, nil, &pins) result := make([]api.Pin, len(pins)) for i, p := range pins { result[i] = p.ToPin() @@ -120,18 +143,24 @@ func (c *defaultClient) Allocations(filter api.PinType) ([]api.Pin, error) { } // Allocation returns the current allocations for a given Cid. 
-func (c *defaultClient) Allocation(ci cid.Cid) (api.Pin, error) { +func (c *defaultClient) Allocation(ctx context.Context, ci cid.Cid) (api.Pin, error) { + ctx, span := trace.StartSpan(ctx, "client/Allocation") + defer span.End() + var pin api.PinSerial - err := c.do("GET", fmt.Sprintf("/allocations/%s", ci.String()), nil, nil, &pin) + err := c.do(ctx, "GET", fmt.Sprintf("/allocations/%s", ci.String()), nil, nil, &pin) return pin.ToPin(), err } // Status returns the current ipfs state for a given Cid. If local is true, // the information affects only the current peer, otherwise the information // is fetched from all cluster peers. -func (c *defaultClient) Status(ci cid.Cid, local bool) (api.GlobalPinInfo, error) { +func (c *defaultClient) Status(ctx context.Context, ci cid.Cid, local bool) (api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "client/Status") + defer span.End() + var gpi api.GlobalPinInfoSerial - err := c.do("GET", fmt.Sprintf("/pins/%s?local=%t", ci.String(), local), nil, nil, &gpi) + err := c.do(ctx, "GET", fmt.Sprintf("/pins/%s?local=%t", ci.String(), local), nil, nil, &gpi) return gpi.ToGlobalPinInfo(), err } @@ -140,7 +169,10 @@ func (c *defaultClient) Status(ci cid.Cid, local bool) (api.GlobalPinInfo, error // will be returned. A filter can be built by merging TrackerStatuses with // a bitwise OR operation (st1 | st2 | ...). A "0" filter value (or // api.TrackerStatusUndefined), means all. -func (c *defaultClient) StatusAll(filter api.TrackerStatus, local bool) ([]api.GlobalPinInfo, error) { +func (c *defaultClient) StatusAll(ctx context.Context, filter api.TrackerStatus, local bool) ([]api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "client/StatusAll") + defer span.End() + var gpis []api.GlobalPinInfoSerial filterStr := "" @@ -151,7 +183,7 @@ func (c *defaultClient) StatusAll(filter api.TrackerStatus, local bool) ([]api.G } } - err := c.do("GET", fmt.Sprintf("/pins?local=%t&filter=%s", local, url.QueryEscape(filterStr)), nil, nil, &gpis) + err := c.do(ctx, "GET", fmt.Sprintf("/pins?local=%t&filter=%s", local, url.QueryEscape(filterStr)), nil, nil, &gpis) result := make([]api.GlobalPinInfo, len(gpis)) for i, p := range gpis { result[i] = p.ToGlobalPinInfo() @@ -162,9 +194,12 @@ func (c *defaultClient) StatusAll(filter api.TrackerStatus, local bool) ([]api.G // Sync makes sure the state of a Cid corresponds to the state reported by // the ipfs daemon, and returns it. If local is true, this operation only // happens on the current peer, otherwise it happens on every cluster peer. -func (c *defaultClient) Sync(ci cid.Cid, local bool) (api.GlobalPinInfo, error) { +func (c *defaultClient) Sync(ctx context.Context, ci cid.Cid, local bool) (api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "client/Sync") + defer span.End() + var gpi api.GlobalPinInfoSerial - err := c.do("POST", fmt.Sprintf("/pins/%s/sync?local=%t", ci.String(), local), nil, nil, &gpi) + err := c.do(ctx, "POST", fmt.Sprintf("/pins/%s/sync?local=%t", ci.String(), local), nil, nil, &gpi) return gpi.ToGlobalPinInfo(), err } @@ -172,9 +207,12 @@ func (c *defaultClient) Sync(ci cid.Cid, local bool) (api.GlobalPinInfo, error) // informations for items that were de-synced or have an error state. If // local is true, the operation is limited to the current peer. Otherwise // it happens on every cluster peer. 
-func (c *defaultClient) SyncAll(local bool) ([]api.GlobalPinInfo, error) { +func (c *defaultClient) SyncAll(ctx context.Context, local bool) ([]api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "client/SyncAll") + defer span.End() + var gpis []api.GlobalPinInfoSerial - err := c.do("POST", fmt.Sprintf("/pins/sync?local=%t", local), nil, nil, &gpis) + err := c.do(ctx, "POST", fmt.Sprintf("/pins/sync?local=%t", local), nil, nil, &gpis) result := make([]api.GlobalPinInfo, len(gpis)) for i, p := range gpis { result[i] = p.ToGlobalPinInfo() @@ -185,18 +223,24 @@ func (c *defaultClient) SyncAll(local bool) ([]api.GlobalPinInfo, error) { // Recover retriggers pin or unpin ipfs operations for a Cid in error state. // If local is true, the operation is limited to the current peer, otherwise // it happens on every cluster peer. -func (c *defaultClient) Recover(ci cid.Cid, local bool) (api.GlobalPinInfo, error) { +func (c *defaultClient) Recover(ctx context.Context, ci cid.Cid, local bool) (api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "client/Recover") + defer span.End() + var gpi api.GlobalPinInfoSerial - err := c.do("POST", fmt.Sprintf("/pins/%s/recover?local=%t", ci.String(), local), nil, nil, &gpi) + err := c.do(ctx, "POST", fmt.Sprintf("/pins/%s/recover?local=%t", ci.String(), local), nil, nil, &gpi) return gpi.ToGlobalPinInfo(), err } // RecoverAll triggers Recover() operations on all tracked items. If local is // true, the operation is limited to the current peer. Otherwise, it happens // everywhere. -func (c *defaultClient) RecoverAll(local bool) ([]api.GlobalPinInfo, error) { +func (c *defaultClient) RecoverAll(ctx context.Context, local bool) ([]api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "client/RecoverAll") + defer span.End() + var gpis []api.GlobalPinInfoSerial - err := c.do("POST", fmt.Sprintf("/pins/recover?local=%t", local), nil, nil, &gpis) + err := c.do(ctx, "POST", fmt.Sprintf("/pins/recover?local=%t", local), nil, nil, &gpis) result := make([]api.GlobalPinInfo, len(gpis)) for i, p := range gpis { result[i] = p.ToGlobalPinInfo() @@ -205,28 +249,37 @@ func (c *defaultClient) RecoverAll(local bool) ([]api.GlobalPinInfo, error) { } // Version returns the ipfs-cluster peer's version. -func (c *defaultClient) Version() (api.Version, error) { +func (c *defaultClient) Version(ctx context.Context) (api.Version, error) { + ctx, span := trace.StartSpan(ctx, "client/Version") + defer span.End() + var ver api.Version - err := c.do("GET", "/version", nil, nil, &ver) + err := c.do(ctx, "GET", "/version", nil, nil, &ver) return ver, err } // GetConnectGraph returns an ipfs-cluster connection graph. // The serialized version, strings instead of pids, is returned -func (c *defaultClient) GetConnectGraph() (api.ConnectGraphSerial, error) { +func (c *defaultClient) GetConnectGraph(ctx context.Context) (api.ConnectGraphSerial, error) { + ctx, span := trace.StartSpan(ctx, "client/GetConnectGraph") + defer span.End() + var graphS api.ConnectGraphSerial - err := c.do("GET", "/health/graph", nil, nil, &graphS) + err := c.do(ctx, "GET", "/health/graph", nil, nil, &graphS) return graphS, err } // Metrics returns a map with the latest valid metrics of the given name // for the current cluster peers. 
-func (c *defaultClient) Metrics(name string) ([]api.Metric, error) { +func (c *defaultClient) Metrics(ctx context.Context, name string) ([]api.Metric, error) { + ctx, span := trace.StartSpan(ctx, "client/Metrics") + defer span.End() + if name == "" { return nil, errors.New("bad metric name") } var metrics []api.Metric - err := c.do("GET", fmt.Sprintf("/monitor/metrics/%s", name), nil, nil, &metrics) + err := c.do(ctx, "GET", fmt.Sprintf("/monitor/metrics/%s", name), nil, nil, &metrics) return metrics, err } @@ -239,6 +292,9 @@ func (c *defaultClient) Metrics(name string) ([]api.Metric, error) { // If an error of some type happens, WaitFor returns immediately with an // empty GlobalPinInfo. func WaitFor(ctx context.Context, c Client, fp StatusFilterParams) (api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "client/WaitFor") + defer span.End() + ctx, cancel := context.WithCancel(ctx) defer cancel() @@ -326,7 +382,7 @@ func (sf *statusFilter) pollStatus(ctx context.Context, c Client, fp StatusFilte sf.Err <- ctx.Err() return case <-ticker.C: - gblPinInfo, err := c.Status(fp.Cid, fp.Local) + gblPinInfo, err := c.Status(ctx, fp.Cid, fp.Local) if err != nil { sf.Err <- err return @@ -397,10 +453,13 @@ func makeSerialFile(fpath string, params *api.AddParams) (files.Node, error) { // peers. The output channel will receive regular updates as the adding // process progresses. func (c *defaultClient) Add( + ctx context.Context, paths []string, params *api.AddParams, out chan<- *api.AddedOutput, ) error { + ctx, span := trace.StartSpan(ctx, "client/Add") + defer span.End() addFiles := make([]files.DirEntry, len(paths), len(paths)) for i, p := range paths { @@ -428,15 +487,19 @@ func (c *defaultClient) Add( // If `form` is set to true, the multipart data will have // a Content-Type of 'multipart/form-data', if `form` is false, // the Content-Type will be 'multipart/mixed'. - return c.AddMultiFile(files.NewMultiFileReader(sliceFile, true), params, out) + return c.AddMultiFile(ctx, files.NewMultiFileReader(sliceFile, true), params, out) } // AddMultiFile imports new files from a MultiFileReader. See Add(). 
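A sketch of driving the context-aware Add from caller code, consuming the output channel that the client closes when the add finishes; api.DefaultAddParams and the ./mydir path are assumptions made for illustration:

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/api/rest/client"

	ma "github.com/multiformats/go-multiaddr"
)

func main() {
	addr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/9094") // placeholder peer address
	c, err := client.NewDefaultClient(&client.Config{APIAddr: addr})
	if err != nil {
		log.Fatal(err)
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	out := make(chan *api.AddedOutput, 10)
	done := make(chan struct{})
	go func() {
		defer close(done)
		// AddMultiFile closes out on return, so this loop terminates.
		for o := range out {
			fmt.Println("added:", o.Name)
		}
	}()

	params := api.DefaultAddParams() // assumed default-params helper
	err = c.Add(ctx, []string{"./mydir"}, params, out)
	<-done
	if err != nil {
		log.Fatal(err)
	}
}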
func (c *defaultClient) AddMultiFile( + ctx context.Context, multiFileR *files.MultiFileReader, params *api.AddParams, out chan<- *api.AddedOutput, ) error { + ctx, span := trace.StartSpan(ctx, "client/AddMultiFile") + defer span.End() + defer close(out) headers := make(map[string]string) @@ -461,7 +524,7 @@ func (c *defaultClient) AddMultiFile( return nil } - err := c.doStream( + err := c.doStream(ctx, "POST", "/add?"+queryStr, headers, diff --git a/api/rest/client/methods_test.go b/api/rest/client/methods_test.go index f1f49c04..cb3c6452 100644 --- a/api/rest/client/methods_test.go +++ b/api/rest/client/methods_test.go @@ -32,11 +32,12 @@ func testClients(t *testing.T, api *rest.API, f func(*testing.T, Client)) { } func TestVersion(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - v, err := c.Version() + v, err := c.Version(ctx) if err != nil || v.Version == "" { t.Logf("%+v", v) t.Log(err) @@ -48,11 +49,12 @@ func TestVersion(t *testing.T) { } func TestID(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - id, err := c.ID() + id, err := c.ID(ctx) if err != nil { t.Fatal(err) } @@ -65,11 +67,12 @@ func TestID(t *testing.T) { } func TestPeers(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - ids, err := c.Peers() + ids, err := c.Peers(ctx) if err != nil { t.Fatal(err) } @@ -82,13 +85,14 @@ func TestPeers(t *testing.T) { } func TestPeersWithError(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { addr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/44444") c, _ = NewDefaultClient(&Config{APIAddr: addr, DisableKeepAlives: true}) - ids, err := c.Peers() + ids, err := c.Peers(ctx) if err == nil { t.Fatal("expected error") } @@ -101,11 +105,12 @@ func TestPeersWithError(t *testing.T) { } func TestPeerAdd(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - id, err := c.PeerAdd(test.TestPeerID1) + id, err := c.PeerAdd(ctx, test.TestPeerID1) if err != nil { t.Fatal(err) } @@ -118,11 +123,12 @@ func TestPeerAdd(t *testing.T) { } func TestPeerRm(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - err := c.PeerRm(test.TestPeerID1) + err := c.PeerRm(ctx, test.TestPeerID1) if err != nil { t.Fatal(err) } @@ -132,12 +138,13 @@ func TestPeerRm(t *testing.T) { } func TestPin(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { ci, _ := cid.Decode(test.TestCid1) - err := c.Pin(ci, 6, 7, "hello there") + err := c.Pin(ctx, ci, 6, 7, "hello there") if err != nil { t.Fatal(err) } @@ -147,12 +154,13 @@ func TestPin(t *testing.T) { } func TestUnpin(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { ci, _ := cid.Decode(test.TestCid1) - err := c.Unpin(ci) + err := c.Unpin(ctx, ci) if err != nil { t.Fatal(err) } @@ -162,11 +170,12 @@ func TestUnpin(t *testing.T) { } func TestAllocations(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - pins, err := c.Allocations(types.DataType | types.MetaType) + pins, err := c.Allocations(ctx, types.DataType|types.MetaType) if err 
!= nil { t.Fatal(err) } @@ -179,12 +188,13 @@ func TestAllocations(t *testing.T) { } func TestAllocation(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { ci, _ := cid.Decode(test.TestCid1) - pin, err := c.Allocation(ci) + pin, err := c.Allocation(ctx, ci) if err != nil { t.Fatal(err) } @@ -197,12 +207,13 @@ func TestAllocation(t *testing.T) { } func TestStatus(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { ci, _ := cid.Decode(test.TestCid1) - pin, err := c.Status(ci, false) + pin, err := c.Status(ctx, ci, false) if err != nil { t.Fatal(err) } @@ -215,11 +226,12 @@ func TestStatus(t *testing.T) { } func TestStatusAll(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - pins, err := c.StatusAll(0, false) + pins, err := c.StatusAll(ctx, 0, false) if err != nil { t.Fatal(err) } @@ -229,7 +241,7 @@ func TestStatusAll(t *testing.T) { } // With local true - pins, err = c.StatusAll(0, true) + pins, err = c.StatusAll(ctx, 0, true) if err != nil { t.Fatal(err) } @@ -238,7 +250,7 @@ func TestStatusAll(t *testing.T) { } // With filter option - pins, err = c.StatusAll(types.TrackerStatusPinning, false) + pins, err = c.StatusAll(ctx, types.TrackerStatusPinning, false) if err != nil { t.Fatal(err) } @@ -246,7 +258,7 @@ func TestStatusAll(t *testing.T) { t.Error("there should be one pin") } - pins, err = c.StatusAll(types.TrackerStatusPinned|types.TrackerStatusError, false) + pins, err = c.StatusAll(ctx, types.TrackerStatusPinned|types.TrackerStatusError, false) if err != nil { t.Fatal(err) } @@ -254,7 +266,7 @@ func TestStatusAll(t *testing.T) { t.Error("there should be two pins") } - pins, err = c.StatusAll(1<<25, false) + pins, err = c.StatusAll(ctx, 1<<25, false) if err == nil { t.Error("expected an error") } @@ -264,12 +276,13 @@ func TestStatusAll(t *testing.T) { } func TestSync(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { ci, _ := cid.Decode(test.TestCid1) - pin, err := c.Sync(ci, false) + pin, err := c.Sync(ctx, ci, false) if err != nil { t.Fatal(err) } @@ -282,11 +295,12 @@ func TestSync(t *testing.T) { } func TestSyncAll(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - pins, err := c.SyncAll(false) + pins, err := c.SyncAll(ctx, false) if err != nil { t.Fatal(err) } @@ -300,12 +314,13 @@ func TestSyncAll(t *testing.T) { } func TestRecover(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { ci, _ := cid.Decode(test.TestCid1) - pin, err := c.Recover(ci, false) + pin, err := c.Recover(ctx, ci, false) if err != nil { t.Fatal(err) } @@ -318,11 +333,12 @@ func TestRecover(t *testing.T) { } func TestRecoverAll(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - _, err := c.RecoverAll(true) + _, err := c.RecoverAll(ctx, true) if err != nil { t.Fatal(err) } @@ -332,11 +348,12 @@ func TestRecoverAll(t *testing.T) { } func TestGetConnectGraph(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - cg, err := c.GetConnectGraph() + cg, err := c.GetConnectGraph(ctx) if err != nil { t.Fatal(err) } @@ -350,11 +367,12 @@ func 
TestGetConnectGraph(t *testing.T) { } func TestMetrics(t *testing.T) { + ctx := context.Background() api := testAPI(t) defer shutdown(api) testF := func(t *testing.T, c Client) { - m, err := c.Metrics("somemetricstype") + m, err := c.Metrics(ctx, "somemetricstype") if err != nil { t.Fatal(err) } @@ -425,6 +443,7 @@ func (wait *waitService) Status(ctx context.Context, in api.PinSerial, out *api. } func TestWaitFor(t *testing.T) { + ctx := context.Background() tapi := testAPI(t) defer shutdown(tapi) @@ -469,7 +488,7 @@ func TestWaitFor(t *testing.T) { } } }() - err := c.Pin(ci, 0, 0, "test") + err := c.Pin(ctx, ci, 0, 0, "test") if err != nil { t.Fatal(err) } @@ -480,8 +499,9 @@ func TestWaitFor(t *testing.T) { } func TestAddMultiFile(t *testing.T) { + ctx := context.Background() api := testAPI(t) - defer api.Shutdown() + defer api.Shutdown(ctx) sth := test.NewShardingTestHelper() defer sth.Clean(t) @@ -515,7 +535,7 @@ func TestAddMultiFile(t *testing.T) { } }() - err := c.AddMultiFile(mfr, p, out) + err := c.AddMultiFile(ctx, mfr, p, out) if err != nil { t.Fatal(err) } diff --git a/api/rest/client/request.go b/api/rest/client/request.go index 13a978ca..4a8b7258 100644 --- a/api/rest/client/request.go +++ b/api/rest/client/request.go @@ -1,25 +1,29 @@ package client import ( + "context" "encoding/json" "io" "io/ioutil" "net/http" "strings" + "go.opencensus.io/trace" + "github.com/ipfs/ipfs-cluster/api" ) type responseDecoder func(d *json.Decoder) error func (c *defaultClient) do( + ctx context.Context, method, path string, headers map[string]string, body io.Reader, obj interface{}, ) error { - resp, err := c.doRequest(method, path, headers, body) + resp, err := c.doRequest(ctx, method, path, headers, body) if err != nil { return &api.Error{Code: 0, Message: err.Error()} } @@ -27,13 +31,14 @@ func (c *defaultClient) do( } func (c *defaultClient) doStream( + ctx context.Context, method, path string, headers map[string]string, body io.Reader, outHandler responseDecoder, ) error { - resp, err := c.doRequest(method, path, headers, body) + resp, err := c.doRequest(ctx, method, path, headers, body) if err != nil { return &api.Error{Code: 0, Message: err.Error()} } @@ -41,10 +46,17 @@ func (c *defaultClient) doStream( } func (c *defaultClient) doRequest( + ctx context.Context, method, path string, headers map[string]string, body io.Reader, ) (*http.Response, error) { + span := trace.FromContext(ctx) + span.AddAttributes( + trace.StringAttribute("method", method), + trace.StringAttribute("path", path), + ) + defer span.End() urlpath := c.net + "://" + c.hostname + "/" + strings.TrimPrefix(path, "/") logger.Debugf("%s: %s", method, urlpath) @@ -71,6 +83,9 @@ func (c *defaultClient) doRequest( r.ContentLength = -1 // this lets go use "chunked". } + ctx = trace.NewContext(ctx, span) + r = r.WithContext(ctx) + return c.client.Do(r) } func (c *defaultClient) handleResponse(resp *http.Response, obj interface{}) error { diff --git a/api/rest/config.go b/api/rest/config.go index 8a62c4c3..5069ee29 100644 --- a/api/rest/config.go +++ b/api/rest/config.go @@ -112,6 +112,9 @@ type Config struct { CORSExposedHeaders []string CORSAllowCredentials bool CORSMaxAge time.Duration + + // Tracing flag used to skip tracing specific paths when not enabled. 
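A sketch of how an operator might enable the REST API Tracing flag declared below when constructing the component; cfg.Default and the listen address are assumptions, while the context-taking NewAPI and Shutdown come from this change:

package main

import (
	"context"
	"log"

	"github.com/ipfs/ipfs-cluster/api/rest"

	ma "github.com/multiformats/go-multiaddr"
)

func main() {
	ctx := context.Background()

	cfg := &rest.Config{}
	if err := cfg.Default(); err != nil { // assumed config initializer
		log.Fatal(err)
	}
	cfg.HTTPListenAddr, _ = ma.NewMultiaddr("/ip4/127.0.0.1/tcp/9094")
	cfg.Tracing = true // wraps the handler in ochttp so incoming requests are traced

	a, err := rest.NewAPI(ctx, cfg)
	if err != nil {
		log.Fatal(err)
	}
	defer a.Shutdown(ctx)
}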
+ Tracing bool } type jsonConfig struct { diff --git a/api/rest/config_test.go b/api/rest/config_test.go index 2500beae..67a27488 100644 --- a/api/rest/config_test.go +++ b/api/rest/config_test.go @@ -1,6 +1,7 @@ package rest import ( + "context" "encoding/json" "os" "testing" @@ -139,6 +140,7 @@ func TestLoadJSONEnvConfig(t *testing.T) { } func TestLibp2pConfig(t *testing.T) { + ctx := context.Background() cfg := &Config{} err := cfg.Default() if err != nil { @@ -174,11 +176,11 @@ func TestLibp2pConfig(t *testing.T) { } // Test creating a new API with a libp2p config - rest, err := NewAPI(cfg) + rest, err := NewAPI(ctx, cfg) if err != nil { t.Fatal(err) } - defer rest.Shutdown() + defer rest.Shutdown(ctx) badPid, _ := peer.IDB58Decode("QmTQ6oKHDwFjzr4ihirVCLJe8CxanxD3ZjGRYzubFuNDjE") cfg.ID = badPid diff --git a/api/rest/restapi.go b/api/rest/restapi.go index 60693349..df45a26a 100644 --- a/api/rest/restapi.go +++ b/api/rest/restapi.go @@ -22,6 +22,9 @@ import ( "time" "github.com/rs/cors" + "go.opencensus.io/plugin/ochttp" + "go.opencensus.io/plugin/ochttp/propagation/tracecontext" + "go.opencensus.io/trace" "github.com/ipfs/ipfs-cluster/adder/adderutils" types "github.com/ipfs/ipfs-cluster/api" @@ -98,13 +101,13 @@ type peerAddBody struct { } // NewAPI creates a new REST API component with the given configuration. -func NewAPI(cfg *Config) (*API, error) { - return NewAPIWithHost(cfg, nil) +func NewAPI(ctx context.Context, cfg *Config) (*API, error) { + return NewAPIWithHost(ctx, cfg, nil) } // NewAPIWithHost creates a new REST API component and enables // the libp2p-http endpoint using the given Host, if not nil. -func NewAPIWithHost(cfg *Config, h host.Host) (*API, error) { +func NewAPIWithHost(ctx context.Context, cfg *Config, h host.Host) (*API, error) { err := cfg.Validate() if err != nil { return nil, err @@ -118,6 +121,15 @@ func NewAPIWithHost(cfg *Config, h host.Host) (*API, error) { cfg.BasicAuthCreds, cors.New(*cfg.corsOptions()).Handler(router), ) + if cfg.Tracing { + handler = &ochttp.Handler{ + IsPublicEndpoint: true, + Propagation: &tracecontext.HTTPFormat{}, + Handler: handler, + StartOptions: trace.StartOptions{SpanKind: trace.SpanKindServer}, + FormatSpanName: func(req *http.Request) string { return req.Host + ":" + req.URL.Path + ":" + req.Method }, + } + } s := &http.Server{ ReadTimeout: cfg.ReadTimeout, ReadHeaderTimeout: cfg.ReadHeaderTimeout, @@ -131,7 +143,7 @@ func NewAPIWithHost(cfg *Config, h host.Host) (*API, error) { // on why this is re-enabled. s.SetKeepAlivesEnabled(true) - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := context.WithCancel(ctx) api := &API{ ctx: ctx, @@ -144,7 +156,7 @@ func NewAPIWithHost(cfg *Config, h host.Host) (*API, error) { api.addRoutes(router) // Set up api.httpListener if enabled - err = api.setupHTTP() + err = api.setupHTTP(ctx) if err != nil { return nil, err } @@ -159,11 +171,11 @@ func NewAPIWithHost(cfg *Config, h host.Host) (*API, error) { return nil, ErrNoEndpointsEnabled } - api.run() + api.run(ctx) return api, nil } -func (api *API) setupHTTP() error { +func (api *API) setupHTTP(ctx context.Context) error { if api.config.HTTPListenAddr == nil { return nil } @@ -238,7 +250,12 @@ func (api *API) addRoutes(router *mux.Router) { Methods(route.Method). Path(route.Pattern). Name(route.Name). 
- Handler(route.HandlerFunc) + Handler( + ochttp.WithRouteTag( + http.HandlerFunc(route.HandlerFunc), + "/"+route.Name, + ), + ) } api.router = router } @@ -406,20 +423,20 @@ func (api *API) routes() []route { } } -func (api *API) run() { +func (api *API) run(ctx context.Context) { if api.httpListener != nil { api.wg.Add(1) - go api.runHTTPServer() + go api.runHTTPServer(ctx) } if api.libp2pListener != nil { api.wg.Add(1) - go api.runLibp2pServer() + go api.runLibp2pServer(ctx) } } // runs in goroutine from run() -func (api *API) runHTTPServer() { +func (api *API) runHTTPServer(ctx context.Context) { defer api.wg.Done() <-api.rpcReady @@ -431,7 +448,7 @@ func (api *API) runHTTPServer() { } // runs in goroutine from run() -func (api *API) runLibp2pServer() { +func (api *API) runLibp2pServer(ctx context.Context) { defer api.wg.Done() <-api.rpcReady @@ -449,7 +466,10 @@ func (api *API) runLibp2pServer() { } // Shutdown stops any API listeners. -func (api *API) Shutdown() error { +func (api *API) Shutdown(ctx context.Context) error { + _, span := trace.StartSpan(ctx, "restapi/Shutdown") + defer span.End() + api.shutdownLock.Lock() defer api.shutdownLock.Unlock() @@ -566,7 +586,7 @@ func (api *API) addHandler(w http.ResponseWriter, r *http.Request) { // any errors sent as trailer adderutils.AddMultipartHTTPHandler( - api.ctx, + r.Context(), api.rpcClient, params, reader, @@ -587,6 +607,7 @@ func (api *API) peerListHandler(w http.ResponseWriter, r *http.Request) { struct{}{}, &peersSerial, ) + api.sendResponse(w, autoStatus, err, peersSerial) } @@ -636,7 +657,7 @@ func (api *API) peerRemoveHandler(w http.ResponseWriter, r *http.Request) { func (api *API) pinHandler(w http.ResponseWriter, r *http.Request) { if ps := api.parseCidOrError(w, r); ps.Cid != "" { logger.Debugf("rest api pinHandler: %s", ps.Cid) - + // span.AddAttributes(trace.StringAttribute("cid", ps.Cid)) err := api.rpcClient.CallContext( r.Context(), "", @@ -653,6 +674,7 @@ func (api *API) pinHandler(w http.ResponseWriter, r *http.Request) { func (api *API) unpinHandler(w http.ResponseWriter, r *http.Request) { if ps := api.parseCidOrError(w, r); ps.Cid != "" { logger.Debugf("rest api unpinHandler: %s", ps.Cid) + // span.AddAttributes(trace.StringAttribute("cid", ps.Cid)) err := api.rpcClient.CallContext( r.Context(), "", diff --git a/api/rest/restapi_test.go b/api/rest/restapi_test.go index d5afeed5..43f0f08f 100644 --- a/api/rest/restapi_test.go +++ b/api/rest/restapi_test.go @@ -32,8 +32,9 @@ const ( ) func testAPI(t *testing.T) *API { + ctx := context.Background() apiMAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0") - h, err := libp2p.New(context.Background(), libp2p.ListenAddrs(apiMAddr)) + h, err := libp2p.New(ctx, libp2p.ListenAddrs(apiMAddr)) if err != nil { t.Fatal(err) } @@ -46,7 +47,7 @@ func testAPI(t *testing.T) *API { //cfg.CORSAllowedHeaders = []string{"Content-Type"} cfg.CORSMaxAge = 10 * time.Minute - rest, err := NewAPIWithHost(cfg, h) + rest, err := NewAPIWithHost(ctx, cfg, h) if err != nil { t.Fatal("should be able to create a new API: ", err) } @@ -59,8 +60,9 @@ func testAPI(t *testing.T) *API { } func testHTTPSAPI(t *testing.T) *API { + ctx := context.Background() apiMAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0") - h, err := libp2p.New(context.Background(), libp2p.ListenAddrs(apiMAddr)) + h, err := libp2p.New(ctx, libp2p.ListenAddrs(apiMAddr)) if err != nil { t.Fatal(err) } @@ -75,7 +77,7 @@ func testHTTPSAPI(t *testing.T) *API { } cfg.HTTPListenAddr = apiMAddr - rest, err := NewAPIWithHost(cfg, h) + 
rest, err := NewAPIWithHost(ctx, cfg, h) if err != nil { t.Fatal("should be able to create a new https Api: ", err) } @@ -286,21 +288,23 @@ func testHTTPSEndPoint(t *testing.T, test testF) { } func TestAPIShutdown(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - err := rest.Shutdown() + err := rest.Shutdown(ctx) if err != nil { t.Error("should shutdown cleanly: ", err) } // test shutting down twice - rest.Shutdown() + rest.Shutdown(ctx) } func TestRestAPIIDEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) httpsrest := testHTTPSAPI(t) - defer rest.Shutdown() - defer httpsrest.Shutdown() + defer rest.Shutdown(ctx) + defer httpsrest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { id := api.IDSerial{} @@ -323,8 +327,9 @@ func TestRestAPIIDEndpoint(t *testing.T) { } func TestAPIVersionEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { ver := api.Version{} @@ -338,8 +343,9 @@ func TestAPIVersionEndpoint(t *testing.T) { } func TestAPIPeerstEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var list []api.IDSerial @@ -356,8 +362,9 @@ func TestAPIPeerstEndpoint(t *testing.T) { } func TestAPIPeerAddEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { id := api.IDSerial{} @@ -390,8 +397,9 @@ func TestAPIPeerAddEndpoint(t *testing.T) { } func TestAPIAddFileEndpointBadContentType(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { fmtStr1 := "/add?shard=true&repl_min=-1&repl_max=-1" @@ -409,8 +417,9 @@ func TestAPIAddFileEndpointBadContentType(t *testing.T) { } func TestAPIAddFileEndpointLocal(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) sth := test.NewShardingTestHelper() defer sth.Clean(t) @@ -441,8 +450,9 @@ func TestAPIAddFileEndpointLocal(t *testing.T) { } func TestAPIAddFileEndpointShard(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) sth := test.NewShardingTestHelper() defer sth.Clean(t) @@ -468,8 +478,9 @@ func TestAPIAddFileEndpointShard(t *testing.T) { } func TestAPIAddFileEndpoint_StreamChannelsFalse(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) sth := test.NewShardingTestHelper() defer sth.Clean(t) @@ -504,8 +515,9 @@ func TestAPIAddFileEndpoint_StreamChannelsFalse(t *testing.T) { } func TestAPIPeerRemoveEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { makeDelete(t, rest, url(rest)+"/peers/"+test.TestPeerID1.Pretty(), &struct{}{}) @@ -515,8 +527,9 @@ func TestAPIPeerRemoveEndpoint(t *testing.T) { } func TestConnectGraphEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var cg api.ConnectGraphSerial @@ -548,8 +561,9 @@ func TestConnectGraphEndpoint(t *testing.T) { } func TestAPIPinEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer 
rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { // test regular post @@ -571,8 +585,9 @@ func TestAPIPinEndpoint(t *testing.T) { } func TestAPIUnpinEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { // test regular delete @@ -594,8 +609,9 @@ func TestAPIUnpinEndpoint(t *testing.T) { } func TestAPIAllocationsEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp []api.PinSerial @@ -611,8 +627,9 @@ func TestAPIAllocationsEndpoint(t *testing.T) { } func TestAPIAllocationEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp api.PinSerial @@ -632,8 +649,9 @@ func TestAPIAllocationEndpoint(t *testing.T) { } func TestAPIMetricsEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp []api.MetricSerial @@ -655,8 +673,9 @@ func TestAPIMetricsEndpoint(t *testing.T) { } func TestAPIStatusAllEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp []api.GlobalPinInfoSerial @@ -710,8 +729,9 @@ func TestAPIStatusAllEndpoint(t *testing.T) { } func TestAPIStatusEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp api.GlobalPinInfoSerial @@ -748,8 +768,9 @@ func TestAPIStatusEndpoint(t *testing.T) { } func TestAPISyncAllEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp []api.GlobalPinInfoSerial @@ -774,8 +795,9 @@ func TestAPISyncAllEndpoint(t *testing.T) { } func TestAPISyncEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp api.GlobalPinInfoSerial @@ -812,8 +834,9 @@ func TestAPISyncEndpoint(t *testing.T) { } func TestAPIRecoverEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp api.GlobalPinInfoSerial @@ -835,8 +858,9 @@ func TestAPIRecoverEndpoint(t *testing.T) { } func TestAPIRecoverAllEndpoint(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) tf := func(t *testing.T, url urlF) { var resp []api.GlobalPinInfoSerial @@ -857,8 +881,9 @@ func TestAPIRecoverAllEndpoint(t *testing.T) { } func TestCORS(t *testing.T) { + ctx := context.Background() rest := testAPI(t) - defer rest.Shutdown() + defer rest.Shutdown(ctx) type testcase struct { method string diff --git a/cluster.go b/cluster.go index a75696d3..b2321546 100644 --- a/cluster.go +++ b/cluster.go @@ -17,6 +17,10 @@ import ( "github.com/ipfs/ipfs-cluster/state" "github.com/ipfs/ipfs-cluster/version" + ocgorpc "github.com/lanzafame/go-libp2p-ocgorpc" + + "go.opencensus.io/trace" + cid "github.com/ipfs/go-cid" rpc "github.com/libp2p/go-libp2p-gorpc" host "github.com/libp2p/go-libp2p-host" @@ -56,6 +60,7 @@ type Cluster struct { monitor PeerMonitor 
allocator PinAllocator informer Informer + tracer Tracer doneCh chan struct{} readyCh chan struct{} @@ -88,6 +93,7 @@ func NewCluster( monitor PeerMonitor, allocator PinAllocator, informer Informer, + tracer Tracer, ) (*Cluster, error) { err := cfg.Validate() if err != nil { @@ -141,6 +147,7 @@ func NewCluster( monitor: monitor, allocator: allocator, informer: informer, + tracer: tracer, peerManager: peerManager, shutdownB: false, removed: false, @@ -151,7 +158,7 @@ func NewCluster( err = c.setupRPC() if err != nil { - c.Shutdown() + c.Shutdown(ctx) return nil, err } c.setupRPCClients() @@ -159,17 +166,31 @@ func NewCluster( c.ready(ReadyTimeout) c.run() }() + return c, nil } func (c *Cluster) setupRPC() error { - rpcServer := rpc.NewServer(c.host, version.RPCProtocol) + var rpcServer *rpc.Server + if c.config.Tracing { + sh := &ocgorpc.ServerHandler{} + rpcServer = rpc.NewServer(c.host, version.RPCProtocol, rpc.WithServerStatsHandler(sh)) + } else { + rpcServer = rpc.NewServer(c.host, version.RPCProtocol) + } err := rpcServer.RegisterName("Cluster", &RPCAPI{c}) if err != nil { return err } c.rpcServer = rpcServer - rpcClient := rpc.NewClientWithServer(c.host, version.RPCProtocol, rpcServer) + + var rpcClient *rpc.Client + if c.config.Tracing { + csh := &ocgorpc.ClientHandler{} + rpcClient = rpc.NewClientWithServer(c.host, version.RPCProtocol, rpcServer, rpc.WithClientStatsHandler(csh)) + } else { + rpcClient = rpc.NewClientWithServer(c.host, version.RPCProtocol, rpcServer) + } c.rpcClient = rpcClient return nil } @@ -188,6 +209,9 @@ func (c *Cluster) setupRPCClients() { // syncWatcher loops and triggers StateSync and SyncAllLocal from time to time func (c *Cluster) syncWatcher() { + ctx, span := trace.StartSpan(c.ctx, "cluster/syncWatcher") + defer span.End() + stateSyncTicker := time.NewTicker(c.config.StateSyncInterval) syncTicker := time.NewTicker(c.config.IPFSSyncInterval) @@ -195,10 +219,10 @@ func (c *Cluster) syncWatcher() { select { case <-stateSyncTicker.C: logger.Debug("auto-triggering StateSync()") - c.StateSync() + c.StateSync(ctx) case <-syncTicker.C: logger.Debug("auto-triggering SyncAllLocal()") - c.SyncAllLocal() + c.SyncAllLocal(ctx) case <-c.ctx.Done(): stateSyncTicker.Stop() return @@ -206,16 +230,22 @@ func (c *Cluster) syncWatcher() { } } -func (c *Cluster) sendInformerMetric() (api.Metric, error) { - metric := c.informer.GetMetric() +func (c *Cluster) sendInformerMetric(ctx context.Context) (api.Metric, error) { + ctx, span := trace.StartSpan(ctx, "cluster/sendInformerMetric") + defer span.End() + + metric := c.informer.GetMetric(ctx) metric.Peer = c.id - return metric, c.monitor.PublishMetric(metric) + return metric, c.monitor.PublishMetric(ctx, metric) } // pushInformerMetrics loops and publishes informers metrics using the // cluster monitor. Metrics are pushed normally at a TTL/2 rate. If an error // occurs, they are pushed at a TTL/4 rate. 
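An aside on the setupRPC change just above: the OpenCensus go-libp2p-gorpc stats handlers are attached only when c.config.Tracing is set, so the untraced path keeps the plain constructors and pays no per-call overhead. A minimal sketch of that wiring, assuming the same rpc, ocgorpc, host and version identifiers already imported in this file (newTracedRPCServer is an illustrative helper name, not part of the patch):

// newTracedRPCServer mirrors the conditional wiring in setupRPC: the
// ocgorpc.ServerHandler is attached only when tracing is enabled.
func newTracedRPCServer(h host.Host, tracing bool) *rpc.Server {
	if tracing {
		return rpc.NewServer(h, version.RPCProtocol, rpc.WithServerStatsHandler(&ocgorpc.ServerHandler{}))
	}
	return rpc.NewServer(h, version.RPCProtocol)
}

The client side follows the same shape with rpc.WithClientStatsHandler and an ocgorpc.ClientHandler, as shown in the hunk above.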
-func (c *Cluster) pushInformerMetrics() { +func (c *Cluster) pushInformerMetrics(ctx context.Context) { + ctx, span := trace.StartSpan(ctx, "cluster/pushInformerMetrics") + defer span.End() + timer := time.NewTimer(0) // fire immediately first // retries counts how many retries we have made @@ -228,13 +258,13 @@ func (c *Cluster) pushInformerMetrics() { for { select { - case <-c.ctx.Done(): + case <-ctx.Done(): return case <-timer.C: // wait } - metric, err := c.sendInformerMetric() + metric, err := c.sendInformerMetric(ctx) if err != nil { if (retries % retryWarnMod) == 0 { @@ -252,7 +282,10 @@ func (c *Cluster) pushInformerMetrics() { } } -func (c *Cluster) pushPingMetrics() { +func (c *Cluster) pushPingMetrics(ctx context.Context) { + ctx, span := trace.StartSpan(ctx, "cluster/pushPingMetrics") + defer span.End() + ticker := time.NewTicker(c.config.MonitorPingInterval) for { metric := api.Metric{ @@ -261,10 +294,10 @@ func (c *Cluster) pushPingMetrics() { Valid: true, } metric.SetTTL(c.config.MonitorPingInterval * 2) - c.monitor.PublishMetric(metric) + c.monitor.PublishMetric(ctx, metric) select { - case <-c.ctx.Done(): + case <-ctx.Done(): return case <-ticker.C: } @@ -279,7 +312,7 @@ func (c *Cluster) alertsHandler() { return case alrt := <-c.monitor.Alerts(): // only the leader handles alerts - leader, err := c.consensus.Leader() + leader, err := c.consensus.Leader(c.ctx) if err == nil && leader == c.id { logger.Warningf( "Peer %s received alert for %s in %s", @@ -287,7 +320,7 @@ func (c *Cluster) alertsHandler() { ) switch alrt.MetricName { case pingMetricName: - c.repinFromPeer(alrt.Peer) + c.repinFromPeer(c.ctx, alrt.Peer) } } } @@ -306,7 +339,7 @@ func (c *Cluster) watchPeers() { case <-ticker.C: logger.Debugf("%s watching peers", c.id) hasMe := false - peers, err := c.consensus.Peers() + peers, err := c.consensus.Peers(c.ctx) if err != nil { logger.Error(err) continue @@ -323,7 +356,7 @@ func (c *Cluster) watchPeers() { defer c.shutdownLock.Unlock() logger.Infof("%s: removed from raft. Initiating shutdown", c.id.Pretty()) c.removed = true - go c.Shutdown() + go c.Shutdown(c.ctx) return } } @@ -331,21 +364,24 @@ func (c *Cluster) watchPeers() { } // find all Cids pinned to a given peer and triggers re-pins on them. -func (c *Cluster) repinFromPeer(p peer.ID) { +func (c *Cluster) repinFromPeer(ctx context.Context, p peer.ID) { + ctx, span := trace.StartSpan(ctx, "cluster/repinFromPeer") + defer span.End() + if c.config.DisableRepinning { logger.Warningf("repinning is disabled. 
Will not re-allocate cids from %s", p.Pretty()) return } - cState, err := c.consensus.State() + cState, err := c.consensus.State(ctx) if err != nil { logger.Warning(err) return } - list := cState.List() + list := cState.List(ctx) for _, pin := range list { if containsPeer(pin.Allocations, p) { - ok, err := c.pin(pin, []peer.ID{p}, []peer.ID{}) // pin blacklisting this peer + ok, err := c.pin(ctx, pin, []peer.ID{p}, []peer.ID{}) // pin blacklisting this peer if ok && err == nil { logger.Infof("repinned %s out of %s", pin.Cid, p.Pretty()) } @@ -356,13 +392,16 @@ func (c *Cluster) repinFromPeer(p peer.ID) { // run launches some go-routines which live throughout the cluster's life func (c *Cluster) run() { go c.syncWatcher() - go c.pushPingMetrics() - go c.pushInformerMetrics() + go c.pushPingMetrics(c.ctx) + go c.pushInformerMetrics(c.ctx) go c.watchPeers() go c.alertsHandler() } func (c *Cluster) ready(timeout time.Duration) { + ctx, span := trace.StartSpan(c.ctx, "cluster/ready") + defer span.End() + // We bootstrapped first because with dirty state consensus // may have a peerset and not find a leader so we cannot wait // for it. @@ -384,22 +423,22 @@ This might be due to one or several causes: same version of IPFS-cluster. ************************************************** `) - c.Shutdown() + c.Shutdown(ctx) return - case <-c.consensus.Ready(): + case <-c.consensus.Ready(ctx): // Consensus ready means the state is up to date so we can sync // it to the tracker. We ignore errors (normal when state // doesn't exist in new peers). - c.StateSync() + c.StateSync(ctx) case <-c.ctx.Done(): return } // Cluster is ready. - peers, err := c.consensus.Peers() + peers, err := c.consensus.Peers(ctx) if err != nil { logger.Error(err) - c.Shutdown() + c.Shutdown(ctx) return } @@ -428,7 +467,11 @@ func (c *Cluster) Ready() <-chan struct{} { } // Shutdown stops the IPFS cluster components -func (c *Cluster) Shutdown() error { +func (c *Cluster) Shutdown(ctx context.Context) error { + _, span := trace.StartSpan(ctx, "cluster/Shutdown") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + c.shutdownLock.Lock() defer c.shutdownLock.Unlock() @@ -451,11 +494,11 @@ func (c *Cluster) Shutdown() error { // - We are not removed already (means watchPeers() called us) if c.consensus != nil && c.config.LeaveOnShutdown && c.readyB && !c.removed { c.removed = true - _, err := c.consensus.Peers() + _, err := c.consensus.Peers(ctx) if err == nil { // best effort logger.Warning("attempting to leave the cluster. This may take some seconds") - err := c.consensus.RmPeer(c.id) + err := c.consensus.RmPeer(ctx, c.id) if err != nil { logger.Error("leaving cluster: " + err.Error()) } @@ -463,7 +506,7 @@ func (c *Cluster) Shutdown() error { } if con := c.consensus; con != nil { - if err := con.Shutdown(); err != nil { + if err := con.Shutdown(ctx); err != nil { logger.Errorf("error stopping consensus: %s", err) return err } @@ -471,34 +514,39 @@ func (c *Cluster) Shutdown() error { // We left the cluster or were removed. Destroy the Raft state. 
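One pattern worth calling out, since it repeats in Shutdown above and in ID, PeerAdd, PeerRemove and the other exported methods below: a span is started from the caller's context so it parents correctly, and the work is then re-bound to the cluster's own lifecycle context with trace.NewContext. A hedged sketch of that shape (tracedSection is an illustrative name, not part of the patch):

// tracedSection starts a span tied to the caller's ctx and returns a context
// that carries the span but is cancelled with the cluster itself (c.ctx).
// Callers must defer span.End().
func (c *Cluster) tracedSection(ctx context.Context, name string) (context.Context, *trace.Span) {
	_, span := trace.StartSpan(ctx, name)
	return trace.NewContext(c.ctx, span), span
}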
if c.removed && c.readyB { - err := c.consensus.Clean() + err := c.consensus.Clean(ctx) if err != nil { logger.Error("cleaning consensus: ", err) } } - if err := c.monitor.Shutdown(); err != nil { + if err := c.monitor.Shutdown(ctx); err != nil { logger.Errorf("error stopping monitor: %s", err) return err } for _, api := range c.apis { - if err := api.Shutdown(); err != nil { + if err := api.Shutdown(ctx); err != nil { logger.Errorf("error stopping API: %s", err) return err } } - if err := c.ipfs.Shutdown(); err != nil { + if err := c.ipfs.Shutdown(ctx); err != nil { logger.Errorf("error stopping IPFS Connector: %s", err) return err } - if err := c.tracker.Shutdown(); err != nil { + if err := c.tracker.Shutdown(ctx); err != nil { logger.Errorf("error stopping PinTracker: %s", err) return err } + if err := c.tracer.Shutdown(ctx); err != nil { + logger.Errorf("error stopping Tracer: %s", err) + return err + } + c.cancel() c.host.Close() // Shutdown all network services c.wg.Wait() @@ -514,9 +562,13 @@ func (c *Cluster) Done() <-chan struct{} { } // ID returns information about the Cluster peer -func (c *Cluster) ID() api.ID { +func (c *Cluster) ID(ctx context.Context) api.ID { + _, span := trace.StartSpan(ctx, "cluster/ID") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + // ignore error since it is included in response object - ipfsID, _ := c.ipfs.ID() + ipfsID, _ := c.ipfs.ID(ctx) var addrs []ma.Multiaddr addrsSet := make(map[string]struct{}) // to filter dups @@ -532,7 +584,7 @@ func (c *Cluster) ID() api.ID { // This method might get called very early by a remote peer // and might catch us when consensus is not set if c.consensus != nil { - peers, _ = c.consensus.Peers() + peers, _ = c.consensus.Peers(ctx) } return api.ID{ @@ -558,7 +610,11 @@ func (c *Cluster) ID() api.ID { // // The new peer ID will be passed to the consensus // component to be added to the peerset. -func (c *Cluster) PeerAdd(pid peer.ID) (api.ID, error) { +func (c *Cluster) PeerAdd(ctx context.Context, pid peer.ID) (api.ID, error) { + _, span := trace.StartSpan(ctx, "cluster/PeerAdd") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + // starting 10 nodes on the same box for testing // causes deadlock and a global lock here // seems to help. @@ -567,7 +623,7 @@ func (c *Cluster) PeerAdd(pid peer.ID) (api.ID, error) { logger.Debugf("peerAdd called with %s", pid.Pretty()) // Log the new peer in the log so everyone gets it. - err := c.consensus.AddPeer(pid) + err := c.consensus.AddPeer(ctx, pid) if err != nil { logger.Error(err) id := api.ID{ID: pid, Error: err.Error()} @@ -575,11 +631,14 @@ func (c *Cluster) PeerAdd(pid peer.ID) (api.ID, error) { } // Ask the new peer to connect its IPFS daemon to the rest - err = c.rpcClient.Call(pid, + err = c.rpcClient.CallContext( + ctx, + pid, "Cluster", "IPFSConnectSwarms", struct{}{}, - &struct{}{}) + &struct{}{}, + ) if err != nil { logger.Error(err) } @@ -591,8 +650,8 @@ func (c *Cluster) PeerAdd(pid peer.ID) (api.ID, error) { // otherwise it might not contain the current cluster peers // as it should. for i := 0; i < 20; i++ { - id, _ = c.getIDForPeer(pid) - ownPeers, err := c.consensus.Peers() + id, _ = c.getIDForPeer(ctx, pid) + ownPeers, err := c.consensus.Peers(ctx) if err != nil { break } @@ -613,13 +672,17 @@ func (c *Cluster) PeerAdd(pid peer.ID) (api.ID, error) { // // The peer will be removed from the consensus peerset. // This may first trigger repinnings for all content if not disabled. 
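On the switch from rpcClient.Call to rpcClient.CallContext in PeerAdd (and in Join and getIDForPeer further down): passing the traced context lets the ocgorpc client handler associate the RPC with the current span, and it also lets callers bound the call with a deadline. A small illustrative fragment relying on the surrounding identifiers (the 30-second timeout is arbitrary):

// Bound a single peer RPC with a deadline; the deadline applies to the call
// itself rather than only to the surrounding goroutine.
callCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
err := c.rpcClient.CallContext(callCtx, pid, "Cluster", "IPFSConnectSwarms", struct{}{}, &struct{}{})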
-func (c *Cluster) PeerRemove(pid peer.ID) error { +func (c *Cluster) PeerRemove(ctx context.Context, pid peer.ID) error { + _, span := trace.StartSpan(ctx, "cluster/PeerRemove") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + // We need to repin before removing the peer, otherwise, it won't // be able to submit the pins. logger.Infof("re-allocating all CIDs directly associated to %s", pid) - c.repinFromPeer(pid) + c.repinFromPeer(ctx, pid) - err := c.consensus.RmPeer(pid) + err := c.consensus.RmPeer(ctx, pid) if err != nil { logger.Error(err) return err @@ -632,7 +695,11 @@ func (c *Cluster) PeerRemove(pid peer.ID) error { // given multiaddress. It works by calling PeerAdd on the destination // cluster and making sure that the new peer is ready to discover and contact // the rest. -func (c *Cluster) Join(addr ma.Multiaddr) error { +func (c *Cluster) Join(ctx context.Context, addr ma.Multiaddr) error { + _, span := trace.StartSpan(ctx, "cluster/Join") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + logger.Debugf("Join(%s)", addr) pid, _, err := api.Libp2pMultiaddrSplit(addr) @@ -653,11 +720,14 @@ func (c *Cluster) Join(addr ma.Multiaddr) error { // figure out what our real address is (obviously not // ListenAddr). var myID api.IDSerial - err = c.rpcClient.Call(pid, + err = c.rpcClient.CallContext( + ctx, + pid, "Cluster", "PeerAdd", peer.IDB58Encode(c.id), - &myID) + &myID, + ) if err != nil { logger.Error(err) return err @@ -671,7 +741,7 @@ func (c *Cluster) Join(addr ma.Multiaddr) error { go func() { ch := make(chan time.Time) bstCfg := dht.DefaultBootstrapConfig - dhtBstCtx, cancel := context.WithTimeout(c.ctx, bstCfg.Timeout*2) + dhtBstCtx, cancel := context.WithTimeout(ctx, bstCfg.Timeout*2) defer cancel() proc, err := c.dht.BootstrapOnSignal(bstCfg, ch) if err != nil { @@ -687,13 +757,13 @@ func (c *Cluster) Join(addr ma.Multiaddr) error { // wait for leader and for state to catch up // then sync - err = c.consensus.WaitForSync() + err = c.consensus.WaitForSync(ctx) if err != nil { logger.Error(err) return err } - c.StateSync() + c.StateSync(ctx) logger.Infof("%s: joined %s's cluster", c.id.Pretty(), pid.Pretty()) return nil @@ -702,16 +772,20 @@ func (c *Cluster) Join(addr ma.Multiaddr) error { // StateSync syncs the consensus state to the Pin Tracker, ensuring // that every Cid in the shared state is tracked and that the Pin Tracker // is not tracking more Cids than it should. -func (c *Cluster) StateSync() error { - cState, err := c.consensus.State() +func (c *Cluster) StateSync(ctx context.Context) error { + _, span := trace.StartSpan(ctx, "cluster/StateSync") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + cState, err := c.consensus.State(ctx) if err != nil { return err } logger.Debug("syncing state to tracker") - clusterPins := cState.List() + clusterPins := cState.List(ctx) - trackedPins := c.tracker.StatusAll() + trackedPins := c.tracker.StatusAll(ctx) trackedPinsMap := make(map[string]int) for i, tpin := range trackedPins { trackedPinsMap[tpin.Cid.String()] = i @@ -722,7 +796,7 @@ func (c *Cluster) StateSync() error { _, tracked := trackedPinsMap[pin.Cid.String()] if !tracked { logger.Debugf("StateSync: tracking %s, part of the shared state", pin.Cid) - c.tracker.Track(pin) + c.tracker.Track(ctx, pin) } } @@ -731,19 +805,19 @@ func (c *Cluster) StateSync() error { // c. 
Track items which should not be local as remote for _, p := range trackedPins { pCid := p.Cid - currentPin, has := cState.Get(pCid) + currentPin, has := cState.Get(ctx, pCid) allocatedHere := containsPeer(currentPin.Allocations, c.id) || currentPin.ReplicationFactorMin == -1 switch { case !has: logger.Debugf("StateSync: Untracking %s, is not part of shared state", pCid) - c.tracker.Untrack(pCid) + c.tracker.Untrack(ctx, pCid) case p.Status == api.TrackerStatusRemote && allocatedHere: logger.Debugf("StateSync: Tracking %s locally (currently remote)", pCid) - c.tracker.Track(currentPin) + c.tracker.Track(ctx, currentPin) case p.Status == api.TrackerStatusPinned && !allocatedHere: logger.Debugf("StateSync: Tracking %s as remote (currently local)", pCid) - c.tracker.Track(currentPin) + c.tracker.Track(ctx, currentPin) } } @@ -753,33 +827,53 @@ func (c *Cluster) StateSync() error { // StatusAll returns the GlobalPinInfo for all tracked Cids in all peers. // If an error happens, the slice will contain as much information as // could be fetched from other peers. -func (c *Cluster) StatusAll() ([]api.GlobalPinInfo, error) { - return c.globalPinInfoSlice("TrackerStatusAll") +func (c *Cluster) StatusAll(ctx context.Context) ([]api.GlobalPinInfo, error) { + _, span := trace.StartSpan(ctx, "cluster/StatusAll") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.globalPinInfoSlice(ctx, "TrackerStatusAll") } // StatusAllLocal returns the PinInfo for all the tracked Cids in this peer. -func (c *Cluster) StatusAllLocal() []api.PinInfo { - return c.tracker.StatusAll() +func (c *Cluster) StatusAllLocal(ctx context.Context) []api.PinInfo { + _, span := trace.StartSpan(ctx, "cluster/StatusAllLocal") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.tracker.StatusAll(ctx) } // Status returns the GlobalPinInfo for a given Cid as fetched from all // current peers. If an error happens, the GlobalPinInfo should contain // as much information as could be fetched from the other peers. -func (c *Cluster) Status(h cid.Cid) (api.GlobalPinInfo, error) { - return c.globalPinInfoCid("TrackerStatus", h) +func (c *Cluster) Status(ctx context.Context, h cid.Cid) (api.GlobalPinInfo, error) { + _, span := trace.StartSpan(ctx, "cluster/Status") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.globalPinInfoCid(ctx, "TrackerStatus", h) } // StatusLocal returns this peer's PinInfo for a given Cid. -func (c *Cluster) StatusLocal(h cid.Cid) api.PinInfo { - return c.tracker.Status(h) +func (c *Cluster) StatusLocal(ctx context.Context, h cid.Cid) api.PinInfo { + _, span := trace.StartSpan(ctx, "cluster/StatusLocal") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.tracker.Status(ctx, h) } // SyncAll triggers SyncAllLocal() operations in all cluster peers, making sure // that the state of tracked items matches the state reported by the IPFS daemon // and returning the results as GlobalPinInfo. If an error happens, the slice // will contain as much information as could be fetched from the peers. 
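Because StatusAll, Status and the Sync/Recover family below now accept a context, callers can scope the cluster-wide fan-out performed by globalPinInfoSlice. A short usage sketch, assuming a caller that holds a *Cluster (statusWithTimeout and the timeout value are illustrative):

// statusWithTimeout bounds a cluster-wide status query; the same ctx flows
// into the per-peer RPC fan-out.
func statusWithTimeout(c *Cluster) ([]api.GlobalPinInfo, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	return c.StatusAll(ctx)
}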
-func (c *Cluster) SyncAll() ([]api.GlobalPinInfo, error) { - return c.globalPinInfoSlice("SyncAllLocal") +func (c *Cluster) SyncAll(ctx context.Context) ([]api.GlobalPinInfo, error) { + _, span := trace.StartSpan(ctx, "cluster/SyncAll") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.globalPinInfoSlice(ctx, "SyncAllLocal") } // SyncAllLocal makes sure that the current state for all tracked items @@ -787,8 +881,12 @@ func (c *Cluster) SyncAll() ([]api.GlobalPinInfo, error) { // // SyncAllLocal returns the list of PinInfo that where updated because of // the operation, along with those in error states. -func (c *Cluster) SyncAllLocal() ([]api.PinInfo, error) { - syncedItems, err := c.tracker.SyncAll() +func (c *Cluster) SyncAllLocal(ctx context.Context) ([]api.PinInfo, error) { + _, span := trace.StartSpan(ctx, "cluster/SyncAllLocal") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + syncedItems, err := c.tracker.SyncAll(ctx) // Despite errors, tracker provides synced items that we can provide. // They encapsulate the error. if err != nil { @@ -800,22 +898,30 @@ func (c *Cluster) SyncAllLocal() ([]api.PinInfo, error) { // Sync triggers a SyncLocal() operation for a given Cid. // in all cluster peers. -func (c *Cluster) Sync(h cid.Cid) (api.GlobalPinInfo, error) { - return c.globalPinInfoCid("SyncLocal", h) +func (c *Cluster) Sync(ctx context.Context, h cid.Cid) (api.GlobalPinInfo, error) { + _, span := trace.StartSpan(ctx, "cluster/Sync") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.globalPinInfoCid(ctx, "SyncLocal", h) } // used for RecoverLocal and SyncLocal. func (c *Cluster) localPinInfoOp( + ctx context.Context, h cid.Cid, - f func(cid.Cid) (api.PinInfo, error), + f func(context.Context, cid.Cid) (api.PinInfo, error), ) (pInfo api.PinInfo, err error) { - cids, err := c.cidsFromMetaPin(h) + ctx, span := trace.StartSpan(ctx, "cluster/localPinInfoOp") + defer span.End() + + cids, err := c.cidsFromMetaPin(ctx, h) if err != nil { return api.PinInfo{}, err } for _, ci := range cids { - pInfo, err = f(ci) + pInfo, err = f(ctx, ci) if err != nil { logger.Error("tracker.SyncCid() returned with error: ", err) logger.Error("Is the ipfs daemon running?") @@ -830,40 +936,59 @@ func (c *Cluster) localPinInfoOp( // SyncLocal performs a local sync operation for the given Cid. This will // tell the tracker to verify the status of the Cid against the IPFS daemon. // It returns the updated PinInfo for the Cid. -func (c *Cluster) SyncLocal(h cid.Cid) (pInfo api.PinInfo, err error) { - return c.localPinInfoOp(h, c.tracker.Sync) +func (c *Cluster) SyncLocal(ctx context.Context, h cid.Cid) (pInfo api.PinInfo, err error) { + _, span := trace.StartSpan(ctx, "cluster/SyncLocal") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.localPinInfoOp(ctx, h, c.tracker.Sync) } // RecoverAllLocal triggers a RecoverLocal operation for all Cids tracked // by this peer. -func (c *Cluster) RecoverAllLocal() ([]api.PinInfo, error) { - return c.tracker.RecoverAll() +func (c *Cluster) RecoverAllLocal(ctx context.Context) ([]api.PinInfo, error) { + _, span := trace.StartSpan(ctx, "cluster/RecoverAllLocal") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.tracker.RecoverAll(ctx) } // Recover triggers a recover operation for a given Cid in all // cluster peers. 
-func (c *Cluster) Recover(h cid.Cid) (api.GlobalPinInfo, error) { - return c.globalPinInfoCid("TrackerRecover", h) +func (c *Cluster) Recover(ctx context.Context, h cid.Cid) (api.GlobalPinInfo, error) { + _, span := trace.StartSpan(ctx, "cluster/Recover") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.globalPinInfoCid(ctx, "TrackerRecover", h) } // RecoverLocal triggers a recover operation for a given Cid in this peer only. // It returns the updated PinInfo, after recovery. -func (c *Cluster) RecoverLocal(h cid.Cid) (pInfo api.PinInfo, err error) { - return c.localPinInfoOp(h, c.tracker.Recover) +func (c *Cluster) RecoverLocal(ctx context.Context, h cid.Cid) (pInfo api.PinInfo, err error) { + _, span := trace.StartSpan(ctx, "cluster/RecoverLocal") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + return c.localPinInfoOp(ctx, h, c.tracker.Recover) } // Pins returns the list of Cids managed by Cluster and which are part // of the current global state. This is the source of truth as to which // pins are managed and their allocation, but does not indicate if // the item is successfully pinned. For that, use StatusAll(). -func (c *Cluster) Pins() []api.Pin { - cState, err := c.consensus.State() +func (c *Cluster) Pins(ctx context.Context) []api.Pin { + _, span := trace.StartSpan(ctx, "cluster/Pins") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + cState, err := c.consensus.State(ctx) if err != nil { logger.Error(err) return []api.Pin{} } - return cState.List() - + return cState.List(ctx) } // PinGet returns information for a single Cid managed by Cluster. @@ -872,12 +997,16 @@ func (c *Cluster) Pins() []api.Pin { // assigned for the requested Cid, but does not indicate if // the item is successfully pinned. For that, use Status(). PinGet // returns an error if the given Cid is not part of the global state. -func (c *Cluster) PinGet(h cid.Cid) (api.Pin, error) { - st, err := c.consensus.State() +func (c *Cluster) PinGet(ctx context.Context, h cid.Cid) (api.Pin, error) { + _, span := trace.StartSpan(ctx, "cluster/PinGet") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + st, err := c.consensus.State(ctx) if err != nil { return api.PinCid(h), err } - pin, ok := st.Get(h) + pin, ok := st.Get(ctx, h) if !ok { return pin, errors.New("cid is not part of the global state") } @@ -900,8 +1029,11 @@ func (c *Cluster) PinGet(h cid.Cid) (api.Pin, error) { // this set then the remaining peers are allocated in order from the rest of // the cluster. Priority allocations are best effort. If any priority peers // are unavailable then Pin will simply allocate from the rest of the cluster. -func (c *Cluster) Pin(pin api.Pin) error { - _, err := c.pin(pin, []peer.ID{}, pin.Allocations) +func (c *Cluster) Pin(ctx context.Context, pin api.Pin) error { + _, span := trace.StartSpan(ctx, "cluster/Pin") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + _, err := c.pin(ctx, pin, []peer.ID{}, pin.Allocations) return err } @@ -963,13 +1095,16 @@ func checkPinType(pin *api.Pin) error { // setupPin ensures that the Pin object is fit for pinning. We check // and set the replication factors and ensure that the pinType matches the // metadata consistently. 
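The localPinInfoOp signature change in the hunk above is worth spelling out: the operation parameter is now func(context.Context, cid.Cid) (api.PinInfo, error), which the context-aware tracker methods satisfy directly as method values, so SyncLocal and RecoverLocal need no wrapper closures. An illustrative fragment under that assumption (recoverOne is a hypothetical name; it is essentially RecoverLocal without the span):

// recoverOne passes the tracker's Recover method value straight through as
// the per-CID operation.
func (c *Cluster) recoverOne(ctx context.Context, h cid.Cid) (api.PinInfo, error) {
	return c.localPinInfoOp(ctx, h, c.tracker.Recover)
}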
-func (c *Cluster) setupPin(pin *api.Pin) error { +func (c *Cluster) setupPin(ctx context.Context, pin *api.Pin) error { + ctx, span := trace.StartSpan(ctx, "cluster/setupPin") + defer span.End() + err := c.setupReplicationFactor(pin) if err != nil { return err } - existing, err := c.PinGet(pin.Cid) + existing, err := c.PinGet(ctx, pin.Cid) if err == nil && existing.Type != pin.Type { // it exists return errors.New("cannot repin CID with different tracking method, clear state with pin rm to proceed") } @@ -980,21 +1115,25 @@ func (c *Cluster) setupPin(pin *api.Pin) error { // able to evacuate a node and returns whether the pin was submitted // to the consensus layer or skipped (due to error or to the fact // that it was already valid). -func (c *Cluster) pin(pin api.Pin, blacklist []peer.ID, prioritylist []peer.ID) (bool, error) { +func (c *Cluster) pin(ctx context.Context, pin api.Pin, blacklist []peer.ID, prioritylist []peer.ID) (bool, error) { + ctx, span := trace.StartSpan(ctx, "cluster/pin") + defer span.End() + if pin.Cid == cid.Undef { return false, errors.New("bad pin object") } // setup pin might produce some side-effects to our pin - err := c.setupPin(&pin) + err := c.setupPin(ctx, &pin) if err != nil { return false, err } if pin.Type == api.MetaType { - return true, c.consensus.LogPin(pin) + return true, c.consensus.LogPin(ctx, pin) } allocs, err := c.allocate( + ctx, pin.Cid, pin.ReplicationFactorMin, pin.ReplicationFactorMax, @@ -1006,7 +1145,7 @@ func (c *Cluster) pin(pin api.Pin, blacklist []peer.ID, prioritylist []peer.ID) } pin.Allocations = allocs - if curr, _ := c.PinGet(pin.Cid); curr.Equals(pin) { + if curr, _ := c.PinGet(ctx, pin.Cid); curr.Equals(pin) { // skip pinning logger.Debugf("pinning %s skipped: already correctly allocated", pin.Cid) return false, nil @@ -1018,7 +1157,7 @@ func (c *Cluster) pin(pin api.Pin, blacklist []peer.ID, prioritylist []peer.ID) logger.Infof("IPFS cluster pinning %s on %s:", pin.Cid, pin.Allocations) } - return true, c.consensus.LogPin(pin) + return true, c.consensus.LogPin(ctx, pin) } // Unpin makes the cluster Unpin a Cid. This implies adding the Cid @@ -1027,16 +1166,20 @@ func (c *Cluster) pin(pin api.Pin, blacklist []peer.ID, prioritylist []peer.ID) // Unpin returns an error if the operation could not be persisted // to the global state. Unpin does not reflect the success or failure // of underlying IPFS daemon unpinning operations. -func (c *Cluster) Unpin(h cid.Cid) error { +func (c *Cluster) Unpin(ctx context.Context, h cid.Cid) error { + _, span := trace.StartSpan(ctx, "cluster/Unpin") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + logger.Info("IPFS cluster unpinning:", h) - pin, err := c.PinGet(h) + pin, err := c.PinGet(ctx, h) if err != nil { return fmt.Errorf("cannot unpin pin uncommitted to state: %s", err) } switch pin.Type { case api.DataType: - return c.consensus.LogUnpin(pin) + return c.consensus.LogUnpin(ctx, pin) case api.ShardType: err := "cannot unpin a shard direclty. Unpin content root CID instead." return errors.New(err) @@ -1046,7 +1189,7 @@ func (c *Cluster) Unpin(h cid.Cid) error { if err != nil { return err } - return c.consensus.LogUnpin(pin) + return c.consensus.LogUnpin(ctx, pin) case api.ClusterDAGType: err := "cannot unpin a Cluster DAG directly. Unpin content root CID instead." 
return errors.New(err) @@ -1060,7 +1203,10 @@ func (c *Cluster) Unpin(h cid.Cid) error { // reference the same metadata node, only unpinning those nodes without // existing references func (c *Cluster) unpinClusterDag(metaPin api.Pin) error { - cids, err := c.cidsFromMetaPin(metaPin.Cid) + ctx, span := trace.StartSpan(c.ctx, "cluster/unpinClusterDag") + defer span.End() + + cids, err := c.cidsFromMetaPin(ctx, metaPin.Cid) if err != nil { return err } @@ -1068,7 +1214,7 @@ func (c *Cluster) unpinClusterDag(metaPin api.Pin) error { // TODO: FIXME: potentially unpinning shards which are referenced // by other clusterDAGs. for _, ci := range cids { - err = c.consensus.LogUnpin(api.PinCid(ci)) + err = c.consensus.LogUnpin(ctx, api.PinCid(ci)) if err != nil { return err } @@ -1081,6 +1227,7 @@ func (c *Cluster) unpinClusterDag(metaPin api.Pin) error { // DAG can be added locally to the calling cluster peer's ipfs repo, or // sharded across the entire cluster. func (c *Cluster) AddFile(reader *multipart.Reader, params *api.AddParams) (cid.Cid, error) { + // TODO: add context param and tracing var dags adder.ClusterDAGService if params.Shard { dags = sharding.New(c.rpcClient, params.PinOptions, nil) @@ -1097,8 +1244,12 @@ func (c *Cluster) Version() string { } // Peers returns the IDs of the members of this Cluster. -func (c *Cluster) Peers() []api.ID { - members, err := c.consensus.Peers() +func (c *Cluster) Peers(ctx context.Context) []api.ID { + _, span := trace.StartSpan(ctx, "cluster/Peers") + defer span.End() + ctx = trace.NewContext(c.ctx, span) + + members, err := c.consensus.Peers(ctx) if err != nil { logger.Error(err) logger.Error("an empty list of peers will be returned") @@ -1109,7 +1260,7 @@ func (c *Cluster) Peers() []api.ID { peersSerial := make([]api.IDSerial, lenMembers, lenMembers) peers := make([]api.ID, lenMembers, lenMembers) - ctxs, cancels := rpcutil.CtxsWithCancel(c.ctx, lenMembers) + ctxs, cancels := rpcutil.CtxsWithCancel(ctx, lenMembers) defer rpcutil.MultiCancel(cancels) errs := c.rpcClient.MultiCall( @@ -1134,13 +1285,16 @@ func (c *Cluster) Peers() []api.ID { return peers } -func (c *Cluster) globalPinInfoCid(method string, h cid.Cid) (api.GlobalPinInfo, error) { +func (c *Cluster) globalPinInfoCid(ctx context.Context, method string, h cid.Cid) (api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "cluster/globalPinInfoCid") + defer span.End() + pin := api.GlobalPinInfo{ Cid: h, PeerMap: make(map[peer.ID]api.PinInfo), } - members, err := c.consensus.Peers() + members, err := c.consensus.Peers(ctx) if err != nil { logger.Error(err) return api.GlobalPinInfo{}, err @@ -1152,7 +1306,7 @@ func (c *Cluster) globalPinInfoCid(method string, h cid.Cid) (api.GlobalPinInfo, Cid: h, } - ctxs, cancels := rpcutil.CtxsWithCancel(c.ctx, lenMembers) + ctxs, cancels := rpcutil.CtxsWithCancel(ctx, lenMembers) defer rpcutil.MultiCancel(cancels) errs := c.rpcClient.MultiCall( @@ -1202,11 +1356,14 @@ func (c *Cluster) globalPinInfoCid(method string, h cid.Cid) (api.GlobalPinInfo, return pin, nil } -func (c *Cluster) globalPinInfoSlice(method string) ([]api.GlobalPinInfo, error) { +func (c *Cluster) globalPinInfoSlice(ctx context.Context, method string) ([]api.GlobalPinInfo, error) { + ctx, span := trace.StartSpan(ctx, "cluster/globalPinInfoSlice") + defer span.End() + infos := make([]api.GlobalPinInfo, 0) fullMap := make(map[string]api.GlobalPinInfo) - members, err := c.consensus.Peers() + members, err := c.consensus.Peers(ctx) if err != nil { logger.Error(err) return 
[]api.GlobalPinInfo{}, err @@ -1215,7 +1372,7 @@ func (c *Cluster) globalPinInfoSlice(method string) ([]api.GlobalPinInfo, error) replies := make([][]api.PinInfoSerial, lenMembers, lenMembers) - ctxs, cancels := rpcutil.CtxsWithCancel(c.ctx, lenMembers) + ctxs, cancels := rpcutil.CtxsWithCancel(ctx, lenMembers) defer rpcutil.MultiCancel(cancels) errs := c.rpcClient.MultiCall( @@ -1275,10 +1432,19 @@ func (c *Cluster) globalPinInfoSlice(method string) ([]api.GlobalPinInfo, error) return infos, nil } -func (c *Cluster) getIDForPeer(pid peer.ID) (api.ID, error) { +func (c *Cluster) getIDForPeer(ctx context.Context, pid peer.ID) (api.ID, error) { + ctx, span := trace.StartSpan(ctx, "cluster/getIDForPeer") + defer span.End() + idSerial := api.ID{ID: pid}.ToSerial() - err := c.rpcClient.Call( - pid, "Cluster", "ID", struct{}{}, &idSerial) + err := c.rpcClient.CallContext( + ctx, + pid, + "Cluster", + "ID", + struct{}{}, + &idSerial, + ) id := idSerial.ToID() if err != nil { logger.Error(err) @@ -1292,15 +1458,18 @@ func (c *Cluster) getIDForPeer(pid peer.ID) (api.ID, error) { // that order (the MetaPin is the last element). // It returns a slice with only the given Cid if it's not a known Cid or not a // MetaPin. -func (c *Cluster) cidsFromMetaPin(h cid.Cid) ([]cid.Cid, error) { - cState, err := c.consensus.State() +func (c *Cluster) cidsFromMetaPin(ctx context.Context, h cid.Cid) ([]cid.Cid, error) { + ctx, span := trace.StartSpan(ctx, "cluster/cidsFromMetaPin") + defer span.End() + + cState, err := c.consensus.State(ctx) if err != nil { return nil, err } list := []cid.Cid{h} - pin, ok := cState.Get(h) + pin, ok := cState.Get(ctx, h) if !ok { return list, nil } @@ -1310,12 +1479,12 @@ func (c *Cluster) cidsFromMetaPin(h cid.Cid) ([]cid.Cid, error) { } list = append([]cid.Cid{pin.Reference}, list...) - clusterDagPin, err := c.PinGet(pin.Reference) + clusterDagPin, err := c.PinGet(ctx, pin.Reference) if err != nil { return list, fmt.Errorf("could not get clusterDAG pin from state. Malformed pin?: %s", err) } - clusterDagBlock, err := c.ipfs.BlockGet(clusterDagPin.Cid) + clusterDagBlock, err := c.ipfs.BlockGet(ctx, clusterDagPin.Cid) if err != nil { return list, fmt.Errorf("error reading clusterDAG block from ipfs: %s", err) } diff --git a/cluster_config.go b/cluster_config.go index 6d81c6aa..49eeb339 100644 --- a/cluster_config.go +++ b/cluster_config.go @@ -120,6 +120,9 @@ type Config struct { // Peerstore file specifies the file on which we persist the // libp2p host peerstore addresses. This file is regularly saved. PeerstoreFile string + + // Tracing flag used to skip tracing specific paths when not enabled. 
+ Tracing bool } // configJSON represents a Cluster configuration as it will look when it is diff --git a/cluster_test.go b/cluster_test.go index daa4e3ef..63964ed5 100644 --- a/cluster_test.go +++ b/cluster_test.go @@ -29,7 +29,7 @@ type mockComponent struct { rpcClient *rpc.Client } -func (c *mockComponent) Shutdown() error { +func (c *mockComponent) Shutdown(ctx context.Context) error { return nil } @@ -53,7 +53,7 @@ type mockConnector struct { blocks sync.Map } -func (ipfs *mockConnector) ID() (api.IPFSID, error) { +func (ipfs *mockConnector) ID(ctx context.Context) (api.IPFSID, error) { return api.IPFSID{ ID: test.TestPeerID1, }, nil @@ -99,23 +99,23 @@ func (ipfs *mockConnector) PinLs(ctx context.Context, filter string) (map[string return m, nil } -func (ipfs *mockConnector) SwarmPeers() (api.SwarmPeers, error) { +func (ipfs *mockConnector) SwarmPeers(ctx context.Context) (api.SwarmPeers, error) { return []peer.ID{test.TestPeerID4, test.TestPeerID5}, nil } -func (ipfs *mockConnector) RepoStat() (api.IPFSRepoStat, error) { +func (ipfs *mockConnector) RepoStat(ctx context.Context) (api.IPFSRepoStat, error) { return api.IPFSRepoStat{RepoSize: 100, StorageMax: 1000}, nil } -func (ipfs *mockConnector) ConnectSwarms() error { return nil } +func (ipfs *mockConnector) ConnectSwarms(ctx context.Context) error { return nil } func (ipfs *mockConnector) ConfigKey(keypath string) (interface{}, error) { return nil, nil } -func (ipfs *mockConnector) BlockPut(nwm api.NodeWithMeta) error { +func (ipfs *mockConnector) BlockPut(ctx context.Context, nwm api.NodeWithMeta) error { ipfs.blocks.Store(nwm.Cid, nwm.Data) return nil } -func (ipfs *mockConnector) BlockGet(c cid.Cid) ([]byte, error) { +func (ipfs *mockConnector) BlockGet(ctx context.Context, c cid.Cid) ([]byte, error) { d, ok := ipfs.blocks.Load(c.String()) if !ok { return nil, errors.New("block not found") @@ -123,8 +123,12 @@ func (ipfs *mockConnector) BlockGet(c cid.Cid) ([]byte, error) { return d.([]byte), nil } +type mockTracer struct { + mockComponent +} + func testingCluster(t *testing.T) (*Cluster, *mockAPI, *mockConnector, state.State, PinTracker) { - clusterCfg, _, _, _, consensusCfg, maptrackerCfg, statelesstrackerCfg, bmonCfg, psmonCfg, _ := testingConfigs() + clusterCfg, _, _, _, consensusCfg, maptrackerCfg, statelesstrackerCfg, bmonCfg, psmonCfg, _, _ := testingConfigs() host, err := NewClusterHost(context.Background(), clusterCfg) if err != nil { @@ -136,6 +140,7 @@ func testingCluster(t *testing.T) (*Cluster, *mockAPI, *mockConnector, state.Sta ipfs := &mockConnector{} st := mapstate.NewMapState() tracker := makePinTracker(t, clusterCfg.ID, maptrackerCfg, statelesstrackerCfg, clusterCfg.Peername) + tracer := &mockTracer{} raftcon, _ := raft.NewConsensus(host, consensusCfg, st, false) @@ -161,6 +166,7 @@ func testingCluster(t *testing.T) (*Cluster, *mockAPI, *mockConnector, state.Sta mon, alloc, inf, + tracer, ) if err != nil { t.Fatal("cannot create cluster:", err) @@ -177,54 +183,57 @@ func cleanRaft() { } func testClusterShutdown(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) - err := cl.Shutdown() + err := cl.Shutdown(ctx) if err != nil { t.Error("cluster shutdown failed:", err) } - cl.Shutdown() + cl.Shutdown(ctx) cl, _, _, _, _ = testingCluster(t) - err = cl.Shutdown() + err = cl.Shutdown(ctx) if err != nil { t.Error("cluster shutdown failed:", err) } } func TestClusterStateSync(t *testing.T) { + ctx := context.Background() cleanRaft() cl, _, _, st, _ := testingCluster(t) defer cleanRaft() 
- defer cl.Shutdown() - err := cl.StateSync() + defer cl.Shutdown(ctx) + err := cl.StateSync(ctx) if err == nil { t.Fatal("expected an error as there is no state to sync") } c, _ := cid.Decode(test.TestCid1) - err = cl.Pin(api.PinCid(c)) + err = cl.Pin(ctx, api.PinCid(c)) if err != nil { t.Fatal("pin should have worked:", err) } - err = cl.StateSync() + err = cl.StateSync(ctx) if err != nil { t.Fatal("sync after pinning should have worked:", err) } // Modify state on the side so the sync does not // happen on an empty slide - st.Rm(c) - err = cl.StateSync() + st.Rm(ctx, c) + err = cl.StateSync(ctx) if err != nil { t.Fatal("sync with recover should have worked:", err) } } func TestClusterID(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() - id := cl.ID() + defer cl.Shutdown(ctx) + id := cl.ID(ctx) if len(id.Addresses) == 0 { t.Error("expected more addresses") } @@ -240,31 +249,33 @@ func TestClusterID(t *testing.T) { } func TestClusterPin(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() + defer cl.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) - err := cl.Pin(api.PinCid(c)) + err := cl.Pin(ctx, api.PinCid(c)) if err != nil { t.Fatal("pin should have worked:", err) } // test an error case - cl.consensus.Shutdown() + cl.consensus.Shutdown(ctx) pin := api.PinCid(c) pin.ReplicationFactorMax = 1 pin.ReplicationFactorMin = 1 - err = cl.Pin(pin) + err = cl.Pin(ctx, pin) if err == nil { t.Error("expected an error but things worked") } } func TestAddFile(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() + defer cl.Shutdown(ctx) sth := test.NewShardingTestHelper() defer sth.Clean(t) @@ -285,7 +296,7 @@ func TestAddFile(t *testing.T) { pinDelay() - pin := cl.StatusLocal(c) + pin := cl.StatusLocal(ctx, c) if pin.Error != "" { t.Fatal(pin.Error) } @@ -293,7 +304,7 @@ func TestAddFile(t *testing.T) { t.Error("cid should be pinned") } - cl.Unpin(c) // unpin so we can pin the shard in next test + cl.Unpin(ctx, c) // unpin so we can pin the shard in next test pinDelay() }) @@ -321,9 +332,10 @@ func TestAddFile(t *testing.T) { } func TestUnpinShard(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() + defer cl.Shutdown(ctx) sth := test.NewShardingTestHelper() defer sth.Clean(t) @@ -346,17 +358,17 @@ func TestUnpinShard(t *testing.T) { // skipping errors, VerifyShards has checked pinnedCids := []cid.Cid{} pinnedCids = append(pinnedCids, root) - metaPin, _ := cl.PinGet(root) - cDag, _ := cl.PinGet(metaPin.Reference) + metaPin, _ := cl.PinGet(ctx, root) + cDag, _ := cl.PinGet(ctx, metaPin.Reference) pinnedCids = append(pinnedCids, cDag.Cid) - cDagBlock, _ := cl.ipfs.BlockGet(cDag.Cid) + cDagBlock, _ := cl.ipfs.BlockGet(ctx, cDag.Cid) cDagNode, _ := sharding.CborDataToNode(cDagBlock, "cbor") for _, l := range cDagNode.Links() { pinnedCids = append(pinnedCids, l.Cid) } t.Run("unpin clusterdag should fail", func(t *testing.T) { - err := cl.Unpin(cDag.Cid) + err := cl.Unpin(ctx, cDag.Cid) if err == nil { t.Fatal("should not allow unpinning the cluster DAG directly") } @@ -364,7 +376,7 @@ func TestUnpinShard(t *testing.T) { }) t.Run("unpin shard should fail", func(t *testing.T) { - err := cl.Unpin(cDagNode.Links()[0].Cid) + err := cl.Unpin(ctx, cDagNode.Links()[0].Cid) if err == nil { t.Fatal("should not allow unpinning shards 
directly") } @@ -372,7 +384,7 @@ func TestUnpinShard(t *testing.T) { }) t.Run("normal unpin", func(t *testing.T) { - err := cl.Unpin(root) + err := cl.Unpin(ctx, root) if err != nil { t.Fatal(err) } @@ -380,7 +392,7 @@ func TestUnpinShard(t *testing.T) { pinDelay() for _, c := range pinnedCids { - st := cl.StatusLocal(c) + st := cl.StatusLocal(ctx, c) if st.Status != api.TrackerStatusUnpinned { t.Errorf("%s should have been unpinned but is %s", c, st.Status) } @@ -682,17 +694,18 @@ func TestUnpinShard(t *testing.T) { // } func TestClusterPins(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() + defer cl.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) - err := cl.Pin(api.PinCid(c)) + err := cl.Pin(ctx, api.PinCid(c)) if err != nil { t.Fatal("pin should have worked:", err) } - pins := cl.Pins() + pins := cl.Pins(ctx) if len(pins) != 1 { t.Fatal("pin should be part of the state") } @@ -702,17 +715,18 @@ func TestClusterPins(t *testing.T) { } func TestClusterPinGet(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() + defer cl.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) - err := cl.Pin(api.PinCid(c)) + err := cl.Pin(ctx, api.PinCid(c)) if err != nil { t.Fatal("pin should have worked:", err) } - pin, err := cl.PinGet(c) + pin, err := cl.PinGet(ctx, c) if err != nil { t.Fatal(err) } @@ -721,47 +735,49 @@ func TestClusterPinGet(t *testing.T) { } c2, _ := cid.Decode(test.TestCid2) - _, err = cl.PinGet(c2) + _, err = cl.PinGet(ctx, c2) if err == nil { t.Fatal("expected an error") } } func TestClusterUnpin(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() + defer cl.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) // Unpin should error without pin being committed to state - err := cl.Unpin(c) + err := cl.Unpin(ctx, c) if err == nil { t.Error("unpin should have failed") } // Unpin after pin should succeed - err = cl.Pin(api.PinCid(c)) + err = cl.Pin(ctx, api.PinCid(c)) if err != nil { t.Fatal("pin should have worked:", err) } - err = cl.Unpin(c) + err = cl.Unpin(ctx, c) if err != nil { t.Error("unpin should have worked:", err) } // test another error case - cl.consensus.Shutdown() - err = cl.Unpin(c) + cl.consensus.Shutdown(ctx) + err = cl.Unpin(ctx, c) if err == nil { t.Error("expected an error but things worked") } } func TestClusterPeers(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() - peers := cl.Peers() + defer cl.Shutdown(ctx) + peers := cl.Peers(ctx) if len(peers) != 1 { t.Fatal("expected 1 peer") } @@ -774,28 +790,30 @@ func TestClusterPeers(t *testing.T) { } func TestVersion(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() + defer cl.Shutdown(ctx) if cl.Version() != version.Version.String() { t.Error("bad Version()") } } func TestClusterRecoverAllLocal(t *testing.T) { + ctx := context.Background() cl, _, _, _, _ := testingCluster(t) defer cleanRaft() - defer cl.Shutdown() + defer cl.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) - err := cl.Pin(api.PinCid(c)) + err := cl.Pin(ctx, api.PinCid(c)) if err != nil { t.Fatal("pin should have worked:", err) } pinDelay() - recov, err := cl.RecoverAllLocal() + recov, err := cl.RecoverAllLocal(ctx) if err != nil { t.Error("did not expect an error") } diff --git a/cmd/ipfs-cluster-ctl/main.go 
b/cmd/ipfs-cluster-ctl/main.go index 99cf900d..f449d1a6 100644 --- a/cmd/ipfs-cluster-ctl/main.go +++ b/cmd/ipfs-cluster-ctl/main.go @@ -12,6 +12,8 @@ import ( "sync" "time" + "go.opencensus.io/exporter/jaeger" + "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/api/rest/client" uuid "github.com/satori/go.uuid" @@ -40,6 +42,8 @@ var ( var logger = logging.Logger("cluster-ctl") +var tracer *jaeger.Exporter + var globalClient client.Client // Description provides a short summary of the functionality of this tool @@ -82,6 +86,8 @@ func checkErr(doing string, err error) { } func main() { + ctx := context.Background() + app := cli.NewApp() app.Name = programName app.Usage = "CLI for IPFS Cluster" @@ -174,6 +180,21 @@ requires authorization. implies --https, which you can disable with --force-http globalClient, err = client.NewDefaultClient(cfg) checkErr("creating API client", err) + + // TODO: need to figure out best way to configure tracing for ctl + // leaving the following as it is still useful for local debugging. + // tracingCfg := &observations.Config{} + // tracingCfg.Default() + // tracingCfg.EnableTracing = true + // tracingCfg.TracingServiceName = "cluster-ctl" + // tracingCfg.TracingSamplingProb = 1 + // tracer = observations.SetupTracing(tracingCfg) + return nil + } + app.After = func(c *cli.Context) error { + // TODO: need to figure out best way to configure tracing for ctl + // leaving the following as it is still useful for local debugging. + // tracer.Flush() return nil } @@ -187,7 +208,7 @@ This command displays information about the peer that the tool is contacting `, Flags: []cli.Flag{}, Action: func(c *cli.Context) error { - resp, cerr := globalClient.ID() + resp, cerr := globalClient.ID(ctx) formatResponse(c, resp, cerr) return nil }, @@ -206,7 +227,7 @@ This command provides a list of the ID information of all the peers in the Clust Flags: []cli.Flag{}, ArgsUsage: " ", Action: func(c *cli.Context) error { - resp, cerr := globalClient.Peers() + resp, cerr := globalClient.Peers(ctx) formatResponse(c, resp, cerr) return nil }, @@ -226,7 +247,7 @@ cluster peers. pid := c.Args().First() p, err := peer.IDB58Decode(pid) checkErr("parsing peer ID", err) - cerr := globalClient.PeerRm(p) + cerr := globalClient.PeerRm(ctx, p) formatResponse(c, nil, cerr) return nil }, @@ -428,7 +449,7 @@ cluster "pin add". } }() - cerr := globalClient.Add(paths, p, out) + cerr := globalClient.Add(ctx, paths, p, out) wg.Wait() formatResponse(c, nil, cerr) return cerr @@ -503,13 +524,14 @@ peers should pin this content. rplMax = rpl } - cerr := globalClient.Pin(ci, rplMin, rplMax, c.String("name")) + cerr := globalClient.Pin(ctx, ci, rplMin, rplMax, c.String("name")) if cerr != nil { formatResponse(c, nil, cerr) return nil } handlePinResponseFormatFlags( + ctx, c, ci, api.TrackerStatusPinned, @@ -545,16 +567,18 @@ although unpinning operations in the cluster may take longer or fail. }, }, Action: func(c *cli.Context) error { + cidStr := c.Args().First() ci, err := cid.Decode(cidStr) checkErr("parsing cid", err) - cerr := globalClient.Unpin(ci) + cerr := globalClient.Unpin(ctx, ci) if cerr != nil { formatResponse(c, nil, cerr) return nil } handlePinResponseFormatFlags( + ctx, c, ci, api.TrackerStatusUnpinned, @@ -592,7 +616,7 @@ The filter only takes effect when listing all pins. 
The possible values are: if cidStr != "" { ci, err := cid.Decode(cidStr) checkErr("parsing cid", err) - resp, cerr := globalClient.Allocation(ci) + resp, cerr := globalClient.Allocation(ctx, ci) formatResponse(c, resp, cerr) } else { var filter api.PinType @@ -601,7 +625,7 @@ The filter only takes effect when listing all pins. The possible values are: filter |= api.PinTypeFromString(f) } - resp, cerr := globalClient.Allocations(filter) + resp, cerr := globalClient.Allocations(ctx, filter) formatResponse(c, resp, cerr) } return nil @@ -642,7 +666,7 @@ separated list). The following are valid status values: if cidStr != "" { ci, err := cid.Decode(cidStr) checkErr("parsing cid", err) - resp, cerr := globalClient.Status(ci, c.Bool("local")) + resp, cerr := globalClient.Status(ctx, ci, c.Bool("local")) formatResponse(c, resp, cerr) } else { filterFlag := c.String("filter") @@ -650,7 +674,7 @@ separated list). The following are valid status values: if filter == api.TrackerStatusUndefined && filterFlag != "" { checkErr("parsing filter flag", errors.New("invalid filter name")) } - resp, cerr := globalClient.StatusAll(filter, c.Bool("local")) + resp, cerr := globalClient.StatusAll(ctx, filter, c.Bool("local")) formatResponse(c, resp, cerr) } return nil @@ -683,10 +707,10 @@ operations on the contacted peer. By default, all peers will sync. if cidStr != "" { ci, err := cid.Decode(cidStr) checkErr("parsing cid", err) - resp, cerr := globalClient.Sync(ci, c.Bool("local")) + resp, cerr := globalClient.Sync(ctx, ci, c.Bool("local")) formatResponse(c, resp, cerr) } else { - resp, cerr := globalClient.SyncAll(c.Bool("local")) + resp, cerr := globalClient.SyncAll(ctx, c.Bool("local")) formatResponse(c, resp, cerr) } return nil @@ -715,10 +739,10 @@ operations on the contacted peer (as opposed to on every peer). if cidStr != "" { ci, err := cid.Decode(cidStr) checkErr("parsing cid", err) - resp, cerr := globalClient.Recover(ci, c.Bool("local")) + resp, cerr := globalClient.Recover(ctx, ci, c.Bool("local")) formatResponse(c, resp, cerr) } else { - resp, cerr := globalClient.RecoverAll(c.Bool("local")) + resp, cerr := globalClient.RecoverAll(ctx, c.Bool("local")) formatResponse(c, resp, cerr) } return nil @@ -735,7 +759,7 @@ to check that it matches the CLI version (shown by -v). ArgsUsage: " ", Flags: []cli.Flag{}, Action: func(c *cli.Context) error { - resp, cerr := globalClient.Version() + resp, cerr := globalClient.Version(ctx) formatResponse(c, resp, cerr) return nil }, @@ -764,7 +788,7 @@ graph of the connections. 
Output is a dot file encoding the cluster's connectio }, }, Action: func(c *cli.Context) error { - resp, cerr := globalClient.GetConnectGraph() + resp, cerr := globalClient.GetConnectGraph(ctx) if cerr != nil { formatResponse(c, resp, cerr) return nil @@ -805,7 +829,7 @@ but usually are: checkErr("", errors.New("provide a metric name")) } - resp, cerr := globalClient.Metrics(metric) + resp, cerr := globalClient.Metrics(ctx, metric) formatResponse(c, resp, cerr) return nil }, @@ -906,6 +930,7 @@ func parseCredentials(userInput string) (string, string) { } func handlePinResponseFormatFlags( + ctx context.Context, c *cli.Context, ci cid.Cid, target api.TrackerStatus, @@ -925,7 +950,7 @@ func handlePinResponseFormatFlags( if status.Cid == cid.Undef { // no status from "wait" time.Sleep(time.Second) - status, cerr = globalClient.Status(ci, false) + status, cerr = globalClient.Status(ctx, ci, false) } formatResponse(c, status, cerr) } @@ -953,3 +978,33 @@ func waitFor( return client.WaitFor(ctx, globalClient, fp) } + +// func setupTracing(config tracingConfig) { +// if !config.Enable { +// return +// } + +// agentEndpointURI := "0.0.0.0:6831" +// collectorEndpointURI := "http://0.0.0.0:14268" + +// if config.JaegerAgentEndpoint != "" { +// agentEndpointURI = config.JaegerAgentEndpoint +// } +// if config.JaegerCollectorEndpoint != "" { +// collectorEndpointURI = config.JaegerCollectorEndpoint +// } + +// je, err := jaeger.NewExporter(jaeger.Options{ +// AgentEndpoint: agentEndpointURI, +// CollectorEndpoint: collectorEndpointURI, +// ServiceName: "ipfs-cluster-ctl", +// }) +// if err != nil { +// log.Fatalf("Failed to create the Jaeger exporter: %v", err) +// } +// // Register/enable the trace exporter +// trace.RegisterExporter(je) + +// // For demo purposes, set the trace sampling probability to be high +// trace.ApplyConfig(trace.Config{DefaultSampler: trace.ProbabilitySampler(1.0)}) +// } diff --git a/cmd/ipfs-cluster-service/configs.go b/cmd/ipfs-cluster-service/configs.go index 8ad9cc14..f1eda915 100644 --- a/cmd/ipfs-cluster-service/configs.go +++ b/cmd/ipfs-cluster-service/configs.go @@ -14,6 +14,7 @@ import ( "github.com/ipfs/ipfs-cluster/ipfsconn/ipfshttp" "github.com/ipfs/ipfs-cluster/monitor/basic" "github.com/ipfs/ipfs-cluster/monitor/pubsubmon" + "github.com/ipfs/ipfs-cluster/observations" "github.com/ipfs/ipfs-cluster/pintracker/maptracker" "github.com/ipfs/ipfs-cluster/pintracker/stateless" ) @@ -30,6 +31,8 @@ type cfgs struct { pubsubmonCfg *pubsubmon.Config diskInfCfg *disk.Config numpinInfCfg *numpin.Config + metricsCfg *observations.MetricsConfig + tracingCfg *observations.TracingConfig } func makeConfigs() (*config.Manager, *cfgs) { @@ -45,6 +48,8 @@ func makeConfigs() (*config.Manager, *cfgs) { pubsubmonCfg := &pubsubmon.Config{} diskInfCfg := &disk.Config{} numpinInfCfg := &numpin.Config{} + metricsCfg := &observations.MetricsConfig{} + tracingCfg := &observations.TracingConfig{} cfg.RegisterComponent(config.Cluster, clusterCfg) cfg.RegisterComponent(config.API, apiCfg) cfg.RegisterComponent(config.API, ipfsproxyCfg) @@ -56,6 +61,8 @@ func makeConfigs() (*config.Manager, *cfgs) { cfg.RegisterComponent(config.Monitor, pubsubmonCfg) cfg.RegisterComponent(config.Informer, diskInfCfg) cfg.RegisterComponent(config.Informer, numpinInfCfg) + cfg.RegisterComponent(config.Observations, metricsCfg) + cfg.RegisterComponent(config.Observations, tracingCfg) return cfg, &cfgs{ clusterCfg, apiCfg, @@ -68,6 +75,8 @@ func makeConfigs() (*config.Manager, *cfgs) { pubsubmonCfg, diskInfCfg, 
numpinInfCfg, + metricsCfg, + tracingCfg, } } @@ -77,3 +86,21 @@ func saveConfig(cfg *config.Manager) { checkErr("saving new configuration", err) out("%s configuration written to %s\n", programName, configPath) } + +func propagateTracingConfig(cfgs *cfgs, tracingFlag bool) *cfgs { + // tracingFlag represents the cli flag passed to ipfs-cluster-service daemon. + // It takes priority. If false, fallback to config file value. + tracingValue := tracingFlag + if !tracingFlag { + tracingValue = cfgs.tracingCfg.EnableTracing + } + // propagate to any other interested configuration + cfgs.tracingCfg.EnableTracing = tracingValue + cfgs.clusterCfg.Tracing = tracingValue + cfgs.consensusCfg.Tracing = tracingValue + cfgs.apiCfg.Tracing = tracingValue + cfgs.ipfshttpCfg.Tracing = tracingValue + cfgs.ipfsproxyCfg.Tracing = tracingValue + + return cfgs +} diff --git a/cmd/ipfs-cluster-service/daemon.go b/cmd/ipfs-cluster-service/daemon.go index 2b191bd2..efa2f0d1 100644 --- a/cmd/ipfs-cluster-service/daemon.go +++ b/cmd/ipfs-cluster-service/daemon.go @@ -22,6 +22,7 @@ import ( "github.com/ipfs/ipfs-cluster/ipfsconn/ipfshttp" "github.com/ipfs/ipfs-cluster/monitor/basic" "github.com/ipfs/ipfs-cluster/monitor/pubsubmon" + "github.com/ipfs/ipfs-cluster/observations" "github.com/ipfs/ipfs-cluster/pintracker/maptracker" "github.com/ipfs/ipfs-cluster/pintracker/stateless" "github.com/ipfs/ipfs-cluster/pstoremgr" @@ -43,12 +44,15 @@ func parseBootstraps(flagVal []string) (bootstraps []ma.Multiaddr) { func daemon(c *cli.Context) error { logger.Info("Initializing. For verbose output run with \"-l debug\". Please wait...") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + // Load all the configurations cfgMgr, cfgs := makeConfigs() // Run any migrations if c.Bool("upgrade") { - err := upgrade() + err := upgrade(ctx) if err != errNoSnapshot { checkErr("upgrading state", err) } // otherwise continue @@ -68,6 +72,12 @@ func daemon(c *cli.Context) error { err = cfgMgr.LoadJSONFromFile(configPath) checkErr("loading configuration", err) + if c.Bool("stats") { + cfgs.metricsCfg.EnableStats = true + } + + cfgs = propagateTracingConfig(cfgs, c.Bool("tracing")) + // Cleanup state if bootstrapping raftStaging := false if len(bootstraps) > 0 { @@ -79,9 +89,6 @@ func daemon(c *cli.Context) error { cfgs.clusterCfg.LeaveOnShutdown = true } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := createCluster(ctx, c, cfgs, raftStaging) checkErr("starting cluster", err) @@ -90,9 +97,9 @@ func daemon(c *cli.Context) error { // and timeout. So this can happen in background and we // avoid worrying about error handling here (since Cluster // will realize). 
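
The new propagateTracingConfig helper merges the daemon's --tracing flag with the observations section loaded from disk: the flag can only force tracing on, never switch it off, so a config file with enable_tracing already set still wins when the flag is absent. A minimal standalone sketch of that precedence rule follows; TracingConfig here is a stand-in type, not the real observations.TracingConfig.

package main

import "fmt"

// TracingConfig is a stand-in for the observations tracing section;
// only the field relevant to the precedence rule is modelled.
type TracingConfig struct {
	EnableTracing bool
}

// resolveTracing mirrors the precedence used by propagateTracingConfig:
// an explicit --tracing flag forces tracing on, otherwise the value
// from the configuration file is kept.
func resolveTracing(tracingFlag bool, cfg *TracingConfig) bool {
	if tracingFlag {
		return true
	}
	return cfg.EnableTracing
}

func main() {
	fmt.Println(resolveTracing(true, &TracingConfig{EnableTracing: false}))  // true: flag wins
	fmt.Println(resolveTracing(false, &TracingConfig{EnableTracing: true}))  // true: config wins
	fmt.Println(resolveTracing(false, &TracingConfig{EnableTracing: false})) // false: both off
}
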
- go bootstrap(cluster, bootstraps) + go bootstrap(ctx, cluster, bootstraps) - return handleSignals(cluster) + return handleSignals(ctx, cluster) } func createCluster( @@ -101,6 +108,11 @@ func createCluster( cfgs *cfgs, raftStaging bool, ) (*ipfscluster.Cluster, error) { + err := observations.SetupMetrics(cfgs.metricsCfg) + checkErr("setting up Metrics", err) + + tracer, err := observations.SetupTracing(cfgs.tracingCfg) + checkErr("setting up Tracing", err) host, err := ipfscluster.NewClusterHost(ctx, cfgs.clusterCfg) checkErr("creating libP2P Host", err) @@ -108,7 +120,7 @@ func createCluster( peerstoreMgr := pstoremgr.New(host, cfgs.clusterCfg.GetPeerstorePath()) peerstoreMgr.ImportPeersFromPeerstore(false) - api, err := rest.NewAPIWithHost(cfgs.apiCfg, host) + api, err := rest.NewAPIWithHost(ctx, cfgs.apiCfg, host) checkErr("creating REST API component", err) proxy, err := ipfsproxy.New(cfgs.ipfsproxyCfg) @@ -121,7 +133,7 @@ func createCluster( state := mapstate.NewMapState() - err = validateVersion(cfgs.clusterCfg, cfgs.consensusCfg) + err = validateVersion(ctx, cfgs.clusterCfg, cfgs.consensusCfg) checkErr("validating version", err) raftcon, err := raft.NewConsensus( @@ -149,22 +161,23 @@ func createCluster( mon, alloc, informer, + tracer, ) } // bootstrap will bootstrap this peer to one of the bootstrap addresses // if there are any. -func bootstrap(cluster *ipfscluster.Cluster, bootstraps []ma.Multiaddr) { +func bootstrap(ctx context.Context, cluster *ipfscluster.Cluster, bootstraps []ma.Multiaddr) { for _, bstrap := range bootstraps { logger.Infof("Bootstrapping to %s", bstrap) - err := cluster.Join(bstrap) + err := cluster.Join(ctx, bstrap) if err != nil { logger.Errorf("bootstrap to %s failed: %s", bstrap, err) } } } -func handleSignals(cluster *ipfscluster.Cluster) error { +func handleSignals(ctx context.Context, cluster *ipfscluster.Cluster) error { signalChan := make(chan os.Signal, 20) signal.Notify( signalChan, @@ -178,18 +191,18 @@ func handleSignals(cluster *ipfscluster.Cluster) error { select { case <-signalChan: ctrlcCount++ - handleCtrlC(cluster, ctrlcCount) + handleCtrlC(ctx, cluster, ctrlcCount) case <-cluster.Done(): return nil } } } -func handleCtrlC(cluster *ipfscluster.Cluster, ctrlcCount int) { +func handleCtrlC(ctx context.Context, cluster *ipfscluster.Cluster, ctrlcCount int) { switch ctrlcCount { case 1: go func() { - err := cluster.Shutdown() + err := cluster.Shutdown(ctx) checkErr("shutting down cluster", err) }() case 2: diff --git a/cmd/ipfs-cluster-service/main.go b/cmd/ipfs-cluster-service/main.go index d7f74020..4b2d8762 100644 --- a/cmd/ipfs-cluster-service/main.go +++ b/cmd/ipfs-cluster-service/main.go @@ -3,14 +3,13 @@ package main import ( "bufio" + "context" "fmt" "io" "os" "os/user" "path/filepath" - // _ "net/http/pprof" - ipfscluster "github.com/ipfs/ipfs-cluster" "github.com/ipfs/ipfs-cluster/state/mapstate" "github.com/ipfs/ipfs-cluster/version" @@ -156,10 +155,6 @@ func checkErr(doing string, err error, args ...interface{}) { } func main() { - // go func() { - // log.Println(http.ListenAndServe("localhost:6060", nil)) - // }() - app := cli.NewApp() app.Name = programName app.Usage = "IPFS Cluster node" @@ -289,6 +284,14 @@ configuration. Hidden: true, Usage: "pintracker to use [map,stateless].", }, + cli.BoolFlag{ + Name: "stats", + Usage: "enable stats collection", + }, + cli.BoolFlag{ + Name: "tracing", + Usage: "enable tracing collection", + }, }, Action: daemon, }, @@ -308,18 +311,19 @@ configuration. 
Name: "upgrade", Usage: "upgrade the IPFS Cluster state to the current version", Description: ` -This command upgrades the internal state of the ipfs-cluster node -specified in the latest raft snapshot. The state format is migrated from the -version of the snapshot to the version supported by the current cluster version. +This command upgrades the internal state of the ipfs-cluster node +specified in the latest raft snapshot. The state format is migrated from the +version of the snapshot to the version supported by the current cluster version. To successfully run an upgrade of an entire cluster, shut down each peer without removal, upgrade state using this command, and restart every peer. `, Action: func(c *cli.Context) error { + ctx := context.Background() err := locker.lock() checkErr("acquiring execution lock", err) defer locker.tryUnlock() - err = upgrade() + err = upgrade(ctx) checkErr("upgrading state", err) return nil }, @@ -328,7 +332,7 @@ removal, upgrade state using this command, and restart every peer. Name: "export", Usage: "save the IPFS Cluster state to a json file", Description: ` -This command reads the current cluster state and saves it as json for +This command reads the current cluster state and saves it as json for human readability and editing. Only state formats compatible with this version of ipfs-cluster-service can be exported. By default this command prints the state to stdout. @@ -341,6 +345,7 @@ prints the state to stdout. }, }, Action: func(c *cli.Context) error { + ctx := context.Background() err := locker.lock() checkErr("acquiring execution lock", err) defer locker.tryUnlock() @@ -357,7 +362,7 @@ prints the state to stdout. } defer w.Close() - err = export(w) + err = export(ctx, w) checkErr("exporting state", err) return nil }, @@ -378,6 +383,7 @@ import. If no argument is provided cluster will read json from stdin }, }, Action: func(c *cli.Context) error { + ctx := context.Background() err := locker.lock() checkErr("acquiring execution lock", err) defer locker.tryUnlock() @@ -399,7 +405,7 @@ import. If no argument is provided cluster will read json from stdin checkErr("reading import file", err) } defer r.Close() - err = stateImport(r) + err = stateImport(ctx, r) checkErr("importing state", err) logger.Info("the given state has been correctly imported to this peer. Make sure all peers have consistent states") return nil @@ -412,7 +418,7 @@ import. If no argument is provided cluster will read json from stdin This command removes the persistent state that is loaded on startup to determine this peer's view of the cluster state. While it removes the existing state from the load path, one invocation does not permanently remove this state from disk. 
This command renames cluster's data folder to .old.0, and rotates other -deprecated data folders to .old., etc for some rotation factor before permanatly deleting +deprecated data folders to .old., etc for some rotation factor before permanatly deleting the mth data folder (m currently defaults to 5) `, Flags: []cli.Flag{ diff --git a/cmd/ipfs-cluster-service/state.go b/cmd/ipfs-cluster-service/state.go index 275823bd..18c19d1a 100644 --- a/cmd/ipfs-cluster-service/state.go +++ b/cmd/ipfs-cluster-service/state.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "context" "encoding/json" "errors" "io" @@ -12,12 +13,16 @@ import ( "github.com/ipfs/ipfs-cluster/consensus/raft" "github.com/ipfs/ipfs-cluster/pstoremgr" "github.com/ipfs/ipfs-cluster/state/mapstate" + "go.opencensus.io/trace" ) var errNoSnapshot = errors.New("no snapshot found") -func upgrade() error { - newState, current, err := restoreStateFromDisk() +func upgrade(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "daemon/upgrade") + defer span.End() + + newState, current, err := restoreStateFromDisk(ctx) if err != nil { return err } @@ -39,19 +44,25 @@ func upgrade() error { return raft.SnapshotSave(cfgs.consensusCfg, newState, raftPeers) } -func export(w io.Writer) error { - stateToExport, _, err := restoreStateFromDisk() +func export(ctx context.Context, w io.Writer) error { + ctx, span := trace.StartSpan(ctx, "daemon/export") + defer span.End() + + stateToExport, _, err := restoreStateFromDisk(ctx) if err != nil { return err } - return exportState(stateToExport, w) + return exportState(ctx, stateToExport, w) } // restoreStateFromDisk returns a mapstate containing the latest // snapshot, a flag set to true when the state format has the // current version and an error -func restoreStateFromDisk() (*mapstate.MapState, bool, error) { +func restoreStateFromDisk(ctx context.Context) (*mapstate.MapState, bool, error) { + ctx, span := trace.StartSpan(ctx, "daemon/restoreStateFromDisk") + defer span.End() + cfgMgr, cfgs := makeConfigs() err := cfgMgr.LoadJSONFromFile(configPath) @@ -83,7 +94,7 @@ func restoreStateFromDisk() (*mapstate.MapState, bool, error) { return stateFromSnap, true, nil } - err = stateFromSnap.Migrate(&buf) + err = stateFromSnap.Migrate(ctx, &buf) if err != nil { return nil, false, err } @@ -91,7 +102,10 @@ func restoreStateFromDisk() (*mapstate.MapState, bool, error) { return stateFromSnap, false, nil } -func stateImport(r io.Reader) error { +func stateImport(ctx context.Context, r io.Reader) error { + ctx, span := trace.StartSpan(ctx, "daemon/stateImport") + defer span.End() + cfgMgr, cfgs := makeConfigs() err := cfgMgr.LoadJSONFromFile(configPath) @@ -108,7 +122,7 @@ func stateImport(r io.Reader) error { stateToImport := mapstate.NewMapState() for _, pS := range pinSerials { - err = stateToImport.Add(pS.ToPin()) + err = stateToImport.Add(ctx, pS.ToPin()) if err != nil { return err } @@ -119,7 +133,10 @@ func stateImport(r io.Reader) error { return raft.SnapshotSave(cfgs.consensusCfg, stateToImport, raftPeers) } -func validateVersion(cfg *ipfscluster.Config, cCfg *raft.Config) error { +func validateVersion(ctx context.Context, cfg *ipfscluster.Config, cCfg *raft.Config) error { + ctx, span := trace.StartSpan(ctx, "daemon/validateVersion") + defer span.End() + state := mapstate.NewMapState() r, snapExists, err := raft.LastStateRaw(cCfg) if !snapExists && err != nil { @@ -150,9 +167,12 @@ func validateVersion(cfg *ipfscluster.Config, cCfg *raft.Config) error { } // ExportState saves a json 
representation of a state -func exportState(state *mapstate.MapState, w io.Writer) error { +func exportState(ctx context.Context, state *mapstate.MapState, w io.Writer) error { + ctx, span := trace.StartSpan(ctx, "daemon/exportState") + defer span.End() + // Serialize pins - pins := state.List() + pins := state.List(ctx) pinSerials := make([]api.PinSerial, len(pins), len(pins)) for i, pin := range pins { pinSerials[i] = pin.ToSerial() diff --git a/config/config.go b/config/config.go index aeb1cdbf..16a2a167 100644 --- a/config/config.go +++ b/config/config.go @@ -55,6 +55,7 @@ const ( Monitor Allocator Informer + Observations endTypes // keep this at the end ) @@ -165,15 +166,16 @@ func (cfg *Manager) watchSave(save <-chan struct{}) { // saved using json. Most configuration keys are converted into simple types // like strings, and key names aim to be self-explanatory for the user. type jsonConfig struct { - Cluster *json.RawMessage `json:"cluster"` - Consensus jsonSection `json:"consensus,omitempty"` - API jsonSection `json:"api,omitempty"` - IPFSConn jsonSection `json:"ipfs_connector,omitempty"` - State jsonSection `json:"state,omitempty"` - PinTracker jsonSection `json:"pin_tracker,omitempty"` - Monitor jsonSection `json:"monitor,omitempty"` - Allocator jsonSection `json:"allocator,omitempty"` - Informer jsonSection `json:"informer,omitempty"` + Cluster *json.RawMessage `json:"cluster"` + Consensus jsonSection `json:"consensus,omitempty"` + API jsonSection `json:"api,omitempty"` + IPFSConn jsonSection `json:"ipfs_connector,omitempty"` + State jsonSection `json:"state,omitempty"` + PinTracker jsonSection `json:"pin_tracker,omitempty"` + Monitor jsonSection `json:"monitor,omitempty"` + Allocator jsonSection `json:"allocator,omitempty"` + Informer jsonSection `json:"informer,omitempty"` + Observations jsonSection `json:"observations,omitempty"` } func (jcfg *jsonConfig) getSection(i SectionType) jsonSection { @@ -194,6 +196,8 @@ func (jcfg *jsonConfig) getSection(i SectionType) jsonSection { return jcfg.Allocator case Informer: return jcfg.Informer + case Observations: + return jcfg.Observations default: return nil } @@ -436,26 +440,12 @@ func (cfg *Manager) ToJSON() ([]byte, error) { return nil } - for k, v := range cfg.sections { - var err error - switch k { - case Consensus: - err = updateJSONConfigs(v, &jcfg.Consensus) - case API: - err = updateJSONConfigs(v, &jcfg.API) - case IPFSConn: - err = updateJSONConfigs(v, &jcfg.IPFSConn) - case State: - err = updateJSONConfigs(v, &jcfg.State) - case PinTracker: - err = updateJSONConfigs(v, &jcfg.PinTracker) - case Monitor: - err = updateJSONConfigs(v, &jcfg.Monitor) - case Allocator: - err = updateJSONConfigs(v, &jcfg.Allocator) - case Informer: - err = updateJSONConfigs(v, &jcfg.Informer) + for _, t := range SectionTypes() { + if t == Cluster { + continue } + jsection := jcfg.getSection(t) + err := updateJSONConfigs(cfg.sections[t], &jsection) if err != nil { return nil, err } diff --git a/config_test.go b/config_test.go index ff3a4701..65050bd6 100644 --- a/config_test.go +++ b/config_test.go @@ -8,6 +8,7 @@ import ( "github.com/ipfs/ipfs-cluster/ipfsconn/ipfshttp" "github.com/ipfs/ipfs-cluster/monitor/basic" "github.com/ipfs/ipfs-cluster/monitor/pubsubmon" + "github.com/ipfs/ipfs-cluster/observations" "github.com/ipfs/ipfs-cluster/pintracker/maptracker" "github.com/ipfs/ipfs-cluster/pintracker/stateless" ) @@ -99,8 +100,12 @@ var testingDiskInfCfg = []byte(`{ "metric_type": "freespace" }`) -func testingConfigs() (*Config, *rest.Config, 
*ipfsproxy.Config, *ipfshttp.Config, *raft.Config, *maptracker.Config, *stateless.Config, *basic.Config, *pubsubmon.Config, *disk.Config) { - clusterCfg, apiCfg, proxyCfg, ipfsCfg, consensusCfg, maptrackerCfg, statelesstrkrCfg, basicmonCfg, pubsubmonCfg, diskInfCfg := testingEmptyConfigs() +var testingTracerCfg = []byte(`{ + "enable_tracing": false +}`) + +func testingConfigs() (*Config, *rest.Config, *ipfsproxy.Config, *ipfshttp.Config, *raft.Config, *maptracker.Config, *stateless.Config, *basic.Config, *pubsubmon.Config, *disk.Config, *observations.TracingConfig) { + clusterCfg, apiCfg, proxyCfg, ipfsCfg, consensusCfg, maptrackerCfg, statelesstrkrCfg, basicmonCfg, pubsubmonCfg, diskInfCfg, tracingCfg := testingEmptyConfigs() clusterCfg.LoadJSON(testingClusterCfg) apiCfg.LoadJSON(testingAPICfg) proxyCfg.LoadJSON(testingProxyCfg) @@ -111,11 +116,12 @@ func testingConfigs() (*Config, *rest.Config, *ipfsproxy.Config, *ipfshttp.Confi basicmonCfg.LoadJSON(testingMonCfg) pubsubmonCfg.LoadJSON(testingMonCfg) diskInfCfg.LoadJSON(testingDiskInfCfg) + tracingCfg.LoadJSON(testingTracerCfg) - return clusterCfg, apiCfg, proxyCfg, ipfsCfg, consensusCfg, maptrackerCfg, statelesstrkrCfg, basicmonCfg, pubsubmonCfg, diskInfCfg + return clusterCfg, apiCfg, proxyCfg, ipfsCfg, consensusCfg, maptrackerCfg, statelesstrkrCfg, basicmonCfg, pubsubmonCfg, diskInfCfg, tracingCfg } -func testingEmptyConfigs() (*Config, *rest.Config, *ipfsproxy.Config, *ipfshttp.Config, *raft.Config, *maptracker.Config, *stateless.Config, *basic.Config, *pubsubmon.Config, *disk.Config) { +func testingEmptyConfigs() (*Config, *rest.Config, *ipfsproxy.Config, *ipfshttp.Config, *raft.Config, *maptracker.Config, *stateless.Config, *basic.Config, *pubsubmon.Config, *disk.Config, *observations.TracingConfig) { clusterCfg := &Config{} apiCfg := &rest.Config{} proxyCfg := &ipfsproxy.Config{} @@ -126,7 +132,8 @@ func testingEmptyConfigs() (*Config, *rest.Config, *ipfsproxy.Config, *ipfshttp. 
basicmonCfg := &basic.Config{} pubsubmonCfg := &pubsubmon.Config{} diskInfCfg := &disk.Config{} - return clusterCfg, apiCfg, proxyCfg, ipfshttpCfg, consensusCfg, maptrackerCfg, statelessCfg, basicmonCfg, pubsubmonCfg, diskInfCfg + tracingCfg := &observations.TracingConfig{} + return clusterCfg, apiCfg, proxyCfg, ipfshttpCfg, consensusCfg, maptrackerCfg, statelessCfg, basicmonCfg, pubsubmonCfg, diskInfCfg, tracingCfg } // func TestConfigDefault(t *testing.T) { diff --git a/connect_graph.go b/connect_graph.go index 3a7877dc..09880e00 100644 --- a/connect_graph.go +++ b/connect_graph.go @@ -2,6 +2,7 @@ package ipfscluster import ( peer "github.com/libp2p/go-libp2p-peer" + "go.opencensus.io/trace" "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/rpcutil" @@ -10,19 +11,22 @@ import ( // ConnectGraph returns a description of which cluster peers and ipfs // daemons are connected to each other func (c *Cluster) ConnectGraph() (api.ConnectGraph, error) { + ctx, span := trace.StartSpan(c.ctx, "cluster/ConnectGraph") + defer span.End() + cg := api.ConnectGraph{ IPFSLinks: make(map[peer.ID][]peer.ID), ClusterLinks: make(map[peer.ID][]peer.ID), ClustertoIPFS: make(map[peer.ID]peer.ID), } - members, err := c.consensus.Peers() + members, err := c.consensus.Peers(ctx) if err != nil { return cg, err } peersSerials := make([][]api.IDSerial, len(members), len(members)) - ctxs, cancels := rpcutil.CtxsWithCancel(c.ctx, len(members)) + ctxs, cancels := rpcutil.CtxsWithCancel(ctx, len(members)) defer rpcutil.MultiCancel(cancels) errs := c.rpcClient.MultiCall( diff --git a/consensus/raft/config.go b/consensus/raft/config.go index 32d99dce..f3aca9d9 100644 --- a/consensus/raft/config.go +++ b/consensus/raft/config.go @@ -61,6 +61,10 @@ type Config struct { // A Hashicorp Raft's configuration object. RaftConfig *hraft.Config + + // Tracing enables propagation of contexts across binary boundary in + // consensus component + Tracing bool } // ConfigJSON represents a human-friendly Config diff --git a/consensus/raft/consensus.go b/consensus/raft/consensus.go index 2968099b..550ac8c7 100644 --- a/consensus/raft/consensus.go +++ b/consensus/raft/consensus.go @@ -10,6 +10,9 @@ import ( "sync" "time" + "go.opencensus.io/tag" + "go.opencensus.io/trace" + "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/state" @@ -63,7 +66,7 @@ func NewConsensus( return nil, err } - baseOp := &LogOp{} + baseOp := &LogOp{tracing: cfg.Tracing} logger.Debug("starting Consensus and waiting for a leader...") consensus := libp2praft.NewOpLog(state, baseOp) @@ -97,9 +100,12 @@ func NewConsensus( } // WaitForSync waits for a leader and for the state to be up to date, then returns. 
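
Nearly every exported method touched by this diff (ConnectGraph above, WaitForSync and the rest of the consensus API below) gains the same two-line preamble: derive a child span named "component/Method" from the incoming context and defer its End. A small self-contained illustration of that pattern with OpenCensus; the function names are illustrative, not part of the cluster API.

package tracingexample

import (
	"context"

	"go.opencensus.io/trace"
)

// fetchPins shows the instrumentation preamble repeated across the
// components in this change: open a span named after the component and
// method, defer its End, and pass the derived ctx down so nested calls
// become child spans of the caller's trace.
func fetchPins(ctx context.Context) ([]string, error) {
	ctx, span := trace.StartSpan(ctx, "example/fetchPins")
	defer span.End()

	return listPins(ctx)
}

func listPins(ctx context.Context) ([]string, error) {
	_, span := trace.StartSpan(ctx, "example/listPins")
	defer span.End()

	// Placeholder result; a real implementation would query the state.
	return []string{"QmExample"}, nil
}
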
-func (cc *Consensus) WaitForSync() error { +func (cc *Consensus) WaitForSync(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "consensus/WaitForSync") + defer span.End() + leaderCtx, cancel := context.WithTimeout( - cc.ctx, + ctx, cc.config.WaitForLeaderTimeout) defer cancel() @@ -122,12 +128,12 @@ func (cc *Consensus) WaitForSync() error { return errors.New("error waiting for leader: " + err.Error()) } - err = cc.raft.WaitForVoter(cc.ctx) + err = cc.raft.WaitForVoter(ctx) if err != nil { return errors.New("error waiting to become a Voter: " + err.Error()) } - err = cc.raft.WaitForUpdates(cc.ctx) + err = cc.raft.WaitForUpdates(ctx) if err != nil { return errors.New("error waiting for consensus updates: " + err.Error()) } @@ -152,7 +158,7 @@ func (cc *Consensus) finishBootstrap() { return } - err = cc.WaitForSync() + err = cc.WaitForSync(cc.ctx) if err != nil { return } @@ -164,7 +170,10 @@ func (cc *Consensus) finishBootstrap() { // Shutdown stops the component so it will not process any // more updates. The underlying consensus is permanently // shutdown, along with the libp2p transport. -func (cc *Consensus) Shutdown() error { +func (cc *Consensus) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "consensus/Shutdown") + defer span.End() + cc.shutdownLock.Lock() defer cc.shutdownLock.Unlock() @@ -176,7 +185,7 @@ func (cc *Consensus) Shutdown() error { logger.Info("stopping Consensus component") // Raft Shutdown - err := cc.raft.Shutdown() + err := cc.raft.Shutdown(ctx) if err != nil { logger.Error(err) } @@ -199,11 +208,14 @@ func (cc *Consensus) SetClient(c *rpc.Client) { // Ready returns a channel which is signaled when the Consensus // algorithm has finished bootstrapping and is ready to use -func (cc *Consensus) Ready() <-chan struct{} { +func (cc *Consensus) Ready(ctx context.Context) <-chan struct{} { + ctx, span := trace.StartSpan(ctx, "consensus/Ready") + defer span.End() + return cc.readyCh } -func (cc *Consensus) op(pin api.Pin, t LogOpType) *LogOp { +func (cc *Consensus) op(ctx context.Context, pin api.Pin, t LogOpType) *LogOp { return &LogOp{ Cid: pin.ToSerial(), Type: t, @@ -214,19 +226,23 @@ func (cc *Consensus) op(pin api.Pin, t LogOpType) *LogOp { // note that if the leader just dissappeared, the rpc call will // fail because we haven't heard that it's gone. func (cc *Consensus) redirectToLeader(method string, arg interface{}) (bool, error) { + ctx, span := trace.StartSpan(cc.ctx, "consensus/redirectToLeader") + defer span.End() + var finalErr error // Retry redirects for i := 0; i <= cc.config.CommitRetries; i++ { logger.Debugf("redirect try %d", i) - leader, err := cc.Leader() + leader, err := cc.Leader(ctx) // No leader, wait for one if err != nil { logger.Warning("there seems to be no leader. 
Waiting for one") rctx, cancel := context.WithTimeout( - cc.ctx, - cc.config.WaitForLeaderTimeout) + ctx, + cc.config.WaitForLeaderTimeout, + ) defer cancel() pidstr, err := cc.raft.WaitForLeader(rctx) @@ -247,12 +263,14 @@ func (cc *Consensus) redirectToLeader(method string, arg interface{}) (bool, err } logger.Debugf("redirecting %s to leader: %s", method, leader.Pretty()) - finalErr = cc.rpcClient.Call( + finalErr = cc.rpcClient.CallContext( + ctx, leader, "Cluster", method, arg, - &struct{}{}) + &struct{}{}, + ) if finalErr != nil { logger.Error(finalErr) logger.Error("retrying to redirect request to leader") @@ -267,7 +285,19 @@ func (cc *Consensus) redirectToLeader(method string, arg interface{}) (bool, err } // commit submits a cc.consensus commit. It retries upon failures. -func (cc *Consensus) commit(op *LogOp, rpcOp string, redirectArg interface{}) error { +func (cc *Consensus) commit(ctx context.Context, op *LogOp, rpcOp string, redirectArg interface{}) error { + ctx, span := trace.StartSpan(ctx, "consensus/commit") + defer span.End() + + if cc.config.Tracing { + // required to cross the serialized boundary + op.SpanCtx = span.SpanContext() + tagmap := tag.FromContext(ctx) + if tagmap != nil { + op.TagCtx = tag.Encode(tagmap) + } + } + var finalErr error for i := 0; i <= cc.config.CommitRetries; i++ { logger.Debugf("attempt #%d: committing %+v", i, op) @@ -312,9 +342,12 @@ func (cc *Consensus) commit(op *LogOp, rpcOp string, redirectArg interface{}) er // LogPin submits a Cid to the shared state of the cluster. It will forward // the operation to the leader if this is not it. -func (cc *Consensus) LogPin(pin api.Pin) error { - op := cc.op(pin, LogOpPin) - err := cc.commit(op, "ConsensusLogPin", pin.ToSerial()) +func (cc *Consensus) LogPin(ctx context.Context, pin api.Pin) error { + ctx, span := trace.StartSpan(ctx, "consensus/LogPin") + defer span.End() + + op := cc.op(ctx, pin, LogOpPin) + err := cc.commit(ctx, op, "ConsensusLogPin", pin.ToSerial()) if err != nil { return err } @@ -322,9 +355,12 @@ func (cc *Consensus) LogPin(pin api.Pin) error { } // LogUnpin removes a Cid from the shared state of the cluster. -func (cc *Consensus) LogUnpin(pin api.Pin) error { - op := cc.op(pin, LogOpUnpin) - err := cc.commit(op, "ConsensusLogUnpin", pin.ToSerial()) +func (cc *Consensus) LogUnpin(ctx context.Context, pin api.Pin) error { + ctx, span := trace.StartSpan(ctx, "consensus/LogUnpin") + defer span.End() + + op := cc.op(ctx, pin, LogOpUnpin) + err := cc.commit(ctx, op, "ConsensusLogUnpin", pin.ToSerial()) if err != nil { return err } @@ -333,7 +369,10 @@ func (cc *Consensus) LogUnpin(pin api.Pin) error { // AddPeer adds a new peer to participate in this consensus. It will // forward the operation to the leader if this is not it. 
-func (cc *Consensus) AddPeer(pid peer.ID) error { +func (cc *Consensus) AddPeer(ctx context.Context, pid peer.ID) error { + ctx, span := trace.StartSpan(ctx, "consensus/AddPeer") + defer span.End() + var finalErr error for i := 0; i <= cc.config.CommitRetries; i++ { logger.Debugf("attempt #%d: AddPeer %s", i, pid.Pretty()) @@ -346,7 +385,7 @@ func (cc *Consensus) AddPeer(pid peer.ID) error { } // Being here means we are the leader and can commit cc.shutdownLock.RLock() // do not shutdown while committing - finalErr = cc.raft.AddPeer(peer.IDB58Encode(pid)) + finalErr = cc.raft.AddPeer(ctx, peer.IDB58Encode(pid)) cc.shutdownLock.RUnlock() if finalErr != nil { time.Sleep(cc.config.CommitRetryDelay) @@ -360,7 +399,10 @@ func (cc *Consensus) AddPeer(pid peer.ID) error { // RmPeer removes a peer from this consensus. It will // forward the operation to the leader if this is not it. -func (cc *Consensus) RmPeer(pid peer.ID) error { +func (cc *Consensus) RmPeer(ctx context.Context, pid peer.ID) error { + ctx, span := trace.StartSpan(ctx, "consensus/RmPeer") + defer span.End() + var finalErr error for i := 0; i <= cc.config.CommitRetries; i++ { logger.Debugf("attempt #%d: RmPeer %s", i, pid.Pretty()) @@ -373,7 +415,7 @@ func (cc *Consensus) RmPeer(pid peer.ID) error { } // Being here means we are the leader and can commit cc.shutdownLock.RLock() // do not shutdown while committing - finalErr = cc.raft.RemovePeer(peer.IDB58Encode(pid)) + finalErr = cc.raft.RemovePeer(ctx, peer.IDB58Encode(pid)) cc.shutdownLock.RUnlock() if finalErr != nil { time.Sleep(cc.config.CommitRetryDelay) @@ -389,7 +431,10 @@ func (cc *Consensus) RmPeer(pid peer.ID) error { // if no State has been agreed upon or the state is not // consistent. The returned State is the last agreed-upon // State known by this node. -func (cc *Consensus) State() (state.State, error) { +func (cc *Consensus) State(ctx context.Context) (state.State, error) { + ctx, span := trace.StartSpan(ctx, "consensus/State") + defer span.End() + st, err := cc.consensus.GetLogHead() if err != nil { return nil, err @@ -403,7 +448,10 @@ func (cc *Consensus) State() (state.State, error) { // Leader returns the peerID of the Leader of the // cluster. It returns an error when there is no leader. -func (cc *Consensus) Leader() (peer.ID, error) { +func (cc *Consensus) Leader(ctx context.Context) (peer.ID, error) { + ctx, span := trace.StartSpan(ctx, "consensus/Leader") + defer span.End() + // Note the hard-dependency on raft here... raftactor := cc.actor.(*libp2praft.Actor) return raftactor.Leader() @@ -411,7 +459,10 @@ func (cc *Consensus) Leader() (peer.ID, error) { // Clean removes all raft data from disk. Next time // a full new peer will be bootstrapped. -func (cc *Consensus) Clean() error { +func (cc *Consensus) Clean(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "consensus/Clean") + defer span.End() + cc.shutdownLock.RLock() defer cc.shutdownLock.RUnlock() if !cc.shutdown { @@ -437,7 +488,10 @@ func (cc *Consensus) Rollback(state state.State) error { // Peers return the current list of peers in the consensus. // The list will be sorted alphabetically. 
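
AddPeer, RmPeer and commit all share the same retry discipline: attempt the operation (redirecting to the leader when needed), and on failure sleep for CommitRetryDelay and try again, up to CommitRetries extra attempts. The redirect step is cluster-specific, but the retry skeleton is simple; a sketch under that simplification:

package consensusexample

import (
	"errors"
	"time"
)

// commitWithRetries sketches the retry strategy shared by commit,
// AddPeer and RmPeer: run the operation, and on failure wait a fixed
// delay and try again, for at most retries+1 attempts. The
// redirect-to-leader step of the real code is abstracted into apply.
func commitWithRetries(apply func() error, retries int, delay time.Duration) error {
	var finalErr error
	for i := 0; i <= retries; i++ {
		finalErr = apply()
		if finalErr == nil {
			return nil
		}
		time.Sleep(delay)
	}
	return errors.New("operation failed after retrying: " + finalErr.Error())
}
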
-func (cc *Consensus) Peers() ([]peer.ID, error) { +func (cc *Consensus) Peers(ctx context.Context) ([]peer.ID, error) { + ctx, span := trace.StartSpan(ctx, "consensus/Peers") + defer span.End() + cc.shutdownLock.RLock() // prevent shutdown while here defer cc.shutdownLock.RUnlock() @@ -445,7 +499,7 @@ func (cc *Consensus) Peers() ([]peer.ID, error) { return nil, errors.New("consensus is shutdown") } peers := []peer.ID{} - raftPeers, err := cc.raft.Peers() + raftPeers, err := cc.raft.Peers(ctx) if err != nil { return nil, fmt.Errorf("cannot retrieve list of peers: %s", err) } diff --git a/consensus/raft/consensus_test.go b/consensus/raft/consensus_test.go index b5b85f11..4fc6f458 100644 --- a/consensus/raft/consensus_test.go +++ b/consensus/raft/consensus_test.go @@ -40,6 +40,7 @@ func makeTestingHost(t *testing.T) host.Host { } func testingConsensus(t *testing.T, idn int) *Consensus { + ctx := context.Background() cleanRaft(idn) h := makeTestingHost(t) st := mapstate.NewMapState() @@ -54,27 +55,28 @@ func testingConsensus(t *testing.T, idn int) *Consensus { t.Fatal("cannot create Consensus:", err) } cc.SetClient(test.NewMockRPCClientWithHost(t, h)) - <-cc.Ready() + <-cc.Ready(ctx) return cc } func TestShutdownConsensus(t *testing.T) { + ctx := context.Background() // Bring it up twice to make sure shutdown cleans up properly // but also to make sure raft comes up ok when re-initialized cc := testingConsensus(t, 1) defer cleanRaft(1) - err := cc.Shutdown() + err := cc.Shutdown(ctx) if err != nil { t.Fatal("Consensus cannot shutdown:", err) } - err = cc.Shutdown() // should be fine to shutdown twice + err = cc.Shutdown(ctx) // should be fine to shutdown twice if err != nil { t.Fatal("Consensus should be able to shutdown several times") } cleanRaft(1) cc = testingConsensus(t, 1) - err = cc.Shutdown() + err = cc.Shutdown(ctx) if err != nil { t.Fatal("Consensus cannot shutdown:", err) } @@ -82,50 +84,53 @@ func TestShutdownConsensus(t *testing.T) { } func TestConsensusPin(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) defer cleanRaft(1) // Remember defer runs in LIFO order - defer cc.Shutdown() + defer cc.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) - err := cc.LogPin(testPin(c)) + err := cc.LogPin(ctx, testPin(c)) if err != nil { t.Error("the operation did not make it to the log:", err) } time.Sleep(250 * time.Millisecond) - st, err := cc.State() + st, err := cc.State(ctx) if err != nil { t.Fatal("error gettinng state:", err) } - pins := st.List() + pins := st.List(ctx) if len(pins) != 1 || pins[0].Cid.String() != test.TestCid1 { t.Error("the added pin should be in the state") } } func TestConsensusUnpin(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) defer cleanRaft(1) - defer cc.Shutdown() + defer cc.Shutdown(ctx) c, _ := cid.Decode(test.TestCid2) - err := cc.LogUnpin(api.PinCid(c)) + err := cc.LogUnpin(ctx, api.PinCid(c)) if err != nil { t.Error("the operation did not make it to the log:", err) } } func TestConsensusUpdate(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) defer cleanRaft(1) - defer cc.Shutdown() + defer cc.Shutdown(ctx) // Pin first c1, _ := cid.Decode(test.TestCid1) pin := testPin(c1) pin.Type = api.ShardType - err := cc.LogPin(pin) + err := cc.LogPin(ctx, pin) if err != nil { t.Fatal("the initial operation did not make it to the log:", err) } @@ -134,18 +139,18 @@ func TestConsensusUpdate(t *testing.T) { // Update pin c2, _ := cid.Decode(test.TestCid2) pin.Reference = c2 - err = 
cc.LogPin(pin) + err = cc.LogPin(ctx, pin) if err != nil { t.Error("the update op did not make it to the log:", err) } time.Sleep(250 * time.Millisecond) - st, err := cc.State() + st, err := cc.State(ctx) if err != nil { t.Fatal("error getting state:", err) } - pins := st.List() + pins := st.List(ctx) if len(pins) != 1 || pins[0].Cid.String() != test.TestCid1 { t.Error("the added pin should be in the state") } @@ -155,17 +160,18 @@ func TestConsensusUpdate(t *testing.T) { } func TestConsensusAddPeer(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) cc2 := testingConsensus(t, 2) t.Log(cc.host.ID().Pretty()) t.Log(cc2.host.ID().Pretty()) defer cleanRaft(1) defer cleanRaft(2) - defer cc.Shutdown() - defer cc2.Shutdown() + defer cc.Shutdown(ctx) + defer cc2.Shutdown(ctx) cc.host.Peerstore().AddAddrs(cc2.host.ID(), cc2.host.Addrs(), peerstore.PermanentAddrTTL) - err := cc.AddPeer(cc2.host.ID()) + err := cc.AddPeer(ctx, cc2.host.ID()) if err != nil { t.Error("the operation did not make it to the log:", err) } @@ -177,7 +183,7 @@ func TestConsensusAddPeer(t *testing.T) { t.Fatal(err) } - peers, err := cc2.raft.Peers() + peers, err := cc2.raft.Peers(ctx) if err != nil { t.Fatal(err) } @@ -188,16 +194,17 @@ func TestConsensusAddPeer(t *testing.T) { } func TestConsensusRmPeer(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) cc2 := testingConsensus(t, 2) defer cleanRaft(1) defer cleanRaft(2) - defer cc.Shutdown() - defer cc2.Shutdown() + defer cc.Shutdown(ctx) + defer cc2.Shutdown(ctx) cc.host.Peerstore().AddAddrs(cc2.host.ID(), cc2.host.Addrs(), peerstore.PermanentAddrTTL) - err := cc.AddPeer(cc2.host.ID()) + err := cc.AddPeer(ctx, cc2.host.ID()) if err != nil { t.Error("could not add peer:", err) } @@ -211,7 +218,7 @@ func TestConsensusRmPeer(t *testing.T) { cc.raft.WaitForLeader(ctx) c, _ := cid.Decode(test.TestCid1) - err = cc.LogPin(testPin(c)) + err = cc.LogPin(ctx, testPin(c)) if err != nil { t.Error("could not pin after adding peer:", err) } @@ -219,14 +226,14 @@ func TestConsensusRmPeer(t *testing.T) { time.Sleep(2 * time.Second) // Remove unexisting peer - err = cc.RmPeer(test.TestPeerID1) + err = cc.RmPeer(ctx, test.TestPeerID1) if err != nil { t.Error("the operation did not make it to the log:", err) } // Remove real peer. 
At least the leader can succeed - err = cc2.RmPeer(cc.host.ID()) - err2 := cc.RmPeer(cc2.host.ID()) + err = cc2.RmPeer(ctx, cc.host.ID()) + err2 := cc.RmPeer(ctx, cc2.host.ID()) if err != nil && err2 != nil { t.Error("could not remove peer:", err, err2) } @@ -238,11 +245,12 @@ func TestConsensusRmPeer(t *testing.T) { } func TestConsensusLeader(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) pID := cc.host.ID() defer cleanRaft(1) - defer cc.Shutdown() - l, err := cc.Leader() + defer cc.Shutdown(ctx) + l, err := cc.Leader(ctx) if err != nil { t.Fatal("No leader:", err) } @@ -253,13 +261,14 @@ func TestConsensusLeader(t *testing.T) { } func TestRaftLatestSnapshot(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) defer cleanRaft(1) - defer cc.Shutdown() + defer cc.Shutdown(ctx) // Make pin 1 c1, _ := cid.Decode(test.TestCid1) - err := cc.LogPin(testPin(c1)) + err := cc.LogPin(ctx, testPin(c1)) if err != nil { t.Error("the first pin did not make it to the log:", err) } @@ -272,7 +281,7 @@ func TestRaftLatestSnapshot(t *testing.T) { // Make pin 2 c2, _ := cid.Decode(test.TestCid2) - err = cc.LogPin(testPin(c2)) + err = cc.LogPin(ctx, testPin(c2)) if err != nil { t.Error("the second pin did not make it to the log:", err) } @@ -285,7 +294,7 @@ func TestRaftLatestSnapshot(t *testing.T) { // Make pin 3 c3, _ := cid.Decode(test.TestCid3) - err = cc.LogPin(testPin(c3)) + err = cc.LogPin(ctx, testPin(c3)) if err != nil { t.Error("the third pin did not make it to the log:", err) } @@ -305,11 +314,11 @@ func TestRaftLatestSnapshot(t *testing.T) { if err != nil { t.Fatal("Error while taking snapshot", err) } - err = snapState.Migrate(r) + err = snapState.Migrate(ctx, r) if err != nil { - t.Fatal("Snapshot bytes returned could not restore to state") + t.Fatal("Snapshot bytes returned could not restore to state: ", err) } - pins := snapState.List() + pins := snapState.List(ctx) if len(pins) != 3 { t.Fatal("Latest snapshot not read") } diff --git a/consensus/raft/log_op.go b/consensus/raft/log_op.go index f9a10e51..e21789cb 100644 --- a/consensus/raft/log_op.go +++ b/consensus/raft/log_op.go @@ -1,8 +1,12 @@ package raft import ( + "context" "errors" + "go.opencensus.io/tag" + "go.opencensus.io/trace" + "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/state" @@ -22,15 +26,30 @@ type LogOpType int // It implements the consensus.Op interface and it is used by the // Consensus component. 
type LogOp struct { + SpanCtx trace.SpanContext + TagCtx []byte Cid api.PinSerial Type LogOpType consensus *Consensus + tracing bool } // ApplyTo applies the operation to the State func (op *LogOp) ApplyTo(cstate consensus.State) (consensus.State, error) { - state, ok := cstate.(state.State) var err error + ctx := context.Background() + if op.tracing { + tagmap, err := tag.Decode(op.TagCtx) + if err != nil { + logger.Error(err) + } + ctx = tag.NewContext(ctx, tagmap) + var span *trace.Span + ctx, span = trace.StartSpanWithRemoteParent(ctx, "consensus/raft/logop/ApplyTo", op.SpanCtx) + defer span.End() + } + + state, ok := cstate.(state.State) if !ok { // Should never be here panic("received unexpected state type") @@ -45,12 +64,13 @@ func (op *LogOp) ApplyTo(cstate consensus.State) (consensus.State, error) { switch op.Type { case LogOpPin: - err = state.Add(pinS.ToPin()) + err = state.Add(ctx, pinS.ToPin()) if err != nil { goto ROLLBACK } // Async, we let the PinTracker take care of any problems - op.consensus.rpcClient.Go( + op.consensus.rpcClient.GoContext( + ctx, "", "Cluster", "Track", @@ -59,12 +79,13 @@ func (op *LogOp) ApplyTo(cstate consensus.State) (consensus.State, error) { nil, ) case LogOpUnpin: - err = state.Rm(pinS.DecodeCid()) + err = state.Rm(ctx, pinS.DecodeCid()) if err != nil { goto ROLLBACK } // Async, we let the PinTracker take care of any problems - op.consensus.rpcClient.Go( + op.consensus.rpcClient.GoContext( + ctx, "", "Cluster", "Untrack", diff --git a/consensus/raft/log_op_test.go b/consensus/raft/log_op_test.go index 2ba875e0..e067052a 100644 --- a/consensus/raft/log_op_test.go +++ b/consensus/raft/log_op_test.go @@ -1,6 +1,7 @@ package raft import ( + "context" "testing" cid "github.com/ipfs/go-cid" @@ -11,6 +12,7 @@ import ( ) func TestApplyToPin(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) op := &LogOp{ Cid: api.PinSerial{Cid: test.TestCid1}, @@ -18,17 +20,18 @@ func TestApplyToPin(t *testing.T) { consensus: cc, } defer cleanRaft(1) - defer cc.Shutdown() + defer cc.Shutdown(ctx) st := mapstate.NewMapState() op.ApplyTo(st) - pins := st.List() + pins := st.List(ctx) if len(pins) != 1 || pins[0].Cid.String() != test.TestCid1 { t.Error("the state was not modified correctly") } } func TestApplyToUnpin(t *testing.T) { + ctx := context.Background() cc := testingConsensus(t, 1) op := &LogOp{ Cid: api.PinSerial{Cid: test.TestCid1}, @@ -36,13 +39,13 @@ func TestApplyToUnpin(t *testing.T) { consensus: cc, } defer cleanRaft(1) - defer cc.Shutdown() + defer cc.Shutdown(ctx) st := mapstate.NewMapState() c, _ := cid.Decode(test.TestCid1) - st.Add(testPin(c)) + st.Add(ctx, testPin(c)) op.ApplyTo(st) - pins := st.List() + pins := st.List(ctx) if len(pins) != 0 { t.Error("the state was not modified correctly") } diff --git a/consensus/raft/raft.go b/consensus/raft/raft.go index 1ad4f7e3..2e944676 100644 --- a/consensus/raft/raft.go +++ b/consensus/raft/raft.go @@ -14,6 +14,7 @@ import ( host "github.com/libp2p/go-libp2p-host" peer "github.com/libp2p/go-libp2p-peer" p2praft "github.com/libp2p/go-libp2p-raft" + "go.opencensus.io/trace" "github.com/ipfs/ipfs-cluster/state" ) @@ -268,6 +269,9 @@ func makeServerConf(peers []peer.ID) hraft.Configuration { // WaitForLeader holds until Raft says we have a leader. // Returns uf ctx is cancelled. 
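
ApplyTo below is the receiving side of the metadata that commit() packed into the LogOp: when op.tracing is set it rebuilds the tag map with tag.Decode and starts a span whose remote parent is the committing peer's span, so one pin shows up as a single cross-peer trace. A self-contained sketch of that decode step:

package tracingexample

import (
	"context"
	"log"

	"go.opencensus.io/tag"
	"go.opencensus.io/trace"
)

// applyWithRemoteParent mirrors LogOp.ApplyTo's tracing branch: restore
// the tag map from the encoded bytes and start a span parented to the
// span recorded by the peer that committed the operation.
func applyWithRemoteParent(spanCtx trace.SpanContext, tagCtx []byte) {
	ctx := context.Background()

	if tagmap, err := tag.Decode(tagCtx); err == nil {
		ctx = tag.NewContext(ctx, tagmap)
	} else {
		log.Println("could not decode tag context:", err)
	}

	ctx, span := trace.StartSpanWithRemoteParent(ctx, "example/ApplyTo", spanCtx)
	defer span.End()

	applyToState(ctx)
}

func applyToState(ctx context.Context) {
	_, span := trace.StartSpan(ctx, "example/applyToState")
	defer span.End()
	// ... mutate the shared state here ...
}
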
func (rw *raftWrapper) WaitForLeader(ctx context.Context) (string, error) { + ctx, span := trace.StartSpan(ctx, "consensus/raft/WaitForLeader") + defer span.End() + obsCh := make(chan hraft.Observation, 1) if sixtyfour { // 32-bit systems don't support observers observer := hraft.NewObserver(obsCh, false, nil) @@ -301,6 +305,9 @@ func (rw *raftWrapper) WaitForLeader(ctx context.Context) (string, error) { } func (rw *raftWrapper) WaitForVoter(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "consensus/raft/WaitForVoter") + defer span.End() + logger.Debug("waiting until we are promoted to a voter") pid := hraft.ServerID(peer.IDB58Encode(rw.host.ID())) @@ -334,6 +341,9 @@ func isVoter(srvID hraft.ServerID, cfg hraft.Configuration) bool { // WaitForUpdates holds until Raft has synced to the last index in the log func (rw *raftWrapper) WaitForUpdates(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "consensus/raft/WaitForUpdates") + defer span.End() + logger.Debug("Raft state is catching up to the latest known version. Please wait...") for { select { @@ -353,12 +363,15 @@ func (rw *raftWrapper) WaitForUpdates(ctx context.Context) error { } func (rw *raftWrapper) WaitForPeer(ctx context.Context, pid string, depart bool) error { + ctx, span := trace.StartSpan(ctx, "consensus/raft/WaitForPeer") + defer span.End() + for { select { case <-ctx.Done(): return ctx.Err() default: - peers, err := rw.Peers() + peers, err := rw.Peers(ctx) if err != nil { return err } @@ -432,7 +445,10 @@ func (rw *raftWrapper) snapshotOnShutdown() error { } // Shutdown shutdown Raft and closes the BoltDB. -func (rw *raftWrapper) Shutdown() error { +func (rw *raftWrapper) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "consensus/raft/Shutdown") + defer span.End() + errMsgs := "" err := rw.snapshotOnShutdown() @@ -459,10 +475,13 @@ func (rw *raftWrapper) Shutdown() error { } // AddPeer adds a peer to Raft -func (rw *raftWrapper) AddPeer(peer string) error { +func (rw *raftWrapper) AddPeer(ctx context.Context, peer string) error { + ctx, span := trace.StartSpan(ctx, "consensus/raft/AddPeer") + defer span.End() + // Check that we don't have it to not waste // log entries if so. - peers, err := rw.Peers() + peers, err := rw.Peers(ctx) if err != nil { return err } @@ -475,7 +494,8 @@ func (rw *raftWrapper) AddPeer(peer string) error { hraft.ServerID(peer), hraft.ServerAddress(peer), 0, - 0) // TODO: Extra cfg value? + 0, + ) // TODO: Extra cfg value? err = future.Error() if err != nil { logger.Error("raft cannot add peer: ", err) @@ -484,10 +504,13 @@ func (rw *raftWrapper) AddPeer(peer string) error { } // RemovePeer removes a peer from Raft -func (rw *raftWrapper) RemovePeer(peer string) error { +func (rw *raftWrapper) RemovePeer(ctx context.Context, peer string) error { + ctx, span := trace.StartSpan(ctx, "consensus/RemovePeer") + defer span.End() + // Check that we have it to not waste // log entries if we don't. - peers, err := rw.Peers() + peers, err := rw.Peers(ctx) if err != nil { return err } @@ -503,7 +526,8 @@ func (rw *raftWrapper) RemovePeer(peer string) error { rmFuture := rw.raft.RemoveServer( hraft.ServerID(peer), 0, - 0) // TODO: Extra cfg value? + 0, + ) // TODO: Extra cfg value? err = rmFuture.Error() if err != nil { logger.Error("raft cannot remove peer: ", err) @@ -515,11 +539,17 @@ func (rw *raftWrapper) RemovePeer(peer string) error { // Leader returns Raft's leader. It may be an empty string if // there is no leader or it is unknown. 
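
WaitForPeer and WaitForUpdates now poll inside a select that also watches the caller's context, so a cancelled or timed-out context unblocks them with ctx.Err() instead of looping forever. A ticker-based variant of that polling pattern (the real code uses a default branch with short sleeps, but the shape is the same):

package raftexample

import (
	"context"
	"time"
)

// waitFor polls a condition until it reports done, an error occurs, or
// the caller's context is cancelled, whichever comes first.
func waitFor(ctx context.Context, done func() (bool, error)) error {
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
			ok, err := done()
			if err != nil {
				return err
			}
			if ok {
				return nil
			}
		}
	}
}

A caller would typically pass a context.WithTimeout-derived context and a closure that, for example, checks whether a peer ID is present in the current raft configuration.
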
-func (rw *raftWrapper) Leader() string { +func (rw *raftWrapper) Leader(ctx context.Context) string { + ctx, span := trace.StartSpan(ctx, "consensus/raft/Leader") + defer span.End() + return string(rw.raft.Leader()) } -func (rw *raftWrapper) Peers() ([]string, error) { +func (rw *raftWrapper) Peers(ctx context.Context) ([]string, error) { + ctx, span := trace.StartSpan(ctx, "consensus/raft/Peers") + defer span.End() + ids := make([]string, 0) configFuture := rw.raft.GetConfiguration() diff --git a/informer/disk/disk.go b/informer/disk/disk.go index 67c951ee..25360842 100644 --- a/informer/disk/disk.go +++ b/informer/disk/disk.go @@ -3,12 +3,14 @@ package disk import ( + "context" "fmt" logging "github.com/ipfs/go-log" rpc "github.com/libp2p/go-libp2p-gorpc" "github.com/ipfs/ipfs-cluster/api" + "go.opencensus.io/trace" ) // MetricType identifies the type of metric to fetch from the IPFS daemon. @@ -55,14 +57,20 @@ func (disk *Informer) SetClient(c *rpc.Client) { // Shutdown is called on cluster shutdown. We just invalidate // any metrics from this point. -func (disk *Informer) Shutdown() error { +func (disk *Informer) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "informer/disk/Shutdown") + defer span.End() + disk.rpcClient = nil return nil } // GetMetric returns the metric obtained by this // Informer. -func (disk *Informer) GetMetric() api.Metric { +func (disk *Informer) GetMetric(ctx context.Context) api.Metric { + ctx, span := trace.StartSpan(ctx, "informer/disk/GetMetric") + defer span.End() + if disk.rpcClient == nil { return api.Metric{ Name: disk.Name(), @@ -75,11 +83,14 @@ func (disk *Informer) GetMetric() api.Metric { valid := true - err := disk.rpcClient.Call("", + err := disk.rpcClient.CallContext( + ctx, + "", "Cluster", "IPFSRepoStat", struct{}{}, - &repoStat) + &repoStat, + ) if err != nil { logger.Error(err) valid = false diff --git a/informer/disk/disk_test.go b/informer/disk/disk_test.go index e8a24346..771b48eb 100644 --- a/informer/disk/disk_test.go +++ b/informer/disk/disk_test.go @@ -29,25 +29,27 @@ func (mock *badRPCService) IPFSRepoStat(ctx context.Context, in struct{}, out *a } func Test(t *testing.T) { + ctx := context.Background() cfg := &Config{} cfg.Default() inf, err := NewInformer(cfg) if err != nil { t.Fatal(err) } - defer inf.Shutdown() - m := inf.GetMetric() + defer inf.Shutdown(ctx) + m := inf.GetMetric(ctx) if m.Valid { t.Error("metric should be invalid") } inf.SetClient(test.NewMockRPCClient(t)) - m = inf.GetMetric() + m = inf.GetMetric(ctx) if !m.Valid { t.Error("metric should be valid") } } func TestFreeSpace(t *testing.T) { + ctx := context.Background() cfg := &Config{} cfg.Default() cfg.Type = MetricFreeSpace @@ -56,13 +58,13 @@ func TestFreeSpace(t *testing.T) { if err != nil { t.Fatal(err) } - defer inf.Shutdown() - m := inf.GetMetric() + defer inf.Shutdown(ctx) + m := inf.GetMetric(ctx) if m.Valid { t.Error("metric should be invalid") } inf.SetClient(test.NewMockRPCClient(t)) - m = inf.GetMetric() + m = inf.GetMetric(ctx) if !m.Valid { t.Error("metric should be valid") } @@ -73,6 +75,7 @@ func TestFreeSpace(t *testing.T) { } func TestRepoSize(t *testing.T) { + ctx := context.Background() cfg := &Config{} cfg.Default() cfg.Type = MetricRepoSize @@ -81,13 +84,13 @@ func TestRepoSize(t *testing.T) { if err != nil { t.Fatal(err) } - defer inf.Shutdown() - m := inf.GetMetric() + defer inf.Shutdown(ctx) + m := inf.GetMetric(ctx) if m.Valid { t.Error("metric should be invalid") } inf.SetClient(test.NewMockRPCClient(t)) - m = 
inf.GetMetric() + m = inf.GetMetric(ctx) if !m.Valid { t.Error("metric should be valid") } @@ -98,15 +101,16 @@ func TestRepoSize(t *testing.T) { } func TestWithErrors(t *testing.T) { + ctx := context.Background() cfg := &Config{} cfg.Default() inf, err := NewInformer(cfg) if err != nil { t.Fatal(err) } - defer inf.Shutdown() + defer inf.Shutdown(ctx) inf.SetClient(badRPCClient(t)) - m := inf.GetMetric() + m := inf.GetMetric(ctx) if m.Valid { t.Errorf("metric should be invalid") } diff --git a/informer/numpin/numpin.go b/informer/numpin/numpin.go index 0e026b3e..82086568 100644 --- a/informer/numpin/numpin.go +++ b/informer/numpin/numpin.go @@ -3,11 +3,13 @@ package numpin import ( + "context" "fmt" rpc "github.com/libp2p/go-libp2p-gorpc" "github.com/ipfs/ipfs-cluster/api" + "go.opencensus.io/trace" ) // MetricName specifies the name of our metric @@ -40,7 +42,10 @@ func (npi *Informer) SetClient(c *rpc.Client) { // Shutdown is called on cluster shutdown. We just invalidate // any metrics from this point. -func (npi *Informer) Shutdown() error { +func (npi *Informer) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "informer/numpin/Shutdown") + defer span.End() + npi.rpcClient = nil return nil } @@ -53,7 +58,10 @@ func (npi *Informer) Name() string { // GetMetric contacts the IPFSConnector component and // requests the `pin ls` command. We return the number // of pins in IPFS. -func (npi *Informer) GetMetric() api.Metric { +func (npi *Informer) GetMetric(ctx context.Context) api.Metric { + ctx, span := trace.StartSpan(ctx, "informer/numpin/GetMetric") + defer span.End() + if npi.rpcClient == nil { return api.Metric{ Valid: false, @@ -64,11 +72,14 @@ func (npi *Informer) GetMetric() api.Metric { // make use of the RPC API to obtain information // about the number of pins in IPFS. See RPCAPI docs. - err := npi.rpcClient.Call("", // Local call + err := npi.rpcClient.CallContext( + ctx, + "", // Local call "Cluster", // Service name "IPFSPinLs", // Method name "recursive", // in arg - &pinMap) // out arg + &pinMap, // out arg + ) valid := err == nil diff --git a/informer/numpin/numpin_test.go b/informer/numpin/numpin_test.go index 406a656c..0b8de2af 100644 --- a/informer/numpin/numpin_test.go +++ b/informer/numpin/numpin_test.go @@ -30,18 +30,19 @@ func (mock *mockService) IPFSPinLs(ctx context.Context, in string, out *map[stri } func Test(t *testing.T) { + ctx := context.Background() cfg := &Config{} cfg.Default() inf, err := NewInformer(cfg) if err != nil { t.Fatal(err) } - m := inf.GetMetric() + m := inf.GetMetric(ctx) if m.Valid { t.Error("metric should be invalid") } inf.SetClient(mockRPCClient(t)) - m = inf.GetMetric() + m = inf.GetMetric(ctx) if !m.Valid { t.Error("metric should be valid") } diff --git a/ipfscluster.go b/ipfscluster.go index b73a9e49..7b565c95 100644 --- a/ipfscluster.go +++ b/ipfscluster.go @@ -25,7 +25,7 @@ import ( // (both local and remote), using an instance of rpc.Client. type Component interface { SetClient(*rpc.Client) - Shutdown() error + Shutdown(context.Context) error } // Consensus is a component which keeps a shared state in @@ -37,24 +37,24 @@ type Consensus interface { Component // Returns a channel to signal that the consensus layer is ready // allowing the main component to wait for it during start. 
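
With Component.Shutdown and Informer.GetMetric now taking a context, every informer (disk, numpin) can open spans and forward cancellation into its RPC calls. A toy informer following that updated contract; the metric type here is a stripped-down stand-in for api.Metric, and pinCounter is purely illustrative:

package informerexample

import (
	"context"
	"strconv"

	"go.opencensus.io/trace"
)

// metric models only the fields needed to show the valid/invalid
// distinction used by the real informers.
type metric struct {
	Name  string
	Value string
	Valid bool
}

// pinCounter is a toy informer: count is whatever context-aware call
// produces the number to report (an RPC in the real components).
type pinCounter struct {
	count func(ctx context.Context) (int, error)
}

func (p *pinCounter) Shutdown(ctx context.Context) error {
	_, span := trace.StartSpan(ctx, "informer/example/Shutdown")
	defer span.End()

	p.count = nil // invalidate future metrics, like the real informers do
	return nil
}

func (p *pinCounter) GetMetric(ctx context.Context) metric {
	ctx, span := trace.StartSpan(ctx, "informer/example/GetMetric")
	defer span.End()

	if p.count == nil {
		return metric{Name: "numpin", Valid: false}
	}
	n, err := p.count(ctx)
	if err != nil {
		return metric{Name: "numpin", Valid: false}
	}
	return metric{Name: "numpin", Value: strconv.Itoa(n), Valid: true}
}
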
- Ready() <-chan struct{} + Ready(context.Context) <-chan struct{} // Logs a pin operation - LogPin(c api.Pin) error + LogPin(ctx context.Context, c api.Pin) error // Logs an unpin operation - LogUnpin(c api.Pin) error - AddPeer(p peer.ID) error - RmPeer(p peer.ID) error - State() (state.State, error) + LogUnpin(ctx context.Context, c api.Pin) error + AddPeer(ctx context.Context, p peer.ID) error + RmPeer(ctx context.Context, p peer.ID) error + State(context.Context) (state.State, error) // Provide a node which is responsible to perform // specific tasks which must only run in 1 cluster peer - Leader() (peer.ID, error) + Leader(context.Context) (peer.ID, error) // Only returns when the consensus state has all log // updates applied to it - WaitForSync() error + WaitForSync(context.Context) error // Clean removes all consensus data - Clean() error + Clean(context.Context) error // Peers returns the peerset participating in the Consensus - Peers() ([]peer.ID, error) + Peers(context.Context) ([]peer.ID, error) } // API is a component which offers an API for Cluster. This is @@ -67,33 +67,33 @@ type API interface { // an IPFS daemon. This is a base component. type IPFSConnector interface { Component - ID() (api.IPFSID, error) + ID(context.Context) (api.IPFSID, error) Pin(context.Context, cid.Cid, int) error Unpin(context.Context, cid.Cid) error PinLsCid(context.Context, cid.Cid) (api.IPFSPinStatus, error) PinLs(ctx context.Context, typeFilter string) (map[string]api.IPFSPinStatus, error) // ConnectSwarms make sure this peer's IPFS daemon is connected to // other peers IPFS daemons. - ConnectSwarms() error + ConnectSwarms(context.Context) error // SwarmPeers returns the IPFS daemon's swarm peers - SwarmPeers() (api.SwarmPeers, error) + SwarmPeers(context.Context) (api.SwarmPeers, error) // ConfigKey returns the value for a configuration key. // Subobjects are reached with keypaths as "Parent/Child/GrandChild...". ConfigKey(keypath string) (interface{}, error) // RepoStat returns the current repository size and max limit as // provided by "repo stat". - RepoStat() (api.IPFSRepoStat, error) + RepoStat(context.Context) (api.IPFSRepoStat, error) // BlockPut directly adds a block of data to the IPFS repo - BlockPut(api.NodeWithMeta) error + BlockPut(context.Context, api.NodeWithMeta) error // BlockGet retrieves the raw data of an IPFS block - BlockGet(cid.Cid) ([]byte, error) + BlockGet(context.Context, cid.Cid) ([]byte, error) } // Peered represents a component which needs to be aware of the peers // in the Cluster and of any changes to the peer set. type Peered interface { - AddPeer(p peer.ID) - RmPeer(p peer.ID) + AddPeer(ctx context.Context, p peer.ID) + RmPeer(ctx context.Context, p peer.ID) //SetPeers(peers []peer.ID) } @@ -104,24 +104,24 @@ type PinTracker interface { Component // Track tells the tracker that a Cid is now under its supervision // The tracker may decide to perform an IPFS pin. - Track(api.Pin) error + Track(context.Context, api.Pin) error // Untrack tells the tracker that a Cid is to be forgotten. The tracker // may perform an IPFS unpin operation. - Untrack(cid.Cid) error + Untrack(context.Context, cid.Cid) error // StatusAll returns the list of pins with their local status. - StatusAll() []api.PinInfo + StatusAll(context.Context) []api.PinInfo // Status returns the local status of a given Cid. - Status(cid.Cid) api.PinInfo + Status(context.Context, cid.Cid) api.PinInfo // SyncAll makes sure that all tracked Cids reflect the real IPFS status. 
// It returns the list of pins which were updated by the call. - SyncAll() ([]api.PinInfo, error) + SyncAll(context.Context) ([]api.PinInfo, error) // Sync makes sure that the Cid status reflect the real IPFS status. // It returns the local status of the Cid. - Sync(cid.Cid) (api.PinInfo, error) + Sync(context.Context, cid.Cid) (api.PinInfo, error) // RecoverAll calls Recover() for all pins tracked. - RecoverAll() ([]api.PinInfo, error) + RecoverAll(context.Context) ([]api.PinInfo, error) // Recover retriggers a Pin/Unpin operation in a Cids with error status. - Recover(cid.Cid) (api.PinInfo, error) + Recover(context.Context, cid.Cid) (api.PinInfo, error) } // Informer provides Metric information from a peer. The metrics produced by @@ -131,7 +131,7 @@ type PinTracker interface { type Informer interface { Component Name() string - GetMetric() api.Metric + GetMetric(context.Context) api.Metric } // PinAllocator decides where to pin certain content. In order to make such @@ -146,7 +146,7 @@ type PinAllocator interface { // which are currently pinning the content. The candidates map // contains the metrics for all peers which are eligible for pinning // the content. - Allocate(c cid.Cid, current, candidates, priority map[peer.ID]api.Metric) ([]peer.ID, error) + Allocate(ctx context.Context, c cid.Cid, current, candidates, priority map[peer.ID]api.Metric) ([]peer.ID, error) } // PeerMonitor is a component in charge of publishing a peer's metrics and @@ -160,15 +160,21 @@ type PeerMonitor interface { Component // LogMetric stores a metric. It can be used to manually inject // a metric to a monitor. - LogMetric(api.Metric) error + LogMetric(context.Context, api.Metric) error // PublishMetric sends a metric to the rest of the peers. // How to send it, and to who, is to be decided by the implementation. - PublishMetric(api.Metric) error + PublishMetric(context.Context, api.Metric) error // LatestMetrics returns a map with the latest metrics of matching name // for the current cluster peers. - LatestMetrics(name string) []api.Metric + LatestMetrics(ctx context.Context, name string) []api.Metric // Alerts delivers alerts generated when this peer monitor detects // a problem (i.e. metrics not arriving as expected). Alerts can be used // to trigger self-healing measures or re-pinnings of content. Alerts() <-chan api.Alert } + +// Tracer implements Component as a way +// to shutdown and flush and remaining traces. 
+type Tracer interface { + Component +} diff --git a/ipfscluster_test.go b/ipfscluster_test.go index 42cd324a..4bed31e9 100644 --- a/ipfscluster_test.go +++ b/ipfscluster_test.go @@ -21,6 +21,7 @@ import ( "github.com/ipfs/ipfs-cluster/ipfsconn/ipfshttp" "github.com/ipfs/ipfs-cluster/monitor/basic" "github.com/ipfs/ipfs-cluster/monitor/pubsubmon" + "github.com/ipfs/ipfs-cluster/observations" "github.com/ipfs/ipfs-cluster/pintracker/maptracker" "github.com/ipfs/ipfs-cluster/pintracker/stateless" "github.com/ipfs/ipfs-cluster/state" @@ -122,7 +123,8 @@ func randomBytes() []byte { return bs } -func createComponents(t *testing.T, i int, clusterSecret []byte, staging bool) (host.Host, *Config, *raft.Consensus, []API, IPFSConnector, state.State, PinTracker, PeerMonitor, PinAllocator, Informer, *test.IpfsMock) { +func createComponents(t *testing.T, i int, clusterSecret []byte, staging bool) (host.Host, *Config, *raft.Consensus, []API, IPFSConnector, state.State, PinTracker, PeerMonitor, PinAllocator, Informer, Tracer, *test.IpfsMock) { + ctx := context.Background() mock := test.NewIpfsMock() // //clusterAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", clusterPort+i)) @@ -141,7 +143,7 @@ func createComponents(t *testing.T, i int, clusterSecret []byte, staging bool) ( checkErr(t, err) peername := fmt.Sprintf("peer_%d", i) - clusterCfg, apiCfg, ipfsproxyCfg, ipfshttpCfg, consensusCfg, maptrackerCfg, statelesstrackerCfg, bmonCfg, psmonCfg, diskInfCfg := testingConfigs() + clusterCfg, apiCfg, ipfsproxyCfg, ipfshttpCfg, consensusCfg, maptrackerCfg, statelesstrackerCfg, bmonCfg, psmonCfg, diskInfCfg, tracingCfg := testingConfigs() clusterCfg.ID = pid clusterCfg.Peername = peername @@ -160,9 +162,9 @@ func createComponents(t *testing.T, i int, clusterSecret []byte, staging bool) ( ipfshttpCfg.NodeAddr = nodeAddr consensusCfg.DataFolder = "./e2eTestRaft/" + pid.Pretty() - api, err := rest.NewAPI(apiCfg) + api, err := rest.NewAPI(ctx, apiCfg) checkErr(t, err) - ipfsProxy, err := rest.NewAPI(apiCfg) + ipfsProxy, err := rest.NewAPI(ctx, apiCfg) checkErr(t, err) ipfs, err := ipfshttp.NewConnector(ipfshttpCfg) @@ -178,7 +180,10 @@ func createComponents(t *testing.T, i int, clusterSecret []byte, staging bool) ( raftCon, err := raft.NewConsensus(host, consensusCfg, state, staging) checkErr(t, err) - return host, clusterCfg, raftCon, []API{api, ipfsProxy}, ipfs, state, tracker, mon, alloc, inf, mock + tracer, err := observations.SetupTracing(tracingCfg) + checkErr(t, err) + + return host, clusterCfg, raftCon, []API{api, ipfsProxy}, ipfs, state, tracker, mon, alloc, inf, tracer, mock } func makeMonitor(t *testing.T, h host.Host, bmonCfg *basic.Config, psmonCfg *pubsubmon.Config) PeerMonitor { @@ -209,20 +214,21 @@ func makePinTracker(t *testing.T, pid peer.ID, mptCfg *maptracker.Config, sptCfg return ptrkr } -func createCluster(t *testing.T, host host.Host, clusterCfg *Config, raftCons *raft.Consensus, apis []API, ipfs IPFSConnector, state state.State, tracker PinTracker, mon PeerMonitor, alloc PinAllocator, inf Informer) *Cluster { - cl, err := NewCluster(host, clusterCfg, raftCons, apis, ipfs, state, tracker, mon, alloc, inf) +func createCluster(t *testing.T, host host.Host, clusterCfg *Config, raftCons *raft.Consensus, apis []API, ipfs IPFSConnector, state state.State, tracker PinTracker, mon PeerMonitor, alloc PinAllocator, inf Informer, tracer Tracer) *Cluster { + cl, err := NewCluster(host, clusterCfg, raftCons, apis, ipfs, state, tracker, mon, alloc, inf, tracer) checkErr(t, err) return cl } 
func createOnePeerCluster(t *testing.T, nth int, clusterSecret []byte) (*Cluster, *test.IpfsMock) { - host, clusterCfg, consensusCfg, api, ipfs, state, tracker, mon, alloc, inf, mock := createComponents(t, nth, clusterSecret, false) - cl := createCluster(t, host, clusterCfg, consensusCfg, api, ipfs, state, tracker, mon, alloc, inf) + host, clusterCfg, consensusCfg, api, ipfs, state, tracker, mon, alloc, inf, tracer, mock := createComponents(t, nth, clusterSecret, false) + cl := createCluster(t, host, clusterCfg, consensusCfg, api, ipfs, state, tracker, mon, alloc, inf, tracer) <-cl.Ready() return cl, mock } func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) { + ctx := context.Background() os.RemoveAll("./e2eTestRaft") cfgs := make([]*Config, nClusters, nClusters) raftCons := make([]*raft.Consensus, nClusters, nClusters) @@ -233,6 +239,7 @@ func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) { mons := make([]PeerMonitor, nClusters, nClusters) allocs := make([]PinAllocator, nClusters, nClusters) infs := make([]Informer, nClusters, nClusters) + tracers := make([]Tracer, nClusters, nClusters) ipfsMocks := make([]*test.IpfsMock, nClusters, nClusters) hosts := make([]host.Host, nClusters, nClusters) @@ -243,7 +250,7 @@ func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) { for i := 0; i < nClusters; i++ { // staging = true for all except first (i==0) - hosts[i], cfgs[i], raftCons[i], apis[i], ipfss[i], states[i], trackers[i], mons[i], allocs[i], infs[i], ipfsMocks[i] = createComponents(t, i, testingClusterSecret, i != 0) + hosts[i], cfgs[i], raftCons[i], apis[i], ipfss[i], states[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i], ipfsMocks[i] = createComponents(t, i, testingClusterSecret, i != 0) } // open connections among all hosts @@ -261,14 +268,14 @@ func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) { } // Start first node - clusters[0] = createCluster(t, hosts[0], cfgs[0], raftCons[0], apis[0], ipfss[0], states[0], trackers[0], mons[0], allocs[0], infs[0]) + clusters[0] = createCluster(t, hosts[0], cfgs[0], raftCons[0], apis[0], ipfss[0], states[0], trackers[0], mons[0], allocs[0], infs[0], tracers[0]) <-clusters[0].Ready() bootstrapAddr := clusterAddr(clusters[0]) // Start the rest and join for i := 1; i < nClusters; i++ { - clusters[i] = createCluster(t, hosts[i], cfgs[i], raftCons[i], apis[i], ipfss[i], states[i], trackers[i], mons[i], allocs[i], infs[i]) - err := clusters[i].Join(bootstrapAddr) + clusters[i] = createCluster(t, hosts[i], cfgs[i], raftCons[i], apis[i], ipfss[i], states[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i]) + err := clusters[i].Join(ctx, bootstrapAddr) if err != nil { logger.Error(err) t.Fatal(err) @@ -281,8 +288,9 @@ func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) { } func shutdownClusters(t *testing.T, clusters []*Cluster, m []*test.IpfsMock) { + ctx := context.Background() for i, c := range clusters { - err := c.Shutdown() + err := c.Shutdown(ctx) if err != nil { t.Error(err) } @@ -349,6 +357,7 @@ func waitForLeaderAndMetrics(t *testing.T, clusters []*Cluster) { // Makes sure there is a leader and everyone knows about it. 
func waitForLeader(t *testing.T, clusters []*Cluster) { + ctx := context.Background() timer := time.NewTimer(time.Minute) ticker := time.NewTicker(100 * time.Millisecond) @@ -362,7 +371,7 @@ loop: if cl.shutdownB { continue // skip shutdown clusters } - _, err := cl.consensus.Leader() + _, err := cl.consensus.Leader(ctx) if err != nil { continue loop } @@ -387,11 +396,12 @@ func TestClustersVersion(t *testing.T) { } func TestClustersPeers(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) j := rand.Intn(nClusters) // choose a random cluster peer - peers := clusters[j].Peers() + peers := clusters[j].Peers(ctx) if len(peers) != nClusters { t.Fatal("expected as many peers as clusters") @@ -401,7 +411,7 @@ func TestClustersPeers(t *testing.T) { peerIDMap := make(map[peer.ID]api.ID) for _, c := range clusters { - id := c.ID() + id := c.ID(ctx) clusterIDMap[id.ID] = id } @@ -424,6 +434,7 @@ func TestClustersPeers(t *testing.T) { } func TestClustersPin(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) exampleCid, _ := cid.Decode(test.TestCid1) @@ -435,19 +446,19 @@ func TestClustersPin(t *testing.T) { j := rand.Intn(nClusters) // choose a random cluster peer h, err := prefix.Sum(randomBytes()) // create random cid checkErr(t, err) - err = clusters[j].Pin(api.PinCid(h)) + err = clusters[j].Pin(ctx, api.PinCid(h)) if err != nil { t.Errorf("error pinning %s: %s", h, err) } // Test re-pin - err = clusters[j].Pin(api.PinCid(h)) + err = clusters[j].Pin(ctx, api.PinCid(h)) if err != nil { t.Errorf("error repinning %s: %s", h, err) } } delay() fpinned := func(t *testing.T, c *Cluster) { - status := c.tracker.StatusAll() + status := c.tracker.StatusAll(ctx) for _, v := range status { if v.Status != api.TrackerStatusPinned { t.Errorf("%s should have been pinned but it is %s", v.Cid, v.Status) @@ -460,12 +471,12 @@ func TestClustersPin(t *testing.T) { runF(t, clusters, fpinned) // Unpin everything - pinList := clusters[0].Pins() + pinList := clusters[0].Pins(ctx) for i := 0; i < len(pinList); i++ { // test re-unpin fails j := rand.Intn(nClusters) // choose a random cluster peer - err := clusters[j].Unpin(pinList[i].Cid) + err := clusters[j].Unpin(ctx, pinList[i].Cid) if err != nil { t.Errorf("error unpinning %s: %s", pinList[i].Cid, err) } @@ -473,7 +484,7 @@ func TestClustersPin(t *testing.T) { delay() for i := 0; i < nPins; i++ { j := rand.Intn(nClusters) // choose a random cluster peer - err := clusters[j].Unpin(pinList[i].Cid) + err := clusters[j].Unpin(ctx, pinList[i].Cid) if err == nil { t.Errorf("expected error re-unpinning %s: %s", pinList[i].Cid, err) } @@ -481,7 +492,7 @@ func TestClustersPin(t *testing.T) { delay() funpinned := func(t *testing.T, c *Cluster) { - status := c.tracker.StatusAll() + status := c.tracker.StatusAll(ctx) for _, v := range status { t.Errorf("%s should have been unpinned but it is %s", v.Cid, v.Status) } @@ -490,14 +501,15 @@ func TestClustersPin(t *testing.T) { } func TestClustersStatusAll(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) h, _ := cid.Decode(test.TestCid1) - clusters[0].Pin(api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h)) pinDelay() // Global status f := func(t *testing.T, c *Cluster) { - statuses, err := c.StatusAll() + statuses, err := c.StatusAll(ctx) if err != nil { t.Error(err) } @@ -516,7 +528,7 @@ func TestClustersStatusAll(t 
*testing.T) { t.Error("the hash should have been pinned") } - status, err := c.Status(h) + status, err := c.Status(ctx, h) if err != nil { t.Error(err) } @@ -534,23 +546,24 @@ func TestClustersStatusAll(t *testing.T) { } func TestClustersStatusAllWithErrors(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) h, _ := cid.Decode(test.TestCid1) - clusters[0].Pin(api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h)) pinDelay() // shutdown 1 cluster peer - clusters[1].Shutdown() + clusters[1].Shutdown(ctx) delay() f := func(t *testing.T, c *Cluster) { // skip if it's the shutdown peer - if c.ID().ID == clusters[1].ID().ID { + if c.ID(ctx).ID == clusters[1].ID(ctx).ID { return } - statuses, err := c.StatusAll() + statuses, err := c.StatusAll(ctx) if err != nil { t.Error(err) } @@ -563,7 +576,7 @@ func TestClustersStatusAllWithErrors(t *testing.T) { t.Error("bad number of peers in status") } - errst := stts.PeerMap[clusters[1].ID().ID] + errst := stts.PeerMap[clusters[1].ID(ctx).ID] if errst.Cid.String() != test.TestCid1 { t.Error("errored pinInfo should have a good cid") @@ -574,12 +587,12 @@ func TestClustersStatusAllWithErrors(t *testing.T) { } // now check with Cid status - status, err := c.Status(h) + status, err := c.Status(ctx, h) if err != nil { t.Error(err) } - pinfo := status.PeerMap[clusters[1].ID().ID] + pinfo := status.PeerMap[clusters[1].ID(ctx).ID] if pinfo.Status != api.TrackerStatusClusterError { t.Error("erroring status should be ClusterError") @@ -594,18 +607,19 @@ func TestClustersStatusAllWithErrors(t *testing.T) { } func TestClustersSyncAllLocal(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) h, _ := cid.Decode(test.ErrorCid) // This cid always fails h2, _ := cid.Decode(test.TestCid2) - clusters[0].Pin(api.PinCid(h)) - clusters[0].Pin(api.PinCid(h2)) + clusters[0].Pin(ctx, api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h2)) pinDelay() pinDelay() f := func(t *testing.T, c *Cluster) { // Sync bad ID - infos, err := c.SyncAllLocal() + infos, err := c.SyncAllLocal(ctx) if err != nil { // LocalSync() is asynchronous and should not show an // error even if Recover() fails. 
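Note: the tests above all pass an explicit context into the cluster API (Pin, Status, SyncAllLocal and friends). Besides carrying trace spans, this lets callers bound or cancel individual operations. A minimal sketch of that pattern follows, assuming the Pin signature shown in this patch; pinWithTimeout is an illustrative helper, not part of the changeset.

package ipfscluster

import (
	"context"
	"time"

	"github.com/ipfs/ipfs-cluster/api"
)

// pinWithTimeout pins p on cluster c and gives up after d, showing how the
// context parameter introduced by this patch propagates deadlines.
func pinWithTimeout(c *Cluster, p api.Pin, d time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), d)
	defer cancel() // release the timer even if Pin returns early
	return c.Pin(ctx, p)
}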
@@ -624,17 +638,18 @@ func TestClustersSyncAllLocal(t *testing.T) { } func TestClustersSyncLocal(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) h, _ := cid.Decode(test.ErrorCid) // This cid always fails h2, _ := cid.Decode(test.TestCid2) - clusters[0].Pin(api.PinCid(h)) - clusters[0].Pin(api.PinCid(h2)) + clusters[0].Pin(ctx, api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h2)) pinDelay() pinDelay() f := func(t *testing.T, c *Cluster) { - info, err := c.SyncLocal(h) + info, err := c.SyncLocal(ctx, h) if err != nil { t.Error(err) } @@ -643,7 +658,7 @@ func TestClustersSyncLocal(t *testing.T) { } // Sync good ID - info, err = c.SyncLocal(h2) + info, err = c.SyncLocal(ctx, h2) if err != nil { t.Error(err) } @@ -656,17 +671,18 @@ func TestClustersSyncLocal(t *testing.T) { } func TestClustersSyncAll(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) h, _ := cid.Decode(test.ErrorCid) // This cid always fails h2, _ := cid.Decode(test.TestCid2) - clusters[0].Pin(api.PinCid(h)) - clusters[0].Pin(api.PinCid(h2)) + clusters[0].Pin(ctx, api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h2)) pinDelay() pinDelay() j := rand.Intn(nClusters) // choose a random cluster peer - ginfos, err := clusters[j].SyncAll() + ginfos, err := clusters[j].SyncAll(ctx) if err != nil { t.Fatal(err) } @@ -688,17 +704,18 @@ func TestClustersSyncAll(t *testing.T) { } func TestClustersSync(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) h, _ := cid.Decode(test.ErrorCid) // This cid always fails h2, _ := cid.Decode(test.TestCid2) - clusters[0].Pin(api.PinCid(h)) - clusters[0].Pin(api.PinCid(h2)) + clusters[0].Pin(ctx, api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h2)) pinDelay() pinDelay() j := rand.Intn(nClusters) - ginfo, err := clusters[j].Sync(h) + ginfo, err := clusters[j].Sync(ctx, h) if err != nil { // we always attempt to return a valid response // with errors contained in GlobalPinInfo @@ -730,7 +747,7 @@ func TestClustersSync(t *testing.T) { // Test with a good Cid j = rand.Intn(nClusters) - ginfo, err = clusters[j].Sync(h2) + ginfo, err = clusters[j].Sync(ctx, h2) if err != nil { t.Fatal(err) } @@ -750,6 +767,7 @@ func TestClustersSync(t *testing.T) { } func TestClustersRecoverLocal(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) h, _ := cid.Decode(test.ErrorCid) // This cid always fails @@ -757,26 +775,26 @@ func TestClustersRecoverLocal(t *testing.T) { ttlDelay() - clusters[0].Pin(api.PinCid(h)) - clusters[0].Pin(api.PinCid(h2)) + clusters[0].Pin(ctx, api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h2)) pinDelay() pinDelay() f := func(t *testing.T, c *Cluster) { - info, err := c.RecoverLocal(h) + info, err := c.RecoverLocal(ctx, h) if err != nil { t.Fatal(err) } // Wait for queue to be processed delay() - info = c.StatusLocal(h) + info = c.StatusLocal(ctx, h) if info.Status != api.TrackerStatusPinError { t.Errorf("element is %s and not PinError", info.Status) } // Recover good ID - info, err = c.SyncLocal(h2) + info, err = c.SyncLocal(ctx, h2) if err != nil { t.Error(err) } @@ -789,6 +807,7 @@ func TestClustersRecoverLocal(t *testing.T) { } func TestClustersRecover(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) h, _ := 
cid.Decode(test.ErrorCid) // This cid always fails @@ -796,14 +815,14 @@ func TestClustersRecover(t *testing.T) { ttlDelay() - clusters[0].Pin(api.PinCid(h)) - clusters[0].Pin(api.PinCid(h2)) + clusters[0].Pin(ctx, api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h2)) pinDelay() pinDelay() j := rand.Intn(nClusters) - _, err := clusters[j].Recover(h) + _, err := clusters[j].Recover(ctx, h) if err != nil { // we always attempt to return a valid response // with errors contained in GlobalPinInfo @@ -813,7 +832,7 @@ func TestClustersRecover(t *testing.T) { // Wait for queue to be processed delay() - ginfo, err := clusters[j].Status(h) + ginfo, err := clusters[j].Status(ctx, h) if err != nil { t.Fatal(err) } @@ -840,7 +859,7 @@ func TestClustersRecover(t *testing.T) { // Test with a good Cid j = rand.Intn(nClusters) - ginfo, err = clusters[j].Recover(h2) + ginfo, err = clusters[j].Recover(ctx, h2) if err != nil { t.Fatal(err) } @@ -860,11 +879,12 @@ func TestClustersRecover(t *testing.T) { } func TestClustersShutdown(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) f := func(t *testing.T, c *Cluster) { - err := c.Shutdown() + err := c.Shutdown(ctx) if err != nil { t.Error("should be able to shutdown cleanly") } @@ -876,6 +896,7 @@ func TestClustersShutdown(t *testing.T) { } func TestClustersReplication(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) for _, c := range clusters { @@ -899,14 +920,14 @@ func TestClustersReplication(t *testing.T) { j := rand.Intn(nClusters) // choose a random cluster peer h, err := prefix.Sum(randomBytes()) // create random cid checkErr(t, err) - err = clusters[j].Pin(api.PinCid(h)) + err = clusters[j].Pin(ctx, api.PinCid(h)) if err != nil { t.Error(err) } pinDelay() // check that it is held by exactly nClusters -1 peers - gpi, err := clusters[j].Status(h) + gpi, err := clusters[j].Status(ctx, h) if err != nil { t.Fatal(err) } @@ -932,7 +953,7 @@ func TestClustersReplication(t *testing.T) { } f := func(t *testing.T, c *Cluster) { - pinfos := c.tracker.StatusAll() + pinfos := c.tracker.StatusAll(ctx) if len(pinfos) != nClusters { t.Error("Pinfos does not have the expected pins") } @@ -956,7 +977,7 @@ func TestClustersReplication(t *testing.T) { t.Errorf("Expected 1 remote pin but got %d", numRemote) } - pins := c.Pins() + pins := c.Pins(ctx) for _, pin := range pins { allocs := pin.Allocations if len(allocs) != nClusters-1 { @@ -964,7 +985,7 @@ func TestClustersReplication(t *testing.T) { } for _, a := range allocs { if a == c.id { - pinfo := c.tracker.Status(pin.Cid) + pinfo := c.tracker.Status(ctx, pin.Cid) if pinfo.Status != api.TrackerStatusPinned { t.Errorf("Peer %s was allocated but it is not pinning cid", c.id) } @@ -979,6 +1000,7 @@ func TestClustersReplication(t *testing.T) { // This test checks that we pin with ReplicationFactorMax when // we can func TestClustersReplicationFactorMax(t *testing.T) { + ctx := context.Background() if nClusters < 3 { t.Skip("Need at least 3 peers") } @@ -993,7 +1015,7 @@ func TestClustersReplicationFactorMax(t *testing.T) { ttlDelay() h, _ := cid.Decode(test.TestCid1) - err := clusters[0].Pin(api.PinCid(h)) + err := clusters[0].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } @@ -1001,7 +1023,7 @@ func TestClustersReplicationFactorMax(t *testing.T) { pinDelay() f := func(t *testing.T, c *Cluster) { - p, err := c.PinGet(h) + p, err := c.PinGet(ctx, h) if err != nil { 
t.Fatal(err) } @@ -1024,6 +1046,7 @@ func TestClustersReplicationFactorMax(t *testing.T) { // This tests checks that repinning something that is overpinned // removes some allocations func TestClustersReplicationFactorMaxLower(t *testing.T) { + ctx := context.Background() if nClusters < 5 { t.Skip("Need at least 5 peers") } @@ -1038,14 +1061,14 @@ func TestClustersReplicationFactorMaxLower(t *testing.T) { ttlDelay() // make sure we have places to pin h, _ := cid.Decode(test.TestCid1) - err := clusters[0].Pin(api.PinCid(h)) + err := clusters[0].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } pinDelay() - p1, err := clusters[0].PinGet(h) + p1, err := clusters[0].PinGet(ctx, h) if err != nil { t.Fatal(err) } @@ -1057,14 +1080,14 @@ func TestClustersReplicationFactorMaxLower(t *testing.T) { pin := api.PinCid(h) pin.ReplicationFactorMin = 1 pin.ReplicationFactorMax = 2 - err = clusters[0].Pin(pin) + err = clusters[0].Pin(ctx, pin) if err != nil { t.Fatal(err) } pinDelay() - p2, err := clusters[0].PinGet(h) + p2, err := clusters[0].PinGet(ctx, h) if err != nil { t.Fatal(err) } @@ -1077,6 +1100,7 @@ func TestClustersReplicationFactorMaxLower(t *testing.T) { // This test checks that when not all nodes are available, // we pin in as many as we can aiming for ReplicationFactorMax func TestClustersReplicationFactorInBetween(t *testing.T) { + ctx := context.Background() if nClusters < 5 { t.Skip("Need at least 5 peers") } @@ -1091,13 +1115,13 @@ func TestClustersReplicationFactorInBetween(t *testing.T) { ttlDelay() // Shutdown two peers - clusters[nClusters-1].Shutdown() - clusters[nClusters-2].Shutdown() + clusters[nClusters-1].Shutdown(ctx) + clusters[nClusters-2].Shutdown(ctx) waitForLeaderAndMetrics(t, clusters) h, _ := cid.Decode(test.TestCid1) - err := clusters[0].Pin(api.PinCid(h)) + err := clusters[0].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } @@ -1108,7 +1132,7 @@ func TestClustersReplicationFactorInBetween(t *testing.T) { if c == clusters[nClusters-1] || c == clusters[nClusters-2] { return } - p, err := c.PinGet(h) + p, err := c.PinGet(ctx, h) if err != nil { t.Fatal(err) } @@ -1131,6 +1155,7 @@ func TestClustersReplicationFactorInBetween(t *testing.T) { // This test checks that we do not pin something for which // we cannot reach ReplicationFactorMin func TestClustersReplicationFactorMin(t *testing.T) { + ctx := context.Background() if nClusters < 5 { t.Skip("Need at least 5 peers") } @@ -1143,13 +1168,13 @@ func TestClustersReplicationFactorMin(t *testing.T) { } // Shutdown two peers - clusters[nClusters-1].Shutdown() + clusters[nClusters-1].Shutdown(ctx) waitForLeaderAndMetrics(t, clusters) - clusters[nClusters-2].Shutdown() + clusters[nClusters-2].Shutdown(ctx) waitForLeaderAndMetrics(t, clusters) h, _ := cid.Decode(test.TestCid1) - err := clusters[0].Pin(api.PinCid(h)) + err := clusters[0].Pin(ctx, api.PinCid(h)) if err == nil { t.Error("Pin should have failed as rplMin cannot be satisfied") } @@ -1162,6 +1187,7 @@ func TestClustersReplicationFactorMin(t *testing.T) { // This tests checks that repinning something that has becomed // underpinned actually changes nothing if it's sufficiently pinned func TestClustersReplicationMinMaxNoRealloc(t *testing.T) { + ctx := context.Background() if nClusters < 5 { t.Skip("Need at least 5 peers") } @@ -1176,7 +1202,7 @@ func TestClustersReplicationMinMaxNoRealloc(t *testing.T) { ttlDelay() h, _ := cid.Decode(test.TestCid1) - err := clusters[0].Pin(api.PinCid(h)) + err := clusters[0].Pin(ctx, api.PinCid(h)) if err != nil { 
t.Fatal(err) } @@ -1184,19 +1210,19 @@ func TestClustersReplicationMinMaxNoRealloc(t *testing.T) { pinDelay() // Shutdown two peers - clusters[nClusters-1].Shutdown() + clusters[nClusters-1].Shutdown(ctx) waitForLeaderAndMetrics(t, clusters) - clusters[nClusters-2].Shutdown() + clusters[nClusters-2].Shutdown(ctx) waitForLeaderAndMetrics(t, clusters) - err = clusters[0].Pin(api.PinCid(h)) + err = clusters[0].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } pinDelay() - p, err := clusters[0].PinGet(h) + p, err := clusters[0].PinGet(ctx, h) if err != nil { t.Fatal(err) } @@ -1214,6 +1240,7 @@ func TestClustersReplicationMinMaxNoRealloc(t *testing.T) { // underpinned does re-allocations when it's not sufficiently // pinned anymore func TestClustersReplicationMinMaxRealloc(t *testing.T) { + ctx := context.Background() if nClusters < 5 { t.Skip("Need at least 5 peers") } @@ -1228,14 +1255,14 @@ func TestClustersReplicationMinMaxRealloc(t *testing.T) { ttlDelay() // make sure metrics are in h, _ := cid.Decode(test.TestCid1) - err := clusters[0].Pin(api.PinCid(h)) + err := clusters[0].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } pinDelay() - p, err := clusters[0].PinGet(h) + p, err := clusters[0].PinGet(ctx, h) if err != nil { t.Fatal(err) } @@ -1252,20 +1279,20 @@ func TestClustersReplicationMinMaxRealloc(t *testing.T) { alloc2 := peerIDMap[firstAllocations[1]] safePeer := peerIDMap[firstAllocations[2]] - alloc1.Shutdown() - alloc2.Shutdown() + alloc1.Shutdown(ctx) + alloc2.Shutdown(ctx) waitForLeaderAndMetrics(t, clusters) // Repin - (although this might have been taken of if there was an alert - err = safePeer.Pin(api.PinCid(h)) + err = safePeer.Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } pinDelay() - p, err = safePeer.PinGet(h) + p, err = safePeer.PinGet(ctx, h) if err != nil { t.Fatal(err) } @@ -1297,6 +1324,7 @@ func TestClustersReplicationMinMaxRealloc(t *testing.T) { // In this test we check that repinning something // when a node has gone down will re-assign the pin func TestClustersReplicationRealloc(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) for _, c := range clusters { @@ -1308,7 +1336,7 @@ func TestClustersReplicationRealloc(t *testing.T) { j := rand.Intn(nClusters) h, _ := cid.Decode(test.TestCid1) - err := clusters[j].Pin(api.PinCid(h)) + err := clusters[j].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } @@ -1316,7 +1344,7 @@ func TestClustersReplicationRealloc(t *testing.T) { // Let the pin arrive pinDelay() - pin := clusters[j].Pins()[0] + pin := clusters[j].Pins(ctx)[0] pinSerial := pin.ToSerial() allocs := sort.StringSlice(pinSerial.Allocations) allocs.Sort() @@ -1324,14 +1352,14 @@ func TestClustersReplicationRealloc(t *testing.T) { // Re-pin should work and be allocated to the same // nodes - err = clusters[j].Pin(api.PinCid(h)) + err = clusters[j].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } pinDelay() - pin2 := clusters[j].Pins()[0] + pin2 := clusters[j].Pins(ctx)[0] pinSerial2 := pin2.ToSerial() allocs2 := sort.StringSlice(pinSerial2.Allocations) allocs2.Sort() @@ -1345,12 +1373,12 @@ func TestClustersReplicationRealloc(t *testing.T) { var killedClusterIndex int // find someone that pinned it and kill that cluster for i, c := range clusters { - pinfo := c.tracker.Status(h) + pinfo := c.tracker.Status(ctx, h) if pinfo.Status == api.TrackerStatusPinned { //t.Logf("Killing %s", c.id.Pretty()) killedClusterIndex = i - t.Logf("Shutting down %s", c.ID().ID) 
- c.Shutdown() + t.Logf("Shutting down %s", c.ID(ctx).ID) + c.Shutdown(ctx) break } } @@ -1366,7 +1394,7 @@ func TestClustersReplicationRealloc(t *testing.T) { } // now pin should succeed - err = clusters[j].Pin(api.PinCid(h)) + err = clusters[j].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } @@ -1378,7 +1406,7 @@ func TestClustersReplicationRealloc(t *testing.T) { if i == killedClusterIndex { continue } - pinfo := c.tracker.Status(h) + pinfo := c.tracker.Status(ctx, h) if pinfo.Status == api.TrackerStatusPinned { //t.Log(pinfo.Peer.Pretty()) numPinned++ @@ -1394,6 +1422,7 @@ func TestClustersReplicationRealloc(t *testing.T) { // as many available peers a we need. It's like before, except // more peers are killed. func TestClustersReplicationNotEnoughPeers(t *testing.T) { + ctx := context.Background() if nClusters < 5 { t.Skip("Need at least 5 peers") } @@ -1406,7 +1435,7 @@ func TestClustersReplicationNotEnoughPeers(t *testing.T) { j := rand.Intn(nClusters) h, _ := cid.Decode(test.TestCid1) - err := clusters[j].Pin(api.PinCid(h)) + err := clusters[j].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } @@ -1414,12 +1443,12 @@ func TestClustersReplicationNotEnoughPeers(t *testing.T) { // Let the pin arrive pinDelay() - clusters[0].Shutdown() - clusters[1].Shutdown() + clusters[0].Shutdown(ctx) + clusters[1].Shutdown(ctx) waitForLeaderAndMetrics(t, clusters) - err = clusters[2].Pin(api.PinCid(h)) + err = clusters[2].Pin(ctx, api.PinCid(h)) if err == nil { t.Fatal("expected an error") } @@ -1431,6 +1460,7 @@ func TestClustersReplicationNotEnoughPeers(t *testing.T) { } func TestClustersRebalanceOnPeerDown(t *testing.T) { + ctx := context.Background() if nClusters < 5 { t.Skip("Need at least 5 peers") } @@ -1444,7 +1474,7 @@ func TestClustersRebalanceOnPeerDown(t *testing.T) { // pin something h, _ := cid.Decode(test.TestCid1) - clusters[0].Pin(api.PinCid(h)) + clusters[0].Pin(ctx, api.PinCid(h)) pinDelay() pinLocal := 0 pinRemote := 0 @@ -1452,7 +1482,7 @@ func TestClustersRebalanceOnPeerDown(t *testing.T) { var remotePinner peer.ID var remotePinnerCluster *Cluster - status, _ := clusters[0].Status(h) + status, _ := clusters[0].Status(ctx, h) // check it was correctly pinned for p, pinfo := range status.PeerMap { @@ -1472,7 +1502,7 @@ func TestClustersRebalanceOnPeerDown(t *testing.T) { // kill the local pinner for _, c := range clusters { if c.id == localPinner { - c.Shutdown() + c.Shutdown(ctx) } else if c.id == remotePinner { remotePinnerCluster = c } @@ -1482,7 +1512,7 @@ func TestClustersRebalanceOnPeerDown(t *testing.T) { waitForLeaderAndMetrics(t, clusters) // in case we killed the leader // It should be now pinned in the remote pinner - if s := remotePinnerCluster.tracker.Status(h).Status; s != api.TrackerStatusPinned { + if s := remotePinnerCluster.tracker.Status(ctx, h).Status; s != api.TrackerStatusPinned { t.Errorf("it should be pinned and is %s", s) } } @@ -1558,6 +1588,7 @@ func validateClusterGraph(t *testing.T, graph api.ConnectGraph, clusterIDs map[p // In this test we get a cluster graph report from a random peer in a healthy // fully connected cluster and verify that it is formed as expected. 
func TestClustersGraphConnected(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) @@ -1569,7 +1600,7 @@ func TestClustersGraphConnected(t *testing.T) { clusterIDs := make(map[peer.ID]struct{}) for _, c := range clusters { - id := c.ID().ID + id := c.ID(ctx).ID clusterIDs[id] = struct{}{} } validateClusterGraph(t, graph, clusterIDs) @@ -1579,6 +1610,7 @@ func TestClustersGraphConnected(t *testing.T) { // However now 2 peers have been shutdown and so we do not expect to see // them in the graph func TestClustersGraphUnhealthy(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) if nClusters < 5 { @@ -1600,8 +1632,8 @@ func TestClustersGraphUnhealthy(t *testing.T) { } } - clusters[discon1].Shutdown() - clusters[discon2].Shutdown() + clusters[discon1].Shutdown(ctx) + clusters[discon2].Shutdown(ctx) waitForLeaderAndMetrics(t, clusters) @@ -1615,7 +1647,7 @@ func TestClustersGraphUnhealthy(t *testing.T) { if i == discon1 || i == discon2 { continue } - id := c.ID().ID + id := c.ID(ctx).ID clusterIDs[id] = struct{}{} } validateClusterGraph(t, graph, clusterIDs) @@ -1624,6 +1656,7 @@ func TestClustersGraphUnhealthy(t *testing.T) { // Check that the pin is not re-assigned when a node // that has disabled repinning goes down. func TestClustersDisabledRepinning(t *testing.T) { + ctx := context.Background() clusters, mock := createClusters(t) defer shutdownClusters(t, clusters, mock) for _, c := range clusters { @@ -1636,7 +1669,7 @@ func TestClustersDisabledRepinning(t *testing.T) { j := rand.Intn(nClusters) h, _ := cid.Decode(test.TestCid1) - err := clusters[j].Pin(api.PinCid(h)) + err := clusters[j].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } @@ -1647,11 +1680,11 @@ func TestClustersDisabledRepinning(t *testing.T) { var killedClusterIndex int // find someone that pinned it and kill that cluster for i, c := range clusters { - pinfo := c.tracker.Status(h) + pinfo := c.tracker.Status(ctx, h) if pinfo.Status == api.TrackerStatusPinned { killedClusterIndex = i - t.Logf("Shutting down %s", c.ID().ID) - c.Shutdown() + t.Logf("Shutting down %s", c.ID(ctx).ID) + c.Shutdown(ctx) break } } @@ -1671,7 +1704,7 @@ func TestClustersDisabledRepinning(t *testing.T) { if i == killedClusterIndex { continue } - pinfo := c.tracker.Status(h) + pinfo := c.tracker.Status(ctx, h) if pinfo.Status == api.TrackerStatusPinned { //t.Log(pinfo.Peer.Pretty()) numPinned++ diff --git a/ipfsconn/ipfshttp/config.go b/ipfsconn/ipfshttp/config.go index 1edd398f..abc0d416 100644 --- a/ipfsconn/ipfshttp/config.go +++ b/ipfsconn/ipfshttp/config.go @@ -49,6 +49,9 @@ type Config struct { // Unpin Operation timeout UnpinTimeout time.Duration + + // Tracing flag used to skip tracing specific paths when not enabled. 
+ Tracing bool } type jsonConfig struct { diff --git a/ipfsconn/ipfshttp/ipfshttp.go b/ipfsconn/ipfshttp/ipfshttp.go index b6156eb3..1fd9dab7 100644 --- a/ipfsconn/ipfshttp/ipfshttp.go +++ b/ipfsconn/ipfshttp/ipfshttp.go @@ -15,6 +15,12 @@ import ( "time" "github.com/ipfs/ipfs-cluster/api" + "github.com/ipfs/ipfs-cluster/observations" + + "go.opencensus.io/plugin/ochttp" + "go.opencensus.io/plugin/ochttp/propagation/tracecontext" + "go.opencensus.io/stats" + "go.opencensus.io/trace" cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" @@ -115,6 +121,15 @@ func NewConnector(cfg *Config) (*Connector, error) { } c := &http.Client{} // timeouts are handled by context timeouts + if cfg.Tracing { + c.Transport = &ochttp.Transport{ + Base: http.DefaultTransport, + Propagation: &tracecontext.HTTPFormat{}, + StartOptions: trace.StartOptions{SpanKind: trace.SpanKindClient}, + FormatSpanName: func(req *http.Request) string { return req.Host + ":" + req.URL.Path + ":" + req.Method }, + NewClientTrace: ochttp.NewSpanAnnotatingClientTrace, + } + } ctx, cancel := context.WithCancel(context.Background()) @@ -155,7 +170,7 @@ func (ipfs *Connector) run() { case <-tmr.C: // do not hang this goroutine if this call hangs // otherwise we hang during shutdown - go ipfs.ConnectSwarms() + go ipfs.ConnectSwarms(ipfs.ctx) case <-ipfs.ctx.Done(): return } @@ -171,7 +186,10 @@ func (ipfs *Connector) SetClient(c *rpc.Client) { // Shutdown stops any listeners and stops the component from taking // any requests. -func (ipfs *Connector) Shutdown() error { +func (ipfs *Connector) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/Shutdown") + defer span.End() + ipfs.shutdownLock.Lock() defer ipfs.shutdownLock.Unlock() @@ -196,8 +214,11 @@ func (ipfs *Connector) Shutdown() error { // If the request fails, or the parsing fails, it // returns an error and an empty IPFSID which also // contains the error message. -func (ipfs *Connector) ID() (api.IPFSID, error) { - ctx, cancel := context.WithTimeout(ipfs.ctx, ipfs.config.IPFSRequestTimeout) +func (ipfs *Connector) ID(ctx context.Context) (api.IPFSID, error) { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/ID") + defer span.End() + + ctx, cancel := context.WithTimeout(ctx, ipfs.config.IPFSRequestTimeout) defer cancel() id := api.IPFSID{} body, err := ipfs.postCtx(ctx, "id", "", nil) @@ -236,6 +257,9 @@ func (ipfs *Connector) ID() (api.IPFSID, error) { // Pin performs a pin request against the configured IPFS // daemon. func (ipfs *Connector) Pin(ctx context.Context, hash cid.Cid, maxDepth int) error { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/Pin") + defer span.End() + ctx, cancel := context.WithTimeout(ctx, ipfs.config.PinTimeout) defer cancel() pinStatus, err := ipfs.PinLsCid(ctx, hash) @@ -248,7 +272,7 @@ func (ipfs *Connector) Pin(ctx context.Context, hash cid.Cid, maxDepth int) erro return nil } - defer ipfs.updateInformerMetric() + defer ipfs.updateInformerMetric(ctx) var pinArgs string switch { @@ -268,6 +292,7 @@ func (ipfs *Connector) Pin(ctx context.Context, hash cid.Cid, maxDepth int) erro return err } logger.Debugf("Refs for %s sucessfully fetched", hash) + stats.Record(ctx, observations.PinCountMetric.M(1)) } path := fmt.Sprintf("pin/add?arg=%s&%s", hash, pinArgs) @@ -281,6 +306,9 @@ func (ipfs *Connector) Pin(ctx context.Context, hash cid.Cid, maxDepth int) erro // Unpin performs an unpin request against the configured IPFS // daemon. 
func (ipfs *Connector) Unpin(ctx context.Context, hash cid.Cid) error { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/Unpin") + defer span.End() + ctx, cancel := context.WithTimeout(ctx, ipfs.config.UnpinTimeout) defer cancel() @@ -289,13 +317,14 @@ func (ipfs *Connector) Unpin(ctx context.Context, hash cid.Cid) error { return err } if pinStatus.IsPinned(-1) { - defer ipfs.updateInformerMetric() + defer ipfs.updateInformerMetric(ctx) path := fmt.Sprintf("pin/rm?arg=%s", hash) _, err := ipfs.postCtx(ctx, path, "", nil) - if err == nil { - logger.Info("IPFS Unpin request succeeded:", hash) + if err != nil { + return err } - return err + logger.Info("IPFS Unpin request succeeded:", hash) + stats.Record(ctx, observations.PinCountMetric.M(-1)) } logger.Debug("IPFS object is already unpinned: ", hash) @@ -305,6 +334,9 @@ func (ipfs *Connector) Unpin(ctx context.Context, hash cid.Cid) error { // PinLs performs a "pin ls --type typeFilter" request against the configured // IPFS daemon and returns a map of cid strings and their status. func (ipfs *Connector) PinLs(ctx context.Context, typeFilter string) (map[string]api.IPFSPinStatus, error) { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/PinLs") + defer span.End() + ctx, cancel := context.WithTimeout(ctx, ipfs.config.IPFSRequestTimeout) defer cancel() body, err := ipfs.postCtx(ctx, "pin/ls?type="+typeFilter, "", nil) @@ -333,6 +365,9 @@ func (ipfs *Connector) PinLs(ctx context.Context, typeFilter string) (map[string // "type=recursive" and then, if not found, with "type=direct". It returns an // api.IPFSPinStatus for that hash. func (ipfs *Connector) PinLsCid(ctx context.Context, hash cid.Cid) (api.IPFSPinStatus, error) { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/PinLsCid") + defer span.End() + pinLsType := func(pinType string) ([]byte, error) { ctx, cancel := context.WithTimeout(ctx, ipfs.config.IPFSRequestTimeout) defer cancel() @@ -450,11 +485,15 @@ func (ipfs *Connector) apiURL() string { // ConnectSwarms requests the ipfs addresses of other peers and // triggers ipfs swarm connect requests -func (ipfs *Connector) ConnectSwarms() error { - ctx, cancel := context.WithTimeout(ipfs.ctx, ipfs.config.IPFSRequestTimeout) +func (ipfs *Connector) ConnectSwarms(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/ConnectSwarms") + defer span.End() + + ctx, cancel := context.WithTimeout(ctx, ipfs.config.IPFSRequestTimeout) defer cancel() idsSerial := make([]api.IDSerial, 0) - err := ipfs.rpcClient.Call( + err := ipfs.rpcClient.CallContext( + ctx, "", "Cluster", "Peers", @@ -537,8 +576,11 @@ func getConfigValue(path []string, cfg map[string]interface{}) (interface{}, err // RepoStat returns the DiskUsage and StorageMax repo/stat values from the // ipfs daemon, in bytes, wrapped as an IPFSRepoStat object. -func (ipfs *Connector) RepoStat() (api.IPFSRepoStat, error) { - ctx, cancel := context.WithTimeout(ipfs.ctx, ipfs.config.IPFSRequestTimeout) +func (ipfs *Connector) RepoStat(ctx context.Context) (api.IPFSRepoStat, error) { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/RepoStat") + defer span.End() + + ctx, cancel := context.WithTimeout(ctx, ipfs.config.IPFSRequestTimeout) defer cancel() res, err := ipfs.postCtx(ctx, "repo/stat?size-only=true", "", nil) if err != nil { @@ -556,8 +598,11 @@ func (ipfs *Connector) RepoStat() (api.IPFSRepoStat, error) { } // SwarmPeers returns the peers currently connected to this ipfs daemon. 
-func (ipfs *Connector) SwarmPeers() (api.SwarmPeers, error) { - ctx, cancel := context.WithTimeout(ipfs.ctx, ipfs.config.IPFSRequestTimeout) +func (ipfs *Connector) SwarmPeers(ctx context.Context) (api.SwarmPeers, error) { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/SwarmPeers") + defer span.End() + + ctx, cancel := context.WithTimeout(ctx, ipfs.config.IPFSRequestTimeout) defer cancel() swarm := api.SwarmPeers{} res, err := ipfs.postCtx(ctx, "swarm/peers", "", nil) @@ -586,11 +631,14 @@ func (ipfs *Connector) SwarmPeers() (api.SwarmPeers, error) { // BlockPut triggers an ipfs block put on the given data, inserting the block // into the ipfs daemon's repo. -func (ipfs *Connector) BlockPut(b api.NodeWithMeta) error { +func (ipfs *Connector) BlockPut(ctx context.Context, b api.NodeWithMeta) error { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/BlockPut") + defer span.End() + logger.Debugf("putting block to IPFS: %s", b.Cid) - ctx, cancel := context.WithTimeout(ipfs.ctx, ipfs.config.IPFSRequestTimeout) + ctx, cancel := context.WithTimeout(ctx, ipfs.config.IPFSRequestTimeout) defer cancel() - defer ipfs.updateInformerMetric() + defer ipfs.updateInformerMetric(ctx) mapDir := files.NewMapDirectory( map[string]files.Node{ // IPFS reqs require a wrapping directory @@ -610,8 +658,11 @@ func (ipfs *Connector) BlockPut(b api.NodeWithMeta) error { } // BlockGet retrieves an ipfs block with the given cid -func (ipfs *Connector) BlockGet(c cid.Cid) ([]byte, error) { - ctx, cancel := context.WithTimeout(ipfs.ctx, ipfs.config.IPFSRequestTimeout) +func (ipfs *Connector) BlockGet(ctx context.Context, c cid.Cid) ([]byte, error) { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/BlockGet") + defer span.End() + + ctx, cancel := context.WithTimeout(ctx, ipfs.config.IPFSRequestTimeout) defer cancel() url := "block/get?arg=" + c.String() return ipfs.postCtx(ctx, url, "", nil) @@ -631,7 +682,10 @@ func (ipfs *Connector) shouldUpdateMetric() bool { } // Trigger a broadcast of the local informer metrics. 
-func (ipfs *Connector) updateInformerMetric() error { +func (ipfs *Connector) updateInformerMetric(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "ipfsconn/ipfshttp/updateInformerMetric") + defer span.End() + if !ipfs.shouldUpdateMetric() { return nil } @@ -639,7 +693,7 @@ func (ipfs *Connector) updateInformerMetric() error { var metric api.Metric err := ipfs.rpcClient.GoContext( - ipfs.ctx, + ctx, "", "Cluster", "SendInformerMetric", diff --git a/ipfsconn/ipfshttp/ipfshttp_test.go b/ipfsconn/ipfshttp/ipfshttp_test.go index d74a2dc5..456b192c 100644 --- a/ipfsconn/ipfshttp/ipfshttp_test.go +++ b/ipfsconn/ipfshttp/ipfshttp_test.go @@ -39,15 +39,17 @@ func testIPFSConnector(t *testing.T) (*Connector, *test.IpfsMock) { } func TestNewConnector(t *testing.T) { + ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) } func TestIPFSID(t *testing.T) { + ctx := context.Background() ipfs, mock := testIPFSConnector(t) - defer ipfs.Shutdown() - id, err := ipfs.ID() + defer ipfs.Shutdown(ctx) + id, err := ipfs.ID(ctx) if err != nil { t.Fatal(err) } @@ -61,7 +63,7 @@ func TestIPFSID(t *testing.T) { t.Error("expected no error") } mock.Close() - id, err = ipfs.ID() + id, err = ipfs.ID(ctx) if err == nil { t.Error("expected an error") } @@ -74,7 +76,7 @@ func testPin(t *testing.T, method string) { ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) ipfs.config.PinMethod = method @@ -107,7 +109,7 @@ func TestIPFSUnpin(t *testing.T) { ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) err := ipfs.Unpin(ctx, c) if err != nil { @@ -124,7 +126,7 @@ func TestIPFSPinLsCid(t *testing.T) { ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) c2, _ := cid.Decode(test.TestCid2) @@ -144,7 +146,7 @@ func TestIPFSPinLs(t *testing.T) { ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) c, _ := cid.Decode(test.TestCid1) c2, _ := cid.Decode(test.TestCid2) @@ -165,12 +167,13 @@ func TestIPFSPinLs(t *testing.T) { } func TestIPFSShutdown(t *testing.T) { + ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - if err := ipfs.Shutdown(); err != nil { + if err := ipfs.Shutdown(ctx); err != nil { t.Error("expected a clean shutdown") } - if err := ipfs.Shutdown(); err != nil { + if err := ipfs.Shutdown(ctx); err != nil { t.Error("expected a second clean shutdown") } } @@ -181,18 +184,20 @@ func TestConnectSwarms(t *testing.T) { // ipfs mock // logging.SetDebugLogging() + ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) time.Sleep(time.Second) } func TestSwarmPeers(t *testing.T) { + ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) - swarmPeers, err := ipfs.SwarmPeers() + swarmPeers, err := ipfs.SwarmPeers(ctx) if err != nil { t.Fatal(err) } @@ -208,12 +213,13 @@ func TestSwarmPeers(t *testing.T) { } func TestBlockPut(t *testing.T) { + ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer 
ipfs.Shutdown(ctx) data := []byte(test.TestCid4Data) - err := ipfs.BlockPut(api.NodeWithMeta{ + err := ipfs.BlockPut(ctx, api.NodeWithMeta{ Data: data, Cid: test.TestCid4, Format: "raw", @@ -224,22 +230,23 @@ func TestBlockPut(t *testing.T) { } func TestBlockGet(t *testing.T) { + ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) shardCid, err := cid.Decode(test.TestShardCid) if err != nil { t.Fatal(err) } // Fail when getting before putting - _, err = ipfs.BlockGet(shardCid) + _, err = ipfs.BlockGet(ctx, shardCid) if err == nil { t.Fatal("expected to fail getting unput block") } // Put and then successfully get - err = ipfs.BlockPut(api.NodeWithMeta{ + err = ipfs.BlockPut(ctx, api.NodeWithMeta{ Data: test.TestShardData, Cid: test.TestShardCid, Format: "cbor", @@ -248,7 +255,7 @@ func TestBlockGet(t *testing.T) { t.Fatal(err) } - data, err := ipfs.BlockGet(shardCid) + data, err := ipfs.BlockGet(ctx, shardCid) if err != nil { t.Error(err) } @@ -261,9 +268,9 @@ func TestRepoStat(t *testing.T) { ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) - s, err := ipfs.RepoStat() + s, err := ipfs.RepoStat(ctx) if err != nil { t.Fatal(err) } @@ -278,7 +285,7 @@ func TestRepoStat(t *testing.T) { t.Error("expected success pinning cid") } - s, err = ipfs.RepoStat() + s, err = ipfs.RepoStat(ctx) if err != nil { t.Fatal(err) } @@ -288,9 +295,10 @@ func TestRepoStat(t *testing.T) { } func TestConfigKey(t *testing.T) { + ctx := context.Background() ipfs, mock := testIPFSConnector(t) defer mock.Close() - defer ipfs.Shutdown() + defer ipfs.Shutdown(ctx) v, err := ipfs.ConfigKey("Datastore/StorageMax") if err != nil { diff --git a/monitor/basic/peer_monitor.go b/monitor/basic/peer_monitor.go index 988038a1..ecc8939c 100644 --- a/monitor/basic/peer_monitor.go +++ b/monitor/basic/peer_monitor.go @@ -13,6 +13,7 @@ import ( "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/monitor/metrics" "github.com/ipfs/ipfs-cluster/rpcutil" + "go.opencensus.io/trace" logging "github.com/ipfs/go-log" rpc "github.com/libp2p/go-libp2p-gorpc" @@ -81,7 +82,10 @@ func (mon *Monitor) SetClient(c *rpc.Client) { // Shutdown stops the peer monitor. It particular, it will // not deliver any alerts. -func (mon *Monitor) Shutdown() error { +func (mon *Monitor) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "monitor/basic/Shutdown") + defer span.End() + mon.shutdownLock.Lock() defer mon.shutdownLock.Unlock() @@ -99,25 +103,31 @@ func (mon *Monitor) Shutdown() error { } // LogMetric stores a metric so it can later be retrieved. -func (mon *Monitor) LogMetric(m api.Metric) error { +func (mon *Monitor) LogMetric(ctx context.Context, m api.Metric) error { + ctx, span := trace.StartSpan(ctx, "monitor/basic/LogMetric") + defer span.End() + mon.metrics.Add(m) logger.Debugf("basic monitor logged '%s' metric from '%s'. Expires on %d", m.Name, m.Peer, m.Expire) return nil } // PublishMetric broadcasts a metric to all current cluster peers. 
-func (mon *Monitor) PublishMetric(m api.Metric) error { +func (mon *Monitor) PublishMetric(ctx context.Context, m api.Metric) error { + ctx, span := trace.StartSpan(ctx, "monitor/basic/PublishMetric") + defer span.End() + if m.Discard() { logger.Warningf("discarding invalid metric: %+v", m) return nil } - peers, err := mon.getPeers() + peers, err := mon.getPeers(ctx) if err != nil { return err } - ctxs, cancels := rpcutil.CtxsWithTimeout(mon.ctx, len(peers), m.GetTTL()/2) + ctxs, cancels := rpcutil.CtxsWithTimeout(ctx, len(peers), m.GetTTL()/2) defer rpcutil.MultiCancel(cancels) logger.Debugf( @@ -166,9 +176,13 @@ func (mon *Monitor) PublishMetric(m api.Metric) error { } // getPeers gets the current list of peers from the consensus component -func (mon *Monitor) getPeers() ([]peer.ID, error) { +func (mon *Monitor) getPeers(ctx context.Context) ([]peer.ID, error) { + ctx, span := trace.StartSpan(ctx, "monitor/basic/getPeers") + defer span.End() + var peers []peer.ID - err := mon.rpcClient.Call( + err := mon.rpcClient.CallContext( + ctx, "", "Cluster", "ConsensusPeers", @@ -183,11 +197,14 @@ func (mon *Monitor) getPeers() ([]peer.ID, error) { // LatestMetrics returns last known VALID metrics of a given type. A metric // is only valid if it has not expired and belongs to a current cluster peers. -func (mon *Monitor) LatestMetrics(name string) []api.Metric { +func (mon *Monitor) LatestMetrics(ctx context.Context, name string) []api.Metric { + ctx, span := trace.StartSpan(ctx, "monitor/basic/LatestMetrics") + defer span.End() + latest := mon.metrics.Latest(name) // Make sure we only return metrics in the current peerset - peers, err := mon.getPeers() + peers, err := mon.getPeers(ctx) if err != nil { return []api.Metric{} } diff --git a/monitor/basic/peer_monitor_test.go b/monitor/basic/peer_monitor_test.go index e1dff84c..ddfcdc5f 100644 --- a/monitor/basic/peer_monitor_test.go +++ b/monitor/basic/peer_monitor_test.go @@ -66,21 +66,23 @@ func testPeerMonitorWithHost(t *testing.T, h host.Host) *Monitor { } func TestPeerMonitorShutdown(t *testing.T) { + ctx := context.Background() pm := testPeerMonitor(t) - err := pm.Shutdown() + err := pm.Shutdown(ctx) if err != nil { t.Error(err) } - err = pm.Shutdown() + err = pm.Shutdown(ctx) if err != nil { t.Error(err) } } func TestLogMetricConcurrent(t *testing.T) { + ctx := context.Background() pm := testPeerMonitor(t) - defer pm.Shutdown() + defer pm.Shutdown(ctx) var wg sync.WaitGroup wg.Add(3) @@ -96,7 +98,7 @@ func TestLogMetricConcurrent(t *testing.T) { Valid: true, } mt.SetTTL(150 * time.Millisecond) - pm.LogMetric(mt) + pm.LogMetric(ctx, mt) time.Sleep(75 * time.Millisecond) } } @@ -109,7 +111,7 @@ func TestLogMetricConcurrent(t *testing.T) { last := time.Now().Add(-500 * time.Millisecond) for i := 0; i <= 20; i++ { - lastMtrcs := pm.LatestMetrics("test") + lastMtrcs := pm.LatestMetrics(ctx, "test") // There should always 1 valid LatestMetric "test" if len(lastMtrcs) != 1 { @@ -137,28 +139,29 @@ func TestLogMetricConcurrent(t *testing.T) { } func TestPeerMonitorLogMetric(t *testing.T) { + ctx := context.Background() pm := testPeerMonitor(t) - defer pm.Shutdown() + defer pm.Shutdown(ctx) mf := newMetricFactory() // dont fill window - pm.LogMetric(mf.newMetric("test", test.TestPeerID1)) - pm.LogMetric(mf.newMetric("test", test.TestPeerID2)) - pm.LogMetric(mf.newMetric("test", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test", test.TestPeerID1)) + pm.LogMetric(ctx, mf.newMetric("test", test.TestPeerID2)) + pm.LogMetric(ctx, 
mf.newMetric("test", test.TestPeerID3)) // fill window - pm.LogMetric(mf.newMetric("test2", test.TestPeerID3)) - pm.LogMetric(mf.newMetric("test2", test.TestPeerID3)) - pm.LogMetric(mf.newMetric("test2", test.TestPeerID3)) - pm.LogMetric(mf.newMetric("test2", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test2", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test2", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test2", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test2", test.TestPeerID3)) - latestMetrics := pm.LatestMetrics("testbad") + latestMetrics := pm.LatestMetrics(ctx, "testbad") if len(latestMetrics) != 0 { t.Logf("%+v", latestMetrics) t.Error("metrics should be empty") } - latestMetrics = pm.LatestMetrics("test") + latestMetrics = pm.LatestMetrics(ctx, "test") if len(latestMetrics) != 3 { t.Error("metrics should correspond to 3 hosts") } @@ -182,7 +185,7 @@ func TestPeerMonitorLogMetric(t *testing.T) { } } - latestMetrics = pm.LatestMetrics("test2") + latestMetrics = pm.LatestMetrics(ctx, "test2") if len(latestMetrics) != 1 { t.Fatal("should only be one metric") } @@ -192,18 +195,19 @@ func TestPeerMonitorLogMetric(t *testing.T) { } func TestPeerMonitorPublishMetric(t *testing.T) { + ctx := context.Background() h, err := libp2p.New(context.Background()) if err != nil { t.Fatal(err) } pm := testPeerMonitorWithHost(t, h) - defer pm.Shutdown() + defer pm.Shutdown(ctx) defer h.Close() mf := newMetricFactory() metric := mf.newMetric("test", test.TestPeerID1) - err = pm.PublishMetric(metric) + err = pm.PublishMetric(ctx, metric) // Note mock rpc returns 3 consensus peers and we cannot // push to those so an error is in order and indicates @@ -214,13 +218,14 @@ func TestPeerMonitorPublishMetric(t *testing.T) { } func TestPeerMonitorAlerts(t *testing.T) { + ctx := context.Background() pm := testPeerMonitor(t) - defer pm.Shutdown() + defer pm.Shutdown(ctx) mf := newMetricFactory() mtr := mf.newMetric("test", test.TestPeerID1) mtr.SetTTL(0) - pm.LogMetric(mtr) + pm.LogMetric(ctx, mtr) time.Sleep(time.Second) timeout := time.NewTimer(time.Second * 5) diff --git a/monitor/metrics/checker.go b/monitor/metrics/checker.go index a6e24ecf..76ab185e 100644 --- a/monitor/metrics/checker.go +++ b/monitor/metrics/checker.go @@ -69,12 +69,12 @@ func (mc *Checker) Alerts() <-chan api.Alert { // Watch will trigger regular CheckPeers on the given interval. It will call // peersF to obtain a peerset. It can be stopped by cancelling the context. // Usually you want to launch this in a goroutine. 
-func (mc *Checker) Watch(ctx context.Context, peersF func() ([]peer.ID, error), interval time.Duration) { +func (mc *Checker) Watch(ctx context.Context, peersF func(context.Context) ([]peer.ID, error), interval time.Duration) { ticker := time.NewTicker(interval) for { select { case <-ticker.C: - peers, err := peersF() + peers, err := peersF(ctx) if err != nil { continue } diff --git a/monitor/metrics/checker_test.go b/monitor/metrics/checker_test.go index 45e41841..913832be 100644 --- a/monitor/metrics/checker_test.go +++ b/monitor/metrics/checker_test.go @@ -68,7 +68,7 @@ func TestCheckerWatch(t *testing.T) { metr.SetTTL(100 * time.Millisecond) metrics.Add(metr) - peersF := func() ([]peer.ID, error) { + peersF := func(context.Context) ([]peer.ID, error) { return []peer.ID{test.TestPeerID1}, nil } diff --git a/monitor/pubsubmon/pubsubmon.go b/monitor/pubsubmon/pubsubmon.go index 617d948b..de271112 100644 --- a/monitor/pubsubmon/pubsubmon.go +++ b/monitor/pubsubmon/pubsubmon.go @@ -10,6 +10,7 @@ import ( "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/monitor/metrics" + "go.opencensus.io/trace" logging "github.com/ipfs/go-log" rpc "github.com/libp2p/go-libp2p-gorpc" @@ -101,12 +102,15 @@ func (mon *Monitor) run() { // logFromPubsub logs metrics received in the subscribed topic. func (mon *Monitor) logFromPubsub() { + ctx, span := trace.StartSpan(mon.ctx, "monitor/pubsub/logFromPubsub") + defer span.End() + for { select { - case <-mon.ctx.Done(): + case <-ctx.Done(): return default: - msg, err := mon.subscription.Next(mon.ctx) + msg, err := mon.subscription.Next(ctx) if err != nil { // context cancelled enters here continue } @@ -126,7 +130,7 @@ func (mon *Monitor) logFromPubsub() { metric.Peer, ) - err = mon.LogMetric(metric) + err = mon.LogMetric(ctx, metric) if err != nil { logger.Error(err) continue @@ -143,7 +147,10 @@ func (mon *Monitor) SetClient(c *rpc.Client) { // Shutdown stops the peer monitor. It particular, it will // not deliver any alerts. -func (mon *Monitor) Shutdown() error { +func (mon *Monitor) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "monitor/pubsub/Shutdown") + defer span.End() + mon.shutdownLock.Lock() defer mon.shutdownLock.Unlock() @@ -163,14 +170,20 @@ func (mon *Monitor) Shutdown() error { } // LogMetric stores a metric so it can later be retrieved. -func (mon *Monitor) LogMetric(m api.Metric) error { +func (mon *Monitor) LogMetric(ctx context.Context, m api.Metric) error { + ctx, span := trace.StartSpan(ctx, "monitor/pubsub/LogMetric") + defer span.End() + mon.metrics.Add(m) logger.Debugf("pubsub mon logged '%s' metric from '%s'. Expires on %d", m.Name, m.Peer, m.Expire) return nil } // PublishMetric broadcasts a metric to all current cluster peers. 
-func (mon *Monitor) PublishMetric(m api.Metric) error { +func (mon *Monitor) PublishMetric(ctx context.Context, m api.Metric) error { + ctx, span := trace.StartSpan(ctx, "monitor/pubsub/PublishMetric") + defer span.End() + if m.Discard() { logger.Warningf("discarding invalid metric: %+v", m) return nil @@ -201,9 +214,13 @@ func (mon *Monitor) PublishMetric(m api.Metric) error { } // getPeers gets the current list of peers from the consensus component -func (mon *Monitor) getPeers() ([]peer.ID, error) { +func (mon *Monitor) getPeers(ctx context.Context) ([]peer.ID, error) { + ctx, span := trace.StartSpan(ctx, "monitor/pubsub/getPeers") + defer span.End() + var peers []peer.ID - err := mon.rpcClient.Call( + err := mon.rpcClient.CallContext( + ctx, "", "Cluster", "ConsensusPeers", @@ -218,11 +235,14 @@ func (mon *Monitor) getPeers() ([]peer.ID, error) { // LatestMetrics returns last known VALID metrics of a given type. A metric // is only valid if it has not expired and belongs to a current cluster peers. -func (mon *Monitor) LatestMetrics(name string) []api.Metric { +func (mon *Monitor) LatestMetrics(ctx context.Context, name string) []api.Metric { + ctx, span := trace.StartSpan(ctx, "monitor/pubsub/LatestMetrics") + defer span.End() + latest := mon.metrics.Latest(name) // Make sure we only return metrics in the current peerset - peers, err := mon.getPeers() + peers, err := mon.getPeers(ctx) if err != nil { return []api.Metric{} } diff --git a/monitor/pubsubmon/pubsubmon_test.go b/monitor/pubsubmon/pubsubmon_test.go index eb12b290..410967ce 100644 --- a/monitor/pubsubmon/pubsubmon_test.go +++ b/monitor/pubsubmon/pubsubmon_test.go @@ -56,6 +56,7 @@ func (mf *metricFactory) count() int { } func testPeerMonitor(t *testing.T) (*Monitor, func()) { + ctx := context.Background() h, err := libp2p.New( context.Background(), libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"), @@ -75,7 +76,7 @@ func testPeerMonitor(t *testing.T) (*Monitor, func()) { mon.SetClient(mock) shutdownF := func() { - mon.Shutdown() + mon.Shutdown(ctx) h.Close() } @@ -83,21 +84,23 @@ func testPeerMonitor(t *testing.T) (*Monitor, func()) { } func TestPeerMonitorShutdown(t *testing.T) { + ctx := context.Background() pm, shutdown := testPeerMonitor(t) defer shutdown() - err := pm.Shutdown() + err := pm.Shutdown(ctx) if err != nil { t.Error(err) } - err = pm.Shutdown() + err = pm.Shutdown(ctx) if err != nil { t.Error(err) } } func TestLogMetricConcurrent(t *testing.T) { + ctx := context.Background() pm, shutdown := testPeerMonitor(t) defer shutdown() @@ -115,7 +118,7 @@ func TestLogMetricConcurrent(t *testing.T) { Valid: true, } mt.SetTTL(150 * time.Millisecond) - pm.LogMetric(mt) + pm.LogMetric(ctx, mt) time.Sleep(75 * time.Millisecond) } } @@ -128,7 +131,7 @@ func TestLogMetricConcurrent(t *testing.T) { last := time.Now().Add(-500 * time.Millisecond) for i := 0; i <= 20; i++ { - lastMtrcs := pm.LatestMetrics("test") + lastMtrcs := pm.LatestMetrics(ctx, "test") // There should always 1 valid LatestMetric "test" if len(lastMtrcs) != 1 { @@ -156,28 +159,29 @@ func TestLogMetricConcurrent(t *testing.T) { } func TestPeerMonitorLogMetric(t *testing.T) { + ctx := context.Background() pm, shutdown := testPeerMonitor(t) defer shutdown() mf := newMetricFactory() // dont fill window - pm.LogMetric(mf.newMetric("test", test.TestPeerID1)) - pm.LogMetric(mf.newMetric("test", test.TestPeerID2)) - pm.LogMetric(mf.newMetric("test", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test", test.TestPeerID1)) + pm.LogMetric(ctx, 
mf.newMetric("test", test.TestPeerID2)) + pm.LogMetric(ctx, mf.newMetric("test", test.TestPeerID3)) // fill window - pm.LogMetric(mf.newMetric("test2", test.TestPeerID3)) - pm.LogMetric(mf.newMetric("test2", test.TestPeerID3)) - pm.LogMetric(mf.newMetric("test2", test.TestPeerID3)) - pm.LogMetric(mf.newMetric("test2", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test2", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test2", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test2", test.TestPeerID3)) + pm.LogMetric(ctx, mf.newMetric("test2", test.TestPeerID3)) - latestMetrics := pm.LatestMetrics("testbad") + latestMetrics := pm.LatestMetrics(ctx, "testbad") if len(latestMetrics) != 0 { t.Logf("%+v", latestMetrics) t.Error("metrics should be empty") } - latestMetrics = pm.LatestMetrics("test") + latestMetrics = pm.LatestMetrics(ctx, "test") if len(latestMetrics) != 3 { t.Error("metrics should correspond to 3 hosts") } @@ -201,7 +205,7 @@ func TestPeerMonitorLogMetric(t *testing.T) { } } - latestMetrics = pm.LatestMetrics("test2") + latestMetrics = pm.LatestMetrics(ctx, "test2") if len(latestMetrics) != 1 { t.Fatal("should only be one metric") } @@ -211,6 +215,7 @@ func TestPeerMonitorLogMetric(t *testing.T) { } func TestPeerMonitorPublishMetric(t *testing.T) { + ctx := context.Background() pm, shutdown := testPeerMonitor(t) defer shutdown() @@ -235,7 +240,7 @@ func TestPeerMonitorPublishMetric(t *testing.T) { mf := newMetricFactory() metric := mf.newMetric("test", test.TestPeerID1) - err = pm.PublishMetric(metric) + err = pm.PublishMetric(ctx, metric) if err != nil { t.Fatal(err) } @@ -243,7 +248,7 @@ func TestPeerMonitorPublishMetric(t *testing.T) { time.Sleep(500 * time.Millisecond) checkMetric := func(t *testing.T, pm *Monitor) { - latestMetrics := pm.LatestMetrics("test") + latestMetrics := pm.LatestMetrics(ctx, "test") if len(latestMetrics) != 1 { t.Fatal(pm.host.ID(), "expected 1 published metric") } @@ -266,13 +271,14 @@ func TestPeerMonitorPublishMetric(t *testing.T) { } func TestPeerMonitorAlerts(t *testing.T) { + ctx := context.Background() pm, shutdown := testPeerMonitor(t) defer shutdown() mf := newMetricFactory() mtr := mf.newMetric("test", test.TestPeerID1) mtr.SetTTL(0) - pm.LogMetric(mtr) + pm.LogMetric(ctx, mtr) time.Sleep(time.Second) timeout := time.NewTimer(time.Second * 5) diff --git a/observations/config.go b/observations/config.go new file mode 100644 index 00000000..2f7aa8f4 --- /dev/null +++ b/observations/config.go @@ -0,0 +1,225 @@ +package observations + +import ( + "encoding/json" + "errors" + "fmt" + "time" + + "github.com/kelseyhightower/envconfig" + + "github.com/ipfs/ipfs-cluster/config" + ma "github.com/multiformats/go-multiaddr" +) + +const metricsConfigKey = "metrics" +const tracingConfigKey = "tracing" +const envConfigKey = "cluster_observations" + +// Default values for this Config. +const ( + DefaultEnableStats = false + DefaultPrometheusEndpoint = "/ip4/0.0.0.0/tcp/8888" + DefaultStatsReportingInterval = 2 * time.Second + + DefaultEnableTracing = false + DefaultJaegerAgentEndpoint = "/ip4/0.0.0.0/udp/6831" + DefaultTracingSamplingProb = 0.3 + DefaultTracingServiceName = "cluster-daemon" +) + +// MetricsConfig configures metrics collection. 
+type MetricsConfig struct { + config.Saver + + EnableStats bool + PrometheusEndpoint ma.Multiaddr + StatsReportingInterval time.Duration +} + +type jsonMetricsConfig struct { + EnableStats bool `json:"enable_stats"` + PrometheusEndpoint string `json:"prometheus_endpoint"` + StatsReportingInterval string `json:"reporting_interval"` +} + +// ConfigKey provides a human-friendly identifier for this type of Config. +func (cfg *MetricsConfig) ConfigKey() string { + return metricsConfigKey +} + +// Default sets the fields of this Config to sensible values. +func (cfg *MetricsConfig) Default() error { + cfg.EnableStats = DefaultEnableStats + endpointAddr, _ := ma.NewMultiaddr(DefaultPrometheusEndpoint) + cfg.PrometheusEndpoint = endpointAddr + cfg.StatsReportingInterval = DefaultStatsReportingInterval + + return nil +} + +// Validate checks that the fields of this Config have working values, +// at least in appearance. +func (cfg *MetricsConfig) Validate() error { + if cfg.EnableStats { + if cfg.PrometheusEndpoint == nil { + return errors.New("metrics.prometheus_endpoint is undefined") + } + if cfg.StatsReportingInterval < 0 { + return errors.New("metrics.reporting_interval is invalid") + } + } + return nil +} + +// LoadJSON sets the fields of this Config to the values defined by the JSON +// representation of it, as generated by ToJSON. +func (cfg *MetricsConfig) LoadJSON(raw []byte) error { + jcfg := &jsonMetricsConfig{} + err := json.Unmarshal(raw, jcfg) + if err != nil { + logger.Error("Error unmarshaling observations config") + return err + } + + cfg.Default() + + // override json config with env var + err = envconfig.Process(envConfigKey, jcfg) + if err != nil { + return err + } + + err = cfg.loadMetricsOptions(jcfg) + if err != nil { + return err + } + + return cfg.Validate() +} + +func (cfg *MetricsConfig) loadMetricsOptions(jcfg *jsonMetricsConfig) error { + cfg.EnableStats = jcfg.EnableStats + endpointAddr, err := ma.NewMultiaddr(jcfg.PrometheusEndpoint) + if err != nil { + return fmt.Errorf("loadMetricsOptions: PrometheusEndpoint multiaddr: %v", err) + } + cfg.PrometheusEndpoint = endpointAddr + + return config.ParseDurations( + metricsConfigKey, + &config.DurationOpt{ + Duration: jcfg.StatsReportingInterval, + Dst: &cfg.StatsReportingInterval, + Name: "metrics.reporting_interval", + }, + ) +} + +// ToJSON generates a human-friendly JSON representation of this Config. +func (cfg *MetricsConfig) ToJSON() ([]byte, error) { + jcfg := &jsonMetricsConfig{ + EnableStats: cfg.EnableStats, + PrometheusEndpoint: cfg.PrometheusEndpoint.String(), + StatsReportingInterval: cfg.StatsReportingInterval.String(), + } + + return config.DefaultJSONMarshal(jcfg) +} + +// TracingConfig configures tracing. +type TracingConfig struct { + config.Saver + + EnableTracing bool + JaegerAgentEndpoint ma.Multiaddr + TracingSamplingProb float64 + TracingServiceName string +} + +type jsonTracingConfig struct { + EnableTracing bool `json:"enable_tracing"` + JaegerAgentEndpoint string `json:"jaeger_agent_endpoint"` + TracingSamplingProb float64 `json:"sampling_prob"` + TracingServiceName string `json:"service_name"` +} + +// ConfigKey provides a human-friendly identifier for this type of Config. +func (cfg *TracingConfig) ConfigKey() string { + return tracingConfigKey +} + +// Default sets the fields of this Config to sensible values. 
+func (cfg *TracingConfig) Default() error { + cfg.EnableTracing = DefaultEnableTracing + agentAddr, _ := ma.NewMultiaddr(DefaultJaegerAgentEndpoint) + cfg.JaegerAgentEndpoint = agentAddr + cfg.TracingSamplingProb = DefaultTracingSamplingProb + cfg.TracingServiceName = DefaultTracingServiceName + return nil +} + +// Validate checks that the fields of this Config have working values, +// at least in appearance. +func (cfg *TracingConfig) Validate() error { + if cfg.EnableTracing { + if cfg.JaegerAgentEndpoint == nil { + return errors.New("tracing.jaeger_agent_endpoint is undefined") + } + if cfg.TracingSamplingProb < 0 { + return errors.New("tracing.sampling_prob is invalid") + } + } + return nil +} + +// LoadJSON sets the fields of this Config to the values defined by the JSON +// representation of it, as generated by ToJSON. +func (cfg *TracingConfig) LoadJSON(raw []byte) error { + jcfg := &jsonTracingConfig{} + err := json.Unmarshal(raw, jcfg) + if err != nil { + logger.Error("Error unmarshaling observations config") + return err + } + + cfg.Default() + + // override json config with env var + err = envconfig.Process(envConfigKey, jcfg) + if err != nil { + return err + } + + err = cfg.loadTracingOptions(jcfg) + if err != nil { + return err + } + + return cfg.Validate() +} + +func (cfg *TracingConfig) loadTracingOptions(jcfg *jsonTracingConfig) error { + cfg.EnableTracing = jcfg.EnableTracing + agentAddr, err := ma.NewMultiaddr(jcfg.JaegerAgentEndpoint) + if err != nil { + return fmt.Errorf("loadTracingOptions: JaegerAgentEndpoint multiaddr: %v", err) + } + cfg.JaegerAgentEndpoint = agentAddr + cfg.TracingSamplingProb = jcfg.TracingSamplingProb + cfg.TracingServiceName = jcfg.TracingServiceName + + return nil +} + +// ToJSON generates a human-friendly JSON representation of this Config. +func (cfg *TracingConfig) ToJSON() ([]byte, error) { + jcfg := &jsonTracingConfig{ + EnableTracing: cfg.EnableTracing, + JaegerAgentEndpoint: cfg.JaegerAgentEndpoint.String(), + TracingSamplingProb: cfg.TracingSamplingProb, + TracingServiceName: cfg.TracingServiceName, + } + + return config.DefaultJSONMarshal(jcfg) +} diff --git a/observations/metrics.go b/observations/metrics.go new file mode 100644 index 00000000..64e56253 --- /dev/null +++ b/observations/metrics.go @@ -0,0 +1,72 @@ +package observations + +import ( + "go.opencensus.io/stats" + "go.opencensus.io/stats/view" + "go.opencensus.io/tag" + + logging "github.com/ipfs/go-log" +) + +var logger = logging.Logger("observations") + +var ( + // taken from ocgrpc (https://github.com/census-instrumentation/opencensus-go/blob/master/plugin/ocgrpc/stats_common.go) + latencyDistribution = view.Distribution(0, 0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1, 2, 3, 4, 5, 6, 8, 10, 13, 16, 20, 25, 30, 40, 50, 65, 80, 100, 130, 160, 200, 250, 300, 400, 500, 650, 800, 1000, 2000, 5000, 10000, 20000, 50000, 100000) + + bytesDistribution = view.Distribution(0, 24, 32, 64, 128, 256, 512, 1024, 2048, 4096, 16384, 65536, 262144, 1048576) +) + +// opencensus attributes +var ( + ClientIPAttribute = "http.client.ip" +) + +// opencensus keys +var ( + HostKey = makeKey("host") +) + +// opencensus metrics +var ( + // PinCountMetric counts the number of pins ipfs-cluster is tracking. + PinCountMetric = stats.Int64("cluster/pin_count", "Number of pins", stats.UnitDimensionless) + // TrackerPinCountMetric counts the number of pins the local peer is tracking. 
+ TrackerPinCountMetric = stats.Int64("pintracker/pin_count", "Number of pins", stats.UnitDimensionless) + // PeerCountMetric counts the number of ipfs-cluster peers currently in the cluster. + PeerCountMetric = stats.Int64("cluster/peer_count", "Number of cluster peers", stats.UnitDimensionless) +) + +// opencensus views, which are just aggregations of the metrics +var ( + PinCountView = &view.View{ + Measure: PinCountMetric, + Aggregation: view.Sum(), + } + + TrackerPinCountView = &view.View{ + Measure: TrackerPinCountMetric, + TagKeys: []tag.Key{HostKey}, + Aggregation: view.Sum(), + } + + PeerCountView = &view.View{ + Measure: PeerCountMetric, + TagKeys: []tag.Key{HostKey}, + Aggregation: view.Count(), + } + + DefaultViews = []*view.View{ + PinCountView, + TrackerPinCountView, + PeerCountView, + } +) + +func makeKey(name string) tag.Key { + key, err := tag.NewKey(name) + if err != nil { + logger.Fatal(err) + } + return key +} diff --git a/observations/setup.go b/observations/setup.go new file mode 100644 index 00000000..009ea239 --- /dev/null +++ b/observations/setup.go @@ -0,0 +1,157 @@ +package observations + +import ( + "context" + "expvar" + "net/http" + "net/http/pprof" + + ocgorpc "github.com/lanzafame/go-libp2p-ocgorpc" + rpc "github.com/libp2p/go-libp2p-gorpc" + + "go.opencensus.io/exporter/jaeger" + "go.opencensus.io/exporter/prometheus" + "go.opencensus.io/plugin/ochttp" + "go.opencensus.io/stats/view" + "go.opencensus.io/trace" + "go.opencensus.io/zpages" + + manet "github.com/multiformats/go-multiaddr-net" + + prom "github.com/prometheus/client_golang/prometheus" +) + +// SetupMetrics configures and starts stats tooling, +// if enabled. +func SetupMetrics(cfg *MetricsConfig) error { + if cfg.EnableStats { + logger.Info("stats collection enabled...") + return setupMetrics(cfg) + } + return nil +} + +// JaegerTracer implements ipfscluster.Tracer. +type JaegerTracer struct { + jaeger *jaeger.Exporter +} + +// SetClient no-op. +func (t *JaegerTracer) SetClient(*rpc.Client) {} + +// Shutdown the tracer and flush any remaining traces. +func (t *JaegerTracer) Shutdown(context.Context) error { + // nil check for testing, where tracer may not be configured + if t != (*JaegerTracer)(nil) && t.jaeger != nil { + t.jaeger.Flush() + } + return nil +} + +// SetupTracing configures and starts tracing tooling, +// if enabled. 
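SetupTracing, documented here and defined in the next hunk, pairs with SetupMetrics above. A hedged sketch of daemon wiring; startObservations is a hypothetical helper, and when a section is disabled both calls are effectively no-ops (SetupTracing returns a nil tracer).

	func startObservations(mCfg *observations.MetricsConfig, tCfg *observations.TracingConfig) (*observations.JaegerTracer, error) {
		if err := observations.SetupMetrics(mCfg); err != nil {
			return nil, err
		}
		// returns (nil, nil) when tracing is disabled in the config
		return observations.SetupTracing(tCfg)
	}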
+func SetupTracing(cfg *TracingConfig) (*JaegerTracer, error) { + if !cfg.EnableTracing { + return nil, nil + } + logger.Info("tracing enabled...") + je, err := setupTracing(cfg) + if err != nil { + return nil, err + } + return &JaegerTracer{je}, nil +} + +func setupMetrics(cfg *MetricsConfig) error { + // setup Prometheus + registry := prom.NewRegistry() + goCollector := prom.NewGoCollector() + procCollector := prom.NewProcessCollector(prom.ProcessCollectorOpts{}) + registry.MustRegister(goCollector, procCollector) + pe, err := prometheus.NewExporter(prometheus.Options{ + Namespace: "cluster", + Registry: registry, + }) + if err != nil { + return err + } + + // register prometheus with opencensus + view.RegisterExporter(pe) + view.SetReportingPeriod(cfg.StatsReportingInterval) + + // register the metrics views of interest + if err := view.Register(DefaultViews...); err != nil { + return err + } + if err := view.Register( + ochttp.ClientCompletedCount, + ochttp.ClientRoundtripLatencyDistribution, + ochttp.ClientReceivedBytesDistribution, + ochttp.ClientSentBytesDistribution, + ); err != nil { + return err + } + if err := view.Register( + ochttp.ServerRequestCountView, + ochttp.ServerRequestBytesView, + ochttp.ServerResponseBytesView, + ochttp.ServerLatencyView, + ochttp.ServerRequestCountByMethod, + ochttp.ServerResponseCountByStatusCode, + ); err != nil { + return err + } + if err := view.Register(ocgorpc.DefaultServerViews...); err != nil { + return err + } + + _, promAddr, err := manet.DialArgs(cfg.PrometheusEndpoint) + if err != nil { + return err + } + go func() { + mux := http.NewServeMux() + zpages.Handle(mux, "/debug") + mux.Handle("/metrics", pe) + mux.Handle("/debug/vars", expvar.Handler()) + mux.HandleFunc("/debug/pprof", pprof.Index) + mux.HandleFunc("/debug/cmdline", pprof.Cmdline) + mux.HandleFunc("/debug/profile", pprof.Profile) + mux.HandleFunc("/debug/symbol", pprof.Symbol) + mux.HandleFunc("/debug/trace", pprof.Trace) + mux.Handle("/debug/block", pprof.Handler("block")) + mux.Handle("/debug/goroutine", pprof.Handler("goroutine")) + mux.Handle("/debug/heap", pprof.Handler("heap")) + mux.Handle("/debug/mutex", pprof.Handler("mutex")) + mux.Handle("/debug/threadcreate", pprof.Handler("threadcreate")) + if err := http.ListenAndServe(promAddr, mux); err != nil { + logger.Fatalf("Failed to run Prometheus /metrics endpoint: %v", err) + } + }() + return nil +} + +// setupTracing configures a OpenCensus Tracing exporter for Jaeger. 
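setupMetrics above registers DefaultViews from observations/metrics.go; feeding those views is left to the components. A minimal sketch under assumptions: recordTrackerPins and peerName are illustrative names, and the standard go.opencensus.io stats and tag packages are imported.

	func recordTrackerPins(ctx context.Context, peerName string, n int) error {
		// tag the measurement so TrackerPinCountView can break it down by host
		ctx, err := tag.New(ctx, tag.Upsert(observations.HostKey, peerName))
		if err != nil {
			return err
		}
		// a no-op unless the view has been registered by setupMetrics
		stats.Record(ctx, observations.TrackerPinCountMetric.M(int64(n)))
		return nil
	}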
+func setupTracing(cfg *TracingConfig) (*jaeger.Exporter, error) { + _, agentAddr, err := manet.DialArgs(cfg.JaegerAgentEndpoint) + if err != nil { + return nil, err + } + // setup Jaeger + je, err := jaeger.NewExporter(jaeger.Options{ + AgentEndpoint: agentAddr, + Process: jaeger.Process{ + ServiceName: cfg.TracingServiceName, + }, + }) + if err != nil { + return nil, err + } + + // register jaeger with opencensus + trace.RegisterExporter(je) + // configure tracing + trace.ApplyConfig(trace.Config{DefaultSampler: trace.ProbabilitySampler(cfg.TracingSamplingProb)}) + return je, nil +} diff --git a/package.json b/package.json index bbe6fe9f..bfc222f7 100644 --- a/package.json +++ b/package.json @@ -45,9 +45,9 @@ }, { "author": "hsanjuan", - "hash": "QmTfA73jjmEphGCYGYyZksqy4vRKdv9sKJLKb6WzbCBqJB", + "hash": "QmeZoNDg6yos4DESSzHsYNXKwjLomUxK2CL9PPxAjPj5iJ", "name": "go-libp2p-gorpc", - "version": "1.0.25" + "version": "1.1.0" }, { "author": "libp2p", @@ -55,12 +55,6 @@ "name": "go-libp2p-pnet", "version": "3.0.4" }, - { - "author": "ZenGround0", - "hash": "QmPuuqyMyoadGDkefg7L11kAwmvQykrHiRkuLjQRpa1bqF", - "name": "go-dot", - "version": "0.0.1" - }, { "author": "dignifiedquire", "hash": "Qmctxy7Q5h5ohCy8TwxrqWCrDY36MNkN71hPozExc4Yxem", @@ -161,6 +155,24 @@ "hash": "QmNNk4iczWp8Q4R1mXQ2mrrjQvWisYqMqbW1an8qGbJZsM", "name": "cors", "version": "1.6.0" + }, + { + "author": "ZenGround0", + "hash": "QmPuuqyMyoadGDkefg7L11kAwmvQykrHiRkuLjQRpa1bqF", + "name": "go-dot", + "version": "0.0.1" + }, + { + "author": "hsanjuan", + "hash": "QmP8ibs3yQcaPYwz3ZWggPeU6o8vb8iWQrY348zvP3XU6G", + "name": "go.opencensus.io", + "version": "0.19.0" + }, + { + "author": "lanzafame", + "hash": "QmafPUA9RPADvFhFiY584v3dbYsQKeGYJDeA5yeQvgkhFE", + "name": "go-libp2p-ocgorpc", + "version": "0.1.4" } ], "gxVersion": "0.11.0", diff --git a/peer_manager_test.go b/peer_manager_test.go index 9b6859ca..bf51b0a3 100644 --- a/peer_manager_test.go +++ b/peer_manager_test.go @@ -61,6 +61,7 @@ func clusterAddr(c *Cluster) ma.Multiaddr { } func TestClustersPeerAdd(t *testing.T) { + ctx := context.Background() clusters, mocks := peerManagerClusters(t) defer shutdownClusters(t, clusters, mocks) @@ -69,7 +70,7 @@ func TestClustersPeerAdd(t *testing.T) { } for i := 1; i < len(clusters); i++ { - id, err := clusters[0].PeerAdd(clusters[i].id) + id, err := clusters[0].PeerAdd(ctx, clusters[i].id) if err != nil { t.Fatal(err) } @@ -82,14 +83,14 @@ func TestClustersPeerAdd(t *testing.T) { } h, _ := cid.Decode(test.TestCid1) - err := clusters[1].Pin(api.PinCid(h)) + err := clusters[1].Pin(ctx, api.PinCid(h)) if err != nil { t.Fatal(err) } pinDelay() f := func(t *testing.T, c *Cluster) { - ids := c.Peers() + ids := c.Peers(ctx) // check they are tracked by the peer manager if len(ids) != nClusters { @@ -98,21 +99,21 @@ func TestClustersPeerAdd(t *testing.T) { } // Check that they are part of the consensus - pins := c.Pins() + pins := c.Pins(ctx) if len(pins) != 1 { t.Log(pins) t.Error("expected 1 pin everywhere") } - if len(c.ID().ClusterPeers) != nClusters { - t.Log(c.ID().ClusterPeers) + if len(c.ID(ctx).ClusterPeers) != nClusters { + t.Log(c.ID(ctx).ClusterPeers) t.Error("By now cluster peers should reflect all peers") } } runF(t, clusters, f) for _, c := range clusters { - c.Shutdown() + c.Shutdown(ctx) } f2 := func(t *testing.T, c *Cluster) { @@ -137,6 +138,7 @@ func TestClustersPeerAdd(t *testing.T) { } func TestClustersJoinBadPeer(t *testing.T) { + ctx := context.Background() clusters, mocks := peerManagerClusters(t) defer 
shutdownClusters(t, clusters, mocks) @@ -148,23 +150,24 @@ func TestClustersJoinBadPeer(t *testing.T) { // We add a cluster that has been shutdown // (closed transports) - clusters[1].Shutdown() + clusters[1].Shutdown(ctx) // Let the OS actually close the ports. // Sometimes we hang otherwise. delay() - err := clusters[0].Join(addr) + err := clusters[0].Join(ctx, addr) if err == nil { t.Error("expected an error") } - ids := clusters[0].Peers() + ids := clusters[0].Peers(ctx) if len(ids) != 1 { t.Error("cluster should have only one member") } } func TestClustersPeerAddInUnhealthyCluster(t *testing.T) { + ctx := context.Background() clusters, mocks := peerManagerClusters(t) defer shutdownClusters(t, clusters, mocks) @@ -172,33 +175,34 @@ func TestClustersPeerAddInUnhealthyCluster(t *testing.T) { t.Skip("need at least 3 nodes for this test") } - _, err := clusters[0].PeerAdd(clusters[1].id) - ids := clusters[1].Peers() + _, err := clusters[0].PeerAdd(ctx, clusters[1].id) + ids := clusters[1].Peers(ctx) if len(ids) != 2 { t.Error("expected 2 peers") } // Now we shutdown the one member of the running cluster // and try to add someone else. - err = clusters[1].Shutdown() + err = clusters[1].Shutdown(ctx) if err != nil { t.Error("Shutdown should be clean: ", err) } delay() // This makes sure the leader realizes //that it's not leader anymore. Otherwise it commits fine. - _, err = clusters[0].PeerAdd(clusters[2].id) + _, err = clusters[0].PeerAdd(ctx, clusters[2].id) if err == nil { t.Error("expected an error") } - ids = clusters[0].Peers() + ids = clusters[0].Peers(ctx) if len(ids) != 2 { t.Error("cluster should still have 2 peers") } } func TestClustersPeerRemove(t *testing.T) { + ctx := context.Background() clusters, mocks := createClusters(t) defer shutdownClusters(t, clusters, mocks) @@ -206,8 +210,8 @@ func TestClustersPeerRemove(t *testing.T) { t.Skip("test needs at least 2 clusters") } - p := clusters[1].ID().ID - err := clusters[0].PeerRemove(p) + p := clusters[1].ID(ctx).ID + err := clusters[0].PeerRemove(ctx, p) if err != nil { t.Error(err) } @@ -215,13 +219,13 @@ func TestClustersPeerRemove(t *testing.T) { delay() f := func(t *testing.T, c *Cluster) { - if c.ID().ID == p { //This is the removed cluster + if c.ID(ctx).ID == p { //This is the removed cluster _, ok := <-c.Done() if ok { t.Error("removed peer should have exited") } } else { - ids := c.Peers() + ids := c.Peers(ctx) if len(ids) != nClusters-1 { t.Error("should have removed 1 peer") } @@ -232,25 +236,26 @@ func TestClustersPeerRemove(t *testing.T) { } func TestClustersPeerRemoveSelf(t *testing.T) { + ctx := context.Background() // this test hangs sometimes if there are problems clusters, mocks := createClusters(t) defer shutdownClusters(t, clusters, mocks) for i := 0; i < len(clusters); i++ { waitForLeaderAndMetrics(t, clusters) - peers := clusters[i].Peers() + peers := clusters[i].Peers(ctx) t.Logf("Current cluster size: %d", len(peers)) if len(peers) != (len(clusters) - i) { t.Fatal("Previous peers not removed correctly") } - err := clusters[i].PeerRemove(clusters[i].ID().ID) + err := clusters[i].PeerRemove(ctx, clusters[i].ID(ctx).ID) // Last peer member won't be able to remove itself // In this case, we shut it down. 
if err != nil { if i != len(clusters)-1 { //not last t.Error(err) } else { - err := clusters[i].Shutdown() + err := clusters[i].Shutdown(ctx) if err != nil { t.Fatal(err) } @@ -264,6 +269,7 @@ func TestClustersPeerRemoveSelf(t *testing.T) { } func TestClustersPeerRemoveLeader(t *testing.T) { + ctx := context.Background() // this test is like the one above, except it always // removes the current leader. // this test hangs sometimes if there are problems @@ -275,7 +281,7 @@ func TestClustersPeerRemoveLeader(t *testing.T) { for _, c := range clusters { if !c.shutdownB { waitForLeaderAndMetrics(t, clusters) - l, _ = c.consensus.Leader() + l, _ = c.consensus.Leader(ctx) } } for _, c := range clusters { @@ -288,19 +294,19 @@ func TestClustersPeerRemoveLeader(t *testing.T) { for i := 0; i < len(clusters); i++ { leader := findLeader() - peers := leader.Peers() + peers := leader.Peers(ctx) t.Logf("Current cluster size: %d", len(peers)) if len(peers) != (len(clusters) - i) { t.Fatal("Previous peers not removed correctly") } - err := leader.PeerRemove(leader.id) + err := leader.PeerRemove(ctx, leader.id) // Last peer member won't be able to remove itself // In this case, we shut it down. if err != nil { if i != len(clusters)-1 { //not last t.Error(err) } else { - err := leader.Shutdown() + err := leader.Shutdown(ctx) if err != nil { t.Fatal(err) } @@ -315,6 +321,7 @@ func TestClustersPeerRemoveLeader(t *testing.T) { } func TestClustersPeerRemoveReallocsPins(t *testing.T) { + ctx := context.Background() clusters, mocks := createClusters(t) defer shutdownClusters(t, clusters, mocks) @@ -329,7 +336,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) { } // We choose to remove the leader, to make things even more interesting - leaderID, err := clusters[0].consensus.Leader() + leaderID, err := clusters[0].consensus.Leader(ctx) if err != nil { t.Fatal(err) } @@ -337,7 +344,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) { var leader *Cluster var leaderi int for i, cl := range clusters { - if id := cl.ID().ID; id == leaderID { + if id := cl.ID(ctx).ID; id == leaderID { leader = cl leaderi = i break @@ -352,7 +359,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) { // Remove leader from set clusters = append(clusters[:leaderi], clusters[leaderi+1:]...) mocks = append(mocks[:leaderi], mocks[leaderi+1:]...) - defer leader.Shutdown() + defer leader.Shutdown(ctx) defer leaderMock.Close() tmpCid, _ := cid.Decode(test.TestCid1) @@ -363,7 +370,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) { for i := 0; i < nClusters; i++ { h, err := prefix.Sum(randomBytes()) checkErr(t, err) - err = leader.Pin(api.PinCid(h)) + err = leader.Pin(ctx, api.PinCid(h)) checkErr(t, err) ttlDelay() } @@ -375,7 +382,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) { // Find out which pins are associated to the leader. interestingCids := []cid.Cid{} - pins := leader.Pins() + pins := leader.Pins(ctx) if len(pins) != nClusters { t.Fatal("expected number of tracked pins to be nClusters") } @@ -393,7 +400,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) { } // Now the leader removes itself - err = leader.PeerRemove(leaderID) + err = leader.PeerRemove(ctx, leaderID) if err != nil { t.Fatal("error removing peer:", err) } @@ -404,7 +411,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) { for _, icid := range interestingCids { // Now check that the allocations are new. 
- newPin, err := clusters[1].PinGet(icid) + newPin, err := clusters[1].PinGet(ctx, icid) if err != nil { t.Fatal("error getting the new allocations for", icid) } @@ -415,6 +422,7 @@ func TestClustersPeerRemoveReallocsPins(t *testing.T) { } func TestClustersPeerJoin(t *testing.T) { + ctx := context.Background() clusters, mocks := peerManagerClusters(t) defer shutdownClusters(t, clusters, mocks) @@ -423,21 +431,30 @@ func TestClustersPeerJoin(t *testing.T) { } for i := 1; i < len(clusters); i++ { - err := clusters[i].Join(clusterAddr(clusters[0])) + err := clusters[i].Join(ctx, clusterAddr(clusters[0])) if err != nil { t.Fatal(err) } } hash, _ := cid.Decode(test.TestCid1) - clusters[0].Pin(api.PinCid(hash)) + clusters[0].Pin(ctx, api.PinCid(hash)) pinDelay() + for _, p := range clusters { + t.Log(p.id.String()) + } + f := func(t *testing.T, c *Cluster) { - peers := c.Peers() + peers := c.Peers(ctx) + str := c.id.String() + "\n" + for _, p := range peers { + str += " - " + p.ID.String() + "\n" + } + t.Log(str) if len(peers) != nClusters { t.Error("all peers should be connected") } - pins := c.Pins() + pins := c.Pins(ctx) if len(pins) != 1 || !pins[0].Cid.Equals(hash) { t.Error("all peers should have pinned the cid") } @@ -446,6 +463,7 @@ func TestClustersPeerJoin(t *testing.T) { } func TestClustersPeerJoinAllAtOnce(t *testing.T) { + ctx := context.Background() clusters, mocks := peerManagerClusters(t) defer shutdownClusters(t, clusters, mocks) @@ -454,7 +472,7 @@ func TestClustersPeerJoinAllAtOnce(t *testing.T) { } f := func(t *testing.T, c *Cluster) { - err := c.Join(clusterAddr(clusters[0])) + err := c.Join(ctx, clusterAddr(clusters[0])) if err != nil { t.Fatal(err) } @@ -462,15 +480,15 @@ func TestClustersPeerJoinAllAtOnce(t *testing.T) { runF(t, clusters[1:], f) hash, _ := cid.Decode(test.TestCid1) - clusters[0].Pin(api.PinCid(hash)) + clusters[0].Pin(ctx, api.PinCid(hash)) pinDelay() f2 := func(t *testing.T, c *Cluster) { - peers := c.Peers() + peers := c.Peers(ctx) if len(peers) != nClusters { t.Error("all peers should be connected") } - pins := c.Pins() + pins := c.Pins(ctx) if len(pins) != 1 || !pins[0].Cid.Equals(hash) { t.Error("all peers should have pinned the cid") } @@ -529,19 +547,20 @@ func TestClustersPeerJoinAllAtOnce(t *testing.T) { // Tests that a peer catches up on the state correctly after rejoining func TestClustersPeerRejoin(t *testing.T) { + ctx := context.Background() clusters, mocks := peerManagerClusters(t) defer shutdownClusters(t, clusters, mocks) // pin something in c0 pin1, _ := cid.Decode(test.TestCid1) - err := clusters[0].Pin(api.PinCid(pin1)) + err := clusters[0].Pin(ctx, api.PinCid(pin1)) if err != nil { t.Fatal(err) } // add all clusters for i := 1; i < len(clusters); i++ { - err := clusters[i].Join(clusterAddr(clusters[0])) + err := clusters[i].Join(ctx, clusterAddr(clusters[0])) if err != nil { t.Fatal(err) } @@ -551,14 +570,14 @@ func TestClustersPeerRejoin(t *testing.T) { // all added peers should have the content for i := 1; i < len(clusters); i++ { - pinfo := clusters[i].tracker.Status(pin1) + pinfo := clusters[i].tracker.Status(ctx, pin1) if pinfo.Status != api.TrackerStatusPinned { t.Error("Added peers should pin the content") } } clusters[0].config.LeaveOnShutdown = true - err = clusters[0].Shutdown() + err = clusters[0].Shutdown(ctx) if err != nil { t.Fatal(err) } @@ -574,7 +593,7 @@ func TestClustersPeerRejoin(t *testing.T) { // Pin something on the rest pin2, _ := cid.Decode(test.TestCid2) - err = clusters[1].Pin(api.PinCid(pin2)) + err = 
clusters[1].Pin(ctx, api.PinCid(pin2)) if err != nil { t.Fatal(err) } @@ -585,19 +604,19 @@ func TestClustersPeerRejoin(t *testing.T) { c0, m0 := createOnePeerCluster(t, 0, testingClusterSecret) clusters[0] = c0 mocks[0] = m0 - err = c0.Join(clusterAddr(clusters[1])) + err = c0.Join(ctx, clusterAddr(clusters[1])) if err != nil { t.Fatal(err) } delay() - pinfo := clusters[0].tracker.Status(pin2) + pinfo := clusters[0].tracker.Status(ctx, pin2) if pinfo.Status != api.TrackerStatusPinned { t.Error("re-joined cluster should have caught up") } - pinfo = clusters[0].tracker.Status(pin1) + pinfo = clusters[0].tracker.Status(ctx, pin1) if pinfo.Status != api.TrackerStatusPinned { t.Error("re-joined cluster should have original pin") } diff --git a/pintracker/maptracker/maptracker.go b/pintracker/maptracker/maptracker.go index 875b8ca4..476d3619 100644 --- a/pintracker/maptracker/maptracker.go +++ b/pintracker/maptracker/maptracker.go @@ -7,6 +7,8 @@ import ( "errors" "sync" + "go.opencensus.io/trace" + "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/pintracker/optracker" "github.com/ipfs/ipfs-cluster/pintracker/util" @@ -62,15 +64,15 @@ func NewMapPinTracker(cfg *Config, pid peer.ID, peerName string) *MapPinTracker } for i := 0; i < mpt.config.ConcurrentPins; i++ { - go mpt.opWorker(mpt.pin, mpt.pinCh) + go mpt.opWorker(ctx, mpt.pin, mpt.pinCh) } - go mpt.opWorker(mpt.unpin, mpt.unpinCh) + go mpt.opWorker(ctx, mpt.unpin, mpt.unpinCh) return mpt } // receives a pin Function (pin or unpin) and a channel. // Used for both pinning and unpinning -func (mpt *MapPinTracker) opWorker(pinF func(*optracker.Operation) error, opChan chan *optracker.Operation) { +func (mpt *MapPinTracker) opWorker(ctx context.Context, pinF func(*optracker.Operation) error, opChan chan *optracker.Operation) { for { select { case op := <-opChan: @@ -97,7 +99,7 @@ func (mpt *MapPinTracker) opWorker(pinF func(*optracker.Operation) error, opChan // We keep all pinned things in the tracker, // only clean unpinned things. if op.Type() == optracker.OperationUnpin { - mpt.optracker.Clean(op) + mpt.optracker.Clean(ctx, op) } case <-mpt.ctx.Done(): return @@ -107,7 +109,10 @@ func (mpt *MapPinTracker) opWorker(pinF func(*optracker.Operation) error, opChan // Shutdown finishes the services provided by the MapPinTracker and cancels // any active context. 
-func (mpt *MapPinTracker) Shutdown() error { +func (mpt *MapPinTracker) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "tracker/map/Shutdown") + defer span.End() + mpt.shutdownLock.Lock() defer mpt.shutdownLock.Unlock() @@ -125,9 +130,12 @@ func (mpt *MapPinTracker) Shutdown() error { } func (mpt *MapPinTracker) pin(op *optracker.Operation) error { + ctx, span := trace.StartSpan(op.Context(), "tracker/map/pin") + defer span.End() + logger.Debugf("issuing pin call for %s", op.Cid()) err := mpt.rpcClient.CallContext( - op.Context(), + ctx, "", "Cluster", "IPFSPin", @@ -141,9 +149,12 @@ func (mpt *MapPinTracker) pin(op *optracker.Operation) error { } func (mpt *MapPinTracker) unpin(op *optracker.Operation) error { + ctx, span := trace.StartSpan(op.Context(), "tracker/map/unpin") + defer span.End() + logger.Debugf("issuing unpin call for %s", op.Cid()) err := mpt.rpcClient.CallContext( - op.Context(), + ctx, "", "Cluster", "IPFSUnpin", @@ -153,13 +164,15 @@ func (mpt *MapPinTracker) unpin(op *optracker.Operation) error { if err != nil { return err } - return nil } // puts a new operation on the queue, unless ongoing exists -func (mpt *MapPinTracker) enqueue(c api.Pin, typ optracker.OperationType, ch chan *optracker.Operation) error { - op := mpt.optracker.TrackNewOperation(c, typ, optracker.PhaseQueued) +func (mpt *MapPinTracker) enqueue(ctx context.Context, c api.Pin, typ optracker.OperationType, ch chan *optracker.Operation) error { + ctx, span := trace.StartSpan(ctx, "tracker/map/enqueue") + defer span.End() + + op := mpt.optracker.TrackNewOperation(ctx, c, typ, optracker.PhaseQueued) if op == nil { return nil // ongoing pin operation. } @@ -178,14 +191,17 @@ func (mpt *MapPinTracker) enqueue(c api.Pin, typ optracker.OperationType, ch cha // Track tells the MapPinTracker to start managing a Cid, // possibly triggering Pin operations on the IPFS daemon. -func (mpt *MapPinTracker) Track(c api.Pin) error { +func (mpt *MapPinTracker) Track(ctx context.Context, c api.Pin) error { + ctx, span := trace.StartSpan(ctx, "tracker/map/Track") + defer span.End() + logger.Debugf("tracking %s", c.Cid) // Sharded pins are never pinned. A sharded pin cannot turn into // something else or viceversa like it happens with Remote pins so // we just track them. if c.Type == api.MetaType { - mpt.optracker.TrackNewOperation(c, optracker.OperationShard, optracker.PhaseDone) + mpt.optracker.TrackNewOperation(ctx, c, optracker.OperationShard, optracker.PhaseDone) return nil } @@ -194,7 +210,7 @@ func (mpt *MapPinTracker) Track(c api.Pin) error { // pin/rm, so this actually does not always trigger unpin // to ipfs. if util.IsRemotePin(c, mpt.peerID) { - op := mpt.optracker.TrackNewOperation(c, optracker.OperationRemote, optracker.PhaseInProgress) + op := mpt.optracker.TrackNewOperation(ctx, c, optracker.OperationRemote, optracker.PhaseInProgress) if op == nil { return nil // Ongoing operationRemote / PhaseInProgress } @@ -208,26 +224,35 @@ func (mpt *MapPinTracker) Track(c api.Pin) error { return nil } - return mpt.enqueue(c, optracker.OperationPin, mpt.pinCh) + return mpt.enqueue(ctx, c, optracker.OperationPin, mpt.pinCh) } // Untrack tells the MapPinTracker to stop managing a Cid. // If the Cid is pinned locally, it will be unpinned. 
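Untrack, like Track above it, now threads a context into the operation tracker. A hedged sketch of a caller switching a pin over; replacePin is a hypothetical helper and assumes the go-cid, api and pintracker/maptracker imports.

	func replacePin(ctx context.Context, mpt *maptracker.MapPinTracker, old cid.Cid, newPin api.Pin) error {
		// stop managing the old cid (unpins it locally if it was pinned)...
		if err := mpt.Untrack(ctx, old); err != nil {
			return err
		}
		// ...and start tracking the replacement under the same trace context
		return mpt.Track(ctx, newPin)
	}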
-func (mpt *MapPinTracker) Untrack(c cid.Cid) error { +func (mpt *MapPinTracker) Untrack(ctx context.Context, c cid.Cid) error { + ctx, span := trace.StartSpan(ctx, "tracker/map/Untrack") + defer span.End() + logger.Debugf("untracking %s", c) - return mpt.enqueue(api.PinCid(c), optracker.OperationUnpin, mpt.unpinCh) + return mpt.enqueue(ctx, api.PinCid(c), optracker.OperationUnpin, mpt.unpinCh) } // Status returns information for a Cid tracked by this // MapPinTracker. -func (mpt *MapPinTracker) Status(c cid.Cid) api.PinInfo { - return mpt.optracker.Get(c) +func (mpt *MapPinTracker) Status(ctx context.Context, c cid.Cid) api.PinInfo { + ctx, span := trace.StartSpan(mpt.ctx, "tracker/map/Status") + defer span.End() + + return mpt.optracker.Get(ctx, c) } // StatusAll returns information for all Cids tracked by this // MapPinTracker. -func (mpt *MapPinTracker) StatusAll() []api.PinInfo { - return mpt.optracker.GetAll() +func (mpt *MapPinTracker) StatusAll(ctx context.Context) []api.PinInfo { + ctx, span := trace.StartSpan(mpt.ctx, "tracker/map/StatusAll") + defer span.End() + + return mpt.optracker.GetAll(ctx) } // Sync verifies that the status of a Cid matches that of @@ -238,7 +263,10 @@ func (mpt *MapPinTracker) StatusAll() []api.PinInfo { // Pins in error states can be recovered with Recover(). // An error is returned if we are unable to contact // the IPFS daemon. -func (mpt *MapPinTracker) Sync(c cid.Cid) (api.PinInfo, error) { +func (mpt *MapPinTracker) Sync(ctx context.Context, c cid.Cid) (api.PinInfo, error) { + ctx, span := trace.StartSpan(mpt.ctx, "tracker/map/Sync") + defer span.End() + var ips api.IPFSPinStatus err := mpt.rpcClient.Call( "", @@ -249,11 +277,11 @@ func (mpt *MapPinTracker) Sync(c cid.Cid) (api.PinInfo, error) { ) if err != nil { - mpt.optracker.SetError(c, err) - return mpt.optracker.Get(c), nil + mpt.optracker.SetError(ctx, c, err) + return mpt.optracker.Get(ctx, c), nil } - return mpt.syncStatus(c, ips), nil + return mpt.syncStatus(ctx, c, ips), nil } // SyncAll verifies that the statuses of all tracked Cids match the @@ -264,7 +292,10 @@ func (mpt *MapPinTracker) Sync(c cid.Cid) (api.PinInfo, error) { // were updated or have errors. Cids in error states can be recovered // with Recover(). // An error is returned if we are unable to contact the IPFS daemon. 
-func (mpt *MapPinTracker) SyncAll() ([]api.PinInfo, error) { +func (mpt *MapPinTracker) SyncAll(ctx context.Context) ([]api.PinInfo, error) { + ctx, span := trace.StartSpan(mpt.ctx, "tracker/map/SyncAll") + defer span.End() + var ipsMap map[string]api.IPFSPinStatus var results []api.PinInfo err := mpt.rpcClient.Call( @@ -277,12 +308,12 @@ func (mpt *MapPinTracker) SyncAll() ([]api.PinInfo, error) { if err != nil { // set pinning or unpinning ops to error, since we can't // verify them - pInfos := mpt.optracker.GetAll() + pInfos := mpt.optracker.GetAll(ctx) for _, pInfo := range pInfos { op, _ := optracker.TrackerStatusToOperationPhase(pInfo.Status) if op == optracker.OperationPin || op == optracker.OperationUnpin { - mpt.optracker.SetError(pInfo.Cid, err) - results = append(results, mpt.optracker.Get(pInfo.Cid)) + mpt.optracker.SetError(ctx, pInfo.Cid, err) + results = append(results, mpt.optracker.Get(ctx, pInfo.Cid)) } else { results = append(results, pInfo) } @@ -290,15 +321,15 @@ func (mpt *MapPinTracker) SyncAll() ([]api.PinInfo, error) { return results, nil } - status := mpt.StatusAll() + status := mpt.StatusAll(ctx) for _, pInfoOrig := range status { var pInfoNew api.PinInfo c := pInfoOrig.Cid ips, ok := ipsMap[c.String()] if !ok { - pInfoNew = mpt.syncStatus(c, api.IPFSPinStatusUnpinned) + pInfoNew = mpt.syncStatus(ctx, c, api.IPFSPinStatusUnpinned) } else { - pInfoNew = mpt.syncStatus(c, ips) + pInfoNew = mpt.syncStatus(ctx, c, ips) } if pInfoOrig.Status != pInfoNew.Status || @@ -310,8 +341,8 @@ func (mpt *MapPinTracker) SyncAll() ([]api.PinInfo, error) { return results, nil } -func (mpt *MapPinTracker) syncStatus(c cid.Cid, ips api.IPFSPinStatus) api.PinInfo { - status, ok := mpt.optracker.Status(c) +func (mpt *MapPinTracker) syncStatus(ctx context.Context, c cid.Cid, ips api.IPFSPinStatus) api.PinInfo { + status, ok := mpt.optracker.Status(ctx, c) if !ok { status = api.TrackerStatusUnpinned } @@ -335,6 +366,7 @@ func (mpt *MapPinTracker) syncStatus(c cid.Cid, ips api.IPFSPinStatus) api.PinIn case api.TrackerStatusPinError: // If an item that we wanted to pin is pinned, we mark it so mpt.optracker.TrackNewOperation( + ctx, api.PinCid(c), optracker.OperationPin, optracker.PhaseDone, @@ -349,47 +381,54 @@ func (mpt *MapPinTracker) syncStatus(c cid.Cid, ips api.IPFSPinStatus) api.PinIn case api.TrackerStatusUnpinError: // clean op := mpt.optracker.TrackNewOperation( + ctx, api.PinCid(c), optracker.OperationUnpin, optracker.PhaseDone, ) if op != nil { - mpt.optracker.Clean(op) + mpt.optracker.Clean(ctx, op) } case api.TrackerStatusPinned: // not pinned in IPFS but we think it should be: mark as error - mpt.optracker.SetError(c, errUnpinned) + mpt.optracker.SetError(ctx, c, errUnpinned) default: // 1. Pinning phases // -> do nothing } } - return mpt.optracker.Get(c) + return mpt.optracker.Get(ctx, c) } // Recover will re-queue a Cid in error state for the failed operation, // possibly retriggering an IPFS pinning operation. 
-func (mpt *MapPinTracker) Recover(c cid.Cid) (api.PinInfo, error) { +func (mpt *MapPinTracker) Recover(ctx context.Context, c cid.Cid) (api.PinInfo, error) { + ctx, span := trace.StartSpan(mpt.ctx, "tracker/map/Recover") + defer span.End() + logger.Infof("Attempting to recover %s", c) - pInfo := mpt.optracker.Get(c) + pInfo := mpt.optracker.Get(ctx, c) var err error switch pInfo.Status { case api.TrackerStatusPinError: - err = mpt.enqueue(api.PinCid(c), optracker.OperationPin, mpt.pinCh) + err = mpt.enqueue(ctx, api.PinCid(c), optracker.OperationPin, mpt.pinCh) case api.TrackerStatusUnpinError: - err = mpt.enqueue(api.PinCid(c), optracker.OperationUnpin, mpt.unpinCh) + err = mpt.enqueue(ctx, api.PinCid(c), optracker.OperationUnpin, mpt.unpinCh) } - return mpt.optracker.Get(c), err + return mpt.optracker.Get(ctx, c), err } // RecoverAll attempts to recover all items tracked by this peer. -func (mpt *MapPinTracker) RecoverAll() ([]api.PinInfo, error) { - pInfos := mpt.optracker.GetAll() +func (mpt *MapPinTracker) RecoverAll(ctx context.Context) ([]api.PinInfo, error) { + ctx, span := trace.StartSpan(mpt.ctx, "tracker/map/RecoverAll") + defer span.End() + + pInfos := mpt.optracker.GetAll(ctx) var results []api.PinInfo for _, pInfo := range pInfos { - res, err := mpt.Recover(pInfo.Cid) + res, err := mpt.Recover(ctx, pInfo.Cid) results = append(results, res) if err != nil { return results, err @@ -407,6 +446,6 @@ func (mpt *MapPinTracker) SetClient(c *rpc.Client) { // OpContext exports the internal optracker's OpContext method. // For testing purposes only. -func (mpt *MapPinTracker) OpContext(c cid.Cid) context.Context { - return mpt.optracker.OpContext(c) +func (mpt *MapPinTracker) OpContext(ctx context.Context, c cid.Cid) context.Context { + return mpt.optracker.OpContext(ctx, c) } diff --git a/pintracker/maptracker/maptracker_test.go b/pintracker/maptracker/maptracker_test.go index 2b2c5c77..93e42e92 100644 --- a/pintracker/maptracker/maptracker_test.go +++ b/pintracker/maptracker/maptracker_test.go @@ -86,61 +86,65 @@ func testMapPinTracker(t *testing.T) *MapPinTracker { } func TestNew(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) } func TestShutdown(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - err := mpt.Shutdown() + err := mpt.Shutdown(ctx) if err != nil { t.Fatal(err) } - err = mpt.Shutdown() + err = mpt.Shutdown(ctx) if err != nil { t.Fatal(err) } } func TestTrack(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) h, _ := cid.Decode(test.TestCid1) // Let's tart with a local pin c := testPin(h, -1, -1) - err := mpt.Track(c) + err := mpt.Track(context.Background(), c) if err != nil { t.Fatal(err) } time.Sleep(200 * time.Millisecond) // let it be pinned - st := mpt.Status(h) + st := mpt.Status(context.Background(), h) if st.Status != api.TrackerStatusPinned { t.Fatalf("cid should be pinned and is %s", st.Status) } // Unpin and set remote c = testPin(h, 1, 1, test.TestPeerID2) - err = mpt.Track(c) + err = mpt.Track(context.Background(), c) if err != nil { t.Fatal(err) } time.Sleep(200 * time.Millisecond) // let it be unpinned - st = mpt.Status(h) + st = mpt.Status(context.Background(), h) if st.Status != api.TrackerStatusRemote { t.Fatalf("cid should be pinned and is %s", st.Status) } } func TestUntrack(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer 
mpt.Shutdown(ctx) h1, _ := cid.Decode(test.TestCid1) h2, _ := cid.Decode(test.TestCid2) @@ -148,62 +152,63 @@ func TestUntrack(t *testing.T) { // LocalPin c := testPin(h1, -1, -1) - err := mpt.Track(c) + err := mpt.Track(context.Background(), c) if err != nil { t.Fatal(err) } // Remote pin c = testPin(h2, 1, 1, test.TestPeerID2) - err = mpt.Track(c) + err = mpt.Track(context.Background(), c) if err != nil { t.Fatal(err) } time.Sleep(time.Second / 2) - err = mpt.Untrack(h2) + err = mpt.Untrack(context.Background(), h2) if err != nil { t.Fatal(err) } - err = mpt.Untrack(h1) + err = mpt.Untrack(context.Background(), h1) if err != nil { t.Fatal(err) } - err = mpt.Untrack(h1) + err = mpt.Untrack(context.Background(), h1) if err != nil { t.Fatal(err) } time.Sleep(time.Second / 2) - st := mpt.Status(h1) + st := mpt.Status(context.Background(), h1) if st.Status != api.TrackerStatusUnpinned { t.Fatalf("cid should be unpinned and is %s", st.Status) } - st = mpt.Status(h2) + st = mpt.Status(context.Background(), h2) if st.Status != api.TrackerStatusUnpinned { t.Fatalf("cid should be unpinned and is %s", st.Status) } } func TestStatusAll(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) h1, _ := cid.Decode(test.TestCid1) h2, _ := cid.Decode(test.TestCid2) // LocalPin c := testPin(h1, -1, -1) - mpt.Track(c) + mpt.Track(context.Background(), c) c = testPin(h2, 1, 1) - mpt.Track(c) + mpt.Track(context.Background(), c) time.Sleep(200 * time.Millisecond) - stAll := mpt.StatusAll() + stAll := mpt.StatusAll(context.Background()) if len(stAll) != 2 { t.Logf("%+v", stAll) t.Fatal("expected 2 pins") @@ -220,21 +225,22 @@ func TestStatusAll(t *testing.T) { } func TestSyncAndRecover(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) h1, _ := cid.Decode(test.TestCid1) h2, _ := cid.Decode(test.TestCid2) c := testPin(h1, -1, -1) - mpt.Track(c) + mpt.Track(context.Background(), c) c = testPin(h2, -1, -1) - mpt.Track(c) + mpt.Track(context.Background(), c) time.Sleep(100 * time.Millisecond) // IPFSPinLS RPC returns unpinned for anything != Cid1 or Cid3 - info, err := mpt.Sync(h2) + info, err := mpt.Sync(context.Background(), h2) if err != nil { t.Fatal(err) } @@ -242,7 +248,7 @@ func TestSyncAndRecover(t *testing.T) { t.Error("expected pin_error") } - info, err = mpt.Sync(h1) + info, err = mpt.Sync(context.Background(), h1) if err != nil { t.Fatal(err) } @@ -250,7 +256,7 @@ func TestSyncAndRecover(t *testing.T) { t.Error("expected pinned") } - info, err = mpt.Recover(h1) + info, err = mpt.Recover(context.Background(), h1) if err != nil { t.Fatal(err) } @@ -258,30 +264,31 @@ func TestSyncAndRecover(t *testing.T) { t.Error("expected pinned") } - _, err = mpt.Recover(h2) + _, err = mpt.Recover(context.Background(), h2) if err != nil { t.Fatal(err) } time.Sleep(100 * time.Millisecond) - info = mpt.Status(h2) + info = mpt.Status(context.Background(), h2) if info.Status != api.TrackerStatusPinned { t.Error("expected pinned") } } func TestRecoverAll(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) h1, _ := cid.Decode(test.TestCid1) c := testPin(h1, -1, -1) - mpt.Track(c) + mpt.Track(context.Background(), c) time.Sleep(100 * time.Millisecond) - mpt.optracker.SetError(h1, errors.New("fakeerror")) - pins, err := mpt.RecoverAll() + mpt.optracker.SetError(context.Background(), h1, errors.New("fakeerror")) + pins, 
err := mpt.RecoverAll(context.Background()) if err != nil { t.Fatal(err) } @@ -290,7 +297,7 @@ func TestRecoverAll(t *testing.T) { } time.Sleep(100 * time.Millisecond) - info := mpt.Status(h1) + info := mpt.Status(context.Background(), h1) if info.Status != api.TrackerStatusPinned { t.Error("the pin should have been recovered") @@ -298,10 +305,11 @@ func TestRecoverAll(t *testing.T) { } func TestSyncAll(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) - synced, err := mpt.SyncAll() + synced, err := mpt.SyncAll(context.Background()) if err != nil { t.Fatal(err) } @@ -315,13 +323,13 @@ func TestSyncAll(t *testing.T) { h2, _ := cid.Decode(test.TestCid2) c := testPin(h1, -1, -1) - mpt.Track(c) + mpt.Track(context.Background(), c) c = testPin(h2, -1, -1) - mpt.Track(c) + mpt.Track(context.Background(), c) time.Sleep(100 * time.Millisecond) - synced, err = mpt.SyncAll() + synced, err = mpt.SyncAll(context.Background()) if err != nil { t.Fatal(err) } @@ -332,56 +340,58 @@ func TestSyncAll(t *testing.T) { } func TestUntrackTrack(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) h1, _ := cid.Decode(test.TestCid1) // LocalPin c := testPin(h1, -1, -1) - err := mpt.Track(c) + err := mpt.Track(context.Background(), c) if err != nil { t.Fatal(err) } time.Sleep(time.Second / 2) - err = mpt.Untrack(h1) + err = mpt.Untrack(context.Background(), h1) if err != nil { t.Fatal(err) } } func TestTrackUntrackWithCancel(t *testing.T) { + ctx := context.Background() mpt := testSlowMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) slowPinCid, _ := cid.Decode(test.TestSlowCid1) // LocalPin slowPin := testPin(slowPinCid, -1, -1) - err := mpt.Track(slowPin) + err := mpt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } time.Sleep(100 * time.Millisecond) // let pinning start - pInfo := mpt.Status(slowPin.Cid) + pInfo := mpt.Status(context.Background(), slowPin.Cid) if pInfo.Status == api.TrackerStatusUnpinned { t.Fatal("slowPin should be tracked") } if pInfo.Status == api.TrackerStatusPinning { go func() { - err = mpt.Untrack(slowPinCid) + err = mpt.Untrack(context.Background(), slowPinCid) if err != nil { t.Fatal(err) } }() select { - case <-mpt.optracker.OpContext(slowPinCid).Done(): + case <-mpt.optracker.OpContext(context.Background(), slowPinCid).Done(): return case <-time.Tick(100 * time.Millisecond): t.Errorf("operation context should have been cancelled by now") @@ -392,8 +402,9 @@ func TestTrackUntrackWithCancel(t *testing.T) { } func TestTrackUntrackWithNoCancel(t *testing.T) { + ctx := context.Background() mpt := testSlowMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) slowPinCid, _ := cid.Decode(test.TestSlowCid1) fastPinCid, _ := cid.Decode(pinCancelCid) @@ -404,24 +415,24 @@ func TestTrackUntrackWithNoCancel(t *testing.T) { // LocalPin fastPin := testPin(fastPinCid, -1, -1) - err := mpt.Track(slowPin) + err := mpt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } - err = mpt.Track(fastPin) + err = mpt.Track(context.Background(), fastPin) if err != nil { t.Fatal(err) } // fastPin should be queued because slow pin is pinning - pInfo := mpt.Status(fastPinCid) + pInfo := mpt.Status(context.Background(), fastPinCid) if pInfo.Status == api.TrackerStatusPinQueued { - err = mpt.Untrack(fastPinCid) + err = mpt.Untrack(context.Background(), fastPinCid) if err != nil { t.Fatal(err) } - pi := 
mpt.Status(fastPinCid) + pi := mpt.Status(context.Background(), fastPinCid) if pi.Error == ErrPinCancelCid.Error() { t.Fatal(ErrPinCancelCid) } @@ -430,22 +441,23 @@ func TestTrackUntrackWithNoCancel(t *testing.T) { } time.Sleep(100 * time.Millisecond) - pInfo = mpt.Status(fastPinCid) + pInfo = mpt.Status(context.Background(), fastPinCid) if pInfo.Status != api.TrackerStatusUnpinned { t.Error("fastPin should have been removed from tracker:", pInfo.Status) } } func TestUntrackTrackWithCancel(t *testing.T) { + ctx := context.Background() mpt := testSlowMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) slowPinCid, _ := cid.Decode(test.TestSlowCid1) // LocalPin slowPin := testPin(slowPinCid, -1, -1) - err := mpt.Track(slowPin) + err := mpt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } @@ -454,27 +466,27 @@ func TestUntrackTrackWithCancel(t *testing.T) { // Untrack should cancel the ongoing request // and unpin right away - err = mpt.Untrack(slowPinCid) + err = mpt.Untrack(context.Background(), slowPinCid) if err != nil { t.Fatal(err) } time.Sleep(100 * time.Millisecond) - pInfo := mpt.Status(slowPinCid) + pInfo := mpt.Status(context.Background(), slowPinCid) if pInfo.Status == api.TrackerStatusUnpinned { t.Fatal("expected slowPin to be tracked") } if pInfo.Status == api.TrackerStatusUnpinning { go func() { - err = mpt.Track(slowPin) + err = mpt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } }() select { - case <-mpt.optracker.OpContext(slowPinCid).Done(): + case <-mpt.optracker.OpContext(context.Background(), slowPinCid).Done(): return case <-time.Tick(100 * time.Millisecond): t.Errorf("operation context should have been cancelled by now") @@ -486,8 +498,9 @@ func TestUntrackTrackWithCancel(t *testing.T) { } func TestUntrackTrackWithNoCancel(t *testing.T) { + ctx := context.Background() mpt := testSlowMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) slowPinCid, _ := cid.Decode(test.TestSlowCid1) fastPinCid, _ := cid.Decode(unpinCancelCid) @@ -498,40 +511,40 @@ func TestUntrackTrackWithNoCancel(t *testing.T) { // LocalPin fastPin := testPin(fastPinCid, -1, -1) - err := mpt.Track(slowPin) + err := mpt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } - err = mpt.Track(fastPin) + err = mpt.Track(context.Background(), fastPin) if err != nil { t.Fatal(err) } time.Sleep(3 * time.Second) - err = mpt.Untrack(slowPin.Cid) + err = mpt.Untrack(context.Background(), slowPin.Cid) if err != nil { t.Fatal(err) } - err = mpt.Untrack(fastPin.Cid) + err = mpt.Untrack(context.Background(), fastPin.Cid) if err != nil { t.Fatal(err) } - pInfo := mpt.Status(fastPinCid) + pInfo := mpt.Status(context.Background(), fastPinCid) if pInfo.Status == api.TrackerStatusUnpinned { t.Fatal("c untrack operation should be tracked") } if pInfo.Status == api.TrackerStatusUnpinQueued { - err = mpt.Track(fastPin) + err = mpt.Track(context.Background(), fastPin) if err != nil { t.Fatal(err) } - pi := mpt.Status(fastPinCid) + pi := mpt.Status(context.Background(), fastPinCid) if pi.Error == ErrUnpinCancelCid.Error() { t.Fatal(ErrUnpinCancelCid) } @@ -541,8 +554,9 @@ func TestUntrackTrackWithNoCancel(t *testing.T) { } func TestTrackUntrackConcurrent(t *testing.T) { + ctx := context.Background() mpt := testMapPinTracker(t) - defer mpt.Shutdown() + defer mpt.Shutdown(ctx) h1, _ := cid.Decode(test.TestCid1) @@ -559,9 +573,9 @@ func TestTrackUntrackConcurrent(t *testing.T) { var err error op := rand.Intn(2) if op == 1 { - err = mpt.Track(c) + err 
= mpt.Track(context.Background(), c) } else { - err = mpt.Untrack(c.Cid) + err = mpt.Untrack(context.Background(), c.Cid) } if err != nil { t.Error(err) @@ -573,7 +587,7 @@ func TestTrackUntrackConcurrent(t *testing.T) { wg.Wait() time.Sleep(200 * time.Millisecond) - st := mpt.Status(h1) + st := mpt.Status(context.Background(), h1) t.Log(st.Status) if st.Status != api.TrackerStatusUnpinned && st.Status != api.TrackerStatusPinned { t.Fatal("should be pinned or unpinned") diff --git a/pintracker/optracker/operation.go b/pintracker/optracker/operation.go index cf5c462e..1f969dd7 100644 --- a/pintracker/optracker/operation.go +++ b/pintracker/optracker/operation.go @@ -8,6 +8,7 @@ import ( cid "github.com/ipfs/go-cid" "github.com/ipfs/ipfs-cluster/api" + "go.opencensus.io/trace" ) //go:generate stringer -type=OperationType @@ -66,6 +67,9 @@ type Operation struct { // NewOperation creates a new Operation. func NewOperation(ctx context.Context, pin api.Pin, typ OperationType, ph Phase) *Operation { + ctx, span := trace.StartSpan(ctx, "optracker/NewOperation") + defer span.End() + ctx, cancel := context.WithCancel(ctx) return &Operation{ ctx: ctx, @@ -93,6 +97,9 @@ func (op *Operation) Context() context.Context { // Cancel will cancel the context associated to this operation. func (op *Operation) Cancel() { + ctx, span := trace.StartSpan(op.ctx, "optracker/Cancel") + _ = ctx + defer span.End() op.cancel() } @@ -105,6 +112,9 @@ func (op *Operation) Phase() Phase { // SetPhase changes the Phase and updates the timestamp. func (op *Operation) SetPhase(ph Phase) { + ctx, span := trace.StartSpan(op.ctx, "optracker/SetPhase") + _ = ctx + defer span.End() op.mu.Lock() defer op.mu.Unlock() op.phase = ph @@ -121,6 +131,9 @@ func (op *Operation) Error() string { // SetError sets the phase to PhaseError along with // an error message. It updates the timestamp. func (op *Operation) SetError(err error) { + ctx, span := trace.StartSpan(op.ctx, "optracker/SetError") + _ = ctx + defer span.End() op.mu.Lock() defer op.mu.Unlock() op.phase = PhaseError @@ -149,6 +162,9 @@ func (op *Operation) Timestamp() time.Time { // Cancelled returns whether the context for this // operation has been cancelled. func (op *Operation) Cancelled() bool { + ctx, span := trace.StartSpan(op.ctx, "optracker/Cancelled") + _ = ctx + defer span.End() select { case <-op.ctx.Done(): return true diff --git a/pintracker/optracker/operationtracker.go b/pintracker/optracker/operationtracker.go index 43902264..2648caf8 100644 --- a/pintracker/optracker/operationtracker.go +++ b/pintracker/optracker/operationtracker.go @@ -11,6 +11,7 @@ import ( "time" "github.com/ipfs/ipfs-cluster/api" + "go.opencensus.io/trace" cid "github.com/ipfs/go-cid" logging "github.com/ipfs/go-log" @@ -44,7 +45,11 @@ func NewOperationTracker(ctx context.Context, pid peer.ID, peerName string) *Ope // // If an operation exists it is of different type, it is // cancelled and the new one replaces it in the tracker. 
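Editor's note: as a reference for the pintracker/optracker/operation.go hunks above, the following standalone sketch (illustrative names only, not part of the patch) shows the span-per-method pattern being introduced there: each Operation method opens an OpenCensus span on the operation's stored context and ends it on return.

// Sketch of the span-per-method pattern; `op` is a stand-in for
// optracker.Operation, which also carries a cancel func, mutex, phase
// and error fields.
package main

import (
    "context"
    "fmt"

    "go.opencensus.io/trace"
)

type op struct {
    ctx context.Context
}

func (o *op) cancelled() bool {
    // open a span on the operation's own context, close it on return
    _, span := trace.StartSpan(o.ctx, "optracker/Cancelled")
    defer span.End()

    select {
    case <-o.ctx.Done():
        return true
    default:
        return false
    }
}

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    o := &op{ctx: ctx}
    fmt.Println(o.cancelled()) // false
    cancel()
    fmt.Println(o.cancelled()) // true
}
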
-func (opt *OperationTracker) TrackNewOperation(pin api.Pin, typ OperationType, ph Phase) *Operation { +func (opt *OperationTracker) TrackNewOperation(ctx context.Context, pin api.Pin, typ OperationType, ph Phase) *Operation { + ctx = trace.NewContext(opt.ctx, trace.FromContext(ctx)) + ctx, span := trace.StartSpan(ctx, "optracker/TrackNewOperation") + defer span.End() + cidStr := pin.Cid.String() opt.mu.Lock() @@ -58,7 +63,7 @@ func (opt *OperationTracker) TrackNewOperation(pin api.Pin, typ OperationType, p op.Cancel() // cancel ongoing operation and replace it } - op2 := NewOperation(opt.ctx, pin, typ, ph) + op2 := NewOperation(ctx, pin, typ, ph) logger.Debugf("'%s' on cid '%s' has been created with phase '%s'", typ, cidStr, ph) opt.operations[cidStr] = op2 return op2 @@ -66,7 +71,7 @@ func (opt *OperationTracker) TrackNewOperation(pin api.Pin, typ OperationType, p // Clean deletes an operation from the tracker if it is the one we are tracking // (compares pointers). -func (opt *OperationTracker) Clean(op *Operation) { +func (opt *OperationTracker) Clean(ctx context.Context, op *Operation) { cidStr := op.Cid().String() opt.mu.Lock() @@ -80,7 +85,7 @@ func (opt *OperationTracker) Clean(op *Operation) { // Status returns the TrackerStatus associated to the last operation known // with the given Cid. It returns false if we are not tracking any operation // for the given Cid. -func (opt *OperationTracker) Status(c cid.Cid) (api.TrackerStatus, bool) { +func (opt *OperationTracker) Status(ctx context.Context, c cid.Cid) (api.TrackerStatus, bool) { opt.mu.RLock() defer opt.mu.RUnlock() op, ok := opt.operations[c.String()] @@ -95,7 +100,7 @@ func (opt *OperationTracker) Status(c cid.Cid) (api.TrackerStatus, bool) { // is PhaseDone. Any other phases are considered in-flight and not touched. // For things already in error, the error message is updated. // Remote pins are ignored too. -func (opt *OperationTracker) SetError(c cid.Cid, err error) { +func (opt *OperationTracker) SetError(ctx context.Context, c cid.Cid, err error) { opt.mu.Lock() defer opt.mu.Unlock() op, ok := opt.operations[c.String()] @@ -113,7 +118,7 @@ func (opt *OperationTracker) SetError(c cid.Cid, err error) { } } -func (opt *OperationTracker) unsafePinInfo(op *Operation) api.PinInfo { +func (opt *OperationTracker) unsafePinInfo(ctx context.Context, op *Operation) api.PinInfo { if op == nil { return api.PinInfo{ Cid: cid.Undef, @@ -135,12 +140,15 @@ func (opt *OperationTracker) unsafePinInfo(op *Operation) api.PinInfo { } // Get returns a PinInfo object for Cid. -func (opt *OperationTracker) Get(c cid.Cid) api.PinInfo { +func (opt *OperationTracker) Get(ctx context.Context, c cid.Cid) api.PinInfo { + ctx, span := trace.StartSpan(ctx, "optracker/GetAll") + defer span.End() + opt.mu.RLock() defer opt.mu.RUnlock() op := opt.operations[c.String()] - pInfo := opt.unsafePinInfo(op) - if !pInfo.Cid.Defined() { + pInfo := opt.unsafePinInfo(ctx, op) + if pInfo.Cid == cid.Undef { pInfo.Cid = c } return pInfo @@ -148,31 +156,37 @@ func (opt *OperationTracker) Get(c cid.Cid) api.PinInfo { // GetExists returns a PinInfo object for a Cid only if there exists // an associated Operation. 
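Editor's note: the TrackNewOperation hunk above re-parents the caller's span onto the tracker's long-lived context with trace.NewContext(opt.ctx, trace.FromContext(ctx)), so a tracked operation is not cancelled when the originating request context is, while its trace stays linked to the request. A self-contained sketch of that idea, with placeholder types:

// Sketch (made-up names) of the re-parenting trick used in TrackNewOperation:
// keep the caller's span for tracing, but derive cancellation from the
// tracker's own lifecycle context instead of the request.
package main

import (
    "context"
    "fmt"

    "go.opencensus.io/trace"
)

type tracker struct {
    ctx context.Context // lives as long as the tracker
}

func (t *tracker) newOpContext(reqCtx context.Context) context.Context {
    // take the span from the request context...
    ctx := trace.NewContext(t.ctx, trace.FromContext(reqCtx))
    // ...but hang the operation off the tracker context
    ctx, span := trace.StartSpan(ctx, "optracker/TrackNewOperation")
    defer span.End()
    return ctx
}

func main() {
    t := &tracker{ctx: context.Background()}
    reqCtx, cancel := context.WithCancel(context.Background())
    opCtx := t.newOpContext(reqCtx)
    cancel()                          // the request goes away...
    fmt.Println(opCtx.Err() == nil)   // ...but opCtx is still live: true
}
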
-func (opt *OperationTracker) GetExists(c cid.Cid) (api.PinInfo, bool) { +func (opt *OperationTracker) GetExists(ctx context.Context, c cid.Cid) (api.PinInfo, bool) { + ctx, span := trace.StartSpan(ctx, "optracker/GetExists") + defer span.End() + opt.mu.RLock() defer opt.mu.RUnlock() op, ok := opt.operations[c.String()] if !ok { return api.PinInfo{}, false } - pInfo := opt.unsafePinInfo(op) + pInfo := opt.unsafePinInfo(ctx, op) return pInfo, true } // GetAll returns PinInfo objets for all known operations. -func (opt *OperationTracker) GetAll() []api.PinInfo { +func (opt *OperationTracker) GetAll(ctx context.Context) []api.PinInfo { + ctx, span := trace.StartSpan(ctx, "optracker/GetAll") + defer span.End() + var pinfos []api.PinInfo opt.mu.RLock() defer opt.mu.RUnlock() for _, op := range opt.operations { - pinfos = append(pinfos, opt.unsafePinInfo(op)) + pinfos = append(pinfos, opt.unsafePinInfo(ctx, op)) } return pinfos } // CleanError removes the associated Operation, if it is // in PhaseError. -func (opt *OperationTracker) CleanError(c cid.Cid) { +func (opt *OperationTracker) CleanError(ctx context.Context, c cid.Cid) { opt.mu.RLock() defer opt.mu.RUnlock() errop, ok := opt.operations[c.String()] @@ -184,12 +198,12 @@ func (opt *OperationTracker) CleanError(c cid.Cid) { return } - opt.Clean(errop) + opt.Clean(ctx, errop) return } // CleanAllDone deletes any operation from the tracker that is in PhaseDone. -func (opt *OperationTracker) CleanAllDone() { +func (opt *OperationTracker) CleanAllDone(ctx context.Context) { opt.mu.Lock() defer opt.mu.Unlock() for _, op := range opt.operations { @@ -200,7 +214,7 @@ func (opt *OperationTracker) CleanAllDone() { } // OpContext gets the context of an operation, if any. -func (opt *OperationTracker) OpContext(c cid.Cid) context.Context { +func (opt *OperationTracker) OpContext(ctx context.Context, c cid.Cid) context.Context { opt.mu.RLock() defer opt.mu.RUnlock() op, ok := opt.operations[c.String()] @@ -214,13 +228,13 @@ func (opt *OperationTracker) OpContext(c cid.Cid) context.Context { // Operations that matched the provided filter. Note, only supports // filters of type OperationType or Phase, any other type // will result in a nil slice being returned. -func (opt *OperationTracker) Filter(filters ...interface{}) []api.PinInfo { +func (opt *OperationTracker) Filter(ctx context.Context, filters ...interface{}) []api.PinInfo { var pinfos []api.PinInfo opt.mu.RLock() defer opt.mu.RUnlock() - ops := filterOpsMap(opt.operations, filters) + ops := filterOpsMap(ctx, opt.operations, filters) for _, op := range ops { - pinfos = append(pinfos, opt.unsafePinInfo(op)) + pinfos = append(pinfos, opt.unsafePinInfo(ctx, op)) } return pinfos } @@ -229,34 +243,34 @@ func (opt *OperationTracker) Filter(filters ...interface{}) []api.PinInfo { // with the matching filter. Note, only supports // filters of type OperationType or Phase, any other type // will result in a nil slice being returned. 
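Editor's note: the OpContext accessor above is what the pintracker tests use to wait for an in-flight operation to be cancelled. A minimal stdlib-only sketch of that wait-with-timeout pattern (names are illustrative, not the real test helpers):

// Block until the operation's context is cancelled, or fail after a timeout.
package main

import (
    "context"
    "fmt"
    "time"
)

func waitCancelled(opCtx context.Context, timeout time.Duration) error {
    select {
    case <-opCtx.Done():
        return nil
    case <-time.After(timeout):
        return fmt.Errorf("operation context should have been cancelled by now")
    }
}

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    go func() {
        time.Sleep(10 * time.Millisecond)
        cancel() // e.g. an Untrack call cancelling an in-flight pin
    }()
    fmt.Println(waitCancelled(ctx, 100*time.Millisecond)) // <nil>
}
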
-func (opt *OperationTracker) filterOps(filters ...interface{}) []*Operation { +func (opt *OperationTracker) filterOps(ctx context.Context, filters ...interface{}) []*Operation { var fltops []*Operation opt.mu.RLock() defer opt.mu.RUnlock() - for _, op := range filterOpsMap(opt.operations, filters) { + for _, op := range filterOpsMap(ctx, opt.operations, filters) { fltops = append(fltops, op) } return fltops } -func filterOpsMap(ops map[string]*Operation, filters []interface{}) map[string]*Operation { +func filterOpsMap(ctx context.Context, ops map[string]*Operation, filters []interface{}) map[string]*Operation { fltops := make(map[string]*Operation) if len(filters) < 1 { return nil } if len(filters) == 1 { - filter(ops, fltops, filters[0]) + filter(ctx, ops, fltops, filters[0]) return fltops } mainFilter, filters := filters[0], filters[1:] - filter(ops, fltops, mainFilter) + filter(ctx, ops, fltops, mainFilter) - return filterOpsMap(fltops, filters) + return filterOpsMap(ctx, fltops, filters) } -func filter(in, out map[string]*Operation, filter interface{}) { +func filter(ctx context.Context, in, out map[string]*Operation, filter interface{}) { for _, op := range in { switch filter.(type) { case OperationType: diff --git a/pintracker/optracker/operationtracker_test.go b/pintracker/optracker/operationtracker_test.go index 06301da4..d66dbb48 100644 --- a/pintracker/optracker/operationtracker_test.go +++ b/pintracker/optracker/operationtracker_test.go @@ -15,9 +15,10 @@ func testOperationTracker(t *testing.T) *OperationTracker { } func TestOperationTracker_TrackNewOperation(t *testing.T) { + ctx := context.Background() opt := testOperationTracker(t) h := test.MustDecodeCid(test.TestCid1) - op := opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseQueued) + op := opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseQueued) t.Run("track new operation", func(t *testing.T) { if op == nil { @@ -41,14 +42,14 @@ func TestOperationTracker_TrackNewOperation(t *testing.T) { }) t.Run("track when ongoing operation", func(t *testing.T) { - op2 := opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseInProgress) + op2 := opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseInProgress) if op2 != nil { t.Fatal("should not have created new operation") } }) t.Run("track of different type", func(t *testing.T) { - op2 := opt.TrackNewOperation(api.PinCid(h), OperationUnpin, PhaseQueued) + op2 := opt.TrackNewOperation(ctx, api.PinCid(h), OperationUnpin, PhaseQueued) if op2 == nil { t.Fatal("should have created a new operation") } @@ -59,24 +60,24 @@ func TestOperationTracker_TrackNewOperation(t *testing.T) { }) t.Run("track of same type when done", func(t *testing.T) { - op2 := opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseDone) + op2 := opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseDone) if op2 == nil { t.Fatal("should have created a new operation") } - op3 := opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseQueued) + op3 := opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseQueued) if op3 == nil { t.Fatal("should have created a new operation when other is in Done") } }) t.Run("track of same type when error", func(t *testing.T) { - op4 := opt.TrackNewOperation(api.PinCid(h), OperationUnpin, PhaseError) + op4 := opt.TrackNewOperation(ctx, api.PinCid(h), OperationUnpin, PhaseError) if op4 == nil { t.Fatal("should have created a new operation") } - op5 := opt.TrackNewOperation(api.PinCid(h), OperationUnpin, PhaseQueued) + op5 := 
opt.TrackNewOperation(ctx, api.PinCid(h), OperationUnpin, PhaseQueued) if op5 == nil { t.Fatal("should have created a new operation") } @@ -84,21 +85,22 @@ func TestOperationTracker_TrackNewOperation(t *testing.T) { } func TestOperationTracker_Clean(t *testing.T) { + ctx := context.Background() opt := testOperationTracker(t) h := test.MustDecodeCid(test.TestCid1) - op := opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseQueued) - op2 := opt.TrackNewOperation(api.PinCid(h), OperationUnpin, PhaseQueued) + op := opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseQueued) + op2 := opt.TrackNewOperation(ctx, api.PinCid(h), OperationUnpin, PhaseQueued) t.Run("clean older operation", func(t *testing.T) { - opt.Clean(op) - st, ok := opt.Status(h) + opt.Clean(ctx, op) + st, ok := opt.Status(ctx, h) if !ok || st != api.TrackerStatusUnpinQueued { t.Fatal("should not have cleaned the latest op") } }) t.Run("clean current operation", func(t *testing.T) { - opt.Clean(op2) - _, ok := opt.Status(h) + opt.Clean(ctx, op2) + _, ok := opt.Status(ctx, h) if ok { t.Fatal("should have cleaned the latest op") } @@ -106,26 +108,28 @@ func TestOperationTracker_Clean(t *testing.T) { } func TestOperationTracker_Status(t *testing.T) { + ctx := context.Background() opt := testOperationTracker(t) h := test.MustDecodeCid(test.TestCid1) - opt.TrackNewOperation(api.PinCid(h), OperationRemote, PhaseDone) - st, ok := opt.Status(h) + opt.TrackNewOperation(ctx, api.PinCid(h), OperationRemote, PhaseDone) + st, ok := opt.Status(ctx, h) if !ok || st != api.TrackerStatusRemote { t.Error("should provide status remote") } - _, ok = opt.Status(h) + _, ok = opt.Status(ctx, h) if !ok { t.Error("should signal unexistent status") } } func TestOperationTracker_SetError(t *testing.T) { + ctx := context.Background() opt := testOperationTracker(t) h := test.MustDecodeCid(test.TestCid1) - opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseDone) - opt.SetError(h, errors.New("fake error")) - pinfo := opt.Get(h) + opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseDone) + opt.SetError(ctx, h, errors.New("fake error")) + pinfo := opt.Get(ctx, h) if pinfo.Status != api.TrackerStatusPinError { t.Error("should have updated the status") } @@ -133,22 +137,23 @@ func TestOperationTracker_SetError(t *testing.T) { t.Error("should have set the error message") } - opt.TrackNewOperation(api.PinCid(h), OperationUnpin, PhaseQueued) - opt.SetError(h, errors.New("fake error")) - st, ok := opt.Status(h) + opt.TrackNewOperation(ctx, api.PinCid(h), OperationUnpin, PhaseQueued) + opt.SetError(ctx, h, errors.New("fake error")) + st, ok := opt.Status(ctx, h) if !ok || st != api.TrackerStatusUnpinQueued { t.Error("should not have set an error on in-flight items") } } func TestOperationTracker_Get(t *testing.T) { + ctx := context.Background() opt := testOperationTracker(t) h := test.MustDecodeCid(test.TestCid1) h2 := test.MustDecodeCid(test.TestCid2) - opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseDone) + opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseDone) t.Run("Get with existing item", func(t *testing.T) { - pinfo := opt.Get(h) + pinfo := opt.Get(ctx, h) if pinfo.Status != api.TrackerStatusPinned { t.Error("bad status") } @@ -163,7 +168,7 @@ func TestOperationTracker_Get(t *testing.T) { }) t.Run("Get with unexisting item", func(t *testing.T) { - pinfo := opt.Get(h2) + pinfo := opt.Get(ctx, h2) if pinfo.Status != api.TrackerStatusUnpinned { t.Error("bad status") } @@ -178,10 +183,11 @@ func 
TestOperationTracker_Get(t *testing.T) { } func TestOperationTracker_GetAll(t *testing.T) { + ctx := context.Background() opt := testOperationTracker(t) h := test.MustDecodeCid(test.TestCid1) - opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseInProgress) - pinfos := opt.GetAll() + opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseInProgress) + pinfos := opt.GetAll(ctx) if len(pinfos) != 1 { t.Fatal("expected 1 item") } @@ -191,11 +197,12 @@ func TestOperationTracker_GetAll(t *testing.T) { } func TestOperationTracker_OpContext(t *testing.T) { + ctx := context.Background() opt := testOperationTracker(t) h := test.MustDecodeCid(test.TestCid1) - op := opt.TrackNewOperation(api.PinCid(h), OperationPin, PhaseInProgress) + op := opt.TrackNewOperation(ctx, api.PinCid(h), OperationPin, PhaseInProgress) ctx1 := op.Context() - ctx2 := opt.OpContext(h) + ctx2 := opt.OpContext(ctx, h) if ctx1 != ctx2 { t.Fatal("didn't get the right context") } @@ -213,7 +220,7 @@ func TestOperationTracker_filterOps(t *testing.T) { t.Run("filter ops to pin operations", func(t *testing.T) { wantLen := 2 wantOp := OperationPin - got := opt.filterOps(wantOp) + got := opt.filterOps(ctx, wantOp) if len(got) != wantLen { t.Errorf("want: %d %s operations; got: %d", wantLen, wantOp.String(), len(got)) } @@ -227,7 +234,7 @@ func TestOperationTracker_filterOps(t *testing.T) { t.Run("filter ops to in progress phase", func(t *testing.T) { wantLen := 2 wantPhase := PhaseInProgress - got := opt.filterOps(PhaseInProgress) + got := opt.filterOps(ctx, PhaseInProgress) if len(got) != wantLen { t.Errorf("want: %d %s operations; got: %d", wantLen, wantPhase.String(), len(got)) } @@ -242,7 +249,7 @@ func TestOperationTracker_filterOps(t *testing.T) { wantLen := 1 wantPhase := PhaseQueued wantOp := OperationPin - got := opt.filterOps(OperationPin, PhaseQueued) + got := opt.filterOps(ctx, OperationPin, PhaseQueued) if len(got) != wantLen { t.Errorf("want: %d %s operations; got: %d", wantLen, wantPhase.String(), len(got)) } diff --git a/pintracker/pintracker_test.go b/pintracker/pintracker_test.go index 11ffa997..4844822b 100644 --- a/pintracker/pintracker_test.go +++ b/pintracker/pintracker_test.go @@ -178,7 +178,7 @@ func TestPinTracker_Track(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if err := tt.args.tracker.Track(tt.args.c); (err != nil) != tt.wantErr { + if err := tt.args.tracker.Track(context.Background(), tt.args.c); (err != nil) != tt.wantErr { t.Errorf("PinTracker.Track() error = %v, wantErr %v", err, tt.wantErr) } }) @@ -213,7 +213,7 @@ func BenchmarkPinTracker_Track(b *testing.B) { b.Run(tt.name, func(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - if err := tt.args.tracker.Track(tt.args.c); err != nil { + if err := tt.args.tracker.Track(context.Background(), tt.args.c); err != nil { b.Errorf("PinTracker.Track() error = %v", err) } } @@ -250,7 +250,7 @@ func TestPinTracker_Untrack(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if err := tt.args.tracker.Untrack(tt.args.c); (err != nil) != tt.wantErr { + if err := tt.args.tracker.Untrack(context.Background(), tt.args.c); (err != nil) != tt.wantErr { t.Errorf("PinTracker.Untrack() error = %v, wantErr %v", err, tt.wantErr) } }) @@ -330,11 +330,11 @@ func TestPinTracker_StatusAll(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if err := tt.args.tracker.Track(tt.args.c); err != nil { + if err := tt.args.tracker.Track(context.Background(), 
tt.args.c); err != nil { t.Errorf("PinTracker.Track() error = %v", err) } time.Sleep(1 * time.Second) - got := tt.args.tracker.StatusAll() + got := tt.args.tracker.StatusAll(context.Background()) if len(got) != len(tt.want) { for _, pi := range got { t.Logf("pinfo: %v", pi) @@ -383,7 +383,7 @@ func BenchmarkPinTracker_StatusAll(b *testing.B) { b.Run(tt.name, func(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - tt.args.tracker.StatusAll() + tt.args.tracker.StatusAll(context.Background()) } }) } @@ -474,13 +474,13 @@ func TestPinTracker_Status(t *testing.T) { // the Track preps the internal map of the MapPinTracker // not required by the Stateless impl pin := api.PinWithOpts(test.MustDecodeCid(test.TestCid1), pinOpts) - if err := tt.args.tracker.Track(pin); err != nil { + if err := tt.args.tracker.Track(context.Background(), pin); err != nil { t.Errorf("PinTracker.Track() error = %v", err) } time.Sleep(1 * time.Second) } - got := tt.args.tracker.Status(tt.args.c) + got := tt.args.tracker.Status(context.Background(), tt.args.c) if got.Cid.String() != tt.want.Cid.String() { t.Errorf("PinTracker.Status() = %v, want %v", got.Cid, tt.want.Cid) @@ -591,7 +591,7 @@ func TestPinTracker_SyncAll(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := tt.args.tracker.SyncAll() + got, err := tt.args.tracker.SyncAll(context.Background()) if (err != nil) != tt.wantErr { t.Errorf("PinTracker.SyncAll() error = %v, wantErr %v", err, tt.wantErr) return @@ -602,7 +602,7 @@ func TestPinTracker_SyncAll(t *testing.T) { } for _, c := range tt.args.cs { - err := tt.args.tracker.Track(api.PinWithOpts(c, pinOpts)) + err := tt.args.tracker.Track(context.Background(), api.PinWithOpts(c, pinOpts)) if err != nil { t.Fatal(err) } @@ -690,13 +690,13 @@ func TestPinTracker_Sync(t *testing.T) { case *maptracker.MapPinTracker: // the Track preps the internal map of the MapPinTracker; not required by the Stateless impl pin := api.PinWithOpts(test.MustDecodeCid(test.TestCid1), pinOpts) - if err := tt.args.tracker.Track(pin); err != nil { + if err := tt.args.tracker.Track(context.Background(), pin); err != nil { t.Errorf("PinTracker.Track() error = %v", err) } time.Sleep(1 * time.Second) } - got, err := tt.args.tracker.Sync(tt.args.c) + got, err := tt.args.tracker.Sync(context.Background(), tt.args.c) if (err != nil) != tt.wantErr { t.Errorf("PinTracker.Sync() error = %v, wantErr %v", err, tt.wantErr) return @@ -767,13 +767,13 @@ func TestPinTracker_RecoverAll(t *testing.T) { switch tt.args.tracker.(type) { case *maptracker.MapPinTracker: // the Track preps the internal map of the MapPinTracker; not required by the Stateless impl - if err := tt.args.tracker.Track(tt.args.pin); err != nil { + if err := tt.args.tracker.Track(context.Background(), tt.args.pin); err != nil { t.Errorf("PinTracker.Track() error = %v", err) } time.Sleep(1 * time.Second) } - got, err := tt.args.tracker.RecoverAll() + got, err := tt.args.tracker.RecoverAll(context.Background()) if (err != nil) != tt.wantErr { t.Errorf("PinTracker.RecoverAll() error = %v, wantErr %v", err, tt.wantErr) return @@ -841,7 +841,7 @@ func TestPinTracker_Recover(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := tt.args.tracker.Recover(tt.args.c) + got, err := tt.args.tracker.Recover(context.Background(), tt.args.c) if (err != nil) != tt.wantErr { t.Errorf("PinTracker.Recover() error = %v, wantErr %v", err, tt.wantErr) return @@ -892,14 +892,14 @@ func TestUntrackTrack(t *testing.T) { 
} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := tt.args.tracker.Track(api.PinWithOpts(tt.args.c, pinOpts)) + err := tt.args.tracker.Track(context.Background(), api.PinWithOpts(tt.args.c, pinOpts)) if err != nil { t.Fatal(err) } time.Sleep(time.Second / 2) - err = tt.args.tracker.Untrack(tt.args.c) + err = tt.args.tracker.Untrack(context.Background(), tt.args.c) if err != nil { t.Fatal(err) } @@ -946,21 +946,21 @@ func TestTrackUntrackWithCancel(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { p := api.PinWithOpts(tt.args.c, pinOpts) - err := tt.args.tracker.Track(p) + err := tt.args.tracker.Track(context.Background(), p) if err != nil { t.Fatal(err) } time.Sleep(100 * time.Millisecond) // let pinning start - pInfo := tt.args.tracker.Status(tt.args.c) + pInfo := tt.args.tracker.Status(context.Background(), tt.args.c) if pInfo.Status == api.TrackerStatusUnpinned { t.Fatal("slowPin should be tracked") } if pInfo.Status == api.TrackerStatusPinning { go func() { - err = tt.args.tracker.Untrack(tt.args.c) + err = tt.args.tracker.Untrack(context.Background(), tt.args.c) if err != nil { t.Fatal(err) } @@ -968,9 +968,9 @@ func TestTrackUntrackWithCancel(t *testing.T) { var ctx context.Context switch trkr := tt.args.tracker.(type) { case *maptracker.MapPinTracker: - ctx = trkr.OpContext(tt.args.c) + ctx = trkr.OpContext(context.Background(), tt.args.c) case *stateless.Tracker: - ctx = trkr.OpContext(tt.args.c) + ctx = trkr.OpContext(context.Background(), tt.args.c) } select { case <-ctx.Done(): @@ -986,6 +986,7 @@ func TestTrackUntrackWithCancel(t *testing.T) { } func TestPinTracker_RemoteIgnoresError(t *testing.T) { + ctx := context.Background() testF := func(t *testing.T, pt ipfscluster.PinTracker) { remoteCid := test.MustDecodeCid(test.TestCid4) @@ -994,14 +995,14 @@ func TestPinTracker_RemoteIgnoresError(t *testing.T) { remote.ReplicationFactorMin = 1 remote.ReplicationFactorMax = 1 - err := pt.Track(remote) + err := pt.Track(ctx, remote) if err != nil { t.Fatal(err) } // Sync triggers IPFSPinLs which will return an error // (see mock) - pi, err := pt.Sync(remoteCid) + pi, err := pt.Sync(ctx, remoteCid) if err != nil { t.Fatal(err) } @@ -1010,7 +1011,7 @@ func TestPinTracker_RemoteIgnoresError(t *testing.T) { t.Error("Remote pin should not be in error") } - pi = pt.Status(remoteCid) + pi = pt.Status(ctx, remoteCid) if err != nil { t.Fatal(err) } diff --git a/pintracker/stateless/stateless.go b/pintracker/stateless/stateless.go index 6fe05258..9e95e11c 100644 --- a/pintracker/stateless/stateless.go +++ b/pintracker/stateless/stateless.go @@ -9,6 +9,8 @@ import ( "sync" "time" + "go.opencensus.io/trace" + "github.com/ipfs/ipfs-cluster/api" "github.com/ipfs/ipfs-cluster/pintracker/optracker" @@ -74,13 +76,13 @@ func (spt *Tracker) opWorker(pinF func(*optracker.Operation) error, opChan chan select { case <-ticker.C: // every tick, clear out all Done operations - spt.optracker.CleanAllDone() + spt.optracker.CleanAllDone(spt.ctx) case op := <-opChan: if cont := applyPinF(pinF, op); cont { continue } - spt.optracker.Clean(op) + spt.optracker.Clean(op.Context(), op) case <-spt.ctx.Done(): return } @@ -112,9 +114,12 @@ func applyPinF(pinF func(*optracker.Operation) error, op *optracker.Operation) b } func (spt *Tracker) pin(op *optracker.Operation) error { + ctx, span := trace.StartSpan(op.Context(), "tracker/stateless/pin") + defer span.End() + logger.Debugf("issuing pin call for %s", op.Cid()) err := spt.rpcClient.CallContext( - op.Context(), + 
ctx, "", "Cluster", "IPFSPin", @@ -128,9 +133,12 @@ func (spt *Tracker) pin(op *optracker.Operation) error { } func (spt *Tracker) unpin(op *optracker.Operation) error { + ctx, span := trace.StartSpan(op.Context(), "tracker/stateless/unpin") + defer span.End() + logger.Debugf("issuing unpin call for %s", op.Cid()) err := spt.rpcClient.CallContext( - op.Context(), + ctx, "", "Cluster", "IPFSUnpin", @@ -144,9 +152,12 @@ func (spt *Tracker) unpin(op *optracker.Operation) error { } // Enqueue puts a new operation on the queue, unless ongoing exists. -func (spt *Tracker) enqueue(c api.Pin, typ optracker.OperationType) error { +func (spt *Tracker) enqueue(ctx context.Context, c api.Pin, typ optracker.OperationType) error { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/enqueue") + defer span.End() + logger.Debugf("entering enqueue: pin: %+v", c) - op := spt.optracker.TrackNewOperation(c, typ, optracker.PhaseQueued) + op := spt.optracker.TrackNewOperation(ctx, c, typ, optracker.PhaseQueued) if op == nil { return nil // ongoing pin operation. } @@ -183,7 +194,11 @@ func (spt *Tracker) SetClient(c *rpc.Client) { // Shutdown finishes the services provided by the StatelessPinTracker // and cancels any active context. -func (spt *Tracker) Shutdown() error { +func (spt *Tracker) Shutdown(ctx context.Context) error { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/Shutdown") + _ = ctx + defer span.End() + spt.shutdownMu.Lock() defer spt.shutdownMu.Unlock() @@ -202,14 +217,17 @@ func (spt *Tracker) Shutdown() error { // Track tells the StatelessPinTracker to start managing a Cid, // possibly triggering Pin operations on the IPFS daemon. -func (spt *Tracker) Track(c api.Pin) error { +func (spt *Tracker) Track(ctx context.Context, c api.Pin) error { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/Track") + defer span.End() + logger.Debugf("tracking %s", c.Cid) // Sharded pins are never pinned. A sharded pin cannot turn into // something else or viceversa like it happens with Remote pins so // we just track them. if c.Type == api.MetaType { - spt.optracker.TrackNewOperation(c, optracker.OperationShard, optracker.PhaseDone) + spt.optracker.TrackNewOperation(ctx, c, optracker.OperationShard, optracker.PhaseDone) return nil } @@ -217,7 +235,7 @@ func (spt *Tracker) Track(c api.Pin) error { // Note, IPFSConn checks with pin/ls before triggering // pin/rm. if c.IsRemotePin(spt.peerID) { - op := spt.optracker.TrackNewOperation(c, optracker.OperationRemote, optracker.PhaseInProgress) + op := spt.optracker.TrackNewOperation(ctx, c, optracker.OperationRemote, optracker.PhaseInProgress) if op == nil { return nil // ongoing unpin } @@ -231,19 +249,25 @@ func (spt *Tracker) Track(c api.Pin) error { return nil } - return spt.enqueue(c, optracker.OperationPin) + return spt.enqueue(ctx, c, optracker.OperationPin) } // Untrack tells the StatelessPinTracker to stop managing a Cid. // If the Cid is pinned locally, it will be unpinned. -func (spt *Tracker) Untrack(c cid.Cid) error { +func (spt *Tracker) Untrack(ctx context.Context, c cid.Cid) error { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/Untrack") + defer span.End() + logger.Debugf("untracking %s", c) - return spt.enqueue(api.PinCid(c), optracker.OperationUnpin) + return spt.enqueue(ctx, api.PinCid(c), optracker.OperationUnpin) } // StatusAll returns information for all Cids pinned to the local IPFS node. 
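Editor's note: for orientation, the opWorker/enqueue/Shutdown hunks above keep the existing worker-loop shape and only thread contexts through it. A rough, self-contained sketch of that loop with placeholder types (not the real optracker ones):

// Consume queued operations, clean up periodically, and exit when the
// tracker's context is cancelled (which is what Shutdown(ctx) triggers).
package main

import (
    "context"
    "fmt"
    "time"
)

type operation struct{ name string }

func worker(ctx context.Context, opChan <-chan *operation) {
    ticker := time.NewTicker(50 * time.Millisecond)
    defer ticker.Stop()
    for {
        select {
        case <-ticker.C:
            // every tick, clear out completed operations (elided here)
        case op := <-opChan:
            fmt.Println("processing", op.name)
        case <-ctx.Done():
            return
        }
    }
}

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    ops := make(chan *operation, 1)
    go worker(ctx, ops)
    ops <- &operation{name: "pin"}
    time.Sleep(20 * time.Millisecond)
    cancel() // shutting down the tracker stops the worker
    time.Sleep(20 * time.Millisecond)
}
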
-func (spt *Tracker) StatusAll() []api.PinInfo { - pininfos, err := spt.localStatus(true) +func (spt *Tracker) StatusAll(ctx context.Context) []api.PinInfo { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/StatusAll") + defer span.End() + + pininfos, err := spt.localStatus(ctx, true) if err != nil { logger.Error(err) return nil @@ -252,7 +276,7 @@ func (spt *Tracker) StatusAll() []api.PinInfo { // get all inflight operations from optracker and // put them into the map, deduplicating any already 'pinned' items with // their inflight operation - for _, infop := range spt.optracker.GetAll() { + for _, infop := range spt.optracker.GetAll(ctx) { pininfos[infop.Cid.String()] = infop } @@ -264,9 +288,12 @@ func (spt *Tracker) StatusAll() []api.PinInfo { } // Status returns information for a Cid pinned to the local IPFS node. -func (spt *Tracker) Status(c cid.Cid) api.PinInfo { +func (spt *Tracker) Status(ctx context.Context, c cid.Cid) api.PinInfo { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/Status") + defer span.End() + // check if c has an inflight operation or errorred operation in optracker - if oppi, ok := spt.optracker.GetExists(c); ok { + if oppi, ok := spt.optracker.GetExists(ctx, c); ok { // if it does return the status of the operation return oppi } @@ -355,34 +382,40 @@ func (spt *Tracker) Status(c cid.Cid) api.PinInfo { // were updated or have errors. Cids in error states can be recovered // with Recover(). // An error is returned if we are unable to contact the IPFS daemon. -func (spt *Tracker) SyncAll() ([]api.PinInfo, error) { +func (spt *Tracker) SyncAll(ctx context.Context) ([]api.PinInfo, error) { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/SyncAll") + defer span.End() + // get ipfs status for all - localpis, err := spt.localStatus(false) + localpis, err := spt.localStatus(ctx, false) if err != nil { logger.Error(err) return nil, err } - for _, p := range spt.optracker.Filter(optracker.OperationPin, optracker.PhaseError) { + for _, p := range spt.optracker.Filter(ctx, optracker.OperationPin, optracker.PhaseError) { if _, ok := localpis[p.Cid.String()]; ok { - spt.optracker.CleanError(p.Cid) + spt.optracker.CleanError(ctx, p.Cid) } } - for _, p := range spt.optracker.Filter(optracker.OperationUnpin, optracker.PhaseError) { + for _, p := range spt.optracker.Filter(ctx, optracker.OperationUnpin, optracker.PhaseError) { if _, ok := localpis[p.Cid.String()]; !ok { - spt.optracker.CleanError(p.Cid) + spt.optracker.CleanError(ctx, p.Cid) } } - return spt.getErrorsAll(), nil + return spt.getErrorsAll(ctx), nil } // Sync returns the updated local status for the given Cid. 
-func (spt *Tracker) Sync(c cid.Cid) (api.PinInfo, error) { - oppi, ok := spt.optracker.GetExists(c) +func (spt *Tracker) Sync(ctx context.Context, c cid.Cid) (api.PinInfo, error) { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/Sync") + defer span.End() + + oppi, ok := spt.optracker.GetExists(ctx, c) if !ok { - return spt.Status(c), nil + return spt.Status(ctx, c), nil } if oppi.Status == api.TrackerStatusUnpinError { @@ -408,7 +441,7 @@ func (spt *Tracker) Sync(c cid.Cid) (api.PinInfo, error) { }, err } // it isn't in the global state - spt.optracker.CleanError(c) + spt.optracker.CleanError(ctx, c) return api.PinInfo{ Cid: c, Peer: spt.peerID, @@ -418,7 +451,7 @@ func (spt *Tracker) Sync(c cid.Cid) (api.PinInfo, error) { } // check if pin is a remote pin if gpin.ToPin().IsRemotePin(spt.peerID) { - spt.optracker.CleanError(c) + spt.optracker.CleanError(ctx, c) return api.PinInfo{ Cid: c, Peer: spt.peerID, @@ -449,7 +482,7 @@ func (spt *Tracker) Sync(c cid.Cid) (api.PinInfo, error) { }, err } if ips.ToTrackerStatus() == api.TrackerStatusPinned { - spt.optracker.CleanError(c) + spt.optracker.CleanError(ctx, c) pi := api.PinInfo{ Cid: c, Peer: spt.peerID, @@ -460,15 +493,18 @@ func (spt *Tracker) Sync(c cid.Cid) (api.PinInfo, error) { } } - return spt.optracker.Get(c), nil + return spt.optracker.Get(ctx, c), nil } // RecoverAll attempts to recover all items tracked by this peer. -func (spt *Tracker) RecoverAll() ([]api.PinInfo, error) { - statuses := spt.StatusAll() +func (spt *Tracker) RecoverAll(ctx context.Context) ([]api.PinInfo, error) { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/RecoverAll") + defer span.End() + + statuses := spt.StatusAll(ctx) resp := make([]api.PinInfo, 0) for _, st := range statuses { - r, err := spt.Recover(st.Cid) + r, err := spt.Recover(ctx, st.Cid) if err != nil { return resp, err } @@ -480,30 +516,37 @@ func (spt *Tracker) RecoverAll() ([]api.PinInfo, error) { // Recover will re-track or re-untrack a Cid in error state, // possibly retriggering an IPFS pinning operation and returning // only when it is done. 
-func (spt *Tracker) Recover(c cid.Cid) (api.PinInfo, error) { +func (spt *Tracker) Recover(ctx context.Context, c cid.Cid) (api.PinInfo, error) { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/Recover") + defer span.End() + logger.Infof("Attempting to recover %s", c) - pInfo, ok := spt.optracker.GetExists(c) + pInfo, ok := spt.optracker.GetExists(ctx, c) if !ok { - return spt.Status(c), nil + return spt.Status(ctx, c), nil } var err error switch pInfo.Status { case api.TrackerStatusPinError: - err = spt.enqueue(api.PinCid(c), optracker.OperationPin) + err = spt.enqueue(ctx, api.PinCid(c), optracker.OperationPin) case api.TrackerStatusUnpinError: - err = spt.enqueue(api.PinCid(c), optracker.OperationUnpin) + err = spt.enqueue(ctx, api.PinCid(c), optracker.OperationUnpin) } if err != nil { - return spt.Status(c), err + return spt.Status(ctx, c), err } - return spt.Status(c), nil + return spt.Status(ctx, c), nil } -func (spt *Tracker) ipfsStatusAll() (map[string]api.PinInfo, error) { +func (spt *Tracker) ipfsStatusAll(ctx context.Context) (map[string]api.PinInfo, error) { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/ipfsStatusAll") + defer span.End() + var ipsMap map[string]api.IPFSPinStatus - err := spt.rpcClient.Call( + err := spt.rpcClient.CallContext( + ctx, "", "Cluster", "IPFSPinLs", @@ -535,12 +578,16 @@ func (spt *Tracker) ipfsStatusAll() (map[string]api.PinInfo, error) { // localStatus returns a joint set of consensusState and ipfsStatus // marking pins which should be meta or remote and leaving any ipfs pins that // aren't in the consensusState out. -func (spt *Tracker) localStatus(incExtra bool) (map[string]api.PinInfo, error) { +func (spt *Tracker) localStatus(ctx context.Context, incExtra bool) (map[string]api.PinInfo, error) { + ctx, span := trace.StartSpan(ctx, "tracker/stateless/localStatus") + defer span.End() + pininfos := make(map[string]api.PinInfo) // get shared state var statePinsSerial []api.PinSerial - err := spt.rpcClient.Call( + err := spt.rpcClient.CallContext( + ctx, "", "Cluster", "Pins", @@ -557,7 +604,7 @@ func (spt *Tracker) localStatus(incExtra bool) (map[string]api.PinInfo, error) { } // get statuses from ipfs node first - localpis, err := spt.ipfsStatusAll() + localpis, err := spt.ipfsStatusAll(ctx) if err != nil { logger.Error(err) return nil, err @@ -594,12 +641,12 @@ func (spt *Tracker) localStatus(incExtra bool) (map[string]api.PinInfo, error) { return pininfos, nil } -func (spt *Tracker) getErrorsAll() []api.PinInfo { - return spt.optracker.Filter(optracker.PhaseError) +func (spt *Tracker) getErrorsAll(ctx context.Context) []api.PinInfo { + return spt.optracker.Filter(ctx, optracker.PhaseError) } // OpContext exports the internal optracker's OpContext method. // For testing purposes only. 
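Editor's note: the ipfsStatusAll and localStatus hunks above replace rpcClient.Call with rpcClient.CallContext so that the request context (and its trace span) reaches the RPC layer. A minimal sketch of the call-site shape, using a local interface in place of the real go-libp2p-gorpc client:

// The `caller` interface and fakeClient below are illustrative stand-ins;
// the real method lives on go-libp2p-gorpc's *rpc.Client.
package main

import (
    "context"
    "fmt"
)

type caller interface {
    CallContext(ctx context.Context, peer, svc, method string, in, out interface{}) error
}

type fakeClient struct{}

func (fakeClient) CallContext(ctx context.Context, peer, svc, method string, in, out interface{}) error {
    // a real client would serialize `in`, contact `peer` and fill `out`
    if p, ok := out.(*map[string]string); ok {
        *p = map[string]string{"QmFake": "recursive"}
    }
    return ctx.Err()
}

func pinLs(ctx context.Context, c caller) (map[string]string, error) {
    var pins map[string]string
    err := c.CallContext(ctx, "", "Cluster", "IPFSPinLs", "recursive", &pins)
    return pins, err
}

func main() {
    pins, err := pinLs(context.Background(), fakeClient{})
    fmt.Println(pins, err)
}
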
-func (spt *Tracker) OpContext(c cid.Cid) context.Context { - return spt.optracker.OpContext(c) +func (spt *Tracker) OpContext(ctx context.Context, c cid.Cid) context.Context { + return spt.optracker.OpContext(ctx, c) } diff --git a/pintracker/stateless/stateless_test.go b/pintracker/stateless/stateless_test.go index 8f94fb61..b6a1254f 100644 --- a/pintracker/stateless/stateless_test.go +++ b/pintracker/stateless/stateless_test.go @@ -118,74 +118,78 @@ func testStatelessPinTracker(t testing.TB) *Tracker { } func TestStatelessPinTracker_New(t *testing.T) { + ctx := context.Background() spt := testStatelessPinTracker(t) - defer spt.Shutdown() + defer spt.Shutdown(ctx) } func TestStatelessPinTracker_Shutdown(t *testing.T) { + ctx := context.Background() spt := testStatelessPinTracker(t) - err := spt.Shutdown() + err := spt.Shutdown(ctx) if err != nil { t.Fatal(err) } - err = spt.Shutdown() + err = spt.Shutdown(ctx) if err != nil { t.Fatal(err) } } func TestUntrackTrack(t *testing.T) { + ctx := context.Background() spt := testStatelessPinTracker(t) - defer spt.Shutdown() + defer spt.Shutdown(ctx) h1 := test.MustDecodeCid(test.TestCid1) // LocalPin c := api.PinWithOpts(h1, pinOpts) - err := spt.Track(c) + err := spt.Track(context.Background(), c) if err != nil { t.Fatal(err) } time.Sleep(time.Second / 2) - err = spt.Untrack(h1) + err = spt.Untrack(context.Background(), h1) if err != nil { t.Fatal(err) } } func TestTrackUntrackWithCancel(t *testing.T) { + ctx := context.Background() spt := testSlowStatelessPinTracker(t) - defer spt.Shutdown() + defer spt.Shutdown(ctx) slowPinCid := test.MustDecodeCid(test.TestSlowCid1) // LocalPin slowPin := api.PinWithOpts(slowPinCid, pinOpts) - err := spt.Track(slowPin) + err := spt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } time.Sleep(100 * time.Millisecond) // let pinning start - pInfo := spt.optracker.Get(slowPin.Cid) + pInfo := spt.optracker.Get(context.Background(), slowPin.Cid) if pInfo.Status == api.TrackerStatusUnpinned { t.Fatal("slowPin should be tracked") } if pInfo.Status == api.TrackerStatusPinning { go func() { - err = spt.Untrack(slowPinCid) + err = spt.Untrack(context.Background(), slowPinCid) if err != nil { t.Fatal(err) } }() select { - case <-spt.optracker.OpContext(slowPinCid).Done(): + case <-spt.optracker.OpContext(context.Background(), slowPinCid).Done(): return case <-time.Tick(100 * time.Millisecond): t.Errorf("operation context should have been cancelled by now") @@ -201,8 +205,9 @@ func TestTrackUntrackWithCancel(t *testing.T) { // "pinning", it should simply be unqueued (or ignored), and no // cancelling of the pinning operation happens (unlike on WithCancel). 
func TestTrackUntrackWithNoCancel(t *testing.T) { + ctx := context.Background() spt := testSlowStatelessPinTracker(t) - defer spt.Shutdown() + defer spt.Shutdown(ctx) slowPinCid := test.MustDecodeCid(test.TestSlowCid1) fastPinCid := test.MustDecodeCid(pinCancelCid) @@ -213,7 +218,7 @@ func TestTrackUntrackWithNoCancel(t *testing.T) { // LocalPin fastPin := api.PinWithOpts(fastPinCid, pinOpts) - err := spt.Track(slowPin) + err := spt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } @@ -221,18 +226,18 @@ func TestTrackUntrackWithNoCancel(t *testing.T) { // Otherwise fails when running with -race time.Sleep(300 * time.Millisecond) - err = spt.Track(fastPin) + err = spt.Track(context.Background(), fastPin) if err != nil { t.Fatal(err) } // fastPin should be queued because slow pin is pinning - fastPInfo := spt.optracker.Get(fastPin.Cid) + fastPInfo := spt.optracker.Get(context.Background(), fastPin.Cid) if fastPInfo.Status == api.TrackerStatusUnpinned { t.Fatal("fastPin should be tracked") } if fastPInfo.Status == api.TrackerStatusPinQueued { - err = spt.Untrack(fastPinCid) + err = spt.Untrack(context.Background(), fastPinCid) if err != nil { t.Fatal(err) } @@ -244,22 +249,23 @@ func TestTrackUntrackWithNoCancel(t *testing.T) { t.Errorf("fastPin should be queued to pin but is %s", fastPInfo.Status) } - pi := spt.optracker.Get(fastPin.Cid) + pi := spt.optracker.Get(context.Background(), fastPin.Cid) if pi.Cid == cid.Undef { t.Error("fastPin should have been removed from tracker") } } func TestUntrackTrackWithCancel(t *testing.T) { + ctx := context.Background() spt := testSlowStatelessPinTracker(t) - defer spt.Shutdown() + defer spt.Shutdown(ctx) slowPinCid := test.MustDecodeCid(test.TestSlowCid1) // LocalPin slowPin := api.PinWithOpts(slowPinCid, pinOpts) - err := spt.Track(slowPin) + err := spt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } @@ -268,27 +274,27 @@ func TestUntrackTrackWithCancel(t *testing.T) { // Untrack should cancel the ongoing request // and unpin right away - err = spt.Untrack(slowPinCid) + err = spt.Untrack(context.Background(), slowPinCid) if err != nil { t.Fatal(err) } time.Sleep(100 * time.Millisecond) - pi := spt.optracker.Get(slowPin.Cid) + pi := spt.optracker.Get(context.Background(), slowPin.Cid) if pi.Cid == cid.Undef { t.Fatal("expected slowPin to be tracked") } if pi.Status == api.TrackerStatusUnpinning { go func() { - err = spt.Track(slowPin) + err = spt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } }() select { - case <-spt.optracker.OpContext(slowPinCid).Done(): + case <-spt.optracker.OpContext(context.Background(), slowPinCid).Done(): return case <-time.Tick(100 * time.Millisecond): t.Errorf("operation context should have been cancelled by now") @@ -300,8 +306,9 @@ func TestUntrackTrackWithCancel(t *testing.T) { } func TestUntrackTrackWithNoCancel(t *testing.T) { + ctx := context.Background() spt := testStatelessPinTracker(t) - defer spt.Shutdown() + defer spt.Shutdown(ctx) slowPinCid := test.MustDecodeCid(test.TestSlowCid1) fastPinCid := test.MustDecodeCid(unpinCancelCid) @@ -312,35 +319,35 @@ func TestUntrackTrackWithNoCancel(t *testing.T) { // LocalPin fastPin := api.PinWithOpts(fastPinCid, pinOpts) - err := spt.Track(slowPin) + err := spt.Track(context.Background(), slowPin) if err != nil { t.Fatal(err) } - err = spt.Track(fastPin) + err = spt.Track(context.Background(), fastPin) if err != nil { t.Fatal(err) } time.Sleep(3 * time.Second) - err = spt.Untrack(slowPin.Cid) + err = 
spt.Untrack(context.Background(), slowPin.Cid) if err != nil { t.Fatal(err) } - err = spt.Untrack(fastPin.Cid) + err = spt.Untrack(context.Background(), fastPin.Cid) if err != nil { t.Fatal(err) } - pi := spt.optracker.Get(fastPin.Cid) + pi := spt.optracker.Get(context.Background(), fastPin.Cid) if pi.Cid == cid.Undef { t.Fatal("c untrack operation should be tracked") } if pi.Status == api.TrackerStatusUnpinQueued { - err = spt.Track(fastPin) + err = spt.Track(context.Background(), fastPin) if err != nil { t.Fatal(err) } @@ -416,7 +423,7 @@ func TestStatelessTracker_SyncAll(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := tt.args.tracker.SyncAll() + got, err := tt.args.tracker.SyncAll(context.Background()) if (err != nil) != tt.wantErr { t.Errorf("PinTracker.SyncAll() error = %v, wantErr %v", err, tt.wantErr) return @@ -427,14 +434,14 @@ func TestStatelessTracker_SyncAll(t *testing.T) { } for _, c := range tt.args.cs { - err := tt.args.tracker.Track(api.PinWithOpts(c, pinOpts)) + err := tt.args.tracker.Track(context.Background(), api.PinWithOpts(c, pinOpts)) if err != nil { t.Fatal(err) } - tt.args.tracker.optracker.SetError(c, errors.New("test error")) + tt.args.tracker.optracker.SetError(context.Background(), c, errors.New("test error")) } - got, err = tt.args.tracker.SyncAll() + got, err = tt.args.tracker.SyncAll(context.Background()) if (err != nil) != tt.wantErr { t.Errorf("PinTracker.SyncAll() error = %v, wantErr %v", err, tt.wantErr) return @@ -460,6 +467,6 @@ func BenchmarkTracker_localStatus(b *testing.B) { tracker := testStatelessPinTracker(b) b.ResetTimer() for i := 0; i < b.N; i++ { - tracker.localStatus(true) + tracker.localStatus(context.Background(), true) } } diff --git a/pnet_test.go b/pnet_test.go index 1179edfe..83454172 100644 --- a/pnet_test.go +++ b/pnet_test.go @@ -1,6 +1,7 @@ package ipfscluster import ( + "context" "testing" ) @@ -34,6 +35,7 @@ func TestClusterSecretFormat(t *testing.T) { } func TestSimplePNet(t *testing.T) { + ctx := context.Background() clusters, mocks := peerManagerClusters(t) defer cleanRaft() defer shutdownClusters(t, clusters, mocks) @@ -42,15 +44,15 @@ func TestSimplePNet(t *testing.T) { t.Skip("need at least 2 nodes for this test") } - _, err := clusters[0].PeerAdd(clusters[1].id) + _, err := clusters[0].PeerAdd(ctx, clusters[1].id) if err != nil { t.Fatal(err) } - if len(clusters[0].Peers()) != len(clusters[1].Peers()) { + if len(clusters[0].Peers(ctx)) != len(clusters[1].Peers(ctx)) { t.Fatal("Expected same number of peers") } - if len(clusters[0].Peers()) != 2 { + if len(clusters[0].Peers(ctx)) != 2 { t.Fatal("Expected 2 peers") } } diff --git a/rpc_api.go b/rpc_api.go index c5a2c703..bf9fc635 100644 --- a/rpc_api.go +++ b/rpc_api.go @@ -5,6 +5,8 @@ import ( peer "github.com/libp2p/go-libp2p-peer" + "go.opencensus.io/trace" + "github.com/ipfs/ipfs-cluster/api" ) @@ -25,25 +27,25 @@ type RPCAPI struct { // ID runs Cluster.ID() func (rpcapi *RPCAPI) ID(ctx context.Context, in struct{}, out *api.IDSerial) error { - id := rpcapi.c.ID().ToSerial() + id := rpcapi.c.ID(ctx).ToSerial() *out = id return nil } // Pin runs Cluster.Pin(). func (rpcapi *RPCAPI) Pin(ctx context.Context, in api.PinSerial, out *struct{}) error { - return rpcapi.c.Pin(in.ToPin()) + return rpcapi.c.Pin(ctx, in.ToPin()) } // Unpin runs Cluster.Unpin(). 
func (rpcapi *RPCAPI) Unpin(ctx context.Context, in api.PinSerial, out *struct{}) error { c := in.DecodeCid() - return rpcapi.c.Unpin(c) + return rpcapi.c.Unpin(ctx, c) } // Pins runs Cluster.Pins(). func (rpcapi *RPCAPI) Pins(ctx context.Context, in struct{}, out *[]api.PinSerial) error { - cidList := rpcapi.c.Pins() + cidList := rpcapi.c.Pins(ctx) cidSerialList := make([]api.PinSerial, 0, len(cidList)) for _, c := range cidList { cidSerialList = append(cidSerialList, c.ToSerial()) @@ -55,7 +57,7 @@ func (rpcapi *RPCAPI) Pins(ctx context.Context, in struct{}, out *[]api.PinSeria // PinGet runs Cluster.PinGet(). func (rpcapi *RPCAPI) PinGet(ctx context.Context, in api.PinSerial, out *api.PinSerial) error { cidarg := in.ToPin() - pin, err := rpcapi.c.PinGet(cidarg.Cid) + pin, err := rpcapi.c.PinGet(ctx, cidarg.Cid) if err == nil { *out = pin.ToSerial() } @@ -72,7 +74,7 @@ func (rpcapi *RPCAPI) Version(ctx context.Context, in struct{}, out *api.Version // Peers runs Cluster.Peers(). func (rpcapi *RPCAPI) Peers(ctx context.Context, in struct{}, out *[]api.IDSerial) error { - peers := rpcapi.c.Peers() + peers := rpcapi.c.Peers(ctx) var sPeers []api.IDSerial for _, p := range peers { sPeers = append(sPeers, p.ToSerial()) @@ -84,7 +86,7 @@ func (rpcapi *RPCAPI) Peers(ctx context.Context, in struct{}, out *[]api.IDSeria // PeerAdd runs Cluster.PeerAdd(). func (rpcapi *RPCAPI) PeerAdd(ctx context.Context, in string, out *api.IDSerial) error { pid, _ := peer.IDB58Decode(in) - id, err := rpcapi.c.PeerAdd(pid) + id, err := rpcapi.c.PeerAdd(ctx, pid) *out = id.ToSerial() return err } @@ -98,26 +100,26 @@ func (rpcapi *RPCAPI) ConnectGraph(ctx context.Context, in struct{}, out *api.Co // PeerRemove runs Cluster.PeerRm(). func (rpcapi *RPCAPI) PeerRemove(ctx context.Context, in peer.ID, out *struct{}) error { - return rpcapi.c.PeerRemove(in) + return rpcapi.c.PeerRemove(ctx, in) } // Join runs Cluster.Join(). func (rpcapi *RPCAPI) Join(ctx context.Context, in api.MultiaddrSerial, out *struct{}) error { addr := in.ToMultiaddr() - err := rpcapi.c.Join(addr) + err := rpcapi.c.Join(ctx, addr) return err } // StatusAll runs Cluster.StatusAll(). func (rpcapi *RPCAPI) StatusAll(ctx context.Context, in struct{}, out *[]api.GlobalPinInfoSerial) error { - pinfos, err := rpcapi.c.StatusAll() + pinfos, err := rpcapi.c.StatusAll(ctx) *out = GlobalPinInfoSliceToSerial(pinfos) return err } // StatusAllLocal runs Cluster.StatusAllLocal(). func (rpcapi *RPCAPI) StatusAllLocal(ctx context.Context, in struct{}, out *[]api.PinInfoSerial) error { - pinfos := rpcapi.c.StatusAllLocal() + pinfos := rpcapi.c.StatusAllLocal(ctx) *out = pinInfoSliceToSerial(pinfos) return nil } @@ -125,7 +127,7 @@ func (rpcapi *RPCAPI) StatusAllLocal(ctx context.Context, in struct{}, out *[]ap // Status runs Cluster.Status(). func (rpcapi *RPCAPI) Status(ctx context.Context, in api.PinSerial, out *api.GlobalPinInfoSerial) error { c := in.DecodeCid() - pinfo, err := rpcapi.c.Status(c) + pinfo, err := rpcapi.c.Status(ctx, c) *out = pinfo.ToSerial() return err } @@ -133,21 +135,21 @@ func (rpcapi *RPCAPI) Status(ctx context.Context, in api.PinSerial, out *api.Glo // StatusLocal runs Cluster.StatusLocal(). func (rpcapi *RPCAPI) StatusLocal(ctx context.Context, in api.PinSerial, out *api.PinInfoSerial) error { c := in.DecodeCid() - pinfo := rpcapi.c.StatusLocal(c) + pinfo := rpcapi.c.StatusLocal(ctx, c) *out = pinfo.ToSerial() return nil } // SyncAll runs Cluster.SyncAll(). 
func (rpcapi *RPCAPI) SyncAll(ctx context.Context, in struct{}, out *[]api.GlobalPinInfoSerial) error { - pinfos, err := rpcapi.c.SyncAll() + pinfos, err := rpcapi.c.SyncAll(ctx) *out = GlobalPinInfoSliceToSerial(pinfos) return err } // SyncAllLocal runs Cluster.SyncAllLocal(). func (rpcapi *RPCAPI) SyncAllLocal(ctx context.Context, in struct{}, out *[]api.PinInfoSerial) error { - pinfos, err := rpcapi.c.SyncAllLocal() + pinfos, err := rpcapi.c.SyncAllLocal(ctx) *out = pinInfoSliceToSerial(pinfos) return err } @@ -155,7 +157,7 @@ func (rpcapi *RPCAPI) SyncAllLocal(ctx context.Context, in struct{}, out *[]api. // Sync runs Cluster.Sync(). func (rpcapi *RPCAPI) Sync(ctx context.Context, in api.PinSerial, out *api.GlobalPinInfoSerial) error { c := in.DecodeCid() - pinfo, err := rpcapi.c.Sync(c) + pinfo, err := rpcapi.c.Sync(ctx, c) *out = pinfo.ToSerial() return err } @@ -163,14 +165,14 @@ func (rpcapi *RPCAPI) Sync(ctx context.Context, in api.PinSerial, out *api.Globa // SyncLocal runs Cluster.SyncLocal(). func (rpcapi *RPCAPI) SyncLocal(ctx context.Context, in api.PinSerial, out *api.PinInfoSerial) error { c := in.DecodeCid() - pinfo, err := rpcapi.c.SyncLocal(c) + pinfo, err := rpcapi.c.SyncLocal(ctx, c) *out = pinfo.ToSerial() return err } // RecoverAllLocal runs Cluster.RecoverAllLocal(). func (rpcapi *RPCAPI) RecoverAllLocal(ctx context.Context, in struct{}, out *[]api.PinInfoSerial) error { - pinfos, err := rpcapi.c.RecoverAllLocal() + pinfos, err := rpcapi.c.RecoverAllLocal(ctx) *out = pinInfoSliceToSerial(pinfos) return err } @@ -178,7 +180,7 @@ func (rpcapi *RPCAPI) RecoverAllLocal(ctx context.Context, in struct{}, out *[]a // Recover runs Cluster.Recover(). func (rpcapi *RPCAPI) Recover(ctx context.Context, in api.PinSerial, out *api.GlobalPinInfoSerial) error { c := in.DecodeCid() - pinfo, err := rpcapi.c.Recover(c) + pinfo, err := rpcapi.c.Recover(ctx, c) *out = pinfo.ToSerial() return err } @@ -186,7 +188,7 @@ func (rpcapi *RPCAPI) Recover(ctx context.Context, in api.PinSerial, out *api.Gl // RecoverLocal runs Cluster.RecoverLocal(). func (rpcapi *RPCAPI) RecoverLocal(ctx context.Context, in api.PinSerial, out *api.PinInfoSerial) error { c := in.DecodeCid() - pinfo, err := rpcapi.c.RecoverLocal(c) + pinfo, err := rpcapi.c.RecoverLocal(ctx, c) *out = pinfo.ToSerial() return err } @@ -195,7 +197,7 @@ func (rpcapi *RPCAPI) RecoverLocal(ctx context.Context, in api.PinSerial, out *a // It's different from pin allocations when ReplicationFactor < 0. func (rpcapi *RPCAPI) BlockAllocate(ctx context.Context, in api.PinSerial, out *[]string) error { pin := in.ToPin() - err := rpcapi.c.setupPin(&pin) + err := rpcapi.c.setupPin(ctx, &pin) if err != nil { return err } @@ -204,7 +206,7 @@ func (rpcapi *RPCAPI) BlockAllocate(ctx context.Context, in api.PinSerial, out * if pin.ReplicationFactorMin < 0 { // Returned metrics are Valid and belong to current // Cluster peers. - metrics := rpcapi.c.monitor.LatestMetrics(pingMetricName) + metrics := rpcapi.c.monitor.LatestMetrics(ctx, pingMetricName) peers := make([]string, len(metrics), len(metrics)) for i, m := range metrics { peers[i] = peer.IDB58Encode(m.Peer) @@ -215,6 +217,7 @@ func (rpcapi *RPCAPI) BlockAllocate(ctx context.Context, in api.PinSerial, out * } allocs, err := rpcapi.c.allocate( + ctx, pin.Cid, pin.ReplicationFactorMin, pin.ReplicationFactorMax, @@ -232,7 +235,7 @@ func (rpcapi *RPCAPI) BlockAllocate(ctx context.Context, in api.PinSerial, out * // SendInformerMetric runs Cluster.sendInformerMetric(). 
func (rpcapi *RPCAPI) SendInformerMetric(ctx context.Context, in struct{}, out *api.Metric) error { - m, err := rpcapi.c.sendInformerMetric() + m, err := rpcapi.c.sendInformerMetric(ctx) *out = m return err } @@ -243,40 +246,52 @@ func (rpcapi *RPCAPI) SendInformerMetric(ctx context.Context, in struct{}, out * // Track runs PinTracker.Track(). func (rpcapi *RPCAPI) Track(ctx context.Context, in api.PinSerial, out *struct{}) error { - return rpcapi.c.tracker.Track(in.ToPin()) + ctx, span := trace.StartSpan(ctx, "rpc/tracker/Track") + defer span.End() + return rpcapi.c.tracker.Track(ctx, in.ToPin()) } // Untrack runs PinTracker.Untrack(). func (rpcapi *RPCAPI) Untrack(ctx context.Context, in api.PinSerial, out *struct{}) error { + ctx, span := trace.StartSpan(ctx, "rpc/tracker/Untrack") + defer span.End() c := in.DecodeCid() - return rpcapi.c.tracker.Untrack(c) + return rpcapi.c.tracker.Untrack(ctx, c) } // TrackerStatusAll runs PinTracker.StatusAll(). func (rpcapi *RPCAPI) TrackerStatusAll(ctx context.Context, in struct{}, out *[]api.PinInfoSerial) error { - *out = pinInfoSliceToSerial(rpcapi.c.tracker.StatusAll()) + ctx, span := trace.StartSpan(ctx, "rpc/tracker/StatusAll") + defer span.End() + *out = pinInfoSliceToSerial(rpcapi.c.tracker.StatusAll(ctx)) return nil } // TrackerStatus runs PinTracker.Status(). func (rpcapi *RPCAPI) TrackerStatus(ctx context.Context, in api.PinSerial, out *api.PinInfoSerial) error { + ctx, span := trace.StartSpan(ctx, "rpc/tracker/Status") + defer span.End() c := in.DecodeCid() - pinfo := rpcapi.c.tracker.Status(c) + pinfo := rpcapi.c.tracker.Status(ctx, c) *out = pinfo.ToSerial() return nil } // TrackerRecoverAll runs PinTracker.RecoverAll().f func (rpcapi *RPCAPI) TrackerRecoverAll(ctx context.Context, in struct{}, out *[]api.PinInfoSerial) error { - pinfos, err := rpcapi.c.tracker.RecoverAll() + ctx, span := trace.StartSpan(ctx, "rpc/tracker/RecoverAll") + defer span.End() + pinfos, err := rpcapi.c.tracker.RecoverAll(ctx) *out = pinInfoSliceToSerial(pinfos) return err } // TrackerRecover runs PinTracker.Recover(). func (rpcapi *RPCAPI) TrackerRecover(ctx context.Context, in api.PinSerial, out *api.PinInfoSerial) error { + ctx, span := trace.StartSpan(ctx, "rpc/tracker/Recover") + defer span.End() c := in.DecodeCid() - pinfo, err := rpcapi.c.tracker.Recover(c) + pinfo, err := rpcapi.c.tracker.Recover(ctx, c) *out = pinfo.ToSerial() return err } @@ -287,6 +302,8 @@ func (rpcapi *RPCAPI) TrackerRecover(ctx context.Context, in api.PinSerial, out // IPFSPin runs IPFSConnector.Pin(). func (rpcapi *RPCAPI) IPFSPin(ctx context.Context, in api.PinSerial, out *struct{}) error { + ctx, span := trace.StartSpan(ctx, "rpc/ipfsconn/IPFSPin") + defer span.End() c := in.DecodeCid() depth := in.ToPin().MaxDepth return rpcapi.c.ipfs.Pin(ctx, c, depth) @@ -315,7 +332,7 @@ func (rpcapi *RPCAPI) IPFSPinLs(ctx context.Context, in string, out *map[string] // IPFSConnectSwarms runs IPFSConnector.ConnectSwarms(). func (rpcapi *RPCAPI) IPFSConnectSwarms(ctx context.Context, in struct{}, out *struct{}) error { - err := rpcapi.c.ipfs.ConnectSwarms() + err := rpcapi.c.ipfs.ConnectSwarms(ctx) return err } @@ -328,27 +345,27 @@ func (rpcapi *RPCAPI) IPFSConfigKey(ctx context.Context, in string, out *interfa // IPFSRepoStat runs IPFSConnector.RepoStat(). 
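Editor's note: the RPC wrappers above follow a "rpc/<component>/<Method>" span-naming convention before delegating to the component, whose own spans ("tracker/stateless/...", "state/map/...") then nest underneath. A compact sketch of that layering (function and CID names are made up):

package main

import (
    "context"
    "fmt"

    "go.opencensus.io/trace"
)

// the RPC-facing wrapper opens the "rpc/..." span and passes ctx down
func rpcTrackerStatus(ctx context.Context, cidStr string) string {
    ctx, span := trace.StartSpan(ctx, "rpc/tracker/Status")
    defer span.End()
    return trackerStatus(ctx, cidStr)
}

// the component opens its own span as a child of the RPC span
func trackerStatus(ctx context.Context, cidStr string) string {
    _, span := trace.StartSpan(ctx, "tracker/stateless/Status")
    defer span.End()
    return "pinned: " + cidStr // placeholder result
}

func main() {
    fmt.Println(rpcTrackerStatus(context.Background(), "QmFake"))
}
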
@@ -328,27 +345,27 @@ func (rpcapi *RPCAPI) IPFSConfigKey(ctx context.Context, in string, out *interfa

 // IPFSRepoStat runs IPFSConnector.RepoStat().
 func (rpcapi *RPCAPI) IPFSRepoStat(ctx context.Context, in struct{}, out *api.IPFSRepoStat) error {
-	res, err := rpcapi.c.ipfs.RepoStat()
+	res, err := rpcapi.c.ipfs.RepoStat(ctx)
 	*out = res
 	return err
 }

 // IPFSSwarmPeers runs IPFSConnector.SwarmPeers().
 func (rpcapi *RPCAPI) IPFSSwarmPeers(ctx context.Context, in struct{}, out *api.SwarmPeersSerial) error {
-	res, err := rpcapi.c.ipfs.SwarmPeers()
+	res, err := rpcapi.c.ipfs.SwarmPeers(ctx)
 	*out = res.ToSerial()
 	return err
 }

 // IPFSBlockPut runs IPFSConnector.BlockPut().
 func (rpcapi *RPCAPI) IPFSBlockPut(ctx context.Context, in api.NodeWithMeta, out *struct{}) error {
-	return rpcapi.c.ipfs.BlockPut(in)
+	return rpcapi.c.ipfs.BlockPut(ctx, in)
 }

 // IPFSBlockGet runs IPFSConnector.BlockGet().
 func (rpcapi *RPCAPI) IPFSBlockGet(ctx context.Context, in api.PinSerial, out *[]byte) error {
 	c := in.DecodeCid()
-	res, err := rpcapi.c.ipfs.BlockGet(c)
+	res, err := rpcapi.c.ipfs.BlockGet(ctx, c)
 	*out = res
 	return err
 }
@@ -359,29 +376,37 @@ func (rpcapi *RPCAPI) IPFSBlockGet(ctx context.Context, in api.PinSerial, out *[

 // ConsensusLogPin runs Consensus.LogPin().
 func (rpcapi *RPCAPI) ConsensusLogPin(ctx context.Context, in api.PinSerial, out *struct{}) error {
+	ctx, span := trace.StartSpan(ctx, "rpc/consensus/LogPin")
+	defer span.End()
 	c := in.ToPin()
-	return rpcapi.c.consensus.LogPin(c)
+	return rpcapi.c.consensus.LogPin(ctx, c)
 }

 // ConsensusLogUnpin runs Consensus.LogUnpin().
 func (rpcapi *RPCAPI) ConsensusLogUnpin(ctx context.Context, in api.PinSerial, out *struct{}) error {
+	ctx, span := trace.StartSpan(ctx, "rpc/consensus/LogUnpin")
+	defer span.End()
 	c := in.ToPin()
-	return rpcapi.c.consensus.LogUnpin(c)
+	return rpcapi.c.consensus.LogUnpin(ctx, c)
 }

 // ConsensusAddPeer runs Consensus.AddPeer().
 func (rpcapi *RPCAPI) ConsensusAddPeer(ctx context.Context, in peer.ID, out *struct{}) error {
-	return rpcapi.c.consensus.AddPeer(in)
+	ctx, span := trace.StartSpan(ctx, "rpc/consensus/AddPeer")
+	defer span.End()
+	return rpcapi.c.consensus.AddPeer(ctx, in)
 }

 // ConsensusRmPeer runs Consensus.RmPeer().
 func (rpcapi *RPCAPI) ConsensusRmPeer(ctx context.Context, in peer.ID, out *struct{}) error {
-	return rpcapi.c.consensus.RmPeer(in)
+	ctx, span := trace.StartSpan(ctx, "rpc/consensus/RmPeer")
+	defer span.End()
+	return rpcapi.c.consensus.RmPeer(ctx, in)
 }

 // ConsensusPeers runs Consensus.Peers().
 func (rpcapi *RPCAPI) ConsensusPeers(ctx context.Context, in struct{}, out *[]peer.ID) error {
-	peers, err := rpcapi.c.consensus.Peers()
+	peers, err := rpcapi.c.consensus.Peers(ctx)
 	*out = peers
 	return err
 }
@@ -392,12 +417,12 @@ func (rpcapi *RPCAPI) ConsensusPeers(ctx context.Context, in struct{}, out *[]pe

 // PeerMonitorLogMetric runs PeerMonitor.LogMetric().
 func (rpcapi *RPCAPI) PeerMonitorLogMetric(ctx context.Context, in api.Metric, out *struct{}) error {
-	rpcapi.c.monitor.LogMetric(in)
+	rpcapi.c.monitor.LogMetric(ctx, in)
 	return nil
 }

 // PeerMonitorLatestMetrics runs PeerMonitor.LatestMetrics().
 func (rpcapi *RPCAPI) PeerMonitorLatestMetrics(ctx context.Context, in string, out *[]api.Metric) error {
-	*out = rpcapi.c.monitor.LatestMetrics(in)
+	*out = rpcapi.c.monitor.LatestMetrics(ctx, in)
 	return nil
 }
diff --git a/state/interface.go b/state/interface.go
index 834d56e6..6d4141f1 100644
--- a/state/interface.go
+++ b/state/interface.go
@@ -4,6 +4,7 @@ package state

 // State represents the shared state of the cluster and it
 import (
+	"context"
 	"io"

 	cid "github.com/ipfs/go-cid"
@@ -15,17 +16,17 @@ import (
 // objects which objects are pinned. This component should be thread safe.
 type State interface {
 	// Add adds a pin to the State
-	Add(api.Pin) error
+	Add(context.Context, api.Pin) error
 	// Rm removes a pin from the State
-	Rm(cid.Cid) error
+	Rm(context.Context, cid.Cid) error
 	// List lists all the pins in the state
-	List() []api.Pin
+	List(context.Context) []api.Pin
 	// Has returns true if the state is holding information for a Cid
-	Has(cid.Cid) bool
+	Has(context.Context, cid.Cid) bool
 	// Get returns the information attacthed to this pin
-	Get(cid.Cid) (api.Pin, bool)
+	Get(context.Context, cid.Cid) (api.Pin, bool)
 	// Migrate restores the serialized format of an outdated state to the current version
-	Migrate(r io.Reader) error
+	Migrate(ctx context.Context, r io.Reader) error
 	// Return the version of this state
 	GetVersion() int
 	// Marshal serializes the state to a byte slice
diff --git a/state/mapstate/map_state.go b/state/mapstate/map_state.go
index aea983cc..1332cd36 100644
--- a/state/mapstate/map_state.go
+++ b/state/mapstate/map_state.go
@@ -4,6 +4,7 @@ package mapstate

 import (
 	"bytes"
+	"context"
 	"errors"
 	"io"
 	"io/ioutil"
@@ -15,6 +16,7 @@ import (
 	logging "github.com/ipfs/go-log"

 	"github.com/ipfs/ipfs-cluster/api"
+	"go.opencensus.io/trace"
 )

 // Version is the map state Version. States with old versions should
@@ -40,7 +42,10 @@ func NewMapState() *MapState {
 }

 // Add adds a Pin to the internal map.
-func (st *MapState) Add(c api.Pin) error {
+func (st *MapState) Add(ctx context.Context, c api.Pin) error {
+	ctx, span := trace.StartSpan(ctx, "state/map/Add")
+	defer span.End()
+
 	st.pinMux.Lock()
 	defer st.pinMux.Unlock()
 	st.PinMap[c.Cid.String()] = c.ToSerial()
@@ -48,7 +53,10 @@
 }

 // Rm removes a Cid from the internal map.
-func (st *MapState) Rm(c cid.Cid) error {
+func (st *MapState) Rm(ctx context.Context, c cid.Cid) error {
+	ctx, span := trace.StartSpan(ctx, "state/map/Rm")
+	defer span.End()
+
 	st.pinMux.Lock()
 	defer st.pinMux.Unlock()
 	delete(st.PinMap, c.String())
@@ -60,7 +68,10 @@
 // fields initialized, regardless of the
 // presence of the provided Cid in the state.
 // To check the presence, use MapState.Has(cid.Cid).
-func (st *MapState) Get(c cid.Cid) (api.Pin, bool) {
+func (st *MapState) Get(ctx context.Context, c cid.Cid) (api.Pin, bool) {
+	ctx, span := trace.StartSpan(ctx, "state/map/Get")
+	defer span.End()
+
 	if !c.Defined() {
 		return api.PinCid(c), false
 	}
@@ -74,7 +85,10 @@
 }

 // Has returns true if the Cid belongs to the State.
-func (st *MapState) Has(c cid.Cid) bool {
+func (st *MapState) Has(ctx context.Context, c cid.Cid) bool {
+	ctx, span := trace.StartSpan(ctx, "state/map/Has")
+	defer span.End()
+
 	st.pinMux.RLock()
 	defer st.pinMux.RUnlock()
 	_, ok := st.PinMap[c.String()]
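With the State interface change above, every state call site now passes a context explicitly. A small usage sketch against this branch's MapState; the CID string is just a well-known example hash and the program is illustrative, not part of the patch:

package main

import (
	"context"
	"fmt"

	cid "github.com/ipfs/go-cid"

	"github.com/ipfs/ipfs-cluster/api"
	"github.com/ipfs/ipfs-cluster/state/mapstate"
)

func main() {
	ctx := context.Background()

	// Any valid CID works here; this is a commonly used example hash.
	c, err := cid.Decode("QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG")
	if err != nil {
		panic(err)
	}

	st := mapstate.NewMapState()
	if err := st.Add(ctx, api.PinCid(c)); err != nil {
		panic(err)
	}

	fmt.Println(st.Has(ctx, c))    // true
	fmt.Println(len(st.List(ctx))) // 1
}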
@@ -82,7 +96,10 @@
 }

 // List provides the list of tracked Pins.
-func (st *MapState) List() []api.Pin {
+func (st *MapState) List(ctx context.Context) []api.Pin {
+	ctx, span := trace.StartSpan(ctx, "state/map/List")
+	defer span.End()
+
 	st.pinMux.RLock()
 	defer st.pinMux.RUnlock()
 	cids := make([]api.Pin, 0, len(st.PinMap))
@@ -97,7 +114,10 @@

 // Migrate restores a snapshot from the state's internal bytes and if
 // necessary migrates the format to the current version.
-func (st *MapState) Migrate(r io.Reader) error {
+func (st *MapState) Migrate(ctx context.Context, r io.Reader) error {
+	ctx, span := trace.StartSpan(ctx, "state/map/Migrate")
+	defer span.End()
+
 	bs, err := ioutil.ReadAll(r)
 	if err != nil {
 		return err
@@ -126,6 +146,10 @@ func (st *MapState) GetVersion() int {

 // Marshal encodes the state using msgpack
 func (st *MapState) Marshal() ([]byte, error) {
+	// FIXME: Re-enable this span when raft Marshable interface has contexts
+	//ctx, span := trace.StartSpan(ctx, "state/map/Marshal")
+	//defer span.End()
+
 	logger.Debugf("Marshal-- Marshalling state of version %d", st.Version)
 	buf := new(bytes.Buffer)
 	enc := msgpack.Multicodec(msgpack.DefaultMsgpackHandle()).Encoder(buf)
@@ -137,7 +161,7 @@ func (st *MapState) Marshal() ([]byte, error) {
 	vCodec := make([]byte, 1)
 	vCodec[0] = byte(st.Version)
 	ret := append(vCodec, buf.Bytes()...)
-	// logger.Debugf("Marshal-- The final marshaled bytes: %x", ret)
+	//logger.Debugf("Marshal-- The final marshaled bytes: %x\n", ret)
 	return ret, nil
 }

@@ -147,6 +171,10 @@ func (st *MapState) Marshal() ([]byte, error) {
 // to the current version in a later call to restore. Note: Out of date
 // version is not an error
 func (st *MapState) Unmarshal(bs []byte) error {
+	// FIXME: Re-enable this span when raft Marshable interface has contexts
+	// ctx, span := trace.StartSpan(ctx, "state/map/Unmarshal")
+	// defer span.End()
+
 	// Check version byte
 	// logger.Debugf("The incoming bytes to unmarshal: %x", bs)
 	if len(bs) < 1 {
diff --git a/state/mapstate/map_state_test.go b/state/mapstate/map_state_test.go
index fd06983b..19325199 100644
--- a/state/mapstate/map_state_test.go
+++ b/state/mapstate/map_state_test.go
@@ -2,6 +2,7 @@ package mapstate

 import (
 	"bytes"
+	"context"
 	"testing"

 	msgpack "github.com/multiformats/go-multicodec/msgpack"
@@ -27,31 +28,34 @@ var c = api.Pin{
 }

 func TestAdd(t *testing.T) {
+	ctx := context.Background()
 	ms := NewMapState()
-	ms.Add(c)
-	if !ms.Has(c.Cid) {
+	ms.Add(ctx, c)
+	if !ms.Has(ctx, c.Cid) {
 		t.Error("should have added it")
 	}
 }

 func TestRm(t *testing.T) {
+	ctx := context.Background()
 	ms := NewMapState()
-	ms.Add(c)
-	ms.Rm(c.Cid)
-	if ms.Has(c.Cid) {
+	ms.Add(ctx, c)
+	ms.Rm(ctx, c.Cid)
+	if ms.Has(ctx, c.Cid) {
 		t.Error("should have removed it")
 	}
 }

 func TestGet(t *testing.T) {
+	ctx := context.Background()
 	defer func() {
 		if r := recover(); r != nil {
 			t.Fatal("paniced")
 		}
 	}()
 	ms := NewMapState()
-	ms.Add(c)
-	get, _ := ms.Get(c.Cid)
+	ms.Add(ctx, c)
+	get, _ := ms.Get(ctx, c.Cid)
 	if get.Cid.String() != c.Cid.String() ||
 		get.Allocations[0] != c.Allocations[0] ||
 		get.ReplicationFactorMax != c.ReplicationFactorMax ||
@@ -61,14 +65,15 @@ func TestGet(t *testing.T) {
 }

 func TestList(t *testing.T) {
+	ctx := context.Background()
 	defer func() {
 		if r := recover(); r != nil {
 			t.Fatal("paniced")
 		}
 	}()
 	ms := NewMapState()
-	ms.Add(c)
-	list := ms.List()
+	ms.Add(ctx, c)
+	list := ms.List(ctx)
 	if list[0].Cid.String() != c.Cid.String() ||
 		list[0].Allocations[0] != c.Allocations[0] ||
 		list[0].ReplicationFactorMax != c.ReplicationFactorMax ||
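The Marshal hunk above keeps the existing wire format: a single leading version byte followed by the msgpack payload, which is what lets Unmarshal and Migrate recognize and upgrade old snapshots. A minimal sketch of that framing; the version number and payload are placeholders, not the real state encoding:

package main

import (
	"errors"
	"fmt"
)

// frame prefixes an encoded payload with a one-byte format version,
// mirroring the vCodec prefix used by MapState.Marshal above.
func frame(version byte, payload []byte) []byte {
	return append([]byte{version}, payload...)
}

// unframe splits the version byte from the payload, as Unmarshal does
// before deciding whether a migration is needed.
func unframe(b []byte) (byte, []byte, error) {
	if len(b) < 1 {
		return 0, nil, errors.New("too short")
	}
	return b[0], b[1:], nil
}

func main() {
	b := frame(5, []byte("msgpack payload goes here")) // 5 is a placeholder version
	v, payload, _ := unframe(b)
	fmt.Println(v, string(payload))
}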
@@ -78,8 +83,9 @@ func TestList(t *testing.T) {
 }

 func TestMarshalUnmarshal(t *testing.T) {
+	ctx := context.Background()
 	ms := NewMapState()
-	ms.Add(c)
+	ms.Add(ctx, c)
 	b, err := ms.Marshal()
 	if err != nil {
 		t.Fatal(err)
@@ -92,13 +98,14 @@ func TestMarshalUnmarshal(t *testing.T) {
 	if ms.Version != ms2.Version {
 		t.Fatal(err)
 	}
-	get, _ := ms2.Get(c.Cid)
+	get, _ := ms2.Get(ctx, c.Cid)
 	if get.Allocations[0] != testPeerID1 {
 		t.Error("expected different peer id")
 	}
 }

 func TestMigrateFromV1(t *testing.T) {
+	ctx := context.Background()
 	// Construct the bytes of a v1 state
 	var v1State mapStateV1
 	v1State.PinMap = map[string]struct{}{
@@ -125,11 +132,11 @@ func TestMigrateFromV1(t *testing.T) {
 	}
 	// Migrate state to current version
 	r := bytes.NewBuffer(v1Bytes)
-	err = ms.Migrate(r)
+	err = ms.Migrate(ctx, r)
 	if err != nil {
 		t.Fatal(err)
 	}
-	get, ok := ms.Get(c.Cid)
+	get, ok := ms.Get(ctx, c.Cid)
 	if !ok {
 		t.Fatal("migrated state does not contain cid")
 	}
diff --git a/test/ipfs_mock.go b/test/ipfs_mock.go
index 621775f1..e612b156 100644
--- a/test/ipfs_mock.go
+++ b/test/ipfs_mock.go
@@ -1,6 +1,7 @@
 package test

 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
@@ -129,6 +130,7 @@ func NewIpfsMock() *IpfsMock {

 // FIXME: what if IPFS API changes?
 func (m *IpfsMock) handler(w http.ResponseWriter, r *http.Request) {
+	ctx := context.Background()
 	p := r.URL.Path
 	w.Header().Set(IpfsCustomHeaderName, IpfsCustomHeaderValue)
 	w.Header().Set("Server", "ipfs-mock")
@@ -156,7 +158,7 @@ func (m *IpfsMock) handler(w http.ResponseWriter, r *http.Request) {
 		if err != nil {
 			goto ERROR
 		}
-		m.pinMap.Add(api.PinCid(c))
+		m.pinMap.Add(ctx, api.PinCid(c))
 		resp := mockPinResp{
 			Pins: []string{arg},
 		}
@@ -171,7 +173,7 @@ func (m *IpfsMock) handler(w http.ResponseWriter, r *http.Request) {
 		if err != nil {
 			goto ERROR
 		}
-		m.pinMap.Rm(c)
+		m.pinMap.Rm(ctx, c)
 		resp := mockPinResp{
 			Pins: []string{arg},
 		}
@@ -181,7 +183,7 @@ func (m *IpfsMock) handler(w http.ResponseWriter, r *http.Request) {
 		arg, ok := extractCid(r.URL)
 		if !ok {
 			rMap := make(map[string]mockPinType)
-			pins := m.pinMap.List()
+			pins := m.pinMap.List(ctx)
 			for _, p := range pins {
 				rMap[p.Cid.String()] = mockPinType{"recursive"}
 			}
@@ -195,7 +197,7 @@ func (m *IpfsMock) handler(w http.ResponseWriter, r *http.Request) {
 		if err != nil {
 			goto ERROR
 		}
-		ok = m.pinMap.Has(c)
+		ok = m.pinMap.Has(ctx, c)
 		if ok {
 			rMap := make(map[string]mockPinType)
 			rMap[cidStr] = mockPinType{"recursive"}
@@ -288,7 +290,7 @@ func (m *IpfsMock) handler(w http.ResponseWriter, r *http.Request) {
 		w.Write(data)
 	case "repo/stat":
 		sizeOnly := r.URL.Query().Get("size-only")
-		len := len(m.pinMap.List())
+		len := len(m.pinMap.List(ctx))
 		numObjs := uint64(len)
 		if sizeOnly == "true" {
 			numObjs = 0