Hector Sanjuan 9b9d76f92d Pinset streaming and method type revamp
This commit introduces the new go-libp2p-gorpc streaming capabilities for
Cluster. The main aim is to work towards heavily reducing memory usage when
working with very large pinsets.

As a side effect, it takes the chance to revamp the types of all public
methods so that pointers to what should be static objects are no longer
used. This should heavily reduce heap allocations and GC activity.

The main change is that state.List now returns a channel from which to read
the pins, rather than pins being all loaded into a huge slice.

Everything that reads pins has been updated to iterate over the channel rather
than over the slice. The full pinset is no longer loaded into memory for
things that run regularly, like StateSync().

Additionally, the /allocations endpoint of the rest API no longer returns an
array of pins, but rather streams json-encoded pin objects directly. This
change has extended to the restapi client (which puts pins into a channel as
they arrive) and to ipfs-cluster-ctl.

There are still pending improvements, like StatusAll() calls, which should also
stream responses, and especially BlockPut calls, which should stream blocks
directly into IPFS in a single call.

These are coming up in future commits.
2022-03-19 03:02:55 +01:00

314 lines
6.5 KiB

package pubsubmon
import (
libp2p "github.com/libp2p/go-libp2p"
host "github.com/libp2p/go-libp2p-core/host"
peer "github.com/libp2p/go-libp2p-core/peer"
pubsub "github.com/libp2p/go-libp2p-pubsub"
func init() {
// GossipSub needs to heartbeat to discover newly connected hosts
// This speeds things up a little.
pubsub.GossipSubHeartbeatInterval = 50 * time.Millisecond
// metricFactory hands out test metrics whose values come from a counter.
// The mutex l guards counter.
type metricFactory struct {
	l       sync.Mutex
	counter int
}
func newMetricFactory() *metricFactory {
return &metricFactory{
counter: 0,
func (mf *metricFactory) newMetric(n string, p peer.ID) api.Metric {
defer mf.l.Unlock()
m := api.Metric{
Name: n,
Peer: p,
Value: fmt.Sprintf("%d", mf.counter),
Valid: true,
m.SetTTL(5 * time.Second)
return m
func (mf *metricFactory) count() int {
defer mf.l.Unlock()
return mf.counter
func peers(ctx context.Context) ([]peer.ID, error) {
return []peer.ID{test.PeerID1, test.PeerID2, test.PeerID3}, nil
// testPeerMonitor builds a Monitor for tests: it creates a libp2p host and a
// GossipSub instance, a mock RPC client bound to that host, and a Config with
// a 2-second CheckInterval, then calls New with the package-level peers
// function. It returns the monitor, the host and the shutdownF function.
//
// NOTE(review): several lines of this helper are not visible in this view
// (call arguments, error-branch bodies, closing braces, the body of
// shutdownF). Restore them from the original file before compiling.
func testPeerMonitor(t *testing.T) (*Monitor, host.Host, func()) {
ctx := context.Background()
// NOTE(review): the option list passed to libp2p.New is not visible.
h, err := libp2p.New(
if err != nil {
// NOTE(review): the arguments to NewGossipSub are not visible; presumably
// they include ctx and h — confirm against the original.
psub, err := pubsub.NewGossipSub(
if err != nil {
// NOTE(review): mock is not referenced again in the visible lines; the
// statement that attaches it to the monitor appears to be missing.
mock := test.NewMockRPCClientWithHost(t, h)
cfg := &Config{}
cfg.CheckInterval = 2 * time.Second
mon, err := New(ctx, cfg, psub, peers)
if err != nil {
// NOTE(review): the body of shutdownF is not visible here.
shutdownF := func() {
return mon, h, shutdownF
func TestPeerMonitorShutdown(t *testing.T) {
ctx := context.Background()
pm, _, shutdown := testPeerMonitor(t)
defer shutdown()
err := pm.Shutdown(ctx)
if err != nil {
err = pm.Shutdown(ctx)
if err != nil {
func TestLogMetricConcurrent(t *testing.T) {
ctx := context.Background()
pm, _, shutdown := testPeerMonitor(t)
defer shutdown()
var wg sync.WaitGroup
// Insert 25 metrics
f := func() {
defer wg.Done()
for i := 0; i < 25; i++ {
mt := api.Metric{
Name: "test",
Peer: test.PeerID1,
Value: fmt.Sprintf("%d", time.Now().UnixNano()),
Valid: true,
mt.SetTTL(150 * time.Millisecond)
pm.LogMetric(ctx, mt)
time.Sleep(75 * time.Millisecond)
go f()
go f()
go f()
// Wait for at least two metrics to be inserted
time.Sleep(200 * time.Millisecond)
last := time.Now().Add(-500 * time.Millisecond)
for i := 0; i <= 20; i++ {
lastMtrcs := pm.LatestMetrics(ctx, "test")
// There should always 1 valid LatestMetric "test"
if len(lastMtrcs) != 1 {
t.Error("no valid metrics", len(lastMtrcs), i)
time.Sleep(75 * time.Millisecond)
n, err := strconv.Atoi(lastMtrcs[0].Value)
if err != nil {
// The timestamp of the metric cannot be older than
// the timestamp from the last
current := time.Unix(0, int64(n))
if current.Before(last) {
t.Errorf("expected newer metric: Current: %s, Last: %s", current, last)
last = current
time.Sleep(75 * time.Millisecond)
func TestPeerMonitorLogMetric(t *testing.T) {
ctx := context.Background()
pm, _, shutdown := testPeerMonitor(t)
defer shutdown()
mf := newMetricFactory()
// dont fill window
pm.LogMetric(ctx, mf.newMetric("test", test.PeerID1))
pm.LogMetric(ctx, mf.newMetric("test", test.PeerID2))
pm.LogMetric(ctx, mf.newMetric("test", test.PeerID3))
// fill window
pm.LogMetric(ctx, mf.newMetric("test2", test.PeerID3))
pm.LogMetric(ctx, mf.newMetric("test2", test.PeerID3))
pm.LogMetric(ctx, mf.newMetric("test2", test.PeerID3))
pm.LogMetric(ctx, mf.newMetric("test2", test.PeerID3))
latestMetrics := pm.LatestMetrics(ctx, "testbad")
if len(latestMetrics) != 0 {
t.Logf("%+v", latestMetrics)
t.Error("metrics should be empty")
latestMetrics = pm.LatestMetrics(ctx, "test")
if len(latestMetrics) != 3 {
t.Error("metrics should correspond to 3 hosts")
for _, v := range latestMetrics {
switch v.Peer {
case test.PeerID1:
if v.Value != "0" {
t.Error("bad metric value")
case test.PeerID2:
if v.Value != "1" {
t.Error("bad metric value")
case test.PeerID3:
if v.Value != "2" {
t.Error("bad metric value")
t.Error("bad peer")
latestMetrics = pm.LatestMetrics(ctx, "test2")
if len(latestMetrics) != 1 {
t.Fatal("should only be one metric")
if latestMetrics[0].Value != fmt.Sprintf("%d", mf.count()-1) {
t.Error("metric is not last")
func TestPeerMonitorPublishMetric(t *testing.T) {
ctx := context.Background()
pm, host, shutdown := testPeerMonitor(t)
defer shutdown()
pm2, host2, shutdown2 := testPeerMonitor(t)
defer shutdown2()
time.Sleep(200 * time.Millisecond)
err := host.Connect(
ID: host2.ID(),
Addrs: host2.Addrs(),
if err != nil {
time.Sleep(200 * time.Millisecond)
mf := newMetricFactory()
metric := mf.newMetric("test", test.PeerID1)
err = pm.PublishMetric(ctx, metric)
if err != nil {
time.Sleep(500 * time.Millisecond)
checkMetric := func(t *testing.T, pm *Monitor) {
latestMetrics := pm.LatestMetrics(ctx, "test")
if len(latestMetrics) != 1 {
t.Fatal(host.ID(), "expected 1 published metric")
t.Log(host.ID(), "received metric")
receivedMetric := latestMetrics[0]
if receivedMetric.Peer != metric.Peer ||
receivedMetric.Expire != metric.Expire ||
receivedMetric.Value != metric.Value ||
receivedMetric.Valid != metric.Valid ||
receivedMetric.Name != metric.Name {
t.Fatal("it should be exactly the same metric we published")
checkMetric(t, pm)
checkMetric(t, pm2)
func TestPeerMonitorAlerts(t *testing.T) {
ctx := context.Background()
pm, _, shutdown := testPeerMonitor(t)
defer shutdown()
mf := newMetricFactory()
mtr := mf.newMetric("test", test.PeerID1)
pm.LogMetric(ctx, mtr)
timeout := time.NewTimer(time.Second * 5)
// it should alert once.
for i := 0; i < 1; i++ {
select {
case <-timeout.C:
t.Fatal("should have thrown an alert by now")
case alrt := <-pm.Alerts():
if alrt.Name != "test" {
t.Error("Alert should be for test")
if alrt.Peer != test.PeerID1 {
t.Error("Peer should be TestPeerID1")