Address most comments from PR review
parent cf4fb74993
commit 6b31f44351

allocate.go | 76
@@ -39,6 +39,16 @@ import (
 // ReplicationFactorMax is reached. Error if there are less than
 // ReplicationFactorMin.
 
+// A wrapper to carry peer metrics that have been classified.
+type classifiedMetrics struct {
+	current        api.MetricsSet
+	currentPeers   []peer.ID
+	candidate      api.MetricsSet
+	candidatePeers []peer.ID
+	priority       api.MetricsSet
+	priorityPeers  []peer.ID
+}
+
 // allocate finds peers to allocate a hash using the informer and the monitor
 // it should only be used with valid replicationFactors (if rplMin and rplMax
 // are > 0, then rplMin <= rplMax).
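For context, this struct replaces a six-value return from filterMetrics with a single value. A minimal sketch of the shape (MetricsSet and PeerID below are stand-ins for the real api.MetricsSet and peer.ID types, which are not reproduced here):

```go
package main

import "fmt"

type MetricsSet map[string][]string // stand-in for api.MetricsSet
type PeerID string                  // stand-in for peer.ID

// Mirrors the classifiedMetrics wrapper introduced in the diff above.
type classifiedMetrics struct {
	current        MetricsSet
	currentPeers   []PeerID
	candidate      MetricsSet
	candidatePeers []PeerID
	priority       MetricsSet
	priorityPeers  []PeerID
}

func main() {
	// One value travels through allocate -> obtainAllocations -> Allocate,
	// instead of six parallel arguments that are easy to misorder.
	cm := classifiedMetrics{
		currentPeers:   []PeerID{"QmPeerA"},
		candidatePeers: []PeerID{"QmPeerB", "QmPeerC"},
	}
	fmt.Println(len(cm.currentPeers), len(cm.candidatePeers)) // 1 2
}
```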
@@ -65,13 +75,16 @@ func (c *Cluster) allocate(ctx context.Context, hash cid.Cid, currentPin *api.Pi
 		currentAllocs = currentPin.Allocations
 	}
 
 	// Get Metrics that the allocator is interested on
 	mSet := make(api.MetricsSet)
 	metrics := c.allocator.Metrics()
 	for _, metricName := range metrics {
 		mSet[metricName] = c.monitor.LatestMetrics(ctx, metricName)
 	}
 
-	curSet, curPeers, candSet, candPeers, prioSet, prioPeers := filterMetrics(
+	// Filter and divide metrics. The resulting sets only have peers that
+	// have all the metrics needed and are not blacklisted.
+	classified := filterMetrics(
 		mSet,
 		len(metrics),
 		currentAllocs,
@@ -84,12 +97,7 @@ func (c *Cluster) allocate(ctx context.Context, hash cid.Cid, currentPin *api.Pi
 		hash,
 		rplMin,
 		rplMax,
-		curSet,
-		candSet,
-		prioSet,
-		curPeers,
-		candPeers,
-		prioPeers,
+		classified,
 	)
 	if err != nil {
 		return newAllocs, err
@@ -108,14 +116,7 @@ func (c *Cluster) allocate(ctx context.Context, hash cid.Cid, currentPin *api.Pi
 //
 // For a metric/peer to be included in a group, it is necessary that it has
 // metrics for all informers.
-func filterMetrics(mSet api.MetricsSet, numMetrics int, currentAllocs, priorityList, blacklist []peer.ID) (
-	curSet api.MetricsSet,
-	curPeers []peer.ID,
-	candSet api.MetricsSet,
-	candPeers []peer.ID,
-	prioSet api.MetricsSet,
-	prioPeers []peer.ID,
-) {
+func filterMetrics(mSet api.MetricsSet, numMetrics int, currentAllocs, priorityList, blacklist []peer.ID) classifiedMetrics {
 	curPeersMap := make(map[peer.ID][]*api.Metric)
 	candPeersMap := make(map[peer.ID][]*api.Metric)
 	prioPeersMap := make(map[peer.ID][]*api.Metric)
@@ -141,8 +142,11 @@ func filterMetrics(mSet api.MetricsSet, numMetrics int, currentAllocs, priorityL
 		mSet := make(api.MetricsSet)
 		peers := make([]peer.ID, 0, len(peersMap))
 
-		// Put the metrics in their sets if peers have metrics for all informers
-		// Record peers
+		// Put the metrics in their sets if peers have metrics for all
+		// informers. Record peers. This relies on LatestMetrics
+		// returning exactly one metric per peer. Thus, a peer with
+		// all the needed metrics should have exactly numMetrics.
+		// Otherwise, they are ignored.
 		for p, metrics := range peersMap {
 			if len(metrics) == numMetrics {
 				for _, m := range metrics {
@@ -154,11 +158,18 @@ func filterMetrics(mSet api.MetricsSet, numMetrics int, currentAllocs, priorityL
 		return mSet, peers
 	}
 
-	curSet, curPeers = fillMetricsSet(curPeersMap)
-	candSet, candPeers = fillMetricsSet(candPeersMap)
-	prioSet, prioPeers = fillMetricsSet(prioPeersMap)
+	curSet, curPeers := fillMetricsSet(curPeersMap)
+	candSet, candPeers := fillMetricsSet(candPeersMap)
+	prioSet, prioPeers := fillMetricsSet(prioPeersMap)
 
-	return
+	return classifiedMetrics{
+		current:        curSet,
+		currentPeers:   curPeers,
+		candidate:      candSet,
+		candidatePeers: candPeers,
+		priority:       prioSet,
+		priorityPeers:  prioPeers,
+	}
 }
 
 // allocationError logs an allocation error
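The classification rule described in the new comment is easy to isolate: a peer is kept only if it carries one metric from every informer. A runnable sketch of just that rule (types are simplified stand-ins; peersMap and numMetrics play the same roles as in the diff):

```go
package main

import "fmt"

type PeerID string

type Metric struct {
	Name  string
	Value string
}

// keepComplete mirrors the filtering idea in fillMetricsSet: since
// LatestMetrics returns at most one metric per peer per name, a peer
// with metrics from all informers has exactly numMetrics entries.
func keepComplete(peersMap map[PeerID][]Metric, numMetrics int) []PeerID {
	peers := make([]PeerID, 0, len(peersMap))
	for p, ms := range peersMap {
		if len(ms) == numMetrics {
			peers = append(peers, p)
		}
	}
	return peers
}

func main() {
	peersMap := map[PeerID][]Metric{
		"QmA": {{"freespace", "100"}, {"tag:group", "eu"}}, // complete
		"QmB": {{"freespace", "50"}},                       // missing the tag metric: ignored
	}
	fmt.Println(keepComplete(peersMap, 2)) // [QmA]
}
```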
@@ -182,21 +193,20 @@ func (c *Cluster) obtainAllocations(
 	ctx context.Context,
 	hash cid.Cid,
 	rplMin, rplMax int,
-	currentValidMetrics, candidatesMetrics, priorityMetrics api.MetricsSet,
-	currentPeers, candidatePeers, priorityPeers []peer.ID,
+	metrics classifiedMetrics,
 ) ([]peer.ID, error) {
 	ctx, span := trace.StartSpan(ctx, "cluster/obtainAllocations")
 	defer span.End()
 
-	nCurrentValid := len(currentPeers)
-	nAvailableValid := len(candidatePeers) + len(priorityPeers)
+	nCurrentValid := len(metrics.currentPeers)
+	nAvailableValid := len(metrics.candidatePeers) + len(metrics.priorityPeers)
 	needed := rplMin - nCurrentValid // The minimum we need
 	wanted := rplMax - nCurrentValid // The maximum we want
 
 	logger.Debugf("obtainAllocations: current: %d", nCurrentValid)
 	logger.Debugf("obtainAllocations: available: %d", nAvailableValid)
-	logger.Debugf("obtainAllocations: candidates: %d", len(candidatePeers))
-	logger.Debugf("obtainAllocations: priority: %d", len(priorityPeers))
+	logger.Debugf("obtainAllocations: candidates: %d", len(metrics.candidatePeers))
+	logger.Debugf("obtainAllocations: priority: %d", len(metrics.priorityPeers))
 	logger.Debugf("obtainAllocations: Needed: %d", needed)
 	logger.Debugf("obtainAllocations: Wanted: %d", wanted)
 
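The needed/wanted arithmetic is unchanged by this refactor, but it is worth pinning down with numbers. A small sketch (the function name and setup are illustrative, not from the codebase):

```go
package main

import "fmt"

// allocationPlan reproduces the needed/wanted arithmetic from
// obtainAllocations for a pin with replication factors rplMin/rplMax.
func allocationPlan(rplMin, rplMax, nCurrentValid int) (needed, wanted int) {
	needed = rplMin - nCurrentValid // the minimum we need
	wanted = rplMax - nCurrentValid // the maximum we want
	return
}

func main() {
	// A pin wants 2..3 replicas and already has 1 valid allocation:
	needed, wanted := allocationPlan(2, 3, 1)
	fmt.Println(needed, wanted) // 1 2: at least 1 more peer, up to 2

	// Already over-replicated (4 valid, max 3): wanted < 0 triggers
	// the "drop some current allocations" branch seen in the diff.
	needed, wanted = allocationPlan(2, 3, 4)
	fmt.Println(needed, wanted) // -2 -1
}
```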
@@ -206,7 +216,7 @@ func (c *Cluster) obtainAllocations(
 		// This could be done more intelligently by dropping them
 		// according to the allocator order (i.e. free-ing peers
 		// with most used space first).
-		return currentPeers[0 : len(currentPeers)+wanted], nil
+		return metrics.currentPeers[0 : len(metrics.currentPeers)+wanted], nil
 	}
 
 	if needed <= 0 { // allocations are above minimal threshold
@@ -215,7 +225,7 @@ func (c *Cluster) obtainAllocations(
 	}
 
 	if nAvailableValid < needed { // not enough candidates
-		return nil, allocationError(hash, needed, wanted, append(priorityPeers, candidatePeers...))
+		return nil, allocationError(hash, needed, wanted, append(metrics.priorityPeers, metrics.candidatePeers...))
 	}
 
 	// We can allocate from this point. Use the allocator to decide
@@ -225,9 +235,9 @@ func (c *Cluster) obtainAllocations(
 	finalAllocs, err := c.allocator.Allocate(
 		ctx,
 		hash,
-		currentValidMetrics,
-		candidatesMetrics,
-		priorityMetrics,
+		metrics.current,
+		metrics.candidate,
+		metrics.priority,
 	)
 	if err != nil {
 		return nil, logError(err.Error())
@@ -245,5 +255,5 @@ func (c *Cluster) obtainAllocations(
 
 	// the final result is the currently valid allocations
 	// along with the ones provided by the allocator
-	return append(currentPeers, finalAllocs[0:allocationsToUse]...), nil
+	return append(metrics.currentPeers, finalAllocs[0:allocationsToUse]...), nil
 }
@@ -13,7 +13,7 @@ const envConfigKey = "cluster_metricsalloc"
 
 // These are the default values for a Config.
 var (
-	DefaultAllocateBy = []string{"freespace"}
+	DefaultAllocateBy = []string{"tag:group", "freespace"}
 )
 
 // Config allows to initialize the Allocator.
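With the new default, allocation first partitions peers by the "tag:group" metric and then orders within each partition by "freespace". A hedged sketch of overriding that order programmatically (the AllocateBy field follows the diff; the rest of the real Config struct is omitted):

```go
package main

import "fmt"

// Config is reduced to the AllocateBy field shown in the diff.
type Config struct {
	AllocateBy []string
}

func main() {
	// Default per this commit: partition by tag, then order by free space.
	cfg := Config{AllocateBy: []string{"tag:group", "freespace"}}

	// An operator could instead partition by a region tag first:
	cfg.AllocateBy = []string{"tag:region", "freespace"}
	fmt.Println(cfg.AllocateBy)
}
```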
@@ -76,7 +76,7 @@ func (cfg *Config) LoadJSON(raw []byte) error {
 }
 
 func (cfg *Config) applyJSONConfig(jcfg *jsonConfig) error {
-	// When unset - use default
+	// When unset, leave default
 	if len(jcfg.AllocateBy) > 0 {
 		cfg.AllocateBy = jcfg.AllocateBy
 	}
@@ -2,8 +2,8 @@
 // based on multiple metrics, where metrics may be an arbitrary way to
 // partition a set of peers.
 //
-// For example, allocating by [tags, disk] will
-// first order candidate peers by tag metric, and then by disk metric.
+// For example, allocating by ["tag:region", "disk"] will
+// first order candidate peers by tag metric, and then by "disk" metric.
 // The final list will pick up allocations from each tag metric group.
 // based on the given order of metrics.
 package metrics
@@ -16,12 +16,10 @@ import (
 	"github.com/ipfs/ipfs-cluster/api"
 	peer "github.com/libp2p/go-libp2p-core/peer"
 	rpc "github.com/libp2p/go-libp2p-gorpc"
-
-	"go.opencensus.io/trace"
 )
 
 // Allocator is an allocator that partitions metrics and orders
-// the final least of allocation by selecting for each partition.
+// the final list of allocations by selecting for each partition.
 type Allocator struct {
 	config    *Config
 	rpcClient *rpc.Client
@@ -48,9 +46,6 @@ func (a *Allocator) SetClient(c *rpc.Client) {
 // Shutdown is called on cluster shutdown. We just invalidate
 // any metrics from this point.
 func (a *Allocator) Shutdown(ctx context.Context) error {
-	_, span := trace.StartSpan(ctx, "allocator/metrics/Shutdown")
-	defer span.End()
-
 	a.rpcClient = nil
 	return nil
 }
@@ -62,13 +57,13 @@ type partitionedMetric struct {
 	partitions []*partition // they are in order of their values
 }
 
-// Returned a list of peers sorted by never choosing twice from the same
+// Returns a list of peers sorted by never choosing twice from the same
 // partition if there is some other partition to choose from.
 func (pnedm *partitionedMetric) sortedPeers() []peer.ID {
 	peers := []peer.ID{}
 	for {
 		peer := pnedm.chooseNext()
-		if peer == "" {
+		if peer == "" { // This means we are done.
 			break
 		}
 		peers = append(peers, peer)
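The "never choose twice from the same partition while another remains" rule is the heart of the ordering. A self-contained sketch of that round-robin walk (partition contents are illustrative, and the real code also recurses into sub-partitions, one level per configured metric):

```go
package main

import "fmt"

type PeerID string

// roundRobin interleaves picks across partitions, never taking two
// peers in a row from the same partition while another still has peers.
func roundRobin(partitions map[string][]PeerID, order []string) []PeerID {
	out := []PeerID{}
	for picked := true; picked; {
		picked = false
		for _, name := range order {
			if ps := partitions[name]; len(ps) > 0 {
				out = append(out, ps[0])
				partitions[name] = ps[1:]
				picked = true
			}
		}
	}
	return out
}

func main() {
	parts := map[string][]PeerID{
		"eu": {"QmA", "QmB"},
		"us": {"QmC"},
	}
	// Picks alternate between groups: QmA (eu), QmC (us), QmB (eu).
	fmt.Println(roundRobin(parts, []string{"eu", "us"}))
}
```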
@@ -125,7 +120,7 @@ func (pnedm *partitionedMetric) chooseNext() peer.ID {
 
 type partition struct {
 	value string
-	peers map[peer.ID]bool
+	peers map[peer.ID]bool // the bool tracks whether the peer has been picked already out of the partition when doing the final sort.
 	sub   *partitionedMetric // all peers in sub-partitions will have the same value for this metric
 }
 
@@ -1100,7 +1100,7 @@ func (n *NodeWithMeta) Size() uint64 {
 	return uint64(len(n.Data))
 }
 
-// MetricsSet is map to carry slices of metric indexed by type.
+// MetricsSet is a map to carry slices of metrics indexed by type.
 type MetricsSet map[string][]*Metric
 
 // Metric transports information about a peer.ID. It is used to decide
@@ -1126,7 +1126,11 @@ func (m *Metric) SetTTL(d time.Duration) {
 // GetTTL returns the time left before the Metric expires
 func (m *Metric) GetTTL() time.Duration {
 	expDate := time.Unix(0, m.Expire)
-	return time.Until(expDate)
+	ttl := time.Until(expDate)
+	if ttl < 0 {
+		ttl = 0
+	}
+	return ttl
 }
 
 // Expired returns if the Metric has expired
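Clamping at zero means callers can no longer observe a negative TTL for an already-expired metric. A quick sketch of the new behavior (Metric is reduced here to the one Expire field that GetTTL reads):

```go
package main

import (
	"fmt"
	"time"
)

// Metric keeps only the field GetTTL needs.
type Metric struct {
	Expire int64 // UnixNano expiry timestamp
}

// GetTTL mirrors the clamped version from the diff.
func (m *Metric) GetTTL() time.Duration {
	ttl := time.Until(time.Unix(0, m.Expire))
	if ttl < 0 {
		ttl = 0
	}
	return ttl
}

func main() {
	expired := Metric{Expire: time.Now().Add(-time.Minute).UnixNano()}
	fmt.Println(expired.GetTTL()) // 0s, not a negative duration
}
```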
cluster.go | 12
@@ -283,6 +283,7 @@ func (c *Cluster) watchPinset() {
 	}
 }
 
+// returns the smallest ttl from the metrics pushed by the informer.
 func (c *Cluster) sendInformerMetrics(ctx context.Context, informer Informer) (time.Duration, error) {
 	ctx, span := trace.StartSpan(ctx, "cluster/sendInformerMetric")
 	defer span.End()
@@ -298,10 +299,7 @@ func (c *Cluster) sendInformerMetrics(ctx context.Context, informer Informer) (t
 	for _, metric := range metrics {
 		metric.Peer = c.id
 		ttl := metric.GetTTL()
-		if minTTL == 0 {
-			minTTL = metric.GetTTL()
-		}
-		if ttl < minTTL && ttl > 0 {
+		if ttl > 0 && (ttl < minTTL || minTTL == 0) {
 			minTTL = ttl
 		}
 		err := c.monitor.PublishMetric(ctx, metric)
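The merged condition keeps the smallest strictly positive TTL in one pass, so a metric that is already expired (TTL clamped to 0 by the GetTTL change above) can never become the minimum. A sketch of the fold in isolation:

```go
package main

import (
	"fmt"
	"time"
)

// minPositiveTTL folds a list of TTLs down to the smallest strictly
// positive one, exactly as the merged condition in the diff does:
// zero TTLs (already-expired metrics) never become the minimum.
func minPositiveTTL(ttls []time.Duration) time.Duration {
	var minTTL time.Duration
	for _, ttl := range ttls {
		if ttl > 0 && (ttl < minTTL || minTTL == 0) {
			minTTL = ttl
		}
	}
	return minTTL
}

func main() {
	ttls := []time.Duration{30 * time.Second, 0, 10 * time.Second}
	fmt.Println(minPositiveTTL(ttls)) // 10s; the 0 entry is skipped
}
```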
@@ -310,7 +308,7 @@ func (c *Cluster) sendInformerMetrics(ctx context.Context, informer Informer) (t
 			logger.Warnf("error sending metric %s: %s", metric.Name, err)
 		}
 	}
-	return minTTL, nil
+	return minTTL, errors
 }
 
 func (c *Cluster) sendInformersMetrics(ctx context.Context) error {
@@ -354,8 +352,8 @@ func (c *Cluster) pushInformerMetrics(ctx context.Context, informer Informer) {
 
 	minTTL, err := c.sendInformerMetrics(ctx, informer)
 	if minTTL == 0 {
-		logger.Errorf("informer %s reported metric ttl 0. This must be a bug. Aborting this informer", informer.Name())
-		return
+		minTTL = 30 * time.Second
+		logger.Warningf("informer %s reported a min metric ttl of 0s.", informer.Name())
 	}
 	if err != nil {
 		if (retries % retryWarnMod) == 0 {
@@ -12,8 +12,6 @@ import (
 
 	logging "github.com/ipfs/go-log/v2"
 	rpc "github.com/libp2p/go-libp2p-gorpc"
-
-	"go.opencensus.io/trace"
 )
 
 var logger = logging.Logger("tags")
@@ -46,7 +44,8 @@ func New(cfg *Config) (*Informer, error) {
 	}, nil
 }
 
-// Name returns the name of the metric issued by this informer.
+// Name returns the name of this informer. Note the informer issues metrics
+// with custom names.
 func (tags *Informer) Name() string {
 	return MetricName
 }
@@ -62,9 +61,6 @@ func (tags *Informer) SetClient(c *rpc.Client) {
 // Shutdown is called on cluster shutdown. We just invalidate
 // any metrics from this point.
 func (tags *Informer) Shutdown(ctx context.Context) error {
-	_, span := trace.StartSpan(ctx, "informer/tags/Shutdown")
-	defer span.End()
-
 	tags.mu.Lock()
 	defer tags.mu.Unlock()
 
@@ -137,6 +137,8 @@ type PinTracker interface {
 type Informer interface {
 	Component
 	Name() string
+	// GetMetrics returns the metrics obtained by this Informer. It must
+	// always return at least one metric.
 	GetMetrics(context.Context) []*api.Metric
 }
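The strengthened contract ("at least one metric") is easy to violate in a custom informer. A hedged sketch of a compliant implementation against simplified local types (the real interface also embeds Component, and the real metrics are *api.Metric; the staticTags type and its "tag:group" default are illustrative only):

```go
package main

import (
	"context"
	"fmt"
)

type Metric struct {
	Name  string
	Value string
}

// Informer is a simplified local copy of the interface in the diff.
type Informer interface {
	Name() string
	GetMetrics(context.Context) []Metric
}

type staticTags struct{ tags map[string]string }

func (s *staticTags) Name() string { return "tags" }

// GetMetrics honors the "at least one metric" contract: when no tags
// are configured, it still emits a placeholder rather than nil.
func (s *staticTags) GetMetrics(ctx context.Context) []Metric {
	if len(s.tags) == 0 {
		return []Metric{{Name: "tag:group", Value: "default"}}
	}
	out := make([]Metric, 0, len(s.tags))
	for k, v := range s.tags {
		out = append(out, Metric{Name: "tag:" + k, Value: v})
	}
	return out
}

func main() {
	var inf Informer = &staticTags{}
	fmt.Println(inf.GetMetrics(context.Background())) // never empty
}
```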
@@ -172,8 +174,9 @@ type PeerMonitor interface {
 	// PublishMetric sends a metric to the rest of the peers.
 	// How to send it, and to who, is to be decided by the implementation.
 	PublishMetric(context.Context, *api.Metric) error
-	// LatestMetrics returns a map with the latest metrics of matching name
-	// for the current cluster peers.
+	// LatestMetrics returns a map with the latest metrics of matching
+	// name for the current cluster peers. The result should only contain
+	// one metric per peer at most.
 	LatestMetrics(ctx context.Context, name string) []*api.Metric
 	// MetricNames returns a list of metric names.
 	MetricNames(ctx context.Context) []string
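filterMetrics counts on this "at most one metric per peer" guarantee (its numMetrics check breaks otherwise), so monitor implementations need to deduplicate. A sketch of keeping only the freshest metric per peer (types simplified; treating the later expiry as fresher is an assumption of this example):

```go
package main

import "fmt"

type PeerID string

type Metric struct {
	Peer   PeerID
	Expire int64 // UnixNano; later expiry == fresher metric here
}

// latestPerPeer keeps one metric per peer, preferring the one that
// expires last, which is what the LatestMetrics contract requires.
func latestPerPeer(metrics []Metric) []Metric {
	best := map[PeerID]Metric{}
	for _, m := range metrics {
		if cur, ok := best[m.Peer]; !ok || m.Expire > cur.Expire {
			best[m.Peer] = m
		}
	}
	out := make([]Metric, 0, len(best))
	for _, m := range best {
		out = append(out, m)
	}
	return out
}

func main() {
	ms := []Metric{{"QmA", 100}, {"QmA", 200}, {"QmB", 150}}
	fmt.Println(len(latestPerPeer(ms))) // 2: one per peer
}
```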