Do alert for all metric types
This commit is contained in:
parent
27295c10ac
commit
563a0da9ae
|
@ -58,12 +58,14 @@ func NewChecker(ctx context.Context, metrics *Store, threshold float64) *Checker
|
|||
// CheckPeers will trigger alerts based on the latest metrics from the given peerset
|
||||
// when they have expired and no alert has been sent before.
|
||||
func (mc *Checker) CheckPeers(peers []peer.ID) error {
|
||||
for _, peer := range peers {
|
||||
for _, metric := range mc.metrics.PeerMetricAll("ping", peer) {
|
||||
if mc.FailedMetric(metric.Name, peer) {
|
||||
err := mc.alert(peer, metric.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
for _, name := range mc.metrics.MetricNames() {
|
||||
for _, peer := range peers {
|
||||
for _, metric := range mc.metrics.PeerMetricAll(name, peer) {
|
||||
if mc.FailedMetric(metric.Name, peer) {
|
||||
err := mc.alert(peer, metric.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -75,7 +77,7 @@ func (mc *Checker) CheckPeers(peers []peer.ID) error {
|
|||
// and no alert has been sent before.
|
||||
func (mc *Checker) CheckAll() error {
|
||||
for _, metric := range mc.metrics.AllMetrics() {
|
||||
if metric.Name == "ping" && mc.Failed(metric.Peer) {
|
||||
if mc.FailedMetric(metric.Name, metric.Peer) {
|
||||
err := mc.alert(metric.Peer, metric.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -168,16 +170,7 @@ func (mc *Checker) Watch(ctx context.Context, peersF func(context.Context) ([]pe
|
|||
}
|
||||
}
|
||||
|
||||
// Failed returns true if a peer has potentially failed, judged by its
// "ping" metric only.
|
||||
// Peers that are not present in the metrics store will return
|
||||
// as failed.
|
||||
func (mc *Checker) Failed(pid peer.ID) bool {
|
||||
// Only the last of the four values returned by mc.failed is used here.
_, _, _, result := mc.failed("ping", pid)
|
||||
return result
|
||||
}
|
||||
|
||||
// FailedMetric is the same as Failed but can use any metric type,
|
||||
// not just ping.
|
||||
// FailedMetric reports whether a peer is marked as failed for a particular metric.
|
||||
func (mc *Checker) FailedMetric(metric string, pid peer.ID) bool {
|
||||
_, _, _, result := mc.failed(metric, pid)
|
||||
return result
|
||||
|
|
|
@ -145,7 +145,7 @@ func TestChecker_Failed(t *testing.T) {
|
|||
}
|
||||
for i := 0; i < 10; i++ {
|
||||
metrics.Add(makePeerMetric(test.PeerID1, "1", 3*time.Millisecond))
|
||||
got := checker.Failed(test.PeerID1)
|
||||
got := checker.FailedMetric("ping", test.PeerID1)
|
||||
// the magic number 17 represents the point at which
|
||||
// the time between metrics addition has gotten
|
||||
// so large that the probability that the service
|
||||
|
|
|
@ -189,3 +189,15 @@ func (mtrs *Store) Distribution(name string, pid peer.ID) []float64 {
|
|||
|
||||
return window.Distribution()
|
||||
}
|
||||
|
||||
// MetricNames returns the names of all metrics currently known to the store.
// The names are the keys of mtrs.byName, collected while holding mtrs.mux
// via RLock. They are gathered by ranging over byName, which is presumably
// a map, so callers should not rely on the order of the returned slice.
|
||||
func (mtrs *Store) MetricNames() []string {
|
||||
mtrs.mux.RLock()
|
||||
defer mtrs.mux.RUnlock()
|
||||
|
||||
// Pre-size the result to the number of known names to avoid regrowth.
list := make([]string, 0, len(mtrs.byName))
|
||||
for k := range mtrs.byName {
|
||||
list = append(list, k)
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user