Trigger alerts for all metric types, not only "ping"

This commit is contained in:
Hector Sanjuan 2019-06-23 09:05:32 +01:00
parent 27295c10ac
commit 563a0da9ae
3 changed files with 23 additions and 18 deletions

View File

@ -58,12 +58,14 @@ func NewChecker(ctx context.Context, metrics *Store, threshold float64) *Checker
// CheckPeers will trigger alerts based on the latest metrics from the given peerset // CheckPeers will trigger alerts based on the latest metrics from the given peerset
// when they have expired and no alert has been sent before. // when they have expired and no alert has been sent before.
func (mc *Checker) CheckPeers(peers []peer.ID) error { func (mc *Checker) CheckPeers(peers []peer.ID) error {
for _, peer := range peers { for _, name := range mc.metrics.MetricNames() {
for _, metric := range mc.metrics.PeerMetricAll("ping", peer) { for _, peer := range peers {
if mc.FailedMetric(metric.Name, peer) { for _, metric := range mc.metrics.PeerMetricAll(name, peer) {
err := mc.alert(peer, metric.Name) if mc.FailedMetric(metric.Name, peer) {
if err != nil { err := mc.alert(peer, metric.Name)
return err if err != nil {
return err
}
} }
} }
} }
@ -75,7 +77,7 @@ func (mc *Checker) CheckPeers(peers []peer.ID) error {
// and no alert has been sent before. // and no alert has been sent before.
func (mc *Checker) CheckAll() error { func (mc *Checker) CheckAll() error {
for _, metric := range mc.metrics.AllMetrics() { for _, metric := range mc.metrics.AllMetrics() {
if metric.Name == "ping" && mc.Failed(metric.Peer) { if mc.FailedMetric(metric.Name, metric.Peer) {
err := mc.alert(metric.Peer, metric.Name) err := mc.alert(metric.Peer, metric.Name)
if err != nil { if err != nil {
return err return err
@ -168,16 +170,7 @@ func (mc *Checker) Watch(ctx context.Context, peersF func(context.Context) ([]pe
} }
} }
// Failed returns true if a peer has potentially failed. // FailedMetric returns if a peer is marked as failed for a particular metric.
// Peers that are not present in the metrics store will return
// as failed.
func (mc *Checker) Failed(pid peer.ID) bool {
_, _, _, result := mc.failed("ping", pid)
return result
}
// FailedMetric is the same as Failed but can use any metric type,
// not just ping.
func (mc *Checker) FailedMetric(metric string, pid peer.ID) bool { func (mc *Checker) FailedMetric(metric string, pid peer.ID) bool {
_, _, _, result := mc.failed(metric, pid) _, _, _, result := mc.failed(metric, pid)
return result return result

View File

@ -145,7 +145,7 @@ func TestChecker_Failed(t *testing.T) {
} }
for i := 0; i < 10; i++ { for i := 0; i < 10; i++ {
metrics.Add(makePeerMetric(test.PeerID1, "1", 3*time.Millisecond)) metrics.Add(makePeerMetric(test.PeerID1, "1", 3*time.Millisecond))
got := checker.Failed(test.PeerID1) got := checker.FailedMetric("ping", test.PeerID1)
// the magic number 17 represents the point at which // the magic number 17 represents the point at which
// the time between metrics addition has gotten // the time between metrics addition has gotten
// so large that the probability that the service // so large that the probability that the service

View File

@ -189,3 +189,15 @@ func (mtrs *Store) Distribution(name string, pid peer.ID) []float64 {
return window.Distribution() return window.Distribution()
} }
// MetricNames reports the name of every metric type currently held by the
// Store. The names are gathered while holding the read lock and come back in
// map-iteration order, so callers must not rely on any particular ordering.
func (mtrs *Store) MetricNames() []string {
	mtrs.mux.RLock()
	defer mtrs.mux.RUnlock()

	// The final length is known up front, so fill by index rather than append.
	names := make([]string, len(mtrs.byName))
	idx := 0
	for name := range mtrs.byName {
		names[idx] = name
		idx++
	}
	return names
}