Collect statistics about the total number of probes missed for a
link-watcher. Only count a probe as missed if the previous send() hasn't
failed. If the previous send() failed (for example, because the port is
not currently active), do not count it as a missed probe. Do not reset
the counter when the port becomes active.
Export the value as a string, because int in jansson is limited to
32 bits (and, in general, JSON implementations do not handle 64-bit
values very well).
This is useful to monitor the arp_ping and nsna_ping link-watchers.
The preexisting "missed" counter is reset when the port becomes active
and is also incremented when a probe was not actually sent (for
example, because the port was not active). These characteristics make
the preexisting "missed" counter less useful for monitoring.
Signed-off-by: Daniele Di Proietto <ddiproietto(a)google.com>
---
v1->v2:
* Reword commit message to use imperative mood.
---
teamd/teamd_link_watch.h | 4 ++++
teamd/teamd_lw_arp_ping.c | 5 +++++
teamd/teamd_lw_nsna_ping.c | 5 +++++
teamd/teamd_lw_psr.c | 20 ++++++++++++++++++++
4 files changed, 34 insertions(+)
diff --git a/teamd/teamd_link_watch.h b/teamd/teamd_link_watch.h
index 9c404be..3734322 100644
--- a/teamd/teamd_link_watch.h
+++ b/teamd/teamd_link_watch.h
@@ -43,6 +43,7 @@ struct lw_psr_port_priv {
int sock;
unsigned int missed;
uint64_t total_probes_sent;
+ uint64_t total_probes_missed;
bool probe_sent;
bool reply_received;
};
@@ -84,5 +85,8 @@ int lw_psr_state_missed_get(struct teamd_context *ctx,
int lw_psr_state_total_probes_sent_get(struct teamd_context *ctx,
struct team_state_gsc *gsc,
void *priv);
+int lw_psr_state_total_probes_missed_get(struct teamd_context *ctx,
+ struct team_state_gsc *gsc,
+ void *priv);
#endif
diff --git a/teamd/teamd_lw_arp_ping.c b/teamd/teamd_lw_arp_ping.c
index 153a2e8..feb8f03 100644
--- a/teamd/teamd_lw_arp_ping.c
+++ b/teamd/teamd_lw_arp_ping.c
@@ -486,6 +486,11 @@ static const struct teamd_state_val lw_ap_state_vals[] = {
.type = TEAMD_STATE_ITEM_TYPE_STRING,
.getter = lw_psr_state_total_probes_sent_get,
},
+ {
+ .subpath = "total_probes_missed",
+ .type = TEAMD_STATE_ITEM_TYPE_STRING,
+ .getter = lw_psr_state_total_probes_missed_get,
+ },
};
const struct teamd_link_watch teamd_link_watch_arp_ping = {
diff --git a/teamd/teamd_lw_nsna_ping.c b/teamd/teamd_lw_nsna_ping.c
index b5648a7..f7737f7 100644
--- a/teamd/teamd_lw_nsna_ping.c
+++ b/teamd/teamd_lw_nsna_ping.c
@@ -331,6 +331,11 @@ static const struct teamd_state_val lw_nsnap_state_vals[] = {
.type = TEAMD_STATE_ITEM_TYPE_STRING,
.getter = lw_psr_state_total_probes_sent_get,
},
+ {
+ .subpath = "total_probes_missed",
+ .type = TEAMD_STATE_ITEM_TYPE_STRING,
+ .getter = lw_psr_state_total_probes_missed_get,
+ },
};
const struct teamd_link_watch teamd_link_watch_nsnap = {
diff --git a/teamd/teamd_lw_psr.c b/teamd/teamd_lw_psr.c
index d84e4a5..03cc024 100644
--- a/teamd/teamd_lw_psr.c
+++ b/teamd/teamd_lw_psr.c
@@ -47,6 +47,8 @@ static int lw_psr_callback_periodic(struct teamd_context *ctx, int events, void
psr_ppriv->missed = 0;
} else {
psr_ppriv->missed++;
+ if (psr_ppriv->probe_sent)
+ psr_ppriv->total_probes_missed++;
if (psr_ppriv->missed > psr_ppriv->missed_max && link_up) {
teamd_log_dbg(ctx, "%s: Missed %u replies (max %u).",
tdport->ifname, psr_ppriv->missed,
@@ -258,3 +260,21 @@ int lw_psr_state_total_probes_sent_get(struct teamd_context *ctx,
gsc->data.str_val.free = true;
return 0;
}
+
+int lw_psr_state_total_probes_missed_get(struct teamd_context *ctx,
+ struct team_state_gsc *gsc,
+ void *priv)
+{
+ struct lw_common_port_priv *common_ppriv = priv;
+ struct lw_psr_port_priv *psr_ppriv = lw_psr_ppriv_get(common_ppriv);
+ char *str;
+ int err;
+
+ err = asprintf(&str, "%"PRIu64, psr_ppriv->total_probes_missed);
+ if (err == -1)
+ return -ENOMEM;
+
+ gsc->data.str_val.ptr = str;
+ gsc->data.str_val.free = true;
+ return 0;
+}
--
2.33.0.259.gc128427fd7-goog