Gitweb:        http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=7837036cc1b5a5c343a5bd4e041e1f7a91d50086
Commit:        7837036cc1b5a5c343a5bd4e041e1f7a91d50086
Parent:        39837b7bf28253a21a3ab805e8fedd44b7cb8ca2
Author:        Lon Hohberger <lhh@redhat.com>
AuthorDate:    Tue May 4 17:51:45 2010 -0400
Committer:     Lon Hohberger <lhh@redhat.com>
CommitterDate: Thu May 20 09:49:43 2010 -0400
rgmanager: Add per-resource status check tolerances
Resolves: bz583788
Signed-off-by: Lon Hohberger <lhh@redhat.com>
---
 rgmanager/include/reslist.h     |    1 +
 rgmanager/src/daemons/restree.c |   47 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/rgmanager/include/reslist.h b/rgmanager/include/reslist.h index 0275bfd..001f56b 100644 --- a/rgmanager/include/reslist.h +++ b/rgmanager/include/reslist.h @@ -128,6 +128,7 @@ typedef struct _rg_node { resource_t *rn_resource; resource_act_t *rn_actions; restart_counter_t rn_restart_counter; + restart_counter_t rn_failure_counter; int rn_state; /* State of this instance of rn_resource */ int rn_flags; int rn_last_status; diff --git a/rgmanager/src/daemons/restree.c b/rgmanager/src/daemons/restree.c index d5ffea1..3f07f8f 100644 --- a/rgmanager/src/daemons/restree.c +++ b/rgmanager/src/daemons/restree.c @@ -565,6 +565,8 @@ do_load_resource(int ccsfd, char *base, char *ref; resource_node_t *node; resource_t *curres; + time_t failure_expire = 0; + int max_failures = 0;
snprintf(tok, sizeof(tok), "%s/@ref", base);
@@ -659,6 +661,36 @@ do_load_resource(int ccsfd, char *base, free(ref); }
+ /* per-resource-node failures / expire times */ + snprintf(tok, sizeof(tok), "%s/@__max_failures", base); +#ifndef NO_CCS + if (ccs_get(ccsfd, tok, &ref) == 0) { +#else + if (conf_get(tok, &ref) == 0) { +#endif + max_failures = atoi(ref); + if (max_failures < 0) + max_failures = 0; + free(ref); + } + + snprintf(tok, sizeof(tok), "%s/@__failure_expire_time", base); +#ifndef NO_CCS + if (ccs_get(ccsfd, tok, &ref) == 0) { +#else + if (conf_get(tok, &ref) == 0) { +#endif + failure_expire = (time_t)expand_time(ref); + if ((int64_t)failure_expire < 0) + failure_expire = 0; + free(ref); + } + + if (max_failures && failure_expire) { + node->rn_failure_counter = restart_init(failure_expire, + max_failures); + } + curres->r_refs++;
*newnode = node; @@ -906,6 +938,10 @@ destroy_resource_tree(resource_node_t **tree) restart_cleanup(node->rn_restart_counter); }
+ if (node->rn_failure_counter) { + restart_cleanup(node->rn_failure_counter); + } + if(node->rn_actions){ free(node->rn_actions); } @@ -1204,6 +1240,15 @@ do_status(resource_node_t *node) * completed. */ node->rn_actions[idx].ra_last = time(NULL);
+ /* If we have not exceeded our failure count threshold, then fudge + * the status check this round */ + if (x && node->rn_failure_counter) { + if (!restart_threshold_exceeded(node->rn_failure_counter)) { + x = 0; + restart_add(node->rn_failure_counter); + } + } + node->rn_last_status = x; node->rn_last_depth = node->rn_actions[idx].ra_depth; node->rn_checked = 1; @@ -1277,6 +1322,8 @@ clear_checks(resource_node_t *node) node->rn_actions[x].ra_last = now; }
+ restart_clear(node->rn_failure_counter); + node->rn_checked = 0; node->rn_last_status = 0; node->rn_last_depth = 0;
cluster-commits@lists.fedorahosted.org