Gitweb: http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=efdb438ad868b447e28cde5959530b5dc763186f Commit: efdb438ad868b447e28cde5959530b5dc763186f Parent: 60dd70f06444939ea14bb6a40cfb61ab1eea9616 Author: David Teigland teigland@redhat.com AuthorDate: Thu Jan 10 10:29:49 2013 -0600 Committer: David Teigland teigland@redhat.com CommitterDate: Mon Apr 8 15:33:18 2013 -0500
fenced: reconnect to ccs in more places
Reconnect to ccsd if the connection descriptor times out during processing, i.e. when get_method(), get_device() or use_device() returns -EBADR.
bz 881217
Based on patch from John Ruemker jruemker@redhat.com
Signed-off-by: David Teigland teigland@redhat.com --- fence/fenced/agent.c | 42 +++++++++++++++++++++++++++++++++++------- 1 files changed, 35 insertions(+), 7 deletions(-)
diff --git a/fence/fenced/agent.c b/fence/fenced/agent.c index de4467c..604bbb9 100644 --- a/fence/fenced/agent.c +++ b/fence/fenced/agent.c @@ -302,6 +302,25 @@ void update_cman(char *victim, char *method) cman_finish(ch); }
+static int reconnect_ccs(void) +{ + int count = 0; + int cd; + + syslog(LOG_INFO, "ccs connection timed out, retrying\n"); + + while ((cd = ccs_connect()) < 0) { + sleep(1); + + count++; + if (!(count % 60)) { + syslog(LOG_WARNING, "ccs reconnect failing\n"); + } + } + + return cd; +} + int dispatch_fence_agent(char *victim, int force) { char good_device[256]; @@ -328,14 +347,9 @@ int dispatch_fence_agent(char *victim, int force)
error = get_method(cd, victim, m, &method);
- /* if the connection timed out while we were trying - * to fence, try to open the connection again - */ + /* ccs connection times out if prev call took too long */ if (error == -EBADR) { - syslog(LOG_INFO, "ccs connection timed out, " - "retrying\n"); - while ((cd = ccs_connect()) < 0) - sleep(1); + cd = reconnect_ccs(); error = get_method(cd, victim, m, &method); } @@ -349,10 +363,24 @@ int dispatch_fence_agent(char *victim, int force)
for (d = 0; d < num_devices; d++) { error = get_device(cd, victim, method, d, &device); + + /* ccs connection times out if prev call took too long */ + if (error == -EBADR) { + cd = reconnect_ccs(); + error = get_device(cd, victim, method, d, &device); + } + if (error) break;
error = use_device(cd, victim, method, d, device); + + /* ccs connection times out if prev call took too long */ + if (error == -EBADR) { + cd = reconnect_ccs(); + error = use_device(cd, victim, method, d, device); + } + if (error) break;
cluster-commits@lists.fedorahosted.org