src/cmd.c | 6 +- src/main.c | 129 +++++++++++++++++++++++++------------------------ src/sanlock_internal.h | 7 +- src/sanlock_resource.h | 1 src/task.c | 14 ++++- 5 files changed, 88 insertions(+), 69 deletions(-)
New commits: commit ba4b71a6cd7b27ea3adcd011916b4b93b6da253e Author: David Teigland teigland@redhat.com Date: Thu Sep 15 16:23:18 2011 -0500
sanlock: improve killing pids
try to kill a pid once a second for a configured max number of attempts (60), after which we give up. The first 10 attempts use SIGTERM (if not restricted), after which we use SIGKILL (if not restricted).
diff --git a/src/cmd.c b/src/cmd.c index 4810876..bbab319 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -1219,7 +1219,8 @@ static int print_state_client(struct client *cl, int ci, char *str) "cmd_active=%d " "cmd_last=%d " "pid_dead=%d " - "killing=%d " + "kill_count=%d " + "kill_last=%llu " "suspend=%d " "need_free=%d", ci, @@ -1229,7 +1230,8 @@ static int print_state_client(struct client *cl, int ci, char *str) cl->cmd_active, cl->cmd_last, cl->pid_dead, - cl->killing, + cl->kill_count, + (unsigned long long)cl->kill_last, cl->suspend, cl->need_free);
diff --git a/src/main.c b/src/main.c index 25f2b09..3034f47 100644 --- a/src/main.c +++ b/src/main.c @@ -150,7 +150,8 @@ static void _client_free(int ci) cl->pid_dead = 0; cl->suspend = 0; cl->need_free = 0; - cl->killing = 0; + cl->kill_count = 0; + cl->kill_last = 0; cl->restrict = 0; memset(cl->owner_name, 0, sizeof(cl->owner_name)); cl->workfn = NULL; @@ -417,7 +418,7 @@ static int client_using_space(struct client *cl, struct space *sp) if (strncmp(token->r.lockspace_name, sp->space_name, NAME_ID_SIZE)) continue;
- if (cl->killing < 10) + if (!cl->kill_count) log_spoke(sp, token, "client_using_space pid %d", cl->pid); token->space_dead = sp->space_dead; rv = 1; @@ -425,92 +426,89 @@ static int client_using_space(struct client *cl, struct space *sp) return rv; }
-/* - * TODO: move to time-based kill tracking instead of repetition based, e.g. - * kill pid once a second until a max pid_exit_wait after which we quit - * trying to kill it. half way through pid_exit_wait shift from SIGTERM - * to SIGKILL (if no restriction). - * - * sp->killing pids begins at 1 - * don't increment sp->killing pids each time - * when all clients have gone past pid_exit_wait, set sp->killing_pids to 2 - * when sp->killing_pids is 2, then return immediately from kill_pids() - * - * Remove the usleep delay after a kill. - */ +/* TODO: try killscript first if one is provided */
static void kill_pids(struct space *sp) { struct client *cl; - int ci, pid; - int sig; - int found = 0; - - /* TODO: try killscript first if one is provided */ + uint64_t now; + int ci, fd, pid, sig; + int do_kill;
- if (sp->killing_pids > 11) + /* + * all remaining pids using sp are stuck, we've made max attempts to + * kill all, don't bother cycling through them + */ + if (sp->killing_pids > 1) return;
- log_space(sp, "kill_pids %d", sp->killing_pids); - - if (sp->killing_pids > 10) { - sp->killing_pids++; - goto do_dump; - } - - sp->killing_pids++; + now = monotime();
for (ci = 0; ci <= client_maxi; ci++) { - pid = -1; + do_kill = 0;
cl = &client[ci]; pthread_mutex_lock(&cl->mutex);
if (!cl->used) goto unlock; + if (cl->pid <= 0) goto unlock; + + /* NB this cl may not be using sp, but trying to + avoid the expensive client_using_space check */ + + if (cl->kill_count >= main_task.kill_count_max) + goto unlock; + + if (cl->kill_count && (now - cl->kill_last < 1)) + goto unlock; + if (!client_using_space(cl, sp)) goto unlock;
- if (!(cl->restrict & SANLK_RESTRICT_SIGKILL) && cl->killing > 1) + cl->kill_last = now; + cl->kill_count++; + + fd = cl->fd; + pid = cl->pid; + + if (cl->restrict & SANLK_RESTRICT_SIGKILL) + sig = SIGTERM; + else if (cl->restrict & SANLK_RESTRICT_SIGTERM) sig = SIGKILL; - else + else if (cl->kill_count <= main_task.kill_count_term) sig = SIGTERM; + else + sig = SIGKILL;
- pid = cl->pid; - cl->killing++; - found++; + do_kill = 1; unlock: pthread_mutex_unlock(&cl->mutex);
- if (pid > 0) - kill(pid, sig); - } - - if (found) { - log_space(sp, "kill_pids %d found %d pids", sig, found); - usleep(500000); - } - - return; + if (!do_kill) + continue;
- do_dump: - for (ci = 0; ci <= client_maxi; ci++) { - if (client[ci].pid && client[ci].killing) { - log_error("kill_pids %d stuck", client[ci].pid); + if (cl->kill_count == main_task.kill_count_max) { + log_erros(sp, "kill %d,%d,%d sig %d count %d final attempt", + ci, fd, pid, sig, cl->kill_count); + } else { + log_space(sp, "kill %d,%d,%d sig %d count %d", + ci, fd, pid, sig, cl->kill_count); } + + kill(pid, sig); } }
static int all_pids_dead(struct space *sp) { struct client *cl; - int ci, pid; + int stuck = 0, check = 0; + int ci;
for (ci = 0; ci <= client_maxi; ci++) { - pid = -1; - cl = &client[ci]; pthread_mutex_lock(&cl->mutex);
@@ -521,18 +519,23 @@ static int all_pids_dead(struct space *sp) if (!client_using_space(cl, sp)) goto unlock;
- pid = cl->pid; + if (cl->kill_count >= main_task.kill_count_max) + stuck++; + else + check++; unlock: pthread_mutex_unlock(&cl->mutex); + }
- if (pid > 0) { - if (cl->killing < 10) { - log_space(sp, "used by pid %d killing %d", - pid, cl->killing); - } - return 0; - } + if (stuck && !check && sp->killing_pids < 2) { + log_erros(sp, "killing pids stuck %d", stuck); + /* cause kill_pids to give up */ + sp->killing_pids = 2; } + + if (stuck || check) + return 0; + log_space(sp, "used by no pids"); return 1; } @@ -887,9 +890,9 @@ static void process_cmd_thread_registered(int ci_in, struct sm_header *h_recv) goto out; }
- if (cl->killing) { - log_error("cmd %d %d,%d,%d killing", - h_recv->cmd, ci_target, cl->fd, cl->pid); + if (cl->kill_count) { + log_error("cmd %d %d,%d,%d kill_count %d", + h_recv->cmd, ci_target, cl->fd, cl->pid, cl->kill_count); result = -EBUSY; goto out; } diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h index fb13afb..24ceb2f 100644 --- a/src/sanlock_internal.h +++ b/src/sanlock_internal.h @@ -450,10 +450,10 @@ struct task { int id_renewal_seconds; /* configured */ int id_renewal_fail_seconds; /* configured */ int id_renewal_warn_seconds; /* configured */ - int host_dead_seconds; /* calculated */ - int request_finish_seconds; /* calculated */ + int kill_count_term; /* constant */ + int kill_count_max; /* constant */
unsigned int io_count; /* stats */ unsigned int to_count; /* stats */ @@ -477,8 +477,9 @@ struct client { int pid_dead; int suspend; int need_free; - int killing; + int kill_count; uint32_t restrict; + uint64_t kill_last; char owner_name[SANLK_NAME_LEN+1]; pthread_mutex_t mutex; void *workfn; diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h index ae8dfa4..b86bde0 100644 --- a/src/sanlock_resource.h +++ b/src/sanlock_resource.h @@ -23,6 +23,7 @@ /* restrict flags */ #define SANLK_RESTRICT_ALL 0x00000001 #define SANLK_RESTRICT_SIGKILL 0x00000002 +#define SANLK_RESTRICT_SIGTERM 0x00000004
/* release flags */ #define SANLK_REL_ALL 0x00000001 diff --git a/src/task.c b/src/task.c index 9635f92..e06a6fa 100644 --- a/src/task.c +++ b/src/task.c @@ -53,13 +53,24 @@ void setup_task_timeouts(struct task *task, int io_timeout_arg) int paxos_acquire_held_min = host_dead_seconds; int paxos_acquire_free_max = 6 * io_timeout_seconds; int paxos_acquire_free_min = 0; + int request_finish_seconds = 3 * id_renewal_seconds; /* random */
task->io_timeout_seconds = io_timeout_seconds; task->id_renewal_seconds = id_renewal_seconds; task->id_renewal_fail_seconds = id_renewal_fail_seconds; task->id_renewal_warn_seconds = id_renewal_warn_seconds; task->host_dead_seconds = host_dead_seconds; - task->request_finish_seconds = 3 * id_renewal_seconds; /* random */ + task->request_finish_seconds = request_finish_seconds; + + /* interval between each kill count is approx 1 sec, so we + spend about 10 seconds sending 10 SIGTERMs to a pid, + then send SIGKILLs to it. after 60 attempts the watchdog + should have fired if the kills are due to failed renewal; + otherwise we just give up at that point */ + + task->kill_count_term = 10; + task->kill_count_max = 60; + /* the rest are calculated as needed in place */
/* hack to make just main thread log this info */ @@ -84,6 +95,7 @@ void setup_task_timeouts(struct task *task, int io_timeout_arg) log_debug("paxos_acquire_held_min %d", paxos_acquire_held_min); log_debug("paxos_acquire_free_max %d", paxos_acquire_free_max); log_debug("paxos_acquire_free_min %d", paxos_acquire_free_min); + log_debug("request_finish_seconds %d", request_finish_seconds); }
void setup_task_aio(struct task *task, int use_aio, int cb_size)
sanlock-devel@lists.fedorahosted.org