This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch test-sched
in repository sanlock.
commit f49110b426222c931e604e63ee38552d549d0eac
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri May 8 14:36:07 2020 -0500
sanlock: warn about scheduling delay
Two new config file settings:
sched_warn_main_ms = <ms>
sched_warn_renew_sec = <sec>
If the main loop check interval is delayed more than
<ms> milliseconds longer than intended, log a warning.
If the lockspace thread sleep between renewals is
<sec> seconds longer than intended, log a warning.
---
src/cmd.c | 4 ++++
src/lockspace.c | 27 ++++++++++++++++++++++++++-
src/main.c | 14 ++++++++++++++
src/sanlock.8 | 13 +++++++++++++
src/sanlock.conf | 6 ++++++
src/sanlock_internal.h | 2 ++
6 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/src/cmd.c b/src/cmd.c
index 23522ba..5c91181 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -2259,6 +2259,8 @@ static int print_state_daemon(char *str)
"debug_clients=%d "
"debug_cmds=0x%llx "
"renewal_history_size=%d "
+ "sched_warn_main_ms=%u "
+ "sched_warn_renew_sec=%u "
"gid=%d "
"uid=%d "
"sh_retries=%d "
@@ -2285,6 +2287,8 @@ static int print_state_daemon(char *str)
com.debug_clients,
(unsigned long long)com.debug_cmds,
com.renewal_history_size,
+ com.sched_warn_main_ms,
+ com.sched_warn_renew_sec,
com.gid,
com.uid,
com.sh_retries,
diff --git a/src/lockspace.c b/src/lockspace.c
index 41f96cb..dd4875f 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -670,6 +670,8 @@ static void *lockspace_thread(void *arg_in)
struct space *sp;
struct leader_record leader;
uint64_t delta_begin, last_success = 0;
+ uint64_t wait_interval_begin = 0, wait_interval_want = 0;
+ uint64_t now;
int sector_size = 0;
int align_size = 0;
int max_hosts = 0;
@@ -815,9 +817,24 @@ static void *lockspace_thread(void *arg_in)
/*
* wait between each renewal
+ *
+ * T=10 delta renew begins and ends, wrote leader.timesamp=10, last_success=10
+ * T=30 delta renew begins, write leader.timestamp=30
+ * T=35 delta renew ends after write being delayed
+ * renewal_interval = leader.timestamp - last_success (30-10) = 20
+ * last_success = leader.timestamp = 30
+ *
+ * now - last_success (35 - 30) = 5 < id_renewal_seconds (20)
+ * so delay for 15 sec before starting next renewal
+ * delay = id_renewal_seconds - (now - last_success)
*/
+ now = monotime();
- if (monotime() - last_success < id_renewal_seconds) {
+ if (now - last_success < id_renewal_seconds) {
+ if (!wait_interval_begin) {
+ wait_interval_begin = now;
+ wait_interval_want = id_renewal_seconds - (now - last_success);
+ }
sleep(1);
continue;
} else {
@@ -826,6 +843,14 @@ static void *lockspace_thread(void *arg_in)
usleep(500000);
}
+ /* Warn about scheduling delays */
+ if (com.sched_warn_renew_sec && wait_interval_begin &&
wait_interval_want &&
+ (now - wait_interval_begin > wait_interval_want + com.sched_warn_renew_sec)) {
+ log_warns(sp, "renew interval is %u sec want %u warn %u",
+ (uint32_t)(now - wait_interval_begin), (uint32_t)wait_interval_want,
com.sched_warn_renew_sec);
+ }
+ wait_interval_begin = 0;
+ wait_interval_want = 0;
/*
* do a renewal, measuring length of time spent in renewal,
diff --git a/src/main.c b/src/main.c
index 8c6eef8..4afd4db 100644
--- a/src/main.c
+++ b/src/main.c
@@ -823,6 +823,10 @@ static int main_loop(void)
poll_timeout = check_interval - ms;
continue;
}
+ if (ms >= (check_interval + com.sched_warn_main_ms)) {
+ log_warn("main check interval is %u ms want %u warn %u",
+ ms, check_interval, com.sched_warn_main_ms);
+ }
last_check = now;
check_interval = STANDARD_CHECK_INTERVAL;
@@ -2872,6 +2876,14 @@ static void read_config_file(void)
} else {
log_error("ignore unknown max_sectors_kb %s", str);
}
+
+ } else if (!strcmp(str, "sched_warn_main_ms")) {
+ get_val_int(line, &val);
+ com.sched_warn_main_ms = val;
+
+ } else if (!strcmp(str, "sched_warn_renew_sec")) {
+ get_val_int(line, &val);
+ com.sched_warn_renew_sec = val;
}
}
@@ -3860,6 +3872,8 @@ int main(int argc, char *argv[])
com.renewal_read_extend_sec_set = 0;
com.renewal_read_extend_sec = 0;
com.renewal_history_size = DEFAULT_RENEWAL_HISTORY_SIZE;
+ com.sched_warn_main_ms = 2000;
+ com.sched_warn_renew_sec = 2;
com.paxos_debug_all = 0;
com.max_sectors_kb_ignore = DEFAULT_MAX_SECTORS_KB_IGNORE;
com.max_sectors_kb_align = DEFAULT_MAX_SECTORS_KB_ALIGN;
diff --git a/src/sanlock.8 b/src/sanlock.8
index d75211d..0ee8d95 100644
--- a/src/sanlock.8
+++ b/src/sanlock.8
@@ -1365,6 +1365,19 @@ for that command. Special values +all and -all can be used to
enable or disable all commands, and can be used before or after other
debug_cmd lines.
+.IP \[bu] 2
+sched_warn_main_ms = <msec>
+.br
+Log a warning if the intended check interval of the main processing
+loop is run this many milliseconds later than intended. Set to 0
+to disable.
+
+.IP \[bu] 2
+sched_warn_renew_sec = <seconds>
+.br
+Log a warning if the intended delay between lockspace renewals
+is this many seconds later than intended. Set to 0 to disable.
+
.SH SEE ALSO
.BR wdmd (8)
diff --git a/src/sanlock.conf b/src/sanlock.conf
index e52da2b..5d47ad1 100644
--- a/src/sanlock.conf
+++ b/src/sanlock.conf
@@ -63,3 +63,9 @@
# debug_cmd = -<name>
# ...
# command line: n/a
+#
+# sched_warn_main_ms = <msec>
+# command line: n/a
+#
+# sched_warn_renew_sec = <seconds>
+# command line: n/a
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index cd4d23a..b103209 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -385,6 +385,8 @@ struct command_line {
int renewal_history_size;
int renewal_read_extend_sec_set; /* 1 if renewal_read_extend_sec is configured */
uint32_t renewal_read_extend_sec;
+ uint32_t sched_warn_main_ms;
+ uint32_t sched_warn_renew_sec;
char our_host_name[SANLK_NAME_LEN+1];
char *file_path;
char *dump_path;
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.