Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=32c87d56b12e9b6b8e6e1…
Commit: 32c87d56b12e9b6b8e6e1e7e85178abacf19811a
Parent: 60e3dbd6d50ce2c28206c96f824a5afc9bb287e6
Author: Tony Asleson <tasleson(a)redhat.com>
AuthorDate: Mon Sep 25 15:20:03 2017 -0500
Committer: Tony Asleson <tasleson(a)redhat.com>
CommitterDate: Wed Sep 27 07:45:00 2017 -0500
lvmdbusd: thread stacks dump support
If you send a SIGUSR1 (10) to the daemon, it will dump the current
stacks of all threads to stdout. This will be useful when the
daemon is apparently hung and not processing requests.
e.g.
$ sudo kill -10 <daemon pid>
---
daemons/lvmdbusd/main.py | 29 ++++++++++++++++++++------
daemons/lvmdbusd/utils.py | 48 +++++++++++++++++++++++++++++++++++++++-----
2 files changed, 64 insertions(+), 13 deletions(-)
diff --git a/daemons/lvmdbusd/main.py b/daemons/lvmdbusd/main.py
index dc31b6f..7f0a028 100644
--- a/daemons/lvmdbusd/main.py
+++ b/daemons/lvmdbusd/main.py
@@ -63,6 +63,24 @@ def check_bb_size(value):
return v
+def install_signal_handlers():
+ # Because of the glib main loop stuff the python signal handler code is
+ # apparently not usable and we need to use the glib calls instead
+ signal_add = None
+
+ if hasattr(GLib, 'unix_signal_add'):
+ signal_add = GLib.unix_signal_add
+ elif hasattr(GLib, 'unix_signal_add_full'):
+ signal_add = GLib.unix_signal_add_full
+
+ if signal_add:
+ signal_add(GLib.PRIORITY_HIGH, signal.SIGHUP, utils.handler, signal.SIGHUP)
+ signal_add(GLib.PRIORITY_HIGH, signal.SIGINT, utils.handler, signal.SIGINT)
+ signal_add(GLib.PRIORITY_HIGH, signal.SIGUSR1, utils.handler, signal.SIGUSR1)
+ else:
+ log_error("GLib.unix_signal_[add|add_full] are NOT available!")
+
+
def main():
start = time.time()
# Add simple command line handling
@@ -112,12 +130,7 @@ def main():
# List of threads that we start up
thread_list = []
- # Install signal handlers
- for s in [signal.SIGHUP, signal.SIGINT]:
- try:
- signal.signal(s, utils.handler)
- except RuntimeError:
- pass
+ install_signal_handlers()
dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
dbus.mainloop.glib.threads_init()
@@ -177,5 +190,7 @@ def main():
for thread in thread_list:
thread.join()
except KeyboardInterrupt:
- utils.handler(signal.SIGINT, None)
+ # If we are unable to register signal handler, we will end up here when
+ # the service gets a ^C or a kill -2 <parent pid>
+ utils.handler(signal.SIGINT)
return 0
diff --git a/daemons/lvmdbusd/utils.py b/daemons/lvmdbusd/utils.py
index ce2ed22..3c006c4 100644
--- a/daemons/lvmdbusd/utils.py
+++ b/daemons/lvmdbusd/utils.py
@@ -21,6 +21,7 @@ from lvmdbusd import cfg
from gi.repository import GLib
import threading
import traceback
+import signal
STDOUT_TTY = os.isatty(sys.stdout.fileno())
@@ -281,12 +282,47 @@ def log_error(msg, *attributes):
_common_log(msg, *attributes)
+def dump_threads_stackframe():
+ ident_to_name = {}
+
+ for thread_object in threading.enumerate():
+ ident_to_name[thread_object.ident] = thread_object
+
+ stacks = []
+ for thread_ident, frame in sys._current_frames().items():
+ stack = traceback.format_list(traceback.extract_stack(frame))
+
+ # There is a possibility that a thread gets created after we have
+ # enumerated all threads, so this lookup table may be incomplete, so
+ # account for this
+ if thread_ident in ident_to_name:
+ thread_name = ident_to_name[thread_ident].name
+ else:
+ thread_name = "unknown"
+
+ stacks.append("Thread: %s" % (thread_name))
+ stacks.append("".join(stack))
+
+ log_error("Dumping thread stack frames!\n" + "\n".join(stacks))
+
+
# noinspection PyUnusedLocal
-def handler(signum, frame):
- cfg.run.value = 0
- log_debug('Signal handler called with signal %d' % signum)
- if cfg.loop is not None:
- cfg.loop.quit()
+def handler(signum):
+ try:
+ if signum == signal.SIGUSR1:
+ dump_threads_stackframe()
+ else:
+ cfg.run.value = 0
+ log_debug('Exiting daemon with signal %d' % signum)
+ if cfg.loop is not None:
+ cfg.loop.quit()
+ except:
+ st = traceback.format_exc()
+ log_error("signal handler: exception (logged, not reported!) \n %s" % st)
+
+ # It's important we report that we handled the exception for the exception
+ # handler to continue to work, especially for signal 10 (SIGUSR1)
+ return True
def pv_obj_path_generate():
@@ -535,7 +571,7 @@ def add_no_notify(cmdline):
def _async_handler(call_back, parameters):
- params_str = ", ".join([str(x) for x in parameters])
+ params_str = ", ".join(str(x) for x in parameters)
log_debug('Main thread execution, callback = %s, parameters = (%s)' %
(str(call_back), params_str))
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=2074094e778bfc298fb4c…
Commit: 2074094e778bfc298fb4cb184b6005e40acd1159
Parent: 090db98828694a00016395cc6c2a25fa1de953f4
Author: Tony Asleson <tasleson(a)redhat.com>
AuthorDate: Fri Sep 22 09:59:50 2017 -0500
Committer: Tony Asleson <tasleson(a)redhat.com>
CommitterDate: Wed Sep 27 07:45:00 2017 -0500
lvmdbusd: Main thread exception logging
Make sure that any and all code that executes in the main thread is
wrapped with a try/except block to ensure that at the very least
we log when things are going wrong.
---
daemons/lvmdbusd/job.py | 8 ++++----
daemons/lvmdbusd/request.py | 8 ++++----
daemons/lvmdbusd/utils.py | 26 ++++++++++++++++----------
3 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/daemons/lvmdbusd/job.py b/daemons/lvmdbusd/job.py
index 609b747..988b114 100644
--- a/daemons/lvmdbusd/job.py
+++ b/daemons/lvmdbusd/job.py
@@ -8,7 +8,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from .automatedproperties import AutomatedProperties
-from .utils import job_obj_path_generate, mt_async_result, mt_run_no_wait
+from .utils import job_obj_path_generate, mt_async_call
from . import cfg
from .cfg import JOB_INTERFACE
import dbus
@@ -30,7 +30,7 @@ class WaitingClient(object):
# Remove ourselves from waiting client
wc.job_state.remove_waiting_client(wc)
wc.timer_id = -1
- mt_async_result(wc.cb, wc.job_state.Complete)
+ mt_async_call(wc.cb, wc.job_state.Complete)
wc.job_state = None
def __init__(self, job_state, tmo, cb, cbe):
@@ -55,7 +55,7 @@ class WaitingClient(object):
GLib.source_remove(self.timer_id)
self.timer_id = -1
- mt_async_result(self.cb, self.job_state.Complete)
+ mt_async_call(self.cb, self.job_state.Complete)
self.job_state = None
@@ -188,7 +188,7 @@ class Job(AutomatedProperties):
@Complete.setter
def Complete(self, value):
self.state.Complete = value
- mt_run_no_wait(Job._signal_complete, self)
+ mt_async_call(Job._signal_complete, self)
@property
def GetError(self):
diff --git a/daemons/lvmdbusd/request.py b/daemons/lvmdbusd/request.py
index 78392de..eaec04c 100644
--- a/daemons/lvmdbusd/request.py
+++ b/daemons/lvmdbusd/request.py
@@ -13,7 +13,7 @@ from gi.repository import GLib
from .job import Job
from . import cfg
import traceback
-from .utils import log_error, mt_async_result
+from .utils import log_error, mt_async_call
class RequestEntry(object):
@@ -116,9 +116,9 @@ class RequestEntry(object):
if error_rc == 0:
if self.cb:
if self._return_tuple:
- mt_async_result(self.cb, (result, '/'))
+ mt_async_call(self.cb, (result, '/'))
else:
- mt_async_result(self.cb, result)
+ mt_async_call(self.cb, result)
else:
if self.cb_error:
if not error_exception:
@@ -129,7 +129,7 @@ class RequestEntry(object):
else:
error_exception = Exception(error_msg)
- mt_async_result(self.cb_error, error_exception)
+ mt_async_call(self.cb_error, error_exception)
else:
# We have a job and it's complete, indicate that it's done.
self._job.Complete = True
diff --git a/daemons/lvmdbusd/utils.py b/daemons/lvmdbusd/utils.py
index c9fbaad..ce2ed22 100644
--- a/daemons/lvmdbusd/utils.py
+++ b/daemons/lvmdbusd/utils.py
@@ -534,20 +534,26 @@ def add_no_notify(cmdline):
# ensure all dbus library interaction is done from the same thread!
-def _async_result(call_back, results):
- log_debug('Results = %s' % str(results))
- call_back(results)
+def _async_handler(call_back, parameters):
+ params_str = ", ".join([str(x) for x in parameters])
+ log_debug('Main thread execution, callback = %s, parameters = (%s)' %
+ (str(call_back), params_str))
+ try:
+ if parameters:
+ call_back(*parameters)
+ else:
+ call_back()
+ except:
+ st = traceback.format_exc()
+ log_error("mt_async_call: exception (logged, not reported!) \n %s" % st)
-# Return result in main thread
-def mt_async_result(call_back, results):
- GLib.idle_add(_async_result, call_back, results)
+# Execute the function on the main thread with the provided parameters, do
+# not return *any* value or wait for the execution to complete!
+def mt_async_call(function_call_back, *parameters):
+ GLib.idle_add(_async_handler, function_call_back, parameters)
-# Take the supplied function and run it on the main thread and not wait for
-# a result!
-def mt_run_no_wait(function, param):
- GLib.idle_add(function, param)
# Run the supplied function and arguments on the main thread and wait for them
# to complete while allowing the ability to get the return value too.
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=8146548d25e9104f0d530…
Commit: 8146548d25e9104f0d530d943290d448c1994c0a
Parent: b0f4e0fcec1a50b73fbc4f05bfd2d385895bc3bc
Author: Alasdair G Kergon <agk(a)redhat.com>
AuthorDate: Fri Sep 22 18:02:58 2017 +0100
Committer: Alasdair G Kergon <agk(a)redhat.com>
CommitterDate: Fri Sep 22 18:34:34 2017 +0100
vgsplit: Fix intermediate metadata corruption.
Changing the VG of a PV uses the same on-disk mechanism as vgrename.
This relies on recognising both the old and new VG names. Prior to this
patch the vgsplit code incorrectly provided the new VG name twice
instead of the old and new ones. This led the low-level mechanism not
to recognise the device as already belonging to a VG and so paying no
attention to the location of its existing metadata, sometimes partly
overwriting it and then later trying to read the corrupt metadata and
issuing a checksum error.
---
WHATS_NEW | 1 +
lib/format_text/format-text.c | 5 +++--
tools/vgsplit.c | 3 +++
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/WHATS_NEW b/WHATS_NEW
index 6b88fc4..b2037a6 100644
--- a/WHATS_NEW
+++ b/WHATS_NEW
@@ -1,5 +1,6 @@
Version 2.02.175 -
======================================
+ Fix metadata corruption in vgsplit intermediate state.
Require LV name with pvmove in a shared VG.
Allow shared active mirror LVs with lvmlockd, dlm, and cmirrord.
Support lvconvert --repair with cache and cachepool volumes.
diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c
index e974f05..c359c8a 100644
--- a/lib/format_text/format-text.c
+++ b/lib/format_text/format-text.c
@@ -448,8 +448,9 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area,
(isspace(vgnamebuf[len]) || vgnamebuf[len] == '{'))
return rlocn;
- log_debug_metadata("Volume group name found in metadata does "
- "not match expected name %s.", vgname);
+ log_debug_metadata("Volume group name found in metadata on %s at %" PRIu64 " does "
+ "not match expected name %s.",
+ dev_name(dev_area->dev), dev_area->start + rlocn->offset, vgname);
bad:
if ((info = lvmcache_info_from_pvid(dev_area->dev->pvid, dev_area->dev, 0)) &&
diff --git a/tools/vgsplit.c b/tools/vgsplit.c
index da9b7b1..843738b 100644
--- a/tools/vgsplit.c
+++ b/tools/vgsplit.c
@@ -705,6 +705,9 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv)
if (!vg_rename(cmd, vg_to, vg_name_to))
goto_bad;
+ /* Set old VG name so the metadata operations recognise that the PVs are in an existing VG */
+ vg_to->old_name = vg_from->name;
+
/* store it on disks */
log_verbose("Writing out updated volume groups");
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=7a6e438df8d3e2bb83723…
Commit: 7a6e438df8d3e2bb83723d707048e4431016d84a
Parent: e3965d392cc1cf1b29c4fcd6f2effb925d8df550
Author: Tony Asleson <tasleson(a)redhat.com>
AuthorDate: Wed Sep 20 16:39:35 2017 -0500
Committer: Tony Asleson <tasleson(a)redhat.com>
CommitterDate: Thu Sep 21 14:35:36 2017 -0500
lvmdbusd: Ensure vg_uuid is present
In some cases where there are no VGs, the data returned from lvm still shows
the following VG fields for the PVs:
"vg_name":"[unknown]", "vg_uuid":""
The code was only checking for the existence of the VG name and we called into
the function get_object_path_by_uuid_lvm_id which requires both the VG uuid and
the VG name to exist (asserts this) which results in the following stack trace:
Traceback (most recent call last):
File "/home/tasleson/lvm2/daemons/lvmdbusd/utils.py", line 563, in runner
obj._run()
File "/home/tasleson/lvm2/daemons/lvmdbusd/utils.py", line 584, in _run
self.rc = self.f(*self.args)
File "/home/tasleson/lvm2/daemons/lvmdbusd/fetch.py", line 26, in
_main_thread_load
cache_refresh=False)[1]
File "/home/tasleson/lvm2/daemons/lvmdbusd/pv.py", line 48, in load_pvs
emit_signal, cache_refresh)
File "/home/tasleson/lvm2/daemons/lvmdbusd/loader.py", line 37, in common
objects = retrieve(search_keys, cache_refresh=False)
File "/home/tasleson/lvm2/daemons/lvmdbusd/pv.py", line 40, in
pvs_state_retrieve
p["pv_attr"], p["pv_tags"], p["vg_name"], p["vg_uuid"]))
File "/home/tasleson/lvm2/daemons/lvmdbusd/pv.py", line 84, in __init__
vg_uuid, vg_name, vg_obj_path_generate)
File "/home/tasleson/lvm2/daemons/lvmdbusd/objectmanager.py", line 318,
in get_object_path_by_uuid_lvm_id
assert uuid
AssertionError
---
daemons/lvmdbusd/pv.py | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/daemons/lvmdbusd/pv.py b/daemons/lvmdbusd/pv.py
index fe32143..e5f8b9d 100644
--- a/daemons/lvmdbusd/pv.py
+++ b/daemons/lvmdbusd/pv.py
@@ -79,7 +79,9 @@ class PvState(State):
self.lv = self._lv_object_list(vg_name)
- if vg_name:
+ # It's possible to have a vg_name and no uuid with the main example
+ # being when the vg_name == '[unknown]'
+ if vg_uuid and vg_name:
self.vg_path = cfg.om.get_object_path_by_uuid_lvm_id(
vg_uuid, vg_name, vg_obj_path_generate)
else:
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=e3965d392cc1cf1b29c4f…
Commit: e3965d392cc1cf1b29c4fcd6f2effb925d8df550
Parent: 096fcb5a6e373763bcdf1d182456f206c06cec43
Author: Tony Asleson <tasleson(a)redhat.com>
AuthorDate: Wed Sep 20 15:46:16 2017 -0500
Committer: Tony Asleson <tasleson(a)redhat.com>
CommitterDate: Thu Sep 21 14:35:36 2017 -0500
lvmdbusd: Fix hang in MThreadRunner
When executing in the main thread, if we encounter an exception we
will bypass the notify_all call on the condition and the calling thread
never wakes up.
@staticmethod
def runner(obj):
# noinspection PyProtectedMember
Exception thrown here
----> obj._run()
So the following code doesn't run, which causes calling thread to hang
with obj.cond:
obj.function_complete = True
obj.cond.notify_all()
Additionally for some unknown reason the stderr is lost.
Best guess is it's something to do with scheduling a python function
into the GLib.idle_add. That made finding the issue quite difficult.
---
daemons/lvmdbusd/utils.py | 19 ++++++++++++++-----
1 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/daemons/lvmdbusd/utils.py b/daemons/lvmdbusd/utils.py
index 170824d..98fd017 100644
--- a/daemons/lvmdbusd/utils.py
+++ b/daemons/lvmdbusd/utils.py
@@ -20,7 +20,7 @@ from lvmdbusd import cfg
# noinspection PyUnresolvedReferences
from gi.repository import GLib
import threading
-
+import traceback
STDOUT_TTY = os.isatty(sys.stdout.fileno())
@@ -568,6 +568,7 @@ class MThreadRunner(object):
def __init__(self, function, *args):
self.f = function
self.rc = None
+ self.exception = None
self.args = args
self.function_complete = False
self.cond = threading.Condition(threading.Lock())
@@ -577,13 +578,21 @@ class MThreadRunner(object):
with self.cond:
if not self.function_complete:
self.cond.wait()
+ if self.exception:
+ raise self.exception
return self.rc
def _run(self):
- if len(self.args):
- self.rc = self.f(*self.args)
- else:
- self.rc = self.f()
+ try:
+ if len(self.args):
+ self.rc = self.f(*self.args)
+ else:
+ self.rc = self.f()
+ except BaseException as be:
+ self.exception = be
+ st = traceback.format_exc()
+ log_error("MThreadRunner: exception \n %s" % st)
+ log_error("Exception will be raised in calling thread!")
def _remove_objects(dbus_objects_rm):
Gitweb: https://sourceware.org/git/?p=lvm2.git;a=commitdiff;h=096fcb5a6e373763bcdf1…
Commit: 096fcb5a6e373763bcdf1d182456f206c06cec43
Parent: 584b4ae38bca05c0bbc6a097b549372c39727f3b
Author: Peter Rajnoha <prajnoha(a)redhat.com>
AuthorDate: Thu Sep 21 15:23:24 2017 +0200
Committer: Peter Rajnoha <prajnoha(a)redhat.com>
CommitterDate: Thu Sep 21 17:15:48 2017 +0200
blkdeactivate: also try to unmount /boot on blkdeactivate -u if on top of supported device
There's nothing special about /boot other than it's used during boot.
But when blkdeactivate is called either on all devices or including a
device where the /boot is on top, we should also include this mount
point when doing unmount before deactivation of supported devices.
---
WHATS_NEW_DM | 1 +
scripts/blkdeactivate.sh.in | 2 +-
2 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/WHATS_NEW_DM b/WHATS_NEW_DM
index df72a25..5a11e2e 100644
--- a/WHATS_NEW_DM
+++ b/WHATS_NEW_DM
@@ -1,5 +1,6 @@
Version 1.02.144 -
======================================
+ Also try to unmount /boot on blkdeactivate -u if on top of supported device.
Use blkdeactivate -r wait in blk-availability systemd service/initscript.
Add blkdeactivate -r wait option to wait for MD resync/recovery/reshape.
Fix blkdeactivate regression with failing DM/MD devs deactivation (1.02.142).
diff --git a/scripts/blkdeactivate.sh.in b/scripts/blkdeactivate.sh.in
index 71cc241..11b5983 100644
--- a/scripts/blkdeactivate.sh.in
+++ b/scripts/blkdeactivate.sh.in
@@ -97,7 +97,7 @@ declare -A SKIP_VG_LIST=()
# listed here will be added to SKIP_DEVICE_LIST (and SKIP_VG_LIST) automatically.
# (list is an associative array!)
#
-declare -A SKIP_UMOUNT_LIST=(["/"]=1 ["/boot"]=1 \
+declare -A SKIP_UMOUNT_LIST=(["/"]=1 \
["/lib"]=1 ["/lib64"]=1 \
["/bin"]=1 ["/sbin"]=1 \
["/var"]=1 ["/var/log"]=1 \