[PATCH] kexec/arch/i386: Add support for KASLR memory randomization
by Dave Young
Backport upstream kexec-tools commit for correct kaslr page_offset value
commit 9f62cbddddfc93d78d9aafbddf3e1208cb242f7b
Author: Thomas Garnier <thgarnie(a)google.com>
Date: Tue Sep 13 15:10:05 2016 +0800
kexec/arch/i386: Add support for KASLR memory randomization
Multiple changes were made on KASLR (right now in linux-next). One of
them is randomizing the virtual address of the physical mapping, vmalloc
and vmemmap memory sections. It breaks kdump ability to read physical
memory.
This change detects whether KASLR memory randomization is in use by
checking whether the page_offset_base symbol exists. If it does, it searches
for the correct PAGE_OFFSET value by looking at the loaded memory sections
and finding the lowest address aligned on PUD (the randomization level).
Related commits on linux-next:
- 0483e1fa6e09d4948272680f691dccb1edb9677f: Base for randomization
- 021182e52fe01c1f7b126f97fd6ba048dc4234fd: Enable for PAGE_OFFSET
Signed-off-by: Thomas Garnier <thgarnie(a)google.com>
Signed-off-by: Simon Horman <horms(a)verge.net.au>
Signed-off-by: Dave Young <dyoung(a)redhat.com>
---
...i386-Add-support-for-KASLR-memory-randomi.patch | 97 ++++++++++++++++++++++
kexec-tools.spec | 2 +
2 files changed, 99 insertions(+)
create mode 100644 kexec-tools-2.0.13-kexec-arch-i386-Add-support-for-KASLR-memory-randomi.patch
diff --git a/kexec-tools-2.0.13-kexec-arch-i386-Add-support-for-KASLR-memory-randomi.patch b/kexec-tools-2.0.13-kexec-arch-i386-Add-support-for-KASLR-memory-randomi.patch
new file mode 100644
index 0000000..a5474cb
--- /dev/null
+++ b/kexec-tools-2.0.13-kexec-arch-i386-Add-support-for-KASLR-memory-randomi.patch
@@ -0,0 +1,97 @@
+From 9f62cbddddfc93d78d9aafbddf3e1208cb242f7b Mon Sep 17 00:00:00 2001
+From: Thomas Garnier <thgarnie(a)google.com>
+Date: Tue, 13 Sep 2016 15:10:05 +0800
+Subject: [PATCH] kexec/arch/i386: Add support for KASLR memory randomization
+
+Multiple changes were made on KASLR (right now in linux-next). One of
+them is randomizing the virtual address of the physical mapping, vmalloc
+and vmemmap memory sections. It breaks kdump ability to read physical
+memory.
+
+This change identifies if KASLR memories randomization is used by
+checking if the page_offset_base variable exists. It search for the
+correct PAGE_OFFSET value by looking at the loaded memory section and
+find the lowest aligned on PUD (the randomization level).
+
+Related commits on linux-next:
+ - 0483e1fa6e09d4948272680f691dccb1edb9677f: Base for randomization
+ - 021182e52fe01c1f7b126f97fd6ba048dc4234fd: Enable for PAGE_OFFSET
+
+Signed-off-by: Thomas Garnier <thgarnie(a)google.com>
+Signed-off-by: Simon Horman <horms(a)verge.net.au>
+---
+ kexec/arch/i386/crashdump-x86.c | 29 ++++++++++++++++++++++-------
+ 1 file changed, 22 insertions(+), 7 deletions(-)
+
+diff --git a/kexec/arch/i386/crashdump-x86.c b/kexec/arch/i386/crashdump-x86.c
+index bbc0f35..ab833d4 100644
+--- a/kexec/arch/i386/crashdump-x86.c
++++ b/kexec/arch/i386/crashdump-x86.c
+@@ -102,11 +102,10 @@ static int get_kernel_paddr(struct kexec_info *UNUSED(info),
+ return -1;
+ }
+
+-/* Retrieve kernel _stext symbol virtual address from /proc/kallsyms */
+-static unsigned long long get_kernel_stext_sym(void)
++/* Retrieve kernel symbol virtual address from /proc/kallsyms */
++static unsigned long long get_kernel_sym(const char *symbol)
+ {
+ const char *kallsyms = "/proc/kallsyms";
+- const char *stext = "_stext";
+ char sym[128];
+ char line[128];
+ FILE *fp;
+@@ -122,13 +121,13 @@ static unsigned long long get_kernel_stext_sym(void)
+ while(fgets(line, sizeof(line), fp) != NULL) {
+ if (sscanf(line, "%Lx %c %s", &vaddr, &type, sym) != 3)
+ continue;
+- if (strcmp(sym, stext) == 0) {
+- dbgprintf("kernel symbol %s vaddr = %16llx\n", stext, vaddr);
++ if (strcmp(sym, symbol) == 0) {
++ dbgprintf("kernel symbol %s vaddr = %16llx\n", symbol, vaddr);
+ return vaddr;
+ }
+ }
+
+- fprintf(stderr, "Cannot get kernel %s symbol address\n", stext);
++ fprintf(stderr, "Cannot get kernel %s symbol address\n", symbol);
+ return 0;
+ }
+
+@@ -151,6 +150,8 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info),
+ off_t size;
+ uint32_t elf_flags = 0;
+ uint64_t stext_sym;
++ const unsigned long long pud_mask = ~((1 << 30) - 1);
++ unsigned long long vaddr, lowest_vaddr = 0;
+
+ if (elf_info->machine != EM_X86_64)
+ return 0;
+@@ -180,9 +181,23 @@ static int get_kernel_vaddr_and_size(struct kexec_info *UNUSED(info),
+
+ end_phdr = &ehdr.e_phdr[ehdr.e_phnum];
+
++ /* Search for the real PAGE_OFFSET when KASLR memory randomization
++ * is enabled */
++ if (get_kernel_sym("page_offset_base") != 0) {
++ for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) {
++ if (phdr->p_type == PT_LOAD) {
++ vaddr = phdr->p_vaddr & pud_mask;
++ if (lowest_vaddr == 0 || lowest_vaddr > vaddr)
++ lowest_vaddr = vaddr;
++ }
++ }
++ if (lowest_vaddr != 0)
++ elf_info->page_offset = lowest_vaddr;
++ }
++
+ /* Traverse through the Elf headers and find the region where
+ * _stext symbol is located in. That's where kernel is mapped */
+- stext_sym = get_kernel_stext_sym();
++ stext_sym = get_kernel_sym("_stext");
+ for(phdr = ehdr.e_phdr; stext_sym && phdr != end_phdr; phdr++) {
+ if (phdr->p_type == PT_LOAD) {
+ unsigned long long saddr = phdr->p_vaddr;
+--
+2.10.1
+
diff --git a/kexec-tools.spec b/kexec-tools.spec
index be98202..bf28938 100644
--- a/kexec-tools.spec
+++ b/kexec-tools.spec
@@ -65,6 +65,7 @@ Obsoletes: diskdumputils netdump kexec-tools-eppic
#
# Patches 0 through 100 are meant for x86 kexec-tools enablement
#
+Patch1: kexec-tools-2.0.13-kexec-arch-i386-Add-support-for-KASLR-memory-randomi.patch
#
# Patches 101 through 200 are meant for x86_64 kexec-tools enablement
@@ -120,6 +121,7 @@ tar -z -x -v -f %{SOURCE9}
tar -z -x -v -f %{SOURCE19}
tar -z -x -v -f %{SOURCE23}
+%patch1 -p1
%patch500 -p1
%patch501 -p1
%patch502 -p1
--
2.10.1
7 years, 5 months
[PATCH] Drop kdump specific get persistent dev function
by Dave Young
We have been maintaining a kdump-specific function for getting the persistent
device name; it can now be dropped.
Dracut upstream split the dracut init code out of dracut-functions.sh, so
we can now source that file.
OTOH, dracut now has a persistent_policy feature. For kdump, when we dump to
raw disks we do not care about the filesystem UUID and labels, so we prefer to
search by disk id instead. Setting persistent_policy before calling
get_persistent_dev ensures the kdump logic still works.
Signed-off-by: Dave Young <dyoung(a)redhat.com>
---
kdump-lib.sh | 26 --------------------------
kdumpctl | 8 +++++++-
mkdumprd | 10 ++++++----
3 files changed, 13 insertions(+), 31 deletions(-)
--- kexec-tools.orig/kdump-lib.sh
+++ kexec-tools/kdump-lib.sh
@@ -88,32 +88,6 @@ to_dev_name() {
echo $dev
}
-kdump_get_persistent_dev() {
- local i _tmp _dev _lookup_dirs
-
- _dev=$(udevadm info --query=name --name="$1" 2>/dev/null)
- [ -z "$_dev" ] && {
- perror_exit "Kernel dev name of $1 is not found."
- }
-
- if [[ $2 = "raw" ]];then
- _lookup_dirs="/dev/mapper/* /dev/disk/by-id/*"
- else
- _lookup_dirs="/dev/mapper/* /dev/disk/by-uuid/* /dev/disk/by-id/*"
- fi
-
- for i in $_lookup_dirs; do
- _tmp=$(udevadm info --query=name --name="$i" 2>/dev/null)
- if [ "$_tmp" = "$_dev" ]; then
- echo $i
- return
- fi
- done
-
- perror "WARNING: Persistent device name of $1 not found. Using $1 as dump target name"
- echo $1
-}
-
get_user_configured_dump_disk()
{
local _target
--- kexec-tools.orig/kdumpctl
+++ kexec-tools/kdumpctl
@@ -16,6 +16,8 @@ FADUMP_REGISTER_SYS_NODE="/sys/kernel/fa
DEFAULT_DUMP_MODE="kdump"
image_time=0
+[[ $dracutbasedir ]] || dracutbasedir=/usr/lib/dracut
+. $dracutbasedir/dracut-functions.sh
. /lib/kdump/kdump-lib.sh
standard_kexec_args="-p"
@@ -416,7 +418,11 @@ check_dump_fs_modified()
if [[ $(expr substr $_new_fstype 1 3) = "nfs" ]];then
_new_dev=$_target
else
- _new_dev=$(kdump_get_persistent_dev $_target $_new_fstype)
+ _new_dev=$(persistent_policy="by-id" get_persistent_dev $_target)
+ if [ -z "$_new_dev" ]; then
+ echo "Get persistent device name failed"
+ return 2
+ fi
fi
if ! findmnt $_target >/dev/null; then
--- kexec-tools.orig/mkdumprd
+++ kexec-tools/mkdumprd
@@ -6,6 +6,8 @@
# Written by Cong Wang <amwang(a)redhat.com>
#
+[[ $dracutbasedir ]] || dracutbasedir=/usr/lib/dracut
+. $dracutbasedir/dracut-functions.sh
. /lib/kdump/kdump-lib.sh
export IN_KDUMP=1
@@ -127,8 +129,8 @@ to_mount() {
_mntopts="$_target $_fstype $_options"
#for non-nfs _dev converting to use udev persistent name
if [ -b "$_source" ]; then
- _pdev="$(kdump_get_persistent_dev $_source $_fstype)"
- if [ $? -ne 0 ]; then
+ _pdev="$(get_persistent_dev $_source)"
+ if [ -z "$_pdev" ]; then
return 1
fi
@@ -532,8 +534,8 @@ do
dd if=$config_val count=1 of=/dev/null > /dev/null 2>&1 || {
perror_exit "Bad raw disk $config_val"
}
- _praw=$(kdump_get_persistent_dev $config_val "raw")
- if [ $? -ne 0 ]; then
+ _praw=$(persistent_policy="by-id" get_persistent_dev $config_val)
+ if [ -z "$_praw" ]; then
exit 1
fi
add_dracut_arg "--device" "$_praw"
7 years, 5 months
[PATCH v2 0/3] use trace to debug large kernel module memory consumption
by Xunlei Pang
The current method for kdump memory debugging is to use dracut "rd.memdebug=[0-3]",
but it is not enough for debugging kernel modules. For example, when we want to find
out which kernel module consumes a large amount of memory, "rd.memdebug" does not
help much.
A better way is needed to meet this requirement, which is very useful for kdump
OOM debugging.
The principle of this patch series is to use kernel trace to track slab and buddy
allocation calls during kernel module loading(module_init), thus we can analyze
all the trace data and get the total memory consumption. As for large slab allocation,
it will probably fall into buddy allocation, thus tracing "mm_page_alloc" alone should
be enough for the purpose.
The trace events include memory calls under "tracing/events/":
kmem/mm_page_alloc
We also inspect the following events to detect the module loading:
module/module_load
module/module_put
We can get the module name and task pid from the "module_load" event, which
also marks the beginning of the loading; a module_put called by the
same task pid implies the end of the loading. So the memory events
recorded in between by the same task pid are consumed by this module
during loading (i.e. modprobe or module_init()).
With this information, we can record the total memory consumption (the larger,
the more precise) incurred by each kernel module load.
One major flaw of this method is that the trace ring buffer consumes a lot
of memory. If it is too small, old records may be overwritten by subsequent
records. The trace ring buffer is set to be 5MB by default, but it can be
overridden by users via the standard kernel boot parameter "trace_buf_size".
Users should increase the crash kernel memory reservation as needed after
setting large trace ring buffer size, in case oom happens during debugging.
Usage:
1)Pass "rd.kodebug" to kdump kernel cmdline using "KDUMP_COMMANDLINE_APPEND" in /etc/sysconfig/kdump.
2)Pass the extra "trace_buf_size=nn[KMG]" to specify trace ring buffer size(per cpu) as needed.
As an example, it prints out something below on my kvm machine:
== debug_mem for kernel modules during loading begin ==
4 pages consumed by "dm_mod" [load finished]
0 pages consumed by "dm_log" [load finished]
0 pages consumed by "dm_region_hash" [load finished]
0 pages consumed by "dm_mirror" [load finished]
9 pages consumed by "sunrpc" [load finished]
24 pages consumed by "floppy" [load finished]
0 pages consumed by "libata" [load finished]
0 pages consumed by "i2c_core" [load finished]
27 pages consumed by "ata_piix" [load finished]
0 pages consumed by "drm" [load finished]
1 pages consumed by "ttm" [load finished]
0 pages consumed by "drm_kms_helper" [load finished]
1604 pages consumed by "qxl" [load finished]
0 pages consumed by "virtio" [load finished]
0 pages consumed by "virtio_ring" [load finished]
10 pages consumed by "virtio_pci" [load finished]
1 pages consumed by "pata_acpi" [load finished]
0 pages consumed by "ata_generic" [load finished]
0 pages consumed by "serio_raw" [load finished]
0 pages consumed by "crc32c_intel" [load finished]
0 pages consumed by "crct10dif_common" [load finished]
0 pages consumed by "crct10dif_pclmul" [load finished]
278 pages consumed by "virtio_net" [load finished]
198 pages consumed by "virtio_console" [load finished]
0 pages consumed by "cdrom" [load finished]
6 pages consumed by "sr_mod" [load finished]
162 pages consumed by "virtio_blk" [load finished]
1 pages consumed by "fscache" [load finished]
0 pages consumed by "lockd" [load finished]
17 pages consumed by "nfs" [load finished]
0 pages consumed by "libcrc32c" [load finished]
0 pages consumed by "dns_resolver" [load finished]
8 pages consumed by "xfs" [load finished]
0 pages consumed by "nfsv4" [load finished]
== debug_mem for kernel modules during loading end ==
We can clearly see that "qxl" loading consumed more than 6MB memory.
Xunlei Pang (3):
memdebug-ko: add dracut-memdebug-ko.sh to debug kernel module memory
consumption
module-setup: apply kernel module memory debug support
kexec-kdump-howto: add the debugging tip for rd.kodebug
dracut-kdump.sh | 9 ++++
dracut-memdebug-ko.sh | 117 +++++++++++++++++++++++++++++++++++++++++++++++++
dracut-module-setup.sh | 9 ++++
kdumpctl | 14 ++++++
kexec-kdump-howto.txt | 37 ++++++++++++++++
kexec-tools.spec | 2 +
6 files changed, 188 insertions(+)
create mode 100755 dracut-memdebug-ko.sh
--
1.8.3.1
7 years, 5 months
[RFC PATCH 0/2] Use trace to debug kernel module memory consumption
by Xunlei Pang
The current method for kdump memory debugging is to use dracut "rd.memdebug=[0-3]",
but it is not enough for debugging kernel modules. For example, when we want to find
out which kernel module consumes a large amount of memory, "rd.memdebug" does not
help much.
A better way is needed to meet this requirement, which is very useful for kdump
OOM debugging.
The principle of this patch series is to use kernel trace to track slab and buddy
allocation calls during kernel module loading(module_init), thus we can analyze
all the trace data and get the total memory consumption.
The trace events include memory calls under /sys/kernel/debug/tracing/events:
kmem/mm_page_alloc
kmem/mm_page_free
kmem/kmalloc
kmem/kmalloc_node
kmem/kmem_cache_alloc
kmem/kmem_cache_alloc_node
We also inspect the following events to detect the module loading:
module/module_load
module/module_put
We can get the module name and task pid from the "module_load" event, which
also marks the beginning of the loading; a module_put called by the
same task pid implies the end of the loading. So the memory events
recorded in between by the same task pid are consumed by this module
during loading (i.e. modprobe or module_init()).
With this information, we can record approximately the total memory
consumption incurred by each kernel module load.
One major flaw of this method is that the trace ring buffer consumes a lot
of memory. If it is too small, old records may be overwritten by subsequent
records. The trace ring buffer is set to be 10MB by default, but it can be
overridden by users via the standard kernel boot parameter "trace_buf_size".
Users should increase the crash kernel memory reservation as needed after
setting large trace ring buffer size, in case oom happens during debugging.
Usage:
1)Pass "rd.memdebug" to kdump kernel cmdline using "KDUMP_COMMANDLINE_APPEND" in /etc/sysconfig/kdump.
2)Pass the extra "trace_buf_size=nn[KMG]" to specify trace ring buffer size(per cpu) as needed.
Xunlei Pang (2):
memdebug-ko: add dracut-memdebug-ko.sh to debug kernel module memory
consumption
module-setup: apply kernel module memory debug support
dracut-kdump.sh | 11 ++++
dracut-memdebug-ko.sh | 144 +++++++++++++++++++++++++++++++++++++++++++++++++
dracut-module-setup.sh | 12 +++++
kdumpctl | 14 +++++
kexec-tools.spec | 2 +
5 files changed, 183 insertions(+)
create mode 100755 dracut-memdebug-ko.sh
--
1.8.3.1
7 years, 5 months