Check the number of cpus for x86_64 kdump kernel to boot with.
We hit an issue on x86_64: the kdump kernel runs out of interrupt vectors
with the default "nr_cpus=1" when devices request a large number of irqs.
This patch detects that situation and warns users about the risk.
Signed-off-by: Xunlei Pang <xlpang(a)redhat.com>
---
v1->v2:
- When risky cpu vector usage is detected, we just warn users instead of
forcibly modifying "nr_cpus=X".
- Improved code comments.
- Replaced nr_old with nr_origin, and improved some logic.
v2->v3:
- Improved the code according to Dave's suggestions.
v3->v4:
- Only handle nr_cpus=1
kdumpctl | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/kdumpctl b/kdumpctl
index b2068cc..4411ec5 100755
--- a/kdumpctl
+++ b/kdumpctl
@@ -105,6 +105,78 @@ append_cmdline()
echo $cmdline
}
+# Warn when the kdump kernel is set to boot with "nr_cpus=1" on an x86_64
+# machine whose device irqs may not fit into the vectors of a single cpu.
+# Globals: KDUMP_COMMANDLINE_APPEND (read). Outputs: a one-line warning on
+# stdout when the estimate (capped at the online cpu count) exceeds one cpu.
+check_kdump_cpus()
+{
+	local nr_origin nr_min nr_max
+	local arch=$(uname -m) cmdline=$KDUMP_COMMANDLINE_APPEND
+
+	# Special treatment for x86_64 only currently.
+	if [ "$arch" != "x86_64" ]; then
+		return
+	fi
+
+	# We only care about the default "nr_cpus=1". Match it as a whole word
+	# so that e.g. "nr_cpus=16" or "maxnr_cpus=1" is not mistaken for it.
+	if ! printf '%s\n' "$cmdline" | grep -E -q '(^|[[:space:]])nr_cpus=1([[:space:]]|$)'; then
+		return
+	fi
+
+	nr_origin=1
+
+	# Online cpus in first kernel.
+	nr_max=$(grep -c '^processor' /proc/cpuinfo)
+
+	# Count the irq directories; used below to estimate the minimal cpus.
+	nr_min=$(ls -l /proc/irq/ | grep -c '^d')
+
+	# The total number of vectors per cpu is 256, defined by x86 architecture.
+	# The vectors that can be allocated to io devices on each cpu start
+	# from FIRST_EXTERNAL_VECTOR (see kernel code), and some high-numbered
+	# ones are consumed by system interrupts. As a result, the vectors
+	# for io devices are within [FIRST_EXTERNAL_VECTOR, FIRST_SYSTEM_VECTOR),
+	# with one known exception: 0x80 within the range is reserved specially
+	# as the syscall vector.
+	#
+	# FIRST_EXTERNAL_VECTOR is invariably 32, while FIRST_SYSTEM_VECTOR can
+	# vary between different kernel versions. E.g. FIRST_SYSTEM_VECTOR is
+	# 0xef (with CONFIG_X86_LOCAL_APIC on) for linux-4.10, that is 17 vectors
+	# reserved. Considering it may increase in the future, plus the special
+	# vectors, we use a flexible variance and assume there are 32 reserved
+	# from FIRST_EXTERNAL_VECTOR. Then the max vectors for device interrupts
+	# per cpu is: (256-32)-32=192.
+	#
+	# For "nr_cpus=1", irq and vector have the 1:1 mapping; round up to cpus.
+	nr_min=$((nr_min + 192 - 1))
+	nr_min=$((nr_min / 192))
+	if [ "$nr_min" -gt 1 ]; then
+		# The system seems to have tons of interrupts, and interrupts with
+		# multiple-cpu affinity can consume multiple vectors (i.e. 1:M mapping),
+		# with one vector for each cpu within the affinity mask. Fortunately
+		# for x2apic, which is widely used on large modern machines, in the
+		# default case boot, device bringup etc. will use a single cpu for
+		# interrupt affinity to minimize vector pressure.
+		#
+		# For further safety, we add one more cpu and round it up to an even
+		# number, which is commonly used.
+		nr_min=$((nr_min + 1))
+		nr_min=$((nr_min + nr_min % 2))
+	fi
+
+	if [ "$nr_min" -gt "$nr_max" ]; then
+		nr_min=$nr_max
+	fi
+
+	if [ "$nr_origin" -ge "$nr_min" ]; then
+		return
+	fi
+
+	echo "Warning: nr_cpus=$nr_origin may not be enough for kdump boot, try nr_cpus=$nr_min or larger instead"
+}
+
# This function performs a series of edits on the command line.
# Store the final result in global $KDUMP_COMMANDLINE.
prepare_cmdline()
@@ -134,6 +206,8 @@ prepare_cmdline()
fi
KDUMP_COMMANDLINE=$cmdline
+
+ check_kdump_cpus
}
--
1.8.3.1