[kernel/f15] Add backport for P4 watchdog and perf support from Don Zickus (rhbz 713675)

Josh Boyer jwboyer at fedoraproject.org
Thu Oct 20 13:00:37 UTC 2011


commit 73fbe46367cf6b567a26be06c1cd719e8d49c779
Author: Josh Boyer <jwboyer at redhat.com>
Date:   Thu Oct 20 08:59:15 2011 -0400

    Add backport for P4 watchdog and perf support from Don Zickus (rhbz 713675)

 kernel.spec                                       |    7 +-
 x86-p4-make-watchdog-and-perf-work-together.patch |  267 +++++++++++++++++++++
 2 files changed, 273 insertions(+), 1 deletions(-)
---
diff --git a/kernel.spec b/kernel.spec
index 86dca2a..5216e0e 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -42,7 +42,7 @@ Summary: The Linux kernel
 # When changing real_sublevel below, reset this by hand to 1
 # (or to 0 and then use rpmdev-bumpspec).
 #
-%global baserelease 1
+%global baserelease 2
 %global fedora_build %{baserelease}
 
 # real_sublevel is the 3.x kernel version we're starting with
@@ -652,6 +652,7 @@ Patch12024: usb-add-quirk-for-logitech-webcams.patch
 Patch12025: crypto-register-cryptd-first.patch
 Patch12026: cputimer-Cure-lock-inversion.patch
 Patch12027: x86-efi-Calling-__pa-with-an-ioremap-address-is-invalid.patch
+Patch12028: x86-p4-make-watchdog-and-perf-work-together.patch
 
 # Runtime power management
 Patch12203: linux-2.6-usb-pci-autosuspend.patch
@@ -1246,6 +1247,7 @@ ApplyPatch usb-add-quirk-for-logitech-webcams.patch
 ApplyPatch crypto-register-cryptd-first.patch
 ApplyPatch cputimer-Cure-lock-inversion.patch
 ApplyPatch x86-efi-Calling-__pa-with-an-ioremap-address-is-invalid.patch
+ApplyPatch x86-p4-make-watchdog-and-perf-work-together.patch
 
 # rhbz#605888
 ApplyPatch dmar-disable-when-ricoh-multifunction.patch
@@ -1914,6 +1916,9 @@ fi
 # and build.
 
 %changelog
+* Thu Oct 20 2011 Josh Boyer <jwboyer at redhat.com>
+- Add backport for P4 watchdog and perf support from Don Zickus (rhbz 713675)
+
 * Wed Oct 19 2011 Dave Jones <davej at redhat.com>
 - Add Sony VGN-FW21E to nonvs blacklist. (rhbz 641789)
 
diff --git a/x86-p4-make-watchdog-and-perf-work-together.patch b/x86-p4-make-watchdog-and-perf-work-together.patch
new file mode 100644
index 0000000..9ef049b
--- /dev/null
+++ b/x86-p4-make-watchdog-and-perf-work-together.patch
@@ -0,0 +1,267 @@
+BZ https://bugzilla.redhat.com/show_bug.cgi?id=713675
+
+Let nmi watchdog and perf work together on a P4.  Combination of the following 3.1
+upstream commits (the second commit reverts the first one).
+
+commit 1880c4ae182afb5650c5678949ecfe7ff66a724e
+Author: Cyrill Gorcunov <gorcunov at gmail.com>
+Date:   Thu Jun 23 16:49:18 2011 +0400
+
+    perf, x86: Add hw_watchdog_set_attr() in a sake of nmi-watchdog on P4
+    
+    Due to restriction and specifics of Netburst PMU we need a separated
+    event for NMI watchdog. In particular every Netburst event
+    consumes not just a counter and a config register, but also an
+    additional ESCR register.
+    
+    Since ESCR registers are grouped upon counters (i.e. if ESCR is occupied
+    for some event there is no room for another event to enter until its
+    released) we need to pick up the "least" used ESCR (or the most available
+    one) for nmi-watchdog purposes -- so MSR_P4_CRU_ESCR2/3 was chosen.
+    
+    With this patch nmi-watchdog and perf top should be able to run simultaneously.
+    
+    Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
+    CC: Lin Ming <ming.m.lin at intel.com>
+    CC: Arnaldo Carvalho de Melo <acme at redhat.com>
+    CC: Frederic Weisbecker <fweisbec at gmail.com>
+    Tested-and-reviewed-by: Don Zickus <dzickus at redhat.com>
+    Tested-and-reviewed-by: Stephane Eranian <eranian at google.com>
+    Signed-off-by: Peter Zijlstra <a.p.zijlstra at chello.nl>
+    Link: http://lkml.kernel.org/r/20110623124918.GC13050@sun
+    Signed-off-by: Ingo Molnar <mingo at elte.hu>
+
+commit f91298709790b9a483752ca3c967845537df2af3
+Author: Cyrill Gorcunov <gorcunov at openvz.org>
+Date:   Sat Jul 9 00:17:12 2011 +0400
+
+    perf, x86: P4 PMU - Introduce event alias feature
+    
+    Instead of the hw_nmi_watchdog_set_attr() weak function
+    and the corresponding x86_pmu::hw_watchdog_set_attr() call
+    we introduce an event alias mechanism which allows us
+    to drop these routines completely and isolate the quirks
+    of the Netburst architecture inside P4 PMU code only.
+    
+    The main idea remains the same though -- to allow
+    nmi-watchdog and perf top run simultaneously.
+    
+    Note the aliasing mechanism applies to generic
+    PERF_COUNT_HW_CPU_CYCLES event only because arbitrary
+    event (say passed as RAW initially) might have some
+    additional bits set inside ESCR register changing
+    the behaviour of event and we can't guarantee anymore
+    that alias event will give the same result.
+    
+    P.S. Huge thanks to Don and Steven for testing
+         and early review.
+    
+    Acked-by: Don Zickus <dzickus at redhat.com>
+    Tested-by: Steven Rostedt <rostedt at goodmis.org>
+    Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
+    CC: Ingo Molnar <mingo at elte.hu>
+    CC: Peter Zijlstra <a.p.zijlstra at chello.nl>
+    CC: Stephane Eranian <eranian at google.com>
+    CC: Lin Ming <ming.m.lin at intel.com>
+    CC: Arnaldo Carvalho de Melo <acme at redhat.com>
+    CC: Frederic Weisbecker <fweisbec at gmail.com>
+    Link: http://lkml.kernel.org/r/20110708201712.GS23657@sun
+    Signed-off-by: Steven Rostedt <rostedt at goodmis.org>
+
+
+diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
+index 56fd9e3..4d86c86 100644
+--- a/arch/x86/include/asm/perf_event_p4.h
++++ b/arch/x86/include/asm/perf_event_p4.h
+@@ -102,6 +102,14 @@
+ #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
+ 
+ /*
++ * If an event has alias it should be marked
++ * with a special bit. (Don't forget to check
++ * P4_PEBS_CONFIG_MASK and related bits on
++ * modification.)
++ */
++#define P4_CONFIG_ALIASABLE		(1 << 9)
++
++/*
+  * The bits we allow to pass for RAW events
+  */
+ #define P4_CONFIG_MASK_ESCR		\
+@@ -123,6 +131,31 @@
+ 	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR))	| \
+ 	(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
+ 
++/*
++ * In case of event aliasing we need to preserve some
++ * caller bits otherwise the mapping won't be complete.
++ */
++#define P4_CONFIG_EVENT_ALIAS_MASK			  \
++	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)	| \
++	 p4_config_pack_cccr(P4_CCCR_EDGE		| \
++			     P4_CCCR_THRESHOLD_MASK	| \
++			     P4_CCCR_COMPLEMENT		| \
++			     P4_CCCR_COMPARE))
++
++#define  P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS		  \
++	((P4_CONFIG_HT)					| \
++	 p4_config_pack_escr(P4_ESCR_T0_OS		| \
++			     P4_ESCR_T0_USR		| \
++			     P4_ESCR_T1_OS		| \
++			     P4_ESCR_T1_USR)		| \
++	 p4_config_pack_cccr(P4_CCCR_OVF		| \
++			     P4_CCCR_CASCADE		| \
++			     P4_CCCR_FORCE_OVF		| \
++			     P4_CCCR_THREAD_ANY		| \
++			     P4_CCCR_OVF_PMI_T0		| \
++			     P4_CCCR_OVF_PMI_T1		| \
++			     P4_CONFIG_ALIASABLE))
++
+ static inline bool p4_is_event_cascaded(u64 config)
+ {
+ 	u32 cccr = p4_config_unpack_cccr(config);
+diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
+index ead584f..0c4071a 100644
+--- a/arch/x86/kernel/cpu/perf_event_p4.c
++++ b/arch/x86/kernel/cpu/perf_event_p4.c
+@@ -556,11 +556,92 @@ static __initconst const u64 p4_hw_cache_event_ids
+  },
+ };
+ 
++/*
++ * Because Netburst is quite restricted in how many
++ * identical events can run simultaneously, we use
++ * event aliases, i.e. different events which have the
++ * same functionality but use non-intersecting resources
++ * (ESCR/CCCR/counter registers). This allows us to run
++ * two or more nearly identical events together. It is
++ * done transparently to user space.
++ *
++ * Never set any custom internal bits such as P4_CONFIG_HT,
++ * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC here: they
++ * are either updated automatically or not applicable
++ * at all.
++ *
++ * And be really careful choosing aliases!
++ */
++struct p4_event_alias {
++	u64 orig;
++	u64 alter;
++} p4_event_aliases[] = {
++	{
++		/*
++		 * Non-halted cycles can be substituted with
++		 * non-sleeping cycles (see Intel SDM Vol3b for
++		 * details).
++		 */
++	.orig	=
++		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)		|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
++	.alter	=
++		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT)		|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)	|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)	|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)	|
++				    P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
++		p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT		|
++				    P4_CCCR_COMPARE),
++	},
++};
++
++static u64 p4_get_alias_event(u64 config)
++{
++	u64 config_match;
++	int i;
++
++	/*
++	 * Fast path: only configs flagged P4_CONFIG_ALIASABLE
++	 * can have an alias; return 0 (no alias) otherwise.
++	 */
++	if (!(config & P4_CONFIG_ALIASABLE))
++		return 0;
++
++	config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
++
++	/*
++	 * If an event was previously swapped to the alter config
++	 * we should swap it back, otherwise contention on registers
++	 * will come back.
++	 */
++	for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
++		if (config_match == p4_event_aliases[i].orig) {
++			config_match = p4_event_aliases[i].alter;
++			break;
++		} else if (config_match == p4_event_aliases[i].alter) {
++			config_match = p4_event_aliases[i].orig;
++			break;
++		}
++	}
++
++	if (i >= ARRAY_SIZE(p4_event_aliases))
++		return 0;
++
++	return config_match |
++		(config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
++}
++
+ static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
+   /* non-halted CPU clocks */
+   [PERF_COUNT_HW_CPU_CYCLES] =
+ 	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)		|
+-		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
++		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING))	|
++		P4_CONFIG_ALIASABLE,
+ 
+   /*
+    * retired instructions
+@@ -1120,6 +1201,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
+ 	struct p4_event_bind *bind;
+ 	unsigned int i, thread, num;
+ 	int cntr_idx, escr_idx;
++	u64 config_alias;
++	int pass;
+ 
+ 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+ 	bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
+@@ -1128,6 +1211,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
+ 
+ 		hwc = &cpuc->event_list[i]->hw;
+ 		thread = p4_ht_thread(cpu);
++		pass = 0;
++
++again:
++		/*
++		 * Aliases are swappable so we may hit a circular
++		 * lock if both the original config and its alias need
++		 * resources (MSR registers) which are already busy.
++		 */
++		if (pass > 2)
++			goto done;
++
+ 		bind = p4_config_get_bind(hwc->config);
+ 		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
+ 		if (unlikely(escr_idx == -1))
+@@ -1141,8 +1235,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
+ 		}
+ 
+ 		cntr_idx = p4_next_cntr(thread, used_mask, bind);
+-		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
+-			goto done;
++		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
++			/*
++			 * Probably an event alias is still available.
++			 */
++			config_alias = p4_get_alias_event(hwc->config);
++			if (!config_alias)
++				goto done;
++			hwc->config = config_alias;
++			pass++;
++			goto again;
++		}
+ 
+ 		p4_pmu_swap_config_ts(hwc, cpu);
+ 		if (assign)


More information about the scm-commits mailing list