[kernel/f14/master] Add sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch

Sat Sep 25 12:30:52 UTC 2010

commit b49b116bb07cc234c6da4d93d6f66b26a8eb1554
Author: Chuck Ebbert <cebbert at redhat.com>
Date:   Sat Sep 25 08:29:41 2010 -0400

    Add sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch

 kernel.spec                                        |    4 +
 ...ment-cache_nice_tries-only-on-periodic-lb.patch |   93 ++++++++++++++++++++
 2 files changed, 97 insertions(+), 0 deletions(-)
---

diff --git a/kernel.spec b/kernel.spec
index e8e6cc6..e61da1f 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -749,6 +749,7 @@ Patch12575: sched-15-update-rq-clock-for-nohz-balanced-cpus.patch
 Patch12580: sched-20-fix-rq-clock-synchronization-when-migrating-tasks.patch
 Patch12585: sched-25-move-sched_avg_update-to-update_cpu_load.patch
 Patch12590: sched-30-sched-fix-nohz-balance-kick.patch
+Patch12595: sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch
 
 Patch13600: btusb-macbookpro-6-2.patch
 Patch13601: btusb-macbookpro-7-1.patch
@@ -1392,6 +1393,7 @@ ApplyPatch sched-15-update-rq-clock-for-nohz-balanced-cpus.patch
 ApplyPatch sched-20-fix-rq-clock-synchronization-when-migrating-tasks.patch
 ApplyPatch sched-25-move-sched_avg_update-to-update_cpu_load.patch
 ApplyPatch sched-30-sched-fix-nohz-balance-kick.patch
+ApplyPatch sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch
 
 ApplyPatch btusb-macbookpro-7-1.patch
 ApplyPatch btusb-macbookpro-6-2.patch
@@ -1998,6 +2000,8 @@ fi
    sched-00-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch
 - Revert: "drm/nv50: initialize ramht_refs list for faked 0 channel"
   (our DRM update removes ramht_refs entirely.)
+- Add sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch, another
+  fix for excessive scheduler load balancing.
 
 * Thu Sep 23 2010 Kyle McMartin <kyle at redhat.com> 2.6.35.5-32
 - Serialize mandocs/htmldocs build, since otherwise it will constantly
diff --git a/sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch b/sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch
new file mode 100644
index 0000000..5277dc7
--- /dev/null
+++ b/sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch
@@ -0,0 +1,93 @@
+From: Venkatesh Pallipadi <venki at google.com>
+Date: Sat, 11 Sep 2010 01:19:17 +0000 (-0700)
+Subject: sched: Increment cache_nice_tries only on periodic lb
+X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fmingo%2Flinux-2.6-x86.git;a=commitdiff_plain;h=58b26c4c025778c09c7a1438ff185080e11b7d0a
+
+sched: Increment cache_nice_tries only on periodic lb
+
+The scheduler uses cache_nice_tries as an indicator to do cache_hot
+and active load balance when normal load balance fails. Currently,
+this value is changed on any failed load balance attempt. That ends
+up being not so nice to workloads that enter/exit idle often, as
+they do more frequent new_idle balance and that pretty soon results
+in cache hot tasks being pulled in.
+
+Making cache_nice_tries ignore failed new_idle balances seems to
+make more sense. With that, only failed load balances in the
+periodic load balance get accounted, and the rate of accumulation
+of cache_nice_tries will not depend on idle entry/exit (short
+running sleep-wakeup kind of tasks). This reduces movement of
+cache_hot tasks.
+
+schedstat diff (after-before) excerpt from a workload that has a
+frequent and short wakeup-idle pattern (:2 in the cpu column below
+refers to the NEWIDLE idx). This snapshot was taken across ~400 seconds.
+
+Without this change:
+domainstats:  domain0
+ cpu     cnt      bln      fld      imb     gain    hgain  nobusyq  nobusyg
+ 0:2  306487   219575    73167  110069413    44583    19070     1172   218403
+ 1:2  292139   194853    81421  120893383    50745    21902     1259   193594
+ 2:2  283166   174607    91359  129699642    54931    23688     1287   173320
+ 3:2  273998   161788    93991  132757146    57122    24351     1366   160422
+ 4:2  289851   215692    62190   83398383    36377    13680      851   214841
+ 5:2  316312   222146    77605  117582154    49948    20281      988   221158
+ 6:2  297172   195596    83623  122133390    52801    21301      929   194667
+ 7:2  283391   178078    86378  126622761    55122    22239      928   177150
+ 8:2  297655   210359    72995  110246694    45798    19777     1125   209234
+ 9:2  297357   202011    79363  119753474    50953    22088     1089   200922
+10:2  278797   178703    83180  122514385    52969    22726     1128   177575
+11:2  272661   167669    86978  127342327    55857    24342     1195   166474
+12:2  293039   204031    73211  110282059    47285    19651      948   203083
+13:2  289502   196762    76803  114712942    49339    20547     1016   195746
+14:2  264446   169609    78292  115715605    50459    21017      982   168627
+15:2  260968   163660    80142  116811793    51483    21281     1064   162596
+
+With this change:
+domainstats:  domain0
+ cpu     cnt      bln      fld      imb     gain    hgain  nobusyq  nobusyg
+ 0:2  272347   187380    77455  105420270    24975        1      953   186427
+ 1:2  267276   172360    86234  116242264    28087        6     1028   171332
+ 2:2  259769   156777    93281  123243134    30555        1     1043   155734
+ 3:2  250870   143129    97627  127370868    32026        6     1188   141941
+ 4:2  248422   177116    64096   78261112    22202        2      757   176359
+ 5:2  275595   180683    84950  116075022    29400        6      778   179905
+ 6:2  262418   162609    88944  119256898    31056        4      817   161792
+ 7:2  252204   147946    92646  122388300    32879        4      824   147122
+ 8:2  262335   172239    81631  110477214    26599        4      864   171375
+ 9:2  261563   164775    88016  117203621    28331        3      849   163926
+10:2  243389   140949    93379  121353071    29585        2      909   140040
+11:2  242795   134651    98310  124768957    30895        2     1016   133635
+12:2  255234   166622    79843  104696912    26483        4      746   165876
+13:2  244944   151595    83855  109808099    27787        3      801   150794
+14:2  241301   140982    89935  116954383    30403        6      845   140137
+15:2  232271   128564    92821  119185207    31207        4     1416   127148
+
+Signed-off-by: Venkatesh Pallipadi <venki at google.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra at chello.nl>
+LKML-Reference: <1284167957-3675-1-git-send-email-venki at google.com>
+Signed-off-by: Ingo Molnar <mingo at elte.hu>
+---
+
+[ 2.6.35.x backport ]
+
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index a171138..aa16cf1 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -3031,7 +3031,14 @@ redo:
+ 
+ 	if (!ld_moved) {
+ 		schedstat_inc(sd, lb_failed[idle]);
+-		sd->nr_balance_failed++;
++		/*
++		 * Increment the failure counter only on periodic balance.
++		 * We do not want newidle balance, which can be very
++		 * frequent, pollute the failure counter causing
++		 * excessive cache_hot migrations and active balances.
++		 */
++		if (idle != CPU_NEWLY_IDLE)
++			sd->nr_balance_failed++;
+ 
+ 		if (need_active_balance(sd, sd_idle, idle)) {
+ 			raw_spin_lock_irqsave(&busiest->lock, flags);