author     Linus Torvalds <torvalds@linux-foundation.org>  2012-03-20 13:31:44 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-03-20 13:31:44 -0400
commit     2ba68940c893c8f0bfc8573c041254251bb6aeab
tree       fa83ebb01d32abd98123fa28f9f6f0b3eaeee25d
parent     9c2b957db1772ebf942ae7a9346b14eba6c8ca66
parent     600e145882802d6ccbfe2c4aea243d97caeb91a9
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes for v3.4 from Ingo Molnar
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
printk: Make it compile with !CONFIG_PRINTK
sched/x86: Fix overflow in cyc2ns_offset
sched: Fix nohz load accounting -- again!
sched: Update yield() docs
printk/sched: Introduce special printk_sched() for those awkward moments
sched/nohz: Correctly initialize 'next_balance' in 'nohz' idle balancer
sched: Cleanup cpu_active madness
sched: Fix load-balance wreckage
sched: Clean up parameter passing of proc_sched_autogroup_set_nice()
sched: Ditch per cgroup task lists for load-balancing
sched: Rename load-balancing fields
sched: Move load-balancing arguments into helper struct
sched/rt: Do not submit new work when PI-blocked
sched/rt: Prevent idle task boosting
sched/wait: Add __wake_up_all_locked() API
sched/rt: Document scheduler related skip-resched-check sites
sched/rt: Use schedule_preempt_disabled()
sched/rt: Add schedule_preempt_disabled()
sched/rt: Do not throttle when PI boosting
sched/rt: Keep period timer ticking when rt throttling is active
...
52 files changed, 462 insertions, 462 deletions
diff --git a/Documentation/scheduler/sched-stats.txt b/Documentation/scheduler/sched-stats.txt
index 1cd5d51bc761..8259b34a66ae 100644
--- a/Documentation/scheduler/sched-stats.txt
+++ b/Documentation/scheduler/sched-stats.txt
@@ -38,7 +38,8 @@ First field is a sched_yield() statistic:
      1) # of times sched_yield() was called
 
 Next three are schedule() statistics:
-     2) # of times we switched to the expired queue and reused it
+     2) This field is a legacy array expiration count field used in the O(1)
+        scheduler. We kept it for ABI compatibility, but it is always set to zero.
      3) # of times schedule() was called
      4) # of times schedule() left the processor idle
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 971d65c253a9..c2ae3cd331fe 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -239,9 +239,7 @@ void cpu_idle(void)
         leds_event(led_idle_end);
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
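Every architecture's idle loop in the hunks below makes the same substitution: the open-coded preempt_enable_no_resched(); schedule(); preempt_disable(); sequence becomes a single schedule_preempt_disabled() call. As a minimal sketch of what that helper does (its real definition is the kernel/sched/core.c hunk near the end of this diff):

    void __sched schedule_preempt_disabled(void)
    {
            /* drop the preempt count without triggering a reschedule check */
            sched_preempt_enable_no_resched();
            schedule();
            /* return with preemption disabled again, preempt_count == 1 */
            preempt_disable();
    }

The _no_resched variant is deliberate: the caller is about to call schedule() explicitly, so the need_resched() check a full preempt_enable() would perform is redundant here.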
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cdeb727527d3..d616ed51e7a7 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -295,13 +295,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
      */
     percpu_timer_setup();
 
-    while (!cpu_active(cpu))
-        cpu_relax();
-
-    /*
-     * cpu_active bit is set, so it's safe to enalbe interrupts
-     * now.
-     */
     local_irq_enable();
     local_fiq_enable();
 
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index ea3395750324..92c5af98a6f7 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -40,9 +40,7 @@ void cpu_idle(void)
         cpu_idle_sleep();
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 8dd0416673cb..a80a643f3691 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -94,9 +94,7 @@ void cpu_idle(void)
         idle();
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index aa585e4e979e..d8f50ff6fadd 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -115,9 +115,7 @@ void cpu_idle (void)
                 idle = default_idle;
             idle();
         }
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 3901df1213c0..29cc49783787 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -92,9 +92,7 @@ void cpu_idle(void)
             idle();
         }
 
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 933bd388efb2..1a173b35f475 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -81,9 +81,7 @@ void cpu_idle(void)
     while (1) {
         while (!need_resched())
             idle();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index c871a2cffaef..0123c63e9a3a 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -179,8 +179,6 @@ void __cpuinit start_secondary(void)
     printk(KERN_INFO "%s cpu %d\n", __func__, current_thread_info()->cpu);
 
     set_cpu_online(cpu, true);
-    while (!cpumask_test_cpu(cpu, cpu_active_mask))
-        cpu_relax();
     local_irq_enable();
 
     cpu_idle();
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 6d33c5cc94f0..9dc52b63fc87 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -330,9 +330,7 @@ cpu_idle (void)
             normal_xtp();
 #endif
         }
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
         check_pgt_cache();
         if (cpu_is_offline(cpu))
             play_dead();
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index 422bea9f1dbc..3a4a32b27208 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -90,9 +90,7 @@ void cpu_idle (void)
 
             idle();
         }
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/m68k/kernel/process_mm.c b/arch/m68k/kernel/process_mm.c
index 099283ee1a8f..fe4186b5fc32 100644
--- a/arch/m68k/kernel/process_mm.c
+++ b/arch/m68k/kernel/process_mm.c
@@ -78,9 +78,7 @@ void cpu_idle(void)
     while (1) {
         while (!need_resched())
             idle();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/m68k/kernel/process_no.c b/arch/m68k/kernel/process_no.c
index 5e1078cabe0e..f7fe6c348595 100644
--- a/arch/m68k/kernel/process_no.c
+++ b/arch/m68k/kernel/process_no.c
@@ -73,9 +73,7 @@ void cpu_idle(void)
     /* endless idle loop with no priority at all */
     while (1) {
         idle();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 7dcb5bfffb75..9155f7d92669 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -110,9 +110,7 @@ void cpu_idle(void)
         rcu_idle_exit();
         tick_nohz_idle_exit();
 
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
         check_pgt_cache();
     }
 }
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 7955409051c4..61f1cb45a1d5 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -80,9 +80,7 @@ void __noreturn cpu_idle(void)
 #endif
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 28eec3102535..cac401d37f75 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -123,9 +123,7 @@ void cpu_idle(void)
             idle();
         }
 
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 62c60b87d039..d4b94b395c16 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -71,9 +71,7 @@ void cpu_idle(void)
     while (1) {
         while (!need_resched())
             barrier();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
         check_pgt_cache();
     }
 }
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 0a48bf5db6c8..c97fc60c790c 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -101,11 +101,11 @@ void cpu_idle(void)
         ppc64_runlatch_on();
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        if (cpu_should_die())
+        if (cpu_should_die()) {
+            sched_preempt_enable_no_resched();
             cpu_die();
-        schedule();
-        preempt_disable();
+        }
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 8fc62586a973..a5fbf4cb6329 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -584,9 +584,7 @@ static void iseries_shared_idle(void)
         if (hvlpevent_is_pending())
             process_iSeries_events();
 
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
@@ -615,9 +613,7 @@ static void iseries_dedicated_idle(void)
         ppc64_runlatch_on();
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index e795933eb2cb..7618085b4164 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -97,9 +97,7 @@ void cpu_idle(void)
         tick_nohz_idle_exit();
         if (test_thread_flag(TIF_MCCK_PENDING))
             s390_handle_mcck();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2398ce6b15ae..b0e28c47ab83 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -550,12 +550,6 @@ int __cpuinit start_secondary(void *cpuvoid)
     S390_lowcore.restart_psw.addr =
         PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
     __ctl_set_bit(0, 28); /* Enable lowcore protection */
-    /*
-     * Wait until the cpu which brought this one up marked it
-     * active before enabling interrupts.
-     */
-    while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
-        cpu_relax();
     local_irq_enable();
     /* cpu_idle will call schedule for us */
     cpu_idle();
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index 25d08030a883..2707023c7563 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -53,9 +53,7 @@ void __noreturn cpu_idle(void)
         while (!need_resched())
             barrier();
 
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 406508d4ce74..7e4892826563 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -114,9 +114,7 @@ void cpu_idle(void)
 
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index f793742eec2b..935fdbcd88c2 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -113,9 +113,7 @@ void cpu_idle(void)
             while (!need_resched())
                 cpu_relax();
         }
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
         check_pgt_cache();
     }
 }
@@ -138,9 +136,7 @@ void cpu_idle(void)
             while (!need_resched())
                 cpu_relax();
         }
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
         check_pgt_cache();
     }
 }
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 39d8b05201a2..06b5b5fc20c7 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -104,15 +104,13 @@ void cpu_idle(void)
         rcu_idle_exit();
         tick_nohz_idle_exit();
 
-        preempt_enable_no_resched();
-
 #ifdef CONFIG_HOTPLUG_CPU
-        if (cpu_is_offline(cpu))
+        if (cpu_is_offline(cpu)) {
+            sched_preempt_enable_no_resched();
             cpu_play_dead();
+        }
 #endif
-
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 4c1ac6e5347a..6ae495ef2b99 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -108,9 +108,7 @@ void cpu_idle(void)
         }
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 431793e5d484..34baa0eb5d0c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
 static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
-    unsigned long long quot;
-    unsigned long long rem;
     int cpu = smp_processor_id();
     unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-    quot = (cyc >> CYC2NS_SCALE_FACTOR);
-    rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
-    ns += quot * per_cpu(cyc2ns, cpu) +
-          ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
+    ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+                    (1UL << CYC2NS_SCALE_FACTOR));
     return ns;
 }
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c08d1ff12b7c..49888fefe794 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -119,9 +119,7 @@ void cpu_idle(void)
         }
         rcu_idle_exit();
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index cfa5c90c01db..e34257c70c28 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -156,9 +156,7 @@ void cpu_idle(void)
         }
 
         tick_nohz_idle_exit();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 66d250c00d11..58f78165d308 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -291,19 +291,6 @@ notrace static void __cpuinit start_secondary(void *unused)
     per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
     x86_platform.nmi_init();
 
-    /*
-     * Wait until the cpu which brought this one up marked it
-     * online before enabling interrupts. If we don't do that then
-     * we can end up waking up the softirq thread before this cpu
-     * reached the active state, which makes the scheduler unhappy
-     * and schedule the softirq thread on the wrong cpu. This is
-     * only observable with forced threaded interrupts, but in
-     * theory it could also happen w/o them. It's just way harder
-     * to achieve.
-     */
-    while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
-        cpu_relax();
-
     /* enable local interrupts */
     local_irq_enable();
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201c97ec..183c5925a9fe 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -620,7 +620,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 
     if (cpu_khz) {
         *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
-        *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+        *offset = ns_now - mult_frac(tsc_now, *scale,
+                                     (1UL << CYC2NS_SCALE_FACTOR));
     }
 
     sched_clock_idle_wakeup_event(0);
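To make the overflow this hunk fixes concrete (the figures here are illustrative, not from the patch): with cpu_khz around 3,000,000 the computed *scale is roughly (1000000 << 10) / 3000000 ≈ 341, so the old expression tsc_now * *scale is a plain 64-bit multiply whose product exceeds 2^64 once tsc_now passes about 2^64 / 341 ≈ 5.4e16 cycles — on the order of 200 days of uptime at 3 GHz — after which the shifted result silently wraps. mult_frac() divides by 2^CYC2NS_SCALE_FACTOR first and only multiplies the quotient and the small remainder, so neither partial product can overflow for realistic inputs.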
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 47041e7c088c..2c9004770c4e 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -113,9 +113,7 @@ void cpu_idle(void)
     while (1) {
         while (!need_resched())
             platform_idle();
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
     }
 }
 
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 1366a89d8e66..467c8de88642 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -8,6 +8,7 @@
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
+#include <linux/sched.h>
 
 #include "blk.h"
 
@@ -103,9 +104,10 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
 
 void __blk_complete_request(struct request *req)
 {
-    int ccpu, cpu, group_cpu = NR_CPUS;
+    int ccpu, cpu;
     struct request_queue *q = req->q;
     unsigned long flags;
+    bool shared = false;
 
     BUG_ON(!q->softirq_done_fn);
 
@@ -117,22 +119,20 @@ void __blk_complete_request(struct request *req)
      */
     if (req->cpu != -1) {
         ccpu = req->cpu;
-        if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
-            ccpu = blk_cpu_to_group(ccpu);
-            group_cpu = blk_cpu_to_group(cpu);
-        }
+        if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
+            shared = cpus_share_cache(cpu, ccpu);
     } else
         ccpu = cpu;
 
     /*
-     * If current CPU and requested CPU are in the same group, running
-     * softirq in current CPU. One might concern this is just like
+     * If current CPU and requested CPU share a cache, run the softirq on
+     * the current CPU. One might concern this is just like
      * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
      * running in interrupt handler, and currently I/O controller doesn't
      * support multiple interrupts, so current CPU is unique actually. This
      * avoids IPI sending from current CPU to the first CPU of a group.
      */
-    if (ccpu == cpu || ccpu == group_cpu) {
+    if (ccpu == cpu || shared) {
         struct list_head *list;
 do_local:
         list = &__get_cpu_var(blk_cpu_done);
diff --git a/block/blk.h b/block/blk.h
index 9c12f80882b0..d45be871329e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -166,22 +166,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
     return q->nr_congestion_off;
 }
 
-static inline int blk_cpu_to_group(int cpu)
-{
-    int group = NR_CPUS;
-#ifdef CONFIG_SCHED_MC
-    const struct cpumask *mask = cpu_coregroup_mask(cpu);
-    group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-    group = cpumask_first(topology_thread_cpumask(cpu));
-#else
-    return cpu;
-#endif
-    if (likely(group < NR_CPUS))
-        return group;
-    return cpu;
-}
-
 /*
  * Contribute to IO statistics IFF:
  *
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d4548dd49b02..965d4bde3a3b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
     if (!p)
         return -ESRCH;
 
-    err = nice;
-    err = proc_sched_autogroup_set_nice(p, &err);
+    err = proc_sched_autogroup_set_nice(p, nice);
     if (err)
         count = err;
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9c66b1ada9d7..f994d51f70f2 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -149,7 +149,7 @@ extern struct cred init_cred;
     },                                                  \
     .rt = {                                             \
         .run_list = LIST_HEAD_INIT(tsk.rt.run_list),    \
-        .time_slice = HZ,                               \
+        .time_slice = RR_TIMESLICE,                     \
         .nr_cpus_allowed = NR_CPUS,                     \
     },                                                  \
     .tasks = LIST_HEAD_INIT(tsk.tasks),                 \
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e8343422240a..d801acb5e680 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,6 +85,19 @@
     }                                               \
 )
 
+/*
+ * Multiplies an integer by a fraction, while avoiding unnecessary
+ * overflow or loss of precision.
+ */
+#define mult_frac(x, numer, denom)(                     \
+{                                                       \
+    typeof(x) quot = (x) / (denom);                     \
+    typeof(x) rem  = (x) % (denom);                     \
+    (quot * (numer)) + ((rem * (numer)) / (denom));     \
+}                                                       \
+)
+
+
 #define _RET_IP_   (unsigned long)__builtin_return_address(0)
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
 
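A small stand-alone illustration of the overflow the new macro avoids. This is a sketch compiled as ordinary GNU C in user space (the macro relies on typeof and statement expressions); the cycle count and scale values are made up for the example rather than taken from the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define mult_frac(x, numer, denom)(                     \
    {                                                       \
        typeof(x) quot = (x) / (denom);                     \
        typeof(x) rem  = (x) % (denom);                     \
        (quot * (numer)) + ((rem * (numer)) / (denom));     \
    }                                                       \
    )

    int main(void)
    {
        /* roughly one year of 3 GHz TSC cycles */
        uint64_t cyc   = 3ULL * 1000000000 * 3600 * 24 * 365;
        uint64_t scale = 341;   /* ~ (10^6 << 10) / 3,000,000 kHz */

        /* naive form: the 64-bit product cyc * scale wraps before the shift */
        printf("naive:     %llu ns\n",
               (unsigned long long)((cyc * scale) >> 10));
        /* mult_frac(): divide first, so the partial products stay in range */
        printf("mult_frac: %llu ns\n",
               (unsigned long long)mult_frac(cyc, scale, 1ULL << 10));
        return 0;
    }

The second line prints about 3.15e16 ns (one year); the first wraps and prints a much smaller, wrong value. The arch/x86 timer.h and tsc.c hunks above are the in-tree users this macro was introduced for.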
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 58969b2a8a82..5a710b9c578e 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -48,12 +48,14 @@ do { \
     barrier();                  \
 } while (0)
 
-#define preempt_enable_no_resched() \
+#define sched_preempt_enable_no_resched() \
 do { \
     barrier();                  \
     dec_preempt_count();        \
 } while (0)
 
+#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+
 #define preempt_enable() \
 do { \
     preempt_enable_no_resched();    \
@@ -92,6 +94,7 @@ do { \
 #else /* !CONFIG_PREEMPT_COUNT */
 
 #define preempt_disable()                 do { } while (0)
+#define sched_preempt_enable_no_resched() do { } while (0)
 #define preempt_enable_no_resched()       do { } while (0)
 #define preempt_enable()                  do { } while (0)
 
diff --git a/include/linux/printk.h b/include/linux/printk.h
index f0e22f75143f..1f77a4174ee0 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -101,6 +101,11 @@ asmlinkage __printf(1, 2) __cold
 int printk(const char *fmt, ...);
 
 /*
+ * Special printk facility for scheduler use only, _DO_NOT_USE_ !
+ */
+__printf(1, 2) __cold int printk_sched(const char *fmt, ...);
+
+/*
  * Please don't use printk_ratelimit(), because it shares ratelimiting state
 * with all other unrelated printk_ratelimit() callsites. Instead use
 * printk_ratelimited() or plain old __ratelimit().
@@ -127,6 +132,11 @@ int printk(const char *s, ...)
 {
     return 0;
 }
+static inline __printf(1, 2) __cold
+int printk_sched(const char *s, ...)
+{
+    return 0;
+}
 static inline int printk_ratelimit(void)
 {
     return 0;
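The declaration above is paired with the buffered implementation in the kernel/printk.c hunk further down; the only caller this series adds is in kernel/sched/core.c, where a message has to be emitted while runqueue locks are held:

    /* from the select_fallback_rq() hunk below: format now, print later from printk_tick() */
    if (p->mm && printk_ratelimit())
            printk_sched("process %d (%s) no longer affine to cpu%d\n",
                         task_pid_nr(p), p->comm, cpu);

Because printk_sched() only copies the text into a per-CPU buffer and the actual printk() happens later from the timer tick, the call avoids the klogd wake-up path that a plain printk() from inside the scheduler could re-enter — hence the _DO_NOT_USE_ warning for everyone else.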
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e345163da657..e074e1e54f85 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -361,6 +361,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
+extern void schedule_preempt_disabled(void);
 extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 
 struct nsproxy;
@@ -905,6 +906,7 @@ struct sched_group_power {
      * single CPU.
      */
     unsigned int power, power_orig;
+    unsigned long next_update;
     /*
      * Number of busy cpus in this group.
      */
@@ -1052,6 +1054,8 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag)
 unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
 unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
 
+bool cpus_share_cache(int this_cpu, int that_cpu);
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -1061,6 +1065,12 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
             struct sched_domain_attr *dattr_new)
 {
 }
+
+static inline bool cpus_share_cache(int this_cpu, int that_cpu)
+{
+    return true;
+}
+
 #endif /* !CONFIG_SMP */
 
 
@@ -1225,6 +1235,12 @@ struct sched_rt_entity {
 #endif
 };
 
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE        (100 * HZ / 1000)
+
 struct rcu_node;
 
 enum perf_event_task_context {
@@ -2047,7 +2063,7 @@ extern void sched_autogroup_fork(struct signal_struct *sig);
 extern void sched_autogroup_exit(struct signal_struct *sig);
 #ifdef CONFIG_PROC_FS
 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
-extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
 #endif
 #else
 static inline void sched_autogroup_create_attach(struct task_struct *p) { }
@@ -2064,12 +2080,20 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+    return tsk->pi_blocked_on != NULL;
+}
 #else
 static inline int rt_mutex_getprio(struct task_struct *p)
 {
     return p->normal_prio;
 }
 # define rt_mutex_adjust_pi(p)      do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+    return false;
+}
 #endif
 
 extern bool yield_to(struct task_struct *p, bool preempt);
@@ -2388,12 +2412,15 @@ static inline void task_unlock(struct task_struct *p)
 extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
                                                   unsigned long *flags);
 
-#define lock_task_sighand(tsk, flags)                               \
-({  struct sighand_struct *__ss;                                    \
-    __cond_lock(&(tsk)->sighand->siglock,                           \
-                (__ss = __lock_task_sighand(tsk, flags)));          \
-    __ss;                                                           \
-})                                                                  \
+static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
+                                                       unsigned long *flags)
+{
+    struct sighand_struct *ret;
+
+    ret = __lock_task_sighand(tsk, flags);
+    (void)__cond_lock(&tsk->sighand->siglock, ret);
+    return ret;
+}
 
 static inline void unlock_task_sighand(struct task_struct *tsk,
                                        unsigned long *flags)
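The RR_TIMESLICE definition added above keeps the SCHED_RR quantum at roughly 100 ms regardless of the tick rate: (100 * HZ / 1000) works out to 100 jiffies with HZ=1000, 25 with HZ=250 and 10 with HZ=100. The init_task.h hunk earlier switches the boot task's .time_slice from HZ — which is a full second's worth of ticks at any HZ — to this value, presumably so the initial slice matches the SCHED_RR default rather than a whole second.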
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a9ce45e8501c..7d9a9e990ce6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -157,7 +157,7 @@ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
             void *key);
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode);
+void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_bit(wait_queue_head_t *, void *, int);
 int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
@@ -170,7 +170,8 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 #define wake_up(x)                  __wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)           __wake_up(x, TASK_NORMAL, nr, NULL)
 #define wake_up_all(x)              __wake_up(x, TASK_NORMAL, 0, NULL)
-#define wake_up_locked(x)           __wake_up_locked((x), TASK_NORMAL)
+#define wake_up_locked(x)           __wake_up_locked((x), TASK_NORMAL, 1)
+#define wake_up_all_locked(x)       __wake_up_locked((x), TASK_NORMAL, 0)
 
 #define wake_up_interruptible(x)    __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
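__wake_up_locked() grows an nr argument so that a caller which already holds the waitqueue spinlock can wake every waiter rather than just one; as with wake_up_all(), passing 0 for nr_exclusive means "no limit". A hedged sketch of the intended call-site shape — the surrounding locking here is illustrative, not taken from this series:

    /* q is a wait_queue_head_t *; the caller already needs q->lock for its own state */
    spin_lock_irqsave(&q->lock, flags);
    /* ... update the condition the sleepers are waiting on ... */
    wake_up_all_locked(q);      /* expands to __wake_up_locked(q, TASK_NORMAL, 0) */
    spin_unlock_irqrestore(&q->lock, flags);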
diff --git a/init/main.c b/init/main.c
index ff49a6dacfbb..4990f7ec776a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -374,11 +374,8 @@ static noinline void __init_refok rest_init(void)
      * at least once to get things moving:
      */
     init_idle_bootup_task(current);
-    preempt_enable_no_resched();
-    schedule();
-
+    schedule_preempt_disabled();
     /* Call into cpu_idle with preempt disabled */
-    preempt_disable();
     cpu_idle();
 }
 
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 89096dd8786f..a307cc9c9526 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -240,9 +240,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
         /* didn't get the lock, go to sleep: */
         spin_unlock_mutex(&lock->wait_lock, flags);
-        preempt_enable_no_resched();
-        schedule();
-        preempt_disable();
+        schedule_preempt_disabled();
         spin_lock_mutex(&lock->wait_lock, flags);
     }
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 0b3ea2cbd5fb..b663c2c95d39 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1216,13 +1216,27 @@ int is_console_locked(void)
     return console_locked;
 }
 
+/*
+ * Delayed printk facility, for scheduler-internal messages:
+ */
+#define PRINTK_BUF_SIZE        512
+
+#define PRINTK_PENDING_WAKEUP  0x01
+#define PRINTK_PENDING_SCHED   0x02
+
 static DEFINE_PER_CPU(int, printk_pending);
+static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
 void printk_tick(void)
 {
     if (__this_cpu_read(printk_pending)) {
-        __this_cpu_write(printk_pending, 0);
-        wake_up_interruptible(&log_wait);
+        int pending = __this_cpu_xchg(printk_pending, 0);
+        if (pending & PRINTK_PENDING_SCHED) {
+            char *buf = __get_cpu_var(printk_sched_buf);
+            printk(KERN_WARNING "[sched_delayed] %s", buf);
+        }
+        if (pending & PRINTK_PENDING_WAKEUP)
+            wake_up_interruptible(&log_wait);
     }
 }
 
@@ -1236,7 +1250,7 @@ int printk_needs_cpu(int cpu)
 void wake_up_klogd(void)
 {
     if (waitqueue_active(&log_wait))
-        this_cpu_write(printk_pending, 1);
+        this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
 }
 
 /**
@@ -1629,6 +1643,26 @@ late_initcall(printk_late_init);
 
 #if defined CONFIG_PRINTK
 
+int printk_sched(const char *fmt, ...)
+{
+    unsigned long flags;
+    va_list args;
+    char *buf;
+    int r;
+
+    local_irq_save(flags);
+    buf = __get_cpu_var(printk_sched_buf);
+
+    va_start(args, fmt);
+    r = vsnprintf(buf, PRINTK_BUF_SIZE, fmt, args);
+    va_end(args);
+
+    __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+    local_irq_restore(flags);
+
+    return r;
+}
+
 /*
  * printk rate limiting, lifted from the networking subsystem.
 *
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index e8a1f83ee0e7..0984a21076a3 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -195,20 +195,20 @@ __setup("noautogroup", setup_autogroup);
 
 #ifdef CONFIG_PROC_FS
 
-int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
+int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 {
     static unsigned long next = INITIAL_JIFFIES;
     struct autogroup *ag;
     int err;
 
-    if (*nice < -20 || *nice > 19)
+    if (nice < -20 || nice > 19)
         return -EINVAL;
 
-    err = security_task_setnice(current, *nice);
+    err = security_task_setnice(current, nice);
     if (err)
         return err;
 
-    if (*nice < 0 && !can_nice(current, *nice))
+    if (nice < 0 && !can_nice(current, nice))
         return -EPERM;
 
     /* this is a heavy operation taking global locks.. */
@@ -219,9 +219,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
     ag = autogroup_task_get(p);
 
     down_write(&ag->lock);
-    err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
+    err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]);
     if (!err)
-        ag->nice = *nice;
+        ag->nice = nice;
     up_write(&ag->lock);
 
     autogroup_kref_put(ag);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6c41ba49767a..d2bd4647586c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1284,7 +1284,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
1284 | * leave kernel. | 1284 | * leave kernel. |
1285 | */ | 1285 | */ |
1286 | if (p->mm && printk_ratelimit()) { | 1286 | if (p->mm && printk_ratelimit()) { |
1287 | printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", | 1287 | printk_sched("process %d (%s) no longer affine to cpu%d\n", |
1288 | task_pid_nr(p), p->comm, cpu); | 1288 | task_pid_nr(p), p->comm, cpu); |
1289 | } | 1289 | } |
1290 | 1290 | ||
@@ -1507,7 +1507,7 @@ static int ttwu_activate_remote(struct task_struct *p, int wake_flags) | |||
1507 | } | 1507 | } |
1508 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 1508 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
1509 | 1509 | ||
1510 | static inline int ttwu_share_cache(int this_cpu, int that_cpu) | 1510 | bool cpus_share_cache(int this_cpu, int that_cpu) |
1511 | { | 1511 | { |
1512 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); | 1512 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); |
1513 | } | 1513 | } |
@@ -1518,7 +1518,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) | |||
1518 | struct rq *rq = cpu_rq(cpu); | 1518 | struct rq *rq = cpu_rq(cpu); |
1519 | 1519 | ||
1520 | #if defined(CONFIG_SMP) | 1520 | #if defined(CONFIG_SMP) |
1521 | if (sched_feat(TTWU_QUEUE) && !ttwu_share_cache(smp_processor_id(), cpu)) { | 1521 | if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { |
1522 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ | 1522 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ |
1523 | ttwu_queue_remote(p, cpu); | 1523 | ttwu_queue_remote(p, cpu); |
1524 | return; | 1524 | return; |
@@ -2266,13 +2266,10 @@ calc_load_n(unsigned long load, unsigned long exp, | |||
2266 | * Once we've updated the global active value, we need to apply the exponential | 2266 | * Once we've updated the global active value, we need to apply the exponential |
2267 | * weights adjusted to the number of cycles missed. | 2267 | * weights adjusted to the number of cycles missed. |
2268 | */ | 2268 | */ |
2269 | static void calc_global_nohz(unsigned long ticks) | 2269 | static void calc_global_nohz(void) |
2270 | { | 2270 | { |
2271 | long delta, active, n; | 2271 | long delta, active, n; |
2272 | 2272 | ||
2273 | if (time_before(jiffies, calc_load_update)) | ||
2274 | return; | ||
2275 | |||
2276 | /* | 2273 | /* |
2277 | * If we crossed a calc_load_update boundary, make sure to fold | 2274 | * If we crossed a calc_load_update boundary, make sure to fold |
2278 | * any pending idle changes, the respective CPUs might have | 2275 | * any pending idle changes, the respective CPUs might have |
@@ -2284,31 +2281,25 @@ static void calc_global_nohz(unsigned long ticks) | |||
2284 | atomic_long_add(delta, &calc_load_tasks); | 2281 | atomic_long_add(delta, &calc_load_tasks); |
2285 | 2282 | ||
2286 | /* | 2283 | /* |
2287 | * If we were idle for multiple load cycles, apply them. | 2284 | * It could be the one fold was all it took, we done! |
2288 | */ | 2285 | */ |
2289 | if (ticks >= LOAD_FREQ) { | 2286 | if (time_before(jiffies, calc_load_update + 10)) |
2290 | n = ticks / LOAD_FREQ; | 2287 | return; |
2291 | 2288 | ||
2292 | active = atomic_long_read(&calc_load_tasks); | 2289 | /* |
2293 | active = active > 0 ? active * FIXED_1 : 0; | 2290 | * Catch-up, fold however many we are behind still |
2291 | */ | ||
2292 | delta = jiffies - calc_load_update - 10; | ||
2293 | n = 1 + (delta / LOAD_FREQ); | ||
2294 | 2294 | ||
2295 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | 2295 | active = atomic_long_read(&calc_load_tasks); |
2296 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | 2296 | active = active > 0 ? active * FIXED_1 : 0; |
2297 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
2298 | 2297 | ||
2299 | calc_load_update += n * LOAD_FREQ; | 2298 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); |
2300 | } | 2299 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); |
2300 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
2301 | 2301 | ||
2302 | /* | 2302 | calc_load_update += n * LOAD_FREQ; |
2303 | * Its possible the remainder of the above division also crosses | ||
2304 | * a LOAD_FREQ period, the regular check in calc_global_load() | ||
2305 | * which comes after this will take care of that. | ||
2306 | * | ||
2307 | * Consider us being 11 ticks before a cycle completion, and us | ||
2308 | * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will | ||
2309 | * age us 4 cycles, and the test in calc_global_load() will | ||
2310 | * pick up the final one. | ||
2311 | */ | ||
2312 | } | 2303 | } |
2313 | #else | 2304 | #else |
2314 | void calc_load_account_idle(struct rq *this_rq) | 2305 | void calc_load_account_idle(struct rq *this_rq) |
@@ -2320,7 +2311,7 @@ static inline long calc_load_fold_idle(void) | |||
2320 | return 0; | 2311 | return 0; |
2321 | } | 2312 | } |
2322 | 2313 | ||
2323 | static void calc_global_nohz(unsigned long ticks) | 2314 | static void calc_global_nohz(void) |
2324 | { | 2315 | { |
2325 | } | 2316 | } |
2326 | #endif | 2317 | #endif |
@@ -2348,8 +2339,6 @@ void calc_global_load(unsigned long ticks) | |||
2348 | { | 2339 | { |
2349 | long active; | 2340 | long active; |
2350 | 2341 | ||
2351 | calc_global_nohz(ticks); | ||
2352 | |||
2353 | if (time_before(jiffies, calc_load_update + 10)) | 2342 | if (time_before(jiffies, calc_load_update + 10)) |
2354 | return; | 2343 | return; |
2355 | 2344 | ||
@@ -2361,6 +2350,16 @@ void calc_global_load(unsigned long ticks) | |||
2361 | avenrun[2] = calc_load(avenrun[2], EXP_15, active); | 2350 | avenrun[2] = calc_load(avenrun[2], EXP_15, active); |
2362 | 2351 | ||
2363 | calc_load_update += LOAD_FREQ; | 2352 | calc_load_update += LOAD_FREQ; |
2353 | |||
2354 | /* | ||
2355 | * Account one period with whatever state we found before | ||
2356 | * folding in the nohz state and ageing the entire idle period. | ||
2357 | * | ||
2358 | * This avoids loosing a sample when we go idle between | ||
2359 | * calc_load_account_active() (10 ticks ago) and now and thus | ||
2360 | * under-accounting. | ||
2361 | */ | ||
2362 | calc_global_nohz(); | ||
2364 | } | 2363 | } |
2365 | 2364 | ||
2366 | /* | 2365 | /* |
@@ -3220,14 +3219,14 @@ need_resched: | |||
3220 | 3219 | ||
3221 | post_schedule(rq); | 3220 | post_schedule(rq); |
3222 | 3221 | ||
3223 | preempt_enable_no_resched(); | 3222 | sched_preempt_enable_no_resched(); |
3224 | if (need_resched()) | 3223 | if (need_resched()) |
3225 | goto need_resched; | 3224 | goto need_resched; |
3226 | } | 3225 | } |
3227 | 3226 | ||
3228 | static inline void sched_submit_work(struct task_struct *tsk) | 3227 | static inline void sched_submit_work(struct task_struct *tsk) |
3229 | { | 3228 | { |
3230 | if (!tsk->state) | 3229 | if (!tsk->state || tsk_is_pi_blocked(tsk)) |
3231 | return; | 3230 | return; |
3232 | /* | 3231 | /* |
3233 | * If we are going to sleep and we have plugged IO queued, | 3232 | * If we are going to sleep and we have plugged IO queued, |
@@ -3246,6 +3245,18 @@ asmlinkage void __sched schedule(void) | |||
3246 | } | 3245 | } |
3247 | EXPORT_SYMBOL(schedule); | 3246 | EXPORT_SYMBOL(schedule); |
3248 | 3247 | ||
3248 | /** | ||
3249 | * schedule_preempt_disabled - called with preemption disabled | ||
3250 | * | ||
3251 | * Returns with preemption disabled. Note: preempt_count must be 1 | ||
3252 | */ | ||
3253 | void __sched schedule_preempt_disabled(void) | ||
3254 | { | ||
3255 | sched_preempt_enable_no_resched(); | ||
3256 | schedule(); | ||
3257 | preempt_disable(); | ||
3258 | } | ||
3259 | |||
3249 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 3260 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
3250 | 3261 | ||
3251 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | 3262 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
@@ -3406,9 +3417,9 @@ EXPORT_SYMBOL(__wake_up); | |||
3406 | /* | 3417 | /* |
3407 | * Same as __wake_up but called with the spinlock in wait_queue_head_t held. | 3418 | * Same as __wake_up but called with the spinlock in wait_queue_head_t held. |
3408 | */ | 3419 | */ |
3409 | void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) | 3420 | void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) |
3410 | { | 3421 | { |
3411 | __wake_up_common(q, mode, 1, 0, NULL); | 3422 | __wake_up_common(q, mode, nr, 0, NULL); |
3412 | } | 3423 | } |
3413 | EXPORT_SYMBOL_GPL(__wake_up_locked); | 3424 | EXPORT_SYMBOL_GPL(__wake_up_locked); |
3414 | 3425 | ||
@@ -3767,6 +3778,24 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
3767 | 3778 | ||
3768 | rq = __task_rq_lock(p); | 3779 | rq = __task_rq_lock(p); |
3769 | 3780 | ||
3781 | /* | ||
3782 | * Idle task boosting is a nono in general. There is one | ||
3783 | * exception, when PREEMPT_RT and NOHZ is active: | ||
3784 | * | ||
3785 | * The idle task calls get_next_timer_interrupt() and holds | ||
3786 | * the timer wheel base->lock on the CPU and another CPU wants | ||
3787 | * to access the timer (probably to cancel it). We can safely | ||
3788 | * ignore the boosting request, as the idle CPU runs this code | ||
3789 | * with interrupts disabled and will complete the lock | ||
3790 | * protected section without being interrupted. So there is no | ||
3791 | * real need to boost. | ||
3792 | */ | ||
3793 | if (unlikely(p == rq->idle)) { | ||
3794 | WARN_ON(p != rq->curr); | ||
3795 | WARN_ON(p->pi_blocked_on); | ||
3796 | goto out_unlock; | ||
3797 | } | ||
3798 | |||
3770 | trace_sched_pi_setprio(p, prio); | 3799 | trace_sched_pi_setprio(p, prio); |
3771 | oldprio = p->prio; | 3800 | oldprio = p->prio; |
3772 | prev_class = p->sched_class; | 3801 | prev_class = p->sched_class; |
@@ -3790,11 +3819,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
3790 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 3819 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
3791 | 3820 | ||
3792 | check_class_changed(rq, p, prev_class, oldprio); | 3821 | check_class_changed(rq, p, prev_class, oldprio); |
3822 | out_unlock: | ||
3793 | __task_rq_unlock(rq); | 3823 | __task_rq_unlock(rq); |
3794 | } | 3824 | } |
3795 | |||
3796 | #endif | 3825 | #endif |
3797 | |||
3798 | void set_user_nice(struct task_struct *p, long nice) | 3826 | void set_user_nice(struct task_struct *p, long nice) |
3799 | { | 3827 | { |
3800 | int old_prio, delta, on_rq; | 3828 | int old_prio, delta, on_rq; |
@@ -4474,7 +4502,7 @@ SYSCALL_DEFINE0(sched_yield) | |||
4474 | __release(rq->lock); | 4502 | __release(rq->lock); |
4475 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 4503 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
4476 | do_raw_spin_unlock(&rq->lock); | 4504 | do_raw_spin_unlock(&rq->lock); |
4477 | preempt_enable_no_resched(); | 4505 | sched_preempt_enable_no_resched(); |
4478 | 4506 | ||
4479 | schedule(); | 4507 | schedule(); |
4480 | 4508 | ||
@@ -4548,8 +4576,24 @@ EXPORT_SYMBOL(__cond_resched_softirq); | |||
4548 | /** | 4576 | /** |
4549 | * yield - yield the current processor to other threads. | 4577 | * yield - yield the current processor to other threads. |
4550 | * | 4578 | * |
4551 | * This is a shortcut for kernel-space yielding - it marks the | 4579 | * Do not ever use this function, there's a 99% chance you're doing it wrong. |
4552 | * thread runnable and calls sys_sched_yield(). | 4580 | * |
4581 | * The scheduler is at all times free to pick the calling task as the most | ||
4582 | * eligible task to run; if removing the yield() call from your code breaks | ||
4583 | * it, it's already broken. | ||
4584 | * | ||
4585 | * Typical broken usage is: | ||
4586 | * | ||
4587 | * while (!event) | ||
4588 | * yield(); | ||
4589 | * | ||
4590 | * where one assumes that yield() will let 'the other' process run that will | ||
4591 | * make event true. If the current task is a SCHED_FIFO task that will never | ||
4592 | * happen. Never use yield() as a progress guarantee!! | ||
4593 | * | ||
4594 | * If you want to use yield() to wait for something, use wait_event(). | ||
4595 | * If you want to use yield() to be 'nice' for others, use cond_resched(). | ||
4596 | * If you still want to use yield(), do not! | ||
4553 | */ | 4597 | */ |
4554 | void __sched yield(void) | 4598 | void __sched yield(void) |
4555 | { | 4599 | { |
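For the two legitimate replacements the comment above points at, a rough sketch (the my_event flag, the my_waitq waitqueue and the work helpers are illustrative):

    static DECLARE_WAIT_QUEUE_HEAD(my_waitq);
    static int my_event;

    /* instead of:  while (!my_event) yield();  */
    wait_event(my_waitq, my_event != 0);
    /* the waker does:  my_event = 1; wake_up(&my_waitq);  */

    /* or, when merely being polite inside a long-running loop: */
    while (my_more_work()) {
            my_do_chunk();
            cond_resched();
    }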
@@ -5381,7 +5425,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb, | |||
5381 | unsigned long action, void *hcpu) | 5425 | unsigned long action, void *hcpu) |
5382 | { | 5426 | { |
5383 | switch (action & ~CPU_TASKS_FROZEN) { | 5427 | switch (action & ~CPU_TASKS_FROZEN) { |
5384 | case CPU_ONLINE: | 5428 | case CPU_STARTING: |
5385 | case CPU_DOWN_FAILED: | 5429 | case CPU_DOWN_FAILED: |
5386 | set_cpu_active((long)hcpu, true); | 5430 | set_cpu_active((long)hcpu, true); |
5387 | return NOTIFY_OK; | 5431 | return NOTIFY_OK; |
@@ -5753,7 +5797,7 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu) | |||
5753 | * | 5797 | * |
5754 | * Also keep a unique ID per domain (we use the first cpu number in | 5798 | * Also keep a unique ID per domain (we use the first cpu number in |
5755 | * the cpumask of the domain), this allows us to quickly tell if | 5799 | * the cpumask of the domain), this allows us to quickly tell if |
5756 | * two cpus are in the same cache domain, see ttwu_share_cache(). | 5800 | * two cpus are in the same cache domain, see cpus_share_cache(). |
5757 | */ | 5801 | */ |
5758 | DEFINE_PER_CPU(struct sched_domain *, sd_llc); | 5802 | DEFINE_PER_CPU(struct sched_domain *, sd_llc); |
5759 | DEFINE_PER_CPU(int, sd_llc_id); | 5803 | DEFINE_PER_CPU(int, sd_llc_id); |
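Given the per-cpu LLC id above, the cpus_share_cache() check named in the comment reduces to comparing two ids; roughly (a sketch of the helper, which lives outside this hunk):

    bool cpus_share_cache(int this_cpu, int that_cpu)
    {
            /* same last-level-cache domain iff the cached ids match */
            return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
    }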
@@ -6930,6 +6974,9 @@ void __init sched_init(void) | |||
6930 | rq->online = 0; | 6974 | rq->online = 0; |
6931 | rq->idle_stamp = 0; | 6975 | rq->idle_stamp = 0; |
6932 | rq->avg_idle = 2*sysctl_sched_migration_cost; | 6976 | rq->avg_idle = 2*sysctl_sched_migration_cost; |
6977 | |||
6978 | INIT_LIST_HEAD(&rq->cfs_tasks); | ||
6979 | |||
6933 | rq_attach_root(rq, &def_root_domain); | 6980 | rq_attach_root(rq, &def_root_domain); |
6934 | #ifdef CONFIG_NO_HZ | 6981 | #ifdef CONFIG_NO_HZ |
6935 | rq->nohz_flags = 0; | 6982 | rq->nohz_flags = 0; |
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2a075e10004b..09acaa15161d 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
@@ -288,7 +288,6 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
288 | 288 | ||
289 | P(yld_count); | 289 | P(yld_count); |
290 | 290 | ||
291 | P(sched_switch); | ||
292 | P(sched_count); | 291 | P(sched_count); |
293 | P(sched_goidle); | 292 | P(sched_goidle); |
294 | #ifdef CONFIG_SMP | 293 | #ifdef CONFIG_SMP |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index fd974faf467d..94340c7544a9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -776,29 +776,16 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
776 | * Scheduling class queueing methods: | 776 | * Scheduling class queueing methods: |
777 | */ | 777 | */ |
778 | 778 | ||
779 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
780 | static void | ||
781 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
782 | { | ||
783 | cfs_rq->task_weight += weight; | ||
784 | } | ||
785 | #else | ||
786 | static inline void | ||
787 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
788 | { | ||
789 | } | ||
790 | #endif | ||
791 | |||
792 | static void | 779 | static void |
793 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 780 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
794 | { | 781 | { |
795 | update_load_add(&cfs_rq->load, se->load.weight); | 782 | update_load_add(&cfs_rq->load, se->load.weight); |
796 | if (!parent_entity(se)) | 783 | if (!parent_entity(se)) |
797 | update_load_add(&rq_of(cfs_rq)->load, se->load.weight); | 784 | update_load_add(&rq_of(cfs_rq)->load, se->load.weight); |
798 | if (entity_is_task(se)) { | 785 | #ifdef CONFIG_SMP |
799 | add_cfs_task_weight(cfs_rq, se->load.weight); | 786 | if (entity_is_task(se)) |
800 | list_add(&se->group_node, &cfs_rq->tasks); | 787 | list_add_tail(&se->group_node, &rq_of(cfs_rq)->cfs_tasks); |
801 | } | 788 | #endif |
802 | cfs_rq->nr_running++; | 789 | cfs_rq->nr_running++; |
803 | } | 790 | } |
804 | 791 | ||
@@ -808,10 +795,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
808 | update_load_sub(&cfs_rq->load, se->load.weight); | 795 | update_load_sub(&cfs_rq->load, se->load.weight); |
809 | if (!parent_entity(se)) | 796 | if (!parent_entity(se)) |
810 | update_load_sub(&rq_of(cfs_rq)->load, se->load.weight); | 797 | update_load_sub(&rq_of(cfs_rq)->load, se->load.weight); |
811 | if (entity_is_task(se)) { | 798 | if (entity_is_task(se)) |
812 | add_cfs_task_weight(cfs_rq, -se->load.weight); | ||
813 | list_del_init(&se->group_node); | 799 | list_del_init(&se->group_node); |
814 | } | ||
815 | cfs_rq->nr_running--; | 800 | cfs_rq->nr_running--; |
816 | } | 801 | } |
817 | 802 | ||
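With the per-cfs_rq task lists gone, every runnable CFS task on a CPU is linked through se.group_node onto the single rq-wide cfs_tasks list, regardless of how deep it sits in the cgroup hierarchy; that flat list is what the rewritten balancer below walks. A minimal sketch of such a walk (the inspect() callback is illustrative; rq->lock must be held):

    struct task_struct *p, *n;

    list_for_each_entry_safe(p, n, &rq->cfs_tasks, se.group_node)
            inspect(p);     /* illustrative; see move_one_task()/move_tasks() below */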
@@ -2672,8 +2657,6 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
2672 | /* | 2657 | /* |
2673 | * Otherwise, iterate the domains and find an elegible idle cpu. | 2658 | * Otherwise, iterate the domains and find an elegible idle cpu. |
2674 | */ | 2659 | */ |
2675 | rcu_read_lock(); | ||
2676 | |||
2677 | sd = rcu_dereference(per_cpu(sd_llc, target)); | 2660 | sd = rcu_dereference(per_cpu(sd_llc, target)); |
2678 | for_each_lower_domain(sd) { | 2661 | for_each_lower_domain(sd) { |
2679 | sg = sd->groups; | 2662 | sg = sd->groups; |
@@ -2695,8 +2678,6 @@ next: | |||
2695 | } while (sg != sd->groups); | 2678 | } while (sg != sd->groups); |
2696 | } | 2679 | } |
2697 | done: | 2680 | done: |
2698 | rcu_read_unlock(); | ||
2699 | |||
2700 | return target; | 2681 | return target; |
2701 | } | 2682 | } |
2702 | 2683 | ||
@@ -2922,7 +2903,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
2922 | return; | 2903 | return; |
2923 | 2904 | ||
2924 | /* | 2905 | /* |
2925 | * This is possible from callers such as pull_task(), in which we | 2906 | * This is possible from callers such as move_task(), in which we |
2926 | * unconditionally check_preempt_curr() after an enqueue (which may have | 2907 | * unconditionally check_preempt_curr() after an enqueue (which may have |
2927 | * lead to a throttle). This both saves work and prevents false | 2908 | * lead to a throttle). This both saves work and prevents false |
2928 | * next-buddy nomination below. | 2909 | * next-buddy nomination below. |
@@ -3086,17 +3067,39 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp | |||
3086 | * Fair scheduling class load-balancing methods: | 3067 | * Fair scheduling class load-balancing methods: |
3087 | */ | 3068 | */ |
3088 | 3069 | ||
3070 | static unsigned long __read_mostly max_load_balance_interval = HZ/10; | ||
3071 | |||
3072 | #define LBF_ALL_PINNED 0x01 | ||
3073 | #define LBF_NEED_BREAK 0x02 | ||
3074 | |||
3075 | struct lb_env { | ||
3076 | struct sched_domain *sd; | ||
3077 | |||
3078 | int src_cpu; | ||
3079 | struct rq *src_rq; | ||
3080 | |||
3081 | int dst_cpu; | ||
3082 | struct rq *dst_rq; | ||
3083 | |||
3084 | enum cpu_idle_type idle; | ||
3085 | long load_move; | ||
3086 | unsigned int flags; | ||
3087 | |||
3088 | unsigned int loop; | ||
3089 | unsigned int loop_break; | ||
3090 | unsigned int loop_max; | ||
3091 | }; | ||
3092 | |||
3089 | /* | 3093 | /* |
3090 | * pull_task - move a task from a remote runqueue to the local runqueue. | 3094 | * move_task - move a task from one runqueue to another runqueue. |
3091 | * Both runqueues must be locked. | 3095 | * Both runqueues must be locked. |
3092 | */ | 3096 | */ |
3093 | static void pull_task(struct rq *src_rq, struct task_struct *p, | 3097 | static void move_task(struct task_struct *p, struct lb_env *env) |
3094 | struct rq *this_rq, int this_cpu) | ||
3095 | { | 3098 | { |
3096 | deactivate_task(src_rq, p, 0); | 3099 | deactivate_task(env->src_rq, p, 0); |
3097 | set_task_cpu(p, this_cpu); | 3100 | set_task_cpu(p, env->dst_cpu); |
3098 | activate_task(this_rq, p, 0); | 3101 | activate_task(env->dst_rq, p, 0); |
3099 | check_preempt_curr(this_rq, p, 0); | 3102 | check_preempt_curr(env->dst_rq, p, 0); |
3100 | } | 3103 | } |
3101 | 3104 | ||
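The per-balance parameters that used to travel as half a dozen separate arguments now ride in one lb_env on the caller's stack, and move_task() needs nothing but the env. A hedged sketch of how a caller wires it up (values illustrative; the real initialisation appears in load_balance() further down, and both runqueues must be locked around the move):

    struct lb_env env = {
            .sd         = sd,                       /* domain being balanced */
            .dst_cpu    = this_cpu,
            .dst_rq     = cpu_rq(this_cpu),
            .src_cpu    = busiest_cpu,
            .src_rq     = cpu_rq(busiest_cpu),
            .idle       = CPU_NEWLY_IDLE,
            .load_move  = imbalance,                /* weighted load to pull */
            .loop_break = sysctl_sched_nr_migrate,
    };

    if (can_migrate_task(p, &env))
            move_task(p, &env);                     /* src_rq -> dst_rq */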
3102 | /* | 3105 | /* |
@@ -3131,19 +3134,11 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
3131 | return delta < (s64)sysctl_sched_migration_cost; | 3134 | return delta < (s64)sysctl_sched_migration_cost; |
3132 | } | 3135 | } |
3133 | 3136 | ||
3134 | #define LBF_ALL_PINNED 0x01 | ||
3135 | #define LBF_NEED_BREAK 0x02 /* clears into HAD_BREAK */ | ||
3136 | #define LBF_HAD_BREAK 0x04 | ||
3137 | #define LBF_HAD_BREAKS 0x0C /* count HAD_BREAKs overflows into ABORT */ | ||
3138 | #define LBF_ABORT 0x10 | ||
3139 | |||
3140 | /* | 3137 | /* |
3141 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? | 3138 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? |
3142 | */ | 3139 | */ |
3143 | static | 3140 | static |
3144 | int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | 3141 | int can_migrate_task(struct task_struct *p, struct lb_env *env) |
3145 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
3146 | int *lb_flags) | ||
3147 | { | 3142 | { |
3148 | int tsk_cache_hot = 0; | 3143 | int tsk_cache_hot = 0; |
3149 | /* | 3144 | /* |
@@ -3152,13 +3147,13 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
3152 | * 2) cannot be migrated to this CPU due to cpus_allowed, or | 3147 | * 2) cannot be migrated to this CPU due to cpus_allowed, or |
3153 | * 3) are cache-hot on their current CPU. | 3148 | * 3) are cache-hot on their current CPU. |
3154 | */ | 3149 | */ |
3155 | if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) { | 3150 | if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { |
3156 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); | 3151 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); |
3157 | return 0; | 3152 | return 0; |
3158 | } | 3153 | } |
3159 | *lb_flags &= ~LBF_ALL_PINNED; | 3154 | env->flags &= ~LBF_ALL_PINNED; |
3160 | 3155 | ||
3161 | if (task_running(rq, p)) { | 3156 | if (task_running(env->src_rq, p)) { |
3162 | schedstat_inc(p, se.statistics.nr_failed_migrations_running); | 3157 | schedstat_inc(p, se.statistics.nr_failed_migrations_running); |
3163 | return 0; | 3158 | return 0; |
3164 | } | 3159 | } |
@@ -3169,12 +3164,12 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
3169 | * 2) too many balance attempts have failed. | 3164 | * 2) too many balance attempts have failed. |
3170 | */ | 3165 | */ |
3171 | 3166 | ||
3172 | tsk_cache_hot = task_hot(p, rq->clock_task, sd); | 3167 | tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd); |
3173 | if (!tsk_cache_hot || | 3168 | if (!tsk_cache_hot || |
3174 | sd->nr_balance_failed > sd->cache_nice_tries) { | 3169 | env->sd->nr_balance_failed > env->sd->cache_nice_tries) { |
3175 | #ifdef CONFIG_SCHEDSTATS | 3170 | #ifdef CONFIG_SCHEDSTATS |
3176 | if (tsk_cache_hot) { | 3171 | if (tsk_cache_hot) { |
3177 | schedstat_inc(sd, lb_hot_gained[idle]); | 3172 | schedstat_inc(env->sd, lb_hot_gained[env->idle]); |
3178 | schedstat_inc(p, se.statistics.nr_forced_migrations); | 3173 | schedstat_inc(p, se.statistics.nr_forced_migrations); |
3179 | } | 3174 | } |
3180 | #endif | 3175 | #endif |
@@ -3195,65 +3190,80 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
3195 | * | 3190 | * |
3196 | * Called with both runqueues locked. | 3191 | * Called with both runqueues locked. |
3197 | */ | 3192 | */ |
3198 | static int | 3193 | static int move_one_task(struct lb_env *env) |
3199 | move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
3200 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
3201 | { | 3194 | { |
3202 | struct task_struct *p, *n; | 3195 | struct task_struct *p, *n; |
3203 | struct cfs_rq *cfs_rq; | ||
3204 | int pinned = 0; | ||
3205 | 3196 | ||
3206 | for_each_leaf_cfs_rq(busiest, cfs_rq) { | 3197 | list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) { |
3207 | list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) { | 3198 | if (throttled_lb_pair(task_group(p), env->src_rq->cpu, env->dst_cpu)) |
3208 | if (throttled_lb_pair(task_group(p), | 3199 | continue; |
3209 | busiest->cpu, this_cpu)) | ||
3210 | break; | ||
3211 | 3200 | ||
3212 | if (!can_migrate_task(p, busiest, this_cpu, | 3201 | if (!can_migrate_task(p, env)) |
3213 | sd, idle, &pinned)) | 3202 | continue; |
3214 | continue; | ||
3215 | 3203 | ||
3216 | pull_task(busiest, p, this_rq, this_cpu); | 3204 | move_task(p, env); |
3217 | /* | 3205 | /* |
3218 | * Right now, this is only the second place pull_task() | 3206 | * Right now, this is only the second place move_task() |
3219 | * is called, so we can safely collect pull_task() | 3207 | * is called, so we can safely collect move_task() |
3220 | * stats here rather than inside pull_task(). | 3208 | * stats here rather than inside move_task(). |
3221 | */ | 3209 | */ |
3222 | schedstat_inc(sd, lb_gained[idle]); | 3210 | schedstat_inc(env->sd, lb_gained[env->idle]); |
3223 | return 1; | 3211 | return 1; |
3224 | } | ||
3225 | } | 3212 | } |
3226 | |||
3227 | return 0; | 3213 | return 0; |
3228 | } | 3214 | } |
3229 | 3215 | ||
3230 | static unsigned long | 3216 | static unsigned long task_h_load(struct task_struct *p); |
3231 | balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | 3217 | |
3232 | unsigned long max_load_move, struct sched_domain *sd, | 3218 | /* |
3233 | enum cpu_idle_type idle, int *lb_flags, | 3219 | * move_tasks tries to move up to load_move weighted load from busiest to |
3234 | struct cfs_rq *busiest_cfs_rq) | 3220 | * this_rq, as part of a balancing operation within domain "sd". |
3221 | * Returns 1 if successful and 0 otherwise. | ||
3222 | * | ||
3223 | * Called with both runqueues locked. | ||
3224 | */ | ||
3225 | static int move_tasks(struct lb_env *env) | ||
3235 | { | 3226 | { |
3236 | int loops = 0, pulled = 0; | 3227 | struct list_head *tasks = &env->src_rq->cfs_tasks; |
3237 | long rem_load_move = max_load_move; | 3228 | struct task_struct *p; |
3238 | struct task_struct *p, *n; | 3229 | unsigned long load; |
3230 | int pulled = 0; | ||
3231 | |||
3232 | if (env->load_move <= 0) | ||
3233 | return 0; | ||
3239 | 3234 | ||
3240 | if (max_load_move == 0) | 3235 | while (!list_empty(tasks)) { |
3241 | goto out; | 3236 | p = list_first_entry(tasks, struct task_struct, se.group_node); |
3242 | 3237 | ||
3243 | list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) { | 3238 | env->loop++; |
3244 | if (loops++ > sysctl_sched_nr_migrate) { | 3239 | /* We've more or less seen every task there is, call it quits */ |
3245 | *lb_flags |= LBF_NEED_BREAK; | 3240 | if (env->loop > env->loop_max) |
3241 | break; | ||
3242 | |||
3243 | /* take a breather every nr_migrate tasks */ | ||
3244 | if (env->loop > env->loop_break) { | ||
3245 | env->loop_break += sysctl_sched_nr_migrate; | ||
3246 | env->flags |= LBF_NEED_BREAK; | ||
3246 | break; | 3247 | break; |
3247 | } | 3248 | } |
3248 | 3249 | ||
3249 | if ((p->se.load.weight >> 1) > rem_load_move || | 3250 | if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) |
3250 | !can_migrate_task(p, busiest, this_cpu, sd, idle, | 3251 | goto next; |
3251 | lb_flags)) | 3252 | |
3252 | continue; | 3253 | load = task_h_load(p); |
3254 | |||
3255 | if (load < 16 && !env->sd->nr_balance_failed) | ||
3256 | goto next; | ||
3257 | |||
3258 | if ((load / 2) > env->load_move) | ||
3259 | goto next; | ||
3253 | 3260 | ||
3254 | pull_task(busiest, p, this_rq, this_cpu); | 3261 | if (!can_migrate_task(p, env)) |
3262 | goto next; | ||
3263 | |||
3264 | move_task(p, env); | ||
3255 | pulled++; | 3265 | pulled++; |
3256 | rem_load_move -= p->se.load.weight; | 3266 | env->load_move -= load; |
3257 | 3267 | ||
3258 | #ifdef CONFIG_PREEMPT | 3268 | #ifdef CONFIG_PREEMPT |
3259 | /* | 3269 | /* |
@@ -3261,28 +3271,30 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
3261 | * kernels will stop after the first task is pulled to minimize | 3271 | * kernels will stop after the first task is pulled to minimize |
3262 | * the critical section. | 3272 | * the critical section. |
3263 | */ | 3273 | */ |
3264 | if (idle == CPU_NEWLY_IDLE) { | 3274 | if (env->idle == CPU_NEWLY_IDLE) |
3265 | *lb_flags |= LBF_ABORT; | ||
3266 | break; | 3275 | break; |
3267 | } | ||
3268 | #endif | 3276 | #endif |
3269 | 3277 | ||
3270 | /* | 3278 | /* |
3271 | * We only want to steal up to the prescribed amount of | 3279 | * We only want to steal up to the prescribed amount of |
3272 | * weighted load. | 3280 | * weighted load. |
3273 | */ | 3281 | */ |
3274 | if (rem_load_move <= 0) | 3282 | if (env->load_move <= 0) |
3275 | break; | 3283 | break; |
3284 | |||
3285 | continue; | ||
3286 | next: | ||
3287 | list_move_tail(&p->se.group_node, tasks); | ||
3276 | } | 3288 | } |
3277 | out: | 3289 | |
3278 | /* | 3290 | /* |
3279 | * Right now, this is one of only two places pull_task() is called, | 3291 | * Right now, this is one of only two places move_task() is called, |
3280 | * so we can safely collect pull_task() stats here rather than | 3292 | * so we can safely collect move_task() stats here rather than |
3281 | * inside pull_task(). | 3293 | * inside move_task(). |
3282 | */ | 3294 | */ |
3283 | schedstat_add(sd, lb_gained[idle], pulled); | 3295 | schedstat_add(env->sd, lb_gained[env->idle], pulled); |
3284 | 3296 | ||
3285 | return max_load_move - rem_load_move; | 3297 | return pulled; |
3286 | } | 3298 | } |
3287 | 3299 | ||
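Two details of the rewritten walk are easy to miss: skipped tasks (throttled group pair, too heavy for the remaining load_move, cache-hot, or not allowed on the destination CPU) are rotated to the tail of cfs_tasks so a retry starts with fresh candidates, and every sysctl_sched_nr_migrate iterations the function raises LBF_NEED_BREAK so the caller can drop the runqueue locks and come back. A small stand-alone model of the breather handshake between move_tasks() and its caller (plain counters instead of tasks, numbers illustrative):

    #include <stdio.h>

    #define NR_MIGRATE     4        /* stand-in for sysctl_sched_nr_migrate */
    #define LBF_NEED_BREAK 0x02

    struct env { int loop, loop_break, loop_max, flags; };

    /* one locked pass over the candidates; pretends every task gets pulled */
    static int scan(struct env *env)
    {
            int pulled = 0;

            for (;;) {
                    env->loop++;
                    if (env->loop > env->loop_max)          /* seen every task */
                            break;
                    if (env->loop > env->loop_break) {      /* take a breather */
                            env->loop_break += NR_MIGRATE;
                            env->flags |= LBF_NEED_BREAK;
                            break;
                    }
                    pulled++;       /* the real code skips or move_task()s here */
            }
            return pulled;
    }

    int main(void)
    {
            struct env env = { .loop_break = NR_MIGRATE, .loop_max = 10 };
            int total = 0, passes = 0;

            do {                    /* models the caller's more_balance retry */
                    env.flags &= ~LBF_NEED_BREAK;
                    /* the runqueue locks would be dropped and retaken here */
                    total += scan(&env);
                    passes++;
            } while (env.flags & LBF_NEED_BREAK);

            printf("pulled %d candidates in %d locked passes\n", total, passes);
            return 0;
    }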
3288 | #ifdef CONFIG_FAIR_GROUP_SCHED | 3300 | #ifdef CONFIG_FAIR_GROUP_SCHED |
@@ -3362,113 +3374,35 @@ static int tg_load_down(struct task_group *tg, void *data) | |||
3362 | 3374 | ||
3363 | static void update_h_load(long cpu) | 3375 | static void update_h_load(long cpu) |
3364 | { | 3376 | { |
3377 | rcu_read_lock(); | ||
3365 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | 3378 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); |
3379 | rcu_read_unlock(); | ||
3366 | } | 3380 | } |
3367 | 3381 | ||
3368 | static unsigned long | 3382 | static unsigned long task_h_load(struct task_struct *p) |
3369 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
3370 | unsigned long max_load_move, | ||
3371 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
3372 | int *lb_flags) | ||
3373 | { | 3383 | { |
3374 | long rem_load_move = max_load_move; | 3384 | struct cfs_rq *cfs_rq = task_cfs_rq(p); |
3375 | struct cfs_rq *busiest_cfs_rq; | 3385 | unsigned long load; |
3376 | |||
3377 | rcu_read_lock(); | ||
3378 | update_h_load(cpu_of(busiest)); | ||
3379 | |||
3380 | for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) { | ||
3381 | unsigned long busiest_h_load = busiest_cfs_rq->h_load; | ||
3382 | unsigned long busiest_weight = busiest_cfs_rq->load.weight; | ||
3383 | u64 rem_load, moved_load; | ||
3384 | |||
3385 | if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT)) | ||
3386 | break; | ||
3387 | |||
3388 | /* | ||
3389 | * empty group or part of a throttled hierarchy | ||
3390 | */ | ||
3391 | if (!busiest_cfs_rq->task_weight || | ||
3392 | throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu)) | ||
3393 | continue; | ||
3394 | |||
3395 | rem_load = (u64)rem_load_move * busiest_weight; | ||
3396 | rem_load = div_u64(rem_load, busiest_h_load + 1); | ||
3397 | |||
3398 | moved_load = balance_tasks(this_rq, this_cpu, busiest, | ||
3399 | rem_load, sd, idle, lb_flags, | ||
3400 | busiest_cfs_rq); | ||
3401 | |||
3402 | if (!moved_load) | ||
3403 | continue; | ||
3404 | 3386 | ||
3405 | moved_load *= busiest_h_load; | 3387 | load = p->se.load.weight; |
3406 | moved_load = div_u64(moved_load, busiest_weight + 1); | 3388 | load = div_u64(load * cfs_rq->h_load, cfs_rq->load.weight + 1); |
3407 | 3389 | ||
3408 | rem_load_move -= moved_load; | 3390 | return load; |
3409 | if (rem_load_move < 0) | ||
3410 | break; | ||
3411 | } | ||
3412 | rcu_read_unlock(); | ||
3413 | |||
3414 | return max_load_move - rem_load_move; | ||
3415 | } | 3391 | } |
3416 | #else | 3392 | #else |
3417 | static inline void update_shares(int cpu) | 3393 | static inline void update_shares(int cpu) |
3418 | { | 3394 | { |
3419 | } | 3395 | } |
3420 | 3396 | ||
3421 | static unsigned long | 3397 | static inline void update_h_load(long cpu) |
3422 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
3423 | unsigned long max_load_move, | ||
3424 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
3425 | int *lb_flags) | ||
3426 | { | 3398 | { |
3427 | return balance_tasks(this_rq, this_cpu, busiest, | ||
3428 | max_load_move, sd, idle, lb_flags, | ||
3429 | &busiest->cfs); | ||
3430 | } | 3399 | } |
3431 | #endif | ||
3432 | 3400 | ||
3433 | /* | 3401 | static unsigned long task_h_load(struct task_struct *p) |
3434 | * move_tasks tries to move up to max_load_move weighted load from busiest to | ||
3435 | * this_rq, as part of a balancing operation within domain "sd". | ||
3436 | * Returns 1 if successful and 0 otherwise. | ||
3437 | * | ||
3438 | * Called with both runqueues locked. | ||
3439 | */ | ||
3440 | static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
3441 | unsigned long max_load_move, | ||
3442 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
3443 | int *lb_flags) | ||
3444 | { | 3402 | { |
3445 | unsigned long total_load_moved = 0, load_moved; | 3403 | return p->se.load.weight; |
3446 | |||
3447 | do { | ||
3448 | load_moved = load_balance_fair(this_rq, this_cpu, busiest, | ||
3449 | max_load_move - total_load_moved, | ||
3450 | sd, idle, lb_flags); | ||
3451 | |||
3452 | total_load_moved += load_moved; | ||
3453 | |||
3454 | if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT)) | ||
3455 | break; | ||
3456 | |||
3457 | #ifdef CONFIG_PREEMPT | ||
3458 | /* | ||
3459 | * NEWIDLE balancing is a source of latency, so preemptible | ||
3460 | * kernels will stop after the first task is pulled to minimize | ||
3461 | * the critical section. | ||
3462 | */ | ||
3463 | if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) { | ||
3464 | *lb_flags |= LBF_ABORT; | ||
3465 | break; | ||
3466 | } | ||
3467 | #endif | ||
3468 | } while (load_moved && max_load_move > total_load_moved); | ||
3469 | |||
3470 | return total_load_moved > 0; | ||
3471 | } | 3404 | } |
3405 | #endif | ||
3472 | 3406 | ||
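task_h_load() replaces the old per-cfs_rq rescaling done in load_balance_fair(): a task's contribution to load_move is its own weight scaled by its group's hierarchical load over the group's queued weight (the +1 guards the division). A tiny worked example, assuming a nice-0 task (weight 1024) in a group whose h_load is 512 and whose queue weight is 2048:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t task_weight  = 1024;   /* nice-0 task inside the group */
            uint64_t h_load       = 512;    /* group's share of this rq's load */
            uint64_t queue_weight = 2048;   /* total weight queued in the group */

            /* mirrors: div_u64(load * cfs_rq->h_load, cfs_rq->load.weight + 1) */
            uint64_t load = task_weight * h_load / (queue_weight + 1);

            printf("task_h_load = %llu (raw weight %llu)\n",
                   (unsigned long long)load, (unsigned long long)task_weight);
            return 0;
    }

The task is half of a group that itself amounts to 512 units on this runqueue, so it counts as roughly 256 units of movable load (255 with the +1 in the divisor); without CONFIG_FAIR_GROUP_SCHED the raw weight is used unchanged, as the #else branch above shows.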
3473 | /********** Helpers for find_busiest_group ************************/ | 3407 | /********** Helpers for find_busiest_group ************************/ |
3474 | /* | 3408 | /* |
@@ -3778,6 +3712,11 @@ void update_group_power(struct sched_domain *sd, int cpu) | |||
3778 | struct sched_domain *child = sd->child; | 3712 | struct sched_domain *child = sd->child; |
3779 | struct sched_group *group, *sdg = sd->groups; | 3713 | struct sched_group *group, *sdg = sd->groups; |
3780 | unsigned long power; | 3714 | unsigned long power; |
3715 | unsigned long interval; | ||
3716 | |||
3717 | interval = msecs_to_jiffies(sd->balance_interval); | ||
3718 | interval = clamp(interval, 1UL, max_load_balance_interval); | ||
3719 | sdg->sgp->next_update = jiffies + interval; | ||
3781 | 3720 | ||
3782 | if (!child) { | 3721 | if (!child) { |
3783 | update_cpu_power(sd, cpu); | 3722 | update_cpu_power(sd, cpu); |
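update_group_power() now stamps its own refresh deadline: the domain's balance_interval (milliseconds) is converted to jiffies and clamped to [1, HZ/10], and the next hunk only recomputes group power for a busy CPU once that deadline has passed. A small model of the arithmetic, assuming HZ=1000, a 64 ms balance_interval and msecs_to_jiffies() approximated as ms*HZ/1000:

    #include <stdio.h>

    #define HZ              1000            /* assumed for the example */
    #define MAX_LB_INTERVAL (HZ / 10)       /* max_load_balance_interval */

    static unsigned long clampul(unsigned long v, unsigned long lo, unsigned long hi)
    {
            return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void)
    {
            unsigned long balance_interval_ms = 64;   /* sd->balance_interval */
            unsigned long jiffies_now = 100000;       /* illustrative */
            unsigned long interval = balance_interval_ms * HZ / 1000;

            interval = clampul(interval, 1UL, MAX_LB_INTERVAL);
            printf("next_update = %lu (refresh every %lu ticks, cap %d)\n",
                   jiffies_now + interval, interval, MAX_LB_INTERVAL);
            return 0;
    }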
@@ -3885,12 +3824,15 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
3885 | * domains. In the newly idle case, we will allow all the cpu's | 3824 | * domains. In the newly idle case, we will allow all the cpu's |
3886 | * to do the newly idle load balance. | 3825 | * to do the newly idle load balance. |
3887 | */ | 3826 | */ |
3888 | if (idle != CPU_NEWLY_IDLE && local_group) { | 3827 | if (local_group) { |
3889 | if (balance_cpu != this_cpu) { | 3828 | if (idle != CPU_NEWLY_IDLE) { |
3890 | *balance = 0; | 3829 | if (balance_cpu != this_cpu) { |
3891 | return; | 3830 | *balance = 0; |
3892 | } | 3831 | return; |
3893 | update_group_power(sd, this_cpu); | 3832 | } |
3833 | update_group_power(sd, this_cpu); | ||
3834 | } else if (time_after_eq(jiffies, group->sgp->next_update)) | ||
3835 | update_group_power(sd, this_cpu); | ||
3894 | } | 3836 | } |
3895 | 3837 | ||
3896 | /* Adjust by relative CPU power of the group */ | 3838 | /* Adjust by relative CPU power of the group */ |
@@ -4453,13 +4395,21 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
4453 | struct sched_domain *sd, enum cpu_idle_type idle, | 4395 | struct sched_domain *sd, enum cpu_idle_type idle, |
4454 | int *balance) | 4396 | int *balance) |
4455 | { | 4397 | { |
4456 | int ld_moved, lb_flags = 0, active_balance = 0; | 4398 | int ld_moved, active_balance = 0; |
4457 | struct sched_group *group; | 4399 | struct sched_group *group; |
4458 | unsigned long imbalance; | 4400 | unsigned long imbalance; |
4459 | struct rq *busiest; | 4401 | struct rq *busiest; |
4460 | unsigned long flags; | 4402 | unsigned long flags; |
4461 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4403 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4462 | 4404 | ||
4405 | struct lb_env env = { | ||
4406 | .sd = sd, | ||
4407 | .dst_cpu = this_cpu, | ||
4408 | .dst_rq = this_rq, | ||
4409 | .idle = idle, | ||
4410 | .loop_break = sysctl_sched_nr_migrate, | ||
4411 | }; | ||
4412 | |||
4463 | cpumask_copy(cpus, cpu_active_mask); | 4413 | cpumask_copy(cpus, cpu_active_mask); |
4464 | 4414 | ||
4465 | schedstat_inc(sd, lb_count[idle]); | 4415 | schedstat_inc(sd, lb_count[idle]); |
@@ -4494,32 +4444,34 @@ redo: | |||
4494 | * still unbalanced. ld_moved simply stays zero, so it is | 4444 | * still unbalanced. ld_moved simply stays zero, so it is |
4495 | * correctly treated as an imbalance. | 4445 | * correctly treated as an imbalance. |
4496 | */ | 4446 | */ |
4497 | lb_flags |= LBF_ALL_PINNED; | 4447 | env.flags |= LBF_ALL_PINNED; |
4448 | env.load_move = imbalance; | ||
4449 | env.src_cpu = busiest->cpu; | ||
4450 | env.src_rq = busiest; | ||
4451 | env.loop_max = busiest->nr_running; | ||
4452 | |||
4453 | more_balance: | ||
4498 | local_irq_save(flags); | 4454 | local_irq_save(flags); |
4499 | double_rq_lock(this_rq, busiest); | 4455 | double_rq_lock(this_rq, busiest); |
4500 | ld_moved = move_tasks(this_rq, this_cpu, busiest, | 4456 | if (!env.loop) |
4501 | imbalance, sd, idle, &lb_flags); | 4457 | update_h_load(env.src_cpu); |
4458 | ld_moved += move_tasks(&env); | ||
4502 | double_rq_unlock(this_rq, busiest); | 4459 | double_rq_unlock(this_rq, busiest); |
4503 | local_irq_restore(flags); | 4460 | local_irq_restore(flags); |
4504 | 4461 | ||
4462 | if (env.flags & LBF_NEED_BREAK) { | ||
4463 | env.flags &= ~LBF_NEED_BREAK; | ||
4464 | goto more_balance; | ||
4465 | } | ||
4466 | |||
4505 | /* | 4467 | /* |
4506 | * some other cpu did the load balance for us. | 4468 | * some other cpu did the load balance for us. |
4507 | */ | 4469 | */ |
4508 | if (ld_moved && this_cpu != smp_processor_id()) | 4470 | if (ld_moved && this_cpu != smp_processor_id()) |
4509 | resched_cpu(this_cpu); | 4471 | resched_cpu(this_cpu); |
4510 | 4472 | ||
4511 | if (lb_flags & LBF_ABORT) | ||
4512 | goto out_balanced; | ||
4513 | |||
4514 | if (lb_flags & LBF_NEED_BREAK) { | ||
4515 | lb_flags += LBF_HAD_BREAK - LBF_NEED_BREAK; | ||
4516 | if (lb_flags & LBF_ABORT) | ||
4517 | goto out_balanced; | ||
4518 | goto redo; | ||
4519 | } | ||
4520 | |||
4521 | /* All tasks on this runqueue were pinned by CPU affinity */ | 4473 | /* All tasks on this runqueue were pinned by CPU affinity */ |
4522 | if (unlikely(lb_flags & LBF_ALL_PINNED)) { | 4474 | if (unlikely(env.flags & LBF_ALL_PINNED)) { |
4523 | cpumask_clear_cpu(cpu_of(busiest), cpus); | 4475 | cpumask_clear_cpu(cpu_of(busiest), cpus); |
4524 | if (!cpumask_empty(cpus)) | 4476 | if (!cpumask_empty(cpus)) |
4525 | goto redo; | 4477 | goto redo; |
@@ -4549,7 +4501,7 @@ redo: | |||
4549 | tsk_cpus_allowed(busiest->curr))) { | 4501 | tsk_cpus_allowed(busiest->curr))) { |
4550 | raw_spin_unlock_irqrestore(&busiest->lock, | 4502 | raw_spin_unlock_irqrestore(&busiest->lock, |
4551 | flags); | 4503 | flags); |
4552 | lb_flags |= LBF_ALL_PINNED; | 4504 | env.flags |= LBF_ALL_PINNED; |
4553 | goto out_one_pinned; | 4505 | goto out_one_pinned; |
4554 | } | 4506 | } |
4555 | 4507 | ||
@@ -4602,7 +4554,7 @@ out_balanced: | |||
4602 | 4554 | ||
4603 | out_one_pinned: | 4555 | out_one_pinned: |
4604 | /* tune up the balancing interval */ | 4556 | /* tune up the balancing interval */ |
4605 | if (((lb_flags & LBF_ALL_PINNED) && | 4557 | if (((env.flags & LBF_ALL_PINNED) && |
4606 | sd->balance_interval < MAX_PINNED_INTERVAL) || | 4558 | sd->balance_interval < MAX_PINNED_INTERVAL) || |
4607 | (sd->balance_interval < sd->max_interval)) | 4559 | (sd->balance_interval < sd->max_interval)) |
4608 | sd->balance_interval *= 2; | 4560 | sd->balance_interval *= 2; |
@@ -4712,10 +4664,18 @@ static int active_load_balance_cpu_stop(void *data) | |||
4712 | } | 4664 | } |
4713 | 4665 | ||
4714 | if (likely(sd)) { | 4666 | if (likely(sd)) { |
4667 | struct lb_env env = { | ||
4668 | .sd = sd, | ||
4669 | .dst_cpu = target_cpu, | ||
4670 | .dst_rq = target_rq, | ||
4671 | .src_cpu = busiest_rq->cpu, | ||
4672 | .src_rq = busiest_rq, | ||
4673 | .idle = CPU_IDLE, | ||
4674 | }; | ||
4675 | |||
4715 | schedstat_inc(sd, alb_count); | 4676 | schedstat_inc(sd, alb_count); |
4716 | 4677 | ||
4717 | if (move_one_task(target_rq, target_cpu, busiest_rq, | 4678 | if (move_one_task(&env)) |
4718 | sd, CPU_IDLE)) | ||
4719 | schedstat_inc(sd, alb_pushed); | 4679 | schedstat_inc(sd, alb_pushed); |
4720 | else | 4680 | else |
4721 | schedstat_inc(sd, alb_failed); | 4681 | schedstat_inc(sd, alb_failed); |
@@ -4947,8 +4907,6 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, | |||
4947 | 4907 | ||
4948 | static DEFINE_SPINLOCK(balancing); | 4908 | static DEFINE_SPINLOCK(balancing); |
4949 | 4909 | ||
4950 | static unsigned long __read_mostly max_load_balance_interval = HZ/10; | ||
4951 | |||
4952 | /* | 4910 | /* |
4953 | * Scale the max load_balance interval with the number of CPUs in the system. | 4911 | * Scale the max load_balance interval with the number of CPUs in the system. |
4954 | * This trades load-balance latency on larger machines for less cross talk. | 4912 | * This trades load-balance latency on larger machines for less cross talk. |
@@ -5342,7 +5300,6 @@ static void set_curr_task_fair(struct rq *rq) | |||
5342 | void init_cfs_rq(struct cfs_rq *cfs_rq) | 5300 | void init_cfs_rq(struct cfs_rq *cfs_rq) |
5343 | { | 5301 | { |
5344 | cfs_rq->tasks_timeline = RB_ROOT; | 5302 | cfs_rq->tasks_timeline = RB_ROOT; |
5345 | INIT_LIST_HEAD(&cfs_rq->tasks); | ||
5346 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | 5303 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); |
5347 | #ifndef CONFIG_64BIT | 5304 | #ifndef CONFIG_64BIT |
5348 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | 5305 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; |
@@ -5614,6 +5571,7 @@ __init void init_sched_fair_class(void) | |||
5614 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); | 5571 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); |
5615 | 5572 | ||
5616 | #ifdef CONFIG_NO_HZ | 5573 | #ifdef CONFIG_NO_HZ |
5574 | nohz.next_balance = jiffies; | ||
5617 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); | 5575 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); |
5618 | cpu_notifier(sched_ilb_notifier, 0); | 5576 | cpu_notifier(sched_ilb_notifier, 0); |
5619 | #endif | 5577 | #endif |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f42ae7fb5ec5..b60dad720173 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -778,12 +778,9 @@ static inline int balance_runtime(struct rt_rq *rt_rq) | |||
778 | 778 | ||
779 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | 779 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) |
780 | { | 780 | { |
781 | int i, idle = 1; | 781 | int i, idle = 1, throttled = 0; |
782 | const struct cpumask *span; | 782 | const struct cpumask *span; |
783 | 783 | ||
784 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) | ||
785 | return 1; | ||
786 | |||
787 | span = sched_rt_period_mask(); | 784 | span = sched_rt_period_mask(); |
788 | for_each_cpu(i, span) { | 785 | for_each_cpu(i, span) { |
789 | int enqueue = 0; | 786 | int enqueue = 0; |
@@ -818,12 +815,17 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
818 | if (!rt_rq_throttled(rt_rq)) | 815 | if (!rt_rq_throttled(rt_rq)) |
819 | enqueue = 1; | 816 | enqueue = 1; |
820 | } | 817 | } |
818 | if (rt_rq->rt_throttled) | ||
819 | throttled = 1; | ||
821 | 820 | ||
822 | if (enqueue) | 821 | if (enqueue) |
823 | sched_rt_rq_enqueue(rt_rq); | 822 | sched_rt_rq_enqueue(rt_rq); |
824 | raw_spin_unlock(&rq->lock); | 823 | raw_spin_unlock(&rq->lock); |
825 | } | 824 | } |
826 | 825 | ||
826 | if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) | ||
827 | return 1; | ||
828 | |||
827 | return idle; | 829 | return idle; |
828 | } | 830 | } |
829 | 831 | ||
@@ -855,8 +857,30 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
855 | return 0; | 857 | return 0; |
856 | 858 | ||
857 | if (rt_rq->rt_time > runtime) { | 859 | if (rt_rq->rt_time > runtime) { |
858 | rt_rq->rt_throttled = 1; | 860 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); |
859 | printk_once(KERN_WARNING "sched: RT throttling activated\n"); | 861 | |
862 | /* | ||
863 | * Don't actually throttle groups that have no runtime assigned | ||
864 | * but accrue some time due to boosting. | ||
865 | */ | ||
866 | if (likely(rt_b->rt_runtime)) { | ||
867 | static bool once = false; | ||
868 | |||
869 | rt_rq->rt_throttled = 1; | ||
870 | |||
871 | if (!once) { | ||
872 | once = true; | ||
873 | printk_sched("sched: RT throttling activated\n"); | ||
874 | } | ||
875 | } else { | ||
876 | /* | ||
877 | * In case we did anyway, make it go away, | ||
878 | * replenishment is a joke, since it will replenish us | ||
879 | * with exactly 0 ns. | ||
880 | */ | ||
881 | rt_rq->rt_time = 0; | ||
882 | } | ||
883 | |||
860 | if (rt_rq_throttled(rt_rq)) { | 884 | if (rt_rq_throttled(rt_rq)) { |
861 | sched_rt_rq_dequeue(rt_rq); | 885 | sched_rt_rq_dequeue(rt_rq); |
862 | return 1; | 886 | return 1; |
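The new branch separates a group that genuinely exhausted its budget from one that only accumulated rt_time because a PI-boosted task ran on a zero-runtime group: the former is throttled (with the one-time warning now routed through printk_sched(), presumably so the message does not recurse into the scheduler from under the runqueue lock), the latter is never throttled and simply has its clock reset. A stand-alone model of that decision (numbers illustrative):

    #include <stdio.h>
    #include <stdbool.h>

    /* returns true if the rt_rq should be throttled */
    static bool check_throttle(unsigned long long *rt_time,
                               unsigned long long runtime,
                               unsigned long long rt_b_runtime)
    {
            if (*rt_time <= runtime)
                    return false;
            if (rt_b_runtime)               /* real budget exhausted */
                    return true;
            *rt_time = 0;                   /* zero-budget group boosted via PI */
            return false;
    }

    int main(void)
    {
            unsigned long long t1 = 960000000ULL;   /* 960 ms consumed */
            unsigned long long t2 = 5000000ULL;     /* 5 ms consumed while boosted */
            bool th1 = check_throttle(&t1, 950000000ULL, 950000000ULL);
            bool th2 = check_throttle(&t2, 0ULL, 0ULL);

            printf("over-budget group:          throttle=%d\n", th1);
            printf("zero-budget boosted group:  throttle=%d, rt_time reset to %llu\n",
                   th2, t2);
            return 0;
    }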
@@ -884,7 +908,8 @@ static void update_curr_rt(struct rq *rq) | |||
884 | if (unlikely((s64)delta_exec < 0)) | 908 | if (unlikely((s64)delta_exec < 0)) |
885 | delta_exec = 0; | 909 | delta_exec = 0; |
886 | 910 | ||
887 | schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); | 911 | schedstat_set(curr->se.statistics.exec_max, |
912 | max(curr->se.statistics.exec_max, delta_exec)); | ||
888 | 913 | ||
889 | curr->se.sum_exec_runtime += delta_exec; | 914 | curr->se.sum_exec_runtime += delta_exec; |
890 | account_group_exec_runtime(curr, delta_exec); | 915 | account_group_exec_runtime(curr, delta_exec); |
@@ -1972,7 +1997,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) | |||
1972 | if (--p->rt.time_slice) | 1997 | if (--p->rt.time_slice) |
1973 | return; | 1998 | return; |
1974 | 1999 | ||
1975 | p->rt.time_slice = DEF_TIMESLICE; | 2000 | p->rt.time_slice = RR_TIMESLICE; |
1976 | 2001 | ||
1977 | /* | 2002 | /* |
1978 | * Requeue to the end of queue if we are not the only element | 2003 | * Requeue to the end of queue if we are not the only element |
@@ -2000,7 +2025,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) | |||
2000 | * Time slice is 0 for SCHED_FIFO tasks | 2025 | * Time slice is 0 for SCHED_FIFO tasks |
2001 | */ | 2026 | */ |
2002 | if (task->policy == SCHED_RR) | 2027 | if (task->policy == SCHED_RR) |
2003 | return DEF_TIMESLICE; | 2028 | return RR_TIMESLICE; |
2004 | else | 2029 | else |
2005 | return 0; | 2030 | return 0; |
2006 | } | 2031 | } |
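Both RR hunks swap the scheduler-private DEF_TIMESLICE for RR_TIMESLICE; the quantum is still 100 ms expressed in ticks, so the tick count depends on CONFIG_HZ (the new macro is defined outside this diff, apparently as (100 * HZ / 1000)). A quick check of the arithmetic for a few common HZ values:

    #include <stdio.h>

    /* RR_TIMESLICE / DEF_TIMESLICE style definition: 100 ms worth of ticks */
    static int rr_timeslice(int hz)
    {
            return 100 * hz / 1000;
    }

    int main(void)
    {
            int configs[] = { 100, 250, 300, 1000 };
            int i;

            for (i = 0; i < 4; i++)
                    printf("HZ=%4d -> RR_TIMESLICE = %3d ticks (%d ms)\n",
                           configs[i], rr_timeslice(configs[i]),
                           rr_timeslice(configs[i]) * 1000 / configs[i]);
            return 0;
    }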
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b4cd6d8ea150..42b1f304b044 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -36,11 +36,7 @@ extern __read_mostly int scheduler_running; | |||
36 | 36 | ||
37 | /* | 37 | /* |
38 | * These are the 'tuning knobs' of the scheduler: | 38 | * These are the 'tuning knobs' of the scheduler: |
39 | * | ||
40 | * default timeslice is 100 msecs (used only for SCHED_RR tasks). | ||
41 | * Timeslices get refilled after they expire. | ||
42 | */ | 39 | */ |
43 | #define DEF_TIMESLICE (100 * HZ / 1000) | ||
44 | 40 | ||
45 | /* | 41 | /* |
46 | * single value that denotes runtime == period, ie unlimited time. | 42 | * single value that denotes runtime == period, ie unlimited time. |
@@ -216,9 +212,6 @@ struct cfs_rq { | |||
216 | struct rb_root tasks_timeline; | 212 | struct rb_root tasks_timeline; |
217 | struct rb_node *rb_leftmost; | 213 | struct rb_node *rb_leftmost; |
218 | 214 | ||
219 | struct list_head tasks; | ||
220 | struct list_head *balance_iterator; | ||
221 | |||
222 | /* | 215 | /* |
223 | * 'curr' points to currently running entity on this cfs_rq. | 216 | * 'curr' points to currently running entity on this cfs_rq. |
224 | * It is set to NULL otherwise (i.e when none are currently running). | 217 | * It is set to NULL otherwise (i.e when none are currently running). |
@@ -246,11 +239,6 @@ struct cfs_rq { | |||
246 | 239 | ||
247 | #ifdef CONFIG_SMP | 240 | #ifdef CONFIG_SMP |
248 | /* | 241 | /* |
249 | * the part of load.weight contributed by tasks | ||
250 | */ | ||
251 | unsigned long task_weight; | ||
252 | |||
253 | /* | ||
254 | * h_load = weight * f(tg) | 242 | * h_load = weight * f(tg) |
255 | * | 243 | * |
256 | * Where f(tg) is the recursive weight fraction assigned to | 244 | * Where f(tg) is the recursive weight fraction assigned to |
@@ -424,6 +412,8 @@ struct rq { | |||
424 | int cpu; | 412 | int cpu; |
425 | int online; | 413 | int online; |
426 | 414 | ||
415 | struct list_head cfs_tasks; | ||
416 | |||
427 | u64 rt_avg; | 417 | u64 rt_avg; |
428 | u64 age_stamp; | 418 | u64 age_stamp; |
429 | u64 idle_stamp; | 419 | u64 idle_stamp; |
@@ -462,7 +452,6 @@ struct rq { | |||
462 | unsigned int yld_count; | 452 | unsigned int yld_count; |
463 | 453 | ||
464 | /* schedule() stats */ | 454 | /* schedule() stats */ |
465 | unsigned int sched_switch; | ||
466 | unsigned int sched_count; | 455 | unsigned int sched_count; |
467 | unsigned int sched_goidle; | 456 | unsigned int sched_goidle; |
468 | 457 | ||
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 2a581ba8e190..903ffa9e8872 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c | |||
@@ -32,9 +32,9 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
32 | 32 | ||
33 | /* runqueue-specific stats */ | 33 | /* runqueue-specific stats */ |
34 | seq_printf(seq, | 34 | seq_printf(seq, |
35 | "cpu%d %u %u %u %u %u %u %llu %llu %lu", | 35 | "cpu%d %u 0 %u %u %u %u %llu %llu %lu", |
36 | cpu, rq->yld_count, | 36 | cpu, rq->yld_count, |
37 | rq->sched_switch, rq->sched_count, rq->sched_goidle, | 37 | rq->sched_count, rq->sched_goidle, |
38 | rq->ttwu_count, rq->ttwu_local, | 38 | rq->ttwu_count, rq->ttwu_local, |
39 | rq->rq_cpu_time, | 39 | rq->rq_cpu_time, |
40 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); | 40 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); |
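Dropping the sched_switch counter does not change the /proc/schedstat layout: the second per-runqueue field is now a literal 0, so existing parsers keep their field offsets. A sketch of reading one runqueue line with the placeholder in place (the sample values are made up):

    #include <stdio.h>

    int main(void)
    {
            /* one runqueue line in the format printed by show_schedstat() above */
            const char *line = "cpu0 1523 0 88192 12345 45000 21000 "
                               "123456789 987654321 55000";
            unsigned int cpu, yld, legacy, sched_cnt, goidle, ttwu, ttwu_local;
            unsigned long long cpu_time, run_delay;
            unsigned long pcount;

            if (sscanf(line, "cpu%u %u %u %u %u %u %u %llu %llu %lu",
                       &cpu, &yld, &legacy, &sched_cnt, &goidle,
                       &ttwu, &ttwu_local, &cpu_time, &run_delay, &pcount) == 10)
                    printf("cpu%u: %u schedule() calls, legacy field=%u\n",
                           cpu, sched_cnt, legacy);
            return 0;
    }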
diff --git a/kernel/softirq.c b/kernel/softirq.c index 8afc6a8d4d7c..15352e0cbd5d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -343,7 +343,7 @@ void irq_exit(void) | |||
343 | tick_nohz_irq_exit(); | 343 | tick_nohz_irq_exit(); |
344 | #endif | 344 | #endif |
345 | rcu_irq_exit(); | 345 | rcu_irq_exit(); |
346 | preempt_enable_no_resched(); | 346 | sched_preempt_enable_no_resched(); |
347 | } | 347 | } |
348 | 348 | ||
349 | /* | 349 | /* |
@@ -740,9 +740,7 @@ static int run_ksoftirqd(void * __bind_cpu) | |||
740 | while (!kthread_should_stop()) { | 740 | while (!kthread_should_stop()) { |
741 | preempt_disable(); | 741 | preempt_disable(); |
742 | if (!local_softirq_pending()) { | 742 | if (!local_softirq_pending()) { |
743 | preempt_enable_no_resched(); | 743 | schedule_preempt_disabled(); |
744 | schedule(); | ||
745 | preempt_disable(); | ||
746 | } | 744 | } |
747 | 745 | ||
748 | __set_current_state(TASK_RUNNING); | 746 | __set_current_state(TASK_RUNNING); |
@@ -757,7 +755,7 @@ static int run_ksoftirqd(void * __bind_cpu) | |||
757 | if (local_softirq_pending()) | 755 | if (local_softirq_pending()) |
758 | __do_softirq(); | 756 | __do_softirq(); |
759 | local_irq_enable(); | 757 | local_irq_enable(); |
760 | preempt_enable_no_resched(); | 758 | sched_preempt_enable_no_resched(); |
761 | cond_resched(); | 759 | cond_resched(); |
762 | preempt_disable(); | 760 | preempt_disable(); |
763 | rcu_note_context_switch((long)__bind_cpu); | 761 | rcu_note_context_switch((long)__bind_cpu); |