Diffstat (limited to 'kernel')
 kernel/auditsc.c      |   2
 kernel/futex.c        |   1
 kernel/irq/manage.c   |   2
 kernel/module.c       |   3
 kernel/posix-timers.c |   9
 kernel/printk.c       |  13
 kernel/sched.c        |  88
 kernel/sched_debug.c  |   3
 kernel/sched_fair.c   | 106
 kernel/sched_rt.c     |  11
 kernel/signal.c       |   4
 kernel/sysctl.c       |  34
 kernel/workqueue.c    |   2
 13 files changed, 199 insertions, 79 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3401293359e8..04f3ffb8d9d4 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2023,7 +2023,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 		axp->d.next = ctx->aux_pids;
 		ctx->aux_pids = (void *)axp;
 	}
-	BUG_ON(axp->pid_count > AUDIT_AUX_PIDS);
+	BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS);
 
 	axp->target_pid[axp->pid_count] = t->tgid;
 	selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]);
diff --git a/kernel/futex.c b/kernel/futex.c
index 3415e9ad1391..e8935b195e88 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1670,6 +1670,7 @@ pi_faulted:
 					  attempt);
 		if (ret)
 			goto out;
+		uval = 0;
 		goto retry_unlocked;
 	}
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 203a518b6f14..853aefbd184b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -462,7 +462,9 @@ void free_irq(unsigned int irq, void *dev_id)
 		 * We do this after actually deregistering it, to make sure that
 		 * a 'real' IRQ doesn't run in parallel with our fake
 		 */
+		local_irq_save(flags);
 		handler(irq, dev_id);
+		local_irq_restore(flags);
 	}
 #endif
 }
diff --git a/kernel/module.c b/kernel/module.c
index 33c04ad51175..db0ead0363e2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -784,8 +784,7 @@ EXPORT_SYMBOL_GPL(symbol_put_addr);
 static ssize_t show_refcnt(struct module_attribute *mattr,
 			   struct module *mod, char *buffer)
 {
-	/* sysfs holds a reference */
-	return sprintf(buffer, "%u\n", module_refcount(mod)-1);
+	return sprintf(buffer, "%u\n", module_refcount(mod));
 }
 
 static struct module_attribute refcnt = {
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 55b3761edaa9..7a15afb73ed0 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -547,9 +547,9 @@ sys_timer_create(const clockid_t which_clock,
 				new_timer->it_process = process;
 				list_add(&new_timer->list,
 					 &process->signal->posix_timers);
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
 				if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
 					get_task_struct(process);
+				spin_unlock_irqrestore(&process->sighand->siglock, flags);
 			} else {
 				spin_unlock_irqrestore(&process->sighand->siglock, flags);
 				process = NULL;
@@ -605,13 +605,14 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
 	timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
-		spin_unlock(&idr_lock);
 
 		if ((timr->it_id != timer_id) || !(timr->it_process) ||
 				timr->it_process->tgid != current->tgid) {
-			unlock_timer(timr, *flags);
+			spin_unlock(&timr->it_lock);
+			spin_unlock_irqrestore(&idr_lock, *flags);
 			timr = NULL;
-		}
+		} else
+			spin_unlock(&idr_lock);
 	} else
 		spin_unlock_irqrestore(&idr_lock, *flags);
 
diff --git a/kernel/printk.c b/kernel/printk.c
index bd2cd062878d..8451dfc31d25 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1083,6 +1083,19 @@ int unregister_console(struct console *console)
 }
 EXPORT_SYMBOL(unregister_console);
 
+static int __init disable_boot_consoles(void)
+{
+	if (console_drivers != NULL) {
+		if (console_drivers->flags & CON_BOOT) {
+			printk(KERN_INFO "turn off boot console %s%d\n",
+				console_drivers->name, console_drivers->index);
+			return unregister_console(console_drivers);
+		}
+	}
+	return 0;
+}
+late_initcall(disable_boot_consoles);
+
 /**
  * tty_write_message - write a message to a certain tty, not just the console.
  * @tty: the destination tty_struct
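
For illustration, the disable_boot_consoles() initcall above only looks at the head of the console list, so it targets an early console that was registered with CON_BOOT and is still in front when late initcalls run. A hedged kernel-style sketch of such a console follows; everything named "example_" is invented, and only the struct console fields, the CON_BOOT/CON_PRINTBUFFER flags and register_console() reflect the real interface:

/* Hypothetical early console that the late_initcall() above would
 * eventually unregister; the write routine and names are made up. */
static void example_early_write(struct console *con, const char *s,
				unsigned int count)
{
	/* poll characters out to some always-available early device */
}

static struct console example_early_console = {
	.name	= "earlyex",
	.write	= example_early_write,
	.flags	= CON_PRINTBUFFER | CON_BOOT,
	.index	= -1,
};

/* called from the platform's early setup code */
static void __init example_setup_early_console(void)
{
	register_console(&example_early_console);
}
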
diff --git a/kernel/sched.c b/kernel/sched.c
index 45e17b83b7f1..9fe473a190de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -262,7 +262,8 @@ struct rq {
 	s64 clock_max_delta;
 
 	unsigned int clock_warps, clock_overflows;
-	unsigned int clock_unstable_events;
+	u64 idle_clock;
+	unsigned int clock_deep_idle_events;
 	u64 tick_timestamp;
 
 	atomic_t nr_iowait;
@@ -556,18 +557,40 @@ static inline struct rq *this_rq_lock(void)
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq = cpu_rq(smp_processor_id());
 
-	rq = task_rq_lock(current, &flags);
-	rq->prev_clock_raw = sched_clock();
-	rq->clock_unstable_events++;
-	task_rq_unlock(rq, &flags);
+	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	spin_unlock(&rq->lock);
+	rq->clock_deep_idle_events++;
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+	struct rq *rq = cpu_rq(smp_processor_id());
+	u64 now = sched_clock();
+
+	rq->idle_clock += delta_ns;
+	/*
+	 * Override the previous timestamp and ignore all
+	 * sched_clock() deltas that occured while we idled,
+	 * and use the PM-provided delta_ns to advance the
+	 * rq clock:
+	 */
+	spin_lock(&rq->lock);
+	rq->prev_clock_raw = now;
+	rq->clock += delta_ns;
+	spin_unlock(&rq->lock);
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
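
The two exports above are meant to bracket a deep-idle period during which sched_clock() (typically TSC-based) cannot be trusted. A hedged sketch of how a PM/idle path would be expected to call them; read_persistent_ns() and enter_deep_idle() are placeholders for whatever trusted clock and idle entry point the platform actually has:

/* Sketch only: the "placeholder" helpers do not exist in the kernel;
 * they stand in for the platform's trusted clock source and its
 * deep-idle entry point. */
static void example_deep_idle(void)
{
	u64 t0, t1;

	local_irq_disable();
	sched_clock_idle_sleep_event();		/* rq clock is now suspect */

	t0 = read_persistent_ns();		/* placeholder */
	enter_deep_idle();			/* placeholder: TSC may stop */
	t1 = read_persistent_ns();		/* placeholder */

	/* hand the scheduler the externally measured idle time */
	sched_clock_idle_wakeup_event(t1 - t0);
	local_irq_enable();
}
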
@@ -2157,12 +2180,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	if (task_running(rq, p))
 		return 0;
 
-	/*
-	 * Aggressive migration if too many balance attempts have failed:
-	 */
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
-		return 1;
-
 	return 1;
 }
 
@@ -2494,7 +2511,7 @@ group_next:
 	 * a think about bumping its value to force at least one task to be
 	 * moved
 	 */
-	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) {
 		unsigned long tmp, pwr_now, pwr_move;
 		unsigned int imbn;
 
@@ -3020,6 +3037,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	struct sched_domain *sd;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3056,8 +3074,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		if (sd->flags & SD_SERIALIZE)
 			spin_unlock(&balancing);
 out:
-		if (time_after(next_balance, sd->last_balance + interval))
+		if (time_after(next_balance, sd->last_balance + interval)) {
 			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
 
 		/*
 		 * Stop the load balance at this level. There is another
@@ -3067,7 +3087,14 @@ out:
 		if (!balance)
 			break;
 	}
-	rq->next_balance = next_balance;
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the cpu is attached to null domain for ex, it will not be
+	 * updated.
+	 */
+	if (likely(update_next_balance))
+		rq->next_balance = next_balance;
 }
 
 /*
@@ -4884,14 +4911,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 static inline void sched_init_granularity(void)
 {
 	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long gran_limit = 100000000;
+	const unsigned long limit = 100000000;
+
+	sysctl_sched_min_granularity *= factor;
+	if (sysctl_sched_min_granularity > limit)
+		sysctl_sched_min_granularity = limit;
 
-	sysctl_sched_granularity *= factor;
-	if (sysctl_sched_granularity > gran_limit)
-		sysctl_sched_granularity = gran_limit;
+	sysctl_sched_latency *= factor;
+	if (sysctl_sched_latency > limit)
+		sysctl_sched_latency = limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+	sysctl_sched_runtime_limit = sysctl_sched_latency;
+	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP
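
With the rewrite above, the boot-time scaling works out as follows on, say, a 4-CPU box: factor = 1 + ilog2(4) = 3, so sched_latency becomes 60 ms, sched_min_granularity 6 ms, the runtime limit equals the latency, and the wakeup granularity is half the scaled minimum granularity (3 ms). A small self-contained check of that arithmetic, assuming the 20 ms / 2 ms defaults from sched_fair.c:

/* Userspace re-computation of sched_init_granularity() with the
 * defaults introduced by this patch; cpus is just an example value. */
#include <stdio.h>

static unsigned int ilog2_u(unsigned int n)
{
	unsigned int r = 0;

	while (n >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned int cpus = 4;			/* example CPU count */
	unsigned int factor = 1 + ilog2_u(cpus);
	const unsigned long limit = 100000000;	/* 100 ms cap */
	unsigned long latency = 20000000;	/* sysctl_sched_latency */
	unsigned long min_gran = 2000000;	/* sysctl_sched_min_granularity */

	min_gran *= factor;
	if (min_gran > limit)
		min_gran = limit;
	latency *= factor;
	if (latency > limit)
		latency = limit;

	printf("latency %lu ns, min_gran %lu ns, runtime_limit %lu ns, "
	       "wakeup_gran %lu ns\n",
	       latency, min_gran, latency, min_gran / 2);
	return 0;
}
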
@@ -5234,15 +5265,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 static struct ctl_table sd_ctl_dir[] = {
 	{
 		.procname	= "sched_domain",
-		.mode		= 0755,
+		.mode		= 0555,
 	},
 	{0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
 	{
+		.ctl_name	= CTL_KERN,
 		.procname	= "kernel",
-		.mode		= 0755,
+		.mode		= 0555,
 		.child		= sd_ctl_dir,
 	},
 	{0,},
@@ -5318,7 +5350,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 	for_each_domain(cpu, sd) {
 		snprintf(buf, 32, "domain%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_domain_table(sd);
 		entry++;
 		i++;
@@ -5338,7 +5370,7 @@ static void init_sched_domain_sysctl(void)
 	for (i = 0; i < cpu_num; i++, entry++) {
 		snprintf(buf, 32, "cpu%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_cpu_table(i);
 	}
 	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 87e524762b85..ab18f45f2ab2 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -154,10 +154,11 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(next_balance);
 	P(curr->pid);
 	P(clock);
+	P(idle_clock);
 	P(prev_clock_raw);
 	P(clock_warps);
 	P(clock_overflows);
-	P(clock_unstable_events);
+	P(clock_deep_idle_events);
 	P(clock_max_delta);
 	P(cpu_load[0]);
 	P(cpu_load[1]);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fedbb51bba96..ee3771850aaf 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -15,34 +15,42 @@
  *
  * Scaled math optimizations by Thomas Gleixner
  * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
+ *
+ * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  */
 
 /*
- * Preemption granularity:
- * (default: 2 msec, units: nanoseconds)
+ * Targeted preemption latency for CPU-bound tasks:
+ * (default: 20ms, units: nanoseconds)
  *
- * NOTE: this granularity value is not the same as the concept of
- * 'timeslice length' - timeslices in CFS will typically be somewhat
- * larger than this value. (to see the precise effective timeslice
- * length of your workload, run vmstat and monitor the context-switches
- * field)
+ * NOTE: this latency value is not the same as the concept of
+ * 'timeslice length' - timeslices in CFS are of variable length.
+ * (to see the precise effective timeslice length of your workload,
+ *  run vmstat and monitor the context-switches field)
  *
  * On SMP systems the value of this is multiplied by the log2 of the
  * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way
  * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
+ * Targeted preemption latency for CPU-bound tasks:
+ */
+unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
+
+/*
+ * Minimal preemption granularity for CPU-bound tasks:
+ * (default: 2 msec, units: nanoseconds)
  */
-unsigned int sysctl_sched_granularity __read_mostly = 2000000000ULL/HZ;
+unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
 
 /*
  * SCHED_BATCH wake-up granularity.
- * (default: 10 msec, units: nanoseconds)
+ * (default: 25 msec, units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
 * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
-	10000000000ULL/HZ;
+unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
@@ -52,12 +60,12 @@ unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000000ULL/HZ;
+unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL;
 
 unsigned int sysctl_sched_stat_granularity __read_mostly;
 
 /*
- * Initialized in sched_init_granularity():
+ * Initialized in sched_init_granularity() [to 5 times the base granularity]:
  */
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 
@@ -214,6 +222,49 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
  */
 
 /*
+ * Calculate the preemption granularity needed to schedule every
+ * runnable task once per sysctl_sched_latency amount of time.
+ * (down to a sensible low limit on granularity)
+ *
+ * For example, if there are 2 tasks running and latency is 10 msecs,
+ * we switch tasks every 5 msecs. If we have 3 tasks running, we have
+ * to switch tasks every 3.33 msecs to get a 10 msecs observed latency
+ * for each task. We do finer and finer scheduling up to until we
+ * reach the minimum granularity value.
+ *
+ * To achieve this we use the following dynamic-granularity rule:
+ *
+ *	gran = lat/nr - lat/nr/nr
+ *
+ * This comes out of the following equations:
+ *
+ *	kA1 + gran = kB1
+ *	kB2 + gran = kA2
+ *	kA2 = kA1
+ *	kB2 = kB1 - d + d/nr
+ *	lat = d * nr
+ *
+ * Where 'k' is key, 'A' is task A (waiting), 'B' is task B (running),
+ * '1' is start of time, '2' is end of time, 'd' is delay between
+ * 1 and 2 (during which task B was running), 'nr' is number of tasks
+ * running, 'lat' is the the period of each task. ('lat' is the
+ * sched_latency that we aim for.)
+ */
+static long
+sched_granularity(struct cfs_rq *cfs_rq)
+{
+	unsigned int gran = sysctl_sched_latency;
+	unsigned int nr = cfs_rq->nr_running;
+
+	if (nr > 1) {
+		gran = gran/nr - gran/nr/nr;
+		gran = max(gran, sysctl_sched_min_granularity);
+	}
+
+	return gran;
+}
+
+/*
  * We rescale the rescheduling granularity of tasks according to their
  * nice level, but only linearly, not exponentially:
  */
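
Plugging the defaults into the gran = lat/nr - lat/nr/nr rule above: with the 20 ms latency, two runnable tasks give 20/2 - 20/2/2 = 5 ms, three give roughly 4.4 ms, and the result is clamped to the 2 ms minimum once enough tasks are runnable (nine, with these defaults). A standalone version of the same integer arithmetic, assuming those defaults:

/* Standalone check of the dynamic-granularity rule used by
 * sched_granularity(), assuming the 20 ms / 2 ms defaults above. */
#include <stdio.h>

static unsigned int sched_gran(unsigned int latency,
			       unsigned int min_gran, unsigned int nr)
{
	unsigned int gran = latency;

	if (nr > 1) {
		gran = gran / nr - gran / nr / nr;
		if (gran < min_gran)
			gran = min_gran;
	}
	return gran;
}

int main(void)
{
	unsigned int nr;

	for (nr = 1; nr <= 10; nr++)
		printf("nr=%2u -> gran=%u ns\n",
		       nr, sched_gran(20000000, 2000000, nr));
	return 0;
}
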
@@ -303,10 +354,10 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	delta_fair = calc_delta_fair(delta_exec, lw);
 	delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
 
-	if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) {
-		delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec);
-		delta = calc_delta_mine(delta, curr->load.weight, lw);
-		delta = min((u64)delta, cfs_rq->sleeper_bonus);
+	if (cfs_rq->sleeper_bonus > sysctl_sched_latency) {
+		delta = min((u64)delta_mine, cfs_rq->sleeper_bonus);
+		delta = min(delta, (unsigned long)(
+			(long)sysctl_sched_runtime_limit - curr->wait_runtime));
 		cfs_rq->sleeper_bonus -= delta;
 		delta_mine -= delta;
 	}
@@ -494,6 +545,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	unsigned long load = cfs_rq->load.weight, delta_fair;
 	long prev_runtime;
 
+	/*
+	 * Do not boost sleepers if there's too much bonus 'in flight'
+	 * already:
+	 */
+	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
+		return;
+
 	if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG)
 		load = rq_of(cfs_rq)->cpu_load[2];
 
@@ -513,16 +571,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 	prev_runtime = se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta_fair);
+	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 	delta_fair = se->wait_runtime - prev_runtime;
 
 	/*
 	 * Track the amount of bonus we've given to sleepers:
 	 */
 	cfs_rq->sleeper_bonus += delta_fair;
-	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
-		cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit;
-
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -686,7 +741,8 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	if (next == curr)
 		return;
 
-	__check_preempt_curr_fair(cfs_rq, next, curr, sysctl_sched_granularity);
+	__check_preempt_curr_fair(cfs_rq, next, curr,
+			sched_granularity(cfs_rq));
 }
 
 /**************************************************
@@ -1031,7 +1087,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	 * it will preempt the parent:
 	 */
 	p->se.fair_key = current->se.fair_key -
-		niced_granularity(&rq->curr->se, sysctl_sched_granularity) - 1;
+		niced_granularity(&rq->curr->se, sched_granularity(cfs_rq)) - 1;
 	/*
 	 * The first wait is dominated by the child-runs-first logic,
 	 * so do not credit it with that waiting time yet:
@@ -1044,7 +1100,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	 * -granularity/2, so initialize the task with that:
 	 */
 	if (sysctl_sched_features & SCHED_FEAT_START_DEBIT)
-		p->se.wait_runtime = -(sysctl_sched_granularity / 2);
+		p->se.wait_runtime = -(sched_granularity(cfs_rq) / 2);
 
 	__enqueue_entity(cfs_rq, se);
 }
@@ -1057,7 +1113,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
  */
 static void set_curr_task_fair(struct rq *rq)
 {
-	struct sched_entity *se = &rq->curr.se;
+	struct sched_entity *se = &rq->curr->se;
 
 	for_each_sched_entity(se)
 		set_next_entity(cfs_rq_of(se), se);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index dcdcad632fd9..4b87476a02d0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -207,10 +207,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p)
 		return;
 
 	p->time_slice = static_prio_timeslice(p->static_prio);
-	set_tsk_need_resched(p);
 
-	/* put it at the end of the queue: */
-	requeue_task_rt(rq, p);
+	/*
+	 * Requeue to the end of queue if we are not the only element
+	 * on the queue:
+	 */
+	if (p->run_list.prev != p->run_list.next) {
+		requeue_task_rt(rq, p);
+		set_tsk_need_resched(p);
+	}
 }
 
 static struct sched_class rt_sched_class __read_mostly = {
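
The new condition works because run_list is a node in a circular doubly linked list: when the task is the only entry on its priority queue, both prev and next point back at the queue head, so they compare equal and the requeue/resched is skipped. A small self-contained demonstration of that property with a hand-rolled circular list (plain userspace C, not the kernel's list.h):

/* Shows that a node's prev and next are equal exactly when it is the
 * only element of a circular doubly linked list. */
#include <stdio.h>

struct node {
	struct node *prev, *next;
};

static void list_init(struct node *head)
{
	head->prev = head->next = head;
}

static void list_add_tail(struct node *head, struct node *n)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

int main(void)
{
	struct node head, a, b;

	list_init(&head);
	list_add_tail(&head, &a);
	printf("only element: prev==next? %d\n", a.prev == a.next);

	list_add_tail(&head, &b);
	printf("two elements: prev==next? %d\n", a.prev == a.next);
	return 0;
}
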
diff --git a/kernel/signal.c b/kernel/signal.c
index b27c01a66448..ad63109e413c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -378,7 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 	/* We only dequeue private signals from ourselves, we don't let
 	 * signalfd steal them
 	 */
-	if (tsk == current)
+	if (likely(tsk == current))
 		signr = __dequeue_signal(&tsk->pending, mask, info);
 	if (!signr) {
 		signr = __dequeue_signal(&tsk->signal->shared_pending,
@@ -425,7 +425,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
 			tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
 	}
-	if ( signr &&
+	if (signr && likely(tsk == current) &&
 	    ((info->si_code & __SI_MASK) == __SI_TIMER) &&
 	    info->si_sys_private){
 		/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8bdb8c07e04f..6ace893c17c9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -27,7 +27,6 @@
 #include <linux/capability.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
-#include <linux/capability.h>
 #include <linux/smp_lock.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -223,8 +222,19 @@ static ctl_table kern_table[] = {
 #ifdef CONFIG_SCHED_DEBUG
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_granularity_ns",
-		.data		= &sysctl_sched_granularity,
+		.procname	= "sched_min_granularity_ns",
+		.data		= &sysctl_sched_min_granularity,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_sched_granularity_ns,
+		.extra2		= &max_sched_granularity_ns,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_latency_ns",
+		.data		= &sysctl_sched_latency,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
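
The CTL_UNNUMBERED entries above are reachable only by name under /proc/sys/kernel, and only on kernels built with CONFIG_SCHED_DEBUG. A hedged userspace sketch that reads the two new files; the paths follow directly from the procname fields, but whether they exist depends on the running kernel's configuration:

/* Read the new scheduler tunables by their /proc/sys names; assumes a
 * kernel with CONFIG_SCHED_DEBUG, otherwise the files are absent. */
#include <stdio.h>

static void show(const char *path)
{
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s = %s", path, buf);
	fclose(f);
}

int main(void)
{
	show("/proc/sys/kernel/sched_latency_ns");
	show("/proc/sys/kernel/sched_min_granularity_ns");
	return 0;
}
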
@@ -284,6 +294,15 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_features",
+		.data		= &sysctl_sched_features,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.ctl_name	= CTL_UNNUMBERED,
@@ -305,15 +324,6 @@ static ctl_table kern_table[] = {
 	},
 #endif
 	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_features",
-		.data		= &sysctl_sched_features,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-#endif
-	{
 		.ctl_name	= KERN_PANIC,
 		.procname	= "panic",
 		.data		= &panic_timeout,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 58e5c152a6bb..e080d1d744cc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -635,7 +635,7 @@ int keventd_up(void)
 int current_is_keventd(void)
 {
 	struct cpu_workqueue_struct *cwq;
-	int cpu = smp_processor_id(); /* preempt-safe: keventd is per-cpu */
+	int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
 	int ret = 0;
 
 	BUG_ON(!keventd_wq);