Diffstat (limited to 'kernel')
 kernel/auditsc.c      |   2
 kernel/futex.c        |   1
 kernel/irq/manage.c   |   2
 kernel/module.c       |   3
 kernel/posix-timers.c |   9
 kernel/printk.c       |  13
 kernel/sched.c        |  88
 kernel/sched_debug.c  |   3
 kernel/sched_fair.c   | 106
 kernel/sched_rt.c     |  11
 kernel/signal.c       |   4
 kernel/sysctl.c       |  34
 kernel/workqueue.c    |   2
 13 files changed, 199 insertions(+), 79 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3401293359e8..04f3ffb8d9d4 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2023,7 +2023,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
                 axp->d.next = ctx->aux_pids;
                 ctx->aux_pids = (void *)axp;
         }
-        BUG_ON(axp->pid_count > AUDIT_AUX_PIDS);
+        BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS);
 
         axp->target_pid[axp->pid_count] = t->tgid;
         selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]);
diff --git a/kernel/futex.c b/kernel/futex.c
index 3415e9ad1391..e8935b195e88 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1670,6 +1670,7 @@ pi_faulted:
                           attempt);
                 if (ret)
                         goto out;
+                uval = 0;
                 goto retry_unlocked;
         }
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 203a518b6f14..853aefbd184b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -462,7 +462,9 @@ void free_irq(unsigned int irq, void *dev_id)
                  * We do this after actually deregistering it, to make sure that
                  * a 'real' IRQ doesn't run in parallel with our fake
                  */
+                local_irq_save(flags);
                 handler(irq, dev_id);
+                local_irq_restore(flags);
         }
 #endif
 }
diff --git a/kernel/module.c b/kernel/module.c
index 33c04ad51175..db0ead0363e2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -784,8 +784,7 @@ EXPORT_SYMBOL_GPL(symbol_put_addr);
 static ssize_t show_refcnt(struct module_attribute *mattr,
                            struct module *mod, char *buffer)
 {
-        /* sysfs holds a reference */
-        return sprintf(buffer, "%u\n", module_refcount(mod)-1);
+        return sprintf(buffer, "%u\n", module_refcount(mod));
 }
 
 static struct module_attribute refcnt = {
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 55b3761edaa9..7a15afb73ed0 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -547,9 +547,9 @@ sys_timer_create(const clockid_t which_clock,
                         new_timer->it_process = process;
                         list_add(&new_timer->list,
                                  &process->signal->posix_timers);
-                        spin_unlock_irqrestore(&process->sighand->siglock, flags);
                         if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
                                 get_task_struct(process);
+                        spin_unlock_irqrestore(&process->sighand->siglock, flags);
                 } else {
                         spin_unlock_irqrestore(&process->sighand->siglock, flags);
                         process = NULL;
@@ -605,13 +605,14 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
         timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
         if (timr) {
                 spin_lock(&timr->it_lock);
-                spin_unlock(&idr_lock);
 
                 if ((timr->it_id != timer_id) || !(timr->it_process) ||
                                 timr->it_process->tgid != current->tgid) {
-                        unlock_timer(timr, *flags);
+                        spin_unlock(&timr->it_lock);
+                        spin_unlock_irqrestore(&idr_lock, *flags);
                         timr = NULL;
-                }
+                } else
+                        spin_unlock(&idr_lock);
         } else
                 spin_unlock_irqrestore(&idr_lock, *flags);
 
diff --git a/kernel/printk.c b/kernel/printk.c
index bd2cd062878d..8451dfc31d25 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1083,6 +1083,19 @@ int unregister_console(struct console *console)
 }
 EXPORT_SYMBOL(unregister_console);
 
+static int __init disable_boot_consoles(void)
+{
+        if (console_drivers != NULL) {
+                if (console_drivers->flags & CON_BOOT) {
+                        printk(KERN_INFO "turn off boot console %s%d\n",
+                                console_drivers->name, console_drivers->index);
+                        return unregister_console(console_drivers);
+                }
+        }
+        return 0;
+}
+late_initcall(disable_boot_consoles);
+
 /**
  * tty_write_message - write a message to a certain tty, not just the console.
  * @tty: the destination tty_struct
diff --git a/kernel/sched.c b/kernel/sched.c
index 45e17b83b7f1..9fe473a190de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -262,7 +262,8 @@ struct rq {
         s64 clock_max_delta;
 
         unsigned int clock_warps, clock_overflows;
-        unsigned int clock_unstable_events;
+        u64 idle_clock;
+        unsigned int clock_deep_idle_events;
         u64 tick_timestamp;
 
         atomic_t nr_iowait;
@@ -556,18 +557,40 @@ static inline struct rq *this_rq_lock(void)
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-        unsigned long flags;
-        struct rq *rq;
+        struct rq *rq = cpu_rq(smp_processor_id());
 
-        rq = task_rq_lock(current, &flags);
-        rq->prev_clock_raw = sched_clock();
-        rq->clock_unstable_events++;
-        task_rq_unlock(rq, &flags);
+        spin_lock(&rq->lock);
+        __update_rq_clock(rq);
+        spin_unlock(&rq->lock);
+        rq->clock_deep_idle_events++;
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+        struct rq *rq = cpu_rq(smp_processor_id());
+        u64 now = sched_clock();
+
+        rq->idle_clock += delta_ns;
+        /*
+         * Override the previous timestamp and ignore all
+         * sched_clock() deltas that occured while we idled,
+         * and use the PM-provided delta_ns to advance the
+         * rq clock:
+         */
+        spin_lock(&rq->lock);
+        rq->prev_clock_raw = now;
+        rq->clock += delta_ns;
+        spin_unlock(&rq->lock);
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
@@ -2157,12 +2180,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
         if (task_running(rq, p))
                 return 0;
 
-        /*
-         * Aggressive migration if too many balance attempts have failed:
-         */
-        if (sd->nr_balance_failed > sd->cache_nice_tries)
-                return 1;
-
         return 1;
 }
 
@@ -2494,7 +2511,7 @@ group_next:
          * a think about bumping its value to force at least one task to be
          * moved
          */
-        if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+        if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) {
                 unsigned long tmp, pwr_now, pwr_move;
                 unsigned int imbn;
 
@@ -3020,6 +3037,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
         struct sched_domain *sd;
         /* Earliest time when we have to do rebalance again */
         unsigned long next_balance = jiffies + 60*HZ;
+        int update_next_balance = 0;
 
         for_each_domain(cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3056,8 +3074,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
                 if (sd->flags & SD_SERIALIZE)
                         spin_unlock(&balancing);
 out:
-                if (time_after(next_balance, sd->last_balance + interval))
+                if (time_after(next_balance, sd->last_balance + interval)) {
                         next_balance = sd->last_balance + interval;
+                        update_next_balance = 1;
+                }
 
                 /*
                  * Stop the load balance at this level. There is another
@@ -3067,7 +3087,14 @@ out:
                 if (!balance)
                         break;
         }
-        rq->next_balance = next_balance;
+
+        /*
+         * next_balance will be updated only when there is a need.
+         * When the cpu is attached to null domain for ex, it will not be
+         * updated.
+         */
+        if (likely(update_next_balance))
+                rq->next_balance = next_balance;
 }
 
 /*
@@ -4884,14 +4911,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 static inline void sched_init_granularity(void)
 {
         unsigned int factor = 1 + ilog2(num_online_cpus());
-        const unsigned long gran_limit = 100000000;
+        const unsigned long limit = 100000000;
+
+        sysctl_sched_min_granularity *= factor;
+        if (sysctl_sched_min_granularity > limit)
+                sysctl_sched_min_granularity = limit;
 
-        sysctl_sched_granularity *= factor;
-        if (sysctl_sched_granularity > gran_limit)
-                sysctl_sched_granularity = gran_limit;
+        sysctl_sched_latency *= factor;
+        if (sysctl_sched_latency > limit)
+                sysctl_sched_latency = limit;
 
-        sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-        sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+        sysctl_sched_runtime_limit = sysctl_sched_latency;
+        sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP
@@ -5234,15 +5265,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 static struct ctl_table sd_ctl_dir[] = {
         {
                 .procname = "sched_domain",
-                .mode = 0755,
+                .mode = 0555,
         },
         {0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
         {
+                .ctl_name = CTL_KERN,
                 .procname = "kernel",
-                .mode = 0755,
+                .mode = 0555,
                 .child = sd_ctl_dir,
         },
         {0,},
@@ -5318,7 +5350,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
         for_each_domain(cpu, sd) {
                 snprintf(buf, 32, "domain%d", i);
                 entry->procname = kstrdup(buf, GFP_KERNEL);
-                entry->mode = 0755;
+                entry->mode = 0555;
                 entry->child = sd_alloc_ctl_domain_table(sd);
                 entry++;
                 i++;
@@ -5338,7 +5370,7 @@ static void init_sched_domain_sysctl(void)
         for (i = 0; i < cpu_num; i++, entry++) {
                 snprintf(buf, 32, "cpu%d", i);
                 entry->procname = kstrdup(buf, GFP_KERNEL);
-                entry->mode = 0755;
+                entry->mode = 0555;
                 entry->child = sd_alloc_ctl_cpu_table(i);
         }
         sd_sysctl_header = register_sysctl_table(sd_ctl_root);
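To see what the new sched_clock_idle_wakeup_event() bookkeeping above does to the per-CPU clock, here is a small userspace model (illustrative only, not part of the patch): across a deep-idle period the raw sched_clock() delta is discarded, prev_clock_raw is reset to "now", and the externally measured delta_ns advances both rq->clock and the new idle_clock counter.

/*
 * Toy model of the idle-wakeup fixup in sched_clock_idle_wakeup_event().
 * Field names mirror struct rq; everything else is a made-up example.
 */
#include <stdio.h>

struct toy_rq {
        unsigned long long clock;      /* like rq->clock */
        unsigned long long prev_raw;   /* like rq->prev_clock_raw */
        unsigned long long idle_clock; /* like rq->idle_clock */
};

/* Regular path: advance the rq clock by the raw clock delta. */
static void update_clock(struct toy_rq *rq, unsigned long long raw_now)
{
        rq->clock += raw_now - rq->prev_raw;
        rq->prev_raw = raw_now;
}

/* Idle-wakeup path: trust the PM-provided delta, not the raw delta. */
static void idle_wakeup(struct toy_rq *rq, unsigned long long raw_now,
                        unsigned long long delta_ns)
{
        rq->idle_clock += delta_ns;
        rq->prev_raw = raw_now;   /* ignore raw deltas accrued while idle */
        rq->clock += delta_ns;
}

int main(void)
{
        struct toy_rq rq = { .clock = 0, .prev_raw = 1000, .idle_clock = 0 };

        update_clock(&rq, 2000);          /* normal tick: +1000 ns           */
        idle_wakeup(&rq, 9999999, 5000);  /* slept ~5000 ns per the PM timer */
        update_clock(&rq, 10000999);      /* next tick: +1000 ns again       */

        printf("clock=%llu idle_clock=%llu\n", rq.clock, rq.idle_clock);
        return 0;                         /* prints clock=7000 idle_clock=5000 */
}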
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 87e524762b85..ab18f45f2ab2 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -154,10 +154,11 @@ static void print_cpu(struct seq_file *m, int cpu)
         P(next_balance);
         P(curr->pid);
         P(clock);
+        P(idle_clock);
         P(prev_clock_raw);
         P(clock_warps);
         P(clock_overflows);
-        P(clock_unstable_events);
+        P(clock_deep_idle_events);
         P(clock_max_delta);
         P(cpu_load[0]);
         P(cpu_load[1]);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fedbb51bba96..ee3771850aaf 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -15,34 +15,42 @@
  *
  *  Scaled math optimizations by Thomas Gleixner
  *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
+ *
+ *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  */
 
 /*
- * Preemption granularity:
- * (default: 2 msec, units: nanoseconds)
+ * Targeted preemption latency for CPU-bound tasks:
+ * (default: 20ms, units: nanoseconds)
  *
- * NOTE: this granularity value is not the same as the concept of
- * 'timeslice length' - timeslices in CFS will typically be somewhat
- * larger than this value. (to see the precise effective timeslice
- * length of your workload, run vmstat and monitor the context-switches
- * field)
+ * NOTE: this latency value is not the same as the concept of
+ * 'timeslice length' - timeslices in CFS are of variable length.
+ * (to see the precise effective timeslice length of your workload,
+ *  run vmstat and monitor the context-switches field)
  *
  * On SMP systems the value of this is multiplied by the log2 of the
  * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way
  * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
+ * Targeted preemption latency for CPU-bound tasks:
+ */
+unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
+
+/*
+ * Minimal preemption granularity for CPU-bound tasks:
+ * (default: 2 msec, units: nanoseconds)
  */
-unsigned int sysctl_sched_granularity __read_mostly = 2000000000ULL/HZ;
+unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
 
 /*
  * SCHED_BATCH wake-up granularity.
- * (default: 10 msec, units: nanoseconds)
+ * (default: 25 msec, units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
-                                                        10000000000ULL/HZ;
+unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
@@ -52,12 +60,12 @@ unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000000ULL/HZ;
+unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL;
 
 unsigned int sysctl_sched_stat_granularity __read_mostly;
 
 /*
- * Initialized in sched_init_granularity():
+ * Initialized in sched_init_granularity() [to 5 times the base granularity]:
  */
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 
@@ -214,6 +222,49 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
  */
 
 /*
+ * Calculate the preemption granularity needed to schedule every
+ * runnable task once per sysctl_sched_latency amount of time.
+ * (down to a sensible low limit on granularity)
+ *
+ * For example, if there are 2 tasks running and latency is 10 msecs,
+ * we switch tasks every 5 msecs. If we have 3 tasks running, we have
+ * to switch tasks every 3.33 msecs to get a 10 msecs observed latency
+ * for each task. We do finer and finer scheduling up to until we
+ * reach the minimum granularity value.
+ *
+ * To achieve this we use the following dynamic-granularity rule:
+ *
+ *    gran = lat/nr - lat/nr/nr
+ *
+ * This comes out of the following equations:
+ *
+ *    kA1 + gran = kB1
+ *    kB2 + gran = kA2
+ *    kA2 = kA1
+ *    kB2 = kB1 - d + d/nr
+ *    lat = d * nr
+ *
+ * Where 'k' is key, 'A' is task A (waiting), 'B' is task B (running),
+ * '1' is start of time, '2' is end of time, 'd' is delay between
+ * 1 and 2 (during which task B was running), 'nr' is number of tasks
+ * running, 'lat' is the the period of each task. ('lat' is the
+ * sched_latency that we aim for.)
+ */
+static long
+sched_granularity(struct cfs_rq *cfs_rq)
+{
+        unsigned int gran = sysctl_sched_latency;
+        unsigned int nr = cfs_rq->nr_running;
+
+        if (nr > 1) {
+                gran = gran/nr - gran/nr/nr;
+                gran = max(gran, sysctl_sched_min_granularity);
+        }
+
+        return gran;
+}
+
+/*
  * We rescale the rescheduling granularity of tasks according to their
  * nice level, but only linearly, not exponentially:
  */
@@ -303,10 +354,10 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
         delta_fair = calc_delta_fair(delta_exec, lw);
         delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
 
-        if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) {
-                delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec);
-                delta = calc_delta_mine(delta, curr->load.weight, lw);
-                delta = min((u64)delta, cfs_rq->sleeper_bonus);
+        if (cfs_rq->sleeper_bonus > sysctl_sched_latency) {
+                delta = min((u64)delta_mine, cfs_rq->sleeper_bonus);
+                delta = min(delta, (unsigned long)(
+                        (long)sysctl_sched_runtime_limit - curr->wait_runtime));
                 cfs_rq->sleeper_bonus -= delta;
                 delta_mine -= delta;
         }
@@ -494,6 +545,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
         unsigned long load = cfs_rq->load.weight, delta_fair;
         long prev_runtime;
 
+        /*
+         * Do not boost sleepers if there's too much bonus 'in flight'
+         * already:
+         */
+        if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
+                return;
+
         if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG)
                 load = rq_of(cfs_rq)->cpu_load[2];
 
@@ -513,16 +571,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
         prev_runtime = se->wait_runtime;
         __add_wait_runtime(cfs_rq, se, delta_fair);
+        schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
         delta_fair = se->wait_runtime - prev_runtime;
 
         /*
          * Track the amount of bonus we've given to sleepers:
          */
         cfs_rq->sleeper_bonus += delta_fair;
-        if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
-                cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit;
-
-        schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -686,7 +741,8 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
         if (next == curr)
                 return;
 
-        __check_preempt_curr_fair(cfs_rq, next, curr, sysctl_sched_granularity);
+        __check_preempt_curr_fair(cfs_rq, next, curr,
+                                  sched_granularity(cfs_rq));
 }
 
 /**************************************************
@@ -1031,7 +1087,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
          * it will preempt the parent:
          */
         p->se.fair_key = current->se.fair_key -
-                niced_granularity(&rq->curr->se, sysctl_sched_granularity) - 1;
+                niced_granularity(&rq->curr->se, sched_granularity(cfs_rq)) - 1;
         /*
          * The first wait is dominated by the child-runs-first logic,
          * so do not credit it with that waiting time yet:
@@ -1044,7 +1100,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
          * -granularity/2, so initialize the task with that:
          */
         if (sysctl_sched_features & SCHED_FEAT_START_DEBIT)
-                p->se.wait_runtime = -(sysctl_sched_granularity / 2);
+                p->se.wait_runtime = -(sched_granularity(cfs_rq) / 2);
 
         __enqueue_entity(cfs_rq, se);
 }
@@ -1057,7 +1113,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
  */
 static void set_curr_task_fair(struct rq *rq)
 {
-        struct sched_entity *se = &rq->curr.se;
+        struct sched_entity *se = &rq->curr->se;
 
         for_each_sched_entity(se)
                 set_next_entity(cfs_rq_of(se), se);
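A quick illustration of the dynamic-granularity rule added above (not part of the patch): with the new defaults of sysctl_sched_latency = 20 ms and sysctl_sched_min_granularity = 2 ms, gran = lat/nr - lat/nr/nr yields 20 ms for a single runnable task, 5 ms for two, about 4.44 ms for three, and is clamped at 2 ms once the run queue grows long enough. The userspace sketch below mirrors sched_granularity() with those defaults.

/*
 * Userspace sketch of the sched_granularity() rule; the two constants
 * are the defaults introduced by this patch. Illustrative only.
 */
#include <stdio.h>

int main(void)
{
        const unsigned int lat = 20000000;      /* sysctl_sched_latency, ns */
        const unsigned int min_gran = 2000000;  /* sysctl_sched_min_granularity, ns */
        unsigned int nr;

        for (nr = 1; nr <= 12; nr++) {
                unsigned int gran = lat;

                if (nr > 1) {
                        gran = gran/nr - gran/nr/nr;
                        if (gran < min_gran)
                                gran = min_gran;
                }
                printf("nr_running=%2u  gran=%8u ns\n", nr, gran);
        }
        return 0;
}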
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index dcdcad632fd9..4b87476a02d0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -207,10 +207,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p)
                 return;
 
         p->time_slice = static_prio_timeslice(p->static_prio);
-        set_tsk_need_resched(p);
 
-        /* put it at the end of the queue: */
-        requeue_task_rt(rq, p);
+        /*
+         * Requeue to the end of queue if we are not the only element
+         * on the queue:
+         */
+        if (p->run_list.prev != p->run_list.next) {
+                requeue_task_rt(rq, p);
+                set_tsk_need_resched(p);
+        }
 }
 
 static struct sched_class rt_sched_class __read_mostly = {
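The new test in task_tick_rt() relies on a property of circular run lists: a task that is alone on its priority queue has run_list.prev == run_list.next (both point at the queue head), so requeueing it would be a no-op. A small userspace sketch of that property (illustrative only, not part of the patch; the list helpers are cut-down equivalents of the kernel's list.h):

#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

static void list_init(struct list_head *head)
{
        head->next = head->prev = head;
}

static void list_add_tail(struct list_head *node, struct list_head *head)
{
        node->prev = head->prev;
        node->next = head;
        head->prev->next = node;
        head->prev = node;
}

int main(void)
{
        struct list_head queue, a, b;

        list_init(&queue);

        list_add_tail(&a, &queue);
        printf("one task queued:  a.prev %s a.next\n",
               a.prev == a.next ? "==" : "!=");   /* == : alone on the queue */

        list_add_tail(&b, &queue);
        printf("two tasks queued: a.prev %s a.next\n",
               a.prev == a.next ? "==" : "!=");   /* != : requeueing makes sense */
        return 0;
}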
diff --git a/kernel/signal.c b/kernel/signal.c
index b27c01a66448..ad63109e413c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -378,7 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
         /* We only dequeue private signals from ourselves, we don't let
          * signalfd steal them
          */
-        if (tsk == current)
+        if (likely(tsk == current))
                 signr = __dequeue_signal(&tsk->pending, mask, info);
         if (!signr) {
                 signr = __dequeue_signal(&tsk->signal->shared_pending,
@@ -425,7 +425,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
                 if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
                         tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
         }
-        if ( signr &&
+        if (signr && likely(tsk == current) &&
             ((info->si_code & __SI_MASK) == __SI_TIMER) &&
             info->si_sys_private){
                 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8bdb8c07e04f..6ace893c17c9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -27,7 +27,6 @@
 #include <linux/capability.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
-#include <linux/capability.h>
 #include <linux/smp_lock.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -223,8 +222,19 @@ static ctl_table kern_table[] = {
 #ifdef CONFIG_SCHED_DEBUG
         {
                 .ctl_name = CTL_UNNUMBERED,
-                .procname = "sched_granularity_ns",
-                .data = &sysctl_sched_granularity,
+                .procname = "sched_min_granularity_ns",
+                .data = &sysctl_sched_min_granularity,
+                .maxlen = sizeof(unsigned int),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec_minmax,
+                .strategy = &sysctl_intvec,
+                .extra1 = &min_sched_granularity_ns,
+                .extra2 = &max_sched_granularity_ns,
+        },
+        {
+                .ctl_name = CTL_UNNUMBERED,
+                .procname = "sched_latency_ns",
+                .data = &sysctl_sched_latency,
                 .maxlen = sizeof(unsigned int),
                 .mode = 0644,
                 .proc_handler = &proc_dointvec_minmax,
@@ -284,6 +294,15 @@ static ctl_table kern_table[] = {
                 .mode = 0644,
                 .proc_handler = &proc_dointvec,
         },
+        {
+                .ctl_name = CTL_UNNUMBERED,
+                .procname = "sched_features",
+                .data = &sysctl_sched_features,
+                .maxlen = sizeof(unsigned int),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec,
+        },
+#endif
 #ifdef CONFIG_PROVE_LOCKING
         {
                 .ctl_name = CTL_UNNUMBERED,
@@ -305,15 +324,6 @@ static ctl_table kern_table[] = {
         },
 #endif
         {
-                .ctl_name = CTL_UNNUMBERED,
-                .procname = "sched_features",
-                .data = &sysctl_sched_features,
-                .maxlen = sizeof(unsigned int),
-                .mode = 0644,
-                .proc_handler = &proc_dointvec,
-        },
-#endif
-        {
                 .ctl_name = KERN_PANIC,
                 .procname = "panic",
                 .data = &panic_timeout,
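Because the renamed entries live in kern_table, they surface under /proc/sys/kernel/ when CONFIG_SCHED_DEBUG is enabled. A minimal userspace reader for the two knobs touched above (illustrative only, not part of the patch; it assumes the standard procfs sysctl layout and that the kernel was built with CONFIG_SCHED_DEBUG):

#include <stdio.h>

/* Read one nanosecond-valued scheduler knob from /proc/sys/kernel/. */
static void show(const char *name)
{
        char path[128];
        unsigned long val;
        FILE *f;

        snprintf(path, sizeof(path), "/proc/sys/kernel/%s", name);
        f = fopen(path, "r");
        if (!f) {
                perror(path);
                return;
        }
        if (fscanf(f, "%lu", &val) == 1)
                printf("%-28s %lu ns\n", name, val);
        fclose(f);
}

int main(void)
{
        show("sched_latency_ns");
        show("sched_min_granularity_ns");
        return 0;
}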
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 58e5c152a6bb..e080d1d744cc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -635,7 +635,7 @@ int keventd_up(void)
 int current_is_keventd(void)
 {
         struct cpu_workqueue_struct *cwq;
-        int cpu = smp_processor_id(); /* preempt-safe: keventd is per-cpu */
+        int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
         int ret = 0;
 
         BUG_ON(!keventd_wq);