diff options
author | Ingo Molnar <mingo@elte.hu> | 2010-12-23 03:48:41 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-12-23 03:48:41 -0500 |
commit | 26e20a108caca6231c6a5ec659f815a866904751 (patch) | |
tree | 36932c208a9e8994bfd7ed4eaf48b9c33f71fbe3 /kernel | |
parent | 691513f70d3957939a318da970987b876c720861 (diff) | |
parent | 90a8a73c06cc32b609a880d48449d7083327e11a (diff) |
Merge commit 'v2.6.37-rc7' into x86/security
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/debug/kdb/kdb_main.c | 21 | ||||
-rw-r--r-- | kernel/exit.c | 9 | ||||
-rw-r--r-- | kernel/fork.c | 1 | ||||
-rw-r--r-- | kernel/futex.c | 3 | ||||
-rw-r--r-- | kernel/futex_compat.c | 3 | ||||
-rw-r--r-- | kernel/hw_breakpoint.c | 3 | ||||
-rw-r--r-- | kernel/irq/proc.c | 2 | ||||
-rw-r--r-- | kernel/irq_work.c | 4 | ||||
-rw-r--r-- | kernel/module.c | 12 | ||||
-rw-r--r-- | kernel/perf_event.c | 130 | ||||
-rw-r--r-- | kernel/pm_qos_params.c | 4 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 12 | ||||
-rw-r--r-- | kernel/power/Kconfig | 4 | ||||
-rw-r--r-- | kernel/power/hibernate.c | 22 | ||||
-rw-r--r-- | kernel/power/suspend.c | 5 | ||||
-rw-r--r-- | kernel/power/swap.c | 55 | ||||
-rw-r--r-- | kernel/power/user.c | 4 | ||||
-rw-r--r-- | kernel/printk.c | 4 | ||||
-rw-r--r-- | kernel/resource.c | 104 | ||||
-rw-r--r-- | kernel/sched.c | 324 | ||||
-rw-r--r-- | kernel/sched_fair.c | 48 | ||||
-rw-r--r-- | kernel/sched_stoptask.c | 4 | ||||
-rw-r--r-- | kernel/sysctl.c | 2 | ||||
-rw-r--r-- | kernel/timer.c | 8 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 2 | ||||
-rw-r--r-- | kernel/trace/trace.c | 30 | ||||
-rw-r--r-- | kernel/workqueue.c | 7 |
27 files changed, 570 insertions, 257 deletions
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 37755d621924..a6e729766821 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
@@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50]; | |||
82 | #define for_each_kdbcmd(cmd, num) \ | 82 | #define for_each_kdbcmd(cmd, num) \ |
83 | for ((cmd) = kdb_base_commands, (num) = 0; \ | 83 | for ((cmd) = kdb_base_commands, (num) = 0; \ |
84 | num < kdb_max_commands; \ | 84 | num < kdb_max_commands; \ |
85 | num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++) | 85 | num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++) |
86 | 86 | ||
87 | typedef struct _kdbmsg { | 87 | typedef struct _kdbmsg { |
88 | int km_diag; /* kdb diagnostic */ | 88 | int km_diag; /* kdb diagnostic */ |
@@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0) | |||
646 | } | 646 | } |
647 | if (!s->usable) | 647 | if (!s->usable) |
648 | return KDB_NOTIMP; | 648 | return KDB_NOTIMP; |
649 | s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB); | 649 | s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB); |
650 | if (!s->command) { | 650 | if (!s->command) { |
651 | kdb_printf("Could not allocate new kdb_defcmd table for %s\n", | 651 | kdb_printf("Could not allocate new kdb_defcmd table for %s\n", |
652 | cmdstr); | 652 | cmdstr); |
@@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv) | |||
2361 | */ | 2361 | */ |
2362 | static int kdb_ll(int argc, const char **argv) | 2362 | static int kdb_ll(int argc, const char **argv) |
2363 | { | 2363 | { |
2364 | int diag; | 2364 | int diag = 0; |
2365 | unsigned long addr; | 2365 | unsigned long addr; |
2366 | long offset = 0; | 2366 | long offset = 0; |
2367 | unsigned long va; | 2367 | unsigned long va; |
@@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv) | |||
2400 | char buf[80]; | 2400 | char buf[80]; |
2401 | 2401 | ||
2402 | if (KDB_FLAG(CMD_INTERRUPT)) | 2402 | if (KDB_FLAG(CMD_INTERRUPT)) |
2403 | return 0; | 2403 | goto out; |
2404 | 2404 | ||
2405 | sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va); | 2405 | sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va); |
2406 | diag = kdb_parse(buf); | 2406 | diag = kdb_parse(buf); |
2407 | if (diag) | 2407 | if (diag) |
2408 | return diag; | 2408 | goto out; |
2409 | 2409 | ||
2410 | addr = va + linkoffset; | 2410 | addr = va + linkoffset; |
2411 | if (kdb_getword(&va, addr, sizeof(va))) | 2411 | if (kdb_getword(&va, addr, sizeof(va))) |
2412 | return 0; | 2412 | goto out; |
2413 | } | 2413 | } |
2414 | kfree(command); | ||
2415 | 2414 | ||
2416 | return 0; | 2415 | out: |
2416 | kfree(command); | ||
2417 | return diag; | ||
2417 | } | 2418 | } |
2418 | 2419 | ||
2419 | static int kdb_kgdb(int argc, const char **argv) | 2420 | static int kdb_kgdb(int argc, const char **argv) |
@@ -2739,13 +2740,13 @@ int kdb_register_repeat(char *cmd, | |||
2739 | } | 2740 | } |
2740 | if (kdb_commands) { | 2741 | if (kdb_commands) { |
2741 | memcpy(new, kdb_commands, | 2742 | memcpy(new, kdb_commands, |
2742 | kdb_max_commands * sizeof(*new)); | 2743 | (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new)); |
2743 | kfree(kdb_commands); | 2744 | kfree(kdb_commands); |
2744 | } | 2745 | } |
2745 | memset(new + kdb_max_commands, 0, | 2746 | memset(new + kdb_max_commands, 0, |
2746 | kdb_command_extend * sizeof(*new)); | 2747 | kdb_command_extend * sizeof(*new)); |
2747 | kdb_commands = new; | 2748 | kdb_commands = new; |
2748 | kp = kdb_commands + kdb_max_commands; | 2749 | kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX; |
2749 | kdb_max_commands += kdb_command_extend; | 2750 | kdb_max_commands += kdb_command_extend; |
2750 | } | 2751 | } |
2751 | 2752 | ||
diff --git a/kernel/exit.c b/kernel/exit.c index 21aa7b3001fb..676149a4ac5f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -914,6 +914,15 @@ NORET_TYPE void do_exit(long code) | |||
914 | if (unlikely(!tsk->pid)) | 914 | if (unlikely(!tsk->pid)) |
915 | panic("Attempted to kill the idle task!"); | 915 | panic("Attempted to kill the idle task!"); |
916 | 916 | ||
917 | /* | ||
918 | * If do_exit is called because this processes oopsed, it's possible | ||
919 | * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before | ||
920 | * continuing. Amongst other possible reasons, this is to prevent | ||
921 | * mm_release()->clear_child_tid() from writing to a user-controlled | ||
922 | * kernel address. | ||
923 | */ | ||
924 | set_fs(USER_DS); | ||
925 | |||
917 | tracehook_report_exit(&code); | 926 | tracehook_report_exit(&code); |
918 | 927 | ||
919 | validate_creds_for_do_exit(tsk); | 928 | validate_creds_for_do_exit(tsk); |
diff --git a/kernel/fork.c b/kernel/fork.c index 3b159c5991b7..5447dc7defa9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -273,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
273 | 273 | ||
274 | setup_thread_stack(tsk, orig); | 274 | setup_thread_stack(tsk, orig); |
275 | clear_user_return_notifier(tsk); | 275 | clear_user_return_notifier(tsk); |
276 | clear_tsk_need_resched(tsk); | ||
276 | stackend = end_of_stack(tsk); | 277 | stackend = end_of_stack(tsk); |
277 | *stackend = STACK_END_MAGIC; /* for overflow detection */ | 278 | *stackend = STACK_END_MAGIC; /* for overflow detection */ |
278 | 279 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 6c683b37f2ce..40a8777a27d0 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr) | |||
2489 | { | 2489 | { |
2490 | struct robust_list_head __user *head = curr->robust_list; | 2490 | struct robust_list_head __user *head = curr->robust_list; |
2491 | struct robust_list __user *entry, *next_entry, *pending; | 2491 | struct robust_list __user *entry, *next_entry, *pending; |
2492 | unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; | 2492 | unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; |
2493 | unsigned int uninitialized_var(next_pi); | ||
2493 | unsigned long futex_offset; | 2494 | unsigned long futex_offset; |
2494 | int rc; | 2495 | int rc; |
2495 | 2496 | ||
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 06da4dfc339b..a7934ac75e5b 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
@@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
49 | { | 49 | { |
50 | struct compat_robust_list_head __user *head = curr->compat_robust_list; | 50 | struct compat_robust_list_head __user *head = curr->compat_robust_list; |
51 | struct robust_list __user *entry, *next_entry, *pending; | 51 | struct robust_list __user *entry, *next_entry, *pending; |
52 | unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; | 52 | unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; |
53 | unsigned int uninitialized_var(next_pi); | ||
53 | compat_uptr_t uentry, next_uentry, upending; | 54 | compat_uptr_t uentry, next_uentry, upending; |
54 | compat_long_t futex_offset; | 55 | compat_long_t futex_offset; |
55 | int rc; | 56 | int rc; |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2c9120f0afca..e5325825aeb6 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c | |||
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = { | |||
620 | .read = hw_breakpoint_pmu_read, | 620 | .read = hw_breakpoint_pmu_read, |
621 | }; | 621 | }; |
622 | 622 | ||
623 | static int __init init_hw_breakpoint(void) | 623 | int __init init_hw_breakpoint(void) |
624 | { | 624 | { |
625 | unsigned int **task_bp_pinned; | 625 | unsigned int **task_bp_pinned; |
626 | int cpu, err_cpu; | 626 | int cpu, err_cpu; |
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void) | |||
655 | 655 | ||
656 | return -ENOMEM; | 656 | return -ENOMEM; |
657 | } | 657 | } |
658 | core_initcall(init_hw_breakpoint); | ||
659 | 658 | ||
660 | 659 | ||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 01b1d3a88983..6c8a2a9f8a7b 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -214,7 +214,7 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v) | |||
214 | 214 | ||
215 | static int irq_spurious_proc_open(struct inode *inode, struct file *file) | 215 | static int irq_spurious_proc_open(struct inode *inode, struct file *file) |
216 | { | 216 | { |
217 | return single_open(file, irq_spurious_proc_show, NULL); | 217 | return single_open(file, irq_spurious_proc_show, PDE(inode)->data); |
218 | } | 218 | } |
219 | 219 | ||
220 | static const struct file_operations irq_spurious_proc_fops = { | 220 | static const struct file_operations irq_spurious_proc_fops = { |
diff --git a/kernel/irq_work.c b/kernel/irq_work.c index f16763ff8481..90f881904bb1 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c | |||
@@ -145,7 +145,9 @@ void irq_work_run(void) | |||
145 | * Clear the BUSY bit and return to the free state if | 145 | * Clear the BUSY bit and return to the free state if |
146 | * no-one else claimed it meanwhile. | 146 | * no-one else claimed it meanwhile. |
147 | */ | 147 | */ |
148 | cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); | 148 | (void)cmpxchg(&entry->next, |
149 | next_flags(NULL, IRQ_WORK_BUSY), | ||
150 | NULL); | ||
149 | } | 151 | } |
150 | } | 152 | } |
151 | EXPORT_SYMBOL_GPL(irq_work_run); | 153 | EXPORT_SYMBOL_GPL(irq_work_run); |
diff --git a/kernel/module.c b/kernel/module.c index ba421e6b4ada..562f665c721f 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2480,6 +2480,18 @@ static void find_module_sections(struct module *mod, struct load_info *info) | |||
2480 | kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * | 2480 | kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * |
2481 | mod->num_trace_events, GFP_KERNEL); | 2481 | mod->num_trace_events, GFP_KERNEL); |
2482 | #endif | 2482 | #endif |
2483 | #ifdef CONFIG_TRACING | ||
2484 | mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", | ||
2485 | sizeof(*mod->trace_bprintk_fmt_start), | ||
2486 | &mod->num_trace_bprintk_fmt); | ||
2487 | /* | ||
2488 | * This section contains pointers to allocated objects in the trace | ||
2489 | * code and not scanning it leads to false positives. | ||
2490 | */ | ||
2491 | kmemleak_scan_area(mod->trace_bprintk_fmt_start, | ||
2492 | sizeof(*mod->trace_bprintk_fmt_start) * | ||
2493 | mod->num_trace_bprintk_fmt, GFP_KERNEL); | ||
2494 | #endif | ||
2483 | #ifdef CONFIG_FTRACE_MCOUNT_RECORD | 2495 | #ifdef CONFIG_FTRACE_MCOUNT_RECORD |
2484 | /* sechdrs[0].sh_size is always zero */ | 2496 | /* sechdrs[0].sh_size is always zero */ |
2485 | mod->ftrace_callsites = section_objs(info, "__mcount_loc", | 2497 | mod->ftrace_callsites = section_objs(info, "__mcount_loc", |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index cb6c0d2af68f..2870feee81dd 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/kernel_stat.h> | 31 | #include <linux/kernel_stat.h> |
32 | #include <linux/perf_event.h> | 32 | #include <linux/perf_event.h> |
33 | #include <linux/ftrace_event.h> | 33 | #include <linux/ftrace_event.h> |
34 | #include <linux/hw_breakpoint.h> | ||
34 | 35 | ||
35 | #include <asm/irq_regs.h> | 36 | #include <asm/irq_regs.h> |
36 | 37 | ||
@@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
1286 | { | 1287 | { |
1287 | int ctxn; | 1288 | int ctxn; |
1288 | 1289 | ||
1289 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
1290 | |||
1291 | for_each_task_context_nr(ctxn) | 1290 | for_each_task_context_nr(ctxn) |
1292 | perf_event_context_sched_out(task, ctxn, next); | 1291 | perf_event_context_sched_out(task, ctxn, next); |
1293 | } | 1292 | } |
@@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
1621 | { | 1620 | { |
1622 | raw_spin_lock(&ctx->lock); | 1621 | raw_spin_lock(&ctx->lock); |
1623 | 1622 | ||
1624 | /* Rotate the first entry last of non-pinned groups */ | 1623 | /* |
1625 | list_rotate_left(&ctx->flexible_groups); | 1624 | * Rotate the first entry last of non-pinned groups. Rotation might be |
1625 | * disabled by the inheritance code. | ||
1626 | */ | ||
1627 | if (!ctx->rotate_disable) | ||
1628 | list_rotate_left(&ctx->flexible_groups); | ||
1626 | 1629 | ||
1627 | raw_spin_unlock(&ctx->lock); | 1630 | raw_spin_unlock(&ctx->lock); |
1628 | } | 1631 | } |
@@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
2234 | raw_spin_unlock_irq(&ctx->lock); | 2237 | raw_spin_unlock_irq(&ctx->lock); |
2235 | mutex_unlock(&ctx->mutex); | 2238 | mutex_unlock(&ctx->mutex); |
2236 | 2239 | ||
2237 | mutex_lock(&event->owner->perf_event_mutex); | ||
2238 | list_del_init(&event->owner_entry); | ||
2239 | mutex_unlock(&event->owner->perf_event_mutex); | ||
2240 | put_task_struct(event->owner); | ||
2241 | |||
2242 | free_event(event); | 2240 | free_event(event); |
2243 | 2241 | ||
2244 | return 0; | 2242 | return 0; |
@@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
2251 | static int perf_release(struct inode *inode, struct file *file) | 2249 | static int perf_release(struct inode *inode, struct file *file) |
2252 | { | 2250 | { |
2253 | struct perf_event *event = file->private_data; | 2251 | struct perf_event *event = file->private_data; |
2252 | struct task_struct *owner; | ||
2254 | 2253 | ||
2255 | file->private_data = NULL; | 2254 | file->private_data = NULL; |
2256 | 2255 | ||
2256 | rcu_read_lock(); | ||
2257 | owner = ACCESS_ONCE(event->owner); | ||
2258 | /* | ||
2259 | * Matches the smp_wmb() in perf_event_exit_task(). If we observe | ||
2260 | * !owner it means the list deletion is complete and we can indeed | ||
2261 | * free this event, otherwise we need to serialize on | ||
2262 | * owner->perf_event_mutex. | ||
2263 | */ | ||
2264 | smp_read_barrier_depends(); | ||
2265 | if (owner) { | ||
2266 | /* | ||
2267 | * Since delayed_put_task_struct() also drops the last | ||
2268 | * task reference we can safely take a new reference | ||
2269 | * while holding the rcu_read_lock(). | ||
2270 | */ | ||
2271 | get_task_struct(owner); | ||
2272 | } | ||
2273 | rcu_read_unlock(); | ||
2274 | |||
2275 | if (owner) { | ||
2276 | mutex_lock(&owner->perf_event_mutex); | ||
2277 | /* | ||
2278 | * We have to re-check the event->owner field, if it is cleared | ||
2279 | * we raced with perf_event_exit_task(), acquiring the mutex | ||
2280 | * ensured they're done, and we can proceed with freeing the | ||
2281 | * event. | ||
2282 | */ | ||
2283 | if (event->owner) | ||
2284 | list_del_init(&event->owner_entry); | ||
2285 | mutex_unlock(&owner->perf_event_mutex); | ||
2286 | put_task_struct(owner); | ||
2287 | } | ||
2288 | |||
2257 | return perf_event_release_kernel(event); | 2289 | return perf_event_release_kernel(event); |
2258 | } | 2290 | } |
2259 | 2291 | ||
@@ -3792,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
3792 | rcu_read_lock(); | 3824 | rcu_read_lock(); |
3793 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3825 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3794 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3826 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3827 | if (cpuctx->active_pmu != pmu) | ||
3828 | goto next; | ||
3795 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3829 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3796 | 3830 | ||
3797 | ctx = task_event->task_ctx; | 3831 | ctx = task_event->task_ctx; |
@@ -3927,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3927 | rcu_read_lock(); | 3961 | rcu_read_lock(); |
3928 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3962 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3929 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3963 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3964 | if (cpuctx->active_pmu != pmu) | ||
3965 | goto next; | ||
3930 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3966 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3931 | 3967 | ||
3932 | ctxn = pmu->task_ctx_nr; | 3968 | ctxn = pmu->task_ctx_nr; |
@@ -4112,6 +4148,8 @@ got_name: | |||
4112 | rcu_read_lock(); | 4148 | rcu_read_lock(); |
4113 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4149 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
4114 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4150 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4151 | if (cpuctx->active_pmu != pmu) | ||
4152 | goto next; | ||
4115 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 4153 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
4116 | vma->vm_flags & VM_EXEC); | 4154 | vma->vm_flags & VM_EXEC); |
4117 | 4155 | ||
@@ -4681,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event) | |||
4681 | break; | 4719 | break; |
4682 | } | 4720 | } |
4683 | 4721 | ||
4684 | if (event_id > PERF_COUNT_SW_MAX) | 4722 | if (event_id >= PERF_COUNT_SW_MAX) |
4685 | return -ENOENT; | 4723 | return -ENOENT; |
4686 | 4724 | ||
4687 | if (!event->parent) { | 4725 | if (!event->parent) { |
@@ -5113,20 +5151,36 @@ static void *find_pmu_context(int ctxn) | |||
5113 | return NULL; | 5151 | return NULL; |
5114 | } | 5152 | } |
5115 | 5153 | ||
5116 | static void free_pmu_context(void * __percpu cpu_context) | 5154 | static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) |
5117 | { | 5155 | { |
5118 | struct pmu *pmu; | 5156 | int cpu; |
5157 | |||
5158 | for_each_possible_cpu(cpu) { | ||
5159 | struct perf_cpu_context *cpuctx; | ||
5160 | |||
5161 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
5162 | |||
5163 | if (cpuctx->active_pmu == old_pmu) | ||
5164 | cpuctx->active_pmu = pmu; | ||
5165 | } | ||
5166 | } | ||
5167 | |||
5168 | static void free_pmu_context(struct pmu *pmu) | ||
5169 | { | ||
5170 | struct pmu *i; | ||
5119 | 5171 | ||
5120 | mutex_lock(&pmus_lock); | 5172 | mutex_lock(&pmus_lock); |
5121 | /* | 5173 | /* |
5122 | * Like a real lame refcount. | 5174 | * Like a real lame refcount. |
5123 | */ | 5175 | */ |
5124 | list_for_each_entry(pmu, &pmus, entry) { | 5176 | list_for_each_entry(i, &pmus, entry) { |
5125 | if (pmu->pmu_cpu_context == cpu_context) | 5177 | if (i->pmu_cpu_context == pmu->pmu_cpu_context) { |
5178 | update_pmu_context(i, pmu); | ||
5126 | goto out; | 5179 | goto out; |
5180 | } | ||
5127 | } | 5181 | } |
5128 | 5182 | ||
5129 | free_percpu(cpu_context); | 5183 | free_percpu(pmu->pmu_cpu_context); |
5130 | out: | 5184 | out: |
5131 | mutex_unlock(&pmus_lock); | 5185 | mutex_unlock(&pmus_lock); |
5132 | } | 5186 | } |
@@ -5158,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu) | |||
5158 | cpuctx->ctx.pmu = pmu; | 5212 | cpuctx->ctx.pmu = pmu; |
5159 | cpuctx->jiffies_interval = 1; | 5213 | cpuctx->jiffies_interval = 1; |
5160 | INIT_LIST_HEAD(&cpuctx->rotation_list); | 5214 | INIT_LIST_HEAD(&cpuctx->rotation_list); |
5215 | cpuctx->active_pmu = pmu; | ||
5161 | } | 5216 | } |
5162 | 5217 | ||
5163 | got_cpu_context: | 5218 | got_cpu_context: |
@@ -5209,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
5209 | synchronize_rcu(); | 5264 | synchronize_rcu(); |
5210 | 5265 | ||
5211 | free_percpu(pmu->pmu_disable_count); | 5266 | free_percpu(pmu->pmu_disable_count); |
5212 | free_pmu_context(pmu->pmu_cpu_context); | 5267 | free_pmu_context(pmu); |
5213 | } | 5268 | } |
5214 | 5269 | ||
5215 | struct pmu *perf_init_event(struct perf_event *event) | 5270 | struct pmu *perf_init_event(struct perf_event *event) |
@@ -5677,7 +5732,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5677 | mutex_unlock(&ctx->mutex); | 5732 | mutex_unlock(&ctx->mutex); |
5678 | 5733 | ||
5679 | event->owner = current; | 5734 | event->owner = current; |
5680 | get_task_struct(current); | 5735 | |
5681 | mutex_lock(¤t->perf_event_mutex); | 5736 | mutex_lock(¤t->perf_event_mutex); |
5682 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | 5737 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); |
5683 | mutex_unlock(¤t->perf_event_mutex); | 5738 | mutex_unlock(¤t->perf_event_mutex); |
@@ -5745,12 +5800,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
5745 | ++ctx->generation; | 5800 | ++ctx->generation; |
5746 | mutex_unlock(&ctx->mutex); | 5801 | mutex_unlock(&ctx->mutex); |
5747 | 5802 | ||
5748 | event->owner = current; | ||
5749 | get_task_struct(current); | ||
5750 | mutex_lock(¤t->perf_event_mutex); | ||
5751 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
5752 | mutex_unlock(¤t->perf_event_mutex); | ||
5753 | |||
5754 | return event; | 5803 | return event; |
5755 | 5804 | ||
5756 | err_free: | 5805 | err_free: |
@@ -5901,8 +5950,24 @@ again: | |||
5901 | */ | 5950 | */ |
5902 | void perf_event_exit_task(struct task_struct *child) | 5951 | void perf_event_exit_task(struct task_struct *child) |
5903 | { | 5952 | { |
5953 | struct perf_event *event, *tmp; | ||
5904 | int ctxn; | 5954 | int ctxn; |
5905 | 5955 | ||
5956 | mutex_lock(&child->perf_event_mutex); | ||
5957 | list_for_each_entry_safe(event, tmp, &child->perf_event_list, | ||
5958 | owner_entry) { | ||
5959 | list_del_init(&event->owner_entry); | ||
5960 | |||
5961 | /* | ||
5962 | * Ensure the list deletion is visible before we clear | ||
5963 | * the owner, closes a race against perf_release() where | ||
5964 | * we need to serialize on the owner->perf_event_mutex. | ||
5965 | */ | ||
5966 | smp_wmb(); | ||
5967 | event->owner = NULL; | ||
5968 | } | ||
5969 | mutex_unlock(&child->perf_event_mutex); | ||
5970 | |||
5906 | for_each_task_context_nr(ctxn) | 5971 | for_each_task_context_nr(ctxn) |
5907 | perf_event_exit_task_context(child, ctxn); | 5972 | perf_event_exit_task_context(child, ctxn); |
5908 | } | 5973 | } |
@@ -6122,6 +6187,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6122 | struct perf_event *event; | 6187 | struct perf_event *event; |
6123 | struct task_struct *parent = current; | 6188 | struct task_struct *parent = current; |
6124 | int inherited_all = 1; | 6189 | int inherited_all = 1; |
6190 | unsigned long flags; | ||
6125 | int ret = 0; | 6191 | int ret = 0; |
6126 | 6192 | ||
6127 | child->perf_event_ctxp[ctxn] = NULL; | 6193 | child->perf_event_ctxp[ctxn] = NULL; |
@@ -6162,6 +6228,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6162 | break; | 6228 | break; |
6163 | } | 6229 | } |
6164 | 6230 | ||
6231 | /* | ||
6232 | * We can't hold ctx->lock when iterating the ->flexible_group list due | ||
6233 | * to allocations, but we need to prevent rotation because | ||
6234 | * rotate_ctx() will change the list from interrupt context. | ||
6235 | */ | ||
6236 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
6237 | parent_ctx->rotate_disable = 1; | ||
6238 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6239 | |||
6165 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6240 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
6166 | ret = inherit_task_group(event, parent, parent_ctx, | 6241 | ret = inherit_task_group(event, parent, parent_ctx, |
6167 | child, ctxn, &inherited_all); | 6242 | child, ctxn, &inherited_all); |
@@ -6169,6 +6244,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6169 | break; | 6244 | break; |
6170 | } | 6245 | } |
6171 | 6246 | ||
6247 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
6248 | parent_ctx->rotate_disable = 0; | ||
6249 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6250 | |||
6172 | child_ctx = child->perf_event_ctxp[ctxn]; | 6251 | child_ctx = child->perf_event_ctxp[ctxn]; |
6173 | 6252 | ||
6174 | if (child_ctx && inherited_all) { | 6253 | if (child_ctx && inherited_all) { |
@@ -6321,6 +6400,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
6321 | 6400 | ||
6322 | void __init perf_event_init(void) | 6401 | void __init perf_event_init(void) |
6323 | { | 6402 | { |
6403 | int ret; | ||
6404 | |||
6324 | perf_event_init_all_cpus(); | 6405 | perf_event_init_all_cpus(); |
6325 | init_srcu_struct(&pmus_srcu); | 6406 | init_srcu_struct(&pmus_srcu); |
6326 | perf_pmu_register(&perf_swevent); | 6407 | perf_pmu_register(&perf_swevent); |
@@ -6328,4 +6409,7 @@ void __init perf_event_init(void) | |||
6328 | perf_pmu_register(&perf_task_clock); | 6409 | perf_pmu_register(&perf_task_clock); |
6329 | perf_tp_register(); | 6410 | perf_tp_register(); |
6330 | perf_cpu_notifier(perf_cpu_notify); | 6411 | perf_cpu_notifier(perf_cpu_notify); |
6412 | |||
6413 | ret = init_hw_breakpoint(); | ||
6414 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | ||
6331 | } | 6415 | } |
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index c7a8f453919e..aeaa7f846821 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
@@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) | |||
121 | 121 | ||
122 | switch (o->type) { | 122 | switch (o->type) { |
123 | case PM_QOS_MIN: | 123 | case PM_QOS_MIN: |
124 | return plist_last(&o->requests)->prio; | 124 | return plist_first(&o->requests)->prio; |
125 | 125 | ||
126 | case PM_QOS_MAX: | 126 | case PM_QOS_MAX: |
127 | return plist_first(&o->requests)->prio; | 127 | return plist_last(&o->requests)->prio; |
128 | 128 | ||
129 | default: | 129 | default: |
130 | /* runtime check for not using enum */ | 130 | /* runtime check for not using enum */ |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 6842eeba5879..05bb7173850e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock) | |||
37 | if (pid == 0) | 37 | if (pid == 0) |
38 | return 0; | 38 | return 0; |
39 | 39 | ||
40 | read_lock(&tasklist_lock); | 40 | rcu_read_lock(); |
41 | p = find_task_by_vpid(pid); | 41 | p = find_task_by_vpid(pid); |
42 | if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? | 42 | if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? |
43 | same_thread_group(p, current) : thread_group_leader(p))) { | 43 | same_thread_group(p, current) : has_group_leader_pid(p))) { |
44 | error = -EINVAL; | 44 | error = -EINVAL; |
45 | } | 45 | } |
46 | read_unlock(&tasklist_lock); | 46 | rcu_read_unlock(); |
47 | 47 | ||
48 | return error; | 48 | return error; |
49 | } | 49 | } |
@@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
390 | 390 | ||
391 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); | 391 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); |
392 | 392 | ||
393 | read_lock(&tasklist_lock); | 393 | rcu_read_lock(); |
394 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { | 394 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { |
395 | if (pid == 0) { | 395 | if (pid == 0) { |
396 | p = current; | 396 | p = current; |
@@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
404 | p = current->group_leader; | 404 | p = current->group_leader; |
405 | } else { | 405 | } else { |
406 | p = find_task_by_vpid(pid); | 406 | p = find_task_by_vpid(pid); |
407 | if (p && !thread_group_leader(p)) | 407 | if (p && !has_group_leader_pid(p)) |
408 | p = NULL; | 408 | p = NULL; |
409 | } | 409 | } |
410 | } | 410 | } |
@@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
414 | } else { | 414 | } else { |
415 | ret = -EINVAL; | 415 | ret = -EINVAL; |
416 | } | 416 | } |
417 | read_unlock(&tasklist_lock); | 417 | rcu_read_unlock(); |
418 | 418 | ||
419 | return ret; | 419 | return ret; |
420 | } | 420 | } |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 29bff6117abc..a5aff3ebad38 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -246,9 +246,13 @@ config PM_OPS | |||
246 | depends on PM_SLEEP || PM_RUNTIME | 246 | depends on PM_SLEEP || PM_RUNTIME |
247 | default y | 247 | default y |
248 | 248 | ||
249 | config ARCH_HAS_OPP | ||
250 | bool | ||
251 | |||
249 | config PM_OPP | 252 | config PM_OPP |
250 | bool "Operating Performance Point (OPP) Layer library" | 253 | bool "Operating Performance Point (OPP) Layer library" |
251 | depends on PM | 254 | depends on PM |
255 | depends on ARCH_HAS_OPP | ||
252 | ---help--- | 256 | ---help--- |
253 | SOCs have a standard set of tuples consisting of frequency and | 257 | SOCs have a standard set of tuples consisting of frequency and |
254 | voltage pairs that the device will support per voltage domain. This | 258 | voltage pairs that the device will support per voltage domain. This |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 657272e91d0a..048d0b514831 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -327,7 +327,6 @@ static int create_image(int platform_mode) | |||
327 | int hibernation_snapshot(int platform_mode) | 327 | int hibernation_snapshot(int platform_mode) |
328 | { | 328 | { |
329 | int error; | 329 | int error; |
330 | gfp_t saved_mask; | ||
331 | 330 | ||
332 | error = platform_begin(platform_mode); | 331 | error = platform_begin(platform_mode); |
333 | if (error) | 332 | if (error) |
@@ -339,7 +338,7 @@ int hibernation_snapshot(int platform_mode) | |||
339 | goto Close; | 338 | goto Close; |
340 | 339 | ||
341 | suspend_console(); | 340 | suspend_console(); |
342 | saved_mask = clear_gfp_allowed_mask(GFP_IOFS); | 341 | pm_restrict_gfp_mask(); |
343 | error = dpm_suspend_start(PMSG_FREEZE); | 342 | error = dpm_suspend_start(PMSG_FREEZE); |
344 | if (error) | 343 | if (error) |
345 | goto Recover_platform; | 344 | goto Recover_platform; |
@@ -348,7 +347,10 @@ int hibernation_snapshot(int platform_mode) | |||
348 | goto Recover_platform; | 347 | goto Recover_platform; |
349 | 348 | ||
350 | error = create_image(platform_mode); | 349 | error = create_image(platform_mode); |
351 | /* Control returns here after successful restore */ | 350 | /* |
351 | * Control returns here (1) after the image has been created or the | ||
352 | * image creation has failed and (2) after a successful restore. | ||
353 | */ | ||
352 | 354 | ||
353 | Resume_devices: | 355 | Resume_devices: |
354 | /* We may need to release the preallocated image pages here. */ | 356 | /* We may need to release the preallocated image pages here. */ |
@@ -357,7 +359,10 @@ int hibernation_snapshot(int platform_mode) | |||
357 | 359 | ||
358 | dpm_resume_end(in_suspend ? | 360 | dpm_resume_end(in_suspend ? |
359 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); | 361 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); |
360 | set_gfp_allowed_mask(saved_mask); | 362 | |
363 | if (error || !in_suspend) | ||
364 | pm_restore_gfp_mask(); | ||
365 | |||
361 | resume_console(); | 366 | resume_console(); |
362 | Close: | 367 | Close: |
363 | platform_end(platform_mode); | 368 | platform_end(platform_mode); |
@@ -452,17 +457,16 @@ static int resume_target_kernel(bool platform_mode) | |||
452 | int hibernation_restore(int platform_mode) | 457 | int hibernation_restore(int platform_mode) |
453 | { | 458 | { |
454 | int error; | 459 | int error; |
455 | gfp_t saved_mask; | ||
456 | 460 | ||
457 | pm_prepare_console(); | 461 | pm_prepare_console(); |
458 | suspend_console(); | 462 | suspend_console(); |
459 | saved_mask = clear_gfp_allowed_mask(GFP_IOFS); | 463 | pm_restrict_gfp_mask(); |
460 | error = dpm_suspend_start(PMSG_QUIESCE); | 464 | error = dpm_suspend_start(PMSG_QUIESCE); |
461 | if (!error) { | 465 | if (!error) { |
462 | error = resume_target_kernel(platform_mode); | 466 | error = resume_target_kernel(platform_mode); |
463 | dpm_resume_end(PMSG_RECOVER); | 467 | dpm_resume_end(PMSG_RECOVER); |
464 | } | 468 | } |
465 | set_gfp_allowed_mask(saved_mask); | 469 | pm_restore_gfp_mask(); |
466 | resume_console(); | 470 | resume_console(); |
467 | pm_restore_console(); | 471 | pm_restore_console(); |
468 | return error; | 472 | return error; |
@@ -476,7 +480,6 @@ int hibernation_restore(int platform_mode) | |||
476 | int hibernation_platform_enter(void) | 480 | int hibernation_platform_enter(void) |
477 | { | 481 | { |
478 | int error; | 482 | int error; |
479 | gfp_t saved_mask; | ||
480 | 483 | ||
481 | if (!hibernation_ops) | 484 | if (!hibernation_ops) |
482 | return -ENOSYS; | 485 | return -ENOSYS; |
@@ -492,7 +495,6 @@ int hibernation_platform_enter(void) | |||
492 | 495 | ||
493 | entering_platform_hibernation = true; | 496 | entering_platform_hibernation = true; |
494 | suspend_console(); | 497 | suspend_console(); |
495 | saved_mask = clear_gfp_allowed_mask(GFP_IOFS); | ||
496 | error = dpm_suspend_start(PMSG_HIBERNATE); | 498 | error = dpm_suspend_start(PMSG_HIBERNATE); |
497 | if (error) { | 499 | if (error) { |
498 | if (hibernation_ops->recover) | 500 | if (hibernation_ops->recover) |
@@ -536,7 +538,6 @@ int hibernation_platform_enter(void) | |||
536 | Resume_devices: | 538 | Resume_devices: |
537 | entering_platform_hibernation = false; | 539 | entering_platform_hibernation = false; |
538 | dpm_resume_end(PMSG_RESTORE); | 540 | dpm_resume_end(PMSG_RESTORE); |
539 | set_gfp_allowed_mask(saved_mask); | ||
540 | resume_console(); | 541 | resume_console(); |
541 | 542 | ||
542 | Close: | 543 | Close: |
@@ -646,6 +647,7 @@ int hibernate(void) | |||
646 | swsusp_free(); | 647 | swsusp_free(); |
647 | if (!error) | 648 | if (!error) |
648 | power_down(); | 649 | power_down(); |
650 | pm_restore_gfp_mask(); | ||
649 | } else { | 651 | } else { |
650 | pr_debug("PM: Image restored successfully.\n"); | 652 | pr_debug("PM: Image restored successfully.\n"); |
651 | } | 653 | } |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 7335952ee473..ecf770509d0d 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -197,7 +197,6 @@ static int suspend_enter(suspend_state_t state) | |||
197 | int suspend_devices_and_enter(suspend_state_t state) | 197 | int suspend_devices_and_enter(suspend_state_t state) |
198 | { | 198 | { |
199 | int error; | 199 | int error; |
200 | gfp_t saved_mask; | ||
201 | 200 | ||
202 | if (!suspend_ops) | 201 | if (!suspend_ops) |
203 | return -ENOSYS; | 202 | return -ENOSYS; |
@@ -208,7 +207,7 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
208 | goto Close; | 207 | goto Close; |
209 | } | 208 | } |
210 | suspend_console(); | 209 | suspend_console(); |
211 | saved_mask = clear_gfp_allowed_mask(GFP_IOFS); | 210 | pm_restrict_gfp_mask(); |
212 | suspend_test_start(); | 211 | suspend_test_start(); |
213 | error = dpm_suspend_start(PMSG_SUSPEND); | 212 | error = dpm_suspend_start(PMSG_SUSPEND); |
214 | if (error) { | 213 | if (error) { |
@@ -225,7 +224,7 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
225 | suspend_test_start(); | 224 | suspend_test_start(); |
226 | dpm_resume_end(PMSG_RESUME); | 225 | dpm_resume_end(PMSG_RESUME); |
227 | suspend_test_finish("resume devices"); | 226 | suspend_test_finish("resume devices"); |
228 | set_gfp_allowed_mask(saved_mask); | 227 | pm_restore_gfp_mask(); |
229 | resume_console(); | 228 | resume_console(); |
230 | Close: | 229 | Close: |
231 | if (suspend_ops->end) | 230 | if (suspend_ops->end) |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a0e4a86ccf94..8c7e4832b9be 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * | 6 | * |
7 | * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> | 7 | * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> |
8 | * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> | 8 | * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> |
9 | * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com> | ||
9 | * | 10 | * |
10 | * This file is released under the GPLv2. | 11 | * This file is released under the GPLv2. |
11 | * | 12 | * |
@@ -29,7 +30,7 @@ | |||
29 | 30 | ||
30 | #include "power.h" | 31 | #include "power.h" |
31 | 32 | ||
32 | #define HIBERNATE_SIG "LINHIB0001" | 33 | #define HIBERNATE_SIG "S1SUSPEND" |
33 | 34 | ||
34 | /* | 35 | /* |
35 | * The swap map is a data structure used for keeping track of each page | 36 | * The swap map is a data structure used for keeping track of each page |
@@ -753,30 +754,43 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
753 | { | 754 | { |
754 | unsigned int m; | 755 | unsigned int m; |
755 | int error = 0; | 756 | int error = 0; |
757 | struct bio *bio; | ||
756 | struct timeval start; | 758 | struct timeval start; |
757 | struct timeval stop; | 759 | struct timeval stop; |
758 | unsigned nr_pages; | 760 | unsigned nr_pages; |
759 | size_t off, unc_len, cmp_len; | 761 | size_t i, off, unc_len, cmp_len; |
760 | unsigned char *unc, *cmp, *page; | 762 | unsigned char *unc, *cmp, *page[LZO_CMP_PAGES]; |
761 | 763 | ||
762 | page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); | 764 | for (i = 0; i < LZO_CMP_PAGES; i++) { |
763 | if (!page) { | 765 | page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); |
764 | printk(KERN_ERR "PM: Failed to allocate LZO page\n"); | 766 | if (!page[i]) { |
765 | return -ENOMEM; | 767 | printk(KERN_ERR "PM: Failed to allocate LZO page\n"); |
768 | |||
769 | while (i) | ||
770 | free_page((unsigned long)page[--i]); | ||
771 | |||
772 | return -ENOMEM; | ||
773 | } | ||
766 | } | 774 | } |
767 | 775 | ||
768 | unc = vmalloc(LZO_UNC_SIZE); | 776 | unc = vmalloc(LZO_UNC_SIZE); |
769 | if (!unc) { | 777 | if (!unc) { |
770 | printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); | 778 | printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); |
771 | free_page((unsigned long)page); | 779 | |
780 | for (i = 0; i < LZO_CMP_PAGES; i++) | ||
781 | free_page((unsigned long)page[i]); | ||
782 | |||
772 | return -ENOMEM; | 783 | return -ENOMEM; |
773 | } | 784 | } |
774 | 785 | ||
775 | cmp = vmalloc(LZO_CMP_SIZE); | 786 | cmp = vmalloc(LZO_CMP_SIZE); |
776 | if (!cmp) { | 787 | if (!cmp) { |
777 | printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); | 788 | printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); |
789 | |||
778 | vfree(unc); | 790 | vfree(unc); |
779 | free_page((unsigned long)page); | 791 | for (i = 0; i < LZO_CMP_PAGES; i++) |
792 | free_page((unsigned long)page[i]); | ||
793 | |||
780 | return -ENOMEM; | 794 | return -ENOMEM; |
781 | } | 795 | } |
782 | 796 | ||
@@ -787,6 +801,7 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
787 | if (!m) | 801 | if (!m) |
788 | m = 1; | 802 | m = 1; |
789 | nr_pages = 0; | 803 | nr_pages = 0; |
804 | bio = NULL; | ||
790 | do_gettimeofday(&start); | 805 | do_gettimeofday(&start); |
791 | 806 | ||
792 | error = snapshot_write_next(snapshot); | 807 | error = snapshot_write_next(snapshot); |
@@ -794,11 +809,11 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
794 | goto out_finish; | 809 | goto out_finish; |
795 | 810 | ||
796 | for (;;) { | 811 | for (;;) { |
797 | error = swap_read_page(handle, page, NULL); /* sync */ | 812 | error = swap_read_page(handle, page[0], NULL); /* sync */ |
798 | if (error) | 813 | if (error) |
799 | break; | 814 | break; |
800 | 815 | ||
801 | cmp_len = *(size_t *)page; | 816 | cmp_len = *(size_t *)page[0]; |
802 | if (unlikely(!cmp_len || | 817 | if (unlikely(!cmp_len || |
803 | cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { | 818 | cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { |
804 | printk(KERN_ERR "PM: Invalid LZO compressed length\n"); | 819 | printk(KERN_ERR "PM: Invalid LZO compressed length\n"); |
@@ -806,13 +821,20 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
806 | break; | 821 | break; |
807 | } | 822 | } |
808 | 823 | ||
809 | memcpy(cmp, page, PAGE_SIZE); | 824 | for (off = PAGE_SIZE, i = 1; |
810 | for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { | 825 | off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { |
811 | error = swap_read_page(handle, page, NULL); /* sync */ | 826 | error = swap_read_page(handle, page[i], &bio); |
812 | if (error) | 827 | if (error) |
813 | goto out_finish; | 828 | goto out_finish; |
829 | } | ||
814 | 830 | ||
815 | memcpy(cmp + off, page, PAGE_SIZE); | 831 | error = hib_wait_on_bio_chain(&bio); /* need all data now */ |
832 | if (error) | ||
833 | goto out_finish; | ||
834 | |||
835 | for (off = 0, i = 0; | ||
836 | off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { | ||
837 | memcpy(cmp + off, page[i], PAGE_SIZE); | ||
816 | } | 838 | } |
817 | 839 | ||
818 | unc_len = LZO_UNC_SIZE; | 840 | unc_len = LZO_UNC_SIZE; |
@@ -857,7 +879,8 @@ out_finish: | |||
857 | 879 | ||
858 | vfree(cmp); | 880 | vfree(cmp); |
859 | vfree(unc); | 881 | vfree(unc); |
860 | free_page((unsigned long)page); | 882 | for (i = 0; i < LZO_CMP_PAGES; i++) |
883 | free_page((unsigned long)page[i]); | ||
861 | 884 | ||
862 | return error; | 885 | return error; |
863 | } | 886 | } |
diff --git a/kernel/power/user.c b/kernel/power/user.c index e819e17877ca..c36c3b9e8a84 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) | |||
137 | free_all_swap_pages(data->swap); | 137 | free_all_swap_pages(data->swap); |
138 | if (data->frozen) | 138 | if (data->frozen) |
139 | thaw_processes(); | 139 | thaw_processes(); |
140 | pm_notifier_call_chain(data->mode == O_WRONLY ? | 140 | pm_notifier_call_chain(data->mode == O_RDONLY ? |
141 | PM_POST_HIBERNATION : PM_POST_RESTORE); | 141 | PM_POST_HIBERNATION : PM_POST_RESTORE); |
142 | atomic_inc(&snapshot_device_available); | 142 | atomic_inc(&snapshot_device_available); |
143 | 143 | ||
@@ -263,6 +263,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
263 | case SNAPSHOT_UNFREEZE: | 263 | case SNAPSHOT_UNFREEZE: |
264 | if (!data->frozen || data->ready) | 264 | if (!data->frozen || data->ready) |
265 | break; | 265 | break; |
266 | pm_restore_gfp_mask(); | ||
266 | thaw_processes(); | 267 | thaw_processes(); |
267 | usermodehelper_enable(); | 268 | usermodehelper_enable(); |
268 | data->frozen = 0; | 269 | data->frozen = 0; |
@@ -275,6 +276,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
275 | error = -EPERM; | 276 | error = -EPERM; |
276 | break; | 277 | break; |
277 | } | 278 | } |
279 | pm_restore_gfp_mask(); | ||
278 | error = hibernation_snapshot(data->platform_support); | 280 | error = hibernation_snapshot(data->platform_support); |
279 | if (!error) | 281 | if (!error) |
280 | error = put_user(in_suspend, (int __user *)arg); | 282 | error = put_user(in_suspend, (int __user *)arg); |
diff --git a/kernel/printk.c b/kernel/printk.c index 9a2264fc42ca..a23315dc4498 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -1082,13 +1082,15 @@ void printk_tick(void) | |||
1082 | 1082 | ||
1083 | int printk_needs_cpu(int cpu) | 1083 | int printk_needs_cpu(int cpu) |
1084 | { | 1084 | { |
1085 | if (unlikely(cpu_is_offline(cpu))) | ||
1086 | printk_tick(); | ||
1085 | return per_cpu(printk_pending, cpu); | 1087 | return per_cpu(printk_pending, cpu); |
1086 | } | 1088 | } |
1087 | 1089 | ||
1088 | void wake_up_klogd(void) | 1090 | void wake_up_klogd(void) |
1089 | { | 1091 | { |
1090 | if (waitqueue_active(&log_wait)) | 1092 | if (waitqueue_active(&log_wait)) |
1091 | __raw_get_cpu_var(printk_pending) = 1; | 1093 | this_cpu_write(printk_pending, 1); |
1092 | } | 1094 | } |
1093 | 1095 | ||
1094 | /** | 1096 | /** |
diff --git a/kernel/resource.c b/kernel/resource.c index 9fad33efd0db..798e2fae2a06 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -40,23 +40,6 @@ EXPORT_SYMBOL(iomem_resource); | |||
40 | 40 | ||
41 | static DEFINE_RWLOCK(resource_lock); | 41 | static DEFINE_RWLOCK(resource_lock); |
42 | 42 | ||
43 | /* | ||
44 | * By default, we allocate free space bottom-up. The architecture can request | ||
45 | * top-down by clearing this flag. The user can override the architecture's | ||
46 | * choice with the "resource_alloc_from_bottom" kernel boot option, but that | ||
47 | * should only be a debugging tool. | ||
48 | */ | ||
49 | int resource_alloc_from_bottom = 1; | ||
50 | |||
51 | static __init int setup_alloc_from_bottom(char *s) | ||
52 | { | ||
53 | printk(KERN_INFO | ||
54 | "resource: allocating from bottom-up; please report a bug\n"); | ||
55 | resource_alloc_from_bottom = 1; | ||
56 | return 0; | ||
57 | } | ||
58 | early_param("resource_alloc_from_bottom", setup_alloc_from_bottom); | ||
59 | |||
60 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) | 43 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) |
61 | { | 44 | { |
62 | struct resource *p = v; | 45 | struct resource *p = v; |
@@ -374,6 +357,10 @@ int __weak page_is_ram(unsigned long pfn) | |||
374 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; | 357 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; |
375 | } | 358 | } |
376 | 359 | ||
360 | void __weak arch_remove_reservations(struct resource *avail) | ||
361 | { | ||
362 | } | ||
363 | |||
377 | static resource_size_t simple_align_resource(void *data, | 364 | static resource_size_t simple_align_resource(void *data, |
378 | const struct resource *avail, | 365 | const struct resource *avail, |
379 | resource_size_t size, | 366 | resource_size_t size, |
@@ -397,74 +384,7 @@ static bool resource_contains(struct resource *res1, struct resource *res2) | |||
397 | } | 384 | } |
398 | 385 | ||
399 | /* | 386 | /* |
400 | * Find the resource before "child" in the sibling list of "root" children. | ||
401 | */ | ||
402 | static struct resource *find_sibling_prev(struct resource *root, struct resource *child) | ||
403 | { | ||
404 | struct resource *this; | ||
405 | |||
406 | for (this = root->child; this; this = this->sibling) | ||
407 | if (this->sibling == child) | ||
408 | return this; | ||
409 | |||
410 | return NULL; | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Find empty slot in the resource tree given range and alignment. | 387 | * Find empty slot in the resource tree given range and alignment. |
415 | * This version allocates from the end of the root resource first. | ||
416 | */ | ||
417 | static int find_resource_from_top(struct resource *root, struct resource *new, | ||
418 | resource_size_t size, resource_size_t min, | ||
419 | resource_size_t max, resource_size_t align, | ||
420 | resource_size_t (*alignf)(void *, | ||
421 | const struct resource *, | ||
422 | resource_size_t, | ||
423 | resource_size_t), | ||
424 | void *alignf_data) | ||
425 | { | ||
426 | struct resource *this; | ||
427 | struct resource tmp, avail, alloc; | ||
428 | |||
429 | tmp.start = root->end; | ||
430 | tmp.end = root->end; | ||
431 | |||
432 | this = find_sibling_prev(root, NULL); | ||
433 | for (;;) { | ||
434 | if (this) { | ||
435 | if (this->end < root->end) | ||
436 | tmp.start = this->end + 1; | ||
437 | } else | ||
438 | tmp.start = root->start; | ||
439 | |||
440 | resource_clip(&tmp, min, max); | ||
441 | |||
442 | /* Check for overflow after ALIGN() */ | ||
443 | avail = *new; | ||
444 | avail.start = ALIGN(tmp.start, align); | ||
445 | avail.end = tmp.end; | ||
446 | if (avail.start >= tmp.start) { | ||
447 | alloc.start = alignf(alignf_data, &avail, size, align); | ||
448 | alloc.end = alloc.start + size - 1; | ||
449 | if (resource_contains(&avail, &alloc)) { | ||
450 | new->start = alloc.start; | ||
451 | new->end = alloc.end; | ||
452 | return 0; | ||
453 | } | ||
454 | } | ||
455 | |||
456 | if (!this || this->start == root->start) | ||
457 | break; | ||
458 | |||
459 | tmp.end = this->start - 1; | ||
460 | this = find_sibling_prev(root, this); | ||
461 | } | ||
462 | return -EBUSY; | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Find empty slot in the resource tree given range and alignment. | ||
467 | * This version allocates from the beginning of the root resource first. | ||
468 | */ | 388 | */ |
469 | static int find_resource(struct resource *root, struct resource *new, | 389 | static int find_resource(struct resource *root, struct resource *new, |
470 | resource_size_t size, resource_size_t min, | 390 | resource_size_t size, resource_size_t min, |
@@ -478,23 +398,24 @@ static int find_resource(struct resource *root, struct resource *new, | |||
478 | struct resource *this = root->child; | 398 | struct resource *this = root->child; |
479 | struct resource tmp = *new, avail, alloc; | 399 | struct resource tmp = *new, avail, alloc; |
480 | 400 | ||
401 | tmp.flags = new->flags; | ||
481 | tmp.start = root->start; | 402 | tmp.start = root->start; |
482 | /* | 403 | /* |
483 | * Skip past an allocated resource that starts at 0, since the | 404 | * Skip past an allocated resource that starts at 0, since the assignment |
484 | * assignment of this->start - 1 to tmp->end below would cause an | 405 | * of this->start - 1 to tmp->end below would cause an underflow. |
485 | * underflow. | ||
486 | */ | 406 | */ |
487 | if (this && this->start == 0) { | 407 | if (this && this->start == 0) { |
488 | tmp.start = this->end + 1; | 408 | tmp.start = this->end + 1; |
489 | this = this->sibling; | 409 | this = this->sibling; |
490 | } | 410 | } |
491 | for (;;) { | 411 | for(;;) { |
492 | if (this) | 412 | if (this) |
493 | tmp.end = this->start - 1; | 413 | tmp.end = this->start - 1; |
494 | else | 414 | else |
495 | tmp.end = root->end; | 415 | tmp.end = root->end; |
496 | 416 | ||
497 | resource_clip(&tmp, min, max); | 417 | resource_clip(&tmp, min, max); |
418 | arch_remove_reservations(&tmp); | ||
498 | 419 | ||
499 | /* Check for overflow after ALIGN() */ | 420 | /* Check for overflow after ALIGN() */ |
500 | avail = *new; | 421 | avail = *new; |
@@ -509,10 +430,8 @@ static int find_resource(struct resource *root, struct resource *new, | |||
509 | return 0; | 430 | return 0; |
510 | } | 431 | } |
511 | } | 432 | } |
512 | |||
513 | if (!this) | 433 | if (!this) |
514 | break; | 434 | break; |
515 | |||
516 | tmp.start = this->end + 1; | 435 | tmp.start = this->end + 1; |
517 | this = this->sibling; | 436 | this = this->sibling; |
518 | } | 437 | } |
@@ -545,10 +464,7 @@ int allocate_resource(struct resource *root, struct resource *new, | |||
545 | alignf = simple_align_resource; | 464 | alignf = simple_align_resource; |
546 | 465 | ||
547 | write_lock(&resource_lock); | 466 | write_lock(&resource_lock); |
548 | if (resource_alloc_from_bottom) | 467 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); |
549 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | ||
550 | else | ||
551 | err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data); | ||
552 | if (err >= 0 && __request_resource(root, new)) | 468 | if (err >= 0 && __request_resource(root, new)) |
553 | err = -EBUSY; | 469 | err = -EBUSY; |
554 | write_unlock(&resource_lock); | 470 | write_unlock(&resource_lock); |
diff --git a/kernel/sched.c b/kernel/sched.c index aa14a56f9d03..297d1a0eedb0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -560,18 +560,8 @@ struct rq { | |||
560 | 560 | ||
561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
562 | 562 | ||
563 | static inline | ||
564 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
565 | { | ||
566 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
567 | 563 | ||
568 | /* | 564 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); |
569 | * A queue event has occurred, and we're going to schedule. In | ||
570 | * this case, we can save a useless back to back clock update. | ||
571 | */ | ||
572 | if (test_tsk_need_resched(p)) | ||
573 | rq->skip_clock_update = 1; | ||
574 | } | ||
575 | 565 | ||
576 | static inline int cpu_of(struct rq *rq) | 566 | static inline int cpu_of(struct rq *rq) |
577 | { | 567 | { |
@@ -646,22 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
646 | 636 | ||
647 | #endif /* CONFIG_CGROUP_SCHED */ | 637 | #endif /* CONFIG_CGROUP_SCHED */ |
648 | 638 | ||
649 | static u64 irq_time_cpu(int cpu); | 639 | static void update_rq_clock_task(struct rq *rq, s64 delta); |
650 | static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); | ||
651 | 640 | ||
652 | inline void update_rq_clock(struct rq *rq) | 641 | static void update_rq_clock(struct rq *rq) |
653 | { | 642 | { |
654 | if (!rq->skip_clock_update) { | 643 | s64 delta; |
655 | int cpu = cpu_of(rq); | ||
656 | u64 irq_time; | ||
657 | 644 | ||
658 | rq->clock = sched_clock_cpu(cpu); | 645 | if (rq->skip_clock_update) |
659 | irq_time = irq_time_cpu(cpu); | 646 | return; |
660 | if (rq->clock - irq_time > rq->clock_task) | ||
661 | rq->clock_task = rq->clock - irq_time; | ||
662 | 647 | ||
663 | sched_irq_time_avg_update(rq, irq_time); | 648 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; |
664 | } | 649 | rq->clock += delta; |
650 | update_rq_clock_task(rq, delta); | ||
665 | } | 651 | } |
666 | 652 | ||
667 | /* | 653 | /* |
@@ -1934,10 +1920,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
1934 | * They are read and saved off onto struct rq in update_rq_clock(). | 1920 | * They are read and saved off onto struct rq in update_rq_clock(). |
1935 | * This may result in other CPU reading this CPU's irq time and can | 1921 | * This may result in other CPU reading this CPU's irq time and can |
1936 | * race with irq/account_system_vtime on this CPU. We would either get old | 1922 | * race with irq/account_system_vtime on this CPU. We would either get old |
1937 | * or new value (or semi updated value on 32 bit) with a side effect of | 1923 | * or new value with a side effect of accounting a slice of irq time to wrong |
1938 | * accounting a slice of irq time to wrong task when irq is in progress | 1924 | * task when irq is in progress while we read rq->clock. That is a worthy |
1939 | * while we read rq->clock. That is a worthy compromise in place of having | 1925 | * compromise in place of having locks on each irq in account_system_time. |
1940 | * locks on each irq in account_system_time. | ||
1941 | */ | 1926 | */ |
1942 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | 1927 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); |
1943 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | 1928 | static DEFINE_PER_CPU(u64, cpu_softirq_time); |
@@ -1955,19 +1940,58 @@ void disable_sched_clock_irqtime(void) | |||
1955 | sched_clock_irqtime = 0; | 1940 | sched_clock_irqtime = 0; |
1956 | } | 1941 | } |
1957 | 1942 | ||
1958 | static u64 irq_time_cpu(int cpu) | 1943 | #ifndef CONFIG_64BIT |
1944 | static DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
1945 | |||
1946 | static inline void irq_time_write_begin(void) | ||
1959 | { | 1947 | { |
1960 | if (!sched_clock_irqtime) | 1948 | __this_cpu_inc(irq_time_seq.sequence); |
1961 | return 0; | 1949 | smp_wmb(); |
1950 | } | ||
1962 | 1951 | ||
1952 | static inline void irq_time_write_end(void) | ||
1953 | { | ||
1954 | smp_wmb(); | ||
1955 | __this_cpu_inc(irq_time_seq.sequence); | ||
1956 | } | ||
1957 | |||
1958 | static inline u64 irq_time_read(int cpu) | ||
1959 | { | ||
1960 | u64 irq_time; | ||
1961 | unsigned seq; | ||
1962 | |||
1963 | do { | ||
1964 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
1965 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
1966 | per_cpu(cpu_hardirq_time, cpu); | ||
1967 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
1968 | |||
1969 | return irq_time; | ||
1970 | } | ||
1971 | #else /* CONFIG_64BIT */ | ||
1972 | static inline void irq_time_write_begin(void) | ||
1973 | { | ||
1974 | } | ||
1975 | |||
1976 | static inline void irq_time_write_end(void) | ||
1977 | { | ||
1978 | } | ||
1979 | |||
1980 | static inline u64 irq_time_read(int cpu) | ||
1981 | { | ||
1963 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | 1982 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); |
1964 | } | 1983 | } |
1984 | #endif /* CONFIG_64BIT */ | ||
1965 | 1985 | ||
1986 | /* | ||
1987 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
1988 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
1989 | */ | ||
1966 | void account_system_vtime(struct task_struct *curr) | 1990 | void account_system_vtime(struct task_struct *curr) |
1967 | { | 1991 | { |
1968 | unsigned long flags; | 1992 | unsigned long flags; |
1993 | s64 delta; | ||
1969 | int cpu; | 1994 | int cpu; |
1970 | u64 now, delta; | ||
1971 | 1995 | ||
1972 | if (!sched_clock_irqtime) | 1996 | if (!sched_clock_irqtime) |
1973 | return; | 1997 | return; |
@@ -1975,9 +1999,10 @@ void account_system_vtime(struct task_struct *curr) | |||
1975 | local_irq_save(flags); | 1999 | local_irq_save(flags); |
1976 | 2000 | ||
1977 | cpu = smp_processor_id(); | 2001 | cpu = smp_processor_id(); |
1978 | now = sched_clock_cpu(cpu); | 2002 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); |
1979 | delta = now - per_cpu(irq_start_time, cpu); | 2003 | __this_cpu_add(irq_start_time, delta); |
1980 | per_cpu(irq_start_time, cpu) = now; | 2004 | |
2005 | irq_time_write_begin(); | ||
1981 | /* | 2006 | /* |
1982 | * We do not account for softirq time from ksoftirqd here. | 2007 | * We do not account for softirq time from ksoftirqd here. |
1983 | * We want to continue accounting softirq time to ksoftirqd thread | 2008 | * We want to continue accounting softirq time to ksoftirqd thread |
@@ -1985,33 +2010,55 @@ void account_system_vtime(struct task_struct *curr) | |||
1985 | * that do not consume any time, but still wants to run. | 2010 | * that do not consume any time, but still wants to run. |
1986 | */ | 2011 | */ |
1987 | if (hardirq_count()) | 2012 | if (hardirq_count()) |
1988 | per_cpu(cpu_hardirq_time, cpu) += delta; | 2013 | __this_cpu_add(cpu_hardirq_time, delta); |
1989 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | 2014 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) |
1990 | per_cpu(cpu_softirq_time, cpu) += delta; | 2015 | __this_cpu_add(cpu_softirq_time, delta); |
1991 | 2016 | ||
2017 | irq_time_write_end(); | ||
1992 | local_irq_restore(flags); | 2018 | local_irq_restore(flags); |
1993 | } | 2019 | } |
1994 | EXPORT_SYMBOL_GPL(account_system_vtime); | 2020 | EXPORT_SYMBOL_GPL(account_system_vtime); |
1995 | 2021 | ||
1996 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) | 2022 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
1997 | { | 2023 | { |
1998 | if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { | 2024 | s64 irq_delta; |
1999 | u64 delta_irq = curr_irq_time - rq->prev_irq_time; | 2025 | |
2000 | rq->prev_irq_time = curr_irq_time; | 2026 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; |
2001 | sched_rt_avg_update(rq, delta_irq); | 2027 | |
2002 | } | 2028 | /* |
2029 | * Since irq_time is only updated on {soft,}irq_exit, we might run into | ||
2030 | * this case when a previous update_rq_clock() happened inside a | ||
2031 | * {soft,}irq region. | ||
2032 | * | ||
2033 | * When this happens, we stop ->clock_task and only update the | ||
2034 | * prev_irq_time stamp to account for the part that fit, so that a next | ||
2035 | * update will consume the rest. This ensures ->clock_task is | ||
2036 | * monotonic. | ||
2037 | * | ||
2038 | * It does however cause some slight miss-attribution of {soft,}irq | ||
2039 | * time, a more accurate solution would be to update the irq_time using | ||
2040 | * the current rq->clock timestamp, except that would require using | ||
2041 | * atomic ops. | ||
2042 | */ | ||
2043 | if (irq_delta > delta) | ||
2044 | irq_delta = delta; | ||
2045 | |||
2046 | rq->prev_irq_time += irq_delta; | ||
2047 | delta -= irq_delta; | ||
2048 | rq->clock_task += delta; | ||
2049 | |||
2050 | if (irq_delta && sched_feat(NONIRQ_POWER)) | ||
2051 | sched_rt_avg_update(rq, irq_delta); | ||
2003 | } | 2052 | } |
2004 | 2053 | ||
2005 | #else | 2054 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
2006 | 2055 | ||
2007 | static u64 irq_time_cpu(int cpu) | 2056 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
2008 | { | 2057 | { |
2009 | return 0; | 2058 | rq->clock_task += delta; |
2010 | } | 2059 | } |
2011 | 2060 | ||
2012 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } | 2061 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
2013 | |||
2014 | #endif | ||
2015 | 2062 | ||
2016 | #include "sched_idletask.c" | 2063 | #include "sched_idletask.c" |
2017 | #include "sched_fair.c" | 2064 | #include "sched_fair.c" |
@@ -2118,6 +2165,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
2118 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2165 | p->sched_class->prio_changed(rq, p, oldprio, running); |
2119 | } | 2166 | } |
2120 | 2167 | ||
2168 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
2169 | { | ||
2170 | const struct sched_class *class; | ||
2171 | |||
2172 | if (p->sched_class == rq->curr->sched_class) { | ||
2173 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
2174 | } else { | ||
2175 | for_each_class(class) { | ||
2176 | if (class == rq->curr->sched_class) | ||
2177 | break; | ||
2178 | if (class == p->sched_class) { | ||
2179 | resched_task(rq->curr); | ||
2180 | break; | ||
2181 | } | ||
2182 | } | ||
2183 | } | ||
2184 | |||
2185 | /* | ||
2186 | * A queue event has occurred, and we're going to schedule. In | ||
2187 | * this case, we can save a useless back to back clock update. | ||
2188 | */ | ||
2189 | if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) | ||
2190 | rq->skip_clock_update = 1; | ||
2191 | } | ||
2192 | |||
2121 | #ifdef CONFIG_SMP | 2193 | #ifdef CONFIG_SMP |
2122 | /* | 2194 | /* |
2123 | * Is this task likely cache-hot: | 2195 | * Is this task likely cache-hot: |
@@ -3104,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq) | |||
3104 | return delta; | 3176 | return delta; |
3105 | } | 3177 | } |
3106 | 3178 | ||
3179 | static unsigned long | ||
3180 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
3181 | { | ||
3182 | load *= exp; | ||
3183 | load += active * (FIXED_1 - exp); | ||
3184 | load += 1UL << (FSHIFT - 1); | ||
3185 | return load >> FSHIFT; | ||
3186 | } | ||
3187 | |||
3107 | #ifdef CONFIG_NO_HZ | 3188 | #ifdef CONFIG_NO_HZ |
3108 | /* | 3189 | /* |
3109 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. | 3190 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. |
@@ -3133,6 +3214,128 @@ static long calc_load_fold_idle(void) | |||
3133 | 3214 | ||
3134 | return delta; | 3215 | return delta; |
3135 | } | 3216 | } |
3217 | |||
3218 | /** | ||
3219 | * fixed_power_int - compute: x^n, in O(log n) time | ||
3220 | * | ||
3221 | * @x: base of the power | ||
3222 | * @frac_bits: fractional bits of @x | ||
3223 | * @n: power to raise @x to. | ||
3224 | * | ||
3225 | * By exploiting the relation between the definition of the natural power | ||
3226 | * function: x^n := x*x*...*x (x multiplied by itself for n times), and | ||
3227 | * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, | ||
3228 | * (where: n_i \elem {0, 1}, the binary vector representing n), | ||
3229 | * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is | ||
3230 | * of course trivially computable in O(log_2 n), the length of our binary | ||
3231 | * vector. | ||
3232 | */ | ||
3233 | static unsigned long | ||
3234 | fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) | ||
3235 | { | ||
3236 | unsigned long result = 1UL << frac_bits; | ||
3237 | |||
3238 | if (n) for (;;) { | ||
3239 | if (n & 1) { | ||
3240 | result *= x; | ||
3241 | result += 1UL << (frac_bits - 1); | ||
3242 | result >>= frac_bits; | ||
3243 | } | ||
3244 | n >>= 1; | ||
3245 | if (!n) | ||
3246 | break; | ||
3247 | x *= x; | ||
3248 | x += 1UL << (frac_bits - 1); | ||
3249 | x >>= frac_bits; | ||
3250 | } | ||
3251 | |||
3252 | return result; | ||
3253 | } | ||
3254 | |||
3255 | /* | ||
3256 | * a1 = a0 * e + a * (1 - e) | ||
3257 | * | ||
3258 | * a2 = a1 * e + a * (1 - e) | ||
3259 | * = (a0 * e + a * (1 - e)) * e + a * (1 - e) | ||
3260 | * = a0 * e^2 + a * (1 - e) * (1 + e) | ||
3261 | * | ||
3262 | * a3 = a2 * e + a * (1 - e) | ||
3263 | * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) | ||
3264 | * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) | ||
3265 | * | ||
3266 | * ... | ||
3267 | * | ||
3268 | * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] | ||
3269 | * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) | ||
3270 | * = a0 * e^n + a * (1 - e^n) | ||
3271 | * | ||
3272 | * [1] application of the geometric series: | ||
3273 | * | ||
3274 | * n 1 - x^(n+1) | ||
3275 | * S_n := \Sum x^i = ------------- | ||
3276 | * i=0 1 - x | ||
3277 | */ | ||
3278 | static unsigned long | ||
3279 | calc_load_n(unsigned long load, unsigned long exp, | ||
3280 | unsigned long active, unsigned int n) | ||
3281 | { | ||
3282 | |||
3283 | return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); | ||
3284 | } | ||
3285 | |||
3286 | /* | ||
3287 | * NO_HZ can leave us missing all per-cpu ticks calling | ||
3288 | * calc_load_account_active(), but since an idle CPU folds its delta into | ||
3289 | * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold | ||
3290 | * in the pending idle delta if our idle period crossed a load cycle boundary. | ||
3291 | * | ||
3292 | * Once we've updated the global active value, we need to apply the exponential | ||
3293 | * weights adjusted to the number of cycles missed. | ||
3294 | */ | ||
3295 | static void calc_global_nohz(unsigned long ticks) | ||
3296 | { | ||
3297 | long delta, active, n; | ||
3298 | |||
3299 | if (time_before(jiffies, calc_load_update)) | ||
3300 | return; | ||
3301 | |||
3302 | /* | ||
3303 | * If we crossed a calc_load_update boundary, make sure to fold | ||
3304 | * any pending idle changes, the respective CPUs might have | ||
3305 | * missed the tick driven calc_load_account_active() update | ||
3306 | * due to NO_HZ. | ||
3307 | */ | ||
3308 | delta = calc_load_fold_idle(); | ||
3309 | if (delta) | ||
3310 | atomic_long_add(delta, &calc_load_tasks); | ||
3311 | |||
3312 | /* | ||
3313 | * If we were idle for multiple load cycles, apply them. | ||
3314 | */ | ||
3315 | if (ticks >= LOAD_FREQ) { | ||
3316 | n = ticks / LOAD_FREQ; | ||
3317 | |||
3318 | active = atomic_long_read(&calc_load_tasks); | ||
3319 | active = active > 0 ? active * FIXED_1 : 0; | ||
3320 | |||
3321 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | ||
3322 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | ||
3323 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
3324 | |||
3325 | calc_load_update += n * LOAD_FREQ; | ||
3326 | } | ||
3327 | |||
3328 | /* | ||
3329 | * Its possible the remainder of the above division also crosses | ||
3330 | * a LOAD_FREQ period, the regular check in calc_global_load() | ||
3331 | * which comes after this will take care of that. | ||
3332 | * | ||
3333 | * Consider us being 11 ticks before a cycle completion, and us | ||
3334 | * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will | ||
3335 | * age us 4 cycles, and the test in calc_global_load() will | ||
3336 | * pick up the final one. | ||
3337 | */ | ||
3338 | } | ||
3136 | #else | 3339 | #else |
3137 | static void calc_load_account_idle(struct rq *this_rq) | 3340 | static void calc_load_account_idle(struct rq *this_rq) |
3138 | { | 3341 | { |
@@ -3142,6 +3345,10 @@ static inline long calc_load_fold_idle(void) | |||
3142 | { | 3345 | { |
3143 | return 0; | 3346 | return 0; |
3144 | } | 3347 | } |
3348 | |||
3349 | static void calc_global_nohz(unsigned long ticks) | ||
3350 | { | ||
3351 | } | ||
3145 | #endif | 3352 | #endif |
3146 | 3353 | ||
3147 | /** | 3354 | /** |
@@ -3159,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | |||
3159 | loads[2] = (avenrun[2] + offset) << shift; | 3366 | loads[2] = (avenrun[2] + offset) << shift; |
3160 | } | 3367 | } |
3161 | 3368 | ||
3162 | static unsigned long | ||
3163 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
3164 | { | ||
3165 | load *= exp; | ||
3166 | load += active * (FIXED_1 - exp); | ||
3167 | return load >> FSHIFT; | ||
3168 | } | ||
3169 | |||
3170 | /* | 3369 | /* |
3171 | * calc_load - update the avenrun load estimates 10 ticks after the | 3370 | * calc_load - update the avenrun load estimates 10 ticks after the |
3172 | * CPUs have updated calc_load_tasks. | 3371 | * CPUs have updated calc_load_tasks. |
3173 | */ | 3372 | */ |
3174 | void calc_global_load(void) | 3373 | void calc_global_load(unsigned long ticks) |
3175 | { | 3374 | { |
3176 | unsigned long upd = calc_load_update + 10; | ||
3177 | long active; | 3375 | long active; |
3178 | 3376 | ||
3179 | if (time_before(jiffies, upd)) | 3377 | calc_global_nohz(ticks); |
3378 | |||
3379 | if (time_before(jiffies, calc_load_update + 10)) | ||
3180 | return; | 3380 | return; |
3181 | 3381 | ||
3182 | active = atomic_long_read(&calc_load_tasks); | 3382 | active = atomic_long_read(&calc_load_tasks); |
@@ -3830,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
3830 | { | 4030 | { |
3831 | if (prev->se.on_rq) | 4031 | if (prev->se.on_rq) |
3832 | update_rq_clock(rq); | 4032 | update_rq_clock(rq); |
3833 | rq->skip_clock_update = 0; | ||
3834 | prev->sched_class->put_prev_task(rq, prev); | 4033 | prev->sched_class->put_prev_task(rq, prev); |
3835 | } | 4034 | } |
3836 | 4035 | ||
@@ -3888,7 +4087,6 @@ need_resched_nonpreemptible: | |||
3888 | hrtick_clear(rq); | 4087 | hrtick_clear(rq); |
3889 | 4088 | ||
3890 | raw_spin_lock_irq(&rq->lock); | 4089 | raw_spin_lock_irq(&rq->lock); |
3891 | clear_tsk_need_resched(prev); | ||
3892 | 4090 | ||
3893 | switch_count = &prev->nivcsw; | 4091 | switch_count = &prev->nivcsw; |
3894 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4092 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
@@ -3920,6 +4118,8 @@ need_resched_nonpreemptible: | |||
3920 | 4118 | ||
3921 | put_prev_task(rq, prev); | 4119 | put_prev_task(rq, prev); |
3922 | next = pick_next_task(rq); | 4120 | next = pick_next_task(rq); |
4121 | clear_tsk_need_resched(prev); | ||
4122 | rq->skip_clock_update = 0; | ||
3923 | 4123 | ||
3924 | if (likely(prev != next)) { | 4124 | if (likely(prev != next)) { |
3925 | sched_info_switch(prev, next); | 4125 | sched_info_switch(prev, next); |
@@ -6960,6 +7160,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
6960 | if (cpu != group_first_cpu(sd->groups)) | 7160 | if (cpu != group_first_cpu(sd->groups)) |
6961 | return; | 7161 | return; |
6962 | 7162 | ||
7163 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); | ||
7164 | |||
6963 | child = sd->child; | 7165 | child = sd->child; |
6964 | 7166 | ||
6965 | sd->groups->cpu_power = 0; | 7167 | sd->groups->cpu_power = 0; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f4f6a8326dd0..00ebd7686676 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
1656 | 1656 | ||
1657 | if (unlikely(rt_prio(p->prio))) | ||
1658 | goto preempt; | ||
1659 | |||
1660 | if (unlikely(p->sched_class != &fair_sched_class)) | ||
1661 | return; | ||
1662 | |||
1663 | if (unlikely(se == pse)) | 1657 | if (unlikely(se == pse)) |
1664 | return; | 1658 | return; |
1665 | 1659 | ||
@@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
1764 | set_task_cpu(p, this_cpu); | 1758 | set_task_cpu(p, this_cpu); |
1765 | activate_task(this_rq, p, 0); | 1759 | activate_task(this_rq, p, 0); |
1766 | check_preempt_curr(this_rq, p, 0); | 1760 | check_preempt_curr(this_rq, p, 0); |
1767 | |||
1768 | /* re-arm NEWIDLE balancing when moving tasks */ | ||
1769 | src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
1770 | this_rq->idle_stamp = 0; | ||
1771 | } | 1761 | } |
1772 | 1762 | ||
1773 | /* | 1763 | /* |
@@ -2035,13 +2025,16 @@ struct sd_lb_stats { | |||
2035 | unsigned long this_load_per_task; | 2025 | unsigned long this_load_per_task; |
2036 | unsigned long this_nr_running; | 2026 | unsigned long this_nr_running; |
2037 | unsigned long this_has_capacity; | 2027 | unsigned long this_has_capacity; |
2028 | unsigned int this_idle_cpus; | ||
2038 | 2029 | ||
2039 | /* Statistics of the busiest group */ | 2030 | /* Statistics of the busiest group */ |
2031 | unsigned int busiest_idle_cpus; | ||
2040 | unsigned long max_load; | 2032 | unsigned long max_load; |
2041 | unsigned long busiest_load_per_task; | 2033 | unsigned long busiest_load_per_task; |
2042 | unsigned long busiest_nr_running; | 2034 | unsigned long busiest_nr_running; |
2043 | unsigned long busiest_group_capacity; | 2035 | unsigned long busiest_group_capacity; |
2044 | unsigned long busiest_has_capacity; | 2036 | unsigned long busiest_has_capacity; |
2037 | unsigned int busiest_group_weight; | ||
2045 | 2038 | ||
2046 | int group_imb; /* Is there imbalance in this sd */ | 2039 | int group_imb; /* Is there imbalance in this sd */ |
2047 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 2040 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
@@ -2063,6 +2056,8 @@ struct sg_lb_stats { | |||
2063 | unsigned long sum_nr_running; /* Nr tasks running in the group */ | 2056 | unsigned long sum_nr_running; /* Nr tasks running in the group */ |
2064 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ | 2057 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ |
2065 | unsigned long group_capacity; | 2058 | unsigned long group_capacity; |
2059 | unsigned long idle_cpus; | ||
2060 | unsigned long group_weight; | ||
2066 | int group_imb; /* Is there an imbalance in the group ? */ | 2061 | int group_imb; /* Is there an imbalance in the group ? */ |
2067 | int group_has_capacity; /* Is there extra capacity in the group? */ | 2062 | int group_has_capacity; /* Is there extra capacity in the group? */ |
2068 | }; | 2063 | }; |
@@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2431 | sgs->group_load += load; | 2426 | sgs->group_load += load; |
2432 | sgs->sum_nr_running += rq->nr_running; | 2427 | sgs->sum_nr_running += rq->nr_running; |
2433 | sgs->sum_weighted_load += weighted_cpuload(i); | 2428 | sgs->sum_weighted_load += weighted_cpuload(i); |
2434 | 2429 | if (idle_cpu(i)) | |
2430 | sgs->idle_cpus++; | ||
2435 | } | 2431 | } |
2436 | 2432 | ||
2437 | /* | 2433 | /* |
@@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2469 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | 2465 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
2470 | if (!sgs->group_capacity) | 2466 | if (!sgs->group_capacity) |
2471 | sgs->group_capacity = fix_small_capacity(sd, group); | 2467 | sgs->group_capacity = fix_small_capacity(sd, group); |
2468 | sgs->group_weight = group->group_weight; | ||
2472 | 2469 | ||
2473 | if (sgs->group_capacity > sgs->sum_nr_running) | 2470 | if (sgs->group_capacity > sgs->sum_nr_running) |
2474 | sgs->group_has_capacity = 1; | 2471 | sgs->group_has_capacity = 1; |
@@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
2576 | sds->this_nr_running = sgs.sum_nr_running; | 2573 | sds->this_nr_running = sgs.sum_nr_running; |
2577 | sds->this_load_per_task = sgs.sum_weighted_load; | 2574 | sds->this_load_per_task = sgs.sum_weighted_load; |
2578 | sds->this_has_capacity = sgs.group_has_capacity; | 2575 | sds->this_has_capacity = sgs.group_has_capacity; |
2576 | sds->this_idle_cpus = sgs.idle_cpus; | ||
2579 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { | 2577 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { |
2580 | sds->max_load = sgs.avg_load; | 2578 | sds->max_load = sgs.avg_load; |
2581 | sds->busiest = sg; | 2579 | sds->busiest = sg; |
2582 | sds->busiest_nr_running = sgs.sum_nr_running; | 2580 | sds->busiest_nr_running = sgs.sum_nr_running; |
2581 | sds->busiest_idle_cpus = sgs.idle_cpus; | ||
2583 | sds->busiest_group_capacity = sgs.group_capacity; | 2582 | sds->busiest_group_capacity = sgs.group_capacity; |
2584 | sds->busiest_load_per_task = sgs.sum_weighted_load; | 2583 | sds->busiest_load_per_task = sgs.sum_weighted_load; |
2585 | sds->busiest_has_capacity = sgs.group_has_capacity; | 2584 | sds->busiest_has_capacity = sgs.group_has_capacity; |
2585 | sds->busiest_group_weight = sgs.group_weight; | ||
2586 | sds->group_imb = sgs.group_imb; | 2586 | sds->group_imb = sgs.group_imb; |
2587 | } | 2587 | } |
2588 | 2588 | ||
@@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2860 | if (sds.this_load >= sds.avg_load) | 2860 | if (sds.this_load >= sds.avg_load) |
2861 | goto out_balanced; | 2861 | goto out_balanced; |
2862 | 2862 | ||
2863 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | 2863 | /* |
2864 | goto out_balanced; | 2864 | * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. |
2865 | * And to check for busy balance use !idle_cpu instead of | ||
2866 | * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE | ||
2867 | * even when they are idle. | ||
2868 | */ | ||
2869 | if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { | ||
2870 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | ||
2871 | goto out_balanced; | ||
2872 | } else { | ||
2873 | /* | ||
2874 | * This cpu is idle. If the busiest group load doesn't | ||
2875 | * have more tasks than the number of available cpu's and | ||
2876 | * there is no imbalance between this and busiest group | ||
2877 | * wrt to idle cpu's, it is balanced. | ||
2878 | */ | ||
2879 | if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && | ||
2880 | sds.busiest_nr_running <= sds.busiest_group_weight) | ||
2881 | goto out_balanced; | ||
2882 | } | ||
2865 | 2883 | ||
2866 | force_balance: | 2884 | force_balance: |
2867 | /* Looks like there is an imbalance. Compute it */ | 2885 | /* Looks like there is an imbalance. Compute it */ |
@@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3197 | interval = msecs_to_jiffies(sd->balance_interval); | 3215 | interval = msecs_to_jiffies(sd->balance_interval); |
3198 | if (time_after(next_balance, sd->last_balance + interval)) | 3216 | if (time_after(next_balance, sd->last_balance + interval)) |
3199 | next_balance = sd->last_balance + interval; | 3217 | next_balance = sd->last_balance + interval; |
3200 | if (pulled_task) | 3218 | if (pulled_task) { |
3219 | this_rq->idle_stamp = 0; | ||
3201 | break; | 3220 | break; |
3221 | } | ||
3202 | } | 3222 | } |
3203 | 3223 | ||
3204 | raw_spin_lock(&this_rq->lock); | 3224 | raw_spin_lock(&this_rq->lock); |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 45bddc0c1048..2bf6b47058c1 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p, | |||
19 | static void | 19 | static void |
20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) | 20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) |
21 | { | 21 | { |
22 | resched_task(rq->curr); /* we preempt everything */ | 22 | /* we're never preempted */ |
23 | } | 23 | } |
24 | 24 | ||
25 | static struct task_struct *pick_next_task_stop(struct rq *rq) | 25 | static struct task_struct *pick_next_task_stop(struct rq *rq) |
26 | { | 26 | { |
27 | struct task_struct *stop = rq->stop; | 27 | struct task_struct *stop = rq->stop; |
28 | 28 | ||
29 | if (stop && stop->state == TASK_RUNNING) | 29 | if (stop && stop->se.on_rq) |
30 | return stop; | 30 | return stop; |
31 | 31 | ||
32 | return NULL; | 32 | return NULL; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b65bf634035e..5abfa1518554 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -702,7 +702,6 @@ static struct ctl_table kern_table[] = { | |||
702 | .extra1 = &zero, | 702 | .extra1 = &zero, |
703 | .extra2 = &ten_thousand, | 703 | .extra2 = &ten_thousand, |
704 | }, | 704 | }, |
705 | #endif | ||
706 | { | 705 | { |
707 | .procname = "dmesg_restrict", | 706 | .procname = "dmesg_restrict", |
708 | .data = &dmesg_restrict, | 707 | .data = &dmesg_restrict, |
@@ -712,6 +711,7 @@ static struct ctl_table kern_table[] = { | |||
712 | .extra1 = &zero, | 711 | .extra1 = &zero, |
713 | .extra2 = &one, | 712 | .extra2 = &one, |
714 | }, | 713 | }, |
714 | #endif | ||
715 | { | 715 | { |
716 | .procname = "ngroups_max", | 716 | .procname = "ngroups_max", |
717 | .data = &ngroups_max, | 717 | .data = &ngroups_max, |
diff --git a/kernel/timer.c b/kernel/timer.c index 68a9ae7679b7..353b9227c2ec 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now) | |||
1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
1253 | unsigned long expires; | 1253 | unsigned long expires; |
1254 | 1254 | ||
1255 | /* | ||
1256 | * Pretend that there is no timer pending if the cpu is offline. | ||
1257 | * Possible pending timers will be migrated later to an active cpu. | ||
1258 | */ | ||
1259 | if (cpu_is_offline(smp_processor_id())) | ||
1260 | return now + NEXT_TIMER_MAX_DELTA; | ||
1255 | spin_lock(&base->lock); | 1261 | spin_lock(&base->lock); |
1256 | if (time_before_eq(base->next_timer, base->timer_jiffies)) | 1262 | if (time_before_eq(base->next_timer, base->timer_jiffies)) |
1257 | base->next_timer = __next_timer_interrupt(base); | 1263 | base->next_timer = __next_timer_interrupt(base); |
@@ -1319,7 +1325,7 @@ void do_timer(unsigned long ticks) | |||
1319 | { | 1325 | { |
1320 | jiffies_64 += ticks; | 1326 | jiffies_64 += ticks; |
1321 | update_wall_time(); | 1327 | update_wall_time(); |
1322 | calc_global_load(); | 1328 | calc_global_load(ticks); |
1323 | } | 1329 | } |
1324 | 1330 | ||
1325 | #ifdef __ARCH_WANT_SYS_ALARM | 1331 | #ifdef __ARCH_WANT_SYS_ALARM |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e04b8bcdef88..ea37e2ff4164 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -126,7 +126,7 @@ if FTRACE | |||
126 | config FUNCTION_TRACER | 126 | config FUNCTION_TRACER |
127 | bool "Kernel Function Tracer" | 127 | bool "Kernel Function Tracer" |
128 | depends on HAVE_FUNCTION_TRACER | 128 | depends on HAVE_FUNCTION_TRACER |
129 | select FRAME_POINTER if (!ARM_UNWIND) | 129 | select FRAME_POINTER if !ARM_UNWIND && !S390 |
130 | select KALLSYMS | 130 | select KALLSYMS |
131 | select GENERIC_TRACER | 131 | select GENERIC_TRACER |
132 | select CONTEXT_SWITCH_TRACER | 132 | select CONTEXT_SWITCH_TRACER |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..f8cf959bad45 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
18 | #include <linux/kallsyms.h> | 18 | #include <linux/kallsyms.h> |
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/smp_lock.h> | ||
21 | #include <linux/notifier.h> | 20 | #include <linux/notifier.h> |
22 | #include <linux/irqflags.h> | 21 | #include <linux/irqflags.h> |
23 | #include <linux/debugfs.h> | 22 | #include <linux/debugfs.h> |
@@ -1284,6 +1283,8 @@ void trace_dump_stack(void) | |||
1284 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); | 1283 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); |
1285 | } | 1284 | } |
1286 | 1285 | ||
1286 | static DEFINE_PER_CPU(int, user_stack_count); | ||
1287 | |||
1287 | void | 1288 | void |
1288 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | 1289 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) |
1289 | { | 1290 | { |
@@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1302 | if (unlikely(in_nmi())) | 1303 | if (unlikely(in_nmi())) |
1303 | return; | 1304 | return; |
1304 | 1305 | ||
1306 | /* | ||
1307 | * prevent recursion, since the user stack tracing may | ||
1308 | * trigger other kernel events. | ||
1309 | */ | ||
1310 | preempt_disable(); | ||
1311 | if (__this_cpu_read(user_stack_count)) | ||
1312 | goto out; | ||
1313 | |||
1314 | __this_cpu_inc(user_stack_count); | ||
1315 | |||
1316 | |||
1317 | |||
1305 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, | 1318 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, |
1306 | sizeof(*entry), flags, pc); | 1319 | sizeof(*entry), flags, pc); |
1307 | if (!event) | 1320 | if (!event) |
@@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1319 | save_stack_trace_user(&trace); | 1332 | save_stack_trace_user(&trace); |
1320 | if (!filter_check_discard(call, entry, buffer, event)) | 1333 | if (!filter_check_discard(call, entry, buffer, event)) |
1321 | ring_buffer_unlock_commit(buffer, event); | 1334 | ring_buffer_unlock_commit(buffer, event); |
1335 | |||
1336 | __this_cpu_dec(user_stack_count); | ||
1337 | |||
1338 | out: | ||
1339 | preempt_enable(); | ||
1322 | } | 1340 | } |
1323 | 1341 | ||
1324 | #ifdef UNUSED | 1342 | #ifdef UNUSED |
@@ -2320,11 +2338,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, | |||
2320 | return count; | 2338 | return count; |
2321 | } | 2339 | } |
2322 | 2340 | ||
2341 | static loff_t tracing_seek(struct file *file, loff_t offset, int origin) | ||
2342 | { | ||
2343 | if (file->f_mode & FMODE_READ) | ||
2344 | return seq_lseek(file, offset, origin); | ||
2345 | else | ||
2346 | return 0; | ||
2347 | } | ||
2348 | |||
2323 | static const struct file_operations tracing_fops = { | 2349 | static const struct file_operations tracing_fops = { |
2324 | .open = tracing_open, | 2350 | .open = tracing_open, |
2325 | .read = seq_read, | 2351 | .read = seq_read, |
2326 | .write = tracing_write_stub, | 2352 | .write = tracing_write_stub, |
2327 | .llseek = seq_lseek, | 2353 | .llseek = tracing_seek, |
2328 | .release = tracing_release, | 2354 | .release = tracing_release, |
2329 | }; | 2355 | }; |
2330 | 2356 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 90db1bd1a978..e785b0f2aea5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -661,7 +661,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) | |||
661 | { | 661 | { |
662 | struct worker *worker = kthread_data(task); | 662 | struct worker *worker = kthread_data(task); |
663 | 663 | ||
664 | if (likely(!(worker->flags & WORKER_NOT_RUNNING))) | 664 | if (!(worker->flags & WORKER_NOT_RUNNING)) |
665 | atomic_inc(get_gcwq_nr_running(cpu)); | 665 | atomic_inc(get_gcwq_nr_running(cpu)); |
666 | } | 666 | } |
667 | 667 | ||
@@ -687,7 +687,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, | |||
687 | struct global_cwq *gcwq = get_gcwq(cpu); | 687 | struct global_cwq *gcwq = get_gcwq(cpu); |
688 | atomic_t *nr_running = get_gcwq_nr_running(cpu); | 688 | atomic_t *nr_running = get_gcwq_nr_running(cpu); |
689 | 689 | ||
690 | if (unlikely(worker->flags & WORKER_NOT_RUNNING)) | 690 | if (worker->flags & WORKER_NOT_RUNNING) |
691 | return NULL; | 691 | return NULL; |
692 | 692 | ||
693 | /* this can only happen on the local cpu */ | 693 | /* this can only happen on the local cpu */ |
@@ -3692,7 +3692,8 @@ static int __init init_workqueues(void) | |||
3692 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); | 3692 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); |
3693 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, | 3693 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, |
3694 | WQ_UNBOUND_MAX_ACTIVE); | 3694 | WQ_UNBOUND_MAX_ACTIVE); |
3695 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); | 3695 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || |
3696 | !system_unbound_wq); | ||
3696 | return 0; | 3697 | return 0; |
3697 | } | 3698 | } |
3698 | early_initcall(init_workqueues); | 3699 | early_initcall(init_workqueues); |