Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/debug/kdb/kdb_main.c |  21
 -rw-r--r--  kernel/exit.c               |   9
 -rw-r--r--  kernel/fork.c               |   1
 -rw-r--r--  kernel/futex.c              |   3
 -rw-r--r--  kernel/futex_compat.c       |   3
 -rw-r--r--  kernel/hw_breakpoint.c      |   3
 -rw-r--r--  kernel/irq/proc.c           |   2
 -rw-r--r--  kernel/irq_work.c           |   4
 -rw-r--r--  kernel/latencytop.c         |  17
 -rw-r--r--  kernel/module.c             |  12
 -rw-r--r--  kernel/perf_event.c         | 172
 -rw-r--r--  kernel/pm_qos_params.c      |   4
 -rw-r--r--  kernel/posix-cpu-timers.c   |  12
 -rw-r--r--  kernel/power/Kconfig        |   4
 -rw-r--r--  kernel/power/hibernate.c    |  22
 -rw-r--r--  kernel/power/suspend.c      |   5
 -rw-r--r--  kernel/power/swap.c         |  55
 -rw-r--r--  kernel/power/user.c         |   4
 -rw-r--r--  kernel/printk.c             |  25
 -rw-r--r--  kernel/range.c              |   2
 -rw-r--r--  kernel/resource.c           | 104
 -rw-r--r--  kernel/sched.c              | 324
 -rw-r--r--  kernel/sched_fair.c         |  48
 -rw-r--r--  kernel/sched_stoptask.c     |   4
 -rw-r--r--  kernel/sysctl.c             |   9
 -rw-r--r--  kernel/timer.c              |   8
 -rw-r--r--  kernel/trace/Kconfig        |   2
 -rw-r--r--  kernel/trace/blktrace.c     |   4
 -rw-r--r--  kernel/trace/trace.c        |  30
 -rw-r--r--  kernel/workqueue.c          |   7
30 files changed, 641 insertions, 279 deletions
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 37755d621924..a6e729766821 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50];
 #define for_each_kdbcmd(cmd, num) \
 	for ((cmd) = kdb_base_commands, (num) = 0; \
 	     num < kdb_max_commands; \
-	     num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++)
+	     num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)
 
 typedef struct _kdbmsg {
 	int	km_diag;	/* kdb diagnostic */
@@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0)
 	}
 	if (!s->usable)
 		return KDB_NOTIMP;
-	s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
+	s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);
 	if (!s->command) {
 		kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
 			   cmdstr);
@@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv)
  */
 static int kdb_ll(int argc, const char **argv)
 {
-	int diag;
+	int diag = 0;
 	unsigned long addr;
 	long offset = 0;
 	unsigned long va;
@@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv)
 		char buf[80];
 
 		if (KDB_FLAG(CMD_INTERRUPT))
-			return 0;
+			goto out;
 
 		sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
 		diag = kdb_parse(buf);
 		if (diag)
-			return diag;
+			goto out;
 
 		addr = va + linkoffset;
 		if (kdb_getword(&va, addr, sizeof(va)))
-			return 0;
+			goto out;
 	}
-	kfree(command);
 
-	return 0;
+out:
+	kfree(command);
+	return diag;
 }
 
 static int kdb_kgdb(int argc, const char **argv)
@@ -2739,13 +2740,13 @@ int kdb_register_repeat(char *cmd,
 	}
 	if (kdb_commands) {
 		memcpy(new, kdb_commands,
-		       kdb_max_commands * sizeof(*new));
+		       (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));
 		kfree(kdb_commands);
 	}
 	memset(new + kdb_max_commands, 0,
 	       kdb_command_extend * sizeof(*new));
 	kdb_commands = new;
-	kp = kdb_commands + kdb_max_commands;
+	kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;
 	kdb_max_commands += kdb_command_extend;
 	}
 
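The for_each_kdbcmd() hunk above is purely an ordering fix: the command index has to be bumped before the macro decides whether to hop from the static kdb_base_commands[] table to the dynamically allocated kdb_commands table, otherwise the table switch happens one iteration off, the walk reads a stale slot of the static array and never reaches the last dynamically registered command. A minimal userspace sketch of the fixed ordering (base[], extra[] and max_items are invented stand-ins, not kdb structures):

#include <stdio.h>

#define BASE_MAX 3	/* plays the role of KDB_BASE_CMD_MAX */

static int base[BASE_MAX] = { 10, 11, 12 };	/* static table */
static int extra[2] = { 20, 21 };		/* "allocated" overflow table */
static int max_items = BASE_MAX + 2;		/* plays kdb_max_commands */

/* Fixed ordering: increment the index first, then pick the table. */
#define for_each_item(p, i) \
	for ((p) = base, (i) = 0; (i) < max_items; \
	     (i)++, (i) == BASE_MAX ? (p) = extra : (p)++)

int main(void)
{
	int *p;
	int i;

	for_each_item(p, i)
		printf("item %d = %d\n", i, *p);	/* 10 11 12 20 21 */
	return 0;
}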
diff --git a/kernel/exit.c b/kernel/exit.c
index 21aa7b3001fb..676149a4ac5f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -914,6 +914,15 @@ NORET_TYPE void do_exit(long code)
 	if (unlikely(!tsk->pid))
 		panic("Attempted to kill the idle task!");
 
+	/*
+	 * If do_exit is called because this processes oopsed, it's possible
+	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
+	 * continuing. Amongst other possible reasons, this is to prevent
+	 * mm_release()->clear_child_tid() from writing to a user-controlled
+	 * kernel address.
+	 */
+	set_fs(USER_DS);
+
 	tracehook_report_exit(&code);
 
 	validate_creds_for_do_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5991b7..5447dc7defa9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -273,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
+	clear_tsk_need_resched(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 6c683b37f2ce..40a8777a27d0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	unsigned int uninitialized_var(next_pi);
 	unsigned long futex_offset;
 	int rc;
 
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 06da4dfc339b..a7934ac75e5b 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *next_entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	unsigned int uninitialized_var(next_pi);
 	compat_uptr_t uentry, next_uentry, upending;
 	compat_long_t futex_offset;
 	int rc;
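Both robust-list hunks move next_pi behind uninitialized_var(), the kernel's wrapper for silencing a gcc "may be used uninitialized" false positive; in this era it expanded to a plain self-assignment, so no extra code is generated. A small userspace illustration of the same idiom (walk() and its arguments are invented for the example):

#include <stdio.h>

/* Same trick the kernel wrapper used for gcc: self-initialize the variable
 * so the compiler stops warning, without changing the generated code. */
#define uninitialized_var(x) x = x

/* 'last' is only read when the loop ran at least once, which the compiler
 * cannot always prove; that is the false positive the macro silences. */
static int walk(const int *list, int len)
{
	int uninitialized_var(last);
	int i;

	for (i = 0; i < len; i++)
		last = list[i];

	return len ? last : -1;
}

int main(void)
{
	int v[] = { 3, 5, 7 };

	printf("%d\n", walk(v, 3));	/* 7 */
	return 0;
}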
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f0afca..e5325825aeb6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
 	.read	= hw_breakpoint_pmu_read,
 };
 
-static int __init init_hw_breakpoint(void)
+int __init init_hw_breakpoint(void)
 {
 	unsigned int **task_bp_pinned;
 	int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
 
 	return -ENOMEM;
 }
-core_initcall(init_hw_breakpoint);
 
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 01b1d3a88983..6c8a2a9f8a7b 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -214,7 +214,7 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v)
 
 static int irq_spurious_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, irq_spurious_proc_show, NULL);
+	return single_open(file, irq_spurious_proc_show, PDE(inode)->data);
 }
 
 static const struct file_operations irq_spurious_proc_fops = {
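The one-liner above works because single_open() stashes its third argument in the seq_file, so the show callback can fetch the per-interrupt data from m->private instead of relying on a global. A hedged sketch of that 2.6.37-era procfs pattern (foo_show, foo_fops, struct foo_stats and the registration comment are invented; the seq_file/proc APIs are real):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct foo_stats {
	unsigned int count;
};

static int foo_show(struct seq_file *m, void *v)
{
	struct foo_stats *st = m->private;	/* third argument of single_open() */

	seq_printf(m, "count %u\n", st->count);
	return 0;
}

static int foo_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data is whatever proc_create_data() registered */
	return single_open(file, foo_show, PDE(inode)->data);
}

static const struct file_operations foo_fops = {
	.open		= foo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/* registration, e.g. from an init function:
 *	proc_create_data("foo_stats", 0444, NULL, &foo_fops, &some_foo_stats);
 */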
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff8481..90f881904bb1 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -145,7 +145,9 @@ void irq_work_run(void)
 		 * Clear the BUSY bit and return to the free state if
 		 * no-one else claimed it meanwhile.
 		 */
-		cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+		(void)cmpxchg(&entry->next,
+			      next_flags(NULL, IRQ_WORK_BUSY),
+			      NULL);
 	}
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 877fb306d415..17110a4a4fc2 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 
 	account_global_scheduler_latency(tsk, &lat);
 
-	/*
-	 * short term hack; if we're > 32 we stop; future we recycle:
-	 */
-	tsk->latency_record_count++;
-	if (tsk->latency_record_count >= LT_SAVECOUNT)
-		goto out_unlock;
-
-	for (i = 0; i < LT_SAVECOUNT; i++) {
+	for (i = 0; i < tsk->latency_record_count; i++) {
 		struct latency_record *mylat;
 		int same = 1;
 
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
 		}
 	}
 
+	/*
+	 * short term hack; if we're > 32 we stop; future we recycle:
+	 */
+	if (tsk->latency_record_count >= LT_SAVECOUNT)
+		goto out_unlock;
+
 	/* Allocated a new one: */
-	i = tsk->latency_record_count;
+	i = tsk->latency_record_count++;
 	memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
 
 out_unlock:
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a7524a..d190664f25ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
 	kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
 			   mod->num_trace_events, GFP_KERNEL);
 #endif
+#ifdef CONFIG_TRACING
+	mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
+					 sizeof(*mod->trace_bprintk_fmt_start),
+					 &mod->num_trace_bprintk_fmt);
+	/*
+	 * This section contains pointers to allocated objects in the trace
+	 * code and not scanning it leads to false positives.
+	 */
+	kmemleak_scan_area(mod->trace_bprintk_fmt_start,
+			   sizeof(*mod->trace_bprintk_fmt_start) *
+			   mod->num_trace_bprintk_fmt, GFP_KERNEL);
+#endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 	/* sechdrs[0].sh_size is always zero */
 	mod->ftrace_callsites = section_objs(info, "__mcount_loc",
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 517d827f4982..2870feee81dd 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -674,6 +675,8 @@ event_sched_in(struct perf_event *event,
 
 	event->tstamp_running += ctx->time - event->tstamp_stopped;
 
+	event->shadow_ctx_time = ctx->time - ctx->timestamp;
+
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
 	ctx->nr_active++;
@@ -1284,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
-	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
-
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 }
@@ -1619,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
 {
 	raw_spin_lock(&ctx->lock);
 
-	/* Rotate the first entry last of non-pinned groups */
-	list_rotate_left(&ctx->flexible_groups);
+	/*
+	 * Rotate the first entry last of non-pinned groups. Rotation might be
+	 * disabled by the inheritance code.
+	 */
+	if (!ctx->rotate_disable)
+		list_rotate_left(&ctx->flexible_groups);
 
 	raw_spin_unlock(&ctx->lock);
 }
@@ -2232,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
 	raw_spin_unlock_irq(&ctx->lock);
 	mutex_unlock(&ctx->mutex);
 
-	mutex_lock(&event->owner->perf_event_mutex);
-	list_del_init(&event->owner_entry);
-	mutex_unlock(&event->owner->perf_event_mutex);
-	put_task_struct(event->owner);
-
 	free_event(event);
 
 	return 0;
@@ -2249,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 static int perf_release(struct inode *inode, struct file *file)
 {
 	struct perf_event *event = file->private_data;
+	struct task_struct *owner;
 
 	file->private_data = NULL;
 
+	rcu_read_lock();
+	owner = ACCESS_ONCE(event->owner);
+	/*
+	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+	 * !owner it means the list deletion is complete and we can indeed
+	 * free this event, otherwise we need to serialize on
+	 * owner->perf_event_mutex.
+	 */
+	smp_read_barrier_depends();
+	if (owner) {
+		/*
+		 * Since delayed_put_task_struct() also drops the last
+		 * task reference we can safely take a new reference
+		 * while holding the rcu_read_lock().
+		 */
+		get_task_struct(owner);
+	}
+	rcu_read_unlock();
+
+	if (owner) {
+		mutex_lock(&owner->perf_event_mutex);
+		/*
+		 * We have to re-check the event->owner field, if it is cleared
+		 * we raced with perf_event_exit_task(), acquiring the mutex
+		 * ensured they're done, and we can proceed with freeing the
+		 * event.
+		 */
+		if (event->owner)
+			list_del_init(&event->owner_entry);
+		mutex_unlock(&owner->perf_event_mutex);
+		put_task_struct(owner);
+	}
+
 	return perf_event_release_kernel(event);
 }
 
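The perf_release()/perf_event_exit_task() pair in this file is a publish/observe hand-off: the exiting task unlinks the event from its owner list and only then clears event->owner behind smp_wmb(), while the file-release path reads the owner under RCU, pins it, and re-checks event->owner under owner->perf_event_mutex before touching the list. A much-simplified, single-threaded C11 sketch of that ordering (the structs and helpers are invented, RCU is elided, and release/acquire atomics stand in for the kernel barriers):

#include <stdatomic.h>
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct task {				/* plays the owner task */
	pthread_mutex_t perf_mutex;	/* plays perf_event_mutex */
	int refs;			/* plays the task refcount */
};

struct event {
	struct task *_Atomic owner;
	int on_owner_list;
};

/* perf_event_exit_task() analogue: unlink under the mutex, then publish
 * "no owner"; the release store plays the role of smp_wmb(). */
static void task_exit_events(struct task *t, struct event *ev)
{
	pthread_mutex_lock(&t->perf_mutex);
	ev->on_owner_list = 0;					/* list_del_init() */
	atomic_store_explicit(&ev->owner, NULL, memory_order_release);
	pthread_mutex_unlock(&t->perf_mutex);
}

/* perf_release() analogue: observe the owner, pin it, then re-check the
 * membership under the owner's mutex before unlinking. */
static void event_release(struct event *ev)
{
	struct task *owner;

	owner = atomic_load_explicit(&ev->owner, memory_order_acquire);
	if (owner) {
		owner->refs++;					/* get_task_struct() */
		pthread_mutex_lock(&owner->perf_mutex);
		if (atomic_load(&ev->owner))			/* did exit win the race? */
			ev->on_owner_list = 0;			/* list_del_init() */
		pthread_mutex_unlock(&owner->perf_mutex);
		owner->refs--;					/* put_task_struct() */
	}
	/* free_event(ev) would follow here */
}

int main(void)
{
	struct task t = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct event ev = { .on_owner_list = 1 };

	atomic_init(&ev.owner, &t);
	event_release(&ev);		/* one legal interleaving, shown serially */
	task_exit_events(&t, &ev);
	printf("on_owner_list=%d refs=%d\n", ev.on_owner_list, t.refs);
	return 0;
}

In the kernel the task refcount and the RCU read side make the pinning safe against a concurrent exit; the sketch only shows the order of operations, not the concurrency guarantees.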
@@ -3396,7 +3430,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
 }
 
 static void perf_output_read_one(struct perf_output_handle *handle,
-				 struct perf_event *event)
+				 struct perf_event *event,
+				 u64 enabled, u64 running)
 {
 	u64 read_format = event->attr.read_format;
 	u64 values[4];
@@ -3404,11 +3439,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 
 	values[n++] = perf_event_count(event);
 	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
-		values[n++] = event->total_time_enabled +
+		values[n++] = enabled +
 			atomic64_read(&event->child_total_time_enabled);
 	}
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
-		values[n++] = event->total_time_running +
+		values[n++] = running +
 			atomic64_read(&event->child_total_time_running);
 	}
 	if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3456,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
  * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
  */
 static void perf_output_read_group(struct perf_output_handle *handle,
-			    struct perf_event *event)
+			    struct perf_event *event,
+			    u64 enabled, u64 running)
 {
 	struct perf_event *leader = event->group_leader, *sub;
 	u64 read_format = event->attr.read_format;
@@ -3431,10 +3467,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	values[n++] = 1 + leader->nr_siblings;
 
 	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-		values[n++] = leader->total_time_enabled;
+		values[n++] = enabled;
 
 	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-		values[n++] = leader->total_time_running;
+		values[n++] = running;
 
 	if (leader != event)
 		leader->pmu->read(leader);
@@ -3459,13 +3495,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	}
 }
 
+#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
+				 PERF_FORMAT_TOTAL_TIME_RUNNING)
+
 static void perf_output_read(struct perf_output_handle *handle,
 			     struct perf_event *event)
 {
+	u64 enabled = 0, running = 0, now, ctx_time;
+	u64 read_format = event->attr.read_format;
+
+	/*
+	 * compute total_time_enabled, total_time_running
+	 * based on snapshot values taken when the event
+	 * was last scheduled in.
+	 *
+	 * we cannot simply called update_context_time()
+	 * because of locking issue as we are called in
+	 * NMI context
+	 */
+	if (read_format & PERF_FORMAT_TOTAL_TIMES) {
+		now = perf_clock();
+		ctx_time = event->shadow_ctx_time + now;
+		enabled = ctx_time - event->tstamp_enabled;
+		running = ctx_time - event->tstamp_running;
+	}
+
 	if (event->attr.read_format & PERF_FORMAT_GROUP)
-		perf_output_read_group(handle, event);
+		perf_output_read_group(handle, event, enabled, running);
 	else
-		perf_output_read_one(handle, event);
+		perf_output_read_one(handle, event, enabled, running);
 }
 
 void perf_output_sample(struct perf_output_handle *handle,
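The new perf_output_read() above sidesteps update_context_time() in NMI context by working from snapshots: shadow_ctx_time is recorded at schedule-in as ctx->time - ctx->timestamp, so adding the current clock to it reconstructs ctx->time without taking any lock, and the intermediate underflow is harmless in unsigned 64-bit arithmetic. A toy recalculation with made-up nanosecond values, not perf internals:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	uint64_t ctx_time_at_sched_in = 1000;	/* ctx->time at schedule-in    */
	uint64_t clock_at_sched_in    = 5000;	/* ctx->timestamp (perf_clock) */
	uint64_t tstamp_enabled       = 400;	/* event->tstamp_enabled       */
	uint64_t tstamp_running       = 900;	/* event->tstamp_running       */

	/* saved by event_sched_in(); wraps below zero, which is fine for u64 */
	uint64_t shadow_ctx_time = ctx_time_at_sched_in - clock_at_sched_in;

	uint64_t now      = 5600;			/* perf_clock() in the NMI   */
	uint64_t ctx_time = shadow_ctx_time + now;	/* 1600 == current ctx->time */
	uint64_t enabled  = ctx_time - tstamp_enabled;	/* 1200 ns enabled           */
	uint64_t running  = ctx_time - tstamp_running;	/*  700 ns running           */

	printf("enabled=%" PRIu64 " running=%" PRIu64 "\n", enabled, running);
	return 0;
}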
@@ -3766,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_task_ctx(&cpuctx->ctx, task_event);
 
 		ctx = task_event->task_ctx;
@@ -3901,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
 		ctxn = pmu->task_ctx_nr;
@@ -4086,6 +4148,8 @@ got_name:
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
 					vma->vm_flags & VM_EXEC);
 
@@ -4655,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event)
 		break;
 	}
 
-	if (event_id > PERF_COUNT_SW_MAX)
+	if (event_id >= PERF_COUNT_SW_MAX)
 		return -ENOENT;
 
 	if (!event->parent) {
@@ -5087,20 +5151,36 @@ static void *find_pmu_context(int ctxn)
 	return NULL;
 }
 
-static void free_pmu_context(void * __percpu cpu_context)
+static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
 {
-	struct pmu *pmu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_cpu_context *cpuctx;
+
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+
+		if (cpuctx->active_pmu == old_pmu)
+			cpuctx->active_pmu = pmu;
+	}
+}
+
+static void free_pmu_context(struct pmu *pmu)
+{
+	struct pmu *i;
 
 	mutex_lock(&pmus_lock);
 	/*
 	 * Like a real lame refcount.
 	 */
-	list_for_each_entry(pmu, &pmus, entry) {
-		if (pmu->pmu_cpu_context == cpu_context)
+	list_for_each_entry(i, &pmus, entry) {
+		if (i->pmu_cpu_context == pmu->pmu_cpu_context) {
+			update_pmu_context(i, pmu);
 			goto out;
+		}
 	}
 
-	free_percpu(cpu_context);
+	free_percpu(pmu->pmu_cpu_context);
 out:
 	mutex_unlock(&pmus_lock);
 }
@@ -5132,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu)
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->jiffies_interval = 1;
 		INIT_LIST_HEAD(&cpuctx->rotation_list);
+		cpuctx->active_pmu = pmu;
 	}
 
 got_cpu_context:
@@ -5183,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	synchronize_rcu();
 
 	free_percpu(pmu->pmu_disable_count);
-	free_pmu_context(pmu->pmu_cpu_context);
+	free_pmu_context(pmu);
 }
 
 struct pmu *perf_init_event(struct perf_event *event)
@@ -5651,7 +5732,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&ctx->mutex);
 
 	event->owner = current;
-	get_task_struct(current);
+
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -5719,12 +5800,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	++ctx->generation;
 	mutex_unlock(&ctx->mutex);
 
-	event->owner = current;
-	get_task_struct(current);
-	mutex_lock(&current->perf_event_mutex);
-	list_add_tail(&event->owner_entry, &current->perf_event_list);
-	mutex_unlock(&current->perf_event_mutex);
-
 	return event;
 
 err_free:
@@ -5875,8 +5950,24 @@ again:
  */
 void perf_event_exit_task(struct task_struct *child)
 {
+	struct perf_event *event, *tmp;
 	int ctxn;
 
+	mutex_lock(&child->perf_event_mutex);
+	list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+				 owner_entry) {
+		list_del_init(&event->owner_entry);
+
+		/*
+		 * Ensure the list deletion is visible before we clear
+		 * the owner, closes a race against perf_release() where
+		 * we need to serialize on the owner->perf_event_mutex.
+		 */
+		smp_wmb();
+		event->owner = NULL;
+	}
+	mutex_unlock(&child->perf_event_mutex);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_exit_task_context(child, ctxn);
 }
@@ -6096,6 +6187,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	struct perf_event *event;
 	struct task_struct *parent = current;
 	int inherited_all = 1;
+	unsigned long flags;
 	int ret = 0;
 
 	child->perf_event_ctxp[ctxn] = NULL;
@@ -6136,6 +6228,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 			break;
 	}
 
+	/*
+	 * We can't hold ctx->lock when iterating the ->flexible_group list due
+	 * to allocations, but we need to prevent rotation because
+	 * rotate_ctx() will change the list from interrupt context.
+	 */
+	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+	parent_ctx->rotate_disable = 1;
+	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
 	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
 		ret = inherit_task_group(event, parent, parent_ctx,
 					 child, ctxn, &inherited_all);
@@ -6143,6 +6244,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 			break;
 	}
 
+	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+	parent_ctx->rotate_disable = 0;
+	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
 	child_ctx = child->perf_event_ctxp[ctxn];
 
 	if (child_ctx && inherited_all) {
@@ -6295,6 +6400,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 void __init perf_event_init(void)
 {
+	int ret;
+
 	perf_event_init_all_cpus();
 	init_srcu_struct(&pmus_srcu);
 	perf_pmu_register(&perf_swevent);
@@ -6302,4 +6409,7 @@ void __init perf_event_init(void)
 	perf_pmu_register(&perf_task_clock);
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
+
+	ret = init_hw_breakpoint();
+	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index c7a8f453919e..aeaa7f846821 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
 
 	switch (o->type) {
 	case PM_QOS_MIN:
-		return plist_last(&o->requests)->prio;
+		return plist_first(&o->requests)->prio;
 
 	case PM_QOS_MAX:
-		return plist_first(&o->requests)->prio;
+		return plist_last(&o->requests)->prio;
 
 	default:
 		/* runtime check for not using enum */
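The pm_qos swap only makes sense with plist ordering in mind: a priority-sorted list keeps its nodes in ascending ->prio order, so plist_first() is the smallest value and plist_last() the largest, and an aggregate of type PM_QOS_MIN therefore has to read the first node while PM_QOS_MAX reads the last. A userspace stand-in using a plain sorted array (the request values are invented):

#include <stdio.h>

/* Requests kept sorted ascending, like a plist ordered by ->prio. */
static const int requests[] = { 10, 25, 40, 100 };
#define NR_REQ (sizeof(requests) / sizeof(requests[0]))

static int first_prio(void) { return requests[0]; }		/* ~ plist_first() */
static int last_prio(void)  { return requests[NR_REQ - 1]; }	/* ~ plist_last()  */

int main(void)
{
	printf("PM_QOS_MIN aggregate -> %d\n", first_prio());	/* smallest: 10 */
	printf("PM_QOS_MAX aggregate -> %d\n", last_prio());	/* largest: 100 */
	return 0;
}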
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 6842eeba5879..05bb7173850e 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock)
 	if (pid == 0)
 		return 0;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	p = find_task_by_vpid(pid);
 	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
-		   same_thread_group(p, current) : thread_group_leader(p))) {
+		   same_thread_group(p, current) : has_group_leader_pid(p))) {
 		error = -EINVAL;
 	}
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	return error;
 }
@@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 
 	INIT_LIST_HEAD(&new_timer->it.cpu.entry);
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
 		if (pid == 0) {
 			p = current;
@@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 			p = current->group_leader;
 		} else {
 			p = find_task_by_vpid(pid);
-			if (p && !thread_group_leader(p))
+			if (p && !has_group_leader_pid(p))
 				p = NULL;
 		}
 	}
@@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 	} else {
 		ret = -EINVAL;
 	}
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 29bff6117abc..a5aff3ebad38 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -246,9 +246,13 @@ config PM_OPS
 	depends on PM_SLEEP || PM_RUNTIME
 	default y
 
+config ARCH_HAS_OPP
+	bool
+
 config PM_OPP
 	bool "Operating Performance Point (OPP) Layer library"
 	depends on PM
+	depends on ARCH_HAS_OPP
 	---help---
 	  SOCs have a standard set of tuples consisting of frequency and
 	  voltage pairs that the device will support per voltage domain. This
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 657272e91d0a..048d0b514831 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -327,7 +327,6 @@ static int create_image(int platform_mode)
 int hibernation_snapshot(int platform_mode)
 {
 	int error;
-	gfp_t saved_mask;
 
 	error = platform_begin(platform_mode);
 	if (error)
@@ -339,7 +338,7 @@ int hibernation_snapshot(int platform_mode)
 		goto Close;
 
 	suspend_console();
-	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
+	pm_restrict_gfp_mask();
 	error = dpm_suspend_start(PMSG_FREEZE);
 	if (error)
 		goto Recover_platform;
@@ -348,7 +347,10 @@ int hibernation_snapshot(int platform_mode)
 		goto Recover_platform;
 
 	error = create_image(platform_mode);
-	/* Control returns here after successful restore */
+	/*
+	 * Control returns here (1) after the image has been created or the
+	 * image creation has failed and (2) after a successful restore.
+	 */
 
  Resume_devices:
 	/* We may need to release the preallocated image pages here. */
@@ -357,7 +359,10 @@ int hibernation_snapshot(int platform_mode)
 
 	dpm_resume_end(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
-	set_gfp_allowed_mask(saved_mask);
+
+	if (error || !in_suspend)
+		pm_restore_gfp_mask();
+
 	resume_console();
  Close:
 	platform_end(platform_mode);
@@ -452,17 +457,16 @@ static int resume_target_kernel(bool platform_mode)
 int hibernation_restore(int platform_mode)
 {
 	int error;
-	gfp_t saved_mask;
 
 	pm_prepare_console();
 	suspend_console();
-	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
+	pm_restrict_gfp_mask();
 	error = dpm_suspend_start(PMSG_QUIESCE);
 	if (!error) {
 		error = resume_target_kernel(platform_mode);
 		dpm_resume_end(PMSG_RECOVER);
 	}
-	set_gfp_allowed_mask(saved_mask);
+	pm_restore_gfp_mask();
 	resume_console();
 	pm_restore_console();
 	return error;
@@ -476,7 +480,6 @@ int hibernation_restore(int platform_mode)
 int hibernation_platform_enter(void)
 {
 	int error;
-	gfp_t saved_mask;
 
 	if (!hibernation_ops)
 		return -ENOSYS;
@@ -492,7 +495,6 @@ int hibernation_platform_enter(void)
 
 	entering_platform_hibernation = true;
 	suspend_console();
-	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
 	error = dpm_suspend_start(PMSG_HIBERNATE);
 	if (error) {
 		if (hibernation_ops->recover)
@@ -536,7 +538,6 @@ int hibernation_platform_enter(void)
  Resume_devices:
 	entering_platform_hibernation = false;
 	dpm_resume_end(PMSG_RESTORE);
-	set_gfp_allowed_mask(saved_mask);
 	resume_console();
 
  Close:
@@ -646,6 +647,7 @@ int hibernate(void)
 		swsusp_free();
 		if (!error)
 			power_down();
+		pm_restore_gfp_mask();
 	} else {
 		pr_debug("PM: Image restored successfully.\n");
 	}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 7335952ee473..ecf770509d0d 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -197,7 +197,6 @@ static int suspend_enter(suspend_state_t state)
 int suspend_devices_and_enter(suspend_state_t state)
 {
 	int error;
-	gfp_t saved_mask;
 
 	if (!suspend_ops)
 		return -ENOSYS;
@@ -208,7 +207,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 		goto Close;
 	}
 	suspend_console();
-	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
+	pm_restrict_gfp_mask();
 	suspend_test_start();
 	error = dpm_suspend_start(PMSG_SUSPEND);
 	if (error) {
@@ -225,7 +224,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 	suspend_test_start();
 	dpm_resume_end(PMSG_RESUME);
 	suspend_test_finish("resume devices");
-	set_gfp_allowed_mask(saved_mask);
+	pm_restore_gfp_mask();
 	resume_console();
  Close:
 	if (suspend_ops->end)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index a0e4a86ccf94..8c7e4832b9be 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -6,6 +6,7 @@
  *
  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>
  *
  * This file is released under the GPLv2.
  *
@@ -29,7 +30,7 @@
 
 #include "power.h"
 
-#define HIBERNATE_SIG	"LINHIB0001"
+#define HIBERNATE_SIG	"S1SUSPEND"
 
 /*
  * The swap map is a data structure used for keeping track of each page
@@ -753,30 +754,43 @@ static int load_image_lzo(struct swap_map_handle *handle,
 {
 	unsigned int m;
 	int error = 0;
+	struct bio *bio;
 	struct timeval start;
 	struct timeval stop;
 	unsigned nr_pages;
-	size_t off, unc_len, cmp_len;
-	unsigned char *unc, *cmp, *page;
+	size_t i, off, unc_len, cmp_len;
+	unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
 
-	page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
-	if (!page) {
-		printk(KERN_ERR "PM: Failed to allocate LZO page\n");
-		return -ENOMEM;
+	for (i = 0; i < LZO_CMP_PAGES; i++) {
+		page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+		if (!page[i]) {
+			printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+
+			while (i)
+				free_page((unsigned long)page[--i]);
+
+			return -ENOMEM;
+		}
 	}
 
 	unc = vmalloc(LZO_UNC_SIZE);
 	if (!unc) {
 		printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-		free_page((unsigned long)page);
+
+		for (i = 0; i < LZO_CMP_PAGES; i++)
+			free_page((unsigned long)page[i]);
+
 		return -ENOMEM;
 	}
 
 	cmp = vmalloc(LZO_CMP_SIZE);
 	if (!cmp) {
 		printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
+
 		vfree(unc);
-		free_page((unsigned long)page);
+		for (i = 0; i < LZO_CMP_PAGES; i++)
+			free_page((unsigned long)page[i]);
+
 		return -ENOMEM;
 	}
 
@@ -787,6 +801,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
 	if (!m)
 		m = 1;
 	nr_pages = 0;
+	bio = NULL;
 	do_gettimeofday(&start);
 
 	error = snapshot_write_next(snapshot);
@@ -794,11 +809,11 @@ static int load_image_lzo(struct swap_map_handle *handle,
 		goto out_finish;
 
 	for (;;) {
-		error = swap_read_page(handle, page, NULL); /* sync */
+		error = swap_read_page(handle, page[0], NULL); /* sync */
 		if (error)
 			break;
 
-		cmp_len = *(size_t *)page;
+		cmp_len = *(size_t *)page[0];
 		if (unlikely(!cmp_len ||
 		             cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
 			printk(KERN_ERR "PM: Invalid LZO compressed length\n");
@@ -806,13 +821,20 @@ static int load_image_lzo(struct swap_map_handle *handle,
 			break;
 		}
 
-		memcpy(cmp, page, PAGE_SIZE);
-		for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
-			error = swap_read_page(handle, page, NULL); /* sync */
+		for (off = PAGE_SIZE, i = 1;
+		     off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
+			error = swap_read_page(handle, page[i], &bio);
 			if (error)
 				goto out_finish;
+		}
 
-			memcpy(cmp + off, page, PAGE_SIZE);
+		error = hib_wait_on_bio_chain(&bio); /* need all data now */
+		if (error)
+			goto out_finish;
+
+		for (off = 0, i = 0;
+		     off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
+			memcpy(cmp + off, page[i], PAGE_SIZE);
 		}
 
 		unc_len = LZO_UNC_SIZE;
@@ -857,7 +879,8 @@ out_finish:
 
 	vfree(cmp);
 	vfree(unc);
-	free_page((unsigned long)page);
+	for (i = 0; i < LZO_CMP_PAGES; i++)
+		free_page((unsigned long)page[i]);
 
 	return error;
 }
diff --git a/kernel/power/user.c b/kernel/power/user.c
index e819e17877ca..c36c3b9e8a84 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 	free_all_swap_pages(data->swap);
 	if (data->frozen)
 		thaw_processes();
-	pm_notifier_call_chain(data->mode == O_WRONLY ?
+	pm_notifier_call_chain(data->mode == O_RDONLY ?
 			PM_POST_HIBERNATION : PM_POST_RESTORE);
 	atomic_inc(&snapshot_device_available);
 
@@ -263,6 +263,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 	case SNAPSHOT_UNFREEZE:
 		if (!data->frozen || data->ready)
 			break;
+		pm_restore_gfp_mask();
 		thaw_processes();
 		usermodehelper_enable();
 		data->frozen = 0;
@@ -275,6 +276,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			error = -EPERM;
 			break;
 		}
+		pm_restore_gfp_mask();
 		error = hibernation_snapshot(data->platform_support);
 		if (!error)
 			error = put_user(in_suspend, (int __user *)arg);
diff --git a/kernel/printk.c b/kernel/printk.c
index b2ebaee8c377..a23315dc4498 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -261,6 +261,12 @@ static inline void boot_delay_msec(void)
 }
 #endif
 
+#ifdef CONFIG_SECURITY_DMESG_RESTRICT
+int dmesg_restrict = 1;
+#else
+int dmesg_restrict;
+#endif
+
 int do_syslog(int type, char __user *buf, int len, bool from_file)
 {
 	unsigned i, j, limit, count;
@@ -268,7 +274,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 	char c;
 	int error = 0;
 
-	error = security_syslog(type, from_file);
+	/*
+	 * If this is from /proc/kmsg we only do the capabilities checks
+	 * at open time.
+	 */
+	if (type == SYSLOG_ACTION_OPEN || !from_file) {
+		if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if ((type != SYSLOG_ACTION_READ_ALL &&
+		     type != SYSLOG_ACTION_SIZE_BUFFER) &&
+		    !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+	}
+
+	error = security_syslog(type);
 	if (error)
 		return error;
 
@@ -1063,13 +1082,15 @@ void printk_tick(void)
 
 int printk_needs_cpu(int cpu)
 {
+	if (unlikely(cpu_is_offline(cpu)))
+		printk_tick();
 	return per_cpu(printk_pending, cpu);
 }
 
 void wake_up_klogd(void)
 {
 	if (waitqueue_active(&log_wait))
-		__raw_get_cpu_var(printk_pending) = 1;
+		this_cpu_write(printk_pending, 1);
 }
 
 /**
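The do_syslog() hunk front-loads two capability checks ahead of the LSM hook: with dmesg_restrict set every syslog action requires CAP_SYS_ADMIN, and even without it anything beyond reading or sizing the buffer does, while /proc/kmsg readers are only checked at open time. A compressed userspace restatement of just that decision table (the SYSLOG_ACTION_* values mirror the kernel's; everything else is invented):

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

enum {
	SYSLOG_ACTION_OPEN = 1,
	SYSLOG_ACTION_READ_ALL = 3,
	SYSLOG_ACTION_CLEAR = 5,
	SYSLOG_ACTION_SIZE_BUFFER = 10,
};

static int check_syslog(int type, bool from_file, bool dmesg_restrict,
			bool cap_sys_admin)
{
	/* /proc/kmsg only gets the capability checks at open time */
	if (type == SYSLOG_ACTION_OPEN || !from_file) {
		if (dmesg_restrict && !cap_sys_admin)
			return -EPERM;
		if (type != SYSLOG_ACTION_READ_ALL &&
		    type != SYSLOG_ACTION_SIZE_BUFFER && !cap_sys_admin)
			return -EPERM;
	}
	return 0;	/* the LSM hook still gets the final say */
}

int main(void)
{
	printf("%d\n", check_syslog(SYSLOG_ACTION_READ_ALL, false, false, false)); /* 0 */
	printf("%d\n", check_syslog(SYSLOG_ACTION_READ_ALL, false, true, false));  /* -EPERM */
	printf("%d\n", check_syslog(SYSLOG_ACTION_CLEAR, false, false, false));    /* -EPERM */
	return 0;
}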
diff --git a/kernel/range.c b/kernel/range.c
index 471b66acabb5..37fa9b99ad58 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2)
 
 int clean_sort_range(struct range *range, int az)
 {
-	int i, j, k = az - 1, nr_range = 0;
+	int i, j, k = az - 1, nr_range = az;
 
 	for (i = 0; i < k; i++) {
 		if (range[i].end)
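clean_sort_range() is expected to compact out empty entries, sort what is left, and return how many usable ranges remain; initializing nr_range to 0 meant an array with no empty slots came back as having zero usable ranges, because the counting path only adjusts nr_range when it actually meets an empty entry. A small userspace model of the intended contract (struct range and the helper are re-implemented for the example, not copied from kernel/range.c):

#include <stdio.h>
#include <stdlib.h>

struct range { unsigned long start, end; };	/* end == 0 marks an empty slot */

static int cmp_range(const void *a, const void *b)
{
	const struct range *r1 = a, *r2 = b;

	if (r1->start == r2->start)
		return 0;
	return r1->start < r2->start ? -1 : 1;
}

/* Returns the number of non-empty ranges, matching the fixed behaviour:
 * a fully populated array reports az, not 0. */
static int clean_sort_range(struct range *range, int az)
{
	int i, nr_range = az;

	for (i = 0; i < az; i++)
		if (!range[i].end)
			nr_range--;		/* drop empty slots from the count */

	/* ordering of the empties differs from the kernel helper; only the
	 * returned count matters for this demonstration */
	qsort(range, az, sizeof(*range), cmp_range);
	return nr_range;
}

int main(void)
{
	struct range r[] = { { 0x100, 0x200 }, { 0, 0 }, { 0x300, 0x400 } };
	struct range full[] = { { 0x100, 0x200 }, { 0x300, 0x400 } };

	printf("%d usable ranges\n", clean_sort_range(r, 3));		/* 2 */
	printf("%d usable ranges\n", clean_sort_range(full, 2));	/* 2; the unfixed helper reported 0 here */
	return 0;
}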
diff --git a/kernel/resource.c b/kernel/resource.c
index 9fad33efd0db..798e2fae2a06 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -40,23 +40,6 @@ EXPORT_SYMBOL(iomem_resource);
 
 static DEFINE_RWLOCK(resource_lock);
 
-/*
- * By default, we allocate free space bottom-up. The architecture can request
- * top-down by clearing this flag. The user can override the architecture's
- * choice with the "resource_alloc_from_bottom" kernel boot option, but that
- * should only be a debugging tool.
- */
-int resource_alloc_from_bottom = 1;
-
-static __init int setup_alloc_from_bottom(char *s)
-{
-	printk(KERN_INFO
-	       "resource: allocating from bottom-up; please report a bug\n");
-	resource_alloc_from_bottom = 1;
-	return 0;
-}
-early_param("resource_alloc_from_bottom", setup_alloc_from_bottom);
-
 static void *r_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct resource *p = v;
@@ -374,6 +357,10 @@ int __weak page_is_ram(unsigned long pfn)
 	return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
 }
 
+void __weak arch_remove_reservations(struct resource *avail)
+{
+}
+
 static resource_size_t simple_align_resource(void *data,
 					     const struct resource *avail,
 					     resource_size_t size,
@@ -397,74 +384,7 @@ static bool resource_contains(struct resource *res1, struct resource *res2)
 }
 
 /*
- * Find the resource before "child" in the sibling list of "root" children.
- */
-static struct resource *find_sibling_prev(struct resource *root, struct resource *child)
-{
-	struct resource *this;
-
-	for (this = root->child; this; this = this->sibling)
-		if (this->sibling == child)
-			return this;
-
-	return NULL;
-}
-
-/*
  * Find empty slot in the resource tree given range and alignment.
- * This version allocates from the end of the root resource first.
- */
-static int find_resource_from_top(struct resource *root, struct resource *new,
-				  resource_size_t size, resource_size_t min,
-				  resource_size_t max, resource_size_t align,
-				  resource_size_t (*alignf)(void *,
-							    const struct resource *,
-							    resource_size_t,
-							    resource_size_t),
-				  void *alignf_data)
-{
-	struct resource *this;
-	struct resource tmp, avail, alloc;
-
-	tmp.start = root->end;
-	tmp.end = root->end;
-
-	this = find_sibling_prev(root, NULL);
-	for (;;) {
-		if (this) {
-			if (this->end < root->end)
-				tmp.start = this->end + 1;
-		} else
-			tmp.start = root->start;
-
-		resource_clip(&tmp, min, max);
-
-		/* Check for overflow after ALIGN() */
-		avail = *new;
-		avail.start = ALIGN(tmp.start, align);
-		avail.end = tmp.end;
-		if (avail.start >= tmp.start) {
-			alloc.start = alignf(alignf_data, &avail, size, align);
-			alloc.end = alloc.start + size - 1;
-			if (resource_contains(&avail, &alloc)) {
-				new->start = alloc.start;
-				new->end = alloc.end;
-				return 0;
-			}
-		}
-
-		if (!this || this->start == root->start)
-			break;
-
-		tmp.end = this->start - 1;
-		this = find_sibling_prev(root, this);
-	}
-	return -EBUSY;
-}
-
-/*
- * Find empty slot in the resource tree given range and alignment.
- * This version allocates from the beginning of the root resource first.
  */
 static int find_resource(struct resource *root, struct resource *new,
| 470 | resource_size_t size, resource_size_t min, | 390 | resource_size_t size, resource_size_t min, |
| @@ -478,23 +398,24 @@ static int find_resource(struct resource *root, struct resource *new, | |||
| 478 | struct resource *this = root->child; | 398 | struct resource *this = root->child; |
| 479 | struct resource tmp = *new, avail, alloc; | 399 | struct resource tmp = *new, avail, alloc; |
| 480 | 400 | ||
| 401 | tmp.flags = new->flags; | ||
| 481 | tmp.start = root->start; | 402 | tmp.start = root->start; |
| 482 | /* | 403 | /* |
| 483 | * Skip past an allocated resource that starts at 0, since the | 404 | * Skip past an allocated resource that starts at 0, since the assignment |
| 484 | * assignment of this->start - 1 to tmp->end below would cause an | 405 | * of this->start - 1 to tmp->end below would cause an underflow. |
| 485 | * underflow. | ||
| 486 | */ | 406 | */ |
| 487 | if (this && this->start == 0) { | 407 | if (this && this->start == 0) { |
| 488 | tmp.start = this->end + 1; | 408 | tmp.start = this->end + 1; |
| 489 | this = this->sibling; | 409 | this = this->sibling; |
| 490 | } | 410 | } |
| 491 | for (;;) { | 411 | for (;;) { |
| 492 | if (this) | 412 | if (this) |
| 493 | tmp.end = this->start - 1; | 413 | tmp.end = this->start - 1; |
| 494 | else | 414 | else |
| 495 | tmp.end = root->end; | 415 | tmp.end = root->end; |
| 496 | 416 | ||
| 497 | resource_clip(&tmp, min, max); | 417 | resource_clip(&tmp, min, max); |
| 418 | arch_remove_reservations(&tmp); | ||
| 498 | 419 | ||
| 499 | /* Check for overflow after ALIGN() */ | 420 | /* Check for overflow after ALIGN() */ |
| 500 | avail = *new; | 421 | avail = *new; |
| @@ -509,10 +430,8 @@ static int find_resource(struct resource *root, struct resource *new, | |||
| 509 | return 0; | 430 | return 0; |
| 510 | } | 431 | } |
| 511 | } | 432 | } |
| 512 | |||
| 513 | if (!this) | 433 | if (!this) |
| 514 | break; | 434 | break; |
| 515 | |||
| 516 | tmp.start = this->end + 1; | 435 | tmp.start = this->end + 1; |
| 517 | this = this->sibling; | 436 | this = this->sibling; |
| 518 | } | 437 | } |
| @@ -545,10 +464,7 @@ int allocate_resource(struct resource *root, struct resource *new, | |||
| 545 | alignf = simple_align_resource; | 464 | alignf = simple_align_resource; |
| 546 | 465 | ||
| 547 | write_lock(&resource_lock); | 466 | write_lock(&resource_lock); |
| 548 | if (resource_alloc_from_bottom) | 467 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); |
| 549 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | ||
| 550 | else | ||
| 551 | err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data); | ||
| 552 | if (err >= 0 && __request_resource(root, new)) | 468 | if (err >= 0 && __request_resource(root, new)) |
| 553 | err = -EBUSY; | 469 | err = -EBUSY; |
| 554 | write_unlock(&resource_lock); | 470 | write_unlock(&resource_lock); |
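With the top-down allocator and its resource_alloc_from_bottom boot option removed, allocate_resource() always walks the tree bottom-up, and the new arch_remove_reservations() weak hook becomes the place where an architecture can trim reserved areas out of each candidate window before it is aligned and sized. A hypothetical override (the reserved window below is invented for illustration):

    /* Hypothetical reserved window an architecture might need to dodge. */
    static struct resource arch_reserved_region = {
        .start = 0xf0000,
        .end   = 0xfffff,
    };

    /* Hypothetical arch override: clip the candidate window so allocations
     * never start inside the reserved region. */
    void arch_remove_reservations(struct resource *avail)
    {
        if (avail->start >= arch_reserved_region.start &&
            avail->start <= arch_reserved_region.end)
            avail->start = arch_reserved_region.end + 1;
    }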
diff --git a/kernel/sched.c b/kernel/sched.c index aa14a56f9d03..297d1a0eedb0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -560,18 +560,8 @@ struct rq { | |||
| 560 | 560 | ||
| 561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
| 562 | 562 | ||
| 563 | static inline | ||
| 564 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
| 565 | { | ||
| 566 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
| 567 | 563 | ||
| 568 | /* | 564 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); |
| 569 | * A queue event has occurred, and we're going to schedule. In | ||
| 570 | * this case, we can save a useless back to back clock update. | ||
| 571 | */ | ||
| 572 | if (test_tsk_need_resched(p)) | ||
| 573 | rq->skip_clock_update = 1; | ||
| 574 | } | ||
| 575 | 565 | ||
| 576 | static inline int cpu_of(struct rq *rq) | 566 | static inline int cpu_of(struct rq *rq) |
| 577 | { | 567 | { |
| @@ -646,22 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
| 646 | 636 | ||
| 647 | #endif /* CONFIG_CGROUP_SCHED */ | 637 | #endif /* CONFIG_CGROUP_SCHED */ |
| 648 | 638 | ||
| 649 | static u64 irq_time_cpu(int cpu); | 639 | static void update_rq_clock_task(struct rq *rq, s64 delta); |
| 650 | static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); | ||
| 651 | 640 | ||
| 652 | inline void update_rq_clock(struct rq *rq) | 641 | static void update_rq_clock(struct rq *rq) |
| 653 | { | 642 | { |
| 654 | if (!rq->skip_clock_update) { | 643 | s64 delta; |
| 655 | int cpu = cpu_of(rq); | ||
| 656 | u64 irq_time; | ||
| 657 | 644 | ||
| 658 | rq->clock = sched_clock_cpu(cpu); | 645 | if (rq->skip_clock_update) |
| 659 | irq_time = irq_time_cpu(cpu); | 646 | return; |
| 660 | if (rq->clock - irq_time > rq->clock_task) | ||
| 661 | rq->clock_task = rq->clock - irq_time; | ||
| 662 | 647 | ||
| 663 | sched_irq_time_avg_update(rq, irq_time); | 648 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; |
| 664 | } | 649 | rq->clock += delta; |
| 650 | update_rq_clock_task(rq, delta); | ||
| 665 | } | 651 | } |
| 666 | 652 | ||
| 667 | /* | 653 | /* |
| @@ -1934,10 +1920,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
| 1934 | * They are read and saved off onto struct rq in update_rq_clock(). | 1920 | * They are read and saved off onto struct rq in update_rq_clock(). |
| 1935 | * This may result in other CPU reading this CPU's irq time and can | 1921 | * This may result in other CPU reading this CPU's irq time and can |
| 1936 | * race with irq/account_system_vtime on this CPU. We would either get old | 1922 | * race with irq/account_system_vtime on this CPU. We would either get old |
| 1937 | * or new value (or semi updated value on 32 bit) with a side effect of | 1923 | * or new value with a side effect of accounting a slice of irq time to wrong |
| 1938 | * accounting a slice of irq time to wrong task when irq is in progress | 1924 | * task when irq is in progress while we read rq->clock. That is a worthy |
| 1939 | * while we read rq->clock. That is a worthy compromise in place of having | 1925 | * compromise in place of having locks on each irq in account_system_time. |
| 1940 | * locks on each irq in account_system_time. | ||
| 1941 | */ | 1926 | */ |
| 1942 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | 1927 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); |
| 1943 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | 1928 | static DEFINE_PER_CPU(u64, cpu_softirq_time); |
| @@ -1955,19 +1940,58 @@ void disable_sched_clock_irqtime(void) | |||
| 1955 | sched_clock_irqtime = 0; | 1940 | sched_clock_irqtime = 0; |
| 1956 | } | 1941 | } |
| 1957 | 1942 | ||
| 1958 | static u64 irq_time_cpu(int cpu) | 1943 | #ifndef CONFIG_64BIT |
| 1944 | static DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
| 1945 | |||
| 1946 | static inline void irq_time_write_begin(void) | ||
| 1959 | { | 1947 | { |
| 1960 | if (!sched_clock_irqtime) | 1948 | __this_cpu_inc(irq_time_seq.sequence); |
| 1961 | return 0; | 1949 | smp_wmb(); |
| 1950 | } | ||
| 1962 | 1951 | ||
| 1952 | static inline void irq_time_write_end(void) | ||
| 1953 | { | ||
| 1954 | smp_wmb(); | ||
| 1955 | __this_cpu_inc(irq_time_seq.sequence); | ||
| 1956 | } | ||
| 1957 | |||
| 1958 | static inline u64 irq_time_read(int cpu) | ||
| 1959 | { | ||
| 1960 | u64 irq_time; | ||
| 1961 | unsigned seq; | ||
| 1962 | |||
| 1963 | do { | ||
| 1964 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
| 1965 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
| 1966 | per_cpu(cpu_hardirq_time, cpu); | ||
| 1967 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
| 1968 | |||
| 1969 | return irq_time; | ||
| 1970 | } | ||
| 1971 | #else /* CONFIG_64BIT */ | ||
| 1972 | static inline void irq_time_write_begin(void) | ||
| 1973 | { | ||
| 1974 | } | ||
| 1975 | |||
| 1976 | static inline void irq_time_write_end(void) | ||
| 1977 | { | ||
| 1978 | } | ||
| 1979 | |||
| 1980 | static inline u64 irq_time_read(int cpu) | ||
| 1981 | { | ||
| 1963 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | 1982 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); |
| 1964 | } | 1983 | } |
| 1984 | #endif /* CONFIG_64BIT */ | ||
| 1965 | 1985 | ||
| 1986 | /* | ||
| 1987 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
| 1988 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
| 1989 | */ | ||
| 1966 | void account_system_vtime(struct task_struct *curr) | 1990 | void account_system_vtime(struct task_struct *curr) |
| 1967 | { | 1991 | { |
| 1968 | unsigned long flags; | 1992 | unsigned long flags; |
| 1993 | s64 delta; | ||
| 1969 | int cpu; | 1994 | int cpu; |
| 1970 | u64 now, delta; | ||
| 1971 | 1995 | ||
| 1972 | if (!sched_clock_irqtime) | 1996 | if (!sched_clock_irqtime) |
| 1973 | return; | 1997 | return; |
| @@ -1975,9 +1999,10 @@ void account_system_vtime(struct task_struct *curr) | |||
| 1975 | local_irq_save(flags); | 1999 | local_irq_save(flags); |
| 1976 | 2000 | ||
| 1977 | cpu = smp_processor_id(); | 2001 | cpu = smp_processor_id(); |
| 1978 | now = sched_clock_cpu(cpu); | 2002 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); |
| 1979 | delta = now - per_cpu(irq_start_time, cpu); | 2003 | __this_cpu_add(irq_start_time, delta); |
| 1980 | per_cpu(irq_start_time, cpu) = now; | 2004 | |
| 2005 | irq_time_write_begin(); | ||
| 1981 | /* | 2006 | /* |
| 1982 | * We do not account for softirq time from ksoftirqd here. | 2007 | * We do not account for softirq time from ksoftirqd here. |
| 1983 | * We want to continue accounting softirq time to ksoftirqd thread | 2008 | * We want to continue accounting softirq time to ksoftirqd thread |
| @@ -1985,33 +2010,55 @@ void account_system_vtime(struct task_struct *curr) | |||
| 1985 | * that do not consume any time, but still wants to run. | 2010 | * that do not consume any time, but still wants to run. |
| 1986 | */ | 2011 | */ |
| 1987 | if (hardirq_count()) | 2012 | if (hardirq_count()) |
| 1988 | per_cpu(cpu_hardirq_time, cpu) += delta; | 2013 | __this_cpu_add(cpu_hardirq_time, delta); |
| 1989 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | 2014 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) |
| 1990 | per_cpu(cpu_softirq_time, cpu) += delta; | 2015 | __this_cpu_add(cpu_softirq_time, delta); |
| 1991 | 2016 | ||
| 2017 | irq_time_write_end(); | ||
| 1992 | local_irq_restore(flags); | 2018 | local_irq_restore(flags); |
| 1993 | } | 2019 | } |
| 1994 | EXPORT_SYMBOL_GPL(account_system_vtime); | 2020 | EXPORT_SYMBOL_GPL(account_system_vtime); |
| 1995 | 2021 | ||
| 1996 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) | 2022 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
| 1997 | { | 2023 | { |
| 1998 | if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { | 2024 | s64 irq_delta; |
| 1999 | u64 delta_irq = curr_irq_time - rq->prev_irq_time; | 2025 | |
| 2000 | rq->prev_irq_time = curr_irq_time; | 2026 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; |
| 2001 | sched_rt_avg_update(rq, delta_irq); | 2027 | |
| 2002 | } | 2028 | /* |
| 2029 | * Since irq_time is only updated on {soft,}irq_exit, we might run into | ||
| 2030 | * this case when a previous update_rq_clock() happened inside a | ||
| 2031 | * {soft,}irq region. | ||
| 2032 | * | ||
| 2033 | * When this happens, we stop ->clock_task and only update the | ||
| 2034 | * prev_irq_time stamp to account for the part that fit, so that a next | ||
| 2035 | * update will consume the rest. This ensures ->clock_task is | ||
| 2036 | * monotonic. | ||
| 2037 | * | ||
| 2038 | * It does however cause some slight misattribution of {soft,}irq | ||
| 2039 | * time, a more accurate solution would be to update the irq_time using | ||
| 2040 | * the current rq->clock timestamp, except that would require using | ||
| 2041 | * atomic ops. | ||
| 2042 | */ | ||
| 2043 | if (irq_delta > delta) | ||
| 2044 | irq_delta = delta; | ||
| 2045 | |||
| 2046 | rq->prev_irq_time += irq_delta; | ||
| 2047 | delta -= irq_delta; | ||
| 2048 | rq->clock_task += delta; | ||
| 2049 | |||
| 2050 | if (irq_delta && sched_feat(NONIRQ_POWER)) | ||
| 2051 | sched_rt_avg_update(rq, irq_delta); | ||
| 2003 | } | 2052 | } |
| 2004 | 2053 | ||
| 2005 | #else | 2054 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 2006 | 2055 | ||
| 2007 | static u64 irq_time_cpu(int cpu) | 2056 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
| 2008 | { | 2057 | { |
| 2009 | return 0; | 2058 | rq->clock_task += delta; |
| 2010 | } | 2059 | } |
| 2011 | 2060 | ||
| 2012 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } | 2061 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 2013 | |||
| 2014 | #endif | ||
| 2015 | 2062 | ||
| 2016 | #include "sched_idletask.c" | 2063 | #include "sched_idletask.c" |
| 2017 | #include "sched_fair.c" | 2064 | #include "sched_fair.c" |
| @@ -2118,6 +2165,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
| 2118 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2165 | p->sched_class->prio_changed(rq, p, oldprio, running); |
| 2119 | } | 2166 | } |
| 2120 | 2167 | ||
| 2168 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
| 2169 | { | ||
| 2170 | const struct sched_class *class; | ||
| 2171 | |||
| 2172 | if (p->sched_class == rq->curr->sched_class) { | ||
| 2173 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
| 2174 | } else { | ||
| 2175 | for_each_class(class) { | ||
| 2176 | if (class == rq->curr->sched_class) | ||
| 2177 | break; | ||
| 2178 | if (class == p->sched_class) { | ||
| 2179 | resched_task(rq->curr); | ||
| 2180 | break; | ||
| 2181 | } | ||
| 2182 | } | ||
| 2183 | } | ||
| 2184 | |||
| 2185 | /* | ||
| 2186 | * A queue event has occurred, and we're going to schedule. In | ||
| 2187 | * this case, we can save a useless back to back clock update. | ||
| 2188 | */ | ||
| 2189 | if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) | ||
| 2190 | rq->skip_clock_update = 1; | ||
| 2191 | } | ||
| 2192 | |||
| 2121 | #ifdef CONFIG_SMP | 2193 | #ifdef CONFIG_SMP |
| 2122 | /* | 2194 | /* |
| 2123 | * Is this task likely cache-hot: | 2195 | * Is this task likely cache-hot: |
| @@ -3104,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq) | |||
| 3104 | return delta; | 3176 | return delta; |
| 3105 | } | 3177 | } |
| 3106 | 3178 | ||
| 3179 | static unsigned long | ||
| 3180 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
| 3181 | { | ||
| 3182 | load *= exp; | ||
| 3183 | load += active * (FIXED_1 - exp); | ||
| 3184 | load += 1UL << (FSHIFT - 1); | ||
| 3185 | return load >> FSHIFT; | ||
| 3186 | } | ||
| 3187 | |||
| 3107 | #ifdef CONFIG_NO_HZ | 3188 | #ifdef CONFIG_NO_HZ |
| 3108 | /* | 3189 | /* |
| 3109 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. | 3190 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. |
| @@ -3133,6 +3214,128 @@ static long calc_load_fold_idle(void) | |||
| 3133 | 3214 | ||
| 3134 | return delta; | 3215 | return delta; |
| 3135 | } | 3216 | } |
| 3217 | |||
| 3218 | /** | ||
| 3219 | * fixed_power_int - compute: x^n, in O(log n) time | ||
| 3220 | * | ||
| 3221 | * @x: base of the power | ||
| 3222 | * @frac_bits: fractional bits of @x | ||
| 3223 | * @n: power to raise @x to. | ||
| 3224 | * | ||
| 3225 | * By exploiting the relation between the definition of the natural power | ||
| 3226 | * function: x^n := x*x*...*x (x multiplied by itself for n times), and | ||
| 3227 | * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, | ||
| 3228 | * (where: n_i \elem {0, 1}, the binary vector representing n), | ||
| 3229 | * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is | ||
| 3230 | * of course trivially computable in O(log_2 n), the length of our binary | ||
| 3231 | * vector. | ||
| 3232 | */ | ||
| 3233 | static unsigned long | ||
| 3234 | fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) | ||
| 3235 | { | ||
| 3236 | unsigned long result = 1UL << frac_bits; | ||
| 3237 | |||
| 3238 | if (n) for (;;) { | ||
| 3239 | if (n & 1) { | ||
| 3240 | result *= x; | ||
| 3241 | result += 1UL << (frac_bits - 1); | ||
| 3242 | result >>= frac_bits; | ||
| 3243 | } | ||
| 3244 | n >>= 1; | ||
| 3245 | if (!n) | ||
| 3246 | break; | ||
| 3247 | x *= x; | ||
| 3248 | x += 1UL << (frac_bits - 1); | ||
| 3249 | x >>= frac_bits; | ||
| 3250 | } | ||
| 3251 | |||
| 3252 | return result; | ||
| 3253 | } | ||
| 3254 | |||
| 3255 | /* | ||
| 3256 | * a1 = a0 * e + a * (1 - e) | ||
| 3257 | * | ||
| 3258 | * a2 = a1 * e + a * (1 - e) | ||
| 3259 | * = (a0 * e + a * (1 - e)) * e + a * (1 - e) | ||
| 3260 | * = a0 * e^2 + a * (1 - e) * (1 + e) | ||
| 3261 | * | ||
| 3262 | * a3 = a2 * e + a * (1 - e) | ||
| 3263 | * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) | ||
| 3264 | * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) | ||
| 3265 | * | ||
| 3266 | * ... | ||
| 3267 | * | ||
| 3268 | * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] | ||
| 3269 | * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) | ||
| 3270 | * = a0 * e^n + a * (1 - e^n) | ||
| 3271 | * | ||
| 3272 | * [1] application of the geometric series: | ||
| 3273 | * | ||
| 3274 | * n 1 - x^(n+1) | ||
| 3275 | * S_n := \Sum x^i = ------------- | ||
| 3276 | * i=0 1 - x | ||
| 3277 | */ | ||
| 3278 | static unsigned long | ||
| 3279 | calc_load_n(unsigned long load, unsigned long exp, | ||
| 3280 | unsigned long active, unsigned int n) | ||
| 3281 | { | ||
| 3282 | |||
| 3283 | return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); | ||
| 3284 | } | ||
| 3285 | |||
| 3286 | /* | ||
| 3287 | * NO_HZ can leave us missing all per-cpu ticks calling | ||
| 3288 | * calc_load_account_active(), but since an idle CPU folds its delta into | ||
| 3289 | * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold | ||
| 3290 | * in the pending idle delta if our idle period crossed a load cycle boundary. | ||
| 3291 | * | ||
| 3292 | * Once we've updated the global active value, we need to apply the exponential | ||
| 3293 | * weights adjusted to the number of cycles missed. | ||
| 3294 | */ | ||
| 3295 | static void calc_global_nohz(unsigned long ticks) | ||
| 3296 | { | ||
| 3297 | long delta, active, n; | ||
| 3298 | |||
| 3299 | if (time_before(jiffies, calc_load_update)) | ||
| 3300 | return; | ||
| 3301 | |||
| 3302 | /* | ||
| 3303 | * If we crossed a calc_load_update boundary, make sure to fold | ||
| 3304 | * any pending idle changes, the respective CPUs might have | ||
| 3305 | * missed the tick driven calc_load_account_active() update | ||
| 3306 | * due to NO_HZ. | ||
| 3307 | */ | ||
| 3308 | delta = calc_load_fold_idle(); | ||
| 3309 | if (delta) | ||
| 3310 | atomic_long_add(delta, &calc_load_tasks); | ||
| 3311 | |||
| 3312 | /* | ||
| 3313 | * If we were idle for multiple load cycles, apply them. | ||
| 3314 | */ | ||
| 3315 | if (ticks >= LOAD_FREQ) { | ||
| 3316 | n = ticks / LOAD_FREQ; | ||
| 3317 | |||
| 3318 | active = atomic_long_read(&calc_load_tasks); | ||
| 3319 | active = active > 0 ? active * FIXED_1 : 0; | ||
| 3320 | |||
| 3321 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | ||
| 3322 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | ||
| 3323 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
| 3324 | |||
| 3325 | calc_load_update += n * LOAD_FREQ; | ||
| 3326 | } | ||
| 3327 | |||
| 3328 | /* | ||
| 3329 | * Its possible the remainder of the above division also crosses | ||
| 3330 | * a LOAD_FREQ period, the regular check in calc_global_load() | ||
| 3331 | * which comes after this will take care of that. | ||
| 3332 | * | ||
| 3333 | * Consider us being 11 ticks before a cycle completion, and us | ||
| 3334 | * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will | ||
| 3335 | * age us 4 cycles, and the test in calc_global_load() will | ||
| 3336 | * pick up the final one. | ||
| 3337 | */ | ||
| 3338 | } | ||
| 3136 | #else | 3339 | #else |
| 3137 | static void calc_load_account_idle(struct rq *this_rq) | 3340 | static void calc_load_account_idle(struct rq *this_rq) |
| 3138 | { | 3341 | { |
| @@ -3142,6 +3345,10 @@ static inline long calc_load_fold_idle(void) | |||
| 3142 | { | 3345 | { |
| 3143 | return 0; | 3346 | return 0; |
| 3144 | } | 3347 | } |
| 3348 | |||
| 3349 | static void calc_global_nohz(unsigned long ticks) | ||
| 3350 | { | ||
| 3351 | } | ||
| 3145 | #endif | 3352 | #endif |
| 3146 | 3353 | ||
| 3147 | /** | 3354 | /** |
| @@ -3159,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | |||
| 3159 | loads[2] = (avenrun[2] + offset) << shift; | 3366 | loads[2] = (avenrun[2] + offset) << shift; |
| 3160 | } | 3367 | } |
| 3161 | 3368 | ||
| 3162 | static unsigned long | ||
| 3163 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
| 3164 | { | ||
| 3165 | load *= exp; | ||
| 3166 | load += active * (FIXED_1 - exp); | ||
| 3167 | return load >> FSHIFT; | ||
| 3168 | } | ||
| 3169 | |||
| 3170 | /* | 3369 | /* |
| 3171 | * calc_load - update the avenrun load estimates 10 ticks after the | 3370 | * calc_load - update the avenrun load estimates 10 ticks after the |
| 3172 | * CPUs have updated calc_load_tasks. | 3371 | * CPUs have updated calc_load_tasks. |
| 3173 | */ | 3372 | */ |
| 3174 | void calc_global_load(void) | 3373 | void calc_global_load(unsigned long ticks) |
| 3175 | { | 3374 | { |
| 3176 | unsigned long upd = calc_load_update + 10; | ||
| 3177 | long active; | 3375 | long active; |
| 3178 | 3376 | ||
| 3179 | if (time_before(jiffies, upd)) | 3377 | calc_global_nohz(ticks); |
| 3378 | |||
| 3379 | if (time_before(jiffies, calc_load_update + 10)) | ||
| 3180 | return; | 3380 | return; |
| 3181 | 3381 | ||
| 3182 | active = atomic_long_read(&calc_load_tasks); | 3382 | active = atomic_long_read(&calc_load_tasks); |
| @@ -3830,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
| 3830 | { | 4030 | { |
| 3831 | if (prev->se.on_rq) | 4031 | if (prev->se.on_rq) |
| 3832 | update_rq_clock(rq); | 4032 | update_rq_clock(rq); |
| 3833 | rq->skip_clock_update = 0; | ||
| 3834 | prev->sched_class->put_prev_task(rq, prev); | 4033 | prev->sched_class->put_prev_task(rq, prev); |
| 3835 | } | 4034 | } |
| 3836 | 4035 | ||
| @@ -3888,7 +4087,6 @@ need_resched_nonpreemptible: | |||
| 3888 | hrtick_clear(rq); | 4087 | hrtick_clear(rq); |
| 3889 | 4088 | ||
| 3890 | raw_spin_lock_irq(&rq->lock); | 4089 | raw_spin_lock_irq(&rq->lock); |
| 3891 | clear_tsk_need_resched(prev); | ||
| 3892 | 4090 | ||
| 3893 | switch_count = &prev->nivcsw; | 4091 | switch_count = &prev->nivcsw; |
| 3894 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4092 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
| @@ -3920,6 +4118,8 @@ need_resched_nonpreemptible: | |||
| 3920 | 4118 | ||
| 3921 | put_prev_task(rq, prev); | 4119 | put_prev_task(rq, prev); |
| 3922 | next = pick_next_task(rq); | 4120 | next = pick_next_task(rq); |
| 4121 | clear_tsk_need_resched(prev); | ||
| 4122 | rq->skip_clock_update = 0; | ||
| 3923 | 4123 | ||
| 3924 | if (likely(prev != next)) { | 4124 | if (likely(prev != next)) { |
| 3925 | sched_info_switch(prev, next); | 4125 | sched_info_switch(prev, next); |
| @@ -6960,6 +7160,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
| 6960 | if (cpu != group_first_cpu(sd->groups)) | 7160 | if (cpu != group_first_cpu(sd->groups)) |
| 6961 | return; | 7161 | return; |
| 6962 | 7162 | ||
| 7163 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); | ||
| 7164 | |||
| 6963 | child = sd->child; | 7165 | child = sd->child; |
| 6964 | 7166 | ||
| 6965 | sd->groups->cpu_power = 0; | 7167 | sd->groups->cpu_power = 0; |
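The calc_load_n() block earlier in this sched.c diff carries its own derivation; as a compact LaTeX restatement of that in-tree math, the per-period update and its n-step closed form are

    a_{k+1} = a_k\,e + a\,(1 - e), \qquad a_n = a_0\,e^n + a\,(1 - e^n)

so catching up after n missed LOAD_FREQ periods only requires e^n, which fixed_power_int() computes in O(log n) fixed-point multiplications by repeated squaring.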
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f4f6a8326dd0..00ebd7686676 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
| 1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
| 1656 | 1656 | ||
| 1657 | if (unlikely(rt_prio(p->prio))) | ||
| 1658 | goto preempt; | ||
| 1659 | |||
| 1660 | if (unlikely(p->sched_class != &fair_sched_class)) | ||
| 1661 | return; | ||
| 1662 | |||
| 1663 | if (unlikely(se == pse)) | 1657 | if (unlikely(se == pse)) |
| 1664 | return; | 1658 | return; |
| 1665 | 1659 | ||
| @@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
| 1764 | set_task_cpu(p, this_cpu); | 1758 | set_task_cpu(p, this_cpu); |
| 1765 | activate_task(this_rq, p, 0); | 1759 | activate_task(this_rq, p, 0); |
| 1766 | check_preempt_curr(this_rq, p, 0); | 1760 | check_preempt_curr(this_rq, p, 0); |
| 1767 | |||
| 1768 | /* re-arm NEWIDLE balancing when moving tasks */ | ||
| 1769 | src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
| 1770 | this_rq->idle_stamp = 0; | ||
| 1771 | } | 1761 | } |
| 1772 | 1762 | ||
| 1773 | /* | 1763 | /* |
| @@ -2035,13 +2025,16 @@ struct sd_lb_stats { | |||
| 2035 | unsigned long this_load_per_task; | 2025 | unsigned long this_load_per_task; |
| 2036 | unsigned long this_nr_running; | 2026 | unsigned long this_nr_running; |
| 2037 | unsigned long this_has_capacity; | 2027 | unsigned long this_has_capacity; |
| 2028 | unsigned int this_idle_cpus; | ||
| 2038 | 2029 | ||
| 2039 | /* Statistics of the busiest group */ | 2030 | /* Statistics of the busiest group */ |
| 2031 | unsigned int busiest_idle_cpus; | ||
| 2040 | unsigned long max_load; | 2032 | unsigned long max_load; |
| 2041 | unsigned long busiest_load_per_task; | 2033 | unsigned long busiest_load_per_task; |
| 2042 | unsigned long busiest_nr_running; | 2034 | unsigned long busiest_nr_running; |
| 2043 | unsigned long busiest_group_capacity; | 2035 | unsigned long busiest_group_capacity; |
| 2044 | unsigned long busiest_has_capacity; | 2036 | unsigned long busiest_has_capacity; |
| 2037 | unsigned int busiest_group_weight; | ||
| 2045 | 2038 | ||
| 2046 | int group_imb; /* Is there imbalance in this sd */ | 2039 | int group_imb; /* Is there imbalance in this sd */ |
| 2047 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 2040 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
| @@ -2063,6 +2056,8 @@ struct sg_lb_stats { | |||
| 2063 | unsigned long sum_nr_running; /* Nr tasks running in the group */ | 2056 | unsigned long sum_nr_running; /* Nr tasks running in the group */ |
| 2064 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ | 2057 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ |
| 2065 | unsigned long group_capacity; | 2058 | unsigned long group_capacity; |
| 2059 | unsigned long idle_cpus; | ||
| 2060 | unsigned long group_weight; | ||
| 2066 | int group_imb; /* Is there an imbalance in the group ? */ | 2061 | int group_imb; /* Is there an imbalance in the group ? */ |
| 2067 | int group_has_capacity; /* Is there extra capacity in the group? */ | 2062 | int group_has_capacity; /* Is there extra capacity in the group? */ |
| 2068 | }; | 2063 | }; |
| @@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2431 | sgs->group_load += load; | 2426 | sgs->group_load += load; |
| 2432 | sgs->sum_nr_running += rq->nr_running; | 2427 | sgs->sum_nr_running += rq->nr_running; |
| 2433 | sgs->sum_weighted_load += weighted_cpuload(i); | 2428 | sgs->sum_weighted_load += weighted_cpuload(i); |
| 2434 | 2429 | if (idle_cpu(i)) | |
| 2430 | sgs->idle_cpus++; | ||
| 2435 | } | 2431 | } |
| 2436 | 2432 | ||
| 2437 | /* | 2433 | /* |
| @@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 2469 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | 2465 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
| 2470 | if (!sgs->group_capacity) | 2466 | if (!sgs->group_capacity) |
| 2471 | sgs->group_capacity = fix_small_capacity(sd, group); | 2467 | sgs->group_capacity = fix_small_capacity(sd, group); |
| 2468 | sgs->group_weight = group->group_weight; | ||
| 2472 | 2469 | ||
| 2473 | if (sgs->group_capacity > sgs->sum_nr_running) | 2470 | if (sgs->group_capacity > sgs->sum_nr_running) |
| 2474 | sgs->group_has_capacity = 1; | 2471 | sgs->group_has_capacity = 1; |
| @@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
| 2576 | sds->this_nr_running = sgs.sum_nr_running; | 2573 | sds->this_nr_running = sgs.sum_nr_running; |
| 2577 | sds->this_load_per_task = sgs.sum_weighted_load; | 2574 | sds->this_load_per_task = sgs.sum_weighted_load; |
| 2578 | sds->this_has_capacity = sgs.group_has_capacity; | 2575 | sds->this_has_capacity = sgs.group_has_capacity; |
| 2576 | sds->this_idle_cpus = sgs.idle_cpus; | ||
| 2579 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { | 2577 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { |
| 2580 | sds->max_load = sgs.avg_load; | 2578 | sds->max_load = sgs.avg_load; |
| 2581 | sds->busiest = sg; | 2579 | sds->busiest = sg; |
| 2582 | sds->busiest_nr_running = sgs.sum_nr_running; | 2580 | sds->busiest_nr_running = sgs.sum_nr_running; |
| 2581 | sds->busiest_idle_cpus = sgs.idle_cpus; | ||
| 2583 | sds->busiest_group_capacity = sgs.group_capacity; | 2582 | sds->busiest_group_capacity = sgs.group_capacity; |
| 2584 | sds->busiest_load_per_task = sgs.sum_weighted_load; | 2583 | sds->busiest_load_per_task = sgs.sum_weighted_load; |
| 2585 | sds->busiest_has_capacity = sgs.group_has_capacity; | 2584 | sds->busiest_has_capacity = sgs.group_has_capacity; |
| 2585 | sds->busiest_group_weight = sgs.group_weight; | ||
| 2586 | sds->group_imb = sgs.group_imb; | 2586 | sds->group_imb = sgs.group_imb; |
| 2587 | } | 2587 | } |
| 2588 | 2588 | ||
| @@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
| 2860 | if (sds.this_load >= sds.avg_load) | 2860 | if (sds.this_load >= sds.avg_load) |
| 2861 | goto out_balanced; | 2861 | goto out_balanced; |
| 2862 | 2862 | ||
| 2863 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | 2863 | /* |
| 2864 | goto out_balanced; | 2864 | * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. |
| 2865 | * And to check for busy balance use !idle_cpu instead of | ||
| 2866 | * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE | ||
| 2867 | * even when they are idle. | ||
| 2868 | */ | ||
| 2869 | if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { | ||
| 2870 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | ||
| 2871 | goto out_balanced; | ||
| 2872 | } else { | ||
| 2873 | /* | ||
| 2874 | * This cpu is idle. If the busiest group load doesn't | ||
| 2875 | * have more tasks than the number of available cpu's and | ||
| 2876 | * there is no imbalance between this and busiest group | ||
| 2877 | * wrt to idle cpu's, it is balanced. | ||
| 2878 | */ | ||
| 2879 | if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && | ||
| 2880 | sds.busiest_nr_running <= sds.busiest_group_weight) | ||
| 2881 | goto out_balanced; | ||
| 2882 | } | ||
| 2865 | 2883 | ||
| 2866 | force_balance: | 2884 | force_balance: |
| 2867 | /* Looks like there is an imbalance. Compute it */ | 2885 | /* Looks like there is an imbalance. Compute it */ |
| @@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 3197 | interval = msecs_to_jiffies(sd->balance_interval); | 3215 | interval = msecs_to_jiffies(sd->balance_interval); |
| 3198 | if (time_after(next_balance, sd->last_balance + interval)) | 3216 | if (time_after(next_balance, sd->last_balance + interval)) |
| 3199 | next_balance = sd->last_balance + interval; | 3217 | next_balance = sd->last_balance + interval; |
| 3200 | if (pulled_task) | 3218 | if (pulled_task) { |
| 3219 | this_rq->idle_stamp = 0; | ||
| 3201 | break; | 3220 | break; |
| 3221 | } | ||
| 3202 | } | 3222 | } |
| 3203 | 3223 | ||
| 3204 | raw_spin_lock(&this_rq->lock); | 3224 | raw_spin_lock(&this_rq->lock); |
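The reworked out_balanced test in find_busiest_group() distinguishes a genuinely idle CPU from a CPU_NEWLY_IDLE pass or an HT sibling that merely reports itself busy. A condensed, illustrative restatement of that decision (not standalone code; it reuses the sd_lb_stats fields shown above):

    /* Illustrative condensation of the out_balanced checks above. */
    static bool looks_balanced(struct sched_domain *sd, struct sd_lb_stats *sds,
                               int this_cpu, enum cpu_idle_type idle)
    {
        if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu))
            /* conservative percentage test for busy/newly-idle balancing */
            return 100 * sds->max_load <= sd->imbalance_pct * sds->this_load;

        /*
         * Really idle CPU: treat things as balanced while the busiest
         * group is not overcommitted and is no worse off for idle CPUs.
         */
        return sds->this_idle_cpus <= sds->busiest_idle_cpus + 1 &&
               sds->busiest_nr_running <= sds->busiest_group_weight;
    }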
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 45bddc0c1048..2bf6b47058c1 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
| @@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p, | |||
| 19 | static void | 19 | static void |
| 20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) | 20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) |
| 21 | { | 21 | { |
| 22 | resched_task(rq->curr); /* we preempt everything */ | 22 | /* we're never preempted */ |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | static struct task_struct *pick_next_task_stop(struct rq *rq) | 25 | static struct task_struct *pick_next_task_stop(struct rq *rq) |
| 26 | { | 26 | { |
| 27 | struct task_struct *stop = rq->stop; | 27 | struct task_struct *stop = rq->stop; |
| 28 | 28 | ||
| 29 | if (stop && stop->state == TASK_RUNNING) | 29 | if (stop && stop->se.on_rq) |
| 30 | return stop; | 30 | return stop; |
| 31 | 31 | ||
| 32 | return NULL; | 32 | return NULL; |
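Both sched_stoptask.c changes read consistently with the check_preempt_curr() rework earlier in this diff: the stop class is the highest-priority class, so a waking stop task already gets rq->curr rescheduled by the new cross-class loop, leaving check_preempt_curr_stop() (only reached when current is itself the stop task) with nothing to do, while pick_next_task_stop() now keys off se.on_rq so a stop task that is no longer queued is not picked merely because its state is TASK_RUNNING.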
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c33a1edb799f..5abfa1518554 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -702,6 +702,15 @@ static struct ctl_table kern_table[] = { | |||
| 702 | .extra1 = &zero, | 702 | .extra1 = &zero, |
| 703 | .extra2 = &ten_thousand, | 703 | .extra2 = &ten_thousand, |
| 704 | }, | 704 | }, |
| 705 | { | ||
| 706 | .procname = "dmesg_restrict", | ||
| 707 | .data = &dmesg_restrict, | ||
| 708 | .maxlen = sizeof(int), | ||
| 709 | .mode = 0644, | ||
| 710 | .proc_handler = proc_dointvec_minmax, | ||
| 711 | .extra1 = &zero, | ||
| 712 | .extra2 = &one, | ||
| 713 | }, | ||
| 705 | #endif | 714 | #endif |
| 706 | { | 715 | { |
| 707 | .procname = "ngroups_max", | 716 | .procname = "ngroups_max", |
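Because the new entry sits in kern_table, the knob appears as kernel.dmesg_restrict and can be toggled at runtime with sysctl -w kernel.dmesg_restrict=1 or by writing to /proc/sys/kernel/dmesg_restrict; the zero/one extra1/extra2 bounds keep it strictly boolean.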
diff --git a/kernel/timer.c b/kernel/timer.c index 68a9ae7679b7..353b9227c2ec 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now) | |||
| 1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
| 1253 | unsigned long expires; | 1253 | unsigned long expires; |
| 1254 | 1254 | ||
| 1255 | /* | ||
| 1256 | * Pretend that there is no timer pending if the cpu is offline. | ||
| 1257 | * Possible pending timers will be migrated later to an active cpu. | ||
| 1258 | */ | ||
| 1259 | if (cpu_is_offline(smp_processor_id())) | ||
| 1260 | return now + NEXT_TIMER_MAX_DELTA; | ||
| 1255 | spin_lock(&base->lock); | 1261 | spin_lock(&base->lock); |
| 1256 | if (time_before_eq(base->next_timer, base->timer_jiffies)) | 1262 | if (time_before_eq(base->next_timer, base->timer_jiffies)) |
| 1257 | base->next_timer = __next_timer_interrupt(base); | 1263 | base->next_timer = __next_timer_interrupt(base); |
| @@ -1319,7 +1325,7 @@ void do_timer(unsigned long ticks) | |||
| 1319 | { | 1325 | { |
| 1320 | jiffies_64 += ticks; | 1326 | jiffies_64 += ticks; |
| 1321 | update_wall_time(); | 1327 | update_wall_time(); |
| 1322 | calc_global_load(); | 1328 | calc_global_load(ticks); |
| 1323 | } | 1329 | } |
| 1324 | 1330 | ||
| 1325 | #ifdef __ARCH_WANT_SYS_ALARM | 1331 | #ifdef __ARCH_WANT_SYS_ALARM |
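Passing ticks through do_timer() into calc_global_load() is what makes the NO_HZ catch-up in calc_global_nohz() work: reusing the example from that comment, a CPU that goes idle 11 ticks before a load-cycle boundary and sleeps for 4*LOAD_FREQ + 22 ticks wakes with ticks/LOAD_FREQ = 4, so avenrun is aged four periods in one calc_load_n() step and the partially completed fifth period is then caught by the ordinary time_before() check in calc_global_load().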
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e04b8bcdef88..ea37e2ff4164 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
| @@ -126,7 +126,7 @@ if FTRACE | |||
| 126 | config FUNCTION_TRACER | 126 | config FUNCTION_TRACER |
| 127 | bool "Kernel Function Tracer" | 127 | bool "Kernel Function Tracer" |
| 128 | depends on HAVE_FUNCTION_TRACER | 128 | depends on HAVE_FUNCTION_TRACER |
| 129 | select FRAME_POINTER if (!ARM_UNWIND) | 129 | select FRAME_POINTER if !ARM_UNWIND && !S390 |
| 130 | select KALLSYMS | 130 | select KALLSYMS |
| 131 | select GENERIC_TRACER | 131 | select GENERIC_TRACER |
| 132 | select CONTEXT_SWITCH_TRACER | 132 | select CONTEXT_SWITCH_TRACER |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bc251ed66724..7b8ec0281548 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | |||
| 168 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), | 168 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), |
| 169 | BLK_TC_ACT(BLK_TC_WRITE) }; | 169 | BLK_TC_ACT(BLK_TC_WRITE) }; |
| 170 | 170 | ||
| 171 | #define BLK_TC_HARDBARRIER BLK_TC_BARRIER | ||
| 172 | #define BLK_TC_RAHEAD BLK_TC_AHEAD | 171 | #define BLK_TC_RAHEAD BLK_TC_AHEAD |
| 173 | 172 | ||
| 174 | /* The ilog2() calls fall out because they're constant */ | 173 | /* The ilog2() calls fall out because they're constant */ |
| @@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
| 196 | return; | 195 | return; |
| 197 | 196 | ||
| 198 | what |= ddir_act[rw & WRITE]; | 197 | what |= ddir_act[rw & WRITE]; |
| 199 | what |= MASK_TC_BIT(rw, HARDBARRIER); | ||
| 200 | what |= MASK_TC_BIT(rw, SYNC); | 198 | what |= MASK_TC_BIT(rw, SYNC); |
| 201 | what |= MASK_TC_BIT(rw, RAHEAD); | 199 | what |= MASK_TC_BIT(rw, RAHEAD); |
| 202 | what |= MASK_TC_BIT(rw, META); | 200 | what |= MASK_TC_BIT(rw, META); |
| @@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
| 1807 | 1805 | ||
| 1808 | if (rw & REQ_RAHEAD) | 1806 | if (rw & REQ_RAHEAD) |
| 1809 | rwbs[i++] = 'A'; | 1807 | rwbs[i++] = 'A'; |
| 1810 | if (rw & REQ_HARDBARRIER) | ||
| 1811 | rwbs[i++] = 'B'; | ||
| 1812 | if (rw & REQ_SYNC) | 1808 | if (rw & REQ_SYNC) |
| 1813 | rwbs[i++] = 'S'; | 1809 | rwbs[i++] = 'S'; |
| 1814 | if (rw & REQ_META) | 1810 | if (rw & REQ_META) |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..f8cf959bad45 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
| 18 | #include <linux/kallsyms.h> | 18 | #include <linux/kallsyms.h> |
| 19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
| 20 | #include <linux/smp_lock.h> | ||
| 21 | #include <linux/notifier.h> | 20 | #include <linux/notifier.h> |
| 22 | #include <linux/irqflags.h> | 21 | #include <linux/irqflags.h> |
| 23 | #include <linux/debugfs.h> | 22 | #include <linux/debugfs.h> |
| @@ -1284,6 +1283,8 @@ void trace_dump_stack(void) | |||
| 1284 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); | 1283 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); |
| 1285 | } | 1284 | } |
| 1286 | 1285 | ||
| 1286 | static DEFINE_PER_CPU(int, user_stack_count); | ||
| 1287 | |||
| 1287 | void | 1288 | void |
| 1288 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | 1289 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) |
| 1289 | { | 1290 | { |
| @@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
| 1302 | if (unlikely(in_nmi())) | 1303 | if (unlikely(in_nmi())) |
| 1303 | return; | 1304 | return; |
| 1304 | 1305 | ||
| 1306 | /* | ||
| 1307 | * prevent recursion, since the user stack tracing may | ||
| 1308 | * trigger other kernel events. | ||
| 1309 | */ | ||
| 1310 | preempt_disable(); | ||
| 1311 | if (__this_cpu_read(user_stack_count)) | ||
| 1312 | goto out; | ||
| 1313 | |||
| 1314 | __this_cpu_inc(user_stack_count); | ||
| 1315 | |||
| 1316 | |||
| 1317 | |||
| 1305 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, | 1318 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, |
| 1306 | sizeof(*entry), flags, pc); | 1319 | sizeof(*entry), flags, pc); |
| 1307 | if (!event) | 1320 | if (!event) |
| @@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
| 1319 | save_stack_trace_user(&trace); | 1332 | save_stack_trace_user(&trace); |
| 1320 | if (!filter_check_discard(call, entry, buffer, event)) | 1333 | if (!filter_check_discard(call, entry, buffer, event)) |
| 1321 | ring_buffer_unlock_commit(buffer, event); | 1334 | ring_buffer_unlock_commit(buffer, event); |
| 1335 | |||
| 1336 | __this_cpu_dec(user_stack_count); | ||
| 1337 | |||
| 1338 | out: | ||
| 1339 | preempt_enable(); | ||
| 1322 | } | 1340 | } |
| 1323 | 1341 | ||
| 1324 | #ifdef UNUSED | 1342 | #ifdef UNUSED |
| @@ -2320,11 +2338,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, | |||
| 2320 | return count; | 2338 | return count; |
| 2321 | } | 2339 | } |
| 2322 | 2340 | ||
| 2341 | static loff_t tracing_seek(struct file *file, loff_t offset, int origin) | ||
| 2342 | { | ||
| 2343 | if (file->f_mode & FMODE_READ) | ||
| 2344 | return seq_lseek(file, offset, origin); | ||
| 2345 | else | ||
| 2346 | return 0; | ||
| 2347 | } | ||
| 2348 | |||
| 2323 | static const struct file_operations tracing_fops = { | 2349 | static const struct file_operations tracing_fops = { |
| 2324 | .open = tracing_open, | 2350 | .open = tracing_open, |
| 2325 | .read = seq_read, | 2351 | .read = seq_read, |
| 2326 | .write = tracing_write_stub, | 2352 | .write = tracing_write_stub, |
| 2327 | .llseek = seq_lseek, | 2353 | .llseek = tracing_seek, |
| 2328 | .release = tracing_release, | 2354 | .release = tracing_release, |
| 2329 | }; | 2355 | }; |
| 2330 | 2356 | ||
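The per-cpu user_stack_count guard added above is a reusable pattern for breaking recursion when a hook can indirectly retrigger itself. A stand-alone sketch of the same idea, with invented names and an assumed work function:

    static DEFINE_PER_CPU(int, in_my_hook);

    static void my_hook(void)
    {
        preempt_disable();                /* pin the per-cpu counter */
        if (__this_cpu_read(in_my_hook))
            goto out;                     /* already inside: bail out */
        __this_cpu_inc(in_my_hook);

        do_possibly_recursive_work();     /* assumed to exist for the example */

        __this_cpu_dec(in_my_hook);
    out:
        preempt_enable();
    }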
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 90db1bd1a978..e785b0f2aea5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -661,7 +661,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) | |||
| 661 | { | 661 | { |
| 662 | struct worker *worker = kthread_data(task); | 662 | struct worker *worker = kthread_data(task); |
| 663 | 663 | ||
| 664 | if (likely(!(worker->flags & WORKER_NOT_RUNNING))) | 664 | if (!(worker->flags & WORKER_NOT_RUNNING)) |
| 665 | atomic_inc(get_gcwq_nr_running(cpu)); | 665 | atomic_inc(get_gcwq_nr_running(cpu)); |
| 666 | } | 666 | } |
| 667 | 667 | ||
| @@ -687,7 +687,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, | |||
| 687 | struct global_cwq *gcwq = get_gcwq(cpu); | 687 | struct global_cwq *gcwq = get_gcwq(cpu); |
| 688 | atomic_t *nr_running = get_gcwq_nr_running(cpu); | 688 | atomic_t *nr_running = get_gcwq_nr_running(cpu); |
| 689 | 689 | ||
| 690 | if (unlikely(worker->flags & WORKER_NOT_RUNNING)) | 690 | if (worker->flags & WORKER_NOT_RUNNING) |
| 691 | return NULL; | 691 | return NULL; |
| 692 | 692 | ||
| 693 | /* this can only happen on the local cpu */ | 693 | /* this can only happen on the local cpu */ |
| @@ -3692,7 +3692,8 @@ static int __init init_workqueues(void) | |||
| 3692 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); | 3692 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); |
| 3693 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, | 3693 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, |
| 3694 | WQ_UNBOUND_MAX_ACTIVE); | 3694 | WQ_UNBOUND_MAX_ACTIVE); |
| 3695 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); | 3695 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || |
| 3696 | !system_unbound_wq); | ||
| 3696 | return 0; | 3697 | return 0; |
| 3697 | } | 3698 | } |
| 3698 | early_initcall(init_workqueues); | 3699 | early_initcall(init_workqueues); |
