author		Ingo Molnar <mingo@elte.hu>	2010-01-13 03:58:37 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-01-13 04:08:50 -0500
commit		61405fea92c42d072d9b8bd189689f1502a838af (patch)
tree		013ea3e7ed71f4114004d5852d40b6e89e128f76 /kernel
parent		9c443dfdd31eddea6cbe6ee0ca469fbcc4e1dc3b (diff)
parent		1703f2c321a8a531c393e137a82602e16c6061cb (diff)
Merge branch 'perf/urgent' into perf/core
Merge reason: queue up dependent patch, update to -rc4
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
35 files changed, 834 insertions, 504 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2451dc6f3282..4b05bd9479db 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,7 +277,7 @@ static void untag_chunk(struct node *p) | |||
277 | owner->root = NULL; | 277 | owner->root = NULL; |
278 | } | 278 | } |
279 | 279 | ||
280 | for (i = j = 0; i < size; i++, j++) { | 280 | for (i = j = 0; j <= size; i++, j++) { |
281 | struct audit_tree *s; | 281 | struct audit_tree *s; |
282 | if (&chunk->owners[j] == p) { | 282 | if (&chunk->owners[j] == p) { |
283 | list_del_init(&p->list); | 283 | list_del_init(&p->list); |
@@ -290,7 +290,7 @@ static void untag_chunk(struct node *p) | |||
290 | if (!s) /* result of earlier fallback */ | 290 | if (!s) /* result of earlier fallback */ |
291 | continue; | 291 | continue; |
292 | get_tree(s); | 292 | get_tree(s); |
293 | list_replace_init(&chunk->owners[i].list, &new->owners[j].list); | 293 | list_replace_init(&chunk->owners[j].list, &new->owners[i].list); |
294 | } | 294 | } |
295 | 295 | ||
296 | list_replace_rcu(&chunk->hash, &new->hash); | 296 | list_replace_rcu(&chunk->hash, &new->hash); |
@@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) | |||
373 | for (n = 0; n < old->count; n++) { | 373 | for (n = 0; n < old->count; n++) { |
374 | if (old->owners[n].owner == tree) { | 374 | if (old->owners[n].owner == tree) { |
375 | spin_unlock(&hash_lock); | 375 | spin_unlock(&hash_lock); |
376 | put_inotify_watch(watch); | 376 | put_inotify_watch(&old->watch); |
377 | return 0; | 377 | return 0; |
378 | } | 378 | } |
379 | } | 379 | } |
380 | spin_unlock(&hash_lock); | 380 | spin_unlock(&hash_lock); |
381 | 381 | ||
382 | chunk = alloc_chunk(old->count + 1); | 382 | chunk = alloc_chunk(old->count + 1); |
383 | if (!chunk) | 383 | if (!chunk) { |
384 | put_inotify_watch(&old->watch); | ||
384 | return -ENOMEM; | 385 | return -ENOMEM; |
386 | } | ||
385 | 387 | ||
386 | mutex_lock(&inode->inotify_mutex); | 388 | mutex_lock(&inode->inotify_mutex); |
387 | if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) { | 389 | if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) { |
@@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) | |||
425 | spin_unlock(&hash_lock); | 427 | spin_unlock(&hash_lock); |
426 | inotify_evict_watch(&old->watch); | 428 | inotify_evict_watch(&old->watch); |
427 | mutex_unlock(&inode->inotify_mutex); | 429 | mutex_unlock(&inode->inotify_mutex); |
428 | put_inotify_watch(&old->watch); | 430 | put_inotify_watch(&old->watch); /* pair to inotify_find_watch */ |
431 | put_inotify_watch(&old->watch); /* and kill it */ | ||
429 | return 0; | 432 | return 0; |
430 | } | 433 | } |
431 | 434 | ||
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 267e484f0198..fc0f928167e7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -250,7 +250,6 @@ struct audit_context { | |||
250 | #endif | 250 | #endif |
251 | }; | 251 | }; |
252 | 252 | ||
253 | #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) | ||
254 | static inline int open_arg(int flags, int mask) | 253 | static inline int open_arg(int flags, int mask) |
255 | { | 254 | { |
256 | int n = ACC_MODE(flags); | 255 | int n = ACC_MODE(flags); |
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 3c5301381837..98a51f26c136 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -12,7 +12,7 @@ | |||
12 | 12 | ||
13 | void foo(void) | 13 | void foo(void) |
14 | { | 14 | { |
15 | /* The enum constants to put into include/linux/bounds.h */ | 15 | /* The enum constants to put into include/generated/bounds.h */ |
16 | DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); | 16 | DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); |
17 | DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); | 17 | DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); |
18 | /* End of constants */ | 18 | /* End of constants */ |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0249f4be9b5c..1fbcc748044a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2468,7 +2468,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
2468 | /* make sure l doesn't vanish out from under us */ | 2468 | /* make sure l doesn't vanish out from under us */ |
2469 | down_write(&l->mutex); | 2469 | down_write(&l->mutex); |
2470 | mutex_unlock(&cgrp->pidlist_mutex); | 2470 | mutex_unlock(&cgrp->pidlist_mutex); |
2471 | l->use_count++; | ||
2472 | return l; | 2471 | return l; |
2473 | } | 2472 | } |
2474 | } | 2473 | } |
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 291ac586f37f..1c8ddd6ee940 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -209,6 +209,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
209 | return -ENOMEM; | 209 | return -ENOMEM; |
210 | 210 | ||
211 | cpu_hotplug_begin(); | 211 | cpu_hotplug_begin(); |
212 | set_cpu_active(cpu, false); | ||
212 | err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, | 213 | err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, |
213 | hcpu, -1, &nr_calls); | 214 | hcpu, -1, &nr_calls); |
214 | if (err == NOTIFY_BAD) { | 215 | if (err == NOTIFY_BAD) { |
@@ -280,18 +281,6 @@ int __ref cpu_down(unsigned int cpu) | |||
280 | goto out; | 281 | goto out; |
281 | } | 282 | } |
282 | 283 | ||
283 | set_cpu_active(cpu, false); | ||
284 | |||
285 | /* | ||
286 | * Make sure the all cpus did the reschedule and are not | ||
287 | * using stale version of the cpu_active_mask. | ||
288 | * This is not strictly necessary becuase stop_machine() | ||
289 | * that we run down the line already provides the required | ||
290 | * synchronization. But it's really a side effect and we do not | ||
291 | * want to depend on the innards of the stop_machine here. | ||
292 | */ | ||
293 | synchronize_sched(); | ||
294 | |||
295 | err = _cpu_down(cpu, 0); | 284 | err = _cpu_down(cpu, 0); |
296 | 285 | ||
297 | out: | 286 | out: |
@@ -382,19 +371,12 @@ int disable_nonboot_cpus(void) | |||
382 | return error; | 371 | return error; |
383 | cpu_maps_update_begin(); | 372 | cpu_maps_update_begin(); |
384 | first_cpu = cpumask_first(cpu_online_mask); | 373 | first_cpu = cpumask_first(cpu_online_mask); |
385 | /* We take down all of the non-boot CPUs in one shot to avoid races | 374 | /* |
375 | * We take down all of the non-boot CPUs in one shot to avoid races | ||
386 | * with the userspace trying to use the CPU hotplug at the same time | 376 | * with the userspace trying to use the CPU hotplug at the same time |
387 | */ | 377 | */ |
388 | cpumask_clear(frozen_cpus); | 378 | cpumask_clear(frozen_cpus); |
389 | 379 | ||
390 | for_each_online_cpu(cpu) { | ||
391 | if (cpu == first_cpu) | ||
392 | continue; | ||
393 | set_cpu_active(cpu, false); | ||
394 | } | ||
395 | |||
396 | synchronize_sched(); | ||
397 | |||
398 | printk("Disabling non-boot CPUs ...\n"); | 380 | printk("Disabling non-boot CPUs ...\n"); |
399 | for_each_online_cpu(cpu) { | 381 | for_each_online_cpu(cpu) { |
400 | if (cpu == first_cpu) | 382 | if (cpu == first_cpu) |
diff --git a/kernel/exit.c b/kernel/exit.c
index 5962d7ccf243..546774a31a66 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -68,10 +68,10 @@ static void __unhash_process(struct task_struct *p) | |||
68 | detach_pid(p, PIDTYPE_SID); | 68 | detach_pid(p, PIDTYPE_SID); |
69 | 69 | ||
70 | list_del_rcu(&p->tasks); | 70 | list_del_rcu(&p->tasks); |
71 | list_del_init(&p->sibling); | ||
71 | __get_cpu_var(process_counts)--; | 72 | __get_cpu_var(process_counts)--; |
72 | } | 73 | } |
73 | list_del_rcu(&p->thread_group); | 74 | list_del_rcu(&p->thread_group); |
74 | list_del_init(&p->sibling); | ||
75 | } | 75 | } |
76 | 76 | ||
77 | /* | 77 | /* |
@@ -736,12 +736,9 @@ static struct task_struct *find_new_reaper(struct task_struct *father) | |||
736 | /* | 736 | /* |
737 | * Any that need to be release_task'd are put on the @dead list. | 737 | * Any that need to be release_task'd are put on the @dead list. |
738 | */ | 738 | */ |
739 | static void reparent_thread(struct task_struct *father, struct task_struct *p, | 739 | static void reparent_leader(struct task_struct *father, struct task_struct *p, |
740 | struct list_head *dead) | 740 | struct list_head *dead) |
741 | { | 741 | { |
742 | if (p->pdeath_signal) | ||
743 | group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); | ||
744 | |||
745 | list_move_tail(&p->sibling, &p->real_parent->children); | 742 | list_move_tail(&p->sibling, &p->real_parent->children); |
746 | 743 | ||
747 | if (task_detached(p)) | 744 | if (task_detached(p)) |
@@ -780,12 +777,18 @@ static void forget_original_parent(struct task_struct *father) | |||
780 | reaper = find_new_reaper(father); | 777 | reaper = find_new_reaper(father); |
781 | 778 | ||
782 | list_for_each_entry_safe(p, n, &father->children, sibling) { | 779 | list_for_each_entry_safe(p, n, &father->children, sibling) { |
783 | p->real_parent = reaper; | 780 | struct task_struct *t = p; |
784 | if (p->parent == father) { | 781 | do { |
785 | BUG_ON(task_ptrace(p)); | 782 | t->real_parent = reaper; |
786 | p->parent = p->real_parent; | 783 | if (t->parent == father) { |
787 | } | 784 | BUG_ON(task_ptrace(t)); |
788 | reparent_thread(father, p, &dead_children); | 785 | t->parent = t->real_parent; |
786 | } | ||
787 | if (t->pdeath_signal) | ||
788 | group_send_sig_info(t->pdeath_signal, | ||
789 | SEND_SIG_NOINFO, t); | ||
790 | } while_each_thread(p, t); | ||
791 | reparent_leader(father, p, &dead_children); | ||
789 | } | 792 | } |
790 | write_unlock_irq(&tasklist_lock); | 793 | write_unlock_irq(&tasklist_lock); |
791 | 794 | ||
@@ -1551,14 +1554,9 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) | |||
1551 | struct task_struct *p; | 1554 | struct task_struct *p; |
1552 | 1555 | ||
1553 | list_for_each_entry(p, &tsk->children, sibling) { | 1556 | list_for_each_entry(p, &tsk->children, sibling) { |
1554 | /* | 1557 | int ret = wait_consider_task(wo, 0, p); |
1555 | * Do not consider detached threads. | 1558 | if (ret) |
1556 | */ | 1559 | return ret; |
1557 | if (!task_detached(p)) { | ||
1558 | int ret = wait_consider_task(wo, 0, p); | ||
1559 | if (ret) | ||
1560 | return ret; | ||
1561 | } | ||
1562 | } | 1560 | } |
1563 | 1561 | ||
1564 | return 0; | 1562 | return 0; |
diff --git a/kernel/fork.c b/kernel/fork.c
index 202a0ba63d3c..5b2959b3ffc2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1291,7 +1291,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1291 | } | 1291 | } |
1292 | 1292 | ||
1293 | if (likely(p->pid)) { | 1293 | if (likely(p->pid)) { |
1294 | list_add_tail(&p->sibling, &p->real_parent->children); | ||
1295 | tracehook_finish_clone(p, clone_flags, trace); | 1294 | tracehook_finish_clone(p, clone_flags, trace); |
1296 | 1295 | ||
1297 | if (thread_group_leader(p)) { | 1296 | if (thread_group_leader(p)) { |
@@ -1303,6 +1302,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1303 | p->signal->tty = tty_kref_get(current->signal->tty); | 1302 | p->signal->tty = tty_kref_get(current->signal->tty); |
1304 | attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); | 1303 | attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); |
1305 | attach_pid(p, PIDTYPE_SID, task_session(current)); | 1304 | attach_pid(p, PIDTYPE_SID, task_session(current)); |
1305 | list_add_tail(&p->sibling, &p->real_parent->children); | ||
1306 | list_add_tail_rcu(&p->tasks, &init_task.tasks); | 1306 | list_add_tail_rcu(&p->tasks, &init_task.tasks); |
1307 | __get_cpu_var(process_counts)++; | 1307 | __get_cpu_var(process_counts)++; |
1308 | } | 1308 | } |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index dbcbf6a33a08..50dbd5999588 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/percpu.h> | 40 | #include <linux/percpu.h> |
41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/cpu.h> | ||
43 | #include <linux/smp.h> | 44 | #include <linux/smp.h> |
44 | 45 | ||
45 | #include <linux/hw_breakpoint.h> | 46 | #include <linux/hw_breakpoint.h> |
@@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, | |||
388 | if (!cpu_events) | 389 | if (!cpu_events) |
389 | return ERR_PTR(-ENOMEM); | 390 | return ERR_PTR(-ENOMEM); |
390 | 391 | ||
391 | for_each_possible_cpu(cpu) { | 392 | get_online_cpus(); |
393 | for_each_online_cpu(cpu) { | ||
392 | pevent = per_cpu_ptr(cpu_events, cpu); | 394 | pevent = per_cpu_ptr(cpu_events, cpu); |
393 | bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); | 395 | bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); |
394 | 396 | ||
@@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, | |||
399 | goto fail; | 401 | goto fail; |
400 | } | 402 | } |
401 | } | 403 | } |
404 | put_online_cpus(); | ||
402 | 405 | ||
403 | return cpu_events; | 406 | return cpu_events; |
404 | 407 | ||
405 | fail: | 408 | fail: |
406 | for_each_possible_cpu(cpu) { | 409 | for_each_online_cpu(cpu) { |
407 | pevent = per_cpu_ptr(cpu_events, cpu); | 410 | pevent = per_cpu_ptr(cpu_events, cpu); |
408 | if (IS_ERR(*pevent)) | 411 | if (IS_ERR(*pevent)) |
409 | break; | 412 | break; |
410 | unregister_hw_breakpoint(*pevent); | 413 | unregister_hw_breakpoint(*pevent); |
411 | } | 414 | } |
415 | put_online_cpus(); | ||
416 | |||
412 | free_percpu(cpu_events); | 417 | free_percpu(cpu_events); |
413 | /* return the error if any */ | ||
414 | return ERR_PTR(err); | 418 | return ERR_PTR(err); |
415 | } | 419 | } |
416 | EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); | 420 | EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); |
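Note on the hw_breakpoint change above: register_wide_hw_breakpoint() now takes the CPU-hotplug read lock and walks only the online CPUs, on both the success and the error path, instead of every possible CPU. For orientation, here is a minimal caller-side sketch, loosely modeled on the samples/hw_breakpoint module of this kernel era; the handler prototype, the HW_BREAKPOINT_* constants and the probed symbol are assumptions to check against <linux/hw_breakpoint.h> in the tree at hand, not something this diff defines.

/* Hedged sketch: a write breakpoint on a kernel symbol across online CPUs.
 * Handler signature and constants are era-specific assumptions. */
#include <linux/module.h>
#include <linux/err.h>
#include <linux/smp.h>
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static struct perf_event **wide_bp;	/* per-cpu array returned below */

static void wide_bp_handler(struct perf_event *bp, int nmi,
			    struct perf_sample_data *data,
			    struct pt_regs *regs)
{
	printk(KERN_INFO "hw-breakpoint hit on CPU %d\n", smp_processor_id());
}

static int __init wide_bp_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = kallsyms_lookup_name("pid_max");
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	/* With the patch above, only CPUs online at call time get a counter. */
	wide_bp = register_wide_hw_breakpoint(&attr, wide_bp_handler);
	if (IS_ERR(wide_bp))
		return PTR_ERR(wide_bp);
	return 0;
}

static void __exit wide_bp_exit(void)
{
	unregister_wide_hw_breakpoint(wide_bp);
}

module_init(wide_bp_init);
module_exit(wide_bp_exit);
MODULE_LICENSE("GPL");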
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 433e9fcc1fc5..a9a93d9ee7a7 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/hardirq.h> | 21 | #include <linux/hardirq.h> |
22 | #include <linux/elf.h> | 22 | #include <linux/elf.h> |
23 | #include <linux/elfcore.h> | 23 | #include <linux/elfcore.h> |
24 | #include <linux/utsrelease.h> | 24 | #include <generated/utsrelease.h> |
25 | #include <linux/utsname.h> | 25 | #include <linux/utsname.h> |
26 | #include <linux/numa.h> | 26 | #include <linux/numa.h> |
27 | #include <linux/suspend.h> | 27 | #include <linux/suspend.h> |
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 3765ff3c1bbe..e92d519f93b1 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * A simple kernel FIFO implementation. | 2 | * A generic kernel FIFO implementation. |
3 | * | 3 | * |
4 | * Copyright (C) 2009 Stefani Seibold <stefani@seibold.net> | ||
4 | * Copyright (C) 2004 Stelian Pop <stelian@popies.net> | 5 | * Copyright (C) 2004 Stelian Pop <stelian@popies.net> |
5 | * | 6 | * |
6 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
@@ -25,50 +26,48 @@ | |||
25 | #include <linux/err.h> | 26 | #include <linux/err.h> |
26 | #include <linux/kfifo.h> | 27 | #include <linux/kfifo.h> |
27 | #include <linux/log2.h> | 28 | #include <linux/log2.h> |
29 | #include <linux/uaccess.h> | ||
30 | |||
31 | static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, | ||
32 | unsigned int size) | ||
33 | { | ||
34 | fifo->buffer = buffer; | ||
35 | fifo->size = size; | ||
36 | |||
37 | kfifo_reset(fifo); | ||
38 | } | ||
28 | 39 | ||
29 | /** | 40 | /** |
30 | * kfifo_init - allocates a new FIFO using a preallocated buffer | 41 | * kfifo_init - initialize a FIFO using a preallocated buffer |
42 | * @fifo: the fifo to assign the buffer | ||
31 | * @buffer: the preallocated buffer to be used. | 43 | * @buffer: the preallocated buffer to be used. |
32 | * @size: the size of the internal buffer, this have to be a power of 2. | 44 | * @size: the size of the internal buffer, this have to be a power of 2. |
33 | * @gfp_mask: get_free_pages mask, passed to kmalloc() | ||
34 | * @lock: the lock to be used to protect the fifo buffer | ||
35 | * | 45 | * |
36 | * Do NOT pass the kfifo to kfifo_free() after use! Simply free the | ||
37 | * &struct kfifo with kfree(). | ||
38 | */ | 46 | */ |
39 | struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, | 47 | void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size) |
40 | gfp_t gfp_mask, spinlock_t *lock) | ||
41 | { | 48 | { |
42 | struct kfifo *fifo; | ||
43 | |||
44 | /* size must be a power of 2 */ | 49 | /* size must be a power of 2 */ |
45 | BUG_ON(!is_power_of_2(size)); | 50 | BUG_ON(!is_power_of_2(size)); |
46 | 51 | ||
47 | fifo = kmalloc(sizeof(struct kfifo), gfp_mask); | 52 | _kfifo_init(fifo, buffer, size); |
48 | if (!fifo) | ||
49 | return ERR_PTR(-ENOMEM); | ||
50 | |||
51 | fifo->buffer = buffer; | ||
52 | fifo->size = size; | ||
53 | fifo->in = fifo->out = 0; | ||
54 | fifo->lock = lock; | ||
55 | |||
56 | return fifo; | ||
57 | } | 53 | } |
58 | EXPORT_SYMBOL(kfifo_init); | 54 | EXPORT_SYMBOL(kfifo_init); |
59 | 55 | ||
60 | /** | 56 | /** |
61 | * kfifo_alloc - allocates a new FIFO and its internal buffer | 57 | * kfifo_alloc - allocates a new FIFO internal buffer |
62 | * @size: the size of the internal buffer to be allocated. | 58 | * @fifo: the fifo to assign then new buffer |
59 | * @size: the size of the buffer to be allocated, this have to be a power of 2. | ||
63 | * @gfp_mask: get_free_pages mask, passed to kmalloc() | 60 | * @gfp_mask: get_free_pages mask, passed to kmalloc() |
64 | * @lock: the lock to be used to protect the fifo buffer | 61 | * |
62 | * This function dynamically allocates a new fifo internal buffer | ||
65 | * | 63 | * |
66 | * The size will be rounded-up to a power of 2. | 64 | * The size will be rounded-up to a power of 2. |
65 | * The buffer will be release with kfifo_free(). | ||
66 | * Return 0 if no error, otherwise the an error code | ||
67 | */ | 67 | */ |
68 | struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) | 68 | int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask) |
69 | { | 69 | { |
70 | unsigned char *buffer; | 70 | unsigned char *buffer; |
71 | struct kfifo *ret; | ||
72 | 71 | ||
73 | /* | 72 | /* |
74 | * round up to the next power of 2, since our 'let the indices | 73 | * round up to the next power of 2, since our 'let the indices |
@@ -80,48 +79,91 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) | |||
80 | } | 79 | } |
81 | 80 | ||
82 | buffer = kmalloc(size, gfp_mask); | 81 | buffer = kmalloc(size, gfp_mask); |
83 | if (!buffer) | 82 | if (!buffer) { |
84 | return ERR_PTR(-ENOMEM); | 83 | _kfifo_init(fifo, 0, 0); |
85 | 84 | return -ENOMEM; | |
86 | ret = kfifo_init(buffer, size, gfp_mask, lock); | 85 | } |
87 | 86 | ||
88 | if (IS_ERR(ret)) | 87 | _kfifo_init(fifo, buffer, size); |
89 | kfree(buffer); | ||
90 | 88 | ||
91 | return ret; | 89 | return 0; |
92 | } | 90 | } |
93 | EXPORT_SYMBOL(kfifo_alloc); | 91 | EXPORT_SYMBOL(kfifo_alloc); |
94 | 92 | ||
95 | /** | 93 | /** |
96 | * kfifo_free - frees the FIFO | 94 | * kfifo_free - frees the FIFO internal buffer |
97 | * @fifo: the fifo to be freed. | 95 | * @fifo: the fifo to be freed. |
98 | */ | 96 | */ |
99 | void kfifo_free(struct kfifo *fifo) | 97 | void kfifo_free(struct kfifo *fifo) |
100 | { | 98 | { |
101 | kfree(fifo->buffer); | 99 | kfree(fifo->buffer); |
102 | kfree(fifo); | ||
103 | } | 100 | } |
104 | EXPORT_SYMBOL(kfifo_free); | 101 | EXPORT_SYMBOL(kfifo_free); |
105 | 102 | ||
106 | /** | 103 | /** |
107 | * __kfifo_put - puts some data into the FIFO, no locking version | 104 | * kfifo_skip - skip output data |
108 | * @fifo: the fifo to be used. | 105 | * @fifo: the fifo to be used. |
109 | * @buffer: the data to be added. | 106 | * @len: number of bytes to skip |
110 | * @len: the length of the data to be added. | ||
111 | * | ||
112 | * This function copies at most @len bytes from the @buffer into | ||
113 | * the FIFO depending on the free space, and returns the number of | ||
114 | * bytes copied. | ||
115 | * | ||
116 | * Note that with only one concurrent reader and one concurrent | ||
117 | * writer, you don't need extra locking to use these functions. | ||
118 | */ | 107 | */ |
119 | unsigned int __kfifo_put(struct kfifo *fifo, | 108 | void kfifo_skip(struct kfifo *fifo, unsigned int len) |
120 | const unsigned char *buffer, unsigned int len) | 109 | { |
110 | if (len < kfifo_len(fifo)) { | ||
111 | __kfifo_add_out(fifo, len); | ||
112 | return; | ||
113 | } | ||
114 | kfifo_reset_out(fifo); | ||
115 | } | ||
116 | EXPORT_SYMBOL(kfifo_skip); | ||
117 | |||
118 | static inline void __kfifo_in_data(struct kfifo *fifo, | ||
119 | const void *from, unsigned int len, unsigned int off) | ||
121 | { | 120 | { |
122 | unsigned int l; | 121 | unsigned int l; |
123 | 122 | ||
124 | len = min(len, fifo->size - fifo->in + fifo->out); | 123 | /* |
124 | * Ensure that we sample the fifo->out index -before- we | ||
125 | * start putting bytes into the kfifo. | ||
126 | */ | ||
127 | |||
128 | smp_mb(); | ||
129 | |||
130 | off = __kfifo_off(fifo, fifo->in + off); | ||
131 | |||
132 | /* first put the data starting from fifo->in to buffer end */ | ||
133 | l = min(len, fifo->size - off); | ||
134 | memcpy(fifo->buffer + off, from, l); | ||
135 | |||
136 | /* then put the rest (if any) at the beginning of the buffer */ | ||
137 | memcpy(fifo->buffer, from + l, len - l); | ||
138 | } | ||
139 | |||
140 | static inline void __kfifo_out_data(struct kfifo *fifo, | ||
141 | void *to, unsigned int len, unsigned int off) | ||
142 | { | ||
143 | unsigned int l; | ||
144 | |||
145 | /* | ||
146 | * Ensure that we sample the fifo->in index -before- we | ||
147 | * start removing bytes from the kfifo. | ||
148 | */ | ||
149 | |||
150 | smp_rmb(); | ||
151 | |||
152 | off = __kfifo_off(fifo, fifo->out + off); | ||
153 | |||
154 | /* first get the data from fifo->out until the end of the buffer */ | ||
155 | l = min(len, fifo->size - off); | ||
156 | memcpy(to, fifo->buffer + off, l); | ||
157 | |||
158 | /* then get the rest (if any) from the beginning of the buffer */ | ||
159 | memcpy(to + l, fifo->buffer, len - l); | ||
160 | } | ||
161 | |||
162 | static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo, | ||
163 | const void __user *from, unsigned int len, unsigned int off) | ||
164 | { | ||
165 | unsigned int l; | ||
166 | int ret; | ||
125 | 167 | ||
126 | /* | 168 | /* |
127 | * Ensure that we sample the fifo->out index -before- we | 169 | * Ensure that we sample the fifo->out index -before- we |
@@ -130,68 +172,229 @@ unsigned int __kfifo_put(struct kfifo *fifo, | |||
130 | 172 | ||
131 | smp_mb(); | 173 | smp_mb(); |
132 | 174 | ||
175 | off = __kfifo_off(fifo, fifo->in + off); | ||
176 | |||
133 | /* first put the data starting from fifo->in to buffer end */ | 177 | /* first put the data starting from fifo->in to buffer end */ |
134 | l = min(len, fifo->size - (fifo->in & (fifo->size - 1))); | 178 | l = min(len, fifo->size - off); |
135 | memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l); | 179 | ret = copy_from_user(fifo->buffer + off, from, l); |
180 | |||
181 | if (unlikely(ret)) | ||
182 | return ret + len - l; | ||
136 | 183 | ||
137 | /* then put the rest (if any) at the beginning of the buffer */ | 184 | /* then put the rest (if any) at the beginning of the buffer */ |
138 | memcpy(fifo->buffer, buffer + l, len - l); | 185 | return copy_from_user(fifo->buffer, from + l, len - l); |
186 | } | ||
187 | |||
188 | static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo, | ||
189 | void __user *to, unsigned int len, unsigned int off) | ||
190 | { | ||
191 | unsigned int l; | ||
192 | int ret; | ||
139 | 193 | ||
140 | /* | 194 | /* |
141 | * Ensure that we add the bytes to the kfifo -before- | 195 | * Ensure that we sample the fifo->in index -before- we |
142 | * we update the fifo->in index. | 196 | * start removing bytes from the kfifo. |
143 | */ | 197 | */ |
144 | 198 | ||
145 | smp_wmb(); | 199 | smp_rmb(); |
200 | |||
201 | off = __kfifo_off(fifo, fifo->out + off); | ||
202 | |||
203 | /* first get the data from fifo->out until the end of the buffer */ | ||
204 | l = min(len, fifo->size - off); | ||
205 | ret = copy_to_user(to, fifo->buffer + off, l); | ||
206 | |||
207 | if (unlikely(ret)) | ||
208 | return ret + len - l; | ||
209 | |||
210 | /* then get the rest (if any) from the beginning of the buffer */ | ||
211 | return copy_to_user(to + l, fifo->buffer, len - l); | ||
212 | } | ||
213 | |||
214 | unsigned int __kfifo_in_n(struct kfifo *fifo, | ||
215 | const void *from, unsigned int len, unsigned int recsize) | ||
216 | { | ||
217 | if (kfifo_avail(fifo) < len + recsize) | ||
218 | return len + 1; | ||
219 | |||
220 | __kfifo_in_data(fifo, from, len, recsize); | ||
221 | return 0; | ||
222 | } | ||
223 | EXPORT_SYMBOL(__kfifo_in_n); | ||
146 | 224 | ||
147 | fifo->in += len; | 225 | /** |
226 | * kfifo_in - puts some data into the FIFO | ||
227 | * @fifo: the fifo to be used. | ||
228 | * @from: the data to be added. | ||
229 | * @len: the length of the data to be added. | ||
230 | * | ||
231 | * This function copies at most @len bytes from the @from buffer into | ||
232 | * the FIFO depending on the free space, and returns the number of | ||
233 | * bytes copied. | ||
234 | * | ||
235 | * Note that with only one concurrent reader and one concurrent | ||
236 | * writer, you don't need extra locking to use these functions. | ||
237 | */ | ||
238 | unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, | ||
239 | unsigned int len) | ||
240 | { | ||
241 | len = min(kfifo_avail(fifo), len); | ||
148 | 242 | ||
243 | __kfifo_in_data(fifo, from, len, 0); | ||
244 | __kfifo_add_in(fifo, len); | ||
149 | return len; | 245 | return len; |
150 | } | 246 | } |
151 | EXPORT_SYMBOL(__kfifo_put); | 247 | EXPORT_SYMBOL(kfifo_in); |
248 | |||
249 | unsigned int __kfifo_in_generic(struct kfifo *fifo, | ||
250 | const void *from, unsigned int len, unsigned int recsize) | ||
251 | { | ||
252 | return __kfifo_in_rec(fifo, from, len, recsize); | ||
253 | } | ||
254 | EXPORT_SYMBOL(__kfifo_in_generic); | ||
255 | |||
256 | unsigned int __kfifo_out_n(struct kfifo *fifo, | ||
257 | void *to, unsigned int len, unsigned int recsize) | ||
258 | { | ||
259 | if (kfifo_len(fifo) < len + recsize) | ||
260 | return len; | ||
261 | |||
262 | __kfifo_out_data(fifo, to, len, recsize); | ||
263 | __kfifo_add_out(fifo, len + recsize); | ||
264 | return 0; | ||
265 | } | ||
266 | EXPORT_SYMBOL(__kfifo_out_n); | ||
152 | 267 | ||
153 | /** | 268 | /** |
154 | * __kfifo_get - gets some data from the FIFO, no locking version | 269 | * kfifo_out - gets some data from the FIFO |
155 | * @fifo: the fifo to be used. | 270 | * @fifo: the fifo to be used. |
156 | * @buffer: where the data must be copied. | 271 | * @to: where the data must be copied. |
157 | * @len: the size of the destination buffer. | 272 | * @len: the size of the destination buffer. |
158 | * | 273 | * |
159 | * This function copies at most @len bytes from the FIFO into the | 274 | * This function copies at most @len bytes from the FIFO into the |
160 | * @buffer and returns the number of copied bytes. | 275 | * @to buffer and returns the number of copied bytes. |
161 | * | 276 | * |
162 | * Note that with only one concurrent reader and one concurrent | 277 | * Note that with only one concurrent reader and one concurrent |
163 | * writer, you don't need extra locking to use these functions. | 278 | * writer, you don't need extra locking to use these functions. |
164 | */ | 279 | */ |
165 | unsigned int __kfifo_get(struct kfifo *fifo, | 280 | unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len) |
166 | unsigned char *buffer, unsigned int len) | ||
167 | { | 281 | { |
168 | unsigned int l; | 282 | len = min(kfifo_len(fifo), len); |
169 | 283 | ||
170 | len = min(len, fifo->in - fifo->out); | 284 | __kfifo_out_data(fifo, to, len, 0); |
285 | __kfifo_add_out(fifo, len); | ||
171 | 286 | ||
172 | /* | 287 | return len; |
173 | * Ensure that we sample the fifo->in index -before- we | 288 | } |
174 | * start removing bytes from the kfifo. | 289 | EXPORT_SYMBOL(kfifo_out); |
175 | */ | ||
176 | 290 | ||
177 | smp_rmb(); | 291 | unsigned int __kfifo_out_generic(struct kfifo *fifo, |
292 | void *to, unsigned int len, unsigned int recsize, | ||
293 | unsigned int *total) | ||
294 | { | ||
295 | return __kfifo_out_rec(fifo, to, len, recsize, total); | ||
296 | } | ||
297 | EXPORT_SYMBOL(__kfifo_out_generic); | ||
178 | 298 | ||
179 | /* first get the data from fifo->out until the end of the buffer */ | 299 | unsigned int __kfifo_from_user_n(struct kfifo *fifo, |
180 | l = min(len, fifo->size - (fifo->out & (fifo->size - 1))); | 300 | const void __user *from, unsigned int len, unsigned int recsize) |
181 | memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l); | 301 | { |
302 | if (kfifo_avail(fifo) < len + recsize) | ||
303 | return len + 1; | ||
182 | 304 | ||
183 | /* then get the rest (if any) from the beginning of the buffer */ | 305 | return __kfifo_from_user_data(fifo, from, len, recsize); |
184 | memcpy(buffer + l, fifo->buffer, len - l); | 306 | } |
307 | EXPORT_SYMBOL(__kfifo_from_user_n); | ||
185 | 308 | ||
186 | /* | 309 | /** |
187 | * Ensure that we remove the bytes from the kfifo -before- | 310 | * kfifo_from_user - puts some data from user space into the FIFO |
188 | * we update the fifo->out index. | 311 | * @fifo: the fifo to be used. |
189 | */ | 312 | * @from: pointer to the data to be added. |
313 | * @len: the length of the data to be added. | ||
314 | * | ||
315 | * This function copies at most @len bytes from the @from into the | ||
316 | * FIFO depending and returns the number of copied bytes. | ||
317 | * | ||
318 | * Note that with only one concurrent reader and one concurrent | ||
319 | * writer, you don't need extra locking to use these functions. | ||
320 | */ | ||
321 | unsigned int kfifo_from_user(struct kfifo *fifo, | ||
322 | const void __user *from, unsigned int len) | ||
323 | { | ||
324 | len = min(kfifo_avail(fifo), len); | ||
325 | len -= __kfifo_from_user_data(fifo, from, len, 0); | ||
326 | __kfifo_add_in(fifo, len); | ||
327 | return len; | ||
328 | } | ||
329 | EXPORT_SYMBOL(kfifo_from_user); | ||
190 | 330 | ||
191 | smp_mb(); | 331 | unsigned int __kfifo_from_user_generic(struct kfifo *fifo, |
332 | const void __user *from, unsigned int len, unsigned int recsize) | ||
333 | { | ||
334 | return __kfifo_from_user_rec(fifo, from, len, recsize); | ||
335 | } | ||
336 | EXPORT_SYMBOL(__kfifo_from_user_generic); | ||
192 | 337 | ||
193 | fifo->out += len; | 338 | unsigned int __kfifo_to_user_n(struct kfifo *fifo, |
339 | void __user *to, unsigned int len, unsigned int reclen, | ||
340 | unsigned int recsize) | ||
341 | { | ||
342 | unsigned int ret; | ||
343 | |||
344 | if (kfifo_len(fifo) < reclen + recsize) | ||
345 | return len; | ||
346 | |||
347 | ret = __kfifo_to_user_data(fifo, to, reclen, recsize); | ||
194 | 348 | ||
349 | if (likely(ret == 0)) | ||
350 | __kfifo_add_out(fifo, reclen + recsize); | ||
351 | |||
352 | return ret; | ||
353 | } | ||
354 | EXPORT_SYMBOL(__kfifo_to_user_n); | ||
355 | |||
356 | /** | ||
357 | * kfifo_to_user - gets data from the FIFO and write it to user space | ||
358 | * @fifo: the fifo to be used. | ||
359 | * @to: where the data must be copied. | ||
360 | * @len: the size of the destination buffer. | ||
361 | * | ||
362 | * This function copies at most @len bytes from the FIFO into the | ||
363 | * @to buffer and returns the number of copied bytes. | ||
364 | * | ||
365 | * Note that with only one concurrent reader and one concurrent | ||
366 | * writer, you don't need extra locking to use these functions. | ||
367 | */ | ||
368 | unsigned int kfifo_to_user(struct kfifo *fifo, | ||
369 | void __user *to, unsigned int len) | ||
370 | { | ||
371 | len = min(kfifo_len(fifo), len); | ||
372 | len -= __kfifo_to_user_data(fifo, to, len, 0); | ||
373 | __kfifo_add_out(fifo, len); | ||
195 | return len; | 374 | return len; |
196 | } | 375 | } |
197 | EXPORT_SYMBOL(__kfifo_get); | 376 | EXPORT_SYMBOL(kfifo_to_user); |
377 | |||
378 | unsigned int __kfifo_to_user_generic(struct kfifo *fifo, | ||
379 | void __user *to, unsigned int len, unsigned int recsize, | ||
380 | unsigned int *total) | ||
381 | { | ||
382 | return __kfifo_to_user_rec(fifo, to, len, recsize, total); | ||
383 | } | ||
384 | EXPORT_SYMBOL(__kfifo_to_user_generic); | ||
385 | |||
386 | unsigned int __kfifo_peek_generic(struct kfifo *fifo, unsigned int recsize) | ||
387 | { | ||
388 | if (recsize == 0) | ||
389 | return kfifo_avail(fifo); | ||
390 | |||
391 | return __kfifo_peek_n(fifo, recsize); | ||
392 | } | ||
393 | EXPORT_SYMBOL(__kfifo_peek_generic); | ||
394 | |||
395 | void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize) | ||
396 | { | ||
397 | __kfifo_skip_rec(fifo, recsize); | ||
398 | } | ||
399 | EXPORT_SYMBOL(__kfifo_skip_generic); | ||
400 | |||
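Note on the kfifo rework above: the calling convention changes. struct kfifo is now owned (or embedded) by the caller rather than allocated and returned by kfifo_alloc(), the spinlock argument is gone, and __kfifo_put()/__kfifo_get() become kfifo_in()/kfifo_out(). Below is a minimal sketch of the new usage, relying only on the signatures visible in this diff (kfifo_alloc, kfifo_in, kfifo_out, kfifo_free); the demo module wrapper and names are illustrative.

/* Minimal sketch of the reworked kfifo API, assuming only the functions
 * shown in the diff above; the module wrapper is illustrative. */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kfifo.h>

static struct kfifo demo_fifo;		/* embedded, no longer a pointer */

static int __init kfifo_demo_init(void)
{
	static const unsigned char msg[] = "hello";
	unsigned char out[8];
	unsigned int copied;
	int ret;

	/* 128 is already a power of two; kfifo_alloc() rounds up otherwise */
	ret = kfifo_alloc(&demo_fifo, 128, GFP_KERNEL);
	if (ret)
		return ret;

	kfifo_in(&demo_fifo, msg, sizeof(msg) - 1);
	copied = kfifo_out(&demo_fifo, out, sizeof(out));
	pr_info("kfifo demo: got %u bytes back\n", copied);

	kfifo_free(&demo_fifo);
	return 0;
}

static void __exit kfifo_demo_exit(void)
{
}

module_init(kfifo_demo_init);
module_exit(kfifo_demo_exit);
MODULE_LICENSE("GPL");

Single-producer/single-consumer use still needs no lock, as the kerneldoc above notes; anything more concurrent now needs an external lock, since the old spinlock parameter no longer exists.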
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 25b103190364..bf0e231d9702 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -520,13 +520,15 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, | |||
520 | return -ENOMEM; | 520 | return -ENOMEM; |
521 | 521 | ||
522 | ret = call_usermodehelper_stdinpipe(sub_info, filp); | 522 | ret = call_usermodehelper_stdinpipe(sub_info, filp); |
523 | if (ret < 0) | 523 | if (ret < 0) { |
524 | goto out; | 524 | call_usermodehelper_freeinfo(sub_info); |
525 | return ret; | ||
526 | } | ||
525 | 527 | ||
526 | return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); | 528 | ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); |
529 | if (ret < 0) /* Failed to execute helper, close pipe */ | ||
530 | filp_close(*filp, NULL); | ||
527 | 531 | ||
528 | out: | ||
529 | call_usermodehelper_freeinfo(sub_info); | ||
530 | return ret; | 532 | return ret; |
531 | } | 533 | } |
532 | EXPORT_SYMBOL(call_usermodehelper_pipe); | 534 | EXPORT_SYMBOL(call_usermodehelper_pipe); |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e5342a344c43..b7df302a0204 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1035,7 +1035,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
1035 | /* Pre-allocate memory for max kretprobe instances */ | 1035 | /* Pre-allocate memory for max kretprobe instances */ |
1036 | if (rp->maxactive <= 0) { | 1036 | if (rp->maxactive <= 0) { |
1037 | #ifdef CONFIG_PREEMPT | 1037 | #ifdef CONFIG_PREEMPT |
1038 | rp->maxactive = max(10, 2 * num_possible_cpus()); | 1038 | rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus()); |
1039 | #else | 1039 | #else |
1040 | rp->maxactive = num_possible_cpus(); | 1040 | rp->maxactive = num_possible_cpus(); |
1041 | #endif | 1041 | #endif |
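Note on the kprobes hunk above: max() is swapped for max_t() because the kernel's max() macro type-checks its operands. 10 is a signed int while 2 * num_possible_cpus() is unsigned, so the strict comparison trips a build warning; max_t() casts both sides to the named type first. A tiny illustrative helper (the function name here is made up):

#include <linux/kernel.h>	/* max_t() */
#include <linux/cpumask.h>	/* num_possible_cpus() */

static unsigned int default_maxactive(void)
{
	/* max(10, 2 * num_possible_cpus()) mixes int and unsigned int and
	 * trips max()'s type check; max_t() forces a common type first. */
	return max_t(unsigned int, 10, 2 * num_possible_cpus());
}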
diff --git a/kernel/kthread.c b/kernel/kthread.c
index ab7ae57773e1..fbb6222fe7e0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -150,6 +150,29 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
150 | EXPORT_SYMBOL(kthread_create); | 150 | EXPORT_SYMBOL(kthread_create); |
151 | 151 | ||
152 | /** | 152 | /** |
153 | * kthread_bind - bind a just-created kthread to a cpu. | ||
154 | * @p: thread created by kthread_create(). | ||
155 | * @cpu: cpu (might not be online, must be possible) for @k to run on. | ||
156 | * | ||
157 | * Description: This function is equivalent to set_cpus_allowed(), | ||
158 | * except that @cpu doesn't need to be online, and the thread must be | ||
159 | * stopped (i.e., just returned from kthread_create()). | ||
160 | */ | ||
161 | void kthread_bind(struct task_struct *p, unsigned int cpu) | ||
162 | { | ||
163 | /* Must have done schedule() in kthread() before we set_task_cpu */ | ||
164 | if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { | ||
165 | WARN_ON(1); | ||
166 | return; | ||
167 | } | ||
168 | |||
169 | p->cpus_allowed = cpumask_of_cpu(cpu); | ||
170 | p->rt.nr_cpus_allowed = 1; | ||
171 | p->flags |= PF_THREAD_BOUND; | ||
172 | } | ||
173 | EXPORT_SYMBOL(kthread_bind); | ||
174 | |||
175 | /** | ||
153 | * kthread_stop - stop a thread created by kthread_create(). | 176 | * kthread_stop - stop a thread created by kthread_create(). |
154 | * @k: thread created by kthread_create(). | 177 | * @k: thread created by kthread_create(). |
155 | * | 178 | * |
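Note on the kthread.c hunk above: kthread_bind() moves here from kernel/sched.c (the matching removal appears in the kernel/sched.c hunk further down) and, in this version, only fixes up cpus_allowed on the still-stopped thread, without taking the runqueue lock or calling set_task_cpu(); with the reworked wakeup path the task is placed when it first runs. For context, a sketch of the usual create/bind/wake pattern a caller would use; the worker and its loop are illustrative, not part of this patch.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static int my_worker(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static struct task_struct *start_pinned_worker(unsigned int cpu)
{
	struct task_struct *tsk;

	tsk = kthread_create(my_worker, NULL, "my_worker/%u", cpu);
	if (IS_ERR(tsk))
		return tsk;

	/* The thread has not run yet, so binding here is race-free. */
	kthread_bind(tsk, cpu);
	wake_up_process(tsk);
	return tsk;
}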
diff --git a/kernel/module.c b/kernel/module.c
index a65dc787a27b..f82386bd9ee9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1010,6 +1010,12 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, | |||
1010 | * J. Corbet <corbet@lwn.net> | 1010 | * J. Corbet <corbet@lwn.net> |
1011 | */ | 1011 | */ |
1012 | #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) | 1012 | #if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) |
1013 | |||
1014 | static inline bool sect_empty(const Elf_Shdr *sect) | ||
1015 | { | ||
1016 | return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0; | ||
1017 | } | ||
1018 | |||
1013 | struct module_sect_attr | 1019 | struct module_sect_attr |
1014 | { | 1020 | { |
1015 | struct module_attribute mattr; | 1021 | struct module_attribute mattr; |
@@ -1051,8 +1057,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, | |||
1051 | 1057 | ||
1052 | /* Count loaded sections and allocate structures */ | 1058 | /* Count loaded sections and allocate structures */ |
1053 | for (i = 0; i < nsect; i++) | 1059 | for (i = 0; i < nsect; i++) |
1054 | if (sechdrs[i].sh_flags & SHF_ALLOC | 1060 | if (!sect_empty(&sechdrs[i])) |
1055 | && sechdrs[i].sh_size) | ||
1056 | nloaded++; | 1061 | nloaded++; |
1057 | size[0] = ALIGN(sizeof(*sect_attrs) | 1062 | size[0] = ALIGN(sizeof(*sect_attrs) |
1058 | + nloaded * sizeof(sect_attrs->attrs[0]), | 1063 | + nloaded * sizeof(sect_attrs->attrs[0]), |
@@ -1070,9 +1075,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, | |||
1070 | sattr = §_attrs->attrs[0]; | 1075 | sattr = §_attrs->attrs[0]; |
1071 | gattr = §_attrs->grp.attrs[0]; | 1076 | gattr = §_attrs->grp.attrs[0]; |
1072 | for (i = 0; i < nsect; i++) { | 1077 | for (i = 0; i < nsect; i++) { |
1073 | if (! (sechdrs[i].sh_flags & SHF_ALLOC)) | 1078 | if (sect_empty(&sechdrs[i])) |
1074 | continue; | ||
1075 | if (!sechdrs[i].sh_size) | ||
1076 | continue; | 1079 | continue; |
1077 | sattr->address = sechdrs[i].sh_addr; | 1080 | sattr->address = sechdrs[i].sh_addr; |
1078 | sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, | 1081 | sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, |
@@ -1156,7 +1159,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, | |||
1156 | /* Count notes sections and allocate structures. */ | 1159 | /* Count notes sections and allocate structures. */ |
1157 | notes = 0; | 1160 | notes = 0; |
1158 | for (i = 0; i < nsect; i++) | 1161 | for (i = 0; i < nsect; i++) |
1159 | if ((sechdrs[i].sh_flags & SHF_ALLOC) && | 1162 | if (!sect_empty(&sechdrs[i]) && |
1160 | (sechdrs[i].sh_type == SHT_NOTE)) | 1163 | (sechdrs[i].sh_type == SHT_NOTE)) |
1161 | ++notes; | 1164 | ++notes; |
1162 | 1165 | ||
@@ -1172,7 +1175,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect, | |||
1172 | notes_attrs->notes = notes; | 1175 | notes_attrs->notes = notes; |
1173 | nattr = ¬es_attrs->attrs[0]; | 1176 | nattr = ¬es_attrs->attrs[0]; |
1174 | for (loaded = i = 0; i < nsect; ++i) { | 1177 | for (loaded = i = 0; i < nsect; ++i) { |
1175 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | 1178 | if (sect_empty(&sechdrs[i])) |
1176 | continue; | 1179 | continue; |
1177 | if (sechdrs[i].sh_type == SHT_NOTE) { | 1180 | if (sechdrs[i].sh_type == SHT_NOTE) { |
1178 | nattr->attr.name = mod->sect_attrs->attrs[loaded].name; | 1181 | nattr->attr.name = mod->sect_attrs->attrs[loaded].name; |
@@ -1910,9 +1913,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, | |||
1910 | unsigned int i; | 1913 | unsigned int i; |
1911 | 1914 | ||
1912 | /* only scan the sections containing data */ | 1915 | /* only scan the sections containing data */ |
1913 | kmemleak_scan_area(mod->module_core, (unsigned long)mod - | 1916 | kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); |
1914 | (unsigned long)mod->module_core, | ||
1915 | sizeof(struct module), GFP_KERNEL); | ||
1916 | 1917 | ||
1917 | for (i = 1; i < hdr->e_shnum; i++) { | 1918 | for (i = 1; i < hdr->e_shnum; i++) { |
1918 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | 1919 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) |
@@ -1921,8 +1922,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, | |||
1921 | && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) | 1922 | && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) |
1922 | continue; | 1923 | continue; |
1923 | 1924 | ||
1924 | kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - | 1925 | kmemleak_scan_area((void *)sechdrs[i].sh_addr, |
1925 | (unsigned long)mod->module_core, | ||
1926 | sechdrs[i].sh_size, GFP_KERNEL); | 1926 | sechdrs[i].sh_size, GFP_KERNEL); |
1927 | } | 1927 | } |
1928 | } | 1928 | } |
@@ -2250,6 +2250,12 @@ static noinline struct module *load_module(void __user *umod, | |||
2250 | "_ftrace_events", | 2250 | "_ftrace_events", |
2251 | sizeof(*mod->trace_events), | 2251 | sizeof(*mod->trace_events), |
2252 | &mod->num_trace_events); | 2252 | &mod->num_trace_events); |
2253 | /* | ||
2254 | * This section contains pointers to allocated objects in the trace | ||
2255 | * code and not scanning it leads to false positives. | ||
2256 | */ | ||
2257 | kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * | ||
2258 | mod->num_trace_events, GFP_KERNEL); | ||
2253 | #endif | 2259 | #endif |
2254 | #ifdef CONFIG_FTRACE_MCOUNT_RECORD | 2260 | #ifdef CONFIG_FTRACE_MCOUNT_RECORD |
2255 | /* sechdrs[0].sh_size is always zero */ | 2261 | /* sechdrs[0].sh_size is always zero */ |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 5b987b4a98a8..27f69a04541d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1618,7 +1618,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1618 | * offline CPU and activate it when the CPU comes up, but | 1618 | * offline CPU and activate it when the CPU comes up, but |
1619 | * that's for later. | 1619 | * that's for later. |
1620 | */ | 1620 | */ |
1621 | if (!cpu_isset(cpu, cpu_online_map)) | 1621 | if (!cpu_online(cpu)) |
1622 | return ERR_PTR(-ENODEV); | 1622 | return ERR_PTR(-ENODEV); |
1623 | 1623 | ||
1624 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 1624 | cpuctx = &per_cpu(perf_cpu_context, cpu); |
@@ -4725,7 +4725,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
4725 | if (IS_ERR(event)) | 4725 | if (IS_ERR(event)) |
4726 | goto err_put_context; | 4726 | goto err_put_context; |
4727 | 4727 | ||
4728 | err = anon_inode_getfd("[perf_event]", &perf_fops, event, 0); | 4728 | err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR); |
4729 | if (err < 0) | 4729 | if (err < 0) |
4730 | goto err_free_put_context; | 4730 | goto err_free_put_context; |
4731 | 4731 | ||
@@ -5149,7 +5149,7 @@ int perf_event_init_task(struct task_struct *child) | |||
5149 | GFP_KERNEL); | 5149 | GFP_KERNEL); |
5150 | if (!child_ctx) { | 5150 | if (!child_ctx) { |
5151 | ret = -ENOMEM; | 5151 | ret = -ENOMEM; |
5152 | goto exit; | 5152 | break; |
5153 | } | 5153 | } |
5154 | 5154 | ||
5155 | __perf_event_init_context(child_ctx, child); | 5155 | __perf_event_init_context(child_ctx, child); |
@@ -5165,7 +5165,7 @@ int perf_event_init_task(struct task_struct *child) | |||
5165 | } | 5165 | } |
5166 | } | 5166 | } |
5167 | 5167 | ||
5168 | if (inherited_all) { | 5168 | if (child_ctx && inherited_all) { |
5169 | /* | 5169 | /* |
5170 | * Mark the child context as a clone of the parent | 5170 | * Mark the child context as a clone of the parent |
5171 | * context, or of whatever the parent is a clone of. | 5171 | * context, or of whatever the parent is a clone of. |
@@ -5185,7 +5185,6 @@ int perf_event_init_task(struct task_struct *child) | |||
5185 | get_ctx(child_ctx->parent_ctx); | 5185 | get_ctx(child_ctx->parent_ctx); |
5186 | } | 5186 | } |
5187 | 5187 | ||
5188 | exit: | ||
5189 | mutex_unlock(&parent_ctx->mutex); | 5188 | mutex_unlock(&parent_ctx->mutex); |
5190 | 5189 | ||
5191 | perf_unpin_context(parent_ctx); | 5190 | perf_unpin_context(parent_ctx); |
diff --git a/kernel/printk.c b/kernel/printk.c
index 1ded8e7dd19b..17463ca2e229 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1412,7 +1412,7 @@ static LIST_HEAD(dump_list); | |||
1412 | 1412 | ||
1413 | /** | 1413 | /** |
1414 | * kmsg_dump_register - register a kernel log dumper. | 1414 | * kmsg_dump_register - register a kernel log dumper. |
1415 | * @dump: pointer to the kmsg_dumper structure | 1415 | * @dumper: pointer to the kmsg_dumper structure |
1416 | * | 1416 | * |
1417 | * Adds a kernel log dumper to the system. The dump callback in the | 1417 | * Adds a kernel log dumper to the system. The dump callback in the |
1418 | * structure will be called when the kernel oopses or panics and must be | 1418 | * structure will be called when the kernel oopses or panics and must be |
@@ -1442,7 +1442,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_register); | |||
1442 | 1442 | ||
1443 | /** | 1443 | /** |
1444 | * kmsg_dump_unregister - unregister a kmsg dumper. | 1444 | * kmsg_dump_unregister - unregister a kmsg dumper. |
1445 | * @dump: pointer to the kmsg_dumper structure | 1445 | * @dumper: pointer to the kmsg_dumper structure |
1446 | * | 1446 | * |
1447 | * Removes a dump device from the system. Returns zero on success and | 1447 | * Removes a dump device from the system. Returns zero on success and |
1448 | * %-EINVAL otherwise. | 1448 | * %-EINVAL otherwise. |
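Note on the printk.c hunks above: they only fix the kernel-doc parameter name (@dump becomes @dumper) for kmsg_dump_register()/kmsg_dump_unregister(). For readers new to the interface, a hedged registration sketch follows; the dump callback prototype below is how I recall this era's <linux/kmsg_dump.h> (two segments of the log buffer are handed over) and should be verified against the header before use.

#include <linux/module.h>
#include <linux/kmsg_dump.h>

/* Assumed callback prototype for this kernel era; check kmsg_dump.h. */
static void demo_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
		      const char *s1, unsigned long l1,
		      const char *s2, unsigned long l2)
{
	/* persist s1[0..l1) and s2[0..l2) somewhere non-volatile */
}

static struct kmsg_dumper demo_dumper = {
	.dump = demo_dump,
};

static int __init demo_dump_init(void)
{
	return kmsg_dump_register(&demo_dumper);
}

static void __exit demo_dump_exit(void)
{
	kmsg_dump_unregister(&demo_dumper);
}

module_init(demo_dump_init);
module_exit(demo_dump_exit);
MODULE_LICENSE("GPL");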
diff --git a/kernel/resource.c b/kernel/resource.c
index dc15686b7a77..af96c1e4b54b 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -308,37 +308,37 @@ static int find_resource(struct resource *root, struct resource *new, | |||
308 | void *alignf_data) | 308 | void *alignf_data) |
309 | { | 309 | { |
310 | struct resource *this = root->child; | 310 | struct resource *this = root->child; |
311 | resource_size_t start, end; | 311 | struct resource tmp = *new; |
312 | 312 | ||
313 | start = root->start; | 313 | tmp.start = root->start; |
314 | /* | 314 | /* |
315 | * Skip past an allocated resource that starts at 0, since the assignment | 315 | * Skip past an allocated resource that starts at 0, since the assignment |
316 | * of this->start - 1 to new->end below would cause an underflow. | 316 | * of this->start - 1 to tmp->end below would cause an underflow. |
317 | */ | 317 | */ |
318 | if (this && this->start == 0) { | 318 | if (this && this->start == 0) { |
319 | start = this->end + 1; | 319 | tmp.start = this->end + 1; |
320 | this = this->sibling; | 320 | this = this->sibling; |
321 | } | 321 | } |
322 | for(;;) { | 322 | for(;;) { |
323 | if (this) | 323 | if (this) |
324 | end = this->start - 1; | 324 | tmp.end = this->start - 1; |
325 | else | 325 | else |
326 | end = root->end; | 326 | tmp.end = root->end; |
327 | if (start < min) | 327 | if (tmp.start < min) |
328 | start = min; | 328 | tmp.start = min; |
329 | if (end > max) | 329 | if (tmp.end > max) |
330 | end = max; | 330 | tmp.end = max; |
331 | start = ALIGN(start, align); | 331 | tmp.start = ALIGN(tmp.start, align); |
332 | if (alignf) | 332 | if (alignf) |
333 | alignf(alignf_data, new, size, align); | 333 | alignf(alignf_data, &tmp, size, align); |
334 | if (start < end && end - start >= size - 1) { | 334 | if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { |
335 | new->start = start; | 335 | new->start = tmp.start; |
336 | new->end = start + size - 1; | 336 | new->end = tmp.start + size - 1; |
337 | return 0; | 337 | return 0; |
338 | } | 338 | } |
339 | if (!this) | 339 | if (!this) |
340 | break; | 340 | break; |
341 | start = this->end + 1; | 341 | tmp.start = this->end + 1; |
342 | this = this->sibling; | 342 | this = this->sibling; |
343 | } | 343 | } |
344 | return -EBUSY; | 344 | return -EBUSY; |
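Note on the resource.c change above: find_resource() now carries the candidate window in a scratch struct resource tmp and hands &tmp (rather than the caller's new resource) to the alignf callback, so the callback adjusts the range actually being tried. Below is a sketch of what such a callback can look like, with the prototype inferred from the alignf(alignf_data, &tmp, size, align) call visible above; the floor logic and names are invented for illustration.

#include <linux/kernel.h>
#include <linux/ioport.h>

/* Prototype inferred from the call site above; see <linux/ioport.h>. */
static void demo_align(void *data, struct resource *res,
		       resource_size_t size, resource_size_t align)
{
	resource_size_t floor = *(resource_size_t *)data;

	/* e.g. refuse candidate windows below a platform-specific floor */
	if (res->start < floor)
		res->start = floor;
}

A callback like this would normally reach find_resource() through the alignf/alignf_data arguments of allocate_resource().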
diff --git a/kernel/sched.c b/kernel/sched.c
index d6527ac0f6e7..e507af086b42 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2002,39 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
2002 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2002 | p->sched_class->prio_changed(rq, p, oldprio, running); |
2003 | } | 2003 | } |
2004 | 2004 | ||
2005 | /** | ||
2006 | * kthread_bind - bind a just-created kthread to a cpu. | ||
2007 | * @p: thread created by kthread_create(). | ||
2008 | * @cpu: cpu (might not be online, must be possible) for @k to run on. | ||
2009 | * | ||
2010 | * Description: This function is equivalent to set_cpus_allowed(), | ||
2011 | * except that @cpu doesn't need to be online, and the thread must be | ||
2012 | * stopped (i.e., just returned from kthread_create()). | ||
2013 | * | ||
2014 | * Function lives here instead of kthread.c because it messes with | ||
2015 | * scheduler internals which require locking. | ||
2016 | */ | ||
2017 | void kthread_bind(struct task_struct *p, unsigned int cpu) | ||
2018 | { | ||
2019 | struct rq *rq = cpu_rq(cpu); | ||
2020 | unsigned long flags; | ||
2021 | |||
2022 | /* Must have done schedule() in kthread() before we set_task_cpu */ | ||
2023 | if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { | ||
2024 | WARN_ON(1); | ||
2025 | return; | ||
2026 | } | ||
2027 | |||
2028 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
2029 | update_rq_clock(rq); | ||
2030 | set_task_cpu(p, cpu); | ||
2031 | p->cpus_allowed = cpumask_of_cpu(cpu); | ||
2032 | p->rt.nr_cpus_allowed = 1; | ||
2033 | p->flags |= PF_THREAD_BOUND; | ||
2034 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
2035 | } | ||
2036 | EXPORT_SYMBOL(kthread_bind); | ||
2037 | |||
2038 | #ifdef CONFIG_SMP | 2005 | #ifdef CONFIG_SMP |
2039 | /* | 2006 | /* |
2040 | * Is this task likely cache-hot: | 2007 | * Is this task likely cache-hot: |
@@ -2044,6 +2011,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2044 | { | 2011 | { |
2045 | s64 delta; | 2012 | s64 delta; |
2046 | 2013 | ||
2014 | if (p->sched_class != &fair_sched_class) | ||
2015 | return 0; | ||
2016 | |||
2047 | /* | 2017 | /* |
2048 | * Buddy candidates are cache hot: | 2018 | * Buddy candidates are cache hot: |
2049 | */ | 2019 | */ |
@@ -2052,9 +2022,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2052 | &p->se == cfs_rq_of(&p->se)->last)) | 2022 | &p->se == cfs_rq_of(&p->se)->last)) |
2053 | return 1; | 2023 | return 1; |
2054 | 2024 | ||
2055 | if (p->sched_class != &fair_sched_class) | ||
2056 | return 0; | ||
2057 | |||
2058 | if (sysctl_sched_migration_cost == -1) | 2025 | if (sysctl_sched_migration_cost == -1) |
2059 | return 1; | 2026 | return 1; |
2060 | if (sysctl_sched_migration_cost == 0) | 2027 | if (sysctl_sched_migration_cost == 0) |
@@ -2065,22 +2032,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2065 | return delta < (s64)sysctl_sched_migration_cost; | 2032 | return delta < (s64)sysctl_sched_migration_cost; |
2066 | } | 2033 | } |
2067 | 2034 | ||
2068 | |||
2069 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 2035 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
2070 | { | 2036 | { |
2071 | int old_cpu = task_cpu(p); | 2037 | #ifdef CONFIG_SCHED_DEBUG |
2072 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), | 2038 | /* |
2073 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | 2039 | * We should never call set_task_cpu() on a blocked task, |
2040 | * ttwu() will sort out the placement. | ||
2041 | */ | ||
2042 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && | ||
2043 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); | ||
2044 | #endif | ||
2074 | 2045 | ||
2075 | trace_sched_migrate_task(p, new_cpu); | 2046 | trace_sched_migrate_task(p, new_cpu); |
2076 | 2047 | ||
2077 | if (old_cpu != new_cpu) { | 2048 | if (task_cpu(p) != new_cpu) { |
2078 | p->se.nr_migrations++; | 2049 | p->se.nr_migrations++; |
2079 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, | 2050 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); |
2080 | 1, 1, NULL, 0); | ||
2081 | } | 2051 | } |
2082 | p->se.vruntime -= old_cfsrq->min_vruntime - | ||
2083 | new_cfsrq->min_vruntime; | ||
2084 | 2052 | ||
2085 | __set_task_cpu(p, new_cpu); | 2053 | __set_task_cpu(p, new_cpu); |
2086 | } | 2054 | } |
@@ -2105,13 +2073,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
2105 | 2073 | ||
2106 | /* | 2074 | /* |
2107 | * If the task is not on a runqueue (and not running), then | 2075 | * If the task is not on a runqueue (and not running), then |
2108 | * it is sufficient to simply update the task's cpu field. | 2076 | * the next wake-up will properly place the task. |
2109 | */ | 2077 | */ |
2110 | if (!p->se.on_rq && !task_running(rq, p)) { | 2078 | if (!p->se.on_rq && !task_running(rq, p)) |
2111 | update_rq_clock(rq); | ||
2112 | set_task_cpu(p, dest_cpu); | ||
2113 | return 0; | 2079 | return 0; |
2114 | } | ||
2115 | 2080 | ||
2116 | init_completion(&req->done); | 2081 | init_completion(&req->done); |
2117 | req->task = p; | 2082 | req->task = p; |
@@ -2317,10 +2282,73 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2317 | } | 2282 | } |
2318 | 2283 | ||
2319 | #ifdef CONFIG_SMP | 2284 | #ifdef CONFIG_SMP |
2285 | static int select_fallback_rq(int cpu, struct task_struct *p) | ||
2286 | { | ||
2287 | int dest_cpu; | ||
2288 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); | ||
2289 | |||
2290 | /* Look for allowed, online CPU in same node. */ | ||
2291 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | ||
2292 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | ||
2293 | return dest_cpu; | ||
2294 | |||
2295 | /* Any allowed, online CPU? */ | ||
2296 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); | ||
2297 | if (dest_cpu < nr_cpu_ids) | ||
2298 | return dest_cpu; | ||
2299 | |||
2300 | /* No more Mr. Nice Guy. */ | ||
2301 | if (dest_cpu >= nr_cpu_ids) { | ||
2302 | rcu_read_lock(); | ||
2303 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | ||
2304 | rcu_read_unlock(); | ||
2305 | dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); | ||
2306 | |||
2307 | /* | ||
2308 | * Don't tell them about moving exiting tasks or | ||
2309 | * kernel threads (both mm NULL), since they never | ||
2310 | * leave kernel. | ||
2311 | */ | ||
2312 | if (p->mm && printk_ratelimit()) { | ||
2313 | printk(KERN_INFO "process %d (%s) no " | ||
2314 | "longer affine to cpu%d\n", | ||
2315 | task_pid_nr(p), p->comm, cpu); | ||
2316 | } | ||
2317 | } | ||
2318 | |||
2319 | return dest_cpu; | ||
2320 | } | ||
2321 | |||
2322 | /* | ||
2323 | * Called from: | ||
2324 | * | ||
2325 | * - fork, @p is stable because it isn't on the tasklist yet | ||
2326 | * | ||
2327 | * - exec, @p is unstable, retry loop | ||
2328 | * | ||
2329 | * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so | ||
2330 | * we should be good. | ||
2331 | */ | ||
2320 | static inline | 2332 | static inline |
2321 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | 2333 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) |
2322 | { | 2334 | { |
2323 | return p->sched_class->select_task_rq(p, sd_flags, wake_flags); | 2335 | int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); |
2336 | |||
2337 | /* | ||
2338 | * In order not to call set_task_cpu() on a blocking task we need | ||
2339 | * to rely on ttwu() to place the task on a valid ->cpus_allowed | ||
2340 | * cpu. | ||
2341 | * | ||
2342 | * Since this is common to all placement strategies, this lives here. | ||
2343 | * | ||
2344 | * [ this allows ->select_task() to simply return task_cpu(p) and | ||
2345 | * not worry about this generic constraint ] | ||
2346 | */ | ||
2347 | if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || | ||
2348 | !cpu_online(cpu))) | ||
2349 | cpu = select_fallback_rq(task_cpu(p), p); | ||
2350 | |||
2351 | return cpu; | ||
2324 | } | 2352 | } |
2325 | #endif | 2353 | #endif |
2326 | 2354 | ||
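The new select_fallback_rq()/select_task_rq() pair above implements a three-stage fallback: prefer an allowed, active CPU on the same node, then any allowed, active CPU, and only then widen the allowed mask. Below is a small standalone C sketch of that ordering, not kernel code; plain 64-bit integers stand in for struct cpumask and the widening step is reduced to "fall back to every active CPU".

/*
 * Standalone sketch (not kernel code) of the fallback ordering used by
 * select_fallback_rq() above.  Plain 64-bit masks stand in for
 * struct cpumask.
 */
#include <stdint.h>
#include <stdio.h>

#define NO_CPU (-1)

static int first_cpu(uint64_t mask)
{
    for (int cpu = 0; cpu < 64; cpu++)
        if (mask & (1ULL << cpu))
            return cpu;
    return NO_CPU;
}

/* allowed/active/node_mask mimic p->cpus_allowed, cpu_active_mask, nodemask */
static int pick_fallback_cpu(uint64_t allowed, uint64_t active, uint64_t node_mask)
{
    int cpu;

    /* 1) allowed, active CPU in the same node */
    cpu = first_cpu(allowed & active & node_mask);
    if (cpu != NO_CPU)
        return cpu;

    /* 2) any allowed, active CPU */
    cpu = first_cpu(allowed & active);
    if (cpu != NO_CPU)
        return cpu;

    /* 3) "no more Mr. Nice Guy": widen the allowed set and retry */
    allowed = active;        /* stands in for cpuset_cpus_allowed_locked() */
    return first_cpu(allowed & active);
}

int main(void)
{
    uint64_t active = 0xf0;      /* CPUs 4-7 online */
    uint64_t allowed = 0x03;     /* task bound to offline CPUs 0-1 */
    uint64_t node0 = 0x0f;       /* CPUs 0-3 share a node */

    printf("fallback cpu: %d\n", pick_fallback_cpu(allowed, active, node0));
    return 0;
}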
@@ -2375,6 +2403,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2375 | if (task_contributes_to_load(p)) | 2403 | if (task_contributes_to_load(p)) |
2376 | rq->nr_uninterruptible--; | 2404 | rq->nr_uninterruptible--; |
2377 | p->state = TASK_WAKING; | 2405 | p->state = TASK_WAKING; |
2406 | |||
2407 | if (p->sched_class->task_waking) | ||
2408 | p->sched_class->task_waking(rq, p); | ||
2409 | |||
2378 | __task_rq_unlock(rq); | 2410 | __task_rq_unlock(rq); |
2379 | 2411 | ||
2380 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2412 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
@@ -2438,8 +2470,8 @@ out_running: | |||
2438 | 2470 | ||
2439 | p->state = TASK_RUNNING; | 2471 | p->state = TASK_RUNNING; |
2440 | #ifdef CONFIG_SMP | 2472 | #ifdef CONFIG_SMP |
2441 | if (p->sched_class->task_wake_up) | 2473 | if (p->sched_class->task_woken) |
2442 | p->sched_class->task_wake_up(rq, p); | 2474 | p->sched_class->task_woken(rq, p); |
2443 | 2475 | ||
2444 | if (unlikely(rq->idle_stamp)) { | 2476 | if (unlikely(rq->idle_stamp)) { |
2445 | u64 delta = rq->clock - rq->idle_stamp; | 2477 | u64 delta = rq->clock - rq->idle_stamp; |
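The rename from task_wake_up to task_woken, together with the new task_waking call before the runqueue is chosen, follows the usual optional-hook convention: a scheduling class fills in only the callbacks it cares about and callers test for NULL. A minimal user-space sketch of that convention, with invented struct and function names:

#include <stdio.h>

struct rq { int cpu; };
struct task;

struct sched_class {
    void (*task_waking)(struct rq *rq, struct task *p);    /* optional hook */
    void (*task_woken)(struct rq *rq, struct task *p);     /* optional hook */
};

struct task {
    const char *comm;
    const struct sched_class *cls;
};

static void rt_task_woken(struct rq *rq, struct task *p)
{
    printf("%s woken on cpu%d, consider pushing it elsewhere\n", p->comm, rq->cpu);
}

/* This RT-like class implements only the post-wakeup hook. */
static const struct sched_class rt_like_class = {
    .task_woken = rt_task_woken,
};

static void wake_up(struct rq *rq, struct task *p)
{
    if (p->cls->task_waking)       /* before the runqueue is chosen */
        p->cls->task_waking(rq, p);

    /* ... enqueue the task ... */

    if (p->cls->task_woken)        /* after it is on the runqueue */
        p->cls->task_woken(rq, p);
}

int main(void)
{
    struct rq rq = { .cpu = 2 };
    struct task p = { .comm = "worker", .cls = &rt_like_class };

    wake_up(&rq, &p);
    return 0;
}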
@@ -2538,14 +2570,6 @@ static void __sched_fork(struct task_struct *p) | |||
2538 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2570 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
2539 | INIT_HLIST_HEAD(&p->preempt_notifiers); | 2571 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
2540 | #endif | 2572 | #endif |
2541 | |||
2542 | /* | ||
2543 | * We mark the process as running here, but have not actually | ||
2544 | * inserted it onto the runqueue yet. This guarantees that | ||
2545 | * nobody will actually run it, and a signal or other external | ||
2546 | * event cannot wake it up and insert it on the runqueue either. | ||
2547 | */ | ||
2548 | p->state = TASK_RUNNING; | ||
2549 | } | 2573 | } |
2550 | 2574 | ||
2551 | /* | 2575 | /* |
@@ -2556,6 +2580,12 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2556 | int cpu = get_cpu(); | 2580 | int cpu = get_cpu(); |
2557 | 2581 | ||
2558 | __sched_fork(p); | 2582 | __sched_fork(p); |
2583 | /* | ||
2584 | * We mark the process as waking here. This guarantees that | ||
2585 | * nobody will actually run it, and a signal or other external | ||
2586 | * event cannot wake it up and insert it on the runqueue either. | ||
2587 | */ | ||
2588 | p->state = TASK_WAKING; | ||
2559 | 2589 | ||
2560 | /* | 2590 | /* |
2561 | * Revert to default priority/policy on fork if requested. | 2591 | * Revert to default priority/policy on fork if requested. |
@@ -2624,14 +2654,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2624 | struct rq *rq; | 2654 | struct rq *rq; |
2625 | 2655 | ||
2626 | rq = task_rq_lock(p, &flags); | 2656 | rq = task_rq_lock(p, &flags); |
2627 | BUG_ON(p->state != TASK_RUNNING); | 2657 | BUG_ON(p->state != TASK_WAKING); |
2658 | p->state = TASK_RUNNING; | ||
2628 | update_rq_clock(rq); | 2659 | update_rq_clock(rq); |
2629 | activate_task(rq, p, 0); | 2660 | activate_task(rq, p, 0); |
2630 | trace_sched_wakeup_new(rq, p, 1); | 2661 | trace_sched_wakeup_new(rq, p, 1); |
2631 | check_preempt_curr(rq, p, WF_FORK); | 2662 | check_preempt_curr(rq, p, WF_FORK); |
2632 | #ifdef CONFIG_SMP | 2663 | #ifdef CONFIG_SMP |
2633 | if (p->sched_class->task_wake_up) | 2664 | if (p->sched_class->task_woken) |
2634 | p->sched_class->task_wake_up(rq, p); | 2665 | p->sched_class->task_woken(rq, p); |
2635 | #endif | 2666 | #endif |
2636 | task_rq_unlock(rq, &flags); | 2667 | task_rq_unlock(rq, &flags); |
2637 | } | 2668 | } |
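Taken together with the __sched_fork()/sched_fork() hunks above, the fork path now hands the child to wake_up_new_task() in TASK_WAKING rather than TASK_RUNNING. A toy user-space model of that hand-off, with assert() standing in for the BUG_ON():

#include <assert.h>
#include <stdio.h>

enum task_state { TASK_RUNNING, TASK_WAKING, TASK_INTERRUPTIBLE };

struct task { enum task_state state; int on_rq; };

static void sched_fork(struct task *p)
{
    /* nobody may run or wake the child until wake_up_new_task() */
    p->state = TASK_WAKING;
    p->on_rq = 0;
}

static void wake_up_new_task(struct task *p)
{
    assert(p->state == TASK_WAKING);   /* mirrors the BUG_ON() */
    p->state = TASK_RUNNING;
    p->on_rq = 1;                      /* activate_task() analogue */
}

int main(void)
{
    struct task child;

    sched_fork(&child);
    wake_up_new_task(&child);
    printf("child state=%d on_rq=%d\n", child.state, child.on_rq);
    return 0;
}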
@@ -3101,21 +3132,36 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
3101 | } | 3132 | } |
3102 | 3133 | ||
3103 | /* | 3134 | /* |
3104 | * If dest_cpu is allowed for this process, migrate the task to it. | 3135 | * sched_exec - execve() is a valuable balancing opportunity, because at |
3105 | * This is accomplished by forcing the cpu_allowed mask to only | 3136 | * this point the task has the smallest effective memory and cache footprint. |
3106 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then | ||
3107 | * the cpu_allowed mask is restored. | ||
3108 | */ | 3137 | */ |
3109 | static void sched_migrate_task(struct task_struct *p, int dest_cpu) | 3138 | void sched_exec(void) |
3110 | { | 3139 | { |
3140 | struct task_struct *p = current; | ||
3111 | struct migration_req req; | 3141 | struct migration_req req; |
3142 | int dest_cpu, this_cpu; | ||
3112 | unsigned long flags; | 3143 | unsigned long flags; |
3113 | struct rq *rq; | 3144 | struct rq *rq; |
3114 | 3145 | ||
3146 | again: | ||
3147 | this_cpu = get_cpu(); | ||
3148 | dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0); | ||
3149 | if (dest_cpu == this_cpu) { | ||
3150 | put_cpu(); | ||
3151 | return; | ||
3152 | } | ||
3153 | |||
3115 | rq = task_rq_lock(p, &flags); | 3154 | rq = task_rq_lock(p, &flags); |
3155 | put_cpu(); | ||
3156 | |||
3157 | /* | ||
3158 | * select_task_rq() can race against ->cpus_allowed | ||
3159 | */ | ||
3116 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) | 3160 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) |
3117 | || unlikely(!cpu_active(dest_cpu))) | 3161 | || unlikely(!cpu_active(dest_cpu))) { |
3118 | goto out; | 3162 | task_rq_unlock(rq, &flags); |
3163 | goto again; | ||
3164 | } | ||
3119 | 3165 | ||
3120 | /* force the process onto the specified CPU */ | 3166 | /* force the process onto the specified CPU */ |
3121 | if (migrate_task(p, dest_cpu, &req)) { | 3167 | if (migrate_task(p, dest_cpu, &req)) { |
@@ -3130,24 +3176,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) | |||
3130 | 3176 | ||
3131 | return; | 3177 | return; |
3132 | } | 3178 | } |
3133 | out: | ||
3134 | task_rq_unlock(rq, &flags); | 3179 | task_rq_unlock(rq, &flags); |
3135 | } | 3180 | } |
3136 | 3181 | ||
3137 | /* | 3182 | /* |
3138 | * sched_exec - execve() is a valuable balancing opportunity, because at | ||
3139 | * this point the task has the smallest effective memory and cache footprint. | ||
3140 | */ | ||
3141 | void sched_exec(void) | ||
3142 | { | ||
3143 | int new_cpu, this_cpu = get_cpu(); | ||
3144 | new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); | ||
3145 | put_cpu(); | ||
3146 | if (new_cpu != this_cpu) | ||
3147 | sched_migrate_task(current, new_cpu); | ||
3148 | } | ||
3149 | |||
3150 | /* | ||
3151 | * pull_task - move a task from a remote runqueue to the local runqueue. | 3183 | * pull_task - move a task from a remote runqueue to the local runqueue. |
3152 | * Both runqueues must be locked. | 3184 | * Both runqueues must be locked. |
3153 | */ | 3185 | */ |
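The rewritten sched_exec() drops the temporary cpus_allowed trick and instead re-validates its unlocked CPU choice under the runqueue lock, retrying if it raced with an affinity change. A self-contained sketch of that retry structure; the "race" is simulated by shrinking the allowed mask once, and all names are illustrative:

#include <stdio.h>
#include <stdint.h>

static uint64_t cpus_allowed = 0x0f;   /* CPUs 0-3 */

static int select_dest_cpu(void)
{
    /* pretend the balancer picks the highest currently allowed CPU */
    for (int cpu = 63; cpu >= 0; cpu--)
        if ((cpus_allowed >> cpu) & 1)
            return cpu;
    return 0;
}

static int allowed_under_lock(int cpu)
{
    static int raced;

    /* first check: someone shrank the mask while we were unlocked */
    if (!raced) {
        raced = 1;
        cpus_allowed = 0x07;           /* cpu3 just became disallowed */
    }
    return (cpus_allowed >> cpu) & 1;
}

int main(void)
{
    int this_cpu = 0, dest_cpu;

again:
    dest_cpu = select_dest_cpu();
    if (dest_cpu == this_cpu)
        return 0;

    /* task_rq_lock() would be taken here */
    if (!allowed_under_lock(dest_cpu)) {
        /* task_rq_unlock() */
        printf("cpu%d no longer allowed, retrying\n", dest_cpu);
        goto again;
    }
    /* ... queue the migration request to dest_cpu ... */
    printf("migrating to cpu%d\n", dest_cpu);
    return 0;
}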
@@ -5911,14 +5943,15 @@ EXPORT_SYMBOL(wait_for_completion_killable); | |||
5911 | */ | 5943 | */ |
5912 | bool try_wait_for_completion(struct completion *x) | 5944 | bool try_wait_for_completion(struct completion *x) |
5913 | { | 5945 | { |
5946 | unsigned long flags; | ||
5914 | int ret = 1; | 5947 | int ret = 1; |
5915 | 5948 | ||
5916 | spin_lock_irq(&x->wait.lock); | 5949 | spin_lock_irqsave(&x->wait.lock, flags); |
5917 | if (!x->done) | 5950 | if (!x->done) |
5918 | ret = 0; | 5951 | ret = 0; |
5919 | else | 5952 | else |
5920 | x->done--; | 5953 | x->done--; |
5921 | spin_unlock_irq(&x->wait.lock); | 5954 | spin_unlock_irqrestore(&x->wait.lock, flags); |
5922 | return ret; | 5955 | return ret; |
5923 | } | 5956 | } |
5924 | EXPORT_SYMBOL(try_wait_for_completion); | 5957 | EXPORT_SYMBOL(try_wait_for_completion); |
@@ -5933,12 +5966,13 @@ EXPORT_SYMBOL(try_wait_for_completion); | |||
5933 | */ | 5966 | */ |
5934 | bool completion_done(struct completion *x) | 5967 | bool completion_done(struct completion *x) |
5935 | { | 5968 | { |
5969 | unsigned long flags; | ||
5936 | int ret = 1; | 5970 | int ret = 1; |
5937 | 5971 | ||
5938 | spin_lock_irq(&x->wait.lock); | 5972 | spin_lock_irqsave(&x->wait.lock, flags); |
5939 | if (!x->done) | 5973 | if (!x->done) |
5940 | ret = 0; | 5974 | ret = 0; |
5941 | spin_unlock_irq(&x->wait.lock); | 5975 | spin_unlock_irqrestore(&x->wait.lock, flags); |
5942 | return ret; | 5976 | return ret; |
5943 | } | 5977 | } |
5944 | EXPORT_SYMBOL(completion_done); | 5978 | EXPORT_SYMBOL(completion_done); |
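try_wait_for_completion() and completion_done() switch from spin_lock_irq() to spin_lock_irqsave() because they may be called with interrupts already disabled; the _irq variant would blindly re-enable them on unlock. A user-space model of the difference, where a plain bool stands in for the CPU interrupt-enable flag and no real locking is done:

#include <stdio.h>
#include <stdbool.h>

static bool irqs_enabled = true;

static void lock_irq(void)            { irqs_enabled = false; }
static void unlock_irq(void)          { irqs_enabled = true; }   /* always re-enables */

static void lock_irqsave(bool *flags) { *flags = irqs_enabled; irqs_enabled = false; }
static void unlock_irqrestore(bool flags) { irqs_enabled = flags; } /* restores caller state */

int main(void)
{
    bool flags;

    irqs_enabled = false;   /* caller runs with interrupts off, e.g. IRQ context */

    lock_irq();
    unlock_irq();
    printf("after _irq variant:     irqs_enabled=%d (wrongly re-enabled)\n", irqs_enabled);

    irqs_enabled = false;
    lock_irqsave(&flags);
    unlock_irqrestore(flags);
    printf("after _irqsave variant: irqs_enabled=%d (caller state preserved)\n", irqs_enabled);
    return 0;
}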
@@ -6457,7 +6491,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) | |||
6457 | return -EINVAL; | 6491 | return -EINVAL; |
6458 | 6492 | ||
6459 | retval = -ESRCH; | 6493 | retval = -ESRCH; |
6460 | read_lock(&tasklist_lock); | 6494 | rcu_read_lock(); |
6461 | p = find_process_by_pid(pid); | 6495 | p = find_process_by_pid(pid); |
6462 | if (p) { | 6496 | if (p) { |
6463 | retval = security_task_getscheduler(p); | 6497 | retval = security_task_getscheduler(p); |
@@ -6465,7 +6499,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) | |||
6465 | retval = p->policy | 6499 | retval = p->policy |
6466 | | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); | 6500 | | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); |
6467 | } | 6501 | } |
6468 | read_unlock(&tasklist_lock); | 6502 | rcu_read_unlock(); |
6469 | return retval; | 6503 | return retval; |
6470 | } | 6504 | } |
6471 | 6505 | ||
@@ -6483,7 +6517,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6483 | if (!param || pid < 0) | 6517 | if (!param || pid < 0) |
6484 | return -EINVAL; | 6518 | return -EINVAL; |
6485 | 6519 | ||
6486 | read_lock(&tasklist_lock); | 6520 | rcu_read_lock(); |
6487 | p = find_process_by_pid(pid); | 6521 | p = find_process_by_pid(pid); |
6488 | retval = -ESRCH; | 6522 | retval = -ESRCH; |
6489 | if (!p) | 6523 | if (!p) |
@@ -6494,7 +6528,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6494 | goto out_unlock; | 6528 | goto out_unlock; |
6495 | 6529 | ||
6496 | lp.sched_priority = p->rt_priority; | 6530 | lp.sched_priority = p->rt_priority; |
6497 | read_unlock(&tasklist_lock); | 6531 | rcu_read_unlock(); |
6498 | 6532 | ||
6499 | /* | 6533 | /* |
6500 | * This one might sleep, we cannot do it with a spinlock held ... | 6534 | * This one might sleep, we cannot do it with a spinlock held ... |
@@ -6504,7 +6538,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6504 | return retval; | 6538 | return retval; |
6505 | 6539 | ||
6506 | out_unlock: | 6540 | out_unlock: |
6507 | read_unlock(&tasklist_lock); | 6541 | rcu_read_unlock(); |
6508 | return retval; | 6542 | return retval; |
6509 | } | 6543 | } |
6510 | 6544 | ||
@@ -6515,22 +6549,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
6515 | int retval; | 6549 | int retval; |
6516 | 6550 | ||
6517 | get_online_cpus(); | 6551 | get_online_cpus(); |
6518 | read_lock(&tasklist_lock); | 6552 | rcu_read_lock(); |
6519 | 6553 | ||
6520 | p = find_process_by_pid(pid); | 6554 | p = find_process_by_pid(pid); |
6521 | if (!p) { | 6555 | if (!p) { |
6522 | read_unlock(&tasklist_lock); | 6556 | rcu_read_unlock(); |
6523 | put_online_cpus(); | 6557 | put_online_cpus(); |
6524 | return -ESRCH; | 6558 | return -ESRCH; |
6525 | } | 6559 | } |
6526 | 6560 | ||
6527 | /* | 6561 | /* Prevent p going away */ |
6528 | * It is not safe to call set_cpus_allowed with the | ||
6529 | * tasklist_lock held. We will bump the task_struct's | ||
6530 | * usage count and then drop tasklist_lock. | ||
6531 | */ | ||
6532 | get_task_struct(p); | 6562 | get_task_struct(p); |
6533 | read_unlock(&tasklist_lock); | 6563 | rcu_read_unlock(); |
6534 | 6564 | ||
6535 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { | 6565 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { |
6536 | retval = -ENOMEM; | 6566 | retval = -ENOMEM; |
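After the rcu_read_lock() conversion, sched_setaffinity() keeps the same shape of critical section: look the task up, take a reference (get_task_struct()), drop the lookup protection, and only then do the slow work. A sketch of that "pin it, then drop the lock" pattern with a C11 atomic counter standing in for the task refcount; the lookup itself is faked:

#include <stdatomic.h>
#include <stdio.h>

struct obj {
    atomic_int refcount;
    int data;
};

static struct obj table_entry = { .refcount = 1, .data = 42 };

static void get_obj(struct obj *o) { atomic_fetch_add(&o->refcount, 1); }

static void put_obj(struct obj *o)
{
    if (atomic_fetch_sub(&o->refcount, 1) == 1)
        printf("last reference dropped, object would be freed\n");
}

int main(void)
{
    struct obj *p;

    /* rcu_read_lock() -- begin short lookup section */
    p = &table_entry;      /* find_process_by_pid() analogue */
    get_obj(p);            /* get_task_struct(): prevent p going away */
    /* rcu_read_unlock() -- end lookup section */

    /* Long-running work, done without any lookup protection held. */
    printf("working on object with data=%d\n", p->data);

    put_obj(p);            /* put_task_struct() analogue */
    return 0;
}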
@@ -6616,7 +6646,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
6616 | int retval; | 6646 | int retval; |
6617 | 6647 | ||
6618 | get_online_cpus(); | 6648 | get_online_cpus(); |
6619 | read_lock(&tasklist_lock); | 6649 | rcu_read_lock(); |
6620 | 6650 | ||
6621 | retval = -ESRCH; | 6651 | retval = -ESRCH; |
6622 | p = find_process_by_pid(pid); | 6652 | p = find_process_by_pid(pid); |
@@ -6632,7 +6662,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
6632 | task_rq_unlock(rq, &flags); | 6662 | task_rq_unlock(rq, &flags); |
6633 | 6663 | ||
6634 | out_unlock: | 6664 | out_unlock: |
6635 | read_unlock(&tasklist_lock); | 6665 | rcu_read_unlock(); |
6636 | put_online_cpus(); | 6666 | put_online_cpus(); |
6637 | 6667 | ||
6638 | return retval; | 6668 | return retval; |
@@ -6876,7 +6906,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6876 | return -EINVAL; | 6906 | return -EINVAL; |
6877 | 6907 | ||
6878 | retval = -ESRCH; | 6908 | retval = -ESRCH; |
6879 | read_lock(&tasklist_lock); | 6909 | rcu_read_lock(); |
6880 | p = find_process_by_pid(pid); | 6910 | p = find_process_by_pid(pid); |
6881 | if (!p) | 6911 | if (!p) |
6882 | goto out_unlock; | 6912 | goto out_unlock; |
@@ -6889,13 +6919,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6889 | time_slice = p->sched_class->get_rr_interval(rq, p); | 6919 | time_slice = p->sched_class->get_rr_interval(rq, p); |
6890 | task_rq_unlock(rq, &flags); | 6920 | task_rq_unlock(rq, &flags); |
6891 | 6921 | ||
6892 | read_unlock(&tasklist_lock); | 6922 | rcu_read_unlock(); |
6893 | jiffies_to_timespec(time_slice, &t); | 6923 | jiffies_to_timespec(time_slice, &t); |
6894 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 6924 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
6895 | return retval; | 6925 | return retval; |
6896 | 6926 | ||
6897 | out_unlock: | 6927 | out_unlock: |
6898 | read_unlock(&tasklist_lock); | 6928 | rcu_read_unlock(); |
6899 | return retval; | 6929 | return retval; |
6900 | } | 6930 | } |
6901 | 6931 | ||
@@ -6986,6 +7016,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
6986 | raw_spin_lock_irqsave(&rq->lock, flags); | 7016 | raw_spin_lock_irqsave(&rq->lock, flags); |
6987 | 7017 | ||
6988 | __sched_fork(idle); | 7018 | __sched_fork(idle); |
7019 | idle->state = TASK_RUNNING; | ||
6989 | idle->se.exec_start = sched_clock(); | 7020 | idle->se.exec_start = sched_clock(); |
6990 | 7021 | ||
6991 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 7022 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
@@ -7100,7 +7131,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7100 | struct rq *rq; | 7131 | struct rq *rq; |
7101 | int ret = 0; | 7132 | int ret = 0; |
7102 | 7133 | ||
7134 | /* | ||
7135 | * Since we rely on wake-ups to migrate sleeping tasks, don't change | ||
7136 | * the ->cpus_allowed mask from under waking tasks, which would be | ||
7137 | * possible when we change rq->lock in ttwu(), so synchronize against | ||
7138 | * TASK_WAKING to avoid that. | ||
7139 | */ | ||
7140 | again: | ||
7141 | while (p->state == TASK_WAKING) | ||
7142 | cpu_relax(); | ||
7143 | |||
7103 | rq = task_rq_lock(p, &flags); | 7144 | rq = task_rq_lock(p, &flags); |
7145 | |||
7146 | if (p->state == TASK_WAKING) { | ||
7147 | task_rq_unlock(rq, &flags); | ||
7148 | goto again; | ||
7149 | } | ||
7150 | |||
7104 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | 7151 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
7105 | ret = -EINVAL; | 7152 | ret = -EINVAL; |
7106 | goto out; | 7153 | goto out; |
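set_cpus_allowed_ptr() now spins until the task leaves the transient TASK_WAKING state and re-checks after taking the runqueue lock, since the state can change in the unlocked window. A small pthread/C11-atomics sketch of that spin-then-recheck loop, purely illustrative; sched_yield() plays the role of cpu_relax():

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

enum { TASK_RUNNING, TASK_WAKING };

static atomic_int task_state = TASK_WAKING;
static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

static void *waker(void *arg)
{
    (void)arg;
    /* pretend the wake-up path finishes placing the task */
    for (int i = 0; i < 1000; i++)
        sched_yield();
    atomic_store(&task_state, TASK_RUNNING);
    return NULL;
}

static void change_affinity(void)
{
again:
    while (atomic_load(&task_state) == TASK_WAKING)
        sched_yield();                     /* cpu_relax() analogue */

    pthread_mutex_lock(&rq_lock);          /* task_rq_lock() analogue */
    if (atomic_load(&task_state) == TASK_WAKING) {
        pthread_mutex_unlock(&rq_lock);
        goto again;                        /* raced; wait again */
    }
    /* safe to update the affinity mask here */
    printf("affinity updated with task not in TASK_WAKING\n");
    pthread_mutex_unlock(&rq_lock);
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, waker, NULL);
    change_affinity();
    pthread_join(t, NULL);
    return 0;
}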
@@ -7156,7 +7203,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); | |||
7156 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | 7203 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) |
7157 | { | 7204 | { |
7158 | struct rq *rq_dest, *rq_src; | 7205 | struct rq *rq_dest, *rq_src; |
7159 | int ret = 0, on_rq; | 7206 | int ret = 0; |
7160 | 7207 | ||
7161 | if (unlikely(!cpu_active(dest_cpu))) | 7208 | if (unlikely(!cpu_active(dest_cpu))) |
7162 | return ret; | 7209 | return ret; |
@@ -7172,12 +7219,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
7172 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 7219 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
7173 | goto fail; | 7220 | goto fail; |
7174 | 7221 | ||
7175 | on_rq = p->se.on_rq; | 7222 | /* |
7176 | if (on_rq) | 7223 | * If we're not on a rq, the next wake-up will ensure we're |
7224 | * placed properly. | ||
7225 | */ | ||
7226 | if (p->se.on_rq) { | ||
7177 | deactivate_task(rq_src, p, 0); | 7227 | deactivate_task(rq_src, p, 0); |
7178 | 7228 | set_task_cpu(p, dest_cpu); | |
7179 | set_task_cpu(p, dest_cpu); | ||
7180 | if (on_rq) { | ||
7181 | activate_task(rq_dest, p, 0); | 7229 | activate_task(rq_dest, p, 0); |
7182 | check_preempt_curr(rq_dest, p, 0); | 7230 | check_preempt_curr(rq_dest, p, 0); |
7183 | } | 7231 | } |
@@ -7273,37 +7321,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) | |||
7273 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 7321 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) |
7274 | { | 7322 | { |
7275 | int dest_cpu; | 7323 | int dest_cpu; |
7276 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu)); | ||
7277 | 7324 | ||
7278 | again: | 7325 | again: |
7279 | /* Look for allowed, online CPU in same node. */ | 7326 | dest_cpu = select_fallback_rq(dead_cpu, p); |
7280 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | ||
7281 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | ||
7282 | goto move; | ||
7283 | |||
7284 | /* Any allowed, online CPU? */ | ||
7285 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); | ||
7286 | if (dest_cpu < nr_cpu_ids) | ||
7287 | goto move; | ||
7288 | |||
7289 | /* No more Mr. Nice Guy. */ | ||
7290 | if (dest_cpu >= nr_cpu_ids) { | ||
7291 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | ||
7292 | dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); | ||
7293 | |||
7294 | /* | ||
7295 | * Don't tell them about moving exiting tasks or | ||
7296 | * kernel threads (both mm NULL), since they never | ||
7297 | * leave kernel. | ||
7298 | */ | ||
7299 | if (p->mm && printk_ratelimit()) { | ||
7300 | printk(KERN_INFO "process %d (%s) no " | ||
7301 | "longer affine to cpu%d\n", | ||
7302 | task_pid_nr(p), p->comm, dead_cpu); | ||
7303 | } | ||
7304 | } | ||
7305 | 7327 | ||
7306 | move: | ||
7307 | /* It can have affinity changed while we were choosing. */ | 7328 | /* It can have affinity changed while we were choosing. */ |
7308 | if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) | 7329 | if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) |
7309 | goto again; | 7330 | goto again; |
@@ -9668,7 +9689,7 @@ void __init sched_init(void) | |||
9668 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 9689 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
9669 | static inline int preempt_count_equals(int preempt_offset) | 9690 | static inline int preempt_count_equals(int preempt_offset) |
9670 | { | 9691 | { |
9671 | int nested = preempt_count() & ~PREEMPT_ACTIVE; | 9692 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); |
9672 | 9693 | ||
9673 | return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); | 9694 | return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); |
9674 | } | 9695 | } |
@@ -10083,7 +10104,7 @@ void sched_move_task(struct task_struct *tsk) | |||
10083 | 10104 | ||
10084 | #ifdef CONFIG_FAIR_GROUP_SCHED | 10105 | #ifdef CONFIG_FAIR_GROUP_SCHED |
10085 | if (tsk->sched_class->moved_group) | 10106 | if (tsk->sched_class->moved_group) |
10086 | tsk->sched_class->moved_group(tsk); | 10107 | tsk->sched_class->moved_group(tsk, on_rq); |
10087 | #endif | 10108 | #endif |
10088 | 10109 | ||
10089 | if (unlikely(running)) | 10110 | if (unlikely(running)) |
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 479ce5682d7c..5b496132c28a 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -236,6 +236,18 @@ void sched_clock_idle_wakeup_event(u64 delta_ns) | |||
236 | } | 236 | } |
237 | EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); | 237 | EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); |
238 | 238 | ||
239 | unsigned long long cpu_clock(int cpu) | ||
240 | { | ||
241 | unsigned long long clock; | ||
242 | unsigned long flags; | ||
243 | |||
244 | local_irq_save(flags); | ||
245 | clock = sched_clock_cpu(cpu); | ||
246 | local_irq_restore(flags); | ||
247 | |||
248 | return clock; | ||
249 | } | ||
250 | |||
239 | #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ | 251 | #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ |
240 | 252 | ||
241 | void sched_clock_init(void) | 253 | void sched_clock_init(void) |
@@ -251,17 +263,12 @@ u64 sched_clock_cpu(int cpu) | |||
251 | return sched_clock(); | 263 | return sched_clock(); |
252 | } | 264 | } |
253 | 265 | ||
254 | #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ | ||
255 | 266 | ||
256 | unsigned long long cpu_clock(int cpu) | 267 | unsigned long long cpu_clock(int cpu) |
257 | { | 268 | { |
258 | unsigned long long clock; | 269 | return sched_clock_cpu(cpu); |
259 | unsigned long flags; | 270 | } |
260 | 271 | ||
261 | local_irq_save(flags); | 272 | #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ |
262 | clock = sched_clock_cpu(cpu); | ||
263 | local_irq_restore(flags); | ||
264 | 273 | ||
265 | return clock; | ||
266 | } | ||
267 | EXPORT_SYMBOL_GPL(cpu_clock); | 274 | EXPORT_SYMBOL_GPL(cpu_clock); |
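The sched_clock.c hunk moves cpu_clock() inside the CONFIG_HAVE_UNSTABLE_SCHED_CLOCK block so each configuration gets its own definition: the unstable-clock build keeps the irq-save/restore around sched_clock_cpu(), the stable build calls straight through. A compile-time sketch of that split; UNSTABLE_SCHED_CLOCK is an invented switch and CLOCK_MONOTONIC stands in for the per-CPU clock read:

#include <stdio.h>
#include <time.h>

static unsigned long long read_raw_clock(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

#ifdef UNSTABLE_SCHED_CLOCK
static unsigned long long cpu_clock_demo(void)
{
    /* local_irq_save(flags); -- protect the per-CPU fixups */
    unsigned long long clock = read_raw_clock();
    /* local_irq_restore(flags); */
    return clock;
}
#else
static unsigned long long cpu_clock_demo(void)
{
    return read_raw_clock();   /* stable clock: no protection needed */
}
#endif

int main(void)
{
    printf("clock = %llu ns\n", cpu_clock_demo());
    return 0;
}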
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5bedf6e3ebf3..42ac3c9f66f6 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
510 | curr->sum_exec_runtime += delta_exec; | 510 | curr->sum_exec_runtime += delta_exec; |
511 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 511 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
512 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); | 512 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); |
513 | |||
513 | curr->vruntime += delta_exec_weighted; | 514 | curr->vruntime += delta_exec_weighted; |
514 | update_min_vruntime(cfs_rq); | 515 | update_min_vruntime(cfs_rq); |
515 | } | 516 | } |
@@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
765 | se->vruntime = vruntime; | 766 | se->vruntime = vruntime; |
766 | } | 767 | } |
767 | 768 | ||
769 | #define ENQUEUE_WAKEUP 1 | ||
770 | #define ENQUEUE_MIGRATE 2 | ||
771 | |||
768 | static void | 772 | static void |
769 | enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) | 773 | enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) |
770 | { | 774 | { |
771 | /* | 775 | /* |
776 | * Update the normalized vruntime before updating min_vruntime | ||
777 | * through calling update_curr(). | ||
778 | */ | ||
779 | if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE)) | ||
780 | se->vruntime += cfs_rq->min_vruntime; | ||
781 | |||
782 | /* | ||
772 | * Update run-time statistics of the 'current'. | 783 | * Update run-time statistics of the 'current'. |
773 | */ | 784 | */ |
774 | update_curr(cfs_rq); | 785 | update_curr(cfs_rq); |
775 | account_entity_enqueue(cfs_rq, se); | 786 | account_entity_enqueue(cfs_rq, se); |
776 | 787 | ||
777 | if (wakeup) { | 788 | if (flags & ENQUEUE_WAKEUP) { |
778 | place_entity(cfs_rq, se, 0); | 789 | place_entity(cfs_rq, se, 0); |
779 | enqueue_sleeper(cfs_rq, se); | 790 | enqueue_sleeper(cfs_rq, se); |
780 | } | 791 | } |
@@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
828 | __dequeue_entity(cfs_rq, se); | 839 | __dequeue_entity(cfs_rq, se); |
829 | account_entity_dequeue(cfs_rq, se); | 840 | account_entity_dequeue(cfs_rq, se); |
830 | update_min_vruntime(cfs_rq); | 841 | update_min_vruntime(cfs_rq); |
842 | |||
843 | /* | ||
844 | * Normalize the entity after updating the min_vruntime because the | ||
845 | * update can refer to the ->curr item and we need to reflect this | ||
846 | * movement in our normalized position. | ||
847 | */ | ||
848 | if (!sleep) | ||
849 | se->vruntime -= cfs_rq->min_vruntime; | ||
831 | } | 850 | } |
832 | 851 | ||
833 | /* | 852 | /* |
@@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) | |||
1038 | { | 1057 | { |
1039 | struct cfs_rq *cfs_rq; | 1058 | struct cfs_rq *cfs_rq; |
1040 | struct sched_entity *se = &p->se; | 1059 | struct sched_entity *se = &p->se; |
1060 | int flags = 0; | ||
1061 | |||
1062 | if (wakeup) | ||
1063 | flags |= ENQUEUE_WAKEUP; | ||
1064 | if (p->state == TASK_WAKING) | ||
1065 | flags |= ENQUEUE_MIGRATE; | ||
1041 | 1066 | ||
1042 | for_each_sched_entity(se) { | 1067 | for_each_sched_entity(se) { |
1043 | if (se->on_rq) | 1068 | if (se->on_rq) |
1044 | break; | 1069 | break; |
1045 | cfs_rq = cfs_rq_of(se); | 1070 | cfs_rq = cfs_rq_of(se); |
1046 | enqueue_entity(cfs_rq, se, wakeup); | 1071 | enqueue_entity(cfs_rq, se, flags); |
1047 | wakeup = 1; | 1072 | flags = ENQUEUE_WAKEUP; |
1048 | } | 1073 | } |
1049 | 1074 | ||
1050 | hrtick_update(rq); | 1075 | hrtick_update(rq); |
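The ENQUEUE_WAKEUP/ENQUEUE_MIGRATE flags, the dequeue_entity() hunk and task_waking_fair() below all serve one invariant: while an entity is off a queue (or migrating on wake-up) its vruntime is kept relative to the old queue's min_vruntime, and the destination's min_vruntime is added back on enqueue, so the relative lag survives migration. A pure-arithmetic sketch of that normalization with invented numbers:

#include <stdio.h>

struct cfs_rq { unsigned long long min_vruntime; };
struct entity { unsigned long long vruntime; };

static void dequeue_for_migration(struct cfs_rq *src, struct entity *se)
{
    se->vruntime -= src->min_vruntime;   /* keep only the relative lag */
}

static void enqueue_after_migration(struct cfs_rq *dst, struct entity *se)
{
    se->vruntime += dst->min_vruntime;   /* re-base on the new queue */
}

int main(void)
{
    struct cfs_rq src = { .min_vruntime = 1000000 };
    struct cfs_rq dst = { .min_vruntime = 5000000 };
    struct entity se = { .vruntime = 1000150 };   /* 150ns ahead of src min */

    dequeue_for_migration(&src, &se);
    enqueue_after_migration(&dst, &se);

    printf("vruntime on dst = %llu (lag preserved: %llu)\n",
           se.vruntime, se.vruntime - dst.min_vruntime);
    return 0;
}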
@@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq) | |||
1120 | 1145 | ||
1121 | #ifdef CONFIG_SMP | 1146 | #ifdef CONFIG_SMP |
1122 | 1147 | ||
1148 | static void task_waking_fair(struct rq *rq, struct task_struct *p) | ||
1149 | { | ||
1150 | struct sched_entity *se = &p->se; | ||
1151 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1152 | |||
1153 | se->vruntime -= cfs_rq->min_vruntime; | ||
1154 | } | ||
1155 | |||
1123 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1156 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1124 | /* | 1157 | /* |
1125 | * effective_load() calculates the load change as seen from the root_task_group | 1158 | * effective_load() calculates the load change as seen from the root_task_group |
@@ -1429,6 +1462,9 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1429 | } | 1462 | } |
1430 | 1463 | ||
1431 | for_each_domain(cpu, tmp) { | 1464 | for_each_domain(cpu, tmp) { |
1465 | if (!(tmp->flags & SD_LOAD_BALANCE)) | ||
1466 | continue; | ||
1467 | |||
1432 | /* | 1468 | /* |
1433 | * If power savings logic is enabled for a domain, see if we | 1469 | * If power savings logic is enabled for a domain, see if we |
1434 | * are not overloaded, if so, don't balance wider. | 1470 | * are not overloaded, if so, don't balance wider. |
@@ -1975,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p) | |||
1975 | resched_task(rq->curr); | 2011 | resched_task(rq->curr); |
1976 | } | 2012 | } |
1977 | 2013 | ||
2014 | se->vruntime -= cfs_rq->min_vruntime; | ||
2015 | |||
1978 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 2016 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
1979 | } | 2017 | } |
1980 | 2018 | ||
@@ -2028,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq) | |||
2028 | } | 2066 | } |
2029 | 2067 | ||
2030 | #ifdef CONFIG_FAIR_GROUP_SCHED | 2068 | #ifdef CONFIG_FAIR_GROUP_SCHED |
2031 | static void moved_group_fair(struct task_struct *p) | 2069 | static void moved_group_fair(struct task_struct *p, int on_rq) |
2032 | { | 2070 | { |
2033 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 2071 | struct cfs_rq *cfs_rq = task_cfs_rq(p); |
2034 | 2072 | ||
2035 | update_curr(cfs_rq); | 2073 | update_curr(cfs_rq); |
2036 | place_entity(cfs_rq, &p->se, 1); | 2074 | if (!on_rq) |
2075 | place_entity(cfs_rq, &p->se, 1); | ||
2037 | } | 2076 | } |
2038 | #endif | 2077 | #endif |
2039 | 2078 | ||
@@ -2073,6 +2112,8 @@ static const struct sched_class fair_sched_class = { | |||
2073 | .move_one_task = move_one_task_fair, | 2112 | .move_one_task = move_one_task_fair, |
2074 | .rq_online = rq_online_fair, | 2113 | .rq_online = rq_online_fair, |
2075 | .rq_offline = rq_offline_fair, | 2114 | .rq_offline = rq_offline_fair, |
2115 | |||
2116 | .task_waking = task_waking_fair, | ||
2076 | #endif | 2117 | #endif |
2077 | 2118 | ||
2078 | .set_curr_task = set_curr_task_fair, | 2119 | .set_curr_task = set_curr_task_fair, |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d2ea2828164e..f48328ac216f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq) | |||
1472 | * If we are not running and we are not going to reschedule soon, we should | 1472 | * If we are not running and we are not going to reschedule soon, we should |
1473 | * try to push tasks away now | 1473 | * try to push tasks away now |
1474 | */ | 1474 | */ |
1475 | static void task_wake_up_rt(struct rq *rq, struct task_struct *p) | 1475 | static void task_woken_rt(struct rq *rq, struct task_struct *p) |
1476 | { | 1476 | { |
1477 | if (!task_running(rq, p) && | 1477 | if (!task_running(rq, p) && |
1478 | !test_tsk_need_resched(rq->curr) && | 1478 | !test_tsk_need_resched(rq->curr) && |
@@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = { | |||
1753 | .rq_offline = rq_offline_rt, | 1753 | .rq_offline = rq_offline_rt, |
1754 | .pre_schedule = pre_schedule_rt, | 1754 | .pre_schedule = pre_schedule_rt, |
1755 | .post_schedule = post_schedule_rt, | 1755 | .post_schedule = post_schedule_rt, |
1756 | .task_wake_up = task_wake_up_rt, | 1756 | .task_woken = task_woken_rt, |
1757 | .switched_from = switched_from_rt, | 1757 | .switched_from = switched_from_rt, |
1758 | #endif | 1758 | #endif |
1759 | 1759 | ||
diff --git a/kernel/signal.c b/kernel/signal.c index 1814e68e4de3..934ae5e687b9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -218,13 +218,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |||
218 | struct user_struct *user; | 218 | struct user_struct *user; |
219 | 219 | ||
220 | /* | 220 | /* |
221 | * We won't get problems with the target's UID changing under us | 221 | * Protect access to @t credentials. This can go away when all |
222 | * because changing it requires RCU be used, and if t != current, the | 222 | * callers hold rcu read lock. |
223 | * caller must be holding the RCU readlock (by way of a spinlock) and | ||
224 | * we use RCU protection here | ||
225 | */ | 223 | */ |
224 | rcu_read_lock(); | ||
226 | user = get_uid(__task_cred(t)->user); | 225 | user = get_uid(__task_cred(t)->user); |
227 | atomic_inc(&user->sigpending); | 226 | atomic_inc(&user->sigpending); |
227 | rcu_read_unlock(); | ||
228 | 228 | ||
229 | if (override_rlimit || | 229 | if (override_rlimit || |
230 | atomic_read(&user->sigpending) <= | 230 | atomic_read(&user->sigpending) <= |
@@ -979,7 +979,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr) | |||
979 | for (i = 0; i < 16; i++) { | 979 | for (i = 0; i < 16; i++) { |
980 | unsigned char insn; | 980 | unsigned char insn; |
981 | 981 | ||
982 | __get_user(insn, (unsigned char *)(regs->ip + i)); | 982 | if (get_user(insn, (unsigned char *)(regs->ip + i))) |
983 | break; | ||
983 | printk("%02x ", insn); | 984 | printk("%02x ", insn); |
984 | } | 985 | } |
985 | } | 986 | } |
@@ -1179,11 +1180,12 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, | |||
1179 | int ret = -EINVAL; | 1180 | int ret = -EINVAL; |
1180 | struct task_struct *p; | 1181 | struct task_struct *p; |
1181 | const struct cred *pcred; | 1182 | const struct cred *pcred; |
1183 | unsigned long flags; | ||
1182 | 1184 | ||
1183 | if (!valid_signal(sig)) | 1185 | if (!valid_signal(sig)) |
1184 | return ret; | 1186 | return ret; |
1185 | 1187 | ||
1186 | read_lock(&tasklist_lock); | 1188 | rcu_read_lock(); |
1187 | p = pid_task(pid, PIDTYPE_PID); | 1189 | p = pid_task(pid, PIDTYPE_PID); |
1188 | if (!p) { | 1190 | if (!p) { |
1189 | ret = -ESRCH; | 1191 | ret = -ESRCH; |
@@ -1199,14 +1201,16 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, | |||
1199 | ret = security_task_kill(p, info, sig, secid); | 1201 | ret = security_task_kill(p, info, sig, secid); |
1200 | if (ret) | 1202 | if (ret) |
1201 | goto out_unlock; | 1203 | goto out_unlock; |
1202 | if (sig && p->sighand) { | 1204 | |
1203 | unsigned long flags; | 1205 | if (sig) { |
1204 | spin_lock_irqsave(&p->sighand->siglock, flags); | 1206 | if (lock_task_sighand(p, &flags)) { |
1205 | ret = __send_signal(sig, info, p, 1, 0); | 1207 | ret = __send_signal(sig, info, p, 1, 0); |
1206 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | 1208 | unlock_task_sighand(p, &flags); |
1209 | } else | ||
1210 | ret = -ESRCH; | ||
1207 | } | 1211 | } |
1208 | out_unlock: | 1212 | out_unlock: |
1209 | read_unlock(&tasklist_lock); | 1213 | rcu_read_unlock(); |
1210 | return ret; | 1214 | return ret; |
1211 | } | 1215 | } |
1212 | EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); | 1216 | EXPORT_SYMBOL_GPL(kill_pid_info_as_uid); |
diff --git a/kernel/sys.c b/kernel/sys.c index 20ccfb5da6af..26a6b73a6b85 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -162,6 +162,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) | |||
162 | if (niceval > 19) | 162 | if (niceval > 19) |
163 | niceval = 19; | 163 | niceval = 19; |
164 | 164 | ||
165 | rcu_read_lock(); | ||
165 | read_lock(&tasklist_lock); | 166 | read_lock(&tasklist_lock); |
166 | switch (which) { | 167 | switch (which) { |
167 | case PRIO_PROCESS: | 168 | case PRIO_PROCESS: |
@@ -199,6 +200,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) | |||
199 | } | 200 | } |
200 | out_unlock: | 201 | out_unlock: |
201 | read_unlock(&tasklist_lock); | 202 | read_unlock(&tasklist_lock); |
203 | rcu_read_unlock(); | ||
202 | out: | 204 | out: |
203 | return error; | 205 | return error; |
204 | } | 206 | } |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 45e4bef0012a..8a68b2448468 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1131,7 +1131,7 @@ static struct ctl_table vm_table[] = { | |||
1131 | .data = &sysctl_max_map_count, | 1131 | .data = &sysctl_max_map_count, |
1132 | .maxlen = sizeof(sysctl_max_map_count), | 1132 | .maxlen = sizeof(sysctl_max_map_count), |
1133 | .mode = 0644, | 1133 | .mode = 0644, |
1134 | .proc_handler = proc_dointvec, | 1134 | .proc_handler = proc_dointvec_minmax, |
1135 | .extra1 = &zero, | 1135 | .extra1 = &zero, |
1136 | }, | 1136 | }, |
1137 | #else | 1137 | #else |
@@ -1214,6 +1214,7 @@ static struct ctl_table vm_table[] = { | |||
1214 | .proc_handler = proc_dointvec_jiffies, | 1214 | .proc_handler = proc_dointvec_jiffies, |
1215 | }, | 1215 | }, |
1216 | #endif | 1216 | #endif |
1217 | #ifdef CONFIG_MMU | ||
1217 | { | 1218 | { |
1218 | .procname = "mmap_min_addr", | 1219 | .procname = "mmap_min_addr", |
1219 | .data = &dac_mmap_min_addr, | 1220 | .data = &dac_mmap_min_addr, |
@@ -1221,6 +1222,7 @@ static struct ctl_table vm_table[] = { | |||
1221 | .mode = 0644, | 1222 | .mode = 0644, |
1222 | .proc_handler = mmap_min_addr_handler, | 1223 | .proc_handler = mmap_min_addr_handler, |
1223 | }, | 1224 | }, |
1225 | #endif | ||
1224 | #ifdef CONFIG_NUMA | 1226 | #ifdef CONFIG_NUMA |
1225 | { | 1227 | { |
1226 | .procname = "numa_zonelist_order", | 1228 | .procname = "numa_zonelist_order", |
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 112533d5fc08..8f5d16e0707a 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -1417,6 +1417,35 @@ static void deprecated_sysctl_warning(const int *name, int nlen) | |||
1417 | return; | 1417 | return; |
1418 | } | 1418 | } |
1419 | 1419 | ||
1420 | #define WARN_ONCE_HASH_BITS 8 | ||
1421 | #define WARN_ONCE_HASH_SIZE (1<<WARN_ONCE_HASH_BITS) | ||
1422 | |||
1423 | static DECLARE_BITMAP(warn_once_bitmap, WARN_ONCE_HASH_SIZE); | ||
1424 | |||
1425 | #define FNV32_OFFSET 2166136261U | ||
1426 | #define FNV32_PRIME 0x01000193 | ||
1427 | |||
1428 | /* | ||
1429 | * Print each legacy sysctl (approximately) only once. | ||
1430 | * To avoid making the tables non-const use a external | ||
1431 | * hash-table instead. | ||
1432 | * Worst case hash collision: 6, but very rarely. | ||
1433 | * NOTE! We don't use the SMP-safe bit tests. We simply | ||
1434 | * don't care enough. | ||
1435 | */ | ||
1436 | static void warn_on_bintable(const int *name, int nlen) | ||
1437 | { | ||
1438 | int i; | ||
1439 | u32 hash = FNV32_OFFSET; | ||
1440 | |||
1441 | for (i = 0; i < nlen; i++) | ||
1442 | hash = (hash ^ name[i]) * FNV32_PRIME; | ||
1443 | hash %= WARN_ONCE_HASH_SIZE; | ||
1444 | if (__test_and_set_bit(hash, warn_once_bitmap)) | ||
1445 | return; | ||
1446 | deprecated_sysctl_warning(name, nlen); | ||
1447 | } | ||
1448 | |||
1420 | static ssize_t do_sysctl(int __user *args_name, int nlen, | 1449 | static ssize_t do_sysctl(int __user *args_name, int nlen, |
1421 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) | 1450 | void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) |
1422 | { | 1451 | { |
@@ -1431,7 +1460,7 @@ static ssize_t do_sysctl(int __user *args_name, int nlen, | |||
1431 | if (get_user(name[i], args_name + i)) | 1460 | if (get_user(name[i], args_name + i)) |
1432 | return -EFAULT; | 1461 | return -EFAULT; |
1433 | 1462 | ||
1434 | deprecated_sysctl_warning(name, nlen); | 1463 | warn_on_bintable(name, nlen); |
1435 | 1464 | ||
1436 | return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen); | 1465 | return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen); |
1437 | } | 1466 | } |
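warn_on_bintable() above is a warn-(approximately)-once filter: hash the binary sysctl name vector with FNV-1a into a 256-entry bitmap and skip the warning when that bucket has already been hit. A standalone version of the same idea, with an explicit test-and-set helper in place of the kernel bitmap ops; the example name vector is made up:

#include <stdint.h>
#include <stdio.h>

#define WARN_ONCE_HASH_BITS 8
#define WARN_ONCE_HASH_SIZE (1 << WARN_ONCE_HASH_BITS)

#define FNV32_OFFSET 2166136261U
#define FNV32_PRIME  0x01000193

static uint8_t warn_once_bitmap[WARN_ONCE_HASH_SIZE / 8];

static int test_and_set_bit(unsigned int bit, uint8_t *map)
{
    uint8_t mask = 1u << (bit & 7);
    int old = map[bit >> 3] & mask;

    map[bit >> 3] |= mask;
    return old != 0;
}

static void warn_on_bintable(const int *name, int nlen)
{
    uint32_t hash = FNV32_OFFSET;

    for (int i = 0; i < nlen; i++)
        hash = (hash ^ (uint32_t)name[i]) * FNV32_PRIME;
    hash %= WARN_ONCE_HASH_SIZE;

    if (test_and_set_bit(hash, warn_once_bitmap))
        return;    /* already warned for this hash bucket */
    printf("warning: binary sysctl %d (nlen %d) is deprecated\n", name[0], nlen);
}

int main(void)
{
    int name[] = { 1, 23 };     /* made-up binary sysctl name vector */

    warn_on_bintable(name, 2);
    warn_on_bintable(name, 2);  /* second call is suppressed */
    return 0;
}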
diff --git a/kernel/time.c b/kernel/time.c index c6324d96009e..804798005d19 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -136,6 +136,7 @@ static inline void warp_clock(void) | |||
136 | write_seqlock_irq(&xtime_lock); | 136 | write_seqlock_irq(&xtime_lock); |
137 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; | 137 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; |
138 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; | 138 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; |
139 | update_xtime_cache(0); | ||
139 | write_sequnlock_irq(&xtime_lock); | 140 | write_sequnlock_irq(&xtime_lock); |
140 | clock_was_set(); | 141 | clock_was_set(); |
141 | } | 142 | } |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 3d5fc0fd1cca..6f740d9f0948 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -238,8 +238,9 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
238 | */ | 238 | */ |
239 | void clockevents_notify(unsigned long reason, void *arg) | 239 | void clockevents_notify(unsigned long reason, void *arg) |
240 | { | 240 | { |
241 | struct list_head *node, *tmp; | 241 | struct clock_event_device *dev, *tmp; |
242 | unsigned long flags; | 242 | unsigned long flags; |
243 | int cpu; | ||
243 | 244 | ||
244 | raw_spin_lock_irqsave(&clockevents_lock, flags); | 245 | raw_spin_lock_irqsave(&clockevents_lock, flags); |
245 | clockevents_do_notify(reason, arg); | 246 | clockevents_do_notify(reason, arg); |
@@ -250,8 +251,19 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
250 | * Unregister the clock event devices which were | 251 | * Unregister the clock event devices which were |
251 | * released from the users in the notify chain. | 252 | * released from the users in the notify chain. |
252 | */ | 253 | */ |
253 | list_for_each_safe(node, tmp, &clockevents_released) | 254 | list_for_each_entry_safe(dev, tmp, &clockevents_released, list) |
254 | list_del(node); | 255 | list_del(&dev->list); |
256 | /* | ||
257 | * Now check whether the CPU has left unused per cpu devices | ||
258 | */ | ||
259 | cpu = *((int *)arg); | ||
260 | list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { | ||
261 | if (cpumask_test_cpu(cpu, dev->cpumask) && | ||
262 | cpumask_weight(dev->cpumask) == 1) { | ||
263 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); | ||
264 | list_del(&dev->list); | ||
265 | } | ||
266 | } | ||
255 | break; | 267 | break; |
256 | default: | 268 | default: |
257 | break; | 269 | break; |
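The clockevents_notify() hunk replaces raw list_for_each_safe() iteration with the typed list_for_each_entry_safe() and also unlinks per-cpu devices of the departing CPU. The sketch below re-implements just enough of the intrusive-list machinery (list_head, container_of, the _safe iterator) to show why the typed, deletion-safe form is convenient; it relies on the GNU typeof extension, and the device fields are simplified to a single cpu number:

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
    new->prev = head->prev;
    new->next = head;
    head->prev->next = new;
    head->prev = new;
}

static void list_del(struct list_head *entry)
{
    entry->prev->next = entry->next;
    entry->next->prev = entry->prev;
}

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

#define list_for_each_entry_safe(pos, n, head, member)                    \
    for (pos = container_of((head)->next, typeof(*pos), member),          \
         n = container_of(pos->member.next, typeof(*pos), member);        \
         &pos->member != (head);                                          \
         pos = n, n = container_of(n->member.next, typeof(*n), member))

struct clock_event_device {
    const char *name;
    int cpu;                  /* stands in for dev->cpumask */
    struct list_head list;
};

static struct list_head clockevent_devices = LIST_HEAD_INIT(clockevent_devices);

int main(void)
{
    struct clock_event_device a = { "lapic0", 0 }, b = { "lapic1", 1 };
    struct clock_event_device *dev, *tmp;
    int dead_cpu = 1;

    list_add_tail(&a.list, &clockevent_devices);
    list_add_tail(&b.list, &clockevent_devices);

    /* Drop the per-cpu device belonging to the CPU that went away. */
    list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
        if (dev->cpu == dead_cpu) {
            printf("removing %s\n", dev->name);
            list_del(&dev->list);
        }
    }
    return 0;
}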
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index af4135f05825..7faaa32fbf4f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -165,6 +165,13 @@ struct timespec raw_time; | |||
165 | /* flag for if timekeeping is suspended */ | 165 | /* flag for if timekeeping is suspended */ |
166 | int __read_mostly timekeeping_suspended; | 166 | int __read_mostly timekeeping_suspended; |
167 | 167 | ||
168 | static struct timespec xtime_cache __attribute__ ((aligned (16))); | ||
169 | void update_xtime_cache(u64 nsec) | ||
170 | { | ||
171 | xtime_cache = xtime; | ||
172 | timespec_add_ns(&xtime_cache, nsec); | ||
173 | } | ||
174 | |||
168 | /* must hold xtime_lock */ | 175 | /* must hold xtime_lock */ |
169 | void timekeeping_leap_insert(int leapsecond) | 176 | void timekeeping_leap_insert(int leapsecond) |
170 | { | 177 | { |
@@ -325,6 +332,8 @@ int do_settimeofday(struct timespec *tv) | |||
325 | 332 | ||
326 | xtime = *tv; | 333 | xtime = *tv; |
327 | 334 | ||
335 | update_xtime_cache(0); | ||
336 | |||
328 | timekeeper.ntp_error = 0; | 337 | timekeeper.ntp_error = 0; |
329 | ntp_clear(); | 338 | ntp_clear(); |
330 | 339 | ||
@@ -550,6 +559,7 @@ void __init timekeeping_init(void) | |||
550 | } | 559 | } |
551 | set_normalized_timespec(&wall_to_monotonic, | 560 | set_normalized_timespec(&wall_to_monotonic, |
552 | -boot.tv_sec, -boot.tv_nsec); | 561 | -boot.tv_sec, -boot.tv_nsec); |
562 | update_xtime_cache(0); | ||
553 | total_sleep_time.tv_sec = 0; | 563 | total_sleep_time.tv_sec = 0; |
554 | total_sleep_time.tv_nsec = 0; | 564 | total_sleep_time.tv_nsec = 0; |
555 | write_sequnlock_irqrestore(&xtime_lock, flags); | 565 | write_sequnlock_irqrestore(&xtime_lock, flags); |
@@ -583,6 +593,7 @@ static int timekeeping_resume(struct sys_device *dev) | |||
583 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); | 593 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); |
584 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); | 594 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); |
585 | } | 595 | } |
596 | update_xtime_cache(0); | ||
586 | /* re-base the last cycle value */ | 597 | /* re-base the last cycle value */ |
587 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 598 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
588 | timekeeper.ntp_error = 0; | 599 | timekeeper.ntp_error = 0; |
@@ -722,6 +733,7 @@ static void timekeeping_adjust(s64 offset) | |||
722 | timekeeper.ntp_error_shift; | 733 | timekeeper.ntp_error_shift; |
723 | } | 734 | } |
724 | 735 | ||
736 | |||
725 | /** | 737 | /** |
726 | * logarithmic_accumulation - shifted accumulation of cycles | 738 | * logarithmic_accumulation - shifted accumulation of cycles |
727 | * | 739 | * |
@@ -765,6 +777,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | |||
765 | return offset; | 777 | return offset; |
766 | } | 778 | } |
767 | 779 | ||
780 | |||
768 | /** | 781 | /** |
769 | * update_wall_time - Uses the current clocksource to increment the wall time | 782 | * update_wall_time - Uses the current clocksource to increment the wall time |
770 | * | 783 | * |
@@ -774,6 +787,7 @@ void update_wall_time(void) | |||
774 | { | 787 | { |
775 | struct clocksource *clock; | 788 | struct clocksource *clock; |
776 | cycle_t offset; | 789 | cycle_t offset; |
790 | u64 nsecs; | ||
777 | int shift = 0, maxshift; | 791 | int shift = 0, maxshift; |
778 | 792 | ||
779 | /* Make sure we're fully resumed: */ | 793 | /* Make sure we're fully resumed: */ |
@@ -839,6 +853,9 @@ void update_wall_time(void) | |||
839 | timekeeper.ntp_error += timekeeper.xtime_nsec << | 853 | timekeeper.ntp_error += timekeeper.xtime_nsec << |
840 | timekeeper.ntp_error_shift; | 854 | timekeeper.ntp_error_shift; |
841 | 855 | ||
856 | nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); | ||
857 | update_xtime_cache(nsecs); | ||
858 | |||
842 | /* check to see if there is a new clocksource to use */ | 859 | /* check to see if there is a new clocksource to use */ |
843 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); | 860 | update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); |
844 | } | 861 | } |
@@ -875,13 +892,13 @@ void monotonic_to_bootbased(struct timespec *ts) | |||
875 | 892 | ||
876 | unsigned long get_seconds(void) | 893 | unsigned long get_seconds(void) |
877 | { | 894 | { |
878 | return xtime.tv_sec; | 895 | return xtime_cache.tv_sec; |
879 | } | 896 | } |
880 | EXPORT_SYMBOL(get_seconds); | 897 | EXPORT_SYMBOL(get_seconds); |
881 | 898 | ||
882 | struct timespec __current_kernel_time(void) | 899 | struct timespec __current_kernel_time(void) |
883 | { | 900 | { |
884 | return xtime; | 901 | return xtime_cache; |
885 | } | 902 | } |
886 | 903 | ||
887 | struct timespec current_kernel_time(void) | 904 | struct timespec current_kernel_time(void) |
@@ -891,7 +908,8 @@ struct timespec current_kernel_time(void) | |||
891 | 908 | ||
892 | do { | 909 | do { |
893 | seq = read_seqbegin(&xtime_lock); | 910 | seq = read_seqbegin(&xtime_lock); |
894 | now = xtime; | 911 | |
912 | now = xtime_cache; | ||
895 | } while (read_seqretry(&xtime_lock, seq)); | 913 | } while (read_seqretry(&xtime_lock, seq)); |
896 | 914 | ||
897 | return now; | 915 | return now; |
@@ -905,7 +923,8 @@ struct timespec get_monotonic_coarse(void) | |||
905 | 923 | ||
906 | do { | 924 | do { |
907 | seq = read_seqbegin(&xtime_lock); | 925 | seq = read_seqbegin(&xtime_lock); |
908 | now = xtime; | 926 | |
927 | now = xtime_cache; | ||
909 | mono = wall_to_monotonic; | 928 | mono = wall_to_monotonic; |
910 | } while (read_seqretry(&xtime_lock, seq)); | 929 | } while (read_seqretry(&xtime_lock, seq)); |
911 | 930 | ||
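The reintroduced xtime_cache gives the coarse readers (get_seconds(), current_kernel_time(), get_monotonic_coarse()) a snapshot of xtime with the not-yet-folded nanoseconds already added, so they never see the value mid-update. A sketch of update_xtime_cache() with hand-rolled timespec arithmetic and invented values:

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000L

static struct timespec xtime = { .tv_sec = 1263380000, .tv_nsec = 999999000 };
static struct timespec xtime_cache;

static void timespec_add_ns(struct timespec *ts, long nsec)
{
    ts->tv_nsec += nsec;
    while (ts->tv_nsec >= NSEC_PER_SEC) {   /* carry into seconds */
        ts->tv_nsec -= NSEC_PER_SEC;
        ts->tv_sec++;
    }
}

static void update_xtime_cache(long nsec)
{
    xtime_cache = xtime;
    timespec_add_ns(&xtime_cache, nsec);
}

int main(void)
{
    /* update_wall_time() has 2500ns accumulated but not yet folded into xtime */
    update_xtime_cache(2500);

    printf("xtime       = %ld.%09ld\n", (long)xtime.tv_sec, xtime.tv_nsec);
    printf("xtime_cache = %ld.%09ld\n", (long)xtime_cache.tv_sec, xtime_cache.tv_nsec);
    return 0;
}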
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 28265636b6c2..bdfb8dd1050c 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -237,10 +237,10 @@ static void timer_list_show_tickdevices(struct seq_file *m) | |||
237 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | 237 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST |
238 | print_tickdevice(m, tick_get_broadcast_device(), -1); | 238 | print_tickdevice(m, tick_get_broadcast_device(), -1); |
239 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", | 239 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", |
240 | tick_get_broadcast_mask()->bits[0]); | 240 | cpumask_bits(tick_get_broadcast_mask())[0]); |
241 | #ifdef CONFIG_TICK_ONESHOT | 241 | #ifdef CONFIG_TICK_ONESHOT |
242 | SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", | 242 | SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", |
243 | tick_get_broadcast_oneshot_mask()->bits[0]); | 243 | cpumask_bits(tick_get_broadcast_oneshot_mask())[0]); |
244 | #endif | 244 | #endif |
245 | SEQ_printf(m, "\n"); | 245 | SEQ_printf(m, "\n"); |
246 | #endif | 246 | #endif |
diff --git a/kernel/timer.c b/kernel/timer.c index 5db5a8d26811..15533b792397 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -656,8 +656,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, | |||
656 | 656 | ||
657 | debug_activate(timer, expires); | 657 | debug_activate(timer, expires); |
658 | 658 | ||
659 | new_base = __get_cpu_var(tvec_bases); | ||
660 | |||
661 | cpu = smp_processor_id(); | 659 | cpu = smp_processor_id(); |
662 | 660 | ||
663 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | 661 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d006554888dc..6c22d8a2f289 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -12,17 +12,17 @@ config NOP_TRACER | |||
12 | config HAVE_FTRACE_NMI_ENTER | 12 | config HAVE_FTRACE_NMI_ENTER |
13 | bool | 13 | bool |
14 | help | 14 | help |
15 | See Documentation/trace/ftrace-implementation.txt | 15 | See Documentation/trace/ftrace-design.txt |
16 | 16 | ||
17 | config HAVE_FUNCTION_TRACER | 17 | config HAVE_FUNCTION_TRACER |
18 | bool | 18 | bool |
19 | help | 19 | help |
20 | See Documentation/trace/ftrace-implementation.txt | 20 | See Documentation/trace/ftrace-design.txt |
21 | 21 | ||
22 | config HAVE_FUNCTION_GRAPH_TRACER | 22 | config HAVE_FUNCTION_GRAPH_TRACER |
23 | bool | 23 | bool |
24 | help | 24 | help |
25 | See Documentation/trace/ftrace-implementation.txt | 25 | See Documentation/trace/ftrace-design.txt |
26 | 26 | ||
27 | config HAVE_FUNCTION_GRAPH_FP_TEST | 27 | config HAVE_FUNCTION_GRAPH_FP_TEST |
28 | bool | 28 | bool |
@@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST | |||
34 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST | 34 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST |
35 | bool | 35 | bool |
36 | help | 36 | help |
37 | See Documentation/trace/ftrace-implementation.txt | 37 | See Documentation/trace/ftrace-design.txt |
38 | 38 | ||
39 | config HAVE_DYNAMIC_FTRACE | 39 | config HAVE_DYNAMIC_FTRACE |
40 | bool | 40 | bool |
41 | help | 41 | help |
42 | See Documentation/trace/ftrace-implementation.txt | 42 | See Documentation/trace/ftrace-design.txt |
43 | 43 | ||
44 | config HAVE_FTRACE_MCOUNT_RECORD | 44 | config HAVE_FTRACE_MCOUNT_RECORD |
45 | bool | 45 | bool |
46 | help | 46 | help |
47 | See Documentation/trace/ftrace-implementation.txt | 47 | See Documentation/trace/ftrace-design.txt |
48 | 48 | ||
49 | config HAVE_HW_BRANCH_TRACER | 49 | config HAVE_HW_BRANCH_TRACER |
50 | bool | 50 | bool |
@@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER | |||
52 | config HAVE_SYSCALL_TRACEPOINTS | 52 | config HAVE_SYSCALL_TRACEPOINTS |
53 | bool | 53 | bool |
54 | help | 54 | help |
55 | See Documentation/trace/ftrace-implementation.txt | 55 | See Documentation/trace/ftrace-design.txt |
56 | 56 | ||
57 | config TRACER_MAX_TRACE | 57 | config TRACER_MAX_TRACE |
58 | bool | 58 | bool |
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP | |||
83 | # This allows those options to appear when no other tracer is selected. But the | 83 | # This allows those options to appear when no other tracer is selected. But the |
84 | # options do not appear when something else selects it. We need the two options | 84 | # options do not appear when something else selects it. We need the two options |
85 | # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the | 85 | # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the |
86 | # hidding of the automatic options. | 86 | # hiding of the automatic options. |
87 | 87 | ||
88 | config TRACING | 88 | config TRACING |
89 | bool | 89 | bool |
@@ -119,7 +119,7 @@ menuconfig FTRACE | |||
119 | bool "Tracers" | 119 | bool "Tracers" |
120 | default y if DEBUG_KERNEL | 120 | default y if DEBUG_KERNEL |
121 | help | 121 | help |
122 | Enable the kernel tracing infrastructure. | 122 | Enable the kernel tracing infrastructure. |
123 | 123 | ||
124 | if FTRACE | 124 | if FTRACE |
125 | 125 | ||
@@ -133,7 +133,7 @@ config FUNCTION_TRACER | |||
133 | help | 133 | help |
134 | Enable the kernel to trace every kernel function. This is done | 134 | Enable the kernel to trace every kernel function. This is done |
135 | by using a compiler feature to insert a small, 5-byte No-Operation | 135 | by using a compiler feature to insert a small, 5-byte No-Operation |
136 | instruction to the beginning of every kernel function, which NOP | 136 | instruction at the beginning of every kernel function, which NOP |
137 | sequence is then dynamically patched into a tracer call when | 137 | sequence is then dynamically patched into a tracer call when |
138 | tracing is enabled by the administrator. If it's runtime disabled | 138 | tracing is enabled by the administrator. If it's runtime disabled |
139 | (the bootup default), then the overhead of the instructions is very | 139 | (the bootup default), then the overhead of the instructions is very |
@@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER | |||
150 | and its entry. | 150 | and its entry. |
151 | Its first purpose is to trace the duration of functions and | 151 | Its first purpose is to trace the duration of functions and |
152 | draw a call graph for each thread with some information like | 152 | draw a call graph for each thread with some information like |
153 | the return value. This is done by setting the current return | 153 | the return value. This is done by setting the current return |
154 | address on the current task structure into a stack of calls. | 154 | address on the current task structure into a stack of calls. |
155 | 155 | ||
156 | 156 | ||
@@ -173,7 +173,7 @@ config IRQSOFF_TRACER | |||
173 | 173 | ||
174 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency | 174 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency |
175 | 175 | ||
176 | (Note that kernel size and overhead increases with this option | 176 | (Note that kernel size and overhead increase with this option |
177 | enabled. This option and the preempt-off timing option can be | 177 | enabled. This option and the preempt-off timing option can be |
178 | used together or separately.) | 178 | used together or separately.) |
179 | 179 | ||
@@ -186,7 +186,7 @@ config PREEMPT_TRACER | |||
186 | select TRACER_MAX_TRACE | 186 | select TRACER_MAX_TRACE |
187 | select RING_BUFFER_ALLOW_SWAP | 187 | select RING_BUFFER_ALLOW_SWAP |
188 | help | 188 | help |
189 | This option measures the time spent in preemption off critical | 189 | This option measures the time spent in preemption-off critical |
190 | sections, with microsecond accuracy. | 190 | sections, with microsecond accuracy. |
191 | 191 | ||
192 | The default measurement method is a maximum search, which is | 192 | The default measurement method is a maximum search, which is |
@@ -195,7 +195,7 @@ config PREEMPT_TRACER | |||
195 | 195 | ||
196 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency | 196 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency |
197 | 197 | ||
198 | (Note that kernel size and overhead increases with this option | 198 | (Note that kernel size and overhead increase with this option |
199 | enabled. This option and the irqs-off timing option can be | 199 | enabled. This option and the irqs-off timing option can be |
200 | used together or separately.) | 200 | used together or separately.) |
201 | 201 | ||
@@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS | |||
222 | depends on !GENERIC_TRACER | 222 | depends on !GENERIC_TRACER |
223 | select TRACING | 223 | select TRACING |
224 | help | 224 | help |
225 | This tracer hooks to various trace points in the kernel | 225 | This tracer hooks to various trace points in the kernel, |
226 | allowing the user to pick and choose which trace point they | 226 | allowing the user to pick and choose which trace point they |
227 | want to trace. It also includes the sched_switch tracer plugin. | 227 | want to trace. It also includes the sched_switch tracer plugin. |
228 | 228 | ||
@@ -265,19 +265,19 @@ choice | |||
265 | The likely/unlikely profiler only looks at the conditions that | 265 | The likely/unlikely profiler only looks at the conditions that |
266 | are annotated with a likely or unlikely macro. | 266 | are annotated with a likely or unlikely macro. |
267 | 267 | ||
268 | The "all branch" profiler will profile every if statement in the | 268 | The "all branch" profiler will profile every if-statement in the |
269 | kernel. This profiler will also enable the likely/unlikely | 269 | kernel. This profiler will also enable the likely/unlikely |
270 | profiler as well. | 270 | profiler. |
271 | 271 | ||
272 | Either of the above profilers add a bit of overhead to the system. | 272 | Either of the above profilers adds a bit of overhead to the system. |
273 | If unsure choose "No branch profiling". | 273 | If unsure, choose "No branch profiling". |
274 | 274 | ||
275 | config BRANCH_PROFILE_NONE | 275 | config BRANCH_PROFILE_NONE |
276 | bool "No branch profiling" | 276 | bool "No branch profiling" |
277 | help | 277 | help |
278 | No branch profiling. Branch profiling adds a bit of overhead. | 278 | No branch profiling. Branch profiling adds a bit of overhead. |
279 | Only enable it if you want to analyse the branching behavior. | 279 | Only enable it if you want to analyse the branching behavior. |
280 | Otherwise keep it disabled. | 280 | Otherwise keep it disabled. |
281 | 281 | ||
282 | config PROFILE_ANNOTATED_BRANCHES | 282 | config PROFILE_ANNOTATED_BRANCHES |
283 | bool "Trace likely/unlikely profiler" | 283 | bool "Trace likely/unlikely profiler" |
@@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES | |||
288 | 288 | ||
289 | /sys/kernel/debug/tracing/profile_annotated_branch | 289 | /sys/kernel/debug/tracing/profile_annotated_branch |
290 | 290 | ||
291 | Note: this will add a significant overhead, only turn this | 291 | Note: this will add a significant overhead; only turn this |
292 | on if you need to profile the system's use of these macros. | 292 | on if you need to profile the system's use of these macros. |
293 | 293 | ||
294 | config PROFILE_ALL_BRANCHES | 294 | config PROFILE_ALL_BRANCHES |
@@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES | |||
305 | 305 | ||
306 | This configuration, when enabled, will impose a great overhead | 306 | This configuration, when enabled, will impose a great overhead |
307 | on the system. This should only be enabled when the system | 307 | on the system. This should only be enabled when the system |
308 | is to be analyzed | 308 | is to be analyzed in much detail. |
309 | endchoice | 309 | endchoice |
310 | 310 | ||
311 | config TRACING_BRANCHES | 311 | config TRACING_BRANCHES |
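For reference, the "annotated" branches that the likely/unlikely profiler described in the hunks above counts are the conditions wrapped in the likely()/unlikely() macros. A small illustrative sketch follows; the macro definitions shown are the usual __builtin_expect form and are reproduced here as an assumption, not taken from this tree.

        /* Illustration only: what an "annotated" branch looks like. */
        #define likely(x)       __builtin_expect(!!(x), 1)
        #define unlikely(x)     __builtin_expect(!!(x), 0)

        static int first_byte(const char *s)
        {
                if (unlikely(s == 0))   /* this condition is what the profiler counts */
                        return -1;
                return s[0];
        }

        int main(void)
        {
                return first_byte("x") == 'x' ? 0 : 1;
        }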
@@ -335,7 +335,7 @@ config POWER_TRACER | |||
335 | depends on X86 | 335 | depends on X86 |
336 | select GENERIC_TRACER | 336 | select GENERIC_TRACER |
337 | help | 337 | help |
338 | This tracer helps developers to analyze and optimize the kernels | 338 | This tracer helps developers to analyze and optimize the kernel's |
339 | power management decisions, specifically the C-state and P-state | 339 | power management decisions, specifically the C-state and P-state |
340 | behavior. | 340 | behavior. |
341 | 341 | ||
@@ -391,14 +391,14 @@ config HW_BRANCH_TRACER | |||
391 | select GENERIC_TRACER | 391 | select GENERIC_TRACER |
392 | help | 392 | help |
393 | This tracer records all branches on the system in a circular | 393 | This tracer records all branches on the system in a circular |
394 | buffer giving access to the last N branches for each cpu. | 394 | buffer, giving access to the last N branches for each cpu. |
395 | 395 | ||
396 | config KMEMTRACE | 396 | config KMEMTRACE |
397 | bool "Trace SLAB allocations" | 397 | bool "Trace SLAB allocations" |
398 | select GENERIC_TRACER | 398 | select GENERIC_TRACER |
399 | help | 399 | help |
400 | kmemtrace provides tracing for slab allocator functions, such as | 400 | kmemtrace provides tracing for slab allocator functions, such as |
401 | kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected | 401 | kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected |
402 | data is then fed to the userspace application in order to analyse | 402 | data is then fed to the userspace application in order to analyse |
403 | allocation hotspots, internal fragmentation and so on, making it | 403 | allocation hotspots, internal fragmentation and so on, making it |
404 | possible to see how well an allocator performs, as well as debug | 404 | possible to see how well an allocator performs, as well as debug |
@@ -417,15 +417,15 @@ config WORKQUEUE_TRACER | |||
417 | bool "Trace workqueues" | 417 | bool "Trace workqueues" |
418 | select GENERIC_TRACER | 418 | select GENERIC_TRACER |
419 | help | 419 | help |
420 | The workqueue tracer provides some statistical informations | 420 | The workqueue tracer provides some statistical information |
421 | about each cpu workqueue thread such as the number of the | 421 | about each cpu workqueue thread such as the number of the |
422 | works inserted and executed since their creation. It can help | 422 | works inserted and executed since their creation. It can help |
423 | to evaluate the amount of work each of them have to perform. | 423 | to evaluate the amount of work each of them has to perform. |
424 | For example it can help a developer to decide whether he should | 424 | For example it can help a developer to decide whether he should |
425 | choose a per cpu workqueue instead of a singlethreaded one. | 425 | choose a per-cpu workqueue instead of a singlethreaded one. |
426 | 426 | ||
427 | config BLK_DEV_IO_TRACE | 427 | config BLK_DEV_IO_TRACE |
428 | bool "Support for tracing block io actions" | 428 | bool "Support for tracing block IO actions" |
429 | depends on SYSFS | 429 | depends on SYSFS |
430 | depends on BLOCK | 430 | depends on BLOCK |
431 | select RELAY | 431 | select RELAY |
@@ -456,15 +456,15 @@ config KPROBE_EVENT | |||
456 | select TRACING | 456 | select TRACING |
457 | default y | 457 | default y |
458 | help | 458 | help |
459 | This allows the user to add tracing events (similar to tracepoints) on the fly | 459 | This allows the user to add tracing events (similar to tracepoints) |
460 | via the ftrace interface. See Documentation/trace/kprobetrace.txt | 460 | on the fly via the ftrace interface. See |
461 | for more details. | 461 | Documentation/trace/kprobetrace.txt for more details. |
462 | 462 | ||
463 | Those events can be inserted wherever kprobes can probe, and record | 463 | Those events can be inserted wherever kprobes can probe, and record |
464 | various register and memory values. | 464 | various register and memory values. |
465 | 465 | ||
466 | This option is also required by perf-probe subcommand of perf tools. If | 466 | This option is also required by perf-probe subcommand of perf tools. |
467 | you want to use perf tools, this option is strongly recommended. | 467 | If you want to use perf tools, this option is strongly recommended. |
468 | 468 | ||
469 | config DYNAMIC_FTRACE | 469 | config DYNAMIC_FTRACE |
470 | bool "enable/disable ftrace tracepoints dynamically" | 470 | bool "enable/disable ftrace tracepoints dynamically" |
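A userspace sketch of the on-the-fly kprobe event workflow that the KPROBE_EVENT help text above points at. The probe name "myprobe" and the target symbol do_sys_open are illustrative, and the debugfs mount point is assumed; see Documentation/trace/kprobetrace.txt for the full definition syntax.

        /* Sketch only, not part of the patch: define a kprobe event through the
         * ftrace interface and enable it. */
        #include <stdio.h>

        static int write_str(const char *path, const char *s)
        {
                FILE *f = fopen(path, "w");

                if (!f)
                        return -1;
                fputs(s, f);
                return fclose(f);
        }

        int main(void)
        {
                /* Define the event ... */
                if (write_str("/sys/kernel/debug/tracing/kprobe_events",
                              "p:myprobe do_sys_open\n"))
                        return 1;
                /* ... and switch it on; hits then show up in the trace buffer. */
                return write_str("/sys/kernel/debug/tracing/events/kprobes/myprobe/enable",
                                 "1\n") ? 1 : 0;
        }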
@@ -472,32 +472,32 @@ config DYNAMIC_FTRACE | |||
472 | depends on HAVE_DYNAMIC_FTRACE | 472 | depends on HAVE_DYNAMIC_FTRACE |
473 | default y | 473 | default y |
474 | help | 474 | help |
475 | This option will modify all the calls to ftrace dynamically | 475 | This option will modify all the calls to ftrace dynamically |
476 | (will patch them out of the binary image and replaces them | 476 | (will patch them out of the binary image and replace them |
477 | with a No-Op instruction) as they are called. A table is | 477 | with a No-Op instruction) as they are called. A table is |
478 | created to dynamically enable them again. | 478 | created to dynamically enable them again. |
479 | 479 | ||
480 | This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise | 480 | This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but |
481 | has native performance as long as no tracing is active. | 481 | otherwise has native performance as long as no tracing is active. |
482 | 482 | ||
483 | The changes to the code are done by a kernel thread that | 483 | The changes to the code are done by a kernel thread that |
484 | wakes up once a second and checks to see if any ftrace calls | 484 | wakes up once a second and checks to see if any ftrace calls |
485 | were made. If so, it runs stop_machine (stops all CPUS) | 485 | were made. If so, it runs stop_machine (stops all CPUS) |
486 | and modifies the code to jump over the call to ftrace. | 486 | and modifies the code to jump over the call to ftrace. |
487 | 487 | ||
488 | config FUNCTION_PROFILER | 488 | config FUNCTION_PROFILER |
489 | bool "Kernel function profiler" | 489 | bool "Kernel function profiler" |
490 | depends on FUNCTION_TRACER | 490 | depends on FUNCTION_TRACER |
491 | default n | 491 | default n |
492 | help | 492 | help |
493 | This option enables the kernel function profiler. A file is created | 493 | This option enables the kernel function profiler. A file is created |
494 | in debugfs called function_profile_enabled which defaults to zero. | 494 | in debugfs called function_profile_enabled which defaults to zero. |
495 | When a 1 is echoed into this file profiling begins, and when a | 495 | When a 1 is echoed into this file profiling begins, and when a |
496 | zero is entered, profiling stops. A file in the trace_stats | 496 | zero is entered, profiling stops. A "functions" file is created in |
497 | directory called functions, that show the list of functions that | 497 | the trace_stats directory; this file shows the list of functions that |
498 | have been hit and their counters. | 498 | have been hit and their counters. |
499 | 499 | ||
500 | If in doubt, say N | 500 | If in doubt, say N. |
501 | 501 | ||
502 | config FTRACE_MCOUNT_RECORD | 502 | config FTRACE_MCOUNT_RECORD |
503 | def_bool y | 503 | def_bool y |
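A small userspace sketch of the workflow the FUNCTION_PROFILER help text above describes: write 1 to function_profile_enabled to start profiling, 0 to stop. The debugfs path is an assumption, and the exact names of the result files under the stats directory (per-cpu files on kernels of this vintage) should be checked in the tree rather than taken from this sketch.

        /* Sketch only, not part of the patch: toggle the kernel function profiler. */
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                const char *ctl = "/sys/kernel/debug/tracing/function_profile_enabled";
                FILE *f = fopen(ctl, "w");

                if (!f)
                        return 1;
                fputs("1\n", f);        /* start profiling */
                fclose(f);

                sleep(10);              /* let some samples accumulate */

                f = fopen(ctl, "w");
                if (!f)
                        return 1;
                fputs("0\n", f);        /* stop; counters stay readable afterwards */
                fclose(f);
                return 0;
        }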
@@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK | |||
556 | tristate "Ring buffer benchmark stress tester" | 556 | tristate "Ring buffer benchmark stress tester" |
557 | depends on RING_BUFFER | 557 | depends on RING_BUFFER |
558 | help | 558 | help |
559 | This option creates a test to stress the ring buffer and bench mark it. | 559 | This option creates a test to stress the ring buffer and benchmark it. |
560 | It creates its own ring buffer such that it will not interfer with | 560 | It creates its own ring buffer such that it will not interfere with |
561 | any other users of the ring buffer (such as ftrace). It then creates | 561 | any other users of the ring buffer (such as ftrace). It then creates |
562 | a producer and consumer that will run for 10 seconds and sleep for | 562 | a producer and consumer that will run for 10 seconds and sleep for |
563 | 10 seconds. Each interval it will print out the number of events | 563 | 10 seconds. Each interval it will print out the number of events |
@@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK | |||
566 | It does not disable interrupts or raise its priority, so it may be | 566 | It does not disable interrupts or raise its priority, so it may be |
567 | affected by processes that are running. | 567 | affected by processes that are running. |
568 | 568 | ||
569 | If unsure, say N | 569 | If unsure, say N. |
570 | 570 | ||
571 | endif # FTRACE | 571 | endif # FTRACE |
572 | 572 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 06ba26747d7e..0df1b0f2cb9e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -12,7 +12,7 @@ | |||
12 | * Copyright (C) 2004 William Lee Irwin III | 12 | * Copyright (C) 2004 William Lee Irwin III |
13 | */ | 13 | */ |
14 | #include <linux/ring_buffer.h> | 14 | #include <linux/ring_buffer.h> |
15 | #include <linux/utsrelease.h> | 15 | #include <generated/utsrelease.h> |
16 | #include <linux/stacktrace.h> | 16 | #include <linux/stacktrace.h> |
17 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
18 | #include <linux/kallsyms.h> | 18 | #include <linux/kallsyms.h> |
@@ -3949,7 +3949,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
3949 | if (!!(topt->flags->val & topt->opt->bit) != val) { | 3949 | if (!!(topt->flags->val & topt->opt->bit) != val) { |
3950 | mutex_lock(&trace_types_lock); | 3950 | mutex_lock(&trace_types_lock); |
3951 | ret = __set_tracer_option(current_trace, topt->flags, | 3951 | ret = __set_tracer_option(current_trace, topt->flags, |
3952 | topt->opt, val); | 3952 | topt->opt, !val); |
3953 | mutex_unlock(&trace_types_lock); | 3953 | mutex_unlock(&trace_types_lock); |
3954 | if (ret) | 3954 | if (ret) |
3955 | return ret; | 3955 | return ret; |
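The trace.c hunk above flips the last argument of __set_tracer_option() from val to !val. In this tree that parameter acts as a negate ("clear the flag") switch, so passing the raw val inverted every request from trace_options_write(). A simplified, self-contained sketch of the intended logic, not the kernel function verbatim:

        /* Why the fix passes "!val": a negate flag means "clear", so a caller
         * that wants the option to end up equal to val must negate it. */
        #include <stdio.h>

        static void set_option(unsigned int *flags, unsigned int bit, int neg)
        {
                if (neg)
                        *flags &= ~bit;         /* user wrote 0: clear the option */
                else
                        *flags |= bit;          /* user wrote 1: set the option   */
        }

        int main(void)
        {
                unsigned int flags = 0, bit = 0x4, val = 1;

                set_option(&flags, bit, !val);  /* fixed call: option becomes set  */
                printf("set:   %#x\n", flags);  /* 0x4 */

                val = 0;
                set_option(&flags, bit, !val);  /* and cleared again when val == 0 */
                printf("clear: %#x\n", flags);  /* 0 */
                return 0;
        }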
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 458e5bfe26d0..d4fa5dc1ee4e 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ | |||
158 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ | 158 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ |
159 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | 159 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ |
160 | offsetof(typeof(field), item), \ | 160 | offsetof(typeof(field), item), \ |
161 | sizeof(field.item), 0, FILTER_OTHER); \ | 161 | sizeof(field.item), \ |
162 | is_signed_type(type), FILTER_OTHER); \ | ||
162 | if (ret) \ | 163 | if (ret) \ |
163 | return ret; | 164 | return ret; |
164 | 165 | ||
@@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ | |||
168 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | 169 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ |
169 | offsetof(typeof(field), \ | 170 | offsetof(typeof(field), \ |
170 | container.item), \ | 171 | container.item), \ |
171 | sizeof(field.container.item), 0, \ | 172 | sizeof(field.container.item), \ |
172 | FILTER_OTHER); \ | 173 | is_signed_type(type), FILTER_OTHER); \ |
173 | if (ret) \ | 174 | if (ret) \ |
174 | return ret; | 175 | return ret; |
175 | 176 | ||
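Both trace_export.c hunks above stop hard-coding 0 as the "signed" argument of trace_define_field() and pass is_signed_type(type) instead, so array fields report their signedness like ordinary fields. A standalone sketch of how such a compile-time test can work; the definition below is illustrative (the kernel's own macro lives in the tracing headers and may differ in detail):

        /* Casting -1 to the type yields a negative value for signed types and a
         * huge positive value for unsigned ones, so the comparison folds to a
         * compile-time 0 or 1. */
        #define is_signed_type(type)    (((type)(-1)) < (type)1)

        #include <stdio.h>

        int main(void)
        {
                printf("int:          %d\n", is_signed_type(int));          /* 1 */
                printf("unsigned int: %d\n", is_signed_type(unsigned int)); /* 0 */
                return 0;
        }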
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 75d75dec226a..47f54ab57b68 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1201,10 +1201,11 @@ static int __probe_event_show_format(struct trace_seq *s, | |||
1201 | #undef SHOW_FIELD | 1201 | #undef SHOW_FIELD |
1202 | #define SHOW_FIELD(type, item, name) \ | 1202 | #define SHOW_FIELD(type, item, name) \ |
1203 | do { \ | 1203 | do { \ |
1204 | ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ | 1204 | ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ |
1205 | "offset:%u;\tsize:%u;\n", name, \ | 1205 | "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\ |
1206 | (unsigned int)offsetof(typeof(field), item),\ | 1206 | (unsigned int)offsetof(typeof(field), item),\ |
1207 | (unsigned int)sizeof(type)); \ | 1207 | (unsigned int)sizeof(type), \ |
1208 | is_signed_type(type)); \ | ||
1208 | if (!ret) \ | 1209 | if (!ret) \ |
1209 | return 0; \ | 1210 | return 0; \ |
1210 | } while (0) | 1211 | } while (0) |
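With the SHOW_FIELD change above, every field line that a kprobe event emits through its debugfs "format" file gains a signed: attribute, matching what trace_define_field() now records. An illustrative output line follows; the field name, offset and size are made-up values for the example, not taken from the patch:

        field:unsigned long __probe_ip; offset:12;      size:4; signed:0;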
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index faf37fa4408c..94103cdcf9d8 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c | |||
@@ -26,12 +26,13 @@ | |||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | 27 | ||
28 | #include "trace_output.h" | 28 | #include "trace_output.h" |
29 | #include "trace_stat.h" | ||
30 | #include "trace.h" | 29 | #include "trace.h" |
31 | 30 | ||
32 | #include <linux/hw_breakpoint.h> | 31 | #include <linux/hw_breakpoint.h> |
33 | #include <asm/hw_breakpoint.h> | 32 | #include <asm/hw_breakpoint.h> |
34 | 33 | ||
34 | #include <asm/atomic.h> | ||
35 | |||
35 | /* | 36 | /* |
36 | * For now, let us restrict the no. of symbols traced simultaneously to number | 37 | * For now, let us restrict the no. of symbols traced simultaneously to number |
37 | * of available hardware breakpoint registers. | 38 | * of available hardware breakpoint registers. |
@@ -44,7 +45,7 @@ struct trace_ksym { | |||
44 | struct perf_event **ksym_hbp; | 45 | struct perf_event **ksym_hbp; |
45 | struct perf_event_attr attr; | 46 | struct perf_event_attr attr; |
46 | #ifdef CONFIG_PROFILE_KSYM_TRACER | 47 | #ifdef CONFIG_PROFILE_KSYM_TRACER |
47 | unsigned long counter; | 48 | atomic64_t counter; |
48 | #endif | 49 | #endif |
49 | struct hlist_node ksym_hlist; | 50 | struct hlist_node ksym_hlist; |
50 | }; | 51 | }; |
@@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) | |||
69 | 70 | ||
70 | rcu_read_lock(); | 71 | rcu_read_lock(); |
71 | hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { | 72 | hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { |
72 | if ((entry->attr.bp_addr == hbp_hit_addr) && | 73 | if (entry->attr.bp_addr == hbp_hit_addr) { |
73 | (entry->counter <= MAX_UL_INT)) { | 74 | atomic64_inc(&entry->counter); |
74 | entry->counter++; | ||
75 | break; | 75 | break; |
76 | } | 76 | } |
77 | } | 77 | } |
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) | |||
197 | entry->attr.bp_addr = addr; | 197 | entry->attr.bp_addr = addr; |
198 | entry->attr.bp_len = HW_BREAKPOINT_LEN_4; | 198 | entry->attr.bp_len = HW_BREAKPOINT_LEN_4; |
199 | 199 | ||
200 | ret = -EAGAIN; | ||
201 | entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, | 200 | entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, |
202 | ksym_hbp_handler); | 201 | ksym_hbp_handler); |
203 | 202 | ||
@@ -300,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file, | |||
300 | * 2: echo 0 > ksym_trace_filter | 299 | * 2: echo 0 > ksym_trace_filter |
301 | * 3: echo "*:---" > ksym_trace_filter | 300 | * 3: echo "*:---" > ksym_trace_filter |
302 | */ | 301 | */ |
303 | if (!buf[0] || !strcmp(buf, "0") || | 302 | if (!input_string[0] || !strcmp(input_string, "0") || |
304 | !strcmp(buf, "*:---")) { | 303 | !strcmp(input_string, "*:---")) { |
305 | __ksym_trace_reset(); | 304 | __ksym_trace_reset(); |
306 | ret = 0; | 305 | ret = 0; |
307 | goto out; | 306 | goto out; |
@@ -444,102 +443,77 @@ struct tracer ksym_tracer __read_mostly = | |||
444 | .print_line = ksym_trace_output | 443 | .print_line = ksym_trace_output |
445 | }; | 444 | }; |
446 | 445 | ||
447 | __init static int init_ksym_trace(void) | ||
448 | { | ||
449 | struct dentry *d_tracer; | ||
450 | struct dentry *entry; | ||
451 | |||
452 | d_tracer = tracing_init_dentry(); | ||
453 | ksym_filter_entry_count = 0; | ||
454 | |||
455 | entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, | ||
456 | NULL, &ksym_tracing_fops); | ||
457 | if (!entry) | ||
458 | pr_warning("Could not create debugfs " | ||
459 | "'ksym_trace_filter' file\n"); | ||
460 | |||
461 | return register_tracer(&ksym_tracer); | ||
462 | } | ||
463 | device_initcall(init_ksym_trace); | ||
464 | |||
465 | |||
466 | #ifdef CONFIG_PROFILE_KSYM_TRACER | 446 | #ifdef CONFIG_PROFILE_KSYM_TRACER |
467 | static int ksym_tracer_stat_headers(struct seq_file *m) | 447 | static int ksym_profile_show(struct seq_file *m, void *v) |
468 | { | 448 | { |
449 | struct hlist_node *node; | ||
450 | struct trace_ksym *entry; | ||
451 | int access_type = 0; | ||
452 | char fn_name[KSYM_NAME_LEN]; | ||
453 | |||
469 | seq_puts(m, " Access Type "); | 454 | seq_puts(m, " Access Type "); |
470 | seq_puts(m, " Symbol Counter\n"); | 455 | seq_puts(m, " Symbol Counter\n"); |
471 | seq_puts(m, " ----------- "); | 456 | seq_puts(m, " ----------- "); |
472 | seq_puts(m, " ------ -------\n"); | 457 | seq_puts(m, " ------ -------\n"); |
473 | return 0; | ||
474 | } | ||
475 | 458 | ||
476 | static int ksym_tracer_stat_show(struct seq_file *m, void *v) | 459 | rcu_read_lock(); |
477 | { | 460 | hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { |
478 | struct hlist_node *stat = v; | ||
479 | struct trace_ksym *entry; | ||
480 | int access_type = 0; | ||
481 | char fn_name[KSYM_NAME_LEN]; | ||
482 | 461 | ||
483 | entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); | 462 | access_type = entry->attr.bp_type; |
484 | 463 | ||
485 | access_type = entry->attr.bp_type; | 464 | switch (access_type) { |
465 | case HW_BREAKPOINT_R: | ||
466 | seq_puts(m, " R "); | ||
467 | break; | ||
468 | case HW_BREAKPOINT_W: | ||
469 | seq_puts(m, " W "); | ||
470 | break; | ||
471 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: | ||
472 | seq_puts(m, " RW "); | ||
473 | break; | ||
474 | default: | ||
475 | seq_puts(m, " NA "); | ||
476 | } | ||
486 | 477 | ||
487 | switch (access_type) { | 478 | if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) |
488 | case HW_BREAKPOINT_R: | 479 | seq_printf(m, " %-36s", fn_name); |
489 | seq_puts(m, " R "); | 480 | else |
490 | break; | 481 | seq_printf(m, " %-36s", "<NA>"); |
491 | case HW_BREAKPOINT_W: | 482 | seq_printf(m, " %15llu\n", |
492 | seq_puts(m, " W "); | 483 | (unsigned long long)atomic64_read(&entry->counter)); |
493 | break; | ||
494 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: | ||
495 | seq_puts(m, " RW "); | ||
496 | break; | ||
497 | default: | ||
498 | seq_puts(m, " NA "); | ||
499 | } | 484 | } |
500 | 485 | rcu_read_unlock(); | |
501 | if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) | ||
502 | seq_printf(m, " %-36s", fn_name); | ||
503 | else | ||
504 | seq_printf(m, " %-36s", "<NA>"); | ||
505 | seq_printf(m, " %15lu\n", entry->counter); | ||
506 | 486 | ||
507 | return 0; | 487 | return 0; |
508 | } | 488 | } |
509 | 489 | ||
510 | static void *ksym_tracer_stat_start(struct tracer_stat *trace) | 490 | static int ksym_profile_open(struct inode *node, struct file *file) |
511 | { | 491 | { |
512 | return ksym_filter_head.first; | 492 | return single_open(file, ksym_profile_show, NULL); |
513 | } | ||
514 | |||
515 | static void * | ||
516 | ksym_tracer_stat_next(void *v, int idx) | ||
517 | { | ||
518 | struct hlist_node *stat = v; | ||
519 | |||
520 | return stat->next; | ||
521 | } | 493 | } |
522 | 494 | ||
523 | static struct tracer_stat ksym_tracer_stats = { | 495 | static const struct file_operations ksym_profile_fops = { |
524 | .name = "ksym_tracer", | 496 | .open = ksym_profile_open, |
525 | .stat_start = ksym_tracer_stat_start, | 497 | .read = seq_read, |
526 | .stat_next = ksym_tracer_stat_next, | 498 | .llseek = seq_lseek, |
527 | .stat_headers = ksym_tracer_stat_headers, | 499 | .release = single_release, |
528 | .stat_show = ksym_tracer_stat_show | ||
529 | }; | 500 | }; |
501 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
530 | 502 | ||
531 | __init static int ksym_tracer_stat_init(void) | 503 | __init static int init_ksym_trace(void) |
532 | { | 504 | { |
533 | int ret; | 505 | struct dentry *d_tracer; |
534 | 506 | ||
535 | ret = register_stat_tracer(&ksym_tracer_stats); | 507 | d_tracer = tracing_init_dentry(); |
536 | if (ret) { | ||
537 | printk(KERN_WARNING "Warning: could not register " | ||
538 | "ksym tracer stats\n"); | ||
539 | return 1; | ||
540 | } | ||
541 | 508 | ||
542 | return 0; | 509 | trace_create_file("ksym_trace_filter", 0644, d_tracer, |
510 | NULL, &ksym_tracing_fops); | ||
511 | |||
512 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
513 | trace_create_file("ksym_profile", 0444, d_tracer, | ||
514 | NULL, &ksym_profile_fops); | ||
515 | #endif | ||
516 | |||
517 | return register_tracer(&ksym_tracer); | ||
543 | } | 518 | } |
544 | fs_initcall(ksym_tracer_stat_init); | 519 | device_initcall(init_ksym_trace); |
545 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
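The trace_ksym.c rewrite above drops the stat-tracer plumbing (stat_start/stat_next/stat_show and register_stat_tracer) in favour of a plain single_open() seq_file exposed as "ksym_profile", and turns the per-symbol hit counter into an atomic64_t so breakpoint hits can be counted without locking. A generic, self-contained sketch of that seq_file pattern is below in case it is useful as a template; all names (demo_*) are made up and it is not taken from the patch.

        /* Sketch only: expose a read-only counter through a single_open seq_file. */
        #include <linux/module.h>
        #include <linux/init.h>
        #include <linux/debugfs.h>
        #include <linux/seq_file.h>
        #include <asm/atomic.h>

        static atomic64_t demo_counter = ATOMIC64_INIT(0);
        static struct dentry *demo_file;

        static int demo_profile_show(struct seq_file *m, void *v)
        {
                /* Everything is emitted in one go -- no start/next/show iterator
                 * callbacks, which is what made the stat-tracer version larger. */
                seq_printf(m, "hits: %llu\n",
                           (unsigned long long)atomic64_read(&demo_counter));
                return 0;
        }

        static int demo_profile_open(struct inode *inode, struct file *file)
        {
                return single_open(file, demo_profile_show, NULL);
        }

        static const struct file_operations demo_profile_fops = {
                .owner   = THIS_MODULE,
                .open    = demo_profile_open,
                .read    = seq_read,
                .llseek  = seq_lseek,
                .release = single_release,
        };

        static int __init demo_init(void)
        {
                atomic64_inc(&demo_counter);    /* stands in for a breakpoint hit */
                demo_file = debugfs_create_file("demo_profile", 0444, NULL, NULL,
                                                &demo_profile_fops);
                return 0;
        }

        static void __exit demo_exit(void)
        {
                debugfs_remove(demo_file);
        }

        module_init(demo_init);
        module_exit(demo_exit);
        MODULE_LICENSE("GPL");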