aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-11-19 03:44:37 -0500
committerIngo Molnar <mingo@elte.hu>2008-11-19 03:44:37 -0500
commit3ac3ba0b396fd99550e08034b0e4c27fdf39c252 (patch)
treef9f69fac41d66540a37a33808714d055d702328f /kernel
parent934352f214b3251eb0793c1209d346595a661d80 (diff)
parent7f0f598a0069d1ab072375965a4b69137233169c (diff)
Merge branch 'linus' into sched/core
Conflicts: kernel/Makefile
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/audit_tree.c91
-rw-r--r--kernel/auditfilter.c14
-rw-r--r--kernel/cgroup_freezer.c19
-rw-r--r--kernel/cpu.c3
-rw-r--r--kernel/cpuset.c12
-rw-r--r--kernel/exit.c14
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/hrtimer.c26
-rw-r--r--kernel/kprobes.c23
-rw-r--r--kernel/posix-cpu-timers.c7
-rw-r--r--kernel/power/main.c2
-rw-r--r--kernel/profile.c2
-rw-r--r--kernel/relay.c9
-rw-r--r--kernel/sched.c28
-rw-r--r--kernel/sched_debug.c46
-rw-r--r--kernel/sched_fair.c17
-rw-r--r--kernel/sched_stats.h15
-rw-r--r--kernel/softirq.c7
-rw-r--r--kernel/stop_machine.c5
-rw-r--r--kernel/time/tick-sched.c4
-rw-r--r--kernel/trace/ftrace.c34
-rw-r--r--kernel/trace/ring_buffer.c117
-rw-r--r--kernel/trace/trace.c19
-rw-r--r--kernel/workqueue.c45
25 files changed, 422 insertions, 150 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 46e67a398495..6a212b842d86 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o
13 13
14CFLAGS_REMOVE_sched.o = -mno-spe
15
16ifdef CONFIG_FUNCTION_TRACER 14ifdef CONFIG_FUNCTION_TRACER
17# Do not trace debug files and internal ftrace files 15# Do not trace debug files and internal ftrace files
18CFLAGS_REMOVE_lockdep.o = -pg 16CFLAGS_REMOVE_lockdep.o = -pg
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8ba0e0d934f2..8b509441f49a 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_chunk {
24 struct list_head trees; /* with root here */ 24 struct list_head trees; /* with root here */
25 int dead; 25 int dead;
26 int count; 26 int count;
27 atomic_long_t refs;
27 struct rcu_head head; 28 struct rcu_head head;
28 struct node { 29 struct node {
29 struct list_head list; 30 struct list_head list;
@@ -56,7 +57,8 @@ static LIST_HEAD(prune_list);
56 * tree is refcounted; one reference for "some rules on rules_list refer to 57 * tree is refcounted; one reference for "some rules on rules_list refer to
57 * it", one for each chunk with pointer to it. 58 * it", one for each chunk with pointer to it.
58 * 59 *
59 * chunk is refcounted by embedded inotify_watch. 60 * chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount
61 * of watch contributes 1 to .refs).
60 * 62 *
61 * node.index allows to get from node.list to containing chunk. 63 * node.index allows to get from node.list to containing chunk.
62 * MSB of that sucker is stolen to mark taggings that we might have to 64 * MSB of that sucker is stolen to mark taggings that we might have to
@@ -121,6 +123,7 @@ static struct audit_chunk *alloc_chunk(int count)
121 INIT_LIST_HEAD(&chunk->hash); 123 INIT_LIST_HEAD(&chunk->hash);
122 INIT_LIST_HEAD(&chunk->trees); 124 INIT_LIST_HEAD(&chunk->trees);
123 chunk->count = count; 125 chunk->count = count;
126 atomic_long_set(&chunk->refs, 1);
124 for (i = 0; i < count; i++) { 127 for (i = 0; i < count; i++) {
125 INIT_LIST_HEAD(&chunk->owners[i].list); 128 INIT_LIST_HEAD(&chunk->owners[i].list);
126 chunk->owners[i].index = i; 129 chunk->owners[i].index = i;
@@ -129,9 +132,8 @@ static struct audit_chunk *alloc_chunk(int count)
129 return chunk; 132 return chunk;
130} 133}
131 134
132static void __free_chunk(struct rcu_head *rcu) 135static void free_chunk(struct audit_chunk *chunk)
133{ 136{
134 struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
135 int i; 137 int i;
136 138
137 for (i = 0; i < chunk->count; i++) { 139 for (i = 0; i < chunk->count; i++) {
@@ -141,14 +143,16 @@ static void __free_chunk(struct rcu_head *rcu)
141 kfree(chunk); 143 kfree(chunk);
142} 144}
143 145
144static inline void free_chunk(struct audit_chunk *chunk) 146void audit_put_chunk(struct audit_chunk *chunk)
145{ 147{
146 call_rcu(&chunk->head, __free_chunk); 148 if (atomic_long_dec_and_test(&chunk->refs))
149 free_chunk(chunk);
147} 150}
148 151
149void audit_put_chunk(struct audit_chunk *chunk) 152static void __put_chunk(struct rcu_head *rcu)
150{ 153{
151 put_inotify_watch(&chunk->watch); 154 struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
155 audit_put_chunk(chunk);
152} 156}
153 157
154enum {HASH_SIZE = 128}; 158enum {HASH_SIZE = 128};
@@ -176,7 +180,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
176 180
177 list_for_each_entry_rcu(p, list, hash) { 181 list_for_each_entry_rcu(p, list, hash) {
178 if (p->watch.inode == inode) { 182 if (p->watch.inode == inode) {
179 get_inotify_watch(&p->watch); 183 atomic_long_inc(&p->refs);
180 return p; 184 return p;
181 } 185 }
182 } 186 }
@@ -194,17 +198,49 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
194 198
195/* tagging and untagging inodes with trees */ 199/* tagging and untagging inodes with trees */
196 200
197static void untag_chunk(struct audit_chunk *chunk, struct node *p) 201static struct audit_chunk *find_chunk(struct node *p)
202{
203 int index = p->index & ~(1U<<31);
204 p -= index;
205 return container_of(p, struct audit_chunk, owners[0]);
206}
207
208static void untag_chunk(struct node *p)
198{ 209{
210 struct audit_chunk *chunk = find_chunk(p);
199 struct audit_chunk *new; 211 struct audit_chunk *new;
200 struct audit_tree *owner; 212 struct audit_tree *owner;
201 int size = chunk->count - 1; 213 int size = chunk->count - 1;
202 int i, j; 214 int i, j;
203 215
216 if (!pin_inotify_watch(&chunk->watch)) {
217 /*
218 * Filesystem is shutting down; all watches are getting
219 * evicted, just take it off the node list for this
220 * tree and let the eviction logics take care of the
221 * rest.
222 */
223 owner = p->owner;
224 if (owner->root == chunk) {
225 list_del_init(&owner->same_root);
226 owner->root = NULL;
227 }
228 list_del_init(&p->list);
229 p->owner = NULL;
230 put_tree(owner);
231 return;
232 }
233
234 spin_unlock(&hash_lock);
235
236 /*
237 * pin_inotify_watch() succeeded, so the watch won't go away
238 * from under us.
239 */
204 mutex_lock(&chunk->watch.inode->inotify_mutex); 240 mutex_lock(&chunk->watch.inode->inotify_mutex);
205 if (chunk->dead) { 241 if (chunk->dead) {
206 mutex_unlock(&chunk->watch.inode->inotify_mutex); 242 mutex_unlock(&chunk->watch.inode->inotify_mutex);
207 return; 243 goto out;
208 } 244 }
209 245
210 owner = p->owner; 246 owner = p->owner;
@@ -221,7 +257,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
221 inotify_evict_watch(&chunk->watch); 257 inotify_evict_watch(&chunk->watch);
222 mutex_unlock(&chunk->watch.inode->inotify_mutex); 258 mutex_unlock(&chunk->watch.inode->inotify_mutex);
223 put_inotify_watch(&chunk->watch); 259 put_inotify_watch(&chunk->watch);
224 return; 260 goto out;
225 } 261 }
226 262
227 new = alloc_chunk(size); 263 new = alloc_chunk(size);
@@ -263,7 +299,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
263 inotify_evict_watch(&chunk->watch); 299 inotify_evict_watch(&chunk->watch);
264 mutex_unlock(&chunk->watch.inode->inotify_mutex); 300 mutex_unlock(&chunk->watch.inode->inotify_mutex);
265 put_inotify_watch(&chunk->watch); 301 put_inotify_watch(&chunk->watch);
266 return; 302 goto out;
267 303
268Fallback: 304Fallback:
269 // do the best we can 305 // do the best we can
@@ -277,6 +313,9 @@ Fallback:
277 put_tree(owner); 313 put_tree(owner);
278 spin_unlock(&hash_lock); 314 spin_unlock(&hash_lock);
279 mutex_unlock(&chunk->watch.inode->inotify_mutex); 315 mutex_unlock(&chunk->watch.inode->inotify_mutex);
316out:
317 unpin_inotify_watch(&chunk->watch);
318 spin_lock(&hash_lock);
280} 319}
281 320
282static int create_chunk(struct inode *inode, struct audit_tree *tree) 321static int create_chunk(struct inode *inode, struct audit_tree *tree)
@@ -387,13 +426,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
387 return 0; 426 return 0;
388} 427}
389 428
390static struct audit_chunk *find_chunk(struct node *p)
391{
392 int index = p->index & ~(1U<<31);
393 p -= index;
394 return container_of(p, struct audit_chunk, owners[0]);
395}
396
397static void kill_rules(struct audit_tree *tree) 429static void kill_rules(struct audit_tree *tree)
398{ 430{
399 struct audit_krule *rule, *next; 431 struct audit_krule *rule, *next;
@@ -431,17 +463,10 @@ static void prune_one(struct audit_tree *victim)
431 spin_lock(&hash_lock); 463 spin_lock(&hash_lock);
432 while (!list_empty(&victim->chunks)) { 464 while (!list_empty(&victim->chunks)) {
433 struct node *p; 465 struct node *p;
434 struct audit_chunk *chunk;
435 466
436 p = list_entry(victim->chunks.next, struct node, list); 467 p = list_entry(victim->chunks.next, struct node, list);
437 chunk = find_chunk(p);
438 get_inotify_watch(&chunk->watch);
439 spin_unlock(&hash_lock);
440
441 untag_chunk(chunk, p);
442 468
443 put_inotify_watch(&chunk->watch); 469 untag_chunk(p);
444 spin_lock(&hash_lock);
445 } 470 }
446 spin_unlock(&hash_lock); 471 spin_unlock(&hash_lock);
447 put_tree(victim); 472 put_tree(victim);
@@ -469,7 +494,6 @@ static void trim_marked(struct audit_tree *tree)
469 494
470 while (!list_empty(&tree->chunks)) { 495 while (!list_empty(&tree->chunks)) {
471 struct node *node; 496 struct node *node;
472 struct audit_chunk *chunk;
473 497
474 node = list_entry(tree->chunks.next, struct node, list); 498 node = list_entry(tree->chunks.next, struct node, list);
475 499
@@ -477,14 +501,7 @@ static void trim_marked(struct audit_tree *tree)
477 if (!(node->index & (1U<<31))) 501 if (!(node->index & (1U<<31)))
478 break; 502 break;
479 503
480 chunk = find_chunk(node); 504 untag_chunk(node);
481 get_inotify_watch(&chunk->watch);
482 spin_unlock(&hash_lock);
483
484 untag_chunk(chunk, node);
485
486 put_inotify_watch(&chunk->watch);
487 spin_lock(&hash_lock);
488 } 505 }
489 if (!tree->root && !tree->goner) { 506 if (!tree->root && !tree->goner) {
490 tree->goner = 1; 507 tree->goner = 1;
@@ -878,7 +895,7 @@ static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
878static void destroy_watch(struct inotify_watch *watch) 895static void destroy_watch(struct inotify_watch *watch)
879{ 896{
880 struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch); 897 struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
881 free_chunk(chunk); 898 call_rcu(&chunk->head, __put_chunk);
882} 899}
883 900
884static const struct inotify_operations rtree_inotify_ops = { 901static const struct inotify_operations rtree_inotify_ops = {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b7d354e2b0ef..9fd85a4640a0 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1094,8 +1094,8 @@ static void audit_inotify_unregister(struct list_head *in_list)
1094 list_for_each_entry_safe(p, n, in_list, ilist) { 1094 list_for_each_entry_safe(p, n, in_list, ilist) {
1095 list_del(&p->ilist); 1095 list_del(&p->ilist);
1096 inotify_rm_watch(audit_ih, &p->wdata); 1096 inotify_rm_watch(audit_ih, &p->wdata);
1097 /* the put matching the get in audit_do_del_rule() */ 1097 /* the unpin matching the pin in audit_do_del_rule() */
1098 put_inotify_watch(&p->wdata); 1098 unpin_inotify_watch(&p->wdata);
1099 } 1099 }
1100} 1100}
1101 1101
@@ -1389,9 +1389,13 @@ static inline int audit_del_rule(struct audit_entry *entry,
1389 /* Put parent on the inotify un-registration 1389 /* Put parent on the inotify un-registration
1390 * list. Grab a reference before releasing 1390 * list. Grab a reference before releasing
1391 * audit_filter_mutex, to be released in 1391 * audit_filter_mutex, to be released in
1392 * audit_inotify_unregister(). */ 1392 * audit_inotify_unregister().
1393 list_add(&parent->ilist, &inotify_list); 1393 * If filesystem is going away, just leave
1394 get_inotify_watch(&parent->wdata); 1394 * the sucker alone, eviction will take
1395 * care of it.
1396 */
1397 if (pin_inotify_watch(&parent->wdata))
1398 list_add(&parent->ilist, &inotify_list);
1395 } 1399 }
1396 } 1400 }
1397 } 1401 }
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 7fa476f01d05..fb249e2bcada 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -184,9 +184,20 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
184{ 184{
185 struct freezer *freezer; 185 struct freezer *freezer;
186 186
187 task_lock(task); 187 /*
188 * No lock is needed, since the task isn't on tasklist yet,
189 * so it can't be moved to another cgroup, which means the
190 * freezer won't be removed and will be valid during this
191 * function call.
192 */
188 freezer = task_freezer(task); 193 freezer = task_freezer(task);
189 task_unlock(task); 194
195 /*
196 * The root cgroup is non-freezable, so we can skip the
197 * following check.
198 */
199 if (!freezer->css.cgroup->parent)
200 return;
190 201
191 spin_lock_irq(&freezer->lock); 202 spin_lock_irq(&freezer->lock);
192 BUG_ON(freezer->state == CGROUP_FROZEN); 203 BUG_ON(freezer->state == CGROUP_FROZEN);
@@ -331,7 +342,7 @@ static int freezer_write(struct cgroup *cgroup,
331 else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0) 342 else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
332 goal_state = CGROUP_FROZEN; 343 goal_state = CGROUP_FROZEN;
333 else 344 else
334 return -EIO; 345 return -EINVAL;
335 346
336 if (!cgroup_lock_live_group(cgroup)) 347 if (!cgroup_lock_live_group(cgroup))
337 return -ENODEV; 348 return -ENODEV;
@@ -350,6 +361,8 @@ static struct cftype files[] = {
350 361
351static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) 362static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
352{ 363{
364 if (!cgroup->parent)
365 return 0;
353 return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); 366 return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
354} 367}
355 368
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 86d49045daed..5a732c5ef08b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
499#endif 499#endif
500}; 500};
501EXPORT_SYMBOL_GPL(cpu_bit_bitmap); 501EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
502
503const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
504EXPORT_SYMBOL(cpu_all_bits);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e00526f52ec..81fc6791a296 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -587,7 +587,6 @@ static int generate_sched_domains(cpumask_t **domains,
587 int ndoms; /* number of sched domains in result */ 587 int ndoms; /* number of sched domains in result */
588 int nslot; /* next empty doms[] cpumask_t slot */ 588 int nslot; /* next empty doms[] cpumask_t slot */
589 589
590 ndoms = 0;
591 doms = NULL; 590 doms = NULL;
592 dattr = NULL; 591 dattr = NULL;
593 csa = NULL; 592 csa = NULL;
@@ -674,10 +673,8 @@ restart:
674 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. 673 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
675 */ 674 */
676 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); 675 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
677 if (!doms) { 676 if (!doms)
678 ndoms = 0;
679 goto done; 677 goto done;
680 }
681 678
682 /* 679 /*
683 * The rest of the code, including the scheduler, can deal with 680 * The rest of the code, including the scheduler, can deal with
@@ -732,6 +729,13 @@ restart:
732done: 729done:
733 kfree(csa); 730 kfree(csa);
734 731
732 /*
733 * Fallback to the default domain if kmalloc() failed.
734 * See comments in partition_sched_domains().
735 */
736 if (doms == NULL)
737 ndoms = 1;
738
735 *domains = doms; 739 *domains = doms;
736 *attributes = dattr; 740 *attributes = dattr;
737 return ndoms; 741 return ndoms;
diff --git a/kernel/exit.c b/kernel/exit.c
index 80137a5d9467..2d8be7ebb0f7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -40,7 +40,6 @@
40#include <linux/cn_proc.h> 40#include <linux/cn_proc.h>
41#include <linux/mutex.h> 41#include <linux/mutex.h>
42#include <linux/futex.h> 42#include <linux/futex.h>
43#include <linux/compat.h>
44#include <linux/pipe_fs_i.h> 43#include <linux/pipe_fs_i.h>
45#include <linux/audit.h> /* for audit_free() */ 44#include <linux/audit.h> /* for audit_free() */
46#include <linux/resource.h> 45#include <linux/resource.h>
@@ -141,6 +140,11 @@ static void __exit_signal(struct task_struct *tsk)
141 if (sig) { 140 if (sig) {
142 flush_sigqueue(&sig->shared_pending); 141 flush_sigqueue(&sig->shared_pending);
143 taskstats_tgid_free(sig); 142 taskstats_tgid_free(sig);
143 /*
144 * Make sure ->signal can't go away under rq->lock,
145 * see account_group_exec_runtime().
146 */
147 task_rq_unlock_wait(tsk);
144 __cleanup_signal(sig); 148 __cleanup_signal(sig);
145 } 149 }
146} 150}
@@ -1054,14 +1058,6 @@ NORET_TYPE void do_exit(long code)
1054 exit_itimers(tsk->signal); 1058 exit_itimers(tsk->signal);
1055 } 1059 }
1056 acct_collect(code, group_dead); 1060 acct_collect(code, group_dead);
1057#ifdef CONFIG_FUTEX
1058 if (unlikely(tsk->robust_list))
1059 exit_robust_list(tsk);
1060#ifdef CONFIG_COMPAT
1061 if (unlikely(tsk->compat_robust_list))
1062 compat_exit_robust_list(tsk);
1063#endif
1064#endif
1065 if (group_dead) 1061 if (group_dead)
1066 tty_audit_exit(); 1062 tty_audit_exit();
1067 if (unlikely(tsk->audit_context)) 1063 if (unlikely(tsk->audit_context))
diff --git a/kernel/fork.c b/kernel/fork.c
index f6083561dfe0..2a372a0e206f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
40#include <linux/jiffies.h> 40#include <linux/jiffies.h>
41#include <linux/tracehook.h> 41#include <linux/tracehook.h>
42#include <linux/futex.h> 42#include <linux/futex.h>
43#include <linux/compat.h>
43#include <linux/task_io_accounting_ops.h> 44#include <linux/task_io_accounting_ops.h>
44#include <linux/rcupdate.h> 45#include <linux/rcupdate.h>
45#include <linux/ptrace.h> 46#include <linux/ptrace.h>
@@ -519,6 +520,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
519{ 520{
520 struct completion *vfork_done = tsk->vfork_done; 521 struct completion *vfork_done = tsk->vfork_done;
521 522
523 /* Get rid of any futexes when releasing the mm */
524#ifdef CONFIG_FUTEX
525 if (unlikely(tsk->robust_list))
526 exit_robust_list(tsk);
527#ifdef CONFIG_COMPAT
528 if (unlikely(tsk->compat_robust_list))
529 compat_exit_robust_list(tsk);
530#endif
531#endif
532
522 /* Get rid of any cached register state */ 533 /* Get rid of any cached register state */
523 deactivate_mm(tsk, mm); 534 deactivate_mm(tsk, mm);
524 535
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b465dfde426..47e63349d1b2 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -664,14 +664,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
664 664
665 /* Timer is expired, act upon the callback mode */ 665 /* Timer is expired, act upon the callback mode */
666 switch(timer->cb_mode) { 666 switch(timer->cb_mode) {
667 case HRTIMER_CB_IRQSAFE_NO_RESTART:
668 debug_hrtimer_deactivate(timer);
669 /*
670 * We can call the callback from here. No restart
671 * happens, so no danger of recursion
672 */
673 BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
674 return 1;
675 case HRTIMER_CB_IRQSAFE_PERCPU: 667 case HRTIMER_CB_IRQSAFE_PERCPU:
676 case HRTIMER_CB_IRQSAFE_UNLOCKED: 668 case HRTIMER_CB_IRQSAFE_UNLOCKED:
677 /* 669 /*
@@ -683,7 +675,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
683 */ 675 */
684 debug_hrtimer_deactivate(timer); 676 debug_hrtimer_deactivate(timer);
685 return 1; 677 return 1;
686 case HRTIMER_CB_IRQSAFE:
687 case HRTIMER_CB_SOFTIRQ: 678 case HRTIMER_CB_SOFTIRQ:
688 /* 679 /*
689 * Move everything else into the softirq pending list ! 680 * Move everything else into the softirq pending list !
@@ -1209,6 +1200,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
1209 enum hrtimer_restart (*fn)(struct hrtimer *); 1200 enum hrtimer_restart (*fn)(struct hrtimer *);
1210 struct hrtimer *timer; 1201 struct hrtimer *timer;
1211 int restart; 1202 int restart;
1203 int emulate_hardirq_ctx = 0;
1212 1204
1213 timer = list_entry(cpu_base->cb_pending.next, 1205 timer = list_entry(cpu_base->cb_pending.next,
1214 struct hrtimer, cb_entry); 1206 struct hrtimer, cb_entry);
@@ -1217,10 +1209,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
1217 timer_stats_account_hrtimer(timer); 1209 timer_stats_account_hrtimer(timer);
1218 1210
1219 fn = timer->function; 1211 fn = timer->function;
1212 /*
1213 * A timer might have been added to the cb_pending list
1214 * when it was migrated during a cpu-offline operation.
1215 * Emulate hardirq context for such timers.
1216 */
1217 if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
1218 timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
1219 emulate_hardirq_ctx = 1;
1220
1220 __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); 1221 __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
1221 spin_unlock_irq(&cpu_base->lock); 1222 spin_unlock_irq(&cpu_base->lock);
1222 1223
1223 restart = fn(timer); 1224 if (unlikely(emulate_hardirq_ctx)) {
1225 local_irq_disable();
1226 restart = fn(timer);
1227 local_irq_enable();
1228 } else
1229 restart = fn(timer);
1224 1230
1225 spin_lock_irq(&cpu_base->lock); 1231 spin_lock_irq(&cpu_base->lock);
1226 1232
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8b57a2597f21..9f8a3f25259a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -72,7 +72,7 @@ static bool kprobe_enabled;
72DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 72DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
74static struct { 74static struct {
75 spinlock_t lock ____cacheline_aligned; 75 spinlock_t lock ____cacheline_aligned_in_smp;
76} kretprobe_table_locks[KPROBE_TABLE_SIZE]; 76} kretprobe_table_locks[KPROBE_TABLE_SIZE];
77 77
78static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) 78static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
@@ -613,30 +613,37 @@ static int __kprobes __register_kprobe(struct kprobe *p,
613 return -EINVAL; 613 return -EINVAL;
614 p->addr = addr; 614 p->addr = addr;
615 615
616 if (!kernel_text_address((unsigned long) p->addr) || 616 preempt_disable();
617 in_kprobes_functions((unsigned long) p->addr)) 617 if (!__kernel_text_address((unsigned long) p->addr) ||
618 in_kprobes_functions((unsigned long) p->addr)) {
619 preempt_enable();
618 return -EINVAL; 620 return -EINVAL;
621 }
619 622
620 p->mod_refcounted = 0; 623 p->mod_refcounted = 0;
621 624
622 /* 625 /*
623 * Check if are we probing a module. 626 * Check if are we probing a module.
624 */ 627 */
625 probed_mod = module_text_address((unsigned long) p->addr); 628 probed_mod = __module_text_address((unsigned long) p->addr);
626 if (probed_mod) { 629 if (probed_mod) {
627 struct module *calling_mod = module_text_address(called_from); 630 struct module *calling_mod;
631 calling_mod = __module_text_address(called_from);
628 /* 632 /*
629 * We must allow modules to probe themself and in this case 633 * We must allow modules to probe themself and in this case
630 * avoid incrementing the module refcount, so as to allow 634 * avoid incrementing the module refcount, so as to allow
631 * unloading of self probing modules. 635 * unloading of self probing modules.
632 */ 636 */
633 if (calling_mod && calling_mod != probed_mod) { 637 if (calling_mod && calling_mod != probed_mod) {
634 if (unlikely(!try_module_get(probed_mod))) 638 if (unlikely(!try_module_get(probed_mod))) {
639 preempt_enable();
635 return -EINVAL; 640 return -EINVAL;
641 }
636 p->mod_refcounted = 1; 642 p->mod_refcounted = 1;
637 } else 643 } else
638 probed_mod = NULL; 644 probed_mod = NULL;
639 } 645 }
646 preempt_enable();
640 647
641 p->nmissed = 0; 648 p->nmissed = 0;
642 INIT_LIST_HEAD(&p->list); 649 INIT_LIST_HEAD(&p->list);
@@ -718,6 +725,10 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
718 struct kprobe *old_p; 725 struct kprobe *old_p;
719 726
720 if (p->mod_refcounted) { 727 if (p->mod_refcounted) {
728 /*
729 * Since we've already incremented refcount,
730 * we don't need to disable preemption.
731 */
721 mod = module_text_address((unsigned long)p->addr); 732 mod = module_text_address((unsigned long)p->addr);
722 if (mod) 733 if (mod)
723 module_put(mod); 734 module_put(mod);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 153dcb2639c3..895337b16a24 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1308,9 +1308,10 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
1308 */ 1308 */
1309static inline int fastpath_timer_check(struct task_struct *tsk) 1309static inline int fastpath_timer_check(struct task_struct *tsk)
1310{ 1310{
1311 struct signal_struct *sig = tsk->signal; 1311 struct signal_struct *sig;
1312 1312
1313 if (unlikely(!sig)) 1313 /* tsk == current, ensure it is safe to use ->signal/sighand */
1314 if (unlikely(tsk->exit_state))
1314 return 0; 1315 return 0;
1315 1316
1316 if (!task_cputime_zero(&tsk->cputime_expires)) { 1317 if (!task_cputime_zero(&tsk->cputime_expires)) {
@@ -1323,6 +1324,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1323 if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) 1324 if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1324 return 1; 1325 return 1;
1325 } 1326 }
1327
1328 sig = tsk->signal;
1326 if (!task_cputime_zero(&sig->cputime_expires)) { 1329 if (!task_cputime_zero(&sig->cputime_expires)) {
1327 struct task_cputime group_sample; 1330 struct task_cputime group_sample;
1328 1331
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 19122cf6d827..b8f7ce9473e8 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -174,7 +174,7 @@ static void suspend_test_finish(const char *label)
174 * has some performance issues. The stack dump of a WARN_ON 174 * has some performance issues. The stack dump of a WARN_ON
175 * is more likely to get the right attention than a printk... 175 * is more likely to get the right attention than a printk...
176 */ 176 */
177 WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000)); 177 WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
178} 178}
179 179
180#else 180#else
diff --git a/kernel/profile.c b/kernel/profile.c
index 9830a037d8db..5b7d1ac7124c 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
544}; 544};
545 545
546#ifdef CONFIG_SMP 546#ifdef CONFIG_SMP
547static void __init profile_nop(void *unused) 547static inline void profile_nop(void *unused)
548{ 548{
549} 549}
550 550
diff --git a/kernel/relay.c b/kernel/relay.c
index 8d13a7855c08..32b0befdcb6a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -400,7 +400,7 @@ void relay_reset(struct rchan *chan)
400 } 400 }
401 401
402 mutex_lock(&relay_channels_mutex); 402 mutex_lock(&relay_channels_mutex);
403 for_each_online_cpu(i) 403 for_each_possible_cpu(i)
404 if (chan->buf[i]) 404 if (chan->buf[i])
405 __relay_reset(chan->buf[i], 0); 405 __relay_reset(chan->buf[i], 0);
406 mutex_unlock(&relay_channels_mutex); 406 mutex_unlock(&relay_channels_mutex);
@@ -611,10 +611,9 @@ struct rchan *relay_open(const char *base_filename,
611 return chan; 611 return chan;
612 612
613free_bufs: 613free_bufs:
614 for_each_online_cpu(i) { 614 for_each_possible_cpu(i) {
615 if (!chan->buf[i]) 615 if (chan->buf[i])
616 break; 616 relay_close_buf(chan->buf[i]);
617 relay_close_buf(chan->buf[i]);
618 } 617 }
619 618
620 kref_put(&chan->kref, relay_destroy_channel); 619 kref_put(&chan->kref, relay_destroy_channel);
diff --git a/kernel/sched.c b/kernel/sched.c
index ebaf432365f6..a4c156d9a4a5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -399,7 +399,7 @@ struct cfs_rq {
399 */ 399 */
400 struct sched_entity *curr, *next, *last; 400 struct sched_entity *curr, *next, *last;
401 401
402 unsigned long nr_spread_over; 402 unsigned int nr_spread_over;
403 403
404#ifdef CONFIG_FAIR_GROUP_SCHED 404#ifdef CONFIG_FAIR_GROUP_SCHED
405 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ 405 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
@@ -949,6 +949,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
949 } 949 }
950} 950}
951 951
952void task_rq_unlock_wait(struct task_struct *p)
953{
954 struct rq *rq = task_rq(p);
955
956 smp_mb(); /* spin-unlock-wait is not a full memory barrier */
957 spin_unlock_wait(&rq->lock);
958}
959
952static void __task_rq_unlock(struct rq *rq) 960static void __task_rq_unlock(struct rq *rq)
953 __releases(rq->lock) 961 __releases(rq->lock)
954{ 962{
@@ -1428,6 +1436,8 @@ static unsigned long cpu_avg_load_per_task(int cpu)
1428 1436
1429 if (rq->nr_running) 1437 if (rq->nr_running)
1430 rq->avg_load_per_task = rq->load.weight / rq->nr_running; 1438 rq->avg_load_per_task = rq->load.weight / rq->nr_running;
1439 else
1440 rq->avg_load_per_task = 0;
1431 1441
1432 return rq->avg_load_per_task; 1442 return rq->avg_load_per_task;
1433} 1443}
@@ -5840,6 +5850,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5840 struct rq *rq = cpu_rq(cpu); 5850 struct rq *rq = cpu_rq(cpu);
5841 unsigned long flags; 5851 unsigned long flags;
5842 5852
5853 spin_lock_irqsave(&rq->lock, flags);
5854
5843 __sched_fork(idle); 5855 __sched_fork(idle);
5844 idle->se.exec_start = sched_clock(); 5856 idle->se.exec_start = sched_clock();
5845 5857
@@ -5847,7 +5859,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5847 idle->cpus_allowed = cpumask_of_cpu(cpu); 5859 idle->cpus_allowed = cpumask_of_cpu(cpu);
5848 __set_task_cpu(idle, cpu); 5860 __set_task_cpu(idle, cpu);
5849 5861
5850 spin_lock_irqsave(&rq->lock, flags);
5851 rq->curr = rq->idle = idle; 5862 rq->curr = rq->idle = idle;
5852#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 5863#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
5853 idle->oncpu = 1; 5864 idle->oncpu = 1;
@@ -7740,13 +7751,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7740 * 7751 *
7741 * The passed in 'doms_new' should be kmalloc'd. This routine takes 7752 * The passed in 'doms_new' should be kmalloc'd. This routine takes
7742 * ownership of it and will kfree it when done with it. If the caller 7753 * ownership of it and will kfree it when done with it. If the caller
7743 * failed the kmalloc call, then it can pass in doms_new == NULL, 7754 * failed the kmalloc call, then it can pass in doms_new == NULL &&
7744 * and partition_sched_domains() will fallback to the single partition 7755 * ndoms_new == 1, and partition_sched_domains() will fallback to
7745 * 'fallback_doms', it also forces the domains to be rebuilt. 7756 * the single partition 'fallback_doms', it also forces the domains
7757 * to be rebuilt.
7746 * 7758 *
7747 * If doms_new==NULL it will be replaced with cpu_online_map. 7759 * If doms_new == NULL it will be replaced with cpu_online_map.
7748 * ndoms_new==0 is a special case for destroying existing domains. 7760 * ndoms_new == 0 is a special case for destroying existing domains,
7749 * It will not create the default domain. 7761 * and it will not create the default domain.
7750 * 7762 *
7751 * Call with hotplug lock held 7763 * Call with hotplug lock held
7752 */ 7764 */
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 3625a6598699..baf2f17af462 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -173,7 +173,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
173 last = __pick_last_entity(cfs_rq); 173 last = __pick_last_entity(cfs_rq);
174 if (last) 174 if (last)
175 max_vruntime = last->vruntime; 175 max_vruntime = last->vruntime;
176 min_vruntime = rq->cfs.min_vruntime; 176 min_vruntime = cfs_rq->min_vruntime;
177 rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; 177 rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
178 spin_unlock_irqrestore(&rq->lock, flags); 178 spin_unlock_irqrestore(&rq->lock, flags);
179 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", 179 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
@@ -190,26 +190,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
190 SPLIT_NS(spread0)); 190 SPLIT_NS(spread0));
191 SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); 191 SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
192 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); 192 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
193#ifdef CONFIG_SCHEDSTATS
194#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
195
196 P(yld_exp_empty);
197 P(yld_act_empty);
198 P(yld_both_empty);
199 P(yld_count);
200 193
201 P(sched_switch); 194 SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
202 P(sched_count);
203 P(sched_goidle);
204
205 P(ttwu_count);
206 P(ttwu_local);
207
208 P(bkl_count);
209
210#undef P
211#endif
212 SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
213 cfs_rq->nr_spread_over); 195 cfs_rq->nr_spread_over);
214#ifdef CONFIG_FAIR_GROUP_SCHED 196#ifdef CONFIG_FAIR_GROUP_SCHED
215#ifdef CONFIG_SMP 197#ifdef CONFIG_SMP
@@ -285,6 +267,25 @@ static void print_cpu(struct seq_file *m, int cpu)
285#undef P 267#undef P
286#undef PN 268#undef PN
287 269
270#ifdef CONFIG_SCHEDSTATS
271#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
272
273 P(yld_exp_empty);
274 P(yld_act_empty);
275 P(yld_both_empty);
276 P(yld_count);
277
278 P(sched_switch);
279 P(sched_count);
280 P(sched_goidle);
281
282 P(ttwu_count);
283 P(ttwu_local);
284
285 P(bkl_count);
286
287#undef P
288#endif
288 print_cfs_stats(m, cpu); 289 print_cfs_stats(m, cpu);
289 print_rt_stats(m, cpu); 290 print_rt_stats(m, cpu);
290 291
@@ -447,10 +448,11 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
447#undef __P 448#undef __P
448 449
449 { 450 {
451 unsigned int this_cpu = raw_smp_processor_id();
450 u64 t0, t1; 452 u64 t0, t1;
451 453
452 t0 = sched_clock(); 454 t0 = cpu_clock(this_cpu);
453 t1 = sched_clock(); 455 t1 = cpu_clock(this_cpu);
454 SEQ_printf(m, "%-35s:%21Ld\n", 456 SEQ_printf(m, "%-35s:%21Ld\n",
455 "clock-delta", (long long)(t1-t0)); 457 "clock-delta", (long long)(t1-t0));
456 } 458 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 51aa3e102acb..98345e45b059 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -716,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
716 __enqueue_entity(cfs_rq, se); 716 __enqueue_entity(cfs_rq, se);
717} 717}
718 718
719static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
720{
721 if (cfs_rq->last == se)
722 cfs_rq->last = NULL;
723
724 if (cfs_rq->next == se)
725 cfs_rq->next = NULL;
726}
727
719static void 728static void
720dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) 729dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
721{ 730{
@@ -738,11 +747,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
738#endif 747#endif
739 } 748 }
740 749
741 if (cfs_rq->last == se) 750 clear_buddies(cfs_rq, se);
742 cfs_rq->last = NULL;
743
744 if (cfs_rq->next == se)
745 cfs_rq->next = NULL;
746 751
747 if (se != cfs_rq->curr) 752 if (se != cfs_rq->curr)
748 __dequeue_entity(cfs_rq, se); 753 __dequeue_entity(cfs_rq, se);
@@ -977,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
977 if (unlikely(cfs_rq->nr_running == 1)) 982 if (unlikely(cfs_rq->nr_running == 1))
978 return; 983 return;
979 984
985 clear_buddies(cfs_rq, se);
986
980 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { 987 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
981 update_rq_clock(rq); 988 update_rq_clock(rq);
982 /* 989 /*
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index ee71bec1da66..7dbf72a2b02c 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -298,9 +298,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
298{ 298{
299 struct signal_struct *sig; 299 struct signal_struct *sig;
300 300
301 sig = tsk->signal; 301 /* tsk == current, ensure it is safe to use ->signal */
302 if (unlikely(!sig)) 302 if (unlikely(tsk->exit_state))
303 return; 303 return;
304
305 sig = tsk->signal;
304 if (sig->cputime.totals) { 306 if (sig->cputime.totals) {
305 struct task_cputime *times; 307 struct task_cputime *times;
306 308
@@ -325,9 +327,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
325{ 327{
326 struct signal_struct *sig; 328 struct signal_struct *sig;
327 329
328 sig = tsk->signal; 330 /* tsk == current, ensure it is safe to use ->signal */
329 if (unlikely(!sig)) 331 if (unlikely(tsk->exit_state))
330 return; 332 return;
333
334 sig = tsk->signal;
331 if (sig->cputime.totals) { 335 if (sig->cputime.totals) {
332 struct task_cputime *times; 336 struct task_cputime *times;
333 337
@@ -353,8 +357,11 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
353 struct signal_struct *sig; 357 struct signal_struct *sig;
354 358
355 sig = tsk->signal; 359 sig = tsk->signal;
360 /* see __exit_signal()->task_rq_unlock_wait() */
361 barrier();
356 if (unlikely(!sig)) 362 if (unlikely(!sig))
357 return; 363 return;
364
358 if (sig->cputime.totals) { 365 if (sig->cputime.totals) {
359 struct task_cputime *times; 366 struct task_cputime *times;
360 367
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7110daeb9a90..e7c69a720d69 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -269,10 +269,11 @@ void irq_enter(void)
269{ 269{
270 int cpu = smp_processor_id(); 270 int cpu = smp_processor_id();
271 271
272 if (idle_cpu(cpu) && !in_interrupt()) 272 if (idle_cpu(cpu) && !in_interrupt()) {
273 __irq_enter();
273 tick_check_idle(cpu); 274 tick_check_idle(cpu);
274 275 } else
275 __irq_enter(); 276 __irq_enter();
276} 277}
277 278
278#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 279#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9bc4c00872c9..24e8ceacc388 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -112,7 +112,7 @@ static int chill(void *unused)
112int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) 112int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
113{ 113{
114 struct work_struct *sm_work; 114 struct work_struct *sm_work;
115 int i; 115 int i, ret;
116 116
117 /* Set up initial state. */ 117 /* Set up initial state. */
118 mutex_lock(&lock); 118 mutex_lock(&lock);
@@ -137,8 +137,9 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
137 /* This will release the thread on our CPU. */ 137 /* This will release the thread on our CPU. */
138 put_cpu(); 138 put_cpu();
139 flush_workqueue(stop_machine_wq); 139 flush_workqueue(stop_machine_wq);
140 ret = active.fnret;
140 mutex_unlock(&lock); 141 mutex_unlock(&lock);
141 return active.fnret; 142 return ret;
142} 143}
143 144
144int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) 145int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5bbb1044f847..342fc9ccab46 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void)
568 */ 568 */
569static void tick_nohz_kick_tick(int cpu) 569static void tick_nohz_kick_tick(int cpu)
570{ 570{
571#if 0
572 /* Switch back to 2.6.27 behaviour */
573
571 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 574 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
572 ktime_t delta, now; 575 ktime_t delta, now;
573 576
@@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu)
584 return; 587 return;
585 588
586 tick_nohz_restart(ts, now); 589 tick_nohz_restart(ts, now);
590#endif
587} 591}
588 592
589#else 593#else
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a39d24568c8..e60205722d0c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -185,7 +185,6 @@ enum {
185}; 185};
186 186
187static int ftrace_filtered; 187static int ftrace_filtered;
188static int tracing_on;
189 188
190static LIST_HEAD(ftrace_new_addrs); 189static LIST_HEAD(ftrace_new_addrs);
191 190
@@ -506,13 +505,10 @@ static int __ftrace_modify_code(void *data)
506{ 505{
507 int *command = data; 506 int *command = data;
508 507
509 if (*command & FTRACE_ENABLE_CALLS) { 508 if (*command & FTRACE_ENABLE_CALLS)
510 ftrace_replace_code(1); 509 ftrace_replace_code(1);
511 tracing_on = 1; 510 else if (*command & FTRACE_DISABLE_CALLS)
512 } else if (*command & FTRACE_DISABLE_CALLS) {
513 ftrace_replace_code(0); 511 ftrace_replace_code(0);
514 tracing_on = 0;
515 }
516 512
517 if (*command & FTRACE_UPDATE_TRACE_FUNC) 513 if (*command & FTRACE_UPDATE_TRACE_FUNC)
518 ftrace_update_ftrace_func(ftrace_trace_function); 514 ftrace_update_ftrace_func(ftrace_trace_function);
@@ -677,7 +673,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
677 673
678 cnt = num_to_init / ENTRIES_PER_PAGE; 674 cnt = num_to_init / ENTRIES_PER_PAGE;
679 pr_info("ftrace: allocating %ld entries in %d pages\n", 675 pr_info("ftrace: allocating %ld entries in %d pages\n",
680 num_to_init, cnt); 676 num_to_init, cnt + 1);
681 677
682 for (i = 0; i < cnt; i++) { 678 for (i = 0; i < cnt; i++) {
683 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 679 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -757,13 +753,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
757 void *p = NULL; 753 void *p = NULL;
758 loff_t l = -1; 754 loff_t l = -1;
759 755
760 if (*pos != iter->pos) { 756 if (*pos > iter->pos)
761 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) 757 *pos = iter->pos;
762 ; 758
763 } else { 759 l = *pos;
764 l = *pos; 760 p = t_next(m, p, &l);
765 p = t_next(m, p, &l);
766 }
767 761
768 return p; 762 return p;
769} 763}
@@ -774,15 +768,21 @@ static void t_stop(struct seq_file *m, void *p)
774 768
775static int t_show(struct seq_file *m, void *v) 769static int t_show(struct seq_file *m, void *v)
776{ 770{
771 struct ftrace_iterator *iter = m->private;
777 struct dyn_ftrace *rec = v; 772 struct dyn_ftrace *rec = v;
778 char str[KSYM_SYMBOL_LEN]; 773 char str[KSYM_SYMBOL_LEN];
774 int ret = 0;
779 775
780 if (!rec) 776 if (!rec)
781 return 0; 777 return 0;
782 778
783 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 779 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
784 780
785 seq_printf(m, "%s\n", str); 781 ret = seq_printf(m, "%s\n", str);
782 if (ret < 0) {
783 iter->pos--;
784 iter->idx--;
785 }
786 786
787 return 0; 787 return 0;
788} 788}
@@ -808,7 +808,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
808 return -ENOMEM; 808 return -ENOMEM;
809 809
810 iter->pg = ftrace_pages_start; 810 iter->pg = ftrace_pages_start;
811 iter->pos = -1; 811 iter->pos = 0;
812 812
813 ret = seq_open(file, &show_ftrace_seq_ops); 813 ret = seq_open(file, &show_ftrace_seq_ops);
814 if (!ret) { 814 if (!ret) {
@@ -895,7 +895,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
895 895
896 if (file->f_mode & FMODE_READ) { 896 if (file->f_mode & FMODE_READ) {
897 iter->pg = ftrace_pages_start; 897 iter->pg = ftrace_pages_start;
898 iter->pos = -1; 898 iter->pos = 0;
899 iter->flags = enable ? FTRACE_ITER_FILTER : 899 iter->flags = enable ? FTRACE_ITER_FILTER :
900 FTRACE_ITER_NOTRACE; 900 FTRACE_ITER_NOTRACE;
901 901
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3f3380638646..036456cbb4f7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
16#include <linux/list.h> 16#include <linux/list.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18 18
19#include "trace.h"
20
21/* Global flag to disable all recording to ring buffers */
22static int ring_buffers_off __read_mostly;
23
24/**
25 * tracing_on - enable all tracing buffers
26 *
27 * This function enables all tracing buffers that may have been
28 * disabled with tracing_off.
29 */
30void tracing_on(void)
31{
32 ring_buffers_off = 0;
33}
34
35/**
36 * tracing_off - turn off all tracing buffers
37 *
38 * This function stops all tracing buffers from recording data.
39 * It does not disable any overhead the tracers themselves may
40 * be causing. This function simply causes all recording to
41 * the ring buffers to fail.
42 */
43void tracing_off(void)
44{
45 ring_buffers_off = 1;
46}
47
19/* Up this if you want to test the TIME_EXTENTS and normalization */ 48/* Up this if you want to test the TIME_EXTENTS and normalization */
20#define DEBUG_SHIFT 0 49#define DEBUG_SHIFT 0
21 50
22/* FIXME!!! */ 51/* FIXME!!! */
23u64 ring_buffer_time_stamp(int cpu) 52u64 ring_buffer_time_stamp(int cpu)
24{ 53{
54 u64 time;
55
56 preempt_disable_notrace();
25 /* shift to debug/test normalization and TIME_EXTENTS */ 57 /* shift to debug/test normalization and TIME_EXTENTS */
26 return sched_clock() << DEBUG_SHIFT; 58 time = sched_clock() << DEBUG_SHIFT;
59 preempt_enable_notrace();
60
61 return time;
27} 62}
28 63
29void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) 64void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
503 LIST_HEAD(pages); 538 LIST_HEAD(pages);
504 int i, cpu; 539 int i, cpu;
505 540
541 /*
542 * Always succeed at resizing a non-existent buffer:
543 */
544 if (!buffer)
545 return size;
546
506 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 547 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
507 size *= BUF_PAGE_SIZE; 548 size *= BUF_PAGE_SIZE;
508 buffer_size = buffer->pages * BUF_PAGE_SIZE; 549 buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -1060,7 +1101,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1060 1101
1061 /* Did the write stamp get updated already? */ 1102 /* Did the write stamp get updated already? */
1062 if (unlikely(ts < cpu_buffer->write_stamp)) 1103 if (unlikely(ts < cpu_buffer->write_stamp))
1063 goto again; 1104 delta = 0;
1064 1105
1065 if (test_time_stamp(delta)) { 1106 if (test_time_stamp(delta)) {
1066 1107
@@ -1133,6 +1174,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1133 struct ring_buffer_event *event; 1174 struct ring_buffer_event *event;
1134 int cpu, resched; 1175 int cpu, resched;
1135 1176
1177 if (ring_buffers_off)
1178 return NULL;
1179
1136 if (atomic_read(&buffer->record_disabled)) 1180 if (atomic_read(&buffer->record_disabled))
1137 return NULL; 1181 return NULL;
1138 1182
@@ -1249,6 +1293,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
1249 int ret = -EBUSY; 1293 int ret = -EBUSY;
1250 int cpu, resched; 1294 int cpu, resched;
1251 1295
1296 if (ring_buffers_off)
1297 return -EBUSY;
1298
1252 if (atomic_read(&buffer->record_disabled)) 1299 if (atomic_read(&buffer->record_disabled))
1253 return -EBUSY; 1300 return -EBUSY;
1254 1301
@@ -2070,3 +2117,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2070 return 0; 2117 return 0;
2071} 2118}
2072 2119
2120static ssize_t
2121rb_simple_read(struct file *filp, char __user *ubuf,
2122 size_t cnt, loff_t *ppos)
2123{
2124 int *p = filp->private_data;
2125 char buf[64];
2126 int r;
2127
2128 /* !ring_buffers_off == tracing_on */
2129 r = sprintf(buf, "%d\n", !*p);
2130
2131 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2132}
2133
2134static ssize_t
2135rb_simple_write(struct file *filp, const char __user *ubuf,
2136 size_t cnt, loff_t *ppos)
2137{
2138 int *p = filp->private_data;
2139 char buf[64];
2140 long val;
2141 int ret;
2142
2143 if (cnt >= sizeof(buf))
2144 return -EINVAL;
2145
2146 if (copy_from_user(&buf, ubuf, cnt))
2147 return -EFAULT;
2148
2149 buf[cnt] = 0;
2150
2151 ret = strict_strtoul(buf, 10, &val);
2152 if (ret < 0)
2153 return ret;
2154
2155 /* !ring_buffers_off == tracing_on */
2156 *p = !val;
2157
2158 (*ppos)++;
2159
2160 return cnt;
2161}
2162
2163static struct file_operations rb_simple_fops = {
2164 .open = tracing_open_generic,
2165 .read = rb_simple_read,
2166 .write = rb_simple_write,
2167};
2168
2169
2170static __init int rb_init_debugfs(void)
2171{
2172 struct dentry *d_tracer;
2173 struct dentry *entry;
2174
2175 d_tracer = tracing_init_dentry();
2176
2177 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2178 &ring_buffers_off, &rb_simple_fops);
2179 if (!entry)
2180 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2181
2182 return 0;
2183}
2184
2185fs_initcall(rb_init_debugfs);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9f3b478f9171..697eda36b86a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1755,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1755 return TRACE_TYPE_HANDLED; 1755 return TRACE_TYPE_HANDLED;
1756 1756
1757 SEQ_PUT_FIELD_RET(s, entry->pid); 1757 SEQ_PUT_FIELD_RET(s, entry->pid);
1758 SEQ_PUT_FIELD_RET(s, iter->cpu); 1758 SEQ_PUT_FIELD_RET(s, entry->cpu);
1759 SEQ_PUT_FIELD_RET(s, iter->ts); 1759 SEQ_PUT_FIELD_RET(s, iter->ts);
1760 1760
1761 switch (entry->type) { 1761 switch (entry->type) {
@@ -2676,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2676{ 2676{
2677 unsigned long val; 2677 unsigned long val;
2678 char buf[64]; 2678 char buf[64];
2679 int ret; 2679 int ret, cpu;
2680 struct trace_array *tr = filp->private_data; 2680 struct trace_array *tr = filp->private_data;
2681 2681
2682 if (cnt >= sizeof(buf)) 2682 if (cnt >= sizeof(buf))
@@ -2704,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2704 goto out; 2704 goto out;
2705 } 2705 }
2706 2706
2707 /* disable all cpu buffers */
2708 for_each_tracing_cpu(cpu) {
2709 if (global_trace.data[cpu])
2710 atomic_inc(&global_trace.data[cpu]->disabled);
2711 if (max_tr.data[cpu])
2712 atomic_inc(&max_tr.data[cpu]->disabled);
2713 }
2714
2707 if (val != global_trace.entries) { 2715 if (val != global_trace.entries) {
2708 ret = ring_buffer_resize(global_trace.buffer, val); 2716 ret = ring_buffer_resize(global_trace.buffer, val);
2709 if (ret < 0) { 2717 if (ret < 0) {
@@ -2735,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2735 if (tracing_disabled) 2743 if (tracing_disabled)
2736 cnt = -ENOMEM; 2744 cnt = -ENOMEM;
2737 out: 2745 out:
2746 for_each_tracing_cpu(cpu) {
2747 if (global_trace.data[cpu])
2748 atomic_dec(&global_trace.data[cpu]->disabled);
2749 if (max_tr.data[cpu])
2750 atomic_dec(&max_tr.data[cpu]->disabled);
2751 }
2752
2738 max_tr.entries = global_trace.entries; 2753 max_tr.entries = global_trace.entries;
2739 mutex_unlock(&trace_types_lock); 2754 mutex_unlock(&trace_types_lock);
2740 2755
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f928f2a87b9b..d4dc69ddebd7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -970,6 +970,51 @@ undo:
970 return ret; 970 return ret;
971} 971}
972 972
973#ifdef CONFIG_SMP
974struct work_for_cpu {
975 struct work_struct work;
976 long (*fn)(void *);
977 void *arg;
978 long ret;
979};
980
981static void do_work_for_cpu(struct work_struct *w)
982{
983 struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
984
985 wfc->ret = wfc->fn(wfc->arg);
986}
987
988/**
989 * work_on_cpu - run a function in user context on a particular cpu
990 * @cpu: the cpu to run on
991 * @fn: the function to run
992 * @arg: the function arg
993 *
994 * This will return -EINVAL in the cpu is not online, or the return value
995 * of @fn otherwise.
996 */
997long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
998{
999 struct work_for_cpu wfc;
1000
1001 INIT_WORK(&wfc.work, do_work_for_cpu);
1002 wfc.fn = fn;
1003 wfc.arg = arg;
1004 get_online_cpus();
1005 if (unlikely(!cpu_online(cpu)))
1006 wfc.ret = -EINVAL;
1007 else {
1008 schedule_work_on(cpu, &wfc.work);
1009 flush_work(&wfc.work);
1010 }
1011 put_online_cpus();
1012
1013 return wfc.ret;
1014}
1015EXPORT_SYMBOL_GPL(work_on_cpu);
1016#endif /* CONFIG_SMP */
1017
973void __init init_workqueues(void) 1018void __init init_workqueues(void)
974{ 1019{
975 cpu_populated_map = cpu_online_map; 1020 cpu_populated_map = cpu_online_map;