Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile | 8
-rw-r--r--  kernel/audit_tree.c | 91
-rw-r--r--  kernel/auditfilter.c | 14
-rw-r--r--  kernel/cgroup.c | 1
-rw-r--r--  kernel/cgroup_freezer.c | 70
-rw-r--r--  kernel/cpu.c | 3
-rw-r--r--  kernel/cpuset.c | 12
-rw-r--r--  kernel/exit.c | 14
-rw-r--r--  kernel/fork.c | 11
-rw-r--r--  kernel/freezer.c | 20
-rw-r--r--  kernel/hrtimer.c | 26
-rw-r--r--  kernel/irq/proc.c | 2
-rw-r--r--  kernel/kprobes.c | 23
-rw-r--r--  kernel/lockdep.c | 17
-rw-r--r--  kernel/posix-cpu-timers.c | 7
-rw-r--r--  kernel/power/Kconfig | 2
-rw-r--r--  kernel/power/main.c | 2
-rw-r--r--  kernel/printk.c | 39
-rw-r--r--  kernel/profile.c | 4
-rw-r--r--  kernel/relay.c | 9
-rw-r--r--  kernel/resource.c | 8
-rw-r--r--  kernel/sched.c | 44
-rw-r--r--  kernel/sched_debug.c | 48
-rw-r--r--  kernel/sched_fair.c | 248
-rw-r--r--  kernel/sched_features.h | 1
-rw-r--r--  kernel/sched_idletask.c | 5
-rw-r--r--  kernel/sched_rt.c | 5
-rw-r--r--  kernel/sched_stats.h | 15
-rw-r--r--  kernel/signal.c | 3
-rw-r--r--  kernel/smp.c | 18
-rw-r--r--  kernel/softirq.c | 7
-rw-r--r--  kernel/stop_machine.c | 5
-rw-r--r--  kernel/sysctl.c | 2
-rw-r--r--  kernel/time/tick-sched.c | 4
-rw-r--r--  kernel/timer.c | 129
-rw-r--r--  kernel/trace/Kconfig | 31
-rw-r--r--  kernel/trace/Makefile | 6
-rw-r--r--  kernel/trace/ftrace.c | 642
-rw-r--r--  kernel/trace/ring_buffer.c | 179
-rw-r--r--  kernel/trace/trace.c | 82
-rw-r--r--  kernel/trace/trace.h | 22
-rw-r--r--  kernel/trace/trace_functions.c | 2
-rw-r--r--  kernel/trace/trace_irqsoff.c | 4
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 4
-rw-r--r--  kernel/trace/trace_selftest.c | 18
-rw-r--r--  kernel/trace/trace_stack.c | 4
-rw-r--r--  kernel/tracepoint.c | 8
-rw-r--r--  kernel/workqueue.c | 45
48 files changed, 1010 insertions(+), 954 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 305f11dbef21..19fad003b19d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,9 +11,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 
-CFLAGS_REMOVE_sched.o = -mno-spe
-
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
 CFLAGS_REMOVE_lockdep_proc.o = -pg
@@ -21,7 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
-CFLAGS_REMOVE_sched.o = -mno-spe -pg
+CFLAGS_REMOVE_sched.o = -pg
 endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -88,7 +86,7 @@ obj-$(CONFIG_MARKERS) += marker.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
-obj-$(CONFIG_FTRACE) += trace/
+obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8ba0e0d934f2..8b509441f49a 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_chunk {
 	struct list_head trees;	/* with root here */
 	int dead;
 	int count;
+	atomic_long_t refs;
 	struct rcu_head head;
 	struct node {
 		struct list_head list;
@@ -56,7 +57,8 @@ static LIST_HEAD(prune_list);
  * tree is refcounted; one reference for "some rules on rules_list refer to
  * it", one for each chunk with pointer to it.
  *
- * chunk is refcounted by embedded inotify_watch.
+ * chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount
+ * of watch contributes 1 to .refs).
  *
  * node.index allows to get from node.list to containing chunk.
  * MSB of that sucker is stolen to mark taggings that we might have to
@@ -121,6 +123,7 @@ static struct audit_chunk *alloc_chunk(int count)
 	INIT_LIST_HEAD(&chunk->hash);
 	INIT_LIST_HEAD(&chunk->trees);
 	chunk->count = count;
+	atomic_long_set(&chunk->refs, 1);
 	for (i = 0; i < count; i++) {
 		INIT_LIST_HEAD(&chunk->owners[i].list);
 		chunk->owners[i].index = i;
@@ -129,9 +132,8 @@ static struct audit_chunk *alloc_chunk(int count)
 	return chunk;
 }
 
-static void __free_chunk(struct rcu_head *rcu)
+static void free_chunk(struct audit_chunk *chunk)
 {
-	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
 	int i;
 
 	for (i = 0; i < chunk->count; i++) {
@@ -141,14 +143,16 @@ static void __free_chunk(struct rcu_head *rcu)
 	kfree(chunk);
 }
 
-static inline void free_chunk(struct audit_chunk *chunk)
+void audit_put_chunk(struct audit_chunk *chunk)
 {
-	call_rcu(&chunk->head, __free_chunk);
+	if (atomic_long_dec_and_test(&chunk->refs))
+		free_chunk(chunk);
 }
 
-void audit_put_chunk(struct audit_chunk *chunk)
+static void __put_chunk(struct rcu_head *rcu)
 {
-	put_inotify_watch(&chunk->watch);
+	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
+	audit_put_chunk(chunk);
 }
 
 enum {HASH_SIZE = 128};
@@ -176,7 +180,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
 
 	list_for_each_entry_rcu(p, list, hash) {
 		if (p->watch.inode == inode) {
-			get_inotify_watch(&p->watch);
+			atomic_long_inc(&p->refs);
 			return p;
 		}
 	}
@@ -194,17 +198,49 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
 
 /* tagging and untagging inodes with trees */
 
-static void untag_chunk(struct audit_chunk *chunk, struct node *p)
+static struct audit_chunk *find_chunk(struct node *p)
+{
+	int index = p->index & ~(1U<<31);
+	p -= index;
+	return container_of(p, struct audit_chunk, owners[0]);
+}
+
+static void untag_chunk(struct node *p)
 {
+	struct audit_chunk *chunk = find_chunk(p);
 	struct audit_chunk *new;
 	struct audit_tree *owner;
 	int size = chunk->count - 1;
 	int i, j;
 
+	if (!pin_inotify_watch(&chunk->watch)) {
+		/*
+		 * Filesystem is shutting down; all watches are getting
+		 * evicted, just take it off the node list for this
+		 * tree and let the eviction logics take care of the
+		 * rest.
+		 */
+		owner = p->owner;
+		if (owner->root == chunk) {
+			list_del_init(&owner->same_root);
+			owner->root = NULL;
+		}
+		list_del_init(&p->list);
+		p->owner = NULL;
+		put_tree(owner);
+		return;
+	}
+
+	spin_unlock(&hash_lock);
+
+	/*
+	 * pin_inotify_watch() succeeded, so the watch won't go away
+	 * from under us.
+	 */
 	mutex_lock(&chunk->watch.inode->inotify_mutex);
 	if (chunk->dead) {
 		mutex_unlock(&chunk->watch.inode->inotify_mutex);
-		return;
+		goto out;
 	}
 
 	owner = p->owner;
@@ -221,7 +257,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
 		inotify_evict_watch(&chunk->watch);
 		mutex_unlock(&chunk->watch.inode->inotify_mutex);
 		put_inotify_watch(&chunk->watch);
-		return;
+		goto out;
 	}
 
 	new = alloc_chunk(size);
@@ -263,7 +299,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
 	inotify_evict_watch(&chunk->watch);
 	mutex_unlock(&chunk->watch.inode->inotify_mutex);
 	put_inotify_watch(&chunk->watch);
-	return;
+	goto out;
 
 Fallback:
 	// do the best we can
@@ -277,6 +313,9 @@ Fallback:
 	put_tree(owner);
 	spin_unlock(&hash_lock);
 	mutex_unlock(&chunk->watch.inode->inotify_mutex);
+out:
+	unpin_inotify_watch(&chunk->watch);
+	spin_lock(&hash_lock);
 }
 
 static int create_chunk(struct inode *inode, struct audit_tree *tree)
@@ -387,13 +426,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	return 0;
 }
 
-static struct audit_chunk *find_chunk(struct node *p)
-{
-	int index = p->index & ~(1U<<31);
-	p -= index;
-	return container_of(p, struct audit_chunk, owners[0]);
-}
-
 static void kill_rules(struct audit_tree *tree)
 {
 	struct audit_krule *rule, *next;
@@ -431,17 +463,10 @@ static void prune_one(struct audit_tree *victim)
 	spin_lock(&hash_lock);
 	while (!list_empty(&victim->chunks)) {
 		struct node *p;
-		struct audit_chunk *chunk;
 
 		p = list_entry(victim->chunks.next, struct node, list);
-		chunk = find_chunk(p);
-		get_inotify_watch(&chunk->watch);
-		spin_unlock(&hash_lock);
-
-		untag_chunk(chunk, p);
 
-		put_inotify_watch(&chunk->watch);
-		spin_lock(&hash_lock);
+		untag_chunk(p);
 	}
 	spin_unlock(&hash_lock);
 	put_tree(victim);
@@ -469,7 +494,6 @@ static void trim_marked(struct audit_tree *tree)
 
 	while (!list_empty(&tree->chunks)) {
 		struct node *node;
-		struct audit_chunk *chunk;
 
 		node = list_entry(tree->chunks.next, struct node, list);
 
@@ -477,14 +501,7 @@ static void trim_marked(struct audit_tree *tree)
 		if (!(node->index & (1U<<31)))
 			break;
 
-		chunk = find_chunk(node);
-		get_inotify_watch(&chunk->watch);
-		spin_unlock(&hash_lock);
-
-		untag_chunk(chunk, node);
-
-		put_inotify_watch(&chunk->watch);
-		spin_lock(&hash_lock);
+		untag_chunk(node);
 	}
 	if (!tree->root && !tree->goner) {
 		tree->goner = 1;
@@ -878,7 +895,7 @@ static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
 static void destroy_watch(struct inotify_watch *watch)
 {
 	struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
-	free_chunk(chunk);
+	call_rcu(&chunk->head, __put_chunk);
 }
 
 static const struct inotify_operations rtree_inotify_ops = {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b7d354e2b0ef..9fd85a4640a0 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1094,8 +1094,8 @@ static void audit_inotify_unregister(struct list_head *in_list)
 	list_for_each_entry_safe(p, n, in_list, ilist) {
 		list_del(&p->ilist);
 		inotify_rm_watch(audit_ih, &p->wdata);
-		/* the put matching the get in audit_do_del_rule() */
-		put_inotify_watch(&p->wdata);
+		/* the unpin matching the pin in audit_do_del_rule() */
+		unpin_inotify_watch(&p->wdata);
 	}
 }
 
@@ -1389,9 +1389,13 @@ static inline int audit_del_rule(struct audit_entry *entry,
 				/* Put parent on the inotify un-registration
 				 * list.  Grab a reference before releasing
 				 * audit_filter_mutex, to be released in
-				 * audit_inotify_unregister(). */
-				list_add(&parent->ilist, &inotify_list);
-				get_inotify_watch(&parent->wdata);
+				 * audit_inotify_unregister().
+				 * If filesystem is going away, just leave
+				 * the sucker alone, eviction will take
+				 * care of it.
+				 */
+				if (pin_inotify_watch(&parent->wdata))
+					list_add(&parent->ilist, &inotify_list);
 			}
 		}
 	}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 35eebd5510c2..358e77564e6f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2497,7 +2497,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	list_del(&cgrp->sibling);
 	spin_lock(&cgrp->dentry->d_lock);
 	d = dget(cgrp->dentry);
-	cgrp->dentry = NULL;
 	spin_unlock(&d->d_lock);
 
 	cgroup_d_remove_dir(d);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e95056954498..fb249e2bcada 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -162,9 +162,13 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
 			      struct task_struct *task)
 {
 	struct freezer *freezer;
-	int retval;
 
-	/* Anything frozen can't move or be moved to/from */
+	/*
+	 * Anything frozen can't move or be moved to/from.
+	 *
+	 * Since orig_freezer->state == FROZEN means that @task has been
+	 * frozen, so it's sufficient to check the latter condition.
+	 */
 
 	if (is_task_frozen_enough(task))
 		return -EBUSY;
@@ -173,25 +177,31 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
 	if (freezer->state == CGROUP_FROZEN)
 		return -EBUSY;
 
-	retval = 0;
-	task_lock(task);
-	freezer = task_freezer(task);
-	if (freezer->state == CGROUP_FROZEN)
-		retval = -EBUSY;
-	task_unlock(task);
-	return retval;
+	return 0;
 }
 
 static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 {
 	struct freezer *freezer;
 
-	task_lock(task);
+	/*
+	 * No lock is needed, since the task isn't on tasklist yet,
+	 * so it can't be moved to another cgroup, which means the
+	 * freezer won't be removed and will be valid during this
+	 * function call.
+	 */
 	freezer = task_freezer(task);
-	task_unlock(task);
 
-	BUG_ON(freezer->state == CGROUP_FROZEN);
+	/*
+	 * The root cgroup is non-freezable, so we can skip the
+	 * following check.
+	 */
+	if (!freezer->css.cgroup->parent)
+		return;
+
 	spin_lock_irq(&freezer->lock);
+	BUG_ON(freezer->state == CGROUP_FROZEN);
+
 	/* Locking avoids race with FREEZING -> THAWED transitions. */
 	if (freezer->state == CGROUP_FREEZING)
 		freeze_task(task, true);
@@ -276,25 +286,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	return num_cant_freeze_now ? -EBUSY : 0;
 }
 
-static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 {
 	struct cgroup_iter it;
 	struct task_struct *task;
 
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
-		int do_wake;
-
-		task_lock(task);
-		do_wake = __thaw_process(task);
-		task_unlock(task);
-		if (do_wake)
-			wake_up_process(task);
+		thaw_process(task);
 	}
 	cgroup_iter_end(cgroup, &it);
-	freezer->state = CGROUP_THAWED;
 
-	return 0;
+	freezer->state = CGROUP_THAWED;
 }
 
 static int freezer_change_state(struct cgroup *cgroup,
@@ -304,27 +307,22 @@ static int freezer_change_state(struct cgroup *cgroup,
 	int retval = 0;
 
 	freezer = cgroup_freezer(cgroup);
+
 	spin_lock_irq(&freezer->lock);
+
 	update_freezer_state(cgroup, freezer);
 	if (goal_state == freezer->state)
 		goto out;
-	switch (freezer->state) {
+
+	switch (goal_state) {
 	case CGROUP_THAWED:
-		retval = try_to_freeze_cgroup(cgroup, freezer);
+		unfreeze_cgroup(cgroup, freezer);
 		break;
-	case CGROUP_FREEZING:
-		if (goal_state == CGROUP_FROZEN) {
-			/* Userspace is retrying after
-			 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
-			retval = try_to_freeze_cgroup(cgroup, freezer);
-			break;
-		}
-		/* state == FREEZING and goal_state == THAWED, so unfreeze */
 	case CGROUP_FROZEN:
-		retval = unfreeze_cgroup(cgroup, freezer);
+		retval = try_to_freeze_cgroup(cgroup, freezer);
 		break;
 	default:
-		break;
+		BUG();
 	}
 out:
 	spin_unlock_irq(&freezer->lock);
@@ -344,7 +342,7 @@ static int freezer_write(struct cgroup *cgroup,
 	else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
 		goal_state = CGROUP_FROZEN;
 	else
-		return -EIO;
+		return -EINVAL;
 
 	if (!cgroup_lock_live_group(cgroup))
 		return -ENODEV;
@@ -363,6 +361,8 @@ static struct cftype files[] = {
 
 static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
 {
+	if (!cgroup->parent)
+		return 0;
 	return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
 }
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 86d49045daed..5a732c5ef08b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
 #endif
 };
 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
+
+const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
+EXPORT_SYMBOL(cpu_all_bits);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e00526f52ec..81fc6791a296 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -587,7 +587,6 @@ static int generate_sched_domains(cpumask_t **domains,
 	int ndoms;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] cpumask_t slot */
 
-	ndoms = 0;
 	doms = NULL;
 	dattr = NULL;
 	csa = NULL;
@@ -674,10 +673,8 @@ restart:
 	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
 	 */
 	doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
-	if (!doms) {
-		ndoms = 0;
+	if (!doms)
 		goto done;
-	}
 
 	/*
 	 * The rest of the code, including the scheduler, can deal with
@@ -732,6 +729,13 @@ restart:
 done:
 	kfree(csa);
 
+	/*
+	 * Fallback to the default domain if kmalloc() failed.
+	 * See comments in partition_sched_domains().
+	 */
+	if (doms == NULL)
+		ndoms = 1;
+
 	*domains    = doms;
 	*attributes = dattr;
 	return ndoms;
diff --git a/kernel/exit.c b/kernel/exit.c
index 80137a5d9467..2d8be7ebb0f7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -40,7 +40,6 @@
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
 #include <linux/futex.h>
-#include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
@@ -141,6 +140,11 @@ static void __exit_signal(struct task_struct *tsk)
 	if (sig) {
 		flush_sigqueue(&sig->shared_pending);
 		taskstats_tgid_free(sig);
+		/*
+		 * Make sure ->signal can't go away under rq->lock,
+		 * see account_group_exec_runtime().
+		 */
+		task_rq_unlock_wait(tsk);
 		__cleanup_signal(sig);
 	}
 }
@@ -1054,14 +1058,6 @@ NORET_TYPE void do_exit(long code)
 		exit_itimers(tsk->signal);
 	}
 	acct_collect(code, group_dead);
-#ifdef CONFIG_FUTEX
-	if (unlikely(tsk->robust_list))
-		exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list))
-		compat_exit_robust_list(tsk);
-#endif
-#endif
 	if (group_dead)
 		tty_audit_exit();
 	if (unlikely(tsk->audit_context))
diff --git a/kernel/fork.c b/kernel/fork.c
index f6083561dfe0..2a372a0e206f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
 #include <linux/jiffies.h>
 #include <linux/tracehook.h>
 #include <linux/futex.h>
+#include <linux/compat.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
@@ -519,6 +520,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
 	struct completion *vfork_done = tsk->vfork_done;
 
+	/* Get rid of any futexes when releasing the mm */
+#ifdef CONFIG_FUTEX
+	if (unlikely(tsk->robust_list))
+		exit_robust_list(tsk);
+#ifdef CONFIG_COMPAT
+	if (unlikely(tsk->compat_robust_list))
+		compat_exit_robust_list(tsk);
+#endif
+#endif
+
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
diff --git a/kernel/freezer.c b/kernel/freezer.c
index ba6248b323ef..2f4936cf7083 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p)
 	}
 }
 
-/*
- * Wake up a frozen process
- *
- * task_lock() is needed to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails.  Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
- */
-int __thaw_process(struct task_struct *p)
+static int __thaw_process(struct task_struct *p)
 {
 	if (frozen(p)) {
 		p->flags &= ~PF_FROZEN;
@@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p)
 	return 0;
 }
 
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails.  Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
 int thaw_process(struct task_struct *p)
 {
 	task_lock(p);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b465dfde426..47e63349d1b2 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -664,14 +664,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 
 	/* Timer is expired, act upon the callback mode */
 	switch(timer->cb_mode) {
-	case HRTIMER_CB_IRQSAFE_NO_RESTART:
-		debug_hrtimer_deactivate(timer);
-		/*
-		 * We can call the callback from here. No restart
-		 * happens, so no danger of recursion
-		 */
-		BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
-		return 1;
 	case HRTIMER_CB_IRQSAFE_PERCPU:
 	case HRTIMER_CB_IRQSAFE_UNLOCKED:
 		/*
@@ -683,7 +675,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 		 */
 		debug_hrtimer_deactivate(timer);
 		return 1;
-	case HRTIMER_CB_IRQSAFE:
 	case HRTIMER_CB_SOFTIRQ:
 		/*
 		 * Move everything else into the softirq pending list !
@@ -1209,6 +1200,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
 		enum hrtimer_restart (*fn)(struct hrtimer *);
 		struct hrtimer *timer;
 		int restart;
+		int emulate_hardirq_ctx = 0;
 
 		timer = list_entry(cpu_base->cb_pending.next,
 				   struct hrtimer, cb_entry);
@@ -1217,10 +1209,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
 		timer_stats_account_hrtimer(timer);
 
 		fn = timer->function;
+		/*
+		 * A timer might have been added to the cb_pending list
+		 * when it was migrated during a cpu-offline operation.
+		 * Emulate hardirq context for such timers.
+		 */
+		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+		    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
+			emulate_hardirq_ctx = 1;
+
 		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
 		spin_unlock_irq(&cpu_base->lock);
 
-		restart = fn(timer);
+		if (unlikely(emulate_hardirq_ctx)) {
+			local_irq_disable();
+			restart = fn(timer);
+			local_irq_enable();
+		} else
+			restart = fn(timer);
 
 		spin_lock_irq(&cpu_base->lock);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index fac014a81b24..4d161c70ba55 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -220,7 +220,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
 	}
 }
 
-void register_default_affinity_proc(void)
+static void register_default_affinity_proc(void)
 {
 #ifdef CONFIG_SMP
 	proc_create("irq/default_smp_affinity", 0600, NULL,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8b57a2597f21..9f8a3f25259a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -72,7 +72,7 @@ static bool kprobe_enabled;
 DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 static struct {
-	spinlock_t lock ____cacheline_aligned;
+	spinlock_t lock ____cacheline_aligned_in_smp;
 } kretprobe_table_locks[KPROBE_TABLE_SIZE];
 
 static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
@@ -613,30 +613,37 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 		return -EINVAL;
 	p->addr = addr;
 
-	if (!kernel_text_address((unsigned long) p->addr) ||
-	    in_kprobes_functions((unsigned long) p->addr))
+	preempt_disable();
+	if (!__kernel_text_address((unsigned long) p->addr) ||
+	    in_kprobes_functions((unsigned long) p->addr)) {
+		preempt_enable();
 		return -EINVAL;
+	}
 
 	p->mod_refcounted = 0;
 
 	/*
 	 * Check if are we probing a module.
 	 */
-	probed_mod = module_text_address((unsigned long) p->addr);
+	probed_mod = __module_text_address((unsigned long) p->addr);
 	if (probed_mod) {
-		struct module *calling_mod = module_text_address(called_from);
+		struct module *calling_mod;
+		calling_mod = __module_text_address(called_from);
 		/*
 		 * We must allow modules to probe themself and in this case
 		 * avoid incrementing the module refcount, so as to allow
 		 * unloading of self probing modules.
 		 */
 		if (calling_mod && calling_mod != probed_mod) {
-			if (unlikely(!try_module_get(probed_mod)))
+			if (unlikely(!try_module_get(probed_mod))) {
+				preempt_enable();
 				return -EINVAL;
+			}
 			p->mod_refcounted = 1;
 		} else
 			probed_mod = NULL;
 	}
+	preempt_enable();
 
 	p->nmissed = 0;
 	INIT_LIST_HEAD(&p->list);
@@ -718,6 +725,10 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 	struct kprobe *old_p;
 
 	if (p->mod_refcounted) {
+		/*
+		 * Since we've already incremented refcount,
+		 * we don't need to disable preemption.
+		 */
 		mod = module_text_address((unsigned long)p->addr);
 		if (mod)
 			module_put(mod);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index dbda475b13bd..06e157119d2b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2169,12 +2169,11 @@ void early_boot_irqs_on(void)
 /*
  * Hardirqs will be enabled:
  */
-void trace_hardirqs_on_caller(unsigned long a0)
+void trace_hardirqs_on_caller(unsigned long ip)
 {
 	struct task_struct *curr = current;
-	unsigned long ip;
 
-	time_hardirqs_on(CALLER_ADDR0, a0);
+	time_hardirqs_on(CALLER_ADDR0, ip);
 
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
@@ -2188,7 +2187,6 @@ void trace_hardirqs_on_caller(unsigned long a0)
 	}
 	/* we'll do an OFF -> ON transition: */
 	curr->hardirqs_enabled = 1;
-	ip = (unsigned long) __builtin_return_address(0);
 
 	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
 		return;
@@ -2224,11 +2222,11 @@ EXPORT_SYMBOL(trace_hardirqs_on);
 /*
  * Hardirqs were disabled:
  */
-void trace_hardirqs_off_caller(unsigned long a0)
+void trace_hardirqs_off_caller(unsigned long ip)
 {
 	struct task_struct *curr = current;
 
-	time_hardirqs_off(CALLER_ADDR0, a0);
+	time_hardirqs_off(CALLER_ADDR0, ip);
 
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
@@ -2241,7 +2239,7 @@ void trace_hardirqs_off_caller(unsigned long a0)
 		 * We have done an ON -> OFF transition:
 		 */
 		curr->hardirqs_enabled = 0;
-		curr->hardirq_disable_ip = _RET_IP_;
+		curr->hardirq_disable_ip = ip;
 		curr->hardirq_disable_event = ++curr->irq_events;
 		debug_atomic_inc(&hardirqs_off_events);
 	} else
@@ -3417,9 +3415,10 @@ retry:
 		}
 		printk(" ignoring it.\n");
 		unlock = 0;
+	} else {
+		if (count != 10)
+			printk(KERN_CONT " locked it.\n");
 	}
-	if (count != 10)
-		printk(" locked it.\n");
 
 	do_each_thread(g, p) {
 		/*
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 153dcb2639c3..895337b16a24 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1308,9 +1308,10 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
  */
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
-	struct signal_struct *sig = tsk->signal;
+	struct signal_struct *sig;
 
-	if (unlikely(!sig))
+	/* tsk == current, ensure it is safe to use ->signal/sighand */
+	if (unlikely(tsk->exit_state))
 		return 0;
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
@@ -1323,6 +1324,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
 			return 1;
 	}
+
+	sig = tsk->signal;
 	if (!task_cputime_zero(&sig->cputime_expires)) {
 		struct task_cputime group_sample;
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index dcd165f92a88..23bd4daeb96b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -96,7 +96,7 @@ config SUSPEND
 
 config PM_TEST_SUSPEND
 	bool "Test suspend/resume and wakealarm during bootup"
-	depends on SUSPEND && PM_DEBUG && RTC_LIB=y
+	depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
 	---help---
 	  This option will let you suspend your machine during bootup, and
 	  make it wake up a few seconds later using an RTC wakeup alarm.
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 19122cf6d827..b8f7ce9473e8 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -174,7 +174,7 @@ static void suspend_test_finish(const char *label)
 	 * has some performance issues.  The stack dump of a WARN_ON
 	 * is more likely to get the right attention than a printk...
 	 */
-	WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000));
+	WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
 }
 
 #else
diff --git a/kernel/printk.c b/kernel/printk.c
index 6341af77eb65..f492f1583d77 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -233,45 +233,6 @@ static inline void boot_delay_msec(void)
 #endif
 
 /*
- * Return the number of unread characters in the log buffer.
- */
-static int log_buf_get_len(void)
-{
-	return logged_chars;
-}
-
-/*
- * Copy a range of characters from the log buffer.
- */
-int log_buf_copy(char *dest, int idx, int len)
-{
-	int ret, max;
-	bool took_lock = false;
-
-	if (!oops_in_progress) {
-		spin_lock_irq(&logbuf_lock);
-		took_lock = true;
-	}
-
-	max = log_buf_get_len();
-	if (idx < 0 || idx >= max) {
-		ret = -1;
-	} else {
-		if (len > max)
-			len = max;
-		ret = len;
-		idx += (log_end - max);
-		while (len-- > 0)
-			dest[len] = LOG_BUF(idx + len);
-	}
-
-	if (took_lock)
-		spin_unlock_irq(&logbuf_lock);
-
-	return ret;
-}
-
-/*
  * Commands to do_syslog:
  *
  * 0 -- Close the log.  Currently a NOP.
diff --git a/kernel/profile.c b/kernel/profile.c
index a9e422df6bf6..5b7d1ac7124c 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -102,7 +102,7 @@ int profile_setup(char *str)
 __setup("profile=", profile_setup);
 
 
-int profile_init(void)
+int __ref profile_init(void)
 {
 	int buffer_bytes;
 	if (!prof_on)
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
 };
 
 #ifdef CONFIG_SMP
-static void __init profile_nop(void *unused)
+static inline void profile_nop(void *unused)
 {
 }
 
diff --git a/kernel/relay.c b/kernel/relay.c
index 8d13a7855c08..32b0befdcb6a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -400,7 +400,7 @@ void relay_reset(struct rchan *chan)
 	}
 
 	mutex_lock(&relay_channels_mutex);
-	for_each_online_cpu(i)
+	for_each_possible_cpu(i)
 		if (chan->buf[i])
 			__relay_reset(chan->buf[i], 0);
 	mutex_unlock(&relay_channels_mutex);
@@ -611,10 +611,9 @@ struct rchan *relay_open(const char *base_filename,
 	return chan;
 
 free_bufs:
-	for_each_online_cpu(i) {
-		if (!chan->buf[i])
-			break;
-		relay_close_buf(chan->buf[i]);
+	for_each_possible_cpu(i) {
+		if (chan->buf[i])
+			relay_close_buf(chan->buf[i]);
 	}
 
 	kref_put(&chan->kref, relay_destroy_channel);
diff --git a/kernel/resource.c b/kernel/resource.c
index 4089d12af6e0..4337063663ef 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -17,6 +17,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/device.h>
+#include <linux/pfn.h>
 #include <asm/io.h>
 
 
@@ -522,7 +523,7 @@ static void __init __reserve_region_with_split(struct resource *root,
 {
 	struct resource *parent = root;
 	struct resource *conflict;
-	struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
+	struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC);
 
 	if (!res)
 		return;
@@ -571,7 +572,7 @@ static void __init __reserve_region_with_split(struct resource *root,
 
 }
 
-void reserve_region_with_split(struct resource *root,
+void __init reserve_region_with_split(struct resource *root,
 		resource_size_t start, resource_size_t end,
 		const char *name)
 {
@@ -849,7 +850,8 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
 			continue;
 		if (p->end < addr)
 			continue;
-		if (p->start <= addr && (p->end >= addr + size - 1))
+		if (PFN_DOWN(p->start) <= PFN_DOWN(addr) &&
+		    PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1))
 			continue;
 		printk(KERN_WARNING "resource map sanity check conflict: "
 		       "0x%llx 0x%llx 0x%llx 0x%llx %s\n",
diff --git a/kernel/sched.c b/kernel/sched.c
index 6625c3c4b10d..9b1e79371c20 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -386,7 +386,6 @@ struct cfs_rq {
 
 	u64 exec_clock;
 	u64 min_vruntime;
-	u64 pair_start;
 
 	struct rb_root tasks_timeline;
 	struct rb_node *rb_leftmost;
@@ -398,9 +397,9 @@ struct cfs_rq {
 	 * 'curr' points to currently running entity on this cfs_rq.
 	 * It is set to NULL otherwise (i.e when none are currently running).
 	 */
-	struct sched_entity *curr, *next;
+	struct sched_entity *curr, *next, *last;
 
-	unsigned long nr_spread_over;
+	unsigned int nr_spread_over;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
@@ -970,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
+void task_rq_unlock_wait(struct task_struct *p)
+{
+	struct rq *rq = task_rq(p);
+
+	smp_mb(); /* spin-unlock-wait is not a full memory barrier */
+	spin_unlock_wait(&rq->lock);
+}
+
 static void __task_rq_unlock(struct rq *rq)
 	__releases(rq->lock)
 {
@@ -1449,6 +1456,8 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 	if (rq->nr_running)
 		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+	else
+		rq->avg_load_per_task = 0;
 
 	return rq->avg_load_per_task;
 }
@@ -1806,7 +1815,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 	/*
 	 * Buddy candidates are cache hot:
 	 */
-	if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
+	if (sched_feat(CACHE_HOT_BUDDY) &&
+			(&p->se == cfs_rq_of(&p->se)->next ||
+			 &p->se == cfs_rq_of(&p->se)->last))
 		return 1;
 
 	if (p->sched_class != &fair_sched_class)
@@ -3344,7 +3355,7 @@ small_imbalance:
 	} else
 		this_load_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if (max_load - this_load + 2*busiest_load_per_task >=
+	if (max_load - this_load + busiest_load_per_task >=
 				busiest_load_per_task * imbn) {
 		*imbalance = busiest_load_per_task;
 		return busiest;
@@ -5859,6 +5870,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
 
+	spin_lock_irqsave(&rq->lock, flags);
+
 	__sched_fork(idle);
 	idle->se.exec_start = sched_clock();
 
@@ -5866,7 +5879,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	idle->cpus_allowed = cpumask_of_cpu(cpu);
 	__set_task_cpu(idle, cpu);
 
-	spin_lock_irqsave(&rq->lock, flags);
 	rq->curr = rq->idle = idle;
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	idle->oncpu = 1;
@@ -6876,15 +6888,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 	struct sched_domain *tmp;
 
 	/* Remove the sched domains which do not contribute to scheduling. */
-	for (tmp = sd; tmp; tmp = tmp->parent) {
+	for (tmp = sd; tmp; ) {
 		struct sched_domain *parent = tmp->parent;
 		if (!parent)
 			break;
+
 		if (sd_parent_degenerate(tmp, parent)) {
 			tmp->parent = parent->parent;
 			if (parent->parent)
 				parent->parent->child = tmp;
-		}
+		} else
+			tmp = tmp->parent;
 	}
 
 	if (sd && sd_degenerate(sd)) {
@@ -7673,6 +7687,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 error:
 	free_sched_groups(cpu_map, tmpmask);
 	SCHED_CPUMASK_FREE((void *)allmasks);
+	kfree(rd);
 	return -ENOMEM;
 #endif
 }
@@ -7774,13 +7789,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  *
  * The passed in 'doms_new' should be kmalloc'd. This routine takes
  * ownership of it and will kfree it when done with it. If the caller
- * failed the kmalloc call, then it can pass in doms_new == NULL,
- * and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms', it also forces the domains to be rebuilt.
+ * failed the kmalloc call, then it can pass in doms_new == NULL &&
+ * ndoms_new == 1, and partition_sched_domains() will fallback to
+ * the single partition 'fallback_doms', it also forces the domains
+ * to be rebuilt.
  *
- * If doms_new==NULL it will be replaced with cpu_online_map.
- * ndoms_new==0 is a special case for destroying existing domains.
- * It will not create the default domain.
+ * If doms_new == NULL it will be replaced with cpu_online_map.
+ * ndoms_new == 0 is a special case for destroying existing domains,
+ * and it will not create the default domain.
  *
  * Call with hotplug lock held
  */
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ad958c1ec708..26ed8e3d1c15 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -144,7 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	last = __pick_last_entity(cfs_rq);
 	if (last)
 		max_vruntime = last->vruntime;
-	min_vruntime = rq->cfs.min_vruntime;
+	min_vruntime = cfs_rq->min_vruntime;
 	rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
 	spin_unlock_irqrestore(&rq->lock, flags);
 	SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
@@ -161,26 +161,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			SPLIT_NS(spread0));
 	SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
 	SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
-
-	P(yld_exp_empty);
-	P(yld_act_empty);
-	P(yld_both_empty);
-	P(yld_count);
 
-	P(sched_switch);
-	P(sched_count);
-	P(sched_goidle);
-
-	P(ttwu_count);
-	P(ttwu_local);
-
-	P(bkl_count);
-
-#undef P
-#endif
-	SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
+	SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
 			cfs_rq->nr_spread_over);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
@@ -260,6 +242,25 @@ static void print_cpu(struct seq_file *m, int cpu)
 #undef P
 #undef PN
 
+#ifdef CONFIG_SCHEDSTATS
+#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
+
+	P(yld_exp_empty);
+	P(yld_act_empty);
+	P(yld_both_empty);
+	P(yld_count);
+
+	P(sched_switch);
+	P(sched_count);
+	P(sched_goidle);
+
+	P(ttwu_count);
+	P(ttwu_local);
+
+	P(bkl_count);
+
+#undef P
+#endif
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
 
@@ -319,7 +320,7 @@ static int __init init_sched_debug_procfs(void)
 {
 	struct proc_dir_entry *pe;
 
-	pe = proc_create("sched_debug", 0644, NULL, &sched_debug_fops);
+	pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
 	if (!pe)
 		return -ENOMEM;
 	return 0;
@@ -422,10 +423,11 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #undef __P
 
 	{
+		unsigned int this_cpu = raw_smp_processor_id();
 		u64 t0, t1;
 
-		t0 = sched_clock();
-		t1 = sched_clock();
+		t0 = cpu_clock(this_cpu);
+		t1 = cpu_clock(this_cpu);
 		SEQ_printf(m, "%-35s:%21Ld\n",
 			   "clock-delta", (long long)(t1-t0));
 	}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b8..98345e45b059 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+	int depth = 0;
+
+	for_each_sched_entity(se)
+		depth++;
+
+	return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+	int se_depth, pse_depth;
+
+	/*
+	 * preemption test can be made between sibling entities who are in the
+	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+	 * both tasks until we find their ancestors who are siblings of common
+	 * parent.
+	 */
+
+	/* First walk up until both entities are at same depth */
+	se_depth = depth_se(*se);
+	pse_depth = depth_se(*pse);
+
+	while (se_depth > pse_depth) {
+		se_depth--;
+		*se = parent_entity(*se);
+	}
+
+	while (pse_depth > se_depth) {
+		pse_depth--;
+		*pse = parent_entity(*pse);
+	}
+
+	while (!is_same_group(*se, *pse)) {
+		*se = parent_entity(*se);
+		*pse = parent_entity(*pse);
+	}
+}
+
 #else	/* CONFIG_FAIR_GROUP_SCHED */
 
 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return NULL;
 }
 
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
 
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return se->vruntime - cfs_rq->min_vruntime;
 }
 
+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+	u64 vruntime = cfs_rq->min_vruntime;
+
+	if (cfs_rq->curr)
+		vruntime = cfs_rq->curr->vruntime;
+
+	if (cfs_rq->rb_leftmost) {
+		struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
+						   struct sched_entity,
+						   run_node);
+
+		if (vruntime == cfs_rq->min_vruntime)
+			vruntime = se->vruntime;
+		else
+			vruntime = min_vruntime(vruntime, se->vruntime);
+	}
+
+	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Maintain a cache of leftmost tree entries (it is frequently
 	 * used):
 	 */
-	if (leftmost) {
+	if (leftmost)
 		cfs_rq->rb_leftmost = &se->run_node;
-		/*
-		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-		 * value tracking the leftmost vruntime in the tree.
-		 */
-		cfs_rq->min_vruntime =
-			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
-	}
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node) {
 		struct rb_node *next_node;
-		struct sched_entity *next;
 
 		next_node = rb_next(&se->run_node);
 		cfs_rq->rb_leftmost = next_node;
-
-		if (next_node) {
-			next = rb_entry(next_node,
-					struct sched_entity, run_node);
-			cfs_rq->min_vruntime =
-				max_vruntime(cfs_rq->min_vruntime,
-					     next->vruntime);
-		}
 	}
 
-	if (cfs_rq->next == se)
-		cfs_rq->next = NULL;
-
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
-static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->rb_leftmost;
-}
-
 static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
+	struct rb_node *left = cfs_rq->rb_leftmost;
+
+	if (!left)
+		return NULL;
+
+	return rb_entry(left, struct sched_entity, run_node);
 }
 
-static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
 
@@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
 	curr->vruntime += delta_exec_weighted;
+	update_min_vruntime(cfs_rq);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
-	u64 vruntime;
-
-	if (first_fair(cfs_rq)) {
-		vruntime = min_vruntime(cfs_rq->min_vruntime,
-				__pick_next_entity(cfs_rq)->vruntime);
-	} else
-		vruntime = cfs_rq->min_vruntime;
+	u64 vruntime = cfs_rq->min_vruntime;
 
 	/*
 	 * The 'current' period is already promised to the current tasks,
@@ -671,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 		__enqueue_entity(cfs_rq, se);
 }
 
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (cfs_rq->last == se)
+		cfs_rq->last = NULL;
+
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -693,9 +747,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 #endif
 	}
 
+	clear_buddies(cfs_rq, se);
+
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
+	update_min_vruntime(cfs_rq);
 }
 
 /*
@@ -742,29 +799,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
742 se->prev_sum_exec_runtime = se->sum_exec_runtime; 799 se->prev_sum_exec_runtime = se->sum_exec_runtime;
743} 800}
744 801
745static struct sched_entity * 802static int
746pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) 803wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
747{
748 struct rq *rq = rq_of(cfs_rq);
749 u64 pair_slice = rq->clock - cfs_rq->pair_start;
750
751 if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
752 cfs_rq->pair_start = rq->clock;
753 return se;
754 }
755
756 return cfs_rq->next;
757}
758 804
759static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) 805static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
760{ 806{
761 struct sched_entity *se = NULL; 807 struct sched_entity *se = __pick_next_entity(cfs_rq);
762 808
763 if (first_fair(cfs_rq)) { 809 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
764 se = __pick_next_entity(cfs_rq); 810 return cfs_rq->next;
765 se = pick_next(cfs_rq, se); 811
766 set_next_entity(cfs_rq, se); 812 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
767 } 813 return cfs_rq->last;
768 814
769 return se; 815 return se;
770} 816}
@@ -936,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
936 if (unlikely(cfs_rq->nr_running == 1)) 982 if (unlikely(cfs_rq->nr_running == 1))
937 return; 983 return;
938 984
985 clear_buddies(cfs_rq, se);
986
939 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { 987 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
940 update_rq_clock(rq); 988 update_rq_clock(rq);
941 /* 989 /*
@@ -1122,10 +1170,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1122 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) 1170 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
1123 return 0; 1171 return 0;
1124 1172
1125 if (!sync && sched_feat(SYNC_WAKEUPS) && 1173 if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
1126 curr->se.avg_overlap < sysctl_sched_migration_cost && 1174 p->se.avg_overlap > sysctl_sched_migration_cost))
1127 p->se.avg_overlap < sysctl_sched_migration_cost) 1175 sync = 0;
1128 sync = 1;
1129 1176
1130 /* 1177 /*
1131 * If sync wakeup then subtract the (maximum possible) 1178 * If sync wakeup then subtract the (maximum possible)
@@ -1244,33 +1291,88 @@ static unsigned long wakeup_gran(struct sched_entity *se)
1244 * More easily preempt - nice tasks, while not making it harder for 1291 * More easily preempt - nice tasks, while not making it harder for
1245 * + nice tasks. 1292 * + nice tasks.
1246 */ 1293 */
1247 if (sched_feat(ASYM_GRAN)) 1294 if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
1248 gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); 1295 gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
1249 1296
1250 return gran; 1297 return gran;
1251} 1298}
1252 1299
1253/* 1300/*
1301 * Should 'se' preempt 'curr'.
1302 *
1303 * |s1
1304 * |s2
1305 * |s3
1306 * g
1307 * |<--->|c
1308 *
1309 * w(c, s1) = -1
1310 * w(c, s2) = 0
1311 * w(c, s3) = 1
1312 *
1313 */
1314static int
1315wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
1316{
1317 s64 gran, vdiff = curr->vruntime - se->vruntime;
1318
1319 if (vdiff <= 0)
1320 return -1;
1321
1322 gran = wakeup_gran(curr);
1323 if (vdiff > gran)
1324 return 1;
1325
1326 return 0;
1327}
1328
1329static void set_last_buddy(struct sched_entity *se)
1330{
1331 for_each_sched_entity(se)
1332 cfs_rq_of(se)->last = se;
1333}
1334
1335static void set_next_buddy(struct sched_entity *se)
1336{
1337 for_each_sched_entity(se)
1338 cfs_rq_of(se)->next = se;
1339}
1340
1341/*
1254 * Preempt the current task with a newly woken task if needed: 1342 * Preempt the current task with a newly woken task if needed:
1255 */ 1343 */
1256static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) 1344static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1257{ 1345{
1258 struct task_struct *curr = rq->curr; 1346 struct task_struct *curr = rq->curr;
1259 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1260 struct sched_entity *se = &curr->se, *pse = &p->se; 1347 struct sched_entity *se = &curr->se, *pse = &p->se;
1261 s64 delta_exec;
1262 1348
1263 if (unlikely(rt_prio(p->prio))) { 1349 if (unlikely(rt_prio(p->prio))) {
1350 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1351
1264 update_rq_clock(rq); 1352 update_rq_clock(rq);
1265 update_curr(cfs_rq); 1353 update_curr(cfs_rq);
1266 resched_task(curr); 1354 resched_task(curr);
1267 return; 1355 return;
1268 } 1356 }
1269 1357
1358 if (unlikely(p->sched_class != &fair_sched_class))
1359 return;
1360
1270 if (unlikely(se == pse)) 1361 if (unlikely(se == pse))
1271 return; 1362 return;
1272 1363
1273 cfs_rq_of(pse)->next = pse; 1364 /*
1365 * Only set the backward buddy when the current task is still on the
1366 * rq. This can happen when a wakeup gets interleaved with schedule on
1367 * the ->pre_schedule() or idle_balance() point, either of which can
1368 * drop the rq lock.
1369 *
1370 * Also, during early boot the idle thread is in the fair class; for
1371 * obvious reasons it's a bad idea to schedule back to the idle thread.
1372 */
1373 if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
1374 set_last_buddy(se);
1375 set_next_buddy(pse);
1274 1376
1275 /* 1377 /*
1276 * We can come here with TIF_NEED_RESCHED already set from new task 1378 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1296,9 +1398,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1296 return; 1398 return;
1297 } 1399 }
1298 1400
1299 delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; 1401 find_matching_se(&se, &pse);
1300 if (delta_exec > wakeup_gran(pse)) 1402
1301 resched_task(curr); 1403 while (se) {
1404 BUG_ON(!pse);
1405
1406 if (wakeup_preempt_entity(se, pse) == 1) {
1407 resched_task(curr);
1408 break;
1409 }
1410
1411 se = parent_entity(se);
1412 pse = parent_entity(pse);
1413 }
1302} 1414}
1303 1415
1304static struct task_struct *pick_next_task_fair(struct rq *rq) 1416static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1312,6 +1424,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1312 1424
1313 do { 1425 do {
1314 se = pick_next_entity(cfs_rq); 1426 se = pick_next_entity(cfs_rq);
1427 set_next_entity(cfs_rq, se);
1315 cfs_rq = group_cfs_rq(se); 1428 cfs_rq = group_cfs_rq(se);
1316 } while (cfs_rq); 1429 } while (cfs_rq);
1317 1430
@@ -1594,9 +1707,6 @@ static const struct sched_class fair_sched_class = {
1594 .enqueue_task = enqueue_task_fair, 1707 .enqueue_task = enqueue_task_fair,
1595 .dequeue_task = dequeue_task_fair, 1708 .dequeue_task = dequeue_task_fair,
1596 .yield_task = yield_task_fair, 1709 .yield_task = yield_task_fair,
1597#ifdef CONFIG_SMP
1598 .select_task_rq = select_task_rq_fair,
1599#endif /* CONFIG_SMP */
1600 1710
1601 .check_preempt_curr = check_preempt_wakeup, 1711 .check_preempt_curr = check_preempt_wakeup,
1602 1712
@@ -1604,6 +1714,8 @@ static const struct sched_class fair_sched_class = {
1604 .put_prev_task = put_prev_task_fair, 1714 .put_prev_task = put_prev_task_fair,
1605 1715
1606#ifdef CONFIG_SMP 1716#ifdef CONFIG_SMP
1717 .select_task_rq = select_task_rq_fair,
1718
1607 .load_balance = load_balance_fair, 1719 .load_balance = load_balance_fair,
1608 .move_one_task = move_one_task_fair, 1720 .move_one_task = move_one_task_fair,
1609#endif 1721#endif
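
For orientation amid the sched_fair.c hunks above: the buddy-based preemption path boils down to wakeup_preempt_entity(), a three-way comparison of the running and woken entities' vruntimes against a wakeup granularity, and pick_next_entity() only honours cfs_rq->next or cfs_rq->last when that test does not return 1 against the leftmost entity. A minimal userspace sketch of the decision, assuming an invented granularity constant and simplified names (not the kernel code itself):

/*
 * Sketch of the three-way test: -1 means the woken entity is not ahead,
 * 0 means it is ahead but within the granularity, 1 means preempt.
 */
#include <stdio.h>
#include <stdint.h>

#define WAKEUP_GRAN_NS 1000000LL	/* stand-in for wakeup_gran(curr) */

static int wakeup_preempt(int64_t curr_vruntime, int64_t woken_vruntime)
{
	int64_t vdiff = curr_vruntime - woken_vruntime;

	if (vdiff <= 0)			/* woken task is not ahead: no preemption */
		return -1;
	if (vdiff > WAKEUP_GRAN_NS)	/* far enough ahead: preempt */
		return 1;
	return 0;			/* ahead, but within the granularity */
}

int main(void)
{
	printf("%d %d %d\n",
	       wakeup_preempt(100, 200),	/* prints -1 */
	       wakeup_preempt(200, 100),	/* prints  0 */
	       wakeup_preempt(5000000, 100));	/* prints  1 */
	return 0;
}
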
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index fda016218296..da5d93b5d2c6 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -12,3 +12,4 @@ SCHED_FEAT(LB_BIAS, 1)
12SCHED_FEAT(LB_WAKEUP_UPDATE, 1) 12SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
13SCHED_FEAT(ASYM_EFF_LOAD, 1) 13SCHED_FEAT(ASYM_EFF_LOAD, 1)
14SCHED_FEAT(WAKEUP_OVERLAP, 0) 14SCHED_FEAT(WAKEUP_OVERLAP, 0)
15SCHED_FEAT(LAST_BUDDY, 1)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index dec4ccabe2f5..8a21a2e28c13 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = {
105 105
106 /* dequeue is not valid, we print a debug message there: */ 106 /* dequeue is not valid, we print a debug message there: */
107 .dequeue_task = dequeue_task_idle, 107 .dequeue_task = dequeue_task_idle,
108#ifdef CONFIG_SMP
109 .select_task_rq = select_task_rq_idle,
110#endif /* CONFIG_SMP */
111 108
112 .check_preempt_curr = check_preempt_curr_idle, 109 .check_preempt_curr = check_preempt_curr_idle,
113 110
@@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = {
115 .put_prev_task = put_prev_task_idle, 112 .put_prev_task = put_prev_task_idle,
116 113
117#ifdef CONFIG_SMP 114#ifdef CONFIG_SMP
115 .select_task_rq = select_task_rq_idle,
116
118 .load_balance = load_balance_idle, 117 .load_balance = load_balance_idle,
119 .move_one_task = move_one_task_idle, 118 .move_one_task = move_one_task_idle,
120#endif 119#endif
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b446dc87494f..d9ba9d5f99d6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1504,9 +1504,6 @@ static const struct sched_class rt_sched_class = {
1504 .enqueue_task = enqueue_task_rt, 1504 .enqueue_task = enqueue_task_rt,
1505 .dequeue_task = dequeue_task_rt, 1505 .dequeue_task = dequeue_task_rt,
1506 .yield_task = yield_task_rt, 1506 .yield_task = yield_task_rt,
1507#ifdef CONFIG_SMP
1508 .select_task_rq = select_task_rq_rt,
1509#endif /* CONFIG_SMP */
1510 1507
1511 .check_preempt_curr = check_preempt_curr_rt, 1508 .check_preempt_curr = check_preempt_curr_rt,
1512 1509
@@ -1514,6 +1511,8 @@ static const struct sched_class rt_sched_class = {
1514 .put_prev_task = put_prev_task_rt, 1511 .put_prev_task = put_prev_task_rt,
1515 1512
1516#ifdef CONFIG_SMP 1513#ifdef CONFIG_SMP
1514 .select_task_rq = select_task_rq_rt,
1515
1517 .load_balance = load_balance_rt, 1516 .load_balance = load_balance_rt,
1518 .move_one_task = move_one_task_rt, 1517 .move_one_task = move_one_task_rt,
1519 .set_cpus_allowed = set_cpus_allowed_rt, 1518 .set_cpus_allowed = set_cpus_allowed_rt,
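
The sched_fair.c, sched_idletask.c and sched_rt.c hunks above make the same mechanical change: .select_task_rq moves out of its own #ifdef CONFIG_SMP island and into the existing SMP-only block of each sched_class initializer. A trimmed-down, hypothetical illustration of the resulting layout (the struct and stub names below are invented):

/* All SMP-only callbacks live under one #ifdef in the initializer. */
struct my_class_ops {
	void (*enqueue)(void);
#ifdef CONFIG_SMP
	int  (*select_cpu)(void);
	void (*load_balance)(void);
#endif
};

static void enqueue_stub(void) { }
#ifdef CONFIG_SMP
static int  select_cpu_stub(void) { return 0; }
static void load_balance_stub(void) { }
#endif

static const struct my_class_ops example_ops = {
	.enqueue	= enqueue_stub,
#ifdef CONFIG_SMP
	.select_cpu	= select_cpu_stub,	/* was in a separate #ifdef block */
	.load_balance	= load_balance_stub,
#endif
};
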
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index ee71bec1da66..7dbf72a2b02c 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -298,9 +298,11 @@ static inline void account_group_user_time(struct task_struct *tsk,
298{ 298{
299 struct signal_struct *sig; 299 struct signal_struct *sig;
300 300
301 sig = tsk->signal; 301 /* tsk == current, ensure it is safe to use ->signal */
302 if (unlikely(!sig)) 302 if (unlikely(tsk->exit_state))
303 return; 303 return;
304
305 sig = tsk->signal;
304 if (sig->cputime.totals) { 306 if (sig->cputime.totals) {
305 struct task_cputime *times; 307 struct task_cputime *times;
306 308
@@ -325,9 +327,11 @@ static inline void account_group_system_time(struct task_struct *tsk,
325{ 327{
326 struct signal_struct *sig; 328 struct signal_struct *sig;
327 329
328 sig = tsk->signal; 330 /* tsk == current, ensure it is safe to use ->signal */
329 if (unlikely(!sig)) 331 if (unlikely(tsk->exit_state))
330 return; 332 return;
333
334 sig = tsk->signal;
331 if (sig->cputime.totals) { 335 if (sig->cputime.totals) {
332 struct task_cputime *times; 336 struct task_cputime *times;
333 337
@@ -353,8 +357,11 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
353 struct signal_struct *sig; 357 struct signal_struct *sig;
354 358
355 sig = tsk->signal; 359 sig = tsk->signal;
360 /* see __exit_signal()->task_rq_unlock_wait() */
361 barrier();
356 if (unlikely(!sig)) 362 if (unlikely(!sig))
357 return; 363 return;
364
358 if (sig->cputime.totals) { 365 if (sig->cputime.totals) {
359 struct task_cputime *times; 366 struct task_cputime *times;
360 367
diff --git a/kernel/signal.c b/kernel/signal.c
index 105217da5c82..4530fc654455 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1144,7 +1144,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1144 struct task_struct * p; 1144 struct task_struct * p;
1145 1145
1146 for_each_process(p) { 1146 for_each_process(p) {
1147 if (p->pid > 1 && !same_thread_group(p, current)) { 1147 if (task_pid_vnr(p) > 1 &&
1148 !same_thread_group(p, current)) {
1148 int err = group_send_sig_info(sig, info, p); 1149 int err = group_send_sig_info(sig, info, p);
1149 ++count; 1150 ++count;
1150 if (err != -EPERM) 1151 if (err != -EPERM)
diff --git a/kernel/smp.c b/kernel/smp.c
index f362a8553777..75c8dde58c55 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -51,10 +51,6 @@ static void csd_flag_wait(struct call_single_data *data)
51{ 51{
52 /* Wait for response */ 52 /* Wait for response */
53 do { 53 do {
54 /*
55 * We need to see the flags store in the IPI handler
56 */
57 smp_mb();
58 if (!(data->flags & CSD_FLAG_WAIT)) 54 if (!(data->flags & CSD_FLAG_WAIT))
59 break; 55 break;
60 cpu_relax(); 56 cpu_relax();
@@ -76,6 +72,11 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
76 list_add_tail(&data->list, &dst->list); 72 list_add_tail(&data->list, &dst->list);
77 spin_unlock_irqrestore(&dst->lock, flags); 73 spin_unlock_irqrestore(&dst->lock, flags);
78 74
75 /*
76 * Make the list addition visible before sending the ipi.
77 */
78 smp_mb();
79
79 if (ipi) 80 if (ipi)
80 arch_send_call_function_single_ipi(cpu); 81 arch_send_call_function_single_ipi(cpu);
81 82
@@ -157,7 +158,7 @@ void generic_smp_call_function_single_interrupt(void)
157 * Need to see other stores to list head for checking whether 158 * Need to see other stores to list head for checking whether
158 * list is empty without holding q->lock 159 * list is empty without holding q->lock
159 */ 160 */
160 smp_mb(); 161 smp_read_barrier_depends();
161 while (!list_empty(&q->list)) { 162 while (!list_empty(&q->list)) {
162 unsigned int data_flags; 163 unsigned int data_flags;
163 164
@@ -191,7 +192,7 @@ void generic_smp_call_function_single_interrupt(void)
191 /* 192 /*
192 * See comment on outer loop 193 * See comment on outer loop
193 */ 194 */
194 smp_mb(); 195 smp_read_barrier_depends();
195 } 196 }
196} 197}
197 198
@@ -370,6 +371,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
370 list_add_tail_rcu(&data->csd.list, &call_function_queue); 371 list_add_tail_rcu(&data->csd.list, &call_function_queue);
371 spin_unlock_irqrestore(&call_function_lock, flags); 372 spin_unlock_irqrestore(&call_function_lock, flags);
372 373
374 /*
375 * Make the list addition visible before sending the ipi.
376 */
377 smp_mb();
378
373 /* Send a message to all CPUs in the map */ 379 /* Send a message to all CPUs in the map */
374 arch_send_call_function_ipi(mask); 380 arch_send_call_function_ipi(mask);
375 381
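
The smp.c hunks move the full barrier from the polling loop to just after the list insertion, so the queued call_single_data is globally visible before the IPI is sent, and relax the reader side to a data-dependency barrier. The ordering requirement, sketched with C11 fences standing in for smp_mb() (illustration only; single-threaded here, so it shows the shape of the protocol rather than a real race):

#include <stdatomic.h>
#include <stdio.h>

static int call_data;			/* stands in for the queued csd entry */
static atomic_int ipi_pending;		/* stands in for the IPI itself */

static void sender(void)
{
	call_data = 42;					/* "list_add_tail(...)" */
	atomic_thread_fence(memory_order_release);	/* the relocated smp_mb() */
	atomic_store_explicit(&ipi_pending, 1, memory_order_relaxed);
}

static void receiver(void)
{
	if (atomic_load_explicit(&ipi_pending, memory_order_relaxed)) {
		atomic_thread_fence(memory_order_acquire);
		printf("%d\n", call_data);	/* must observe 42, not stale data */
	}
}

int main(void)
{
	sender();
	receiver();
	return 0;
}
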
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7110daeb9a90..e7c69a720d69 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -269,10 +269,11 @@ void irq_enter(void)
269{ 269{
270 int cpu = smp_processor_id(); 270 int cpu = smp_processor_id();
271 271
272 if (idle_cpu(cpu) && !in_interrupt()) 272 if (idle_cpu(cpu) && !in_interrupt()) {
273 __irq_enter();
273 tick_check_idle(cpu); 274 tick_check_idle(cpu);
274 275 } else
275 __irq_enter(); 276 __irq_enter();
276} 277}
277 278
278#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 279#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9bc4c00872c9..24e8ceacc388 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -112,7 +112,7 @@ static int chill(void *unused)
112int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) 112int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
113{ 113{
114 struct work_struct *sm_work; 114 struct work_struct *sm_work;
115 int i; 115 int i, ret;
116 116
117 /* Set up initial state. */ 117 /* Set up initial state. */
118 mutex_lock(&lock); 118 mutex_lock(&lock);
@@ -137,8 +137,9 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
137 /* This will release the thread on our CPU. */ 137 /* This will release the thread on our CPU. */
138 put_cpu(); 138 put_cpu();
139 flush_workqueue(stop_machine_wq); 139 flush_workqueue(stop_machine_wq);
140 ret = active.fnret;
140 mutex_unlock(&lock); 141 mutex_unlock(&lock);
141 return active.fnret; 142 return ret;
142} 143}
143 144
144int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) 145int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
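
The stop_machine.c hunk closes a narrow race: active.fnret was read after mutex_unlock(), so a subsequent caller could overwrite it before the return value was fetched. The general pattern, in a hypothetical pthread-based sketch (names invented):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_fnret;		/* result slot reused by every caller */

int run_and_collect(void)
{
	int ret;

	pthread_mutex_lock(&lock);
	/* ... queue the work and wait for completion ... */
	ret = shared_fnret;		/* snapshot while still holding the lock */
	pthread_mutex_unlock(&lock);

	return ret;			/* no access to shared state after unlock */
}
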
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a13bd4dfaeb1..9d048fa2d902 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -474,7 +474,7 @@ static struct ctl_table kern_table[] = {
474 .mode = 0644, 474 .mode = 0644,
475 .proc_handler = &proc_dointvec, 475 .proc_handler = &proc_dointvec,
476 }, 476 },
477#ifdef CONFIG_FTRACE 477#ifdef CONFIG_FUNCTION_TRACER
478 { 478 {
479 .ctl_name = CTL_UNNUMBERED, 479 .ctl_name = CTL_UNNUMBERED,
480 .procname = "ftrace_enabled", 480 .procname = "ftrace_enabled",
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5bbb1044f847..342fc9ccab46 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void)
568 */ 568 */
569static void tick_nohz_kick_tick(int cpu) 569static void tick_nohz_kick_tick(int cpu)
570{ 570{
571#if 0
572 /* Switch back to 2.6.27 behaviour */
573
571 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 574 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
572 ktime_t delta, now; 575 ktime_t delta, now;
573 576
@@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu)
584 return; 587 return;
585 588
586 tick_nohz_restart(ts, now); 589 tick_nohz_restart(ts, now);
590#endif
587} 591}
588 592
589#else 593#else
diff --git a/kernel/timer.c b/kernel/timer.c
index 56becf373c58..dbd50fabe4c7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -112,27 +112,8 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
112 tbase_get_deferrable(timer->base)); 112 tbase_get_deferrable(timer->base));
113} 113}
114 114
115/** 115static unsigned long round_jiffies_common(unsigned long j, int cpu,
116 * __round_jiffies - function to round jiffies to a full second 116 bool force_up)
117 * @j: the time in (absolute) jiffies that should be rounded
118 * @cpu: the processor number on which the timeout will happen
119 *
120 * __round_jiffies() rounds an absolute time in the future (in jiffies)
121 * up or down to (approximately) full seconds. This is useful for timers
122 * for which the exact time they fire does not matter too much, as long as
123 * they fire approximately every X seconds.
124 *
125 * By rounding these timers to whole seconds, all such timers will fire
126 * at the same time, rather than at various times spread out. The goal
127 * of this is to have the CPU wake up less, which saves power.
128 *
129 * The exact rounding is skewed for each processor to avoid all
130 * processors firing at the exact same time, which could lead
131 * to lock contention or spurious cache line bouncing.
132 *
133 * The return value is the rounded version of the @j parameter.
134 */
135unsigned long __round_jiffies(unsigned long j, int cpu)
136{ 117{
137 int rem; 118 int rem;
138 unsigned long original = j; 119 unsigned long original = j;
@@ -154,8 +135,9 @@ unsigned long __round_jiffies(unsigned long j, int cpu)
154 * due to delays of the timer irq, long irq off times etc etc) then 135 * due to delays of the timer irq, long irq off times etc etc) then
155 * we should round down to the whole second, not up. Use 1/4th second 136 * we should round down to the whole second, not up. Use 1/4th second
156 * as cutoff for this rounding as an extreme upper bound for this. 137 * as cutoff for this rounding as an extreme upper bound for this.
138 * But never round down if @force_up is set.
157 */ 139 */
158 if (rem < HZ/4) /* round down */ 140 if (rem < HZ/4 && !force_up) /* round down */
159 j = j - rem; 141 j = j - rem;
160 else /* round up */ 142 else /* round up */
161 j = j - rem + HZ; 143 j = j - rem + HZ;
@@ -167,6 +149,31 @@ unsigned long __round_jiffies(unsigned long j, int cpu)
167 return original; 149 return original;
168 return j; 150 return j;
169} 151}
152
153/**
154 * __round_jiffies - function to round jiffies to a full second
155 * @j: the time in (absolute) jiffies that should be rounded
156 * @cpu: the processor number on which the timeout will happen
157 *
158 * __round_jiffies() rounds an absolute time in the future (in jiffies)
159 * up or down to (approximately) full seconds. This is useful for timers
160 * for which the exact time they fire does not matter too much, as long as
161 * they fire approximately every X seconds.
162 *
163 * By rounding these timers to whole seconds, all such timers will fire
164 * at the same time, rather than at various times spread out. The goal
165 * of this is to have the CPU wake up less, which saves power.
166 *
167 * The exact rounding is skewed for each processor to avoid all
168 * processors firing at the exact same time, which could lead
169 * to lock contention or spurious cache line bouncing.
170 *
171 * The return value is the rounded version of the @j parameter.
172 */
173unsigned long __round_jiffies(unsigned long j, int cpu)
174{
175 return round_jiffies_common(j, cpu, false);
176}
170EXPORT_SYMBOL_GPL(__round_jiffies); 177EXPORT_SYMBOL_GPL(__round_jiffies);
171 178
172/** 179/**
@@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
191 */ 198 */
192unsigned long __round_jiffies_relative(unsigned long j, int cpu) 199unsigned long __round_jiffies_relative(unsigned long j, int cpu)
193{ 200{
194 /* 201 unsigned long j0 = jiffies;
195 * In theory the following code can skip a jiffy in case jiffies 202
196 * increments right between the addition and the later subtraction. 203 /* Use j0 because jiffies might change while we run */
197 * However since the entire point of this function is to use approximate 204 return round_jiffies_common(j + j0, cpu, false) - j0;
198 * timeouts, it's entirely ok to not handle that.
199 */
200 return __round_jiffies(j + jiffies, cpu) - jiffies;
201} 205}
202EXPORT_SYMBOL_GPL(__round_jiffies_relative); 206EXPORT_SYMBOL_GPL(__round_jiffies_relative);
203 207
@@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
218 */ 222 */
219unsigned long round_jiffies(unsigned long j) 223unsigned long round_jiffies(unsigned long j)
220{ 224{
221 return __round_jiffies(j, raw_smp_processor_id()); 225 return round_jiffies_common(j, raw_smp_processor_id(), false);
222} 226}
223EXPORT_SYMBOL_GPL(round_jiffies); 227EXPORT_SYMBOL_GPL(round_jiffies);
224 228
@@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j)
243} 247}
244EXPORT_SYMBOL_GPL(round_jiffies_relative); 248EXPORT_SYMBOL_GPL(round_jiffies_relative);
245 249
250/**
251 * __round_jiffies_up - function to round jiffies up to a full second
252 * @j: the time in (absolute) jiffies that should be rounded
253 * @cpu: the processor number on which the timeout will happen
254 *
255 * This is the same as __round_jiffies() except that it will never
256 * round down. This is useful for timeouts for which the exact time
257 * of firing does not matter too much, as long as they don't fire too
258 * early.
259 */
260unsigned long __round_jiffies_up(unsigned long j, int cpu)
261{
262 return round_jiffies_common(j, cpu, true);
263}
264EXPORT_SYMBOL_GPL(__round_jiffies_up);
265
266/**
267 * __round_jiffies_up_relative - function to round jiffies up to a full second
268 * @j: the time in (relative) jiffies that should be rounded
269 * @cpu: the processor number on which the timeout will happen
270 *
271 * This is the same as __round_jiffies_relative() except that it will never
272 * round down. This is useful for timeouts for which the exact time
273 * of firing does not matter too much, as long as they don't fire too
274 * early.
275 */
276unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
277{
278 unsigned long j0 = jiffies;
279
280 /* Use j0 because jiffies might change while we run */
281 return round_jiffies_common(j + j0, cpu, true) - j0;
282}
283EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
284
285/**
286 * round_jiffies_up - function to round jiffies up to a full second
287 * @j: the time in (absolute) jiffies that should be rounded
288 *
289 * This is the same as round_jiffies() except that it will never
290 * round down. This is useful for timeouts for which the exact time
291 * of firing does not matter too much, as long as they don't fire too
292 * early.
293 */
294unsigned long round_jiffies_up(unsigned long j)
295{
296 return round_jiffies_common(j, raw_smp_processor_id(), true);
297}
298EXPORT_SYMBOL_GPL(round_jiffies_up);
299
300/**
301 * round_jiffies_up_relative - function to round jiffies up to a full second
302 * @j: the time in (relative) jiffies that should be rounded
303 *
304 * This is the same as round_jiffies_relative() except that it will never
305 * round down. This is useful for timeouts for which the exact time
306 * of firing does not matter too much, as long as they don't fire too
307 * early.
308 */
309unsigned long round_jiffies_up_relative(unsigned long j)
310{
311 return __round_jiffies_up_relative(j, raw_smp_processor_id());
312}
313EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
314
246 315
247static inline void set_running_timer(struct tvec_base *base, 316static inline void set_running_timer(struct tvec_base *base,
248 struct timer_list *timer) 317 struct timer_list *timer)
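
The timer.c hunks fold the existing rounding helpers into round_jiffies_common(), whose force_up flag disables the round-down case, and build the new *_up variants on top of it. A userspace approximation of the rounding rule (the HZ value, the explicit now parameter and the simplified past-check are assumptions of this sketch, not the kernel interface):

#include <stdio.h>
#include <stdbool.h>

#define HZ 1000				/* assumed tick rate for the example */

static unsigned long round_common(unsigned long j, unsigned long now,
				  int cpu, bool force_up)
{
	unsigned long original = j;
	int rem;

	j += cpu * 3;			/* per-cpu skew, as in the hunk */
	rem = j % HZ;

	if (rem < HZ / 4 && !force_up)	/* just past a boundary: round down */
		j = j - rem;
	else				/* otherwise, or when forced: round up */
		j = j - rem + HZ;

	j -= cpu * 3;			/* undo the skew */

	return j <= now ? original : j;	/* never hand back a time in the past */
}

int main(void)
{
	printf("%lu %lu\n",
	       round_common(12100, 10000, 0, false),	/* 12000: rounded down */
	       round_common(12100, 10000, 0, true));	/* 13000: forced up */
	return 0;
}
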
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1cb3e1f616af..33dbefd471e8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,13 +1,13 @@
1# 1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: 2# Architectures that offer a FUNCTION_TRACER implementation should
3# select HAVE_FUNCTION_TRACER:
3# 4#
4 5
5config NOP_TRACER 6config NOP_TRACER
6 bool 7 bool
7 8
8config HAVE_FTRACE 9config HAVE_FUNCTION_TRACER
9 bool 10 bool
10 select NOP_TRACER
11 11
12config HAVE_DYNAMIC_FTRACE 12config HAVE_DYNAMIC_FTRACE
13 bool 13 bool
@@ -25,12 +25,15 @@ config TRACING
25 bool 25 bool
26 select DEBUG_FS 26 select DEBUG_FS
27 select RING_BUFFER 27 select RING_BUFFER
28 select STACKTRACE 28 select STACKTRACE if STACKTRACE_SUPPORT
29 select TRACEPOINTS 29 select TRACEPOINTS
30 select NOP_TRACER
30 31
31config FTRACE 32menu "Tracers"
33
34config FUNCTION_TRACER
32 bool "Kernel Function Tracer" 35 bool "Kernel Function Tracer"
33 depends on HAVE_FTRACE 36 depends on HAVE_FUNCTION_TRACER
34 depends on DEBUG_KERNEL 37 depends on DEBUG_KERNEL
35 select FRAME_POINTER 38 select FRAME_POINTER
36 select TRACING 39 select TRACING
@@ -49,7 +52,6 @@ config IRQSOFF_TRACER
49 default n 52 default n
50 depends on TRACE_IRQFLAGS_SUPPORT 53 depends on TRACE_IRQFLAGS_SUPPORT
51 depends on GENERIC_TIME 54 depends on GENERIC_TIME
52 depends on HAVE_FTRACE
53 depends on DEBUG_KERNEL 55 depends on DEBUG_KERNEL
54 select TRACE_IRQFLAGS 56 select TRACE_IRQFLAGS
55 select TRACING 57 select TRACING
@@ -73,7 +75,6 @@ config PREEMPT_TRACER
73 default n 75 default n
74 depends on GENERIC_TIME 76 depends on GENERIC_TIME
75 depends on PREEMPT 77 depends on PREEMPT
76 depends on HAVE_FTRACE
77 depends on DEBUG_KERNEL 78 depends on DEBUG_KERNEL
78 select TRACING 79 select TRACING
79 select TRACER_MAX_TRACE 80 select TRACER_MAX_TRACE
@@ -101,7 +102,6 @@ config SYSPROF_TRACER
101 102
102config SCHED_TRACER 103config SCHED_TRACER
103 bool "Scheduling Latency Tracer" 104 bool "Scheduling Latency Tracer"
104 depends on HAVE_FTRACE
105 depends on DEBUG_KERNEL 105 depends on DEBUG_KERNEL
106 select TRACING 106 select TRACING
107 select CONTEXT_SWITCH_TRACER 107 select CONTEXT_SWITCH_TRACER
@@ -112,7 +112,6 @@ config SCHED_TRACER
112 112
113config CONTEXT_SWITCH_TRACER 113config CONTEXT_SWITCH_TRACER
114 bool "Trace process context switches" 114 bool "Trace process context switches"
115 depends on HAVE_FTRACE
116 depends on DEBUG_KERNEL 115 depends on DEBUG_KERNEL
117 select TRACING 116 select TRACING
118 select MARKERS 117 select MARKERS
@@ -122,9 +121,9 @@ config CONTEXT_SWITCH_TRACER
122 121
123config BOOT_TRACER 122config BOOT_TRACER
124 bool "Trace boot initcalls" 123 bool "Trace boot initcalls"
125 depends on HAVE_FTRACE
126 depends on DEBUG_KERNEL 124 depends on DEBUG_KERNEL
127 select TRACING 125 select TRACING
126 select CONTEXT_SWITCH_TRACER
128 help 127 help
129 This tracer helps developers to optimize boot times: it records 128 This tracer helps developers to optimize boot times: it records
130 the timings of the initcalls and traces key events and the identity 129 the timings of the initcalls and traces key events and the identity
@@ -141,9 +140,9 @@ config BOOT_TRACER
141 140
142config STACK_TRACER 141config STACK_TRACER
143 bool "Trace max stack" 142 bool "Trace max stack"
144 depends on HAVE_FTRACE 143 depends on HAVE_FUNCTION_TRACER
145 depends on DEBUG_KERNEL 144 depends on DEBUG_KERNEL
146 select FTRACE 145 select FUNCTION_TRACER
147 select STACKTRACE 146 select STACKTRACE
148 help 147 help
149 This special tracer records the maximum stack footprint of the 148 This special tracer records the maximum stack footprint of the
@@ -160,7 +159,7 @@ config STACK_TRACER
160 159
161config DYNAMIC_FTRACE 160config DYNAMIC_FTRACE
162 bool "enable/disable ftrace tracepoints dynamically" 161 bool "enable/disable ftrace tracepoints dynamically"
163 depends on FTRACE 162 depends on FUNCTION_TRACER
164 depends on HAVE_DYNAMIC_FTRACE 163 depends on HAVE_DYNAMIC_FTRACE
165 depends on DEBUG_KERNEL 164 depends on DEBUG_KERNEL
166 default y 165 default y
@@ -170,7 +169,7 @@ config DYNAMIC_FTRACE
170 with a No-Op instruction) as they are called. A table is 169 with a No-Op instruction) as they are called. A table is
171 created to dynamically enable them again. 170 created to dynamically enable them again.
172 171
173 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise 172 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
174 has native performance as long as no tracing is active. 173 has native performance as long as no tracing is active.
175 174
176 The changes to the code are done by a kernel thread that 175 The changes to the code are done by a kernel thread that
@@ -195,3 +194,5 @@ config FTRACE_STARTUP_TEST
195 a series of tests are made to verify that the tracer is 194 a series of tests are made to verify that the tracer is
196 functioning properly. It will do tests on all the configured 195 functioning properly. It will do tests on all the configured
197 tracers of ftrace. 196 tracers of ftrace.
197
198endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index a85dfba88ba0..c8228b1a49e9 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
1 1
2# Do not instrument the tracer itself: 2# Do not instrument the tracer itself:
3 3
4ifdef CONFIG_FTRACE 4ifdef CONFIG_FUNCTION_TRACER
5ORIG_CFLAGS := $(KBUILD_CFLAGS) 5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) 6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7 7
@@ -10,13 +10,13 @@ CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13obj-$(CONFIG_FTRACE) += libftrace.o 13obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
15 15
16obj-$(CONFIG_TRACING) += trace.o 16obj-$(CONFIG_TRACING) += trace.o
17obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 17obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
18obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o 18obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
19obj-$(CONFIG_FTRACE) += trace_functions.o 19obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
20obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 20obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
21obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 21obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
22obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o 22obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
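
Both the Kconfig and Makefile hunks implement the same rename: what used to be CONFIG_FTRACE now specifically means the function tracer and is spelled CONFIG_FUNCTION_TRACER, while CONFIG_TRACING covers the shared infrastructure. Code that conditionally compiles on the old symbol has to follow; a hypothetical, self-contained example of the only change required (the stub name and printf stand-in are invented):

#include <stdio.h>

#ifdef CONFIG_FUNCTION_TRACER		/* formerly: #ifdef CONFIG_FTRACE */
static void trace_hook(const char *what) { printf("traced: %s\n", what); }
#else
static void trace_hook(const char *what) { (void)what; }
#endif

int main(void)
{
	trace_hook("example");
	return 0;
}
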
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4dda4f60a2a9..e60205722d0c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -25,13 +25,24 @@
25#include <linux/ftrace.h> 25#include <linux/ftrace.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/ctype.h> 27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h> 28#include <linux/list.h>
30 29
31#include <asm/ftrace.h> 30#include <asm/ftrace.h>
32 31
33#include "trace.h" 32#include "trace.h"
34 33
34#define FTRACE_WARN_ON(cond) \
35 do { \
36 if (WARN_ON(cond)) \
37 ftrace_kill(); \
38 } while (0)
39
40#define FTRACE_WARN_ON_ONCE(cond) \
41 do { \
42 if (WARN_ON_ONCE(cond)) \
43 ftrace_kill(); \
44 } while (0)
45
35/* ftrace_enabled is a method to turn ftrace on or off */ 46/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled; 48static int last_ftrace_enabled;
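
The new FTRACE_WARN_ON()/FTRACE_WARN_ON_ONCE() wrappers turn any failed sanity check into a one-way shutdown of ftrace instead of letting it limp on while patching kernel text. A userspace analogue of that pattern (the names and the fprintf stand-in for WARN_ON() are invented):

#include <stdio.h>

static int facility_dead;

static void facility_kill(void)		/* stand-in for ftrace_kill() */
{
	facility_dead = 1;
}

#define FACILITY_WARN_ON(cond)					\
	do {							\
		if (cond) {					\
			fprintf(stderr, "warning: %s\n", #cond);\
			facility_kill();			\
		}						\
	} while (0)

int main(void)
{
	FACILITY_WARN_ON(1 + 1 != 2);		/* condition false: nothing happens */
	printf("facility_dead=%d\n", facility_dead);
	FACILITY_WARN_ON(facility_dead == 0);	/* fires and kills the facility */
	printf("facility_dead=%d\n", facility_dead);
	return 0;
}
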
@@ -153,21 +164,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
153} 164}
154 165
155#ifdef CONFIG_DYNAMIC_FTRACE 166#ifdef CONFIG_DYNAMIC_FTRACE
156
157#ifndef CONFIG_FTRACE_MCOUNT_RECORD 167#ifndef CONFIG_FTRACE_MCOUNT_RECORD
158/* 168# error Dynamic ftrace depends on MCOUNT_RECORD
159 * The hash lock is only needed when the recording of the mcount
160 * callers are dynamic. That is, by the caller themselves and
161 * not recorded via the compilation.
162 */
163static DEFINE_SPINLOCK(ftrace_hash_lock);
164#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
165#define ftrace_hash_unlock(flags) \
166 spin_unlock_irqrestore(&ftrace_hash_lock, flags)
167#else
168/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
169#define ftrace_hash_lock(flags) do { (void)(flags); } while (0)
170#define ftrace_hash_unlock(flags) do { } while(0)
171#endif 169#endif
172 170
173/* 171/*
@@ -178,8 +176,6 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
178 */ 176 */
179static unsigned long mcount_addr = MCOUNT_ADDR; 177static unsigned long mcount_addr = MCOUNT_ADDR;
180 178
181static struct task_struct *ftraced_task;
182
183enum { 179enum {
184 FTRACE_ENABLE_CALLS = (1 << 0), 180 FTRACE_ENABLE_CALLS = (1 << 0),
185 FTRACE_DISABLE_CALLS = (1 << 1), 181 FTRACE_DISABLE_CALLS = (1 << 1),
@@ -189,14 +185,9 @@ enum {
189}; 185};
190 186
191static int ftrace_filtered; 187static int ftrace_filtered;
192static int tracing_on;
193static int frozen_record_count;
194 188
195static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; 189static LIST_HEAD(ftrace_new_addrs);
196 190
197static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
198
199static DEFINE_MUTEX(ftraced_lock);
200static DEFINE_MUTEX(ftrace_regex_lock); 191static DEFINE_MUTEX(ftrace_regex_lock);
201 192
202struct ftrace_page { 193struct ftrace_page {
@@ -214,16 +205,13 @@ struct ftrace_page {
214static struct ftrace_page *ftrace_pages_start; 205static struct ftrace_page *ftrace_pages_start;
215static struct ftrace_page *ftrace_pages; 206static struct ftrace_page *ftrace_pages;
216 207
217static int ftraced_trigger;
218static int ftraced_suspend;
219static int ftraced_stop;
220
221static int ftrace_record_suspend;
222
223static struct dyn_ftrace *ftrace_free_records; 208static struct dyn_ftrace *ftrace_free_records;
224 209
225 210
226#ifdef CONFIG_KPROBES 211#ifdef CONFIG_KPROBES
212
213static int frozen_record_count;
214
227static inline void freeze_record(struct dyn_ftrace *rec) 215static inline void freeze_record(struct dyn_ftrace *rec)
228{ 216{
229 if (!(rec->flags & FTRACE_FL_FROZEN)) { 217 if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -250,72 +238,6 @@ static inline int record_frozen(struct dyn_ftrace *rec)
250# define record_frozen(rec) ({ 0; }) 238# define record_frozen(rec) ({ 0; })
251#endif /* CONFIG_KPROBES */ 239#endif /* CONFIG_KPROBES */
252 240
253int skip_trace(unsigned long ip)
254{
255 unsigned long fl;
256 struct dyn_ftrace *rec;
257 struct hlist_node *t;
258 struct hlist_head *head;
259
260 if (frozen_record_count == 0)
261 return 0;
262
263 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
264 hlist_for_each_entry_rcu(rec, t, head, node) {
265 if (rec->ip == ip) {
266 if (record_frozen(rec)) {
267 if (rec->flags & FTRACE_FL_FAILED)
268 return 1;
269
270 if (!(rec->flags & FTRACE_FL_CONVERTED))
271 return 1;
272
273 if (!tracing_on || !ftrace_enabled)
274 return 1;
275
276 if (ftrace_filtered) {
277 fl = rec->flags & (FTRACE_FL_FILTER |
278 FTRACE_FL_NOTRACE);
279 if (!fl || (fl & FTRACE_FL_NOTRACE))
280 return 1;
281 }
282 }
283 break;
284 }
285 }
286
287 return 0;
288}
289
290static inline int
291ftrace_ip_in_hash(unsigned long ip, unsigned long key)
292{
293 struct dyn_ftrace *p;
294 struct hlist_node *t;
295 int found = 0;
296
297 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
298 if (p->ip == ip) {
299 found = 1;
300 break;
301 }
302 }
303
304 return found;
305}
306
307static inline void
308ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
309{
310 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
311}
312
313/* called from kstop_machine */
314static inline void ftrace_del_hash(struct dyn_ftrace *node)
315{
316 hlist_del(&node->node);
317}
318
319static void ftrace_free_rec(struct dyn_ftrace *rec) 241static void ftrace_free_rec(struct dyn_ftrace *rec)
320{ 242{
321 rec->ip = (unsigned long)ftrace_free_records; 243 rec->ip = (unsigned long)ftrace_free_records;
@@ -346,7 +268,6 @@ void ftrace_release(void *start, unsigned long size)
346 } 268 }
347 } 269 }
348 spin_unlock(&ftrace_lock); 270 spin_unlock(&ftrace_lock);
349
350} 271}
351 272
352static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 273static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -358,10 +279,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
358 rec = ftrace_free_records; 279 rec = ftrace_free_records;
359 280
360 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { 281 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
361 WARN_ON_ONCE(1); 282 FTRACE_WARN_ON_ONCE(1);
362 ftrace_free_records = NULL; 283 ftrace_free_records = NULL;
363 ftrace_disabled = 1;
364 ftrace_enabled = 0;
365 return NULL; 284 return NULL;
366 } 285 }
367 286
@@ -371,76 +290,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
371 } 290 }
372 291
373 if (ftrace_pages->index == ENTRIES_PER_PAGE) { 292 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
374 if (!ftrace_pages->next) 293 if (!ftrace_pages->next) {
375 return NULL; 294 /* allocate another page */
295 ftrace_pages->next =
296 (void *)get_zeroed_page(GFP_KERNEL);
297 if (!ftrace_pages->next)
298 return NULL;
299 }
376 ftrace_pages = ftrace_pages->next; 300 ftrace_pages = ftrace_pages->next;
377 } 301 }
378 302
379 return &ftrace_pages->records[ftrace_pages->index++]; 303 return &ftrace_pages->records[ftrace_pages->index++];
380} 304}
381 305
382static void 306static struct dyn_ftrace *
383ftrace_record_ip(unsigned long ip) 307ftrace_record_ip(unsigned long ip)
384{ 308{
385 struct dyn_ftrace *node; 309 struct dyn_ftrace *rec;
386 unsigned long flags;
387 unsigned long key;
388 int resched;
389 int cpu;
390 310
391 if (!ftrace_enabled || ftrace_disabled) 311 if (!ftrace_enabled || ftrace_disabled)
392 return; 312 return NULL;
393
394 resched = need_resched();
395 preempt_disable_notrace();
396 313
397 /* 314 rec = ftrace_alloc_dyn_node(ip);
398 * We simply need to protect against recursion. 315 if (!rec)
399 * Use the the raw version of smp_processor_id and not 316 return NULL;
400 * __get_cpu_var which can call debug hooks that can
401 * cause a recursive crash here.
402 */
403 cpu = raw_smp_processor_id();
404 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
405 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
406 goto out;
407
408 if (unlikely(ftrace_record_suspend))
409 goto out;
410
411 key = hash_long(ip, FTRACE_HASHBITS);
412
413 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
414
415 if (ftrace_ip_in_hash(ip, key))
416 goto out;
417
418 ftrace_hash_lock(flags);
419
420 /* This ip may have hit the hash before the lock */
421 if (ftrace_ip_in_hash(ip, key))
422 goto out_unlock;
423
424 node = ftrace_alloc_dyn_node(ip);
425 if (!node)
426 goto out_unlock;
427
428 node->ip = ip;
429
430 ftrace_add_hash(node, key);
431 317
432 ftraced_trigger = 1; 318 rec->ip = ip;
433 319
434 out_unlock: 320 list_add(&rec->list, &ftrace_new_addrs);
435 ftrace_hash_unlock(flags);
436 out:
437 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
438 321
439 /* prevent recursion with scheduler */ 322 return rec;
440 if (resched)
441 preempt_enable_no_resched_notrace();
442 else
443 preempt_enable_notrace();
444} 323}
445 324
446#define FTRACE_ADDR ((long)(ftrace_caller)) 325#define FTRACE_ADDR ((long)(ftrace_caller))
@@ -559,7 +438,6 @@ static void ftrace_replace_code(int enable)
559 rec->flags |= FTRACE_FL_FAILED; 438 rec->flags |= FTRACE_FL_FAILED;
560 if ((system_state == SYSTEM_BOOTING) || 439 if ((system_state == SYSTEM_BOOTING) ||
561 !core_kernel_text(rec->ip)) { 440 !core_kernel_text(rec->ip)) {
562 ftrace_del_hash(rec);
563 ftrace_free_rec(rec); 441 ftrace_free_rec(rec);
564 } 442 }
565 } 443 }
@@ -567,15 +445,6 @@ static void ftrace_replace_code(int enable)
567 } 445 }
568} 446}
569 447
570static void ftrace_shutdown_replenish(void)
571{
572 if (ftrace_pages->next)
573 return;
574
575 /* allocate another page */
576 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
577}
578
579static void print_ip_ins(const char *fmt, unsigned char *p) 448static void print_ip_ins(const char *fmt, unsigned char *p)
580{ 449{
581 int i; 450 int i;
@@ -591,23 +460,23 @@ ftrace_code_disable(struct dyn_ftrace *rec)
591{ 460{
592 unsigned long ip; 461 unsigned long ip;
593 unsigned char *nop, *call; 462 unsigned char *nop, *call;
594 int failed; 463 int ret;
595 464
596 ip = rec->ip; 465 ip = rec->ip;
597 466
598 nop = ftrace_nop_replace(); 467 nop = ftrace_nop_replace();
599 call = ftrace_call_replace(ip, mcount_addr); 468 call = ftrace_call_replace(ip, mcount_addr);
600 469
601 failed = ftrace_modify_code(ip, call, nop); 470 ret = ftrace_modify_code(ip, call, nop);
602 if (failed) { 471 if (ret) {
603 switch (failed) { 472 switch (ret) {
604 case 1: 473 case -EFAULT:
605 WARN_ON_ONCE(1); 474 FTRACE_WARN_ON_ONCE(1);
606 pr_info("ftrace faulted on modifying "); 475 pr_info("ftrace faulted on modifying ");
607 print_ip_sym(ip); 476 print_ip_sym(ip);
608 break; 477 break;
609 case 2: 478 case -EINVAL:
610 WARN_ON_ONCE(1); 479 FTRACE_WARN_ON_ONCE(1);
611 pr_info("ftrace failed to modify "); 480 pr_info("ftrace failed to modify ");
612 print_ip_sym(ip); 481 print_ip_sym(ip);
613 print_ip_ins(" expected: ", call); 482 print_ip_ins(" expected: ", call);
@@ -615,6 +484,15 @@ ftrace_code_disable(struct dyn_ftrace *rec)
615 print_ip_ins(" replace: ", nop); 484 print_ip_ins(" replace: ", nop);
616 printk(KERN_CONT "\n"); 485 printk(KERN_CONT "\n");
617 break; 486 break;
487 case -EPERM:
488 FTRACE_WARN_ON_ONCE(1);
489 pr_info("ftrace faulted on writing ");
490 print_ip_sym(ip);
491 break;
492 default:
493 FTRACE_WARN_ON_ONCE(1);
494 pr_info("ftrace faulted on unknown error ");
495 print_ip_sym(ip);
618 } 496 }
619 497
620 rec->flags |= FTRACE_FL_FAILED; 498 rec->flags |= FTRACE_FL_FAILED;
@@ -623,37 +501,18 @@ ftrace_code_disable(struct dyn_ftrace *rec)
623 return 1; 501 return 1;
624} 502}
625 503
626static int __ftrace_update_code(void *ignore);
627
628static int __ftrace_modify_code(void *data) 504static int __ftrace_modify_code(void *data)
629{ 505{
630 unsigned long addr;
631 int *command = data; 506 int *command = data;
632 507
633 if (*command & FTRACE_ENABLE_CALLS) { 508 if (*command & FTRACE_ENABLE_CALLS)
634 /*
635 * Update any recorded ips now that we have the
636 * machine stopped
637 */
638 __ftrace_update_code(NULL);
639 ftrace_replace_code(1); 509 ftrace_replace_code(1);
640 tracing_on = 1; 510 else if (*command & FTRACE_DISABLE_CALLS)
641 } else if (*command & FTRACE_DISABLE_CALLS) {
642 ftrace_replace_code(0); 511 ftrace_replace_code(0);
643 tracing_on = 0;
644 }
645 512
646 if (*command & FTRACE_UPDATE_TRACE_FUNC) 513 if (*command & FTRACE_UPDATE_TRACE_FUNC)
647 ftrace_update_ftrace_func(ftrace_trace_function); 514 ftrace_update_ftrace_func(ftrace_trace_function);
648 515
649 if (*command & FTRACE_ENABLE_MCOUNT) {
650 addr = (unsigned long)ftrace_record_ip;
651 ftrace_mcount_set(&addr);
652 } else if (*command & FTRACE_DISABLE_MCOUNT) {
653 addr = (unsigned long)ftrace_stub;
654 ftrace_mcount_set(&addr);
655 }
656
657 return 0; 516 return 0;
658} 517}
659 518
@@ -662,26 +521,9 @@ static void ftrace_run_update_code(int command)
662 stop_machine(__ftrace_modify_code, &command, NULL); 521 stop_machine(__ftrace_modify_code, &command, NULL);
663} 522}
664 523
665void ftrace_disable_daemon(void)
666{
667 /* Stop the daemon from calling kstop_machine */
668 mutex_lock(&ftraced_lock);
669 ftraced_stop = 1;
670 mutex_unlock(&ftraced_lock);
671
672 ftrace_force_update();
673}
674
675void ftrace_enable_daemon(void)
676{
677 mutex_lock(&ftraced_lock);
678 ftraced_stop = 0;
679 mutex_unlock(&ftraced_lock);
680
681 ftrace_force_update();
682}
683
684static ftrace_func_t saved_ftrace_func; 524static ftrace_func_t saved_ftrace_func;
525static int ftrace_start;
526static DEFINE_MUTEX(ftrace_start_lock);
685 527
686static void ftrace_startup(void) 528static void ftrace_startup(void)
687{ 529{
@@ -690,9 +532,9 @@ static void ftrace_startup(void)
690 if (unlikely(ftrace_disabled)) 532 if (unlikely(ftrace_disabled))
691 return; 533 return;
692 534
693 mutex_lock(&ftraced_lock); 535 mutex_lock(&ftrace_start_lock);
694 ftraced_suspend++; 536 ftrace_start++;
695 if (ftraced_suspend == 1) 537 if (ftrace_start == 1)
696 command |= FTRACE_ENABLE_CALLS; 538 command |= FTRACE_ENABLE_CALLS;
697 539
698 if (saved_ftrace_func != ftrace_trace_function) { 540 if (saved_ftrace_func != ftrace_trace_function) {
@@ -705,7 +547,7 @@ static void ftrace_startup(void)
705 547
706 ftrace_run_update_code(command); 548 ftrace_run_update_code(command);
707 out: 549 out:
708 mutex_unlock(&ftraced_lock); 550 mutex_unlock(&ftrace_start_lock);
709} 551}
710 552
711static void ftrace_shutdown(void) 553static void ftrace_shutdown(void)
@@ -715,9 +557,9 @@ static void ftrace_shutdown(void)
715 if (unlikely(ftrace_disabled)) 557 if (unlikely(ftrace_disabled))
716 return; 558 return;
717 559
718 mutex_lock(&ftraced_lock); 560 mutex_lock(&ftrace_start_lock);
719 ftraced_suspend--; 561 ftrace_start--;
720 if (!ftraced_suspend) 562 if (!ftrace_start)
721 command |= FTRACE_DISABLE_CALLS; 563 command |= FTRACE_DISABLE_CALLS;
722 564
723 if (saved_ftrace_func != ftrace_trace_function) { 565 if (saved_ftrace_func != ftrace_trace_function) {
@@ -730,7 +572,7 @@ static void ftrace_shutdown(void)
730 572
731 ftrace_run_update_code(command); 573 ftrace_run_update_code(command);
732 out: 574 out:
733 mutex_unlock(&ftraced_lock); 575 mutex_unlock(&ftrace_start_lock);
734} 576}
735 577
736static void ftrace_startup_sysctl(void) 578static void ftrace_startup_sysctl(void)
@@ -740,15 +582,15 @@ static void ftrace_startup_sysctl(void)
740 if (unlikely(ftrace_disabled)) 582 if (unlikely(ftrace_disabled))
741 return; 583 return;
742 584
743 mutex_lock(&ftraced_lock); 585 mutex_lock(&ftrace_start_lock);
744 /* Force update next time */ 586 /* Force update next time */
745 saved_ftrace_func = NULL; 587 saved_ftrace_func = NULL;
746 /* ftraced_suspend is true if we want ftrace running */ 588 /* ftrace_start is true if we want ftrace running */
747 if (ftraced_suspend) 589 if (ftrace_start)
748 command |= FTRACE_ENABLE_CALLS; 590 command |= FTRACE_ENABLE_CALLS;
749 591
750 ftrace_run_update_code(command); 592 ftrace_run_update_code(command);
751 mutex_unlock(&ftraced_lock); 593 mutex_unlock(&ftrace_start_lock);
752} 594}
753 595
754static void ftrace_shutdown_sysctl(void) 596static void ftrace_shutdown_sysctl(void)
@@ -758,112 +600,50 @@ static void ftrace_shutdown_sysctl(void)
758 if (unlikely(ftrace_disabled)) 600 if (unlikely(ftrace_disabled))
759 return; 601 return;
760 602
761 mutex_lock(&ftraced_lock); 603 mutex_lock(&ftrace_start_lock);
762 /* ftraced_suspend is true if ftrace is running */ 604 /* ftrace_start is true if ftrace is running */
763 if (ftraced_suspend) 605 if (ftrace_start)
764 command |= FTRACE_DISABLE_CALLS; 606 command |= FTRACE_DISABLE_CALLS;
765 607
766 ftrace_run_update_code(command); 608 ftrace_run_update_code(command);
767 mutex_unlock(&ftraced_lock); 609 mutex_unlock(&ftrace_start_lock);
768} 610}
769 611
770static cycle_t ftrace_update_time; 612static cycle_t ftrace_update_time;
771static unsigned long ftrace_update_cnt; 613static unsigned long ftrace_update_cnt;
772unsigned long ftrace_update_tot_cnt; 614unsigned long ftrace_update_tot_cnt;
773 615
774static int __ftrace_update_code(void *ignore) 616static int ftrace_update_code(void)
775{ 617{
776 int i, save_ftrace_enabled; 618 struct dyn_ftrace *p, *t;
777 cycle_t start, stop; 619 cycle_t start, stop;
778 struct dyn_ftrace *p;
779 struct hlist_node *t, *n;
780 struct hlist_head *head, temp_list;
781
782 /* Don't be recording funcs now */
783 ftrace_record_suspend++;
784 save_ftrace_enabled = ftrace_enabled;
785 ftrace_enabled = 0;
786 620
787 start = ftrace_now(raw_smp_processor_id()); 621 start = ftrace_now(raw_smp_processor_id());
788 ftrace_update_cnt = 0; 622 ftrace_update_cnt = 0;
789 623
790 /* No locks needed, the machine is stopped! */ 624 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
791 for (i = 0; i < FTRACE_HASHSIZE; i++) {
792 INIT_HLIST_HEAD(&temp_list);
793 head = &ftrace_hash[i];
794
795 /* all CPUS are stopped, we are safe to modify code */
796 hlist_for_each_entry_safe(p, t, n, head, node) {
797 /* Skip over failed records which have not been
798 * freed. */
799 if (p->flags & FTRACE_FL_FAILED)
800 continue;
801
802 /* Unconverted records are always at the head of the
803 * hash bucket. Once we encounter a converted record,
804 * simply skip over to the next bucket. Saves ftraced
805 * some processor cycles (ftrace does its bid for
806 * global warming :-p ). */
807 if (p->flags & (FTRACE_FL_CONVERTED))
808 break;
809 625
810 /* Ignore updates to this record's mcount site. 626 /* If something went wrong, bail without enabling anything */
811 * Reintroduce this record at the head of this 627 if (unlikely(ftrace_disabled))
812 * bucket to attempt to "convert" it again if 628 return -1;
813 * the kprobe on it is unregistered before the
814 * next run. */
815 if (get_kprobe((void *)p->ip)) {
816 ftrace_del_hash(p);
817 INIT_HLIST_NODE(&p->node);
818 hlist_add_head(&p->node, &temp_list);
819 freeze_record(p);
820 continue;
821 } else {
822 unfreeze_record(p);
823 }
824 629
825 /* convert record (i.e, patch mcount-call with NOP) */ 630 list_del_init(&p->list);
826 if (ftrace_code_disable(p)) {
827 p->flags |= FTRACE_FL_CONVERTED;
828 ftrace_update_cnt++;
829 } else {
830 if ((system_state == SYSTEM_BOOTING) ||
831 !core_kernel_text(p->ip)) {
832 ftrace_del_hash(p);
833 ftrace_free_rec(p);
834 }
835 }
836 }
837 631
838 hlist_for_each_entry_safe(p, t, n, &temp_list, node) { 632 /* convert record (i.e., patch mcount-call with NOP) */
839 hlist_del(&p->node); 633 if (ftrace_code_disable(p)) {
840 INIT_HLIST_NODE(&p->node); 634 p->flags |= FTRACE_FL_CONVERTED;
841 hlist_add_head(&p->node, head); 635 ftrace_update_cnt++;
842 } 636 } else
637 ftrace_free_rec(p);
843 } 638 }
844 639
845 stop = ftrace_now(raw_smp_processor_id()); 640 stop = ftrace_now(raw_smp_processor_id());
846 ftrace_update_time = stop - start; 641 ftrace_update_time = stop - start;
847 ftrace_update_tot_cnt += ftrace_update_cnt; 642 ftrace_update_tot_cnt += ftrace_update_cnt;
848 ftraced_trigger = 0;
849
850 ftrace_enabled = save_ftrace_enabled;
851 ftrace_record_suspend--;
852 643
853 return 0; 644 return 0;
854} 645}
855 646
856static int ftrace_update_code(void)
857{
858 if (unlikely(ftrace_disabled) ||
859 !ftrace_enabled || !ftraced_trigger)
860 return 0;
861
862 stop_machine(__ftrace_update_code, NULL, NULL);
863
864 return 1;
865}
866
867static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) 647static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
868{ 648{
869 struct ftrace_page *pg; 649 struct ftrace_page *pg;
@@ -892,8 +672,8 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
892 pg = ftrace_pages = ftrace_pages_start; 672 pg = ftrace_pages = ftrace_pages_start;
893 673
894 cnt = num_to_init / ENTRIES_PER_PAGE; 674 cnt = num_to_init / ENTRIES_PER_PAGE;
895 pr_info("ftrace: allocating %ld hash entries in %d pages\n", 675 pr_info("ftrace: allocating %ld entries in %d pages\n",
896 num_to_init, cnt); 676 num_to_init, cnt + 1);
897 677
898 for (i = 0; i < cnt; i++) { 678 for (i = 0; i < cnt; i++) {
899 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 679 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -973,13 +753,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
973 void *p = NULL; 753 void *p = NULL;
974 loff_t l = -1; 754 loff_t l = -1;
975 755
976 if (*pos != iter->pos) { 756 if (*pos > iter->pos)
977 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) 757 *pos = iter->pos;
978 ; 758
979 } else { 759 l = *pos;
980 l = *pos; 760 p = t_next(m, p, &l);
981 p = t_next(m, p, &l);
982 }
983 761
984 return p; 762 return p;
985} 763}
@@ -990,15 +768,21 @@ static void t_stop(struct seq_file *m, void *p)
990 768
991static int t_show(struct seq_file *m, void *v) 769static int t_show(struct seq_file *m, void *v)
992{ 770{
771 struct ftrace_iterator *iter = m->private;
993 struct dyn_ftrace *rec = v; 772 struct dyn_ftrace *rec = v;
994 char str[KSYM_SYMBOL_LEN]; 773 char str[KSYM_SYMBOL_LEN];
774 int ret = 0;
995 775
996 if (!rec) 776 if (!rec)
997 return 0; 777 return 0;
998 778
999 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 779 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1000 780
1001 seq_printf(m, "%s\n", str); 781 ret = seq_printf(m, "%s\n", str);
782 if (ret < 0) {
783 iter->pos--;
784 iter->idx--;
785 }
1002 786
1003 return 0; 787 return 0;
1004} 788}
@@ -1024,7 +808,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
1024 return -ENOMEM; 808 return -ENOMEM;
1025 809
1026 iter->pg = ftrace_pages_start; 810 iter->pg = ftrace_pages_start;
1027 iter->pos = -1; 811 iter->pos = 0;
1028 812
1029 ret = seq_open(file, &show_ftrace_seq_ops); 813 ret = seq_open(file, &show_ftrace_seq_ops);
1030 if (!ret) { 814 if (!ret) {
@@ -1111,7 +895,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1111 895
1112 if (file->f_mode & FMODE_READ) { 896 if (file->f_mode & FMODE_READ) {
1113 iter->pg = ftrace_pages_start; 897 iter->pg = ftrace_pages_start;
1114 iter->pos = -1; 898 iter->pos = 0;
1115 iter->flags = enable ? FTRACE_ITER_FILTER : 899 iter->flags = enable ? FTRACE_ITER_FILTER :
1116 FTRACE_ITER_NOTRACE; 900 FTRACE_ITER_NOTRACE;
1117 901
@@ -1401,10 +1185,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1401 } 1185 }
1402 1186
1403 mutex_lock(&ftrace_sysctl_lock); 1187 mutex_lock(&ftrace_sysctl_lock);
1404 mutex_lock(&ftraced_lock); 1188 mutex_lock(&ftrace_start_lock);
1405 if (iter->filtered && ftraced_suspend && ftrace_enabled) 1189 if (iter->filtered && ftrace_start && ftrace_enabled)
1406 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1190 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1407 mutex_unlock(&ftraced_lock); 1191 mutex_unlock(&ftrace_start_lock);
1408 mutex_unlock(&ftrace_sysctl_lock); 1192 mutex_unlock(&ftrace_sysctl_lock);
1409 1193
1410 kfree(iter); 1194 kfree(iter);
@@ -1424,55 +1208,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
1424 return ftrace_regex_release(inode, file, 0); 1208 return ftrace_regex_release(inode, file, 0);
1425} 1209}
1426 1210
1427static ssize_t
1428ftraced_read(struct file *filp, char __user *ubuf,
1429 size_t cnt, loff_t *ppos)
1430{
1431 /* don't worry about races */
1432 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1433 int r = strlen(buf);
1434
1435 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1436}
1437
1438static ssize_t
1439ftraced_write(struct file *filp, const char __user *ubuf,
1440 size_t cnt, loff_t *ppos)
1441{
1442 char buf[64];
1443 long val;
1444 int ret;
1445
1446 if (cnt >= sizeof(buf))
1447 return -EINVAL;
1448
1449 if (copy_from_user(&buf, ubuf, cnt))
1450 return -EFAULT;
1451
1452 if (strncmp(buf, "enable", 6) == 0)
1453 val = 1;
1454 else if (strncmp(buf, "disable", 7) == 0)
1455 val = 0;
1456 else {
1457 buf[cnt] = 0;
1458
1459 ret = strict_strtoul(buf, 10, &val);
1460 if (ret < 0)
1461 return ret;
1462
1463 val = !!val;
1464 }
1465
1466 if (val)
1467 ftrace_enable_daemon();
1468 else
1469 ftrace_disable_daemon();
1470
1471 filp->f_pos += cnt;
1472
1473 return cnt;
1474}
1475
1476static struct file_operations ftrace_avail_fops = { 1211static struct file_operations ftrace_avail_fops = {
1477 .open = ftrace_avail_open, 1212 .open = ftrace_avail_open,
1478 .read = seq_read, 1213 .read = seq_read,
@@ -1503,54 +1238,6 @@ static struct file_operations ftrace_notrace_fops = {
1503 .release = ftrace_notrace_release, 1238 .release = ftrace_notrace_release,
1504}; 1239};
1505 1240
1506static struct file_operations ftraced_fops = {
1507 .open = tracing_open_generic,
1508 .read = ftraced_read,
1509 .write = ftraced_write,
1510};
1511
1512/**
1513 * ftrace_force_update - force an update to all recording ftrace functions
1514 */
1515int ftrace_force_update(void)
1516{
1517 int ret = 0;
1518
1519 if (unlikely(ftrace_disabled))
1520 return -ENODEV;
1521
1522 mutex_lock(&ftrace_sysctl_lock);
1523 mutex_lock(&ftraced_lock);
1524
1525 /*
1526 * If ftraced_trigger is not set, then there is nothing
1527 * to update.
1528 */
1529 if (ftraced_trigger && !ftrace_update_code())
1530 ret = -EBUSY;
1531
1532 mutex_unlock(&ftraced_lock);
1533 mutex_unlock(&ftrace_sysctl_lock);
1534
1535 return ret;
1536}
1537
1538static void ftrace_force_shutdown(void)
1539{
1540 struct task_struct *task;
1541 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1542
1543 mutex_lock(&ftraced_lock);
1544 task = ftraced_task;
1545 ftraced_task = NULL;
1546 ftraced_suspend = -1;
1547 ftrace_run_update_code(command);
1548 mutex_unlock(&ftraced_lock);
1549
1550 if (task)
1551 kthread_stop(task);
1552}
1553
1554static __init int ftrace_init_debugfs(void) 1241static __init int ftrace_init_debugfs(void)
1555{ 1242{
1556 struct dentry *d_tracer; 1243 struct dentry *d_tracer;
@@ -1581,17 +1268,11 @@ static __init int ftrace_init_debugfs(void)
1581 pr_warning("Could not create debugfs " 1268 pr_warning("Could not create debugfs "
1582 "'set_ftrace_notrace' entry\n"); 1269 "'set_ftrace_notrace' entry\n");
1583 1270
1584 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1585 NULL, &ftraced_fops);
1586 if (!entry)
1587 pr_warning("Could not create debugfs "
1588 "'ftraced_enabled' entry\n");
1589 return 0; 1271 return 0;
1590} 1272}
1591 1273
1592fs_initcall(ftrace_init_debugfs); 1274fs_initcall(ftrace_init_debugfs);
1593 1275
1594#ifdef CONFIG_FTRACE_MCOUNT_RECORD
1595static int ftrace_convert_nops(unsigned long *start, 1276static int ftrace_convert_nops(unsigned long *start,
1596 unsigned long *end) 1277 unsigned long *end)
1597{ 1278{
@@ -1599,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start,
1599 unsigned long addr; 1280 unsigned long addr;
1600 unsigned long flags; 1281 unsigned long flags;
1601 1282
1283 mutex_lock(&ftrace_start_lock);
1602 p = start; 1284 p = start;
1603 while (p < end) { 1285 while (p < end) {
1604 addr = ftrace_call_adjust(*p++); 1286 addr = ftrace_call_adjust(*p++);
1605 /* should not be called from interrupt context */
1606 spin_lock(&ftrace_lock);
1607 ftrace_record_ip(addr); 1287 ftrace_record_ip(addr);
1608 spin_unlock(&ftrace_lock);
1609 ftrace_shutdown_replenish();
1610 } 1288 }
1611 1289
1612 /* p is ignored */ 1290 /* disable interrupts to prevent kstop machine */
1613 local_irq_save(flags); 1291 local_irq_save(flags);
1614 __ftrace_update_code(p); 1292 ftrace_update_code();
1615 local_irq_restore(flags); 1293 local_irq_restore(flags);
1294 mutex_unlock(&ftrace_start_lock);
1616 1295
1617 return 0; 1296 return 0;
1618} 1297}
@@ -1658,130 +1337,34 @@ void __init ftrace_init(void)
1658 failed: 1337 failed:
1659 ftrace_disabled = 1; 1338 ftrace_disabled = 1;
1660} 1339}
1661#else /* CONFIG_FTRACE_MCOUNT_RECORD */
1662static int ftraced(void *ignore)
1663{
1664 unsigned long usecs;
1665
1666 while (!kthread_should_stop()) {
1667
1668 set_current_state(TASK_INTERRUPTIBLE);
1669 1340
1670 /* check once a second */ 1341#else
1671 schedule_timeout(HZ);
1672
1673 if (unlikely(ftrace_disabled))
1674 continue;
1675
1676 mutex_lock(&ftrace_sysctl_lock);
1677 mutex_lock(&ftraced_lock);
1678 if (!ftraced_suspend && !ftraced_stop &&
1679 ftrace_update_code()) {
1680 usecs = nsecs_to_usecs(ftrace_update_time);
1681 if (ftrace_update_tot_cnt > 100000) {
1682 ftrace_update_tot_cnt = 0;
1683 pr_info("hm, dftrace overflow: %lu change%s"
1684 " (%lu total) in %lu usec%s\n",
1685 ftrace_update_cnt,
1686 ftrace_update_cnt != 1 ? "s" : "",
1687 ftrace_update_tot_cnt,
1688 usecs, usecs != 1 ? "s" : "");
1689 ftrace_disabled = 1;
1690 WARN_ON_ONCE(1);
1691 }
1692 }
1693 mutex_unlock(&ftraced_lock);
1694 mutex_unlock(&ftrace_sysctl_lock);
1695
1696 ftrace_shutdown_replenish();
1697 }
1698 __set_current_state(TASK_RUNNING);
1699 return 0;
1700}
1701 1342
1702static int __init ftrace_dynamic_init(void) 1343static int __init ftrace_nodyn_init(void)
1703{ 1344{
1704 struct task_struct *p; 1345 ftrace_enabled = 1;
1705 unsigned long addr;
1706 int ret;
1707
1708 addr = (unsigned long)ftrace_record_ip;
1709
1710 stop_machine(ftrace_dyn_arch_init, &addr, NULL);
1711
1712 /* ftrace_dyn_arch_init places the return code in addr */
1713 if (addr) {
1714 ret = (int)addr;
1715 goto failed;
1716 }
1717
1718 ret = ftrace_dyn_table_alloc(NR_TO_INIT);
1719 if (ret)
1720 goto failed;
1721
1722 p = kthread_run(ftraced, NULL, "ftraced");
1723 if (IS_ERR(p)) {
1724 ret = -1;
1725 goto failed;
1726 }
1727
1728 last_ftrace_enabled = ftrace_enabled = 1;
1729 ftraced_task = p;
1730
1731 return 0; 1346 return 0;
1732
1733 failed:
1734 ftrace_disabled = 1;
1735 return ret;
1736} 1347}
1348device_initcall(ftrace_nodyn_init);
1737 1349
1738core_initcall(ftrace_dynamic_init);
1739#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
1740
1741#else
1742# define ftrace_startup() do { } while (0) 1350# define ftrace_startup() do { } while (0)
1743# define ftrace_shutdown() do { } while (0) 1351# define ftrace_shutdown() do { } while (0)
1744# define ftrace_startup_sysctl() do { } while (0) 1352# define ftrace_startup_sysctl() do { } while (0)
1745# define ftrace_shutdown_sysctl() do { } while (0) 1353# define ftrace_shutdown_sysctl() do { } while (0)
1746# define ftrace_force_shutdown() do { } while (0)
1747#endif /* CONFIG_DYNAMIC_FTRACE */ 1354#endif /* CONFIG_DYNAMIC_FTRACE */
1748 1355
1749/** 1356/**
1750 * ftrace_kill_atomic - kill ftrace from critical sections 1357 * ftrace_kill - kill ftrace
1751 * 1358 *
1752 * This function should be used by panic code. It stops ftrace 1359 * This function should be used by panic code. It stops ftrace
1753 * but in a not so nice way. If you need to simply kill ftrace 1360 * but in a not so nice way. If you need to simply kill ftrace
1754 * from a non-atomic section, use ftrace_kill. 1361 * from a non-atomic section, use ftrace_kill.
1755 */ 1362 */
1756void ftrace_kill_atomic(void)
1757{
1758 ftrace_disabled = 1;
1759 ftrace_enabled = 0;
1760#ifdef CONFIG_DYNAMIC_FTRACE
1761 ftraced_suspend = -1;
1762#endif
1763 clear_ftrace_function();
1764}
1765
1766/**
1767 * ftrace_kill - totally shutdown ftrace
1768 *
1769 * This is a safety measure. If something was detected that seems
1770 * wrong, calling this function will keep ftrace from doing
1771 * any more modifications, and updates.
1772 * used when something went wrong.
1773 */
1774void ftrace_kill(void) 1363void ftrace_kill(void)
1775{ 1364{
1776 mutex_lock(&ftrace_sysctl_lock);
1777 ftrace_disabled = 1; 1365 ftrace_disabled = 1;
1778 ftrace_enabled = 0; 1366 ftrace_enabled = 0;
1779
1780 clear_ftrace_function(); 1367 clear_ftrace_function();
1781 mutex_unlock(&ftrace_sysctl_lock);
1782
1783 /* Try to totally disable ftrace */
1784 ftrace_force_shutdown();
1785} 1368}
1786 1369
1787/** 1370/**
@@ -1870,3 +1453,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1870 mutex_unlock(&ftrace_sysctl_lock); 1453 mutex_unlock(&ftrace_sysctl_lock);
1871 return ret; 1454 return ret;
1872} 1455}
1456
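For context, the reworked ftrace_kill() above no longer takes ftrace_sysctl_lock, so it can be called directly from atomic or panic context (ftrace_dump() in trace.c below does exactly that). A minimal caller sketch, assuming a hypothetical panic notifier; the notifier and init names are illustrative only, and the ftrace_kill() prototype is assumed to be exported from <linux/ftrace.h> by this series:

	#include <linux/ftrace.h>
	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/notifier.h>

	/* Hypothetical example: stop all tracing as soon as the kernel panics. */
	static int example_panic_notify(struct notifier_block *nb,
					unsigned long event, void *unused)
	{
		ftrace_kill();		/* lock-free now, so safe in panic context */
		return NOTIFY_DONE;
	}

	static struct notifier_block example_panic_nb = {
		.notifier_call = example_panic_notify,
	};

	static int __init example_panic_init(void)
	{
		atomic_notifier_chain_register(&panic_notifier_list,
					       &example_panic_nb);
		return 0;
	}
	late_initcall(example_panic_init);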
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 94af1fe56bb4..036456cbb4f7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
16#include <linux/list.h> 16#include <linux/list.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18 18
19#include "trace.h"
20
21/* Global flag to disable all recording to ring buffers */
22static int ring_buffers_off __read_mostly;
23
24/**
25 * tracing_on - enable all tracing buffers
26 *
27 * This function enables all tracing buffers that may have been
28 * disabled with tracing_off.
29 */
30void tracing_on(void)
31{
32 ring_buffers_off = 0;
33}
34
35/**
36 * tracing_off - turn off all tracing buffers
37 *
38 * This function stops all tracing buffers from recording data.
39 * It does not disable any overhead the tracers themselves may
40 * be causing. This function simply causes all recording to
41 * the ring buffers to fail.
42 */
43void tracing_off(void)
44{
45 ring_buffers_off = 1;
46}
47
19/* Up this if you want to test the TIME_EXTENTS and normalization */ 48/* Up this if you want to test the TIME_EXTENTS and normalization */
20#define DEBUG_SHIFT 0 49#define DEBUG_SHIFT 0
21 50
22/* FIXME!!! */ 51/* FIXME!!! */
23u64 ring_buffer_time_stamp(int cpu) 52u64 ring_buffer_time_stamp(int cpu)
24{ 53{
54 u64 time;
55
56 preempt_disable_notrace();
25 /* shift to debug/test normalization and TIME_EXTENTS */ 57 /* shift to debug/test normalization and TIME_EXTENTS */
26 return sched_clock() << DEBUG_SHIFT; 58 time = sched_clock() << DEBUG_SHIFT;
59 preempt_enable_notrace();
60
61 return time;
27} 62}
28 63
29void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) 64void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -130,7 +165,7 @@ struct buffer_page {
130static inline void free_buffer_page(struct buffer_page *bpage) 165static inline void free_buffer_page(struct buffer_page *bpage)
131{ 166{
132 if (bpage->page) 167 if (bpage->page)
133 __free_page(bpage->page); 168 free_page((unsigned long)bpage->page);
134 kfree(bpage); 169 kfree(bpage);
135} 170}
136 171
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
503 LIST_HEAD(pages); 538 LIST_HEAD(pages);
504 int i, cpu; 539 int i, cpu;
505 540
541 /*
542 * Always succeed at resizing a non-existent buffer:
543 */
544 if (!buffer)
545 return size;
546
506 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 547 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
507 size *= BUF_PAGE_SIZE; 548 size *= BUF_PAGE_SIZE;
508 buffer_size = buffer->pages * BUF_PAGE_SIZE; 549 buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -966,7 +1007,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
966 if (unlikely(*delta > (1ULL << 59) && !once++)) { 1007 if (unlikely(*delta > (1ULL << 59) && !once++)) {
967 printk(KERN_WARNING "Delta way too big! %llu" 1008 printk(KERN_WARNING "Delta way too big! %llu"
968 " ts=%llu write stamp = %llu\n", 1009 " ts=%llu write stamp = %llu\n",
969 *delta, *ts, cpu_buffer->write_stamp); 1010 (unsigned long long)*delta,
1011 (unsigned long long)*ts,
1012 (unsigned long long)cpu_buffer->write_stamp);
970 WARN_ON(1); 1013 WARN_ON(1);
971 } 1014 }
972 1015
@@ -1020,8 +1063,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1020 struct ring_buffer_event *event; 1063 struct ring_buffer_event *event;
1021 u64 ts, delta; 1064 u64 ts, delta;
1022 int commit = 0; 1065 int commit = 0;
1066 int nr_loops = 0;
1023 1067
1024 again: 1068 again:
1069 /*
1070 * We allow for interrupts to reenter here and do a trace.
1071 * If one does, it will cause this original code to loop
1072 * back here. Even with heavy interrupts happening, this
1073 * should only happen a few times in a row. If this happens
1074 * 1000 times in a row, there must be either an interrupt
1075 * storm or we have something buggy.
1076 * Bail!
1077 */
1078 if (unlikely(++nr_loops > 1000)) {
1079 RB_WARN_ON(cpu_buffer, 1);
1080 return NULL;
1081 }
1082
1025 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1083 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1026 1084
1027 /* 1085 /*
@@ -1043,7 +1101,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1043 1101
1044 /* Did the write stamp get updated already? */ 1102 /* Did the write stamp get updated already? */
1045 if (unlikely(ts < cpu_buffer->write_stamp)) 1103 if (unlikely(ts < cpu_buffer->write_stamp))
1046 goto again; 1104 delta = 0;
1047 1105
1048 if (test_time_stamp(delta)) { 1106 if (test_time_stamp(delta)) {
1049 1107
@@ -1116,6 +1174,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1116 struct ring_buffer_event *event; 1174 struct ring_buffer_event *event;
1117 int cpu, resched; 1175 int cpu, resched;
1118 1176
1177 if (ring_buffers_off)
1178 return NULL;
1179
1119 if (atomic_read(&buffer->record_disabled)) 1180 if (atomic_read(&buffer->record_disabled))
1120 return NULL; 1181 return NULL;
1121 1182
@@ -1232,6 +1293,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
1232 int ret = -EBUSY; 1293 int ret = -EBUSY;
1233 int cpu, resched; 1294 int cpu, resched;
1234 1295
1296 if (ring_buffers_off)
1297 return -EBUSY;
1298
1235 if (atomic_read(&buffer->record_disabled)) 1299 if (atomic_read(&buffer->record_disabled))
1236 return -EBUSY; 1300 return -EBUSY;
1237 1301
@@ -1530,10 +1594,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1530{ 1594{
1531 struct buffer_page *reader = NULL; 1595 struct buffer_page *reader = NULL;
1532 unsigned long flags; 1596 unsigned long flags;
1597 int nr_loops = 0;
1533 1598
1534 spin_lock_irqsave(&cpu_buffer->lock, flags); 1599 spin_lock_irqsave(&cpu_buffer->lock, flags);
1535 1600
1536 again: 1601 again:
1602 /*
1603 * This should normally only loop twice. But because the
1604 * start of the reader inserts an empty page, it causes
1605 * a case where we will loop three times. There should be no
1606 * reason to loop four times (that I know of).
1607 */
1608 if (unlikely(++nr_loops > 3)) {
1609 RB_WARN_ON(cpu_buffer, 1);
1610 reader = NULL;
1611 goto out;
1612 }
1613
1537 reader = cpu_buffer->reader_page; 1614 reader = cpu_buffer->reader_page;
1538 1615
1539 /* If there's more to read, return this page */ 1616 /* If there's more to read, return this page */
@@ -1663,6 +1740,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1663 struct ring_buffer_per_cpu *cpu_buffer; 1740 struct ring_buffer_per_cpu *cpu_buffer;
1664 struct ring_buffer_event *event; 1741 struct ring_buffer_event *event;
1665 struct buffer_page *reader; 1742 struct buffer_page *reader;
1743 int nr_loops = 0;
1666 1744
1667 if (!cpu_isset(cpu, buffer->cpumask)) 1745 if (!cpu_isset(cpu, buffer->cpumask))
1668 return NULL; 1746 return NULL;
@@ -1670,6 +1748,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1670 cpu_buffer = buffer->buffers[cpu]; 1748 cpu_buffer = buffer->buffers[cpu];
1671 1749
1672 again: 1750 again:
1751 /*
1752 * We repeat when a timestamp is encountered. It is possible
1753 * to get multiple timestamps from an interrupt entering just
1754 * as one timestamp is about to be written. The max times
1755 * that this can happen is the number of nested interrupts we
1756 * can have. Nesting 10 deep of interrupts is clearly
1757 * an anomaly.
1758 */
1759 if (unlikely(++nr_loops > 10)) {
1760 RB_WARN_ON(cpu_buffer, 1);
1761 return NULL;
1762 }
1763
1673 reader = rb_get_reader_page(cpu_buffer); 1764 reader = rb_get_reader_page(cpu_buffer);
1674 if (!reader) 1765 if (!reader)
1675 return NULL; 1766 return NULL;
@@ -1720,6 +1811,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1720 struct ring_buffer *buffer; 1811 struct ring_buffer *buffer;
1721 struct ring_buffer_per_cpu *cpu_buffer; 1812 struct ring_buffer_per_cpu *cpu_buffer;
1722 struct ring_buffer_event *event; 1813 struct ring_buffer_event *event;
1814 int nr_loops = 0;
1723 1815
1724 if (ring_buffer_iter_empty(iter)) 1816 if (ring_buffer_iter_empty(iter))
1725 return NULL; 1817 return NULL;
@@ -1728,6 +1820,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1728 buffer = cpu_buffer->buffer; 1820 buffer = cpu_buffer->buffer;
1729 1821
1730 again: 1822 again:
1823 /*
1824 * We repeat when a timestamp is encountered. It is possible
1825 * to get multiple timestamps from an interrupt entering just
1826 * as one timestamp is about to be written. The max times
1827 * that this can happen is the number of nested interrupts we
1828 * can have. Nesting 10 deep of interrupts is clearly
1829 * an anomaly.
1830 */
1831 if (unlikely(++nr_loops > 10)) {
1832 RB_WARN_ON(cpu_buffer, 1);
1833 return NULL;
1834 }
1835
1731 if (rb_per_cpu_empty(cpu_buffer)) 1836 if (rb_per_cpu_empty(cpu_buffer))
1732 return NULL; 1837 return NULL;
1733 1838
@@ -2012,3 +2117,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2012 return 0; 2117 return 0;
2013} 2118}
2014 2119
2120static ssize_t
2121rb_simple_read(struct file *filp, char __user *ubuf,
2122 size_t cnt, loff_t *ppos)
2123{
2124 int *p = filp->private_data;
2125 char buf[64];
2126 int r;
2127
2128 /* !ring_buffers_off == tracing_on */
2129 r = sprintf(buf, "%d\n", !*p);
2130
2131 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2132}
2133
2134static ssize_t
2135rb_simple_write(struct file *filp, const char __user *ubuf,
2136 size_t cnt, loff_t *ppos)
2137{
2138 int *p = filp->private_data;
2139 char buf[64];
2140 long val;
2141 int ret;
2142
2143 if (cnt >= sizeof(buf))
2144 return -EINVAL;
2145
2146 if (copy_from_user(&buf, ubuf, cnt))
2147 return -EFAULT;
2148
2149 buf[cnt] = 0;
2150
2151 ret = strict_strtoul(buf, 10, &val);
2152 if (ret < 0)
2153 return ret;
2154
2155 /* !ring_buffers_off == tracing_on */
2156 *p = !val;
2157
2158 (*ppos)++;
2159
2160 return cnt;
2161}
2162
2163static struct file_operations rb_simple_fops = {
2164 .open = tracing_open_generic,
2165 .read = rb_simple_read,
2166 .write = rb_simple_write,
2167};
2168
2169
2170static __init int rb_init_debugfs(void)
2171{
2172 struct dentry *d_tracer;
2173 struct dentry *entry;
2174
2175 d_tracer = tracing_init_dentry();
2176
2177 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2178 &ring_buffers_off, &rb_simple_fops);
2179 if (!entry)
2180 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2181
2182 return 0;
2183}
2184
2185fs_initcall(rb_init_debugfs);
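The new tracing_on()/tracing_off() pair and the debugfs "tracing_on" file both toggle the same ring_buffers_off flag (writing 0 to the file stops recording, 1 resumes it); recording is then refused early in ring_buffer_lock_reserve() and ring_buffer_write(), while the tracers' own overhead is untouched. A minimal in-kernel usage sketch, not part of this patch; the extern prototype stands in for whichever header this series exports it from:

	#include <linux/kernel.h>

	extern void tracing_off(void);	/* provided by this series; header assumed */

	/* Hypothetical fault handler: freeze the buffers so the events
	 * leading up to the fault survive for post-mortem inspection. */
	static void example_handle_fault(int err)
	{
		if (err) {
			printk(KERN_ERR "example: fault %d, freezing trace buffers\n",
			       err);
			tracing_off();	/* further ring_buffer writes bail out early */
		}
	}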
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d345d649d073..697eda36b86a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -34,6 +34,7 @@
34 34
35#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h> 36#include <linux/ring_buffer.h>
37#include <linux/irqflags.h>
37 38
38#include "trace.h" 39#include "trace.h"
39 40
@@ -655,7 +656,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
655 entry->preempt_count = pc & 0xff; 656 entry->preempt_count = pc & 0xff;
656 entry->pid = (tsk) ? tsk->pid : 0; 657 entry->pid = (tsk) ? tsk->pid : 0;
657 entry->flags = 658 entry->flags =
659#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
658 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 660 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
661#else
662 TRACE_FLAG_IRQS_NOSUPPORT |
663#endif
659 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 664 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
660 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 665 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
661 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 666 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -700,6 +705,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
700 unsigned long flags, 705 unsigned long flags,
701 int skip, int pc) 706 int skip, int pc)
702{ 707{
708#ifdef CONFIG_STACKTRACE
703 struct ring_buffer_event *event; 709 struct ring_buffer_event *event;
704 struct stack_entry *entry; 710 struct stack_entry *entry;
705 struct stack_trace trace; 711 struct stack_trace trace;
@@ -725,6 +731,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
725 731
726 save_stack_trace(&trace); 732 save_stack_trace(&trace);
727 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 733 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
734#endif
728} 735}
729 736
730void __trace_stack(struct trace_array *tr, 737void __trace_stack(struct trace_array *tr,
@@ -851,7 +858,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
851 preempt_enable_notrace(); 858 preempt_enable_notrace();
852} 859}
853 860
854#ifdef CONFIG_FTRACE 861#ifdef CONFIG_FUNCTION_TRACER
855static void 862static void
856function_trace_call(unsigned long ip, unsigned long parent_ip) 863function_trace_call(unsigned long ip, unsigned long parent_ip)
857{ 864{
@@ -865,9 +872,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
865 if (unlikely(!ftrace_function_enabled)) 872 if (unlikely(!ftrace_function_enabled))
866 return; 873 return;
867 874
868 if (skip_trace(ip))
869 return;
870
871 pc = preempt_count(); 875 pc = preempt_count();
872 resched = need_resched(); 876 resched = need_resched();
873 preempt_disable_notrace(); 877 preempt_disable_notrace();
@@ -1084,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p)
1084 mutex_unlock(&trace_types_lock); 1088 mutex_unlock(&trace_types_lock);
1085} 1089}
1086 1090
1087#define KRETPROBE_MSG "[unknown/kretprobe'd]"
1088
1089#ifdef CONFIG_KRETPROBES 1091#ifdef CONFIG_KRETPROBES
1090static inline int kretprobed(unsigned long addr) 1092static inline const char *kretprobed(const char *name)
1091{ 1093{
1092 return addr == (unsigned long)kretprobe_trampoline; 1094 static const char tramp_name[] = "kretprobe_trampoline";
1095 int size = sizeof(tramp_name);
1096
1097 if (strncmp(tramp_name, name, size) == 0)
1098 return "[unknown/kretprobe'd]";
1099 return name;
1093} 1100}
1094#else 1101#else
1095static inline int kretprobed(unsigned long addr) 1102static inline const char *kretprobed(const char *name)
1096{ 1103{
1097 return 0; 1104 return name;
1098} 1105}
1099#endif /* CONFIG_KRETPROBES */ 1106#endif /* CONFIG_KRETPROBES */
1100 1107
@@ -1103,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1103{ 1110{
1104#ifdef CONFIG_KALLSYMS 1111#ifdef CONFIG_KALLSYMS
1105 char str[KSYM_SYMBOL_LEN]; 1112 char str[KSYM_SYMBOL_LEN];
1113 const char *name;
1106 1114
1107 kallsyms_lookup(address, NULL, NULL, NULL, str); 1115 kallsyms_lookup(address, NULL, NULL, NULL, str);
1108 1116
1109 return trace_seq_printf(s, fmt, str); 1117 name = kretprobed(str);
1118
1119 return trace_seq_printf(s, fmt, name);
1110#endif 1120#endif
1111 return 1; 1121 return 1;
1112} 1122}
@@ -1117,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1117{ 1127{
1118#ifdef CONFIG_KALLSYMS 1128#ifdef CONFIG_KALLSYMS
1119 char str[KSYM_SYMBOL_LEN]; 1129 char str[KSYM_SYMBOL_LEN];
1130 const char *name;
1120 1131
1121 sprint_symbol(str, address); 1132 sprint_symbol(str, address);
1122 return trace_seq_printf(s, fmt, str); 1133 name = kretprobed(str);
1134
1135 return trace_seq_printf(s, fmt, name);
1123#endif 1136#endif
1124 return 1; 1137 return 1;
1125} 1138}
@@ -1246,7 +1259,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1246 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); 1259 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1247 trace_seq_printf(s, "%3d", cpu); 1260 trace_seq_printf(s, "%3d", cpu);
1248 trace_seq_printf(s, "%c%c", 1261 trace_seq_printf(s, "%c%c",
1249 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', 1262 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1263 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1250 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); 1264 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1251 1265
1252 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 1266 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -1372,10 +1386,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1372 1386
1373 seq_print_ip_sym(s, field->ip, sym_flags); 1387 seq_print_ip_sym(s, field->ip, sym_flags);
1374 trace_seq_puts(s, " ("); 1388 trace_seq_puts(s, " (");
1375 if (kretprobed(field->parent_ip)) 1389 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1376 trace_seq_puts(s, KRETPROBE_MSG);
1377 else
1378 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1379 trace_seq_puts(s, ")\n"); 1390 trace_seq_puts(s, ")\n");
1380 break; 1391 break;
1381 } 1392 }
@@ -1491,12 +1502,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1491 ret = trace_seq_printf(s, " <-"); 1502 ret = trace_seq_printf(s, " <-");
1492 if (!ret) 1503 if (!ret)
1493 return TRACE_TYPE_PARTIAL_LINE; 1504 return TRACE_TYPE_PARTIAL_LINE;
1494 if (kretprobed(field->parent_ip)) 1505 ret = seq_print_ip_sym(s,
1495 ret = trace_seq_puts(s, KRETPROBE_MSG); 1506 field->parent_ip,
1496 else 1507 sym_flags);
1497 ret = seq_print_ip_sym(s,
1498 field->parent_ip,
1499 sym_flags);
1500 if (!ret) 1508 if (!ret)
1501 return TRACE_TYPE_PARTIAL_LINE; 1509 return TRACE_TYPE_PARTIAL_LINE;
1502 } 1510 }
@@ -1747,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1747 return TRACE_TYPE_HANDLED; 1755 return TRACE_TYPE_HANDLED;
1748 1756
1749 SEQ_PUT_FIELD_RET(s, entry->pid); 1757 SEQ_PUT_FIELD_RET(s, entry->pid);
1750 SEQ_PUT_FIELD_RET(s, iter->cpu); 1758 SEQ_PUT_FIELD_RET(s, entry->cpu);
1751 SEQ_PUT_FIELD_RET(s, iter->ts); 1759 SEQ_PUT_FIELD_RET(s, iter->ts);
1752 1760
1753 switch (entry->type) { 1761 switch (entry->type) {
@@ -2379,9 +2387,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2379 int i; 2387 int i;
2380 size_t ret; 2388 size_t ret;
2381 2389
2390 ret = cnt;
2391
2382 if (cnt > max_tracer_type_len) 2392 if (cnt > max_tracer_type_len)
2383 cnt = max_tracer_type_len; 2393 cnt = max_tracer_type_len;
2384 ret = cnt;
2385 2394
2386 if (copy_from_user(&buf, ubuf, cnt)) 2395 if (copy_from_user(&buf, ubuf, cnt))
2387 return -EFAULT; 2396 return -EFAULT;
@@ -2414,8 +2423,8 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2414 out: 2423 out:
2415 mutex_unlock(&trace_types_lock); 2424 mutex_unlock(&trace_types_lock);
2416 2425
2417 if (ret == cnt) 2426 if (ret > 0)
2418 filp->f_pos += cnt; 2427 filp->f_pos += ret;
2419 2428
2420 return ret; 2429 return ret;
2421} 2430}
@@ -2667,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2667{ 2676{
2668 unsigned long val; 2677 unsigned long val;
2669 char buf[64]; 2678 char buf[64];
2670 int ret; 2679 int ret, cpu;
2671 struct trace_array *tr = filp->private_data; 2680 struct trace_array *tr = filp->private_data;
2672 2681
2673 if (cnt >= sizeof(buf)) 2682 if (cnt >= sizeof(buf))
@@ -2695,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2695 goto out; 2704 goto out;
2696 } 2705 }
2697 2706
2707 /* disable all cpu buffers */
2708 for_each_tracing_cpu(cpu) {
2709 if (global_trace.data[cpu])
2710 atomic_inc(&global_trace.data[cpu]->disabled);
2711 if (max_tr.data[cpu])
2712 atomic_inc(&max_tr.data[cpu]->disabled);
2713 }
2714
2698 if (val != global_trace.entries) { 2715 if (val != global_trace.entries) {
2699 ret = ring_buffer_resize(global_trace.buffer, val); 2716 ret = ring_buffer_resize(global_trace.buffer, val);
2700 if (ret < 0) { 2717 if (ret < 0) {
@@ -2726,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2726 if (tracing_disabled) 2743 if (tracing_disabled)
2727 cnt = -ENOMEM; 2744 cnt = -ENOMEM;
2728 out: 2745 out:
2746 for_each_tracing_cpu(cpu) {
2747 if (global_trace.data[cpu])
2748 atomic_dec(&global_trace.data[cpu]->disabled);
2749 if (max_tr.data[cpu])
2750 atomic_dec(&max_tr.data[cpu]->disabled);
2751 }
2752
2729 max_tr.entries = global_trace.entries; 2753 max_tr.entries = global_trace.entries;
2730 mutex_unlock(&trace_types_lock); 2754 mutex_unlock(&trace_types_lock);
2731 2755
@@ -3097,7 +3121,7 @@ void ftrace_dump(void)
3097 dump_ran = 1; 3121 dump_ran = 1;
3098 3122
3099 /* No turning back! */ 3123 /* No turning back! */
3100 ftrace_kill_atomic(); 3124 ftrace_kill();
3101 3125
3102 for_each_tracing_cpu(cpu) { 3126 for_each_tracing_cpu(cpu) {
3103 atomic_inc(&global_trace.data[cpu]->disabled); 3127 atomic_inc(&global_trace.data[cpu]->disabled);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f1f99572cde7..8465ad052707 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -120,18 +120,20 @@ struct trace_boot {
120/* 120/*
121 * trace_flag_type is an enumeration that holds different 121 * trace_flag_type is an enumeration that holds different
122 * states when a trace occurs. These are: 122 * states when a trace occurs. These are:
123 * IRQS_OFF - interrupts were disabled 123 * IRQS_OFF - interrupts were disabled
124 * NEED_RESCED - reschedule is requested 124 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
125 * HARDIRQ - inside an interrupt handler 125 * NEED_RESCED - reschedule is requested
126 * SOFTIRQ - inside a softirq handler 126 * HARDIRQ - inside an interrupt handler
127 * CONT - multiple entries hold the trace item 127 * SOFTIRQ - inside a softirq handler
128 * CONT - multiple entries hold the trace item
128 */ 129 */
129enum trace_flag_type { 130enum trace_flag_type {
130 TRACE_FLAG_IRQS_OFF = 0x01, 131 TRACE_FLAG_IRQS_OFF = 0x01,
131 TRACE_FLAG_NEED_RESCHED = 0x02, 132 TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
132 TRACE_FLAG_HARDIRQ = 0x04, 133 TRACE_FLAG_NEED_RESCHED = 0x04,
133 TRACE_FLAG_SOFTIRQ = 0x08, 134 TRACE_FLAG_HARDIRQ = 0x08,
134 TRACE_FLAG_CONT = 0x10, 135 TRACE_FLAG_SOFTIRQ = 0x10,
136 TRACE_FLAG_CONT = 0x20,
135}; 137};
136 138
137#define TRACE_BUF_SIZE 1024 139#define TRACE_BUF_SIZE 1024
@@ -335,7 +337,7 @@ void update_max_tr_single(struct trace_array *tr,
335 337
336extern cycle_t ftrace_now(int cpu); 338extern cycle_t ftrace_now(int cpu);
337 339
338#ifdef CONFIG_FTRACE 340#ifdef CONFIG_FUNCTION_TRACER
339void tracing_start_function_trace(void); 341void tracing_start_function_trace(void);
340void tracing_stop_function_trace(void); 342void tracing_stop_function_trace(void);
341#else 343#else
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index e90eb0c2c56c..0f85a64003d3 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr)
64 64
65static struct tracer function_trace __read_mostly = 65static struct tracer function_trace __read_mostly =
66{ 66{
67 .name = "ftrace", 67 .name = "function",
68 .init = function_trace_init, 68 .init = function_trace_init,
69 .reset = function_trace_reset, 69 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 70 .ctrl_update = function_trace_ctrl_update,
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index a7db7f040ae0..9c74071c10e0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
63 */ 63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence; 64static __cacheline_aligned_in_smp unsigned long max_sequence;
65 65
66#ifdef CONFIG_FTRACE 66#ifdef CONFIG_FUNCTION_TRACER
67/* 67/*
68 * irqsoff uses its own tracer function to keep the overhead down: 68 * irqsoff uses its own tracer function to keep the overhead down:
69 */ 69 */
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
104{ 104{
105 .func = irqsoff_tracer_call, 105 .func = irqsoff_tracer_call,
106}; 106};
107#endif /* CONFIG_FTRACE */ 107#endif /* CONFIG_FUNCTION_TRACER */
108 108
109/* 109/*
110 * Should this new latency be reported/recorded? 110 * Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index fe4a252c2363..3ae93f16b565 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
31 31
32static void __wakeup_reset(struct trace_array *tr); 32static void __wakeup_reset(struct trace_array *tr);
33 33
34#ifdef CONFIG_FTRACE 34#ifdef CONFIG_FUNCTION_TRACER
35/* 35/*
36 * irqsoff uses its own tracer function to keep the overhead down: 36 * irqsoff uses its own tracer function to keep the overhead down:
37 */ 37 */
@@ -96,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly =
96{ 96{
97 .func = wakeup_tracer_call, 97 .func = wakeup_tracer_call,
98}; 98};
99#endif /* CONFIG_FTRACE */ 99#endif /* CONFIG_FUNCTION_TRACER */
100 100
101/* 101/*
102 * Should this new latency be reported/recorded? 102 * Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 09cf230d7eca..90bc752a7580 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -70,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
70 return ret; 70 return ret;
71} 71}
72 72
73#ifdef CONFIG_FTRACE 73#ifdef CONFIG_FUNCTION_TRACER
74 74
75#ifdef CONFIG_DYNAMIC_FTRACE 75#ifdef CONFIG_DYNAMIC_FTRACE
76 76
@@ -99,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
99 /* passed in by parameter to fool gcc from optimizing */ 99 /* passed in by parameter to fool gcc from optimizing */
100 func(); 100 func();
101 101
102 /* update the records */
103 ret = ftrace_force_update();
104 if (ret) {
105 printk(KERN_CONT ".. ftraced failed .. ");
106 return ret;
107 }
108
109 /* 102 /*
 110 * Some archs *cough*PowerPC*cough* add characters to the 103
111 * start of the function names. We simply put a '*' to 104 * start of the function names. We simply put a '*' to
@@ -183,13 +176,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
183 /* make sure msleep has been recorded */ 176 /* make sure msleep has been recorded */
184 msleep(1); 177 msleep(1);
185 178
186 /* force the recorded functions to be traced */
187 ret = ftrace_force_update();
188 if (ret) {
189 printk(KERN_CONT ".. ftraced failed .. ");
190 return ret;
191 }
192
193 /* start the tracing */ 179 /* start the tracing */
194 ftrace_enabled = 1; 180 ftrace_enabled = 1;
195 tracer_enabled = 1; 181 tracer_enabled = 1;
@@ -226,7 +212,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
226 212
227 return ret; 213 return ret;
228} 214}
229#endif /* CONFIG_FTRACE */ 215#endif /* CONFIG_FUNCTION_TRACER */
230 216
231#ifdef CONFIG_IRQSOFF_TRACER 217#ifdef CONFIG_IRQSOFF_TRACER
232int 218int
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 74c5d9a3afae..be682b62fe58 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -44,6 +44,10 @@ static inline void check_stack(void)
44 if (this_size <= max_stack_size) 44 if (this_size <= max_stack_size)
45 return; 45 return;
46 46
47 /* we do not handle interrupt stacks yet */
48 if (!object_is_on_stack(&this_size))
49 return;
50
47 raw_local_irq_save(flags); 51 raw_local_irq_save(flags);
48 __raw_spin_lock(&max_stack_lock); 52 __raw_spin_lock(&max_stack_lock);
49 53
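The new guard in check_stack() takes the address of a local variable (&this_size) and asks whether it lies on the current task's stack; if it does not, we are running on a separate interrupt stack, which the stack tracer does not yet handle. For reference, a sketch of what object_is_on_stack() does, written to match the era's helper in <linux/sched.h> and reproduced here only to clarify the guard (under that assumption):

	#include <linux/sched.h>

	/* Sketch of the helper the new check relies on (assumed, era-typical). */
	static inline int example_object_is_on_stack(void *obj)
	{
		void *stack = task_stack_page(current);

		return (obj >= stack) && (obj < (stack + THREAD_SIZE));
	}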
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f2b7c28a4708..af8c85664882 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -131,6 +131,9 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
131 131
132 old = entry->funcs; 132 old = entry->funcs;
133 133
134 if (!old)
135 return NULL;
136
134 debug_print_probes(entry); 137 debug_print_probes(entry);
135 /* (N -> M), (N > 1, M >= 0) probes */ 138 /* (N -> M), (N > 1, M >= 0) probes */
136 for (nr_probes = 0; old[nr_probes]; nr_probes++) { 139 for (nr_probes = 0; old[nr_probes]; nr_probes++) {
@@ -388,6 +391,11 @@ int tracepoint_probe_unregister(const char *name, void *probe)
388 if (entry->rcu_pending) 391 if (entry->rcu_pending)
389 rcu_barrier_sched(); 392 rcu_barrier_sched();
390 old = tracepoint_entry_remove_probe(entry, probe); 393 old = tracepoint_entry_remove_probe(entry, probe);
394 if (!old) {
 395 printk(KERN_WARNING "Warning: Trying to unregister a probe "
396 "that doesn't exist\n");
397 goto end;
398 }
391 mutex_unlock(&tracepoints_mutex); 399 mutex_unlock(&tracepoints_mutex);
392 tracepoint_update_probes(); /* may update entry */ 400 tracepoint_update_probes(); /* may update entry */
393 mutex_lock(&tracepoints_mutex); 401 mutex_lock(&tracepoints_mutex);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f928f2a87b9b..d4dc69ddebd7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -970,6 +970,51 @@ undo:
970 return ret; 970 return ret;
971} 971}
972 972
973#ifdef CONFIG_SMP
974struct work_for_cpu {
975 struct work_struct work;
976 long (*fn)(void *);
977 void *arg;
978 long ret;
979};
980
981static void do_work_for_cpu(struct work_struct *w)
982{
983 struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
984
985 wfc->ret = wfc->fn(wfc->arg);
986}
987
988/**
989 * work_on_cpu - run a function in user context on a particular cpu
990 * @cpu: the cpu to run on
991 * @fn: the function to run
992 * @arg: the function arg
993 *
 994 * This will return -EINVAL if the cpu is not online, or the return value
995 * of @fn otherwise.
996 */
997long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
998{
999 struct work_for_cpu wfc;
1000
1001 INIT_WORK(&wfc.work, do_work_for_cpu);
1002 wfc.fn = fn;
1003 wfc.arg = arg;
1004 get_online_cpus();
1005 if (unlikely(!cpu_online(cpu)))
1006 wfc.ret = -EINVAL;
1007 else {
1008 schedule_work_on(cpu, &wfc.work);
1009 flush_work(&wfc.work);
1010 }
1011 put_online_cpus();
1012
1013 return wfc.ret;
1014}
1015EXPORT_SYMBOL_GPL(work_on_cpu);
1016#endif /* CONFIG_SMP */
1017
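work_on_cpu() above lets process-context code run a function on a specific CPU without managing its own workqueue: it bounces the call through keventd on that CPU and waits for the result, returning -EINVAL if the CPU is offline. A minimal usage sketch; the caller and the per-CPU function are hypothetical, and the prototype is assumed to be exported from <linux/workqueue.h> by the companion header change:

	#include <linux/workqueue.h>

	/* Hypothetical operation that must execute on a particular CPU. */
	static long example_read_cpu_state(void *arg)
	{
		unsigned int *value = arg;

		/* Runs in keventd's process context on the CPU passed to work_on_cpu(). */
		return (long)*value;	/* placeholder for a real per-CPU operation */
	}

	static long example_query(unsigned int cpu, unsigned int *value)
	{
		/* -EINVAL if @cpu is offline, otherwise example_read_cpu_state()'s result. */
		return work_on_cpu(cpu, example_read_cpu_state, value);
	}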
973void __init init_workqueues(void) 1018void __init init_workqueues(void)
974{ 1019{
975 cpu_populated_map = cpu_online_map; 1020 cpu_populated_map = cpu_online_map;