aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/audit_tree.c91
-rw-r--r--kernel/auditfilter.c14
-rw-r--r--kernel/cgroup.c21
-rw-r--r--kernel/cpuset.c31
-rw-r--r--kernel/exit.c9
-rw-r--r--kernel/extable.c16
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/futex.c290
-rw-r--r--kernel/kallsyms.c17
-rw-r--r--kernel/lockdep.c37
-rw-r--r--kernel/lockdep_proc.c28
-rw-r--r--kernel/mutex.c10
-rw-r--r--kernel/notifier.c8
-rw-r--r--kernel/power/main.c2
-rw-r--r--kernel/profile.c2
-rw-r--r--kernel/rcuclassic.c4
-rw-r--r--kernel/relay.c9
-rw-r--r--kernel/sched.c15
-rw-r--r--kernel/softlockup.c2
-rw-r--r--kernel/stop_machine.c5
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/trace/ftrace.c147
-rw-r--r--kernel/trace/ring_buffer.c116
-rw-r--r--kernel/trace/trace.c1
25 files changed, 509 insertions, 383 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a3ec66a9d84..19fad003b19d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o
13 13
14CFLAGS_REMOVE_sched.o = -mno-spe
15
16ifdef CONFIG_FUNCTION_TRACER 14ifdef CONFIG_FUNCTION_TRACER
17# Do not trace debug files and internal ftrace files 15# Do not trace debug files and internal ftrace files
18CFLAGS_REMOVE_lockdep.o = -pg 16CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,7 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
21CFLAGS_REMOVE_rtmutex-debug.o = -pg 19CFLAGS_REMOVE_rtmutex-debug.o = -pg
22CFLAGS_REMOVE_cgroup-debug.o = -pg 20CFLAGS_REMOVE_cgroup-debug.o = -pg
23CFLAGS_REMOVE_sched_clock.o = -pg 21CFLAGS_REMOVE_sched_clock.o = -pg
24CFLAGS_REMOVE_sched.o = -mno-spe -pg 22CFLAGS_REMOVE_sched.o = -pg
25endif 23endif
26 24
27obj-$(CONFIG_FREEZER) += freezer.o 25obj-$(CONFIG_FREEZER) += freezer.o
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8ba0e0d934f2..8b509441f49a 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_chunk {
24 struct list_head trees; /* with root here */ 24 struct list_head trees; /* with root here */
25 int dead; 25 int dead;
26 int count; 26 int count;
27 atomic_long_t refs;
27 struct rcu_head head; 28 struct rcu_head head;
28 struct node { 29 struct node {
29 struct list_head list; 30 struct list_head list;
@@ -56,7 +57,8 @@ static LIST_HEAD(prune_list);
56 * tree is refcounted; one reference for "some rules on rules_list refer to 57 * tree is refcounted; one reference for "some rules on rules_list refer to
57 * it", one for each chunk with pointer to it. 58 * it", one for each chunk with pointer to it.
58 * 59 *
59 * chunk is refcounted by embedded inotify_watch. 60 * chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount
61 * of watch contributes 1 to .refs).
60 * 62 *
61 * node.index allows to get from node.list to containing chunk. 63 * node.index allows to get from node.list to containing chunk.
62 * MSB of that sucker is stolen to mark taggings that we might have to 64 * MSB of that sucker is stolen to mark taggings that we might have to
@@ -121,6 +123,7 @@ static struct audit_chunk *alloc_chunk(int count)
121 INIT_LIST_HEAD(&chunk->hash); 123 INIT_LIST_HEAD(&chunk->hash);
122 INIT_LIST_HEAD(&chunk->trees); 124 INIT_LIST_HEAD(&chunk->trees);
123 chunk->count = count; 125 chunk->count = count;
126 atomic_long_set(&chunk->refs, 1);
124 for (i = 0; i < count; i++) { 127 for (i = 0; i < count; i++) {
125 INIT_LIST_HEAD(&chunk->owners[i].list); 128 INIT_LIST_HEAD(&chunk->owners[i].list);
126 chunk->owners[i].index = i; 129 chunk->owners[i].index = i;
@@ -129,9 +132,8 @@ static struct audit_chunk *alloc_chunk(int count)
129 return chunk; 132 return chunk;
130} 133}
131 134
132static void __free_chunk(struct rcu_head *rcu) 135static void free_chunk(struct audit_chunk *chunk)
133{ 136{
134 struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
135 int i; 137 int i;
136 138
137 for (i = 0; i < chunk->count; i++) { 139 for (i = 0; i < chunk->count; i++) {
@@ -141,14 +143,16 @@ static void __free_chunk(struct rcu_head *rcu)
141 kfree(chunk); 143 kfree(chunk);
142} 144}
143 145
144static inline void free_chunk(struct audit_chunk *chunk) 146void audit_put_chunk(struct audit_chunk *chunk)
145{ 147{
146 call_rcu(&chunk->head, __free_chunk); 148 if (atomic_long_dec_and_test(&chunk->refs))
149 free_chunk(chunk);
147} 150}
148 151
149void audit_put_chunk(struct audit_chunk *chunk) 152static void __put_chunk(struct rcu_head *rcu)
150{ 153{
151 put_inotify_watch(&chunk->watch); 154 struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
155 audit_put_chunk(chunk);
152} 156}
153 157
154enum {HASH_SIZE = 128}; 158enum {HASH_SIZE = 128};
@@ -176,7 +180,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
176 180
177 list_for_each_entry_rcu(p, list, hash) { 181 list_for_each_entry_rcu(p, list, hash) {
178 if (p->watch.inode == inode) { 182 if (p->watch.inode == inode) {
179 get_inotify_watch(&p->watch); 183 atomic_long_inc(&p->refs);
180 return p; 184 return p;
181 } 185 }
182 } 186 }
@@ -194,17 +198,49 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
194 198
195/* tagging and untagging inodes with trees */ 199/* tagging and untagging inodes with trees */
196 200
197static void untag_chunk(struct audit_chunk *chunk, struct node *p) 201static struct audit_chunk *find_chunk(struct node *p)
202{
203 int index = p->index & ~(1U<<31);
204 p -= index;
205 return container_of(p, struct audit_chunk, owners[0]);
206}
207
208static void untag_chunk(struct node *p)
198{ 209{
210 struct audit_chunk *chunk = find_chunk(p);
199 struct audit_chunk *new; 211 struct audit_chunk *new;
200 struct audit_tree *owner; 212 struct audit_tree *owner;
201 int size = chunk->count - 1; 213 int size = chunk->count - 1;
202 int i, j; 214 int i, j;
203 215
216 if (!pin_inotify_watch(&chunk->watch)) {
217 /*
218 * Filesystem is shutting down; all watches are getting
219 * evicted, just take it off the node list for this
220 * tree and let the eviction logics take care of the
221 * rest.
222 */
223 owner = p->owner;
224 if (owner->root == chunk) {
225 list_del_init(&owner->same_root);
226 owner->root = NULL;
227 }
228 list_del_init(&p->list);
229 p->owner = NULL;
230 put_tree(owner);
231 return;
232 }
233
234 spin_unlock(&hash_lock);
235
236 /*
237 * pin_inotify_watch() succeeded, so the watch won't go away
238 * from under us.
239 */
204 mutex_lock(&chunk->watch.inode->inotify_mutex); 240 mutex_lock(&chunk->watch.inode->inotify_mutex);
205 if (chunk->dead) { 241 if (chunk->dead) {
206 mutex_unlock(&chunk->watch.inode->inotify_mutex); 242 mutex_unlock(&chunk->watch.inode->inotify_mutex);
207 return; 243 goto out;
208 } 244 }
209 245
210 owner = p->owner; 246 owner = p->owner;
@@ -221,7 +257,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
221 inotify_evict_watch(&chunk->watch); 257 inotify_evict_watch(&chunk->watch);
222 mutex_unlock(&chunk->watch.inode->inotify_mutex); 258 mutex_unlock(&chunk->watch.inode->inotify_mutex);
223 put_inotify_watch(&chunk->watch); 259 put_inotify_watch(&chunk->watch);
224 return; 260 goto out;
225 } 261 }
226 262
227 new = alloc_chunk(size); 263 new = alloc_chunk(size);
@@ -263,7 +299,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
263 inotify_evict_watch(&chunk->watch); 299 inotify_evict_watch(&chunk->watch);
264 mutex_unlock(&chunk->watch.inode->inotify_mutex); 300 mutex_unlock(&chunk->watch.inode->inotify_mutex);
265 put_inotify_watch(&chunk->watch); 301 put_inotify_watch(&chunk->watch);
266 return; 302 goto out;
267 303
268Fallback: 304Fallback:
269 // do the best we can 305 // do the best we can
@@ -277,6 +313,9 @@ Fallback:
277 put_tree(owner); 313 put_tree(owner);
278 spin_unlock(&hash_lock); 314 spin_unlock(&hash_lock);
279 mutex_unlock(&chunk->watch.inode->inotify_mutex); 315 mutex_unlock(&chunk->watch.inode->inotify_mutex);
316out:
317 unpin_inotify_watch(&chunk->watch);
318 spin_lock(&hash_lock);
280} 319}
281 320
282static int create_chunk(struct inode *inode, struct audit_tree *tree) 321static int create_chunk(struct inode *inode, struct audit_tree *tree)
@@ -387,13 +426,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
387 return 0; 426 return 0;
388} 427}
389 428
390static struct audit_chunk *find_chunk(struct node *p)
391{
392 int index = p->index & ~(1U<<31);
393 p -= index;
394 return container_of(p, struct audit_chunk, owners[0]);
395}
396
397static void kill_rules(struct audit_tree *tree) 429static void kill_rules(struct audit_tree *tree)
398{ 430{
399 struct audit_krule *rule, *next; 431 struct audit_krule *rule, *next;
@@ -431,17 +463,10 @@ static void prune_one(struct audit_tree *victim)
431 spin_lock(&hash_lock); 463 spin_lock(&hash_lock);
432 while (!list_empty(&victim->chunks)) { 464 while (!list_empty(&victim->chunks)) {
433 struct node *p; 465 struct node *p;
434 struct audit_chunk *chunk;
435 466
436 p = list_entry(victim->chunks.next, struct node, list); 467 p = list_entry(victim->chunks.next, struct node, list);
437 chunk = find_chunk(p);
438 get_inotify_watch(&chunk->watch);
439 spin_unlock(&hash_lock);
440
441 untag_chunk(chunk, p);
442 468
443 put_inotify_watch(&chunk->watch); 469 untag_chunk(p);
444 spin_lock(&hash_lock);
445 } 470 }
446 spin_unlock(&hash_lock); 471 spin_unlock(&hash_lock);
447 put_tree(victim); 472 put_tree(victim);
@@ -469,7 +494,6 @@ static void trim_marked(struct audit_tree *tree)
469 494
470 while (!list_empty(&tree->chunks)) { 495 while (!list_empty(&tree->chunks)) {
471 struct node *node; 496 struct node *node;
472 struct audit_chunk *chunk;
473 497
474 node = list_entry(tree->chunks.next, struct node, list); 498 node = list_entry(tree->chunks.next, struct node, list);
475 499
@@ -477,14 +501,7 @@ static void trim_marked(struct audit_tree *tree)
477 if (!(node->index & (1U<<31))) 501 if (!(node->index & (1U<<31)))
478 break; 502 break;
479 503
480 chunk = find_chunk(node); 504 untag_chunk(node);
481 get_inotify_watch(&chunk->watch);
482 spin_unlock(&hash_lock);
483
484 untag_chunk(chunk, node);
485
486 put_inotify_watch(&chunk->watch);
487 spin_lock(&hash_lock);
488 } 505 }
489 if (!tree->root && !tree->goner) { 506 if (!tree->root && !tree->goner) {
490 tree->goner = 1; 507 tree->goner = 1;
@@ -878,7 +895,7 @@ static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
878static void destroy_watch(struct inotify_watch *watch) 895static void destroy_watch(struct inotify_watch *watch)
879{ 896{
880 struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch); 897 struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
881 free_chunk(chunk); 898 call_rcu(&chunk->head, __put_chunk);
882} 899}
883 900
884static const struct inotify_operations rtree_inotify_ops = { 901static const struct inotify_operations rtree_inotify_ops = {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b7d354e2b0ef..9fd85a4640a0 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1094,8 +1094,8 @@ static void audit_inotify_unregister(struct list_head *in_list)
1094 list_for_each_entry_safe(p, n, in_list, ilist) { 1094 list_for_each_entry_safe(p, n, in_list, ilist) {
1095 list_del(&p->ilist); 1095 list_del(&p->ilist);
1096 inotify_rm_watch(audit_ih, &p->wdata); 1096 inotify_rm_watch(audit_ih, &p->wdata);
1097 /* the put matching the get in audit_do_del_rule() */ 1097 /* the unpin matching the pin in audit_do_del_rule() */
1098 put_inotify_watch(&p->wdata); 1098 unpin_inotify_watch(&p->wdata);
1099 } 1099 }
1100} 1100}
1101 1101
@@ -1389,9 +1389,13 @@ static inline int audit_del_rule(struct audit_entry *entry,
1389 /* Put parent on the inotify un-registration 1389 /* Put parent on the inotify un-registration
1390 * list. Grab a reference before releasing 1390 * list. Grab a reference before releasing
1391 * audit_filter_mutex, to be released in 1391 * audit_filter_mutex, to be released in
1392 * audit_inotify_unregister(). */ 1392 * audit_inotify_unregister().
1393 list_add(&parent->ilist, &inotify_list); 1393 * If filesystem is going away, just leave
1394 get_inotify_watch(&parent->wdata); 1394 * the sucker alone, eviction will take
1395 * care of it.
1396 */
1397 if (pin_inotify_watch(&parent->wdata))
1398 list_add(&parent->ilist, &inotify_list);
1395 } 1399 }
1396 } 1400 }
1397 } 1401 }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 358e77564e6f..fe00b3b983a8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2039,10 +2039,13 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
2039 struct cgroup *cgrp; 2039 struct cgroup *cgrp;
2040 struct cgroup_iter it; 2040 struct cgroup_iter it;
2041 struct task_struct *tsk; 2041 struct task_struct *tsk;
2042
2042 /* 2043 /*
2043 * Validate dentry by checking the superblock operations 2044 * Validate dentry by checking the superblock operations,
2045 * and make sure it's a directory.
2044 */ 2046 */
2045 if (dentry->d_sb->s_op != &cgroup_ops) 2047 if (dentry->d_sb->s_op != &cgroup_ops ||
2048 !S_ISDIR(dentry->d_inode->i_mode))
2046 goto err; 2049 goto err;
2047 2050
2048 ret = 0; 2051 ret = 0;
@@ -2472,10 +2475,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2472 mutex_unlock(&cgroup_mutex); 2475 mutex_unlock(&cgroup_mutex);
2473 return -EBUSY; 2476 return -EBUSY;
2474 } 2477 }
2475 2478 mutex_unlock(&cgroup_mutex);
2476 parent = cgrp->parent;
2477 root = cgrp->root;
2478 sb = root->sb;
2479 2479
2480 /* 2480 /*
2481 * Call pre_destroy handlers of subsys. Notify subsystems 2481 * Call pre_destroy handlers of subsys. Notify subsystems
@@ -2483,7 +2483,14 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2483 */ 2483 */
2484 cgroup_call_pre_destroy(cgrp); 2484 cgroup_call_pre_destroy(cgrp);
2485 2485
2486 if (cgroup_has_css_refs(cgrp)) { 2486 mutex_lock(&cgroup_mutex);
2487 parent = cgrp->parent;
2488 root = cgrp->root;
2489 sb = root->sb;
2490
2491 if (atomic_read(&cgrp->count)
2492 || !list_empty(&cgrp->children)
2493 || cgroup_has_css_refs(cgrp)) {
2487 mutex_unlock(&cgroup_mutex); 2494 mutex_unlock(&cgroup_mutex);
2488 return -EBUSY; 2495 return -EBUSY;
2489 } 2496 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3e00526f52ec..da7ff6137f37 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -36,6 +36,7 @@
36#include <linux/list.h> 36#include <linux/list.h>
37#include <linux/mempolicy.h> 37#include <linux/mempolicy.h>
38#include <linux/mm.h> 38#include <linux/mm.h>
39#include <linux/memory.h>
39#include <linux/module.h> 40#include <linux/module.h>
40#include <linux/mount.h> 41#include <linux/mount.h>
41#include <linux/namei.h> 42#include <linux/namei.h>
@@ -587,7 +588,6 @@ static int generate_sched_domains(cpumask_t **domains,
587 int ndoms; /* number of sched domains in result */ 588 int ndoms; /* number of sched domains in result */
588 int nslot; /* next empty doms[] cpumask_t slot */ 589 int nslot; /* next empty doms[] cpumask_t slot */
589 590
590 ndoms = 0;
591 doms = NULL; 591 doms = NULL;
592 dattr = NULL; 592 dattr = NULL;
593 csa = NULL; 593 csa = NULL;
@@ -674,10 +674,8 @@ restart:
674 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. 674 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
675 */ 675 */
676 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); 676 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
677 if (!doms) { 677 if (!doms)
678 ndoms = 0;
679 goto done; 678 goto done;
680 }
681 679
682 /* 680 /*
683 * The rest of the code, including the scheduler, can deal with 681 * The rest of the code, including the scheduler, can deal with
@@ -732,6 +730,13 @@ restart:
732done: 730done:
733 kfree(csa); 731 kfree(csa);
734 732
733 /*
734 * Fallback to the default domain if kmalloc() failed.
735 * See comments in partition_sched_domains().
736 */
737 if (doms == NULL)
738 ndoms = 1;
739
735 *domains = doms; 740 *domains = doms;
736 *attributes = dattr; 741 *attributes = dattr;
737 return ndoms; 742 return ndoms;
@@ -2011,12 +2016,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2011 * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. 2016 * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
2012 * See also the previous routine cpuset_track_online_cpus(). 2017 * See also the previous routine cpuset_track_online_cpus().
2013 */ 2018 */
2014void cpuset_track_online_nodes(void) 2019static int cpuset_track_online_nodes(struct notifier_block *self,
2020 unsigned long action, void *arg)
2015{ 2021{
2016 cgroup_lock(); 2022 cgroup_lock();
2017 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2023 switch (action) {
2018 scan_for_empty_cpusets(&top_cpuset); 2024 case MEM_ONLINE:
2025 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2026 break;
2027 case MEM_OFFLINE:
2028 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2029 scan_for_empty_cpusets(&top_cpuset);
2030 break;
2031 default:
2032 break;
2033 }
2019 cgroup_unlock(); 2034 cgroup_unlock();
2035 return NOTIFY_OK;
2020} 2036}
2021#endif 2037#endif
2022 2038
@@ -2032,6 +2048,7 @@ void __init cpuset_init_smp(void)
2032 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2048 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2033 2049
2034 hotcpu_notifier(cpuset_track_online_cpus, 0); 2050 hotcpu_notifier(cpuset_track_online_cpus, 0);
2051 hotplug_memory_notifier(cpuset_track_online_nodes, 10);
2035} 2052}
2036 2053
2037/** 2054/**
diff --git a/kernel/exit.c b/kernel/exit.c
index b9c4d8bb72e5..30fcdf16737a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -40,7 +40,6 @@
40#include <linux/cn_proc.h> 40#include <linux/cn_proc.h>
41#include <linux/mutex.h> 41#include <linux/mutex.h>
42#include <linux/futex.h> 42#include <linux/futex.h>
43#include <linux/compat.h>
44#include <linux/pipe_fs_i.h> 43#include <linux/pipe_fs_i.h>
45#include <linux/audit.h> /* for audit_free() */ 44#include <linux/audit.h> /* for audit_free() */
46#include <linux/resource.h> 45#include <linux/resource.h>
@@ -1059,14 +1058,6 @@ NORET_TYPE void do_exit(long code)
1059 exit_itimers(tsk->signal); 1058 exit_itimers(tsk->signal);
1060 } 1059 }
1061 acct_collect(code, group_dead); 1060 acct_collect(code, group_dead);
1062#ifdef CONFIG_FUTEX
1063 if (unlikely(tsk->robust_list))
1064 exit_robust_list(tsk);
1065#ifdef CONFIG_COMPAT
1066 if (unlikely(tsk->compat_robust_list))
1067 compat_exit_robust_list(tsk);
1068#endif
1069#endif
1070 if (group_dead) 1061 if (group_dead)
1071 tty_audit_exit(); 1062 tty_audit_exit();
1072 if (unlikely(tsk->audit_context)) 1063 if (unlikely(tsk->audit_context))
diff --git a/kernel/extable.c b/kernel/extable.c
index a26cb2e17023..adf0cc9c02d6 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr)
66 return 1; 66 return 1;
67 return module_text_address(addr) != NULL; 67 return module_text_address(addr) != NULL;
68} 68}
69
70/*
71 * On some architectures (PPC64, IA64) function pointers
72 * are actually only tokens to some data that then holds the
73 * real function address. As a result, to find if a function
74 * pointer is part of the kernel text, we need to do some
75 * special dereferencing first.
76 */
77int func_ptr_is_kernel_text(void *ptr)
78{
79 unsigned long addr;
80 addr = (unsigned long) dereference_function_descriptor(ptr);
81 if (core_kernel_text(addr))
82 return 1;
83 return module_text_address(addr) != NULL;
84}
diff --git a/kernel/fork.c b/kernel/fork.c
index f6083561dfe0..2a372a0e206f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
40#include <linux/jiffies.h> 40#include <linux/jiffies.h>
41#include <linux/tracehook.h> 41#include <linux/tracehook.h>
42#include <linux/futex.h> 42#include <linux/futex.h>
43#include <linux/compat.h>
43#include <linux/task_io_accounting_ops.h> 44#include <linux/task_io_accounting_ops.h>
44#include <linux/rcupdate.h> 45#include <linux/rcupdate.h>
45#include <linux/ptrace.h> 46#include <linux/ptrace.h>
@@ -519,6 +520,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
519{ 520{
520 struct completion *vfork_done = tsk->vfork_done; 521 struct completion *vfork_done = tsk->vfork_done;
521 522
523 /* Get rid of any futexes when releasing the mm */
524#ifdef CONFIG_FUTEX
525 if (unlikely(tsk->robust_list))
526 exit_robust_list(tsk);
527#ifdef CONFIG_COMPAT
528 if (unlikely(tsk->compat_robust_list))
529 compat_exit_robust_list(tsk);
530#endif
531#endif
532
522 /* Get rid of any cached register state */ 533 /* Get rid of any cached register state */
523 deactivate_mm(tsk, mm); 534 deactivate_mm(tsk, mm);
524 535
diff --git a/kernel/futex.c b/kernel/futex.c
index 8af10027514b..e10c5c8786a6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -123,24 +123,6 @@ struct futex_hash_bucket {
123static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; 123static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
124 124
125/* 125/*
126 * Take mm->mmap_sem, when futex is shared
127 */
128static inline void futex_lock_mm(struct rw_semaphore *fshared)
129{
130 if (fshared)
131 down_read(fshared);
132}
133
134/*
135 * Release mm->mmap_sem, when the futex is shared
136 */
137static inline void futex_unlock_mm(struct rw_semaphore *fshared)
138{
139 if (fshared)
140 up_read(fshared);
141}
142
143/*
144 * We hash on the keys returned from get_futex_key (see below). 126 * We hash on the keys returned from get_futex_key (see below).
145 */ 127 */
146static struct futex_hash_bucket *hash_futex(union futex_key *key) 128static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -161,6 +143,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
161 && key1->both.offset == key2->both.offset); 143 && key1->both.offset == key2->both.offset);
162} 144}
163 145
146/*
147 * Take a reference to the resource addressed by a key.
148 * Can be called while holding spinlocks.
149 *
150 */
151static void get_futex_key_refs(union futex_key *key)
152{
153 if (!key->both.ptr)
154 return;
155
156 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
157 case FUT_OFF_INODE:
158 atomic_inc(&key->shared.inode->i_count);
159 break;
160 case FUT_OFF_MMSHARED:
161 atomic_inc(&key->private.mm->mm_count);
162 break;
163 }
164}
165
166/*
167 * Drop a reference to the resource addressed by a key.
168 * The hash bucket spinlock must not be held.
169 */
170static void drop_futex_key_refs(union futex_key *key)
171{
172 if (!key->both.ptr)
173 return;
174
175 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
176 case FUT_OFF_INODE:
177 iput(key->shared.inode);
178 break;
179 case FUT_OFF_MMSHARED:
180 mmdrop(key->private.mm);
181 break;
182 }
183}
184
164/** 185/**
165 * get_futex_key - Get parameters which are the keys for a futex. 186 * get_futex_key - Get parameters which are the keys for a futex.
166 * @uaddr: virtual address of the futex 187 * @uaddr: virtual address of the futex
@@ -179,12 +200,10 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
179 * For other futexes, it points to &current->mm->mmap_sem and 200 * For other futexes, it points to &current->mm->mmap_sem and
180 * caller must have taken the reader lock. but NOT any spinlocks. 201 * caller must have taken the reader lock. but NOT any spinlocks.
181 */ 202 */
182static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, 203static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
183 union futex_key *key)
184{ 204{
185 unsigned long address = (unsigned long)uaddr; 205 unsigned long address = (unsigned long)uaddr;
186 struct mm_struct *mm = current->mm; 206 struct mm_struct *mm = current->mm;
187 struct vm_area_struct *vma;
188 struct page *page; 207 struct page *page;
189 int err; 208 int err;
190 209
@@ -208,100 +227,50 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
208 return -EFAULT; 227 return -EFAULT;
209 key->private.mm = mm; 228 key->private.mm = mm;
210 key->private.address = address; 229 key->private.address = address;
230 get_futex_key_refs(key);
211 return 0; 231 return 0;
212 } 232 }
213 /*
214 * The futex is hashed differently depending on whether
215 * it's in a shared or private mapping. So check vma first.
216 */
217 vma = find_extend_vma(mm, address);
218 if (unlikely(!vma))
219 return -EFAULT;
220 233
221 /* 234again:
222 * Permissions. 235 err = get_user_pages_fast(address, 1, 0, &page);
223 */ 236 if (err < 0)
224 if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) 237 return err;
225 return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; 238
239 lock_page(page);
240 if (!page->mapping) {
241 unlock_page(page);
242 put_page(page);
243 goto again;
244 }
226 245
227 /* 246 /*
228 * Private mappings are handled in a simple way. 247 * Private mappings are handled in a simple way.
229 * 248 *
230 * NOTE: When userspace waits on a MAP_SHARED mapping, even if 249 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
231 * it's a read-only handle, it's expected that futexes attach to 250 * it's a read-only handle, it's expected that futexes attach to
232 * the object not the particular process. Therefore we use 251 * the object not the particular process.
233 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
234 * mappings of _writable_ handles.
235 */ 252 */
236 if (likely(!(vma->vm_flags & VM_MAYSHARE))) { 253 if (PageAnon(page)) {
237 key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ 254 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
238 key->private.mm = mm; 255 key->private.mm = mm;
239 key->private.address = address; 256 key->private.address = address;
240 return 0; 257 } else {
258 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
259 key->shared.inode = page->mapping->host;
260 key->shared.pgoff = page->index;
241 } 261 }
242 262
243 /* 263 get_futex_key_refs(key);
244 * Linear file mappings are also simple.
245 */
246 key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
247 key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
248 if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
249 key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
250 + vma->vm_pgoff);
251 return 0;
252 }
253 264
254 /* 265 unlock_page(page);
255 * We could walk the page table to read the non-linear 266 put_page(page);
256 * pte, and get the page index without fetching the page 267 return 0;
257 * from swap. But that's a lot of code to duplicate here
258 * for a rare case, so we simply fetch the page.
259 */
260 err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
261 if (err >= 0) {
262 key->shared.pgoff =
263 page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
264 put_page(page);
265 return 0;
266 }
267 return err;
268}
269
270/*
271 * Take a reference to the resource addressed by a key.
272 * Can be called while holding spinlocks.
273 *
274 */
275static void get_futex_key_refs(union futex_key *key)
276{
277 if (key->both.ptr == NULL)
278 return;
279 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
280 case FUT_OFF_INODE:
281 atomic_inc(&key->shared.inode->i_count);
282 break;
283 case FUT_OFF_MMSHARED:
284 atomic_inc(&key->private.mm->mm_count);
285 break;
286 }
287} 268}
288 269
289/* 270static inline
290 * Drop a reference to the resource addressed by a key. 271void put_futex_key(int fshared, union futex_key *key)
291 * The hash bucket spinlock must not be held.
292 */
293static void drop_futex_key_refs(union futex_key *key)
294{ 272{
295 if (!key->both.ptr) 273 drop_futex_key_refs(key);
296 return;
297 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
298 case FUT_OFF_INODE:
299 iput(key->shared.inode);
300 break;
301 case FUT_OFF_MMSHARED:
302 mmdrop(key->private.mm);
303 break;
304 }
305} 274}
306 275
307static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 276static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@ -328,10 +297,8 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
328 297
329/* 298/*
330 * Fault handling. 299 * Fault handling.
331 * if fshared is non NULL, current->mm->mmap_sem is already held
332 */ 300 */
333static int futex_handle_fault(unsigned long address, 301static int futex_handle_fault(unsigned long address, int attempt)
334 struct rw_semaphore *fshared, int attempt)
335{ 302{
336 struct vm_area_struct * vma; 303 struct vm_area_struct * vma;
337 struct mm_struct *mm = current->mm; 304 struct mm_struct *mm = current->mm;
@@ -340,8 +307,7 @@ static int futex_handle_fault(unsigned long address,
340 if (attempt > 2) 307 if (attempt > 2)
341 return ret; 308 return ret;
342 309
343 if (!fshared) 310 down_read(&mm->mmap_sem);
344 down_read(&mm->mmap_sem);
345 vma = find_vma(mm, address); 311 vma = find_vma(mm, address);
346 if (vma && address >= vma->vm_start && 312 if (vma && address >= vma->vm_start &&
347 (vma->vm_flags & VM_WRITE)) { 313 (vma->vm_flags & VM_WRITE)) {
@@ -361,8 +327,7 @@ static int futex_handle_fault(unsigned long address,
361 current->min_flt++; 327 current->min_flt++;
362 } 328 }
363 } 329 }
364 if (!fshared) 330 up_read(&mm->mmap_sem);
365 up_read(&mm->mmap_sem);
366 return ret; 331 return ret;
367} 332}
368 333
@@ -385,6 +350,7 @@ static int refill_pi_state_cache(void)
385 /* pi_mutex gets initialized later */ 350 /* pi_mutex gets initialized later */
386 pi_state->owner = NULL; 351 pi_state->owner = NULL;
387 atomic_set(&pi_state->refcount, 1); 352 atomic_set(&pi_state->refcount, 1);
353 pi_state->key = FUTEX_KEY_INIT;
388 354
389 current->pi_state_cache = pi_state; 355 current->pi_state_cache = pi_state;
390 356
@@ -462,7 +428,7 @@ void exit_pi_state_list(struct task_struct *curr)
462 struct list_head *next, *head = &curr->pi_state_list; 428 struct list_head *next, *head = &curr->pi_state_list;
463 struct futex_pi_state *pi_state; 429 struct futex_pi_state *pi_state;
464 struct futex_hash_bucket *hb; 430 struct futex_hash_bucket *hb;
465 union futex_key key; 431 union futex_key key = FUTEX_KEY_INIT;
466 432
467 if (!futex_cmpxchg_enabled) 433 if (!futex_cmpxchg_enabled)
468 return; 434 return;
@@ -719,20 +685,17 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
719 * Wake up all waiters hashed on the physical page that is mapped 685 * Wake up all waiters hashed on the physical page that is mapped
720 * to this virtual address: 686 * to this virtual address:
721 */ 687 */
722static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, 688static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
723 int nr_wake, u32 bitset)
724{ 689{
725 struct futex_hash_bucket *hb; 690 struct futex_hash_bucket *hb;
726 struct futex_q *this, *next; 691 struct futex_q *this, *next;
727 struct plist_head *head; 692 struct plist_head *head;
728 union futex_key key; 693 union futex_key key = FUTEX_KEY_INIT;
729 int ret; 694 int ret;
730 695
731 if (!bitset) 696 if (!bitset)
732 return -EINVAL; 697 return -EINVAL;
733 698
734 futex_lock_mm(fshared);
735
736 ret = get_futex_key(uaddr, fshared, &key); 699 ret = get_futex_key(uaddr, fshared, &key);
737 if (unlikely(ret != 0)) 700 if (unlikely(ret != 0))
738 goto out; 701 goto out;
@@ -760,7 +723,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
760 723
761 spin_unlock(&hb->lock); 724 spin_unlock(&hb->lock);
762out: 725out:
763 futex_unlock_mm(fshared); 726 put_futex_key(fshared, &key);
764 return ret; 727 return ret;
765} 728}
766 729
@@ -769,19 +732,16 @@ out:
769 * to this virtual address: 732 * to this virtual address:
770 */ 733 */
771static int 734static int
772futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, 735futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
773 u32 __user *uaddr2,
774 int nr_wake, int nr_wake2, int op) 736 int nr_wake, int nr_wake2, int op)
775{ 737{
776 union futex_key key1, key2; 738 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
777 struct futex_hash_bucket *hb1, *hb2; 739 struct futex_hash_bucket *hb1, *hb2;
778 struct plist_head *head; 740 struct plist_head *head;
779 struct futex_q *this, *next; 741 struct futex_q *this, *next;
780 int ret, op_ret, attempt = 0; 742 int ret, op_ret, attempt = 0;
781 743
782retryfull: 744retryfull:
783 futex_lock_mm(fshared);
784
785 ret = get_futex_key(uaddr1, fshared, &key1); 745 ret = get_futex_key(uaddr1, fshared, &key1);
786 if (unlikely(ret != 0)) 746 if (unlikely(ret != 0))
787 goto out; 747 goto out;
@@ -826,18 +786,12 @@ retry:
826 */ 786 */
827 if (attempt++) { 787 if (attempt++) {
828 ret = futex_handle_fault((unsigned long)uaddr2, 788 ret = futex_handle_fault((unsigned long)uaddr2,
829 fshared, attempt); 789 attempt);
830 if (ret) 790 if (ret)
831 goto out; 791 goto out;
832 goto retry; 792 goto retry;
833 } 793 }
834 794
835 /*
836 * If we would have faulted, release mmap_sem,
837 * fault it in and start all over again.
838 */
839 futex_unlock_mm(fshared);
840
841 ret = get_user(dummy, uaddr2); 795 ret = get_user(dummy, uaddr2);
842 if (ret) 796 if (ret)
843 return ret; 797 return ret;
@@ -873,7 +827,8 @@ retry:
873 if (hb1 != hb2) 827 if (hb1 != hb2)
874 spin_unlock(&hb2->lock); 828 spin_unlock(&hb2->lock);
875out: 829out:
876 futex_unlock_mm(fshared); 830 put_futex_key(fshared, &key2);
831 put_futex_key(fshared, &key1);
877 832
878 return ret; 833 return ret;
879} 834}
@@ -882,19 +837,16 @@ out:
882 * Requeue all waiters hashed on one physical page to another 837 * Requeue all waiters hashed on one physical page to another
883 * physical page. 838 * physical page.
884 */ 839 */
885static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, 840static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
886 u32 __user *uaddr2,
887 int nr_wake, int nr_requeue, u32 *cmpval) 841 int nr_wake, int nr_requeue, u32 *cmpval)
888{ 842{
889 union futex_key key1, key2; 843 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
890 struct futex_hash_bucket *hb1, *hb2; 844 struct futex_hash_bucket *hb1, *hb2;
891 struct plist_head *head1; 845 struct plist_head *head1;
892 struct futex_q *this, *next; 846 struct futex_q *this, *next;
893 int ret, drop_count = 0; 847 int ret, drop_count = 0;
894 848
895 retry: 849 retry:
896 futex_lock_mm(fshared);
897
898 ret = get_futex_key(uaddr1, fshared, &key1); 850 ret = get_futex_key(uaddr1, fshared, &key1);
899 if (unlikely(ret != 0)) 851 if (unlikely(ret != 0))
900 goto out; 852 goto out;
@@ -917,12 +869,6 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
917 if (hb1 != hb2) 869 if (hb1 != hb2)
918 spin_unlock(&hb2->lock); 870 spin_unlock(&hb2->lock);
919 871
920 /*
921 * If we would have faulted, release mmap_sem, fault
922 * it in and start all over again.
923 */
924 futex_unlock_mm(fshared);
925
926 ret = get_user(curval, uaddr1); 872 ret = get_user(curval, uaddr1);
927 873
928 if (!ret) 874 if (!ret)
@@ -974,7 +920,8 @@ out_unlock:
974 drop_futex_key_refs(&key1); 920 drop_futex_key_refs(&key1);
975 921
976out: 922out:
977 futex_unlock_mm(fshared); 923 put_futex_key(fshared, &key2);
924 put_futex_key(fshared, &key1);
978 return ret; 925 return ret;
979} 926}
980 927
@@ -1096,8 +1043,7 @@ static void unqueue_me_pi(struct futex_q *q)
1096 * private futexes. 1043 * private futexes.
1097 */ 1044 */
1098static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1045static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1099 struct task_struct *newowner, 1046 struct task_struct *newowner, int fshared)
1100 struct rw_semaphore *fshared)
1101{ 1047{
1102 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; 1048 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1103 struct futex_pi_state *pi_state = q->pi_state; 1049 struct futex_pi_state *pi_state = q->pi_state;
@@ -1176,7 +1122,7 @@ retry:
1176handle_fault: 1122handle_fault:
1177 spin_unlock(q->lock_ptr); 1123 spin_unlock(q->lock_ptr);
1178 1124
1179 ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); 1125 ret = futex_handle_fault((unsigned long)uaddr, attempt++);
1180 1126
1181 spin_lock(q->lock_ptr); 1127 spin_lock(q->lock_ptr);
1182 1128
@@ -1200,7 +1146,7 @@ handle_fault:
1200 1146
1201static long futex_wait_restart(struct restart_block *restart); 1147static long futex_wait_restart(struct restart_block *restart);
1202 1148
1203static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, 1149static int futex_wait(u32 __user *uaddr, int fshared,
1204 u32 val, ktime_t *abs_time, u32 bitset) 1150 u32 val, ktime_t *abs_time, u32 bitset)
1205{ 1151{
1206 struct task_struct *curr = current; 1152 struct task_struct *curr = current;
@@ -1218,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1218 q.pi_state = NULL; 1164 q.pi_state = NULL;
1219 q.bitset = bitset; 1165 q.bitset = bitset;
1220 retry: 1166 retry:
1221 futex_lock_mm(fshared); 1167 q.key = FUTEX_KEY_INIT;
1222
1223 ret = get_futex_key(uaddr, fshared, &q.key); 1168 ret = get_futex_key(uaddr, fshared, &q.key);
1224 if (unlikely(ret != 0)) 1169 if (unlikely(ret != 0))
1225 goto out_release_sem; 1170 goto out_release_sem;
@@ -1251,12 +1196,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1251 if (unlikely(ret)) { 1196 if (unlikely(ret)) {
1252 queue_unlock(&q, hb); 1197 queue_unlock(&q, hb);
1253 1198
1254 /*
1255 * If we would have faulted, release mmap_sem, fault it in and
1256 * start all over again.
1257 */
1258 futex_unlock_mm(fshared);
1259
1260 ret = get_user(uval, uaddr); 1199 ret = get_user(uval, uaddr);
1261 1200
1262 if (!ret) 1201 if (!ret)
@@ -1271,12 +1210,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1271 queue_me(&q, hb); 1210 queue_me(&q, hb);
1272 1211
1273 /* 1212 /*
1274 * Now the futex is queued and we have checked the data, we
1275 * don't want to hold mmap_sem while we sleep.
1276 */
1277 futex_unlock_mm(fshared);
1278
1279 /*
1280 * There might have been scheduling since the queue_me(), as we 1213 * There might have been scheduling since the queue_me(), as we
1281 * cannot hold a spinlock across the get_user() in case it 1214 * cannot hold a spinlock across the get_user() in case it
1282 * faults, and we cannot just set TASK_INTERRUPTIBLE state when 1215 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
@@ -1363,7 +1296,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1363 queue_unlock(&q, hb); 1296 queue_unlock(&q, hb);
1364 1297
1365 out_release_sem: 1298 out_release_sem:
1366 futex_unlock_mm(fshared); 1299 put_futex_key(fshared, &q.key);
1367 return ret; 1300 return ret;
1368} 1301}
1369 1302
@@ -1371,13 +1304,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1371static long futex_wait_restart(struct restart_block *restart) 1304static long futex_wait_restart(struct restart_block *restart)
1372{ 1305{
1373 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; 1306 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
1374 struct rw_semaphore *fshared = NULL; 1307 int fshared = 0;
1375 ktime_t t; 1308 ktime_t t;
1376 1309
1377 t.tv64 = restart->futex.time; 1310 t.tv64 = restart->futex.time;
1378 restart->fn = do_no_restart_syscall; 1311 restart->fn = do_no_restart_syscall;
1379 if (restart->futex.flags & FLAGS_SHARED) 1312 if (restart->futex.flags & FLAGS_SHARED)
1380 fshared = &current->mm->mmap_sem; 1313 fshared = 1;
1381 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, 1314 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
1382 restart->futex.bitset); 1315 restart->futex.bitset);
1383} 1316}
@@ -1389,7 +1322,7 @@ static long futex_wait_restart(struct restart_block *restart)
1389 * if there are waiters then it will block, it does PI, etc. (Due to 1322 * if there are waiters then it will block, it does PI, etc. (Due to
1390 * races the kernel might see a 0 value of the futex too.) 1323 * races the kernel might see a 0 value of the futex too.)
1391 */ 1324 */
1392static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, 1325static int futex_lock_pi(u32 __user *uaddr, int fshared,
1393 int detect, ktime_t *time, int trylock) 1326 int detect, ktime_t *time, int trylock)
1394{ 1327{
1395 struct hrtimer_sleeper timeout, *to = NULL; 1328 struct hrtimer_sleeper timeout, *to = NULL;
@@ -1412,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1412 1345
1413 q.pi_state = NULL; 1346 q.pi_state = NULL;
1414 retry: 1347 retry:
1415 futex_lock_mm(fshared); 1348 q.key = FUTEX_KEY_INIT;
1416
1417 ret = get_futex_key(uaddr, fshared, &q.key); 1349 ret = get_futex_key(uaddr, fshared, &q.key);
1418 if (unlikely(ret != 0)) 1350 if (unlikely(ret != 0))
1419 goto out_release_sem; 1351 goto out_release_sem;
@@ -1502,7 +1434,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1502 * exit to complete. 1434 * exit to complete.
1503 */ 1435 */
1504 queue_unlock(&q, hb); 1436 queue_unlock(&q, hb);
1505 futex_unlock_mm(fshared);
1506 cond_resched(); 1437 cond_resched();
1507 goto retry; 1438 goto retry;
1508 1439
@@ -1534,12 +1465,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1534 */ 1465 */
1535 queue_me(&q, hb); 1466 queue_me(&q, hb);
1536 1467
1537 /*
1538 * Now the futex is queued and we have checked the data, we
1539 * don't want to hold mmap_sem while we sleep.
1540 */
1541 futex_unlock_mm(fshared);
1542
1543 WARN_ON(!q.pi_state); 1468 WARN_ON(!q.pi_state);
1544 /* 1469 /*
1545 * Block on the PI mutex: 1470 * Block on the PI mutex:
@@ -1552,7 +1477,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1552 ret = ret ? 0 : -EWOULDBLOCK; 1477 ret = ret ? 0 : -EWOULDBLOCK;
1553 } 1478 }
1554 1479
1555 futex_lock_mm(fshared);
1556 spin_lock(q.lock_ptr); 1480 spin_lock(q.lock_ptr);
1557 1481
1558 if (!ret) { 1482 if (!ret) {
@@ -1618,7 +1542,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1618 1542
1619 /* Unqueue and drop the lock */ 1543 /* Unqueue and drop the lock */
1620 unqueue_me_pi(&q); 1544 unqueue_me_pi(&q);
1621 futex_unlock_mm(fshared);
1622 1545
1623 if (to) 1546 if (to)
1624 destroy_hrtimer_on_stack(&to->timer); 1547 destroy_hrtimer_on_stack(&to->timer);
@@ -1628,7 +1551,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1628 queue_unlock(&q, hb); 1551 queue_unlock(&q, hb);
1629 1552
1630 out_release_sem: 1553 out_release_sem:
1631 futex_unlock_mm(fshared); 1554 put_futex_key(fshared, &q.key);
1632 if (to) 1555 if (to)
1633 destroy_hrtimer_on_stack(&to->timer); 1556 destroy_hrtimer_on_stack(&to->timer);
1634 return ret; 1557 return ret;
@@ -1645,15 +1568,12 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1645 queue_unlock(&q, hb); 1568 queue_unlock(&q, hb);
1646 1569
1647 if (attempt++) { 1570 if (attempt++) {
1648 ret = futex_handle_fault((unsigned long)uaddr, fshared, 1571 ret = futex_handle_fault((unsigned long)uaddr, attempt);
1649 attempt);
1650 if (ret) 1572 if (ret)
1651 goto out_release_sem; 1573 goto out_release_sem;
1652 goto retry_unlocked; 1574 goto retry_unlocked;
1653 } 1575 }
1654 1576
1655 futex_unlock_mm(fshared);
1656
1657 ret = get_user(uval, uaddr); 1577 ret = get_user(uval, uaddr);
1658 if (!ret && (uval != -EFAULT)) 1578 if (!ret && (uval != -EFAULT))
1659 goto retry; 1579 goto retry;
@@ -1668,13 +1588,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1668 * This is the in-kernel slowpath: we look up the PI state (if any), 1588 * This is the in-kernel slowpath: we look up the PI state (if any),
1669 * and do the rt-mutex unlock. 1589 * and do the rt-mutex unlock.
1670 */ 1590 */
1671static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) 1591static int futex_unlock_pi(u32 __user *uaddr, int fshared)
1672{ 1592{
1673 struct futex_hash_bucket *hb; 1593 struct futex_hash_bucket *hb;
1674 struct futex_q *this, *next; 1594 struct futex_q *this, *next;
1675 u32 uval; 1595 u32 uval;
1676 struct plist_head *head; 1596 struct plist_head *head;
1677 union futex_key key; 1597 union futex_key key = FUTEX_KEY_INIT;
1678 int ret, attempt = 0; 1598 int ret, attempt = 0;
1679 1599
1680retry: 1600retry:
@@ -1685,10 +1605,6 @@ retry:
1685 */ 1605 */
1686 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 1606 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
1687 return -EPERM; 1607 return -EPERM;
1688 /*
1689 * First take all the futex related locks:
1690 */
1691 futex_lock_mm(fshared);
1692 1608
1693 ret = get_futex_key(uaddr, fshared, &key); 1609 ret = get_futex_key(uaddr, fshared, &key);
1694 if (unlikely(ret != 0)) 1610 if (unlikely(ret != 0))
@@ -1747,7 +1663,7 @@ retry_unlocked:
1747out_unlock: 1663out_unlock:
1748 spin_unlock(&hb->lock); 1664 spin_unlock(&hb->lock);
1749out: 1665out:
1750 futex_unlock_mm(fshared); 1666 put_futex_key(fshared, &key);
1751 1667
1752 return ret; 1668 return ret;
1753 1669
@@ -1763,16 +1679,13 @@ pi_faulted:
1763 spin_unlock(&hb->lock); 1679 spin_unlock(&hb->lock);
1764 1680
1765 if (attempt++) { 1681 if (attempt++) {
1766 ret = futex_handle_fault((unsigned long)uaddr, fshared, 1682 ret = futex_handle_fault((unsigned long)uaddr, attempt);
1767 attempt);
1768 if (ret) 1683 if (ret)
1769 goto out; 1684 goto out;
1770 uval = 0; 1685 uval = 0;
1771 goto retry_unlocked; 1686 goto retry_unlocked;
1772 } 1687 }
1773 1688
1774 futex_unlock_mm(fshared);
1775
1776 ret = get_user(uval, uaddr); 1689 ret = get_user(uval, uaddr);
1777 if (!ret && (uval != -EFAULT)) 1690 if (!ret && (uval != -EFAULT))
1778 goto retry; 1691 goto retry;
@@ -1898,8 +1811,7 @@ retry:
1898 * PI futexes happens in exit_pi_state(): 1811 * PI futexes happens in exit_pi_state():
1899 */ 1812 */
1900 if (!pi && (uval & FUTEX_WAITERS)) 1813 if (!pi && (uval & FUTEX_WAITERS))
1901 futex_wake(uaddr, &curr->mm->mmap_sem, 1, 1814 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
1902 FUTEX_BITSET_MATCH_ANY);
1903 } 1815 }
1904 return 0; 1816 return 0;
1905} 1817}
@@ -1995,10 +1907,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
1995{ 1907{
1996 int ret = -ENOSYS; 1908 int ret = -ENOSYS;
1997 int cmd = op & FUTEX_CMD_MASK; 1909 int cmd = op & FUTEX_CMD_MASK;
1998 struct rw_semaphore *fshared = NULL; 1910 int fshared = 0;
1999 1911
2000 if (!(op & FUTEX_PRIVATE_FLAG)) 1912 if (!(op & FUTEX_PRIVATE_FLAG))
2001 fshared = &current->mm->mmap_sem; 1913 fshared = 1;
2002 1914
2003 switch (cmd) { 1915 switch (cmd) {
2004 case FUTEX_WAIT: 1916 case FUTEX_WAIT:
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 5072cf1685a2..7b8b0f21a5b1 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -304,17 +304,24 @@ int sprint_symbol(char *buffer, unsigned long address)
304 char *modname; 304 char *modname;
305 const char *name; 305 const char *name;
306 unsigned long offset, size; 306 unsigned long offset, size;
307 char namebuf[KSYM_NAME_LEN]; 307 int len;
308 308
309 name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); 309 name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
310 if (!name) 310 if (!name)
311 return sprintf(buffer, "0x%lx", address); 311 return sprintf(buffer, "0x%lx", address);
312 312
313 if (name != buffer)
314 strcpy(buffer, name);
315 len = strlen(buffer);
316 buffer += len;
317
313 if (modname) 318 if (modname)
314 return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, 319 len += sprintf(buffer, "+%#lx/%#lx [%s]",
315 size, modname); 320 offset, size, modname);
316 else 321 else
317 return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); 322 len += sprintf(buffer, "+%#lx/%#lx", offset, size);
323
324 return len;
318} 325}
319 326
320/* Look up a kernel symbol and print it to the kernel messages. */ 327/* Look up a kernel symbol and print it to the kernel messages. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06e157119d2b..e4bdda8dcf04 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
136#ifdef CONFIG_LOCK_STAT 136#ifdef CONFIG_LOCK_STAT
137static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); 137static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
138 138
139static int lock_contention_point(struct lock_class *class, unsigned long ip) 139static int lock_point(unsigned long points[], unsigned long ip)
140{ 140{
141 int i; 141 int i;
142 142
143 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { 143 for (i = 0; i < LOCKSTAT_POINTS; i++) {
144 if (class->contention_point[i] == 0) { 144 if (points[i] == 0) {
145 class->contention_point[i] = ip; 145 points[i] = ip;
146 break; 146 break;
147 } 147 }
148 if (class->contention_point[i] == ip) 148 if (points[i] == ip)
149 break; 149 break;
150 } 150 }
151 151
@@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class)
185 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) 185 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
186 stats.contention_point[i] += pcs->contention_point[i]; 186 stats.contention_point[i] += pcs->contention_point[i];
187 187
188 for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
189 stats.contending_point[i] += pcs->contending_point[i];
190
188 lock_time_add(&pcs->read_waittime, &stats.read_waittime); 191 lock_time_add(&pcs->read_waittime, &stats.read_waittime);
189 lock_time_add(&pcs->write_waittime, &stats.write_waittime); 192 lock_time_add(&pcs->write_waittime, &stats.write_waittime);
190 193
@@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class)
209 memset(cpu_stats, 0, sizeof(struct lock_class_stats)); 212 memset(cpu_stats, 0, sizeof(struct lock_class_stats));
210 } 213 }
211 memset(class->contention_point, 0, sizeof(class->contention_point)); 214 memset(class->contention_point, 0, sizeof(class->contention_point));
215 memset(class->contending_point, 0, sizeof(class->contending_point));
212} 216}
213 217
214static struct lock_class_stats *get_lock_stats(struct lock_class *class) 218static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@ -2999,7 +3003,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
2999 struct held_lock *hlock, *prev_hlock; 3003 struct held_lock *hlock, *prev_hlock;
3000 struct lock_class_stats *stats; 3004 struct lock_class_stats *stats;
3001 unsigned int depth; 3005 unsigned int depth;
3002 int i, point; 3006 int i, contention_point, contending_point;
3003 3007
3004 depth = curr->lockdep_depth; 3008 depth = curr->lockdep_depth;
3005 if (DEBUG_LOCKS_WARN_ON(!depth)) 3009 if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3023,18 +3027,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
3023found_it: 3027found_it:
3024 hlock->waittime_stamp = sched_clock(); 3028 hlock->waittime_stamp = sched_clock();
3025 3029
3026 point = lock_contention_point(hlock_class(hlock), ip); 3030 contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
3031 contending_point = lock_point(hlock_class(hlock)->contending_point,
3032 lock->ip);
3027 3033
3028 stats = get_lock_stats(hlock_class(hlock)); 3034 stats = get_lock_stats(hlock_class(hlock));
3029 if (point < ARRAY_SIZE(stats->contention_point)) 3035 if (contention_point < LOCKSTAT_POINTS)
3030 stats->contention_point[point]++; 3036 stats->contention_point[contention_point]++;
3037 if (contending_point < LOCKSTAT_POINTS)
3038 stats->contending_point[contending_point]++;
3031 if (lock->cpu != smp_processor_id()) 3039 if (lock->cpu != smp_processor_id())
3032 stats->bounces[bounce_contended + !!hlock->read]++; 3040 stats->bounces[bounce_contended + !!hlock->read]++;
3033 put_lock_stats(stats); 3041 put_lock_stats(stats);
3034} 3042}
3035 3043
3036static void 3044static void
3037__lock_acquired(struct lockdep_map *lock) 3045__lock_acquired(struct lockdep_map *lock, unsigned long ip)
3038{ 3046{
3039 struct task_struct *curr = current; 3047 struct task_struct *curr = current;
3040 struct held_lock *hlock, *prev_hlock; 3048 struct held_lock *hlock, *prev_hlock;
@@ -3083,6 +3091,7 @@ found_it:
3083 put_lock_stats(stats); 3091 put_lock_stats(stats);
3084 3092
3085 lock->cpu = cpu; 3093 lock->cpu = cpu;
3094 lock->ip = ip;
3086} 3095}
3087 3096
3088void lock_contended(struct lockdep_map *lock, unsigned long ip) 3097void lock_contended(struct lockdep_map *lock, unsigned long ip)
@@ -3104,7 +3113,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3104} 3113}
3105EXPORT_SYMBOL_GPL(lock_contended); 3114EXPORT_SYMBOL_GPL(lock_contended);
3106 3115
3107void lock_acquired(struct lockdep_map *lock) 3116void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3108{ 3117{
3109 unsigned long flags; 3118 unsigned long flags;
3110 3119
@@ -3117,7 +3126,7 @@ void lock_acquired(struct lockdep_map *lock)
3117 raw_local_irq_save(flags); 3126 raw_local_irq_save(flags);
3118 check_flags(flags); 3127 check_flags(flags);
3119 current->lockdep_recursion = 1; 3128 current->lockdep_recursion = 1;
3120 __lock_acquired(lock); 3129 __lock_acquired(lock, ip);
3121 current->lockdep_recursion = 0; 3130 current->lockdep_recursion = 0;
3122 raw_local_irq_restore(flags); 3131 raw_local_irq_restore(flags);
3123} 3132}
@@ -3276,10 +3285,10 @@ void __init lockdep_info(void)
3276{ 3285{
3277 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); 3286 printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
3278 3287
3279 printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); 3288 printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
3280 printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); 3289 printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
3281 printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); 3290 printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
3282 printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); 3291 printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
3283 printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); 3292 printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
3284 printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); 3293 printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
3285 printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); 3294 printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf9c7dd..13716b813896 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -470,11 +470,12 @@ static void seq_line(struct seq_file *m, char c, int offset, int length)
470 470
471static void snprint_time(char *buf, size_t bufsiz, s64 nr) 471static void snprint_time(char *buf, size_t bufsiz, s64 nr)
472{ 472{
473 unsigned long rem; 473 s64 div;
474 s32 rem;
474 475
475 nr += 5; /* for display rounding */ 476 nr += 5; /* for display rounding */
476 rem = do_div(nr, 1000); /* XXX: do_div_signed */ 477 div = div_s64_rem(nr, 1000, &rem);
477 snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10); 478 snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
478} 479}
479 480
480static void seq_time(struct seq_file *m, s64 time) 481static void seq_time(struct seq_file *m, s64 time)
@@ -556,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
556 if (stats->read_holdtime.nr) 557 if (stats->read_holdtime.nr)
557 namelen += 2; 558 namelen += 2;
558 559
559 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { 560 for (i = 0; i < LOCKSTAT_POINTS; i++) {
560 char sym[KSYM_SYMBOL_LEN]; 561 char sym[KSYM_SYMBOL_LEN];
561 char ip[32]; 562 char ip[32];
562 563
@@ -573,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
573 stats->contention_point[i], 574 stats->contention_point[i],
574 ip, sym); 575 ip, sym);
575 } 576 }
577 for (i = 0; i < LOCKSTAT_POINTS; i++) {
578 char sym[KSYM_SYMBOL_LEN];
579 char ip[32];
580
581 if (class->contending_point[i] == 0)
582 break;
583
584 if (!i)
585 seq_line(m, '-', 40-namelen, namelen);
586
587 sprint_symbol(sym, class->contending_point[i]);
588 snprintf(ip, sizeof(ip), "[<%p>]",
589 (void *)class->contending_point[i]);
590 seq_printf(m, "%40s %14lu %29s %s\n", name,
591 stats->contending_point[i],
592 ip, sym);
593 }
576 if (i) { 594 if (i) {
577 seq_puts(m, "\n"); 595 seq_puts(m, "\n");
578 seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); 596 seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
@@ -582,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
582 600
583static void seq_header(struct seq_file *m) 601static void seq_header(struct seq_file *m)
584{ 602{
585 seq_printf(m, "lock_stat version 0.2\n"); 603 seq_printf(m, "lock_stat version 0.3\n");
586 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); 604 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
587 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " 605 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
588 "%14s %14s\n", 606 "%14s %14s\n",
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779dc65d4..4f45d4b658ef 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init);
59 * We also put the fastpath first in the kernel image, to make sure the 59 * We also put the fastpath first in the kernel image, to make sure the
60 * branch is predicted by the CPU as default-untaken. 60 * branch is predicted by the CPU as default-untaken.
61 */ 61 */
62static void noinline __sched 62static __used noinline void __sched
63__mutex_lock_slowpath(atomic_t *lock_count); 63__mutex_lock_slowpath(atomic_t *lock_count);
64 64
65/*** 65/***
@@ -96,7 +96,7 @@ void inline __sched mutex_lock(struct mutex *lock)
96EXPORT_SYMBOL(mutex_lock); 96EXPORT_SYMBOL(mutex_lock);
97#endif 97#endif
98 98
99static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); 99static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
100 100
101/*** 101/***
102 * mutex_unlock - release the mutex 102 * mutex_unlock - release the mutex
@@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
184 } 184 }
185 185
186done: 186done:
187 lock_acquired(&lock->dep_map); 187 lock_acquired(&lock->dep_map, ip);
188 /* got the lock - rejoice! */ 188 /* got the lock - rejoice! */
189 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 189 mutex_remove_waiter(lock, &waiter, task_thread_info(task));
190 debug_mutex_set_owner(lock, task_thread_info(task)); 190 debug_mutex_set_owner(lock, task_thread_info(task));
@@ -268,7 +268,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
268/* 268/*
269 * Release the lock, slowpath: 269 * Release the lock, slowpath:
270 */ 270 */
271static noinline void 271static __used noinline void
272__mutex_unlock_slowpath(atomic_t *lock_count) 272__mutex_unlock_slowpath(atomic_t *lock_count)
273{ 273{
274 __mutex_unlock_common_slowpath(lock_count, 1); 274 __mutex_unlock_common_slowpath(lock_count, 1);
@@ -313,7 +313,7 @@ int __sched mutex_lock_killable(struct mutex *lock)
313} 313}
314EXPORT_SYMBOL(mutex_lock_killable); 314EXPORT_SYMBOL(mutex_lock_killable);
315 315
316static noinline void __sched 316static __used noinline void __sched
317__mutex_lock_slowpath(atomic_t *lock_count) 317__mutex_lock_slowpath(atomic_t *lock_count)
318{ 318{
319 struct mutex *lock = container_of(lock_count, struct mutex, count); 319 struct mutex *lock = container_of(lock_count, struct mutex, count);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 4282c0a40a57..61d5aa5eced3 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
82 82
83 while (nb && nr_to_call) { 83 while (nb && nr_to_call) {
84 next_nb = rcu_dereference(nb->next); 84 next_nb = rcu_dereference(nb->next);
85
86#ifdef CONFIG_DEBUG_NOTIFIERS
87 if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
88 WARN(1, "Invalid notifier called!");
89 nb = next_nb;
90 continue;
91 }
92#endif
85 ret = nb->notifier_call(nb, val, v); 93 ret = nb->notifier_call(nb, val, v);
86 94
87 if (nr_calls) 95 if (nr_calls)
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 19122cf6d827..b8f7ce9473e8 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -174,7 +174,7 @@ static void suspend_test_finish(const char *label)
174 * has some performance issues. The stack dump of a WARN_ON 174 * has some performance issues. The stack dump of a WARN_ON
175 * is more likely to get the right attention than a printk... 175 * is more likely to get the right attention than a printk...
176 */ 176 */
177 WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000)); 177 WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label);
178} 178}
179 179
180#else 180#else
diff --git a/kernel/profile.c b/kernel/profile.c
index 9830a037d8db..5b7d1ac7124c 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
544}; 544};
545 545
546#ifdef CONFIG_SMP 546#ifdef CONFIG_SMP
547static void __init profile_nop(void *unused) 547static inline void profile_nop(void *unused)
548{ 548{
549} 549}
550 550
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 37f72e551542..e503a002f330 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
191 191
192 /* OK, time to rat on our buddy... */ 192 /* OK, time to rat on our buddy... */
193 193
194 printk(KERN_ERR "RCU detected CPU stalls:"); 194 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
195 for_each_possible_cpu(cpu) { 195 for_each_possible_cpu(cpu) {
196 if (cpu_isset(cpu, rcp->cpumask)) 196 if (cpu_isset(cpu, rcp->cpumask))
197 printk(" %d", cpu); 197 printk(" %d", cpu);
@@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp)
204{ 204{
205 unsigned long flags; 205 unsigned long flags;
206 206
207 printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", 207 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
208 smp_processor_id(), jiffies, 208 smp_processor_id(), jiffies,
209 jiffies - rcp->gp_start); 209 jiffies - rcp->gp_start);
210 dump_stack(); 210 dump_stack();
diff --git a/kernel/relay.c b/kernel/relay.c
index 8d13a7855c08..32b0befdcb6a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -400,7 +400,7 @@ void relay_reset(struct rchan *chan)
400 } 400 }
401 401
402 mutex_lock(&relay_channels_mutex); 402 mutex_lock(&relay_channels_mutex);
403 for_each_online_cpu(i) 403 for_each_possible_cpu(i)
404 if (chan->buf[i]) 404 if (chan->buf[i])
405 __relay_reset(chan->buf[i], 0); 405 __relay_reset(chan->buf[i], 0);
406 mutex_unlock(&relay_channels_mutex); 406 mutex_unlock(&relay_channels_mutex);
@@ -611,10 +611,9 @@ struct rchan *relay_open(const char *base_filename,
611 return chan; 611 return chan;
612 612
613free_bufs: 613free_bufs:
614 for_each_online_cpu(i) { 614 for_each_possible_cpu(i) {
615 if (!chan->buf[i]) 615 if (chan->buf[i])
616 break; 616 relay_close_buf(chan->buf[i]);
617 relay_close_buf(chan->buf[i]);
618 } 617 }
619 618
620 kref_put(&chan->kref, relay_destroy_channel); 619 kref_put(&chan->kref, relay_destroy_channel);
diff --git a/kernel/sched.c b/kernel/sched.c
index b388c9b243e9..558e5f284269 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4337,7 +4337,7 @@ void __kprobes sub_preempt_count(int val)
4337 /* 4337 /*
4338 * Underflow? 4338 * Underflow?
4339 */ 4339 */
4340 if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) 4340 if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
4341 return; 4341 return;
4342 /* 4342 /*
4343 * Is the spinlock portion underflowing? 4343 * Is the spinlock portion underflowing?
@@ -7788,13 +7788,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7788 * 7788 *
7789 * The passed in 'doms_new' should be kmalloc'd. This routine takes 7789 * The passed in 'doms_new' should be kmalloc'd. This routine takes
7790 * ownership of it and will kfree it when done with it. If the caller 7790 * ownership of it and will kfree it when done with it. If the caller
7791 * failed the kmalloc call, then it can pass in doms_new == NULL, 7791 * failed the kmalloc call, then it can pass in doms_new == NULL &&
7792 * and partition_sched_domains() will fallback to the single partition 7792 * ndoms_new == 1, and partition_sched_domains() will fallback to
7793 * 'fallback_doms', it also forces the domains to be rebuilt. 7793 * the single partition 'fallback_doms', it also forces the domains
7794 * to be rebuilt.
7794 * 7795 *
7795 * If doms_new==NULL it will be replaced with cpu_online_map. 7796 * If doms_new == NULL it will be replaced with cpu_online_map.
7796 * ndoms_new==0 is a special case for destroying existing domains. 7797 * ndoms_new == 0 is a special case for destroying existing domains,
7797 * It will not create the default domain. 7798 * and it will not create the default domain.
7798 * 7799 *
7799 * Call with hotplug lock held 7800 * Call with hotplug lock held
7800 */ 7801 */
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 3953e4aed733..884e6cd2769c 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
164/* 164/*
165 * Zero means infinite timeout - no checking done: 165 * Zero means infinite timeout - no checking done:
166 */ 166 */
167unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; 167unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
168 168
169unsigned long __read_mostly sysctl_hung_task_warnings = 10; 169unsigned long __read_mostly sysctl_hung_task_warnings = 10;
170 170
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9bc4c00872c9..24e8ceacc388 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -112,7 +112,7 @@ static int chill(void *unused)
112int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) 112int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
113{ 113{
114 struct work_struct *sm_work; 114 struct work_struct *sm_work;
115 int i; 115 int i, ret;
116 116
117 /* Set up initial state. */ 117 /* Set up initial state. */
118 mutex_lock(&lock); 118 mutex_lock(&lock);
@@ -137,8 +137,9 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
137 /* This will release the thread on our CPU. */ 137 /* This will release the thread on our CPU. */
138 put_cpu(); 138 put_cpu();
139 flush_workqueue(stop_machine_wq); 139 flush_workqueue(stop_machine_wq);
140 ret = active.fnret;
140 mutex_unlock(&lock); 141 mutex_unlock(&lock);
141 return active.fnret; 142 return ret;
142} 143}
143 144
144int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus) 145int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a77b27b11b04..e14a23281707 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
31cond_syscall(sys_bind); 31cond_syscall(sys_bind);
32cond_syscall(sys_listen); 32cond_syscall(sys_listen);
33cond_syscall(sys_accept); 33cond_syscall(sys_accept);
34cond_syscall(sys_paccept); 34cond_syscall(sys_accept4);
35cond_syscall(sys_connect); 35cond_syscall(sys_connect);
36cond_syscall(sys_getsockname); 36cond_syscall(sys_getsockname);
37cond_syscall(sys_getpeername); 37cond_syscall(sys_getpeername);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a39d24568c8..78db083390f0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -185,7 +185,6 @@ enum {
185}; 185};
186 186
187static int ftrace_filtered; 187static int ftrace_filtered;
188static int tracing_on;
189 188
190static LIST_HEAD(ftrace_new_addrs); 189static LIST_HEAD(ftrace_new_addrs);
191 190
@@ -327,96 +326,89 @@ ftrace_record_ip(unsigned long ip)
327 326
328static int 327static int
329__ftrace_replace_code(struct dyn_ftrace *rec, 328__ftrace_replace_code(struct dyn_ftrace *rec,
330 unsigned char *old, unsigned char *new, int enable) 329 unsigned char *nop, int enable)
331{ 330{
332 unsigned long ip, fl; 331 unsigned long ip, fl;
332 unsigned char *call, *old, *new;
333 333
334 ip = rec->ip; 334 ip = rec->ip;
335 335
336 if (ftrace_filtered && enable) { 336 /*
337 * If this record is not to be traced and
338 * it is not enabled then do nothing.
339 *
340 * If this record is not to be traced and
341 * it is enabled then disabled it.
342 *
343 */
344 if (rec->flags & FTRACE_FL_NOTRACE) {
345 if (rec->flags & FTRACE_FL_ENABLED)
346 rec->flags &= ~FTRACE_FL_ENABLED;
347 else
348 return 0;
349
350 } else if (ftrace_filtered && enable) {
337 /* 351 /*
338 * If filtering is on: 352 * Filtering is on:
339 *
340 * If this record is set to be filtered and
341 * is enabled then do nothing.
342 *
343 * If this record is set to be filtered and
344 * it is not enabled, enable it.
345 *
346 * If this record is not set to be filtered
347 * and it is not enabled do nothing.
348 *
349 * If this record is set not to trace then
350 * do nothing.
351 *
352 * If this record is set not to trace and
353 * it is enabled then disable it.
354 *
355 * If this record is not set to be filtered and
356 * it is enabled, disable it.
357 */ 353 */
358 354
359 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | 355 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
360 FTRACE_FL_ENABLED);
361 356
362 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || 357 /* Record is filtered and enabled, do nothing */
363 (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || 358 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
364 !fl || (fl == FTRACE_FL_NOTRACE))
365 return 0; 359 return 0;
366 360
367 /* 361 /* Record is not filtered and is not enabled do nothing */
368 * If it is enabled disable it, 362 if (!fl)
369 * otherwise enable it! 363 return 0;
370 */ 364
371 if (fl & FTRACE_FL_ENABLED) { 365 /* Record is not filtered but enabled, disable it */
372 /* swap new and old */ 366 if (fl == FTRACE_FL_ENABLED)
373 new = old;
374 old = ftrace_call_replace(ip, FTRACE_ADDR);
375 rec->flags &= ~FTRACE_FL_ENABLED; 367 rec->flags &= ~FTRACE_FL_ENABLED;
376 } else { 368 else
377 new = ftrace_call_replace(ip, FTRACE_ADDR); 369 /* Otherwise record is filtered but not enabled, enable it */
378 rec->flags |= FTRACE_FL_ENABLED; 370 rec->flags |= FTRACE_FL_ENABLED;
379 }
380 } else { 371 } else {
372 /* Disable or not filtered */
381 373
382 if (enable) { 374 if (enable) {
383 /* 375 /* if record is enabled, do nothing */
384 * If this record is set not to trace and is
385 * not enabled, do nothing.
386 */
387 fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
388 if (fl == FTRACE_FL_NOTRACE)
389 return 0;
390
391 new = ftrace_call_replace(ip, FTRACE_ADDR);
392 } else
393 old = ftrace_call_replace(ip, FTRACE_ADDR);
394
395 if (enable) {
396 if (rec->flags & FTRACE_FL_ENABLED) 376 if (rec->flags & FTRACE_FL_ENABLED)
397 return 0; 377 return 0;
378
398 rec->flags |= FTRACE_FL_ENABLED; 379 rec->flags |= FTRACE_FL_ENABLED;
380
399 } else { 381 } else {
382
383 /* if record is not enabled do nothing */
400 if (!(rec->flags & FTRACE_FL_ENABLED)) 384 if (!(rec->flags & FTRACE_FL_ENABLED))
401 return 0; 385 return 0;
386
402 rec->flags &= ~FTRACE_FL_ENABLED; 387 rec->flags &= ~FTRACE_FL_ENABLED;
403 } 388 }
404 } 389 }
405 390
391 call = ftrace_call_replace(ip, FTRACE_ADDR);
392
393 if (rec->flags & FTRACE_FL_ENABLED) {
394 old = nop;
395 new = call;
396 } else {
397 old = call;
398 new = nop;
399 }
400
406 return ftrace_modify_code(ip, old, new); 401 return ftrace_modify_code(ip, old, new);
407} 402}
408 403
409static void ftrace_replace_code(int enable) 404static void ftrace_replace_code(int enable)
410{ 405{
411 int i, failed; 406 int i, failed;
412 unsigned char *new = NULL, *old = NULL; 407 unsigned char *nop = NULL;
413 struct dyn_ftrace *rec; 408 struct dyn_ftrace *rec;
414 struct ftrace_page *pg; 409 struct ftrace_page *pg;
415 410
416 if (enable) 411 nop = ftrace_nop_replace();
417 old = ftrace_nop_replace();
418 else
419 new = ftrace_nop_replace();
420 412
421 for (pg = ftrace_pages_start; pg; pg = pg->next) { 413 for (pg = ftrace_pages_start; pg; pg = pg->next) {
422 for (i = 0; i < pg->index; i++) { 414 for (i = 0; i < pg->index; i++) {
@@ -434,7 +426,7 @@ static void ftrace_replace_code(int enable)
434 unfreeze_record(rec); 426 unfreeze_record(rec);
435 } 427 }
436 428
437 failed = __ftrace_replace_code(rec, old, new, enable); 429 failed = __ftrace_replace_code(rec, nop, enable);
438 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 430 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
439 rec->flags |= FTRACE_FL_FAILED; 431 rec->flags |= FTRACE_FL_FAILED;
440 if ((system_state == SYSTEM_BOOTING) || 432 if ((system_state == SYSTEM_BOOTING) ||
@@ -506,13 +498,10 @@ static int __ftrace_modify_code(void *data)
506{ 498{
507 int *command = data; 499 int *command = data;
508 500
509 if (*command & FTRACE_ENABLE_CALLS) { 501 if (*command & FTRACE_ENABLE_CALLS)
510 ftrace_replace_code(1); 502 ftrace_replace_code(1);
511 tracing_on = 1; 503 else if (*command & FTRACE_DISABLE_CALLS)
512 } else if (*command & FTRACE_DISABLE_CALLS) {
513 ftrace_replace_code(0); 504 ftrace_replace_code(0);
514 tracing_on = 0;
515 }
516 505
517 if (*command & FTRACE_UPDATE_TRACE_FUNC) 506 if (*command & FTRACE_UPDATE_TRACE_FUNC)
518 ftrace_update_ftrace_func(ftrace_trace_function); 507 ftrace_update_ftrace_func(ftrace_trace_function);
@@ -538,8 +527,7 @@ static void ftrace_startup(void)
538 527
539 mutex_lock(&ftrace_start_lock); 528 mutex_lock(&ftrace_start_lock);
540 ftrace_start++; 529 ftrace_start++;
541 if (ftrace_start == 1) 530 command |= FTRACE_ENABLE_CALLS;
542 command |= FTRACE_ENABLE_CALLS;
543 531
544 if (saved_ftrace_func != ftrace_trace_function) { 532 if (saved_ftrace_func != ftrace_trace_function) {
545 saved_ftrace_func = ftrace_trace_function; 533 saved_ftrace_func = ftrace_trace_function;
@@ -677,7 +665,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
677 665
678 cnt = num_to_init / ENTRIES_PER_PAGE; 666 cnt = num_to_init / ENTRIES_PER_PAGE;
679 pr_info("ftrace: allocating %ld entries in %d pages\n", 667 pr_info("ftrace: allocating %ld entries in %d pages\n",
680 num_to_init, cnt); 668 num_to_init, cnt + 1);
681 669
682 for (i = 0; i < cnt; i++) { 670 for (i = 0; i < cnt; i++) {
683 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 671 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -738,6 +726,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
738 ((iter->flags & FTRACE_ITER_FAILURES) && 726 ((iter->flags & FTRACE_ITER_FAILURES) &&
739 !(rec->flags & FTRACE_FL_FAILED)) || 727 !(rec->flags & FTRACE_FL_FAILED)) ||
740 728
729 ((iter->flags & FTRACE_ITER_FILTER) &&
730 !(rec->flags & FTRACE_FL_FILTER)) ||
731
741 ((iter->flags & FTRACE_ITER_NOTRACE) && 732 ((iter->flags & FTRACE_ITER_NOTRACE) &&
742 !(rec->flags & FTRACE_FL_NOTRACE))) { 733 !(rec->flags & FTRACE_FL_NOTRACE))) {
743 rec = NULL; 734 rec = NULL;
@@ -757,13 +748,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
757 void *p = NULL; 748 void *p = NULL;
758 loff_t l = -1; 749 loff_t l = -1;
759 750
760 if (*pos != iter->pos) { 751 if (*pos > iter->pos)
761 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) 752 *pos = iter->pos;
762 ; 753
763 } else { 754 l = *pos;
764 l = *pos; 755 p = t_next(m, p, &l);
765 p = t_next(m, p, &l);
766 }
767 756
768 return p; 757 return p;
769} 758}
@@ -774,15 +763,21 @@ static void t_stop(struct seq_file *m, void *p)
774 763
775static int t_show(struct seq_file *m, void *v) 764static int t_show(struct seq_file *m, void *v)
776{ 765{
766 struct ftrace_iterator *iter = m->private;
777 struct dyn_ftrace *rec = v; 767 struct dyn_ftrace *rec = v;
778 char str[KSYM_SYMBOL_LEN]; 768 char str[KSYM_SYMBOL_LEN];
769 int ret = 0;
779 770
780 if (!rec) 771 if (!rec)
781 return 0; 772 return 0;
782 773
783 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 774 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
784 775
785 seq_printf(m, "%s\n", str); 776 ret = seq_printf(m, "%s\n", str);
777 if (ret < 0) {
778 iter->pos--;
779 iter->idx--;
780 }
786 781
787 return 0; 782 return 0;
788} 783}
@@ -808,7 +803,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
808 return -ENOMEM; 803 return -ENOMEM;
809 804
810 iter->pg = ftrace_pages_start; 805 iter->pg = ftrace_pages_start;
811 iter->pos = -1; 806 iter->pos = 0;
812 807
813 ret = seq_open(file, &show_ftrace_seq_ops); 808 ret = seq_open(file, &show_ftrace_seq_ops);
814 if (!ret) { 809 if (!ret) {
@@ -895,7 +890,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
895 890
896 if (file->f_mode & FMODE_READ) { 891 if (file->f_mode & FMODE_READ) {
897 iter->pg = ftrace_pages_start; 892 iter->pg = ftrace_pages_start;
898 iter->pos = -1; 893 iter->pos = 0;
899 iter->flags = enable ? FTRACE_ITER_FILTER : 894 iter->flags = enable ? FTRACE_ITER_FILTER :
900 FTRACE_ITER_NOTRACE; 895 FTRACE_ITER_NOTRACE;
901 896
@@ -1186,7 +1181,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1186 1181
1187 mutex_lock(&ftrace_sysctl_lock); 1182 mutex_lock(&ftrace_sysctl_lock);
1188 mutex_lock(&ftrace_start_lock); 1183 mutex_lock(&ftrace_start_lock);
1189 if (iter->filtered && ftrace_start && ftrace_enabled) 1184 if (ftrace_start && ftrace_enabled)
1190 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1185 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1191 mutex_unlock(&ftrace_start_lock); 1186 mutex_unlock(&ftrace_start_lock);
1192 mutex_unlock(&ftrace_sysctl_lock); 1187 mutex_unlock(&ftrace_sysctl_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2f76193c3489..f780e9552f91 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,14 +16,49 @@
16#include <linux/list.h> 16#include <linux/list.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18 18
19#include "trace.h"
20
21/* Global flag to disable all recording to ring buffers */
22static int ring_buffers_off __read_mostly;
23
24/**
25 * tracing_on - enable all tracing buffers
26 *
27 * This function enables all tracing buffers that may have been
28 * disabled with tracing_off.
29 */
30void tracing_on(void)
31{
32 ring_buffers_off = 0;
33}
34
35/**
36 * tracing_off - turn off all tracing buffers
37 *
38 * This function stops all tracing buffers from recording data.
39 * It does not disable any overhead the tracers themselves may
40 * be causing. This function simply causes all recording to
41 * the ring buffers to fail.
42 */
43void tracing_off(void)
44{
45 ring_buffers_off = 1;
46}
47
19/* Up this if you want to test the TIME_EXTENTS and normalization */ 48/* Up this if you want to test the TIME_EXTENTS and normalization */
20#define DEBUG_SHIFT 0 49#define DEBUG_SHIFT 0
21 50
22/* FIXME!!! */ 51/* FIXME!!! */
23u64 ring_buffer_time_stamp(int cpu) 52u64 ring_buffer_time_stamp(int cpu)
24{ 53{
54 u64 time;
55
56 preempt_disable_notrace();
25 /* shift to debug/test normalization and TIME_EXTENTS */ 57 /* shift to debug/test normalization and TIME_EXTENTS */
26 return sched_clock() << DEBUG_SHIFT; 58 time = sched_clock() << DEBUG_SHIFT;
59 preempt_enable_notrace();
60
61 return time;
27} 62}
28 63
29void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) 64void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
@@ -503,6 +538,12 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
503 LIST_HEAD(pages); 538 LIST_HEAD(pages);
504 int i, cpu; 539 int i, cpu;
505 540
541 /*
542 * Always succeed at resizing a non-existent buffer:
543 */
544 if (!buffer)
545 return size;
546
506 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); 547 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
507 size *= BUF_PAGE_SIZE; 548 size *= BUF_PAGE_SIZE;
508 buffer_size = buffer->pages * BUF_PAGE_SIZE; 549 buffer_size = buffer->pages * BUF_PAGE_SIZE;
@@ -576,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
576 list_del_init(&page->list); 617 list_del_init(&page->list);
577 free_buffer_page(page); 618 free_buffer_page(page);
578 } 619 }
620 mutex_unlock(&buffer->mutex);
579 return -ENOMEM; 621 return -ENOMEM;
580} 622}
581 623
@@ -1133,6 +1175,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1133 struct ring_buffer_event *event; 1175 struct ring_buffer_event *event;
1134 int cpu, resched; 1176 int cpu, resched;
1135 1177
1178 if (ring_buffers_off)
1179 return NULL;
1180
1136 if (atomic_read(&buffer->record_disabled)) 1181 if (atomic_read(&buffer->record_disabled))
1137 return NULL; 1182 return NULL;
1138 1183
@@ -1249,6 +1294,9 @@ int ring_buffer_write(struct ring_buffer *buffer,
1249 int ret = -EBUSY; 1294 int ret = -EBUSY;
1250 int cpu, resched; 1295 int cpu, resched;
1251 1296
1297 if (ring_buffers_off)
1298 return -EBUSY;
1299
1252 if (atomic_read(&buffer->record_disabled)) 1300 if (atomic_read(&buffer->record_disabled))
1253 return -EBUSY; 1301 return -EBUSY;
1254 1302
@@ -2070,3 +2118,69 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2070 return 0; 2118 return 0;
2071} 2119}
2072 2120
2121static ssize_t
2122rb_simple_read(struct file *filp, char __user *ubuf,
2123 size_t cnt, loff_t *ppos)
2124{
2125 int *p = filp->private_data;
2126 char buf[64];
2127 int r;
2128
2129 /* !ring_buffers_off == tracing_on */
2130 r = sprintf(buf, "%d\n", !*p);
2131
2132 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2133}
2134
2135static ssize_t
2136rb_simple_write(struct file *filp, const char __user *ubuf,
2137 size_t cnt, loff_t *ppos)
2138{
2139 int *p = filp->private_data;
2140 char buf[64];
2141 long val;
2142 int ret;
2143
2144 if (cnt >= sizeof(buf))
2145 return -EINVAL;
2146
2147 if (copy_from_user(&buf, ubuf, cnt))
2148 return -EFAULT;
2149
2150 buf[cnt] = 0;
2151
2152 ret = strict_strtoul(buf, 10, &val);
2153 if (ret < 0)
2154 return ret;
2155
2156 /* !ring_buffers_off == tracing_on */
2157 *p = !val;
2158
2159 (*ppos)++;
2160
2161 return cnt;
2162}
2163
2164static struct file_operations rb_simple_fops = {
2165 .open = tracing_open_generic,
2166 .read = rb_simple_read,
2167 .write = rb_simple_write,
2168};
2169
2170
2171static __init int rb_init_debugfs(void)
2172{
2173 struct dentry *d_tracer;
2174 struct dentry *entry;
2175
2176 d_tracer = tracing_init_dentry();
2177
2178 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2179 &ring_buffers_off, &rb_simple_fops);
2180 if (!entry)
2181 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2182
2183 return 0;
2184}
2185
2186fs_initcall(rb_init_debugfs);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 697eda36b86a..d86e3252f300 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1936,6 +1936,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1936 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1936 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1937 } 1937 }
1938 mutex_unlock(&trace_types_lock); 1938 mutex_unlock(&trace_types_lock);
1939 kfree(iter);
1939 1940
1940 return ERR_PTR(-ENOMEM); 1941 return ERR_PTR(-ENOMEM);
1941} 1942}