aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2014-02-03 22:13:49 -0500
committerEric W. Biederman <ebiederm@xmission.com>2014-07-29 21:08:50 -0400
commit728dba3a39c66b3d8ac889ddbe38b5b1c264aec3 (patch)
tree26f69d0fe363f00b628d698b9df2634a33e42482
parent9a3c4145af32125c5ee39c0272662b47307a8323 (diff)
namespaces: Use task_lock and not rcu to protect nsproxy
The synchronous synchronize_rcu in switch_task_namespaces makes setns a sufficiently expensive system call that people have complained. Upon inspection nsproxy no longer needs rcu protection for remote reads. Remote reads are rare. So optimize for same process reads and writes by switching to using task_lock instead. This yields a simpler to understand lock, and a faster setns system call. In particular this fixes a performance regression observed by Rafael David Tinoco <rafael.tinoco@canonical.com>. This is effectively a revert of Pavel Emelyanov's commit cf7b708c8d1d7a27736771bcf4c457b332b0f818 "Make access to task's nsproxy lighter" from 2007. The race this originally fixed no longer exists as do_notify_parent uses task_active_pid_ns(parent) instead of parent->nsproxy. Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/proc/proc_net.c4
-rw-r--r--fs/proc_namespace.c8
-rw-r--r--include/linux/nsproxy.h16
-rw-r--r--ipc/namespace.c6
-rw-r--r--kernel/nsproxy.c15
-rw-r--r--kernel/utsname.c6
-rw-r--r--net/core/net_namespace.c10
8 files changed, 31 insertions, 40 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41cd887..7187d01329c3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2972,13 +2972,13 @@ static void *mntns_get(struct task_struct *task)
2972 struct mnt_namespace *ns = NULL; 2972 struct mnt_namespace *ns = NULL;
2973 struct nsproxy *nsproxy; 2973 struct nsproxy *nsproxy;
2974 2974
2975 rcu_read_lock(); 2975 task_lock(task);
2976 nsproxy = task_nsproxy(task); 2976 nsproxy = task->nsproxy;
2977 if (nsproxy) { 2977 if (nsproxy) {
2978 ns = nsproxy->mnt_ns; 2978 ns = nsproxy->mnt_ns;
2979 get_mnt_ns(ns); 2979 get_mnt_ns(ns);
2980 } 2980 }
2981 rcu_read_unlock(); 2981 task_unlock(task);
2982 2982
2983 return ns; 2983 return ns;
2984} 2984}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4677bb7dc7c2..a63af3e0a612 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir)
113 rcu_read_lock(); 113 rcu_read_lock();
114 task = pid_task(proc_pid(dir), PIDTYPE_PID); 114 task = pid_task(proc_pid(dir), PIDTYPE_PID);
115 if (task != NULL) { 115 if (task != NULL) {
116 ns = task_nsproxy(task); 116 task_lock(task);
117 ns = task->nsproxy;
117 if (ns != NULL) 118 if (ns != NULL)
118 net = get_net(ns->net_ns); 119 net = get_net(ns->net_ns);
120 task_unlock(task);
119 } 121 }
120 rcu_read_unlock(); 122 rcu_read_unlock();
121 123
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 1a81373947f3..73ca1740d839 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file,
232 if (!task) 232 if (!task)
233 goto err; 233 goto err;
234 234
235 rcu_read_lock(); 235 task_lock(task);
236 nsp = task_nsproxy(task); 236 nsp = task->nsproxy;
237 if (!nsp || !nsp->mnt_ns) { 237 if (!nsp || !nsp->mnt_ns) {
238 rcu_read_unlock(); 238 task_unlock(task);
239 put_task_struct(task); 239 put_task_struct(task);
240 goto err; 240 goto err;
241 } 241 }
242 ns = nsp->mnt_ns; 242 ns = nsp->mnt_ns;
243 get_mnt_ns(ns); 243 get_mnt_ns(ns);
244 rcu_read_unlock();
245 task_lock(task);
246 if (!task->fs) { 244 if (!task->fs) {
247 task_unlock(task); 245 task_unlock(task);
248 put_task_struct(task); 246 put_task_struct(task);
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index b4ec59d159ac..35fa08fd7739 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -40,32 +40,28 @@ extern struct nsproxy init_nsproxy;
40 * the namespaces access rules are: 40 * the namespaces access rules are:
41 * 41 *
42 * 1. only current task is allowed to change tsk->nsproxy pointer or 42 * 1. only current task is allowed to change tsk->nsproxy pointer or
43 * any pointer on the nsproxy itself 43 * any pointer on the nsproxy itself. Current must hold the task_lock
44 * when changing tsk->nsproxy.
44 * 45 *
45 * 2. when accessing (i.e. reading) current task's namespaces - no 46 * 2. when accessing (i.e. reading) current task's namespaces - no
46 * precautions should be taken - just dereference the pointers 47 * precautions should be taken - just dereference the pointers
47 * 48 *
48 * 3. the access to other task namespaces is performed like this 49 * 3. the access to other task namespaces is performed like this
49 * rcu_read_lock(); 50 * task_lock(task);
50 * nsproxy = task_nsproxy(tsk); 51 * nsproxy = task->nsproxy;
51 * if (nsproxy != NULL) { 52 * if (nsproxy != NULL) {
52 * / * 53 * / *
53 * * work with the namespaces here 54 * * work with the namespaces here
54 * * e.g. get the reference on one of them 55 * * e.g. get the reference on one of them
55 * * / 56 * * /
56 * } / * 57 * } / *
57 * * NULL task_nsproxy() means that this task is 58 * * NULL task->nsproxy means that this task is
58 * * almost dead (zombie) 59 * * almost dead (zombie)
59 * * / 60 * * /
60 * rcu_read_unlock(); 61 * task_unlock(task);
61 * 62 *
62 */ 63 */
63 64
64static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
65{
66 return rcu_dereference(tsk->nsproxy);
67}
68
69int copy_namespaces(unsigned long flags, struct task_struct *tsk); 65int copy_namespaces(unsigned long flags, struct task_struct *tsk);
70void exit_task_namespaces(struct task_struct *tsk); 66void exit_task_namespaces(struct task_struct *tsk);
71void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); 67void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 59451c1e214d..b54468e48e32 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -154,11 +154,11 @@ static void *ipcns_get(struct task_struct *task)
154 struct ipc_namespace *ns = NULL; 154 struct ipc_namespace *ns = NULL;
155 struct nsproxy *nsproxy; 155 struct nsproxy *nsproxy;
156 156
157 rcu_read_lock(); 157 task_lock(task);
158 nsproxy = task_nsproxy(task); 158 nsproxy = task->nsproxy;
159 if (nsproxy) 159 if (nsproxy)
160 ns = get_ipc_ns(nsproxy->ipc_ns); 160 ns = get_ipc_ns(nsproxy->ipc_ns);
161 rcu_read_unlock(); 161 task_unlock(task);
162 162
163 return ns; 163 return ns;
164} 164}
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 8e7811086b82..ef42d0ab3115 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -204,20 +204,13 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
204 204
205 might_sleep(); 205 might_sleep();
206 206
207 task_lock(p);
207 ns = p->nsproxy; 208 ns = p->nsproxy;
209 p->nsproxy = new;
210 task_unlock(p);
208 211
209 rcu_assign_pointer(p->nsproxy, new); 212 if (ns && atomic_dec_and_test(&ns->count))
210
211 if (ns && atomic_dec_and_test(&ns->count)) {
212 /*
213 * wait for others to get what they want from this nsproxy.
214 *
215 * cannot release this nsproxy via the call_rcu() since
216 * put_mnt_ns() will want to sleep
217 */
218 synchronize_rcu();
219 free_nsproxy(ns); 213 free_nsproxy(ns);
220 }
221} 214}
222 215
223void exit_task_namespaces(struct task_struct *p) 216void exit_task_namespaces(struct task_struct *p)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index fd393124e507..883aaaa7de8a 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -93,13 +93,13 @@ static void *utsns_get(struct task_struct *task)
93 struct uts_namespace *ns = NULL; 93 struct uts_namespace *ns = NULL;
94 struct nsproxy *nsproxy; 94 struct nsproxy *nsproxy;
95 95
96 rcu_read_lock(); 96 task_lock(task);
97 nsproxy = task_nsproxy(task); 97 nsproxy = task->nsproxy;
98 if (nsproxy) { 98 if (nsproxy) {
99 ns = nsproxy->uts_ns; 99 ns = nsproxy->uts_ns;
100 get_uts_ns(ns); 100 get_uts_ns(ns);
101 } 101 }
102 rcu_read_unlock(); 102 task_unlock(task);
103 103
104 return ns; 104 return ns;
105} 105}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 85b62691f4f2..7c6b51a58968 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -373,9 +373,11 @@ struct net *get_net_ns_by_pid(pid_t pid)
373 tsk = find_task_by_vpid(pid); 373 tsk = find_task_by_vpid(pid);
374 if (tsk) { 374 if (tsk) {
375 struct nsproxy *nsproxy; 375 struct nsproxy *nsproxy;
376 nsproxy = task_nsproxy(tsk); 376 task_lock(tsk);
377 nsproxy = tsk->nsproxy;
377 if (nsproxy) 378 if (nsproxy)
378 net = get_net(nsproxy->net_ns); 379 net = get_net(nsproxy->net_ns);
380 task_unlock(tsk);
379 } 381 }
380 rcu_read_unlock(); 382 rcu_read_unlock();
381 return net; 383 return net;
@@ -632,11 +634,11 @@ static void *netns_get(struct task_struct *task)
632 struct net *net = NULL; 634 struct net *net = NULL;
633 struct nsproxy *nsproxy; 635 struct nsproxy *nsproxy;
634 636
635 rcu_read_lock(); 637 task_lock(task);
636 nsproxy = task_nsproxy(task); 638 nsproxy = task->nsproxy;
637 if (nsproxy) 639 if (nsproxy)
638 net = get_net(nsproxy->net_ns); 640 net = get_net(nsproxy->net_ns);
639 rcu_read_unlock(); 641 task_unlock(task);
640 642
641 return net; 643 return net;
642} 644}