aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavel Emelyanov <xemul@parallels.com>2012-01-12 20:20:27 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 23:13:11 -0500
commitb8f566b04d3cddd192cfd2418ae6d54ac6353792 (patch)
tree32a5bf86548cd43feff4822d800b6a99e157b5d7
parentf5138e42211d4e8bfbd6ac5b3816348da1533433 (diff)
sysctl: add the kernel.ns_last_pid control
The sysctl works on the current task's pid namespace, getting and setting its last_pid field. Writing is allowed for CAP_SYS_ADMIN-capable tasks thus making it possible to create a task with desired pid value. This ability is required badly for the checkpoint/restore in userspace. This approach suits all the parties for now. Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Acked-by: Tejun Heo <tj@kernel.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Cyrill Gorcunov <gorcunov@openvz.org> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Serge Hallyn <serue@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/sysctl/kernel.txt8
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/pid_namespace.c31
3 files changed, 42 insertions, 1 deletions
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 6d8cd8b2c30d..8c20fbd8b42d 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -415,6 +415,14 @@ PIDs of value pid_max or larger are not allocated.
415 415
416============================================================== 416==============================================================
417 417
418ns_last_pid:
419
420The last pid allocated in the current (the one task using this sysctl
421lives in) pid namespace. When selecting a pid for a next task on fork
422kernel tries to allocate a number starting from this one.
423
424==============================================================
425
418powersave-nap: (PPC only) 426powersave-nap: (PPC only)
419 427
420If set, Linux-PPC will use the 'nap' mode of powersaving, 428If set, Linux-PPC will use the 'nap' mode of powersaving,
diff --git a/kernel/pid.c b/kernel/pid.c
index fa5f72227e5f..ce8e00deaccb 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -137,7 +137,9 @@ static int pid_before(int base, int a, int b)
137} 137}
138 138
139/* 139/*
140 * We might be racing with someone else trying to set pid_ns->last_pid. 140 * We might be racing with someone else trying to set pid_ns->last_pid
141 * at the pid allocation time (there's also a sysctl for this, but racing
142 * with this one is OK, see comment in kernel/pid_namespace.c about it).
141 * We want the winner to have the "later" value, because if the 143 * We want the winner to have the "later" value, because if the
142 * "earlier" value prevails, then a pid may get reused immediately. 144 * "earlier" value prevails, then a pid may get reused immediately.
143 * 145 *
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index e9c9adc84ca6..a8968396046d 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -191,9 +191,40 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
191 return; 191 return;
192} 192}
193 193
194static int pid_ns_ctl_handler(struct ctl_table *table, int write,
195 void __user *buffer, size_t *lenp, loff_t *ppos)
196{
197 struct ctl_table tmp = *table;
198
199 if (write && !capable(CAP_SYS_ADMIN))
200 return -EPERM;
201
202 /*
203 * Writing directly to ns' last_pid field is OK, since this field
204 * is volatile in a living namespace anyway and a code writing to
205 * it should synchronize its usage with external means.
206 */
207
208 tmp.data = &current->nsproxy->pid_ns->last_pid;
209 return proc_dointvec(&tmp, write, buffer, lenp, ppos);
210}
211
212static struct ctl_table pid_ns_ctl_table[] = {
213 {
214 .procname = "ns_last_pid",
215 .maxlen = sizeof(int),
216 .mode = 0666, /* permissions are checked in the handler */
217 .proc_handler = pid_ns_ctl_handler,
218 },
219 { }
220};
221
222static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
223
194static __init int pid_namespaces_init(void) 224static __init int pid_namespaces_init(void)
195{ 225{
196 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); 226 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
227 register_sysctl_paths(kern_path, pid_ns_ctl_table);
197 return 0; 228 return 0;
198} 229}
199 230