aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/kcmp.c
diff options
context:
space:
mode:
authorCyrill Gorcunov <gorcunov@openvz.org>2012-05-31 19:26:44 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-31 20:49:32 -0400
commitd97b46a64674a267bc41c9e16132ee2a98c3347d (patch)
tree316f77d212c84aef226684eb05d5d33f40743ac9 /kernel/kcmp.c
parent818411616baf46ceba0cff6f05af3a9b294734f7 (diff)
syscalls, x86: add __NR_kcmp syscall
While doing the checkpoint-restore in the user space one need to determine whether various kernel objects (like mm_struct-s of file_struct-s) are shared between tasks and restore this state. The 2nd step can be solved by using appropriate CLONE_ flags and the unshare syscall, while there's currently no ways for solving the 1st one. One of the ways for checking whether two tasks share e.g. mm_struct is to provide some mm_struct ID of a task to its proc file, but showing such info considered to be not that good for security reasons. Thus after some debates we end up in conclusion that using that named 'comparison' syscall might be the best candidate. So here is it -- __NR_kcmp. It takes up to 5 arguments - the pids of the two tasks (which characteristics should be compared), the comparison type and (in case of comparison of files) two file descriptors. Lookups for pids are done in the caller's PID namespace only. At moment only x86 is supported and tested. [akpm@linux-foundation.org: fix up selftests, warnings] [akpm@linux-foundation.org: include errno.h] [akpm@linux-foundation.org: tweak comment text] Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Pavel Emelyanov <xemul@parallels.com> Cc: Andrey Vagin <avagin@openvz.org> Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Glauber Costa <glommer@parallels.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Tejun Heo <tj@kernel.org> Cc: Matt Helsley <matthltc@us.ibm.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Vasiliy Kulikov <segoon@openwall.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: Valdis.Kletnieks@vt.edu Cc: Michal Marek <mmarek@suse.cz> Cc: Frederic Weisbecker <fweisbec@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/kcmp.c')
-rw-r--r--kernel/kcmp.c196
1 files changed, 196 insertions, 0 deletions
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
new file mode 100644
index 000000000000..30b7b225306c
--- /dev/null
+++ b/kernel/kcmp.c
@@ -0,0 +1,196 @@
1#include <linux/kernel.h>
2#include <linux/syscalls.h>
3#include <linux/fdtable.h>
4#include <linux/string.h>
5#include <linux/random.h>
6#include <linux/module.h>
7#include <linux/init.h>
8#include <linux/errno.h>
9#include <linux/cache.h>
10#include <linux/bug.h>
11#include <linux/err.h>
12#include <linux/kcmp.h>
13
14#include <asm/unistd.h>
15
16/*
17 * We don't expose the real in-memory order of objects for security reasons.
18 * But still the comparison results should be suitable for sorting. So we
19 * obfuscate kernel pointers values and compare the production instead.
20 *
21 * The obfuscation is done in two steps. First we xor the kernel pointer with
22 * a random value, which puts pointer into a new position in a reordered space.
23 * Secondly we multiply the xor production with a large odd random number to
24 * permute its bits even more (the odd multiplier guarantees that the product
25 * is unique ever after the high bits are truncated, since any odd number is
26 * relative prime to 2^n).
27 *
28 * Note also that the obfuscation itself is invisible to userspace and if needed
29 * it can be changed to an alternate scheme.
30 */
31static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
32
33static long kptr_obfuscate(long v, int type)
34{
35 return (v ^ cookies[type][0]) * cookies[type][1];
36}
37
38/*
39 * 0 - equal, i.e. v1 = v2
40 * 1 - less than, i.e. v1 < v2
41 * 2 - greater than, i.e. v1 > v2
42 * 3 - not equal but ordering unavailable (reserved for future)
43 */
44static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
45{
46 long ret;
47
48 ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
49
50 return (ret < 0) | ((ret > 0) << 1);
51}
52
53/* The caller must have pinned the task */
54static struct file *
55get_file_raw_ptr(struct task_struct *task, unsigned int idx)
56{
57 struct file *file = NULL;
58
59 task_lock(task);
60 rcu_read_lock();
61
62 if (task->files)
63 file = fcheck_files(task->files, idx);
64
65 rcu_read_unlock();
66 task_unlock(task);
67
68 return file;
69}
70
71static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
72{
73 if (likely(m2 != m1))
74 mutex_unlock(m2);
75 mutex_unlock(m1);
76}
77
78static int kcmp_lock(struct mutex *m1, struct mutex *m2)
79{
80 int err;
81
82 if (m2 > m1)
83 swap(m1, m2);
84
85 err = mutex_lock_killable(m1);
86 if (!err && likely(m1 != m2)) {
87 err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
88 if (err)
89 mutex_unlock(m1);
90 }
91
92 return err;
93}
94
95SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
96 unsigned long, idx1, unsigned long, idx2)
97{
98 struct task_struct *task1, *task2;
99 int ret;
100
101 rcu_read_lock();
102
103 /*
104 * Tasks are looked up in caller's PID namespace only.
105 */
106 task1 = find_task_by_vpid(pid1);
107 task2 = find_task_by_vpid(pid2);
108 if (!task1 || !task2)
109 goto err_no_task;
110
111 get_task_struct(task1);
112 get_task_struct(task2);
113
114 rcu_read_unlock();
115
116 /*
117 * One should have enough rights to inspect task details.
118 */
119 ret = kcmp_lock(&task1->signal->cred_guard_mutex,
120 &task2->signal->cred_guard_mutex);
121 if (ret)
122 goto err;
123 if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
124 !ptrace_may_access(task2, PTRACE_MODE_READ)) {
125 ret = -EPERM;
126 goto err_unlock;
127 }
128
129 switch (type) {
130 case KCMP_FILE: {
131 struct file *filp1, *filp2;
132
133 filp1 = get_file_raw_ptr(task1, idx1);
134 filp2 = get_file_raw_ptr(task2, idx2);
135
136 if (filp1 && filp2)
137 ret = kcmp_ptr(filp1, filp2, KCMP_FILE);
138 else
139 ret = -EBADF;
140 break;
141 }
142 case KCMP_VM:
143 ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
144 break;
145 case KCMP_FILES:
146 ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
147 break;
148 case KCMP_FS:
149 ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
150 break;
151 case KCMP_SIGHAND:
152 ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
153 break;
154 case KCMP_IO:
155 ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
156 break;
157 case KCMP_SYSVSEM:
158#ifdef CONFIG_SYSVIPC
159 ret = kcmp_ptr(task1->sysvsem.undo_list,
160 task2->sysvsem.undo_list,
161 KCMP_SYSVSEM);
162#else
163 ret = -EOPNOTSUPP;
164#endif
165 break;
166 default:
167 ret = -EINVAL;
168 break;
169 }
170
171err_unlock:
172 kcmp_unlock(&task1->signal->cred_guard_mutex,
173 &task2->signal->cred_guard_mutex);
174err:
175 put_task_struct(task1);
176 put_task_struct(task2);
177
178 return ret;
179
180err_no_task:
181 rcu_read_unlock();
182 return -ESRCH;
183}
184
185static __init int kcmp_cookies_init(void)
186{
187 int i;
188
189 get_random_bytes(cookies, sizeof(cookies));
190
191 for (i = 0; i < KCMP_TYPES; i++)
192 cookies[i][1] |= (~(~0UL >> 1) | 1);
193
194 return 0;
195}
196arch_initcall(kcmp_cookies_init);