aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2009-02-09 15:05:49 -0500
committerJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>2010-10-20 19:22:29 -0400
commit1c5de1939c204bde9cce87f4eb3d26e9f9eb732b (patch)
treec67089c6f7d482623b89710938f7e8ee9f8e7806
parent1246ae0bb992f106a245eea2b8dd901ced868e7a (diff)
xen: add privcmd driver
The privcmd interface in xenfs allows the tool stack in the privileged domain to get fairly direct access to the hypervisor in order to do various management things such as domain construction. [ Impact: new xenfs interface for privileged operations ] Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
-rw-r--r--drivers/xen/xenfs/Makefile2
-rw-r--r--drivers/xen/xenfs/privcmd.c436
-rw-r--r--drivers/xen/xenfs/super.c2
-rw-r--r--drivers/xen/xenfs/xenfs.h1
-rw-r--r--include/xen/Kbuild1
-rw-r--r--include/xen/privcmd.h80
6 files changed, 521 insertions, 1 deletions
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index 5d45ff13cc01..4a0be9a82af3 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_XENFS) += xenfs.o 1obj-$(CONFIG_XENFS) += xenfs.o
2 2
3xenfs-y = super.o xenbus.o 3xenfs-y = super.o xenbus.o
4xenfs-$(CONFIG_XEN_DOM0) += xenstored.o 4xenfs-$(CONFIG_XEN_DOM0) += xenstored.o privcmd.o
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
new file mode 100644
index 000000000000..c7192f314f86
--- /dev/null
+++ b/drivers/xen/xenfs/privcmd.c
@@ -0,0 +1,436 @@
1/******************************************************************************
2 * privcmd.c
3 *
4 * Interface to privileged domain-0 commands.
5 *
6 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
7 */
8
9#include <linux/kernel.h>
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/string.h>
13#include <linux/errno.h>
14#include <linux/mm.h>
15#include <linux/mman.h>
16#include <linux/uaccess.h>
17#include <linux/swap.h>
18#include <linux/smp_lock.h>
19#include <linux/highmem.h>
20#include <linux/pagemap.h>
21#include <linux/seq_file.h>
22
23#include <asm/pgalloc.h>
24#include <asm/pgtable.h>
25#include <asm/tlb.h>
26#include <asm/xen/hypervisor.h>
27#include <asm/xen/hypercall.h>
28
29#include <xen/xen.h>
30#include <xen/privcmd.h>
31#include <xen/interface/xen.h>
32#include <xen/features.h>
33#include <xen/page.h>
34
35#ifndef HAVE_ARCH_PRIVCMD_MMAP
36static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
37#endif
38
39struct remap_data {
40 unsigned long mfn;
41 unsigned domid;
42 pgprot_t prot;
43};
44
45static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
46 unsigned long addr, void *data)
47{
48 struct remap_data *rmd = data;
49 pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
50
51 xen_set_domain_pte(ptep, pte, rmd->domid);
52
53 return 0;
54}
55
56int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr,
57 unsigned long mfn, unsigned long size,
58 pgprot_t prot, unsigned domid)
59{
60 struct remap_data rmd;
61 int err;
62
63 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
64
65 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
66
67 rmd.mfn = mfn;
68 rmd.prot = prot;
69 rmd.domid = domid;
70
71 err = apply_to_page_range(vma->vm_mm, addr, size,
72 remap_area_mfn_pte_fn, &rmd);
73
74 return err;
75}
76
77static long privcmd_ioctl_hypercall(void __user *udata)
78{
79 struct privcmd_hypercall hypercall;
80 long ret;
81
82 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
83 return -EFAULT;
84
85 ret = privcmd_call(hypercall.op,
86 hypercall.arg[0], hypercall.arg[1],
87 hypercall.arg[2], hypercall.arg[3],
88 hypercall.arg[4]);
89
90 return ret;
91}
92
93static void free_page_list(struct list_head *pages)
94{
95 struct page *p, *n;
96
97 list_for_each_entry_safe(p, n, pages, lru)
98 __free_page(p);
99
100 INIT_LIST_HEAD(pages);
101}
102
103/*
104 * Given an array of items in userspace, return a list of pages
105 * containing the data. If copying fails, either because of memory
106 * allocation failure or a problem reading user memory, return an
107 * error code; its up to the caller to dispose of any partial list.
108 */
109static int gather_array(struct list_head *pagelist,
110 unsigned nelem, size_t size,
111 void __user *data)
112{
113 unsigned pageidx;
114 void *pagedata;
115 int ret;
116
117 if (size > PAGE_SIZE)
118 return 0;
119
120 pageidx = PAGE_SIZE;
121 pagedata = NULL; /* quiet, gcc */
122 while (nelem--) {
123 if (pageidx > PAGE_SIZE-size) {
124 struct page *page = alloc_page(GFP_KERNEL);
125
126 ret = -ENOMEM;
127 if (page == NULL)
128 goto fail;
129
130 pagedata = page_address(page);
131
132 list_add_tail(&page->lru, pagelist);
133 pageidx = 0;
134 }
135
136 ret = -EFAULT;
137 if (copy_from_user(pagedata + pageidx, data, size))
138 goto fail;
139
140 data += size;
141 pageidx += size;
142 }
143
144 ret = 0;
145
146fail:
147 return ret;
148}
149
150/*
151 * Call function "fn" on each element of the array fragmented
152 * over a list of pages.
153 */
154static int traverse_pages(unsigned nelem, size_t size,
155 struct list_head *pos,
156 int (*fn)(void *data, void *state),
157 void *state)
158{
159 void *pagedata;
160 unsigned pageidx;
161 int ret;
162
163 BUG_ON(size > PAGE_SIZE);
164
165 pageidx = PAGE_SIZE;
166 pagedata = NULL; /* hush, gcc */
167
168 while (nelem--) {
169 if (pageidx > PAGE_SIZE-size) {
170 struct page *page;
171 pos = pos->next;
172 page = list_entry(pos, struct page, lru);
173 pagedata = page_address(page);
174 pageidx = 0;
175 }
176
177 ret = (*fn)(pagedata + pageidx, state);
178 if (ret)
179 break;
180 pageidx += size;
181 }
182
183 return ret;
184}
185
186struct mmap_mfn_state {
187 unsigned long va;
188 struct vm_area_struct *vma;
189 domid_t domain;
190};
191
192static int mmap_mfn_range(void *data, void *state)
193{
194 struct privcmd_mmap_entry *msg = data;
195 struct mmap_mfn_state *st = state;
196 struct vm_area_struct *vma = st->vma;
197 int rc;
198
199 /* Do not allow range to wrap the address space. */
200 if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
201 ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
202 return -EINVAL;
203
204 /* Range chunks must be contiguous in va space. */
205 if ((msg->va != st->va) ||
206 ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
207 return -EINVAL;
208
209 rc = remap_domain_mfn_range(vma,
210 msg->va & PAGE_MASK,
211 msg->mfn,
212 msg->npages << PAGE_SHIFT,
213 vma->vm_page_prot,
214 st->domain);
215 if (rc < 0)
216 return rc;
217
218 st->va += msg->npages << PAGE_SHIFT;
219
220 return 0;
221}
222
223static long privcmd_ioctl_mmap(void __user *udata)
224{
225 struct privcmd_mmap mmapcmd;
226 struct mm_struct *mm = current->mm;
227 struct vm_area_struct *vma;
228 int rc;
229 LIST_HEAD(pagelist);
230 struct mmap_mfn_state state;
231
232 if (!xen_initial_domain())
233 return -EPERM;
234
235 if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
236 return -EFAULT;
237
238 rc = gather_array(&pagelist,
239 mmapcmd.num, sizeof(struct privcmd_mmap_entry),
240 mmapcmd.entry);
241
242 if (rc || list_empty(&pagelist))
243 goto out;
244
245 down_write(&mm->mmap_sem);
246
247 {
248 struct page *page = list_first_entry(&pagelist,
249 struct page, lru);
250 struct privcmd_mmap_entry *msg = page_address(page);
251
252 vma = find_vma(mm, msg->va);
253 rc = -EINVAL;
254
255 if (!vma || (msg->va != vma->vm_start) ||
256 !privcmd_enforce_singleshot_mapping(vma))
257 goto out_up;
258 }
259
260 state.va = vma->vm_start;
261 state.vma = vma;
262 state.domain = mmapcmd.dom;
263
264 rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
265 &pagelist,
266 mmap_mfn_range, &state);
267
268
269out_up:
270 up_write(&mm->mmap_sem);
271
272out:
273 free_page_list(&pagelist);
274
275 return rc;
276}
277
278struct mmap_batch_state {
279 domid_t domain;
280 unsigned long va;
281 struct vm_area_struct *vma;
282 int err;
283
284 xen_pfn_t __user *user;
285};
286
287static int mmap_batch_fn(void *data, void *state)
288{
289 xen_pfn_t *mfnp = data;
290 struct mmap_batch_state *st = state;
291
292 if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK,
293 *mfnp, PAGE_SIZE,
294 st->vma->vm_page_prot, st->domain) < 0) {
295 *mfnp |= 0xf0000000U;
296 st->err++;
297 }
298 st->va += PAGE_SIZE;
299
300 return 0;
301}
302
303static int mmap_return_errors(void *data, void *state)
304{
305 xen_pfn_t *mfnp = data;
306 struct mmap_batch_state *st = state;
307
308 put_user(*mfnp, st->user++);
309
310 return 0;
311}
312
313static long privcmd_ioctl_mmap_batch(void __user *udata)
314{
315 int ret;
316 struct privcmd_mmapbatch m;
317 struct mm_struct *mm = current->mm;
318 struct vm_area_struct *vma;
319 unsigned long nr_pages;
320 LIST_HEAD(pagelist);
321 struct mmap_batch_state state;
322
323 if (!xen_initial_domain())
324 return -EPERM;
325
326 if (copy_from_user(&m, udata, sizeof(m)))
327 return -EFAULT;
328
329 nr_pages = m.num;
330 if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
331 return -EINVAL;
332
333 ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
334 m.arr);
335
336 if (ret || list_empty(&pagelist))
337 goto out;
338
339 down_write(&mm->mmap_sem);
340
341 vma = find_vma(mm, m.addr);
342 ret = -EINVAL;
343 if (!vma ||
344 (m.addr != vma->vm_start) ||
345 ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
346 !privcmd_enforce_singleshot_mapping(vma)) {
347 up_write(&mm->mmap_sem);
348 goto out;
349 }
350
351 state.domain = m.dom;
352 state.vma = vma;
353 state.va = m.addr;
354 state.err = 0;
355
356 ret = traverse_pages(m.num, sizeof(xen_pfn_t),
357 &pagelist, mmap_batch_fn, &state);
358
359 up_write(&mm->mmap_sem);
360
361 if (state.err > 0) {
362 ret = state.err;
363
364 state.user = udata;
365 traverse_pages(m.num, sizeof(xen_pfn_t),
366 &pagelist,
367 mmap_return_errors, &state);
368 }
369
370out:
371 free_page_list(&pagelist);
372
373 return ret;
374}
375
376static long privcmd_ioctl(struct file *file,
377 unsigned int cmd, unsigned long data)
378{
379 int ret = -ENOSYS;
380 void __user *udata = (void __user *) data;
381
382 switch (cmd) {
383 case IOCTL_PRIVCMD_HYPERCALL:
384 ret = privcmd_ioctl_hypercall(udata);
385 break;
386
387 case IOCTL_PRIVCMD_MMAP:
388 ret = privcmd_ioctl_mmap(udata);
389 break;
390
391 case IOCTL_PRIVCMD_MMAPBATCH:
392 ret = privcmd_ioctl_mmap_batch(udata);
393 break;
394
395 default:
396 ret = -EINVAL;
397 break;
398 }
399
400 return ret;
401}
402
403#ifndef HAVE_ARCH_PRIVCMD_MMAP
404static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
405{
406 return VM_FAULT_SIGBUS;
407}
408
409static struct vm_operations_struct privcmd_vm_ops = {
410 .fault = privcmd_fault
411};
412
413static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
414{
415 /* Unsupported for auto-translate guests. */
416 if (xen_feature(XENFEAT_auto_translated_physmap))
417 return -ENOSYS;
418
419 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
420 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
421 vma->vm_ops = &privcmd_vm_ops;
422 vma->vm_private_data = NULL;
423
424 return 0;
425}
426
427static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
428{
429 return (xchg(&vma->vm_private_data, (void *)1) == NULL);
430}
431#endif
432
433const struct file_operations privcmd_file_ops = {
434 .unlocked_ioctl = privcmd_ioctl,
435 .mmap = privcmd_mmap,
436};
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 3cf7707217f2..8c7462866e90 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -96,6 +96,8 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
96 &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR); 96 &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR);
97 xenfs_create_file(sb, sb->s_root, "xsd_port", 97 xenfs_create_file(sb, sb->s_root, "xsd_port",
98 &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR); 98 &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR);
99 xenfs_create_file(sb, sb->s_root, "privcmd",
100 &privcmd_file_ops, NULL, S_IRUSR|S_IWUSR);
99 } 101 }
100 102
101 return rc; 103 return rc;
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index 5056306e7aa8..b68aa6200003 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -2,6 +2,7 @@
2#define _XENFS_XENBUS_H 2#define _XENFS_XENBUS_H
3 3
4extern const struct file_operations xenbus_file_ops; 4extern const struct file_operations xenbus_file_ops;
5extern const struct file_operations privcmd_file_ops;
5extern const struct file_operations xsd_kva_file_ops; 6extern const struct file_operations xsd_kva_file_ops;
6extern const struct file_operations xsd_port_file_ops; 7extern const struct file_operations xsd_port_file_ops;
7 8
diff --git a/include/xen/Kbuild b/include/xen/Kbuild
index 4e65c16a445b..84ad8f02fee5 100644
--- a/include/xen/Kbuild
+++ b/include/xen/Kbuild
@@ -1 +1,2 @@
1header-y += evtchn.h 1header-y += evtchn.h
2header-y += privcmd.h
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
new file mode 100644
index 000000000000..b42cdfd92fee
--- /dev/null
+++ b/include/xen/privcmd.h
@@ -0,0 +1,80 @@
1/******************************************************************************
2 * privcmd.h
3 *
4 * Interface to /proc/xen/privcmd.
5 *
6 * Copyright (c) 2003-2005, K A Fraser
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#ifndef __LINUX_PUBLIC_PRIVCMD_H__
34#define __LINUX_PUBLIC_PRIVCMD_H__
35
36#include <linux/types.h>
37
38typedef unsigned long xen_pfn_t;
39
40#ifndef __user
41#define __user
42#endif
43
44struct privcmd_hypercall {
45 __u64 op;
46 __u64 arg[5];
47};
48
49struct privcmd_mmap_entry {
50 __u64 va;
51 __u64 mfn;
52 __u64 npages;
53};
54
55struct privcmd_mmap {
56 int num;
57 domid_t dom; /* target domain */
58 struct privcmd_mmap_entry __user *entry;
59};
60
61struct privcmd_mmapbatch {
62 int num; /* number of pages to populate */
63 domid_t dom; /* target domain */
64 __u64 addr; /* virtual address */
65 xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
66};
67
68/*
69 * @cmd: IOCTL_PRIVCMD_HYPERCALL
70 * @arg: &privcmd_hypercall_t
71 * Return: Value returned from execution of the specified hypercall.
72 */
73#define IOCTL_PRIVCMD_HYPERCALL \
74 _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
75#define IOCTL_PRIVCMD_MMAP \
76 _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
77#define IOCTL_PRIVCMD_MMAPBATCH \
78 _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
79
80#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */