diff options
Diffstat (limited to 'drivers/xen/xenfs/privcmd.c')
-rw-r--r-- | drivers/xen/xenfs/privcmd.c | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c new file mode 100644 index 000000000000..c7192f314f86 --- /dev/null +++ b/drivers/xen/xenfs/privcmd.c | |||
@@ -0,0 +1,436 @@ | |||
1 | /****************************************************************************** | ||
2 | * privcmd.c | ||
3 | * | ||
4 | * Interface to privileged domain-0 commands. | ||
5 | * | ||
6 | * Copyright (c) 2002-2004, K A Fraser, B Dragovic | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/mm.h> | ||
15 | #include <linux/mman.h> | ||
16 | #include <linux/uaccess.h> | ||
17 | #include <linux/swap.h> | ||
18 | #include <linux/smp_lock.h> | ||
19 | #include <linux/highmem.h> | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/seq_file.h> | ||
22 | |||
23 | #include <asm/pgalloc.h> | ||
24 | #include <asm/pgtable.h> | ||
25 | #include <asm/tlb.h> | ||
26 | #include <asm/xen/hypervisor.h> | ||
27 | #include <asm/xen/hypercall.h> | ||
28 | |||
29 | #include <xen/xen.h> | ||
30 | #include <xen/privcmd.h> | ||
31 | #include <xen/interface/xen.h> | ||
32 | #include <xen/features.h> | ||
33 | #include <xen/page.h> | ||
34 | |||
35 | #ifndef HAVE_ARCH_PRIVCMD_MMAP | ||
36 | static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); | ||
37 | #endif | ||
38 | |||
39 | struct remap_data { | ||
40 | unsigned long mfn; | ||
41 | unsigned domid; | ||
42 | pgprot_t prot; | ||
43 | }; | ||
44 | |||
45 | static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, | ||
46 | unsigned long addr, void *data) | ||
47 | { | ||
48 | struct remap_data *rmd = data; | ||
49 | pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); | ||
50 | |||
51 | xen_set_domain_pte(ptep, pte, rmd->domid); | ||
52 | |||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr, | ||
57 | unsigned long mfn, unsigned long size, | ||
58 | pgprot_t prot, unsigned domid) | ||
59 | { | ||
60 | struct remap_data rmd; | ||
61 | int err; | ||
62 | |||
63 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); | ||
64 | |||
65 | vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; | ||
66 | |||
67 | rmd.mfn = mfn; | ||
68 | rmd.prot = prot; | ||
69 | rmd.domid = domid; | ||
70 | |||
71 | err = apply_to_page_range(vma->vm_mm, addr, size, | ||
72 | remap_area_mfn_pte_fn, &rmd); | ||
73 | |||
74 | return err; | ||
75 | } | ||
76 | |||
77 | static long privcmd_ioctl_hypercall(void __user *udata) | ||
78 | { | ||
79 | struct privcmd_hypercall hypercall; | ||
80 | long ret; | ||
81 | |||
82 | if (copy_from_user(&hypercall, udata, sizeof(hypercall))) | ||
83 | return -EFAULT; | ||
84 | |||
85 | ret = privcmd_call(hypercall.op, | ||
86 | hypercall.arg[0], hypercall.arg[1], | ||
87 | hypercall.arg[2], hypercall.arg[3], | ||
88 | hypercall.arg[4]); | ||
89 | |||
90 | return ret; | ||
91 | } | ||
92 | |||
93 | static void free_page_list(struct list_head *pages) | ||
94 | { | ||
95 | struct page *p, *n; | ||
96 | |||
97 | list_for_each_entry_safe(p, n, pages, lru) | ||
98 | __free_page(p); | ||
99 | |||
100 | INIT_LIST_HEAD(pages); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Given an array of items in userspace, return a list of pages | ||
105 | * containing the data. If copying fails, either because of memory | ||
106 | * allocation failure or a problem reading user memory, return an | ||
107 | * error code; its up to the caller to dispose of any partial list. | ||
108 | */ | ||
109 | static int gather_array(struct list_head *pagelist, | ||
110 | unsigned nelem, size_t size, | ||
111 | void __user *data) | ||
112 | { | ||
113 | unsigned pageidx; | ||
114 | void *pagedata; | ||
115 | int ret; | ||
116 | |||
117 | if (size > PAGE_SIZE) | ||
118 | return 0; | ||
119 | |||
120 | pageidx = PAGE_SIZE; | ||
121 | pagedata = NULL; /* quiet, gcc */ | ||
122 | while (nelem--) { | ||
123 | if (pageidx > PAGE_SIZE-size) { | ||
124 | struct page *page = alloc_page(GFP_KERNEL); | ||
125 | |||
126 | ret = -ENOMEM; | ||
127 | if (page == NULL) | ||
128 | goto fail; | ||
129 | |||
130 | pagedata = page_address(page); | ||
131 | |||
132 | list_add_tail(&page->lru, pagelist); | ||
133 | pageidx = 0; | ||
134 | } | ||
135 | |||
136 | ret = -EFAULT; | ||
137 | if (copy_from_user(pagedata + pageidx, data, size)) | ||
138 | goto fail; | ||
139 | |||
140 | data += size; | ||
141 | pageidx += size; | ||
142 | } | ||
143 | |||
144 | ret = 0; | ||
145 | |||
146 | fail: | ||
147 | return ret; | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * Call function "fn" on each element of the array fragmented | ||
152 | * over a list of pages. | ||
153 | */ | ||
154 | static int traverse_pages(unsigned nelem, size_t size, | ||
155 | struct list_head *pos, | ||
156 | int (*fn)(void *data, void *state), | ||
157 | void *state) | ||
158 | { | ||
159 | void *pagedata; | ||
160 | unsigned pageidx; | ||
161 | int ret; | ||
162 | |||
163 | BUG_ON(size > PAGE_SIZE); | ||
164 | |||
165 | pageidx = PAGE_SIZE; | ||
166 | pagedata = NULL; /* hush, gcc */ | ||
167 | |||
168 | while (nelem--) { | ||
169 | if (pageidx > PAGE_SIZE-size) { | ||
170 | struct page *page; | ||
171 | pos = pos->next; | ||
172 | page = list_entry(pos, struct page, lru); | ||
173 | pagedata = page_address(page); | ||
174 | pageidx = 0; | ||
175 | } | ||
176 | |||
177 | ret = (*fn)(pagedata + pageidx, state); | ||
178 | if (ret) | ||
179 | break; | ||
180 | pageidx += size; | ||
181 | } | ||
182 | |||
183 | return ret; | ||
184 | } | ||
185 | |||
186 | struct mmap_mfn_state { | ||
187 | unsigned long va; | ||
188 | struct vm_area_struct *vma; | ||
189 | domid_t domain; | ||
190 | }; | ||
191 | |||
192 | static int mmap_mfn_range(void *data, void *state) | ||
193 | { | ||
194 | struct privcmd_mmap_entry *msg = data; | ||
195 | struct mmap_mfn_state *st = state; | ||
196 | struct vm_area_struct *vma = st->vma; | ||
197 | int rc; | ||
198 | |||
199 | /* Do not allow range to wrap the address space. */ | ||
200 | if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || | ||
201 | ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) | ||
202 | return -EINVAL; | ||
203 | |||
204 | /* Range chunks must be contiguous in va space. */ | ||
205 | if ((msg->va != st->va) || | ||
206 | ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) | ||
207 | return -EINVAL; | ||
208 | |||
209 | rc = remap_domain_mfn_range(vma, | ||
210 | msg->va & PAGE_MASK, | ||
211 | msg->mfn, | ||
212 | msg->npages << PAGE_SHIFT, | ||
213 | vma->vm_page_prot, | ||
214 | st->domain); | ||
215 | if (rc < 0) | ||
216 | return rc; | ||
217 | |||
218 | st->va += msg->npages << PAGE_SHIFT; | ||
219 | |||
220 | return 0; | ||
221 | } | ||
222 | |||
223 | static long privcmd_ioctl_mmap(void __user *udata) | ||
224 | { | ||
225 | struct privcmd_mmap mmapcmd; | ||
226 | struct mm_struct *mm = current->mm; | ||
227 | struct vm_area_struct *vma; | ||
228 | int rc; | ||
229 | LIST_HEAD(pagelist); | ||
230 | struct mmap_mfn_state state; | ||
231 | |||
232 | if (!xen_initial_domain()) | ||
233 | return -EPERM; | ||
234 | |||
235 | if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) | ||
236 | return -EFAULT; | ||
237 | |||
238 | rc = gather_array(&pagelist, | ||
239 | mmapcmd.num, sizeof(struct privcmd_mmap_entry), | ||
240 | mmapcmd.entry); | ||
241 | |||
242 | if (rc || list_empty(&pagelist)) | ||
243 | goto out; | ||
244 | |||
245 | down_write(&mm->mmap_sem); | ||
246 | |||
247 | { | ||
248 | struct page *page = list_first_entry(&pagelist, | ||
249 | struct page, lru); | ||
250 | struct privcmd_mmap_entry *msg = page_address(page); | ||
251 | |||
252 | vma = find_vma(mm, msg->va); | ||
253 | rc = -EINVAL; | ||
254 | |||
255 | if (!vma || (msg->va != vma->vm_start) || | ||
256 | !privcmd_enforce_singleshot_mapping(vma)) | ||
257 | goto out_up; | ||
258 | } | ||
259 | |||
260 | state.va = vma->vm_start; | ||
261 | state.vma = vma; | ||
262 | state.domain = mmapcmd.dom; | ||
263 | |||
264 | rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), | ||
265 | &pagelist, | ||
266 | mmap_mfn_range, &state); | ||
267 | |||
268 | |||
269 | out_up: | ||
270 | up_write(&mm->mmap_sem); | ||
271 | |||
272 | out: | ||
273 | free_page_list(&pagelist); | ||
274 | |||
275 | return rc; | ||
276 | } | ||
277 | |||
278 | struct mmap_batch_state { | ||
279 | domid_t domain; | ||
280 | unsigned long va; | ||
281 | struct vm_area_struct *vma; | ||
282 | int err; | ||
283 | |||
284 | xen_pfn_t __user *user; | ||
285 | }; | ||
286 | |||
287 | static int mmap_batch_fn(void *data, void *state) | ||
288 | { | ||
289 | xen_pfn_t *mfnp = data; | ||
290 | struct mmap_batch_state *st = state; | ||
291 | |||
292 | if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, | ||
293 | *mfnp, PAGE_SIZE, | ||
294 | st->vma->vm_page_prot, st->domain) < 0) { | ||
295 | *mfnp |= 0xf0000000U; | ||
296 | st->err++; | ||
297 | } | ||
298 | st->va += PAGE_SIZE; | ||
299 | |||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | static int mmap_return_errors(void *data, void *state) | ||
304 | { | ||
305 | xen_pfn_t *mfnp = data; | ||
306 | struct mmap_batch_state *st = state; | ||
307 | |||
308 | put_user(*mfnp, st->user++); | ||
309 | |||
310 | return 0; | ||
311 | } | ||
312 | |||
313 | static long privcmd_ioctl_mmap_batch(void __user *udata) | ||
314 | { | ||
315 | int ret; | ||
316 | struct privcmd_mmapbatch m; | ||
317 | struct mm_struct *mm = current->mm; | ||
318 | struct vm_area_struct *vma; | ||
319 | unsigned long nr_pages; | ||
320 | LIST_HEAD(pagelist); | ||
321 | struct mmap_batch_state state; | ||
322 | |||
323 | if (!xen_initial_domain()) | ||
324 | return -EPERM; | ||
325 | |||
326 | if (copy_from_user(&m, udata, sizeof(m))) | ||
327 | return -EFAULT; | ||
328 | |||
329 | nr_pages = m.num; | ||
330 | if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) | ||
331 | return -EINVAL; | ||
332 | |||
333 | ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), | ||
334 | m.arr); | ||
335 | |||
336 | if (ret || list_empty(&pagelist)) | ||
337 | goto out; | ||
338 | |||
339 | down_write(&mm->mmap_sem); | ||
340 | |||
341 | vma = find_vma(mm, m.addr); | ||
342 | ret = -EINVAL; | ||
343 | if (!vma || | ||
344 | (m.addr != vma->vm_start) || | ||
345 | ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || | ||
346 | !privcmd_enforce_singleshot_mapping(vma)) { | ||
347 | up_write(&mm->mmap_sem); | ||
348 | goto out; | ||
349 | } | ||
350 | |||
351 | state.domain = m.dom; | ||
352 | state.vma = vma; | ||
353 | state.va = m.addr; | ||
354 | state.err = 0; | ||
355 | |||
356 | ret = traverse_pages(m.num, sizeof(xen_pfn_t), | ||
357 | &pagelist, mmap_batch_fn, &state); | ||
358 | |||
359 | up_write(&mm->mmap_sem); | ||
360 | |||
361 | if (state.err > 0) { | ||
362 | ret = state.err; | ||
363 | |||
364 | state.user = udata; | ||
365 | traverse_pages(m.num, sizeof(xen_pfn_t), | ||
366 | &pagelist, | ||
367 | mmap_return_errors, &state); | ||
368 | } | ||
369 | |||
370 | out: | ||
371 | free_page_list(&pagelist); | ||
372 | |||
373 | return ret; | ||
374 | } | ||
375 | |||
376 | static long privcmd_ioctl(struct file *file, | ||
377 | unsigned int cmd, unsigned long data) | ||
378 | { | ||
379 | int ret = -ENOSYS; | ||
380 | void __user *udata = (void __user *) data; | ||
381 | |||
382 | switch (cmd) { | ||
383 | case IOCTL_PRIVCMD_HYPERCALL: | ||
384 | ret = privcmd_ioctl_hypercall(udata); | ||
385 | break; | ||
386 | |||
387 | case IOCTL_PRIVCMD_MMAP: | ||
388 | ret = privcmd_ioctl_mmap(udata); | ||
389 | break; | ||
390 | |||
391 | case IOCTL_PRIVCMD_MMAPBATCH: | ||
392 | ret = privcmd_ioctl_mmap_batch(udata); | ||
393 | break; | ||
394 | |||
395 | default: | ||
396 | ret = -EINVAL; | ||
397 | break; | ||
398 | } | ||
399 | |||
400 | return ret; | ||
401 | } | ||
402 | |||
403 | #ifndef HAVE_ARCH_PRIVCMD_MMAP | ||
404 | static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
405 | { | ||
406 | return VM_FAULT_SIGBUS; | ||
407 | } | ||
408 | |||
409 | static struct vm_operations_struct privcmd_vm_ops = { | ||
410 | .fault = privcmd_fault | ||
411 | }; | ||
412 | |||
413 | static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) | ||
414 | { | ||
415 | /* Unsupported for auto-translate guests. */ | ||
416 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
417 | return -ENOSYS; | ||
418 | |||
419 | /* DONTCOPY is essential for Xen as copy_page_range is broken. */ | ||
420 | vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; | ||
421 | vma->vm_ops = &privcmd_vm_ops; | ||
422 | vma->vm_private_data = NULL; | ||
423 | |||
424 | return 0; | ||
425 | } | ||
426 | |||
427 | static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) | ||
428 | { | ||
429 | return (xchg(&vma->vm_private_data, (void *)1) == NULL); | ||
430 | } | ||
431 | #endif | ||
432 | |||
433 | const struct file_operations privcmd_file_ops = { | ||
434 | .unlocked_ioctl = privcmd_ioctl, | ||
435 | .mmap = privcmd_mmap, | ||
436 | }; | ||