aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@xensource.com>2007-07-17 21:37:06 -0400
committerJeremy Fitzhardinge <jeremy@goop.org>2007-07-18 11:47:44 -0400
commitad9a86121f5a374b48ce2924f8a9d7e94a04db27 (patch)
treec14af462957ce9ee6de3e4537e15879c25a679aa /drivers
parentb536b4b9623084d86f2b1f19cb44a2d6d74f00bf (diff)
xen: Add grant table support
Add Xen 'grant table' driver which allows granting of access to selected local memory pages by other virtual machines and, symmetrically, the mapping of remote memory pages which other virtual machines have granted access to. This driver is a prerequisite for many of the Xen virtual device drivers, which grant the 'device driver domain' restricted and temporary access to only those memory pages that are currently involved in I/O operations. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Signed-off-by: Ian Pratt <ian.pratt@xensource.com> Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk> Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/Makefile2
-rw-r--r--drivers/xen/Makefile1
-rw-r--r--drivers/xen/grant-table.c582
3 files changed, 585 insertions, 0 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index 503d82569449..6d9d7fab77f5 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -15,6 +15,8 @@ obj-$(CONFIG_ACPI) += acpi/
15obj-$(CONFIG_PNP) += pnp/ 15obj-$(CONFIG_PNP) += pnp/
16obj-$(CONFIG_ARM_AMBA) += amba/ 16obj-$(CONFIG_ARM_AMBA) += amba/
17 17
18obj-$(CONFIG_XEN) += xen/
19
18# char/ comes before serial/ etc so that the VT console is the boot-time 20# char/ comes before serial/ etc so that the VT console is the boot-time
19# default. 21# default.
20obj-y += char/ 22obj-y += char/
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
new file mode 100644
index 000000000000..eb42b521eef9
--- /dev/null
+++ b/drivers/xen/Makefile
@@ -0,0 +1 @@
obj-y += grant-table.o
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
new file mode 100644
index 000000000000..ea94dbabf9a9
--- /dev/null
+++ b/drivers/xen/grant-table.c
@@ -0,0 +1,582 @@
1/******************************************************************************
2 * grant_table.c
3 *
4 * Granting foreign access to our memory reservation.
5 *
6 * Copyright (c) 2005-2006, Christopher Clark
7 * Copyright (c) 2004-2005, K A Fraser
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#include <linux/module.h>
35#include <linux/sched.h>
36#include <linux/mm.h>
37#include <linux/vmalloc.h>
38#include <linux/uaccess.h>
39
40#include <xen/interface/xen.h>
41#include <xen/page.h>
42#include <xen/grant_table.h>
43
44#include <asm/pgtable.h>
45#include <asm/sync_bitops.h>
46
47
48/* External tools reserve first few grant table entries. */
49#define NR_RESERVED_ENTRIES 8
50#define GNTTAB_LIST_END 0xffffffff
51#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
52
53static grant_ref_t **gnttab_list;
54static unsigned int nr_grant_frames;
55static unsigned int boot_max_nr_grant_frames;
56static int gnttab_free_count;
57static grant_ref_t gnttab_free_head;
58static DEFINE_SPINLOCK(gnttab_list_lock);
59
60static struct grant_entry *shared;
61
62static struct gnttab_free_callback *gnttab_free_callback_list;
63
64static int gnttab_expand(unsigned int req_entries);
65
66#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
67
68static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
69{
70 return &gnttab_list[(entry) / RPP][(entry) % RPP];
71}
72/* This can be used as an l-value */
73#define gnttab_entry(entry) (*__gnttab_entry(entry))
74
75static int get_free_entries(unsigned count)
76{
77 unsigned long flags;
78 int ref, rc;
79 grant_ref_t head;
80
81 spin_lock_irqsave(&gnttab_list_lock, flags);
82
83 if ((gnttab_free_count < count) &&
84 ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
85 spin_unlock_irqrestore(&gnttab_list_lock, flags);
86 return rc;
87 }
88
89 ref = head = gnttab_free_head;
90 gnttab_free_count -= count;
91 while (count-- > 1)
92 head = gnttab_entry(head);
93 gnttab_free_head = gnttab_entry(head);
94 gnttab_entry(head) = GNTTAB_LIST_END;
95
96 spin_unlock_irqrestore(&gnttab_list_lock, flags);
97
98 return ref;
99}
100
101static void do_free_callbacks(void)
102{
103 struct gnttab_free_callback *callback, *next;
104
105 callback = gnttab_free_callback_list;
106 gnttab_free_callback_list = NULL;
107
108 while (callback != NULL) {
109 next = callback->next;
110 if (gnttab_free_count >= callback->count) {
111 callback->next = NULL;
112 callback->fn(callback->arg);
113 } else {
114 callback->next = gnttab_free_callback_list;
115 gnttab_free_callback_list = callback;
116 }
117 callback = next;
118 }
119}
120
121static inline void check_free_callbacks(void)
122{
123 if (unlikely(gnttab_free_callback_list))
124 do_free_callbacks();
125}
126
127static void put_free_entry(grant_ref_t ref)
128{
129 unsigned long flags;
130 spin_lock_irqsave(&gnttab_list_lock, flags);
131 gnttab_entry(ref) = gnttab_free_head;
132 gnttab_free_head = ref;
133 gnttab_free_count++;
134 check_free_callbacks();
135 spin_unlock_irqrestore(&gnttab_list_lock, flags);
136}
137
138static void update_grant_entry(grant_ref_t ref, domid_t domid,
139 unsigned long frame, unsigned flags)
140{
141 /*
142 * Introducing a valid entry into the grant table:
143 * 1. Write ent->domid.
144 * 2. Write ent->frame:
145 * GTF_permit_access: Frame to which access is permitted.
146 * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
147 * frame, or zero if none.
148 * 3. Write memory barrier (WMB).
149 * 4. Write ent->flags, inc. valid type.
150 */
151 shared[ref].frame = frame;
152 shared[ref].domid = domid;
153 wmb();
154 shared[ref].flags = flags;
155}
156
157/*
158 * Public grant-issuing interface functions
159 */
160void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
161 unsigned long frame, int readonly)
162{
163 update_grant_entry(ref, domid, frame,
164 GTF_permit_access | (readonly ? GTF_readonly : 0));
165}
166EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
167
168int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
169 int readonly)
170{
171 int ref;
172
173 ref = get_free_entries(1);
174 if (unlikely(ref < 0))
175 return -ENOSPC;
176
177 gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
178
179 return ref;
180}
181EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
182
183int gnttab_query_foreign_access(grant_ref_t ref)
184{
185 u16 nflags;
186
187 nflags = shared[ref].flags;
188
189 return (nflags & (GTF_reading|GTF_writing));
190}
191EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
192
193int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
194{
195 u16 flags, nflags;
196
197 nflags = shared[ref].flags;
198 do {
199 flags = nflags;
200 if (flags & (GTF_reading|GTF_writing)) {
201 printk(KERN_ALERT "WARNING: g.e. still in use!\n");
202 return 0;
203 }
204 } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
205
206 return 1;
207}
208EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
209
210void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
211 unsigned long page)
212{
213 if (gnttab_end_foreign_access_ref(ref, readonly)) {
214 put_free_entry(ref);
215 if (page != 0)
216 free_page(page);
217 } else {
218 /* XXX This needs to be fixed so that the ref and page are
219 placed on a list to be freed up later. */
220 printk(KERN_WARNING
221 "WARNING: leaking g.e. and page still in use!\n");
222 }
223}
224EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
225
226int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
227{
228 int ref;
229
230 ref = get_free_entries(1);
231 if (unlikely(ref < 0))
232 return -ENOSPC;
233 gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
234
235 return ref;
236}
237EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
238
239void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
240 unsigned long pfn)
241{
242 update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
243}
244EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
245
246unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
247{
248 unsigned long frame;
249 u16 flags;
250
251 /*
252 * If a transfer is not even yet started, try to reclaim the grant
253 * reference and return failure (== 0).
254 */
255 while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
256 if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
257 return 0;
258 cpu_relax();
259 }
260
261 /* If a transfer is in progress then wait until it is completed. */
262 while (!(flags & GTF_transfer_completed)) {
263 flags = shared[ref].flags;
264 cpu_relax();
265 }
266
267 rmb(); /* Read the frame number /after/ reading completion status. */
268 frame = shared[ref].frame;
269 BUG_ON(frame == 0);
270
271 return frame;
272}
273EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
274
275unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
276{
277 unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
278 put_free_entry(ref);
279 return frame;
280}
281EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
282
283void gnttab_free_grant_reference(grant_ref_t ref)
284{
285 put_free_entry(ref);
286}
287EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
288
289void gnttab_free_grant_references(grant_ref_t head)
290{
291 grant_ref_t ref;
292 unsigned long flags;
293 int count = 1;
294 if (head == GNTTAB_LIST_END)
295 return;
296 spin_lock_irqsave(&gnttab_list_lock, flags);
297 ref = head;
298 while (gnttab_entry(ref) != GNTTAB_LIST_END) {
299 ref = gnttab_entry(ref);
300 count++;
301 }
302 gnttab_entry(ref) = gnttab_free_head;
303 gnttab_free_head = head;
304 gnttab_free_count += count;
305 check_free_callbacks();
306 spin_unlock_irqrestore(&gnttab_list_lock, flags);
307}
308EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
309
310int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
311{
312 int h = get_free_entries(count);
313
314 if (h < 0)
315 return -ENOSPC;
316
317 *head = h;
318
319 return 0;
320}
321EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
322
323int gnttab_empty_grant_references(const grant_ref_t *private_head)
324{
325 return (*private_head == GNTTAB_LIST_END);
326}
327EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
328
329int gnttab_claim_grant_reference(grant_ref_t *private_head)
330{
331 grant_ref_t g = *private_head;
332 if (unlikely(g == GNTTAB_LIST_END))
333 return -ENOSPC;
334 *private_head = gnttab_entry(g);
335 return g;
336}
337EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
338
339void gnttab_release_grant_reference(grant_ref_t *private_head,
340 grant_ref_t release)
341{
342 gnttab_entry(release) = *private_head;
343 *private_head = release;
344}
345EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
346
347void gnttab_request_free_callback(struct gnttab_free_callback *callback,
348 void (*fn)(void *), void *arg, u16 count)
349{
350 unsigned long flags;
351 spin_lock_irqsave(&gnttab_list_lock, flags);
352 if (callback->next)
353 goto out;
354 callback->fn = fn;
355 callback->arg = arg;
356 callback->count = count;
357 callback->next = gnttab_free_callback_list;
358 gnttab_free_callback_list = callback;
359 check_free_callbacks();
360out:
361 spin_unlock_irqrestore(&gnttab_list_lock, flags);
362}
363EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
364
365void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
366{
367 struct gnttab_free_callback **pcb;
368 unsigned long flags;
369
370 spin_lock_irqsave(&gnttab_list_lock, flags);
371 for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
372 if (*pcb == callback) {
373 *pcb = callback->next;
374 break;
375 }
376 }
377 spin_unlock_irqrestore(&gnttab_list_lock, flags);
378}
379EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
380
381static int grow_gnttab_list(unsigned int more_frames)
382{
383 unsigned int new_nr_grant_frames, extra_entries, i;
384
385 new_nr_grant_frames = nr_grant_frames + more_frames;
386 extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
387
388 for (i = nr_grant_frames; i < new_nr_grant_frames; i++) {
389 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
390 if (!gnttab_list[i])
391 goto grow_nomem;
392 }
393
394
395 for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
396 i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
397 gnttab_entry(i) = i + 1;
398
399 gnttab_entry(i) = gnttab_free_head;
400 gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
401 gnttab_free_count += extra_entries;
402
403 nr_grant_frames = new_nr_grant_frames;
404
405 check_free_callbacks();
406
407 return 0;
408
409grow_nomem:
410 for ( ; i >= nr_grant_frames; i--)
411 free_page((unsigned long) gnttab_list[i]);
412 return -ENOMEM;
413}
414
415static unsigned int __max_nr_grant_frames(void)
416{
417 struct gnttab_query_size query;
418 int rc;
419
420 query.dom = DOMID_SELF;
421
422 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
423 if ((rc < 0) || (query.status != GNTST_okay))
424 return 4; /* Legacy max supported number of frames */
425
426 return query.max_nr_frames;
427}
428
429static inline unsigned int max_nr_grant_frames(void)
430{
431 unsigned int xen_max = __max_nr_grant_frames();
432
433 if (xen_max > boot_max_nr_grant_frames)
434 return boot_max_nr_grant_frames;
435 return xen_max;
436}
437
438static int map_pte_fn(pte_t *pte, struct page *pmd_page,
439 unsigned long addr, void *data)
440{
441 unsigned long **frames = (unsigned long **)data;
442
443 set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
444 (*frames)++;
445 return 0;
446}
447
448static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
449 unsigned long addr, void *data)
450{
451
452 set_pte_at(&init_mm, addr, pte, __pte(0));
453 return 0;
454}
455
456static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
457{
458 struct gnttab_setup_table setup;
459 unsigned long *frames;
460 unsigned int nr_gframes = end_idx + 1;
461 int rc;
462
463 frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
464 if (!frames)
465 return -ENOMEM;
466
467 setup.dom = DOMID_SELF;
468 setup.nr_frames = nr_gframes;
469 setup.frame_list = frames;
470
471 rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
472 if (rc == -ENOSYS) {
473 kfree(frames);
474 return -ENOSYS;
475 }
476
477 BUG_ON(rc || setup.status);
478
479 if (shared == NULL) {
480 struct vm_struct *area;
481 area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
482 BUG_ON(area == NULL);
483 shared = area->addr;
484 }
485 rc = apply_to_page_range(&init_mm, (unsigned long)shared,
486 PAGE_SIZE * nr_gframes,
487 map_pte_fn, &frames);
488 BUG_ON(rc);
489 frames -= nr_gframes; /* adjust after map_pte_fn() */
490
491 kfree(frames);
492
493 return 0;
494}
495
496static int gnttab_resume(void)
497{
498 if (max_nr_grant_frames() < nr_grant_frames)
499 return -ENOSYS;
500 return gnttab_map(0, nr_grant_frames - 1);
501}
502
503static int gnttab_suspend(void)
504{
505 apply_to_page_range(&init_mm, (unsigned long)shared,
506 PAGE_SIZE * nr_grant_frames,
507 unmap_pte_fn, NULL);
508
509 return 0;
510}
511
512static int gnttab_expand(unsigned int req_entries)
513{
514 int rc;
515 unsigned int cur, extra;
516
517 cur = nr_grant_frames;
518 extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
519 GREFS_PER_GRANT_FRAME);
520 if (cur + extra > max_nr_grant_frames())
521 return -ENOSPC;
522
523 rc = gnttab_map(cur, cur + extra - 1);
524 if (rc == 0)
525 rc = grow_gnttab_list(extra);
526
527 return rc;
528}
529
530static int __devinit gnttab_init(void)
531{
532 int i;
533 unsigned int max_nr_glist_frames;
534 unsigned int nr_init_grefs;
535
536 if (!is_running_on_xen())
537 return -ENODEV;
538
539 nr_grant_frames = 1;
540 boot_max_nr_grant_frames = __max_nr_grant_frames();
541
542 /* Determine the maximum number of frames required for the
543 * grant reference free list on the current hypervisor.
544 */
545 max_nr_glist_frames = (boot_max_nr_grant_frames *
546 GREFS_PER_GRANT_FRAME /
547 (PAGE_SIZE / sizeof(grant_ref_t)));
548
549 gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
550 GFP_KERNEL);
551 if (gnttab_list == NULL)
552 return -ENOMEM;
553
554 for (i = 0; i < nr_grant_frames; i++) {
555 gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
556 if (gnttab_list[i] == NULL)
557 goto ini_nomem;
558 }
559
560 if (gnttab_resume() < 0)
561 return -ENODEV;
562
563 nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
564
565 for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
566 gnttab_entry(i) = i + 1;
567
568 gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
569 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
570 gnttab_free_head = NR_RESERVED_ENTRIES;
571
572 printk("Grant table initialized\n");
573 return 0;
574
575 ini_nomem:
576 for (i--; i >= 0; i--)
577 free_page((unsigned long)gnttab_list[i]);
578 kfree(gnttab_list);
579 return -ENOMEM;
580}
581
582core_initcall(gnttab_init);