aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTimur Tabi <timur@freescale.com>2011-06-09 16:52:06 -0400
committerKumar Gala <galak@kernel.crashing.org>2011-07-08 01:21:27 -0400
commit6db7199407ca56f55bc0832fb124e1ad216ea57b (patch)
tree62321e519dd0f74e24de02492fdd79c51173e08a
parent8dbb6bc13617379a6534700e51634a3f88c9a513 (diff)
drivers/virt: introduce Freescale hypervisor management driver
Add the drivers/virt directory, which houses drivers that support virtualization environments, and add the Freescale hypervisor management driver. The Freescale hypervisor management driver provides several services to drivers and applications related to the Freescale hypervisor: 1. An ioctl interface for querying and managing partitions 2. A file interface to reading incoming doorbells 3. An interrupt handler for shutting down the partition upon receiving the shutdown doorbell from a manager partition 4. A kernel interface for receiving callbacks when a managed partition shuts down. Signed-off-by: Timur Tabi <timur@freescale.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile3
-rw-r--r--drivers/virt/Kconfig32
-rw-r--r--drivers/virt/Makefile5
-rw-r--r--drivers/virt/fsl_hypervisor.c937
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/fsl_hypervisor.h241
8 files changed, 1222 insertions, 0 deletions
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 3a46e360496d..72ba8d51dbc1 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -301,6 +301,7 @@ Code Seq#(hex) Include File Comments
301 <mailto:rusty@rustcorp.com.au> 301 <mailto:rusty@rustcorp.com.au>
3020xAE all linux/kvm.h Kernel-based Virtual Machine 3020xAE all linux/kvm.h Kernel-based Virtual Machine
303 <mailto:kvm@vger.kernel.org> 303 <mailto:kvm@vger.kernel.org>
3040xAF 00-1F linux/fsl_hypervisor.h Freescale hypervisor
3040xB0 all RATIO devices in development: 3050xB0 all RATIO devices in development:
305 <mailto:vgo@ratio.de> 306 <mailto:vgo@ratio.de>
3060xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca> 3070xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca>
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3bb154d8c8cc..3c1d4a59a864 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -126,4 +126,6 @@ source "drivers/hwspinlock/Kconfig"
126 126
127source "drivers/clocksource/Kconfig" 127source "drivers/clocksource/Kconfig"
128 128
129source "drivers/virt/Kconfig"
130
129endmenu 131endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 09f3232bcdcd..cd546ebab9a7 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -122,3 +122,6 @@ obj-y += ieee802154/
122obj-y += clk/ 122obj-y += clk/
123 123
124obj-$(CONFIG_HWSPINLOCK) += hwspinlock/ 124obj-$(CONFIG_HWSPINLOCK) += hwspinlock/
125
126# Virtualization drivers
127obj-$(CONFIG_VIRT_DRIVERS) += virt/
diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
new file mode 100644
index 000000000000..2dcdbc9364d8
--- /dev/null
+++ b/drivers/virt/Kconfig
@@ -0,0 +1,32 @@
1#
2# Virtualization support drivers
3#
4
5menuconfig VIRT_DRIVERS
6 bool "Virtualization drivers"
7 ---help---
8 Say Y here to get to see options for device drivers that support
9 virtualization environments.
10
11 If you say N, all options in this submenu will be skipped and disabled.
12
13if VIRT_DRIVERS
14
15config FSL_HV_MANAGER
16 tristate "Freescale hypervisor management driver"
17 depends on FSL_SOC
18 help
19 The Freescale hypervisor management driver provides several services
20 to drivers and applications related to the Freescale hypervisor:
21
22 1) An ioctl interface for querying and managing partitions.
23
24 2) A file interface for reading incoming doorbells.
25
26 3) An interrupt handler for shutting down the partition upon
27 receiving the shutdown doorbell from a manager partition.
28
29 4) A kernel interface for receiving callbacks when a managed
30 partition shuts down.
31
32endif
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
new file mode 100644
index 000000000000..c47f04dd343b
--- /dev/null
+++ b/drivers/virt/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for drivers that support virtualization
3#
4
5obj-$(CONFIG_FSL_HV_MANAGER) += fsl_hypervisor.o
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
new file mode 100644
index 000000000000..1d3b8ebb3141
--- /dev/null
+++ b/drivers/virt/fsl_hypervisor.c
@@ -0,0 +1,937 @@
1/*
2 * Freescale Hypervisor Management Driver
3
4 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
5 * Author: Timur Tabi <timur@freescale.com>
6 *
7 * This file is licensed under the terms of the GNU General Public License
8 * version 2. This program is licensed "as is" without any warranty of any
9 * kind, whether express or implied.
10 *
11 * The Freescale hypervisor management driver provides several services to
12 * drivers and applications related to the Freescale hypervisor:
13 *
14 * 1. An ioctl interface for querying and managing partitions.
15 *
16 * 2. A file interface for reading incoming doorbells.
17 *
18 * 3. An interrupt handler for shutting down the partition upon receiving the
19 * shutdown doorbell from a manager partition.
20 *
21 * 4. A kernel interface for receiving callbacks when a managed partition
22 * shuts down.
23 */
24
25#include <linux/kernel.h>
26#include <linux/module.h>
27#include <linux/init.h>
28#include <linux/types.h>
29#include <linux/err.h>
30#include <linux/fs.h>
31#include <linux/miscdevice.h>
32#include <linux/mm.h>
33#include <linux/pagemap.h>
34#include <linux/slab.h>
35#include <linux/poll.h>
36#include <linux/of.h>
37#include <linux/reboot.h>
38#include <linux/uaccess.h>
39#include <linux/notifier.h>
40
41#include <linux/io.h>
42#include <asm/fsl_hcalls.h>
43
44#include <linux/fsl_hypervisor.h>
45
46static BLOCKING_NOTIFIER_HEAD(failover_subscribers);
47
48/*
49 * Ioctl interface for FSL_HV_IOCTL_PARTITION_RESTART
50 *
51 * Restart a running partition
52 */
53static long ioctl_restart(struct fsl_hv_ioctl_restart __user *p)
54{
55 struct fsl_hv_ioctl_restart param;
56
57 /* Get the parameters from the user */
58 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_restart)))
59 return -EFAULT;
60
61 param.ret = fh_partition_restart(param.partition);
62
63 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
64 return -EFAULT;
65
66 return 0;
67}
68
69/*
70 * Ioctl interface for FSL_HV_IOCTL_PARTITION_STATUS
71 *
72 * Query the status of a partition
73 */
74static long ioctl_status(struct fsl_hv_ioctl_status __user *p)
75{
76 struct fsl_hv_ioctl_status param;
77 u32 status;
78
79 /* Get the parameters from the user */
80 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_status)))
81 return -EFAULT;
82
83 param.ret = fh_partition_get_status(param.partition, &status);
84 if (!param.ret)
85 param.status = status;
86
87 if (copy_to_user(p, &param, sizeof(struct fsl_hv_ioctl_status)))
88 return -EFAULT;
89
90 return 0;
91}
92
93/*
94 * Ioctl interface for FSL_HV_IOCTL_PARTITION_START
95 *
96 * Start a stopped partition.
97 */
98static long ioctl_start(struct fsl_hv_ioctl_start __user *p)
99{
100 struct fsl_hv_ioctl_start param;
101
102 /* Get the parameters from the user */
103 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_start)))
104 return -EFAULT;
105
106 param.ret = fh_partition_start(param.partition, param.entry_point,
107 param.load);
108
109 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
110 return -EFAULT;
111
112 return 0;
113}
114
115/*
116 * Ioctl interface for FSL_HV_IOCTL_PARTITION_STOP
117 *
118 * Stop a running partition
119 */
120static long ioctl_stop(struct fsl_hv_ioctl_stop __user *p)
121{
122 struct fsl_hv_ioctl_stop param;
123
124 /* Get the parameters from the user */
125 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_stop)))
126 return -EFAULT;
127
128 param.ret = fh_partition_stop(param.partition);
129
130 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
131 return -EFAULT;
132
133 return 0;
134}
135
136/*
137 * Ioctl interface for FSL_HV_IOCTL_MEMCPY
138 *
139 * The FH_MEMCPY hypercall takes an array of address/address/size structures
140 * to represent the data being copied. As a convenience to the user, this
141 * ioctl takes a user-create buffer and a pointer to a guest physically
142 * contiguous buffer in the remote partition, and creates the
143 * address/address/size array for the hypercall.
144 */
145static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
146{
147 struct fsl_hv_ioctl_memcpy param;
148
149 struct page **pages = NULL;
150 void *sg_list_unaligned = NULL;
151 struct fh_sg_list *sg_list = NULL;
152
153 unsigned int num_pages;
154 unsigned long lb_offset; /* Offset within a page of the local buffer */
155
156 unsigned int i;
157 long ret = 0;
158 int num_pinned; /* return value from get_user_pages() */
159 phys_addr_t remote_paddr; /* The next address in the remote buffer */
160 uint32_t count; /* The number of bytes left to copy */
161
162 /* Get the parameters from the user */
163 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_memcpy)))
164 return -EFAULT;
165
166 /*
167 * One partition must be local, the other must be remote. In other
168 * words, if source and target are both -1, or are both not -1, then
169 * return an error.
170 */
171 if ((param.source == -1) == (param.target == -1))
172 return -EINVAL;
173
174 /*
175 * The array of pages returned by get_user_pages() covers only
176 * page-aligned memory. Since the user buffer is probably not
177 * page-aligned, we need to handle the discrepancy.
178 *
179 * We calculate the offset within a page of the S/G list, and make
180 * adjustments accordingly. This will result in a page list that looks
181 * like this:
182 *
183 * ---- <-- first page starts before the buffer
184 * | |
185 * |////|-> ----
186 * |////| | |
187 * ---- | |
188 * | |
189 * ---- | |
190 * |////| | |
191 * |////| | |
192 * |////| | |
193 * ---- | |
194 * | |
195 * ---- | |
196 * |////| | |
197 * |////| | |
198 * |////| | |
199 * ---- | |
200 * | |
201 * ---- | |
202 * |////| | |
203 * |////|-> ----
204 * | | <-- last page ends after the buffer
205 * ----
206 *
207 * The distance between the start of the first page and the start of the
208 * buffer is lb_offset. The hashed (///) areas are the parts of the
209 * page list that contain the actual buffer.
210 *
211 * The advantage of this approach is that the number of pages is
212 * equal to the number of entries in the S/G list that we give to the
213 * hypervisor.
214 */
215 lb_offset = param.local_vaddr & (PAGE_SIZE - 1);
216 num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
217
218 /* Allocate the buffers we need */
219
220 /*
221 * 'pages' is an array of struct page pointers that's initialized by
222 * get_user_pages().
223 */
224 pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL);
225 if (!pages) {
226 pr_debug("fsl-hv: could not allocate page list\n");
227 return -ENOMEM;
228 }
229
230 /*
231 * sg_list is the list of fh_sg_list objects that we pass to the
232 * hypervisor.
233 */
234 sg_list_unaligned = kmalloc(num_pages * sizeof(struct fh_sg_list) +
235 sizeof(struct fh_sg_list) - 1, GFP_KERNEL);
236 if (!sg_list_unaligned) {
237 pr_debug("fsl-hv: could not allocate S/G list\n");
238 ret = -ENOMEM;
239 goto exit;
240 }
241 sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list));
242
243 /* Get the physical addresses of the source buffer */
244 down_read(&current->mm->mmap_sem);
245 num_pinned = get_user_pages(current, current->mm,
246 param.local_vaddr - lb_offset, num_pages,
247 (param.source == -1) ? READ : WRITE,
248 0, pages, NULL);
249 up_read(&current->mm->mmap_sem);
250
251 if (num_pinned != num_pages) {
252 /* get_user_pages() failed */
253 pr_debug("fsl-hv: could not lock source buffer\n");
254 ret = (num_pinned < 0) ? num_pinned : -EFAULT;
255 goto exit;
256 }
257
258 /*
259 * Build the fh_sg_list[] array. The first page is special
260 * because it's misaligned.
261 */
262 if (param.source == -1) {
263 sg_list[0].source = page_to_phys(pages[0]) + lb_offset;
264 sg_list[0].target = param.remote_paddr;
265 } else {
266 sg_list[0].source = param.remote_paddr;
267 sg_list[0].target = page_to_phys(pages[0]) + lb_offset;
268 }
269 sg_list[0].size = min_t(uint64_t, param.count, PAGE_SIZE - lb_offset);
270
271 remote_paddr = param.remote_paddr + sg_list[0].size;
272 count = param.count - sg_list[0].size;
273
274 for (i = 1; i < num_pages; i++) {
275 if (param.source == -1) {
276 /* local to remote */
277 sg_list[i].source = page_to_phys(pages[i]);
278 sg_list[i].target = remote_paddr;
279 } else {
280 /* remote to local */
281 sg_list[i].source = remote_paddr;
282 sg_list[i].target = page_to_phys(pages[i]);
283 }
284 sg_list[i].size = min_t(uint64_t, count, PAGE_SIZE);
285
286 remote_paddr += sg_list[i].size;
287 count -= sg_list[i].size;
288 }
289
290 param.ret = fh_partition_memcpy(param.source, param.target,
291 virt_to_phys(sg_list), num_pages);
292
293exit:
294 if (pages) {
295 for (i = 0; i < num_pages; i++)
296 if (pages[i])
297 put_page(pages[i]);
298 }
299
300 kfree(sg_list_unaligned);
301 kfree(pages);
302
303 if (!ret)
304 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
305 return -EFAULT;
306
307 return ret;
308}
309
310/*
311 * Ioctl interface for FSL_HV_IOCTL_DOORBELL
312 *
313 * Ring a doorbell
314 */
315static long ioctl_doorbell(struct fsl_hv_ioctl_doorbell __user *p)
316{
317 struct fsl_hv_ioctl_doorbell param;
318
319 /* Get the parameters from the user. */
320 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_doorbell)))
321 return -EFAULT;
322
323 param.ret = ev_doorbell_send(param.doorbell);
324
325 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
326 return -EFAULT;
327
328 return 0;
329}
330
331static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
332{
333 struct fsl_hv_ioctl_prop param;
334 char __user *upath, *upropname;
335 void __user *upropval;
336 char *path = NULL, *propname = NULL;
337 void *propval = NULL;
338 int ret = 0;
339
340 /* Get the parameters from the user. */
341 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_prop)))
342 return -EFAULT;
343
344 upath = (char __user *)(uintptr_t)param.path;
345 upropname = (char __user *)(uintptr_t)param.propname;
346 upropval = (void __user *)(uintptr_t)param.propval;
347
348 path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN);
349 if (IS_ERR(path)) {
350 ret = PTR_ERR(path);
351 goto out;
352 }
353
354 propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN);
355 if (IS_ERR(propname)) {
356 ret = PTR_ERR(propname);
357 goto out;
358 }
359
360 if (param.proplen > FH_DTPROP_MAX_PROPLEN) {
361 ret = -EINVAL;
362 goto out;
363 }
364
365 propval = kmalloc(param.proplen, GFP_KERNEL);
366 if (!propval) {
367 ret = -ENOMEM;
368 goto out;
369 }
370
371 if (set) {
372 if (copy_from_user(propval, upropval, param.proplen)) {
373 ret = -EFAULT;
374 goto out;
375 }
376
377 param.ret = fh_partition_set_dtprop(param.handle,
378 virt_to_phys(path),
379 virt_to_phys(propname),
380 virt_to_phys(propval),
381 param.proplen);
382 } else {
383 param.ret = fh_partition_get_dtprop(param.handle,
384 virt_to_phys(path),
385 virt_to_phys(propname),
386 virt_to_phys(propval),
387 &param.proplen);
388
389 if (param.ret == 0) {
390 if (copy_to_user(upropval, propval, param.proplen) ||
391 put_user(param.proplen, &p->proplen)) {
392 ret = -EFAULT;
393 goto out;
394 }
395 }
396 }
397
398 if (put_user(param.ret, &p->ret))
399 ret = -EFAULT;
400
401out:
402 kfree(path);
403 kfree(propval);
404 kfree(propname);
405
406 return ret;
407}
408
409/*
410 * Ioctl main entry point
411 */
412static long fsl_hv_ioctl(struct file *file, unsigned int cmd,
413 unsigned long argaddr)
414{
415 void __user *arg = (void __user *)argaddr;
416 long ret;
417
418 switch (cmd) {
419 case FSL_HV_IOCTL_PARTITION_RESTART:
420 ret = ioctl_restart(arg);
421 break;
422 case FSL_HV_IOCTL_PARTITION_GET_STATUS:
423 ret = ioctl_status(arg);
424 break;
425 case FSL_HV_IOCTL_PARTITION_START:
426 ret = ioctl_start(arg);
427 break;
428 case FSL_HV_IOCTL_PARTITION_STOP:
429 ret = ioctl_stop(arg);
430 break;
431 case FSL_HV_IOCTL_MEMCPY:
432 ret = ioctl_memcpy(arg);
433 break;
434 case FSL_HV_IOCTL_DOORBELL:
435 ret = ioctl_doorbell(arg);
436 break;
437 case FSL_HV_IOCTL_GETPROP:
438 ret = ioctl_dtprop(arg, 0);
439 break;
440 case FSL_HV_IOCTL_SETPROP:
441 ret = ioctl_dtprop(arg, 1);
442 break;
443 default:
444 pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n",
445 _IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd),
446 _IOC_SIZE(cmd));
447 return -ENOTTY;
448 }
449
450 return ret;
451}
452
453/* Linked list of processes that have us open */
454static struct list_head db_list;
455
456/* spinlock for db_list */
457static DEFINE_SPINLOCK(db_list_lock);
458
459/* The size of the doorbell event queue. This must be a power of two. */
460#define QSIZE 16
461
462/* Returns the next head/tail pointer, wrapping around the queue if necessary */
463#define nextp(x) (((x) + 1) & (QSIZE - 1))
464
465/* Per-open data structure */
466struct doorbell_queue {
467 struct list_head list;
468 spinlock_t lock;
469 wait_queue_head_t wait;
470 unsigned int head;
471 unsigned int tail;
472 uint32_t q[QSIZE];
473};
474
475/* Linked list of ISRs that we registered */
476struct list_head isr_list;
477
478/* Per-ISR data structure */
479struct doorbell_isr {
480 struct list_head list;
481 unsigned int irq;
482 uint32_t doorbell; /* The doorbell handle */
483 uint32_t partition; /* The partition handle, if used */
484};
485
486/*
487 * Add a doorbell to all of the doorbell queues
488 */
489static void fsl_hv_queue_doorbell(uint32_t doorbell)
490{
491 struct doorbell_queue *dbq;
492 unsigned long flags;
493
494 /* Prevent another core from modifying db_list */
495 spin_lock_irqsave(&db_list_lock, flags);
496
497 list_for_each_entry(dbq, &db_list, list) {
498 if (dbq->head != nextp(dbq->tail)) {
499 dbq->q[dbq->tail] = doorbell;
500 /*
501 * This memory barrier eliminates the need to grab
502 * the spinlock for dbq.
503 */
504 smp_wmb();
505 dbq->tail = nextp(dbq->tail);
506 wake_up_interruptible(&dbq->wait);
507 }
508 }
509
510 spin_unlock_irqrestore(&db_list_lock, flags);
511}
512
513/*
514 * Interrupt handler for all doorbells
515 *
516 * We use the same interrupt handler for all doorbells. Whenever a doorbell
517 * is rung, and we receive an interrupt, we just put the handle for that
518 * doorbell (passed to us as *data) into all of the queues.
519 */
520static irqreturn_t fsl_hv_isr(int irq, void *data)
521{
522 fsl_hv_queue_doorbell((uintptr_t) data);
523
524 return IRQ_HANDLED;
525}
526
527/*
528 * State change thread function
529 *
530 * The state change notification arrives in an interrupt, but we can't call
531 * blocking_notifier_call_chain() in an interrupt handler. We could call
532 * atomic_notifier_call_chain(), but that would require the clients' call-back
533 * function to run in interrupt context. Since we don't want to impose that
534 * restriction on the clients, we use a threaded IRQ to process the
535 * notification in kernel context.
536 */
537static irqreturn_t fsl_hv_state_change_thread(int irq, void *data)
538{
539 struct doorbell_isr *dbisr = data;
540
541 blocking_notifier_call_chain(&failover_subscribers, dbisr->partition,
542 NULL);
543
544 return IRQ_HANDLED;
545}
546
547/*
548 * Interrupt handler for state-change doorbells
549 */
550static irqreturn_t fsl_hv_state_change_isr(int irq, void *data)
551{
552 unsigned int status;
553 struct doorbell_isr *dbisr = data;
554 int ret;
555
556 /* It's still a doorbell, so add it to all the queues. */
557 fsl_hv_queue_doorbell(dbisr->doorbell);
558
559 /* Determine the new state, and if it's stopped, notify the clients. */
560 ret = fh_partition_get_status(dbisr->partition, &status);
561 if (!ret && (status == FH_PARTITION_STOPPED))
562 return IRQ_WAKE_THREAD;
563
564 return IRQ_HANDLED;
565}
566
567/*
568 * Returns a bitmask indicating whether a read will block
569 */
570static unsigned int fsl_hv_poll(struct file *filp, struct poll_table_struct *p)
571{
572 struct doorbell_queue *dbq = filp->private_data;
573 unsigned long flags;
574 unsigned int mask;
575
576 spin_lock_irqsave(&dbq->lock, flags);
577
578 poll_wait(filp, &dbq->wait, p);
579 mask = (dbq->head == dbq->tail) ? 0 : (POLLIN | POLLRDNORM);
580
581 spin_unlock_irqrestore(&dbq->lock, flags);
582
583 return mask;
584}
585
586/*
587 * Return the handles for any incoming doorbells
588 *
589 * If there are doorbell handles in the queue for this open instance, then
590 * return them to the caller as an array of 32-bit integers. Otherwise,
591 * block until there is at least one handle to return.
592 */
593static ssize_t fsl_hv_read(struct file *filp, char __user *buf, size_t len,
594 loff_t *off)
595{
596 struct doorbell_queue *dbq = filp->private_data;
597 uint32_t __user *p = (uint32_t __user *) buf; /* for put_user() */
598 unsigned long flags;
599 ssize_t count = 0;
600
601 /* Make sure we stop when the user buffer is full. */
602 while (len >= sizeof(uint32_t)) {
603 uint32_t dbell; /* Local copy of doorbell queue data */
604
605 spin_lock_irqsave(&dbq->lock, flags);
606
607 /*
608 * If the queue is empty, then either we're done or we need
609 * to block. If the application specified O_NONBLOCK, then
610 * we return the appropriate error code.
611 */
612 if (dbq->head == dbq->tail) {
613 spin_unlock_irqrestore(&dbq->lock, flags);
614 if (count)
615 break;
616 if (filp->f_flags & O_NONBLOCK)
617 return -EAGAIN;
618 if (wait_event_interruptible(dbq->wait,
619 dbq->head != dbq->tail))
620 return -ERESTARTSYS;
621 continue;
622 }
623
624 /*
625 * Even though we have an smp_wmb() in the ISR, the core
626 * might speculatively execute the "dbell = ..." below while
627 * it's evaluating the if-statement above. In that case, the
628 * value put into dbell could be stale if the core accepts the
629 * speculation. To prevent that, we need a read memory barrier
630 * here as well.
631 */
632 smp_rmb();
633
634 /* Copy the data to a temporary local buffer, because
635 * we can't call copy_to_user() from inside a spinlock
636 */
637 dbell = dbq->q[dbq->head];
638 dbq->head = nextp(dbq->head);
639
640 spin_unlock_irqrestore(&dbq->lock, flags);
641
642 if (put_user(dbell, p))
643 return -EFAULT;
644 p++;
645 count += sizeof(uint32_t);
646 len -= sizeof(uint32_t);
647 }
648
649 return count;
650}
651
652/*
653 * Open the driver and prepare for reading doorbells.
654 *
655 * Every time an application opens the driver, we create a doorbell queue
656 * for that file handle. This queue is used for any incoming doorbells.
657 */
658static int fsl_hv_open(struct inode *inode, struct file *filp)
659{
660 struct doorbell_queue *dbq;
661 unsigned long flags;
662 int ret = 0;
663
664 dbq = kzalloc(sizeof(struct doorbell_queue), GFP_KERNEL);
665 if (!dbq) {
666 pr_err("fsl-hv: out of memory\n");
667 return -ENOMEM;
668 }
669
670 spin_lock_init(&dbq->lock);
671 init_waitqueue_head(&dbq->wait);
672
673 spin_lock_irqsave(&db_list_lock, flags);
674 list_add(&dbq->list, &db_list);
675 spin_unlock_irqrestore(&db_list_lock, flags);
676
677 filp->private_data = dbq;
678
679 return ret;
680}
681
682/*
683 * Close the driver
684 */
685static int fsl_hv_close(struct inode *inode, struct file *filp)
686{
687 struct doorbell_queue *dbq = filp->private_data;
688 unsigned long flags;
689
690 int ret = 0;
691
692 spin_lock_irqsave(&db_list_lock, flags);
693 list_del(&dbq->list);
694 spin_unlock_irqrestore(&db_list_lock, flags);
695
696 kfree(dbq);
697
698 return ret;
699}
700
701static const struct file_operations fsl_hv_fops = {
702 .owner = THIS_MODULE,
703 .open = fsl_hv_open,
704 .release = fsl_hv_close,
705 .poll = fsl_hv_poll,
706 .read = fsl_hv_read,
707 .unlocked_ioctl = fsl_hv_ioctl,
708};
709
710static struct miscdevice fsl_hv_misc_dev = {
711 MISC_DYNAMIC_MINOR,
712 "fsl-hv",
713 &fsl_hv_fops
714};
715
716static irqreturn_t fsl_hv_shutdown_isr(int irq, void *data)
717{
718 orderly_poweroff(false);
719
720 return IRQ_HANDLED;
721}
722
723/*
724 * Returns the handle of the parent of the given node
725 *
726 * The handle is the value of the 'hv-handle' property
727 */
728static int get_parent_handle(struct device_node *np)
729{
730 struct device_node *parent;
731 const uint32_t *prop;
732 uint32_t handle;
733 int len;
734
735 parent = of_get_parent(np);
736 if (!parent)
737 /* It's not really possible for this to fail */
738 return -ENODEV;
739
740 /*
741 * The proper name for the handle property is "hv-handle", but some
742 * older versions of the hypervisor used "reg".
743 */
744 prop = of_get_property(parent, "hv-handle", &len);
745 if (!prop)
746 prop = of_get_property(parent, "reg", &len);
747
748 if (!prop || (len != sizeof(uint32_t))) {
749 /* This can happen only if the node is malformed */
750 of_node_put(parent);
751 return -ENODEV;
752 }
753
754 handle = be32_to_cpup(prop);
755 of_node_put(parent);
756
757 return handle;
758}
759
760/*
761 * Register a callback for failover events
762 *
763 * This function is called by device drivers to register their callback
764 * functions for fail-over events.
765 */
766int fsl_hv_failover_register(struct notifier_block *nb)
767{
768 return blocking_notifier_chain_register(&failover_subscribers, nb);
769}
770EXPORT_SYMBOL(fsl_hv_failover_register);
771
772/*
773 * Unregister a callback for failover events
774 */
775int fsl_hv_failover_unregister(struct notifier_block *nb)
776{
777 return blocking_notifier_chain_unregister(&failover_subscribers, nb);
778}
779EXPORT_SYMBOL(fsl_hv_failover_unregister);
780
781/*
782 * Return TRUE if we're running under FSL hypervisor
783 *
784 * This function checks to see if we're running under the Freescale
785 * hypervisor, and returns zero if we're not, or non-zero if we are.
786 *
787 * First, it checks if MSR[GS]==1, which means we're running under some
788 * hypervisor. Then it checks if there is a hypervisor node in the device
789 * tree. Currently, that means there needs to be a node in the root called
790 * "hypervisor" and which has a property named "fsl,hv-version".
791 */
792static int has_fsl_hypervisor(void)
793{
794 struct device_node *node;
795 int ret;
796
797 if (!(mfmsr() & MSR_GS))
798 return 0;
799
800 node = of_find_node_by_path("/hypervisor");
801 if (!node)
802 return 0;
803
804 ret = of_find_property(node, "fsl,hv-version", NULL) != NULL;
805
806 of_node_put(node);
807
808 return ret;
809}
810
811/*
812 * Freescale hypervisor management driver init
813 *
814 * This function is called when this module is loaded.
815 *
816 * Register ourselves as a miscellaneous driver. This will register the
817 * fops structure and create the right sysfs entries for udev.
818 */
819static int __init fsl_hypervisor_init(void)
820{
821 struct device_node *np;
822 struct doorbell_isr *dbisr, *n;
823 int ret;
824
825 pr_info("Freescale hypervisor management driver\n");
826
827 if (!has_fsl_hypervisor()) {
828 pr_info("fsl-hv: no hypervisor found\n");
829 return -ENODEV;
830 }
831
832 ret = misc_register(&fsl_hv_misc_dev);
833 if (ret) {
834 pr_err("fsl-hv: cannot register device\n");
835 return ret;
836 }
837
838 INIT_LIST_HEAD(&db_list);
839 INIT_LIST_HEAD(&isr_list);
840
841 for_each_compatible_node(np, NULL, "epapr,hv-receive-doorbell") {
842 unsigned int irq;
843 const uint32_t *handle;
844
845 handle = of_get_property(np, "interrupts", NULL);
846 irq = irq_of_parse_and_map(np, 0);
847 if (!handle || (irq == NO_IRQ)) {
848 pr_err("fsl-hv: no 'interrupts' property in %s node\n",
849 np->full_name);
850 continue;
851 }
852
853 dbisr = kzalloc(sizeof(*dbisr), GFP_KERNEL);
854 if (!dbisr)
855 goto out_of_memory;
856
857 dbisr->irq = irq;
858 dbisr->doorbell = be32_to_cpup(handle);
859
860 if (of_device_is_compatible(np, "fsl,hv-shutdown-doorbell")) {
861 /* The shutdown doorbell gets its own ISR */
862 ret = request_irq(irq, fsl_hv_shutdown_isr, 0,
863 np->name, NULL);
864 } else if (of_device_is_compatible(np,
865 "fsl,hv-state-change-doorbell")) {
866 /*
867 * The state change doorbell triggers a notification if
868 * the state of the managed partition changes to
869 * "stopped". We need a separate interrupt handler for
870 * that, and we also need to know the handle of the
871 * target partition, not just the handle of the
872 * doorbell.
873 */
874 dbisr->partition = ret = get_parent_handle(np);
875 if (ret < 0) {
876 pr_err("fsl-hv: node %s has missing or "
877 "malformed parent\n", np->full_name);
878 kfree(dbisr);
879 continue;
880 }
881 ret = request_threaded_irq(irq, fsl_hv_state_change_isr,
882 fsl_hv_state_change_thread,
883 0, np->name, dbisr);
884 } else
885 ret = request_irq(irq, fsl_hv_isr, 0, np->name, dbisr);
886
887 if (ret < 0) {
888 pr_err("fsl-hv: could not request irq %u for node %s\n",
889 irq, np->full_name);
890 kfree(dbisr);
891 continue;
892 }
893
894 list_add(&dbisr->list, &isr_list);
895
896 pr_info("fsl-hv: registered handler for doorbell %u\n",
897 dbisr->doorbell);
898 }
899
900 return 0;
901
902out_of_memory:
903 list_for_each_entry_safe(dbisr, n, &isr_list, list) {
904 free_irq(dbisr->irq, dbisr);
905 list_del(&dbisr->list);
906 kfree(dbisr);
907 }
908
909 misc_deregister(&fsl_hv_misc_dev);
910
911 return -ENOMEM;
912}
913
914/*
915 * Freescale hypervisor management driver termination
916 *
917 * This function is called when this driver is unloaded.
918 */
919static void __exit fsl_hypervisor_exit(void)
920{
921 struct doorbell_isr *dbisr, *n;
922
923 list_for_each_entry_safe(dbisr, n, &isr_list, list) {
924 free_irq(dbisr->irq, dbisr);
925 list_del(&dbisr->list);
926 kfree(dbisr);
927 }
928
929 misc_deregister(&fsl_hv_misc_dev);
930}
931
932module_init(fsl_hypervisor_init);
933module_exit(fsl_hypervisor_exit);
934
935MODULE_AUTHOR("Timur Tabi <timur@freescale.com>");
936MODULE_DESCRIPTION("Freescale hypervisor management driver");
937MODULE_LICENSE("GPL v2");
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 01f636275057..619b5657af77 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -135,6 +135,7 @@ header-y += firewire-cdev.h
135header-y += firewire-constants.h 135header-y += firewire-constants.h
136header-y += flat.h 136header-y += flat.h
137header-y += fs.h 137header-y += fs.h
138header-y += fsl_hypervisor.h
138header-y += fuse.h 139header-y += fuse.h
139header-y += futex.h 140header-y += futex.h
140header-y += gameport.h 141header-y += gameport.h
diff --git a/include/linux/fsl_hypervisor.h b/include/linux/fsl_hypervisor.h
new file mode 100644
index 000000000000..1cebaeeeef57
--- /dev/null
+++ b/include/linux/fsl_hypervisor.h
@@ -0,0 +1,241 @@
1/*
2 * Freescale hypervisor ioctl and kernel interface
3 *
4 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
5 * Author: Timur Tabi <timur@freescale.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Freescale Semiconductor nor the
15 * names of its contributors may be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 *
19 * ALTERNATIVELY, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") as published by the Free Software
21 * Foundation, either version 2 of that License or (at your option) any
22 * later version.
23 *
24 * This software is provided by Freescale Semiconductor "as is" and any
25 * express or implied warranties, including, but not limited to, the implied
26 * warranties of merchantability and fitness for a particular purpose are
27 * disclaimed. In no event shall Freescale Semiconductor be liable for any
28 * direct, indirect, incidental, special, exemplary, or consequential damages
29 * (including, but not limited to, procurement of substitute goods or services;
30 * loss of use, data, or profits; or business interruption) however caused and
31 * on any theory of liability, whether in contract, strict liability, or tort
32 * (including negligence or otherwise) arising in any way out of the use of this
33 * software, even if advised of the possibility of such damage.
34 *
35 * This file is used by the Freescale hypervisor management driver. It can
36 * also be included by applications that need to communicate with the driver
37 * via the ioctl interface.
38 */
39
40#ifndef FSL_HYPERVISOR_H
41#define FSL_HYPERVISOR_H
42
43#include <linux/types.h>
44
45/**
46 * struct fsl_hv_ioctl_restart - restart a partition
47 * @ret: return error code from the hypervisor
48 * @partition: the ID of the partition to restart, or -1 for the
49 * calling partition
50 *
51 * Used by FSL_HV_IOCTL_PARTITION_RESTART
52 */
53struct fsl_hv_ioctl_restart {
54 __u32 ret;
55 __u32 partition;
56};
57
58/**
59 * struct fsl_hv_ioctl_status - get a partition's status
60 * @ret: return error code from the hypervisor
61 * @partition: the ID of the partition to query, or -1 for the
62 * calling partition
63 * @status: The returned status of the partition
64 *
65 * Used by FSL_HV_IOCTL_PARTITION_GET_STATUS
66 *
67 * Values of 'status':
68 * 0 = Stopped
69 * 1 = Running
70 * 2 = Starting
71 * 3 = Stopping
72 */
73struct fsl_hv_ioctl_status {
74 __u32 ret;
75 __u32 partition;
76 __u32 status;
77};
78
79/**
80 * struct fsl_hv_ioctl_start - start a partition
81 * @ret: return error code from the hypervisor
82 * @partition: the ID of the partition to control
83 * @entry_point: The offset within the guest IMA to start execution
84 * @load: If non-zero, reload the partition's images before starting
85 *
86 * Used by FSL_HV_IOCTL_PARTITION_START
87 */
88struct fsl_hv_ioctl_start {
89 __u32 ret;
90 __u32 partition;
91 __u32 entry_point;
92 __u32 load;
93};
94
95/**
96 * struct fsl_hv_ioctl_stop - stop a partition
97 * @ret: return error code from the hypervisor
98 * @partition: the ID of the partition to stop, or -1 for the calling
99 * partition
100 *
101 * Used by FSL_HV_IOCTL_PARTITION_STOP
102 */
103struct fsl_hv_ioctl_stop {
104 __u32 ret;
105 __u32 partition;
106};
107
108/**
109 * struct fsl_hv_ioctl_memcpy - copy memory between partitions
110 * @ret: return error code from the hypervisor
111 * @source: the partition ID of the source partition, or -1 for this
112 * partition
113 * @target: the partition ID of the target partition, or -1 for this
114 * partition
115 * @reserved: reserved, must be set to 0
116 * @local_vaddr: user-space virtual address of a buffer in the local
117 * partition
118 * @remote_paddr: guest physical address of a buffer in the
119 * remote partition
120 * @count: the number of bytes to copy. Both the local and remote
121 * buffers must be at least 'count' bytes long
122 *
123 * Used by FSL_HV_IOCTL_MEMCPY
124 *
125 * The 'local' partition is the partition that calls this ioctl. The
126 * 'remote' partition is a different partition. The data is copied from
127 * the 'source' partition to the 'target' partition.
128 *
129 * The buffer in the remote partition must be guest physically
130 * contiguous.
131 *
132 * This ioctl does not support copying memory between two remote
133 * partitions or within the same partition, so either 'source' or
134 * 'target' (but not both) must be -1. In other words, either
135 *
136 * source == local and target == remote
137 * or
138 * source == remote and target == local
139 */
140struct fsl_hv_ioctl_memcpy {
141 __u32 ret;
142 __u32 source;
143 __u32 target;
144 __u32 reserved; /* padding to ensure local_vaddr is aligned */
145 __u64 local_vaddr;
146 __u64 remote_paddr;
147 __u64 count;
148};
149
150/**
151 * struct fsl_hv_ioctl_doorbell - ring a doorbell
152 * @ret: return error code from the hypervisor
153 * @doorbell: the handle of the doorbell to ring
154 *
155 * Used by FSL_HV_IOCTL_DOORBELL
156 */
157struct fsl_hv_ioctl_doorbell {
158 __u32 ret;
159 __u32 doorbell;
160};
161
162/**
163 * struct fsl_hv_ioctl_prop - get/set a device tree property
164 * @ret: return error code from the hypervisor
165 * @handle: handle of partition whose tree to access
166 * @path: virtual address of path name of node to access
167 * @propname: virtual address of name of property to access
168 * @propval: virtual address of property data buffer
169 * @proplen: Size of property data buffer
170 * @reserved: reserved, must be set to 0
171 *
172 * Used by FSL_HV_IOCTL_GETPROP and FSL_HV_IOCTL_SETPROP
173 */
174struct fsl_hv_ioctl_prop {
175 __u32 ret;
176 __u32 handle;
177 __u64 path;
178 __u64 propname;
179 __u64 propval;
180 __u32 proplen;
181 __u32 reserved; /* padding to ensure structure is aligned */
182};
183
184/* The ioctl type, documented in ioctl-number.txt */
185#define FSL_HV_IOCTL_TYPE 0xAF
186
187/* Restart this or another partition */
188#define FSL_HV_IOCTL_PARTITION_RESTART \
189 _IOWR(FSL_HV_IOCTL_TYPE, 1, struct fsl_hv_ioctl_restart)
190
191/* Get a partition's status */
192#define FSL_HV_IOCTL_PARTITION_GET_STATUS \
193 _IOWR(FSL_HV_IOCTL_TYPE, 2, struct fsl_hv_ioctl_status)
194
195/* Boot another partition */
196#define FSL_HV_IOCTL_PARTITION_START \
197 _IOWR(FSL_HV_IOCTL_TYPE, 3, struct fsl_hv_ioctl_start)
198
199/* Stop this or another partition */
200#define FSL_HV_IOCTL_PARTITION_STOP \
201 _IOWR(FSL_HV_IOCTL_TYPE, 4, struct fsl_hv_ioctl_stop)
202
203/* Copy data from one partition to another */
204#define FSL_HV_IOCTL_MEMCPY \
205 _IOWR(FSL_HV_IOCTL_TYPE, 5, struct fsl_hv_ioctl_memcpy)
206
207/* Ring a doorbell */
208#define FSL_HV_IOCTL_DOORBELL \
209 _IOWR(FSL_HV_IOCTL_TYPE, 6, struct fsl_hv_ioctl_doorbell)
210
211/* Get a property from another guest's device tree */
212#define FSL_HV_IOCTL_GETPROP \
213 _IOWR(FSL_HV_IOCTL_TYPE, 7, struct fsl_hv_ioctl_prop)
214
215/* Set a property in another guest's device tree */
216#define FSL_HV_IOCTL_SETPROP \
217 _IOWR(FSL_HV_IOCTL_TYPE, 8, struct fsl_hv_ioctl_prop)
218
219#ifdef __KERNEL__
220
221/**
222 * fsl_hv_failover_register() - register a callback for failover events
223 * @nb: pointer to caller-supplied notifier_block structure
224 *
225 * This function is called by device drivers to register their callback
226 * functions for fail-over events.
227 *
228 * The caller should allocate a notifier_block object and initialize the
229 * 'priority' and 'notifier_call' fields.
230 */
231int fsl_hv_failover_register(struct notifier_block *nb);
232
233/**
234 * fsl_hv_failover_unregister() - unregister a callback for failover events
235 * @nb: the same 'nb' used in previous fsl_hv_failover_register call
236 */
237int fsl_hv_failover_unregister(struct notifier_block *nb);
238
239#endif
240
241#endif