author      Timur Tabi <timur@freescale.com>    2011-06-09 16:52:06 -0400
committer   Kumar Gala <galak@kernel.crashing.org>    2011-07-08 01:21:27 -0400
commit      6db7199407ca56f55bc0832fb124e1ad216ea57b (patch)
tree        62321e519dd0f74e24de02492fdd79c51173e08a /drivers
parent      8dbb6bc13617379a6534700e51634a3f88c9a513 (diff)
drivers/virt: introduce Freescale hypervisor management driver
Add the drivers/virt directory, which houses drivers that support
virtualization environments, and add the Freescale hypervisor management
driver.

The Freescale hypervisor management driver provides several services to
drivers and applications related to the Freescale hypervisor:

1. An ioctl interface for querying and managing partitions

2. A file interface to reading incoming doorbells

3. An interrupt handler for shutting down the partition upon receiving the
   shutdown doorbell from a manager partition

4. A kernel interface for receiving callbacks when a managed partition
   shuts down.

Signed-off-by: Timur Tabi <timur@freescale.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
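For illustration only (not part of the commit): a user-space client of this
driver might query a partition's status and then block for incoming doorbells
roughly as sketched below. The sketch assumes the FSL_HV_IOCTL_* definitions
and ioctl structures exported by <linux/fsl_hypervisor.h>, the /dev/fsl-hv
misc device created by this driver, and a hypothetical partition handle of 1.

    #include <stdio.h>
    #include <stdint.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fsl_hypervisor.h>

    int main(void)
    {
            struct fsl_hv_ioctl_status status = { .partition = 1 };
            uint32_t doorbell;
            int fd;

            fd = open("/dev/fsl-hv", O_RDWR);
            if (fd < 0)
                    return 1;

            /* Ask the hypervisor for the status of partition 1 */
            if (ioctl(fd, FSL_HV_IOCTL_PARTITION_GET_STATUS, &status) == 0)
                    printf("hcall ret=%u, status=%u\n", status.ret, status.status);

            /* Block until a doorbell is rung, then print its handle */
            if (read(fd, &doorbell, sizeof(doorbell)) == (ssize_t)sizeof(doorbell))
                    printf("doorbell %u\n", doorbell);

            close(fd);
            return 0;
    }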
Diffstat (limited to 'drivers')
-rw-r--r--    drivers/Kconfig                  2
-rw-r--r--    drivers/Makefile                 3
-rw-r--r--    drivers/virt/Kconfig            32
-rw-r--r--    drivers/virt/Makefile            5
-rw-r--r--    drivers/virt/fsl_hypervisor.c  937
5 files changed, 979 insertions, 0 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3bb154d8c8cc..3c1d4a59a864 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -126,4 +126,6 @@ source "drivers/hwspinlock/Kconfig"
 
 source "drivers/clocksource/Kconfig"
 
+source "drivers/virt/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 09f3232bcdcd..cd546ebab9a7 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -122,3 +122,6 @@ obj-y += ieee802154/
 obj-y += clk/
 
 obj-$(CONFIG_HWSPINLOCK) += hwspinlock/
+
+# Virtualization drivers
+obj-$(CONFIG_VIRT_DRIVERS) += virt/
diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig
new file mode 100644
index 000000000000..2dcdbc9364d8
--- /dev/null
+++ b/drivers/virt/Kconfig
@@ -0,0 +1,32 @@
1#
2# Virtualization support drivers
3#
4
5menuconfig VIRT_DRIVERS
6 bool "Virtualization drivers"
7 ---help---
8 Say Y here to get to see options for device drivers that support
9 virtualization environments.
10
11 If you say N, all options in this submenu will be skipped and disabled.
12
13if VIRT_DRIVERS
14
15config FSL_HV_MANAGER
16 tristate "Freescale hypervisor management driver"
17 depends on FSL_SOC
18 help
19 The Freescale hypervisor management driver provides several services
20 to drivers and applications related to the Freescale hypervisor:
21
22 1) An ioctl interface for querying and managing partitions.
23
24 2) A file interface to reading incoming doorbells.
25
26 3) An interrupt handler for shutting down the partition upon
27 receiving the shutdown doorbell from a manager partition.
28
29 4) A kernel interface for receiving callbacks when a managed
30 partition shuts down.
31
32endif
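# A possible configuration fragment (illustrative only, not part of this
# patch) for building the driver as a module on an FSL_SOC platform:
#
#   CONFIG_VIRT_DRIVERS=y
#   CONFIG_FSL_HV_MANAGER=m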
diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile
new file mode 100644
index 000000000000..c47f04dd343b
--- /dev/null
+++ b/drivers/virt/Makefile
@@ -0,0 +1,5 @@
1#
2# Makefile for drivers that support virtualization
3#
4
5obj-$(CONFIG_FSL_HV_MANAGER) += fsl_hypervisor.o
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
new file mode 100644
index 000000000000..1d3b8ebb3141
--- /dev/null
+++ b/drivers/virt/fsl_hypervisor.c
@@ -0,0 +1,937 @@
1/*
2 * Freescale Hypervisor Management Driver
3 *
4 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
5 * Author: Timur Tabi <timur@freescale.com>
6 *
7 * This file is licensed under the terms of the GNU General Public License
8 * version 2. This program is licensed "as is" without any warranty of any
9 * kind, whether express or implied.
10 *
11 * The Freescale hypervisor management driver provides several services to
12 * drivers and applications related to the Freescale hypervisor:
13 *
14 * 1. An ioctl interface for querying and managing partitions.
15 *
16 * 2. A file interface to reading incoming doorbells.
17 *
18 * 3. An interrupt handler for shutting down the partition upon receiving the
19 * shutdown doorbell from a manager partition.
20 *
21 * 4. A kernel interface for receiving callbacks when a managed partition
22 * shuts down.
23 */
24
25#include <linux/kernel.h>
26#include <linux/module.h>
27#include <linux/init.h>
28#include <linux/types.h>
29#include <linux/err.h>
30#include <linux/fs.h>
31#include <linux/miscdevice.h>
32#include <linux/mm.h>
33#include <linux/pagemap.h>
34#include <linux/slab.h>
35#include <linux/poll.h>
36#include <linux/of.h>
37#include <linux/reboot.h>
38#include <linux/uaccess.h>
39#include <linux/notifier.h>
40
41#include <linux/io.h>
42#include <asm/fsl_hcalls.h>
43
44#include <linux/fsl_hypervisor.h>
45
46static BLOCKING_NOTIFIER_HEAD(failover_subscribers);
47
48/*
49 * Ioctl interface for FSL_HV_IOCTL_PARTITION_RESTART
50 *
51 * Restart a running partition
52 */
53static long ioctl_restart(struct fsl_hv_ioctl_restart __user *p)
54{
55 struct fsl_hv_ioctl_restart param;
56
57 /* Get the parameters from the user */
58 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_restart)))
59 return -EFAULT;
60
61 param.ret = fh_partition_restart(param.partition);
62
63 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
64 return -EFAULT;
65
66 return 0;
67}
68
69/*
70 * Ioctl interface for FSL_HV_IOCTL_PARTITION_STATUS
71 *
72 * Query the status of a partition
73 */
74static long ioctl_status(struct fsl_hv_ioctl_status __user *p)
75{
76 struct fsl_hv_ioctl_status param;
77 u32 status;
78
79 /* Get the parameters from the user */
80 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_status)))
81 return -EFAULT;
82
83 param.ret = fh_partition_get_status(param.partition, &status);
84 if (!param.ret)
85 param.status = status;
86
87 if (copy_to_user(p, &param, sizeof(struct fsl_hv_ioctl_status)))
88 return -EFAULT;
89
90 return 0;
91}
92
93/*
94 * Ioctl interface for FSL_HV_IOCTL_PARTITION_START
95 *
96 * Start a stopped partition.
97 */
98static long ioctl_start(struct fsl_hv_ioctl_start __user *p)
99{
100 struct fsl_hv_ioctl_start param;
101
102 /* Get the parameters from the user */
103 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_start)))
104 return -EFAULT;
105
106 param.ret = fh_partition_start(param.partition, param.entry_point,
107 param.load);
108
109 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
110 return -EFAULT;
111
112 return 0;
113}
114
115/*
116 * Ioctl interface for FSL_HV_IOCTL_PARTITION_STOP
117 *
118 * Stop a running partition
119 */
120static long ioctl_stop(struct fsl_hv_ioctl_stop __user *p)
121{
122 struct fsl_hv_ioctl_stop param;
123
124 /* Get the parameters from the user */
125 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_stop)))
126 return -EFAULT;
127
128 param.ret = fh_partition_stop(param.partition);
129
130 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
131 return -EFAULT;
132
133 return 0;
134}
135
136/*
137 * Ioctl interface for FSL_HV_IOCTL_MEMCPY
138 *
139 * The FH_MEMCPY hypercall takes an array of address/address/size structures
140 * to represent the data being copied. As a convenience to the user, this
141 * ioctl takes a user-created buffer and a pointer to a guest physically
142 * contiguous buffer in the remote partition, and creates the
143 * address/address/size array for the hypercall.
144 */
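/*
 * A rough user-space sketch (not part of this patch), assuming the structure
 * layout from <linux/fsl_hypervisor.h>: to copy 'len' bytes from a local
 * buffer 'buf' into guest physical address 'dest' of a remote partition,
 * the caller sets source to -1 (meaning "this partition") and target to the
 * remote partition's handle:
 *
 *	struct fsl_hv_ioctl_memcpy cp = {
 *		.source = (__u32) -1,
 *		.target = remote_handle,
 *		.local_vaddr = (__u64) (uintptr_t) buf,
 *		.remote_paddr = dest,
 *		.count = len,
 *	};
 *
 *	ioctl(fd, FSL_HV_IOCTL_MEMCPY, &cp);
 *
 * On return, cp.ret holds the hypercall's status code.
 */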
145static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
146{
147 struct fsl_hv_ioctl_memcpy param;
148
149 struct page **pages = NULL;
150 void *sg_list_unaligned = NULL;
151 struct fh_sg_list *sg_list = NULL;
152
153 unsigned int num_pages;
154 unsigned long lb_offset; /* Offset within a page of the local buffer */
155
156 unsigned int i;
157 long ret = 0;
158 int num_pinned; /* return value from get_user_pages() */
159 phys_addr_t remote_paddr; /* The next address in the remote buffer */
160 uint32_t count; /* The number of bytes left to copy */
161
162 /* Get the parameters from the user */
163 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_memcpy)))
164 return -EFAULT;
165
166 /*
167 * One partition must be local, the other must be remote. In other
168 * words, if source and target are both -1, or are both not -1, then
169 * return an error.
170 */
171 if ((param.source == -1) == (param.target == -1))
172 return -EINVAL;
173
174 /*
175 * The array of pages returned by get_user_pages() covers only
176 * page-aligned memory. Since the user buffer is probably not
177 * page-aligned, we need to handle the discrepancy.
178 *
179 * We calculate the offset within a page of the S/G list, and make
180 * adjustments accordingly. This will result in a page list that looks
181 * like this:
182 *
183 * ---- <-- first page starts before the buffer
184 * | |
185 * |////|-> ----
186 * |////| | |
187 * ---- | |
188 * | |
189 * ---- | |
190 * |////| | |
191 * |////| | |
192 * |////| | |
193 * ---- | |
194 * | |
195 * ---- | |
196 * |////| | |
197 * |////| | |
198 * |////| | |
199 * ---- | |
200 * | |
201 * ---- | |
202 * |////| | |
203 * |////|-> ----
204 * | | <-- last page ends after the buffer
205 * ----
206 *
207 * The distance between the start of the first page and the start of the
208 * buffer is lb_offset. The hashed (///) areas are the parts of the
209 * page list that contain the actual buffer.
210 *
211 * The advantage of this approach is that the number of pages is
212 * equal to the number of entries in the S/G list that we give to the
213 * hypervisor.
214 */
215 lb_offset = param.local_vaddr & (PAGE_SIZE - 1);
216 num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
217
218 /* Allocate the buffers we need */
219
220 /*
221 * 'pages' is an array of struct page pointers that's initialized by
222 * get_user_pages().
223 */
224 pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL);
225 if (!pages) {
226 pr_debug("fsl-hv: could not allocate page list\n");
227 return -ENOMEM;
228 }
229
230 /*
231 * sg_list is the list of fh_sg_list objects that we pass to the
232 * hypervisor.
233 */
234 sg_list_unaligned = kmalloc(num_pages * sizeof(struct fh_sg_list) +
235 sizeof(struct fh_sg_list) - 1, GFP_KERNEL);
236 if (!sg_list_unaligned) {
237 pr_debug("fsl-hv: could not allocate S/G list\n");
238 ret = -ENOMEM;
239 goto exit;
240 }
241 sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list));
242
243 /* Get the physical addresses of the source buffer */
244 down_read(&current->mm->mmap_sem);
245 num_pinned = get_user_pages(current, current->mm,
246 param.local_vaddr - lb_offset, num_pages,
247 (param.source == -1) ? READ : WRITE,
248 0, pages, NULL);
249 up_read(&current->mm->mmap_sem);
250
251 if (num_pinned != num_pages) {
252 /* get_user_pages() failed */
253 pr_debug("fsl-hv: could not lock source buffer\n");
254 ret = (num_pinned < 0) ? num_pinned : -EFAULT;
255 goto exit;
256 }
257
258 /*
259 * Build the fh_sg_list[] array. The first page is special
260 * because it's misaligned.
261 */
262 if (param.source == -1) {
263 sg_list[0].source = page_to_phys(pages[0]) + lb_offset;
264 sg_list[0].target = param.remote_paddr;
265 } else {
266 sg_list[0].source = param.remote_paddr;
267 sg_list[0].target = page_to_phys(pages[0]) + lb_offset;
268 }
269 sg_list[0].size = min_t(uint64_t, param.count, PAGE_SIZE - lb_offset);
270
271 remote_paddr = param.remote_paddr + sg_list[0].size;
272 count = param.count - sg_list[0].size;
273
274 for (i = 1; i < num_pages; i++) {
275 if (param.source == -1) {
276 /* local to remote */
277 sg_list[i].source = page_to_phys(pages[i]);
278 sg_list[i].target = remote_paddr;
279 } else {
280 /* remote to local */
281 sg_list[i].source = remote_paddr;
282 sg_list[i].target = page_to_phys(pages[i]);
283 }
284 sg_list[i].size = min_t(uint64_t, count, PAGE_SIZE);
285
286 remote_paddr += sg_list[i].size;
287 count -= sg_list[i].size;
288 }
289
290 param.ret = fh_partition_memcpy(param.source, param.target,
291 virt_to_phys(sg_list), num_pages);
292
293exit:
294 if (pages) {
295 for (i = 0; i < num_pages; i++)
296 if (pages[i])
297 put_page(pages[i]);
298 }
299
300 kfree(sg_list_unaligned);
301 kfree(pages);
302
303 if (!ret)
304 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
305 return -EFAULT;
306
307 return ret;
308}
309
310/*
311 * Ioctl interface for FSL_HV_IOCTL_DOORBELL
312 *
313 * Ring a doorbell
314 */
315static long ioctl_doorbell(struct fsl_hv_ioctl_doorbell __user *p)
316{
317 struct fsl_hv_ioctl_doorbell param;
318
319 /* Get the parameters from the user. */
320 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_doorbell)))
321 return -EFAULT;
322
323 param.ret = ev_doorbell_send(param.doorbell);
324
325 if (copy_to_user(&p->ret, &param.ret, sizeof(__u32)))
326 return -EFAULT;
327
328 return 0;
329}
330
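/*
 * Ioctl interface for FSL_HV_IOCTL_GETPROP and FSL_HV_IOCTL_SETPROP
 *
 * Get or set a property in a partition's guest device tree. The path,
 * property name, and (for set) property value are copied in from user
 * space, and their physical addresses are passed to the hypervisor.
 */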
331static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
332{
333 struct fsl_hv_ioctl_prop param;
334 char __user *upath, *upropname;
335 void __user *upropval;
336 char *path = NULL, *propname = NULL;
337 void *propval = NULL;
338 int ret = 0;
339
340 /* Get the parameters from the user. */
341 if (copy_from_user(&param, p, sizeof(struct fsl_hv_ioctl_prop)))
342 return -EFAULT;
343
344 upath = (char __user *)(uintptr_t)param.path;
345 upropname = (char __user *)(uintptr_t)param.propname;
346 upropval = (void __user *)(uintptr_t)param.propval;
347
348 path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN);
349 if (IS_ERR(path)) {
350 ret = PTR_ERR(path);
351 goto out;
352 }
353
354 propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN);
355 if (IS_ERR(propname)) {
356 ret = PTR_ERR(propname);
357 goto out;
358 }
359
360 if (param.proplen > FH_DTPROP_MAX_PROPLEN) {
361 ret = -EINVAL;
362 goto out;
363 }
364
365 propval = kmalloc(param.proplen, GFP_KERNEL);
366 if (!propval) {
367 ret = -ENOMEM;
368 goto out;
369 }
370
371 if (set) {
372 if (copy_from_user(propval, upropval, param.proplen)) {
373 ret = -EFAULT;
374 goto out;
375 }
376
377 param.ret = fh_partition_set_dtprop(param.handle,
378 virt_to_phys(path),
379 virt_to_phys(propname),
380 virt_to_phys(propval),
381 param.proplen);
382 } else {
383 param.ret = fh_partition_get_dtprop(param.handle,
384 virt_to_phys(path),
385 virt_to_phys(propname),
386 virt_to_phys(propval),
387 &param.proplen);
388
389 if (param.ret == 0) {
390 if (copy_to_user(upropval, propval, param.proplen) ||
391 put_user(param.proplen, &p->proplen)) {
392 ret = -EFAULT;
393 goto out;
394 }
395 }
396 }
397
398 if (put_user(param.ret, &p->ret))
399 ret = -EFAULT;
400
401out:
402 kfree(path);
403 kfree(propval);
404 kfree(propname);
405
406 return ret;
407}
408
409/*
410 * Ioctl main entry point
411 */
412static long fsl_hv_ioctl(struct file *file, unsigned int cmd,
413 unsigned long argaddr)
414{
415 void __user *arg = (void __user *)argaddr;
416 long ret;
417
418 switch (cmd) {
419 case FSL_HV_IOCTL_PARTITION_RESTART:
420 ret = ioctl_restart(arg);
421 break;
422 case FSL_HV_IOCTL_PARTITION_GET_STATUS:
423 ret = ioctl_status(arg);
424 break;
425 case FSL_HV_IOCTL_PARTITION_START:
426 ret = ioctl_start(arg);
427 break;
428 case FSL_HV_IOCTL_PARTITION_STOP:
429 ret = ioctl_stop(arg);
430 break;
431 case FSL_HV_IOCTL_MEMCPY:
432 ret = ioctl_memcpy(arg);
433 break;
434 case FSL_HV_IOCTL_DOORBELL:
435 ret = ioctl_doorbell(arg);
436 break;
437 case FSL_HV_IOCTL_GETPROP:
438 ret = ioctl_dtprop(arg, 0);
439 break;
440 case FSL_HV_IOCTL_SETPROP:
441 ret = ioctl_dtprop(arg, 1);
442 break;
443 default:
444 pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n",
445 _IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd),
446 _IOC_SIZE(cmd));
447 return -ENOTTY;
448 }
449
450 return ret;
451}
452
453/* Linked list of processes that have us open */
454static struct list_head db_list;
455
456/* spinlock for db_list */
457static DEFINE_SPINLOCK(db_list_lock);
458
459/* The size of the doorbell event queue. This must be a power of two. */
460#define QSIZE 16
461
462/* Returns the next head/tail pointer, wrapping around the queue if necessary */
463#define nextp(x) (((x) + 1) & (QSIZE - 1))
464
465/* Per-open data structure */
466struct doorbell_queue {
467 struct list_head list;
468 spinlock_t lock;
469 wait_queue_head_t wait;
470 unsigned int head;
471 unsigned int tail;
472 uint32_t q[QSIZE];
473};
474
475/* Linked list of ISRs that we registered */
476struct list_head isr_list;
477
478/* Per-ISR data structure */
479struct doorbell_isr {
480 struct list_head list;
481 unsigned int irq;
482 uint32_t doorbell; /* The doorbell handle */
483 uint32_t partition; /* The partition handle, if used */
484};
485
486/*
487 * Add a doorbell to all of the doorbell queues
488 */
489static void fsl_hv_queue_doorbell(uint32_t doorbell)
490{
491 struct doorbell_queue *dbq;
492 unsigned long flags;
493
494 /* Prevent another core from modifying db_list */
495 spin_lock_irqsave(&db_list_lock, flags);
496
497 list_for_each_entry(dbq, &db_list, list) {
498 if (dbq->head != nextp(dbq->tail)) {
499 dbq->q[dbq->tail] = doorbell;
500 /*
501 * This memory barrier eliminates the need to grab
502 * the spinlock for dbq.
503 */
504 smp_wmb();
505 dbq->tail = nextp(dbq->tail);
506 wake_up_interruptible(&dbq->wait);
507 }
508 }
509
510 spin_unlock_irqrestore(&db_list_lock, flags);
511}
512
513/*
514 * Interrupt handler for all doorbells
515 *
516 * We use the same interrupt handler for all doorbells. Whenever a doorbell
517 * is rung, and we receive an interrupt, we just put the handle for that
518 * doorbell (passed to us as *data) into all of the queues.
519 */
520static irqreturn_t fsl_hv_isr(int irq, void *data)
521{
522 fsl_hv_queue_doorbell((uintptr_t) data);
523
524 return IRQ_HANDLED;
525}
526
527/*
528 * State change thread function
529 *
530 * The state change notification arrives in an interrupt, but we can't call
531 * blocking_notifier_call_chain() in an interrupt handler. We could call
532 * atomic_notifier_call_chain(), but that would require the clients' call-back
533 * function to run in interrupt context. Since we don't want to impose that
534 * restriction on the clients, we use a threaded IRQ to process the
535 * notification in kernel context.
536 */
537static irqreturn_t fsl_hv_state_change_thread(int irq, void *data)
538{
539 struct doorbell_isr *dbisr = data;
540
541 blocking_notifier_call_chain(&failover_subscribers, dbisr->partition,
542 NULL);
543
544 return IRQ_HANDLED;
545}
546
547/*
548 * Interrupt handler for state-change doorbells
549 */
550static irqreturn_t fsl_hv_state_change_isr(int irq, void *data)
551{
552 unsigned int status;
553 struct doorbell_isr *dbisr = data;
554 int ret;
555
556 /* It's still a doorbell, so add it to all the queues. */
557 fsl_hv_queue_doorbell(dbisr->doorbell);
558
559 /* Determine the new state, and if it's stopped, notify the clients. */
560 ret = fh_partition_get_status(dbisr->partition, &status);
561 if (!ret && (status == FH_PARTITION_STOPPED))
562 return IRQ_WAKE_THREAD;
563
564 return IRQ_HANDLED;
565}
566
567/*
568 * Returns a bitmask indicating whether a read will block
569 */
570static unsigned int fsl_hv_poll(struct file *filp, struct poll_table_struct *p)
571{
572 struct doorbell_queue *dbq = filp->private_data;
573 unsigned long flags;
574 unsigned int mask;
575
576 spin_lock_irqsave(&dbq->lock, flags);
577
578 poll_wait(filp, &dbq->wait, p);
579 mask = (dbq->head == dbq->tail) ? 0 : (POLLIN | POLLRDNORM);
580
581 spin_unlock_irqrestore(&dbq->lock, flags);
582
583 return mask;
584}
585
586/*
587 * Return the handles for any incoming doorbells
588 *
589 * If there are doorbell handles in the queue for this open instance, then
590 * return them to the caller as an array of 32-bit integers. Otherwise,
591 * block until there is at least one handle to return.
592 */
593static ssize_t fsl_hv_read(struct file *filp, char __user *buf, size_t len,
594 loff_t *off)
595{
596 struct doorbell_queue *dbq = filp->private_data;
597 uint32_t __user *p = (uint32_t __user *) buf; /* for put_user() */
598 unsigned long flags;
599 ssize_t count = 0;
600
601 /* Make sure we stop when the user buffer is full. */
602 while (len >= sizeof(uint32_t)) {
603 uint32_t dbell; /* Local copy of doorbell queue data */
604
605 spin_lock_irqsave(&dbq->lock, flags);
606
607 /*
608 * If the queue is empty, then either we're done or we need
609 * to block. If the application specified O_NONBLOCK, then
610 * we return the appropriate error code.
611 */
612 if (dbq->head == dbq->tail) {
613 spin_unlock_irqrestore(&dbq->lock, flags);
614 if (count)
615 break;
616 if (filp->f_flags & O_NONBLOCK)
617 return -EAGAIN;
618 if (wait_event_interruptible(dbq->wait,
619 dbq->head != dbq->tail))
620 return -ERESTARTSYS;
621 continue;
622 }
623
624 /*
625 * Even though we have an smp_wmb() in the ISR, the core
626 * might speculatively execute the "dbell = ..." below while
627 * it's evaluating the if-statement above. In that case, the
628 * value put into dbell could be stale if the core accepts the
629 * speculation. To prevent that, we need a read memory barrier
630 * here as well.
631 */
632 smp_rmb();
633
634 /* Copy the data to a temporary local buffer, because
635 * we can't call copy_to_user() from inside a spinlock
636 */
637 dbell = dbq->q[dbq->head];
638 dbq->head = nextp(dbq->head);
639
640 spin_unlock_irqrestore(&dbq->lock, flags);
641
642 if (put_user(dbell, p))
643 return -EFAULT;
644 p++;
645 count += sizeof(uint32_t);
646 len -= sizeof(uint32_t);
647 }
648
649 return count;
650}
651
652/*
653 * Open the driver and prepare for reading doorbells.
654 *
655 * Every time an application opens the driver, we create a doorbell queue
656 * for that file handle. This queue is used for any incoming doorbells.
657 */
658static int fsl_hv_open(struct inode *inode, struct file *filp)
659{
660 struct doorbell_queue *dbq;
661 unsigned long flags;
662 int ret = 0;
663
664 dbq = kzalloc(sizeof(struct doorbell_queue), GFP_KERNEL);
665 if (!dbq) {
666 pr_err("fsl-hv: out of memory\n");
667 return -ENOMEM;
668 }
669
670 spin_lock_init(&dbq->lock);
671 init_waitqueue_head(&dbq->wait);
672
673 spin_lock_irqsave(&db_list_lock, flags);
674 list_add(&dbq->list, &db_list);
675 spin_unlock_irqrestore(&db_list_lock, flags);
676
677 filp->private_data = dbq;
678
679 return ret;
680}
681
682/*
683 * Close the driver
684 */
685static int fsl_hv_close(struct inode *inode, struct file *filp)
686{
687 struct doorbell_queue *dbq = filp->private_data;
688 unsigned long flags;
689
690 int ret = 0;
691
692 spin_lock_irqsave(&db_list_lock, flags);
693 list_del(&dbq->list);
694 spin_unlock_irqrestore(&db_list_lock, flags);
695
696 kfree(dbq);
697
698 return ret;
699}
700
701static const struct file_operations fsl_hv_fops = {
702 .owner = THIS_MODULE,
703 .open = fsl_hv_open,
704 .release = fsl_hv_close,
705 .poll = fsl_hv_poll,
706 .read = fsl_hv_read,
707 .unlocked_ioctl = fsl_hv_ioctl,
708};
709
710static struct miscdevice fsl_hv_misc_dev = {
711 MISC_DYNAMIC_MINOR,
712 "fsl-hv",
713 &fsl_hv_fops
714};
715
716static irqreturn_t fsl_hv_shutdown_isr(int irq, void *data)
717{
718 orderly_poweroff(false);
719
720 return IRQ_HANDLED;
721}
722
723/*
724 * Returns the handle of the parent of the given node
725 *
726 * The handle is the value of the 'hv-handle' property
727 */
728static int get_parent_handle(struct device_node *np)
729{
730 struct device_node *parent;
731 const uint32_t *prop;
732 uint32_t handle;
733 int len;
734
735 parent = of_get_parent(np);
736 if (!parent)
737 /* It's not really possible for this to fail */
738 return -ENODEV;
739
740 /*
741 * The proper name for the handle property is "hv-handle", but some
742 * older versions of the hypervisor used "reg".
743 */
744 prop = of_get_property(parent, "hv-handle", &len);
745 if (!prop)
746 prop = of_get_property(parent, "reg", &len);
747
748 if (!prop || (len != sizeof(uint32_t))) {
749 /* This can happen only if the node is malformed */
750 of_node_put(parent);
751 return -ENODEV;
752 }
753
754 handle = be32_to_cpup(prop);
755 of_node_put(parent);
756
757 return handle;
758}
759
760/*
761 * Register a callback for failover events
762 *
763 * This function is called by device drivers to register their callback
764 * functions for fail-over events.
765 */
766int fsl_hv_failover_register(struct notifier_block *nb)
767{
768 return blocking_notifier_chain_register(&failover_subscribers, nb);
769}
770EXPORT_SYMBOL(fsl_hv_failover_register);
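/*
 * A minimal usage sketch (not taken from this patch): a client driver would
 * typically embed a struct notifier_block and register it at probe time.
 * The handle of the managed partition that stopped is passed as the
 * 'action' argument to the callback.
 *
 *	static int my_failover_cb(struct notifier_block *nb,
 *				  unsigned long partition, void *data)
 *	{
 *		pr_info("partition %lu stopped\n", partition);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_failover_cb,
 *	};
 *
 *	fsl_hv_failover_register(&my_nb);
 */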
771
772/*
773 * Unregister a callback for failover events
774 */
775int fsl_hv_failover_unregister(struct notifier_block *nb)
776{
777 return blocking_notifier_chain_unregister(&failover_subscribers, nb);
778}
779EXPORT_SYMBOL(fsl_hv_failover_unregister);
780
781/*
782 * Return TRUE if we're running under FSL hypervisor
783 *
784 * This function checks to see if we're running under the Freescale
785 * hypervisor, and returns zero if we're not, or non-zero if we are.
786 *
787 * First, it checks if MSR[GS]==1, which means we're running under some
788 * hypervisor. Then it checks if there is a hypervisor node in the device
789 * tree. Currently, that means there needs to be a node in the root called
790 * "hypervisor" and which has a property named "fsl,hv-version".
791 */
792static int has_fsl_hypervisor(void)
793{
794 struct device_node *node;
795 int ret;
796
797 if (!(mfmsr() & MSR_GS))
798 return 0;
799
800 node = of_find_node_by_path("/hypervisor");
801 if (!node)
802 return 0;
803
804 ret = of_find_property(node, "fsl,hv-version", NULL) != NULL;
805
806 of_node_put(node);
807
808 return ret;
809}
810
811/*
812 * Freescale hypervisor management driver init
813 *
814 * This function is called when this module is loaded.
815 *
816 * Register ourselves as a miscellaneous driver. This will register the
817 * fops structure and create the right sysfs entries for udev.
818 */
819static int __init fsl_hypervisor_init(void)
820{
821 struct device_node *np;
822 struct doorbell_isr *dbisr, *n;
823 int ret;
824
825 pr_info("Freescale hypervisor management driver\n");
826
827 if (!has_fsl_hypervisor()) {
828 pr_info("fsl-hv: no hypervisor found\n");
829 return -ENODEV;
830 }
831
832 ret = misc_register(&fsl_hv_misc_dev);
833 if (ret) {
834 pr_err("fsl-hv: cannot register device\n");
835 return ret;
836 }
837
838 INIT_LIST_HEAD(&db_list);
839 INIT_LIST_HEAD(&isr_list);
840
841 for_each_compatible_node(np, NULL, "epapr,hv-receive-doorbell") {
842 unsigned int irq;
843 const uint32_t *handle;
844
845 handle = of_get_property(np, "interrupts", NULL);
846 irq = irq_of_parse_and_map(np, 0);
847 if (!handle || (irq == NO_IRQ)) {
848 pr_err("fsl-hv: no 'interrupts' property in %s node\n",
849 np->full_name);
850 continue;
851 }
852
853 dbisr = kzalloc(sizeof(*dbisr), GFP_KERNEL);
854 if (!dbisr)
855 goto out_of_memory;
856
857 dbisr->irq = irq;
858 dbisr->doorbell = be32_to_cpup(handle);
859
860 if (of_device_is_compatible(np, "fsl,hv-shutdown-doorbell")) {
861 /* The shutdown doorbell gets its own ISR */
862 ret = request_irq(irq, fsl_hv_shutdown_isr, 0,
863 np->name, NULL);
864 } else if (of_device_is_compatible(np,
865 "fsl,hv-state-change-doorbell")) {
866 /*
867 * The state change doorbell triggers a notification if
868 * the state of the managed partition changes to
869 * "stopped". We need a separate interrupt handler for
870 * that, and we also need to know the handle of the
871 * target partition, not just the handle of the
872 * doorbell.
873 */
874 dbisr->partition = ret = get_parent_handle(np);
875 if (ret < 0) {
876 pr_err("fsl-hv: node %s has missing or "
877 "malformed parent\n", np->full_name);
878 kfree(dbisr);
879 continue;
880 }
881 ret = request_threaded_irq(irq, fsl_hv_state_change_isr,
882 fsl_hv_state_change_thread,
883 0, np->name, dbisr);
884 } else
885 ret = request_irq(irq, fsl_hv_isr, 0, np->name, dbisr);
886
887 if (ret < 0) {
888 pr_err("fsl-hv: could not request irq %u for node %s\n",
889 irq, np->full_name);
890 kfree(dbisr);
891 continue;
892 }
893
894 list_add(&dbisr->list, &isr_list);
895
896 pr_info("fsl-hv: registered handler for doorbell %u\n",
897 dbisr->doorbell);
898 }
899
900 return 0;
901
902out_of_memory:
903 list_for_each_entry_safe(dbisr, n, &isr_list, list) {
904 free_irq(dbisr->irq, dbisr);
905 list_del(&dbisr->list);
906 kfree(dbisr);
907 }
908
909 misc_deregister(&fsl_hv_misc_dev);
910
911 return -ENOMEM;
912}
913
914/*
915 * Freescale hypervisor management driver termination
916 *
917 * This function is called when this driver is unloaded.
918 */
919static void __exit fsl_hypervisor_exit(void)
920{
921 struct doorbell_isr *dbisr, *n;
922
923 list_for_each_entry_safe(dbisr, n, &isr_list, list) {
924 free_irq(dbisr->irq, dbisr);
925 list_del(&dbisr->list);
926 kfree(dbisr);
927 }
928
929 misc_deregister(&fsl_hv_misc_dev);
930}
931
932module_init(fsl_hypervisor_init);
933module_exit(fsl_hypervisor_exit);
934
935MODULE_AUTHOR("Timur Tabi <timur@freescale.com>");
936MODULE_DESCRIPTION("Freescale hypervisor management driver");
937MODULE_LICENSE("GPL v2");