Diffstat (limited to 'drivers/misc/vmw_vmci')
-rw-r--r--  drivers/misc/vmw_vmci/Kconfig                 16
-rw-r--r--  drivers/misc/vmw_vmci/Makefile                 4
-rw-r--r--  drivers/misc/vmw_vmci/vmci_context.c        1214
-rw-r--r--  drivers/misc/vmw_vmci/vmci_context.h         182
-rw-r--r--  drivers/misc/vmw_vmci/vmci_datagram.c        500
-rw-r--r--  drivers/misc/vmw_vmci/vmci_datagram.h         52
-rw-r--r--  drivers/misc/vmw_vmci/vmci_doorbell.c        604
-rw-r--r--  drivers/misc/vmw_vmci/vmci_doorbell.h         51
-rw-r--r--  drivers/misc/vmw_vmci/vmci_driver.c          117
-rw-r--r--  drivers/misc/vmw_vmci/vmci_driver.h           50
-rw-r--r--  drivers/misc/vmw_vmci/vmci_event.c           224
-rw-r--r--  drivers/misc/vmw_vmci/vmci_event.h            25
-rw-r--r--  drivers/misc/vmw_vmci/vmci_guest.c           759
-rw-r--r--  drivers/misc/vmw_vmci/vmci_handle_array.c    142
-rw-r--r--  drivers/misc/vmw_vmci/vmci_handle_array.h     52
-rw-r--r--  drivers/misc/vmw_vmci/vmci_host.c           1043
-rw-r--r--  drivers/misc/vmw_vmci/vmci_queue_pair.c     3425
-rw-r--r--  drivers/misc/vmw_vmci/vmci_queue_pair.h      191
-rw-r--r--  drivers/misc/vmw_vmci/vmci_resource.c        229
-rw-r--r--  drivers/misc/vmw_vmci/vmci_resource.h         59
-rw-r--r--  drivers/misc/vmw_vmci/vmci_route.c           226
-rw-r--r--  drivers/misc/vmw_vmci/vmci_route.h            30
22 files changed, 9195 insertions, 0 deletions
diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig
new file mode 100644
index 000000000000..39c2ecadb273
--- /dev/null
+++ b/drivers/misc/vmw_vmci/Kconfig
@@ -0,0 +1,16 @@
1#
2# VMware VMCI device
3#
4
5config VMWARE_VMCI
6 tristate "VMware VMCI Driver"
7 depends on X86 && PCI
8 help
9 This is VMware's Virtual Machine Communication Interface. It enables
10 high-speed communication between host and guest in a virtual
11 environment via the VMCI virtual device.
12
13 If unsure, say N.
14
15 To compile this driver as a module, choose M here: the
16 module will be called vmw_vmci.
diff --git a/drivers/misc/vmw_vmci/Makefile b/drivers/misc/vmw_vmci/Makefile
new file mode 100644
index 000000000000..4da9893c3942
--- /dev/null
+++ b/drivers/misc/vmw_vmci/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci.o
2vmw_vmci-y += vmci_context.o vmci_datagram.o vmci_doorbell.o \
3 vmci_driver.o vmci_event.o vmci_guest.o vmci_handle_array.o \
4 vmci_host.o vmci_queue_pair.o vmci_resource.o vmci_route.o
diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c
new file mode 100644
index 000000000000..f866a4baecb5
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_context.c
@@ -0,0 +1,1214 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18#include <linux/highmem.h>
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/sched.h>
22#include <linux/slab.h>
23
24#include "vmci_queue_pair.h"
25#include "vmci_datagram.h"
26#include "vmci_doorbell.h"
27#include "vmci_context.h"
28#include "vmci_driver.h"
29#include "vmci_event.h"
30
31/*
32 * List of current VMCI contexts. Contexts can be added by
33 * vmci_ctx_create() and removed via vmci_ctx_destroy().
34 * These, along with context lookup, are protected by the
35 * list structure's lock.
36 */
37static struct {
38 struct list_head head;
39 spinlock_t lock; /* Spinlock for context list operations */
40} ctx_list = {
41 .head = LIST_HEAD_INIT(ctx_list.head),
42 .lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
43};
44
45/* Used by contexts that did not set up notify flag pointers */
46static bool ctx_dummy_notify;
47
48static void ctx_signal_notify(struct vmci_ctx *context)
49{
50 *context->notify = true;
51}
52
53static void ctx_clear_notify(struct vmci_ctx *context)
54{
55 *context->notify = false;
56}
57
58/*
59 * If nothing requires the attention of the guest, clears both
60 * notify flag and call.
61 */
62static void ctx_clear_notify_call(struct vmci_ctx *context)
63{
64 if (context->pending_datagrams == 0 &&
65 vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
66 ctx_clear_notify(context);
67}
68
69/*
70 * Sets the context's notify flag iff datagrams are pending for this
71 * context. Called from vmci_setup_notify().
72 */
73void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
74{
75 spin_lock(&context->lock);
76 if (context->pending_datagrams)
77 ctx_signal_notify(context);
78 spin_unlock(&context->lock);
79}
80
81/*
82 * Allocates and initializes a VMCI context.
83 */
84struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
85 uintptr_t event_hnd,
86 int user_version,
87 const struct cred *cred)
88{
89 struct vmci_ctx *context;
90 int error;
91
92 if (cid == VMCI_INVALID_ID) {
93 pr_devel("Invalid context ID for VMCI context\n");
94 error = -EINVAL;
95 goto err_out;
96 }
97
98 if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
99 pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
100 priv_flags);
101 error = -EINVAL;
102 goto err_out;
103 }
104
105 if (user_version == 0) {
106 pr_devel("Invalid user_version %d\n", user_version);
107 error = -EINVAL;
108 goto err_out;
109 }
110
111 context = kzalloc(sizeof(*context), GFP_KERNEL);
112 if (!context) {
113 pr_warn("Failed to allocate memory for VMCI context\n");
114 error = -ENOMEM;
115 goto err_out;
116 }
117
118 kref_init(&context->kref);
119 spin_lock_init(&context->lock);
120 INIT_LIST_HEAD(&context->list_item);
121 INIT_LIST_HEAD(&context->datagram_queue);
122 INIT_LIST_HEAD(&context->notifier_list);
123
124 /* Initialize host-specific VMCI context. */
125 init_waitqueue_head(&context->host_context.wait_queue);
126
127 context->queue_pair_array = vmci_handle_arr_create(0);
128 if (!context->queue_pair_array) {
129 error = -ENOMEM;
130 goto err_free_ctx;
131 }
132
133 context->doorbell_array = vmci_handle_arr_create(0);
134 if (!context->doorbell_array) {
135 error = -ENOMEM;
136 goto err_free_qp_array;
137 }
138
139 context->pending_doorbell_array = vmci_handle_arr_create(0);
140 if (!context->pending_doorbell_array) {
141 error = -ENOMEM;
142 goto err_free_db_array;
143 }
144
145 context->user_version = user_version;
146
147 context->priv_flags = priv_flags;
148
149 if (cred)
150 context->cred = get_cred(cred);
151
152 context->notify = &ctx_dummy_notify;
153 context->notify_page = NULL;
154
155 /*
156 * If we collide with an existing context we generate a new
157 * one and use it instead. The VMX will determine if regeneration
158 * is okay. Since there are never anywhere near 4B - 16 VMs
159 * running on a given host, the loop below will terminate.
160 */
161 spin_lock(&ctx_list.lock);
162
163 while (vmci_ctx_exists(cid)) {
164 /* We reserve the lowest 16 ids for fixed contexts. */
165 cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
166 if (cid == VMCI_INVALID_ID)
167 cid = VMCI_RESERVED_CID_LIMIT;
168 }
169 context->cid = cid;
170
171 list_add_tail_rcu(&context->list_item, &ctx_list.head);
172 spin_unlock(&ctx_list.lock);
173
174 return context;
175
176 err_free_db_array:
177 vmci_handle_arr_destroy(context->doorbell_array);
178 err_free_qp_array:
179 vmci_handle_arr_destroy(context->queue_pair_array);
180 err_free_ctx:
181 kfree(context);
182 err_out:
183 return ERR_PTR(error);
184}
185
186/*
187 * Destroy VMCI context.
188 */
189void vmci_ctx_destroy(struct vmci_ctx *context)
190{
191 spin_lock(&ctx_list.lock);
192 list_del_rcu(&context->list_item);
193 spin_unlock(&ctx_list.lock);
194 synchronize_rcu();
195
196 vmci_ctx_put(context);
197}
198
199/*
200 * Fire notification for all contexts interested in given cid.
201 */
202static int ctx_fire_notification(u32 context_id, u32 priv_flags)
203{
204 u32 i, array_size;
205 struct vmci_ctx *sub_ctx;
206 struct vmci_handle_arr *subscriber_array;
207 struct vmci_handle context_handle =
208 vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
209
210 /*
211 * We create an array to hold the subscribers we find when
212 * scanning through all contexts.
213 */
214 subscriber_array = vmci_handle_arr_create(0);
215 if (subscriber_array == NULL)
216 return VMCI_ERROR_NO_MEM;
217
218 /*
219 * Scan all contexts to find who is interested in being
220 * notified about given contextID.
221 */
222 rcu_read_lock();
223 list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
224 struct vmci_handle_list *node;
225
226 /*
227 * We only deliver notifications of the removal of
228 * contexts, if the two contexts are allowed to
229 * interact.
230 */
231 if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
232 continue;
233
234 list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
235 if (!vmci_handle_is_equal(node->handle, context_handle))
236 continue;
237
238 vmci_handle_arr_append_entry(&subscriber_array,
239 vmci_make_handle(sub_ctx->cid,
240 VMCI_EVENT_HANDLER));
241 }
242 }
243 rcu_read_unlock();
244
245 /* Fire event to all subscribers. */
246 array_size = vmci_handle_arr_get_size(subscriber_array);
247 for (i = 0; i < array_size; i++) {
248 int result;
249 struct vmci_event_ctx ev;
250
251 ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
252 ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
253 VMCI_CONTEXT_RESOURCE_ID);
254 ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
255 ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
256 ev.payload.context_id = context_id;
257
258 result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
259 &ev.msg.hdr, false);
260 if (result < VMCI_SUCCESS) {
261 pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
262 ev.msg.event_data.event,
263 ev.msg.hdr.dst.context);
264 /* We continue to enqueue on next subscriber. */
265 }
266 }
267 vmci_handle_arr_destroy(subscriber_array);
268
269 return VMCI_SUCCESS;
270}
271
272/*
273 * Returns the current number of pending datagrams. The call may
274 * also serve as a synchronization point for the datagram queue,
275 * as no enqueue operations can occur concurrently.
276 */
277int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
278{
279 struct vmci_ctx *context;
280
281 context = vmci_ctx_get(cid);
282 if (context == NULL)
283 return VMCI_ERROR_INVALID_ARGS;
284
285 spin_lock(&context->lock);
286 if (pending)
287 *pending = context->pending_datagrams;
288 spin_unlock(&context->lock);
289 vmci_ctx_put(context);
290
291 return VMCI_SUCCESS;
292}
293
294/*
295 * Queues a VMCI datagram for the appropriate target VM context.
296 */
297int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
298{
299 struct vmci_datagram_queue_entry *dq_entry;
300 struct vmci_ctx *context;
301 struct vmci_handle dg_src;
302 size_t vmci_dg_size;
303
304 vmci_dg_size = VMCI_DG_SIZE(dg);
305 if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
306 pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
307 return VMCI_ERROR_INVALID_ARGS;
308 }
309
310 /* Get the target VM's VMCI context. */
311 context = vmci_ctx_get(cid);
312 if (!context) {
313 pr_devel("Invalid context (ID=0x%x)\n", cid);
314 return VMCI_ERROR_INVALID_ARGS;
315 }
316
317 /* Allocate guest call entry and add it to the target VM's queue. */
318 dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
319 if (dq_entry == NULL) {
320 pr_warn("Failed to allocate memory for datagram\n");
321 vmci_ctx_put(context);
322 return VMCI_ERROR_NO_MEM;
323 }
324 dq_entry->dg = dg;
325 dq_entry->dg_size = vmci_dg_size;
326 dg_src = dg->src;
327 INIT_LIST_HEAD(&dq_entry->list_item);
328
329 spin_lock(&context->lock);
330
331 /*
332 * We put a higher limit on datagrams from the hypervisor. If
333 * the pending datagram is not from hypervisor, then we check
334 * if enqueueing it would exceed the
335 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination. If
336 * the pending datagram is from hypervisor, we allow it to be
337 * queued at the destination side provided we don't reach the
338 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
339 */
340 if (context->datagram_queue_size + vmci_dg_size >=
341 VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
342 (!vmci_handle_is_equal(dg_src,
343 vmci_make_handle
344 (VMCI_HYPERVISOR_CONTEXT_ID,
345 VMCI_CONTEXT_RESOURCE_ID)) ||
346 context->datagram_queue_size + vmci_dg_size >=
347 VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
348 spin_unlock(&context->lock);
349 vmci_ctx_put(context);
350 kfree(dq_entry);
351 pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
352 return VMCI_ERROR_NO_RESOURCES;
353 }
354
355 list_add(&dq_entry->list_item, &context->datagram_queue);
356 context->pending_datagrams++;
357 context->datagram_queue_size += vmci_dg_size;
358 ctx_signal_notify(context);
359 wake_up(&context->host_context.wait_queue);
360 spin_unlock(&context->lock);
361 vmci_ctx_put(context);
362
363 return vmci_dg_size;
364}
365
366/*
367 * Verifies whether a context with the specified context ID exists.
368 * FIXME: utility is dubious as no decisions can be reliably made
369 * using this data, since contexts can appear and disappear at any time.
370 */
371bool vmci_ctx_exists(u32 cid)
372{
373 struct vmci_ctx *context;
374 bool exists = false;
375
376 rcu_read_lock();
377
378 list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
379 if (context->cid == cid) {
380 exists = true;
381 break;
382 }
383 }
384
385 rcu_read_unlock();
386 return exists;
387}
388
389/*
390 * Retrieves VMCI context corresponding to the given cid.
391 */
392struct vmci_ctx *vmci_ctx_get(u32 cid)
393{
394 struct vmci_ctx *c, *context = NULL;
395
396 if (cid == VMCI_INVALID_ID)
397 return NULL;
398
399 rcu_read_lock();
400 list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
401 if (c->cid == cid) {
402 /*
403 * The context owner drops its own reference to the
404 * context only after removing it from the list and
405 * waiting for RCU grace period to expire. This
406 * means that we are not about to increase the
407 * reference count of something that is in the
408 * process of being destroyed.
409 */
410 context = c;
411 kref_get(&context->kref);
412 break;
413 }
414 }
415 rcu_read_unlock();
416
417 return context;
418}
419
420/*
421 * Deallocates all parts of a context data structure. This
422 * function doesn't lock the context, because it assumes that
423 * the caller was holding the last reference to the context.
424 */
425static void ctx_free_ctx(struct kref *kref)
426{
427 struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
428 struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
429 struct vmci_handle temp_handle;
430 struct vmci_handle_list *notifier, *tmp;
431
432 /*
433 * Fire event to all contexts interested in knowing this
434 * context is dying.
435 */
436 ctx_fire_notification(context->cid, context->priv_flags);
437
438 /*
439 * Cleanup all queue pair resources attached to context. If
440 * the VM dies without cleaning up, this code will make sure
441 * that no resources are leaked.
442 */
443 temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
444 while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
445 if (vmci_qp_broker_detach(temp_handle,
446 context) < VMCI_SUCCESS) {
447 /*
448 * When vmci_qp_broker_detach() succeeds it
449 * removes the handle from the array. If
450 * detach fails, we must remove the handle
451 * ourselves.
452 */
453 vmci_handle_arr_remove_entry(context->queue_pair_array,
454 temp_handle);
455 }
456 temp_handle =
457 vmci_handle_arr_get_entry(context->queue_pair_array, 0);
458 }
459
460 /*
461 * It is fine to destroy this without locking the callQueue, as
462 * this is the only thread having a reference to the context.
463 */
464 list_for_each_entry_safe(dq_entry, dq_entry_tmp,
465 &context->datagram_queue, list_item) {
466 WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
467 list_del(&dq_entry->list_item);
468 kfree(dq_entry->dg);
469 kfree(dq_entry);
470 }
471
472 list_for_each_entry_safe(notifier, tmp,
473 &context->notifier_list, node) {
474 list_del(&notifier->node);
475 kfree(notifier);
476 }
477
478 vmci_handle_arr_destroy(context->queue_pair_array);
479 vmci_handle_arr_destroy(context->doorbell_array);
480 vmci_handle_arr_destroy(context->pending_doorbell_array);
481 vmci_ctx_unset_notify(context);
482 if (context->cred)
483 put_cred(context->cred);
484 kfree(context);
485}
486
487/*
488 * Drops reference to VMCI context. If this is the last reference to
489 * the context it will be deallocated. A context is created with
490 * a reference count of one, and on destroy, it is removed from
491 * the context list before its reference count is decremented. Thus,
492 * if we reach zero, we are sure that nobody else is about to increment
493 * it (they need the entry in the context list for that), and so there
494 * is no need for locking.
495 */
496void vmci_ctx_put(struct vmci_ctx *context)
497{
498 kref_put(&context->kref, ctx_free_ctx);
499}
500
501/*
502 * Dequeues the next datagram and returns it to caller.
503 * The caller passes in a pointer to the max size datagram
504 * it can handle and the datagram is only unqueued if its
505 * size is less than max_size. If it is larger, max_size is set
506 * to the size of the datagram to give the caller a chance to
507 * set up a larger buffer for the guestcall.
508 */
509int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
510 size_t *max_size,
511 struct vmci_datagram **dg)
512{
513 struct vmci_datagram_queue_entry *dq_entry;
514 struct list_head *list_item;
515 int rv;
516
517 /* Dequeue the next datagram entry. */
518 spin_lock(&context->lock);
519 if (context->pending_datagrams == 0) {
520 ctx_clear_notify_call(context);
521 spin_unlock(&context->lock);
522 pr_devel("No datagrams pending\n");
523 return VMCI_ERROR_NO_MORE_DATAGRAMS;
524 }
525
526 list_item = context->datagram_queue.next;
527
528 dq_entry =
529 list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
530
531 /* Check size of caller's buffer. */
532 if (*max_size < dq_entry->dg_size) {
533 *max_size = dq_entry->dg_size;
534 spin_unlock(&context->lock);
535 pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
536 (u32) *max_size);
537 return VMCI_ERROR_NO_MEM;
538 }
539
540 list_del(list_item);
541 context->pending_datagrams--;
542 context->datagram_queue_size -= dq_entry->dg_size;
543 if (context->pending_datagrams == 0) {
544 ctx_clear_notify_call(context);
545 rv = VMCI_SUCCESS;
546 } else {
547 /*
548 * Return the size of the next datagram.
549 */
550 struct vmci_datagram_queue_entry *next_entry;
551
552 list_item = context->datagram_queue.next;
553 next_entry =
554 list_entry(list_item, struct vmci_datagram_queue_entry,
555 list_item);
556
557 /*
558 * The following size_t -> int truncation is fine as
559 * the maximum size of a (routable) datagram is 68KB.
560 */
561 rv = (int)next_entry->dg_size;
562 }
563 spin_unlock(&context->lock);
564
565 /* Caller must free datagram. */
566 *dg = dq_entry->dg;
567 dq_entry->dg = NULL;
568 kfree(dq_entry);
569
570 return rv;
571}
572
573/*
574 * Reverts actions set up by vmci_setup_notify(). Unmaps and unlocks the
575 * page mapped/locked by vmci_setup_notify().
576 */
577void vmci_ctx_unset_notify(struct vmci_ctx *context)
578{
579 struct page *notify_page;
580
581 spin_lock(&context->lock);
582
583 notify_page = context->notify_page;
584 context->notify = &ctx_dummy_notify;
585 context->notify_page = NULL;
586
587 spin_unlock(&context->lock);
588
589 if (notify_page) {
590 kunmap(notify_page);
591 put_page(notify_page);
592 }
593}
594
595/*
596 * Add remote_cid to the list of contexts the current context wants
597 * notifications from/about.
598 */
599int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
600{
601 struct vmci_ctx *context;
602 struct vmci_handle_list *notifier, *n;
603 int result;
604 bool exists = false;
605
606 context = vmci_ctx_get(context_id);
607 if (!context)
608 return VMCI_ERROR_NOT_FOUND;
609
610 if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
611 pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
612 context_id, remote_cid);
613 result = VMCI_ERROR_DST_UNREACHABLE;
614 goto out;
615 }
616
617 if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
618 result = VMCI_ERROR_NO_ACCESS;
619 goto out;
620 }
621
622 notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
623 if (!notifier) {
624 result = VMCI_ERROR_NO_MEM;
625 goto out;
626 }
627
628 INIT_LIST_HEAD(&notifier->node);
629 notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
630
631 spin_lock(&context->lock);
632
633 list_for_each_entry(n, &context->notifier_list, node) {
634 if (vmci_handle_is_equal(n->handle, notifier->handle)) {
635 exists = true;
636 break;
637 }
638 }
639
640 if (exists) {
641 kfree(notifier);
642 result = VMCI_ERROR_ALREADY_EXISTS;
643 } else {
644 list_add_tail_rcu(&notifier->node, &context->notifier_list);
645 context->n_notifiers++;
646 result = VMCI_SUCCESS;
647 }
648
649 spin_unlock(&context->lock);
650
651 out:
652 vmci_ctx_put(context);
653 return result;
654}
655
656/*
657 * Remove remote_cid from current context's list of contexts it is
658 * interested in getting notifications from/about.
659 */
660int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
661{
662 struct vmci_ctx *context;
663 struct vmci_handle_list *notifier, *tmp;
664 struct vmci_handle handle;
665 bool found = false;
666
667 context = vmci_ctx_get(context_id);
668 if (!context)
669 return VMCI_ERROR_NOT_FOUND;
670
671 handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
672
673 spin_lock(&context->lock);
674 list_for_each_entry_safe(notifier, tmp,
675 &context->notifier_list, node) {
676 if (vmci_handle_is_equal(notifier->handle, handle)) {
677 list_del_rcu(&notifier->node);
678 context->n_notifiers--;
679 found = true;
680 break;
681 }
682 }
683 spin_unlock(&context->lock);
684
685 if (found) {
686 synchronize_rcu();
687 kfree(notifier);
688 }
689
690 vmci_ctx_put(context);
691
692 return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
693}
694
695static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
696 u32 *buf_size, void **pbuf)
697{
698 u32 *notifiers;
699 size_t data_size;
700 struct vmci_handle_list *entry;
701 int i = 0;
702
703 if (context->n_notifiers == 0) {
704 *buf_size = 0;
705 *pbuf = NULL;
706 return VMCI_SUCCESS;
707 }
708
709 data_size = context->n_notifiers * sizeof(*notifiers);
710 if (*buf_size < data_size) {
711 *buf_size = data_size;
712 return VMCI_ERROR_MORE_DATA;
713 }
714
715 notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
716 if (!notifiers)
717 return VMCI_ERROR_NO_MEM;
718
719 list_for_each_entry(entry, &context->notifier_list, node)
720 notifiers[i++] = entry->handle.context;
721
722 *buf_size = data_size;
723 *pbuf = notifiers;
724 return VMCI_SUCCESS;
725}
726
727static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
728 u32 *buf_size, void **pbuf)
729{
730 struct dbell_cpt_state *dbells;
731 size_t n_doorbells;
732 int i;
733
734 n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
735 if (n_doorbells > 0) {
736 size_t data_size = n_doorbells * sizeof(*dbells);
737 if (*buf_size < data_size) {
738 *buf_size = data_size;
739 return VMCI_ERROR_MORE_DATA;
740 }
741
742 dbells = kmalloc(data_size, GFP_ATOMIC);
743 if (!dbells)
744 return VMCI_ERROR_NO_MEM;
745
746 for (i = 0; i < n_doorbells; i++)
747 dbells[i].handle = vmci_handle_arr_get_entry(
748 context->doorbell_array, i);
749
750 *buf_size = data_size;
751 *pbuf = dbells;
752 } else {
753 *buf_size = 0;
754 *pbuf = NULL;
755 }
756
757 return VMCI_SUCCESS;
758}
759
760/*
761 * Get current context's checkpoint state of given type.
762 */
763int vmci_ctx_get_chkpt_state(u32 context_id,
764 u32 cpt_type,
765 u32 *buf_size,
766 void **pbuf)
767{
768 struct vmci_ctx *context;
769 int result;
770
771 context = vmci_ctx_get(context_id);
772 if (!context)
773 return VMCI_ERROR_NOT_FOUND;
774
775 spin_lock(&context->lock);
776
777 switch (cpt_type) {
778 case VMCI_NOTIFICATION_CPT_STATE:
779 result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
780 break;
781
782 case VMCI_WELLKNOWN_CPT_STATE:
783 /*
784 * For compatibility with VMX'en with VM to VM communication, we
785 * always return zero wellknown handles.
786 */
787
788 *buf_size = 0;
789 *pbuf = NULL;
790 result = VMCI_SUCCESS;
791 break;
792
793 case VMCI_DOORBELL_CPT_STATE:
794 result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
795 break;
796
797 default:
798 pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
799 result = VMCI_ERROR_INVALID_ARGS;
800 break;
801 }
802
803 spin_unlock(&context->lock);
804 vmci_ctx_put(context);
805
806 return result;
807}
808
809/*
810 * Set current context's checkpoint state of given type.
811 */
812int vmci_ctx_set_chkpt_state(u32 context_id,
813 u32 cpt_type,
814 u32 buf_size,
815 void *cpt_buf)
816{
817 u32 i;
818 u32 current_id;
819 int result = VMCI_SUCCESS;
820 u32 num_ids = buf_size / sizeof(u32);
821
822 if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
823 /*
824 * We would end up here if VMX with VM to VM communication
825 * attempts to restore a checkpoint with wellknown handles.
826 */
827 pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
828 return VMCI_ERROR_OBSOLETE;
829 }
830
831 if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
832 pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
833 return VMCI_ERROR_INVALID_ARGS;
834 }
835
836 for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
837 current_id = ((u32 *)cpt_buf)[i];
838 result = vmci_ctx_add_notification(context_id, current_id);
839 if (result != VMCI_SUCCESS)
840 break;
841 }
842 if (result != VMCI_SUCCESS)
843 pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
844 cpt_type, result);
845
846 return result;
847}
848
849/*
850 * Retrieves the specified context's pending notifications in the
851 * form of a handle array. The handle arrays returned are the
852 * actual data, not a copy, and should not be modified by the
853 * caller. They must be released using
854 * vmci_ctx_rcv_notifications_release.
855 */
856int vmci_ctx_rcv_notifications_get(u32 context_id,
857 struct vmci_handle_arr **db_handle_array,
858 struct vmci_handle_arr **qp_handle_array)
859{
860 struct vmci_ctx *context;
861 int result = VMCI_SUCCESS;
862
863 context = vmci_ctx_get(context_id);
864 if (context == NULL)
865 return VMCI_ERROR_NOT_FOUND;
866
867 spin_lock(&context->lock);
868
869 *db_handle_array = context->pending_doorbell_array;
870 context->pending_doorbell_array = vmci_handle_arr_create(0);
871 if (!context->pending_doorbell_array) {
872 context->pending_doorbell_array = *db_handle_array;
873 *db_handle_array = NULL;
874 result = VMCI_ERROR_NO_MEM;
875 }
876 *qp_handle_array = NULL;
877
878 spin_unlock(&context->lock);
879 vmci_ctx_put(context);
880
881 return result;
882}
883
884/*
885 * Releases handle arrays with pending notifications previously
886 * retrieved using vmci_ctx_rcv_notifications_get. If the
887 * notifications were not successfully handed over to the guest,
888 * success must be false.
889 */
890void vmci_ctx_rcv_notifications_release(u32 context_id,
891 struct vmci_handle_arr *db_handle_array,
892 struct vmci_handle_arr *qp_handle_array,
893 bool success)
894{
895 struct vmci_ctx *context = vmci_ctx_get(context_id);
896
897 spin_lock(&context->lock);
898 if (!success) {
899 struct vmci_handle handle;
900
901 /*
902 * New notifications may have been added while we were not
903 * holding the context lock, so we transfer any new pending
904 * doorbell notifications to the old array, and reinstate the
905 * old array.
906 */
907
908 handle = vmci_handle_arr_remove_tail(
909 context->pending_doorbell_array);
910 while (!vmci_handle_is_invalid(handle)) {
911 if (!vmci_handle_arr_has_entry(db_handle_array,
912 handle)) {
913 vmci_handle_arr_append_entry(
914 &db_handle_array, handle);
915 }
916 handle = vmci_handle_arr_remove_tail(
917 context->pending_doorbell_array);
918 }
919 vmci_handle_arr_destroy(context->pending_doorbell_array);
920 context->pending_doorbell_array = db_handle_array;
921 db_handle_array = NULL;
922 } else {
923 ctx_clear_notify_call(context);
924 }
925 spin_unlock(&context->lock);
926 vmci_ctx_put(context);
927
928 if (db_handle_array)
929 vmci_handle_arr_destroy(db_handle_array);
930
931 if (qp_handle_array)
932 vmci_handle_arr_destroy(qp_handle_array);
933}
934
935/*
936 * Registers that a new doorbell handle has been allocated by the
937 * context. Only registered doorbell handles can be notified.
938 */
939int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
940{
941 struct vmci_ctx *context;
942 int result;
943
944 if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
945 return VMCI_ERROR_INVALID_ARGS;
946
947 context = vmci_ctx_get(context_id);
948 if (context == NULL)
949 return VMCI_ERROR_NOT_FOUND;
950
951 spin_lock(&context->lock);
952 if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
953 vmci_handle_arr_append_entry(&context->doorbell_array, handle);
954 result = VMCI_SUCCESS;
955 } else {
956 result = VMCI_ERROR_DUPLICATE_ENTRY;
957 }
958
959 spin_unlock(&context->lock);
960 vmci_ctx_put(context);
961
962 return result;
963}
964
965/*
966 * Unregisters a doorbell handle that was previously registered
967 * with vmci_ctx_dbell_create.
968 */
969int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
970{
971 struct vmci_ctx *context;
972 struct vmci_handle removed_handle;
973
974 if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
975 return VMCI_ERROR_INVALID_ARGS;
976
977 context = vmci_ctx_get(context_id);
978 if (context == NULL)
979 return VMCI_ERROR_NOT_FOUND;
980
981 spin_lock(&context->lock);
982 removed_handle =
983 vmci_handle_arr_remove_entry(context->doorbell_array, handle);
984 vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
985 spin_unlock(&context->lock);
986
987 vmci_ctx_put(context);
988
989 return vmci_handle_is_invalid(removed_handle) ?
990 VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
991}
992
993/*
994 * Unregisters all doorbell handles that were previously
995 * registered with vmci_ctx_dbell_create.
996 */
997int vmci_ctx_dbell_destroy_all(u32 context_id)
998{
999 struct vmci_ctx *context;
1000 struct vmci_handle handle;
1001
1002 if (context_id == VMCI_INVALID_ID)
1003 return VMCI_ERROR_INVALID_ARGS;
1004
1005 context = vmci_ctx_get(context_id);
1006 if (context == NULL)
1007 return VMCI_ERROR_NOT_FOUND;
1008
1009 spin_lock(&context->lock);
1010 do {
1011 struct vmci_handle_arr *arr = context->doorbell_array;
1012 handle = vmci_handle_arr_remove_tail(arr);
1013 } while (!vmci_handle_is_invalid(handle));
1014 do {
1015 struct vmci_handle_arr *arr = context->pending_doorbell_array;
1016 handle = vmci_handle_arr_remove_tail(arr);
1017 } while (!vmci_handle_is_invalid(handle));
1018 spin_unlock(&context->lock);
1019
1020 vmci_ctx_put(context);
1021
1022 return VMCI_SUCCESS;
1023}
1024
1025/*
1026 * Registers a notification of a doorbell handle initiated by the
1027 * specified source context. Doorbell notifications are
1028 * subject to the same isolation rules as datagram delivery. To
1029 * allow host side senders of notifications a finer granularity
1030 * of sender rights than those assigned to the sending context
1031 * itself, the host context is required to specify a different
1032 * set of privilege flags that will override the privileges of
1033 * the source context.
1034 */
1035int vmci_ctx_notify_dbell(u32 src_cid,
1036 struct vmci_handle handle,
1037 u32 src_priv_flags)
1038{
1039 struct vmci_ctx *dst_context;
1040 int result;
1041
1042 if (vmci_handle_is_invalid(handle))
1043 return VMCI_ERROR_INVALID_ARGS;
1044
1045 /* Get the target VM's VMCI context. */
1046 dst_context = vmci_ctx_get(handle.context);
1047 if (!dst_context) {
1048 pr_devel("Invalid context (ID=0x%x)\n", handle.context);
1049 return VMCI_ERROR_NOT_FOUND;
1050 }
1051
1052 if (src_cid != handle.context) {
1053 u32 dst_priv_flags;
1054
1055 if (VMCI_CONTEXT_IS_VM(src_cid) &&
1056 VMCI_CONTEXT_IS_VM(handle.context)) {
1057 pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
1058 src_cid, handle.context);
1059 result = VMCI_ERROR_DST_UNREACHABLE;
1060 goto out;
1061 }
1062
1063 result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
1064 if (result < VMCI_SUCCESS) {
1065 pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
1066 handle.context, handle.resource);
1067 goto out;
1068 }
1069
1070 if (src_cid != VMCI_HOST_CONTEXT_ID ||
1071 src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
1072 src_priv_flags = vmci_context_get_priv_flags(src_cid);
1073 }
1074
1075 if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
1076 result = VMCI_ERROR_NO_ACCESS;
1077 goto out;
1078 }
1079 }
1080
1081 if (handle.context == VMCI_HOST_CONTEXT_ID) {
1082 result = vmci_dbell_host_context_notify(src_cid, handle);
1083 } else {
1084 spin_lock(&dst_context->lock);
1085
1086 if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
1087 handle)) {
1088 result = VMCI_ERROR_NOT_FOUND;
1089 } else {
1090 if (!vmci_handle_arr_has_entry(
1091 dst_context->pending_doorbell_array,
1092 handle)) {
1093 vmci_handle_arr_append_entry(
1094 &dst_context->pending_doorbell_array,
1095 handle);
1096
1097 ctx_signal_notify(dst_context);
1098 wake_up(&dst_context->host_context.wait_queue);
1099
1100 }
1101 result = VMCI_SUCCESS;
1102 }
1103 spin_unlock(&dst_context->lock);
1104 }
1105
1106 out:
1107 vmci_ctx_put(dst_context);
1108
1109 return result;
1110}
1111
1112bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
1113{
1114 return context && context->user_version >= VMCI_VERSION_HOSTQP;
1115}
1116
1117/*
1118 * Registers that a new queue pair handle has been allocated by
1119 * the context.
1120 */
1121int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
1122{
1123 int result;
1124
1125 if (context == NULL || vmci_handle_is_invalid(handle))
1126 return VMCI_ERROR_INVALID_ARGS;
1127
1128 if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) {
1129 vmci_handle_arr_append_entry(&context->queue_pair_array,
1130 handle);
1131 result = VMCI_SUCCESS;
1132 } else {
1133 result = VMCI_ERROR_DUPLICATE_ENTRY;
1134 }
1135
1136 return result;
1137}
1138
1139/*
1140 * Unregisters a queue pair handle that was previously registered
1141 * with vmci_ctx_qp_create.
1142 */
1143int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
1144{
1145 struct vmci_handle hndl;
1146
1147 if (context == NULL || vmci_handle_is_invalid(handle))
1148 return VMCI_ERROR_INVALID_ARGS;
1149
1150 hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
1151
1152 return vmci_handle_is_invalid(hndl) ?
1153 VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1154}
1155
1156/*
1157 * Determines whether a given queue pair handle is registered
1158 * with the given context.
1159 */
1160bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
1161{
1162 if (context == NULL || vmci_handle_is_invalid(handle))
1163 return false;
1164
1165 return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
1166}
1167
1168/*
1169 * vmci_context_get_priv_flags() - Retrieve privilege flags.
1170 * @context_id: The context ID of the VMCI context.
1171 *
1172 * Retrieves privilege flags of the given VMCI context ID.
1173 */
1174u32 vmci_context_get_priv_flags(u32 context_id)
1175{
1176 if (vmci_host_code_active()) {
1177 u32 flags;
1178 struct vmci_ctx *context;
1179
1180 context = vmci_ctx_get(context_id);
1181 if (!context)
1182 return VMCI_LEAST_PRIVILEGE_FLAGS;
1183
1184 flags = context->priv_flags;
1185 vmci_ctx_put(context);
1186 return flags;
1187 }
1188 return VMCI_NO_PRIVILEGE_FLAGS;
1189}
1190EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
1191
1192/*
1193 * vmci_is_context_owner() - Determines if the user is the context owner
1194 * @context_id: The context ID of the VMCI context.
1195 * @uid: The host user id (real kernel value).
1196 *
1197 * Determines whether a given UID is the owner of given VMCI context.
1198 */
1199bool vmci_is_context_owner(u32 context_id, kuid_t uid)
1200{
1201 bool is_owner = false;
1202
1203 if (vmci_host_code_active()) {
1204 struct vmci_ctx *context = vmci_ctx_get(context_id);
1205 if (context) {
1206 if (context->cred)
1207 is_owner = uid_eq(context->cred->uid, uid);
1208 vmci_ctx_put(context);
1209 }
1210 }
1211
1212 return is_owner;
1213}
1214EXPORT_SYMBOL_GPL(vmci_is_context_owner);
diff --git a/drivers/misc/vmw_vmci/vmci_context.h b/drivers/misc/vmw_vmci/vmci_context.h
new file mode 100644
index 000000000000..24a88e68a1e6
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_context.h
@@ -0,0 +1,182 @@
1/*
2 * VMware VMCI driver (vmciContext.h)
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef _VMCI_CONTEXT_H_
17#define _VMCI_CONTEXT_H_
18
19#include <linux/vmw_vmci_defs.h>
20#include <linux/atomic.h>
21#include <linux/kref.h>
22#include <linux/types.h>
23#include <linux/wait.h>
24
25#include "vmci_handle_array.h"
26#include "vmci_datagram.h"
27
28/* Used to determine what checkpoint state to get and set. */
29enum {
30 VMCI_NOTIFICATION_CPT_STATE = 1,
31 VMCI_WELLKNOWN_CPT_STATE = 2,
32 VMCI_DG_OUT_STATE = 3,
33 VMCI_DG_IN_STATE = 4,
34 VMCI_DG_IN_SIZE_STATE = 5,
35 VMCI_DOORBELL_CPT_STATE = 6,
36};
37
38/* Host specific struct used for signalling */
39struct vmci_host {
40 wait_queue_head_t wait_queue;
41};
42
43struct vmci_handle_list {
44 struct list_head node;
45 struct vmci_handle handle;
46};
47
48struct vmci_ctx {
49 struct list_head list_item; /* For global VMCI list. */
50 u32 cid;
51 struct kref kref;
52 struct list_head datagram_queue; /* Head of per VM queue. */
53 u32 pending_datagrams;
54 size_t datagram_queue_size; /* Size of datagram queue in bytes. */
55
56 /*
57 * Version of the code that created
58 * this context; e.g., VMX.
59 */
60 int user_version;
61 spinlock_t lock; /* Locks callQueue and handle_arrays. */
62
63 /*
64 * Queue pairs this context is attached to. The array of
65 * handles for queue pairs is accessed
66 * from the code for QP API, and there
67 * it is protected by the QP lock. It
68 * is also accessed from the context
69 * clean up path, which does not
70 * require a lock. VMCILock is not
71 * used to protect the QP array field.
72 */
73 struct vmci_handle_arr *queue_pair_array;
74
75 /* Doorbells created by context. */
76 struct vmci_handle_arr *doorbell_array;
77
78 /* Doorbells pending for context. */
79 struct vmci_handle_arr *pending_doorbell_array;
80
81 /* Contexts current context is subscribing to. */
82 struct list_head notifier_list;
83 unsigned int n_notifiers;
84
85 struct vmci_host host_context;
86 u32 priv_flags;
87
88 const struct cred *cred;
89 bool *notify; /* Notify flag pointer - hosted only. */
90 struct page *notify_page; /* Page backing the notify UVA. */
91};
92
93/* VMCINotifyAddRemoveInfo: Used to add/remove remote context notifications. */
94struct vmci_ctx_info {
95 u32 remote_cid;
96 int result;
97};
98
99/* VMCICptBufInfo: Used to set/get current context's checkpoint state. */
100struct vmci_ctx_chkpt_buf_info {
101 u64 cpt_buf;
102 u32 cpt_type;
103 u32 buf_size;
104 s32 result;
105 u32 _pad;
106};
107
108/*
109 * VMCINotificationReceiveInfo: Used to receive pending notifications
110 * for doorbells and queue pairs.
111 */
112struct vmci_ctx_notify_recv_info {
113 u64 db_handle_buf_uva;
114 u64 db_handle_buf_size;
115 u64 qp_handle_buf_uva;
116 u64 qp_handle_buf_size;
117 s32 result;
118 u32 _pad;
119};
120
121/*
122 * Utility function that checks whether two entities are allowed
123 * to interact. If one of them is restricted, the other one must
124 * be trusted.
125 */
126static inline bool vmci_deny_interaction(u32 part_one, u32 part_two)
127{
128 return ((part_one & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
129 !(part_two & VMCI_PRIVILEGE_FLAG_TRUSTED)) ||
130 ((part_two & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
131 !(part_one & VMCI_PRIVILEGE_FLAG_TRUSTED));
132}
133
134struct vmci_ctx *vmci_ctx_create(u32 cid, u32 flags,
135 uintptr_t event_hnd, int version,
136 const struct cred *cred);
137void vmci_ctx_destroy(struct vmci_ctx *context);
138
139bool vmci_ctx_supports_host_qp(struct vmci_ctx *context);
140int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg);
141int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
142 size_t *max_size, struct vmci_datagram **dg);
143int vmci_ctx_pending_datagrams(u32 cid, u32 *pending);
144struct vmci_ctx *vmci_ctx_get(u32 cid);
145void vmci_ctx_put(struct vmci_ctx *context);
146bool vmci_ctx_exists(u32 cid);
147
148int vmci_ctx_add_notification(u32 context_id, u32 remote_cid);
149int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid);
150int vmci_ctx_get_chkpt_state(u32 context_id, u32 cpt_type,
151 u32 *num_cids, void **cpt_buf_ptr);
152int vmci_ctx_set_chkpt_state(u32 context_id, u32 cpt_type,
153 u32 num_cids, void *cpt_buf);
154
155int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle);
156int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle);
157bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle);
158
159void vmci_ctx_check_signal_notify(struct vmci_ctx *context);
160void vmci_ctx_unset_notify(struct vmci_ctx *context);
161
162int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle);
163int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle);
164int vmci_ctx_dbell_destroy_all(u32 context_id);
165int vmci_ctx_notify_dbell(u32 cid, struct vmci_handle handle,
166 u32 src_priv_flags);
167
168int vmci_ctx_rcv_notifications_get(u32 context_id, struct vmci_handle_arr
169 **db_handle_array, struct vmci_handle_arr
170 **qp_handle_array);
171void vmci_ctx_rcv_notifications_release(u32 context_id, struct vmci_handle_arr
172 *db_handle_array, struct vmci_handle_arr
173 *qp_handle_array, bool success);
174
175static inline u32 vmci_ctx_get_id(struct vmci_ctx *context)
176{
177 if (!context)
178 return VMCI_INVALID_ID;
179 return context->cid;
180}
181
182#endif /* _VMCI_CONTEXT_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c
new file mode 100644
index 000000000000..ed5c433cd493
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_datagram.c
@@ -0,0 +1,500 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18#include <linux/module.h>
19#include <linux/sched.h>
20#include <linux/slab.h>
21#include <linux/bug.h>
22
23#include "vmci_datagram.h"
24#include "vmci_resource.h"
25#include "vmci_context.h"
26#include "vmci_driver.h"
27#include "vmci_event.h"
28#include "vmci_route.h"
29
30/*
31 * struct datagram_entry describes the datagram entity. It is used for datagram
32 * entities created only on the host.
33 */
34struct datagram_entry {
35 struct vmci_resource resource;
36 u32 flags;
37 bool run_delayed;
38 vmci_datagram_recv_cb recv_cb;
39 void *client_data;
40 u32 priv_flags;
41};
42
43struct delayed_datagram_info {
44 struct datagram_entry *entry;
45 struct vmci_datagram msg;
46 struct work_struct work;
47 bool in_dg_host_queue;
48};
49
50/* Number of in-flight host->host datagrams */
51static atomic_t delayed_dg_host_queue_size = ATOMIC_INIT(0);
52
53/*
54 * Create a datagram entry given a handle pointer.
55 */
56static int dg_create_handle(u32 resource_id,
57 u32 flags,
58 u32 priv_flags,
59 vmci_datagram_recv_cb recv_cb,
60 void *client_data, struct vmci_handle *out_handle)
61{
62 int result;
63 u32 context_id;
64 struct vmci_handle handle;
65 struct datagram_entry *entry;
66
67 if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0)
68 return VMCI_ERROR_INVALID_ARGS;
69
70 if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) {
71 context_id = VMCI_INVALID_ID;
72 } else {
73 context_id = vmci_get_context_id();
74 if (context_id == VMCI_INVALID_ID)
75 return VMCI_ERROR_NO_RESOURCES;
76 }
77
78 handle = vmci_make_handle(context_id, resource_id);
79
80 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
81 if (!entry) {
82 pr_warn("Failed allocating memory for datagram entry\n");
83 return VMCI_ERROR_NO_MEM;
84 }
85
86 entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? true : false;
87 entry->flags = flags;
88 entry->recv_cb = recv_cb;
89 entry->client_data = client_data;
90 entry->priv_flags = priv_flags;
91
92 /* Make datagram resource live. */
93 result = vmci_resource_add(&entry->resource,
94 VMCI_RESOURCE_TYPE_DATAGRAM,
95 handle);
96 if (result != VMCI_SUCCESS) {
97 pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n",
98 handle.context, handle.resource, result);
99 kfree(entry);
100 return result;
101 }
102
103 *out_handle = vmci_resource_handle(&entry->resource);
104 return VMCI_SUCCESS;
105}
106
107/*
108 * Internal utility function with the same purpose as
109 * vmci_datagram_get_priv_flags that also takes a context_id.
110 */
111static int vmci_datagram_get_priv_flags(u32 context_id,
112 struct vmci_handle handle,
113 u32 *priv_flags)
114{
115 if (context_id == VMCI_INVALID_ID)
116 return VMCI_ERROR_INVALID_ARGS;
117
118 if (context_id == VMCI_HOST_CONTEXT_ID) {
119 struct datagram_entry *src_entry;
120 struct vmci_resource *resource;
121
122 resource = vmci_resource_by_handle(handle,
123 VMCI_RESOURCE_TYPE_DATAGRAM);
124 if (!resource)
125 return VMCI_ERROR_INVALID_ARGS;
126
127 src_entry = container_of(resource, struct datagram_entry,
128 resource);
129 *priv_flags = src_entry->priv_flags;
130 vmci_resource_put(resource);
131 } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID)
132 *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS;
133 else
134 *priv_flags = vmci_context_get_priv_flags(context_id);
135
136 return VMCI_SUCCESS;
137}
138
139/*
140 * Calls the specified callback in a delayed context.
141 */
142static void dg_delayed_dispatch(struct work_struct *work)
143{
144 struct delayed_datagram_info *dg_info =
145 container_of(work, struct delayed_datagram_info, work);
146
147 dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg);
148
149 vmci_resource_put(&dg_info->entry->resource);
150
151 if (dg_info->in_dg_host_queue)
152 atomic_dec(&delayed_dg_host_queue_size);
153
154 kfree(dg_info);
155}
156
157/*
158 * Dispatch datagram as a host, to the host, or other vm context. This
159 * function cannot dispatch to hypervisor context handlers. This should
160 * have been handled before we get here by vmci_datagram_dispatch.
161 * Returns number of bytes sent on success, error code otherwise.
162 */
163static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg)
164{
165 int retval;
166 size_t dg_size;
167 u32 src_priv_flags;
168
169 dg_size = VMCI_DG_SIZE(dg);
170
171 /* Host cannot send to the hypervisor. */
172 if (dg->dst.context == VMCI_HYPERVISOR_CONTEXT_ID)
173 return VMCI_ERROR_DST_UNREACHABLE;
174
175 /* Check that source handle matches sending context. */
176 if (dg->src.context != context_id) {
177 pr_devel("Sender context (ID=0x%x) is not owner of src datagram entry (handle=0x%x:0x%x)\n",
178 context_id, dg->src.context, dg->src.resource);
179 return VMCI_ERROR_NO_ACCESS;
180 }
181
182 /* Get hold of privileges of sending endpoint. */
183 retval = vmci_datagram_get_priv_flags(context_id, dg->src,
184 &src_priv_flags);
185 if (retval != VMCI_SUCCESS) {
186 pr_warn("Couldn't get privileges (handle=0x%x:0x%x)\n",
187 dg->src.context, dg->src.resource);
188 return retval;
189 }
190
191 /* Determine if we should route to host or guest destination. */
192 if (dg->dst.context == VMCI_HOST_CONTEXT_ID) {
193 /* Route to host datagram entry. */
194 struct datagram_entry *dst_entry;
195 struct vmci_resource *resource;
196
197 if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
198 dg->dst.resource == VMCI_EVENT_HANDLER) {
199 return vmci_event_dispatch(dg);
200 }
201
202 resource = vmci_resource_by_handle(dg->dst,
203 VMCI_RESOURCE_TYPE_DATAGRAM);
204 if (!resource) {
205 pr_devel("Sending to invalid destination (handle=0x%x:0x%x)\n",
206 dg->dst.context, dg->dst.resource);
207 return VMCI_ERROR_INVALID_RESOURCE;
208 }
209 dst_entry = container_of(resource, struct datagram_entry,
210 resource);
211 if (vmci_deny_interaction(src_priv_flags,
212 dst_entry->priv_flags)) {
213 vmci_resource_put(resource);
214 return VMCI_ERROR_NO_ACCESS;
215 }
216
217 /*
218 * If a VMCI datagram destined for the host is also sent by the
219 * host, we always run it delayed. This ensures that no locks
220 * are held when the datagram callback runs.
221 */
222 if (dst_entry->run_delayed ||
223 dg->src.context == VMCI_HOST_CONTEXT_ID) {
224 struct delayed_datagram_info *dg_info;
225
226 if (atomic_add_return(1, &delayed_dg_host_queue_size)
227 == VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE) {
228 atomic_dec(&delayed_dg_host_queue_size);
229 vmci_resource_put(resource);
230 return VMCI_ERROR_NO_MEM;
231 }
232
233 dg_info = kmalloc(sizeof(*dg_info) +
234 (size_t) dg->payload_size, GFP_ATOMIC);
235 if (!dg_info) {
236 atomic_dec(&delayed_dg_host_queue_size);
237 vmci_resource_put(resource);
238 return VMCI_ERROR_NO_MEM;
239 }
240
241 dg_info->in_dg_host_queue = true;
242 dg_info->entry = dst_entry;
243 memcpy(&dg_info->msg, dg, dg_size);
244
245 INIT_WORK(&dg_info->work, dg_delayed_dispatch);
246 schedule_work(&dg_info->work);
247 retval = VMCI_SUCCESS;
248
249 } else {
250 retval = dst_entry->recv_cb(dst_entry->client_data, dg);
251 vmci_resource_put(resource);
252 if (retval < VMCI_SUCCESS)
253 return retval;
254 }
255 } else {
256 /* Route to destination VM context. */
257 struct vmci_datagram *new_dg;
258
259 if (context_id != dg->dst.context) {
260 if (vmci_deny_interaction(src_priv_flags,
261 vmci_context_get_priv_flags
262 (dg->dst.context))) {
263 return VMCI_ERROR_NO_ACCESS;
264 } else if (VMCI_CONTEXT_IS_VM(context_id)) {
265 /*
266 * If the sending context is a VM, it
267 * cannot reach another VM.
268 */
269
270 pr_devel("Datagram communication between VMs not supported (src=0x%x, dst=0x%x)\n",
271 context_id, dg->dst.context);
272 return VMCI_ERROR_DST_UNREACHABLE;
273 }
274 }
275
276 /* We make a copy to enqueue. */
277 new_dg = kmalloc(dg_size, GFP_KERNEL);
278 if (new_dg == NULL)
279 return VMCI_ERROR_NO_MEM;
280
281 memcpy(new_dg, dg, dg_size);
282 retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg);
283 if (retval < VMCI_SUCCESS) {
284 kfree(new_dg);
285 return retval;
286 }
287 }
288
289 /*
290 * We currently truncate the size to signed 32 bits. This doesn't
291 * matter for this handler as it only supports 4KB messages.
292 */
293 return (int)dg_size;
294}
295
296/*
297 * Dispatch datagram as a guest, down through the VMX and potentially to
298 * the host.
299 * Returns number of bytes sent on success, error code otherwise.
300 */
301static int dg_dispatch_as_guest(struct vmci_datagram *dg)
302{
303 int retval;
304 struct vmci_resource *resource;
305
306 resource = vmci_resource_by_handle(dg->src,
307 VMCI_RESOURCE_TYPE_DATAGRAM);
308 if (!resource)
309 return VMCI_ERROR_NO_HANDLE;
310
311 retval = vmci_send_datagram(dg);
312 vmci_resource_put(resource);
313 return retval;
314}
315
316/*
317 * Dispatch datagram. This will determine the routing for the datagram
318 * and dispatch it accordingly.
319 * Returns number of bytes sent on success, error code otherwise.
320 */
321int vmci_datagram_dispatch(u32 context_id,
322 struct vmci_datagram *dg, bool from_guest)
323{
324 int retval;
325 enum vmci_route route;
326
327 BUILD_BUG_ON(sizeof(struct vmci_datagram) != 24);
328
329 if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) {
330 pr_devel("Payload (size=%llu bytes) too big to send\n",
331 (unsigned long long)dg->payload_size);
332 return VMCI_ERROR_INVALID_ARGS;
333 }
334
335 retval = vmci_route(&dg->src, &dg->dst, from_guest, &route);
336 if (retval < VMCI_SUCCESS) {
337 pr_devel("Failed to route datagram (src=0x%x, dst=0x%x, err=%d)\n",
338 dg->src.context, dg->dst.context, retval);
339 return retval;
340 }
341
342 if (VMCI_ROUTE_AS_HOST == route) {
343 if (VMCI_INVALID_ID == context_id)
344 context_id = VMCI_HOST_CONTEXT_ID;
345 return dg_dispatch_as_host(context_id, dg);
346 }
347
348 if (VMCI_ROUTE_AS_GUEST == route)
349 return dg_dispatch_as_guest(dg);
350
351 pr_warn("Unknown route (%d) for datagram\n", route);
352 return VMCI_ERROR_DST_UNREACHABLE;
353}
354
355/*
356 * Invoke the handler for the given datagram. This is intended to be
357 * called only when acting as a guest and receiving a datagram from the
358 * virtual device.
359 */
360int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg)
361{
362 struct vmci_resource *resource;
363 struct datagram_entry *dst_entry;
364
365 resource = vmci_resource_by_handle(dg->dst,
366 VMCI_RESOURCE_TYPE_DATAGRAM);
367 if (!resource) {
368 pr_devel("destination (handle=0x%x:0x%x) doesn't exist\n",
369 dg->dst.context, dg->dst.resource);
370 return VMCI_ERROR_NO_HANDLE;
371 }
372
373 dst_entry = container_of(resource, struct datagram_entry, resource);
374 if (dst_entry->run_delayed) {
375 struct delayed_datagram_info *dg_info;
376
377 dg_info = kmalloc(sizeof(*dg_info) + (size_t)dg->payload_size,
378 GFP_ATOMIC);
379 if (!dg_info) {
380 vmci_resource_put(resource);
381 return VMCI_ERROR_NO_MEM;
382 }
383
384 dg_info->in_dg_host_queue = false;
385 dg_info->entry = dst_entry;
386 memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg));
387
388 INIT_WORK(&dg_info->work, dg_delayed_dispatch);
389 schedule_work(&dg_info->work);
390 } else {
391 dst_entry->recv_cb(dst_entry->client_data, dg);
392 vmci_resource_put(resource);
393 }
394
395 return VMCI_SUCCESS;
396}
397
398/*
399 * vmci_datagram_create_handle_priv() - Create host context datagram endpoint
400 * @resource_id: The resource ID.
401 * @flags: Datagram Flags.
402 * @priv_flags: Privilege Flags.
403 * @recv_cb: Callback when receiving datagrams.
404 * @client_data: Pointer for a datagram_entry struct
405 * @out_handle: vmci_handle that is populated as a result of this function.
406 *
407 * Creates a host context datagram endpoint and returns a handle to it.
408 */
409int vmci_datagram_create_handle_priv(u32 resource_id,
410 u32 flags,
411 u32 priv_flags,
412 vmci_datagram_recv_cb recv_cb,
413 void *client_data,
414 struct vmci_handle *out_handle)
415{
416 if (out_handle == NULL)
417 return VMCI_ERROR_INVALID_ARGS;
418
419 if (recv_cb == NULL) {
420 pr_devel("Client callback needed when creating datagram\n");
421 return VMCI_ERROR_INVALID_ARGS;
422 }
423
424 if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
425 return VMCI_ERROR_INVALID_ARGS;
426
427 return dg_create_handle(resource_id, flags, priv_flags, recv_cb,
428 client_data, out_handle);
429}
430EXPORT_SYMBOL_GPL(vmci_datagram_create_handle_priv);
431
432/*
433 * vmci_datagram_create_handle() - Create host context datagram endpoint
434 * @resource_id: Resource ID.
435 * @flags: Datagram Flags.
436 * @recv_cb: Callback when receiving datagrams.
437 * @client_data: Pointer for a datagram_entry struct
438 * @out_handle: vmci_handle that is populated as a result of this function.
439 *
440 * Creates a host context datagram endpoint and returns a handle to
441 * it. Same as vmci_datagram_create_handle_priv without the privilege
442 * flags argument.
443 */
444int vmci_datagram_create_handle(u32 resource_id,
445 u32 flags,
446 vmci_datagram_recv_cb recv_cb,
447 void *client_data,
448 struct vmci_handle *out_handle)
449{
450 return vmci_datagram_create_handle_priv(
451 resource_id, flags,
452 VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
453 recv_cb, client_data,
454 out_handle);
455}
456EXPORT_SYMBOL_GPL(vmci_datagram_create_handle);
457
458/*
459 * vmci_datagram_destroy_handle() - Destroys datagram handle
460 * @handle: vmci_handle to be destroyed and reaped.
461 *
462 * Use this function to destroy any datagram handles created by
463 * vmci_datagram_create_handle{,_priv} functions.
464 */
465int vmci_datagram_destroy_handle(struct vmci_handle handle)
466{
467 struct datagram_entry *entry;
468 struct vmci_resource *resource;
469
470 resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM);
471 if (!resource) {
472 pr_devel("Failed to destroy datagram (handle=0x%x:0x%x)\n",
473 handle.context, handle.resource);
474 return VMCI_ERROR_NOT_FOUND;
475 }
476
477 entry = container_of(resource, struct datagram_entry, resource);
478
479 vmci_resource_put(&entry->resource);
480 vmci_resource_remove(&entry->resource);
481 kfree(entry);
482
483 return VMCI_SUCCESS;
484}
485EXPORT_SYMBOL_GPL(vmci_datagram_destroy_handle);
486
487/*
488 * vmci_datagram_send() - Send a datagram
489 * @msg: The datagram to send.
490 *
491 * Sends the provided datagram on its merry way.
492 */
493int vmci_datagram_send(struct vmci_datagram *msg)
494{
495 if (msg == NULL)
496 return VMCI_ERROR_INVALID_ARGS;
497
498 return vmci_datagram_dispatch(VMCI_INVALID_ID, msg, false);
499}
500EXPORT_SYMBOL_GPL(vmci_datagram_send);
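/*
 * Editorial sketch, not part of the patch: one way a kernel client might use
 * the datagram API exported above. The resource ID (42), the peer endpoint
 * and the payload are made-up placeholders, and the snippet assumes the
 * usual VMCI headers (<linux/vmw_vmci_defs.h>, <linux/vmw_vmci_api.h>).
 * Passing VMCI_FLAG_DG_DELAYED_CB instead of VMCI_FLAG_DG_NONE would make
 * recv_cb run from a work item rather than atomic context.
 */
static int example_recv_cb(void *client_data, struct vmci_datagram *msg)
{
	/* Called for each datagram delivered to the endpoint created below. */
	pr_info("received %llu payload bytes\n",
		(unsigned long long)msg->payload_size);
	return 0;
}

static int example_datagram_usage(void)
{
	struct vmci_handle handle;
	struct {
		struct vmci_datagram hdr;
		u8 payload[4];
	} dg;
	int result;

	/* Register a datagram endpoint with an arbitrary resource ID. */
	result = vmci_datagram_create_handle(42, VMCI_FLAG_DG_NONE,
					     example_recv_cb, NULL, &handle);
	if (result != VMCI_SUCCESS)
		return result;

	/* Address a small datagram to a hypothetical peer endpoint. */
	dg.hdr.dst = vmci_make_handle(VMCI_HOST_CONTEXT_ID, 42);
	dg.hdr.src = handle;
	dg.hdr.payload_size = sizeof(dg.payload);
	memset(dg.payload, 0, sizeof(dg.payload));
	result = vmci_datagram_send(&dg.hdr);

	vmci_datagram_destroy_handle(handle);
	return result;
}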
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.h b/drivers/misc/vmw_vmci/vmci_datagram.h
new file mode 100644
index 000000000000..eb4aab7f64ec
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_datagram.h
@@ -0,0 +1,52 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef _VMCI_DATAGRAM_H_
17#define _VMCI_DATAGRAM_H_
18
19#include <linux/types.h>
20#include <linux/list.h>
21
22#include "vmci_context.h"
23
24#define VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE 256
25
26/*
27 * The struct vmci_datagram_queue_entry is a queue header for the in-kernel VMCI
28 * datagram queues. It is allocated in non-paged memory, as the
29 * content is accessed while holding a spinlock. The pending datagram
30 * itself may be allocated from paged memory. We shadow the size of
31 * the datagram in the non-paged queue entry as this size is used
32 * while holding the same spinlock as above.
33 */
34struct vmci_datagram_queue_entry {
35 struct list_head list_item; /* For queuing. */
36 size_t dg_size; /* Size of datagram. */
37 struct vmci_datagram *dg; /* Pending datagram. */
38};
39
40/* VMCIDatagramSendRecvInfo */
41struct vmci_datagram_snd_rcv_info {
42 u64 addr;
43 u32 len;
44 s32 result;
45};
46
47/* Datagram API for non-public use. */
48int vmci_datagram_dispatch(u32 context_id, struct vmci_datagram *dg,
49 bool from_guest);
50int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg);
51
52#endif /* _VMCI_DATAGRAM_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c
new file mode 100644
index 000000000000..c3e8397f62ed
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.c
@@ -0,0 +1,604 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18#include <linux/completion.h>
19#include <linux/hash.h>
20#include <linux/kernel.h>
21#include <linux/list.h>
22#include <linux/module.h>
23#include <linux/sched.h>
24#include <linux/slab.h>
25
26#include "vmci_datagram.h"
27#include "vmci_doorbell.h"
28#include "vmci_resource.h"
29#include "vmci_driver.h"
30#include "vmci_route.h"
31
32
33#define VMCI_DOORBELL_INDEX_BITS 6
34#define VMCI_DOORBELL_INDEX_TABLE_SIZE (1 << VMCI_DOORBELL_INDEX_BITS)
35#define VMCI_DOORBELL_HASH(_idx) hash_32(_idx, VMCI_DOORBELL_INDEX_BITS)
36
37/*
38 * DoorbellEntry describes a doorbell notification handle allocated by the
39 * host.
40 */
41struct dbell_entry {
42 struct vmci_resource resource;
43 struct hlist_node node;
44 struct work_struct work;
45 vmci_callback notify_cb;
46 void *client_data;
47 u32 idx;
48 u32 priv_flags;
49 bool run_delayed;
50 atomic_t active; /* Only used by guest personality */
51};
52
53/* The VMCI index table keeps track of currently registered doorbells. */
54struct dbell_index_table {
55 spinlock_t lock; /* Index table lock */
56 struct hlist_head entries[VMCI_DOORBELL_INDEX_TABLE_SIZE];
57};
58
59static struct dbell_index_table vmci_doorbell_it = {
60 .lock = __SPIN_LOCK_UNLOCKED(vmci_doorbell_it.lock),
61};
62
63/*
64 * The max_notify_idx is one larger than the currently known bitmap index in
65 * use, and is used to determine how much of the bitmap needs to be scanned.
66 */
67static u32 max_notify_idx;
68
69/*
70 * The notify_idx_count is used for determining whether there are free entries
71 * within the bitmap (if notify_idx_count + 1 < max_notify_idx).
72 */
73static u32 notify_idx_count;
74
75/*
76 * The last_notify_idx_reserved is used to track the last index handed out - in
77 * the case where multiple handles share a notification index, we hand out
78 * indexes round robin based on last_notify_idx_reserved.
79 */
80static u32 last_notify_idx_reserved;
81
82/* This is a one-entry cache used by the index allocation. */
83static u32 last_notify_idx_released = PAGE_SIZE;
84
85
86/*
87 * Utility function that retrieves the privilege flags associated
88 * with a given doorbell handle. For guest endpoints, the
89 * privileges are determined by the context ID, but for host
90 * endpoints privileges are associated with the complete
91 * handle. Hypervisor endpoints are not yet supported.
92 */
93int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags)
94{
95 if (priv_flags == NULL || handle.context == VMCI_INVALID_ID)
96 return VMCI_ERROR_INVALID_ARGS;
97
98 if (handle.context == VMCI_HOST_CONTEXT_ID) {
99 struct dbell_entry *entry;
100 struct vmci_resource *resource;
101
102 resource = vmci_resource_by_handle(handle,
103 VMCI_RESOURCE_TYPE_DOORBELL);
104 if (!resource)
105 return VMCI_ERROR_NOT_FOUND;
106
107 entry = container_of(resource, struct dbell_entry, resource);
108 *priv_flags = entry->priv_flags;
109 vmci_resource_put(resource);
110 } else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) {
111 /*
112 * Hypervisor endpoints for notifications are not
113 * supported (yet).
114 */
115 return VMCI_ERROR_INVALID_ARGS;
116 } else {
117 *priv_flags = vmci_context_get_priv_flags(handle.context);
118 }
119
120 return VMCI_SUCCESS;
121}
122
123/*
124 * Find doorbell entry by bitmap index.
125 */
126static struct dbell_entry *dbell_index_table_find(u32 idx)
127{
128 u32 bucket = VMCI_DOORBELL_HASH(idx);
129 struct dbell_entry *dbell;
130 struct hlist_node *node;
131
132 hlist_for_each_entry(dbell, node, &vmci_doorbell_it.entries[bucket],
133 node) {
134 if (idx == dbell->idx)
135 return dbell;
136 }
137
138 return NULL;
139}
140
141/*
142 * Add the given entry to the index table. This will take a reference to the
143 * entry's resource so that the entry is not deleted before it is removed from
144 * the table.
145 */
146static void dbell_index_table_add(struct dbell_entry *entry)
147{
148 u32 bucket;
149 u32 new_notify_idx;
150
151 vmci_resource_get(&entry->resource);
152
153 spin_lock_bh(&vmci_doorbell_it.lock);
154
155 /*
156 * Below we try to allocate an index in the notification
157 * bitmap with "not too much" sharing between resources. If we
158 * use less than the full bitmap, we either add to the end if
159 * there are no unused flags within the currently used area,
160 * or we search for unused ones. If we use the full bitmap, we
161 * allocate the index round robin.
162 */
163 if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) {
164 if (last_notify_idx_released < max_notify_idx &&
165 !dbell_index_table_find(last_notify_idx_released)) {
166 new_notify_idx = last_notify_idx_released;
167 last_notify_idx_released = PAGE_SIZE;
168 } else {
169 bool reused = false;
170 new_notify_idx = last_notify_idx_reserved;
171 if (notify_idx_count + 1 < max_notify_idx) {
172 do {
173 if (!dbell_index_table_find
174 (new_notify_idx)) {
175 reused = true;
176 break;
177 }
178 new_notify_idx = (new_notify_idx + 1) %
179 max_notify_idx;
180 } while (new_notify_idx !=
181 last_notify_idx_released);
182 }
183 if (!reused) {
184 new_notify_idx = max_notify_idx;
185 max_notify_idx++;
186 }
187 }
188 } else {
189 new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE;
190 }
191
192 last_notify_idx_reserved = new_notify_idx;
193 notify_idx_count++;
194
195 entry->idx = new_notify_idx;
196 bucket = VMCI_DOORBELL_HASH(entry->idx);
197 hlist_add_head(&entry->node, &vmci_doorbell_it.entries[bucket]);
198
199 spin_unlock_bh(&vmci_doorbell_it.lock);
200}
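/*
 * Editorial note, not part of the patch: the allocation above is dense, so
 * here is a simplified model of the same policy, relying on the file-local
 * state and dbell_index_table_find() defined earlier. It prefers the most
 * recently released index, then an unused index within the currently used
 * range, then grows the range, and falls back to round-robin once all
 * PAGE_SIZE bitmap entries are in use (sharing indices between doorbells).
 */
static u32 dbell_alloc_idx_model(void)
{
	u32 idx;

	/* Whole bitmap in use: plain round-robin, indices are shared. */
	if (max_notify_idx >= PAGE_SIZE && notify_idx_count >= PAGE_SIZE)
		return (last_notify_idx_reserved + 1) % PAGE_SIZE;

	/* Reuse the one-entry cache of the last released index. */
	if (last_notify_idx_released < max_notify_idx &&
	    !dbell_index_table_find(last_notify_idx_released))
		return last_notify_idx_released;

	/* Look for any unused index below the current maximum. */
	if (notify_idx_count + 1 < max_notify_idx)
		for (idx = 0; idx < max_notify_idx; idx++)
			if (!dbell_index_table_find(idx))
				return idx;

	/* Otherwise extend the used range (caller bumps max_notify_idx). */
	return max_notify_idx;
}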
201
202/*
203 * Remove the given entry from the index table. This will release() the
204 * entry's resource.
205 */
206static void dbell_index_table_remove(struct dbell_entry *entry)
207{
208 spin_lock_bh(&vmci_doorbell_it.lock);
209
210 hlist_del_init(&entry->node);
211
212 notify_idx_count--;
213 if (entry->idx == max_notify_idx - 1) {
214 /*
215 * If we delete an entry with the maximum known
216 * notification index, we take the opportunity to
217 * prune the current max. As there might be other
218 * unused indices immediately below, we lower the
219 * maximum until we hit an index in use.
220 */
221 while (max_notify_idx > 0 &&
222 !dbell_index_table_find(max_notify_idx - 1))
223 max_notify_idx--;
224 }
225
226 last_notify_idx_released = entry->idx;
227
228 spin_unlock_bh(&vmci_doorbell_it.lock);
229
230 vmci_resource_put(&entry->resource);
231}
232
233/*
234 * Creates a link between the given doorbell handle and the given
235 * index in the bitmap in the device backend. A notification state
236 * is created in the hypervisor.
237 */
238static int dbell_link(struct vmci_handle handle, u32 notify_idx)
239{
240 struct vmci_doorbell_link_msg link_msg;
241
242 link_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
243 VMCI_DOORBELL_LINK);
244 link_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
245 link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE;
246 link_msg.handle = handle;
247 link_msg.notify_idx = notify_idx;
248
249 return vmci_send_datagram(&link_msg.hdr);
250}
251
252/*
253 * Unlinks the given doorbell handle from an index in the bitmap in
254 * the device backend. The notification state is destroyed in the hypervisor.
255 */
256static int dbell_unlink(struct vmci_handle handle)
257{
258 struct vmci_doorbell_unlink_msg unlink_msg;
259
260 unlink_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
261 VMCI_DOORBELL_UNLINK);
262 unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
263 unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE;
264 unlink_msg.handle = handle;
265
266 return vmci_send_datagram(&unlink_msg.hdr);
267}
268
269/*
270 * Notify another guest or the host. We send a datagram down to the
271 * host via the hypervisor with the notification info.
272 */
273static int dbell_notify_as_guest(struct vmci_handle handle, u32 priv_flags)
274{
275 struct vmci_doorbell_notify_msg notify_msg;
276
277 notify_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
278 VMCI_DOORBELL_NOTIFY);
279 notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
280 notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE;
281 notify_msg.handle = handle;
282
283 return vmci_send_datagram(&notify_msg.hdr);
284}
285
286/*
287 * Calls the specified callback in a delayed context.
288 */
289static void dbell_delayed_dispatch(struct work_struct *work)
290{
291 struct dbell_entry *entry = container_of(work,
292 struct dbell_entry, work);
293
294 entry->notify_cb(entry->client_data);
295 vmci_resource_put(&entry->resource);
296}
297
298/*
299 * Dispatches a doorbell notification to the host context.
300 */
301int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle)
302{
303 struct dbell_entry *entry;
304 struct vmci_resource *resource;
305
306 if (vmci_handle_is_invalid(handle)) {
307 pr_devel("Notifying an invalid doorbell (handle=0x%x:0x%x)\n",
308 handle.context, handle.resource);
309 return VMCI_ERROR_INVALID_ARGS;
310 }
311
312 resource = vmci_resource_by_handle(handle,
313 VMCI_RESOURCE_TYPE_DOORBELL);
314 if (!resource) {
315 pr_devel("Notifying an unknown doorbell (handle=0x%x:0x%x)\n",
316 handle.context, handle.resource);
317 return VMCI_ERROR_NOT_FOUND;
318 }
319
320 entry = container_of(resource, struct dbell_entry, resource);
321 if (entry->run_delayed) {
322 schedule_work(&entry->work);
323 } else {
324 entry->notify_cb(entry->client_data);
325 vmci_resource_put(resource);
326 }
327
328 return VMCI_SUCCESS;
329}
330
331/*
332 * Register the notification bitmap with the host.
333 */
334bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn)
335{
336 int result;
337 struct vmci_notify_bm_set_msg bitmap_set_msg;
338
339 bitmap_set_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
340 VMCI_SET_NOTIFY_BITMAP);
341 bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
342 bitmap_set_msg.hdr.payload_size = sizeof(bitmap_set_msg) -
343 VMCI_DG_HEADERSIZE;
344 bitmap_set_msg.bitmap_ppn = bitmap_ppn;
345
346 result = vmci_send_datagram(&bitmap_set_msg.hdr);
347 if (result != VMCI_SUCCESS) {
348 pr_devel("Failed to register (PPN=%u) as notification bitmap (error=%d)\n",
349 bitmap_ppn, result);
350 return false;
351 }
352 return true;
353}
354
355/*
356 * Executes or schedules the handlers for a given notify index.
357 */
358static void dbell_fire_entries(u32 notify_idx)
359{
360 u32 bucket = VMCI_DOORBELL_HASH(notify_idx);
361 struct dbell_entry *dbell;
362 struct hlist_node *node;
363
364 spin_lock_bh(&vmci_doorbell_it.lock);
365
366 hlist_for_each_entry(dbell, node,
367 &vmci_doorbell_it.entries[bucket], node) {
368 if (dbell->idx == notify_idx &&
369 atomic_read(&dbell->active) == 1) {
370 if (dbell->run_delayed) {
371 vmci_resource_get(&dbell->resource);
372 schedule_work(&dbell->work);
373 } else {
374 dbell->notify_cb(dbell->client_data);
375 }
376 }
377 }
378
379 spin_unlock_bh(&vmci_doorbell_it.lock);
380}
381
382/*
383 * Scans the notification bitmap, collects pending notifications,
384 * resets the bitmap and invokes appropriate callbacks.
385 */
386void vmci_dbell_scan_notification_entries(u8 *bitmap)
387{
388 u32 idx;
389
390 for (idx = 0; idx < max_notify_idx; idx++) {
391 if (bitmap[idx] & 0x1) {
392 bitmap[idx] &= ~1;
393 dbell_fire_entries(idx);
394 }
395 }
396}
397
398/*
399 * vmci_doorbell_create() - Creates a doorbell
400 * @handle: A handle used to track the resource. Can be invalid.
401 * @flags: Flags that determine the context of the callback.
402 * @priv_flags: Privilege flags.
403 * @notify_cb: The callback to be invoked when the doorbell fires.
404 * @client_data: A parameter to be passed to the callback.
405 *
406 * Creates a doorbell with the given callback. If the handle is
407 * VMCI_INVALID_HANDLE, a free handle will be assigned, if
408 * possible. The callback can be run immediately (potentially with
409 * locks held - the default) or delayed (in a kernel thread) by
410 * specifying the flag VMCI_FLAG_DELAYED_CB. If delayed execution
411 * is selected, a given callback may not be run if the kernel is
412 * unable to allocate memory for the delayed execution (highly
413 * unlikely).
414 */
415int vmci_doorbell_create(struct vmci_handle *handle,
416 u32 flags,
417 u32 priv_flags,
418 vmci_callback notify_cb, void *client_data)
419{
420 struct dbell_entry *entry;
421 struct vmci_handle new_handle;
422 int result;
423
424 if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB ||
425 priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)
426 return VMCI_ERROR_INVALID_ARGS;
427
428 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
429 if (entry == NULL) {
430 pr_warn("Failed allocating memory for doorbell entry\n");
431 return VMCI_ERROR_NO_MEM;
432 }
433
434 if (vmci_handle_is_invalid(*handle)) {
435 u32 context_id = vmci_get_context_id();
436
437 /* Let resource code allocate a free ID for us */
438 new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
439 } else {
440 bool valid_context = false;
441
442 /*
443 * Validate the handle. We must do both of the checks below
444 * because we can be acting as both a host and a guest at the
445 * same time. We always allow the host context ID, since the
446 * host functionality is in practice always there with the
447 * unified driver.
448 */
449 if (handle->context == VMCI_HOST_CONTEXT_ID ||
450 (vmci_guest_code_active() &&
451 vmci_get_context_id() == handle->context)) {
452 valid_context = true;
453 }
454
455 if (!valid_context || handle->resource == VMCI_INVALID_ID) {
456 pr_devel("Invalid argument (handle=0x%x:0x%x)\n",
457 handle->context, handle->resource);
458 result = VMCI_ERROR_INVALID_ARGS;
459 goto free_mem;
460 }
461
462 new_handle = *handle;
463 }
464
465 entry->idx = 0;
466 INIT_HLIST_NODE(&entry->node);
467 entry->priv_flags = priv_flags;
468 INIT_WORK(&entry->work, dbell_delayed_dispatch);
469 entry->run_delayed = flags & VMCI_FLAG_DELAYED_CB;
470 entry->notify_cb = notify_cb;
471 entry->client_data = client_data;
472 atomic_set(&entry->active, 0);
473
474 result = vmci_resource_add(&entry->resource,
475 VMCI_RESOURCE_TYPE_DOORBELL,
476 new_handle);
477 if (result != VMCI_SUCCESS) {
478 pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n",
479 new_handle.context, new_handle.resource, result);
480 goto free_mem;
481 }
482
483 new_handle = vmci_resource_handle(&entry->resource);
484 if (vmci_guest_code_active()) {
485 dbell_index_table_add(entry);
486 result = dbell_link(new_handle, entry->idx);
487 if (VMCI_SUCCESS != result)
488 goto destroy_resource;
489
490 atomic_set(&entry->active, 1);
491 }
492
493 *handle = new_handle;
494
495 return result;
496
497 destroy_resource:
498 dbell_index_table_remove(entry);
499 vmci_resource_remove(&entry->resource);
500 free_mem:
501 kfree(entry);
502 return result;
503}
504EXPORT_SYMBOL_GPL(vmci_doorbell_create);
505
506/*
507 * vmci_doorbell_destroy() - Destroy a doorbell.
508 * @handle: The handle tracking the resource.
509 *
510 * Destroys a doorbell previously created with vmci_doorbell_create. This
511 * operation may block waiting for a callback to finish.
512 */
513int vmci_doorbell_destroy(struct vmci_handle handle)
514{
515 struct dbell_entry *entry;
516 struct vmci_resource *resource;
517
518 if (vmci_handle_is_invalid(handle))
519 return VMCI_ERROR_INVALID_ARGS;
520
521 resource = vmci_resource_by_handle(handle,
522 VMCI_RESOURCE_TYPE_DOORBELL);
523 if (!resource) {
524 pr_devel("Failed to destroy doorbell (handle=0x%x:0x%x)\n",
525 handle.context, handle.resource);
526 return VMCI_ERROR_NOT_FOUND;
527 }
528
529 entry = container_of(resource, struct dbell_entry, resource);
530
531 if (vmci_guest_code_active()) {
532 int result;
533
534 dbell_index_table_remove(entry);
535
536 result = dbell_unlink(handle);
537 if (VMCI_SUCCESS != result) {
538
539 /*
540 * The only reason this should fail would be
541 * an inconsistency between guest and
542 * hypervisor state, where the guest believes
543 * it has an active registration whereas the
544 * hypervisor doesn't. One case where this may
545 * happen is if a doorbell is unregistered
546 * following a hibernation at a time where the
547 * doorbell state hasn't been restored on the
548 * hypervisor side yet. Since the handle has
549 * now been removed in the guest, we just
550 * print a warning and return success.
551 */
552 pr_devel("Unlink of doorbell (handle=0x%x:0x%x) unknown by hypervisor (error=%d)\n",
553 handle.context, handle.resource, result);
554 }
555 }
556
557 /*
558 * Now remove the resource from the table. It might still be in use
559 * after this, in a callback or still on the delayed work queue.
560 */
561 vmci_resource_put(&entry->resource);
562 vmci_resource_remove(&entry->resource);
563
564 kfree(entry);
565
566 return VMCI_SUCCESS;
567}
568EXPORT_SYMBOL_GPL(vmci_doorbell_destroy);
569
570/*
571 * vmci_doorbell_notify() - Ring the doorbell (and hide in the bushes).
572 * @dst: The handle identifying the doorbell resource
573 * @priv_flags: Privilege flags.
574 *
575 * Generates a notification on the doorbell identified by the
576 * handle. For host side generation of notifications, the caller
577 * can specify what the privilege of the calling side is.
578 */
579int vmci_doorbell_notify(struct vmci_handle dst, u32 priv_flags)
580{
581 int retval;
582 enum vmci_route route;
583 struct vmci_handle src;
584
585 if (vmci_handle_is_invalid(dst) ||
586 (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS))
587 return VMCI_ERROR_INVALID_ARGS;
588
589 src = VMCI_INVALID_HANDLE;
590 retval = vmci_route(&src, &dst, false, &route);
591 if (retval < VMCI_SUCCESS)
592 return retval;
593
594 if (VMCI_ROUTE_AS_HOST == route)
595 return vmci_ctx_notify_dbell(VMCI_HOST_CONTEXT_ID,
596 dst, priv_flags);
597
598 if (VMCI_ROUTE_AS_GUEST == route)
599 return dbell_notify_as_guest(dst, priv_flags);
600
601 pr_warn("Unknown route (%d) for doorbell\n", route);
602 return VMCI_ERROR_DST_UNREACHABLE;
603}
604EXPORT_SYMBOL_GPL(vmci_doorbell_notify);
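/*
 * Editorial sketch, not part of the patch: typical use of the doorbell API
 * exported above. The callback and function names are placeholders; a real
 * client keeps the handle for the lifetime of the doorbell and is normally
 * notified by the peer end rather than ringing its own bell.
 */
static void example_doorbell_cb(void *client_data)
{
	pr_info("doorbell fired\n");
}

static int example_doorbell_usage(void)
{
	struct vmci_handle handle = VMCI_INVALID_HANDLE;
	int result;

	/* Delayed callback: runs from a work item, not atomic context. */
	result = vmci_doorbell_create(&handle, VMCI_FLAG_DELAYED_CB,
				      VMCI_NO_PRIVILEGE_FLAGS,
				      example_doorbell_cb, NULL);
	if (result != VMCI_SUCCESS)
		return result;

	/* Generate a notification on the doorbell we just created. */
	result = vmci_doorbell_notify(handle, VMCI_NO_PRIVILEGE_FLAGS);

	vmci_doorbell_destroy(handle);
	return result;
}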
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.h b/drivers/misc/vmw_vmci/vmci_doorbell.h
new file mode 100644
index 000000000000..e4c0b17486a5
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.h
@@ -0,0 +1,51 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef VMCI_DOORBELL_H
17#define VMCI_DOORBELL_H
18
19#include <linux/vmw_vmci_defs.h>
20#include <linux/types.h>
21
22#include "vmci_driver.h"
23
24/*
25 * VMCINotifyResourceInfo: Used to create and destroy doorbells, and
26 * generate a notification for a doorbell or queue pair.
27 */
28struct vmci_dbell_notify_resource_info {
29 struct vmci_handle handle;
30 u16 resource;
31 u16 action;
32 s32 result;
33};
34
35/*
36 * Structure used for checkpointing the doorbell mappings. It is
37 * written to the checkpoint as is, so changing this structure will
38 * break checkpoint compatibility.
39 */
40struct dbell_cpt_state {
41 struct vmci_handle handle;
42 u64 bitmap_idx;
43};
44
45int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle);
46int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags);
47
48bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn);
49void vmci_dbell_scan_notification_entries(u8 *bitmap);
50
51#endif /* VMCI_DOORBELL_H */
diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c
new file mode 100644
index 000000000000..7b3fce2da6c3
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
@@ -0,0 +1,117 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18#include <linux/atomic.h>
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/init.h>
22
23#include "vmci_driver.h"
24#include "vmci_event.h"
25
26static bool vmci_disable_host;
27module_param_named(disable_host, vmci_disable_host, bool, 0);
28MODULE_PARM_DESC(disable_host,
29 "Disable driver host personality (default=enabled)");
30
31static bool vmci_disable_guest;
32module_param_named(disable_guest, vmci_disable_guest, bool, 0);
33MODULE_PARM_DESC(disable_guest,
34 "Disable driver guest personality (default=enabled)");
35
36static bool vmci_guest_personality_initialized;
37static bool vmci_host_personality_initialized;
38
39/*
40 * vmci_get_context_id() - Gets the current context ID.
41 *
42 * Returns the current context ID. If the guest personality is active,
43 * this returns the VM's context ID; otherwise it returns the host context ID.
44 */
45u32 vmci_get_context_id(void)
46{
47 if (vmci_guest_code_active())
48 return vmci_get_vm_context_id();
49 else if (vmci_host_code_active())
50 return VMCI_HOST_CONTEXT_ID;
51
52 return VMCI_INVALID_ID;
53}
54EXPORT_SYMBOL_GPL(vmci_get_context_id);
55
56static int __init vmci_drv_init(void)
57{
58 int vmci_err;
59 int error;
60
61 vmci_err = vmci_event_init();
62 if (vmci_err < VMCI_SUCCESS) {
63 pr_err("Failed to initialize VMCIEvent (result=%d)\n",
64 vmci_err);
65 return -EINVAL;
66 }
67
68 if (!vmci_disable_guest) {
69 error = vmci_guest_init();
70 if (error) {
71 pr_warn("Failed to initialize guest personality (err=%d)\n",
72 error);
73 } else {
74 vmci_guest_personality_initialized = true;
75 pr_info("Guest personality initialized and is %s\n",
76 vmci_guest_code_active() ?
77 "active" : "inactive");
78 }
79 }
80
81 if (!vmci_disable_host) {
82 error = vmci_host_init();
83 if (error) {
84 pr_warn("Unable to initialize host personality (err=%d)\n",
85 error);
86 } else {
87 vmci_host_personality_initialized = true;
88 pr_info("Initialized host personality\n");
89 }
90 }
91
92 if (!vmci_guest_personality_initialized &&
93 !vmci_host_personality_initialized) {
94 vmci_event_exit();
95 return -ENODEV;
96 }
97
98 return 0;
99}
100module_init(vmci_drv_init);
101
102static void __exit vmci_drv_exit(void)
103{
104 if (vmci_guest_personality_initialized)
105 vmci_guest_exit();
106
107 if (vmci_host_personality_initialized)
108 vmci_host_exit();
109
110 vmci_event_exit();
111}
112module_exit(vmci_drv_exit);
113
114MODULE_AUTHOR("VMware, Inc.");
115MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface.");
116MODULE_VERSION("1.0.0.0-k");
117MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/vmw_vmci/vmci_driver.h b/drivers/misc/vmw_vmci/vmci_driver.h
new file mode 100644
index 000000000000..f69156a1f30c
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_driver.h
@@ -0,0 +1,50 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef _VMCI_DRIVER_H_
17#define _VMCI_DRIVER_H_
18
19#include <linux/vmw_vmci_defs.h>
20#include <linux/wait.h>
21
22#include "vmci_queue_pair.h"
23#include "vmci_context.h"
24
25enum vmci_obj_type {
26 VMCIOBJ_VMX_VM = 10,
27 VMCIOBJ_CONTEXT,
28 VMCIOBJ_SOCKET,
29 VMCIOBJ_NOT_SET,
30};
31
32/* For storing VMCI structures in file handles. */
33struct vmci_obj {
34 void *ptr;
35 enum vmci_obj_type type;
36};
37
38u32 vmci_get_context_id(void);
39int vmci_send_datagram(struct vmci_datagram *dg);
40
41int vmci_host_init(void);
42void vmci_host_exit(void);
43bool vmci_host_code_active(void);
44
45int vmci_guest_init(void);
46void vmci_guest_exit(void);
47bool vmci_guest_code_active(void);
48u32 vmci_get_vm_context_id(void);
49
50#endif /* _VMCI_DRIVER_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c
new file mode 100644
index 000000000000..8449516d6ac6
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_event.c
@@ -0,0 +1,224 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18#include <linux/list.h>
19#include <linux/module.h>
20#include <linux/sched.h>
21#include <linux/slab.h>
22
23#include "vmci_driver.h"
24#include "vmci_event.h"
25
26#define EVENT_MAGIC 0xEABE0000
27#define VMCI_EVENT_MAX_ATTEMPTS 10
28
29struct vmci_subscription {
30 u32 id;
31 u32 event;
32 vmci_event_cb callback;
33 void *callback_data;
34 struct list_head node; /* on one of subscriber lists */
35};
36
37static struct list_head subscriber_array[VMCI_EVENT_MAX];
38static DEFINE_MUTEX(subscriber_mutex);
39
40int __init vmci_event_init(void)
41{
42 int i;
43
44 for (i = 0; i < VMCI_EVENT_MAX; i++)
45 INIT_LIST_HEAD(&subscriber_array[i]);
46
47 return VMCI_SUCCESS;
48}
49
50void vmci_event_exit(void)
51{
52 int e;
53
54 /* We free all memory at exit. */
55 for (e = 0; e < VMCI_EVENT_MAX; e++) {
56 struct vmci_subscription *cur, *p2;
57 list_for_each_entry_safe(cur, p2, &subscriber_array[e], node) {
58
59 /*
60 * We should never get here because all events
61 * should have been unregistered before we try
62 * to unload the driver module.
63 */
64 pr_warn("Unexpected free events occurring\n");
65 list_del(&cur->node);
66 kfree(cur);
67 }
68 }
69}
70
71/*
72 * Find entry. Assumes subscriber_mutex is held.
73 */
74static struct vmci_subscription *event_find(u32 sub_id)
75{
76 int e;
77
78 for (e = 0; e < VMCI_EVENT_MAX; e++) {
79 struct vmci_subscription *cur;
80 list_for_each_entry(cur, &subscriber_array[e], node) {
81 if (cur->id == sub_id)
82 return cur;
83 }
84 }
85 return NULL;
86}
87
88/*
89 * Actually delivers the events to the subscribers.
90 * The callback function for each subscriber is invoked.
91 */
92static void event_deliver(struct vmci_event_msg *event_msg)
93{
94 struct vmci_subscription *cur;
95 struct list_head *subscriber_list;
96
97 rcu_read_lock();
98 subscriber_list = &subscriber_array[event_msg->event_data.event];
99 list_for_each_entry_rcu(cur, subscriber_list, node) {
100 cur->callback(cur->id, &event_msg->event_data,
101 cur->callback_data);
102 }
103 rcu_read_unlock();
104}
105
106/*
107 * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all
108 * subscribers for given event.
109 */
110int vmci_event_dispatch(struct vmci_datagram *msg)
111{
112 struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg;
113
114 if (msg->payload_size < sizeof(u32) ||
115 msg->payload_size > sizeof(struct vmci_event_data_max))
116 return VMCI_ERROR_INVALID_ARGS;
117
118 if (!VMCI_EVENT_VALID(event_msg->event_data.event))
119 return VMCI_ERROR_EVENT_UNKNOWN;
120
121 event_deliver(event_msg);
122 return VMCI_SUCCESS;
123}
124
125/*
126 * vmci_event_subscribe() - Subscribe to a given event.
127 * @event: The event to subscribe to.
128 * @callback: The callback to invoke upon the event.
129 * @callback_data: Data to pass to the callback.
130 * @subscription_id: ID used to track subscription. Used with
131 * vmci_event_unsubscribe()
132 *
133 * Subscribes to the provided event. The callback specified will be
134 * fired from RCU critical section and therefore must not sleep.
135 */
136int vmci_event_subscribe(u32 event,
137 vmci_event_cb callback,
138 void *callback_data,
139 u32 *new_subscription_id)
140{
141 struct vmci_subscription *sub;
142 int attempts;
143 int retval;
144 bool have_new_id = false;
145
146 if (!new_subscription_id) {
147 pr_devel("%s: Invalid subscription (NULL)\n", __func__);
148 return VMCI_ERROR_INVALID_ARGS;
149 }
150
151 if (!VMCI_EVENT_VALID(event) || !callback) {
152 pr_devel("%s: Failed to subscribe to event (type=%d) (callback=%p) (data=%p)\n",
153 __func__, event, callback, callback_data);
154 return VMCI_ERROR_INVALID_ARGS;
155 }
156
157 sub = kzalloc(sizeof(*sub), GFP_KERNEL);
158 if (!sub)
159 return VMCI_ERROR_NO_MEM;
160
161 sub->id = VMCI_EVENT_MAX;
162 sub->event = event;
163 sub->callback = callback;
164 sub->callback_data = callback_data;
165 INIT_LIST_HEAD(&sub->node);
166
167 mutex_lock(&subscriber_mutex);
168
169 /* Creation of a new event is always allowed. */
170 for (attempts = 0; attempts < VMCI_EVENT_MAX_ATTEMPTS; attempts++) {
171 static u32 subscription_id;
172 /*
173 * We try to get an id a couple of times before
174 * claiming we are out of resources.
175 */
176
177 /* Test for duplicate id. */
178 if (!event_find(++subscription_id)) {
179 sub->id = subscription_id;
180 have_new_id = true;
181 break;
182 }
183 }
184
185 if (have_new_id) {
186 list_add_rcu(&sub->node, &subscriber_array[event]);
187 retval = VMCI_SUCCESS;
188 } else {
189 retval = VMCI_ERROR_NO_RESOURCES;
190 }
191
192 mutex_unlock(&subscriber_mutex);
193
194 *new_subscription_id = sub->id;
195 return retval;
196}
197EXPORT_SYMBOL_GPL(vmci_event_subscribe);
198
199/*
200 * vmci_event_unsubscribe() - unsubscribe from an event.
201 * @sub_id: A subscription ID as provided by vmci_event_subscribe()
202 *
203 * Unsubscribe from given event. Removes it from list and frees it.
204 * Will return callback_data if requested by caller.
205 */
206int vmci_event_unsubscribe(u32 sub_id)
207{
208 struct vmci_subscription *s;
209
210 mutex_lock(&subscriber_mutex);
211 s = event_find(sub_id);
212 if (s)
213 list_del_rcu(&s->node);
214 mutex_unlock(&subscriber_mutex);
215
216 if (!s)
217 return VMCI_ERROR_NOT_FOUND;
218
219 synchronize_rcu();
220 kfree(s);
221
222 return VMCI_SUCCESS;
223}
224EXPORT_SYMBOL_GPL(vmci_event_unsubscribe);
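/*
 * Editorial sketch, not part of the patch: subscribing to an event with the
 * API exported above. The callback runs from an RCU read-side critical
 * section (see event_deliver()), so it must not sleep; the names below are
 * placeholders.
 */
static void example_event_cb(u32 sub_id, const struct vmci_event_data *ed,
			     void *client_data)
{
	pr_info("VMCI event %u delivered to subscription 0x%x\n",
		ed->event, sub_id);
}

static int example_event_usage(void)
{
	u32 sub_id;
	int result;

	result = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
				      example_event_cb, NULL, &sub_id);
	if (result < VMCI_SUCCESS)
		return result;

	/* ... later, when the event is no longer of interest ... */
	return vmci_event_unsubscribe(sub_id);
}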
diff --git a/drivers/misc/vmw_vmci/vmci_event.h b/drivers/misc/vmw_vmci/vmci_event.h
new file mode 100644
index 000000000000..7df9b1c0a96c
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_event.h
@@ -0,0 +1,25 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef __VMCI_EVENT_H__
17#define __VMCI_EVENT_H__
18
19#include <linux/vmw_vmci_api.h>
20
21int vmci_event_init(void);
22void vmci_event_exit(void);
23int vmci_event_dispatch(struct vmci_datagram *msg);
24
25#endif /*__VMCI_EVENT_H__ */
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
new file mode 100644
index 000000000000..60c01999f489
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -0,0 +1,759 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18#include <linux/moduleparam.h>
19#include <linux/interrupt.h>
20#include <linux/highmem.h>
21#include <linux/kernel.h>
22#include <linux/mm.h>
23#include <linux/module.h>
24#include <linux/sched.h>
25#include <linux/slab.h>
26#include <linux/init.h>
27#include <linux/pci.h>
28#include <linux/smp.h>
29#include <linux/io.h>
30#include <linux/vmalloc.h>
31
32#include "vmci_datagram.h"
33#include "vmci_doorbell.h"
34#include "vmci_context.h"
35#include "vmci_driver.h"
36#include "vmci_event.h"
37
38#define PCI_VENDOR_ID_VMWARE 0x15AD
39#define PCI_DEVICE_ID_VMWARE_VMCI 0x0740
40
41#define VMCI_UTIL_NUM_RESOURCES 1
42
43static bool vmci_disable_msi;
44module_param_named(disable_msi, vmci_disable_msi, bool, 0);
45MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
46
47static bool vmci_disable_msix;
48module_param_named(disable_msix, vmci_disable_msix, bool, 0);
49MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
50
51static u32 ctx_update_sub_id = VMCI_INVALID_ID;
52static u32 vm_context_id = VMCI_INVALID_ID;
53
54struct vmci_guest_device {
55 struct device *dev; /* PCI device we are attached to */
56 void __iomem *iobase;
57
58 unsigned int irq;
59 unsigned int intr_type;
60 bool exclusive_vectors;
61 struct msix_entry msix_entries[VMCI_MAX_INTRS];
62
63 struct tasklet_struct datagram_tasklet;
64 struct tasklet_struct bm_tasklet;
65
66 void *data_buffer;
67 void *notification_bitmap;
68};
69
70/* vmci_dev singleton device and supporting data */
71static struct vmci_guest_device *vmci_dev_g;
72static DEFINE_SPINLOCK(vmci_dev_spinlock);
73
74static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
75
76bool vmci_guest_code_active(void)
77{
78 return atomic_read(&vmci_num_guest_devices) != 0;
79}
80
81u32 vmci_get_vm_context_id(void)
82{
83 if (vm_context_id == VMCI_INVALID_ID) {
84 struct vmci_datagram get_cid_msg;
85 get_cid_msg.dst =
86 vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
87 VMCI_GET_CONTEXT_ID);
88 get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
89 get_cid_msg.payload_size = 0;
90 vm_context_id = vmci_send_datagram(&get_cid_msg);
91 }
92 return vm_context_id;
93}
94
95/*
96 * VM to hypervisor call mechanism. We use the standard VMware naming
97 * convention since shared code is calling this function as well.
98 */
99int vmci_send_datagram(struct vmci_datagram *dg)
100{
101 unsigned long flags;
102 int result;
103
104 /* Check args. */
105 if (dg == NULL)
106 return VMCI_ERROR_INVALID_ARGS;
107
108 /*
109 * Need to acquire spinlock on the device because the datagram
110 * data may be spread over multiple pages and the monitor may
111 * interleave device user rpc calls from multiple
112 * VCPUs. Acquiring the spinlock precludes that
113 * possibility. Disabling interrupts to avoid incoming
114 * datagrams during a "rep out" and possibly landing up in
115 * this function.
116 */
117 spin_lock_irqsave(&vmci_dev_spinlock, flags);
118
119 if (vmci_dev_g) {
120 iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
121 dg, VMCI_DG_SIZE(dg));
122 result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR);
123 } else {
124 result = VMCI_ERROR_UNAVAILABLE;
125 }
126
127 spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
128
129 return result;
130}
131EXPORT_SYMBOL_GPL(vmci_send_datagram);
132
133/*
134 * Event callback invoked with the new context id when the VM's context
135 * id is updated or the VM is resumed.
136 */
137static void vmci_guest_cid_update(u32 sub_id,
138 const struct vmci_event_data *event_data,
139 void *client_data)
140{
141 const struct vmci_event_payld_ctx *ev_payload =
142 vmci_event_data_const_payload(event_data);
143
144 if (sub_id != ctx_update_sub_id) {
145 pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
146 return;
147 }
148
149 if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
150 pr_devel("Invalid event data\n");
151 return;
152 }
153
154 pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
155 vm_context_id, ev_payload->context_id, event_data->event);
156
157 vm_context_id = ev_payload->context_id;
158}
159
160/*
161 * Verify that the host supports the hypercalls we need. If it does not,
162 * try to find fallback hypercalls and use those instead. Returns
163 * true if required hypercalls (or fallback hypercalls) are
164 * supported by the host, false otherwise.
165 */
166static bool vmci_check_host_caps(struct pci_dev *pdev)
167{
168 bool result;
169 struct vmci_resource_query_msg *msg;
170 u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
171 VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
172 struct vmci_datagram *check_msg;
173
174 check_msg = kmalloc(msg_size, GFP_KERNEL);
175 if (!check_msg) {
176 dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
177 return false;
178 }
179
180 check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
181 VMCI_RESOURCES_QUERY);
182 check_msg->src = VMCI_ANON_SRC_HANDLE;
183 check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
184 msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
185
186 msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
187 msg->resources[0] = VMCI_GET_CONTEXT_ID;
188
189 /* Checks that hypercalls are supported */
190 result = vmci_send_datagram(check_msg) == 0x01;
191 kfree(check_msg);
192
193 dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
194 __func__, result ? "PASSED" : "FAILED");
195
196 /* We need the vector. There are no fallbacks. */
197 return result;
198}
199
200/*
201 * Reads datagrams from the data in port and dispatches them. We
202 * always start reading datagrams into only the first page of the
203 * datagram buffer. If the datagrams don't fit into one page, we
204 * use the maximum datagram buffer size for the remainder of the
205 * invocation. This is a simple heuristic for not penalizing
206 * small datagrams.
207 *
208 * This function assumes that it has exclusive access to the data
209 * in port for the duration of the call.
210 */
211static void vmci_dispatch_dgs(unsigned long data)
212{
213 struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
214 u8 *dg_in_buffer = vmci_dev->data_buffer;
215 struct vmci_datagram *dg;
216 size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
217 size_t current_dg_in_buffer_size = PAGE_SIZE;
218 size_t remaining_bytes;
219
220 BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
221
222 ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
223 vmci_dev->data_buffer, current_dg_in_buffer_size);
224 dg = (struct vmci_datagram *)dg_in_buffer;
225 remaining_bytes = current_dg_in_buffer_size;
226
227 while (dg->dst.resource != VMCI_INVALID_ID ||
228 remaining_bytes > PAGE_SIZE) {
229 unsigned dg_in_size;
230
231 /*
232 * When the input buffer spans multiple pages, a datagram can
233 * start on any page boundary in the buffer.
234 */
235 if (dg->dst.resource == VMCI_INVALID_ID) {
236 dg = (struct vmci_datagram *)roundup(
237 (uintptr_t)dg + 1, PAGE_SIZE);
238 remaining_bytes =
239 (size_t)(dg_in_buffer +
240 current_dg_in_buffer_size -
241 (u8 *)dg);
242 continue;
243 }
244
245 dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
246
247 if (dg_in_size <= dg_in_buffer_size) {
248 int result;
249
250 /*
251 * If the remaining bytes in the datagram
252 * buffer don't contain the complete
253 * datagram, we first make sure we have enough
254 * room for it and then we read the remainder
255 * of the datagram and possibly any following
256 * datagrams.
257 */
258 if (dg_in_size > remaining_bytes) {
259 if (remaining_bytes !=
260 current_dg_in_buffer_size) {
261
262 /*
263 * We move the partial
264 * datagram to the front and
265 * read the remainder of the
266 * datagram and possibly
267 * following calls into the
268 * following bytes.
269 */
270 memmove(dg_in_buffer, dg_in_buffer +
271 current_dg_in_buffer_size -
272 remaining_bytes,
273 remaining_bytes);
274 dg = (struct vmci_datagram *)
275 dg_in_buffer;
276 }
277
278 if (current_dg_in_buffer_size !=
279 dg_in_buffer_size)
280 current_dg_in_buffer_size =
281 dg_in_buffer_size;
282
283 ioread8_rep(vmci_dev->iobase +
284 VMCI_DATA_IN_ADDR,
285 vmci_dev->data_buffer +
286 remaining_bytes,
287 current_dg_in_buffer_size -
288 remaining_bytes);
289 }
290
291 /*
292 * We special case event datagrams from the
293 * hypervisor.
294 */
295 if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
296 dg->dst.resource == VMCI_EVENT_HANDLER) {
297 result = vmci_event_dispatch(dg);
298 } else {
299 result = vmci_datagram_invoke_guest_handler(dg);
300 }
301 if (result < VMCI_SUCCESS)
302 dev_dbg(vmci_dev->dev,
303 "Datagram with resource (ID=0x%x) failed (err=%d)\n",
304 dg->dst.resource, result);
305
306 /* On to the next datagram. */
307 dg = (struct vmci_datagram *)((u8 *)dg +
308 dg_in_size);
309 } else {
310 size_t bytes_to_skip;
311
312 /*
313 * Datagram doesn't fit in datagram buffer of maximal
314 * size. We drop it.
315 */
316 dev_dbg(vmci_dev->dev,
317 "Failed to receive datagram (size=%u bytes)\n",
318 dg_in_size);
319
320 bytes_to_skip = dg_in_size - remaining_bytes;
321 if (current_dg_in_buffer_size != dg_in_buffer_size)
322 current_dg_in_buffer_size = dg_in_buffer_size;
323
324 for (;;) {
325 ioread8_rep(vmci_dev->iobase +
326 VMCI_DATA_IN_ADDR,
327 vmci_dev->data_buffer,
328 current_dg_in_buffer_size);
329 if (bytes_to_skip <= current_dg_in_buffer_size)
330 break;
331
332 bytes_to_skip -= current_dg_in_buffer_size;
333 }
334 dg = (struct vmci_datagram *)(dg_in_buffer +
335 bytes_to_skip);
336 }
337
338 remaining_bytes =
339 (size_t) (dg_in_buffer + current_dg_in_buffer_size -
340 (u8 *)dg);
341
342 if (remaining_bytes < VMCI_DG_HEADERSIZE) {
343 /* Get the next batch of datagrams. */
344
345 ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
346 vmci_dev->data_buffer,
347 current_dg_in_buffer_size);
348 dg = (struct vmci_datagram *)dg_in_buffer;
349 remaining_bytes = current_dg_in_buffer_size;
350 }
351 }
352}
353
354/*
355 * Scans the notification bitmap for raised flags, clears them
356 * and handles the notifications.
357 */
358static void vmci_process_bitmap(unsigned long data)
359{
360 struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
361
362 if (!dev->notification_bitmap) {
363 dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
364 return;
365 }
366
367 vmci_dbell_scan_notification_entries(dev->notification_bitmap);
368}
369
370/*
371 * Enable MSI-X. Try exclusive vectors first, then shared vectors.
372 */
373static int vmci_enable_msix(struct pci_dev *pdev,
374 struct vmci_guest_device *vmci_dev)
375{
376 int i;
377 int result;
378
379 for (i = 0; i < VMCI_MAX_INTRS; ++i) {
380 vmci_dev->msix_entries[i].entry = i;
381 vmci_dev->msix_entries[i].vector = i;
382 }
383
384 result = pci_enable_msix(pdev, vmci_dev->msix_entries, VMCI_MAX_INTRS);
385 if (result == 0)
386 vmci_dev->exclusive_vectors = true;
387 else if (result > 0)
388 result = pci_enable_msix(pdev, vmci_dev->msix_entries, 1);
389
390 return result;
391}
392
393/*
394 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
395 * interrupt (vector VMCI_INTR_DATAGRAM).
396 */
397static irqreturn_t vmci_interrupt(int irq, void *_dev)
398{
399 struct vmci_guest_device *dev = _dev;
400
401 /*
402 * If we are using MSI-X with exclusive vectors then we simply schedule
403 * the datagram tasklet, since we know the interrupt was meant for us.
404 * Otherwise we must read the ICR to determine what to do.
405 */
406
407 if (dev->intr_type == VMCI_INTR_TYPE_MSIX && dev->exclusive_vectors) {
408 tasklet_schedule(&dev->datagram_tasklet);
409 } else {
410 unsigned int icr;
411
412 /* Acknowledge interrupt and determine what needs doing. */
413 icr = ioread32(dev->iobase + VMCI_ICR_ADDR);
414 if (icr == 0 || icr == ~0)
415 return IRQ_NONE;
416
417 if (icr & VMCI_ICR_DATAGRAM) {
418 tasklet_schedule(&dev->datagram_tasklet);
419 icr &= ~VMCI_ICR_DATAGRAM;
420 }
421
422 if (icr & VMCI_ICR_NOTIFICATION) {
423 tasklet_schedule(&dev->bm_tasklet);
424 icr &= ~VMCI_ICR_NOTIFICATION;
425 }
426
427 if (icr != 0)
428 dev_warn(dev->dev,
429 "Ignoring unknown interrupt cause (%d)\n",
430 icr);
431 }
432
433 return IRQ_HANDLED;
434}
435
436/*
437 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
438 * which is for the notification bitmap. Will only get called if we are
439 * using MSI-X with exclusive vectors.
440 */
441static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
442{
443 struct vmci_guest_device *dev = _dev;
444
445 /* For MSI-X we can just assume it was meant for us. */
446 tasklet_schedule(&dev->bm_tasklet);
447
448 return IRQ_HANDLED;
449}
450
451/*
452 * Most of the initialization at module load time is done here.
453 */
454static int vmci_guest_probe_device(struct pci_dev *pdev,
455 const struct pci_device_id *id)
456{
457 struct vmci_guest_device *vmci_dev;
458 void __iomem *iobase;
459 unsigned int capabilities;
460 unsigned long cmd;
461 int vmci_err;
462 int error;
463
464 dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");
465
466 error = pcim_enable_device(pdev);
467 if (error) {
468 dev_err(&pdev->dev,
469 "Failed to enable VMCI device: %d\n", error);
470 return error;
471 }
472
473 error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME);
474 if (error) {
475 dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
476 return error;
477 }
478
479 iobase = pcim_iomap_table(pdev)[0];
480
481 dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n",
482 (unsigned long)iobase, pdev->irq);
483
484 vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
485 if (!vmci_dev) {
486 dev_err(&pdev->dev,
487 "Can't allocate memory for VMCI device\n");
488 return -ENOMEM;
489 }
490
491 vmci_dev->dev = &pdev->dev;
492 vmci_dev->intr_type = VMCI_INTR_TYPE_INTX;
493 vmci_dev->exclusive_vectors = false;
494 vmci_dev->iobase = iobase;
495
496 tasklet_init(&vmci_dev->datagram_tasklet,
497 vmci_dispatch_dgs, (unsigned long)vmci_dev);
498 tasklet_init(&vmci_dev->bm_tasklet,
499 vmci_process_bitmap, (unsigned long)vmci_dev);
500
501 vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
502 if (!vmci_dev->data_buffer) {
503 dev_err(&pdev->dev,
504 "Can't allocate memory for datagram buffer\n");
505 return -ENOMEM;
506 }
507
508 pci_set_master(pdev); /* To enable queue_pair functionality. */
509
510 /*
511 * Verify that the VMCI Device supports the capabilities that
512 * we need. If the device is missing capabilities that we would
513 * like to use, check for fallback capabilities and use those
514 * instead (so we can run a new VM on old hosts). Fail the load if
515 * a required capability is missing and there is no fallback.
516 *
517 * Right now, we need datagrams. There are no fallbacks.
518 */
519 capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR);
520 if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
521 dev_err(&pdev->dev, "Device does not support datagrams\n");
522 error = -ENXIO;
523 goto err_free_data_buffer;
524 }
525
526 /*
527 * If the hardware supports notifications, we will use that as
528 * well.
529 */
530 if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
531 vmci_dev->notification_bitmap = vmalloc(PAGE_SIZE);
532 if (!vmci_dev->notification_bitmap) {
533 dev_warn(&pdev->dev,
534 "Unable to allocate notification bitmap\n");
535 } else {
536 memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
537 capabilities |= VMCI_CAPS_NOTIFICATIONS;
538 }
539 }
540
541 dev_info(&pdev->dev, "Using capabilities 0x%x\n", capabilities);
542
543 /* Let the host know which capabilities we intend to use. */
544 iowrite32(capabilities, vmci_dev->iobase + VMCI_CAPS_ADDR);
545
546 /* Set up global device so that we can start sending datagrams */
547 spin_lock_irq(&vmci_dev_spinlock);
548 vmci_dev_g = vmci_dev;
549 spin_unlock_irq(&vmci_dev_spinlock);
550
551 /*
552 * Register notification bitmap with device if that capability is
553 * used.
554 */
555 if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
556 struct page *page =
557 vmalloc_to_page(vmci_dev->notification_bitmap);
558 unsigned long bitmap_ppn = page_to_pfn(page);
559 if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
560 dev_warn(&pdev->dev,
561 "VMCI device unable to register notification bitmap with PPN 0x%x\n",
562 (u32) bitmap_ppn);
563 goto err_remove_vmci_dev_g;
564 }
565 }
566
567 /* Check host capabilities. */
568 if (!vmci_check_host_caps(pdev))
569 goto err_remove_bitmap;
570
571 /* Enable device. */
572
573 /*
574 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
575 * update the internal context id when needed.
576 */
577 vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
578 vmci_guest_cid_update, NULL,
579 &ctx_update_sub_id);
580 if (vmci_err < VMCI_SUCCESS)
581 dev_warn(&pdev->dev,
582 "Failed to subscribe to event (type=%d): %d\n",
583 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
584
585 /*
586 * Enable interrupts. Try MSI-X first, then MSI, and then fallback on
587 * legacy interrupts.
588 */
589 if (!vmci_disable_msix && !vmci_enable_msix(pdev, vmci_dev)) {
590 vmci_dev->intr_type = VMCI_INTR_TYPE_MSIX;
591 vmci_dev->irq = vmci_dev->msix_entries[0].vector;
592 } else if (!vmci_disable_msi && !pci_enable_msi(pdev)) {
593 vmci_dev->intr_type = VMCI_INTR_TYPE_MSI;
594 vmci_dev->irq = pdev->irq;
595 } else {
596 vmci_dev->intr_type = VMCI_INTR_TYPE_INTX;
597 vmci_dev->irq = pdev->irq;
598 }
599
600 /*
601 * Request IRQ for legacy or MSI interrupts, or for first
602 * MSI-X vector.
603 */
604 error = request_irq(vmci_dev->irq, vmci_interrupt, IRQF_SHARED,
605 KBUILD_MODNAME, vmci_dev);
606 if (error) {
607 dev_err(&pdev->dev, "Irq %u in use: %d\n",
608 vmci_dev->irq, error);
609 goto err_disable_msi;
610 }
611
612 /*
613 * For MSI-X with exclusive vectors we need to request an
614 * interrupt for each vector so that we get a separate
615 * interrupt handler routine. This allows us to distinguish
616 * between the vectors.
617 */
618 if (vmci_dev->exclusive_vectors) {
619 error = request_irq(vmci_dev->msix_entries[1].vector,
620 vmci_interrupt_bm, 0, KBUILD_MODNAME,
621 vmci_dev);
622 if (error) {
623 dev_err(&pdev->dev,
624 "Failed to allocate irq %u: %d\n",
625 vmci_dev->msix_entries[1].vector, error);
626 goto err_free_irq;
627 }
628 }
629
630 dev_dbg(&pdev->dev, "Registered device\n");
631
632 atomic_inc(&vmci_num_guest_devices);
633
634 /* Enable specific interrupt bits. */
635 cmd = VMCI_IMR_DATAGRAM;
636 if (capabilities & VMCI_CAPS_NOTIFICATIONS)
637 cmd |= VMCI_IMR_NOTIFICATION;
638 iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
639
640 /* Enable interrupts. */
641 iowrite32(VMCI_CONTROL_INT_ENABLE,
642 vmci_dev->iobase + VMCI_CONTROL_ADDR);
643
644 pci_set_drvdata(pdev, vmci_dev);
645 return 0;
646
647err_free_irq:
648 free_irq(vmci_dev->irq, vmci_dev);
649 tasklet_kill(&vmci_dev->datagram_tasklet);
650 tasklet_kill(&vmci_dev->bm_tasklet);
651
652err_disable_msi:
653 if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSIX)
654 pci_disable_msix(pdev);
655 else if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSI)
656 pci_disable_msi(pdev);
657
658 vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
659 if (vmci_err < VMCI_SUCCESS)
660 dev_warn(&pdev->dev,
661 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
662 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
663
664err_remove_bitmap:
665 if (vmci_dev->notification_bitmap) {
666 iowrite32(VMCI_CONTROL_RESET,
667 vmci_dev->iobase + VMCI_CONTROL_ADDR);
668 vfree(vmci_dev->notification_bitmap);
669 }
670
671err_remove_vmci_dev_g:
672 spin_lock_irq(&vmci_dev_spinlock);
673 vmci_dev_g = NULL;
674 spin_unlock_irq(&vmci_dev_spinlock);
675
676err_free_data_buffer:
677 vfree(vmci_dev->data_buffer);
678
679 /* The rest are managed resources and will be freed by PCI core */
680 return error;
681}
682
683static void vmci_guest_remove_device(struct pci_dev *pdev)
684{
685 struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
686 int vmci_err;
687
688 dev_dbg(&pdev->dev, "Removing device\n");
689
690 atomic_dec(&vmci_num_guest_devices);
691
692 vmci_qp_guest_endpoints_exit();
693
694 vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
695 if (vmci_err < VMCI_SUCCESS)
696 dev_warn(&pdev->dev,
697 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
698 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
699
700 spin_lock_irq(&vmci_dev_spinlock);
701 vmci_dev_g = NULL;
702 spin_unlock_irq(&vmci_dev_spinlock);
703
704 dev_dbg(&pdev->dev, "Resetting vmci device\n");
705 iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR);
706
707 /*
708 * Free IRQ and then disable MSI/MSI-X as appropriate. For
709 * MSI-X, we might have multiple vectors, each with their own
710 * IRQ, which we must free too.
711 */
712 free_irq(vmci_dev->irq, vmci_dev);
713 if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSIX) {
714 if (vmci_dev->exclusive_vectors)
715 free_irq(vmci_dev->msix_entries[1].vector, vmci_dev);
716 pci_disable_msix(pdev);
717 } else if (vmci_dev->intr_type == VMCI_INTR_TYPE_MSI) {
718 pci_disable_msi(pdev);
719 }
720
721 tasklet_kill(&vmci_dev->datagram_tasklet);
722 tasklet_kill(&vmci_dev->bm_tasklet);
723
724 if (vmci_dev->notification_bitmap) {
725 /*
726 * The device reset above cleared the bitmap state of the
727 * device, so we can safely free it here.
728 */
729
730 vfree(vmci_dev->notification_bitmap);
731 }
732
733 vfree(vmci_dev->data_buffer);
734
735 /* The rest are managed resources and will be freed by PCI core */
736}
737
738static DEFINE_PCI_DEVICE_TABLE(vmci_ids) = {
739 { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
740 { 0 },
741};
742MODULE_DEVICE_TABLE(pci, vmci_ids);
743
744static struct pci_driver vmci_guest_driver = {
745 .name = KBUILD_MODNAME,
746 .id_table = vmci_ids,
747 .probe = vmci_guest_probe_device,
748 .remove = vmci_guest_remove_device,
749};
750
751int __init vmci_guest_init(void)
752{
753 return pci_register_driver(&vmci_guest_driver);
754}
755
756void __exit vmci_guest_exit(void)
757{
758 pci_unregister_driver(&vmci_guest_driver);
759}
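
The probe routine above open-codes the interrupt fallback order (MSI-X, then MSI,
then legacy INTx) and records which mode was chosen so the error and remove paths
can undo it. As an illustrative aside only: on later kernels the same policy can be
expressed with the generic pci_alloc_irq_vectors()/pci_irq_vector() helpers. The
wrapper name below is hypothetical and is not part of this patch.

	#include <linux/pci.h>
	#include <linux/interrupt.h>

	/* Sketch: MSI-X -> MSI -> INTx fallback via pci_alloc_irq_vectors(). */
	static int vmci_request_irq_sketch(struct pci_dev *pdev, void *drvdata,
					   irq_handler_t handler)
	{
		int nvec;

		/* Ask for up to two vectors (datagrams + notification bitmap). */
		nvec = pci_alloc_irq_vectors(pdev, 1, 2,
					     PCI_IRQ_MSIX | PCI_IRQ_MSI |
					     PCI_IRQ_LEGACY);
		if (nvec < 0)
			return nvec;

		/* Vector 0 handles datagrams regardless of the mode chosen. */
		return request_irq(pci_irq_vector(pdev, 0), handler, IRQF_SHARED,
				   KBUILD_MODNAME, drvdata);
	}
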
diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.c b/drivers/misc/vmw_vmci/vmci_handle_array.c
new file mode 100644
index 000000000000..344973a0fb0a
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_handle_array.c
@@ -0,0 +1,142 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/slab.h>
17#include "vmci_handle_array.h"
18
19static size_t handle_arr_calc_size(size_t capacity)
20{
21 return sizeof(struct vmci_handle_arr) +
22 capacity * sizeof(struct vmci_handle);
23}
24
25struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity)
26{
27 struct vmci_handle_arr *array;
28
29 if (capacity == 0)
30 capacity = VMCI_HANDLE_ARRAY_DEFAULT_SIZE;
31
32 array = kmalloc(handle_arr_calc_size(capacity), GFP_ATOMIC);
33 if (!array)
34 return NULL;
35
36 array->capacity = capacity;
37 array->size = 0;
38
39 return array;
40}
41
42void vmci_handle_arr_destroy(struct vmci_handle_arr *array)
43{
44 kfree(array);
45}
46
47void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
48 struct vmci_handle handle)
49{
50 struct vmci_handle_arr *array = *array_ptr;
51
52 if (unlikely(array->size >= array->capacity)) {
53 /* reallocate. */
54 struct vmci_handle_arr *new_array;
55 size_t new_capacity = array->capacity * VMCI_ARR_CAP_MULT;
56 size_t new_size = handle_arr_calc_size(new_capacity);
57
58 new_array = krealloc(array, new_size, GFP_ATOMIC);
59 if (!new_array)
60 return;
61
62 new_array->capacity = new_capacity;
63 *array_ptr = array = new_array;
64 }
65
66 array->entries[array->size] = handle;
67 array->size++;
68}
69
70/*
71 * Handle that was removed, VMCI_INVALID_HANDLE if entry not found.
72 */
73struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array,
74 struct vmci_handle entry_handle)
75{
76 struct vmci_handle handle = VMCI_INVALID_HANDLE;
77 size_t i;
78
79 for (i = 0; i < array->size; i++) {
80 if (vmci_handle_is_equal(array->entries[i], entry_handle)) {
81 handle = array->entries[i];
82 array->size--;
83 array->entries[i] = array->entries[array->size];
84 array->entries[array->size] = VMCI_INVALID_HANDLE;
85 break;
86 }
87 }
88
89 return handle;
90}
91
92/*
93 * Handle that was removed, VMCI_INVALID_HANDLE if array was empty.
94 */
95struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array)
96{
97 struct vmci_handle handle = VMCI_INVALID_HANDLE;
98
99 if (array->size) {
100 array->size--;
101 handle = array->entries[array->size];
102 array->entries[array->size] = VMCI_INVALID_HANDLE;
103 }
104
105 return handle;
106}
107
108/*
109 * Handle at given index, VMCI_INVALID_HANDLE if invalid index.
110 */
111struct vmci_handle
112vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index)
113{
114 if (unlikely(index >= array->size))
115 return VMCI_INVALID_HANDLE;
116
117 return array->entries[index];
118}
119
120bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array,
121 struct vmci_handle entry_handle)
122{
123 size_t i;
124
125 for (i = 0; i < array->size; i++)
126 if (vmci_handle_is_equal(array->entries[i], entry_handle))
127 return true;
128
129 return false;
130}
131
132/*
133 * NULL if the array is empty. Otherwise, a pointer to the array
134 * of VMCI handles in the handle array.
135 */
136struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array)
137{
138 if (array->size)
139 return array->entries;
140
141 return NULL;
142}
diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.h b/drivers/misc/vmw_vmci/vmci_handle_array.h
new file mode 100644
index 000000000000..b5f3a7f98cf1
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_handle_array.h
@@ -0,0 +1,52 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef _VMCI_HANDLE_ARRAY_H_
17#define _VMCI_HANDLE_ARRAY_H_
18
19#include <linux/vmw_vmci_defs.h>
20#include <linux/types.h>
21
22#define VMCI_HANDLE_ARRAY_DEFAULT_SIZE 4
23#define VMCI_ARR_CAP_MULT 2 /* Array capacity multiplier */
24
25struct vmci_handle_arr {
26 size_t capacity;
27 size_t size;
28 struct vmci_handle entries[];
29};
30
31struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity);
32void vmci_handle_arr_destroy(struct vmci_handle_arr *array);
33void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
34 struct vmci_handle handle);
35struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array,
36 struct vmci_handle
37 entry_handle);
38struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array);
39struct vmci_handle
40vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index);
41bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array,
42 struct vmci_handle entry_handle);
43struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array);
44
45static inline size_t vmci_handle_arr_get_size(
46 const struct vmci_handle_arr *array)
47{
48 return array->size;
49}
50
51
52#endif /* _VMCI_HANDLE_ARRAY_H_ */
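
The header above is the whole of the handle-array API: a small, dynamically grown
array of struct vmci_handle used throughout the driver to track doorbell and queue
pair handles. A minimal usage sketch (hypothetical caller, not part of this patch):

	#include "vmci_handle_array.h"

	/* Track two handles, then drop one again. */
	static void handle_arr_example(struct vmci_handle h1, struct vmci_handle h2)
	{
		struct vmci_handle_arr *arr = vmci_handle_arr_create(0); /* default capacity */

		if (!arr)
			return;

		/* Append may reallocate, so the array pointer is passed by reference. */
		vmci_handle_arr_append_entry(&arr, h1);
		vmci_handle_arr_append_entry(&arr, h2);

		if (vmci_handle_arr_has_entry(arr, h1))
			vmci_handle_arr_remove_entry(arr, h1);

		/* Removal swaps the tail into the freed slot, so entry 0 is now h2. */
		vmci_handle_arr_destroy(arr);
	}
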
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
new file mode 100644
index 000000000000..d4722b3dc8ec
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -0,0 +1,1043 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18#include <linux/moduleparam.h>
19#include <linux/miscdevice.h>
20#include <linux/interrupt.h>
21#include <linux/highmem.h>
22#include <linux/atomic.h>
23#include <linux/kernel.h>
24#include <linux/module.h>
25#include <linux/mutex.h>
26#include <linux/sched.h>
27#include <linux/slab.h>
28#include <linux/file.h>
29#include <linux/init.h>
30#include <linux/poll.h>
31#include <linux/pci.h>
32#include <linux/smp.h>
33#include <linux/fs.h>
34#include <linux/io.h>
35
36#include "vmci_handle_array.h"
37#include "vmci_queue_pair.h"
38#include "vmci_datagram.h"
39#include "vmci_doorbell.h"
40#include "vmci_resource.h"
41#include "vmci_context.h"
42#include "vmci_driver.h"
43#include "vmci_event.h"
44
45#define VMCI_UTIL_NUM_RESOURCES 1
46
47enum {
48 VMCI_NOTIFY_RESOURCE_QUEUE_PAIR = 0,
49 VMCI_NOTIFY_RESOURCE_DOOR_BELL = 1,
50};
51
52enum {
53 VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY = 0,
54 VMCI_NOTIFY_RESOURCE_ACTION_CREATE = 1,
55 VMCI_NOTIFY_RESOURCE_ACTION_DESTROY = 2,
56};
57
58/*
59 * VMCI driver initialization. This block can also be used to
60 * pass initial group membership etc.
61 */
62struct vmci_init_blk {
63 u32 cid;
64 u32 flags;
65};
66
67/* Queue pair alloc info used by old-style (VM-to-VM capable) VMX'en. */
68struct vmci_qp_alloc_info_vmvm {
69 struct vmci_handle handle;
70 u32 peer;
71 u32 flags;
72 u64 produce_size;
73 u64 consume_size;
74 u64 produce_page_file; /* User VA. */
75 u64 consume_page_file; /* User VA. */
76 u64 produce_page_file_size; /* Size of the file name array. */
77 u64 consume_page_file_size; /* Size of the file name array. */
78 s32 result;
79 u32 _pad;
80};
81
82/* VMCISetNotifyInfo: Used to pass notify flag's address to the host driver. */
83struct vmci_set_notify_info {
84 u64 notify_uva;
85 s32 result;
86 u32 _pad;
87};
88
89/*
90 * Per-instance host state
91 */
92struct vmci_host_dev {
93 struct vmci_ctx *context;
94 int user_version;
95 enum vmci_obj_type ct_type;
96 struct mutex lock; /* Mutex lock for vmci context access */
97};
98
99static struct vmci_ctx *host_context;
100static bool vmci_host_device_initialized;
101static atomic_t vmci_host_active_users = ATOMIC_INIT(0);
102
103/*
104 * Determines whether the VMCI host personality is
105 * available. Since the core functionality of the host driver is
106 * always present, all guests could possibly use the host
107 * personality. However, to minimize the deviation from the
108 * pre-unified driver state of affairs, we only consider the host
109 * device active if there is no active guest device or if there
110 * are VMX'en with active VMCI contexts using the host device.
111 */
112bool vmci_host_code_active(void)
113{
114 return vmci_host_device_initialized &&
115 (!vmci_guest_code_active() ||
116 atomic_read(&vmci_host_active_users) > 0);
117}
118
119/*
120 * Called on open of /dev/vmci.
121 */
122static int vmci_host_open(struct inode *inode, struct file *filp)
123{
124 struct vmci_host_dev *vmci_host_dev;
125
126 vmci_host_dev = kzalloc(sizeof(struct vmci_host_dev), GFP_KERNEL);
127 if (vmci_host_dev == NULL)
128 return -ENOMEM;
129
130 vmci_host_dev->ct_type = VMCIOBJ_NOT_SET;
131 mutex_init(&vmci_host_dev->lock);
132 filp->private_data = vmci_host_dev;
133
134 return 0;
135}
136
137/*
138 * Called on close of /dev/vmci, most often when the process
139 * exits.
140 */
141static int vmci_host_close(struct inode *inode, struct file *filp)
142{
143 struct vmci_host_dev *vmci_host_dev = filp->private_data;
144
145 if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
146 vmci_ctx_destroy(vmci_host_dev->context);
147 vmci_host_dev->context = NULL;
148
149 /*
150 * The number of active contexts is used to track whether any
151 * VMX'en are using the host personality. It is incremented when
152 * a context is created through the IOCTL_VMCI_INIT_CONTEXT
153 * ioctl.
154 */
155 atomic_dec(&vmci_host_active_users);
156 }
157 vmci_host_dev->ct_type = VMCIOBJ_NOT_SET;
158
159 kfree(vmci_host_dev);
160 filp->private_data = NULL;
161 return 0;
162}
163
164/*
165 * This is used to wake up the VMX when a VMCI call arrives, or
166 * to wake up select() or poll() at the next clock tick.
167 */
168static unsigned int vmci_host_poll(struct file *filp, poll_table *wait)
169{
170 struct vmci_host_dev *vmci_host_dev = filp->private_data;
171 struct vmci_ctx *context = vmci_host_dev->context;
172 unsigned int mask = 0;
173
174 if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
175 /* Check for VMCI calls to this VM context. */
176 if (wait)
177 poll_wait(filp, &context->host_context.wait_queue,
178 wait);
179
180 spin_lock(&context->lock);
181 if (context->pending_datagrams > 0 ||
182 vmci_handle_arr_get_size(
183 context->pending_doorbell_array) > 0) {
184 mask = POLLIN;
185 }
186 spin_unlock(&context->lock);
187 }
188 return mask;
189}
190
191/*
192 * Copies the handles of a handle array into a user buffer, and
193 * returns the new length in user_buf_size. If the copy to the
194 * user buffer fails, the function still returns VMCI_SUCCESS,
195 * but retval != 0.
196 */
197static int drv_cp_harray_to_user(void __user *user_buf_uva,
198 u64 *user_buf_size,
199 struct vmci_handle_arr *handle_array,
200 int *retval)
201{
202 u32 array_size = 0;
203 struct vmci_handle *handles;
204
205 if (handle_array)
206 array_size = vmci_handle_arr_get_size(handle_array);
207
208 if (array_size * sizeof(*handles) > *user_buf_size)
209 return VMCI_ERROR_MORE_DATA;
210
211 *user_buf_size = array_size * sizeof(*handles);
212 if (*user_buf_size)
213 *retval = copy_to_user(user_buf_uva,
214 vmci_handle_arr_get_handles
215 (handle_array), *user_buf_size);
216
217 return VMCI_SUCCESS;
218}
219
220/*
221 * Sets up a given context for notify to work: maps the notify boolean
222 * at the given user VA into kernel space.
223 */
224static int vmci_host_setup_notify(struct vmci_ctx *context,
225 unsigned long uva)
226{
227 struct page *page;
228 int retval;
229
230 if (context->notify_page) {
231 pr_devel("%s: Notify mechanism is already set up\n", __func__);
232 return VMCI_ERROR_DUPLICATE_ENTRY;
233 }
234
235 /*
236	 * We are using 'bool' internally, but let's make sure we are explicit
237 * about the size.
238 */
239 BUILD_BUG_ON(sizeof(bool) != sizeof(u8));
240 if (!access_ok(VERIFY_WRITE, (void __user *)uva, sizeof(u8)))
241 return VMCI_ERROR_GENERIC;
242
243 /*
244 * Lock physical page backing a given user VA.
245 */
246 down_read(&current->mm->mmap_sem);
247 retval = get_user_pages(current, current->mm,
248				uva & PAGE_MASK,
249 1, 1, 0, &page, NULL);
250 up_read(&current->mm->mmap_sem);
251 if (retval != 1)
252 return VMCI_ERROR_GENERIC;
253
254 /*
255 * Map the locked page and set up notify pointer.
256 */
257 context->notify = kmap(page) + (uva & (PAGE_SIZE - 1));
258 vmci_ctx_check_signal_notify(context);
259
260 return VMCI_SUCCESS;
261}
262
263static int vmci_host_get_version(struct vmci_host_dev *vmci_host_dev,
264 unsigned int cmd, void __user *uptr)
265{
266 if (cmd == IOCTL_VMCI_VERSION2) {
267 int __user *vptr = uptr;
268 if (get_user(vmci_host_dev->user_version, vptr))
269 return -EFAULT;
270 }
271
272 /*
273 * The basic logic here is:
274 *
275	 * If the user sends in a version of 0, tell it our version.
276	 * If the user didn't send in a version, tell it our version.
277	 * If the user sent in an old version, tell it -its- version.
278	 * If the user sent in a newer version, tell it our version.
279	 *
280	 * The rationale behind telling the caller its version is that
281	 * Workstation 6.5 required that the VMX and the VMCI kernel module were
282 * version sync'd. All new VMX users will be programmed to
283 * handle the VMCI kernel module version.
284 */
285
286 if (vmci_host_dev->user_version > 0 &&
287 vmci_host_dev->user_version < VMCI_VERSION_HOSTQP) {
288 return vmci_host_dev->user_version;
289 }
290
291 return VMCI_VERSION;
292}
293
294#define vmci_ioctl_err(fmt, ...) \
295 pr_devel("%s: " fmt, ioctl_name, ##__VA_ARGS__)
296
297static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev,
298 const char *ioctl_name,
299 void __user *uptr)
300{
301 struct vmci_init_blk init_block;
302 const struct cred *cred;
303 int retval;
304
305 if (copy_from_user(&init_block, uptr, sizeof(init_block))) {
306 vmci_ioctl_err("error reading init block\n");
307 return -EFAULT;
308 }
309
310 mutex_lock(&vmci_host_dev->lock);
311
312 if (vmci_host_dev->ct_type != VMCIOBJ_NOT_SET) {
313 vmci_ioctl_err("received VMCI init on initialized handle\n");
314 retval = -EINVAL;
315 goto out;
316 }
317
318 if (init_block.flags & ~VMCI_PRIVILEGE_FLAG_RESTRICTED) {
319 vmci_ioctl_err("unsupported VMCI restriction flag\n");
320 retval = -EINVAL;
321 goto out;
322 }
323
324 cred = get_current_cred();
325 vmci_host_dev->context = vmci_ctx_create(init_block.cid,
326 init_block.flags, 0,
327 vmci_host_dev->user_version,
328 cred);
329 put_cred(cred);
330 if (IS_ERR(vmci_host_dev->context)) {
331 retval = PTR_ERR(vmci_host_dev->context);
332 vmci_ioctl_err("error initializing context\n");
333 goto out;
334 }
335
336 /*
337 * Copy cid to userlevel, we do this to allow the VMX
338 * to enforce its policy on cid generation.
339 */
340 init_block.cid = vmci_ctx_get_id(vmci_host_dev->context);
341 if (copy_to_user(uptr, &init_block, sizeof(init_block))) {
342 vmci_ctx_destroy(vmci_host_dev->context);
343 vmci_host_dev->context = NULL;
344 vmci_ioctl_err("error writing init block\n");
345 retval = -EFAULT;
346 goto out;
347 }
348
349 vmci_host_dev->ct_type = VMCIOBJ_CONTEXT;
350 atomic_inc(&vmci_host_active_users);
351
352 retval = 0;
353
354out:
355 mutex_unlock(&vmci_host_dev->lock);
356 return retval;
357}
358
359static int vmci_host_do_send_datagram(struct vmci_host_dev *vmci_host_dev,
360 const char *ioctl_name,
361 void __user *uptr)
362{
363 struct vmci_datagram_snd_rcv_info send_info;
364 struct vmci_datagram *dg = NULL;
365 u32 cid;
366
367 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
368 vmci_ioctl_err("only valid for contexts\n");
369 return -EINVAL;
370 }
371
372 if (copy_from_user(&send_info, uptr, sizeof(send_info)))
373 return -EFAULT;
374
375 if (send_info.len > VMCI_MAX_DG_SIZE) {
376 vmci_ioctl_err("datagram is too big (size=%d)\n",
377 send_info.len);
378 return -EINVAL;
379 }
380
381 if (send_info.len < sizeof(*dg)) {
382 vmci_ioctl_err("datagram is too small (size=%d)\n",
383 send_info.len);
384 return -EINVAL;
385 }
386
387 dg = kmalloc(send_info.len, GFP_KERNEL);
388 if (!dg) {
389 vmci_ioctl_err(
390 "cannot allocate memory to dispatch datagram\n");
391 return -ENOMEM;
392 }
393
394 if (copy_from_user(dg, (void __user *)(uintptr_t)send_info.addr,
395 send_info.len)) {
396 vmci_ioctl_err("error getting datagram\n");
397 kfree(dg);
398 return -EFAULT;
399 }
400
401 pr_devel("Datagram dst (handle=0x%x:0x%x) src (handle=0x%x:0x%x), payload (size=%llu bytes)\n",
402 dg->dst.context, dg->dst.resource,
403 dg->src.context, dg->src.resource,
404 (unsigned long long)dg->payload_size);
405
406 /* Get source context id. */
407 cid = vmci_ctx_get_id(vmci_host_dev->context);
408 send_info.result = vmci_datagram_dispatch(cid, dg, true);
409 kfree(dg);
410
411 return copy_to_user(uptr, &send_info, sizeof(send_info)) ? -EFAULT : 0;
412}
413
414static int vmci_host_do_receive_datagram(struct vmci_host_dev *vmci_host_dev,
415 const char *ioctl_name,
416 void __user *uptr)
417{
418 struct vmci_datagram_snd_rcv_info recv_info;
419 struct vmci_datagram *dg = NULL;
420 int retval;
421 size_t size;
422
423 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
424 vmci_ioctl_err("only valid for contexts\n");
425 return -EINVAL;
426 }
427
428 if (copy_from_user(&recv_info, uptr, sizeof(recv_info)))
429 return -EFAULT;
430
431 size = recv_info.len;
432 recv_info.result = vmci_ctx_dequeue_datagram(vmci_host_dev->context,
433 &size, &dg);
434
435 if (recv_info.result >= VMCI_SUCCESS) {
436 void __user *ubuf = (void __user *)(uintptr_t)recv_info.addr;
437 retval = copy_to_user(ubuf, dg, VMCI_DG_SIZE(dg));
438 kfree(dg);
439 if (retval != 0)
440 return -EFAULT;
441 }
442
443 return copy_to_user(uptr, &recv_info, sizeof(recv_info)) ? -EFAULT : 0;
444}
445
446static int vmci_host_do_alloc_queuepair(struct vmci_host_dev *vmci_host_dev,
447 const char *ioctl_name,
448 void __user *uptr)
449{
450 struct vmci_handle handle;
451 int vmci_status;
452 int __user *retptr;
453 u32 cid;
454
455 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
456 vmci_ioctl_err("only valid for contexts\n");
457 return -EINVAL;
458 }
459
460 cid = vmci_ctx_get_id(vmci_host_dev->context);
461
462 if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
463 struct vmci_qp_alloc_info_vmvm alloc_info;
464 struct vmci_qp_alloc_info_vmvm __user *info = uptr;
465
466 if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info)))
467 return -EFAULT;
468
469 handle = alloc_info.handle;
470 retptr = &info->result;
471
472 vmci_status = vmci_qp_broker_alloc(alloc_info.handle,
473 alloc_info.peer,
474 alloc_info.flags,
475 VMCI_NO_PRIVILEGE_FLAGS,
476 alloc_info.produce_size,
477 alloc_info.consume_size,
478 NULL,
479 vmci_host_dev->context);
480
481 if (vmci_status == VMCI_SUCCESS)
482 vmci_status = VMCI_SUCCESS_QUEUEPAIR_CREATE;
483 } else {
484 struct vmci_qp_alloc_info alloc_info;
485 struct vmci_qp_alloc_info __user *info = uptr;
486 struct vmci_qp_page_store page_store;
487
488 if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info)))
489 return -EFAULT;
490
491 handle = alloc_info.handle;
492 retptr = &info->result;
493
494 page_store.pages = alloc_info.ppn_va;
495 page_store.len = alloc_info.num_ppns;
496
497 vmci_status = vmci_qp_broker_alloc(alloc_info.handle,
498 alloc_info.peer,
499 alloc_info.flags,
500 VMCI_NO_PRIVILEGE_FLAGS,
501 alloc_info.produce_size,
502 alloc_info.consume_size,
503 &page_store,
504 vmci_host_dev->context);
505 }
506
507 if (put_user(vmci_status, retptr)) {
508 if (vmci_status >= VMCI_SUCCESS) {
509 vmci_status = vmci_qp_broker_detach(handle,
510 vmci_host_dev->context);
511 }
512 return -EFAULT;
513 }
514
515 return 0;
516}
517
518static int vmci_host_do_queuepair_setva(struct vmci_host_dev *vmci_host_dev,
519 const char *ioctl_name,
520 void __user *uptr)
521{
522 struct vmci_qp_set_va_info set_va_info;
523 struct vmci_qp_set_va_info __user *info = uptr;
524 s32 result;
525
526 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
527 vmci_ioctl_err("only valid for contexts\n");
528 return -EINVAL;
529 }
530
531 if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
532 vmci_ioctl_err("is not allowed\n");
533 return -EINVAL;
534 }
535
536 if (copy_from_user(&set_va_info, uptr, sizeof(set_va_info)))
537 return -EFAULT;
538
539 if (set_va_info.va) {
540 /*
541 * VMX is passing down a new VA for the queue
542 * pair mapping.
543 */
544 result = vmci_qp_broker_map(set_va_info.handle,
545 vmci_host_dev->context,
546 set_va_info.va);
547 } else {
548 /*
549 * The queue pair is about to be unmapped by
550 * the VMX.
551 */
552 result = vmci_qp_broker_unmap(set_va_info.handle,
553 vmci_host_dev->context, 0);
554 }
555
556 return put_user(result, &info->result) ? -EFAULT : 0;
557}
558
559static int vmci_host_do_queuepair_setpf(struct vmci_host_dev *vmci_host_dev,
560 const char *ioctl_name,
561 void __user *uptr)
562{
563 struct vmci_qp_page_file_info page_file_info;
564 struct vmci_qp_page_file_info __user *info = uptr;
565 s32 result;
566
567 if (vmci_host_dev->user_version < VMCI_VERSION_HOSTQP ||
568 vmci_host_dev->user_version >= VMCI_VERSION_NOVMVM) {
569 vmci_ioctl_err("not supported on this VMX (version=%d)\n",
570 vmci_host_dev->user_version);
571 return -EINVAL;
572 }
573
574 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
575 vmci_ioctl_err("only valid for contexts\n");
576 return -EINVAL;
577 }
578
579 if (copy_from_user(&page_file_info, uptr, sizeof(*info)))
580 return -EFAULT;
581
582 /*
583 * Communicate success pre-emptively to the caller. Note that the
584 * basic premise is that it is incumbent upon the caller not to look at
585 * the info.result field until after the ioctl() returns. And then,
586 * only if the ioctl() result indicates no error. We send up the
587	 * SUCCESS status before calling SetPageStore() because failing
588 * to copy up the result code means unwinding the SetPageStore().
589 *
590 * It turns out the logic to unwind a SetPageStore() opens a can of
591 * worms. For example, if a host had created the queue_pair and a
592 * guest attaches and SetPageStore() is successful but writing success
593 * fails, then ... the host has to be stopped from writing (anymore)
594 * data into the queue_pair. That means an additional test in the
595 * VMCI_Enqueue() code path. Ugh.
596 */
597
598 if (put_user(VMCI_SUCCESS, &info->result)) {
599 /*
600 * In this case, we can't write a result field of the
601 * caller's info block. So, we don't even try to
602 * SetPageStore().
603 */
604 return -EFAULT;
605 }
606
607 result = vmci_qp_broker_set_page_store(page_file_info.handle,
608 page_file_info.produce_va,
609 page_file_info.consume_va,
610 vmci_host_dev->context);
611 if (result < VMCI_SUCCESS) {
612 if (put_user(result, &info->result)) {
613 /*
614 * Note that in this case the SetPageStore()
615 * call failed but we were unable to
616 * communicate that to the caller (because the
617 * copy_to_user() call failed). So, if we
618 * simply return an error (in this case
619 * -EFAULT) then the caller will know that the
620 * SetPageStore failed even though we couldn't
621 * put the result code in the result field and
622 * indicate exactly why it failed.
623 *
624 * That says nothing about the issue where we
625 * were once able to write to the caller's info
626 * memory and now can't. Something more
627 * serious is probably going on than the fact
628 * that SetPageStore() didn't work.
629 */
630 return -EFAULT;
631 }
632 }
633
634 return 0;
635}
636
637static int vmci_host_do_qp_detach(struct vmci_host_dev *vmci_host_dev,
638 const char *ioctl_name,
639 void __user *uptr)
640{
641 struct vmci_qp_dtch_info detach_info;
642 struct vmci_qp_dtch_info __user *info = uptr;
643 s32 result;
644
645 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
646 vmci_ioctl_err("only valid for contexts\n");
647 return -EINVAL;
648 }
649
650 if (copy_from_user(&detach_info, uptr, sizeof(detach_info)))
651 return -EFAULT;
652
653 result = vmci_qp_broker_detach(detach_info.handle,
654 vmci_host_dev->context);
655 if (result == VMCI_SUCCESS &&
656 vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) {
657 result = VMCI_SUCCESS_LAST_DETACH;
658 }
659
660 return put_user(result, &info->result) ? -EFAULT : 0;
661}
662
663static int vmci_host_do_ctx_add_notify(struct vmci_host_dev *vmci_host_dev,
664 const char *ioctl_name,
665 void __user *uptr)
666{
667 struct vmci_ctx_info ar_info;
668 struct vmci_ctx_info __user *info = uptr;
669 s32 result;
670 u32 cid;
671
672 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
673 vmci_ioctl_err("only valid for contexts\n");
674 return -EINVAL;
675 }
676
677 if (copy_from_user(&ar_info, uptr, sizeof(ar_info)))
678 return -EFAULT;
679
680 cid = vmci_ctx_get_id(vmci_host_dev->context);
681 result = vmci_ctx_add_notification(cid, ar_info.remote_cid);
682
683 return put_user(result, &info->result) ? -EFAULT : 0;
684}
685
686static int vmci_host_do_ctx_remove_notify(struct vmci_host_dev *vmci_host_dev,
687 const char *ioctl_name,
688 void __user *uptr)
689{
690 struct vmci_ctx_info ar_info;
691 struct vmci_ctx_info __user *info = uptr;
692 u32 cid;
693 int result;
694
695 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
696 vmci_ioctl_err("only valid for contexts\n");
697 return -EINVAL;
698 }
699
700 if (copy_from_user(&ar_info, uptr, sizeof(ar_info)))
701 return -EFAULT;
702
703 cid = vmci_ctx_get_id(vmci_host_dev->context);
704 result = vmci_ctx_remove_notification(cid,
705 ar_info.remote_cid);
706
707 return put_user(result, &info->result) ? -EFAULT : 0;
708}
709
710static int vmci_host_do_ctx_get_cpt_state(struct vmci_host_dev *vmci_host_dev,
711 const char *ioctl_name,
712 void __user *uptr)
713{
714 struct vmci_ctx_chkpt_buf_info get_info;
715 u32 cid;
716 void *cpt_buf;
717 int retval;
718
719 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
720 vmci_ioctl_err("only valid for contexts\n");
721 return -EINVAL;
722 }
723
724 if (copy_from_user(&get_info, uptr, sizeof(get_info)))
725 return -EFAULT;
726
727 cid = vmci_ctx_get_id(vmci_host_dev->context);
728 get_info.result = vmci_ctx_get_chkpt_state(cid, get_info.cpt_type,
729 &get_info.buf_size, &cpt_buf);
730 if (get_info.result == VMCI_SUCCESS && get_info.buf_size) {
731 void __user *ubuf = (void __user *)(uintptr_t)get_info.cpt_buf;
732 retval = copy_to_user(ubuf, cpt_buf, get_info.buf_size);
733 kfree(cpt_buf);
734
735 if (retval)
736 return -EFAULT;
737 }
738
739 return copy_to_user(uptr, &get_info, sizeof(get_info)) ? -EFAULT : 0;
740}
741
742static int vmci_host_do_ctx_set_cpt_state(struct vmci_host_dev *vmci_host_dev,
743 const char *ioctl_name,
744 void __user *uptr)
745{
746 struct vmci_ctx_chkpt_buf_info set_info;
747 u32 cid;
748 void *cpt_buf;
749 int retval;
750
751 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
752 vmci_ioctl_err("only valid for contexts\n");
753 return -EINVAL;
754 }
755
756 if (copy_from_user(&set_info, uptr, sizeof(set_info)))
757 return -EFAULT;
758
759 cpt_buf = kmalloc(set_info.buf_size, GFP_KERNEL);
760 if (!cpt_buf) {
761 vmci_ioctl_err(
762 "cannot allocate memory to set cpt state (type=%d)\n",
763 set_info.cpt_type);
764 return -ENOMEM;
765 }
766
767 if (copy_from_user(cpt_buf, (void __user *)(uintptr_t)set_info.cpt_buf,
768 set_info.buf_size)) {
769 retval = -EFAULT;
770 goto out;
771 }
772
773 cid = vmci_ctx_get_id(vmci_host_dev->context);
774 set_info.result = vmci_ctx_set_chkpt_state(cid, set_info.cpt_type,
775 set_info.buf_size, cpt_buf);
776
777 retval = copy_to_user(uptr, &set_info, sizeof(set_info)) ? -EFAULT : 0;
778
779out:
780 kfree(cpt_buf);
781 return retval;
782}
783
784static int vmci_host_do_get_context_id(struct vmci_host_dev *vmci_host_dev,
785 const char *ioctl_name,
786 void __user *uptr)
787{
788 u32 __user *u32ptr = uptr;
789
790 return put_user(VMCI_HOST_CONTEXT_ID, u32ptr) ? -EFAULT : 0;
791}
792
793static int vmci_host_do_set_notify(struct vmci_host_dev *vmci_host_dev,
794 const char *ioctl_name,
795 void __user *uptr)
796{
797 struct vmci_set_notify_info notify_info;
798
799 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
800 vmci_ioctl_err("only valid for contexts\n");
801 return -EINVAL;
802 }
803
804 if (copy_from_user(&notify_info, uptr, sizeof(notify_info)))
805 return -EFAULT;
806
807 if (notify_info.notify_uva) {
808 notify_info.result =
809 vmci_host_setup_notify(vmci_host_dev->context,
810 notify_info.notify_uva);
811 } else {
812 vmci_ctx_unset_notify(vmci_host_dev->context);
813 notify_info.result = VMCI_SUCCESS;
814 }
815
816 return copy_to_user(uptr, &notify_info, sizeof(notify_info)) ?
817 -EFAULT : 0;
818}
819
820static int vmci_host_do_notify_resource(struct vmci_host_dev *vmci_host_dev,
821 const char *ioctl_name,
822 void __user *uptr)
823{
824 struct vmci_dbell_notify_resource_info info;
825 u32 cid;
826
827 if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) {
828 vmci_ioctl_err("invalid for current VMX versions\n");
829 return -EINVAL;
830 }
831
832 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
833 vmci_ioctl_err("only valid for contexts\n");
834 return -EINVAL;
835 }
836
837 if (copy_from_user(&info, uptr, sizeof(info)))
838 return -EFAULT;
839
840 cid = vmci_ctx_get_id(vmci_host_dev->context);
841
842 switch (info.action) {
843 case VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY:
844 if (info.resource == VMCI_NOTIFY_RESOURCE_DOOR_BELL) {
845 u32 flags = VMCI_NO_PRIVILEGE_FLAGS;
846 info.result = vmci_ctx_notify_dbell(cid, info.handle,
847 flags);
848 } else {
849 info.result = VMCI_ERROR_UNAVAILABLE;
850 }
851 break;
852
853 case VMCI_NOTIFY_RESOURCE_ACTION_CREATE:
854 info.result = vmci_ctx_dbell_create(cid, info.handle);
855 break;
856
857 case VMCI_NOTIFY_RESOURCE_ACTION_DESTROY:
858 info.result = vmci_ctx_dbell_destroy(cid, info.handle);
859 break;
860
861 default:
862 vmci_ioctl_err("got unknown action (action=%d)\n",
863 info.action);
864 info.result = VMCI_ERROR_INVALID_ARGS;
865 }
866
867 return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0;
868}
869
870static int vmci_host_do_recv_notifications(struct vmci_host_dev *vmci_host_dev,
871 const char *ioctl_name,
872 void __user *uptr)
873{
874 struct vmci_ctx_notify_recv_info info;
875 struct vmci_handle_arr *db_handle_array;
876 struct vmci_handle_arr *qp_handle_array;
877 void __user *ubuf;
878 u32 cid;
879 int retval = 0;
880
881 if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
882 vmci_ioctl_err("only valid for contexts\n");
883 return -EINVAL;
884 }
885
886 if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) {
887 vmci_ioctl_err("not supported for the current vmx version\n");
888 return -EINVAL;
889 }
890
891 if (copy_from_user(&info, uptr, sizeof(info)))
892 return -EFAULT;
893
894 if ((info.db_handle_buf_size && !info.db_handle_buf_uva) ||
895 (info.qp_handle_buf_size && !info.qp_handle_buf_uva)) {
896 return -EINVAL;
897 }
898
899 cid = vmci_ctx_get_id(vmci_host_dev->context);
900
901 info.result = vmci_ctx_rcv_notifications_get(cid,
902 &db_handle_array, &qp_handle_array);
903 if (info.result != VMCI_SUCCESS)
904 return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0;
905
906 ubuf = (void __user *)(uintptr_t)info.db_handle_buf_uva;
907 info.result = drv_cp_harray_to_user(ubuf, &info.db_handle_buf_size,
908 db_handle_array, &retval);
909 if (info.result == VMCI_SUCCESS && !retval) {
910 ubuf = (void __user *)(uintptr_t)info.qp_handle_buf_uva;
911 info.result = drv_cp_harray_to_user(ubuf,
912 &info.qp_handle_buf_size,
913 qp_handle_array, &retval);
914 }
915
916 if (!retval && copy_to_user(uptr, &info, sizeof(info)))
917 retval = -EFAULT;
918
919 vmci_ctx_rcv_notifications_release(cid,
920 db_handle_array, qp_handle_array,
921 info.result == VMCI_SUCCESS && !retval);
922
923 return retval;
924}
925
926static long vmci_host_unlocked_ioctl(struct file *filp,
927 unsigned int iocmd, unsigned long ioarg)
928{
929#define VMCI_DO_IOCTL(ioctl_name, ioctl_fn) do { \
930 char *name = __stringify(IOCTL_VMCI_ ## ioctl_name); \
931 return vmci_host_do_ ## ioctl_fn( \
932 vmci_host_dev, name, uptr); \
933 } while (0)
934
935 struct vmci_host_dev *vmci_host_dev = filp->private_data;
936 void __user *uptr = (void __user *)ioarg;
937
938 switch (iocmd) {
939 case IOCTL_VMCI_INIT_CONTEXT:
940 VMCI_DO_IOCTL(INIT_CONTEXT, init_context);
941 case IOCTL_VMCI_DATAGRAM_SEND:
942 VMCI_DO_IOCTL(DATAGRAM_SEND, send_datagram);
943 case IOCTL_VMCI_DATAGRAM_RECEIVE:
944 VMCI_DO_IOCTL(DATAGRAM_RECEIVE, receive_datagram);
945 case IOCTL_VMCI_QUEUEPAIR_ALLOC:
946 VMCI_DO_IOCTL(QUEUEPAIR_ALLOC, alloc_queuepair);
947 case IOCTL_VMCI_QUEUEPAIR_SETVA:
948 VMCI_DO_IOCTL(QUEUEPAIR_SETVA, queuepair_setva);
949 case IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE:
950 VMCI_DO_IOCTL(QUEUEPAIR_SETPAGEFILE, queuepair_setpf);
951 case IOCTL_VMCI_QUEUEPAIR_DETACH:
952 VMCI_DO_IOCTL(QUEUEPAIR_DETACH, qp_detach);
953 case IOCTL_VMCI_CTX_ADD_NOTIFICATION:
954 VMCI_DO_IOCTL(CTX_ADD_NOTIFICATION, ctx_add_notify);
955 case IOCTL_VMCI_CTX_REMOVE_NOTIFICATION:
956 VMCI_DO_IOCTL(CTX_REMOVE_NOTIFICATION, ctx_remove_notify);
957 case IOCTL_VMCI_CTX_GET_CPT_STATE:
958 VMCI_DO_IOCTL(CTX_GET_CPT_STATE, ctx_get_cpt_state);
959 case IOCTL_VMCI_CTX_SET_CPT_STATE:
960 VMCI_DO_IOCTL(CTX_SET_CPT_STATE, ctx_set_cpt_state);
961 case IOCTL_VMCI_GET_CONTEXT_ID:
962 VMCI_DO_IOCTL(GET_CONTEXT_ID, get_context_id);
963 case IOCTL_VMCI_SET_NOTIFY:
964 VMCI_DO_IOCTL(SET_NOTIFY, set_notify);
965 case IOCTL_VMCI_NOTIFY_RESOURCE:
966 VMCI_DO_IOCTL(NOTIFY_RESOURCE, notify_resource);
967 case IOCTL_VMCI_NOTIFICATIONS_RECEIVE:
968 VMCI_DO_IOCTL(NOTIFICATIONS_RECEIVE, recv_notifications);
969
970 case IOCTL_VMCI_VERSION:
971 case IOCTL_VMCI_VERSION2:
972 return vmci_host_get_version(vmci_host_dev, iocmd, uptr);
973
974 default:
975 pr_devel("%s: Unknown ioctl (iocmd=%d)\n", __func__, iocmd);
976 return -EINVAL;
977 }
978
979#undef VMCI_DO_IOCTL
980}
981
982static const struct file_operations vmuser_fops = {
983 .owner = THIS_MODULE,
984 .open = vmci_host_open,
985 .release = vmci_host_close,
986 .poll = vmci_host_poll,
987 .unlocked_ioctl = vmci_host_unlocked_ioctl,
988 .compat_ioctl = vmci_host_unlocked_ioctl,
989};
990
991static struct miscdevice vmci_host_miscdev = {
992 .name = "vmci",
993 .minor = MISC_DYNAMIC_MINOR,
994 .fops = &vmuser_fops,
995};
996
997int __init vmci_host_init(void)
998{
999 int error;
1000
1001 host_context = vmci_ctx_create(VMCI_HOST_CONTEXT_ID,
1002 VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
1003 -1, VMCI_VERSION, NULL);
1004 if (IS_ERR(host_context)) {
1005 error = PTR_ERR(host_context);
1006		pr_warn("Failed to initialize VMCIContext (error=%d)\n",
1007 error);
1008 return error;
1009 }
1010
1011 error = misc_register(&vmci_host_miscdev);
1012 if (error) {
1013 pr_warn("Module registration error (name=%s, major=%d, minor=%d, err=%d)\n",
1014 vmci_host_miscdev.name,
1015 MISC_MAJOR, vmci_host_miscdev.minor,
1016 error);
1017 pr_warn("Unable to initialize host personality\n");
1018 vmci_ctx_destroy(host_context);
1019 return error;
1020 }
1021
1022 pr_info("VMCI host device registered (name=%s, major=%d, minor=%d)\n",
1023 vmci_host_miscdev.name, MISC_MAJOR, vmci_host_miscdev.minor);
1024
1025 vmci_host_device_initialized = true;
1026 return 0;
1027}
1028
1029void __exit vmci_host_exit(void)
1030{
1031 int error;
1032
1033 vmci_host_device_initialized = false;
1034
1035 error = misc_deregister(&vmci_host_miscdev);
1036 if (error)
1037 pr_warn("Error unregistering character device: %d\n", error);
1038
1039 vmci_ctx_destroy(host_context);
1040 vmci_qp_broker_exit();
1041
1042 pr_debug("VMCI host driver module unloaded\n");
1043}
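
Taken together, the miscdevice and the ioctl table above define the whole userspace
interface of the host personality. A hypothetical userspace sketch of the basic flow
(not part of this patch; it assumes the IOCTL_VMCI_* request numbers, VMCI_VERSION
and a structure matching struct vmci_init_blk are available from the exported VMCI
headers):

	#include <fcntl.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	struct vmci_init_blk {		/* must match the kernel layout above */
		uint32_t cid;
		uint32_t flags;
	};

	static int vmci_open_context(uint32_t desired_cid)
	{
		struct vmci_init_blk blk = { .cid = desired_cid, .flags = 0 };
		int my_version = VMCI_VERSION;	/* from the VMCI headers */
		int fd = open("/dev/vmci", O_RDWR);

		if (fd < 0)
			return -1;

		/* Report our version; the negotiated version is the return value. */
		if (ioctl(fd, IOCTL_VMCI_VERSION2, &my_version) < 0)
			goto fail;

		/* Create the per-fd context; the driver may rewrite blk.cid. */
		if (ioctl(fd, IOCTL_VMCI_INIT_CONTEXT, &blk) < 0)
			goto fail;

		return fd;	/* fd can now be poll()ed and used for datagrams */
	fail:
		close(fd);
		return -1;
	}
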
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
new file mode 100644
index 000000000000..d94245dbd765
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -0,0 +1,3425 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18#include <linux/highmem.h>
19#include <linux/kernel.h>
20#include <linux/mm.h>
21#include <linux/module.h>
22#include <linux/mutex.h>
23#include <linux/pagemap.h>
24#include <linux/sched.h>
25#include <linux/slab.h>
26#include <linux/socket.h>
27#include <linux/wait.h>
28#include <linux/vmalloc.h>
29
30#include "vmci_handle_array.h"
31#include "vmci_queue_pair.h"
32#include "vmci_datagram.h"
33#include "vmci_resource.h"
34#include "vmci_context.h"
35#include "vmci_driver.h"
36#include "vmci_event.h"
37#include "vmci_route.h"
38
39/*
40 * In the following, we will distinguish between two kinds of VMX processes -
41 * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
42 * VMCI page files in the VMX and supporting VM to VM communication and the
43 * newer ones that use the guest memory directly. We will in the following
44 * refer to the older VMX versions as old-style VMX'en, and the newer ones as
45 * new-style VMX'en.
46 *
47 * The state transition diagram is as follows (the VMCIQPB_ prefix has been
48 * removed for readability) - see below for more details on the transitions:
49 *
50 * -------------- NEW -------------
51 * | |
52 * \_/ \_/
53 * CREATED_NO_MEM <-----------------> CREATED_MEM
54 * | | |
55 * | o-----------------------o |
56 * | | |
57 * \_/ \_/ \_/
58 * ATTACHED_NO_MEM <----------------> ATTACHED_MEM
59 * | | |
60 * | o----------------------o |
61 * | | |
62 * \_/ \_/ \_/
63 * SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
64 * | |
65 * | |
66 * -------------> gone <-------------
67 *
68 * In more detail. When a VMCI queue pair is first created, it will be in the
69 * VMCIQPB_NEW state. It will then move into one of the following states:
70 *
71 * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
72 *
73 * - the create was performed by a host endpoint, in which case there is
74 * no backing memory yet.
75 *
76 * - the create was initiated by an old-style VMX, that uses
77 * vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
78 * a later point in time. This state can be distinguished from the one
79 * above by the context ID of the creator. A host side is not allowed to
80 * attach until the page store has been set.
81 *
82 * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
83 * is created by a VMX using the queue pair device backend that
84 * sets the UVAs of the queue pair immediately and stores the
85 * information for later attachers. At this point, it is ready for
86 * the host side to attach to it.
87 *
88 * Once the queue pair is in one of the created states (with the exception of
89 * the case mentioned for older VMX'en above), it is possible to attach to the
90 * queue pair. Again we have two new states possible:
91 *
92 * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
93 * paths:
94 *
95 * - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
96 * pair, and attaches to a queue pair previously created by the host side.
97 *
98 * - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
99 * already created by a guest.
100 *
101 * - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
102 * vmci_qp_broker_set_page_store (see below).
103 *
104 * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
105 *   VMCIQPB_CREATED_NO_MEM state due to a host side create, an old-style VMX
106 *   will bring the queue pair into this state. Once vmci_qp_broker_set_page_store
107 *   is called to register the user memory, the VMCIQPB_ATTACHED_MEM state
108 * will be entered.
109 *
110 * From the attached queue pair, the queue pair can enter the shutdown states
111 * when either side of the queue pair detaches. If the guest side detaches
112 * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
113 * the content of the queue pair will no longer be available. If the host
114 * side detaches first, the queue pair will either enter the
115 * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
116 * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
117 * (e.g., the host detaches while a guest is stunned).
118 *
119 * New-style VMX'en will also unmap guest memory, if the guest is
120 * quiesced, e.g., during a snapshot operation. In that case, the guest
121 * memory will no longer be available, and the queue pair will transition from
122 * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
123 * in which case the queue pair will transition from the *_NO_MEM state at that
124 * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
125 * since the peer may have either attached or detached in the meantime. The
126 * values are laid out such that ++ on a state will move from a *_NO_MEM to a
127 * *_MEM state, and vice versa.
128 */
129
130/*
131 * VMCIMemcpy{To,From}QueueFunc() prototypes. Functions of these
132 * types are passed around to enqueue and dequeue routines. Note that
133 * often the functions passed are simply wrappers around memcpy
134 * itself.
135 *
136 * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
137 * there's an unused last parameter for the hosted side. In
138 * ESX, that parameter holds a buffer type.
139 */
140typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
141 u64 queue_offset, const void *src,
142 size_t src_offset, size_t size);
143typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
144 const struct vmci_queue *queue,
145 u64 queue_offset, size_t size);
146
147/* The Kernel specific component of the struct vmci_queue structure. */
148struct vmci_queue_kern_if {
149 struct page **page;
150 struct page **header_page;
151 void *va;
152 struct mutex __mutex; /* Protects the queue. */
153 struct mutex *mutex; /* Shared by producer and consumer queues. */
154 bool host;
155 size_t num_pages;
156 bool mapped;
157};
158
159/*
160 * This structure is opaque to the clients.
161 */
162struct vmci_qp {
163 struct vmci_handle handle;
164 struct vmci_queue *produce_q;
165 struct vmci_queue *consume_q;
166 u64 produce_q_size;
167 u64 consume_q_size;
168 u32 peer;
169 u32 flags;
170 u32 priv_flags;
171 bool guest_endpoint;
172 unsigned int blocked;
173 unsigned int generation;
174 wait_queue_head_t event;
175};
176
177enum qp_broker_state {
178 VMCIQPB_NEW,
179 VMCIQPB_CREATED_NO_MEM,
180 VMCIQPB_CREATED_MEM,
181 VMCIQPB_ATTACHED_NO_MEM,
182 VMCIQPB_ATTACHED_MEM,
183 VMCIQPB_SHUTDOWN_NO_MEM,
184 VMCIQPB_SHUTDOWN_MEM,
185 VMCIQPB_GONE
186};
187
188#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
189 _qpb->state == VMCIQPB_ATTACHED_MEM || \
190 _qpb->state == VMCIQPB_SHUTDOWN_MEM)
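
/*
 * Illustration only (not driver code; the helper name is made up): the enum
 * above is laid out so that each *_NO_MEM value sits directly below its *_MEM
 * partner, which is what makes the "++/--" transitions mentioned in the state
 * diagram comment work.
 */
static inline enum qp_broker_state qp_state_gains_mem(enum qp_broker_state s)
{
	BUILD_BUG_ON(VMCIQPB_CREATED_MEM != VMCIQPB_CREATED_NO_MEM + 1);
	BUILD_BUG_ON(VMCIQPB_ATTACHED_MEM != VMCIQPB_ATTACHED_NO_MEM + 1);
	BUILD_BUG_ON(VMCIQPB_SHUTDOWN_MEM != VMCIQPB_SHUTDOWN_NO_MEM + 1);

	return s + 1;	/* e.g. VMCIQPB_ATTACHED_NO_MEM -> VMCIQPB_ATTACHED_MEM */
}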
191
192/*
193 * In the queue pair broker, we always use the guest point of view for
194 * the produce and consume queue values and references, e.g., the
195 * produce queue size stored is the guest's produce queue size. The
196 * host endpoint will need to swap these around. The only exception is
197 * the local queue pairs on the host, in which case the host endpoint
198 * that creates the queue pair will have the right orientation, and
199 * the attaching host endpoint will need to swap.
200 */
201struct qp_entry {
202 struct list_head list_item;
203 struct vmci_handle handle;
204 u32 peer;
205 u32 flags;
206 u64 produce_size;
207 u64 consume_size;
208 u32 ref_count;
209};
210
211struct qp_broker_entry {
212 struct vmci_resource resource;
213 struct qp_entry qp;
214 u32 create_id;
215 u32 attach_id;
216 enum qp_broker_state state;
217 bool require_trusted_attach;
218 bool created_by_trusted;
219 bool vmci_page_files; /* Created by VMX using VMCI page files */
220 struct vmci_queue *produce_q;
221 struct vmci_queue *consume_q;
222 struct vmci_queue_header saved_produce_q;
223 struct vmci_queue_header saved_consume_q;
224 vmci_event_release_cb wakeup_cb;
225 void *client_data;
226 void *local_mem; /* Kernel memory for local queue pair */
227};
228
229struct qp_guest_endpoint {
230 struct vmci_resource resource;
231 struct qp_entry qp;
232 u64 num_ppns;
233 void *produce_q;
234 void *consume_q;
235 struct ppn_set ppn_set;
236};
237
238struct qp_list {
239 struct list_head head;
240 struct mutex mutex; /* Protect queue list. */
241};
242
243static struct qp_list qp_broker_list = {
244 .head = LIST_HEAD_INIT(qp_broker_list.head),
245 .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
246};
247
248static struct qp_list qp_guest_endpoints = {
249 .head = LIST_HEAD_INIT(qp_guest_endpoints.head),
250 .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
251};
252
253#define INVALID_VMCI_GUEST_MEM_ID 0
254#define QPE_NUM_PAGES(_QPE) ((u32) \
255 (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
256 DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
257
258
259/*
260 * Frees kernel VA space for a given queue and its queue header, and
261 * frees physical data pages.
262 */
263static void qp_free_queue(void *q, u64 size)
264{
265 struct vmci_queue *queue = q;
266
267 if (queue) {
268 u64 i = DIV_ROUND_UP(size, PAGE_SIZE);
269
270 if (queue->kernel_if->mapped) {
271 vunmap(queue->kernel_if->va);
272 queue->kernel_if->va = NULL;
273 }
274
275 while (i)
276 __free_page(queue->kernel_if->page[--i]);
277
278 vfree(queue->q_header);
279 }
280}
281
282/*
283 * Allocates kernel VA space of specified size, plus space for the
284 * queue structure/kernel interface and the queue header. Allocates
285 * physical pages for the queue data pages.
286 *
287 * PAGE m: struct vmci_queue_header (struct vmci_queue->q_header)
288 * PAGE m+1: struct vmci_queue
289 * PAGE m+1+q: struct vmci_queue_kern_if (struct vmci_queue->kernel_if)
290 * PAGE n-size: Data pages (struct vmci_queue->kernel_if->page[])
291 */
292static void *qp_alloc_queue(u64 size, u32 flags)
293{
294 u64 i;
295 struct vmci_queue *queue;
296 struct vmci_queue_header *q_header;
297 const u64 num_data_pages = DIV_ROUND_UP(size, PAGE_SIZE);
298 const uint queue_size =
299 PAGE_SIZE +
300 sizeof(*queue) + sizeof(*(queue->kernel_if)) +
301 num_data_pages * sizeof(*(queue->kernel_if->page));
302
303 q_header = vmalloc(queue_size);
304 if (!q_header)
305 return NULL;
306
307 queue = (void *)q_header + PAGE_SIZE;
308 queue->q_header = q_header;
309 queue->saved_header = NULL;
310 queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
311 queue->kernel_if->header_page = NULL; /* Unused in guest. */
312 queue->kernel_if->page = (struct page **)(queue->kernel_if + 1);
313 queue->kernel_if->host = false;
314 queue->kernel_if->va = NULL;
315 queue->kernel_if->mapped = false;
316
317 for (i = 0; i < num_data_pages; i++) {
318 queue->kernel_if->page[i] = alloc_pages(GFP_KERNEL, 0);
319 if (!queue->kernel_if->page[i])
320 goto fail;
321 }
322
323 if (vmci_qp_pinned(flags)) {
324 queue->kernel_if->va =
325 vmap(queue->kernel_if->page, num_data_pages, VM_MAP,
326 PAGE_KERNEL);
327 if (!queue->kernel_if->va)
328 goto fail;
329
330 queue->kernel_if->mapped = true;
331 }
332
333 return (void *)queue;
334
335 fail:
336 qp_free_queue(queue, i * PAGE_SIZE);
337 return NULL;
338}
339
340/*
341 * Copies from a given buffer or iovector to a VMCI Queue. Uses
342 * kmap()/kunmap() to dynamically map/unmap required portions of the queue
343 * by traversing the offset -> page translation structure for the queue.
344 * Assumes that offset + size does not wrap around in the queue.
345 */
346static int __qp_memcpy_to_queue(struct vmci_queue *queue,
347 u64 queue_offset,
348 const void *src,
349 size_t size,
350 bool is_iovec)
351{
352 struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
353 size_t bytes_copied = 0;
354
355 while (bytes_copied < size) {
356 u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
357 size_t page_offset =
358 (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
359 void *va;
360 size_t to_copy;
361
362 if (!kernel_if->mapped)
363 va = kmap(kernel_if->page[page_index]);
364 else
365 va = (void *)((u8 *)kernel_if->va +
366 (page_index * PAGE_SIZE));
367
368 if (size - bytes_copied > PAGE_SIZE - page_offset)
369 /* Enough payload to fill up from this page. */
370 to_copy = PAGE_SIZE - page_offset;
371 else
372 to_copy = size - bytes_copied;
373
374 if (is_iovec) {
375 struct iovec *iov = (struct iovec *)src;
376 int err;
377
378 /* The iovec will track bytes_copied internally. */
379 err = memcpy_fromiovec((u8 *)va + page_offset,
380 iov, to_copy);
381 if (err != 0) {
382 kunmap(kernel_if->page[page_index]);
383 return VMCI_ERROR_INVALID_ARGS;
384 }
385 } else {
386 memcpy((u8 *)va + page_offset,
387 (u8 *)src + bytes_copied, to_copy);
388 }
389
390 bytes_copied += to_copy;
391 if (!kernel_if->mapped)
392 kunmap(kernel_if->page[page_index]);
393 }
394
395 return VMCI_SUCCESS;
396}
397
398/*
399 * Copies to a given buffer or iovector from a VMCI Queue. Uses
400 * kmap()/kunmap() to dynamically map/unmap required portions of the queue
401 * by traversing the offset -> page translation structure for the queue.
402 * Assumes that offset + size does not wrap around in the queue.
403 */
404static int __qp_memcpy_from_queue(void *dest,
405 const struct vmci_queue *queue,
406 u64 queue_offset,
407 size_t size,
408 bool is_iovec)
409{
410 struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
411 size_t bytes_copied = 0;
412
413 while (bytes_copied < size) {
414 u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
415 size_t page_offset =
416 (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
417 void *va;
418 size_t to_copy;
419
420 if (!kernel_if->mapped)
421 va = kmap(kernel_if->page[page_index]);
422 else
423 va = (void *)((u8 *)kernel_if->va +
424 (page_index * PAGE_SIZE));
425
426 if (size - bytes_copied > PAGE_SIZE - page_offset)
427 /* Enough payload to fill up this page. */
428 to_copy = PAGE_SIZE - page_offset;
429 else
430 to_copy = size - bytes_copied;
431
432 if (is_iovec) {
433 struct iovec *iov = (struct iovec *)dest;
434 int err;
435
436 /* The iovec will track bytes_copied internally. */
437 err = memcpy_toiovec(iov, (u8 *)va + page_offset,
438 to_copy);
439 if (err != 0) {
440 kunmap(kernel_if->page[page_index]);
441 return VMCI_ERROR_INVALID_ARGS;
442 }
443 } else {
444 memcpy((u8 *)dest + bytes_copied,
445 (u8 *)va + page_offset, to_copy);
446 }
447
448 bytes_copied += to_copy;
449 if (!kernel_if->mapped)
450 kunmap(kernel_if->page[page_index]);
451 }
452
453 return VMCI_SUCCESS;
454}
455
456/*
457 * Allocates two list of PPNs --- one for the pages in the produce queue,
458 * and the other for the pages in the consume queue. Intializes the list
459 * of PPNs with the page frame numbers of the KVA for the two queues (and
460 * the queue headers).
461 */
462static int qp_alloc_ppn_set(void *prod_q,
463 u64 num_produce_pages,
464 void *cons_q,
465 u64 num_consume_pages, struct ppn_set *ppn_set)
466{
467 u32 *produce_ppns;
468 u32 *consume_ppns;
469 struct vmci_queue *produce_q = prod_q;
470 struct vmci_queue *consume_q = cons_q;
471 u64 i;
472
473 if (!produce_q || !num_produce_pages || !consume_q ||
474 !num_consume_pages || !ppn_set)
475 return VMCI_ERROR_INVALID_ARGS;
476
477 if (ppn_set->initialized)
478 return VMCI_ERROR_ALREADY_EXISTS;
479
480 produce_ppns =
481 kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL);
482 if (!produce_ppns)
483 return VMCI_ERROR_NO_MEM;
484
485 consume_ppns =
486 kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL);
487 if (!consume_ppns) {
488 kfree(produce_ppns);
489 return VMCI_ERROR_NO_MEM;
490 }
491
492 produce_ppns[0] = page_to_pfn(vmalloc_to_page(produce_q->q_header));
493 for (i = 1; i < num_produce_pages; i++) {
494 unsigned long pfn;
495
496 produce_ppns[i] =
497 page_to_pfn(produce_q->kernel_if->page[i - 1]);
498 pfn = produce_ppns[i];
499
500 /* Fail allocation if PFN isn't supported by hypervisor. */
501 if (sizeof(pfn) > sizeof(*produce_ppns)
502 && pfn != produce_ppns[i])
503 goto ppn_error;
504 }
505
506 consume_ppns[0] = page_to_pfn(vmalloc_to_page(consume_q->q_header));
507 for (i = 1; i < num_consume_pages; i++) {
508 unsigned long pfn;
509
510 consume_ppns[i] =
511 page_to_pfn(consume_q->kernel_if->page[i - 1]);
512 pfn = consume_ppns[i];
513
514 /* Fail allocation if PFN isn't supported by hypervisor. */
515 if (sizeof(pfn) > sizeof(*consume_ppns)
516 && pfn != consume_ppns[i])
517 goto ppn_error;
518 }
519
520 ppn_set->num_produce_pages = num_produce_pages;
521 ppn_set->num_consume_pages = num_consume_pages;
522 ppn_set->produce_ppns = produce_ppns;
523 ppn_set->consume_ppns = consume_ppns;
524 ppn_set->initialized = true;
525 return VMCI_SUCCESS;
526
527 ppn_error:
528 kfree(produce_ppns);
529 kfree(consume_ppns);
530 return VMCI_ERROR_INVALID_ARGS;
531}
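
/*
 * Layout reminder (illustration, not driver code): each PPN list built above
 * starts with the queue *header* page, followed by the data pages, which is
 * why entry i (for i >= 1) maps to kernel_if->page[i - 1]:
 *
 *   produce_ppns[0]      = pfn(vmalloc_to_page(produce_q->q_header))
 *   produce_ppns[i >= 1] = pfn(produce_q->kernel_if->page[i - 1])
 *
 * and likewise for consume_ppns[].
 */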
532
533/*
534 * Frees the two lists of PPNs for a queue pair.
535 */
536static void qp_free_ppn_set(struct ppn_set *ppn_set)
537{
538 if (ppn_set->initialized) {
539 /* Do not call these functions on NULL inputs. */
540 kfree(ppn_set->produce_ppns);
541 kfree(ppn_set->consume_ppns);
542 }
543 memset(ppn_set, 0, sizeof(*ppn_set));
544}
545
546/*
547 * Populates the list of PPNs in the hypercall structure with the PPNs
548 * of the produce queue and the consume queue.
549 */
550static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
551{
552 memcpy(call_buf, ppn_set->produce_ppns,
553 ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
554 memcpy(call_buf +
555 ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
556 ppn_set->consume_ppns,
557 ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
558
559 return VMCI_SUCCESS;
560}
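
/*
 * Illustrative sketch only, not driver code: the hypercall payload built
 * above is the produce PPN array followed back-to-back by the consume PPN
 * array, so the consume PPNs begin at the byte offset computed below. The
 * sketch_* name is an assumption for illustration.
 */
static size_t sketch_consume_ppn_offset(const struct ppn_set *ppn_set)
{
	return (size_t)(ppn_set->num_produce_pages *
			sizeof(*ppn_set->produce_ppns));
}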
561
562static int qp_memcpy_to_queue(struct vmci_queue *queue,
563 u64 queue_offset,
564 const void *src, size_t src_offset, size_t size)
565{
566 return __qp_memcpy_to_queue(queue, queue_offset,
567 (u8 *)src + src_offset, size, false);
568}
569
570static int qp_memcpy_from_queue(void *dest,
571 size_t dest_offset,
572 const struct vmci_queue *queue,
573 u64 queue_offset, size_t size)
574{
575 return __qp_memcpy_from_queue((u8 *)dest + dest_offset,
576 queue, queue_offset, size, false);
577}
578
579/*
580 * Copies to a VMCI Queue from a given iovec.
581 */
582static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
583 u64 queue_offset,
584 const void *src,
585 size_t src_offset, size_t size)
586{
587
588 /*
589 * We ignore src_offset because src is really a struct iovec * and will
590 * maintain offset internally.
591 */
592 return __qp_memcpy_to_queue(queue, queue_offset, src, size, true);
593}
594
595/*
596 * Copies to a given iovec from a VMCI Queue.
597 */
598static int qp_memcpy_from_queue_iov(void *dest,
599 size_t dest_offset,
600 const struct vmci_queue *queue,
601 u64 queue_offset, size_t size)
602{
603 /*
604 * We ignore dest_offset because dest is really a struct iovec * and
605 * will maintain offset internally.
606 */
607 return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true);
608}
609
610/*
611 * Allocates kernel memory for the queue structure and kernel interface,
612 * plus a page pointer array sized for a queue of the specified size. This
613 * is different from the guest queue allocator, because we do not allocate
614 * our own queue header/data pages here but share those of the guest.
615 */
616static struct vmci_queue *qp_host_alloc_queue(u64 size)
617{
618 struct vmci_queue *queue;
619 const size_t num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
620 const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
621 const size_t queue_page_size =
622 num_pages * sizeof(*queue->kernel_if->page);
623
624 queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
625 if (queue) {
626 queue->q_header = NULL;
627 queue->saved_header = NULL;
628 queue->kernel_if =
629 (struct vmci_queue_kern_if *)((u8 *)queue +
630 sizeof(*queue));
631 queue->kernel_if->host = true;
632 queue->kernel_if->mutex = NULL;
633 queue->kernel_if->num_pages = num_pages;
634 queue->kernel_if->header_page =
635 (struct page **)((u8 *)queue + queue_size);
636 queue->kernel_if->page = &queue->kernel_if->header_page[1];
637 queue->kernel_if->va = NULL;
638 queue->kernel_if->mapped = false;
639 }
640
641 return queue;
642}
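
/*
 * Illustrative sketch only, not driver code: qp_host_alloc_queue() carves
 * everything out of a single allocation -- the queue struct, the kernel_if
 * struct right behind it, and then num_pages page pointers, where slot 0 is
 * the header page and slots 1..num_pages-1 are the data pages. The sketch_*
 * helper below recomputes the size of that one allocation.
 */
static size_t sketch_host_queue_alloc_size(u64 size)
{
	const size_t num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;

	return sizeof(struct vmci_queue) +
		sizeof(struct vmci_queue_kern_if) +
		num_pages * sizeof(struct page *);
}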
643
644/*
645 * Frees kernel memory for a given queue (header plus translation
646 * structure).
647 */
648static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
649{
650 kfree(queue);
651}
652
653/*
654 * Initialize the mutex for the pair of queues. This mutex is used to
655 * protect the q_header and the buffer from changing out from under any
656 * users of either queue. Of course, it's only any good if the mutexes
657 * are actually acquired. Queue structure must lie on non-paged memory
658 * or we cannot guarantee access to the mutex.
659 */
660static void qp_init_queue_mutex(struct vmci_queue *produce_q,
661 struct vmci_queue *consume_q)
662{
663 /*
664 * Only the host queue has shared state - the guest queues do not
665 * need to synchronize access using a queue mutex.
666 */
667
668 if (produce_q->kernel_if->host) {
669 produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
670 consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
671 mutex_init(produce_q->kernel_if->mutex);
672 }
673}
674
675/*
676 * Cleans up the mutex for the pair of queues.
677 */
678static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
679 struct vmci_queue *consume_q)
680{
681 if (produce_q->kernel_if->host) {
682 produce_q->kernel_if->mutex = NULL;
683 consume_q->kernel_if->mutex = NULL;
684 }
685}
686
687/*
688 * Acquire the mutex for the queue. Note that the produce_q and
689 * the consume_q share a mutex. So, only one of the two needs to
690 * be passed in to this routine. Either will work just fine.
691 */
692static void qp_acquire_queue_mutex(struct vmci_queue *queue)
693{
694 if (queue->kernel_if->host)
695 mutex_lock(queue->kernel_if->mutex);
696}
697
698/*
699 * Release the mutex for the queue. Note that the produce_q and
700 * the consume_q share a mutex. So, only one of the two needs to
701 * be passed in to this routine. Either will work just fine.
702 */
703static void qp_release_queue_mutex(struct vmci_queue *queue)
704{
705 if (queue->kernel_if->host)
706 mutex_unlock(queue->kernel_if->mutex);
707}
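
/*
 * Illustrative sketch only, not driver code: the locking discipline described
 * above in one place -- produce_q and consume_q share a single mutex, so a
 * host-side caller brackets any header access with one acquire/release on
 * either queue. The sketch_* name is an assumption for illustration.
 */
static bool sketch_queue_header_is_mapped(struct vmci_queue *queue)
{
	bool mapped;

	qp_acquire_queue_mutex(queue);
	mapped = queue->q_header != NULL;
	qp_release_queue_mutex(queue);

	return mapped;
}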
708
709/*
710 * Helper function to release pages previously pinned with
711 * get_user_pages.
712 */
713static void qp_release_pages(struct page **pages,
714 u64 num_pages, bool dirty)
715{
716 int i;
717
718 for (i = 0; i < num_pages; i++) {
719 if (dirty)
720 set_page_dirty(pages[i]);
721
722 page_cache_release(pages[i]);
723 pages[i] = NULL;
724 }
725}
726
727/*
728 * Pins the user pages backing the produce and consume queues into
729 * memory and populates the header_page arrays of the corresponding
730 * struct vmci_queue structures with them.
731 */
732static int qp_host_get_user_memory(u64 produce_uva,
733 u64 consume_uva,
734 struct vmci_queue *produce_q,
735 struct vmci_queue *consume_q)
736{
737 int retval;
738 int err = VMCI_SUCCESS;
739
740 down_write(&current->mm->mmap_sem);
741 retval = get_user_pages(current,
742 current->mm,
743 (uintptr_t) produce_uva,
744 produce_q->kernel_if->num_pages,
745 1, 0, produce_q->kernel_if->header_page, NULL);
746 if (retval < produce_q->kernel_if->num_pages) {
747 pr_warn("get_user_pages(produce) failed (retval=%d)", retval);
748 qp_release_pages(produce_q->kernel_if->header_page, retval,
749 false);
750 err = VMCI_ERROR_NO_MEM;
751 goto out;
752 }
753
754 retval = get_user_pages(current,
755 current->mm,
756 (uintptr_t) consume_uva,
757 consume_q->kernel_if->num_pages,
758 1, 0, consume_q->kernel_if->header_page, NULL);
759 if (retval < consume_q->kernel_if->num_pages) {
760 pr_warn("get_user_pages(consume) failed (retval=%d)", retval);
761 qp_release_pages(consume_q->kernel_if->header_page, retval,
762 false);
763 qp_release_pages(produce_q->kernel_if->header_page,
764 produce_q->kernel_if->num_pages, false);
765 err = VMCI_ERROR_NO_MEM;
766 }
767
768 out:
769 up_write(&current->mm->mmap_sem);
770
771 return err;
772}
773
774/*
775 * Registers the specification of the user pages used for backing a queue
776 * pair. Enough information to map in pages is stored in the OS specific
777 * part of the struct vmci_queue structure.
778 */
779static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
780 struct vmci_queue *produce_q,
781 struct vmci_queue *consume_q)
782{
783 u64 produce_uva;
784 u64 consume_uva;
785
786 /*
787 * The new style and the old style mapping only differs in
788 * that we either get a single or two UVAs, so we split the
789 * single UVA range at the appropriate spot.
790 */
791 produce_uva = page_store->pages;
792 consume_uva = page_store->pages +
793 produce_q->kernel_if->num_pages * PAGE_SIZE;
794 return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
795 consume_q);
796}
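
/*
 * Illustrative sketch only, not driver code: with the "new style" single UVA
 * range, the consume queue's pages simply begin where the produce queue's
 * pages end, which is exactly the split computed above. The sketch_* name is
 * an assumption for illustration.
 */
static u64 sketch_consume_uva(u64 range_start, u64 num_produce_pages)
{
	return range_start + num_produce_pages * PAGE_SIZE;
}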
797
798/*
799 * Releases and removes the references to user pages stored in the attach
800 * struct. Pages are released from the page cache and may become
801 * swappable again.
802 */
803static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
804 struct vmci_queue *consume_q)
805{
806 qp_release_pages(produce_q->kernel_if->header_page,
807 produce_q->kernel_if->num_pages, true);
808 memset(produce_q->kernel_if->header_page, 0,
809 sizeof(*produce_q->kernel_if->header_page) *
810 produce_q->kernel_if->num_pages);
811 qp_release_pages(consume_q->kernel_if->header_page,
812 consume_q->kernel_if->num_pages, true);
813 memset(consume_q->kernel_if->header_page, 0,
814 sizeof(*consume_q->kernel_if->header_page) *
815 consume_q->kernel_if->num_pages);
816}
817
818/*
819 * Once qp_host_register_user_memory has been performed on a
820 * queue, the queue pair headers can be mapped into the
821 * kernel. Once mapped, they must be unmapped with
822 * qp_host_unmap_queues prior to calling
823 * qp_host_unregister_user_memory.
824 * Pages are pinned.
825 */
826static int qp_host_map_queues(struct vmci_queue *produce_q,
827 struct vmci_queue *consume_q)
828{
829 int result;
830
831 if (!produce_q->q_header || !consume_q->q_header) {
832 struct page *headers[2];
833
834 if (produce_q->q_header != consume_q->q_header)
835 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
836
837 if (produce_q->kernel_if->header_page == NULL ||
838 *produce_q->kernel_if->header_page == NULL)
839 return VMCI_ERROR_UNAVAILABLE;
840
841 headers[0] = *produce_q->kernel_if->header_page;
842 headers[1] = *consume_q->kernel_if->header_page;
843
844 produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
845 if (produce_q->q_header != NULL) {
846 consume_q->q_header =
847 (struct vmci_queue_header *)((u8 *)
848 produce_q->q_header +
849 PAGE_SIZE);
850 result = VMCI_SUCCESS;
851 } else {
852 pr_warn("vmap failed\n");
853 result = VMCI_ERROR_NO_MEM;
854 }
855 } else {
856 result = VMCI_SUCCESS;
857 }
858
859 return result;
860}
861
862/*
863 * Unmaps previously mapped queue pair headers from the kernel.
864 * Pages are unpinned.
865 */
866static int qp_host_unmap_queues(u32 gid,
867 struct vmci_queue *produce_q,
868 struct vmci_queue *consume_q)
869{
870 if (produce_q->q_header) {
871 if (produce_q->q_header < consume_q->q_header)
872 vunmap(produce_q->q_header);
873 else
874 vunmap(consume_q->q_header);
875
876 produce_q->q_header = NULL;
877 consume_q->q_header = NULL;
878 }
879
880 return VMCI_SUCCESS;
881}
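
/*
 * Note (editorial, not driver code): qp_host_map_queues() creates one
 * contiguous two-page mapping covering both headers, so the unmap path only
 * needs a single vunmap() on whichever of the two pointers marks the start
 * of that mapping -- hence the produce/consume comparison above.
 */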
882
883/*
884 * Finds the entry in the list corresponding to a given handle. Assumes
885 * that the list is locked.
886 */
887static struct qp_entry *qp_list_find(struct qp_list *qp_list,
888 struct vmci_handle handle)
889{
890 struct qp_entry *entry;
891
892 if (vmci_handle_is_invalid(handle))
893 return NULL;
894
895 list_for_each_entry(entry, &qp_list->head, list_item) {
896 if (vmci_handle_is_equal(entry->handle, handle))
897 return entry;
898 }
899
900 return NULL;
901}
902
903/*
904 * Finds the entry in the list corresponding to a given handle.
905 */
906static struct qp_guest_endpoint *
907qp_guest_handle_to_entry(struct vmci_handle handle)
908{
909 struct qp_guest_endpoint *entry;
910 struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
911
912 entry = qp ? container_of(
913 qp, struct qp_guest_endpoint, qp) : NULL;
914 return entry;
915}
916
917/*
918 * Finds the entry in the list corresponding to a given handle.
919 */
920static struct qp_broker_entry *
921qp_broker_handle_to_entry(struct vmci_handle handle)
922{
923 struct qp_broker_entry *entry;
924 struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
925
926 entry = qp ? container_of(
927 qp, struct qp_broker_entry, qp) : NULL;
928 return entry;
929}
930
931/*
932 * Dispatches a queue pair event message directly into the local event
933 * queue.
934 */
935static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
936{
937 u32 context_id = vmci_get_context_id();
938 struct vmci_event_qp ev;
939
940 ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
941 ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
942 VMCI_CONTEXT_RESOURCE_ID);
943 ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
944 ev.msg.event_data.event =
945 attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
946 ev.payload.peer_id = context_id;
947 ev.payload.handle = handle;
948
949 return vmci_event_dispatch(&ev.msg.hdr);
950}
951
952/*
953 * Allocates and initializes a qp_guest_endpoint structure.
954 * Allocates a queue_pair rid (and handle) iff the given entry has
955 * an invalid handle. 0 through VMCI_RESERVED_RESOURCE_ID_MAX
956 * are reserved handles. Assumes that the QP list mutex is held
957 * by the caller.
958 */
959static struct qp_guest_endpoint *
960qp_guest_endpoint_create(struct vmci_handle handle,
961 u32 peer,
962 u32 flags,
963 u64 produce_size,
964 u64 consume_size,
965 void *produce_q,
966 void *consume_q)
967{
968 int result;
969 struct qp_guest_endpoint *entry;
970 /* One page each for the queue headers. */
971 const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) +
972 DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;
973
974 if (vmci_handle_is_invalid(handle)) {
975 u32 context_id = vmci_get_context_id();
976
977 handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
978 }
979
980 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
981 if (entry) {
982 entry->qp.peer = peer;
983 entry->qp.flags = flags;
984 entry->qp.produce_size = produce_size;
985 entry->qp.consume_size = consume_size;
986 entry->qp.ref_count = 0;
987 entry->num_ppns = num_ppns;
988 entry->produce_q = produce_q;
989 entry->consume_q = consume_q;
990 INIT_LIST_HEAD(&entry->qp.list_item);
991
992 /* Add resource obj */
993 result = vmci_resource_add(&entry->resource,
994 VMCI_RESOURCE_TYPE_QPAIR_GUEST,
995 handle);
996 entry->qp.handle = vmci_resource_handle(&entry->resource);
997 if ((result != VMCI_SUCCESS) ||
998 qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
999 pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
1000 handle.context, handle.resource, result);
1001 kfree(entry);
1002 entry = NULL;
1003 }
1004 }
1005 return entry;
1006}
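
/*
 * Illustrative sketch only, not driver code: the PPN count announced to the
 * hypervisor covers the data pages of both queues plus one header page each,
 * matching the num_ppns computation above. The sketch_* name is an assumption
 * for illustration.
 */
static u64 sketch_num_ppns(u64 produce_size, u64 consume_size)
{
	return DIV_ROUND_UP(produce_size, PAGE_SIZE) +
		DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;
}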
1007
1008/*
1009 * Frees a qp_guest_endpoint structure.
1010 */
1011static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
1012{
1013 qp_free_ppn_set(&entry->ppn_set);
1014 qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
1015 qp_free_queue(entry->produce_q, entry->qp.produce_size);
1016 qp_free_queue(entry->consume_q, entry->qp.consume_size);
1017 /* Unlink from resource hash table and free callback */
1018 vmci_resource_remove(&entry->resource);
1019
1020 kfree(entry);
1021}
1022
1023/*
1024 * Helper to make a VMCI_QUEUEPAIR_ALLOC hypercall when the driver is
1025 * supporting a guest device.
1026 */
1027static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
1028{
1029 struct vmci_qp_alloc_msg *alloc_msg;
1030 size_t msg_size;
1031 int result;
1032
1033 if (!entry || entry->num_ppns <= 2)
1034 return VMCI_ERROR_INVALID_ARGS;
1035
1036 msg_size = sizeof(*alloc_msg) +
1037 (size_t) entry->num_ppns * sizeof(u32);
1038 alloc_msg = kmalloc(msg_size, GFP_KERNEL);
1039 if (!alloc_msg)
1040 return VMCI_ERROR_NO_MEM;
1041
1042 alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1043 VMCI_QUEUEPAIR_ALLOC);
1044 alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
1045 alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
1046 alloc_msg->handle = entry->qp.handle;
1047 alloc_msg->peer = entry->qp.peer;
1048 alloc_msg->flags = entry->qp.flags;
1049 alloc_msg->produce_size = entry->qp.produce_size;
1050 alloc_msg->consume_size = entry->qp.consume_size;
1051 alloc_msg->num_ppns = entry->num_ppns;
1052
1053 result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg),
1054 &entry->ppn_set);
1055 if (result == VMCI_SUCCESS)
1056 result = vmci_send_datagram(&alloc_msg->hdr);
1057
1058 kfree(alloc_msg);
1059
1060 return result;
1061}
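
/*
 * Illustrative sketch only, not driver code: the allocation datagram is the
 * fixed struct vmci_qp_alloc_msg block followed immediately by num_ppns
 * 32-bit PPNs, and the advertised payload excludes the datagram header, as in
 * qp_alloc_hypercall() above. The sketch_* name is an assumption for
 * illustration.
 */
static size_t sketch_alloc_msg_payload_size(u64 num_ppns)
{
	return sizeof(struct vmci_qp_alloc_msg) +
		(size_t)num_ppns * sizeof(u32) - VMCI_DG_HEADERSIZE;
}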
1062
1063/*
1064 * Helper to make a VMCI_QUEUEPAIR_DETACH hypercall when the driver is
1065 * supporting a guest device.
1066 */
1067static int qp_detatch_hypercall(struct vmci_handle handle)
1068{
1069 struct vmci_qp_detach_msg detach_msg;
1070
1071 detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1072 VMCI_QUEUEPAIR_DETACH);
1073 detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
1074 detach_msg.hdr.payload_size = sizeof(handle);
1075 detach_msg.handle = handle;
1076
1077 return vmci_send_datagram(&detach_msg.hdr);
1078}
1079
1080/*
1081 * Adds the given entry to the list. Assumes that the list is locked.
1082 */
1083static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry)
1084{
1085 if (entry)
1086 list_add(&entry->list_item, &qp_list->head);
1087}
1088
1089/*
1090 * Removes the given entry from the list. Assumes that the list is locked.
1091 */
1092static void qp_list_remove_entry(struct qp_list *qp_list,
1093 struct qp_entry *entry)
1094{
1095 if (entry)
1096 list_del(&entry->list_item);
1097}
1098
1099/*
1100 * Helper for VMCI queue_pair detach interface. Frees the physical
1101 * pages for the queue pair.
1102 */
1103static int qp_detatch_guest_work(struct vmci_handle handle)
1104{
1105 int result;
1106 struct qp_guest_endpoint *entry;
1107 u32 ref_count = ~0; /* To avoid compiler warning below */
1108
1109 mutex_lock(&qp_guest_endpoints.mutex);
1110
1111 entry = qp_guest_handle_to_entry(handle);
1112 if (!entry) {
1113 mutex_unlock(&qp_guest_endpoints.mutex);
1114 return VMCI_ERROR_NOT_FOUND;
1115 }
1116
1117 if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1118 result = VMCI_SUCCESS;
1119
1120 if (entry->qp.ref_count > 1) {
1121 result = qp_notify_peer_local(false, handle);
1122 /*
1123 * We can fail to notify a local queuepair
1124 * because we can't allocate. We still want
1125 * to release the entry if that happens, so
1126 * don't bail out yet.
1127 */
1128 }
1129 } else {
1130 result = qp_detatch_hypercall(handle);
1131 if (result < VMCI_SUCCESS) {
1132 /*
1133 * We failed to notify a non-local queuepair.
1134 * That other queuepair might still be
1135 * accessing the shared memory, so don't
1136 * release the entry yet. It will get cleaned
1137 * up by VMCIqueue_pair_Exit() if necessary
1138 * (assuming we are going away, otherwise why
1139 * did this fail?).
1140 */
1141
1142 mutex_unlock(&qp_guest_endpoints.mutex);
1143 return result;
1144 }
1145 }
1146
1147 /*
1148 * If we get here then we either failed to notify a local queuepair, or
1149 * we succeeded in all cases. Release the entry if required.
1150 */
1151
1152 entry->qp.ref_count--;
1153 if (entry->qp.ref_count == 0)
1154 qp_list_remove_entry(&qp_guest_endpoints, &entry->qp);
1155
1156 /* If we didn't remove the entry, this could change once we unlock. */
1157 if (entry)
1158 ref_count = entry->qp.ref_count;
1159
1160 mutex_unlock(&qp_guest_endpoints.mutex);
1161
1162 if (ref_count == 0)
1163 qp_guest_endpoint_destroy(entry);
1164
1165 return result;
1166}
1167
1168/*
1169 * This function handles the actual allocation of a VMCI queue
1170 * pair guest endpoint. Allocates physical pages for the queue
1171 * pair. It makes OS dependent calls through generic wrappers.
1172 */
1173static int qp_alloc_guest_work(struct vmci_handle *handle,
1174 struct vmci_queue **produce_q,
1175 u64 produce_size,
1176 struct vmci_queue **consume_q,
1177 u64 consume_size,
1178 u32 peer,
1179 u32 flags,
1180 u32 priv_flags)
1181{
1182 const u64 num_produce_pages =
1183 DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1;
1184 const u64 num_consume_pages =
1185 DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1;
1186 void *my_produce_q = NULL;
1187 void *my_consume_q = NULL;
1188 int result;
1189 struct qp_guest_endpoint *queue_pair_entry = NULL;
1190
1191 if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
1192 return VMCI_ERROR_NO_ACCESS;
1193
1194 mutex_lock(&qp_guest_endpoints.mutex);
1195
1196 queue_pair_entry = qp_guest_handle_to_entry(*handle);
1197 if (queue_pair_entry) {
1198 if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1199 /* Local attach case. */
1200 if (queue_pair_entry->qp.ref_count > 1) {
1201 pr_devel("Error attempting to attach more than once\n");
1202 result = VMCI_ERROR_UNAVAILABLE;
1203 goto error_keep_entry;
1204 }
1205
1206 if (queue_pair_entry->qp.produce_size != consume_size ||
1207 queue_pair_entry->qp.consume_size !=
1208 produce_size ||
1209 queue_pair_entry->qp.flags !=
1210 (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
1211 pr_devel("Error mismatched queue pair in local attach\n");
1212 result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
1213 goto error_keep_entry;
1214 }
1215
1216 /*
1217 * Do a local attach. We swap the consume and
1218 * produce queues for the attacher and deliver
1219 * an attach event.
1220 */
1221 result = qp_notify_peer_local(true, *handle);
1222 if (result < VMCI_SUCCESS)
1223 goto error_keep_entry;
1224
1225 my_produce_q = queue_pair_entry->consume_q;
1226 my_consume_q = queue_pair_entry->produce_q;
1227 goto out;
1228 }
1229
1230 result = VMCI_ERROR_ALREADY_EXISTS;
1231 goto error_keep_entry;
1232 }
1233
1234 my_produce_q = qp_alloc_queue(produce_size, flags);
1235 if (!my_produce_q) {
1236 pr_warn("Error allocating pages for produce queue\n");
1237 result = VMCI_ERROR_NO_MEM;
1238 goto error;
1239 }
1240
1241 my_consume_q = qp_alloc_queue(consume_size, flags);
1242 if (!my_consume_q) {
1243 pr_warn("Error allocating pages for consume queue\n");
1244 result = VMCI_ERROR_NO_MEM;
1245 goto error;
1246 }
1247
1248 queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
1249 produce_size, consume_size,
1250 my_produce_q, my_consume_q);
1251 if (!queue_pair_entry) {
1252 pr_warn("Error allocating memory in %s\n", __func__);
1253 result = VMCI_ERROR_NO_MEM;
1254 goto error;
1255 }
1256
1257 result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q,
1258 num_consume_pages,
1259 &queue_pair_entry->ppn_set);
1260 if (result < VMCI_SUCCESS) {
1261 pr_warn("qp_alloc_ppn_set failed\n");
1262 goto error;
1263 }
1264
1265 /*
1266 * It's only necessary to notify the host if this queue pair will be
1267 * attached to from another context.
1268 */
1269 if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1270 /* Local create case. */
1271 u32 context_id = vmci_get_context_id();
1272
1273 /*
1274 * Enforce similar checks on local queue pairs as we
1275 * do for regular ones. The handle's context must
1276 * match the creator or attacher context id (here they
1277 * are both the current context id) and the
1278 * attach-only flag cannot exist during create. We
1279 * also ensure the specified peer is this context or an
1280 * invalid one.
1281 */
1282 if (queue_pair_entry->qp.handle.context != context_id ||
1283 (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
1284 queue_pair_entry->qp.peer != context_id)) {
1285 result = VMCI_ERROR_NO_ACCESS;
1286 goto error;
1287 }
1288
1289 if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
1290 result = VMCI_ERROR_NOT_FOUND;
1291 goto error;
1292 }
1293 } else {
1294 result = qp_alloc_hypercall(queue_pair_entry);
1295 if (result < VMCI_SUCCESS) {
1296 pr_warn("qp_alloc_hypercall result = %d\n", result);
1297 goto error;
1298 }
1299 }
1300
1301 qp_init_queue_mutex((struct vmci_queue *)my_produce_q,
1302 (struct vmci_queue *)my_consume_q);
1303
1304 qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
1305
1306 out:
1307 queue_pair_entry->qp.ref_count++;
1308 *handle = queue_pair_entry->qp.handle;
1309 *produce_q = (struct vmci_queue *)my_produce_q;
1310 *consume_q = (struct vmci_queue *)my_consume_q;
1311
1312 /*
1313 * We should initialize the queue pair header pages on a local
1314 * queue pair create. For non-local queue pairs, the
1315 * hypervisor initializes the header pages in the create step.
1316 */
1317 if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
1318 queue_pair_entry->qp.ref_count == 1) {
1319 vmci_q_header_init((*produce_q)->q_header, *handle);
1320 vmci_q_header_init((*consume_q)->q_header, *handle);
1321 }
1322
1323 mutex_unlock(&qp_guest_endpoints.mutex);
1324
1325 return VMCI_SUCCESS;
1326
1327 error:
1328 mutex_unlock(&qp_guest_endpoints.mutex);
1329 if (queue_pair_entry) {
1330 /* The queues will be freed inside the destroy routine. */
1331 qp_guest_endpoint_destroy(queue_pair_entry);
1332 } else {
1333 qp_free_queue(my_produce_q, produce_size);
1334 qp_free_queue(my_consume_q, consume_size);
1335 }
1336 return result;
1337
1338 error_keep_entry:
1339 /* This path should only be used when an existing entry was found. */
1340 mutex_unlock(&qp_guest_endpoints.mutex);
1341 return result;
1342}
1343
1344/*
1345 * The first endpoint issuing a queue pair allocation will create the state
1346 * of the queue pair in the queue pair broker.
1347 *
1348 * If the creator is a guest, it will associate a VMX virtual address range
1349 * with the queue pair as specified by the page_store. For compatibility with
1350 * older VMX'en that used a separate step to set the VMX virtual
1351 * address range, the virtual address range can be registered later using
1352 * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be
1353 * used.
1354 *
1355 * If the creator is the host, a page_store of NULL should be used as well,
1356 * since the host is not able to supply a page store for the queue pair.
1357 *
1358 * For older VMX and host callers, the queue pair will be created in the
1359 * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
1360 * created in the VMCIQPB_CREATED_MEM state.
1361 */
1362static int qp_broker_create(struct vmci_handle handle,
1363 u32 peer,
1364 u32 flags,
1365 u32 priv_flags,
1366 u64 produce_size,
1367 u64 consume_size,
1368 struct vmci_qp_page_store *page_store,
1369 struct vmci_ctx *context,
1370 vmci_event_release_cb wakeup_cb,
1371 void *client_data, struct qp_broker_entry **ent)
1372{
1373 struct qp_broker_entry *entry = NULL;
1374 const u32 context_id = vmci_ctx_get_id(context);
1375 bool is_local = flags & VMCI_QPFLAG_LOCAL;
1376 int result;
1377 u64 guest_produce_size;
1378 u64 guest_consume_size;
1379
1380 /* Do not create if the caller asked not to. */
1381 if (flags & VMCI_QPFLAG_ATTACH_ONLY)
1382 return VMCI_ERROR_NOT_FOUND;
1383
1384 /*
1385 * Creator's context ID should match handle's context ID or the creator
1386 * must allow the context in handle's context ID as the "peer".
1387 */
1388 if (handle.context != context_id && handle.context != peer)
1389 return VMCI_ERROR_NO_ACCESS;
1390
1391 if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer))
1392 return VMCI_ERROR_DST_UNREACHABLE;
1393
1394 /*
1395 * Creator's context ID for local queue pairs should match the
1396 * peer, if a peer is specified.
1397 */
1398 if (is_local && peer != VMCI_INVALID_ID && context_id != peer)
1399 return VMCI_ERROR_NO_ACCESS;
1400
1401 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
1402 if (!entry)
1403 return VMCI_ERROR_NO_MEM;
1404
1405 if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) {
1406 /*
1407 * The queue pair broker entry stores values from the guest
1408 * point of view, so a creating host side endpoint should swap
1409 * produce and consume values -- unless it is a local queue
1410 * pair, in which case no swapping is necessary, since the local
1411 * attacher will swap queues.
1412 */
1413
1414 guest_produce_size = consume_size;
1415 guest_consume_size = produce_size;
1416 } else {
1417 guest_produce_size = produce_size;
1418 guest_consume_size = consume_size;
1419 }
1420
1421 entry->qp.handle = handle;
1422 entry->qp.peer = peer;
1423 entry->qp.flags = flags;
1424 entry->qp.produce_size = guest_produce_size;
1425 entry->qp.consume_size = guest_consume_size;
1426 entry->qp.ref_count = 1;
1427 entry->create_id = context_id;
1428 entry->attach_id = VMCI_INVALID_ID;
1429 entry->state = VMCIQPB_NEW;
1430 entry->require_trusted_attach =
1431 !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED);
1432 entry->created_by_trusted =
1433 !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED);
1434 entry->vmci_page_files = false;
1435 entry->wakeup_cb = wakeup_cb;
1436 entry->client_data = client_data;
1437 entry->produce_q = qp_host_alloc_queue(guest_produce_size);
1438 if (entry->produce_q == NULL) {
1439 result = VMCI_ERROR_NO_MEM;
1440 goto error;
1441 }
1442 entry->consume_q = qp_host_alloc_queue(guest_consume_size);
1443 if (entry->consume_q == NULL) {
1444 result = VMCI_ERROR_NO_MEM;
1445 goto error;
1446 }
1447
1448 qp_init_queue_mutex(entry->produce_q, entry->consume_q);
1449
1450 INIT_LIST_HEAD(&entry->qp.list_item);
1451
1452 if (is_local) {
1453 u8 *tmp;
1454
1455 entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp),
1456 PAGE_SIZE, GFP_KERNEL);
1457 if (entry->local_mem == NULL) {
1458 result = VMCI_ERROR_NO_MEM;
1459 goto error;
1460 }
1461 entry->state = VMCIQPB_CREATED_MEM;
1462 entry->produce_q->q_header = entry->local_mem;
1463 tmp = (u8 *)entry->local_mem + PAGE_SIZE *
1464 (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1);
1465 entry->consume_q->q_header = (struct vmci_queue_header *)tmp;
1466 } else if (page_store) {
1467 /*
1468 * The VMX already initialized the queue pair headers, so no
1469 * need for the kernel side to do that.
1470 */
1471 result = qp_host_register_user_memory(page_store,
1472 entry->produce_q,
1473 entry->consume_q);
1474 if (result < VMCI_SUCCESS)
1475 goto error;
1476
1477 entry->state = VMCIQPB_CREATED_MEM;
1478 } else {
1479 /*
1480 * A create without a page_store may be either a host
1481 * side create (in which case we are waiting for the
1482 * guest side to supply the memory) or an old style
1483 * queue pair create (in which case we will expect a
1484 * set page store call as the next step).
1485 */
1486 entry->state = VMCIQPB_CREATED_NO_MEM;
1487 }
1488
1489 qp_list_add_entry(&qp_broker_list, &entry->qp);
1490 if (ent != NULL)
1491 *ent = entry;
1492
1493 /* Add to resource obj */
1494 result = vmci_resource_add(&entry->resource,
1495 VMCI_RESOURCE_TYPE_QPAIR_HOST,
1496 handle);
1497 if (result != VMCI_SUCCESS) {
1498 pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
1499 handle.context, handle.resource, result);
1500 goto error;
1501 }
1502
1503 entry->qp.handle = vmci_resource_handle(&entry->resource);
1504 if (is_local) {
1505 vmci_q_header_init(entry->produce_q->q_header,
1506 entry->qp.handle);
1507 vmci_q_header_init(entry->consume_q->q_header,
1508 entry->qp.handle);
1509 }
1510
1511 vmci_ctx_qp_create(context, entry->qp.handle);
1512
1513 return VMCI_SUCCESS;
1514
1515 error:
1516 if (entry != NULL) {
1517 qp_host_free_queue(entry->produce_q, guest_produce_size);
1518 qp_host_free_queue(entry->consume_q, guest_consume_size);
1519 kfree(entry);
1520 }
1521
1522 return result;
1523}
1524
1525/*
1526 * Enqueues an event datagram to notify the peer VM attached to
1527 * the given queue pair handle about an attach/detach event by the
1528 * given VM. Returns the payload size of the datagram enqueued on
1529 * success, error code otherwise.
1530 */
1531static int qp_notify_peer(bool attach,
1532 struct vmci_handle handle,
1533 u32 my_id,
1534 u32 peer_id)
1535{
1536 int rv;
1537 struct vmci_event_qp ev;
1538
1539 if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
1540 peer_id == VMCI_INVALID_ID)
1541 return VMCI_ERROR_INVALID_ARGS;
1542
1543 /*
1544 * In vmci_ctx_enqueue_datagram() we enforce the upper limit on
1545 * number of pending events from the hypervisor to a given VM
1546 * otherwise a rogue VM could do an arbitrary number of attach
1547 * and detach operations causing memory pressure in the host
1548 * kernel.
1549 */
1550
1551 ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
1552 ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1553 VMCI_CONTEXT_RESOURCE_ID);
1554 ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
1555 ev.msg.event_data.event = attach ?
1556 VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
1557 ev.payload.handle = handle;
1558 ev.payload.peer_id = my_id;
1559
1560 rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
1561 &ev.msg.hdr, false);
1562 if (rv < VMCI_SUCCESS)
1563 pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
1564 attach ? "ATTACH" : "DETACH", peer_id);
1565
1566 return rv;
1567}
1568
1569/*
1570 * The second endpoint issuing a queue pair allocation will attach to
1571 * the queue pair registered with the queue pair broker.
1572 *
1573 * If the attacher is a guest, it will associate a VMX virtual address
1574 * range with the queue pair as specified by the page_store. At this
1575 * point, the already attached host endpoint may start using the queue
1576 * pair, and an attach event is sent to it. For compatibility with
1577 * older VMX'en, that used a separate step to set the VMX virtual
1578 * address range, the virtual address range can be registered later
1579 * using vmci_qp_broker_set_page_store. In that case, a page_store of
1580 * NULL should be used, and the attach event will be generated once
1581 * the actual page store has been set.
1582 *
1583 * If the attacher is the host, a page_store of NULL should be used as
1584 * well, since the page store information is already set by the guest.
1585 *
1586 * For new VMX and host callers, the queue pair will be moved to the
1587 * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
1588 * moved to the VMCIQPB_ATTACHED_NO_MEM state.
1589 */
1590static int qp_broker_attach(struct qp_broker_entry *entry,
1591 u32 peer,
1592 u32 flags,
1593 u32 priv_flags,
1594 u64 produce_size,
1595 u64 consume_size,
1596 struct vmci_qp_page_store *page_store,
1597 struct vmci_ctx *context,
1598 vmci_event_release_cb wakeup_cb,
1599 void *client_data,
1600 struct qp_broker_entry **ent)
1601{
1602 const u32 context_id = vmci_ctx_get_id(context);
1603 bool is_local = flags & VMCI_QPFLAG_LOCAL;
1604 int result;
1605
1606 if (entry->state != VMCIQPB_CREATED_NO_MEM &&
1607 entry->state != VMCIQPB_CREATED_MEM)
1608 return VMCI_ERROR_UNAVAILABLE;
1609
1610 if (is_local) {
1611 if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
1612 context_id != entry->create_id) {
1613 return VMCI_ERROR_INVALID_ARGS;
1614 }
1615 } else if (context_id == entry->create_id ||
1616 context_id == entry->attach_id) {
1617 return VMCI_ERROR_ALREADY_EXISTS;
1618 }
1619
1620 if (VMCI_CONTEXT_IS_VM(context_id) &&
1621 VMCI_CONTEXT_IS_VM(entry->create_id))
1622 return VMCI_ERROR_DST_UNREACHABLE;
1623
1624 /*
1625 * If we are attaching from a restricted context then the queuepair
1626 * must have been created by a trusted endpoint.
1627 */
1628 if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
1629 !entry->created_by_trusted)
1630 return VMCI_ERROR_NO_ACCESS;
1631
1632 /*
1633 * If we are attaching to a queuepair that was created by a restricted
1634 * context then we must be trusted.
1635 */
1636 if (entry->require_trusted_attach &&
1637 (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED)))
1638 return VMCI_ERROR_NO_ACCESS;
1639
1640 /*
1641 * If the creator specifies VMCI_INVALID_ID in "peer" field, access
1642 * control check is not performed.
1643 */
1644 if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id)
1645 return VMCI_ERROR_NO_ACCESS;
1646
1647 if (entry->create_id == VMCI_HOST_CONTEXT_ID) {
1648 /*
1649 * Do not attach if the caller doesn't support Host Queue Pairs
1650 * and a host created this queue pair.
1651 */
1652
1653 if (!vmci_ctx_supports_host_qp(context))
1654 return VMCI_ERROR_INVALID_RESOURCE;
1655
1656 } else if (context_id == VMCI_HOST_CONTEXT_ID) {
1657 struct vmci_ctx *create_context;
1658 bool supports_host_qp;
1659
1660 /*
1661 * Do not attach a host to a user created queue pair if that
1662 * user doesn't support host queue pair end points.
1663 */
1664
1665 create_context = vmci_ctx_get(entry->create_id);
1666 supports_host_qp = vmci_ctx_supports_host_qp(create_context);
1667 vmci_ctx_put(create_context);
1668
1669 if (!supports_host_qp)
1670 return VMCI_ERROR_INVALID_RESOURCE;
1671 }
1672
1673 if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER))
1674 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1675
1676 if (context_id != VMCI_HOST_CONTEXT_ID) {
1677 /*
1678 * The queue pair broker entry stores values from the guest
1679 * point of view, so an attaching guest should match the values
1680 * stored in the entry.
1681 */
1682
1683 if (entry->qp.produce_size != produce_size ||
1684 entry->qp.consume_size != consume_size) {
1685 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1686 }
1687 } else if (entry->qp.produce_size != consume_size ||
1688 entry->qp.consume_size != produce_size) {
1689 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1690 }
1691
1692 if (context_id != VMCI_HOST_CONTEXT_ID) {
1693 /*
1694 * If a guest attached to a queue pair, it will supply
1695 * the backing memory. If this is a pre NOVMVM vmx,
1696 * the backing memory will be supplied by calling
1697 * vmci_qp_broker_set_page_store() following the
1698 * return of the vmci_qp_broker_alloc() call. If it is
1699 * a vmx of version NOVMVM or later, the page store
1700 * must be supplied as part of the
1701 * vmci_qp_broker_alloc call. Under all circumstances,
1702 * the initially created queue pair must not have any
1703 * memory associated with it already.
1704 */
1705
1706 if (entry->state != VMCIQPB_CREATED_NO_MEM)
1707 return VMCI_ERROR_INVALID_ARGS;
1708
1709 if (page_store != NULL) {
1710 /*
1711 * Patch up host state to point to guest
1712 * supplied memory. The VMX already
1713 * initialized the queue pair headers, so no
1714 * need for the kernel side to do that.
1715 */
1716
1717 result = qp_host_register_user_memory(page_store,
1718 entry->produce_q,
1719 entry->consume_q);
1720 if (result < VMCI_SUCCESS)
1721 return result;
1722
1723 /*
1724 * Preemptively load in the headers if non-blocking to
1725 * prevent blocking later.
1726 */
1727 if (entry->qp.flags & VMCI_QPFLAG_NONBLOCK) {
1728 result = qp_host_map_queues(entry->produce_q,
1729 entry->consume_q);
1730 if (result < VMCI_SUCCESS) {
1731 qp_host_unregister_user_memory(
1732 entry->produce_q,
1733 entry->consume_q);
1734 return result;
1735 }
1736 }
1737
1738 entry->state = VMCIQPB_ATTACHED_MEM;
1739 } else {
1740 entry->state = VMCIQPB_ATTACHED_NO_MEM;
1741 }
1742 } else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
1743 /*
1744 * The host side is attempting to attach to a queue
1745 * pair that doesn't have any memory associated with
1746 * it. This must be a pre NOVMVM vmx that hasn't set
1747 * the page store information yet, or a quiesced VM.
1748 */
1749
1750 return VMCI_ERROR_UNAVAILABLE;
1751 } else {
1752 /*
1753 * For non-blocking queue pairs, we cannot rely on
1754 * enqueue/dequeue to map in the pages on the
1755 * host-side, since it may block, so we make an
1756 * attempt here.
1757 */
1758
1759 if (flags & VMCI_QPFLAG_NONBLOCK) {
1760 result =
1761 qp_host_map_queues(entry->produce_q,
1762 entry->consume_q);
1763 if (result < VMCI_SUCCESS)
1764 return result;
1765
1766 entry->qp.flags |= flags &
1767 (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED);
1768 }
1769
1770 /* The host side has successfully attached to a queue pair. */
1771 entry->state = VMCIQPB_ATTACHED_MEM;
1772 }
1773
1774 if (entry->state == VMCIQPB_ATTACHED_MEM) {
1775 result =
1776 qp_notify_peer(true, entry->qp.handle, context_id,
1777 entry->create_id);
1778 if (result < VMCI_SUCCESS)
1779 pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
1780 entry->create_id, entry->qp.handle.context,
1781 entry->qp.handle.resource);
1782 }
1783
1784 entry->attach_id = context_id;
1785 entry->qp.ref_count++;
1786 if (wakeup_cb) {
1787 entry->wakeup_cb = wakeup_cb;
1788 entry->client_data = client_data;
1789 }
1790
1791 /*
1792 * When attaching to local queue pairs, the context already has
1793 * an entry tracking the queue pair, so don't add another one.
1794 */
1795 if (!is_local)
1796 vmci_ctx_qp_create(context, entry->qp.handle);
1797
1798 if (ent != NULL)
1799 *ent = entry;
1800
1801 return VMCI_SUCCESS;
1802}
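
/*
 * Illustrative sketch only, not driver code: the size check from
 * qp_broker_attach() in isolation -- the broker stores sizes from the guest's
 * point of view, so a guest attacher must match them exactly while a host
 * attacher sees produce and consume swapped. The sketch_* name is an
 * assumption for illustration.
 */
static bool sketch_attach_sizes_match(bool attacher_is_host,
				      u64 stored_produce, u64 stored_consume,
				      u64 produce_size, u64 consume_size)
{
	if (attacher_is_host)
		return stored_produce == consume_size &&
			stored_consume == produce_size;

	return stored_produce == produce_size &&
		stored_consume == consume_size;
}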
1803
1804/*
1805 * Queue pair alloc helper for use when setting up queue pair endpoints
1806 * on the host.
1807 */
1808static int qp_broker_alloc(struct vmci_handle handle,
1809 u32 peer,
1810 u32 flags,
1811 u32 priv_flags,
1812 u64 produce_size,
1813 u64 consume_size,
1814 struct vmci_qp_page_store *page_store,
1815 struct vmci_ctx *context,
1816 vmci_event_release_cb wakeup_cb,
1817 void *client_data,
1818 struct qp_broker_entry **ent,
1819 bool *swap)
1820{
1821 const u32 context_id = vmci_ctx_get_id(context);
1822 bool create;
1823 struct qp_broker_entry *entry = NULL;
1824 bool is_local = flags & VMCI_QPFLAG_LOCAL;
1825 int result;
1826
1827 if (vmci_handle_is_invalid(handle) ||
1828 (flags & ~VMCI_QP_ALL_FLAGS) || is_local ||
1829 !(produce_size || consume_size) ||
1830 !context || context_id == VMCI_INVALID_ID ||
1831 handle.context == VMCI_INVALID_ID) {
1832 return VMCI_ERROR_INVALID_ARGS;
1833 }
1834
1835 if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store))
1836 return VMCI_ERROR_INVALID_ARGS;
1837
1838 /*
1839 * In the initial argument check, we ensure that non-vmkernel hosts
1840 * are not allowed to create local queue pairs.
1841 */
1842
1843 mutex_lock(&qp_broker_list.mutex);
1844
1845 if (!is_local && vmci_ctx_qp_exists(context, handle)) {
1846 pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n",
1847 context_id, handle.context, handle.resource);
1848 mutex_unlock(&qp_broker_list.mutex);
1849 return VMCI_ERROR_ALREADY_EXISTS;
1850 }
1851
1852 if (handle.resource != VMCI_INVALID_ID)
1853 entry = qp_broker_handle_to_entry(handle);
1854
1855 if (!entry) {
1856 create = true;
1857 result =
1858 qp_broker_create(handle, peer, flags, priv_flags,
1859 produce_size, consume_size, page_store,
1860 context, wakeup_cb, client_data, ent);
1861 } else {
1862 create = false;
1863 result =
1864 qp_broker_attach(entry, peer, flags, priv_flags,
1865 produce_size, consume_size, page_store,
1866 context, wakeup_cb, client_data, ent);
1867 }
1868
1869 mutex_unlock(&qp_broker_list.mutex);
1870
1871 if (swap)
1872 *swap = (context_id == VMCI_HOST_CONTEXT_ID) &&
1873 !(create && is_local);
1874
1875 return result;
1876}
1877
1878/*
1879 * This function implements the kernel API for allocating a queue
1880 * pair.
1881 */
1882static int qp_alloc_host_work(struct vmci_handle *handle,
1883 struct vmci_queue **produce_q,
1884 u64 produce_size,
1885 struct vmci_queue **consume_q,
1886 u64 consume_size,
1887 u32 peer,
1888 u32 flags,
1889 u32 priv_flags,
1890 vmci_event_release_cb wakeup_cb,
1891 void *client_data)
1892{
1893 struct vmci_handle new_handle;
1894 struct vmci_ctx *context;
1895 struct qp_broker_entry *entry;
1896 int result;
1897 bool swap;
1898
1899 if (vmci_handle_is_invalid(*handle)) {
1900 new_handle = vmci_make_handle(
1901 VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID);
1902 } else
1903 new_handle = *handle;
1904
1905 context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1906 entry = NULL;
1907 result =
1908 qp_broker_alloc(new_handle, peer, flags, priv_flags,
1909 produce_size, consume_size, NULL, context,
1910 wakeup_cb, client_data, &entry, &swap);
1911 if (result == VMCI_SUCCESS) {
1912 if (swap) {
1913 /*
1914 * If this is a local queue pair, the attacher
1915 * will swap around produce and consume
1916 * queues.
1917 */
1918
1919 *produce_q = entry->consume_q;
1920 *consume_q = entry->produce_q;
1921 } else {
1922 *produce_q = entry->produce_q;
1923 *consume_q = entry->consume_q;
1924 }
1925
1926 *handle = vmci_resource_handle(&entry->resource);
1927 } else {
1928 *handle = VMCI_INVALID_HANDLE;
1929 pr_devel("queue pair broker failed to alloc (result=%d)\n",
1930 result);
1931 }
1932 vmci_ctx_put(context);
1933 return result;
1934}
1935
1936/*
1937 * Allocates a VMCI queue_pair. Only checks validity of input
1938 * arguments. The real work is done in the host or guest
1939 * specific function.
1940 */
1941int vmci_qp_alloc(struct vmci_handle *handle,
1942 struct vmci_queue **produce_q,
1943 u64 produce_size,
1944 struct vmci_queue **consume_q,
1945 u64 consume_size,
1946 u32 peer,
1947 u32 flags,
1948 u32 priv_flags,
1949 bool guest_endpoint,
1950 vmci_event_release_cb wakeup_cb,
1951 void *client_data)
1952{
1953 if (!handle || !produce_q || !consume_q ||
1954 (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
1955 return VMCI_ERROR_INVALID_ARGS;
1956
1957 if (guest_endpoint) {
1958 return qp_alloc_guest_work(handle, produce_q,
1959 produce_size, consume_q,
1960 consume_size, peer,
1961 flags, priv_flags);
1962 } else {
1963 return qp_alloc_host_work(handle, produce_q,
1964 produce_size, consume_q,
1965 consume_size, peer, flags,
1966 priv_flags, wakeup_cb, client_data);
1967 }
1968}
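
/*
 * Illustrative sketch only, not driver code: a minimal host-side caller of
 * vmci_qp_alloc() with error handling trimmed. The sketch_* name and the
 * one-page queue sizes are assumptions for illustration.
 */
static int sketch_host_qp_alloc_example(struct vmci_handle *handle,
					struct vmci_queue **produce_q,
					struct vmci_queue **consume_q)
{
	*handle = VMCI_INVALID_HANDLE;	/* ask the broker to pick an id */

	/* guest_endpoint == false routes the request to qp_alloc_host_work(). */
	return vmci_qp_alloc(handle, produce_q, PAGE_SIZE,
			     consume_q, PAGE_SIZE, VMCI_INVALID_ID,
			     0, VMCI_NO_PRIVILEGE_FLAGS,
			     false, NULL, NULL);
}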
1969
1970/*
1971 * This function implements the host kernel API for detaching from
1972 * a queue pair.
1973 */
1974static int qp_detatch_host_work(struct vmci_handle handle)
1975{
1976 int result;
1977 struct vmci_ctx *context;
1978
1979 context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1980
1981 result = vmci_qp_broker_detach(handle, context);
1982
1983 vmci_ctx_put(context);
1984 return result;
1985}
1986
1987/*
1988 * Detaches from a VMCI queue_pair. Only checks validity of input argument.
1989 * Real work is done in the host or guest specific function.
1990 */
1991static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
1992{
1993 if (vmci_handle_is_invalid(handle))
1994 return VMCI_ERROR_INVALID_ARGS;
1995
1996 if (guest_endpoint)
1997 return qp_detatch_guest_work(handle);
1998 else
1999 return qp_detatch_host_work(handle);
2000}
2001
2002/*
2003 * Returns the entry from the head of the list. Assumes that the list is
2004 * locked.
2005 */
2006static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
2007{
2008 if (!list_empty(&qp_list->head)) {
2009 struct qp_entry *entry =
2010 list_first_entry(&qp_list->head, struct qp_entry,
2011 list_item);
2012 return entry;
2013 }
2014
2015 return NULL;
2016}
2017
2018void vmci_qp_broker_exit(void)
2019{
2020 struct qp_entry *entry;
2021 struct qp_broker_entry *be;
2022
2023 mutex_lock(&qp_broker_list.mutex);
2024
2025 while ((entry = qp_list_get_head(&qp_broker_list))) {
2026 be = (struct qp_broker_entry *)entry;
2027
2028 qp_list_remove_entry(&qp_broker_list, entry);
2029 kfree(be);
2030 }
2031
2032 mutex_unlock(&qp_broker_list.mutex);
2033}
2034
2035/*
2036 * Requests that a queue pair be allocated with the VMCI queue
2037 * pair broker. Allocates a queue pair entry if one does not
2038 * exist. Attaches to one if it exists, and retrieves the page
2039 * files backing that queue_pair. The queue pair broker lock is
2040 * taken internally.
2041 */
2042int vmci_qp_broker_alloc(struct vmci_handle handle,
2043 u32 peer,
2044 u32 flags,
2045 u32 priv_flags,
2046 u64 produce_size,
2047 u64 consume_size,
2048 struct vmci_qp_page_store *page_store,
2049 struct vmci_ctx *context)
2050{
2051 return qp_broker_alloc(handle, peer, flags, priv_flags,
2052 produce_size, consume_size,
2053 page_store, context, NULL, NULL, NULL, NULL);
2054}
2055
2056/*
2057 * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
2058 * step to add the UVAs of the VMX mapping of the queue pair. This function
2059 * provides backwards compatibility with such VMX'en, and takes care of
2060 * registering the page store for a queue pair previously allocated by the
2061 * VMX during create or attach. This function will move the queue pair state
2062 * either from VMCIQPB_CREATED_NO_MEM to VMCIQPB_CREATED_MEM or from
2063 * VMCIQPB_ATTACHED_NO_MEM to VMCIQPB_ATTACHED_MEM. If moving to the
2064 * attached state with memory, the queue pair is ready to be used by the
2065 * host peer, and an attached event will be generated.
2066 *
2067 * The queue pair broker lock is taken internally.
2068 *
2069 * This function is only used by the hosted platform, since there is no
2070 * issue with backwards compatibility for vmkernel.
2071 */
2072int vmci_qp_broker_set_page_store(struct vmci_handle handle,
2073 u64 produce_uva,
2074 u64 consume_uva,
2075 struct vmci_ctx *context)
2076{
2077 struct qp_broker_entry *entry;
2078 int result;
2079 const u32 context_id = vmci_ctx_get_id(context);
2080
2081 if (vmci_handle_is_invalid(handle) || !context ||
2082 context_id == VMCI_INVALID_ID)
2083 return VMCI_ERROR_INVALID_ARGS;
2084
2085 /*
2086 * We only support guest to host queue pairs, so the VMX must
2087 * supply UVAs for the mapped page files.
2088 */
2089
2090 if (produce_uva == 0 || consume_uva == 0)
2091 return VMCI_ERROR_INVALID_ARGS;
2092
2093 mutex_lock(&qp_broker_list.mutex);
2094
2095 if (!vmci_ctx_qp_exists(context, handle)) {
2096 pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2097 context_id, handle.context, handle.resource);
2098 result = VMCI_ERROR_NOT_FOUND;
2099 goto out;
2100 }
2101
2102 entry = qp_broker_handle_to_entry(handle);
2103 if (!entry) {
2104 result = VMCI_ERROR_NOT_FOUND;
2105 goto out;
2106 }
2107
2108 /*
2109 * If I'm the owner then I can set the page store.
2110 *
2111 * Or, if a host created the queue_pair and I'm the attached peer
2112 * then I can set the page store.
2113 */
2114 if (entry->create_id != context_id &&
2115 (entry->create_id != VMCI_HOST_CONTEXT_ID ||
2116 entry->attach_id != context_id)) {
2117 result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
2118 goto out;
2119 }
2120
2121 if (entry->state != VMCIQPB_CREATED_NO_MEM &&
2122 entry->state != VMCIQPB_ATTACHED_NO_MEM) {
2123 result = VMCI_ERROR_UNAVAILABLE;
2124 goto out;
2125 }
2126
2127 result = qp_host_get_user_memory(produce_uva, consume_uva,
2128 entry->produce_q, entry->consume_q);
2129 if (result < VMCI_SUCCESS)
2130 goto out;
2131
2132 result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2133 if (result < VMCI_SUCCESS) {
2134 qp_host_unregister_user_memory(entry->produce_q,
2135 entry->consume_q);
2136 goto out;
2137 }
2138
2139 if (entry->state == VMCIQPB_CREATED_NO_MEM)
2140 entry->state = VMCIQPB_CREATED_MEM;
2141 else
2142 entry->state = VMCIQPB_ATTACHED_MEM;
2143
2144 entry->vmci_page_files = true;
2145
2146 if (entry->state == VMCIQPB_ATTACHED_MEM) {
2147 result =
2148 qp_notify_peer(true, handle, context_id, entry->create_id);
2149 if (result < VMCI_SUCCESS) {
2150 pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
2151 entry->create_id, entry->qp.handle.context,
2152 entry->qp.handle.resource);
2153 }
2154 }
2155
2156 result = VMCI_SUCCESS;
2157 out:
2158 mutex_unlock(&qp_broker_list.mutex);
2159 return result;
2160}
2161
2162/*
2163 * Resets saved queue headers for the given QP broker
2164 * entry. Should be used when guest memory becomes available
2165 * again, or the guest detaches.
2166 */
2167static void qp_reset_saved_headers(struct qp_broker_entry *entry)
2168{
2169 entry->produce_q->saved_header = NULL;
2170 entry->consume_q->saved_header = NULL;
2171}
2172
2173/*
2174 * The main entry point for detaching from a queue pair registered with the
2175 * queue pair broker. If more than one endpoint is attached to the queue
2176 * pair, the first endpoint will mainly decrement a reference count and
2177 * generate a notification to its peer. The last endpoint will clean up
2178 * the queue pair state registered with the broker.
2179 *
2180 * When a guest endpoint detaches, it will unmap and unregister the guest
2181 * memory backing the queue pair. If the host is still attached, it will
2182 * no longer be able to access the queue pair content.
2183 *
2184 * If the queue pair is already in a state where there is no memory
2185 * registered for the queue pair (any *_NO_MEM state), it will transition to
2186 * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen if a guest
2187 * endpoint is the first of two endpoints to detach. If the host endpoint is
2188 * the first out of two to detach, the queue pair will move to the
2189 * VMCIQPB_SHUTDOWN_MEM state.
2190 */
2191int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
2192{
2193 struct qp_broker_entry *entry;
2194 const u32 context_id = vmci_ctx_get_id(context);
2195 u32 peer_id;
2196 bool is_local = false;
2197 int result;
2198
2199 if (vmci_handle_is_invalid(handle) || !context ||
2200 context_id == VMCI_INVALID_ID) {
2201 return VMCI_ERROR_INVALID_ARGS;
2202 }
2203
2204 mutex_lock(&qp_broker_list.mutex);
2205
2206 if (!vmci_ctx_qp_exists(context, handle)) {
2207 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2208 context_id, handle.context, handle.resource);
2209 result = VMCI_ERROR_NOT_FOUND;
2210 goto out;
2211 }
2212
2213 entry = qp_broker_handle_to_entry(handle);
2214 if (!entry) {
2215 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2216 context_id, handle.context, handle.resource);
2217 result = VMCI_ERROR_NOT_FOUND;
2218 goto out;
2219 }
2220
2221 if (context_id != entry->create_id && context_id != entry->attach_id) {
2222 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2223 goto out;
2224 }
2225
2226 if (context_id == entry->create_id) {
2227 peer_id = entry->attach_id;
2228 entry->create_id = VMCI_INVALID_ID;
2229 } else {
2230 peer_id = entry->create_id;
2231 entry->attach_id = VMCI_INVALID_ID;
2232 }
2233 entry->qp.ref_count--;
2234
2235 is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2236
2237 if (context_id != VMCI_HOST_CONTEXT_ID) {
2238 bool headers_mapped;
2239
2240 /*
2241 * Pre NOVMVM vmx'en may detach from a queue pair
2242 * before setting the page store, and in that case
2243 * there is no user memory to detach from. Also, more
2244 * recent VMX'en may detach from a queue pair in the
2245 * quiesced state.
2246 */
2247
2248 qp_acquire_queue_mutex(entry->produce_q);
2249 headers_mapped = entry->produce_q->q_header ||
2250 entry->consume_q->q_header;
2251 if (QPBROKERSTATE_HAS_MEM(entry)) {
2252 result =
2253 qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
2254 entry->produce_q,
2255 entry->consume_q);
2256 if (result < VMCI_SUCCESS)
2257 pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2258 handle.context, handle.resource,
2259 result);
2260
2261 if (entry->vmci_page_files)
2262 qp_host_unregister_user_memory(entry->produce_q,
2263 entry->
2264 consume_q);
2265 else
2266 qp_host_unregister_user_memory(entry->produce_q,
2267 entry->
2268 consume_q);
2269
2270 }
2271
2272 if (!headers_mapped)
2273 qp_reset_saved_headers(entry);
2274
2275 qp_release_queue_mutex(entry->produce_q);
2276
2277 if (!headers_mapped && entry->wakeup_cb)
2278 entry->wakeup_cb(entry->client_data);
2279
2280 } else {
2281 if (entry->wakeup_cb) {
2282 entry->wakeup_cb = NULL;
2283 entry->client_data = NULL;
2284 }
2285 }
2286
2287 if (entry->qp.ref_count == 0) {
2288 qp_list_remove_entry(&qp_broker_list, &entry->qp);
2289
2290 if (is_local)
2291 kfree(entry->local_mem);
2292
2293 qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
2294 qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
2295 qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
2296 /* Unlink from resource hash table and free callback */
2297 vmci_resource_remove(&entry->resource);
2298
2299 kfree(entry);
2300
2301 vmci_ctx_qp_destroy(context, handle);
2302 } else {
2303 qp_notify_peer(false, handle, context_id, peer_id);
2304 if (context_id == VMCI_HOST_CONTEXT_ID &&
2305 QPBROKERSTATE_HAS_MEM(entry)) {
2306 entry->state = VMCIQPB_SHUTDOWN_MEM;
2307 } else {
2308 entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
2309 }
2310
2311 if (!is_local)
2312 vmci_ctx_qp_destroy(context, handle);
2313
2314 }
2315 result = VMCI_SUCCESS;
2316 out:
2317 mutex_unlock(&qp_broker_list.mutex);
2318 return result;
2319}
2320
2321/*
2322 * Establishes the necessary mappings for a queue pair given a
2323 * reference to the queue pair guest memory. This is usually
2324 * called when a guest is unquiesced and the VMX is allowed to
2325 * map guest memory once again.
2326 */
2327int vmci_qp_broker_map(struct vmci_handle handle,
2328 struct vmci_ctx *context,
2329 u64 guest_mem)
2330{
2331 struct qp_broker_entry *entry;
2332 const u32 context_id = vmci_ctx_get_id(context);
2333 bool is_local = false;
2334 int result;
2335
2336 if (vmci_handle_is_invalid(handle) || !context ||
2337 context_id == VMCI_INVALID_ID)
2338 return VMCI_ERROR_INVALID_ARGS;
2339
2340 mutex_lock(&qp_broker_list.mutex);
2341
2342 if (!vmci_ctx_qp_exists(context, handle)) {
2343 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2344 context_id, handle.context, handle.resource);
2345 result = VMCI_ERROR_NOT_FOUND;
2346 goto out;
2347 }
2348
2349 entry = qp_broker_handle_to_entry(handle);
2350 if (!entry) {
2351 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2352 context_id, handle.context, handle.resource);
2353 result = VMCI_ERROR_NOT_FOUND;
2354 goto out;
2355 }
2356
2357 if (context_id != entry->create_id && context_id != entry->attach_id) {
2358 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2359 goto out;
2360 }
2361
2362 is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2363 result = VMCI_SUCCESS;
2364
2365 if (context_id != VMCI_HOST_CONTEXT_ID) {
2366 struct vmci_qp_page_store page_store;
2367
2368 page_store.pages = guest_mem;
2369 page_store.len = QPE_NUM_PAGES(entry->qp);
2370
2371 qp_acquire_queue_mutex(entry->produce_q);
2372 qp_reset_saved_headers(entry);
2373 result =
2374 qp_host_register_user_memory(&page_store,
2375 entry->produce_q,
2376 entry->consume_q);
2377 qp_release_queue_mutex(entry->produce_q);
2378 if (result == VMCI_SUCCESS) {
2379 /* Move state from *_NO_MEM to *_MEM */
2380
2381 entry->state++;
2382
2383 if (entry->wakeup_cb)
2384 entry->wakeup_cb(entry->client_data);
2385 }
2386 }
2387
2388 out:
2389 mutex_unlock(&qp_broker_list.mutex);
2390 return result;
2391}
2392
2393/*
2394 * Saves a snapshot of the queue headers for the given QP broker
2395 * entry. Should be used when guest memory is unmapped.
2396 * Results:
2397 * VMCI_SUCCESS on success, appropriate error code if guest memory
 2398 * can't be accessed.
2399 */
2400static int qp_save_headers(struct qp_broker_entry *entry)
2401{
2402 int result;
2403
2404 if (entry->produce_q->saved_header != NULL &&
2405 entry->consume_q->saved_header != NULL) {
2406 /*
2407 * If the headers have already been saved, we don't need to do
2408 * it again, and we don't want to map in the headers
2409 * unnecessarily.
2410 */
2411
2412 return VMCI_SUCCESS;
2413 }
2414
2415 if (NULL == entry->produce_q->q_header ||
2416 NULL == entry->consume_q->q_header) {
2417 result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2418 if (result < VMCI_SUCCESS)
2419 return result;
2420 }
2421
2422 memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
2423 sizeof(entry->saved_produce_q));
2424 entry->produce_q->saved_header = &entry->saved_produce_q;
2425 memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
2426 sizeof(entry->saved_consume_q));
2427 entry->consume_q->saved_header = &entry->saved_consume_q;
2428
2429 return VMCI_SUCCESS;
2430}
2431
2432/*
2433 * Removes all references to the guest memory of a given queue pair, and
2434 * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
2435 * called when a VM is being quiesced where access to guest memory should
 2436 * be avoided.
2437 */
2438int vmci_qp_broker_unmap(struct vmci_handle handle,
2439 struct vmci_ctx *context,
2440 u32 gid)
2441{
2442 struct qp_broker_entry *entry;
2443 const u32 context_id = vmci_ctx_get_id(context);
2444 bool is_local = false;
2445 int result;
2446
2447 if (vmci_handle_is_invalid(handle) || !context ||
2448 context_id == VMCI_INVALID_ID)
2449 return VMCI_ERROR_INVALID_ARGS;
2450
2451 mutex_lock(&qp_broker_list.mutex);
2452
2453 if (!vmci_ctx_qp_exists(context, handle)) {
2454 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2455 context_id, handle.context, handle.resource);
2456 result = VMCI_ERROR_NOT_FOUND;
2457 goto out;
2458 }
2459
2460 entry = qp_broker_handle_to_entry(handle);
2461 if (!entry) {
2462 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2463 context_id, handle.context, handle.resource);
2464 result = VMCI_ERROR_NOT_FOUND;
2465 goto out;
2466 }
2467
2468 if (context_id != entry->create_id && context_id != entry->attach_id) {
2469 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2470 goto out;
2471 }
2472
2473 is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2474
2475 if (context_id != VMCI_HOST_CONTEXT_ID) {
2476 qp_acquire_queue_mutex(entry->produce_q);
2477 result = qp_save_headers(entry);
2478 if (result < VMCI_SUCCESS)
2479 pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2480 handle.context, handle.resource, result);
2481
2482 qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q);
2483
2484 /*
2485 * On hosted, when we unmap queue pairs, the VMX will also
2486 * unmap the guest memory, so we invalidate the previously
2487 * registered memory. If the queue pair is mapped again at a
2488 * later point in time, we will need to reregister the user
2489 * memory with a possibly new user VA.
2490 */
2491 qp_host_unregister_user_memory(entry->produce_q,
2492 entry->consume_q);
2493
2494 /*
2495 * Move state from *_MEM to *_NO_MEM.
2496 */
2497 entry->state--;
2498
2499 qp_release_queue_mutex(entry->produce_q);
2500 }
2501
2502 result = VMCI_SUCCESS;
2503
2504 out:
2505 mutex_unlock(&qp_broker_list.mutex);
2506 return result;
2507}
2508
2509/*
2510 * Destroys all guest queue pair endpoints. If active guest queue
2511 * pairs still exist, hypercalls to attempt detach from these
2512 * queue pairs will be made. Any failure to detach is silently
2513 * ignored.
2514 */
2515void vmci_qp_guest_endpoints_exit(void)
2516{
2517 struct qp_entry *entry;
2518 struct qp_guest_endpoint *ep;
2519
2520 mutex_lock(&qp_guest_endpoints.mutex);
2521
2522 while ((entry = qp_list_get_head(&qp_guest_endpoints))) {
2523 ep = (struct qp_guest_endpoint *)entry;
2524
2525 /* Don't make a hypercall for local queue_pairs. */
2526 if (!(entry->flags & VMCI_QPFLAG_LOCAL))
2527 qp_detatch_hypercall(entry->handle);
2528
2529 /* We cannot fail the exit, so let's reset ref_count. */
2530 entry->ref_count = 0;
2531 qp_list_remove_entry(&qp_guest_endpoints, entry);
2532
2533 qp_guest_endpoint_destroy(ep);
2534 }
2535
2536 mutex_unlock(&qp_guest_endpoints.mutex);
2537}
2538
2539/*
2540 * Helper routine that will lock the queue pair before subsequent
2541 * operations.
2542 * Note: Non-blocking on the host side is currently only implemented in ESX.
2543 * Since non-blocking isn't yet implemented on the host personality we
2544 * have no reason to acquire a spin lock. So to avoid the use of an
 2545 * unnecessary lock, only acquire the mutex if we can block.
2546 * Note: It is assumed that QPFLAG_PINNED implies QPFLAG_NONBLOCK. Therefore
2547 * we can use the same locking function for access to both the queue
 2548 * and the queue headers as it is the same logic. Assert this behavior.
2549 */
2550static void qp_lock(const struct vmci_qp *qpair)
2551{
2552 if (vmci_can_block(qpair->flags))
2553 qp_acquire_queue_mutex(qpair->produce_q);
2554}
2555
2556/*
2557 * Helper routine that unlocks the queue pair after calling
2558 * qp_lock. Respects non-blocking and pinning flags.
2559 */
2560static void qp_unlock(const struct vmci_qp *qpair)
2561{
2562 if (vmci_can_block(qpair->flags))
2563 qp_release_queue_mutex(qpair->produce_q);
2564}
2565
2566/*
2567 * The queue headers may not be mapped at all times. If a queue is
2568 * currently not mapped, it will be attempted to do so.
2569 */
2570static int qp_map_queue_headers(struct vmci_queue *produce_q,
2571 struct vmci_queue *consume_q,
2572 bool can_block)
2573{
2574 int result;
2575
2576 if (NULL == produce_q->q_header || NULL == consume_q->q_header) {
2577 if (can_block)
2578 result = qp_host_map_queues(produce_q, consume_q);
2579 else
2580 result = VMCI_ERROR_QUEUEPAIR_NOT_READY;
2581
2582 if (result < VMCI_SUCCESS)
2583 return (produce_q->saved_header &&
2584 consume_q->saved_header) ?
2585 VMCI_ERROR_QUEUEPAIR_NOT_READY :
2586 VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2587 }
2588
2589 return VMCI_SUCCESS;
2590}
2591
2592/*
2593 * Helper routine that will retrieve the produce and consume
2594 * headers of a given queue pair. If the guest memory of the
2595 * queue pair is currently not available, the saved queue headers
2596 * will be returned, if these are available.
2597 */
2598static int qp_get_queue_headers(const struct vmci_qp *qpair,
2599 struct vmci_queue_header **produce_q_header,
2600 struct vmci_queue_header **consume_q_header)
2601{
2602 int result;
2603
2604 result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q,
2605 vmci_can_block(qpair->flags));
2606 if (result == VMCI_SUCCESS) {
2607 *produce_q_header = qpair->produce_q->q_header;
2608 *consume_q_header = qpair->consume_q->q_header;
2609 } else if (qpair->produce_q->saved_header &&
2610 qpair->consume_q->saved_header) {
2611 *produce_q_header = qpair->produce_q->saved_header;
2612 *consume_q_header = qpair->consume_q->saved_header;
2613 result = VMCI_SUCCESS;
2614 }
2615
2616 return result;
2617}
2618
2619/*
2620 * Callback from VMCI queue pair broker indicating that a queue
2621 * pair that was previously not ready, now either is ready or
2622 * gone forever.
2623 */
2624static int qp_wakeup_cb(void *client_data)
2625{
2626 struct vmci_qp *qpair = (struct vmci_qp *)client_data;
2627
2628 qp_lock(qpair);
2629 while (qpair->blocked > 0) {
2630 qpair->blocked--;
2631 qpair->generation++;
2632 wake_up(&qpair->event);
2633 }
2634 qp_unlock(qpair);
2635
2636 return VMCI_SUCCESS;
2637}
2638
2639/*
2640 * Makes the calling thread wait for the queue pair to become
2641 * ready for host side access. Returns true when thread is
2642 * woken up after queue pair state change, false otherwise.
2643 */
2644static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
2645{
2646 unsigned int generation;
2647
2648 if (qpair->flags & VMCI_QPFLAG_NONBLOCK)
2649 return false;
2650
2651 qpair->blocked++;
2652 generation = qpair->generation;
2653 qp_unlock(qpair);
2654 wait_event(qpair->event, generation != qpair->generation);
2655 qp_lock(qpair);
2656
2657 return true;
2658}
2659
2660/*
2661 * Enqueues a given buffer to the produce queue using the provided
2662 * function. As many bytes as possible (space available in the queue)
2663 * are enqueued. Assumes the queue->mutex has been acquired. Returns
2664 * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
2665 * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
2666 * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
 2667 * an error occurred when accessing the buffer,
2668 * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
2669 * available. Otherwise, the number of bytes written to the queue is
2670 * returned. Updates the tail pointer of the produce queue.
2671 */
2672static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
2673 struct vmci_queue *consume_q,
2674 const u64 produce_q_size,
2675 const void *buf,
2676 size_t buf_size,
2677 vmci_memcpy_to_queue_func memcpy_to_queue,
2678 bool can_block)
2679{
2680 s64 free_space;
2681 u64 tail;
2682 size_t written;
2683 ssize_t result;
2684
2685 result = qp_map_queue_headers(produce_q, consume_q, can_block);
2686 if (unlikely(result != VMCI_SUCCESS))
2687 return result;
2688
2689 free_space = vmci_q_header_free_space(produce_q->q_header,
2690 consume_q->q_header,
2691 produce_q_size);
2692 if (free_space == 0)
2693 return VMCI_ERROR_QUEUEPAIR_NOSPACE;
2694
2695 if (free_space < VMCI_SUCCESS)
2696 return (ssize_t) free_space;
2697
2698 written = (size_t) (free_space > buf_size ? buf_size : free_space);
2699 tail = vmci_q_header_producer_tail(produce_q->q_header);
2700 if (likely(tail + written < produce_q_size)) {
2701 result = memcpy_to_queue(produce_q, tail, buf, 0, written);
2702 } else {
2703 /* Tail pointer wraps around. */
2704
2705 const size_t tmp = (size_t) (produce_q_size - tail);
2706
2707 result = memcpy_to_queue(produce_q, tail, buf, 0, tmp);
2708 if (result >= VMCI_SUCCESS)
2709 result = memcpy_to_queue(produce_q, 0, buf, tmp,
2710 written - tmp);
2711 }
2712
2713 if (result < VMCI_SUCCESS)
2714 return result;
2715
2716 vmci_q_header_add_producer_tail(produce_q->q_header, written,
2717 produce_q_size);
2718 return written;
2719}
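/*
 * Illustrative sketch, not part of this driver: the wrap-around split used
 * by qp_enqueue_locked() above, and mirrored by qp_dequeue_locked() below,
 * shown in isolation. Given the current tail (or head) and the number of
 * bytes to transfer, it returns how many bytes are copied before the copy
 * wraps to offset 0; the remainder, nbytes minus the returned value, is
 * then copied starting at offset 0. ring_first_chunk() is an illustrative
 * name, not a function provided by this driver.
 */
static inline u64 ring_first_chunk(u64 ring_size, u64 offset, u64 nbytes)
{
	/* Fits without wrapping: a single copy, the likely() path above. */
	if (offset + nbytes < ring_size)
		return nbytes;

	/* Wraps: copy up to the end of the ring first. */
	return ring_size - offset;
}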
2720
2721/*
2722 * Dequeues data (if available) from the given consume queue. Writes data
2723 * to the user provided buffer using the provided function.
2724 * Assumes the queue->mutex has been acquired.
2725 * Results:
2726 * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
2727 * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
2728 * (as defined by the queue size).
 2729 * VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
2730 * Otherwise the number of bytes dequeued is returned.
2731 * Side effects:
2732 * Updates the head pointer of the consume queue.
2733 */
2734static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
2735 struct vmci_queue *consume_q,
2736 const u64 consume_q_size,
2737 void *buf,
2738 size_t buf_size,
2739 vmci_memcpy_from_queue_func memcpy_from_queue,
2740 bool update_consumer,
2741 bool can_block)
2742{
2743 s64 buf_ready;
2744 u64 head;
2745 size_t read;
2746 ssize_t result;
2747
2748 result = qp_map_queue_headers(produce_q, consume_q, can_block);
2749 if (unlikely(result != VMCI_SUCCESS))
2750 return result;
2751
2752 buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
2753 produce_q->q_header,
2754 consume_q_size);
2755 if (buf_ready == 0)
2756 return VMCI_ERROR_QUEUEPAIR_NODATA;
2757
2758 if (buf_ready < VMCI_SUCCESS)
2759 return (ssize_t) buf_ready;
2760
2761 read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
2762 head = vmci_q_header_consumer_head(produce_q->q_header);
2763 if (likely(head + read < consume_q_size)) {
2764 result = memcpy_from_queue(buf, 0, consume_q, head, read);
2765 } else {
2766 /* Head pointer wraps around. */
2767
2768 const size_t tmp = (size_t) (consume_q_size - head);
2769
2770 result = memcpy_from_queue(buf, 0, consume_q, head, tmp);
2771 if (result >= VMCI_SUCCESS)
2772 result = memcpy_from_queue(buf, tmp, consume_q, 0,
2773 read - tmp);
2774
2775 }
2776
2777 if (result < VMCI_SUCCESS)
2778 return result;
2779
2780 if (update_consumer)
2781 vmci_q_header_add_consumer_head(produce_q->q_header,
2782 read, consume_q_size);
2783
2784 return read;
2785}
2786
2787/*
2788 * vmci_qpair_alloc() - Allocates a queue pair.
2789 * @qpair: Pointer for the new vmci_qp struct.
2790 * @handle: Handle to track the resource.
2791 * @produce_qsize: Desired size of the producer queue.
2792 * @consume_qsize: Desired size of the consumer queue.
2793 * @peer: ContextID of the peer.
2794 * @flags: VMCI flags.
 2795 * @priv_flags: VMCI privilege flags.
2796 *
2797 * This is the client interface for allocating the memory for a
2798 * vmci_qp structure and then attaching to the underlying
2799 * queue. If an error occurs allocating the memory for the
 2800 * vmci_qp structure, no attempt is made to attach. If an
2801 * error occurs attaching, then the structure is freed.
2802 */
2803int vmci_qpair_alloc(struct vmci_qp **qpair,
2804 struct vmci_handle *handle,
2805 u64 produce_qsize,
2806 u64 consume_qsize,
2807 u32 peer,
2808 u32 flags,
2809 u32 priv_flags)
2810{
2811 struct vmci_qp *my_qpair;
2812 int retval;
2813 struct vmci_handle src = VMCI_INVALID_HANDLE;
2814 struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
2815 enum vmci_route route;
2816 vmci_event_release_cb wakeup_cb;
2817 void *client_data;
2818
2819 /*
2820 * Restrict the size of a queuepair. The device already
2821 * enforces a limit on the total amount of memory that can be
2822 * allocated to queuepairs for a guest. However, we try to
2823 * allocate this memory before we make the queuepair
2824 * allocation hypercall. On Linux, we allocate each page
2825 * separately, which means rather than fail, the guest will
2826 * thrash while it tries to allocate, and will become
2827 * increasingly unresponsive to the point where it appears to
2828 * be hung. So we place a limit on the size of an individual
2829 * queuepair here, and leave the device to enforce the
2830 * restriction on total queuepair memory. (Note that this
2831 * doesn't prevent all cases; a user with only this much
2832 * physical memory could still get into trouble.) The error
2833 * used by the device is NO_RESOURCES, so use that here too.
2834 */
2835
2836 if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
2837 produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
2838 return VMCI_ERROR_NO_RESOURCES;
2839
2840 retval = vmci_route(&src, &dst, false, &route);
2841 if (retval < VMCI_SUCCESS)
2842 route = vmci_guest_code_active() ?
2843 VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
2844
2845 /* If NONBLOCK or PINNED is set, we better be the guest personality. */
2846 if ((!vmci_can_block(flags) || vmci_qp_pinned(flags)) &&
2847 VMCI_ROUTE_AS_GUEST != route) {
2848 pr_devel("Not guest personality w/ NONBLOCK OR PINNED set");
2849 return VMCI_ERROR_INVALID_ARGS;
2850 }
2851
2852 /*
2853 * Limit the size of pinned QPs and check sanity.
2854 *
 2855 * Pinned pages imply non-blocking mode. Mutexes aren't acquired
 2856 * when the NONBLOCK flag is set in qpair code, and also should not be
 2857 * acquired when the PINNED flag is set. Since pinning pages
2858 * implies we want speed, it makes no sense not to have NONBLOCK
2859 * set if PINNED is set. Hence enforce this implication.
2860 */
2861 if (vmci_qp_pinned(flags)) {
2862 if (vmci_can_block(flags)) {
2863 pr_err("Attempted to enable pinning w/o non-blocking");
2864 return VMCI_ERROR_INVALID_ARGS;
2865 }
2866
2867 if (produce_qsize + consume_qsize > VMCI_MAX_PINNED_QP_MEMORY)
2868 return VMCI_ERROR_NO_RESOURCES;
2869 }
2870
2871 my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
2872 if (!my_qpair)
2873 return VMCI_ERROR_NO_MEM;
2874
2875 my_qpair->produce_q_size = produce_qsize;
2876 my_qpair->consume_q_size = consume_qsize;
2877 my_qpair->peer = peer;
2878 my_qpair->flags = flags;
2879 my_qpair->priv_flags = priv_flags;
2880
2881 wakeup_cb = NULL;
2882 client_data = NULL;
2883
2884 if (VMCI_ROUTE_AS_HOST == route) {
2885 my_qpair->guest_endpoint = false;
2886 if (!(flags & VMCI_QPFLAG_LOCAL)) {
2887 my_qpair->blocked = 0;
2888 my_qpair->generation = 0;
2889 init_waitqueue_head(&my_qpair->event);
2890 wakeup_cb = qp_wakeup_cb;
2891 client_data = (void *)my_qpair;
2892 }
2893 } else {
2894 my_qpair->guest_endpoint = true;
2895 }
2896
2897 retval = vmci_qp_alloc(handle,
2898 &my_qpair->produce_q,
2899 my_qpair->produce_q_size,
2900 &my_qpair->consume_q,
2901 my_qpair->consume_q_size,
2902 my_qpair->peer,
2903 my_qpair->flags,
2904 my_qpair->priv_flags,
2905 my_qpair->guest_endpoint,
2906 wakeup_cb, client_data);
2907
2908 if (retval < VMCI_SUCCESS) {
2909 kfree(my_qpair);
2910 return retval;
2911 }
2912
2913 *qpair = my_qpair;
2914 my_qpair->handle = *handle;
2915
2916 return retval;
2917}
2918EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
2919
2920/*
 2921 * vmci_qpair_detach() - Detaches the client from a queue pair.
2922 * @qpair: Reference of a pointer to the qpair struct.
2923 *
2924 * This is the client interface for detaching from a VMCIQPair.
2925 * Note that this routine will free the memory allocated for the
2926 * vmci_qp structure too.
2927 */
2928int vmci_qpair_detach(struct vmci_qp **qpair)
2929{
2930 int result;
2931 struct vmci_qp *old_qpair;
2932
2933 if (!qpair || !(*qpair))
2934 return VMCI_ERROR_INVALID_ARGS;
2935
2936 old_qpair = *qpair;
2937 result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
2938
2939 /*
2940 * The guest can fail to detach for a number of reasons, and
2941 * if it does so, it will cleanup the entry (if there is one).
2942 * The host can fail too, but it won't cleanup the entry
2943 * immediately, it will do that later when the context is
2944 * freed. Either way, we need to release the qpair struct
2945 * here; there isn't much the caller can do, and we don't want
2946 * to leak.
2947 */
2948
2949 memset(old_qpair, 0, sizeof(*old_qpair));
2950 old_qpair->handle = VMCI_INVALID_HANDLE;
2951 old_qpair->peer = VMCI_INVALID_ID;
2952 kfree(old_qpair);
2953 *qpair = NULL;
2954
2955 return result;
2956}
2957EXPORT_SYMBOL_GPL(vmci_qpair_detach);
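/*
 * Illustrative sketch, not part of this driver: the allocate/detach
 * lifecycle described above as a hypothetical in-kernel client might use
 * it. The peer context ID parameter, the 64 KiB queue sizes and the zero
 * flags/privilege flags are assumptions made for the example only.
 */
static int example_qpair_create(u32 peer_cid, struct vmci_qp **qpair,
				struct vmci_handle *handle)
{
	int rv;

	/* 64 KiB in each direction; flags and priv_flags left at zero. */
	rv = vmci_qpair_alloc(qpair, handle, 64 * 1024, 64 * 1024,
			      peer_cid, 0, 0);
	if (rv < VMCI_SUCCESS)
		return rv;

	/* ... exchange data with the peer ... */

	/* Detaching also frees the vmci_qp structure and clears *qpair. */
	return vmci_qpair_detach(qpair);
}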
2958
2959/*
2960 * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
2961 * @qpair: Pointer to the queue pair struct.
2962 * @producer_tail: Reference used for storing producer tail index.
2963 * @consumer_head: Reference used for storing the consumer head index.
2964 *
2965 * This is the client interface for getting the current indexes of the
 2966 * QPair from the point of view of the caller as the producer.
2967 */
2968int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
2969 u64 *producer_tail,
2970 u64 *consumer_head)
2971{
2972 struct vmci_queue_header *produce_q_header;
2973 struct vmci_queue_header *consume_q_header;
2974 int result;
2975
2976 if (!qpair)
2977 return VMCI_ERROR_INVALID_ARGS;
2978
2979 qp_lock(qpair);
2980 result =
2981 qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2982 if (result == VMCI_SUCCESS)
2983 vmci_q_header_get_pointers(produce_q_header, consume_q_header,
2984 producer_tail, consumer_head);
2985 qp_unlock(qpair);
2986
2987 if (result == VMCI_SUCCESS &&
2988 ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
2989 (consumer_head && *consumer_head >= qpair->produce_q_size)))
2990 return VMCI_ERROR_INVALID_SIZE;
2991
2992 return result;
2993}
2994EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
2995
2996/*
 2997 * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
2998 * @qpair: Pointer to the queue pair struct.
2999 * @consumer_tail: Reference used for storing consumer tail index.
3000 * @producer_head: Reference used for storing the producer head index.
3001 *
3002 * This is the client interface for getting the current indexes of the
 3003 * QPair from the point of view of the caller as the consumer.
3004 */
3005int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
3006 u64 *consumer_tail,
3007 u64 *producer_head)
3008{
3009 struct vmci_queue_header *produce_q_header;
3010 struct vmci_queue_header *consume_q_header;
3011 int result;
3012
3013 if (!qpair)
3014 return VMCI_ERROR_INVALID_ARGS;
3015
3016 qp_lock(qpair);
3017 result =
3018 qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3019 if (result == VMCI_SUCCESS)
3020 vmci_q_header_get_pointers(consume_q_header, produce_q_header,
3021 consumer_tail, producer_head);
3022 qp_unlock(qpair);
3023
3024 if (result == VMCI_SUCCESS &&
3025 ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
3026 (producer_head && *producer_head >= qpair->consume_q_size)))
3027 return VMCI_ERROR_INVALID_SIZE;
3028
3029 return result;
3030}
3031EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
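/*
 * Illustrative sketch, not part of this driver: what the index pairs
 * returned by the two accessors above mean. For a queue of size q_size,
 * the number of bytes currently enqueued follows from simple modular
 * arithmetic on the producer tail and consumer head; example_queue_usage()
 * is an illustrative helper name only.
 */
static inline u64 example_queue_usage(u64 producer_tail, u64 consumer_head,
				      u64 q_size)
{
	if (producer_tail >= consumer_head)
		return producer_tail - consumer_head;

	/* The tail has wrapped past the end of the queue. */
	return q_size - (consumer_head - producer_tail);
}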
3032
3033/*
3034 * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
3035 * @qpair: Pointer to the queue pair struct.
3036 *
3037 * This is the client interface for getting the amount of free
 3038 * space in the QPair from the point of view of the caller as
 3039 * the producer, which is the common case. Returns < 0 on error,
 3040 * otherwise the number of free bytes into which data can be enqueued.
3041 */
3042s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair)
3043{
3044 struct vmci_queue_header *produce_q_header;
3045 struct vmci_queue_header *consume_q_header;
3046 s64 result;
3047
3048 if (!qpair)
3049 return VMCI_ERROR_INVALID_ARGS;
3050
3051 qp_lock(qpair);
3052 result =
3053 qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3054 if (result == VMCI_SUCCESS)
3055 result = vmci_q_header_free_space(produce_q_header,
3056 consume_q_header,
3057 qpair->produce_q_size);
3058 else
3059 result = 0;
3060
3061 qp_unlock(qpair);
3062
3063 return result;
3064}
3065EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space);
3066
3067/*
3068 * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue.
3069 * @qpair: Pointer to the queue pair struct.
3070 *
3071 * This is the client interface for getting the amount of free
 3072 * space in the QPair from the point of view of the caller as
 3073 * the consumer, which is not the common case. Returns < 0 on error,
 3074 * otherwise the number of free bytes into which data can be enqueued.
3075 */
3076s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair)
3077{
3078 struct vmci_queue_header *produce_q_header;
3079 struct vmci_queue_header *consume_q_header;
3080 s64 result;
3081
3082 if (!qpair)
3083 return VMCI_ERROR_INVALID_ARGS;
3084
3085 qp_lock(qpair);
3086 result =
3087 qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3088 if (result == VMCI_SUCCESS)
3089 result = vmci_q_header_free_space(consume_q_header,
3090 produce_q_header,
3091 qpair->consume_q_size);
3092 else
3093 result = 0;
3094
3095 qp_unlock(qpair);
3096
3097 return result;
3098}
3099EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space);
3100
3101/*
3102 * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from
3103 * producer queue.
3104 * @qpair: Pointer to the queue pair struct.
3105 *
3106 * This is the client interface for getting the amount of
 3107 * enqueued data in the QPair from the point of view of the
 3108 * caller as the producer, which is not the common case. Returns < 0
 3109 * on error, otherwise the number of enqueued bytes available to read.
3110 */
3111s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair)
3112{
3113 struct vmci_queue_header *produce_q_header;
3114 struct vmci_queue_header *consume_q_header;
3115 s64 result;
3116
3117 if (!qpair)
3118 return VMCI_ERROR_INVALID_ARGS;
3119
3120 qp_lock(qpair);
3121 result =
3122 qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3123 if (result == VMCI_SUCCESS)
3124 result = vmci_q_header_buf_ready(produce_q_header,
3125 consume_q_header,
3126 qpair->produce_q_size);
3127 else
3128 result = 0;
3129
3130 qp_unlock(qpair);
3131
3132 return result;
3133}
3134EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready);
3135
3136/*
3137 * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from
3138 * consumer queue.
3139 * @qpair: Pointer to the queue pair struct.
3140 *
3141 * This is the client interface for getting the amount of
3142 * enqueued data in the QPair from the point of the view of the
3143 * caller as the consumer which is the normal case. Returns < 0 if err,
3144 * else available bytes that may be read.
3145 */
3146s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair)
3147{
3148 struct vmci_queue_header *produce_q_header;
3149 struct vmci_queue_header *consume_q_header;
3150 s64 result;
3151
3152 if (!qpair)
3153 return VMCI_ERROR_INVALID_ARGS;
3154
3155 qp_lock(qpair);
3156 result =
3157 qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3158 if (result == VMCI_SUCCESS)
3159 result = vmci_q_header_buf_ready(consume_q_header,
3160 produce_q_header,
3161 qpair->consume_q_size);
3162 else
3163 result = 0;
3164
3165 qp_unlock(qpair);
3166
3167 return result;
3168}
3169EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready);
3170
3171/*
3172 * vmci_qpair_enqueue() - Throw data on the queue.
3173 * @qpair: Pointer to the queue pair struct.
3174 * @buf: Pointer to buffer containing data
3175 * @buf_size: Length of buffer.
3176 * @buf_type: Buffer type (Unused).
3177 *
3178 * This is the client interface for enqueueing data into the queue.
3179 * Returns number of bytes enqueued or < 0 on error.
3180 */
3181ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
3182 const void *buf,
3183 size_t buf_size,
3184 int buf_type)
3185{
3186 ssize_t result;
3187
3188 if (!qpair || !buf)
3189 return VMCI_ERROR_INVALID_ARGS;
3190
3191 qp_lock(qpair);
3192
3193 do {
3194 result = qp_enqueue_locked(qpair->produce_q,
3195 qpair->consume_q,
3196 qpair->produce_q_size,
3197 buf, buf_size,
3198 qp_memcpy_to_queue,
3199 vmci_can_block(qpair->flags));
3200
3201 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3202 !qp_wait_for_ready_queue(qpair))
3203 result = VMCI_ERROR_WOULD_BLOCK;
3204
3205 } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3206
3207 qp_unlock(qpair);
3208
3209 return result;
3210}
3211EXPORT_SYMBOL_GPL(vmci_qpair_enqueue);
3212
3213/*
3214 * vmci_qpair_dequeue() - Get data from the queue.
3215 * @qpair: Pointer to the queue pair struct.
3216 * @buf: Pointer to buffer for the data
3217 * @buf_size: Length of buffer.
3218 * @buf_type: Buffer type (Unused).
3219 *
3220 * This is the client interface for dequeueing data from the queue.
3221 * Returns number of bytes dequeued or < 0 on error.
3222 */
3223ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
3224 void *buf,
3225 size_t buf_size,
3226 int buf_type)
3227{
3228 ssize_t result;
3229
3230 if (!qpair || !buf)
3231 return VMCI_ERROR_INVALID_ARGS;
3232
3233 qp_lock(qpair);
3234
3235 do {
3236 result = qp_dequeue_locked(qpair->produce_q,
3237 qpair->consume_q,
3238 qpair->consume_q_size,
3239 buf, buf_size,
3240 qp_memcpy_from_queue, true,
3241 vmci_can_block(qpair->flags));
3242
3243 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3244 !qp_wait_for_ready_queue(qpair))
3245 result = VMCI_ERROR_WOULD_BLOCK;
3246
3247 } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3248
3249 qp_unlock(qpair);
3250
3251 return result;
3252}
3253EXPORT_SYMBOL_GPL(vmci_qpair_dequeue);
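/*
 * Illustrative sketch, not part of this driver: how a client might drive
 * the enqueue and dequeue interfaces above. The "no data"/"no space"
 * handling simply returns to the caller; a real client would typically
 * wait for a doorbell or queue pair event rather than spin.
 * example_qpair_echo() and its scratch buffer are assumptions made for
 * the example only.
 */
static ssize_t example_qpair_echo(struct vmci_qp *qpair, void *scratch,
				  size_t scratch_size)
{
	ssize_t got, sent;

	/* Pull whatever is available, up to scratch_size bytes. */
	got = vmci_qpair_dequeue(qpair, scratch, scratch_size, 0);
	if (got == VMCI_ERROR_QUEUEPAIR_NODATA)
		return 0;	/* nothing to echo yet */
	if (got < 0)
		return got;

	/* Push it back; the produce queue may be momentarily full. */
	sent = vmci_qpair_enqueue(qpair, scratch, got, 0);
	if (sent == VMCI_ERROR_QUEUEPAIR_NOSPACE)
		return 0;	/* caller should retry later */

	return sent;
}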
3254
3255/*
3256 * vmci_qpair_peek() - Peek at the data in the queue.
3257 * @qpair: Pointer to the queue pair struct.
3258 * @buf: Pointer to buffer for the data
3259 * @buf_size: Length of buffer.
3260 * @buf_type: Buffer type (Unused on Linux).
3261 *
3262 * This is the client interface for peeking into a queue. (I.e.,
3263 * copy data from the queue without updating the head pointer.)
 3264 * Returns number of bytes peeked or < 0 on error.
3265 */
3266ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
3267 void *buf,
3268 size_t buf_size,
3269 int buf_type)
3270{
3271 ssize_t result;
3272
3273 if (!qpair || !buf)
3274 return VMCI_ERROR_INVALID_ARGS;
3275
3276 qp_lock(qpair);
3277
3278 do {
3279 result = qp_dequeue_locked(qpair->produce_q,
3280 qpair->consume_q,
3281 qpair->consume_q_size,
3282 buf, buf_size,
3283 qp_memcpy_from_queue, false,
3284 vmci_can_block(qpair->flags));
3285
3286 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3287 !qp_wait_for_ready_queue(qpair))
3288 result = VMCI_ERROR_WOULD_BLOCK;
3289
3290 } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3291
3292 qp_unlock(qpair);
3293
3294 return result;
3295}
3296EXPORT_SYMBOL_GPL(vmci_qpair_peek);
3297
3298/*
3299 * vmci_qpair_enquev() - Throw data on the queue using iov.
3300 * @qpair: Pointer to the queue pair struct.
3301 * @iov: Pointer to buffer containing data
3302 * @iov_size: Length of buffer.
3303 * @buf_type: Buffer type (Unused).
3304 *
3305 * This is the client interface for enqueueing data into the queue.
3306 * This function uses IO vectors to handle the work. Returns number
3307 * of bytes enqueued or < 0 on error.
3308 */
3309ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
3310 void *iov,
3311 size_t iov_size,
3312 int buf_type)
3313{
3314 ssize_t result;
3315
3316 if (!qpair || !iov)
3317 return VMCI_ERROR_INVALID_ARGS;
3318
3319 qp_lock(qpair);
3320
3321 do {
3322 result = qp_enqueue_locked(qpair->produce_q,
3323 qpair->consume_q,
3324 qpair->produce_q_size,
3325 iov, iov_size,
3326 qp_memcpy_to_queue_iov,
3327 vmci_can_block(qpair->flags));
3328
3329 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3330 !qp_wait_for_ready_queue(qpair))
3331 result = VMCI_ERROR_WOULD_BLOCK;
3332
3333 } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3334
3335 qp_unlock(qpair);
3336
3337 return result;
3338}
3339EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
3340
3341/*
3342 * vmci_qpair_dequev() - Get data from the queue using iov.
3343 * @qpair: Pointer to the queue pair struct.
3344 * @iov: Pointer to buffer for the data
3345 * @iov_size: Length of buffer.
3346 * @buf_type: Buffer type (Unused).
3347 *
3348 * This is the client interface for dequeueing data from the queue.
3349 * This function uses IO vectors to handle the work. Returns number
3350 * of bytes dequeued or < 0 on error.
3351 */
3352ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
3353 void *iov,
3354 size_t iov_size,
3355 int buf_type)
3356{
3357 ssize_t result;
3358
3359 if (!qpair || !iov)
3360 return VMCI_ERROR_INVALID_ARGS;
3361
3362 qp_lock(qpair);
3363
3364 do {
3365 result = qp_dequeue_locked(qpair->produce_q,
3366 qpair->consume_q,
3367 qpair->consume_q_size,
3368 iov, iov_size,
3369 qp_memcpy_from_queue_iov,
3370 true, vmci_can_block(qpair->flags));
3371
3372 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3373 !qp_wait_for_ready_queue(qpair))
3374 result = VMCI_ERROR_WOULD_BLOCK;
3375
3376 } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3377
3378 qp_unlock(qpair);
3379
3380 return result;
3381}
3382EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
3383
3384/*
3385 * vmci_qpair_peekv() - Peek at the data in the queue using iov.
3386 * @qpair: Pointer to the queue pair struct.
3387 * @iov: Pointer to buffer for the data
3388 * @iov_size: Length of buffer.
3389 * @buf_type: Buffer type (Unused on Linux).
3390 *
3391 * This is the client interface for peeking into a queue. (I.e.,
3392 * copy data from the queue without updating the head pointer.)
3393 * This function uses IO vectors to handle the work. Returns number
3394 * of bytes peeked or < 0 on error.
3395 */
3396ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
3397 void *iov,
3398 size_t iov_size,
3399 int buf_type)
3400{
3401 ssize_t result;
3402
3403 if (!qpair || !iov)
3404 return VMCI_ERROR_INVALID_ARGS;
3405
3406 qp_lock(qpair);
3407
3408 do {
3409 result = qp_dequeue_locked(qpair->produce_q,
3410 qpair->consume_q,
3411 qpair->consume_q_size,
3412 iov, iov_size,
3413 qp_memcpy_from_queue_iov,
3414 false, vmci_can_block(qpair->flags));
3415
3416 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3417 !qp_wait_for_ready_queue(qpair))
3418 result = VMCI_ERROR_WOULD_BLOCK;
3419
3420 } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3421
3422 qp_unlock(qpair);
3423 return result;
3424}
3425EXPORT_SYMBOL_GPL(vmci_qpair_peekv);
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.h b/drivers/misc/vmw_vmci/vmci_queue_pair.h
new file mode 100644
index 000000000000..58c6959f6b6d
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.h
@@ -0,0 +1,191 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef _VMCI_QUEUE_PAIR_H_
17#define _VMCI_QUEUE_PAIR_H_
18
19#include <linux/vmw_vmci_defs.h>
20#include <linux/types.h>
21
22#include "vmci_context.h"
23
24/* Callback needed for correctly waiting on events. */
25typedef int (*vmci_event_release_cb) (void *client_data);
26
27/* Guest device port I/O. */
28struct ppn_set {
29 u64 num_produce_pages;
30 u64 num_consume_pages;
31 u32 *produce_ppns;
32 u32 *consume_ppns;
33 bool initialized;
34};
35
36/* VMCIqueue_pairAllocInfo */
37struct vmci_qp_alloc_info {
38 struct vmci_handle handle;
39 u32 peer;
40 u32 flags;
41 u64 produce_size;
42 u64 consume_size;
43 u64 ppn_va; /* Start VA of queue pair PPNs. */
44 u64 num_ppns;
45 s32 result;
46 u32 version;
47};
48
49/* VMCIqueue_pairSetVAInfo */
50struct vmci_qp_set_va_info {
51 struct vmci_handle handle;
52 u64 va; /* Start VA of queue pair PPNs. */
53 u64 num_ppns;
54 u32 version;
55 s32 result;
56};
57
58/*
59 * For backwards compatibility, here is a version of the
60 * VMCIqueue_pairPageFileInfo before host support end-points was added.
61 * Note that the current version of that structure requires VMX to
62 * pass down the VA of the mapped file. Before host support was added
63 * there was nothing of the sort. So, when the driver sees the ioctl
64 * with a parameter that is the sizeof
65 * VMCIqueue_pairPageFileInfo_NoHostQP then it can infer that the version
66 * of VMX running can't attach to host end points because it doesn't
67 * provide the VA of the mapped files.
68 *
69 * The Linux driver doesn't get an indication of the size of the
70 * structure passed down from user space. So, to fix a long standing
71 * but unfiled bug, the _pad field has been renamed to version.
72 * Existing versions of VMX always initialize the PageFileInfo
73 * structure so that _pad, er, version is set to 0.
74 *
75 * A version value of 1 indicates that the size of the structure has
76 * been increased to include two UVA's: produce_uva and consume_uva.
77 * These UVA's are of the mmap()'d queue contents backing files.
78 *
79 * In addition, if when VMX is sending down the
80 * VMCIqueue_pairPageFileInfo structure it gets an error then it will
81 * try again with the _NoHostQP version of the file to see if an older
82 * VMCI kernel module is running.
83 */
84
85/* VMCIqueue_pairPageFileInfo */
86struct vmci_qp_page_file_info {
87 struct vmci_handle handle;
88 u64 produce_page_file; /* User VA. */
89 u64 consume_page_file; /* User VA. */
90 u64 produce_page_file_size; /* Size of the file name array. */
91 u64 consume_page_file_size; /* Size of the file name array. */
92 s32 result;
93 u32 version; /* Was _pad. */
94 u64 produce_va; /* User VA of the mapped file. */
95 u64 consume_va; /* User VA of the mapped file. */
96};
97
98/* vmci queuepair detach info */
99struct vmci_qp_dtch_info {
100 struct vmci_handle handle;
101 s32 result;
102 u32 _pad;
103};
104
105/*
106 * struct vmci_qp_page_store describes how the memory of a given queue pair
107 * is backed. When the queue pair is between the host and a guest, the
108 * page store consists of references to the guest pages. On vmkernel,
109 * this is a list of PPNs, and on hosted, it is a user VA where the
110 * queue pair is mapped into the VMX address space.
111 */
112struct vmci_qp_page_store {
113 /* Reference to pages backing the queue pair. */
114 u64 pages;
 115 * Length of pageList/virtual address range (in pages). */
116 u32 len;
117};
118
119/*
120 * This data type contains the information about a queue.
121 * There are two queues (hence, queue pairs) per transaction model between a
122 * pair of end points, A & B. One queue is used by end point A to transmit
123 * commands and responses to B. The other queue is used by B to transmit
124 * commands and responses.
125 *
126 * struct vmci_queue_kern_if is a per-OS defined Queue structure. It contains
127 * either a direct pointer to the linear address of the buffer contents or a
128 * pointer to structures which help the OS locate those data pages. See
129 * vmciKernelIf.c for each platform for its definition.
130 */
131struct vmci_queue {
132 struct vmci_queue_header *q_header;
133 struct vmci_queue_header *saved_header;
134 struct vmci_queue_kern_if *kernel_if;
135};
136
137/*
138 * Utility function that checks whether the fields of the page
139 * store contain valid values.
140 * Result:
 141 * true if the page store is well-formed, false otherwise.
142 */
143static inline bool
144VMCI_QP_PAGESTORE_IS_WELLFORMED(struct vmci_qp_page_store *page_store)
145{
146 return page_store->len >= 2;
147}
148
149/*
150 * Helper function to check if the non-blocking flag
151 * is set for a given queue pair.
152 */
153static inline bool vmci_can_block(u32 flags)
154{
155 return !(flags & VMCI_QPFLAG_NONBLOCK);
156}
157
158/*
159 * Helper function to check if the queue pair is pinned
160 * into memory.
161 */
162static inline bool vmci_qp_pinned(u32 flags)
163{
164 return flags & VMCI_QPFLAG_PINNED;
165}
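/*
 * Illustrative sketch, not part of this driver: how the two helpers above
 * are meant to be combined. It mirrors the flag-consistency check made by
 * vmci_qpair_alloc(), where VMCI_QPFLAG_PINNED is only accepted together
 * with VMCI_QPFLAG_NONBLOCK; qp_flags_are_consistent() is an illustrative
 * name only.
 */
static inline bool qp_flags_are_consistent(u32 flags)
{
	/* Pinning only makes sense for non-blocking queue pairs. */
	if (vmci_qp_pinned(flags) && vmci_can_block(flags))
		return false;

	return true;
}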
166
167void vmci_qp_broker_exit(void);
168int vmci_qp_broker_alloc(struct vmci_handle handle, u32 peer,
169 u32 flags, u32 priv_flags,
170 u64 produce_size, u64 consume_size,
171 struct vmci_qp_page_store *page_store,
172 struct vmci_ctx *context);
173int vmci_qp_broker_set_page_store(struct vmci_handle handle,
174 u64 produce_uva, u64 consume_uva,
175 struct vmci_ctx *context);
176int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context);
177
178void vmci_qp_guest_endpoints_exit(void);
179
180int vmci_qp_alloc(struct vmci_handle *handle,
181 struct vmci_queue **produce_q, u64 produce_size,
182 struct vmci_queue **consume_q, u64 consume_size,
183 u32 peer, u32 flags, u32 priv_flags,
184 bool guest_endpoint, vmci_event_release_cb wakeup_cb,
185 void *client_data);
186int vmci_qp_broker_map(struct vmci_handle handle,
187 struct vmci_ctx *context, u64 guest_mem);
188int vmci_qp_broker_unmap(struct vmci_handle handle,
189 struct vmci_ctx *context, u32 gid);
190
191#endif /* _VMCI_QUEUE_PAIR_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c
new file mode 100644
index 000000000000..a196f84a4fd2
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_resource.c
@@ -0,0 +1,229 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/hash.h>
18#include <linux/types.h>
19#include <linux/rculist.h>
20
21#include "vmci_resource.h"
22#include "vmci_driver.h"
23
24
25#define VMCI_RESOURCE_HASH_BITS 7
26#define VMCI_RESOURCE_HASH_BUCKETS (1 << VMCI_RESOURCE_HASH_BITS)
27
28struct vmci_hash_table {
29 spinlock_t lock;
30 struct hlist_head entries[VMCI_RESOURCE_HASH_BUCKETS];
31};
32
33static struct vmci_hash_table vmci_resource_table = {
34 .lock = __SPIN_LOCK_UNLOCKED(vmci_resource_table.lock),
35};
36
37static unsigned int vmci_resource_hash(struct vmci_handle handle)
38{
39 return hash_32(handle.resource, VMCI_RESOURCE_HASH_BITS);
40}
41
42/*
43 * Gets a resource (if one exists) matching given handle from the hash table.
44 */
45static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle,
46 enum vmci_resource_type type)
47{
48 struct vmci_resource *r, *resource = NULL;
49 struct hlist_node *node;
50 unsigned int idx = vmci_resource_hash(handle);
51
52 rcu_read_lock();
53 hlist_for_each_entry_rcu(r, node,
54 &vmci_resource_table.entries[idx], node) {
55 u32 cid = r->handle.context;
56 u32 rid = r->handle.resource;
57
58 if (r->type == type &&
59 rid == handle.resource &&
60 (cid == handle.context || cid == VMCI_INVALID_ID)) {
61 resource = r;
62 break;
63 }
64 }
65 rcu_read_unlock();
66
67 return resource;
68}
69
70/*
71 * Find an unused resource ID and return it. The first
72 * VMCI_RESERVED_RESOURCE_ID_MAX are reserved so we start from
73 * its value + 1.
74 * Returns VMCI resource id on success, VMCI_INVALID_ID on failure.
75 */
76static u32 vmci_resource_find_id(u32 context_id,
77 enum vmci_resource_type resource_type)
78{
79 static u32 resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
80 u32 old_rid = resource_id;
81 u32 current_rid;
82
83 /*
84 * Generate a unique resource ID. Keep on trying until we wrap around
85 * in the RID space.
86 */
87 do {
88 struct vmci_handle handle;
89
90 current_rid = resource_id;
91 resource_id++;
92 if (unlikely(resource_id == VMCI_INVALID_ID)) {
93 /* Skip the reserved rids. */
94 resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1;
95 }
96
97 handle = vmci_make_handle(context_id, current_rid);
98 if (!vmci_resource_lookup(handle, resource_type))
99 return current_rid;
100 } while (resource_id != old_rid);
101
102 return VMCI_INVALID_ID;
103}
104
105
106int vmci_resource_add(struct vmci_resource *resource,
107 enum vmci_resource_type resource_type,
108 struct vmci_handle handle)
109
110{
111 unsigned int idx;
112 int result;
113
114 spin_lock(&vmci_resource_table.lock);
115
116 if (handle.resource == VMCI_INVALID_ID) {
117 handle.resource = vmci_resource_find_id(handle.context,
118 resource_type);
119 if (handle.resource == VMCI_INVALID_ID) {
120 result = VMCI_ERROR_NO_HANDLE;
121 goto out;
122 }
123 } else if (vmci_resource_lookup(handle, resource_type)) {
124 result = VMCI_ERROR_ALREADY_EXISTS;
125 goto out;
126 }
127
128 resource->handle = handle;
129 resource->type = resource_type;
130 INIT_HLIST_NODE(&resource->node);
131 kref_init(&resource->kref);
132 init_completion(&resource->done);
133
134 idx = vmci_resource_hash(resource->handle);
135 hlist_add_head_rcu(&resource->node, &vmci_resource_table.entries[idx]);
136
137 result = VMCI_SUCCESS;
138
139out:
140 spin_unlock(&vmci_resource_table.lock);
141 return result;
142}
143
144void vmci_resource_remove(struct vmci_resource *resource)
145{
146 struct vmci_handle handle = resource->handle;
147 unsigned int idx = vmci_resource_hash(handle);
148 struct vmci_resource *r;
149 struct hlist_node *node;
150
151 /* Remove resource from hash table. */
152 spin_lock(&vmci_resource_table.lock);
153
154 hlist_for_each_entry(r, node, &vmci_resource_table.entries[idx], node) {
155 if (vmci_handle_is_equal(r->handle, resource->handle)) {
156 hlist_del_init_rcu(&r->node);
157 break;
158 }
159 }
160
161 spin_unlock(&vmci_resource_table.lock);
162 synchronize_rcu();
163
164 vmci_resource_put(resource);
165 wait_for_completion(&resource->done);
166}
167
168struct vmci_resource *
169vmci_resource_by_handle(struct vmci_handle resource_handle,
170 enum vmci_resource_type resource_type)
171{
172 struct vmci_resource *r, *resource = NULL;
173
174 rcu_read_lock();
175
176 r = vmci_resource_lookup(resource_handle, resource_type);
177 if (r &&
178 (resource_type == r->type ||
179 resource_type == VMCI_RESOURCE_TYPE_ANY)) {
180 resource = vmci_resource_get(r);
181 }
182
183 rcu_read_unlock();
184
185 return resource;
186}
187
188/*
189 * Get a reference to given resource.
190 */
191struct vmci_resource *vmci_resource_get(struct vmci_resource *resource)
192{
193 kref_get(&resource->kref);
194
195 return resource;
196}
197
198static void vmci_release_resource(struct kref *kref)
199{
200 struct vmci_resource *resource =
201 container_of(kref, struct vmci_resource, kref);
202
203 /* Verify the resource has been unlinked from hash table */
204 WARN_ON(!hlist_unhashed(&resource->node));
205
206 /* Signal that container of this resource can now be destroyed */
207 complete(&resource->done);
208}
209
210/*
211 * Resource's release function will get called if last reference.
212 * If it is the last reference, then we are sure that nobody else
213 * can increment the count again (it's gone from the resource hash
214 * table), so there's no need for locking here.
215 */
216int vmci_resource_put(struct vmci_resource *resource)
217{
218 /*
219 * We propagate the information back to caller in case it wants to know
220 * whether entry was freed.
221 */
222 return kref_put(&resource->kref, vmci_release_resource) ?
223 VMCI_SUCCESS_ENTRY_DEAD : VMCI_SUCCESS;
224}
225
226struct vmci_handle vmci_resource_handle(struct vmci_resource *resource)
227{
228 return resource->handle;
229}
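/*
 * Illustrative sketch, not part of this driver: the intended lifecycle of
 * a vmci_resource embedded in a larger object, mirroring how the queue
 * pair broker and doorbell code use this file. struct example_object and
 * the example_object_* helpers are assumptions made for the example only.
 */
struct example_object {
	struct vmci_resource resource;
	/* ... client specific state ... */
};

static int example_object_publish(struct example_object *obj,
				  struct vmci_handle handle)
{
	/* Links the object into the resource hash table. */
	return vmci_resource_add(&obj->resource,
				 VMCI_RESOURCE_TYPE_DOORBELL, handle);
}

static void example_object_unpublish(struct example_object *obj)
{
	/*
	 * Unlinks the object from the hash table and waits until every
	 * reference taken via vmci_resource_by_handle()/vmci_resource_get()
	 * has been dropped with vmci_resource_put(), after which the memory
	 * backing obj may safely be freed.
	 */
	vmci_resource_remove(&obj->resource);
}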
diff --git a/drivers/misc/vmw_vmci/vmci_resource.h b/drivers/misc/vmw_vmci/vmci_resource.h
new file mode 100644
index 000000000000..9190cd298bee
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_resource.h
@@ -0,0 +1,59 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef _VMCI_RESOURCE_H_
17#define _VMCI_RESOURCE_H_
18
19#include <linux/vmw_vmci_defs.h>
20#include <linux/types.h>
21
22#include "vmci_context.h"
23
24
25enum vmci_resource_type {
26 VMCI_RESOURCE_TYPE_ANY,
27 VMCI_RESOURCE_TYPE_API,
28 VMCI_RESOURCE_TYPE_GROUP,
29 VMCI_RESOURCE_TYPE_DATAGRAM,
30 VMCI_RESOURCE_TYPE_DOORBELL,
31 VMCI_RESOURCE_TYPE_QPAIR_GUEST,
32 VMCI_RESOURCE_TYPE_QPAIR_HOST
33};
34
35struct vmci_resource {
36 struct vmci_handle handle;
37 enum vmci_resource_type type;
38 struct hlist_node node;
39 struct kref kref;
40 struct completion done;
41};
42
43
44int vmci_resource_add(struct vmci_resource *resource,
45 enum vmci_resource_type resource_type,
46 struct vmci_handle handle);
47
48void vmci_resource_remove(struct vmci_resource *resource);
49
50struct vmci_resource *
51vmci_resource_by_handle(struct vmci_handle resource_handle,
52 enum vmci_resource_type resource_type);
53
54struct vmci_resource *vmci_resource_get(struct vmci_resource *resource);
55int vmci_resource_put(struct vmci_resource *resource);
56
57struct vmci_handle vmci_resource_handle(struct vmci_resource *resource);
58
59#endif /* _VMCI_RESOURCE_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_route.c b/drivers/misc/vmw_vmci/vmci_route.c
new file mode 100644
index 000000000000..91090658b929
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_route.c
@@ -0,0 +1,226 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#include <linux/vmw_vmci_defs.h>
17#include <linux/vmw_vmci_api.h>
18
19#include "vmci_context.h"
20#include "vmci_driver.h"
21#include "vmci_route.h"
22
23/*
24 * Make a routing decision for the given source and destination handles.
25 * This will try to determine the route using the handles and the available
26 * devices. Will set the source context if it is invalid.
27 */
28int vmci_route(struct vmci_handle *src,
29 const struct vmci_handle *dst,
30 bool from_guest,
31 enum vmci_route *route)
32{
33 bool has_host_device = vmci_host_code_active();
34 bool has_guest_device = vmci_guest_code_active();
35
36 *route = VMCI_ROUTE_NONE;
37
38 /*
39 * "from_guest" is only ever set to true by
40 * IOCTL_VMCI_DATAGRAM_SEND (or by the vmkernel equivalent),
41 * which comes from the VMX, so we know it is coming from a
42 * guest.
43 *
44 * To avoid inconsistencies, test these once. We will test
45 * them again when we do the actual send to ensure that we do
46 * not touch a non-existent device.
47 */
48
49 /* Must have a valid destination context. */
50 if (VMCI_INVALID_ID == dst->context)
51 return VMCI_ERROR_INVALID_ARGS;
52
53 /* Anywhere to hypervisor. */
54 if (VMCI_HYPERVISOR_CONTEXT_ID == dst->context) {
55
56 /*
57 * If this message already came from a guest then we
58 * cannot send it to the hypervisor. It must come
59 * from a local client.
60 */
61 if (from_guest)
62 return VMCI_ERROR_DST_UNREACHABLE;
63
64 /*
65 * We must be acting as a guest in order to send to
66 * the hypervisor.
67 */
68 if (!has_guest_device)
69 return VMCI_ERROR_DEVICE_NOT_FOUND;
70
71 /* And we cannot send if the source is the host context. */
72 if (VMCI_HOST_CONTEXT_ID == src->context)
73 return VMCI_ERROR_INVALID_ARGS;
74
75 /*
76 * If the client passed the ANON source handle then
77 * respect it (both context and resource are invalid).
78 * However, if they passed only an invalid context,
79 * then they probably mean ANY, in which case we
80 * should set the real context here before passing it
81 * down.
82 */
83 if (VMCI_INVALID_ID == src->context &&
84 VMCI_INVALID_ID != src->resource)
85 src->context = vmci_get_context_id();
86
87 /* Send from local client down to the hypervisor. */
88 *route = VMCI_ROUTE_AS_GUEST;
89 return VMCI_SUCCESS;
90 }
91
92 /* Anywhere to local client on host. */
93 if (VMCI_HOST_CONTEXT_ID == dst->context) {
94 /*
95 * If it is not from a guest but we are acting as a
96 * guest, then we need to send it down to the host.
97 * Note that if we are also acting as a host then this
98 * will prevent us from sending from local client to
99 * local client, but we accept that restriction as a
100 * way to remove any ambiguity from the host context.
101 */
102 if (src->context == VMCI_HYPERVISOR_CONTEXT_ID) {
103 /*
104 * If the hypervisor is the source, this is
105 * host local communication. The hypervisor
106 * may send vmci event datagrams to the host
107 * itself, but it will never send datagrams to
108 * an "outer host" through the guest device.
109 */
110
111 if (has_host_device) {
112 *route = VMCI_ROUTE_AS_HOST;
113 return VMCI_SUCCESS;
114 } else {
115 return VMCI_ERROR_DEVICE_NOT_FOUND;
116 }
117 }
118
119 if (!from_guest && has_guest_device) {
120 /* If no source context then use the current. */
121 if (VMCI_INVALID_ID == src->context)
122 src->context = vmci_get_context_id();
123
124 /* Send it from local client down to the host. */
125 *route = VMCI_ROUTE_AS_GUEST;
126 return VMCI_SUCCESS;
127 }
128
129 /*
130 * Otherwise we already received it from a guest and
131 * it is destined for a local client on this host, or
132 * it is from another local client on this host. We
133 * must be acting as a host to service it.
134 */
135 if (!has_host_device)
136 return VMCI_ERROR_DEVICE_NOT_FOUND;
137
138 if (VMCI_INVALID_ID == src->context) {
139 /*
140 * If it came from a guest then it must have a
141 * valid context. Otherwise we can use the
142 * host context.
143 */
144 if (from_guest)
145 return VMCI_ERROR_INVALID_ARGS;
146
147 src->context = VMCI_HOST_CONTEXT_ID;
148 }
149
150 /* Route to local client. */
151 *route = VMCI_ROUTE_AS_HOST;
152 return VMCI_SUCCESS;
153 }
154
155 /*
156 * If we are acting as a host then this might be destined for
157 * a guest.
158 */
159 if (has_host_device) {
160 /* It will have a context if it is meant for a guest. */
161 if (vmci_ctx_exists(dst->context)) {
162 if (VMCI_INVALID_ID == src->context) {
163 /*
164 * If it came from a guest then it
165 * must have a valid context.
166 * Otherwise we can use the host
167 * context.
168 */
169
170 if (from_guest)
171 return VMCI_ERROR_INVALID_ARGS;
172
173 src->context = VMCI_HOST_CONTEXT_ID;
174 } else if (VMCI_CONTEXT_IS_VM(src->context) &&
175 src->context != dst->context) {
176 /*
177 * VM to VM communication is not
178 * allowed. Since we catch all
179 * communication destined for the host
180 * above, this must be destined for a
181 * VM since there is a valid context.
182 */
183
184 return VMCI_ERROR_DST_UNREACHABLE;
185 }
186
187 /* Pass it up to the guest. */
188 *route = VMCI_ROUTE_AS_HOST;
189 return VMCI_SUCCESS;
190 } else if (!has_guest_device) {
191 /*
192 * The host is attempting to reach a CID
193 * without an active context, and we can't
194 * send it down, since we have no guest
195 * device.
196 */
197
198 return VMCI_ERROR_DST_UNREACHABLE;
199 }
200 }
201
202 /*
203 * We must be a guest trying to send to another guest, which means
204 * we need to send it down to the host. We do not filter out VM to
205 * VM communication here, since we want to be able to use the guest
206 * driver on older versions that do support VM to VM communication.
207 */
208 if (!has_guest_device) {
209 /*
210 * Ending up here means we have neither guest nor host
211 * device.
212 */
213 return VMCI_ERROR_DEVICE_NOT_FOUND;
214 }
215
216 /* If no source context then use the current context. */
217 if (VMCI_INVALID_ID == src->context)
218 src->context = vmci_get_context_id();
219
220 /*
221 * Send it from local client down to the host, which will
222 * route it to the other guest for us.
223 */
224 *route = VMCI_ROUTE_AS_GUEST;
225 return VMCI_SUCCESS;
226}
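/*
 * Illustrative sketch, not part of this driver: how a send path might
 * consume the routing decision above. The deliver_as_host() and
 * send_down_as_guest() stubs stand in for the real datagram/queue pair
 * specific dispatch and are assumptions made for the example only.
 */
static int deliver_as_host(struct vmci_handle *src,
			   const struct vmci_handle *dst)
{
	return VMCI_SUCCESS;	/* placeholder for host-side delivery */
}

static int send_down_as_guest(struct vmci_handle *src,
			      const struct vmci_handle *dst)
{
	return VMCI_SUCCESS;	/* placeholder for the guest device path */
}

static int example_dispatch(struct vmci_handle *src,
			    const struct vmci_handle *dst, bool from_guest)
{
	enum vmci_route route;
	int result;

	result = vmci_route(src, dst, from_guest, &route);
	if (result < VMCI_SUCCESS)
		return result;

	switch (route) {
	case VMCI_ROUTE_AS_HOST:
		return deliver_as_host(src, dst);
	case VMCI_ROUTE_AS_GUEST:
		return send_down_as_guest(src, dst);
	default:
		return VMCI_ERROR_DST_UNREACHABLE;
	}
}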
diff --git a/drivers/misc/vmw_vmci/vmci_route.h b/drivers/misc/vmw_vmci/vmci_route.h
new file mode 100644
index 000000000000..3b30e82419c3
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_route.h
@@ -0,0 +1,30 @@
1/*
2 * VMware VMCI Driver
3 *
4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation version 2 and no later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16#ifndef _VMCI_ROUTE_H_
17#define _VMCI_ROUTE_H_
18
19#include <linux/vmw_vmci_defs.h>
20
21enum vmci_route {
22 VMCI_ROUTE_NONE,
23 VMCI_ROUTE_AS_HOST,
24 VMCI_ROUTE_AS_GUEST,
25};
26
27int vmci_route(struct vmci_handle *src, const struct vmci_handle *dst,
28 bool from_guest, enum vmci_route *route);
29
30#endif /* _VMCI_ROUTE_H_ */