author	David Vrabel <david.vrabel@citrix.com>	2015-11-26 11:14:35 -0500
committer	David Vrabel <david.vrabel@citrix.com>	2015-11-26 13:49:54 -0500
commit	8620015499101090ae275bf11e9bc2f9febfdf08 (patch)
tree	4a85edc5fa4e9b26f285e81d8a062bb10a37685d
parent	b4ff8389ed14b849354b59ce9b360bdefcdbf99c (diff)
xen/evtchn: dynamically grow pending event channel ring
If more than 1024 event channels are bound to an evtchn device then it is
possible (even with well behaved applications) for the ring to overflow and
events to be lost (reported as an -EFBIG error).

Dynamically increase the size of the ring so there is always enough space
for all bound events.  Well behaved applications that only unmask events
after draining them from the ring can thus no longer lose events.

However, an application could unmask an event before draining it, allowing
multiple entries per port to accumulate in the ring, and an overflow could
still occur.  So the overflow detection and reporting is retained.

The ring size is initially only 64 entries so the common use case of an
application only binding a few events will use less memory than before.
The ring size may grow to 512 KiB (enough for all 2^17 possible channels).
This order 7 kmalloc() may fail due to memory fragmentation, so we fall
back to trying vmalloc().

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
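For scale: 2^17 ports at sizeof(evtchn_port_t) = 4 bytes is 512 KiB, i.e. a
contiguous 128-page (order-7) allocation, which is why the vmalloc() fallback
matters on a fragmented system. The "well behaved application" pattern the
message relies on is: drain pending ports with read(), handle them, then
write() the same ports back to unmask them. A minimal userspace sketch of
that loop follows; it assumes the file descriptor has already been bound to
one or more event channels via the driver's bind ioctls (not shown), so it is
an illustration rather than part of this patch.

/*
 * Hypothetical consumer of /dev/xen/evtchn: drain a batch of pending
 * ports with read(), handle them, then write() them back to unmask.
 * Binding the ports to this fd is assumed to have happened already.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t ports[64];	/* evtchn_port_t is a 32-bit port number */
	int fd = open("/dev/xen/evtchn", O_RDWR);

	if (fd < 0) {
		perror("open /dev/xen/evtchn");
		return 1;
	}

	for (;;) {
		ssize_t n = read(fd, ports, sizeof(ports));	/* drain */

		if (n <= 0)
			break;

		for (size_t i = 0; i < (size_t)n / sizeof(ports[0]); i++)
			printf("event on port %u\n", (unsigned)ports[i]);

		/* Unmask only after the ports have left the ring. */
		if (write(fd, ports, n) != n)
			break;
	}

	close(fd);
	return 0;
}

Because each bound port contributes at most one entry while it stays masked,
a ring with one slot per bound port (which is what the resize below
guarantees) cannot overflow under this pattern.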
-rw-r--r--	drivers/xen/evtchn.c	123
1 file changed, 107 insertions(+), 16 deletions(-)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 00f40f051d95..38272ad24551 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -49,6 +49,8 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
@@ -58,10 +60,10 @@
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
 	struct rb_root evtchns;
+	unsigned int nr_evtchns;
 
 	/* Notification ring, accessed via /dev/xen/evtchn. */
-#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
-#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+	unsigned int ring_size;
 	evtchn_port_t *ring;
 	unsigned int ring_cons, ring_prod, ring_overflow;
 	struct mutex ring_cons_mutex; /* protect against concurrent readers */
@@ -80,10 +82,41 @@ struct user_evtchn {
 	bool enabled;
 };
 
+static evtchn_port_t *evtchn_alloc_ring(unsigned int size)
+{
+	evtchn_port_t *ring;
+	size_t s = size * sizeof(*ring);
+
+	ring = kmalloc(s, GFP_KERNEL);
+	if (!ring)
+		ring = vmalloc(s);
+
+	return ring;
+}
+
+static void evtchn_free_ring(evtchn_port_t *ring)
+{
+	kvfree(ring);
+}
+
+static unsigned int evtchn_ring_offset(struct per_user_data *u,
+				       unsigned int idx)
+{
+	return idx & (u->ring_size - 1);
+}
+
+static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
+					unsigned int idx)
+{
+	return u->ring + evtchn_ring_offset(u, idx);
+}
+
 static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
 	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
 
+	u->nr_evtchns++;
+
 	while (*new) {
 		struct user_evtchn *this;
 
@@ -107,6 +140,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 
 static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
+	u->nr_evtchns--;
 	rb_erase(&evtchn->node, &u->evtchns);
 	kfree(evtchn);
 }
@@ -144,8 +178,8 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 
 	spin_lock(&u->ring_prod_lock);
 
-	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
-		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
+	if ((u->ring_prod - u->ring_cons) < u->ring_size) {
+		*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
 		wmb(); /* Ensure ring contents visible */
 		if (u->ring_cons == u->ring_prod++) {
 			wake_up_interruptible(&u->evtchn_wait);
@@ -200,10 +234,10 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 	}
 
 	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
-	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
-		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+	if (((c ^ p) & u->ring_size) != 0) {
+		bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
 			sizeof(evtchn_port_t);
-		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+		bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
 	} else {
 		bytes1 = (p - c) * sizeof(evtchn_port_t);
 		bytes2 = 0;
@@ -219,7 +253,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 
 	rc = -EFAULT;
 	rmb(); /* Ensure that we see the port before we copy it. */
-	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+	if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
 	    ((bytes2 != 0) &&
 	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
 		goto unlock_out;
@@ -278,6 +312,66 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 	return rc;
 }
 
+static int evtchn_resize_ring(struct per_user_data *u)
+{
+	unsigned int new_size;
+	evtchn_port_t *new_ring, *old_ring;
+	unsigned int p, c;
+
+	/*
+	 * Ensure the ring is large enough to capture all possible
+	 * events. i.e., one free slot for each bound event.
+	 */
+	if (u->nr_evtchns <= u->ring_size)
+		return 0;
+
+	if (u->ring_size == 0)
+		new_size = 64;
+	else
+		new_size = 2 * u->ring_size;
+
+	new_ring = evtchn_alloc_ring(new_size);
+	if (!new_ring)
+		return -ENOMEM;
+
+	old_ring = u->ring;
+
+	/*
+	 * Access to the ring contents is serialized by either the
+	 * prod /or/ cons lock so take both when resizing.
+	 */
+	mutex_lock(&u->ring_cons_mutex);
+	spin_lock_irq(&u->ring_prod_lock);
+
+	/*
+	 * Copy the old ring contents to the new ring.
+	 *
+	 * If the ring contents crosses the end of the current ring,
+	 * it needs to be copied in two chunks.
+	 *
+	 * +---------+    +------------------+
+	 * |34567  12| -> |       1234567    |
+	 * +-----p-c-+    +------------------+
+	 */
+	p = evtchn_ring_offset(u, u->ring_prod);
+	c = evtchn_ring_offset(u, u->ring_cons);
+	if (p < c) {
+		memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring));
+		memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring));
+	} else
+		memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring));
+
+	u->ring = new_ring;
+	u->ring_size = new_size;
+
+	spin_unlock_irq(&u->ring_prod_lock);
+	mutex_unlock(&u->ring_cons_mutex);
+
+	evtchn_free_ring(old_ring);
+
+	return 0;
+}
+
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
 	struct user_evtchn *evtchn;
@@ -305,6 +399,10 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
 	if (rc < 0)
 		goto err;
 
+	rc = evtchn_resize_ring(u);
+	if (rc < 0)
+		goto err;
+
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
 				       u->name, evtchn);
 	if (rc < 0)
@@ -503,13 +601,6 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
 	init_waitqueue_head(&u->evtchn_wait);
 
-	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
-	if (u->ring == NULL) {
-		kfree(u->name);
-		kfree(u);
-		return -ENOMEM;
-	}
-
 	mutex_init(&u->bind_mutex);
 	mutex_init(&u->ring_cons_mutex);
 	spin_lock_init(&u->ring_prod_lock);
@@ -532,7 +623,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
 		evtchn_unbind_from_user(u, evtchn);
 	}
 
-	free_page((unsigned long)u->ring);
+	evtchn_free_ring(u->ring);
 	kfree(u->name);
 	kfree(u);
 
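As a rough standalone model of the data structure the patch introduces: a
power-of-two ring addressed by free-running producer/consumer counters that
are masked on access, started at 64 slots and doubled on demand. This is not
kernel code; the names are made up, it compacts the live entries to the front
of the new buffer on grow (rather than copying them in two chunks under both
the producer and consumer locks as evtchn_resize_ring() does), and it omits
all locking.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* Toy model of the pending-port ring: power-of-two size, free-running
 * prod/cons counters, offsets computed by masking (as in the patch). */
struct toy_ring {
	uint32_t *slots;
	unsigned int size;	/* power of two, 0 before the first grow */
	unsigned int cons, prod;
};

static unsigned int toy_ring_offset(const struct toy_ring *r, unsigned int idx)
{
	return idx & (r->size - 1);
}

/* Start at 64 slots, then double; live entries are compacted to the
 * front of the new buffer (the kernel keeps the counters instead). */
static int toy_ring_grow(struct toy_ring *r)
{
	unsigned int new_size = r->size ? 2 * r->size : 64;
	uint32_t *new_slots = malloc(new_size * sizeof(*new_slots));
	unsigned int count = r->prod - r->cons;

	if (!new_slots)
		return -1;

	for (unsigned int i = 0; i < count; i++)
		new_slots[i] = r->slots[toy_ring_offset(r, r->cons + i)];

	free(r->slots);
	r->slots = new_slots;
	r->size = new_size;
	r->cons = 0;
	r->prod = count;
	return 0;
}

/* Producer side: like evtchn_interrupt(), refuse the event if no slot
 * is free (the driver flags an overflow and reports -EFBIG). */
static int toy_ring_push(struct toy_ring *r, uint32_t port)
{
	if (r->prod - r->cons >= r->size)
		return -1;
	r->slots[toy_ring_offset(r, r->prod++)] = port;
	return 0;
}

int main(void)
{
	struct toy_ring r = { 0 };

	assert(toy_ring_grow(&r) == 0);		/* 64 slots */
	for (uint32_t port = 1; port <= 64; port++)
		assert(toy_ring_push(&r, port) == 0);
	assert(toy_ring_push(&r, 65) < 0);	/* full */
	assert(toy_ring_grow(&r) == 0);		/* 128 slots, entries kept */
	assert(toy_ring_push(&r, 65) == 0);

	free(r.slots);
	return 0;
}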