diff options
author | David Vrabel <david.vrabel@citrix.com> | 2013-07-19 10:52:00 -0400 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2013-08-09 11:06:49 -0400 |
commit | 73cc4bb0c79eebe1f0e92b700d9fe8d1c9b061bb (patch) | |
tree | ca6e323a988c1ef325edea401744d907ad563d21 /drivers | |
parent | 65a45fa2f640f72757ef00f2bc83f9654d65a62b (diff) |
xen/evtchn: improve scalability by using per-user locks
The global array of port users and the port_user_lock limit the
scalability of the evtchn device. Instead of the global array lookup,
use a per-user (per-fd) tree of event channels bound by that user and
protect the tree with a per-user lock.
This is also a prerequisite for extending the number of supported event
channels, by removing the fixed-size, per-event-channel array.
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/xen/evtchn.c | 192 |
1 files changed, 112 insertions, 80 deletions
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index b6165e047f48..f328f121c92d 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c | |||
@@ -57,6 +57,7 @@ | |||
57 | 57 | ||
58 | struct per_user_data { | 58 | struct per_user_data { |
59 | struct mutex bind_mutex; /* serialize bind/unbind operations */ | 59 | struct mutex bind_mutex; /* serialize bind/unbind operations */ |
60 | struct rb_root evtchns; | ||
60 | 61 | ||
61 | /* Notification ring, accessed via /dev/xen/evtchn. */ | 62 | /* Notification ring, accessed via /dev/xen/evtchn. */ |
62 | #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) | 63 | #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) |
@@ -64,6 +65,7 @@ struct per_user_data { | |||
64 | evtchn_port_t *ring; | 65 | evtchn_port_t *ring; |
65 | unsigned int ring_cons, ring_prod, ring_overflow; | 66 | unsigned int ring_cons, ring_prod, ring_overflow; |
66 | struct mutex ring_cons_mutex; /* protect against concurrent readers */ | 67 | struct mutex ring_cons_mutex; /* protect against concurrent readers */ |
68 | spinlock_t ring_prod_lock; /* protect against concurrent interrupts */ | ||
67 | 69 | ||
68 | /* Processes wait on this queue when ring is empty. */ | 70 | /* Processes wait on this queue when ring is empty. */ |
69 | wait_queue_head_t evtchn_wait; | 71 | wait_queue_head_t evtchn_wait; |
@@ -71,54 +73,79 @@ struct per_user_data { | |||
71 | const char *name; | 73 | const char *name; |
72 | }; | 74 | }; |
73 | 75 | ||
74 | /* | 76 | struct user_evtchn { |
75 | * Who's bound to each port? This is logically an array of struct | 77 | struct rb_node node; |
76 | * per_user_data *, but we encode the current enabled-state in bit 0. | 78 | struct per_user_data *user; |
77 | */ | 79 | unsigned port; |
78 | static unsigned long *port_user; | 80 | bool enabled; |
79 | static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ | 81 | }; |
80 | 82 | ||
81 | static inline struct per_user_data *get_port_user(unsigned port) | 83 | static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) |
82 | { | 84 | { |
83 | return (struct per_user_data *)(port_user[port] & ~1); | 85 | struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; |
84 | } | ||
85 | 86 | ||
86 | static inline void set_port_user(unsigned port, struct per_user_data *u) | 87 | while (*new) { |
87 | { | 88 | struct user_evtchn *this; |
88 | port_user[port] = (unsigned long)u; | 89 | |
90 | this = container_of(*new, struct user_evtchn, node); | ||
91 | |||
92 | parent = *new; | ||
93 | if (this->port < evtchn->port) | ||
94 | new = &((*new)->rb_left); | ||
95 | else if (this->port > evtchn->port) | ||
96 | new = &((*new)->rb_right); | ||
97 | else | ||
98 | return -EEXIST; | ||
99 | } | ||
100 | |||
101 | /* Add new node and rebalance tree. */ | ||
102 | rb_link_node(&evtchn->node, parent, new); | ||
103 | rb_insert_color(&evtchn->node, &u->evtchns); | ||
104 | |||
105 | return 0; | ||
89 | } | 106 | } |
90 | 107 | ||
91 | static inline bool get_port_enabled(unsigned port) | 108 | static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) |
92 | { | 109 | { |
93 | return port_user[port] & 1; | 110 | rb_erase(&evtchn->node, &u->evtchns); |
111 | kfree(evtchn); | ||
94 | } | 112 | } |
95 | 113 | ||
96 | static inline void set_port_enabled(unsigned port, bool enabled) | 114 | static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port) |
97 | { | 115 | { |
98 | if (enabled) | 116 | struct rb_node *node = u->evtchns.rb_node; |
99 | port_user[port] |= 1; | 117 | |
100 | else | 118 | while (node) { |
101 | port_user[port] &= ~1; | 119 | struct user_evtchn *evtchn; |
120 | |||
121 | evtchn = container_of(node, struct user_evtchn, node); | ||
122 | |||
123 | if (evtchn->port < port) | ||
124 | node = node->rb_left; | ||
125 | else if (evtchn->port > port) | ||
126 | node = node->rb_right; | ||
127 | else | ||
128 | return evtchn; | ||
129 | } | ||
130 | return NULL; | ||
102 | } | 131 | } |
103 | 132 | ||
104 | static irqreturn_t evtchn_interrupt(int irq, void *data) | 133 | static irqreturn_t evtchn_interrupt(int irq, void *data) |
105 | { | 134 | { |
106 | unsigned int port = (unsigned long)data; | 135 | struct user_evtchn *evtchn = data; |
107 | struct per_user_data *u; | 136 | struct per_user_data *u = evtchn->user; |
108 | |||
109 | spin_lock(&port_user_lock); | ||
110 | |||
111 | u = get_port_user(port); | ||
112 | 137 | ||
113 | WARN(!get_port_enabled(port), | 138 | WARN(!evtchn->enabled, |
114 | "Interrupt for port %d, but apparently not enabled; per-user %p\n", | 139 | "Interrupt for port %d, but apparently not enabled; per-user %p\n", |
115 | port, u); | 140 | evtchn->port, u); |
116 | 141 | ||
117 | disable_irq_nosync(irq); | 142 | disable_irq_nosync(irq); |
118 | set_port_enabled(port, false); | 143 | evtchn->enabled = false; |
144 | |||
145 | spin_lock(&u->ring_prod_lock); | ||
119 | 146 | ||
120 | if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { | 147 | if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { |
121 | u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; | 148 | u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port; |
122 | wmb(); /* Ensure ring contents visible */ | 149 | wmb(); /* Ensure ring contents visible */ |
123 | if (u->ring_cons == u->ring_prod++) { | 150 | if (u->ring_cons == u->ring_prod++) { |
124 | wake_up_interruptible(&u->evtchn_wait); | 151 | wake_up_interruptible(&u->evtchn_wait); |
@@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) | |||
128 | } else | 155 | } else |
129 | u->ring_overflow = 1; | 156 | u->ring_overflow = 1; |
130 | 157 | ||
131 | spin_unlock(&port_user_lock); | 158 | spin_unlock(&u->ring_prod_lock); |
132 | 159 | ||
133 | return IRQ_HANDLED; | 160 | return IRQ_HANDLED; |
134 | } | 161 | } |
@@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, | |||
229 | if (copy_from_user(kbuf, buf, count) != 0) | 256 | if (copy_from_user(kbuf, buf, count) != 0) |
230 | goto out; | 257 | goto out; |
231 | 258 | ||
232 | spin_lock_irq(&port_user_lock); | 259 | mutex_lock(&u->bind_mutex); |
233 | 260 | ||
234 | for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { | 261 | for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { |
235 | unsigned port = kbuf[i]; | 262 | unsigned port = kbuf[i]; |
263 | struct user_evtchn *evtchn; | ||
236 | 264 | ||
237 | if (port < NR_EVENT_CHANNELS && | 265 | evtchn = find_evtchn(u, port); |
238 | get_port_user(port) == u && | 266 | if (evtchn && !evtchn->enabled) { |
239 | !get_port_enabled(port)) { | 267 | evtchn->enabled = true; |
240 | set_port_enabled(port, true); | ||
241 | enable_irq(irq_from_evtchn(port)); | 268 | enable_irq(irq_from_evtchn(port)); |
242 | } | 269 | } |
243 | } | 270 | } |
244 | 271 | ||
245 | spin_unlock_irq(&port_user_lock); | 272 | mutex_unlock(&u->bind_mutex); |
246 | 273 | ||
247 | rc = count; | 274 | rc = count; |
248 | 275 | ||
@@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, | |||
253 | 280 | ||
254 | static int evtchn_bind_to_user(struct per_user_data *u, int port) | 281 | static int evtchn_bind_to_user(struct per_user_data *u, int port) |
255 | { | 282 | { |
283 | struct user_evtchn *evtchn; | ||
284 | struct evtchn_close close; | ||
256 | int rc = 0; | 285 | int rc = 0; |
257 | 286 | ||
258 | /* | 287 | /* |
@@ -263,35 +292,47 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) | |||
263 | * interrupt handler yet, and our caller has already | 292 | * interrupt handler yet, and our caller has already |
264 | * serialized bind operations.) | 293 | * serialized bind operations.) |
265 | */ | 294 | */ |
266 | BUG_ON(get_port_user(port) != NULL); | 295 | |
267 | set_port_user(port, u); | 296 | evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL); |
268 | set_port_enabled(port, true); /* start enabled */ | 297 | if (!evtchn) |
298 | return -ENOMEM; | ||
299 | |||
300 | evtchn->user = u; | ||
301 | evtchn->port = port; | ||
302 | evtchn->enabled = true; /* start enabled */ | ||
303 | |||
304 | rc = add_evtchn(u, evtchn); | ||
305 | if (rc < 0) | ||
306 | goto err; | ||
269 | 307 | ||
270 | rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, | 308 | rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, |
271 | u->name, (void *)(unsigned long)port); | 309 | u->name, evtchn); |
272 | if (rc >= 0) | 310 | if (rc < 0) |
273 | rc = evtchn_make_refcounted(port); | 311 | goto err; |
274 | else { | ||
275 | /* bind failed, should close the port now */ | ||
276 | struct evtchn_close close; | ||
277 | close.port = port; | ||
278 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | ||
279 | BUG(); | ||
280 | set_port_user(port, NULL); | ||
281 | } | ||
282 | 312 | ||
313 | rc = evtchn_make_refcounted(port); | ||
314 | return rc; | ||
315 | |||
316 | err: | ||
317 | /* bind failed, should close the port now */ | ||
318 | close.port = port; | ||
319 | if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) | ||
320 | BUG(); | ||
321 | del_evtchn(u, evtchn); | ||
322 | kfree(evtchn); | ||
283 | return rc; | 323 | return rc; |
284 | } | 324 | } |
285 | 325 | ||
286 | static void evtchn_unbind_from_user(struct per_user_data *u, int port) | 326 | static void evtchn_unbind_from_user(struct per_user_data *u, |
327 | struct user_evtchn *evtchn) | ||
287 | { | 328 | { |
288 | int irq = irq_from_evtchn(port); | 329 | int irq = irq_from_evtchn(evtchn->port); |
289 | 330 | ||
290 | BUG_ON(irq < 0); | 331 | BUG_ON(irq < 0); |
291 | 332 | ||
292 | unbind_from_irqhandler(irq, (void *)(unsigned long)port); | 333 | unbind_from_irqhandler(irq, evtchn); |
293 | 334 | ||
294 | set_port_user(port, NULL); | 335 | del_evtchn(u, evtchn); |
295 | } | 336 | } |
296 | 337 | ||
297 | static long evtchn_ioctl(struct file *file, | 338 | static long evtchn_ioctl(struct file *file, |
@@ -370,6 +411,7 @@ static long evtchn_ioctl(struct file *file, | |||
370 | 411 | ||
371 | case IOCTL_EVTCHN_UNBIND: { | 412 | case IOCTL_EVTCHN_UNBIND: { |
372 | struct ioctl_evtchn_unbind unbind; | 413 | struct ioctl_evtchn_unbind unbind; |
414 | struct user_evtchn *evtchn; | ||
373 | 415 | ||
374 | rc = -EFAULT; | 416 | rc = -EFAULT; |
375 | if (copy_from_user(&unbind, uarg, sizeof(unbind))) | 417 | if (copy_from_user(&unbind, uarg, sizeof(unbind))) |
@@ -380,29 +422,27 @@ static long evtchn_ioctl(struct file *file, | |||
380 | break; | 422 | break; |
381 | 423 | ||
382 | rc = -ENOTCONN; | 424 | rc = -ENOTCONN; |
383 | if (get_port_user(unbind.port) != u) | 425 | evtchn = find_evtchn(u, unbind.port); |
426 | if (!evtchn) | ||
384 | break; | 427 | break; |
385 | 428 | ||
386 | disable_irq(irq_from_evtchn(unbind.port)); | 429 | disable_irq(irq_from_evtchn(unbind.port)); |
387 | 430 | evtchn_unbind_from_user(u, evtchn); | |
388 | evtchn_unbind_from_user(u, unbind.port); | ||
389 | |||
390 | rc = 0; | 431 | rc = 0; |
391 | break; | 432 | break; |
392 | } | 433 | } |
393 | 434 | ||
394 | case IOCTL_EVTCHN_NOTIFY: { | 435 | case IOCTL_EVTCHN_NOTIFY: { |
395 | struct ioctl_evtchn_notify notify; | 436 | struct ioctl_evtchn_notify notify; |
437 | struct user_evtchn *evtchn; | ||
396 | 438 | ||
397 | rc = -EFAULT; | 439 | rc = -EFAULT; |
398 | if (copy_from_user(&notify, uarg, sizeof(notify))) | 440 | if (copy_from_user(&notify, uarg, sizeof(notify))) |
399 | break; | 441 | break; |
400 | 442 | ||
401 | if (notify.port >= NR_EVENT_CHANNELS) { | 443 | rc = -ENOTCONN; |
402 | rc = -EINVAL; | 444 | evtchn = find_evtchn(u, notify.port); |
403 | } else if (get_port_user(notify.port) != u) { | 445 | if (evtchn) { |
404 | rc = -ENOTCONN; | ||
405 | } else { | ||
406 | notify_remote_via_evtchn(notify.port); | 446 | notify_remote_via_evtchn(notify.port); |
407 | rc = 0; | 447 | rc = 0; |
408 | } | 448 | } |
@@ -412,9 +452,9 @@ static long evtchn_ioctl(struct file *file, | |||
412 | case IOCTL_EVTCHN_RESET: { | 452 | case IOCTL_EVTCHN_RESET: { |
413 | /* Initialise the ring to empty. Clear errors. */ | 453 | /* Initialise the ring to empty. Clear errors. */ |
414 | mutex_lock(&u->ring_cons_mutex); | 454 | mutex_lock(&u->ring_cons_mutex); |
415 | spin_lock_irq(&port_user_lock); | 455 | spin_lock_irq(&u->ring_prod_lock); |
416 | u->ring_cons = u->ring_prod = u->ring_overflow = 0; | 456 | u->ring_cons = u->ring_prod = u->ring_overflow = 0; |
417 | spin_unlock_irq(&port_user_lock); | 457 | spin_unlock_irq(&u->ring_prod_lock); |
418 | mutex_unlock(&u->ring_cons_mutex); | 458 | mutex_unlock(&u->ring_cons_mutex); |
419 | rc = 0; | 459 | rc = 0; |
420 | break; | 460 | break; |
@@ -473,6 +513,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) | |||
473 | 513 | ||
474 | mutex_init(&u->bind_mutex); | 514 | mutex_init(&u->bind_mutex); |
475 | mutex_init(&u->ring_cons_mutex); | 515 | mutex_init(&u->ring_cons_mutex); |
516 | spin_lock_init(&u->ring_prod_lock); | ||
476 | 517 | ||
477 | filp->private_data = u; | 518 | filp->private_data = u; |
478 | 519 | ||
@@ -481,15 +522,15 @@ static int evtchn_open(struct inode *inode, struct file *filp) | |||
481 | 522 | ||
482 | static int evtchn_release(struct inode *inode, struct file *filp) | 523 | static int evtchn_release(struct inode *inode, struct file *filp) |
483 | { | 524 | { |
484 | int i; | ||
485 | struct per_user_data *u = filp->private_data; | 525 | struct per_user_data *u = filp->private_data; |
526 | struct rb_node *node; | ||
486 | 527 | ||
487 | for (i = 0; i < NR_EVENT_CHANNELS; i++) { | 528 | while ((node = u->evtchns.rb_node)) { |
488 | if (get_port_user(i) != u) | 529 | struct user_evtchn *evtchn; |
489 | continue; | ||
490 | 530 | ||
491 | disable_irq(irq_from_evtchn(i)); | 531 | evtchn = rb_entry(node, struct user_evtchn, node); |
492 | evtchn_unbind_from_user(get_port_user(i), i); | 532 | disable_irq(irq_from_evtchn(evtchn->port)); |
533 | evtchn_unbind_from_user(u, evtchn); | ||
493 | } | 534 | } |
494 | 535 | ||
495 | free_page((unsigned long)u->ring); | 536 | free_page((unsigned long)u->ring); |
@@ -523,12 +564,6 @@ static int __init evtchn_init(void) | |||
523 | if (!xen_domain()) | 564 | if (!xen_domain()) |
524 | return -ENODEV; | 565 | return -ENODEV; |
525 | 566 | ||
526 | port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); | ||
527 | if (port_user == NULL) | ||
528 | return -ENOMEM; | ||
529 | |||
530 | spin_lock_init(&port_user_lock); | ||
531 | |||
532 | /* Create '/dev/xen/evtchn'. */ | 567 | /* Create '/dev/xen/evtchn'. */ |
533 | err = misc_register(&evtchn_miscdev); | 568 | err = misc_register(&evtchn_miscdev); |
534 | if (err != 0) { | 569 | if (err != 0) { |
@@ -543,9 +578,6 @@ static int __init evtchn_init(void) | |||
543 | 578 | ||
544 | static void __exit evtchn_cleanup(void) | 579 | static void __exit evtchn_cleanup(void) |
545 | { | 580 | { |
546 | kfree(port_user); | ||
547 | port_user = NULL; | ||
548 | |||
549 | misc_deregister(&evtchn_miscdev); | 581 | misc_deregister(&evtchn_miscdev); |
550 | } | 582 | } |
551 | 583 | ||