about summary refs log tree commit diff stats
path: root/drivers/block
diff options
context:
space:
mode:
authorValentin Priescu <priescuv@amazon.com>2014-05-20 16:28:50 -0400
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2014-05-28 14:17:32 -0400
commit814d04e7dfc4a9cf7e36656afe2da5c0c08dde2b (patch)
treefc7ecdbe2026aaebafc52f513cc9e46aee58684a /drivers/block
parent1c339ef74b431bab18bc11fb69e46db294070dd3 (diff)
xen-blkback: defer freeing blkif to avoid blocking xenwatch
Currently xenwatch blocks in VBD disconnect, waiting for all pending I/O requests to finish. If the VBD is attached to a hot-swappable disk, then xenwatch can hang for a long period of time, stalling other watches.

 INFO: task xenwatch:39 blocked for more than 120 seconds.
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 ffff880057f01bd0 0000000000000246 ffff880057f01ac0 ffffffff810b0782
 ffff880057f01ad0 00000000000131c0 0000000000000004 ffff880057edb040
 ffff8800344c6080 0000000000000000 ffff880058c00ba0 ffff880057edb040
 Call Trace:
  [<ffffffff810b0782>] ? irq_to_desc+0x12/0x20
  [<ffffffff8128f761>] ? list_del+0x11/0x40
  [<ffffffff8147a080>] ? wait_for_common+0x60/0x160
  [<ffffffff8147bcef>] ? _raw_spin_lock_irqsave+0x2f/0x50
  [<ffffffff8147bd49>] ? _raw_spin_unlock_irqrestore+0x19/0x20
  [<ffffffff8147a26a>] schedule+0x3a/0x60
  [<ffffffffa018fe6a>] xen_blkif_disconnect+0x8a/0x100 [xen_blkback]
  [<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
  [<ffffffffa018ffce>] xen_blkbk_remove+0xae/0x1e0 [xen_blkback]
  [<ffffffff8130b254>] xenbus_dev_remove+0x44/0x90
  [<ffffffff81345cb7>] __device_release_driver+0x77/0xd0
  [<ffffffff81346488>] device_release_driver+0x28/0x40
  [<ffffffff813456e8>] bus_remove_device+0x78/0xe0
  [<ffffffff81342c9f>] device_del+0x12f/0x1a0
  [<ffffffff81342d2d>] device_unregister+0x1d/0x60
  [<ffffffffa0190826>] frontend_changed+0xa6/0x4d0 [xen_blkback]
  [<ffffffffa019c252>] ? frontend_changed+0x192/0x650 [xen_netback]
  [<ffffffff8130ae50>] ? cmp_dev+0x60/0x60
  [<ffffffff81344fe4>] ? bus_for_each_dev+0x94/0xa0
  [<ffffffff8130b06e>] xenbus_otherend_changed+0xbe/0x120
  [<ffffffff8130b4cb>] frontend_changed+0xb/0x10
  [<ffffffff81309c82>] xenwatch_thread+0xf2/0x130
  [<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
  [<ffffffff81309b90>] ? xenbus_directory+0x80/0x80
  [<ffffffff810799d6>] kthread+0x96/0xa0
  [<ffffffff81485934>] kernel_thread_helper+0x4/0x10
  [<ffffffff814839f3>] ? int_ret_from_sys_call+0x7/0x1b
  [<ffffffff8147c17c>] ? retint_restore_args+0x5/0x6
  [<ffffffff81485930>] ? gs_change+0x13/0x13

With this patch, when there is still pending I/O, the actual disconnect is done by the last reference holder (last pending I/O request). In this case, xenwatch doesn't block indefinitely.

Signed-off-by: Valentin Priescu <priescuv@amazon.com>
Reviewed-by: Steven Kady <stevkady@amazon.com>
Reviewed-by: Steven Noonan <snoonan@amazon.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/xen-blkback/common.h  4
-rw-r--r--  drivers/block/xen-blkback/xenbus.c  46
2 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index be052773ad03..f65b807e3236 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -314,7 +314,7 @@ struct xen_blkif {
314 unsigned long long st_rd_sect; 314 unsigned long long st_rd_sect;
315 unsigned long long st_wr_sect; 315 unsigned long long st_wr_sect;
316 316
317 wait_queue_head_t waiting_to_free; 317 struct work_struct free_work;
318 /* Thread shutdown wait queue. */ 318 /* Thread shutdown wait queue. */
319 wait_queue_head_t shutdown_wq; 319 wait_queue_head_t shutdown_wq;
320}; 320};
@@ -361,7 +361,7 @@ struct pending_req {
361#define xen_blkif_put(_b) \ 361#define xen_blkif_put(_b) \
362 do { \ 362 do { \
363 if (atomic_dec_and_test(&(_b)->refcnt)) \ 363 if (atomic_dec_and_test(&(_b)->refcnt)) \
364 wake_up(&(_b)->waiting_to_free);\ 364 schedule_work(&(_b)->free_work);\
365 } while (0) 365 } while (0)
366 366
367struct phys_req { 367struct phys_req {
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index a71ecf5f4283..3a8b810b4980 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -35,12 +35,26 @@ static void connect(struct backend_info *);
35static int connect_ring(struct backend_info *); 35static int connect_ring(struct backend_info *);
36static void backend_changed(struct xenbus_watch *, const char **, 36static void backend_changed(struct xenbus_watch *, const char **,
37 unsigned int); 37 unsigned int);
38static void xen_blkif_free(struct xen_blkif *blkif);
39static void xen_vbd_free(struct xen_vbd *vbd);
38 40
39struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) 41struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
40{ 42{
41 return be->dev; 43 return be->dev;
42} 44}
43 45
46/*
47 * The last request could free the device from softirq context and
48 * xen_blkif_free() can sleep.
49 */
50static void xen_blkif_deferred_free(struct work_struct *work)
51{
52 struct xen_blkif *blkif;
53
54 blkif = container_of(work, struct xen_blkif, free_work);
55 xen_blkif_free(blkif);
56}
57
44static int blkback_name(struct xen_blkif *blkif, char *buf) 58static int blkback_name(struct xen_blkif *blkif, char *buf)
45{ 59{
46 char *devpath, *devname; 60 char *devpath, *devname;
@@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
121 init_completion(&blkif->drain_complete); 135 init_completion(&blkif->drain_complete);
122 atomic_set(&blkif->drain, 0); 136 atomic_set(&blkif->drain, 0);
123 blkif->st_print = jiffies; 137 blkif->st_print = jiffies;
124 init_waitqueue_head(&blkif->waiting_to_free);
125 blkif->persistent_gnts.rb_node = NULL; 138 blkif->persistent_gnts.rb_node = NULL;
126 spin_lock_init(&blkif->free_pages_lock); 139 spin_lock_init(&blkif->free_pages_lock);
127 INIT_LIST_HEAD(&blkif->free_pages); 140 INIT_LIST_HEAD(&blkif->free_pages);
@@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
132 INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants); 145 INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
133 146
134 INIT_LIST_HEAD(&blkif->pending_free); 147 INIT_LIST_HEAD(&blkif->pending_free);
148 INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
135 149
136 for (i = 0; i < XEN_BLKIF_REQS; i++) { 150 for (i = 0; i < XEN_BLKIF_REQS; i++) {
137 req = kzalloc(sizeof(*req), GFP_KERNEL); 151 req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
231 return 0; 245 return 0;
232} 246}
233 247
234static void xen_blkif_disconnect(struct xen_blkif *blkif) 248static int xen_blkif_disconnect(struct xen_blkif *blkif)
235{ 249{
236 if (blkif->xenblkd) { 250 if (blkif->xenblkd) {
237 kthread_stop(blkif->xenblkd); 251 kthread_stop(blkif->xenblkd);
@@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
239 blkif->xenblkd = NULL; 253 blkif->xenblkd = NULL;
240 } 254 }
241 255
242 atomic_dec(&blkif->refcnt); 256 /* The above kthread_stop() guarantees that at this point we
243 wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); 257 * don't have any discard_io or other_io requests. So, checking
244 atomic_inc(&blkif->refcnt); 258 * for inflight IO is enough.
259 */
260 if (atomic_read(&blkif->inflight) > 0)
261 return -EBUSY;
245 262
246 if (blkif->irq) { 263 if (blkif->irq) {
247 unbind_from_irqhandler(blkif->irq, blkif); 264 unbind_from_irqhandler(blkif->irq, blkif);
@@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
252 xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); 269 xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
253 blkif->blk_rings.common.sring = NULL; 270 blkif->blk_rings.common.sring = NULL;
254 } 271 }
272
273 return 0;
255} 274}
256 275
257static void xen_blkif_free(struct xen_blkif *blkif) 276static void xen_blkif_free(struct xen_blkif *blkif)
@@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
259 struct pending_req *req, *n; 278 struct pending_req *req, *n;
260 int i = 0, j; 279 int i = 0, j;
261 280
262 if (!atomic_dec_and_test(&blkif->refcnt)) 281 xen_blkif_disconnect(blkif);
263 BUG(); 282 xen_vbd_free(&blkif->vbd);
264 283
265 /* Remove all persistent grants and the cache of ballooned pages. */ 284 /* Remove all persistent grants and the cache of ballooned pages. */
266 xen_blkbk_free_caches(blkif); 285 xen_blkbk_free_caches(blkif);
@@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
449 be->backend_watch.node = NULL; 468 be->backend_watch.node = NULL;
450 } 469 }
451 470
471 dev_set_drvdata(&dev->dev, NULL);
472
452 if (be->blkif) { 473 if (be->blkif) {
453 xen_blkif_disconnect(be->blkif); 474 xen_blkif_disconnect(be->blkif);
454 xen_vbd_free(&be->blkif->vbd); 475 xen_blkif_put(be->blkif);
455 xen_blkif_free(be->blkif);
456 be->blkif = NULL;
457 } 476 }
458 477
459 kfree(be->mode); 478 kfree(be->mode);
460 kfree(be); 479 kfree(be);
461 dev_set_drvdata(&dev->dev, NULL);
462 return 0; 480 return 0;
463} 481}
464 482
@@ -705,7 +723,11 @@ static void frontend_changed(struct xenbus_device *dev,
705 * Enforce precondition before potential leak point. 723 * Enforce precondition before potential leak point.
706 * xen_blkif_disconnect() is idempotent. 724 * xen_blkif_disconnect() is idempotent.
707 */ 725 */
708 xen_blkif_disconnect(be->blkif); 726 err = xen_blkif_disconnect(be->blkif);
727 if (err) {
728 xenbus_dev_fatal(dev, err, "pending I/O");
729 break;
730 }
709 731
710 err = connect_ring(be); 732 err = connect_ring(be);
711 if (err) 733 if (err)