author     Valentin Priescu <priescuv@amazon.com>          2014-05-20 16:28:50 -0400
committer  Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>  2014-05-28 14:17:32 -0400
commit     814d04e7dfc4a9cf7e36656afe2da5c0c08dde2b
tree       fc7ecdbe2026aaebafc52f513cc9e46aee58684a /drivers/block
parent     1c339ef74b431bab18bc11fb69e46db294070dd3
xen-blkback: defer freeing blkif to avoid blocking xenwatch
Currently xenwatch blocks in VBD disconnect, waiting for all pending I/O
requests to finish. If the VBD is attached to a hot-swappable disk, then
xenwatch can hang for a long period of time, stalling other watches.
INFO: task xenwatch:39 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
ffff880057f01bd0 0000000000000246 ffff880057f01ac0 ffffffff810b0782
ffff880057f01ad0 00000000000131c0 0000000000000004 ffff880057edb040
ffff8800344c6080 0000000000000000 ffff880058c00ba0 ffff880057edb040
Call Trace:
[<ffffffff810b0782>] ? irq_to_desc+0x12/0x20
[<ffffffff8128f761>] ? list_del+0x11/0x40
[<ffffffff8147a080>] ? wait_for_common+0x60/0x160
[<ffffffff8147bcef>] ? _raw_spin_lock_irqsave+0x2f/0x50
[<ffffffff8147bd49>] ? _raw_spin_unlock_irqrestore+0x19/0x20
[<ffffffff8147a26a>] schedule+0x3a/0x60
[<ffffffffa018fe6a>] xen_blkif_disconnect+0x8a/0x100 [xen_blkback]
[<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
[<ffffffffa018ffce>] xen_blkbk_remove+0xae/0x1e0 [xen_blkback]
[<ffffffff8130b254>] xenbus_dev_remove+0x44/0x90
[<ffffffff81345cb7>] __device_release_driver+0x77/0xd0
[<ffffffff81346488>] device_release_driver+0x28/0x40
[<ffffffff813456e8>] bus_remove_device+0x78/0xe0
[<ffffffff81342c9f>] device_del+0x12f/0x1a0
[<ffffffff81342d2d>] device_unregister+0x1d/0x60
[<ffffffffa0190826>] frontend_changed+0xa6/0x4d0 [xen_blkback]
[<ffffffffa019c252>] ? frontend_changed+0x192/0x650 [xen_netback]
[<ffffffff8130ae50>] ? cmp_dev+0x60/0x60
[<ffffffff81344fe4>] ? bus_for_each_dev+0x94/0xa0
[<ffffffff8130b06e>] xenbus_otherend_changed+0xbe/0x120
[<ffffffff8130b4cb>] frontend_changed+0xb/0x10
[<ffffffff81309c82>] xenwatch_thread+0xf2/0x130
[<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
[<ffffffff81309b90>] ? xenbus_directory+0x80/0x80
[<ffffffff810799d6>] kthread+0x96/0xa0
[<ffffffff81485934>] kernel_thread_helper+0x4/0x10
[<ffffffff814839f3>] ? int_ret_from_sys_call+0x7/0x1b
[<ffffffff8147c17c>] ? retint_restore_args+0x5/0x6
[<ffffffff81485930>] ? gs_change+0x13/0x13
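The frames above show the xenwatch thread parked in schedule() inside
xen_blkif_disconnect(). The wait it is stuck in is the pre-patch refcount
drain, condensed here from the code this patch removes:

	/* Pre-patch xen_blkif_disconnect(), condensed: drop our own
	 * reference, then sleep until every pending request drops
	 * theirs -- with a hot-swapped disk this can take arbitrarily
	 * long, and it runs in the single xenwatch thread. */
	atomic_dec(&blkif->refcnt);
	wait_event(blkif->waiting_to_free,
		   atomic_read(&blkif->refcnt) == 0);
	atomic_inc(&blkif->refcnt);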
With this patch, when there is still pending I/O, the actual disconnect
is done by the last reference holder (last pending I/O request). In this
case, xenwatch doesn't block indefinitely.
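The replacement pattern, reduced to a minimal kernel-style sketch with
hypothetical "foo" names (the driver's real fields and callbacks appear in
the diff below): the teardown path returns immediately, and whoever drops
the last reference schedules the destructor on a workqueue, because the
final put can arrive from softirq context while the destructor may sleep.

#include <linux/atomic.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct foo {
	atomic_t		refcnt;
	struct work_struct	free_work;	/* defers the sleeping destructor */
};

static void foo_free(struct foo *foo)
{
	/* Stands in for xen_blkif_free(); may sleep, so it needs
	 * process context. */
	kfree(foo);
}

static void foo_deferred_free(struct work_struct *work)
{
	struct foo *foo = container_of(work, struct foo, free_work);

	foo_free(foo);	/* safe: work items run in process context */
}

/* At allocation time: atomic_set(&foo->refcnt, 1);
 * INIT_WORK(&foo->free_work, foo_deferred_free); */
static void foo_put(struct foo *foo)
{
	/* The last put may come from an I/O completion in softirq
	 * context, so hand the free to a workqueue instead of
	 * calling foo_free() directly. */
	if (atomic_dec_and_test(&foo->refcnt))
		schedule_work(&foo->free_work);
}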
Signed-off-by: Valentin Priescu <priescuv@amazon.com>
Reviewed-by: Steven Kady <stevkady@amazon.com>
Reviewed-by: Steven Noonan <snoonan@amazon.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/xen-blkback/common.h |  4
-rw-r--r--  drivers/block/xen-blkback/xenbus.c | 46
2 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index be052773ad03..f65b807e3236 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -314,7 +314,7 @@ struct xen_blkif {
 	unsigned long long			st_rd_sect;
 	unsigned long long			st_wr_sect;
 
-	wait_queue_head_t	waiting_to_free;
+	struct work_struct	free_work;
 	/* Thread shutdown wait queue. */
 	wait_queue_head_t	shutdown_wq;
 };
@@ -361,7 +361,7 @@ struct pending_req {
 #define xen_blkif_put(_b)				\
 	do {						\
 		if (atomic_dec_and_test(&(_b)->refcnt))	\
-			wake_up(&(_b)->waiting_to_free);\
+			schedule_work(&(_b)->free_work);\
 	} while (0)
 
 struct phys_req {
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index a71ecf5f4283..3a8b810b4980 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -35,12 +35,26 @@ static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
 			    unsigned int);
+static void xen_blkif_free(struct xen_blkif *blkif);
+static void xen_vbd_free(struct xen_vbd *vbd);
 
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
 {
 	return be->dev;
 }
 
+/*
+ * The last request could free the device from softirq context and
+ * xen_blkif_free() can sleep.
+ */
+static void xen_blkif_deferred_free(struct work_struct *work)
+{
+	struct xen_blkif *blkif;
+
+	blkif = container_of(work, struct xen_blkif, free_work);
+	xen_blkif_free(blkif);
+}
+
 static int blkback_name(struct xen_blkif *blkif, char *buf)
 {
 	char *devpath, *devname;
@@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	init_completion(&blkif->drain_complete);
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
-	init_waitqueue_head(&blkif->waiting_to_free);
 	blkif->persistent_gnts.rb_node = NULL;
 	spin_lock_init(&blkif->free_pages_lock);
 	INIT_LIST_HEAD(&blkif->free_pages);
@@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
 
 	INIT_LIST_HEAD(&blkif->pending_free);
+	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
 
 	for (i = 0; i < XEN_BLKIF_REQS; i++) {
 		req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
 	return 0;
 }
 
-static void xen_blkif_disconnect(struct xen_blkif *blkif)
+static int xen_blkif_disconnect(struct xen_blkif *blkif)
 {
 	if (blkif->xenblkd) {
 		kthread_stop(blkif->xenblkd);
@@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 		blkif->xenblkd = NULL;
 	}
 
-	atomic_dec(&blkif->refcnt);
-	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
-	atomic_inc(&blkif->refcnt);
+	/* The above kthread_stop() guarantees that at this point we
+	 * don't have any discard_io or other_io requests. So, checking
+	 * for inflight IO is enough.
+	 */
+	if (atomic_read(&blkif->inflight) > 0)
+		return -EBUSY;
 
 	if (blkif->irq) {
 		unbind_from_irqhandler(blkif->irq, blkif);
@@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
 		blkif->blk_rings.common.sring = NULL;
 	}
+
+	return 0;
 }
 
 static void xen_blkif_free(struct xen_blkif *blkif)
@@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 	struct pending_req *req, *n;
 	int i = 0, j;
 
-	if (!atomic_dec_and_test(&blkif->refcnt))
-		BUG();
+	xen_blkif_disconnect(blkif);
+	xen_vbd_free(&blkif->vbd);
 
 	/* Remove all persistent grants and the cache of ballooned pages. */
 	xen_blkbk_free_caches(blkif);
@@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 		be->backend_watch.node = NULL;
 	}
 
+	dev_set_drvdata(&dev->dev, NULL);
+
 	if (be->blkif) {
 		xen_blkif_disconnect(be->blkif);
-		xen_vbd_free(&be->blkif->vbd);
-		xen_blkif_free(be->blkif);
-		be->blkif = NULL;
+		xen_blkif_put(be->blkif);
 	}
 
 	kfree(be->mode);
 	kfree(be);
-	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -705,7 +723,11 @@ static void frontend_changed(struct xenbus_device *dev,
 		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
-		xen_blkif_disconnect(be->blkif);
+		err = xen_blkif_disconnect(be->blkif);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "pending I/O");
+			break;
+		}
 
 		err = connect_ring(be);
 		if (err)