aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2013-01-23 16:54:32 -0500
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2013-06-17 15:17:16 -0400
commit8e3f8755545cc4a7f4da8e9ef76d6d32e0dca576 (patch)
tree1be09ec25e1ac7c441f6b4596b4854474d25f9c2 /drivers
parent8d9256906a97c24e97e016482b9be06ea2532b05 (diff)
xen/blkback: Check for insane amounts of request on the ring (v6).
Check that the ring does not have an insane amount of requests (more than there could fit on the ring). If we detect this case we will stop processing the requests and wait until the XenBus disconnects the ring. The existing check RING_REQUEST_CONS_OVERFLOW which checks for how many responses we have created in the past (rsp_prod_pvt) vs requests consumed (req_cons) and whether said difference is greater or equal to the size of the ring, does not catch this case. Wha the condition does check if there is a need to process more as we still have a backlog of responses to finish. Note that both of those values (rsp_prod_pvt and req_cons) are not exposed on the shared ring. To understand this problem a mini crash course in ring protocol response/request updates is in place. There are four entries: req_prod and rsp_prod; req_event and rsp_event to track the ring entries. We are only concerned about the first two - which set the tone of this bug. The req_prod is a value incremented by frontend for each request put on the ring. Conversely the rsp_prod is a value incremented by the backend for each response put on the ring (rsp_prod gets set by rsp_prod_pvt when pushing the responses on the ring). Both values can wrap and are modulo the size of the ring (in block case that is 32). Please see RING_GET_REQUEST and RING_GET_RESPONSE for the more details. The culprit here is that if the difference between the req_prod and req_cons is greater than the ring size we have a problem. Fortunately for us, the '__do_block_io_op' loop: rc = blk_rings->common.req_cons; rp = blk_rings->common.sring->req_prod; while (rc != rp) { .. blk_rings->common.req_cons = ++rc; /* before make_response() */ } will loop up to the point when rc == rp. The macros inside of the loop (RING_GET_REQUEST) is smart and is indexing based on the modulo of the ring size. If the frontend has provided a bogus req_prod value we will loop until the 'rc == rp' - which means we could be processing already processed requests (or responses) often. The reason the RING_REQUEST_CONS_OVERFLOW is not helping here is b/c it only tracks how many responses we have internally produced and whether we would should process more. The astute reader will notice that the macro RING_REQUEST_CONS_OVERFLOW provides two arguments - more on this later. For example, if we were to enter this function with these values: blk_rings->common.sring->req_prod = X+31415 (X is the value from the last time __do_block_io_op was called). blk_rings->common.req_cons = X blk_rings->common.rsp_prod_pvt = X The RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, blk_rings->common.req_cons) is doing: req_cons - rsp_prod_pvt >= 32 Which is, X - X >= 32 or 0 >= 32 And that is false, so we continue on looping (this bug). If we re-use said macro RING_REQUEST_CONS_OVERFLOW and pass in the rp instead (sring->req_prod) of rc, the this macro can do the check: req_prod - rsp_prov_pvt >= 32 Which is, X + 31415 - X >= 32 , or 31415 >= 32 which is true, so we can error out and break out of the function. Unfortunatly the difference between rsp_prov_pvt and req_prod can be at 32 (which would error out in the macro). This condition exists when the backend is lagging behind with the responses and still has not finished responding to all of them (so make_response has not been called), and the rsp_prov_pvt + 32 == req_cons. This ends up with us not being able to use said macro. Hence introducing a new macro called RING_REQUEST_PROD_OVERFLOW which does a simple check of: req_prod - rsp_prod_pvt > RING_SIZE And with the X values from above: X + 31415 - X > 32 Returns true. Also not that if the ring is full (which is where the RING_REQUEST_CONS_OVERFLOW triggered), we would not hit the same condition: X + 32 - X > 32 Which is false. Lets use that macro. Note that in v5 of this patchset the macro was different - we used an earlier version. Cc: stable@vger.kernel.org [v1: Move the check outside the loop] [v2: Add a pr_warn as suggested by David] [v3: Use RING_REQUEST_CONS_OVERFLOW as suggested by Jan] [v4: Move wake_up after kthread_stop as suggested by Jan] [v5: Use RING_REQUEST_PROD_OVERFLOW instead] [v6: Use RING_REQUEST_PROD_OVERFLOW - Jan's version] Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> gadsa
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/xen-blkback/blkback.c13
-rw-r--r--drivers/block/xen-blkback/common.h2
-rw-r--r--drivers/block/xen-blkback/xenbus.c2
3 files changed, 16 insertions, 1 deletions
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 4119bcdefd1a..ea158fe0c9a4 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -571,6 +571,7 @@ int xen_blkif_schedule(void *arg)
571 struct xen_blkif *blkif = arg; 571 struct xen_blkif *blkif = arg;
572 struct xen_vbd *vbd = &blkif->vbd; 572 struct xen_vbd *vbd = &blkif->vbd;
573 unsigned long timeout; 573 unsigned long timeout;
574 int ret;
574 575
575 xen_blkif_get(blkif); 576 xen_blkif_get(blkif);
576 577
@@ -599,8 +600,12 @@ int xen_blkif_schedule(void *arg)
599 blkif->waiting_reqs = 0; 600 blkif->waiting_reqs = 0;
600 smp_mb(); /* clear flag *before* checking for work */ 601 smp_mb(); /* clear flag *before* checking for work */
601 602
602 if (do_block_io_op(blkif)) 603 ret = do_block_io_op(blkif);
604 if (ret > 0)
603 blkif->waiting_reqs = 1; 605 blkif->waiting_reqs = 1;
606 if (ret == -EACCES)
607 wait_event_interruptible(blkif->shutdown_wq,
608 kthread_should_stop());
604 609
605purge_gnt_list: 610purge_gnt_list:
606 if (blkif->vbd.feature_gnt_persistent && 611 if (blkif->vbd.feature_gnt_persistent &&
@@ -1009,6 +1014,12 @@ __do_block_io_op(struct xen_blkif *blkif)
1009 rp = blk_rings->common.sring->req_prod; 1014 rp = blk_rings->common.sring->req_prod;
1010 rmb(); /* Ensure we see queued requests up to 'rp'. */ 1015 rmb(); /* Ensure we see queued requests up to 'rp'. */
1011 1016
1017 if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
1018 rc = blk_rings->common.rsp_prod_pvt;
1019 pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
1020 rp, rc, rp - rc, blkif->vbd.pdevice);
1021 return -EACCES;
1022 }
1012 while (rc != rp) { 1023 while (rc != rp) {
1013 1024
1014 if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) 1025 if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index c6b4cb9af6c2..8d8807563d99 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -314,6 +314,8 @@ struct xen_blkif {
314 unsigned long long st_wr_sect; 314 unsigned long long st_wr_sect;
315 315
316 wait_queue_head_t waiting_to_free; 316 wait_queue_head_t waiting_to_free;
317 /* Thread shutdown wait queue. */
318 wait_queue_head_t shutdown_wq;
317}; 319};
318 320
319struct seg_buf { 321struct seg_buf {
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 7b06f943371c..2e5b69d612ac 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -151,6 +151,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
151 } 151 }
152 spin_lock_init(&blkif->pending_free_lock); 152 spin_lock_init(&blkif->pending_free_lock);
153 init_waitqueue_head(&blkif->pending_free_wq); 153 init_waitqueue_head(&blkif->pending_free_wq);
154 init_waitqueue_head(&blkif->shutdown_wq);
154 155
155 return blkif; 156 return blkif;
156 157
@@ -231,6 +232,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
231{ 232{
232 if (blkif->xenblkd) { 233 if (blkif->xenblkd) {
233 kthread_stop(blkif->xenblkd); 234 kthread_stop(blkif->xenblkd);
235 wake_up(&blkif->shutdown_wq);
234 blkif->xenblkd = NULL; 236 blkif->xenblkd = NULL;
235 } 237 }
236 238