aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2012-02-08 03:19:42 -0500
committerJens Axboe <axboe@kernel.dk>2012-02-08 03:19:42 -0500
commit07c2bd37350c9b1af71b35d05f16e300a6602948 (patch)
treee45ee2952fb78d6d8f2372c8ea1f854da825fa90
parent050c8ea80e3e90019d9e981c6a117ef614e882ed (diff)
block: don't call elevator callbacks for plug merges
Plug merge calls two elevator callbacks outside queue lock - elevator_allow_merge_fn() and elevator_bio_merged_fn(). Although attempt_plug_merge() suggests that elevator is guaranteed to be there through the existing request on the plug list, nothing prevents plug merge from calling into dying or initializing elevator. For regular merges, bypass ensures elvpriv count to reach zero, which in turn prevents merges as all !ELVPRIV requests get REQ_SOFTBARRIER from forced back insertion. Plug merge doesn't check ELVPRIV, and, as the requests haven't gone through elevator insertion yet, it doesn't have SOFTBARRIER set allowing merges on a bypassed queue. This, for example, leads to the following crash during elevator switch. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [<ffffffff813b34e9>] cfq_allow_merge+0x49/0xa0 PGD 112cbc067 PUD 115d5c067 PMD 0 Oops: 0000 [#1] PREEMPT SMP CPU 1 Modules linked in: deadline_iosched Pid: 819, comm: dd Not tainted 3.3.0-rc2-work+ #76 Bochs Bochs RIP: 0010:[<ffffffff813b34e9>] [<ffffffff813b34e9>] cfq_allow_merge+0x49/0xa0 RSP: 0018:ffff8801143a38f8 EFLAGS: 00010297 RAX: 0000000000000000 RBX: ffff88011817ce28 RCX: ffff880116eb6cc0 RDX: 0000000000000000 RSI: ffff880118056e20 RDI: ffff8801199512f8 RBP: ffff8801143a3908 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff880118195708 R13: ffff880118052aa0 R14: ffff8801143a3d50 R15: ffff880118195708 FS: 00007f19f82cb700(0000) GS:ffff88011fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000008 CR3: 0000000112c6a000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process dd (pid: 819, threadinfo ffff8801143a2000, task ffff880116eb6cc0) Stack: ffff88011817ce28 ffff880118195708 ffff8801143a3928 ffffffff81391bba ffff88011817ce28 ffff880118195708 ffff8801143a3948 ffffffff81391bf1 ffff88011817ce28 0000000000000000 ffff8801143a39a8 ffffffff81398e3e Call Trace: [<ffffffff81391bba>] elv_rq_merge_ok+0x4a/0x60 [<ffffffff81391bf1>] elv_try_merge+0x21/0x40 [<ffffffff81398e3e>] blk_queue_bio+0x8e/0x390 [<ffffffff81396a5a>] generic_make_request+0xca/0x100 [<ffffffff81396b04>] submit_bio+0x74/0x100 [<ffffffff811d45c2>] __blockdev_direct_IO+0x1ce2/0x3450 [<ffffffff811d0dc7>] blkdev_direct_IO+0x57/0x60 [<ffffffff811460b5>] generic_file_aio_read+0x6d5/0x760 [<ffffffff811986b2>] do_sync_read+0xe2/0x120 [<ffffffff81199345>] vfs_read+0xc5/0x180 [<ffffffff81199501>] sys_read+0x51/0x90 [<ffffffff81aeac12>] system_call_fastpath+0x16/0x1b There are multiple ways to fix this including making plug merge check ELVPRIV; however, * Calling into elevator outside queue lock is confusing and error-prone. * Requests on plug list aren't known to the elevator. They aren't on the elevator yet, so there's no elevator specific state to update. * Given the nature of plug merges - collecting bio's for the same purpose from the same issuer - elevator specific restrictions aren't applicable. So, simply don't call into elevator methods from plug merge by moving elv_bio_merged() from bio_attempt_*_merge() to blk_queue_bio(), and using blk_try_merge() in attempt_plug_merge(). This is based on Jens' patch to skip elevator_allow_merge_fn() from plug merge. Note that this makes per-cgroup merged stats skip plug merging. Signed-off-by: Tejun Heo <tj@kernel.org> LKML-Reference: <4F16F3CA.90904@kernel.dk> Original-patch-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/blk-core.c19
-rw-r--r--block/cfq-iosched.c15
-rw-r--r--include/linux/elevator.h6
3 files changed, 13 insertions, 27 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index fa697bf691eb..3a78b00edd71 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1212,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1212 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); 1212 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1213 1213
1214 drive_stat_acct(req, 0); 1214 drive_stat_acct(req, 0);
1215 elv_bio_merged(q, req, bio);
1216 return true; 1215 return true;
1217} 1216}
1218 1217
@@ -1243,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
1243 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); 1242 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1244 1243
1245 drive_stat_acct(req, 0); 1244 drive_stat_acct(req, 0);
1246 elv_bio_merged(q, req, bio);
1247 return true; 1245 return true;
1248} 1246}
1249 1247
@@ -1257,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
1257 * on %current's plugged list. Returns %true if merge was successful, 1255 * on %current's plugged list. Returns %true if merge was successful,
1258 * otherwise %false. 1256 * otherwise %false.
1259 * 1257 *
1260 * This function is called without @q->queue_lock; however, elevator is 1258 * Plugging coalesces IOs from the same issuer for the same purpose without
1261 * accessed iff there already are requests on the plugged list which in 1259 * going through @q->queue_lock. As such it's more of an issuing mechanism
1262 * turn guarantees validity of the elevator. 1260 * than scheduling, and the request, while may have elvpriv data, is not
1263 * 1261 * added on the elevator at this point. In addition, we don't have
1264 * Note that, on successful merge, elevator operation 1262 * reliable access to the elevator outside queue lock. Only check basic
1265 * elevator_bio_merged_fn() will be called without queue lock. Elevator 1263 * merging parameters without querying the elevator.
1266 * must be ready for this.
1267 */ 1264 */
1268static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, 1265static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
1269 unsigned int *request_count) 1266 unsigned int *request_count)
@@ -1282,7 +1279,7 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
1282 1279
1283 (*request_count)++; 1280 (*request_count)++;
1284 1281
1285 if (rq->q != q || !elv_rq_merge_ok(rq, bio)) 1282 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
1286 continue; 1283 continue;
1287 1284
1288 el_ret = blk_try_merge(rq, bio); 1285 el_ret = blk_try_merge(rq, bio);
@@ -1347,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
1347 el_ret = elv_merge(q, &req, bio); 1344 el_ret = elv_merge(q, &req, bio);
1348 if (el_ret == ELEVATOR_BACK_MERGE) { 1345 if (el_ret == ELEVATOR_BACK_MERGE) {
1349 if (bio_attempt_back_merge(q, req, bio)) { 1346 if (bio_attempt_back_merge(q, req, bio)) {
1347 elv_bio_merged(q, req, bio);
1350 if (!attempt_back_merge(q, req)) 1348 if (!attempt_back_merge(q, req))
1351 elv_merged_request(q, req, el_ret); 1349 elv_merged_request(q, req, el_ret);
1352 goto out_unlock; 1350 goto out_unlock;
1353 } 1351 }
1354 } else if (el_ret == ELEVATOR_FRONT_MERGE) { 1352 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1355 if (bio_attempt_front_merge(q, req, bio)) { 1353 if (bio_attempt_front_merge(q, req, bio)) {
1354 elv_bio_merged(q, req, bio);
1356 if (!attempt_front_merge(q, req)) 1355 if (!attempt_front_merge(q, req))
1357 elv_merged_request(q, req, el_ret); 1356 elv_merged_request(q, req, el_ret);
1358 goto out_unlock; 1357 goto out_unlock;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5684df6848bc..d0ba50533668 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1699,18 +1699,11 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
1699 1699
1700 /* 1700 /*
1701 * Lookup the cfqq that this bio will be queued with and allow 1701 * Lookup the cfqq that this bio will be queued with and allow
1702 * merge only if rq is queued there. This function can be called 1702 * merge only if rq is queued there.
1703 * from plug merge without queue_lock. In such cases, ioc of @rq
1704 * and %current are guaranteed to be equal. Avoid lookup which
1705 * requires queue_lock by using @rq's cic.
1706 */ 1703 */
1707 if (current->io_context == RQ_CIC(rq)->icq.ioc) { 1704 cic = cfq_cic_lookup(cfqd, current->io_context);
1708 cic = RQ_CIC(rq); 1705 if (!cic)
1709 } else { 1706 return false;
1710 cic = cfq_cic_lookup(cfqd, current->io_context);
1711 if (!cic)
1712 return false;
1713 }
1714 1707
1715 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio)); 1708 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
1716 return cfqq == RQ_CFQQ(rq); 1709 return cfqq == RQ_CFQQ(rq);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index d6dfb65c8885..7d4e0356f329 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -42,12 +42,6 @@ struct elevator_ops
42 elevator_merged_fn *elevator_merged_fn; 42 elevator_merged_fn *elevator_merged_fn;
43 elevator_merge_req_fn *elevator_merge_req_fn; 43 elevator_merge_req_fn *elevator_merge_req_fn;
44 elevator_allow_merge_fn *elevator_allow_merge_fn; 44 elevator_allow_merge_fn *elevator_allow_merge_fn;
45
46 /*
47 * Used for both plugged list and elevator merging and in the
48 * former case called without queue_lock. Read comment on top of
49 * attempt_plug_merge() for details.
50 */
51 elevator_bio_merged_fn *elevator_bio_merged_fn; 45 elevator_bio_merged_fn *elevator_bio_merged_fn;
52 46
53 elevator_dispatch_fn *elevator_dispatch_fn; 47 elevator_dispatch_fn *elevator_dispatch_fn;