aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vivek Goyal <vgoyal@redhat.com> 2010-04-21 11:44:16 -0400
committer Jens Axboe <jens.axboe@oracle.com> 2010-04-21 11:44:16 -0400
commit 7f1dc8a2d2f45fc557b27fd56115338b1d34fc24 (patch)
tree 355619d9483aca33f5c45642ce0bf8a042ea3236
parent 8d2a91f8960b230b8bbcc4d97ed2015f5271c87d (diff)
blkio: Fix blkio crash during rq stat update
blkio + cfq was crashing even when two sequential readers were put in two separate cgroups (group_isolation=0). The reason is that a cfqq can migrate across groups depending on whether it is sync-noidle or not, so it can happen that at request insertion time the cfqq belonged to one cfqg and at request dispatch time it belonged to the root group. In this case the per-cgroup request stats can go wrong, and we also run into a BUG_ON(). This patch makes each rq stash away a cfq group pointer, so that rq stat accounting does not rely on the cfqq->cfqg pointer alone. [ 65.163523] ------------[ cut here ]------------ [ 65.164301] kernel BUG at block/blk-cgroup.c:117! [ 65.164301] invalid opcode: 0000 [#1] SMP [ 65.164301] last sysfs file: /sys/devices/pci0000:00/0000:00:05.0/0000:60:00.1/host9/rport-9:0-0/target9:0:0/9:0:0:2/block/sde/stat [ 65.164301] CPU 1 [ 65.164301] Modules linked in: dm_round_robin dm_multipath qla2xxx scsi_transport_fc dm_zero dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] [ 65.164301] [ 65.164301] Pid: 4505, comm: fio Not tainted 2.6.34-rc4-blk-for-35 #34 0A98h/HP xw8600 Workstation [ 65.164301] RIP: 0010:[<ffffffff8121924f>] [<ffffffff8121924f>] blkiocg_update_io_remove_stats+0x5b/0xaf [ 65.164301] RSP: 0018:ffff8800ba5a79e8 EFLAGS: 00010046 [ 65.164301] RAX: 0000000000000096 RBX: ffff8800bb268d60 RCX: 0000000000000000 [ 65.164301] RDX: ffff8800bb268eb8 RSI: 0000000000000000 RDI: ffff8800bb268e00 [ 65.164301] RBP: ffff8800ba5a7a08 R08: 0000000000000064 R09: 0000000000000001 [ 65.164301] R10: 0000000000079640 R11: ffff8800a0bd5bf0 R12: ffff8800bab4af01 [ 65.164301] R13: ffff8800bab4af00 R14: ffff8800bb1d8928 R15: 0000000000000000 [ 65.164301] FS: 00007f18f75056f0(0000) GS:ffff880001e40000(0000) knlGS:0000000000000000 [ 65.164301] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 65.164301] CR2: 000000000040e7f0 CR3: 00000000ba52b000 CR4: 00000000000006e0 [ 65.164301] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 65.164301] 
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 65.164301] Process fio (pid: 4505, threadinfo ffff8800ba5a6000, task ffff8800ba45ae80) [ 65.164301] Stack: [ 65.164301] ffff8800ba5a7a08 ffff8800ba722540 ffff8800bab4af68 ffff8800bab4af68 [ 65.164301] <0> ffff8800ba5a7a38 ffffffff8121d814 ffff8800ba722540 ffff8800bab4af68 [ 65.164301] <0> ffff8800ba722540 ffff8800a08f6800 ffff8800ba5a7a68 ffffffff8121d8ca [ 65.164301] Call Trace: [ 65.164301] [<ffffffff8121d814>] cfq_remove_request+0xe4/0x116 [ 65.164301] [<ffffffff8121d8ca>] cfq_dispatch_insert+0x84/0xe1 [ 65.164301] [<ffffffff8121e833>] cfq_dispatch_requests+0x767/0x8e8 [ 65.164301] [<ffffffff8120e524>] ? submit_bio+0xc3/0xcc [ 65.164301] [<ffffffff810ad657>] ? sync_page_killable+0x0/0x35 [ 65.164301] [<ffffffff8120ea8d>] blk_peek_request+0x191/0x1a7 [ 65.164301] [<ffffffffa000109c>] ? dm_get_live_table+0x44/0x4f [dm_mod] [ 65.164301] [<ffffffffa0002799>] dm_request_fn+0x38/0x14c [dm_mod] [ 65.164301] [<ffffffff810ad657>] ? sync_page_killable+0x0/0x35 [ 65.164301] [<ffffffff8120f600>] __generic_unplug_device+0x32/0x37 [ 65.164301] [<ffffffff8120f8a0>] generic_unplug_device+0x2e/0x3c [ 65.164301] [<ffffffffa00011a6>] dm_unplug_all+0x42/0x5b [dm_mod] [ 65.164301] [<ffffffff8120b063>] blk_unplug+0x29/0x2d [ 65.164301] [<ffffffff8120b079>] blk_backing_dev_unplug+0x12/0x14 [ 65.164301] [<ffffffff81108a82>] block_sync_page+0x35/0x39 [ 65.164301] [<ffffffff810ad64e>] sync_page+0x41/0x4a [ 65.164301] [<ffffffff810ad665>] sync_page_killable+0xe/0x35 [ 65.164301] [<ffffffff81589027>] __wait_on_bit_lock+0x46/0x8f [ 65.164301] [<ffffffff810ad52d>] __lock_page_killable+0x66/0x6d [ 65.164301] [<ffffffff81055fd4>] ? wake_bit_function+0x0/0x33 [ 65.164301] [<ffffffff810ad560>] lock_page_killable+0x2c/0x2e [ 65.164301] [<ffffffff810aebfd>] generic_file_aio_read+0x361/0x4f0 [ 65.164301] [<ffffffff810e906c>] do_sync_read+0xcb/0x108 [ 65.164301] [<ffffffff811e32a3>] ? 
security_file_permission+0x16/0x18 [ 65.164301] [<ffffffff810e96d3>] vfs_read+0xab/0x108 [ 65.164301] [<ffffffff810e97f0>] sys_read+0x4a/0x6e [ 65.164301] [<ffffffff81002b5b>] system_call_fastpath+0x16/0x1b [ 65.164301] Code: 00 74 1c 48 8b 8b 60 01 00 00 48 85 c9 75 04 0f 0b eb fe 48 ff c9 48 89 8b 60 01 00 00 eb 1a 48 8b 8b 58 01 00 00 48 85 c9 75 04 <0f> 0b eb fe 48 ff c9 48 89 8b 58 01 00 00 45 84 e4 74 16 48 8b [ 65.164301] RIP [<ffffffff8121924f>] blkiocg_update_io_remove_stats+0x5b/0xaf [ 65.164301] RSP <ffff8800ba5a79e8> [ 65.164301] ---[ end trace 1b2b828753032e68 ]--- Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- block/cfq-iosched.c 36
-rw-r--r-- include/linux/blkdev.h 3
2 files changed, 28 insertions, 11 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 62defd05518f..d5927b53020e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -55,6 +55,7 @@ static const int cfq_hist_divisor = 4;
55#define RQ_CIC(rq) \ 55#define RQ_CIC(rq) \
56 ((struct cfq_io_context *) (rq)->elevator_private) 56 ((struct cfq_io_context *) (rq)->elevator_private)
57#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) 57#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
/*
 * Fetch the cfq_group pointer stashed in a request's third elevator-private
 * slot. The whole expansion is parenthesized so that RQ_CFQG(rq)->member
 * applies the cast before the member access; call sites that already wrap
 * the macro in an extra pair of parentheses keep working unchanged.
 */
#define RQ_CFQG(rq)	((struct cfq_group *) ((rq)->elevator_private3))
58 59
59static struct kmem_cache *cfq_pool; 60static struct kmem_cache *cfq_pool;
60static struct kmem_cache *cfq_ioc_pool; 61static struct kmem_cache *cfq_ioc_pool;
@@ -1001,6 +1002,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
1001 return cfqg; 1002 return cfqg;
1002} 1003}
1003 1004
1005static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
1006{
1007 atomic_inc(&cfqg->ref);
1008 return cfqg;
1009}
1010
1004static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) 1011static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
1005{ 1012{
1006 /* Currently, all async queues are mapped to root group */ 1013 /* Currently, all async queues are mapped to root group */
@@ -1084,6 +1091,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
1084{ 1091{
1085 return &cfqd->root_group; 1092 return &cfqd->root_group;
1086} 1093}
1094
/*
 * Variant of cfq_ref_get_cfqg() that does no reference counting (presumably
 * the build without group scheduling -- confirm against the enclosing
 * #ifdef, which is not visible here).
 *
 * Nothing is incremented, but the group pointer is still returned rather
 * than NULL: the caller stores the result in rq->elevator_private3, and the
 * stat-accounting call sites later form &(RQ_CFQG(rq))->blkg from it, which
 * must not be computed from a null pointer.
 */
static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
{
	return cfqg;
}
1099
1087static inline void 1100static inline void
1088cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { 1101cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
1089 cfqq->cfqg = cfqg; 1102 cfqq->cfqg = cfqg;
@@ -1386,12 +1399,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
1386{ 1399{
1387 elv_rb_del(&cfqq->sort_list, rq); 1400 elv_rb_del(&cfqq->sort_list, rq);
1388 cfqq->queued[rq_is_sync(rq)]--; 1401 cfqq->queued[rq_is_sync(rq)]--;
1389 blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), 1402 blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq),
1390 rq_is_sync(rq)); 1403 rq_is_sync(rq));
1391 cfq_add_rq_rb(rq); 1404 cfq_add_rq_rb(rq);
1392 blkiocg_update_io_add_stats( 1405 blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
1393 &cfqq->cfqg->blkg, &cfqq->cfqd->serving_group->blkg, 1406 &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq),
1394 rq_data_dir(rq), rq_is_sync(rq)); 1407 rq_is_sync(rq));
1395} 1408}
1396 1409
1397static struct request * 1410static struct request *
@@ -1447,7 +1460,7 @@ static void cfq_remove_request(struct request *rq)
1447 cfq_del_rq_rb(rq); 1460 cfq_del_rq_rb(rq);
1448 1461
1449 cfqq->cfqd->rq_queued--; 1462 cfqq->cfqd->rq_queued--;
1450 blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), 1463 blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq),
1451 rq_is_sync(rq)); 1464 rq_is_sync(rq));
1452 if (rq_is_meta(rq)) { 1465 if (rq_is_meta(rq)) {
1453 WARN_ON(!cfqq->meta_pending); 1466 WARN_ON(!cfqq->meta_pending);
@@ -1483,8 +1496,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
1483static void cfq_bio_merged(struct request_queue *q, struct request *req, 1496static void cfq_bio_merged(struct request_queue *q, struct request *req,
1484 struct bio *bio) 1497 struct bio *bio)
1485{ 1498{
1486 struct cfq_queue *cfqq = RQ_CFQQ(req); 1499 blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, bio_data_dir(bio),
1487 blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, bio_data_dir(bio),
1488 cfq_bio_sync(bio)); 1500 cfq_bio_sync(bio));
1489} 1501}
1490 1502
@@ -1505,7 +1517,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
1505 if (cfqq->next_rq == next) 1517 if (cfqq->next_rq == next)
1506 cfqq->next_rq = rq; 1518 cfqq->next_rq = rq;
1507 cfq_remove_request(next); 1519 cfq_remove_request(next);
1508 blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, rq_data_dir(next), 1520 blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next),
1509 rq_is_sync(next)); 1521 rq_is_sync(next));
1510} 1522}
1511 1523
@@ -3240,8 +3252,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
3240 rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); 3252 rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
3241 list_add_tail(&rq->queuelist, &cfqq->fifo); 3253 list_add_tail(&rq->queuelist, &cfqq->fifo);
3242 cfq_add_rq_rb(rq); 3254 cfq_add_rq_rb(rq);
3243 3255 blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
3244 blkiocg_update_io_add_stats(&cfqq->cfqg->blkg,
3245 &cfqd->serving_group->blkg, rq_data_dir(rq), 3256 &cfqd->serving_group->blkg, rq_data_dir(rq),
3246 rq_is_sync(rq)); 3257 rq_is_sync(rq));
3247 cfq_rq_enqueued(cfqd, cfqq, rq); 3258 cfq_rq_enqueued(cfqd, cfqq, rq);
@@ -3472,6 +3483,10 @@ static void cfq_put_request(struct request *rq)
3472 rq->elevator_private = NULL; 3483 rq->elevator_private = NULL;
3473 rq->elevator_private2 = NULL; 3484 rq->elevator_private2 = NULL;
3474 3485
3486 /* Put down rq reference on cfqg */
3487 cfq_put_cfqg(RQ_CFQG(rq));
3488 rq->elevator_private3 = NULL;
3489
3475 cfq_put_queue(cfqq); 3490 cfq_put_queue(cfqq);
3476 } 3491 }
3477} 3492}
@@ -3560,6 +3575,7 @@ new_queue:
3560 3575
3561 rq->elevator_private = cic; 3576 rq->elevator_private = cic;
3562 rq->elevator_private2 = cfqq; 3577 rq->elevator_private2 = cfqq;
3578 rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
3563 return 0; 3579 return 0;
3564 3580
3565queue_fail: 3581queue_fail:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d483c494672a..5cf17a49ce38 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -186,11 +186,12 @@ struct request {
186 }; 186 };
187 187
188 /* 188 /*
189 * two pointers are available for the IO schedulers, if they need 189 * Three pointers are available for the IO schedulers, if they need
190 * more they have to dynamically allocate it. 190 * more they have to dynamically allocate it.
191 */ 191 */
192 void *elevator_private; 192 void *elevator_private;
193 void *elevator_private2; 193 void *elevator_private2;
194 void *elevator_private3;
194 195
195 struct gendisk *rq_disk; 196 struct gendisk *rq_disk;
196 unsigned long start_time; 197 unsigned long start_time;