diff options
author | Vivek Goyal <vgoyal@redhat.com> | 2010-04-21 11:44:16 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2010-04-21 11:44:16 -0400 |
commit | 7f1dc8a2d2f45fc557b27fd56115338b1d34fc24 (patch) | |
tree | 355619d9483aca33f5c45642ce0bf8a042ea3236 | |
parent | 8d2a91f8960b230b8bbcc4d97ed2015f5271c87d (diff) |
blkio: Fix blkio crash during rq stat update
blkio + cfq was crashing even when two sequential readers were put in two
separate cgroups (group_isolation=0).
The reason is that a cfqq can migrate across groups depending on whether it
is sync-noidle or not. So it can happen that at request insertion time the
cfqq belonged to one cfqg, and at request dispatch time it belonged to the
root group. In this case the per-cgroup request stats can go wrong, and the
code also runs into a BUG_ON().
This patch makes each rq stash away its own cfq group pointer (holding a
reference on that group), so that rq stat accounting does not rely on the
cfqq->cfqg pointer alone.
[ 65.163523] ------------[ cut here ]------------
[ 65.164301] kernel BUG at block/blk-cgroup.c:117!
[ 65.164301] invalid opcode: 0000 [#1] SMP
[ 65.164301] last sysfs file: /sys/devices/pci0000:00/0000:00:05.0/0000:60:00.1/host9/rport-9:0-0/target9:0:0/9:0:0:2/block/sde/stat
[ 65.164301] CPU 1
[ 65.164301] Modules linked in: dm_round_robin dm_multipath qla2xxx scsi_transport_fc dm_zero dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan]
[ 65.164301]
[ 65.164301] Pid: 4505, comm: fio Not tainted 2.6.34-rc4-blk-for-35 #34 0A98h/HP xw8600 Workstation
[ 65.164301] RIP: 0010:[<ffffffff8121924f>] [<ffffffff8121924f>] blkiocg_update_io_remove_stats+0x5b/0xaf
[ 65.164301] RSP: 0018:ffff8800ba5a79e8 EFLAGS: 00010046
[ 65.164301] RAX: 0000000000000096 RBX: ffff8800bb268d60 RCX: 0000000000000000
[ 65.164301] RDX: ffff8800bb268eb8 RSI: 0000000000000000 RDI: ffff8800bb268e00
[ 65.164301] RBP: ffff8800ba5a7a08 R08: 0000000000000064 R09: 0000000000000001
[ 65.164301] R10: 0000000000079640 R11: ffff8800a0bd5bf0 R12: ffff8800bab4af01
[ 65.164301] R13: ffff8800bab4af00 R14: ffff8800bb1d8928 R15: 0000000000000000
[ 65.164301] FS: 00007f18f75056f0(0000) GS:ffff880001e40000(0000) knlGS:0000000000000000
[ 65.164301] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 65.164301] CR2: 000000000040e7f0 CR3: 00000000ba52b000 CR4: 00000000000006e0
[ 65.164301] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 65.164301] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 65.164301] Process fio (pid: 4505, threadinfo ffff8800ba5a6000, task ffff8800ba45ae80)
[ 65.164301] Stack:
[ 65.164301] ffff8800ba5a7a08 ffff8800ba722540 ffff8800bab4af68 ffff8800bab4af68
[ 65.164301] <0> ffff8800ba5a7a38 ffffffff8121d814 ffff8800ba722540 ffff8800bab4af68
[ 65.164301] <0> ffff8800ba722540 ffff8800a08f6800 ffff8800ba5a7a68 ffffffff8121d8ca
[ 65.164301] Call Trace:
[ 65.164301] [<ffffffff8121d814>] cfq_remove_request+0xe4/0x116
[ 65.164301] [<ffffffff8121d8ca>] cfq_dispatch_insert+0x84/0xe1
[ 65.164301] [<ffffffff8121e833>] cfq_dispatch_requests+0x767/0x8e8
[ 65.164301] [<ffffffff8120e524>] ? submit_bio+0xc3/0xcc
[ 65.164301] [<ffffffff810ad657>] ? sync_page_killable+0x0/0x35
[ 65.164301] [<ffffffff8120ea8d>] blk_peek_request+0x191/0x1a7
[ 65.164301] [<ffffffffa000109c>] ? dm_get_live_table+0x44/0x4f [dm_mod]
[ 65.164301] [<ffffffffa0002799>] dm_request_fn+0x38/0x14c [dm_mod]
[ 65.164301] [<ffffffff810ad657>] ? sync_page_killable+0x0/0x35
[ 65.164301] [<ffffffff8120f600>] __generic_unplug_device+0x32/0x37
[ 65.164301] [<ffffffff8120f8a0>] generic_unplug_device+0x2e/0x3c
[ 65.164301] [<ffffffffa00011a6>] dm_unplug_all+0x42/0x5b [dm_mod]
[ 65.164301] [<ffffffff8120b063>] blk_unplug+0x29/0x2d
[ 65.164301] [<ffffffff8120b079>] blk_backing_dev_unplug+0x12/0x14
[ 65.164301] [<ffffffff81108a82>] block_sync_page+0x35/0x39
[ 65.164301] [<ffffffff810ad64e>] sync_page+0x41/0x4a
[ 65.164301] [<ffffffff810ad665>] sync_page_killable+0xe/0x35
[ 65.164301] [<ffffffff81589027>] __wait_on_bit_lock+0x46/0x8f
[ 65.164301] [<ffffffff810ad52d>] __lock_page_killable+0x66/0x6d
[ 65.164301] [<ffffffff81055fd4>] ? wake_bit_function+0x0/0x33
[ 65.164301] [<ffffffff810ad560>] lock_page_killable+0x2c/0x2e
[ 65.164301] [<ffffffff810aebfd>] generic_file_aio_read+0x361/0x4f0
[ 65.164301] [<ffffffff810e906c>] do_sync_read+0xcb/0x108
[ 65.164301] [<ffffffff811e32a3>] ? security_file_permission+0x16/0x18
[ 65.164301] [<ffffffff810e96d3>] vfs_read+0xab/0x108
[ 65.164301] [<ffffffff810e97f0>] sys_read+0x4a/0x6e
[ 65.164301] [<ffffffff81002b5b>] system_call_fastpath+0x16/0x1b
[ 65.164301] Code: 00 74 1c 48 8b 8b 60 01 00 00 48 85 c9 75 04 0f 0b eb fe 48 ff c9 48 89 8b 60 01 00 00 eb 1a 48 8b 8b 58 01 00 00 48 85 c9 75 04 <0f> 0b eb fe 48 ff c9 48 89 8b 58 01 00 00 45 84 e4 74 16 48 8b
[ 65.164301] RIP [<ffffffff8121924f>] blkiocg_update_io_remove_stats+0x5b/0xaf
[ 65.164301] RSP <ffff8800ba5a79e8>
[ 65.164301] ---[ end trace 1b2b828753032e68 ]---
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | block/cfq-iosched.c | 36 | ||||
-rw-r--r-- | include/linux/blkdev.h | 3 |
2 files changed, 28 insertions, 11 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 62defd05518f..d5927b53020e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -55,6 +55,7 @@ static const int cfq_hist_divisor = 4; | |||
55 | #define RQ_CIC(rq) \ | 55 | #define RQ_CIC(rq) \ |
56 | ((struct cfq_io_context *) (rq)->elevator_private) | 56 | ((struct cfq_io_context *) (rq)->elevator_private) |
57 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) | 57 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) |
58 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3) | ||
58 | 59 | ||
59 | static struct kmem_cache *cfq_pool; | 60 | static struct kmem_cache *cfq_pool; |
60 | static struct kmem_cache *cfq_ioc_pool; | 61 | static struct kmem_cache *cfq_ioc_pool; |
@@ -1001,6 +1002,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) | |||
1001 | return cfqg; | 1002 | return cfqg; |
1002 | } | 1003 | } |
1003 | 1004 | ||
1005 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) | ||
1006 | { | ||
1007 | atomic_inc(&cfqg->ref); | ||
1008 | return cfqg; | ||
1009 | } | ||
1010 | |||
1004 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) | 1011 | static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) |
1005 | { | 1012 | { |
1006 | /* Currently, all async queues are mapped to root group */ | 1013 | /* Currently, all async queues are mapped to root group */ |
@@ -1084,6 +1091,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) | |||
1084 | { | 1091 | { |
1085 | return &cfqd->root_group; | 1092 | return &cfqd->root_group; |
1086 | } | 1093 | } |
1094 | |||
1095 | static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) | ||
1096 | { | ||
1097 | return NULL; | ||
1098 | } | ||
1099 | |||
1087 | static inline void | 1100 | static inline void |
1088 | cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { | 1101 | cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { |
1089 | cfqq->cfqg = cfqg; | 1102 | cfqq->cfqg = cfqg; |
@@ -1386,12 +1399,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) | |||
1386 | { | 1399 | { |
1387 | elv_rb_del(&cfqq->sort_list, rq); | 1400 | elv_rb_del(&cfqq->sort_list, rq); |
1388 | cfqq->queued[rq_is_sync(rq)]--; | 1401 | cfqq->queued[rq_is_sync(rq)]--; |
1389 | blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), | 1402 | blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), |
1390 | rq_is_sync(rq)); | 1403 | rq_is_sync(rq)); |
1391 | cfq_add_rq_rb(rq); | 1404 | cfq_add_rq_rb(rq); |
1392 | blkiocg_update_io_add_stats( | 1405 | blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, |
1393 | &cfqq->cfqg->blkg, &cfqq->cfqd->serving_group->blkg, | 1406 | &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq), |
1394 | rq_data_dir(rq), rq_is_sync(rq)); | 1407 | rq_is_sync(rq)); |
1395 | } | 1408 | } |
1396 | 1409 | ||
1397 | static struct request * | 1410 | static struct request * |
@@ -1447,7 +1460,7 @@ static void cfq_remove_request(struct request *rq) | |||
1447 | cfq_del_rq_rb(rq); | 1460 | cfq_del_rq_rb(rq); |
1448 | 1461 | ||
1449 | cfqq->cfqd->rq_queued--; | 1462 | cfqq->cfqd->rq_queued--; |
1450 | blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), | 1463 | blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), |
1451 | rq_is_sync(rq)); | 1464 | rq_is_sync(rq)); |
1452 | if (rq_is_meta(rq)) { | 1465 | if (rq_is_meta(rq)) { |
1453 | WARN_ON(!cfqq->meta_pending); | 1466 | WARN_ON(!cfqq->meta_pending); |
@@ -1483,8 +1496,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, | |||
1483 | static void cfq_bio_merged(struct request_queue *q, struct request *req, | 1496 | static void cfq_bio_merged(struct request_queue *q, struct request *req, |
1484 | struct bio *bio) | 1497 | struct bio *bio) |
1485 | { | 1498 | { |
1486 | struct cfq_queue *cfqq = RQ_CFQQ(req); | 1499 | blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, bio_data_dir(bio), |
1487 | blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, bio_data_dir(bio), | ||
1488 | cfq_bio_sync(bio)); | 1500 | cfq_bio_sync(bio)); |
1489 | } | 1501 | } |
1490 | 1502 | ||
@@ -1505,7 +1517,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, | |||
1505 | if (cfqq->next_rq == next) | 1517 | if (cfqq->next_rq == next) |
1506 | cfqq->next_rq = rq; | 1518 | cfqq->next_rq = rq; |
1507 | cfq_remove_request(next); | 1519 | cfq_remove_request(next); |
1508 | blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, rq_data_dir(next), | 1520 | blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next), |
1509 | rq_is_sync(next)); | 1521 | rq_is_sync(next)); |
1510 | } | 1522 | } |
1511 | 1523 | ||
@@ -3240,8 +3252,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
3240 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); | 3252 | rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); |
3241 | list_add_tail(&rq->queuelist, &cfqq->fifo); | 3253 | list_add_tail(&rq->queuelist, &cfqq->fifo); |
3242 | cfq_add_rq_rb(rq); | 3254 | cfq_add_rq_rb(rq); |
3243 | 3255 | blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, | |
3244 | blkiocg_update_io_add_stats(&cfqq->cfqg->blkg, | ||
3245 | &cfqd->serving_group->blkg, rq_data_dir(rq), | 3256 | &cfqd->serving_group->blkg, rq_data_dir(rq), |
3246 | rq_is_sync(rq)); | 3257 | rq_is_sync(rq)); |
3247 | cfq_rq_enqueued(cfqd, cfqq, rq); | 3258 | cfq_rq_enqueued(cfqd, cfqq, rq); |
@@ -3472,6 +3483,10 @@ static void cfq_put_request(struct request *rq) | |||
3472 | rq->elevator_private = NULL; | 3483 | rq->elevator_private = NULL; |
3473 | rq->elevator_private2 = NULL; | 3484 | rq->elevator_private2 = NULL; |
3474 | 3485 | ||
3486 | /* Put down rq reference on cfqg */ | ||
3487 | cfq_put_cfqg(RQ_CFQG(rq)); | ||
3488 | rq->elevator_private3 = NULL; | ||
3489 | |||
3475 | cfq_put_queue(cfqq); | 3490 | cfq_put_queue(cfqq); |
3476 | } | 3491 | } |
3477 | } | 3492 | } |
@@ -3560,6 +3575,7 @@ new_queue: | |||
3560 | 3575 | ||
3561 | rq->elevator_private = cic; | 3576 | rq->elevator_private = cic; |
3562 | rq->elevator_private2 = cfqq; | 3577 | rq->elevator_private2 = cfqq; |
3578 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); | ||
3563 | return 0; | 3579 | return 0; |
3564 | 3580 | ||
3565 | queue_fail: | 3581 | queue_fail: |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d483c494672a..5cf17a49ce38 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -186,11 +186,12 @@ struct request { | |||
186 | }; | 186 | }; |
187 | 187 | ||
188 | /* | 188 | /* |
189 | * two pointers are available for the IO schedulers, if they need | 189 | * Three pointers are available for the IO schedulers, if they need |
190 | * more they have to dynamically allocate it. | 190 | * more they have to dynamically allocate it. |
191 | */ | 191 | */ |
192 | void *elevator_private; | 192 | void *elevator_private; |
193 | void *elevator_private2; | 193 | void *elevator_private2; |
194 | void *elevator_private3; | ||
194 | 195 | ||
195 | struct gendisk *rq_disk; | 196 | struct gendisk *rq_disk; |
196 | unsigned long start_time; | 197 | unsigned long start_time; |