aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVivek Goyal <vgoyal@redhat.com>2010-04-26 13:25:11 -0400
committerJens Axboe <jens.axboe@oracle.com>2010-04-26 13:25:11 -0400
commite5ff082e8a68d9a6874990597497c7e6a96ad752 (patch)
tree43c750821e89a50a01f4c7d73a68f779a57443fe
parent7f1dc8a2d2f45fc557b27fd56115338b1d34fc24 (diff)
blkio: Fix another BUG_ON() crash due to cfqq movement across groups
o Once in a while, I was hitting a BUG_ON() in blkio code. empty_time was assuming that upon slice expiry, group can't be marked empty already (except forced dispatch). But this assumption is broken if cfqq can move (group_isolation=0) across groups after receiving a request. I think most likely in this case we got a request in a cfqq and accounted the rq in one group, later while adding the cfqq to tree, we moved the queue to a different group which was already marked empty and after dispatch from slice we found group already marked empty and raised alarm. This patch does not error out if group is already marked empty. This can introduce some empty_time stat error only in case of group_isolation=0. This is better than crashing. In case of group_isolation=1 we should still get same stats as before this patch. [ 222.308546] ------------[ cut here ]------------ [ 222.309311] kernel BUG at block/blk-cgroup.c:236! [ 222.309311] invalid opcode: 0000 [#1] SMP [ 222.309311] last sysfs file: /sys/devices/virtual/block/dm-3/queue/scheduler [ 222.309311] CPU 1 [ 222.309311] Modules linked in: dm_round_robin dm_multipath qla2xxx scsi_transport_fc dm_zero dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] [ 222.309311] [ 222.309311] Pid: 4780, comm: fio Not tainted 2.6.34-rc4-blkio-config #68 0A98h/HP xw8600 Workstation [ 222.309311] RIP: 0010:[<ffffffff8121ad88>] [<ffffffff8121ad88>] blkiocg_set_start_empty_time+0x50/0x83 [ 222.309311] RSP: 0018:ffff8800ba6e79f8 EFLAGS: 00010002 [ 222.309311] RAX: 0000000000000082 RBX: ffff8800a13b7990 RCX: ffff8800a13b7808 [ 222.309311] RDX: 0000000000002121 RSI: 0000000000000082 RDI: ffff8800a13b7a30 [ 222.309311] RBP: ffff8800ba6e7a18 R08: 0000000000000000 R09: 0000000000000001 [ 222.309311] R10: 000000000002f8c8 R11: ffff8800ba6e7ad8 R12: ffff8800a13b78ff [ 222.309311] R13: ffff8800a13b7990 R14: 0000000000000001 R15: ffff8800a13b7808 [ 222.309311] FS: 00007f3beec476f0(0000) GS:ffff880001e40000(0000) knlGS:0000000000000000 [ 222.309311] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 222.309311] CR2: 000000000040e7f0 CR3: 00000000a12d5000 CR4: 00000000000006e0 [ 222.309311] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 222.309311] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 222.309311] Process fio (pid: 4780, threadinfo ffff8800ba6e6000, task ffff8800b3d6bf00) [ 222.309311] Stack: [ 222.309311] 0000000000000001 ffff8800bab17a48 ffff8800bab17a48 ffff8800a13b7800 [ 222.309311] <0> ffff8800ba6e7a68 ffffffff8121da35 ffff880000000001 00ff8800ba5c5698 [ 222.309311] <0> ffff8800ba6e7a68 ffff8800a13b7800 0000000000000000 ffff8800bab17a48 [ 222.309311] Call Trace: [ 222.309311] [<ffffffff8121da35>] __cfq_slice_expired+0x2af/0x3ec [ 222.309311] [<ffffffff8121fd7b>] cfq_dispatch_requests+0x2c8/0x8e8 [ 222.309311] [<ffffffff8120f1cd>] ? spin_unlock_irqrestore+0xe/0x10 [ 222.309311] [<ffffffff8120fb1a>] ? blk_insert_cloned_request+0x70/0x7b [ 222.309311] [<ffffffff81210461>] blk_peek_request+0x191/0x1a7 [ 222.309311] [<ffffffffa0002799>] dm_request_fn+0x38/0x14c [dm_mod] [ 222.309311] [<ffffffff810ae61f>] ? sync_page_killable+0x0/0x35 [ 222.309311] [<ffffffff81210fd4>] __generic_unplug_device+0x32/0x37 [ 222.309311] [<ffffffff81211274>] generic_unplug_device+0x2e/0x3c [ 222.309311] [<ffffffffa00011a6>] dm_unplug_all+0x42/0x5b [dm_mod] [ 222.309311] [<ffffffff8120ca37>] blk_unplug+0x29/0x2d [ 222.309311] [<ffffffff8120ca4d>] blk_backing_dev_unplug+0x12/0x14 [ 222.309311] [<ffffffff81109a7a>] block_sync_page+0x35/0x39 [ 222.309311] [<ffffffff810ae616>] sync_page+0x41/0x4a [ 222.309311] [<ffffffff810ae62d>] sync_page_killable+0xe/0x35 [ 222.309311] [<ffffffff8158aa59>] __wait_on_bit_lock+0x46/0x8f [ 222.309311] [<ffffffff810ae4f5>] __lock_page_killable+0x66/0x6d [ 222.309311] [<ffffffff81056f9c>] ? wake_bit_function+0x0/0x33 [ 222.309311] [<ffffffff810ae528>] lock_page_killable+0x2c/0x2e [ 222.309311] [<ffffffff810afbc5>] generic_file_aio_read+0x361/0x4f0 [ 222.309311] [<ffffffff810ea044>] do_sync_read+0xcb/0x108 [ 222.309311] [<ffffffff811e42f7>] ? security_file_permission+0x16/0x18 [ 222.309311] [<ffffffff810ea6ab>] vfs_read+0xab/0x108 [ 222.309311] [<ffffffff810ea7c8>] sys_read+0x4a/0x6e [ 222.309311] [<ffffffff81002b5b>] system_call_fastpath+0x16/0x1b [ 222.309311] Code: 58 01 00 00 00 48 89 c6 75 0a 48 83 bb 60 01 00 00 00 74 09 48 8d bb a0 00 00 00 eb 35 41 fe cc 74 0d f6 83 c0 01 00 00 04 74 04 <0f> 0b eb fe 48 89 75 e8 e8 be e0 de ff 66 83 8b c0 01 00 00 04 [ 222.309311] RIP [<ffffffff8121ad88>] blkiocg_set_start_empty_time+0x50/0x83 [ 222.309311] RSP <ffff8800ba6e79f8> [ 222.309311] ---[ end trace 32b4f71dffc15712 ]--- Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Acked-by: Divyesh Shah <dpshah@google.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--block/blk-cgroup.c15
-rw-r--r--block/blk-cgroup.h5
-rw-r--r--block/cfq-iosched.c33
3 files changed, 27 insertions, 26 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 83930f65016a..af42efbb0c1d 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -213,7 +213,7 @@ void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
213} 213}
214EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats); 214EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
215 215
216void blkiocg_set_start_empty_time(struct blkio_group *blkg, bool ignore) 216void blkiocg_set_start_empty_time(struct blkio_group *blkg)
217{ 217{
218 unsigned long flags; 218 unsigned long flags;
219 struct blkio_group_stats *stats; 219 struct blkio_group_stats *stats;
@@ -228,12 +228,15 @@ void blkiocg_set_start_empty_time(struct blkio_group *blkg, bool ignore)
228 } 228 }
229 229
230 /* 230 /*
231 * If ignore is set, we do not panic on the empty flag being set 231 * group is already marked empty. This can happen if cfqq got new
232 * already. This is to avoid cases where there are superfluous timeslice 232 * request in parent group and moved to this group while being added
233 * complete events (for eg., forced_dispatch in CFQ) when no IOs are 233 * to service tree. Just ignore the event and move on.
234 * served which could result in triggering the empty check incorrectly.
235 */ 234 */
236 BUG_ON(!ignore && blkio_blkg_empty(stats)); 235 if(blkio_blkg_empty(stats)) {
236 spin_unlock_irqrestore(&blkg->stats_lock, flags);
237 return;
238 }
239
237 stats->start_empty_time = sched_clock(); 240 stats->start_empty_time = sched_clock();
238 blkio_mark_blkg_empty(stats); 241 blkio_mark_blkg_empty(stats);
239 spin_unlock_irqrestore(&blkg->stats_lock, flags); 242 spin_unlock_irqrestore(&blkg->stats_lock, flags);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2c956a06339a..a491a6d56ecf 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -174,7 +174,7 @@ void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
174 unsigned long dequeue); 174 unsigned long dequeue);
175void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg); 175void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg);
176void blkiocg_update_idle_time_stats(struct blkio_group *blkg); 176void blkiocg_update_idle_time_stats(struct blkio_group *blkg);
177void blkiocg_set_start_empty_time(struct blkio_group *blkg, bool ignore); 177void blkiocg_set_start_empty_time(struct blkio_group *blkg);
178 178
179#define BLKG_FLAG_FNS(name) \ 179#define BLKG_FLAG_FNS(name) \
180static inline void blkio_mark_blkg_##name( \ 180static inline void blkio_mark_blkg_##name( \
@@ -205,8 +205,7 @@ static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
205static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg) 205static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
206{} 206{}
207static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {} 207static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {}
208static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg, 208static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
209 bool ignore) {}
210#endif 209#endif
211 210
212#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) 211#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d5927b53020e..002a5b621653 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -888,7 +888,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
888} 888}
889 889
890static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, 890static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
891 struct cfq_queue *cfqq, bool forced) 891 struct cfq_queue *cfqq)
892{ 892{
893 struct cfq_rb_root *st = &cfqd->grp_service_tree; 893 struct cfq_rb_root *st = &cfqd->grp_service_tree;
894 unsigned int used_sl, charge_sl; 894 unsigned int used_sl, charge_sl;
@@ -918,7 +918,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
918 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, 918 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
919 st->min_vdisktime); 919 st->min_vdisktime);
920 blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); 920 blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
921 blkiocg_set_start_empty_time(&cfqg->blkg, forced); 921 blkiocg_set_start_empty_time(&cfqg->blkg);
922} 922}
923 923
924#ifdef CONFIG_CFQ_GROUP_IOSCHED 924#ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -1582,7 +1582,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
1582 */ 1582 */
1583static void 1583static void
1584__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1584__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1585 bool timed_out, bool forced) 1585 bool timed_out)
1586{ 1586{
1587 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); 1587 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
1588 1588
@@ -1609,7 +1609,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1609 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); 1609 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
1610 } 1610 }
1611 1611
1612 cfq_group_served(cfqd, cfqq->cfqg, cfqq, forced); 1612 cfq_group_served(cfqd, cfqq->cfqg, cfqq);
1613 1613
1614 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) 1614 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
1615 cfq_del_cfqq_rr(cfqd, cfqq); 1615 cfq_del_cfqq_rr(cfqd, cfqq);
@@ -1628,13 +1628,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1628 } 1628 }
1629} 1629}
1630 1630
1631static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out, 1631static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
1632 bool forced)
1633{ 1632{
1634 struct cfq_queue *cfqq = cfqd->active_queue; 1633 struct cfq_queue *cfqq = cfqd->active_queue;
1635 1634
1636 if (cfqq) 1635 if (cfqq)
1637 __cfq_slice_expired(cfqd, cfqq, timed_out, forced); 1636 __cfq_slice_expired(cfqd, cfqq, timed_out);
1638} 1637}
1639 1638
1640/* 1639/*
@@ -2202,7 +2201,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2202 } 2201 }
2203 2202
2204expire: 2203expire:
2205 cfq_slice_expired(cfqd, 0, false); 2204 cfq_slice_expired(cfqd, 0);
2206new_queue: 2205new_queue:
2207 /* 2206 /*
2208 * Current queue expired. Check if we have to switch to a new 2207 * Current queue expired. Check if we have to switch to a new
@@ -2228,7 +2227,7 @@ static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
2228 BUG_ON(!list_empty(&cfqq->fifo)); 2227 BUG_ON(!list_empty(&cfqq->fifo));
2229 2228
2230 /* By default cfqq is not expired if it is empty. Do it explicitly */ 2229 /* By default cfqq is not expired if it is empty. Do it explicitly */
2231 __cfq_slice_expired(cfqq->cfqd, cfqq, 0, true); 2230 __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
2232 return dispatched; 2231 return dispatched;
2233} 2232}
2234 2233
@@ -2242,7 +2241,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
2242 int dispatched = 0; 2241 int dispatched = 0;
2243 2242
2244 /* Expire the timeslice of the current active queue first */ 2243 /* Expire the timeslice of the current active queue first */
2245 cfq_slice_expired(cfqd, 0, true); 2244 cfq_slice_expired(cfqd, 0);
2246 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) { 2245 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
2247 __cfq_set_active_queue(cfqd, cfqq); 2246 __cfq_set_active_queue(cfqd, cfqq);
2248 dispatched += __cfq_forced_dispatch_cfqq(cfqq); 2247 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
@@ -2411,7 +2410,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
2411 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || 2410 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
2412 cfq_class_idle(cfqq))) { 2411 cfq_class_idle(cfqq))) {
2413 cfqq->slice_end = jiffies + 1; 2412 cfqq->slice_end = jiffies + 1;
2414 cfq_slice_expired(cfqd, 0, false); 2413 cfq_slice_expired(cfqd, 0);
2415 } 2414 }
2416 2415
2417 cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); 2416 cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
@@ -2442,7 +2441,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
2442 orig_cfqg = cfqq->orig_cfqg; 2441 orig_cfqg = cfqq->orig_cfqg;
2443 2442
2444 if (unlikely(cfqd->active_queue == cfqq)) { 2443 if (unlikely(cfqd->active_queue == cfqq)) {
2445 __cfq_slice_expired(cfqd, cfqq, 0, false); 2444 __cfq_slice_expired(cfqd, cfqq, 0);
2446 cfq_schedule_dispatch(cfqd); 2445 cfq_schedule_dispatch(cfqd);
2447 } 2446 }
2448 2447
@@ -2543,7 +2542,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2543 struct cfq_queue *__cfqq, *next; 2542 struct cfq_queue *__cfqq, *next;
2544 2543
2545 if (unlikely(cfqq == cfqd->active_queue)) { 2544 if (unlikely(cfqq == cfqd->active_queue)) {
2546 __cfq_slice_expired(cfqd, cfqq, 0, false); 2545 __cfq_slice_expired(cfqd, cfqq, 0);
2547 cfq_schedule_dispatch(cfqd); 2546 cfq_schedule_dispatch(cfqd);
2548 } 2547 }
2549 2548
@@ -3172,7 +3171,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3172static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 3171static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3173{ 3172{
3174 cfq_log_cfqq(cfqd, cfqq, "preempt"); 3173 cfq_log_cfqq(cfqd, cfqq, "preempt");
3175 cfq_slice_expired(cfqd, 1, false); 3174 cfq_slice_expired(cfqd, 1);
3176 3175
3177 /* 3176 /*
3178 * Put the new queue at the front of the of the current list, 3177 * Put the new queue at the front of the of the current list,
@@ -3383,7 +3382,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3383 * - when there is a close cooperator 3382 * - when there is a close cooperator
3384 */ 3383 */
3385 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) 3384 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
3386 cfq_slice_expired(cfqd, 1, false); 3385 cfq_slice_expired(cfqd, 1);
3387 else if (sync && cfqq_empty && 3386 else if (sync && cfqq_empty &&
3388 !cfq_close_cooperator(cfqd, cfqq)) { 3387 !cfq_close_cooperator(cfqd, cfqq)) {
3389 cfqd->noidle_tree_requires_idle |= !rq_noidle(rq); 3388 cfqd->noidle_tree_requires_idle |= !rq_noidle(rq);
@@ -3648,7 +3647,7 @@ static void cfq_idle_slice_timer(unsigned long data)
3648 cfq_clear_cfqq_deep(cfqq); 3647 cfq_clear_cfqq_deep(cfqq);
3649 } 3648 }
3650expire: 3649expire:
3651 cfq_slice_expired(cfqd, timed_out, false); 3650 cfq_slice_expired(cfqd, timed_out);
3652out_kick: 3651out_kick:
3653 cfq_schedule_dispatch(cfqd); 3652 cfq_schedule_dispatch(cfqd);
3654out_cont: 3653out_cont:
@@ -3691,7 +3690,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
3691 spin_lock_irq(q->queue_lock); 3690 spin_lock_irq(q->queue_lock);
3692 3691
3693 if (cfqd->active_queue) 3692 if (cfqd->active_queue)
3694 __cfq_slice_expired(cfqd, cfqd->active_queue, 0, false); 3693 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
3695 3694
3696 while (!list_empty(&cfqd->cic_list)) { 3695 while (!list_empty(&cfqd->cic_list)) {
3697 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, 3696 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,