author    Jaroslav Kysela <perex@perex.cz>  2010-01-08 03:26:34 -0500
committer Jaroslav Kysela <perex@perex.cz>  2010-01-08 03:26:34 -0500
commit    1cb4f624ea38361b6397966470f0a1bed5532483
tree      418b05ddc854b09d64f7d5ee0c78875e42b5f151 /block
parent    444c1953d496d272208902ff7010dc70d1f887f0
parent    2c1f1895ef2aa8f0e5497893eff71304aef332e1
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into fixes
Diffstat (limited to 'block')
-rw-r--r--   block/blk-barrier.c     2
-rw-r--r--   block/blk-settings.c    114
-rw-r--r--   block/cfq-iosched.c     161
3 files changed, 174 insertions, 103 deletions
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8873b9b439ff..8618d8996fea 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -402,7 +402,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
         * our current implementations need. If we'll ever need
         * more the interface will need revisiting.
         */
-       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+       page = alloc_page(gfp_mask | __GFP_ZERO);
        if (!page)
                goto out_free_bio;
        if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
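The hunk above makes the discard payload page allocation honour the caller-supplied gfp_mask already available in blkdev_issue_discard() instead of forcing GFP_KERNEL, so callers that must not recurse into the I/O path keep their allocation constraints. A small user-space sketch of the same pattern; the names below are simplified stand-ins, not the kernel interfaces:

/* Propagate the caller's allocation flags rather than hardcoding them.
 * gfp_t and the GFP_* values here are illustrative stand-ins. */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int gfp_t;
#define GFP_KERNEL 0x1          /* may sleep, may start new I/O      */
#define GFP_NOIO   0x2          /* may sleep, must not start new I/O */

static void *alloc_zeroed_page(gfp_t gfp_mask)
{
        /* a real allocator would obey gfp_mask; here it only records intent */
        printf("allocating with mask 0x%x\n", gfp_mask);
        return calloc(1, 4096);
}

static int issue_discard(gfp_t gfp_mask)
{
        /* before the fix this was effectively alloc_zeroed_page(GFP_KERNEL),
         * silently overriding a GFP_NOIO caller */
        void *page = alloc_zeroed_page(gfp_mask);

        if (!page)
                return -1;
        /* ... build and submit the discard bio ... */
        free(page);
        return 0;
}

int main(void)
{
        return issue_discard(GFP_NOIO);
}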
diff --git a/block/blk-settings.c b/block/blk-settings.c
index dd1f1e0e196f..d52d4adc440b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -505,21 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b)
 
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
- * @t:  the stacking driver limits (top)
- * @b:  the underlying queue limits (bottom)
+ * @t:  the stacking driver limits (top device)
+ * @b:  the underlying queue limits (bottom, component device)
  * @offset:  offset to beginning of data within component device
  *
  * Description:
- *    Merges two queue_limit structs.  Returns 0 if alignment didn't
- *    change.  Returns -1 if adding the bottom device caused
- *    misalignment.
+ *    This function is used by stacking drivers like MD and DM to ensure
+ *    that all component devices have compatible block sizes and
+ *    alignments.  The stacking driver must provide a queue_limits
+ *    struct (top) and then iteratively call the stacking function for
+ *    all component (bottom) devices.  The stacking function will
+ *    attempt to combine the values and ensure proper alignment.
+ *
+ *    Returns 0 if the top and bottom queue_limits are compatible.  The
+ *    top device's block sizes and alignment offsets may be adjusted to
+ *    ensure alignment with the bottom device. If no compatible sizes
+ *    and alignments exist, -1 is returned and the resulting top
+ *    queue_limits will have the misaligned flag set to indicate that
+ *    the alignment_offset is undefined.
  */
 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                      sector_t offset)
 {
-       int ret;
-
-       ret = 0;
+       sector_t alignment;
+       unsigned int top, bottom;
 
        t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
        t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -537,6 +546,22 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
        t->max_segment_size = min_not_zero(t->max_segment_size,
                                           b->max_segment_size);
 
+       alignment = queue_limit_alignment_offset(b, offset);
+
+       /* Bottom device has different alignment.  Check that it is
+        * compatible with the current top alignment.
+        */
+       if (t->alignment_offset != alignment) {
+
+               top = max(t->physical_block_size, t->io_min)
+                       + t->alignment_offset;
+               bottom = max(b->physical_block_size, b->io_min) + alignment;
+
+               /* Verify that top and bottom intervals line up */
+               if (max(top, bottom) & (min(top, bottom) - 1))
+                       t->misaligned = 1;
+       }
+
        t->logical_block_size = max(t->logical_block_size,
                                    b->logical_block_size);
 
@@ -544,47 +569,64 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                                       b->physical_block_size);
 
        t->io_min = max(t->io_min, b->io_min);
+       t->io_opt = lcm(t->io_opt, b->io_opt);
+
        t->no_cluster |= b->no_cluster;
        t->discard_zeroes_data &= b->discard_zeroes_data;
 
-       /* Bottom device offset aligned? */
-       if (offset &&
-           (offset & (b->physical_block_size - 1)) != b->alignment_offset) {
+       /* Physical block size a multiple of the logical block size? */
+       if (t->physical_block_size & (t->logical_block_size - 1)) {
+               t->physical_block_size = t->logical_block_size;
                t->misaligned = 1;
-               ret = -1;
        }
 
-       if (offset &&
-           (offset & (b->discard_granularity - 1)) != b->discard_alignment) {
-               t->discard_misaligned = 1;
-               ret = -1;
-       }
-
-       /* If top has no alignment offset, inherit from bottom */
-       if (!t->alignment_offset)
-               t->alignment_offset =
-                       b->alignment_offset & (b->physical_block_size - 1);
-
-       if (!t->discard_alignment)
-               t->discard_alignment =
-                       b->discard_alignment & (b->discard_granularity - 1);
-
-       /* Top device aligned on logical block boundary? */
-       if (t->alignment_offset & (t->logical_block_size - 1)) {
+       /* Minimum I/O a multiple of the physical block size? */
+       if (t->io_min & (t->physical_block_size - 1)) {
+               t->io_min = t->physical_block_size;
+               t->misaligned = 1;
+       }
+
+       /* Optimal I/O a multiple of the physical block size? */
+       if (t->io_opt & (t->physical_block_size - 1)) {
+               t->io_opt = 0;
                t->misaligned = 1;
-               ret = -1;
        }
 
-       /* Find lcm() of optimal I/O size and granularity */
-       t->io_opt = lcm(t->io_opt, b->io_opt);
-       t->discard_granularity = lcm(t->discard_granularity,
-                                    b->discard_granularity);
+       /* Find lowest common alignment_offset */
+       t->alignment_offset = lcm(t->alignment_offset, alignment)
+               & (max(t->physical_block_size, t->io_min) - 1);
+
+       /* Verify that new alignment_offset is on a logical block boundary */
+       if (t->alignment_offset & (t->logical_block_size - 1))
+               t->misaligned = 1;
 
-       /* Verify that optimal I/O size is a multiple of io_min */
-       if (t->io_min && t->io_opt % t->io_min)
-               ret = -1;
+       /* Discard alignment and granularity */
+       if (b->discard_granularity) {
+               unsigned int granularity = b->discard_granularity;
+               offset &= granularity - 1;
+
+               alignment = (granularity + b->discard_alignment - offset)
+                       & (granularity - 1);
+
+               if (t->discard_granularity != 0 &&
+                   t->discard_alignment != alignment) {
+                       top = t->discard_granularity + t->discard_alignment;
+                       bottom = b->discard_granularity + alignment;
+
+                       /* Verify that top and bottom intervals line up */
+                       if (max(top, bottom) & (min(top, bottom) - 1))
+                               t->discard_misaligned = 1;
+               }
+
+               t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+                                                     b->max_discard_sectors);
+               t->discard_granularity = max(t->discard_granularity,
+                                            b->discard_granularity);
+               t->discard_alignment = lcm(t->discard_alignment, alignment) &
+                       (t->discard_granularity - 1);
+       }
 
-       return ret;
+       return t->misaligned ? -1 : 0;
 }
 EXPORT_SYMBOL(blk_stack_limits);
 
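In the reworked blk_stack_limits() above, each device is reduced to an interval of max(physical_block_size, io_min) bytes starting at its alignment_offset, and when the top and bottom offsets differ the stacked limits are flagged misaligned unless the larger of the two (size + offset) values is an exact multiple of the smaller one. A stand-alone sketch of that bit test with made-up sample values (an illustration, not the kernel code):

/* intervals_line_up() mirrors the "max(top, bottom) & (min(top, bottom) - 1)"
 * test from the hunk above; the values passed in below are hypothetical. */
#include <stdio.h>

static int intervals_line_up(unsigned int top, unsigned int bottom)
{
        unsigned int mx = top > bottom ? top : bottom;
        unsigned int mn = top > bottom ? bottom : top;

        /* With the smaller interval a power of two (the usual case for
         * block sizes), this is simply "larger % smaller == 0". */
        return !(mx & (mn - 1));
}

int main(void)
{
        /* 512 B top on an unshifted 4 KiB bottom device: compatible */
        printf("%d\n", intervals_line_up(512, 4096));         /* prints 1 */

        /* 4 KiB top on a 4 KiB bottom shifted by 512 B: the stacked
         * device would get the misaligned flag */
        printf("%d\n", intervals_line_up(4096, 4096 + 512));  /* prints 0 */

        return 0;
}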
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index cfb0b2f5f63d..918c7fd9aeb1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -208,8 +208,6 @@ struct cfq_data {
        /* Root service tree for cfq_groups */
        struct cfq_rb_root grp_service_tree;
        struct cfq_group root_group;
-       /* Number of active cfq groups on group service tree */
-       int nr_groups;
 
        /*
         * The priority currently being served
@@ -283,7 +281,7 @@ struct cfq_data {
         */
        struct cfq_queue oom_cfqq;
 
-       unsigned long last_end_sync_rq;
+       unsigned long last_delayed_sync;
 
        /* List of cfq groups being managed on this device*/
        struct hlist_head cfqg_list;
@@ -294,8 +292,7 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
 
 static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
                                            enum wl_prio_t prio,
-                                           enum wl_type_t type,
-                                           struct cfq_data *cfqd)
+                                           enum wl_type_t type)
 {
        if (!cfqg)
                return NULL;
@@ -319,7 +316,6 @@ enum cfqq_state_flags {
        CFQ_CFQQ_FLAG_coop,             /* cfqq is shared */
        CFQ_CFQQ_FLAG_deep,             /* sync cfqq experienced large depth */
        CFQ_CFQQ_FLAG_wait_busy,        /* Waiting for next request */
-       CFQ_CFQQ_FLAG_wait_busy_done,   /* Got new request. Expire the queue */
 };
 
 #define CFQ_CFQQ_FNS(name)                                             \
@@ -348,7 +344,6 @@ CFQ_CFQQ_FNS(sync);
 CFQ_CFQQ_FNS(coop);
 CFQ_CFQQ_FNS(deep);
 CFQ_CFQQ_FNS(wait_busy);
-CFQ_CFQQ_FNS(wait_busy_done);
 #undef CFQ_CFQQ_FNS
 
 #ifdef CONFIG_DEBUG_CFQ_IOSCHED
@@ -844,7 +839,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
        __cfq_group_service_tree_add(st, cfqg);
        cfqg->on_st = true;
-       cfqd->nr_groups++;
        st->total_weight += cfqg->weight;
 }
 
@@ -865,7 +859,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
        cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
        cfqg->on_st = false;
-       cfqd->nr_groups--;
        st->total_weight -= cfqg->weight;
        if (!RB_EMPTY_NODE(&cfqg->rb_node))
                cfq_rb_erase(&cfqg->rb_node, st);
@@ -1152,7 +1145,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 #endif
 
        service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
-                                               cfqq_type(cfqq), cfqd);
+                                               cfqq_type(cfqq));
        if (cfq_class_idle(cfqq)) {
                rb_key = CFQ_IDLE_DELAY;
                parent = rb_last(&service_tree->rb);
@@ -1515,9 +1508,6 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
        struct cfq_io_context *cic;
        struct cfq_queue *cfqq;
 
-       /* Deny merge if bio and rq don't belong to same cfq group */
-       if ((RQ_CFQQ(rq))->cfqg != cfq_get_cfqg(cfqd, 0))
-               return false;
        /*
         * Disallow merge of a sync bio into an async request.
         */
@@ -1574,7 +1564,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
        cfq_clear_cfqq_wait_request(cfqq);
        cfq_clear_cfqq_wait_busy(cfqq);
-       cfq_clear_cfqq_wait_busy_done(cfqq);
 
        /*
         * store what was left of this slice, if the queue idled/timed out
@@ -1619,7 +1608,7 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 {
        struct cfq_rb_root *service_tree =
                service_tree_for(cfqd->serving_group, cfqd->serving_prio,
-                                       cfqd->serving_type, cfqd);
+                                       cfqd->serving_type);
 
        if (!cfqd->rq_queued)
                return NULL;
@@ -1678,13 +1667,17 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
 #define CFQQ_SEEKY(cfqq)       ((cfqq)->seek_mean > CFQQ_SEEK_THR)
 
 static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-                              struct request *rq)
+                              struct request *rq, bool for_preempt)
 {
        sector_t sdist = cfqq->seek_mean;
 
        if (!sample_valid(cfqq->seek_samples))
                sdist = CFQQ_SEEK_THR;
 
+       /* if seek_mean is big, using it as close criteria is meaningless */
+       if (sdist > CFQQ_SEEK_THR && !for_preempt)
+               sdist = CFQQ_SEEK_THR;
+
        return cfq_dist_from_last(cfqd, rq) <= sdist;
 }
 
@@ -1712,7 +1705,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
         * will contain the closest sector.
         */
        __cfqq = rb_entry(parent, struct cfq_queue, p_node);
-       if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
+       if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
                return __cfqq;
 
        if (blk_rq_pos(__cfqq->next_rq) < sector)
@@ -1723,7 +1716,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
                return NULL;
 
        __cfqq = rb_entry(node, struct cfq_queue, p_node);
-       if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
+       if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
                return __cfqq;
 
        return NULL;
@@ -1750,6 +1743,12 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
                return NULL;
 
        /*
+        * Don't search priority tree if it's the only queue in the group.
+        */
+       if (cur_cfqq->cfqg->nr_cfqq == 1)
+               return NULL;
+
+       /*
         * We should notice if some of the queues are cooperating, eg
         * working closely on the same area of the disk. In that case,
         * we can group them together and don't waste time idling.
@@ -1960,8 +1959,7 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
 }
 
 static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
-                               struct cfq_group *cfqg, enum wl_prio_t prio,
-                               bool prio_changed)
+                               struct cfq_group *cfqg, enum wl_prio_t prio)
 {
        struct cfq_queue *queue;
        int i;
@@ -1969,24 +1967,9 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
        unsigned long lowest_key = 0;
        enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
 
-       if (prio_changed) {
-               /*
-                * When priorities switched, we prefer starting
-                * from SYNC_NOIDLE (first choice), or just SYNC
-                * over ASYNC
-                */
-               if (service_tree_for(cfqg, prio, cur_best, cfqd)->count)
-                       return cur_best;
-               cur_best = SYNC_WORKLOAD;
-               if (service_tree_for(cfqg, prio, cur_best, cfqd)->count)
-                       return cur_best;
-
-               return ASYNC_WORKLOAD;
-       }
-
-       for (i = 0; i < 3; ++i) {
-               /* otherwise, select the one with lowest rb_key */
-               queue = cfq_rb_first(service_tree_for(cfqg, prio, i, cfqd));
+       for (i = 0; i <= SYNC_WORKLOAD; ++i) {
+               /* select the one with lowest rb_key */
+               queue = cfq_rb_first(service_tree_for(cfqg, prio, i));
                if (queue &&
                    (!key_valid || time_before(queue->rb_key, lowest_key))) {
                        lowest_key = queue->rb_key;
@@ -2000,8 +1983,6 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
 
 static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
-       enum wl_prio_t previous_prio = cfqd->serving_prio;
-       bool prio_changed;
        unsigned slice;
        unsigned count;
        struct cfq_rb_root *st;
@@ -2029,24 +2010,19 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
         * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
         * expiration time
         */
-       prio_changed = (cfqd->serving_prio != previous_prio);
-       st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type,
-                               cfqd);
+       st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
        count = st->count;
 
        /*
-        * If priority didn't change, check workload expiration,
-        * and that we still have other queues ready
+        * check workload expiration, and that we still have other queues ready
         */
-       if (!prio_changed && count &&
-           !time_after(jiffies, cfqd->workload_expires))
+       if (count && !time_after(jiffies, cfqd->workload_expires))
                return;
 
        /* otherwise select new workload type */
        cfqd->serving_type =
-               cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio, prio_changed);
-       st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type,
-                               cfqd);
+               cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
+       st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
        count = st->count;
 
        /*
@@ -2110,7 +2086,9 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
                cfqd->workload_expires = jiffies + cfqg->saved_workload_slice;
                cfqd->serving_type = cfqg->saved_workload;
                cfqd->serving_prio = cfqg->saved_serving_prio;
-       }
+       } else
+               cfqd->workload_expires = jiffies - 1;
+
        choose_service_tree(cfqd, cfqg);
 }
 
@@ -2128,14 +2106,35 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 
        if (!cfqd->rq_queued)
                return NULL;
+
        /*
-        * The active queue has run out of time, expire it and select new.
+        * We were waiting for group to get backlogged. Expire the queue
         */
-       if ((cfq_slice_used(cfqq) || cfq_cfqq_wait_busy_done(cfqq))
-           && !cfq_cfqq_must_dispatch(cfqq))
+       if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list))
                goto expire;
 
        /*
+        * The active queue has run out of time, expire it and select new.
+        */
+       if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) {
+               /*
+                * If slice had not expired at the completion of last request
+                * we might not have turned on wait_busy flag. Don't expire
+                * the queue yet. Allow the group to get backlogged.
+                *
+                * The very fact that we have used the slice, that means we
+                * have been idling all along on this queue and it should be
+                * ok to wait for this request to complete.
+                */
+               if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list)
+                   && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
+                       cfqq = NULL;
+                       goto keep_queue;
+               } else
+                       goto expire;
+       }
+
+       /*
         * The active queue has requests and isn't expired, allow it to
         * dispatch.
         */
@@ -2264,7 +2263,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         * based on the last sync IO we serviced
         */
        if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
-               unsigned long last_sync = jiffies - cfqd->last_end_sync_rq;
+               unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
                unsigned int depth;
 
                depth = last_sync / cfqd->cfq_slice[1];
@@ -3117,7 +3116,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
         * if this request is as-good as one we would expect from the
         * current cfqq, let it preempt
         */
-       if (cfq_rq_close(cfqd, cfqq, rq))
+       if (cfq_rq_close(cfqd, cfqq, rq, true))
                return true;
 
        return false;
@@ -3165,10 +3164,6 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
 
        if (cfqq == cfqd->active_queue) {
-               if (cfq_cfqq_wait_busy(cfqq)) {
-                       cfq_clear_cfqq_wait_busy(cfqq);
-                       cfq_mark_cfqq_wait_busy_done(cfqq);
-               }
                /*
                 * Remember that we saw a request from this process, but
                 * don't start queuing just yet. Otherwise we risk seeing lots
@@ -3183,6 +3178,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
                    cfqd->busy_queues > 1) {
                        del_timer(&cfqd->idle_slice_timer);
+                       cfq_clear_cfqq_wait_request(cfqq);
                        __blk_run_queue(cfqd->queue);
                } else
                        cfq_mark_cfqq_must_dispatch(cfqq);
@@ -3251,6 +3247,35 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
                cfqd->hw_tag = 0;
 }
 
+static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+       struct cfq_io_context *cic = cfqd->active_cic;
+
+       /* If there are other queues in the group, don't wait */
+       if (cfqq->cfqg->nr_cfqq > 1)
+               return false;
+
+       if (cfq_slice_used(cfqq))
+               return true;
+
+       /* if slice left is less than think time, wait busy */
+       if (cic && sample_valid(cic->ttime_samples)
+           && (cfqq->slice_end - jiffies < cic->ttime_mean))
+               return true;
+
+       /*
+        * If think times is less than a jiffy than ttime_mean=0 and above
+        * will not be true. It might happen that slice has not expired yet
+        * but will expire soon (4-5 ns) during select_queue(). To cover the
+        * case where think time is less than a jiffy, mark the queue wait
+        * busy if only 1 jiffy is left in the slice.
+        */
+       if (cfqq->slice_end - jiffies == 1)
+               return true;
+
+       return false;
+}
+
 static void cfq_completed_request(struct request_queue *q, struct request *rq)
 {
        struct cfq_queue *cfqq = RQ_CFQQ(rq);
@@ -3273,7 +3298,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 
        if (sync) {
                RQ_CIC(rq)->last_end_request = now;
-               cfqd->last_end_sync_rq = now;
+               if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
+                       cfqd->last_delayed_sync = now;
        }
 
        /*
3279 /* 3305 /*
@@ -3289,11 +3315,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
3289 } 3315 }
3290 3316
3291 /* 3317 /*
3292 * If this queue consumed its slice and this is last queue 3318 * Should we wait for next request to come in before we expire
3293 * in the group, wait for next request before we expire 3319 * the queue.
3294 * the queue
3295 */ 3320 */
3296 if (cfq_slice_used(cfqq) && cfqq->cfqg->nr_cfqq == 1) { 3321 if (cfq_should_wait_busy(cfqd, cfqq)) {
3297 cfqq->slice_end = jiffies + cfqd->cfq_slice_idle; 3322 cfqq->slice_end = jiffies + cfqd->cfq_slice_idle;
3298 cfq_mark_cfqq_wait_busy(cfqq); 3323 cfq_mark_cfqq_wait_busy(cfqq);
3299 } 3324 }
@@ -3711,7 +3736,11 @@ static void *cfq_init_queue(struct request_queue *q)
        cfqd->cfq_latency = 1;
        cfqd->cfq_group_isolation = 0;
        cfqd->hw_tag = -1;
-       cfqd->last_end_sync_rq = jiffies;
+       /*
+        * we optimistically start assuming sync ops weren't delayed in last
+        * second, in order to have larger depth for async operations.
+        */
+       cfqd->last_delayed_sync = jiffies - HZ;
        INIT_RCU_HEAD(&cfqd->rcu);
        return cfqd;
 }
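The last_end_sync_rq to last_delayed_sync change in the hunks above is behavioural, not just a rename: the timestamp is now refreshed only when a sync request took at least its sync fifo expiry (cfq_fifo_expire[1]) to complete, and it is initialised a full second in the past (jiffies - HZ), so the async dispatch depth computed in cfq_may_dispatch() starts out large instead of at zero. A rough stand-alone sketch of that depth calculation, assuming HZ = 1000 and the default sync slice of HZ / 10; the real code additionally clamps the result against the allowed dispatch depth:

/* depth = (time since a sync request was last delayed) / sync slice */
#include <stdio.h>

#define HZ             1000UL
#define CFQ_SLICE_SYNC (HZ / 10)   /* assumed default for cfqd->cfq_slice[1] */

static unsigned long async_depth(unsigned long now, unsigned long last_delayed_sync)
{
        unsigned long last_sync = now - last_delayed_sync;

        return last_sync / CFQ_SLICE_SYNC;
}

int main(void)
{
        unsigned long now = 5000;

        /* old init (jiffies): async writes start fully throttled */
        printf("old init: depth %lu\n", async_depth(now, now));          /* 0  */

        /* new init (jiffies - HZ): a second of headroom from the start */
        printf("new init: depth %lu\n", async_depth(now, now - HZ));     /* 10 */

        /* a sync request was just delayed: throttle async again */
        printf("after delay: depth %lu\n", async_depth(now + 50, now));  /* 0  */

        return 0;
}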