author	Vivek Goyal <vgoyal@redhat.com>	2010-08-23 06:23:53 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-08-23 06:23:53 -0400
commit	02b35081fc98f681411586d3acf9eaad8b8f6e07 (patch)
tree	66cc8c375205a5b032859cdcb0b9b82f04f97443
parent	b6508c1618e7aab085f191efb41b7b019a94ea38 (diff)
cfq-iosched: Do group share accounting in IOPS when slice_idle=0
o Implement another CFQ mode where we charge a group in terms of number of requests dispatched instead of measuring the time. Measuring in terms of time is not possible when we are driving deeper queue depths and there are requests from multiple cfq queues in the request queue.

o This mode currently gets activated if one sets slice_idle=0 and the associated disk supports NCQ. The idea is that on an NCQ disk with idling disabled, most of the queues will dispatch one or more requests and then cfq queue expiry happens, so we don't have a way to measure time. Hence start providing fairness in terms of IOPS.

o Currently IOPS mode works only with cfq group scheduling. CFQ follows different scheduling algorithms for queue and group scheduling. These IOPS stats are used only for group scheduling, hence in non-group mode nothing should change.

o For CFQ group scheduling one can disable slice idling so that we don't idle on queues and can drive deeper request queue depths (achieving better throughput), while group idle remains enabled so one should still get service differentiation among groups.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r--	block/cfq-iosched.c	30
1 file changed, 24 insertions(+), 6 deletions(-)
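To illustrate the accounting change before diving into the diff, here is a minimal user-space sketch (not kernel code) of the charging rule this patch introduces: when queue idling is off and the drive does NCQ, a group is charged by the number of requests it dispatched rather than by slice time, and the charge is scaled by the group's weight before being added to its vdisktime. The constants and helper names (SERVICE_SHIFT, DEFAULT_WEIGHT, group_served) are illustrative assumptions modeled loosely on cfq_scale_slice(), not the actual kernel implementation.

/* Standalone sketch of IOPS-mode vs time-mode group charging. */
#include <stdbool.h>
#include <stdio.h>

#define SERVICE_SHIFT	12	/* illustrative, stands in for CFQ's service shift */
#define DEFAULT_WEIGHT	500	/* illustrative default blkio weight */

struct group {
	unsigned int weight;		/* blkio weight of the cgroup */
	unsigned long long vdisktime;	/* virtual service received so far */
};

/* Mirrors iops_mode(): no queue idling + NCQ drive => count IOs, not time. */
static bool iops_mode(unsigned int slice_idle, bool hw_tag)
{
	return slice_idle == 0 && hw_tag;
}

/*
 * Charge a group for one queue's service. In IOPS mode the charge is the
 * number of dispatched requests; otherwise it is the slice time used.
 * Scaling by weight makes lower-weight groups accrue vdisktime faster,
 * so they are scheduled less often.
 */
static void group_served(struct group *g, unsigned int slice_used,
			 unsigned int nr_dispatched, bool iops)
{
	unsigned long long charge = iops ? nr_dispatched : slice_used;

	g->vdisktime += (charge << SERVICE_SHIFT) * DEFAULT_WEIGHT / g->weight;
}

int main(void)
{
	struct group a = { .weight = 1000 }, b = { .weight = 500 };
	bool iops = iops_mode(0, true);	/* slice_idle=0 on an NCQ disk */

	/* Both groups dispatch 8 requests; wall-clock time is meaningless here. */
	group_served(&a, 0, 8, iops);
	group_served(&b, 0, 8, iops);

	/* b accrues twice the vdisktime of a, so a gets roughly twice the share. */
	printf("vdisktime a=%llu b=%llu\n", a.vdisktime, b.vdisktime);
	return 0;
}

As the commit message notes, this mode only kicks in when slice_idle is set to 0 (for cfq that would typically be done through the iosched tunables under /sys/block/<dev>/queue/iosched/) on a drive that reports NCQ support.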
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 8830569542c4..3fc6be110c1d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -378,6 +378,21 @@ CFQ_CFQQ_FNS(wait_busy);
 			&cfqg->service_trees[i][j]: NULL) \
 
 
+static inline bool iops_mode(struct cfq_data *cfqd)
+{
+	/*
+	 * If we are not idling on queues and it is a NCQ drive, parallel
+	 * execution of requests is on and measuring time is not possible
+	 * in most of the cases until and unless we drive shallower queue
+	 * depths and that becomes a performance bottleneck. In such cases
+	 * switch to start providing fairness in terms of number of IOs.
+	 */
+	if (!cfqd->cfq_slice_idle && cfqd->hw_tag)
+		return true;
+	else
+		return false;
+}
+
 static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
 {
 	if (cfq_class_idle(cfqq))
@@ -906,7 +921,6 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
 		slice_used = cfqq->allocated_slice;
 	}
 
-	cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used);
 	return slice_used;
 }
 
@@ -914,19 +928,21 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 			struct cfq_queue *cfqq)
 {
 	struct cfq_rb_root *st = &cfqd->grp_service_tree;
-	unsigned int used_sl, charge_sl;
+	unsigned int used_sl, charge;
 	int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
 			- cfqg->service_tree_idle.count;
 
 	BUG_ON(nr_sync < 0);
-	used_sl = charge_sl = cfq_cfqq_slice_usage(cfqq);
+	used_sl = charge = cfq_cfqq_slice_usage(cfqq);
 
-	if (!cfq_cfqq_sync(cfqq) && !nr_sync)
-		charge_sl = cfqq->allocated_slice;
+	if (iops_mode(cfqd))
+		charge = cfqq->slice_dispatch;
+	else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
+		charge = cfqq->allocated_slice;
 
 	/* Can't update vdisktime while group is on service tree */
 	cfq_rb_erase(&cfqg->rb_node, st);
-	cfqg->vdisktime += cfq_scale_slice(charge_sl, cfqg);
+	cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
 	__cfq_group_service_tree_add(st, cfqg);
 
 	/* This group is being expired. Save the context */
@@ -940,6 +956,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 
 	cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
 			st->min_vdisktime);
+	cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u",
+			used_sl, cfqq->slice_dispatch, charge, iops_mode(cfqd));
 	cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
 	cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
 }