Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r--  block/cfq-iosched.c  232
1 files changed, 169 insertions, 63 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ab7a9e6a9b1c..7c52d6888924 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -300,7 +300,9 @@ struct cfq_data {
 
 	/* List of cfq groups being managed on this device*/
 	struct hlist_head cfqg_list;
-	struct rcu_head rcu;
+
+	/* Number of groups which are on blkcg->blkg_list */
+	unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -665,15 +667,11 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
 	if (rq2 == NULL)
 		return rq1;
 
-	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
-		return rq1;
-	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
-		return rq2;
-	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
-		return rq1;
-	else if ((rq2->cmd_flags & REQ_META) &&
-		 !(rq1->cmd_flags & REQ_META))
-		return rq2;
+	if (rq_is_sync(rq1) != rq_is_sync(rq2))
+		return rq_is_sync(rq1) ? rq1 : rq2;
+
+	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
 
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
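
Note on the hunk above: the rewritten cfq_choose_req() relies on the flag-XOR idiom, where (f1 ^ f2) & FLAG is non-zero exactly when one of the two sides carries FLAG, so the flagged request can be picked with a single branch. Below is a tiny standalone C check of that idiom; the REQ_META value used here is made up for the demo and is not the kernel's definition.

/* Demo of the flag-XOR idiom from the new cfq_choose_req(). */
#include <assert.h>

#define REQ_META (1u << 4)	/* illustrative value only, not the kernel's */

static unsigned int pick_flagged(unsigned int f1, unsigned int f2)
{
	if ((f1 ^ f2) & REQ_META)	/* exactly one side has REQ_META */
		return (f1 & REQ_META) ? f1 : f2;
	return f1;			/* tie: no preference, fall through */
}

int main(void)
{
	assert(pick_flagged(REQ_META, 0) == REQ_META);
	assert(pick_flagged(0, REQ_META) == REQ_META);
	assert(pick_flagged(0, 0) == 0);			/* neither set */
	assert(pick_flagged(REQ_META, REQ_META) == REQ_META);	/* both set */
	return 0;
}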
@@ -1014,28 +1012,47 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 	cfqg->needs_update = true;
 }
 
-static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
-		struct blkio_cgroup *blkcg, int create)
+static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
+			struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
 {
-	struct cfq_group *cfqg = NULL;
-	void *key = cfqd;
-	int i, j;
-	struct cfq_rb_root *st;
 	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
 	unsigned int major, minor;
 
-	cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
-	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+	/*
+	 * Add group onto cgroup list. It might happen that bdi->dev is
+	 * not initialized yet. Initialize this new group without major
+	 * and minor info and this info will be filled in once a new thread
+	 * comes for IO.
+	 */
+	if (bdi->dev) {
 		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfqg->blkg.dev = MKDEV(major, minor);
-		goto done;
-	}
-	if (cfqg || !create)
-		goto done;
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+					(void *)cfqd, MKDEV(major, minor));
+	} else
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+					(void *)cfqd, 0);
+
+	cfqd->nr_blkcg_linked_grps++;
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+
+	/* Add group on cfqd list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+}
+
+/*
+ * Should be called from sleepable context. No request queue lock as per
+ * cpu stats are allocated dynamically and alloc_percpu needs to be called
+ * from sleepable context.
+ */
+static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+{
+	struct cfq_group *cfqg = NULL;
+	int i, j, ret;
+	struct cfq_rb_root *st;
 
 	cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
 	if (!cfqg)
-		goto done;
+		return NULL;
 
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
@@ -1049,43 +1066,94 @@ static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
 	 */
 	cfqg->ref = 1;
 
+	ret = blkio_alloc_blkg_stats(&cfqg->blkg);
+	if (ret) {
+		kfree(cfqg);
+		return NULL;
+	}
+
+	return cfqg;
+}
+
+static struct cfq_group *
+cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+{
+	struct cfq_group *cfqg = NULL;
+	void *key = cfqd;
+	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+	unsigned int major, minor;
+
 	/*
-	 * Add group onto cgroup list. It might happen that bdi->dev is
-	 * not initialized yet. Initialize this new group without major
-	 * and minor info and this info will be filled in once a new thread
-	 * comes for IO. See code above.
+	 * This is the common case when there are no blkio cgroups.
+	 * Avoid lookup in this case
 	 */
-	if (bdi->dev) {
-		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					MKDEV(major, minor));
-	} else
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					0);
-
-	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+	if (blkcg == &blkio_root_cgroup)
+		cfqg = &cfqd->root_group;
+	else
+		cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
 
-	/* Add group on cfqd list */
-	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+		cfqg->blkg.dev = MKDEV(major, minor);
+	}
 
-done:
 	return cfqg;
 }
 
 /*
- * Search for the cfq group current task belongs to. If create = 1, then also
- * create the cfq group if it does not exist. request_queue lock must be held.
+ * Search for the cfq group current task belongs to. request_queue lock must
+ * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	struct blkio_cgroup *blkcg;
-	struct cfq_group *cfqg = NULL;
+	struct cfq_group *cfqg = NULL, *__cfqg = NULL;
+	struct request_queue *q = cfqd->queue;
 
 	rcu_read_lock();
 	blkcg = task_blkio_cgroup(current);
-	cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
-	if (!cfqg && create)
+	cfqg = cfq_find_cfqg(cfqd, blkcg);
+	if (cfqg) {
+		rcu_read_unlock();
+		return cfqg;
+	}
+
+	/*
+	 * Need to allocate a group. Allocation of group also needs allocation
+	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
+	 * we need to drop rcu lock and queue_lock before we call alloc.
+	 *
+	 * Not taking any queue reference here and assuming that queue is
+	 * around by the time we return. CFQ queue allocation code does
+	 * the same. It might be racy though.
+	 */
+
+	rcu_read_unlock();
+	spin_unlock_irq(q->queue_lock);
+
+	cfqg = cfq_alloc_cfqg(cfqd);
+
+	spin_lock_irq(q->queue_lock);
+
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+
+	/*
+	 * If some other thread already allocated the group while we were
+	 * not holding queue lock, free up the group
+	 */
+	__cfqg = cfq_find_cfqg(cfqd, blkcg);
+
+	if (__cfqg) {
+		kfree(cfqg);
+		rcu_read_unlock();
+		return __cfqg;
+	}
+
+	if (!cfqg)
 		cfqg = &cfqd->root_group;
+
+	cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
 	rcu_read_unlock();
 	return cfqg;
 }
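
The comment block in the hunk above describes the pattern cfq_get_cfqg() now follows: look up under the lock, drop the locks for the blocking allocation, re-take the lock, and repeat the lookup so that a racing allocator wins and the duplicate is freed. Below is a minimal userspace sketch of that idiom, using a pthread mutex in place of queue_lock and RCU; struct group, find_group() and get_group() are illustrative names, not kernel API. Build with -pthread.

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct group {
	char name[32];
	struct group *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct group *groups;	/* list protected by lock */

static struct group *find_group(const char *name)
{
	struct group *g;

	for (g = groups; g; g = g->next)
		if (!strcmp(g->name, name))
			return g;
	return NULL;
}

static struct group *get_group(const char *name)
{
	struct group *g, *raced;

	pthread_mutex_lock(&lock);
	g = find_group(name);
	pthread_mutex_unlock(&lock);
	if (g)
		return g;

	/* Allocation may block, so it happens with the lock dropped. */
	g = calloc(1, sizeof(*g));
	if (!g)
		return NULL;
	strncpy(g->name, name, sizeof(g->name) - 1);

	pthread_mutex_lock(&lock);
	raced = find_group(name);	/* did someone beat us to it? */
	if (raced) {
		pthread_mutex_unlock(&lock);
		free(g);		/* our copy loses the race */
		return raced;
	}
	g->next = groups;		/* we won: publish our group */
	groups = g;
	pthread_mutex_unlock(&lock);
	return g;
}

int main(void)
{
	struct group *a = get_group("grp0");
	struct group *b = get_group("grp0");

	return !(a && a == b);	/* second lookup must hit the cached group */
}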
@@ -1118,6 +1186,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
 		return;
 	for_each_cfqg_st(cfqg, i, j, st)
 		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
+	free_percpu(cfqg->blkg.stats_cpu);
 	kfree(cfqg);
 }
 
@@ -1176,7 +1245,7 @@ void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	return &cfqd->root_group;
 }
@@ -1210,7 +1279,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rb_root *service_tree;
 	int left;
 	int new_cfqq = 1;
-	int group_changed = 0;
 
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
@@ -1281,7 +1349,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	rb_link_node(&cfqq->rb_node, parent, p);
 	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
 	service_tree->count++;
-	if ((add_front || !new_cfqq) && !group_changed)
+	if (add_front || !new_cfqq)
 		return;
 	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
@@ -2029,7 +2097,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
 
-	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+	return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
 }
 
 /*
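
The cfq_prio_to_maxrq() change above is a pure algebraic simplification: with CFQ_PRIO_LISTS defined as IOPRIO_BE_NR (both 8), base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - ioprio) equals base_rq * (IOPRIO_BE_NR - ioprio). The small standalone check below verifies the identity for every priority level; the constants are copied in so it builds on its own.

#include <assert.h>

#define CFQ_PRIO_LISTS	8	/* kernel defines this as IOPRIO_BE_NR */
#define IOPRIO_BE_NR	8

int main(void)
{
	unsigned int base_rq, prio;

	for (base_rq = 1; base_rq <= 64; base_rq++)
		for (prio = 0; prio < IOPRIO_BE_NR; prio++)
			assert(2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - prio)) ==
			       2 * base_rq * (IOPRIO_BE_NR - prio));
	return 0;
}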
@@ -2911,7 +2979,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
 	struct cfq_group *cfqg;
 
 retry:
-	cfqg = cfq_get_cfqg(cfqd, 1);
+	cfqg = cfq_get_cfqg(cfqd);
 	cic = cfq_cic_lookup(cfqd, ioc);
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3815,15 +3883,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-	kfree(container_of(head, struct cfq_data, rcu));
-}
-
 static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
+	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3842,7 +3906,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_put_async_queues(cfqd);
 	cfq_release_cfq_groups(cfqd);
-	cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+	/*
+	 * If there are groups which we could not unlink from blkcg list,
+	 * wait for a rcu period for them to be freed.
+	 */
+	if (cfqd->nr_blkcg_linked_grps)
+		wait = true;
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -3852,8 +3922,25 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	ida_remove(&cic_index_ida, cfqd->cic_index);
 	spin_unlock(&cic_index_lock);
 
-	/* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-	call_rcu(&cfqd->rcu, cfq_cfqd_free);
+	/*
+	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
+	 * Do this wait only if there are other unlinked groups out
+	 * there. This can happen if cgroup deletion path claimed the
+	 * responsibility of cleaning up a group before queue cleanup code
+	 * get to the group.
+	 *
+	 * Do not call synchronize_rcu() unconditionally as there are drivers
+	 * which create/delete request queue hundreds of times during scan/boot
+	 * and synchronize_rcu() can take significant time and slow down boot.
+	 */
+	if (wait)
+		synchronize_rcu();
+
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	/* Free up per cpu stats for root group */
+	free_percpu(cfqd->root_group.blkg.stats_cpu);
+#endif
+	kfree(cfqd);
 }
 
 static int cfq_alloc_cic_index(void)
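
The hunk above only pays for a grace-period wait when some group may still be reachable by RCU readers. As a rough userspace analogy (assuming liburcu is installed; build with -lurcu), the sketch below frees the old object directly when nothing had been published, and calls synchronize_rcu() before freeing only when a reader-visible pointer was actually replaced; struct cfg, read_value() and update_value() are illustrative names.

#include <urcu.h>		/* userspace RCU; link with -lurcu */
#include <stdio.h>
#include <stdlib.h>

struct cfg {
	int value;
};

static struct cfg *live_cfg;	/* read by RCU readers */

/* Reader side: dereference the shared pointer inside a read-side section. */
static int read_value(void)
{
	struct cfg *c;
	int v = -1;

	rcu_read_lock();
	c = rcu_dereference(live_cfg);
	if (c)
		v = c->value;
	rcu_read_unlock();
	return v;
}

/* Writer side: publish a new config and retire the old one.  The cost of
 * synchronize_rcu() is only paid when an old, possibly reader-visible
 * object actually has to be freed. */
static void update_value(int value)
{
	struct cfg *newc = malloc(sizeof(*newc));
	struct cfg *oldc;

	if (!newc)
		return;
	newc->value = value;

	oldc = live_cfg;
	rcu_assign_pointer(live_cfg, newc);

	if (oldc) {
		synchronize_rcu();	/* wait for readers of oldc to finish */
		free(oldc);
	}
}

int main(void)
{
	rcu_register_thread();	/* every RCU-using thread must register */
	update_value(1);	/* no old object: no grace-period wait */
	printf("value = %d\n", read_value());
	update_value(2);	/* old object existed: wait, then free */
	printf("value = %d\n", read_value());
	rcu_unregister_thread();
	return 0;
}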
@@ -3886,8 +3973,12 @@ static void *cfq_init_queue(struct request_queue *q)
 		return NULL;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!cfqd)
+	if (!cfqd) {
+		spin_lock(&cic_index_lock);
+		ida_remove(&cic_index_ida, i);
+		spin_unlock(&cic_index_lock);
 		return NULL;
+	}
 
 	/*
 	 * Don't need take queue_lock in the routine, since we are
@@ -3909,14 +4000,29 @@ static void *cfq_init_queue(struct request_queue *q)
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	/*
-	 * Take a reference to root group which we never drop. This is just
-	 * to make sure that cfq_put_cfqg() does not try to kfree root group
+	 * Set root group reference to 2. One reference will be dropped when
+	 * all groups on cfqd->cfqg_list are being deleted during queue exit.
+	 * Other reference will remain there as we don't want to delete this
+	 * group as it is statically allocated and gets destroyed when
+	 * throtl_data goes away.
 	 */
-	cfqg->ref = 1;
+	cfqg->ref = 2;
+
+	if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
+		kfree(cfqg);
+		kfree(cfqd);
+		return NULL;
+	}
+
 	rcu_read_lock();
+
 	cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
 					(void *)cfqd, 0);
 	rcu_read_unlock();
+	cfqd->nr_blkcg_linked_grps++;
+
+	/* Add group on cfqd->cfqg_list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 #endif
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we