Diffstat (limited to 'block/cfq-iosched.c')
 block/cfq-iosched.c | 232 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 169 insertions(+), 63 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ab7a9e6a9b1c..7c52d6888924 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -300,7 +300,9 @@ struct cfq_data {
 
 	/* List of cfq groups being managed on this device*/
 	struct hlist_head cfqg_list;
-	struct rcu_head rcu;
+
+	/* Number of groups which are on blkcg->blkg_list */
+	unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -665,15 +667,11 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
 	if (rq2 == NULL)
 		return rq1;
 
-	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
-		return rq1;
-	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
-		return rq2;
-	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
-		return rq1;
-	else if ((rq2->cmd_flags & REQ_META) &&
-		 !(rq1->cmd_flags & REQ_META))
-		return rq2;
+	if (rq_is_sync(rq1) != rq_is_sync(rq2))
+		return rq_is_sync(rq1) ? rq1 : rq2;
+
+	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
 
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
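The rewrite above collapses two if/else ladders into two symmetric tests: if the requests differ in sync-ness, prefer the sync one; if they differ only in REQ_META, prefer the metadata one. A minimal standalone check (hypothetical, not part of the patch; the flag values are local stand-ins, and rq_is_sync() is modeled as a normalized bit test since the kernel helper returns a bool) shows the two forms agree for every flag combination:

#include <assert.h>
#include <stdio.h>

#define REQ_SYNC (1u << 0)	/* stand-in bit values for illustration */
#define REQ_META (1u << 1)

/* Old decision logic: returns 1 or 2 for a winner, 0 to fall through. */
static int old_pick(unsigned f1, unsigned f2)
{
	if ((f1 & REQ_SYNC) && !(f2 & REQ_SYNC))
		return 1;
	else if ((f2 & REQ_SYNC) && !(f1 & REQ_SYNC))
		return 2;
	if ((f1 & REQ_META) && !(f2 & REQ_META))
		return 1;
	else if ((f2 & REQ_META) && !(f1 & REQ_META))
		return 2;
	return 0;	/* fall through to the sector-distance logic */
}

/* New decision logic, mirroring the patched cfq_choose_req(). */
static int new_pick(unsigned f1, unsigned f2)
{
	/* !! models rq_is_sync() returning a normalized 0/1 value */
	if (!!(f1 & REQ_SYNC) != !!(f2 & REQ_SYNC))
		return (f1 & REQ_SYNC) ? 1 : 2;
	if ((f1 ^ f2) & REQ_META)
		return (f1 & REQ_META) ? 1 : 2;
	return 0;
}

int main(void)
{
	for (unsigned f1 = 0; f1 < 4; f1++)
		for (unsigned f2 = 0; f2 < 4; f2++)
			assert(old_pick(f1, f2) == new_pick(f1, f2));
	puts("old and new decision logic agree");
	return 0;
}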
@@ -1014,28 +1012,47 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 	cfqg->needs_update = true;
 }
 
-static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
-					struct blkio_cgroup *blkcg, int create)
+static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
+			struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
 {
-	struct cfq_group *cfqg = NULL;
-	void *key = cfqd;
-	int i, j;
-	struct cfq_rb_root *st;
 	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
 	unsigned int major, minor;
 
-	cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
-	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
-		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfqg->blkg.dev = MKDEV(major, minor);
-		goto done;
-	}
-	if (cfqg || !create)
-		goto done;
+	/*
+	 * Add group onto cgroup list. It might happen that bdi->dev is
+	 * not initialized yet. Initialize this new group without major
+	 * and minor info and this info will be filled in once a new thread
+	 * comes for IO.
+	 */
+	if (bdi->dev) {
+		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+					(void *)cfqd, MKDEV(major, minor));
+	} else
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
+					(void *)cfqd, 0);
+
+	cfqd->nr_blkcg_linked_grps++;
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+
+	/* Add group on cfqd list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+}
+
+/*
+ * Should be called from sleepable context. No request queue lock as per
+ * cpu stats are allocated dynamically and alloc_percpu needs to be called
+ * from sleepable context.
+ */
+static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+{
+	struct cfq_group *cfqg = NULL;
+	int i, j, ret;
+	struct cfq_rb_root *st;
 
 	cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
 	if (!cfqg)
-		goto done;
+		return NULL;
 
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
@@ -1049,43 +1066,94 @@ static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
 	 */
 	cfqg->ref = 1;
 
+	ret = blkio_alloc_blkg_stats(&cfqg->blkg);
+	if (ret) {
+		kfree(cfqg);
+		return NULL;
+	}
+
+	return cfqg;
+}
+
+static struct cfq_group *
+cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+{
+	struct cfq_group *cfqg = NULL;
+	void *key = cfqd;
+	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+	unsigned int major, minor;
+
 	/*
-	 * Add group onto cgroup list. It might happen that bdi->dev is
-	 * not initialized yet. Initialize this new group without major
-	 * and minor info and this info will be filled in once a new thread
-	 * comes for IO. See code above.
+	 * This is the common case when there are no blkio cgroups.
+	 * Avoid lookup in this case
 	 */
-	if (bdi->dev) {
-		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					MKDEV(major, minor));
-	} else
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					0);
-
-	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+	if (blkcg == &blkio_root_cgroup)
+		cfqg = &cfqd->root_group;
+	else
+		cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
 
-	/* Add group on cfqd list */
-	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+		cfqg->blkg.dev = MKDEV(major, minor);
+	}
 
-done:
 	return cfqg;
 }
 
 /*
- * Search for the cfq group current task belongs to. If create = 1, then also
- * create the cfq group if it does not exist. request_queue lock must be held.
+ * Search for the cfq group current task belongs to. request_queue lock must
+ * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	struct blkio_cgroup *blkcg;
-	struct cfq_group *cfqg = NULL;
+	struct cfq_group *cfqg = NULL, *__cfqg = NULL;
+	struct request_queue *q = cfqd->queue;
 
 	rcu_read_lock();
 	blkcg = task_blkio_cgroup(current);
-	cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
-	if (!cfqg && create)
+	cfqg = cfq_find_cfqg(cfqd, blkcg);
+	if (cfqg) {
+		rcu_read_unlock();
+		return cfqg;
+	}
+
+	/*
+	 * Need to allocate a group. Allocation of group also needs allocation
+	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
+	 * we need to drop rcu lock and queue_lock before we call alloc.
+	 *
+	 * Not taking any queue reference here and assuming that queue is
+	 * around by the time we return. CFQ queue allocation code does
+	 * the same. It might be racy though.
+	 */
+
+	rcu_read_unlock();
+	spin_unlock_irq(q->queue_lock);
+
+	cfqg = cfq_alloc_cfqg(cfqd);
+
+	spin_lock_irq(q->queue_lock);
+
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+
+	/*
+	 * If some other thread already allocated the group while we were
+	 * not holding queue lock, free up the group
+	 */
+	__cfqg = cfq_find_cfqg(cfqd, blkcg);
+
+	if (__cfqg) {
+		kfree(cfqg);
+		rcu_read_unlock();
+		return __cfqg;
+	}
+
+	if (!cfqg)
 		cfqg = &cfqd->root_group;
+
+	cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
 	rcu_read_unlock();
 	return cfqg;
 }
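The new cfq_get_cfqg() follows the classic unlock-allocate-relock-recheck pattern: the per-cpu stats allocation can sleep, so both the RCU read lock and queue_lock are dropped around cfq_alloc_cfqg(), and the lookup is repeated afterwards in case another task created the group in the meantime. A hypothetical userland analogue (pthreads standing in for queue_lock, a single shared slot standing in for the blkcg group lookup) sketches the same shape:

#include <pthread.h>
#include <stdlib.h>

struct group { int weight; };

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct group *lookup_slot;	/* stands in for blkiocg_lookup_group() */

static struct group *get_group(void)
{
	struct group *g, *__g;

	pthread_mutex_lock(&queue_lock);
	g = lookup_slot;
	if (g) {			/* fast path: group already exists */
		pthread_mutex_unlock(&queue_lock);
		return g;
	}

	/* The allocation may block, so drop the lock around it. */
	pthread_mutex_unlock(&queue_lock);
	g = malloc(sizeof(*g));
	pthread_mutex_lock(&queue_lock);

	/* Re-check: another thread may have created the group meanwhile. */
	__g = lookup_slot;
	if (__g) {
		free(g);		/* we lost the race; discard ours */
		pthread_mutex_unlock(&queue_lock);
		return __g;
	}

	if (g)
		lookup_slot = g;	/* publish under the lock */
	pthread_mutex_unlock(&queue_lock);
	return g;	/* may be NULL; the patch falls back to root_group */
}

int main(void)
{
	return get_group() ? 0 : 1;
}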
@@ -1118,6 +1186,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
 		return;
 	for_each_cfqg_st(cfqg, i, j, st)
 		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
+	free_percpu(cfqg->blkg.stats_cpu);
 	kfree(cfqg);
 }
 
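With stats now allocated dynamically per group (blkio_alloc_blkg_stats() in cfq_alloc_cfqg()), the put path gains a matching free_percpu() before kfree(): dropping the last reference must release the stats buffer first or it leaks. A toy model of that release order (plain malloc/free standing in for the kernel's per-cpu allocator):

#include <stdlib.h>

struct group {
	int ref;
	long *stats_cpu;	/* stands in for blkg.stats_cpu */
};

static void group_put(struct group *g)
{
	if (--g->ref)
		return;
	free(g->stats_cpu);	/* mirrors the added free_percpu() */
	free(g);
}

int main(void)
{
	struct group *g = calloc(1, sizeof(*g));

	if (!g)
		return 1;
	g->stats_cpu = calloc(4, sizeof(*g->stats_cpu));
	g->ref = 1;
	group_put(g);		/* last put frees the stats, then the group */
	return 0;
}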
@@ -1176,7 +1245,7 @@ void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	return &cfqd->root_group;
 }
@@ -1210,7 +1279,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rb_root *service_tree;
 	int left;
 	int new_cfqq = 1;
-	int group_changed = 0;
 
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
@@ -1281,7 +1349,7 @@
 	rb_link_node(&cfqq->rb_node, parent, p);
 	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
 	service_tree->count++;
-	if ((add_front || !new_cfqq) && !group_changed)
+	if (add_front || !new_cfqq)
 		return;
 	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
@@ -2029,7 +2097,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
 
-	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+	return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
 }
 
 /*
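The new return expression is an algebraic simplification rather than a behavior change: base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - ioprio) factors to base_rq * (CFQ_PRIO_LISTS - ioprio), and CFQ_PRIO_LISTS equals IOPRIO_BE_NR (both 8) in this kernel. A throwaway check of the equivalence (base_rq is a stand-in for the value CFQ derives from cfq_slice_async_rq):

#include <assert.h>

#define CFQ_PRIO_LISTS	8
#define IOPRIO_BE_NR	8

int main(void)
{
	const int base_rq = 2;	/* stand-in value for illustration */

	for (int ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
		assert(2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - ioprio)) ==
		       2 * base_rq * (IOPRIO_BE_NR - ioprio));
	return 0;
}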
@@ -2911,7 +2979,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
 	struct cfq_group *cfqg;
 
 retry:
-	cfqg = cfq_get_cfqg(cfqd, 1);
+	cfqg = cfq_get_cfqg(cfqd);
 	cic = cfq_cic_lookup(cfqd, ioc);
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3815,15 +3883,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-	kfree(container_of(head, struct cfq_data, rcu));
-}
-
 static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
+	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3842,7 +3906,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_put_async_queues(cfqd);
 	cfq_release_cfq_groups(cfqd);
-	cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+	/*
+	 * If there are groups which we could not unlink from blkcg list,
+	 * wait for a rcu period for them to be freed.
+	 */
+	if (cfqd->nr_blkcg_linked_grps)
+		wait = true;
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -3852,8 +3922,25 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	ida_remove(&cic_index_ida, cfqd->cic_index);
 	spin_unlock(&cic_index_lock);
 
-	/* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-	call_rcu(&cfqd->rcu, cfq_cfqd_free);
+	/*
+	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
+	 * Do this wait only if there are other unlinked groups out
+	 * there. This can happen if cgroup deletion path claimed the
+	 * responsibility of cleaning up a group before queue cleanup code
+	 * get to the group.
+	 *
+	 * Do not call synchronize_rcu() unconditionally as there are drivers
+	 * which create/delete request queue hundreds of times during scan/boot
+	 * and synchronize_rcu() can take significant time and slow down boot.
+	 */
+	if (wait)
+		synchronize_rcu();
+
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	/* Free up per cpu stats for root group */
+	free_percpu(cfqd->root_group.blkg.stats_cpu);
+#endif
+	kfree(cfqd);
 }
 
 static int cfq_alloc_cic_index(void)
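The hunk above replaces the unconditional call_rcu() teardown with a conditional synchronize_rcu(): the grace-period wait is only paid when unlinked groups might still be reachable by readers. The same unlink-then-maybe-wait shape can be sketched in userland with liburcu (an assumption: the classic liburcu API via urcu.h, linked with -lurcu; the names visible and teardown are hypothetical):

#include <urcu.h>	/* assumption: classic liburcu API */
#include <stdbool.h>
#include <stdlib.h>

struct group { int weight; };
static struct group *visible;	/* pointer that RCU readers dereference */

static void teardown(void)
{
	struct group *g = visible;
	bool wait = false;

	if (g) {
		visible = NULL;	/* unlink: readers may still hold g */
		wait = true;
	}

	/*
	 * Grace periods are expensive: wait only when something was actually
	 * unlinked, just as cfq_exit_queue() now skips synchronize_rcu()
	 * when no groups remain linked on the blkcg lists.
	 */
	if (wait)
		synchronize_rcu();
	free(g);
}

int main(void)
{
	rcu_register_thread();
	visible = calloc(1, sizeof(*visible));
	teardown();
	rcu_unregister_thread();
	return 0;
}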
@@ -3886,8 +3973,12 @@ static void *cfq_init_queue(struct request_queue *q)
 		return NULL;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!cfqd)
+	if (!cfqd) {
+		spin_lock(&cic_index_lock);
+		ida_remove(&cic_index_ida, i);
+		spin_unlock(&cic_index_lock);
 		return NULL;
+	}
 
 	/*
 	 * Don't need take queue_lock in the routine, since we are
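This cfq_init_queue() change fixes a leak on the error path: the cic index had already been taken from cic_index_ida, so a failed cfqd allocation must give it back. The shape is the usual undo-earlier-work-before-failing idiom; a self-contained sketch with stand-in allocators (hypothetical names, not the kernel ida API):

#include <stdlib.h>

/* Stand-ins for index allocation; hypothetical, for illustration only. */
static int next_index;
static int index_alloc(void) { return next_index++; }
static void index_free(int idx) { (void)idx; }

struct sched_data { int cic_index; };

static struct sched_data *sched_init(void)
{
	int i = index_alloc();
	struct sched_data *d;

	if (i < 0)
		return NULL;

	d = calloc(1, sizeof(*d));
	if (!d) {
		index_free(i);	/* undo the earlier allocation, as the fix does */
		return NULL;
	}

	d->cic_index = i;
	return d;
}

int main(void)
{
	struct sched_data *d = sched_init();

	free(d);
	return 0;
}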
@@ -3909,14 +4000,29 @@
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	/*
-	 * Take a reference to root group which we never drop. This is just
-	 * to make sure that cfq_put_cfqg() does not try to kfree root group
+	 * Set root group reference to 2. One reference will be dropped when
+	 * all groups on cfqd->cfqg_list are being deleted during queue exit.
+	 * Other reference will remain there as we don't want to delete this
+	 * group as it is statically allocated and gets destroyed when
+	 * throtl_data goes away.
 	 */
-	cfqg->ref = 1;
+	cfqg->ref = 2;
+
+	if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
+		kfree(cfqg);
+		kfree(cfqd);
+		return NULL;
+	}
+
 	rcu_read_lock();
+
 	cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
 					(void *)cfqd, 0);
 	rcu_read_unlock();
+	cfqd->nr_blkcg_linked_grps++;
+
+	/* Add group on cfqd->cfqg_list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 #endif
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we
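Initializing the embedded root group's refcount to 2 keeps the put path from ever freeing it: one reference is consumed when the group is unlinked from cfqd->cfqg_list at queue exit, and the surviving reference guarantees cfq_put_cfqg() never reaches zero on memory that was never individually allocated. A toy assertion of that invariant:

#include <assert.h>

struct group { int ref; };

/* Returns nonzero when the group would be freed. */
static int group_put(struct group *g)
{
	return --g->ref == 0;
}

int main(void)
{
	struct group root = { .ref = 2 };	/* embedded in cfqd, never kfree'd */

	/* Queue exit drops the cfqg_list reference... */
	assert(!group_put(&root));
	/* ...and the remaining reference keeps the embedded group alive. */
	assert(root.ref == 1);
	return 0;
}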