author     Tejun Heo <tj@kernel.org>        2011-12-13 18:33:38 -0500
committer  Jens Axboe <axboe@kernel.dk>     2011-12-13 18:33:38 -0500
commit     216284c352a0061f5b20acff2c4e50fb43fea183
tree       01611f6d35c041a941269cb3dd3926c0711722ae /block
parent     dc86900e0a8f665122de6faadd27fb4c6d2b3e4d
block, cfq: fix race condition in cic creation path and tighten locking
cfq_get_io_context() would fail if multiple tasks raced to insert cics
for the same association. This patch restructures
cfq_get_io_context() so that the slow-path insertion race is handled
properly, as sketched below.
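The core of the fix is a lookup-or-create retry loop: look the cic up under the lock; on a miss, drop the lock, allocate and insert, and treat losing the insertion race (the -EEXIST case in the diff) as success before retrying the lookup. Below is a minimal userspace sketch of that pattern, with pthread mutexes standing in for the kernel's spinlocks; every name in it (ctx, table_lock, ctx_find(), ctx_create(), ctx_get()) is illustrative, not a kernel API.

```c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx {
        int key;                /* stands in for the (ioc, queue) association */
        struct ctx *next;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ctx *table;       /* protected by table_lock */

/* lookup; caller must hold table_lock */
static struct ctx *ctx_find(int key)
{
        struct ctx *c;

        for (c = table; c; c = c->next)
                if (c->key == key)
                        return c;
        return NULL;
}

/* slow path: allocate unlocked, link while locked; a lost race is success */
static int ctx_create(int key)
{
        struct ctx *c = malloc(sizeof(*c));

        if (!c)
                return -1;
        c->key = key;

        pthread_mutex_lock(&table_lock);
        if (ctx_find(key)) {
                /* someone else already did it - drop ours, report success */
                free(c);
        } else {
                c->next = table;
                table = c;
        }
        pthread_mutex_unlock(&table_lock);
        return 0;
}

/* called with table_lock held; may drop and re-acquire it */
static struct ctx *ctx_get(int key)
{
        struct ctx *c;

        while (!(c = ctx_find(key))) {
                /* slow path - unlock, create the missing entry, retry */
                pthread_mutex_unlock(&table_lock);
                if (ctx_create(key)) {
                        pthread_mutex_lock(&table_lock);
                        return NULL;    /* allocation failed */
                }
                pthread_mutex_lock(&table_lock);
        }
        return c;
}

int main(void)
{
        pthread_mutex_lock(&table_lock);
        struct ctx *c = ctx_get(42);
        pthread_mutex_unlock(&table_lock);

        printf("got ctx for key %d\n", c ? c->key : -1);
        return 0;
}
```

As in the patched cfq_get_io_context(), ctx_get() is entered and exited with the lock held, and a racing creator's duplicate allocation is simply freed instead of being reported as a failure.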
Note that the restructuring also means cfq_get_io_context() is now
called under queue_lock and performs both the ioc and cfqd insertions
while holding both the ioc and queue locks. This is part of an
ongoing locking tightening effort and will be used to simplify the
synchronization rules.
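The tightened rule is a fixed nesting order: the queue lock is the outer lock, the io_context lock nests inside it, and the new cic is linked into both the per-ioc and per-cfqd structures while both locks are held, so nobody holding either lock can observe a half-linked cic. A hedged userspace analogue follows, with pthread mutexes standing in for spin_lock_irq()/spin_lock(); the names queue_lock, ioc_lock, and link_cic() are illustrative, not kernel APIs.

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ioc_lock = PTHREAD_MUTEX_INITIALIZER;

static int on_queue_list;       /* protected by queue_lock */
static int on_ioc_list;         /* protected by ioc_lock */

static void link_cic(void)
{
        /* fixed nesting: queue lock outer, ioc lock inner */
        pthread_mutex_lock(&queue_lock);
        pthread_mutex_lock(&ioc_lock);

        /*
         * Both insertions happen while both locks are held, so a reader
         * holding either lock sees the entry on both lists or on neither.
         */
        on_ioc_list = 1;
        on_queue_list = 1;

        /* release in reverse order of acquisition */
        pthread_mutex_unlock(&ioc_lock);
        pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
        link_cic();
        printf("linked: ioc=%d queue=%d\n", on_ioc_list, on_queue_list);
        return 0;
}
```

Because both insertions happen inside one critical section, the cic becomes visible on both lists at a single point, which is the kind of invariant the commit message says the series will use to simplify the synchronization rules.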
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')

-rw-r--r--  block/cfq-iosched.c | 135

1 file changed, 76 insertions(+), 59 deletions(-)
```diff
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 51aece2eea7c..181a63d36691 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2908,13 +2908,10 @@ static void changed_ioprio(struct cfq_io_context *cic)
 {
         struct cfq_data *cfqd = cic_to_cfqd(cic);
         struct cfq_queue *cfqq;
-        unsigned long flags;
 
         if (unlikely(!cfqd))
                 return;
 
-        spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-
         cfqq = cic->cfqq[BLK_RW_ASYNC];
         if (cfqq) {
                 struct cfq_queue *new_cfqq;
@@ -2929,8 +2926,6 @@ static void changed_ioprio(struct cfq_io_context *cic)
         cfqq = cic->cfqq[BLK_RW_SYNC];
         if (cfqq)
                 cfq_mark_cfqq_prio_changed(cfqq);
-
-        spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
 
 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
@@ -2958,7 +2953,6 @@ static void changed_cgroup(struct cfq_io_context *cic)
 {
         struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
         struct cfq_data *cfqd = cic_to_cfqd(cic);
-        unsigned long flags;
         struct request_queue *q;
 
         if (unlikely(!cfqd))
@@ -2966,8 +2960,6 @@ static void changed_cgroup(struct cfq_io_context *cic)
 
         q = cfqd->queue;
 
-        spin_lock_irqsave(q->queue_lock, flags);
-
         if (sync_cfqq) {
                 /*
                  * Drop reference to sync queue. A new sync queue will be
@@ -2977,8 +2969,6 @@ static void changed_cgroup(struct cfq_io_context *cic)
                 cic_set_cfqq(cic, NULL, 1);
                 cfq_put_queue(sync_cfqq);
         }
-
-        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 #endif /* CONFIG_CFQ_GROUP_IOSCHED */
 
@@ -3142,16 +3132,31 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
         return cic;
 }
 
-/*
- * Add cic into ioc, using cfqd as the search key. This enables us to lookup
- * the process specific cfq io context when entered from the block layer.
- * Also adds the cic to a per-cfqd list, used when this queue is removed.
+/**
+ * cfq_create_cic - create and link a cfq_io_context
+ * @cfqd: cfqd of interest
+ * @gfp_mask: allocation mask
+ *
+ * Make sure cfq_io_context linking %current->io_context and @cfqd exists.
+ * If ioc and/or cic doesn't exist, they will be created using @gfp_mask.
  */
-static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
-                        struct cfq_io_context *cic, gfp_t gfp_mask)
+static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-        unsigned long flags;
-        int ret;
+        struct request_queue *q = cfqd->queue;
+        struct cfq_io_context *cic = NULL;
+        struct io_context *ioc;
+        int ret = -ENOMEM;
+
+        might_sleep_if(gfp_mask & __GFP_WAIT);
+
+        /* allocate stuff */
+        ioc = current_io_context(gfp_mask, q->node);
+        if (!ioc)
+                goto out;
+
+        cic = cfq_alloc_io_context(cfqd, gfp_mask);
+        if (!cic)
+                goto out;
 
         ret = radix_tree_preload(gfp_mask);
         if (ret)
@@ -3161,53 +3166,72 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
         cic->key = cfqd;
         cic->q = cfqd->queue;
 
-        spin_lock_irqsave(&ioc->lock, flags);
-        ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
-        if (!ret)
-                hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
-        spin_unlock_irqrestore(&ioc->lock, flags);
-
-        radix_tree_preload_end();
+        /* lock both q and ioc and try to link @cic */
+        spin_lock_irq(q->queue_lock);
+        spin_lock(&ioc->lock);
 
-        if (!ret) {
-                spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+        ret = radix_tree_insert(&ioc->radix_root, q->id, cic);
+        if (likely(!ret)) {
+                hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
                 list_add(&cic->queue_list, &cfqd->cic_list);
-                spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+                cic = NULL;
+        } else if (ret == -EEXIST) {
+                /* someone else already did it */
+                ret = 0;
         }
+
+        spin_unlock(&ioc->lock);
+        spin_unlock_irq(q->queue_lock);
+
+        radix_tree_preload_end();
 out:
         if (ret)
                 printk(KERN_ERR "cfq: cic link failed!\n");
+        if (cic)
+                cfq_cic_free(cic);
         return ret;
 }
 
-/*
- * Setup general io context and cfq io context. There can be several cfq
- * io contexts per general io context, if this process is doing io to more
- * than one device managed by cfq.
+/**
+ * cfq_get_io_context - acquire cfq_io_context and bump refcnt on io_context
+ * @cfqd: cfqd to setup cic for
+ * @gfp_mask: allocation mask
+ *
+ * Return cfq_io_context associating @cfqd and %current->io_context and
+ * bump refcnt on io_context.  If ioc or cic doesn't exist, they're created
+ * using @gfp_mask.
+ *
+ * Must be called under queue_lock which may be released and re-acquired.
+ * This function also may sleep depending on @gfp_mask.
  */
 static struct cfq_io_context *
 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-        struct io_context *ioc = NULL;
+        struct request_queue *q = cfqd->queue;
         struct cfq_io_context *cic = NULL;
+        struct io_context *ioc;
+        int err;
+
+        lockdep_assert_held(q->queue_lock);
+
+        while (true) {
+                /* fast path */
+                ioc = current->io_context;
+                if (likely(ioc)) {
+                        cic = cfq_cic_lookup(cfqd, ioc);
+                        if (likely(cic))
+                                break;
+                }
 
-        might_sleep_if(gfp_mask & __GFP_WAIT);
-
-        ioc = current_io_context(gfp_mask, cfqd->queue->node);
-        if (!ioc)
-                goto err;
-
-        cic = cfq_cic_lookup(cfqd, ioc);
-        if (cic)
-                goto out;
-
-        cic = cfq_alloc_io_context(cfqd, gfp_mask);
-        if (cic == NULL)
-                goto err;
+                /* slow path - unlock, create missing ones and retry */
+                spin_unlock_irq(q->queue_lock);
+                err = cfq_create_cic(cfqd, gfp_mask);
+                spin_lock_irq(q->queue_lock);
+                if (err)
+                        return NULL;
+        }
 
-        if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
-                goto err;
-out:
+        /* bump @ioc's refcnt and handle changed notifications */
         get_io_context(ioc);
 
         if (unlikely(cic->changed)) {
@@ -3220,10 +3244,6 @@ out:
         }
 
         return cic;
-err:
-        if (cic)
-                cfq_cic_free(cic);
-        return NULL;
 }
 
 static void
@@ -3759,14 +3779,11 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
         const int rw = rq_data_dir(rq);
         const bool is_sync = rq_is_sync(rq);
         struct cfq_queue *cfqq;
-        unsigned long flags;
 
         might_sleep_if(gfp_mask & __GFP_WAIT);
 
+        spin_lock_irq(q->queue_lock);
         cic = cfq_get_io_context(cfqd, gfp_mask);
-
-        spin_lock_irqsave(q->queue_lock, flags);
-
         if (!cic)
                 goto queue_fail;
 
@@ -3802,12 +3819,12 @@ new_queue:
         rq->elevator_private[0] = cic;
         rq->elevator_private[1] = cfqq;
         rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
-        spin_unlock_irqrestore(q->queue_lock, flags);
+        spin_unlock_irq(q->queue_lock);
         return 0;
 
 queue_fail:
         cfq_schedule_dispatch(cfqd);
-        spin_unlock_irqrestore(q->queue_lock, flags);
+        spin_unlock_irq(q->queue_lock);
         cfq_log(cfqd, "set_request fail");
         return 1;
 }
```