aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorShaohua Li <shli@kernel.org>2013-08-27 05:50:39 -0400
committerNeilBrown <neilb@suse.de>2013-08-27 21:55:53 -0400
commit773ca82fa1ee58dd1bf88b6a5ca385ec83a2cac6 (patch)
tree9dd6c1d72a9fab76d668ae2500e98003290c75a4 /drivers/md
parent260fa034ef7a4ff8b73068b48ac497edd5217491 (diff)
raid5: make release_stripe lockless
release_stripe still has big lock contention. We just add the stripe to a llist without taking device_lock. We let the raid5d thread do the real stripe release, which must hold device_lock anyway. In this way, release_stripe doesn't hold any locks. The side effect is that the order in which stripes are released changes. But that doesn't sound like a big deal: stripes are never handled in order, and the block layer can already do nice request merging, which means order isn't that important. I kept the unplug release batch, which is unnecessary with this patch from a lock-contention-avoidance point of view — and in fact, if we deleted it, the stripe_head release_list and lru could share storage — but the unplug release batch is also helpful for request merging. We could probably delay waking up raid5d until unplug, but I'm still afraid of the case in which raid5d is already running. Signed-off-by: Shaohua Li <shli@fusionio.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid5.c49
-rw-r--r--drivers/md/raid5.h3
2 files changed, 49 insertions, 3 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 78ea44336e75..287cc3b30043 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -239,12 +239,47 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
239 do_release_stripe(conf, sh); 239 do_release_stripe(conf, sh);
240} 240}
241 241
242/* should hold conf->device_lock already */
243static int release_stripe_list(struct r5conf *conf)
244{
245 struct stripe_head *sh;
246 int count = 0;
247 struct llist_node *head;
248
249 head = llist_del_all(&conf->released_stripes);
250 while (head) {
251 sh = llist_entry(head, struct stripe_head, release_list);
252 head = llist_next(head);
253 /* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */
254 smp_mb();
255 clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
256 /*
257 * Don't worry the bit is set here, because if the bit is set
258 * again, the count is always > 1. This is true for
259 * STRIPE_ON_UNPLUG_LIST bit too.
260 */
261 __release_stripe(conf, sh);
262 count++;
263 }
264
265 return count;
266}
267
242static void release_stripe(struct stripe_head *sh) 268static void release_stripe(struct stripe_head *sh)
243{ 269{
244 struct r5conf *conf = sh->raid_conf; 270 struct r5conf *conf = sh->raid_conf;
245 unsigned long flags; 271 unsigned long flags;
272 bool wakeup;
246 273
274 if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
275 goto slow_path;
276 wakeup = llist_add(&sh->release_list, &conf->released_stripes);
277 if (wakeup)
278 md_wakeup_thread(conf->mddev->thread);
279 return;
280slow_path:
247 local_irq_save(flags); 281 local_irq_save(flags);
282 /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
248 if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { 283 if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
249 do_release_stripe(conf, sh); 284 do_release_stripe(conf, sh);
250 spin_unlock(&conf->device_lock); 285 spin_unlock(&conf->device_lock);
@@ -491,7 +526,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
491 if (atomic_read(&sh->count)) { 526 if (atomic_read(&sh->count)) {
492 BUG_ON(!list_empty(&sh->lru) 527 BUG_ON(!list_empty(&sh->lru)
493 && !test_bit(STRIPE_EXPANDING, &sh->state) 528 && !test_bit(STRIPE_EXPANDING, &sh->state)
494 && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)); 529 && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
530 && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
495 } else { 531 } else {
496 if (!test_bit(STRIPE_HANDLE, &sh->state)) 532 if (!test_bit(STRIPE_HANDLE, &sh->state))
497 atomic_inc(&conf->active_stripes); 533 atomic_inc(&conf->active_stripes);
@@ -4127,6 +4163,10 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
4127 */ 4163 */
4128 smp_mb__before_clear_bit(); 4164 smp_mb__before_clear_bit();
4129 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); 4165 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
4166 /*
4167 * STRIPE_ON_RELEASE_LIST could be set here. In that
4168 * case, the count is always > 1 here
4169 */
4130 __release_stripe(conf, sh); 4170 __release_stripe(conf, sh);
4131 cnt++; 4171 cnt++;
4132 } 4172 }
@@ -4836,7 +4876,9 @@ static void raid5d(struct md_thread *thread)
4836 spin_lock_irq(&conf->device_lock); 4876 spin_lock_irq(&conf->device_lock);
4837 while (1) { 4877 while (1) {
4838 struct bio *bio; 4878 struct bio *bio;
4839 int batch_size; 4879 int batch_size, released;
4880
4881 released = release_stripe_list(conf);
4840 4882
4841 if ( 4883 if (
4842 !list_empty(&conf->bitmap_list)) { 4884 !list_empty(&conf->bitmap_list)) {
@@ -4861,7 +4903,7 @@ static void raid5d(struct md_thread *thread)
4861 } 4903 }
4862 4904
4863 batch_size = handle_active_stripes(conf); 4905 batch_size = handle_active_stripes(conf);
4864 if (!batch_size) 4906 if (!batch_size && !released)
4865 break; 4907 break;
4866 handled += batch_size; 4908 handled += batch_size;
4867 4909
@@ -5176,6 +5218,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
5176 INIT_LIST_HEAD(&conf->delayed_list); 5218 INIT_LIST_HEAD(&conf->delayed_list);
5177 INIT_LIST_HEAD(&conf->bitmap_list); 5219 INIT_LIST_HEAD(&conf->bitmap_list);
5178 INIT_LIST_HEAD(&conf->inactive_list); 5220 INIT_LIST_HEAD(&conf->inactive_list);
5221 init_llist_head(&conf->released_stripes);
5179 atomic_set(&conf->active_stripes, 0); 5222 atomic_set(&conf->active_stripes, 0);
5180 atomic_set(&conf->preread_active_stripes, 0); 5223 atomic_set(&conf->preread_active_stripes, 0);
5181 atomic_set(&conf->active_aligned_reads, 0); 5224 atomic_set(&conf->active_aligned_reads, 0);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 70c49329ca9a..a98f99d2a58f 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -197,6 +197,7 @@ enum reconstruct_states {
197struct stripe_head { 197struct stripe_head {
198 struct hlist_node hash; 198 struct hlist_node hash;
199 struct list_head lru; /* inactive_list or handle_list */ 199 struct list_head lru; /* inactive_list or handle_list */
200 struct llist_node release_list;
200 struct r5conf *raid_conf; 201 struct r5conf *raid_conf;
201 short generation; /* increments with every 202 short generation; /* increments with every
202 * reshape */ 203 * reshape */
@@ -321,6 +322,7 @@ enum {
321 STRIPE_OPS_REQ_PENDING, 322 STRIPE_OPS_REQ_PENDING,
322 STRIPE_ON_UNPLUG_LIST, 323 STRIPE_ON_UNPLUG_LIST,
323 STRIPE_DISCARD, 324 STRIPE_DISCARD,
325 STRIPE_ON_RELEASE_LIST,
324}; 326};
325 327
326/* 328/*
@@ -445,6 +447,7 @@ struct r5conf {
445 */ 447 */
446 atomic_t active_stripes; 448 atomic_t active_stripes;
447 struct list_head inactive_list; 449 struct list_head inactive_list;
450 struct llist_head released_stripes;
448 wait_queue_head_t wait_for_stripe; 451 wait_queue_head_t wait_for_stripe;
449 wait_queue_head_t wait_for_overlap; 452 wait_queue_head_t wait_for_overlap;
450 int inactive_blocked; /* release of inactive stripes blocked, 453 int inactive_blocked; /* release of inactive stripes blocked,