-rw-r--r--   drivers/md/raid5.c | 317
-rw-r--r--   drivers/md/raid5.h |  15
2 files changed, 259 insertions, 73 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4bbcb7e26d12..93090b2afab4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -85,6 +85,42 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) | |||
85 | return &conf->stripe_hashtbl[hash]; | 85 | return &conf->stripe_hashtbl[hash]; |
86 | } | 86 | } |
87 | 87 | ||
88 | static inline int stripe_hash_locks_hash(sector_t sect) | ||
89 | { | ||
90 | return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK; | ||
91 | } | ||
92 | |||
93 | static inline void lock_device_hash_lock(struct r5conf *conf, int hash) | ||
94 | { | ||
95 | spin_lock_irq(conf->hash_locks + hash); | ||
96 | spin_lock(&conf->device_lock); | ||
97 | } | ||
98 | |||
99 | static inline void unlock_device_hash_lock(struct r5conf *conf, int hash) | ||
100 | { | ||
101 | spin_unlock(&conf->device_lock); | ||
102 | spin_unlock_irq(conf->hash_locks + hash); | ||
103 | } | ||
104 | |||
105 | static inline void lock_all_device_hash_locks_irq(struct r5conf *conf) | ||
106 | { | ||
107 | int i; | ||
108 | local_irq_disable(); | ||
109 | spin_lock(conf->hash_locks); | ||
110 | for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++) | ||
111 | spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); | ||
112 | spin_lock(&conf->device_lock); | ||
113 | } | ||
114 | |||
115 | static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf) | ||
116 | { | ||
117 | int i; | ||
118 | spin_unlock(&conf->device_lock); | ||
119 | for (i = NR_STRIPE_HASH_LOCKS; i; i--) | ||
120 | spin_unlock(conf->hash_locks + i - 1); | ||
121 | local_irq_enable(); | ||
122 | } | ||
123 | |||
88 | /* bio's attached to a stripe+device for I/O are linked together in bi_sector | 124 | /* bio's attached to a stripe+device for I/O are linked together in bi_sector |
89 | * order without overlap. There may be several bio's per stripe+device, and | 125 | * order without overlap. There may be several bio's per stripe+device, and |
90 | * a bio could span several devices. | 126 | * a bio could span several devices. |
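The helpers added above map a stripe's start sector to one of NR_STRIPE_HASH_LOCKS locks. A minimal user-space sketch of that mapping (assuming 4 KiB pages, so STRIPE_SHIFT is 3; the constants mirror raid5.h, but the example itself is not part of the patch):

#include <stdio.h>
#include <stdint.h>

#define NR_STRIPE_HASH_LOCKS 8
#define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1)
#define STRIPE_SHIFT 3   /* assumption: PAGE_SHIFT(12) - 9 */

static int stripe_hash_locks_hash(uint64_t sect)
{
	return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK;
}

int main(void)
{
	uint64_t sect;

	/* consecutive stripes spread round-robin over the eight locks */
	for (sect = 0; sect < 64; sect += 8)
		printf("sector %llu -> hash %d\n",
		       (unsigned long long)sect, stripe_hash_locks_hash(sect));
	return 0;
}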
@@ -249,7 +285,8 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh) | |||
249 | } | 285 | } |
250 | } | 286 | } |
251 | 287 | ||
252 | static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh) | 288 | static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, |
289 | struct list_head *temp_inactive_list) | ||
253 | { | 290 | { |
254 | BUG_ON(!list_empty(&sh->lru)); | 291 | BUG_ON(!list_empty(&sh->lru)); |
255 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 292 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
@@ -278,19 +315,60 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh) | |||
278 | < IO_THRESHOLD) | 315 | < IO_THRESHOLD) |
279 | md_wakeup_thread(conf->mddev->thread); | 316 | md_wakeup_thread(conf->mddev->thread); |
280 | atomic_dec(&conf->active_stripes); | 317 | atomic_dec(&conf->active_stripes); |
281 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) { | 318 | if (!test_bit(STRIPE_EXPANDING, &sh->state)) |
282 | list_add_tail(&sh->lru, &conf->inactive_list); | 319 | list_add_tail(&sh->lru, temp_inactive_list); |
283 | wake_up(&conf->wait_for_stripe); | ||
284 | if (conf->retry_read_aligned) | ||
285 | md_wakeup_thread(conf->mddev->thread); | ||
286 | } | ||
287 | } | 320 | } |
288 | } | 321 | } |
289 | 322 | ||
290 | static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) | 323 | static void __release_stripe(struct r5conf *conf, struct stripe_head *sh, |
324 | struct list_head *temp_inactive_list) | ||
291 | { | 325 | { |
292 | if (atomic_dec_and_test(&sh->count)) | 326 | if (atomic_dec_and_test(&sh->count)) |
293 | do_release_stripe(conf, sh); | 327 | do_release_stripe(conf, sh, temp_inactive_list); |
328 | } | ||
329 | |||
330 | /* | ||
331 | * @hash could be NR_STRIPE_HASH_LOCKS, then we have a list of inactive_list | ||
332 | * | ||
333 | * Be careful: Only one task can add/delete stripes from temp_inactive_list at | ||
334 | * given time. Adding stripes only takes device lock, while deleting stripes | ||
335 | * only takes hash lock. | ||
336 | */ | ||
337 | static void release_inactive_stripe_list(struct r5conf *conf, | ||
338 | struct list_head *temp_inactive_list, | ||
339 | int hash) | ||
340 | { | ||
341 | int size; | ||
342 | bool do_wakeup = false; | ||
343 | unsigned long flags; | ||
344 | |||
345 | if (hash == NR_STRIPE_HASH_LOCKS) { | ||
346 | size = NR_STRIPE_HASH_LOCKS; | ||
347 | hash = NR_STRIPE_HASH_LOCKS - 1; | ||
348 | } else | ||
349 | size = 1; | ||
350 | while (size) { | ||
351 | struct list_head *list = &temp_inactive_list[size - 1]; | ||
352 | |||
353 | /* | ||
354 | * We don't hold any lock here yet; get_active_stripe() might | ||
355 | * remove stripes from the list | ||
356 | */ | ||
357 | if (!list_empty_careful(list)) { | ||
358 | spin_lock_irqsave(conf->hash_locks + hash, flags); | ||
359 | list_splice_tail_init(list, conf->inactive_list + hash); | ||
360 | do_wakeup = true; | ||
361 | spin_unlock_irqrestore(conf->hash_locks + hash, flags); | ||
362 | } | ||
363 | size--; | ||
364 | hash--; | ||
365 | } | ||
366 | |||
367 | if (do_wakeup) { | ||
368 | wake_up(&conf->wait_for_stripe); | ||
369 | if (conf->retry_read_aligned) | ||
370 | md_wakeup_thread(conf->mddev->thread); | ||
371 | } | ||
294 | } | 372 | } |
295 | 373 | ||
296 | static struct llist_node *llist_reverse_order(struct llist_node *head) | 374 | static struct llist_node *llist_reverse_order(struct llist_node *head) |
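release_inactive_stripe_list() accepts either a single bucket or, by convention, hash == NR_STRIPE_HASH_LOCKS to mean "flush every per-bucket temp list". A simplified, lock-free sketch of that bucket walk (stand-alone example, not kernel code):

#include <stdio.h>

#define NR_STRIPE_HASH_LOCKS 8

/* temp list at index size-1 is spliced onto inactive_list[hash] */
static void flush_buckets(int hash)
{
	int size;

	if (hash == NR_STRIPE_HASH_LOCKS) {
		size = NR_STRIPE_HASH_LOCKS;
		hash = NR_STRIPE_HASH_LOCKS - 1;
	} else
		size = 1;

	while (size) {
		printf("splice temp_inactive_list[%d] -> inactive_list[%d]\n",
		       size - 1, hash);
		size--;
		hash--;
	}
}

int main(void)
{
	/* single-bucket callers pass a one-entry temp list, so index 0 is that list */
	flush_buckets(3);
	flush_buckets(NR_STRIPE_HASH_LOCKS);	/* flush all buckets */
	return 0;
}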
@@ -308,7 +386,8 @@ static struct llist_node *llist_reverse_order(struct llist_node *head) | |||
308 | } | 386 | } |
309 | 387 | ||
310 | /* should hold conf->device_lock already */ | 388 | /* should hold conf->device_lock already */ |
311 | static int release_stripe_list(struct r5conf *conf) | 389 | static int release_stripe_list(struct r5conf *conf, |
390 | struct list_head *temp_inactive_list) | ||
312 | { | 391 | { |
313 | struct stripe_head *sh; | 392 | struct stripe_head *sh; |
314 | int count = 0; | 393 | int count = 0; |
@@ -317,6 +396,8 @@ static int release_stripe_list(struct r5conf *conf) | |||
317 | head = llist_del_all(&conf->released_stripes); | 396 | head = llist_del_all(&conf->released_stripes); |
318 | head = llist_reverse_order(head); | 397 | head = llist_reverse_order(head); |
319 | while (head) { | 398 | while (head) { |
399 | int hash; | ||
400 | |||
320 | sh = llist_entry(head, struct stripe_head, release_list); | 401 | sh = llist_entry(head, struct stripe_head, release_list); |
321 | head = llist_next(head); | 402 | head = llist_next(head); |
322 | /* sh could be re-added after STRIPE_ON_RELEASE_LIST is cleared */ | 403 |
@@ -327,7 +408,8 @@ static int release_stripe_list(struct r5conf *conf) | |||
327 | * again, the count is always > 1. This is true for | 408 | * again, the count is always > 1. This is true for |
328 | * STRIPE_ON_UNPLUG_LIST bit too. | 409 | * STRIPE_ON_UNPLUG_LIST bit too. |
329 | */ | 410 | */ |
330 | __release_stripe(conf, sh); | 411 | hash = sh->hash_lock_index; |
412 | __release_stripe(conf, sh, &temp_inactive_list[hash]); | ||
331 | count++; | 413 | count++; |
332 | } | 414 | } |
333 | 415 | ||
@@ -338,6 +420,8 @@ static void release_stripe(struct stripe_head *sh) | |||
338 | { | 420 | { |
339 | struct r5conf *conf = sh->raid_conf; | 421 | struct r5conf *conf = sh->raid_conf; |
340 | unsigned long flags; | 422 | unsigned long flags; |
423 | struct list_head list; | ||
424 | int hash; | ||
341 | bool wakeup; | 425 | bool wakeup; |
342 | 426 | ||
343 | if (unlikely(!conf->mddev->thread) || | 427 | if (unlikely(!conf->mddev->thread) || |
@@ -351,8 +435,11 @@ slow_path: | |||
351 | local_irq_save(flags); | 435 | local_irq_save(flags); |
352 | /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ | 436 | /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ |
353 | if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { | 437 | if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { |
354 | do_release_stripe(conf, sh); | 438 | INIT_LIST_HEAD(&list); |
439 | hash = sh->hash_lock_index; | ||
440 | do_release_stripe(conf, sh, &list); | ||
355 | spin_unlock(&conf->device_lock); | 441 | spin_unlock(&conf->device_lock); |
442 | release_inactive_stripe_list(conf, &list, hash); | ||
356 | } | 443 | } |
357 | local_irq_restore(flags); | 444 | local_irq_restore(flags); |
358 | } | 445 | } |
@@ -377,18 +464,19 @@ static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh) | |||
377 | 464 | ||
378 | 465 | ||
379 | /* find an idle stripe, make sure it is unhashed, and return it. */ | 466 | /* find an idle stripe, make sure it is unhashed, and return it. */ |
380 | static struct stripe_head *get_free_stripe(struct r5conf *conf) | 467 | static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash) |
381 | { | 468 | { |
382 | struct stripe_head *sh = NULL; | 469 | struct stripe_head *sh = NULL; |
383 | struct list_head *first; | 470 | struct list_head *first; |
384 | 471 | ||
385 | if (list_empty(&conf->inactive_list)) | 472 | if (list_empty(conf->inactive_list + hash)) |
386 | goto out; | 473 | goto out; |
387 | first = conf->inactive_list.next; | 474 | first = (conf->inactive_list + hash)->next; |
388 | sh = list_entry(first, struct stripe_head, lru); | 475 | sh = list_entry(first, struct stripe_head, lru); |
389 | list_del_init(first); | 476 | list_del_init(first); |
390 | remove_hash(sh); | 477 | remove_hash(sh); |
391 | atomic_inc(&conf->active_stripes); | 478 | atomic_inc(&conf->active_stripes); |
479 | BUG_ON(hash != sh->hash_lock_index); | ||
392 | out: | 480 | out: |
393 | return sh; | 481 | return sh; |
394 | } | 482 | } |
@@ -431,7 +519,7 @@ static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, | |||
431 | static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) | 519 | static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) |
432 | { | 520 | { |
433 | struct r5conf *conf = sh->raid_conf; | 521 | struct r5conf *conf = sh->raid_conf; |
434 | int i; | 522 | int i, seq; |
435 | 523 | ||
436 | BUG_ON(atomic_read(&sh->count) != 0); | 524 | BUG_ON(atomic_read(&sh->count) != 0); |
437 | BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); | 525 | BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); |
@@ -441,7 +529,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) | |||
441 | (unsigned long long)sh->sector); | 529 | (unsigned long long)sh->sector); |
442 | 530 | ||
443 | remove_hash(sh); | 531 | remove_hash(sh); |
444 | 532 | retry: | |
533 | seq = read_seqcount_begin(&conf->gen_lock); | ||
445 | sh->generation = conf->generation - previous; | 534 | sh->generation = conf->generation - previous; |
446 | sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; | 535 | sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; |
447 | sh->sector = sector; | 536 | sh->sector = sector; |
@@ -463,6 +552,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) | |||
463 | dev->flags = 0; | 552 | dev->flags = 0; |
464 | raid5_build_block(sh, i, previous); | 553 | raid5_build_block(sh, i, previous); |
465 | } | 554 | } |
555 | if (read_seqcount_retry(&conf->gen_lock, seq)) | ||
556 | goto retry; | ||
466 | insert_hash(conf, sh); | 557 | insert_hash(conf, sh); |
467 | sh->cpu = smp_processor_id(); | 558 | sh->cpu = smp_processor_id(); |
468 | } | 559 | } |
@@ -567,29 +658,31 @@ get_active_stripe(struct r5conf *conf, sector_t sector, | |||
567 | int previous, int noblock, int noquiesce) | 658 | int previous, int noblock, int noquiesce) |
568 | { | 659 | { |
569 | struct stripe_head *sh; | 660 | struct stripe_head *sh; |
661 | int hash = stripe_hash_locks_hash(sector); | ||
570 | 662 | ||
571 | pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); | 663 | pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); |
572 | 664 | ||
573 | spin_lock_irq(&conf->device_lock); | 665 | spin_lock_irq(conf->hash_locks + hash); |
574 | 666 | ||
575 | do { | 667 | do { |
576 | wait_event_lock_irq(conf->wait_for_stripe, | 668 | wait_event_lock_irq(conf->wait_for_stripe, |
577 | conf->quiesce == 0 || noquiesce, | 669 | conf->quiesce == 0 || noquiesce, |
578 | conf->device_lock); | 670 | *(conf->hash_locks + hash)); |
579 | sh = __find_stripe(conf, sector, conf->generation - previous); | 671 | sh = __find_stripe(conf, sector, conf->generation - previous); |
580 | if (!sh) { | 672 | if (!sh) { |
581 | if (!conf->inactive_blocked) | 673 | if (!conf->inactive_blocked) |
582 | sh = get_free_stripe(conf); | 674 | sh = get_free_stripe(conf, hash); |
583 | if (noblock && sh == NULL) | 675 | if (noblock && sh == NULL) |
584 | break; | 676 | break; |
585 | if (!sh) { | 677 | if (!sh) { |
586 | conf->inactive_blocked = 1; | 678 | conf->inactive_blocked = 1; |
587 | wait_event_lock_irq(conf->wait_for_stripe, | 679 | wait_event_lock_irq( |
588 | !list_empty(&conf->inactive_list) && | 680 | conf->wait_for_stripe, |
589 | (atomic_read(&conf->active_stripes) | 681 | !list_empty(conf->inactive_list + hash) && |
590 | < (conf->max_nr_stripes *3/4) | 682 | (atomic_read(&conf->active_stripes) |
591 | || !conf->inactive_blocked), | 683 | < (conf->max_nr_stripes * 3 / 4) |
592 | conf->device_lock); | 684 | || !conf->inactive_blocked), |
685 | *(conf->hash_locks + hash)); | ||
593 | conf->inactive_blocked = 0; | 686 | conf->inactive_blocked = 0; |
594 | } else | 687 | } else |
595 | init_stripe(sh, sector, previous); | 688 | init_stripe(sh, sector, previous); |
@@ -600,9 +693,11 @@ get_active_stripe(struct r5conf *conf, sector_t sector, | |||
600 | && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state) | 693 | && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state) |
601 | && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state)); | 694 | && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state)); |
602 | } else { | 695 | } else { |
696 | spin_lock(&conf->device_lock); | ||
603 | if (!test_bit(STRIPE_HANDLE, &sh->state)) | 697 | if (!test_bit(STRIPE_HANDLE, &sh->state)) |
604 | atomic_inc(&conf->active_stripes); | 698 | atomic_inc(&conf->active_stripes); |
605 | if (list_empty(&sh->lru) && | 699 | if (list_empty(&sh->lru) && |
700 | !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) && | ||
606 | !test_bit(STRIPE_EXPANDING, &sh->state)) | 701 | !test_bit(STRIPE_EXPANDING, &sh->state)) |
607 | BUG(); | 702 | BUG(); |
608 | list_del_init(&sh->lru); | 703 | list_del_init(&sh->lru); |
@@ -610,6 +705,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector, | |||
610 | sh->group->stripes_cnt--; | 705 | sh->group->stripes_cnt--; |
611 | sh->group = NULL; | 706 | sh->group = NULL; |
612 | } | 707 | } |
708 | spin_unlock(&conf->device_lock); | ||
613 | } | 709 | } |
614 | } | 710 | } |
615 | } while (sh == NULL); | 711 | } while (sh == NULL); |
@@ -617,7 +713,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector, | |||
617 | if (sh) | 713 | if (sh) |
618 | atomic_inc(&sh->count); | 714 | atomic_inc(&sh->count); |
619 | 715 | ||
620 | spin_unlock_irq(&conf->device_lock); | 716 | spin_unlock_irq(conf->hash_locks + hash); |
621 | return sh; | 717 | return sh; |
622 | } | 718 | } |
623 | 719 | ||
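The ordering used in get_active_stripe() is the invariant that keeps the split safe: the per-bucket hash lock is always taken before conf->device_lock (as in lock_device_hash_lock() above), and device_lock is only nested inside it for the short already-cached path. A minimal pthread sketch of that nesting (user-space stand-in types, not the kernel API):

#include <pthread.h>
#include <stdio.h>

#define NR_STRIPE_HASH_LOCKS 8

static pthread_mutex_t hash_locks[NR_STRIPE_HASH_LOCKS];
static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;

/* outer: the bucket's hash lock; inner: the global device_lock */
static void lock_device_hash_lock(int hash)
{
	pthread_mutex_lock(&hash_locks[hash]);
	pthread_mutex_lock(&device_lock);
}

static void unlock_device_hash_lock(int hash)
{
	pthread_mutex_unlock(&device_lock);
	pthread_mutex_unlock(&hash_locks[hash]);
}

int main(void)
{
	int i;

	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
		pthread_mutex_init(&hash_locks[i], NULL);

	lock_device_hash_lock(2);
	printf("holding hash_locks[2], then device_lock nested inside\n");
	unlock_device_hash_lock(2);
	return 0;
}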
@@ -1597,7 +1693,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |||
1597 | put_cpu(); | 1693 | put_cpu(); |
1598 | } | 1694 | } |
1599 | 1695 | ||
1600 | static int grow_one_stripe(struct r5conf *conf) | 1696 | static int grow_one_stripe(struct r5conf *conf, int hash) |
1601 | { | 1697 | { |
1602 | struct stripe_head *sh; | 1698 | struct stripe_head *sh; |
1603 | sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL); | 1699 | sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL); |
@@ -1613,6 +1709,7 @@ static int grow_one_stripe(struct r5conf *conf) | |||
1613 | kmem_cache_free(conf->slab_cache, sh); | 1709 | kmem_cache_free(conf->slab_cache, sh); |
1614 | return 0; | 1710 | return 0; |
1615 | } | 1711 | } |
1712 | sh->hash_lock_index = hash; | ||
1616 | /* we just created an active stripe so... */ | 1713 | /* we just created an active stripe so... */ |
1617 | atomic_set(&sh->count, 1); | 1714 | atomic_set(&sh->count, 1); |
1618 | atomic_inc(&conf->active_stripes); | 1715 | atomic_inc(&conf->active_stripes); |
@@ -1625,6 +1722,7 @@ static int grow_stripes(struct r5conf *conf, int num) | |||
1625 | { | 1722 | { |
1626 | struct kmem_cache *sc; | 1723 | struct kmem_cache *sc; |
1627 | int devs = max(conf->raid_disks, conf->previous_raid_disks); | 1724 | int devs = max(conf->raid_disks, conf->previous_raid_disks); |
1725 | int hash; | ||
1628 | 1726 | ||
1629 | if (conf->mddev->gendisk) | 1727 | if (conf->mddev->gendisk) |
1630 | sprintf(conf->cache_name[0], | 1728 | sprintf(conf->cache_name[0], |
@@ -1642,9 +1740,13 @@ static int grow_stripes(struct r5conf *conf, int num) | |||
1642 | return 1; | 1740 | return 1; |
1643 | conf->slab_cache = sc; | 1741 | conf->slab_cache = sc; |
1644 | conf->pool_size = devs; | 1742 | conf->pool_size = devs; |
1645 | while (num--) | 1743 | hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS; |
1646 | if (!grow_one_stripe(conf)) | 1744 | while (num--) { |
1745 | if (!grow_one_stripe(conf, hash)) | ||
1647 | return 1; | 1746 | return 1; |
1747 | conf->max_nr_stripes++; | ||
1748 | hash = (hash + 1) % NR_STRIPE_HASH_LOCKS; | ||
1749 | } | ||
1648 | return 0; | 1750 | return 0; |
1649 | } | 1751 | } |
1650 | 1752 | ||
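With this change grow_stripes() assigns stripes to buckets round-robin, starting at max_nr_stripes % NR_STRIPE_HASH_LOCKS (zero on a freshly allocated conf, since setup_conf() no longer pre-sets max_nr_stripes). A quick sketch of the resulting per-bucket population for the default cache size (NR_STRIPES is 256 in raid5.h):

#include <stdio.h>

#define NR_STRIPE_HASH_LOCKS 8
#define NR_STRIPES 256        /* default stripe cache size */

int main(void)
{
	int per_bucket[NR_STRIPE_HASH_LOCKS] = { 0 };
	int max_nr_stripes = 0;		/* conf starts zeroed */
	int hash = max_nr_stripes % NR_STRIPE_HASH_LOCKS;
	int num = NR_STRIPES;

	while (num--) {
		per_bucket[hash]++;
		max_nr_stripes++;
		hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
	}
	for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++)
		printf("bucket %d: %d stripes\n", hash, per_bucket[hash]);
	printf("total stripes: %d\n", max_nr_stripes);
	return 0;
}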
@@ -1702,6 +1804,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) | |||
1702 | int err; | 1804 | int err; |
1703 | struct kmem_cache *sc; | 1805 | struct kmem_cache *sc; |
1704 | int i; | 1806 | int i; |
1807 | int hash, cnt; | ||
1705 | 1808 | ||
1706 | if (newsize <= conf->pool_size) | 1809 | if (newsize <= conf->pool_size) |
1707 | return 0; /* never bother to shrink */ | 1810 | return 0; /* never bother to shrink */ |
@@ -1741,19 +1844,29 @@ static int resize_stripes(struct r5conf *conf, int newsize) | |||
1741 | * OK, we have enough stripes, start collecting inactive | 1844 | * OK, we have enough stripes, start collecting inactive |
1742 | * stripes and copying them over | 1845 | * stripes and copying them over |
1743 | */ | 1846 | */ |
1847 | hash = 0; | ||
1848 | cnt = 0; | ||
1744 | list_for_each_entry(nsh, &newstripes, lru) { | 1849 | list_for_each_entry(nsh, &newstripes, lru) { |
1745 | spin_lock_irq(&conf->device_lock); | 1850 | lock_device_hash_lock(conf, hash); |
1746 | wait_event_lock_irq(conf->wait_for_stripe, | 1851 | wait_event_cmd(conf->wait_for_stripe, |
1747 | !list_empty(&conf->inactive_list), | 1852 | !list_empty(conf->inactive_list + hash), |
1748 | conf->device_lock); | 1853 | unlock_device_hash_lock(conf, hash), |
1749 | osh = get_free_stripe(conf); | 1854 | lock_device_hash_lock(conf, hash)); |
1750 | spin_unlock_irq(&conf->device_lock); | 1855 | osh = get_free_stripe(conf, hash); |
1856 | unlock_device_hash_lock(conf, hash); | ||
1751 | atomic_set(&nsh->count, 1); | 1857 | atomic_set(&nsh->count, 1); |
1752 | for(i=0; i<conf->pool_size; i++) | 1858 | for(i=0; i<conf->pool_size; i++) |
1753 | nsh->dev[i].page = osh->dev[i].page; | 1859 | nsh->dev[i].page = osh->dev[i].page; |
1754 | for( ; i<newsize; i++) | 1860 | for( ; i<newsize; i++) |
1755 | nsh->dev[i].page = NULL; | 1861 | nsh->dev[i].page = NULL; |
1862 | nsh->hash_lock_index = hash; | ||
1756 | kmem_cache_free(conf->slab_cache, osh); | 1863 | kmem_cache_free(conf->slab_cache, osh); |
1864 | cnt++; | ||
1865 | if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS + | ||
1866 | !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) { | ||
1867 | hash++; | ||
1868 | cnt = 0; | ||
1869 | } | ||
1757 | } | 1870 | } |
1758 | kmem_cache_destroy(conf->slab_cache); | 1871 | kmem_cache_destroy(conf->slab_cache); |
1759 | 1872 | ||
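The cnt check above hands each bucket max_nr_stripes / NR_STRIPE_HASH_LOCKS of the replacement stripes, plus one extra for the first max_nr_stripes % NR_STRIPE_HASH_LOCKS buckets, matching how grow_stripes() filled them. A small sketch of that quota (260 is only an example cache size):

#include <stdio.h>

#define NR_STRIPE_HASH_LOCKS 8

int main(void)
{
	int max_nr_stripes = 260;	/* e.g. 256 default plus 4 added later */
	int hash;

	for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++) {
		int quota = max_nr_stripes / NR_STRIPE_HASH_LOCKS +
			!!((max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash);
		printf("bucket %d quota: %d\n", hash, quota);
	}
	return 0;
}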
@@ -1812,13 +1925,13 @@ static int resize_stripes(struct r5conf *conf, int newsize) | |||
1812 | return err; | 1925 | return err; |
1813 | } | 1926 | } |
1814 | 1927 | ||
1815 | static int drop_one_stripe(struct r5conf *conf) | 1928 | static int drop_one_stripe(struct r5conf *conf, int hash) |
1816 | { | 1929 | { |
1817 | struct stripe_head *sh; | 1930 | struct stripe_head *sh; |
1818 | 1931 | ||
1819 | spin_lock_irq(&conf->device_lock); | 1932 | spin_lock_irq(conf->hash_locks + hash); |
1820 | sh = get_free_stripe(conf); | 1933 | sh = get_free_stripe(conf, hash); |
1821 | spin_unlock_irq(&conf->device_lock); | 1934 | spin_unlock_irq(conf->hash_locks + hash); |
1822 | if (!sh) | 1935 | if (!sh) |
1823 | return 0; | 1936 | return 0; |
1824 | BUG_ON(atomic_read(&sh->count)); | 1937 | BUG_ON(atomic_read(&sh->count)); |
@@ -1830,8 +1943,10 @@ static int drop_one_stripe(struct r5conf *conf) | |||
1830 | 1943 | ||
1831 | static void shrink_stripes(struct r5conf *conf) | 1944 | static void shrink_stripes(struct r5conf *conf) |
1832 | { | 1945 | { |
1833 | while (drop_one_stripe(conf)) | 1946 | int hash; |
1834 | ; | 1947 | for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++) |
1948 | while (drop_one_stripe(conf, hash)) | ||
1949 | ; | ||
1835 | 1950 | ||
1836 | if (conf->slab_cache) | 1951 | if (conf->slab_cache) |
1837 | kmem_cache_destroy(conf->slab_cache); | 1952 | kmem_cache_destroy(conf->slab_cache); |
@@ -3915,7 +4030,8 @@ static void raid5_activate_delayed(struct r5conf *conf) | |||
3915 | } | 4030 | } |
3916 | } | 4031 | } |
3917 | 4032 | ||
3918 | static void activate_bit_delay(struct r5conf *conf) | 4033 | static void activate_bit_delay(struct r5conf *conf, |
4034 | struct list_head *temp_inactive_list) | ||
3919 | { | 4035 | { |
3920 | /* device_lock is held */ | 4036 | /* device_lock is held */ |
3921 | struct list_head head; | 4037 | struct list_head head; |
@@ -3923,9 +4039,11 @@ static void activate_bit_delay(struct r5conf *conf) | |||
3923 | list_del_init(&conf->bitmap_list); | 4039 | list_del_init(&conf->bitmap_list); |
3924 | while (!list_empty(&head)) { | 4040 | while (!list_empty(&head)) { |
3925 | struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru); | 4041 | struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru); |
4042 | int hash; | ||
3926 | list_del_init(&sh->lru); | 4043 | list_del_init(&sh->lru); |
3927 | atomic_inc(&sh->count); | 4044 | atomic_inc(&sh->count); |
3928 | __release_stripe(conf, sh); | 4045 | hash = sh->hash_lock_index; |
4046 | __release_stripe(conf, sh, &temp_inactive_list[hash]); | ||
3929 | } | 4047 | } |
3930 | } | 4048 | } |
3931 | 4049 | ||
@@ -3941,7 +4059,7 @@ int md_raid5_congested(struct mddev *mddev, int bits) | |||
3941 | return 1; | 4059 | return 1; |
3942 | if (conf->quiesce) | 4060 | if (conf->quiesce) |
3943 | return 1; | 4061 | return 1; |
3944 | if (list_empty_careful(&conf->inactive_list)) | 4062 | if (atomic_read(&conf->active_stripes) == conf->max_nr_stripes) |
3945 | return 1; | 4063 | return 1; |
3946 | 4064 | ||
3947 | return 0; | 4065 | return 0; |
@@ -4271,6 +4389,7 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group) | |||
4271 | struct raid5_plug_cb { | 4389 | struct raid5_plug_cb { |
4272 | struct blk_plug_cb cb; | 4390 | struct blk_plug_cb cb; |
4273 | struct list_head list; | 4391 | struct list_head list; |
4392 | struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS]; | ||
4274 | }; | 4393 | }; |
4275 | 4394 | ||
4276 | static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) | 4395 | static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) |
@@ -4281,6 +4400,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) | |||
4281 | struct mddev *mddev = cb->cb.data; | 4400 | struct mddev *mddev = cb->cb.data; |
4282 | struct r5conf *conf = mddev->private; | 4401 | struct r5conf *conf = mddev->private; |
4283 | int cnt = 0; | 4402 | int cnt = 0; |
4403 | int hash; | ||
4284 | 4404 | ||
4285 | if (cb->list.next && !list_empty(&cb->list)) { | 4405 | if (cb->list.next && !list_empty(&cb->list)) { |
4286 | spin_lock_irq(&conf->device_lock); | 4406 | spin_lock_irq(&conf->device_lock); |
@@ -4298,11 +4418,14 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) | |||
4298 | * STRIPE_ON_RELEASE_LIST could be set here. In that | 4418 | * STRIPE_ON_RELEASE_LIST could be set here. In that |
4299 | * case, the count is always > 1 here | 4419 | * case, the count is always > 1 here |
4300 | */ | 4420 | */ |
4301 | __release_stripe(conf, sh); | 4421 | hash = sh->hash_lock_index; |
4422 | __release_stripe(conf, sh, &cb->temp_inactive_list[hash]); | ||
4302 | cnt++; | 4423 | cnt++; |
4303 | } | 4424 | } |
4304 | spin_unlock_irq(&conf->device_lock); | 4425 | spin_unlock_irq(&conf->device_lock); |
4305 | } | 4426 | } |
4427 | release_inactive_stripe_list(conf, cb->temp_inactive_list, | ||
4428 | NR_STRIPE_HASH_LOCKS); | ||
4306 | if (mddev->queue) | 4429 | if (mddev->queue) |
4307 | trace_block_unplug(mddev->queue, cnt, !from_schedule); | 4430 | trace_block_unplug(mddev->queue, cnt, !from_schedule); |
4308 | kfree(cb); | 4431 | kfree(cb); |
@@ -4323,8 +4446,12 @@ static void release_stripe_plug(struct mddev *mddev, | |||
4323 | 4446 | ||
4324 | cb = container_of(blk_cb, struct raid5_plug_cb, cb); | 4447 | cb = container_of(blk_cb, struct raid5_plug_cb, cb); |
4325 | 4448 | ||
4326 | if (cb->list.next == NULL) | 4449 | if (cb->list.next == NULL) { |
4450 | int i; | ||
4327 | INIT_LIST_HEAD(&cb->list); | 4451 | INIT_LIST_HEAD(&cb->list); |
4452 | for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) | ||
4453 | INIT_LIST_HEAD(cb->temp_inactive_list + i); | ||
4454 | } | ||
4328 | 4455 | ||
4329 | if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) | 4456 | if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) |
4330 | list_add_tail(&sh->lru, &cb->list); | 4457 | list_add_tail(&sh->lru, &cb->list); |
@@ -4969,27 +5096,45 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4969 | } | 5096 | } |
4970 | 5097 | ||
4971 | static int handle_active_stripes(struct r5conf *conf, int group, | 5098 | static int handle_active_stripes(struct r5conf *conf, int group, |
4972 | struct r5worker *worker) | 5099 | struct r5worker *worker, |
5100 | struct list_head *temp_inactive_list) | ||
4973 | { | 5101 | { |
4974 | struct stripe_head *batch[MAX_STRIPE_BATCH], *sh; | 5102 | struct stripe_head *batch[MAX_STRIPE_BATCH], *sh; |
4975 | int i, batch_size = 0; | 5103 | int i, batch_size = 0, hash; |
5104 | bool release_inactive = false; | ||
4976 | 5105 | ||
4977 | while (batch_size < MAX_STRIPE_BATCH && | 5106 | while (batch_size < MAX_STRIPE_BATCH && |
4978 | (sh = __get_priority_stripe(conf, group)) != NULL) | 5107 | (sh = __get_priority_stripe(conf, group)) != NULL) |
4979 | batch[batch_size++] = sh; | 5108 | batch[batch_size++] = sh; |
4980 | 5109 | ||
4981 | if (batch_size == 0) | 5110 | if (batch_size == 0) { |
4982 | return batch_size; | 5111 | for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) |
5112 | if (!list_empty(temp_inactive_list + i)) | ||
5113 | break; | ||
5114 | if (i == NR_STRIPE_HASH_LOCKS) | ||
5115 | return batch_size; | ||
5116 | release_inactive = true; | ||
5117 | } | ||
4983 | spin_unlock_irq(&conf->device_lock); | 5118 | spin_unlock_irq(&conf->device_lock); |
4984 | 5119 | ||
5120 | release_inactive_stripe_list(conf, temp_inactive_list, | ||
5121 | NR_STRIPE_HASH_LOCKS); | ||
5122 | |||
5123 | if (release_inactive) { | ||
5124 | spin_lock_irq(&conf->device_lock); | ||
5125 | return 0; | ||
5126 | } | ||
5127 | |||
4985 | for (i = 0; i < batch_size; i++) | 5128 | for (i = 0; i < batch_size; i++) |
4986 | handle_stripe(batch[i]); | 5129 | handle_stripe(batch[i]); |
4987 | 5130 | ||
4988 | cond_resched(); | 5131 | cond_resched(); |
4989 | 5132 | ||
4990 | spin_lock_irq(&conf->device_lock); | 5133 | spin_lock_irq(&conf->device_lock); |
4991 | for (i = 0; i < batch_size; i++) | 5134 | for (i = 0; i < batch_size; i++) { |
4992 | __release_stripe(conf, batch[i]); | 5135 | hash = batch[i]->hash_lock_index; |
5136 | __release_stripe(conf, batch[i], &temp_inactive_list[hash]); | ||
5137 | } | ||
4993 | return batch_size; | 5138 | return batch_size; |
4994 | } | 5139 | } |
4995 | 5140 | ||
@@ -5010,9 +5155,10 @@ static void raid5_do_work(struct work_struct *work) | |||
5010 | while (1) { | 5155 | while (1) { |
5011 | int batch_size, released; | 5156 | int batch_size, released; |
5012 | 5157 | ||
5013 | released = release_stripe_list(conf); | 5158 | released = release_stripe_list(conf, worker->temp_inactive_list); |
5014 | 5159 | ||
5015 | batch_size = handle_active_stripes(conf, group_id, worker); | 5160 | batch_size = handle_active_stripes(conf, group_id, worker, |
5161 | worker->temp_inactive_list); | ||
5016 | worker->working = false; | 5162 | worker->working = false; |
5017 | if (!batch_size && !released) | 5163 | if (!batch_size && !released) |
5018 | break; | 5164 | break; |
@@ -5051,7 +5197,7 @@ static void raid5d(struct md_thread *thread) | |||
5051 | struct bio *bio; | 5197 | struct bio *bio; |
5052 | int batch_size, released; | 5198 | int batch_size, released; |
5053 | 5199 | ||
5054 | released = release_stripe_list(conf); | 5200 | released = release_stripe_list(conf, conf->temp_inactive_list); |
5055 | 5201 | ||
5056 | if ( | 5202 | if ( |
5057 | !list_empty(&conf->bitmap_list)) { | 5203 | !list_empty(&conf->bitmap_list)) { |
@@ -5061,7 +5207,7 @@ static void raid5d(struct md_thread *thread) | |||
5061 | bitmap_unplug(mddev->bitmap); | 5207 | bitmap_unplug(mddev->bitmap); |
5062 | spin_lock_irq(&conf->device_lock); | 5208 | spin_lock_irq(&conf->device_lock); |
5063 | conf->seq_write = conf->seq_flush; | 5209 | conf->seq_write = conf->seq_flush; |
5064 | activate_bit_delay(conf); | 5210 | activate_bit_delay(conf, conf->temp_inactive_list); |
5065 | } | 5211 | } |
5066 | raid5_activate_delayed(conf); | 5212 | raid5_activate_delayed(conf); |
5067 | 5213 | ||
@@ -5075,7 +5221,8 @@ static void raid5d(struct md_thread *thread) | |||
5075 | handled++; | 5221 | handled++; |
5076 | } | 5222 | } |
5077 | 5223 | ||
5078 | batch_size = handle_active_stripes(conf, ANY_GROUP, NULL); | 5224 | batch_size = handle_active_stripes(conf, ANY_GROUP, NULL, |
5225 | conf->temp_inactive_list); | ||
5079 | if (!batch_size && !released) | 5226 | if (!batch_size && !released) |
5080 | break; | 5227 | break; |
5081 | handled += batch_size; | 5228 | handled += batch_size; |
@@ -5111,22 +5258,29 @@ raid5_set_cache_size(struct mddev *mddev, int size) | |||
5111 | { | 5258 | { |
5112 | struct r5conf *conf = mddev->private; | 5259 | struct r5conf *conf = mddev->private; |
5113 | int err; | 5260 | int err; |
5261 | int hash; | ||
5114 | 5262 | ||
5115 | if (size <= 16 || size > 32768) | 5263 | if (size <= 16 || size > 32768) |
5116 | return -EINVAL; | 5264 | return -EINVAL; |
5265 | hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS; | ||
5117 | while (size < conf->max_nr_stripes) { | 5266 | while (size < conf->max_nr_stripes) { |
5118 | if (drop_one_stripe(conf)) | 5267 | if (drop_one_stripe(conf, hash)) |
5119 | conf->max_nr_stripes--; | 5268 | conf->max_nr_stripes--; |
5120 | else | 5269 | else |
5121 | break; | 5270 | break; |
5271 | hash--; | ||
5272 | if (hash < 0) | ||
5273 | hash = NR_STRIPE_HASH_LOCKS - 1; | ||
5122 | } | 5274 | } |
5123 | err = md_allow_write(mddev); | 5275 | err = md_allow_write(mddev); |
5124 | if (err) | 5276 | if (err) |
5125 | return err; | 5277 | return err; |
5278 | hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS; | ||
5126 | while (size > conf->max_nr_stripes) { | 5279 | while (size > conf->max_nr_stripes) { |
5127 | if (grow_one_stripe(conf)) | 5280 | if (grow_one_stripe(conf, hash)) |
5128 | conf->max_nr_stripes++; | 5281 | conf->max_nr_stripes++; |
5129 | else break; | 5282 | else break; |
5283 | hash = (hash + 1) % NR_STRIPE_HASH_LOCKS; | ||
5130 | } | 5284 | } |
5131 | return 0; | 5285 | return 0; |
5132 | } | 5286 | } |
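Shrinking walks the buckets backwards from the most recently filled one, (max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS, while growing walks forwards from max_nr_stripes % NR_STRIPE_HASH_LOCKS, so per-bucket counts never drift apart by more than one. A sketch of the shrink walk, assuming every drop_one_stripe() succeeds:

#include <stdio.h>

#define NR_STRIPE_HASH_LOCKS 8

int main(void)
{
	int per_bucket[NR_STRIPE_HASH_LOCKS];
	int max_nr_stripes = 256, size = 100, hash, i;

	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
		per_bucket[i] = max_nr_stripes / NR_STRIPE_HASH_LOCKS;

	hash = (max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
	while (size < max_nr_stripes) {		/* mirrors the kernel shrink loop */
		per_bucket[hash]--;
		max_nr_stripes--;
		hash = (hash - 1 + NR_STRIPE_HASH_LOCKS) % NR_STRIPE_HASH_LOCKS;
	}
	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
		printf("bucket %d: %d stripes\n", i, per_bucket[i]);
	/* a later grow starts at max_nr_stripes % NR_STRIPE_HASH_LOCKS,
	 * i.e. the first bucket that is one stripe short */
	return 0;
}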
@@ -5277,7 +5431,7 @@ static struct attribute_group raid5_attrs_group = { | |||
5277 | 5431 | ||
5278 | static int alloc_thread_groups(struct r5conf *conf, int cnt) | 5432 | static int alloc_thread_groups(struct r5conf *conf, int cnt) |
5279 | { | 5433 | { |
5280 | int i, j; | 5434 | int i, j, k; |
5281 | ssize_t size; | 5435 | ssize_t size; |
5282 | struct r5worker *workers; | 5436 | struct r5worker *workers; |
5283 | 5437 | ||
@@ -5307,8 +5461,12 @@ static int alloc_thread_groups(struct r5conf *conf, int cnt) | |||
5307 | group->workers = workers + i * cnt; | 5461 | group->workers = workers + i * cnt; |
5308 | 5462 | ||
5309 | for (j = 0; j < cnt; j++) { | 5463 | for (j = 0; j < cnt; j++) { |
5310 | group->workers[j].group = group; | 5464 | struct r5worker *worker = group->workers + j; |
5311 | INIT_WORK(&group->workers[j].work, raid5_do_work); | 5465 | worker->group = group; |
5466 | INIT_WORK(&worker->work, raid5_do_work); | ||
5467 | |||
5468 | for (k = 0; k < NR_STRIPE_HASH_LOCKS; k++) | ||
5469 | INIT_LIST_HEAD(worker->temp_inactive_list + k); | ||
5312 | } | 5470 | } |
5313 | } | 5471 | } |
5314 | 5472 | ||
@@ -5459,6 +5617,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
5459 | struct md_rdev *rdev; | 5617 | struct md_rdev *rdev; |
5460 | struct disk_info *disk; | 5618 | struct disk_info *disk; |
5461 | char pers_name[6]; | 5619 | char pers_name[6]; |
5620 | int i; | ||
5462 | 5621 | ||
5463 | if (mddev->new_level != 5 | 5622 | if (mddev->new_level != 5 |
5464 | && mddev->new_level != 4 | 5623 | && mddev->new_level != 4 |
@@ -5503,7 +5662,6 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
5503 | INIT_LIST_HEAD(&conf->hold_list); | 5662 | INIT_LIST_HEAD(&conf->hold_list); |
5504 | INIT_LIST_HEAD(&conf->delayed_list); | 5663 | INIT_LIST_HEAD(&conf->delayed_list); |
5505 | INIT_LIST_HEAD(&conf->bitmap_list); | 5664 | INIT_LIST_HEAD(&conf->bitmap_list); |
5506 | INIT_LIST_HEAD(&conf->inactive_list); | ||
5507 | init_llist_head(&conf->released_stripes); | 5665 | init_llist_head(&conf->released_stripes); |
5508 | atomic_set(&conf->active_stripes, 0); | 5666 | atomic_set(&conf->active_stripes, 0); |
5509 | atomic_set(&conf->preread_active_stripes, 0); | 5667 | atomic_set(&conf->preread_active_stripes, 0); |
@@ -5529,6 +5687,21 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
5529 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) | 5687 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) |
5530 | goto abort; | 5688 | goto abort; |
5531 | 5689 | ||
5690 | /* We init hash_locks[0] separately so that it can be used | ||
5691 | * as the reference lock in the spin_lock_nest_lock() call | ||
5692 | * in lock_all_device_hash_locks_irq in order to convince | ||
5693 | * lockdep that we know what we are doing. | ||
5694 | */ | ||
5695 | spin_lock_init(conf->hash_locks); | ||
5696 | for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++) | ||
5697 | spin_lock_init(conf->hash_locks + i); | ||
5698 | |||
5699 | for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) | ||
5700 | INIT_LIST_HEAD(conf->inactive_list + i); | ||
5701 | |||
5702 | for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) | ||
5703 | INIT_LIST_HEAD(conf->temp_inactive_list + i); | ||
5704 | |||
5532 | conf->level = mddev->new_level; | 5705 | conf->level = mddev->new_level; |
5533 | if (raid5_alloc_percpu(conf) != 0) | 5706 | if (raid5_alloc_percpu(conf) != 0) |
5534 | goto abort; | 5707 | goto abort; |
@@ -5569,7 +5742,6 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
5569 | else | 5742 | else |
5570 | conf->max_degraded = 1; | 5743 | conf->max_degraded = 1; |
5571 | conf->algorithm = mddev->new_layout; | 5744 | conf->algorithm = mddev->new_layout; |
5572 | conf->max_nr_stripes = NR_STRIPES; | ||
5573 | conf->reshape_progress = mddev->reshape_position; | 5745 | conf->reshape_progress = mddev->reshape_position; |
5574 | if (conf->reshape_progress != MaxSector) { | 5746 | if (conf->reshape_progress != MaxSector) { |
5575 | conf->prev_chunk_sectors = mddev->chunk_sectors; | 5747 | conf->prev_chunk_sectors = mddev->chunk_sectors; |
@@ -5578,7 +5750,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
5578 | 5750 | ||
5579 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | 5751 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + |
5580 | max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; | 5752 | max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; |
5581 | if (grow_stripes(conf, conf->max_nr_stripes)) { | 5753 | if (grow_stripes(conf, NR_STRIPES)) { |
5582 | printk(KERN_ERR | 5754 | printk(KERN_ERR |
5583 | "md/raid:%s: couldn't allocate %dkB for buffers\n", | 5755 | "md/raid:%s: couldn't allocate %dkB for buffers\n", |
5584 | mdname(mddev), memory); | 5756 | mdname(mddev), memory); |
@@ -6483,27 +6655,28 @@ static void raid5_quiesce(struct mddev *mddev, int state) | |||
6483 | break; | 6655 | break; |
6484 | 6656 | ||
6485 | case 1: /* stop all writes */ | 6657 | case 1: /* stop all writes */ |
6486 | spin_lock_irq(&conf->device_lock); | 6658 | lock_all_device_hash_locks_irq(conf); |
6487 | /* '2' tells resync/reshape to pause so that all | 6659 | /* '2' tells resync/reshape to pause so that all |
6488 | * active stripes can drain | 6660 | * active stripes can drain |
6489 | */ | 6661 | */ |
6490 | conf->quiesce = 2; | 6662 | conf->quiesce = 2; |
6491 | wait_event_lock_irq(conf->wait_for_stripe, | 6663 | wait_event_cmd(conf->wait_for_stripe, |
6492 | atomic_read(&conf->active_stripes) == 0 && | 6664 | atomic_read(&conf->active_stripes) == 0 && |
6493 | atomic_read(&conf->active_aligned_reads) == 0, | 6665 | atomic_read(&conf->active_aligned_reads) == 0, |
6494 | conf->device_lock); | 6666 | unlock_all_device_hash_locks_irq(conf), |
6667 | lock_all_device_hash_locks_irq(conf)); | ||
6495 | conf->quiesce = 1; | 6668 | conf->quiesce = 1; |
6496 | spin_unlock_irq(&conf->device_lock); | 6669 | unlock_all_device_hash_locks_irq(conf); |
6497 | /* allow reshape to continue */ | 6670 | /* allow reshape to continue */ |
6498 | wake_up(&conf->wait_for_overlap); | 6671 | wake_up(&conf->wait_for_overlap); |
6499 | break; | 6672 | break; |
6500 | 6673 | ||
6501 | case 0: /* re-enable writes */ | 6674 | case 0: /* re-enable writes */ |
6502 | spin_lock_irq(&conf->device_lock); | 6675 | lock_all_device_hash_locks_irq(conf); |
6503 | conf->quiesce = 0; | 6676 | conf->quiesce = 0; |
6504 | wake_up(&conf->wait_for_stripe); | 6677 | wake_up(&conf->wait_for_stripe); |
6505 | wake_up(&conf->wait_for_overlap); | 6678 | wake_up(&conf->wait_for_overlap); |
6506 | spin_unlock_irq(&conf->device_lock); | 6679 | unlock_all_device_hash_locks_irq(conf); |
6507 | break; | 6680 | break; |
6508 | } | 6681 | } |
6509 | } | 6682 | } |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 2113ffa82c7a..a9e443a1116f 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -205,6 +205,7 @@ struct stripe_head { | |||
205 | short pd_idx; /* parity disk index */ | 205 | short pd_idx; /* parity disk index */ |
206 | short qd_idx; /* 'Q' disk index for raid6 */ | 206 | short qd_idx; /* 'Q' disk index for raid6 */ |
207 | short ddf_layout;/* use DDF ordering to calculate Q */ | 207 | short ddf_layout;/* use DDF ordering to calculate Q */ |
208 | short hash_lock_index; | ||
208 | unsigned long state; /* state flags */ | 209 | unsigned long state; /* state flags */ |
209 | atomic_t count; /* nr of active thread/requests */ | 210 | atomic_t count; /* nr of active thread/requests */ |
210 | int bm_seq; /* sequence number for bitmap flushes */ | 211 | int bm_seq; /* sequence number for bitmap flushes */ |
@@ -367,9 +368,18 @@ struct disk_info { | |||
367 | struct md_rdev *rdev, *replacement; | 368 | struct md_rdev *rdev, *replacement; |
368 | }; | 369 | }; |
369 | 370 | ||
371 | /* NOTE NR_STRIPE_HASH_LOCKS must remain below 64. | ||
372 | * This is because we sometimes take all the spinlocks | ||
373 | * and creating that much locking depth can cause | ||
374 | * problems. | ||
375 | */ | ||
376 | #define NR_STRIPE_HASH_LOCKS 8 | ||
377 | #define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1) | ||
378 | |||
370 | struct r5worker { | 379 | struct r5worker { |
371 | struct work_struct work; | 380 | struct work_struct work; |
372 | struct r5worker_group *group; | 381 | struct r5worker_group *group; |
382 | struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS]; | ||
373 | bool working; | 383 | bool working; |
374 | }; | 384 | }; |
375 | 385 | ||
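The '& STRIPE_HASH_LOCKS_MASK' form in stripe_hash_locks_hash() is only equivalent to a modulo when NR_STRIPE_HASH_LOCKS is a power of two; the 64-lock ceiling noted in the comment above is about locking depth, not the mask. A tiny check of the power-of-two assumption (user-space sketch, not part of the patch):

#include <assert.h>

#define NR_STRIPE_HASH_LOCKS 8
#define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1)

int main(void)
{
	/* exactly one bit set => power of two, so mask equals modulo */
	assert((NR_STRIPE_HASH_LOCKS & (NR_STRIPE_HASH_LOCKS - 1)) == 0);
	assert((37 & STRIPE_HASH_LOCKS_MASK) == 37 % NR_STRIPE_HASH_LOCKS);
	return 0;
}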
@@ -382,6 +392,8 @@ struct r5worker_group { | |||
382 | 392 | ||
383 | struct r5conf { | 393 | struct r5conf { |
384 | struct hlist_head *stripe_hashtbl; | 394 | struct hlist_head *stripe_hashtbl; |
395 | /* each lock only protects its corresponding hash chain and inactive_list */ | ||
396 | spinlock_t hash_locks[NR_STRIPE_HASH_LOCKS]; | ||
385 | struct mddev *mddev; | 397 | struct mddev *mddev; |
386 | int chunk_sectors; | 398 | int chunk_sectors; |
387 | int level, algorithm; | 399 | int level, algorithm; |
@@ -462,7 +474,7 @@ struct r5conf { | |||
462 | * Free stripes pool | 474 | * Free stripes pool |
463 | */ | 475 | */ |
464 | atomic_t active_stripes; | 476 | atomic_t active_stripes; |
465 | struct list_head inactive_list; | 477 | struct list_head inactive_list[NR_STRIPE_HASH_LOCKS]; |
466 | struct llist_head released_stripes; | 478 | struct llist_head released_stripes; |
467 | wait_queue_head_t wait_for_stripe; | 479 | wait_queue_head_t wait_for_stripe; |
468 | wait_queue_head_t wait_for_overlap; | 480 | wait_queue_head_t wait_for_overlap; |
@@ -477,6 +489,7 @@ struct r5conf { | |||
477 | * the new thread here until we fully activate the array. | 489 | * the new thread here until we fully activate the array. |
478 | */ | 490 | */ |
479 | struct md_thread *thread; | 491 | struct md_thread *thread; |
492 | struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS]; | ||
480 | struct r5worker_group *worker_groups; | 493 | struct r5worker_group *worker_groups; |
481 | int group_cnt; | 494 | int group_cnt; |
482 | int worker_cnt_per_group; | 495 | int worker_cnt_per_group; |