-rw-r--r--  drivers/md/bitmap.c |   6
-rw-r--r--  drivers/md/md.c     |  12
-rw-r--r--  drivers/md/raid5.c  | 158
-rw-r--r--  drivers/md/raid5.h  |   4
4 files changed, 138 insertions, 42 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9a8e66ae04f5..67f8b31e2054 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -669,17 +669,13 @@ static inline unsigned long file_page_offset(struct bitmap_storage *store,
 /*
  * return a pointer to the page in the filemap that contains the given bit
  *
- * this lookup is complicated by the fact that the bitmap sb might be exactly
- * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
- * 0 or page 1
  */
 static inline struct page *filemap_get_page(struct bitmap_storage *store,
                                             unsigned long chunk)
 {
         if (file_page_index(store, chunk) >= store->file_pages)
                 return NULL;
-        return store->filemap[file_page_index(store, chunk)
-                              - file_page_index(store, 0)];
+        return store->filemap[file_page_index(store, chunk)];
 }
 
 static int bitmap_storage_alloc(struct bitmap_storage *store,
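
With the bitmap superblock no longer counted into the filemap, the lookup can index the page array directly instead of subtracting the index of chunk 0; the removed comment documented exactly the page-0-or-page-1 ambiguity that the direct index does away with. A minimal userspace model of the simplified lookup (CHUNKS_PER_PAGE and all names here are illustrative, not the kernel's):

#include <stddef.h>
#include <stdio.h>

#define CHUNKS_PER_PAGE 4096    /* assumption: chunks tracked per page */

struct page_map {
        void   **pages;         /* one entry per bitmap file page */
        size_t   file_pages;
};

static void *map_get_page(struct page_map *m, unsigned long chunk)
{
        size_t idx = chunk / CHUNKS_PER_PAGE;

        if (idx >= m->file_pages)       /* out of range: no page */
                return NULL;
        return m->pages[idx];           /* direct index, no base offset */
}

int main(void)
{
        void *bufs[2] = { (void *)0x1, (void *)0x2 };
        struct page_map m = { bufs, 2 };

        printf("%p\n", map_get_page(&m, CHUNKS_PER_PAGE + 1));  /* 0x2 */
        printf("%p\n", map_get_page(&m, 3 * CHUNKS_PER_PAGE));  /* nil */
        return 0;
}
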
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2382cfc9bb3f..34846856dbc6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3448,6 +3448,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
                 mddev->level = LEVEL_NONE;
                 return rv;
         }
+        if (mddev->ro)
+                return -EROFS;
 
         /* request to change the personality. Need to ensure:
          *  - array is not engaged in resync/recovery/reshape
@@ -3634,6 +3636,8 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
                 int err;
                 if (mddev->pers->check_reshape == NULL)
                         return -EBUSY;
+                if (mddev->ro)
+                        return -EROFS;
                 mddev->new_layout = n;
                 err = mddev->pers->check_reshape(mddev);
                 if (err) {
@@ -3723,6 +3727,8 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
                 int err;
                 if (mddev->pers->check_reshape == NULL)
                         return -EBUSY;
+                if (mddev->ro)
+                        return -EROFS;
                 mddev->new_chunk_sectors = n >> 9;
                 err = mddev->pers->check_reshape(mddev);
                 if (err) {
@@ -6135,6 +6141,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
          */
         if (mddev->sync_thread)
                 return -EBUSY;
+        if (mddev->ro)
+                return -EROFS;
 
         rdev_for_each(rdev, mddev) {
                 sector_t avail = rdev->sectors;
@@ -6157,6 +6165,8 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
         /* change the number of raid disks */
         if (mddev->pers->check_reshape == NULL)
                 return -EINVAL;
+        if (mddev->ro)
+                return -EROFS;
         if (raid_disks <= 0 ||
             (mddev->max_disks && raid_disks >= mddev->max_disks))
                 return -EINVAL;
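
All five hunks above add the same guard: while the array is read-only (mddev->ro), requests that would change its shape (personality, layout, chunk size, size, disk count) now fail with -EROFS instead of proceeding. A minimal sketch of the pattern, with an illustrative struct and handler standing in for mddev and the sysfs store routines:

#include <errno.h>
#include <stdio.h>

struct array_state {
        int ro;                 /* 1 = read-only, as mddev->ro */
        int new_layout;
};

static int layout_store(struct array_state *a, int n)
{
        if (a->ro)
                return -EROFS;  /* refuse geometry changes while ro */
        a->new_layout = n;
        return 0;
}

int main(void)
{
        struct array_state a = { .ro = 1 };

        if (layout_store(&a, 5) == -EROFS)
                puts("rejected: array is read-only");
        return 0;
}
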
@@ -8333,7 +8343,7 @@ static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
                 if (a < s) {
                         /* we need to split this range */
                         if (bb->count >= MD_MAX_BADBLOCKS) {
-                                rv = 0;
+                                rv = -ENOSPC;
                                 goto out;
                         }
                         memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
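
Clearing the middle of a recorded bad range needs a second table entry for the right-hand piece; when the table is already full, the operation now reports -ENOSPC instead of a silent 0. A hedged userspace model of just that split case, using plain start/len entries rather than md's packed 64-bit format:

#include <errno.h>
#include <string.h>
#include <stdio.h>

#define MAX_BAD 4

struct range { long start, len; };

static struct range bad[MAX_BAD];
static int bad_count;

static int clear_range(long s, long len)
{
        for (int lo = 0; lo < bad_count; lo++) {
                struct range *r = &bad[lo];
                long a = r->start, e = r->start + r->len;

                if (a < s && s + len < e) {     /* splits this range */
                        if (bad_count >= MAX_BAD)
                                return -ENOSPC; /* no slot for 2nd half */
                        memmove(r + 1, r, (bad_count - lo) * sizeof(*r));
                        bad_count++;
                        r[0].len   = s - a;             /* left piece */
                        r[1].start = s + len;           /* right piece */
                        r[1].len   = e - (s + len);
                        return 0;
                }
        }
        return 0;
}

int main(void)
{
        bad[0] = (struct range){ 100, 50 };
        bad_count = 1;
        printf("%d\n", clear_range(110, 10));   /* 0: split succeeded */
        printf("%d entries\n", bad_count);      /* 2 */
        return 0;
}
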
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2afef4ec9312..6234b2e84587 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
         BUG_ON(atomic_read(&conf->active_stripes)==0);
         if (test_bit(STRIPE_HANDLE, &sh->state)) {
                 if (test_bit(STRIPE_DELAYED, &sh->state) &&
-                    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+                    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
                         list_add_tail(&sh->lru, &conf->delayed_list);
-                else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+                        if (atomic_read(&conf->preread_active_stripes)
+                            < IO_THRESHOLD)
+                                md_wakeup_thread(conf->mddev->thread);
+                } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
                            sh->bm_seq - conf->seq_write > 0)
                         list_add_tail(&sh->lru, &conf->bitmap_list);
                 else {
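
A stripe parked on the delayed list previously relied on some later event to kick the list; the hunk wakes the raid5d thread right away when the preread-active count is already below IO_THRESHOLD, so the delayed stripe is not stranded. A compact sketch of the idea, with an illustrative counter and wakeup hook:

#include <stdatomic.h>
#include <stdio.h>

#define IO_THRESHOLD 1

static atomic_int preread_active;

static void wake_worker(void) { puts("worker woken"); }

static void queue_delayed(void)
{
        /* item goes on the delayed list here ... */
        if (atomic_load(&preread_active) < IO_THRESHOLD)
                wake_worker();  /* nothing else will kick the list */
}

int main(void)
{
        queue_delayed();        /* 0 < 1, so it wakes */
        return 0;
}
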
@@ -413,6 +416,11 @@ static void release_stripe(struct stripe_head *sh)
         int hash;
         bool wakeup;
 
+        /* Avoid release_list until the last reference.
+         */
+        if (atomic_add_unless(&sh->count, -1, 1))
+                return;
+
         if (unlikely(!conf->mddev->thread) ||
             test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
                 goto slow_path;
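
The new fast path retires a reference without locks or list manipulation unless it is the last one: atomic_add_unless(&sh->count, -1, 1) decrements and returns nonzero except when the counter equals 1, in which case it leaves the counter alone and the caller falls through to the release_list/slow-path handling. A C11 model of those semantics:

#include <stdatomic.h>
#include <stdio.h>

/* add_unless(v, a, u): add a to *v and return 1, unless *v == u,
 * in which case leave *v untouched and return 0. */
static int add_unless(atomic_int *v, int a, int u)
{
        int c = atomic_load(v);

        while (c != u) {
                if (atomic_compare_exchange_weak(v, &c, c + a))
                        return 1;       /* applied the delta */
                /* c was reloaded by the failed CAS; retry */
        }
        return 0;                       /* hit the forbidden value */
}

int main(void)
{
        atomic_int count = 2;

        if (add_unless(&count, -1, 1))
                puts("fast path: reference dropped");   /* count -> 1 */
        if (!add_unless(&count, -1, 1))
                puts("slow path: last reference");      /* count stays 1 */
        return 0;
}
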
@@ -479,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh)
         int num = sh->raid_conf->pool_size;
 
         for (i = 0; i < num ; i++) {
+                WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
                 p = sh->dev[i].page;
                 if (!p)
                         continue;
@@ -499,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh)
                         return 1;
                 }
                 sh->dev[i].page = page;
+                sh->dev[i].orig_page = page;
         }
         return 0;
 }
@@ -855,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                         if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
                                 bi->bi_rw |= REQ_NOMERGE;
 
+                        if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+                                WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+                        sh->dev[i].vec.bv_page = sh->dev[i].page;
                         bi->bi_vcnt = 1;
                         bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                         bi->bi_io_vec[0].bv_offset = 0;
@@ -899,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                         else
                                 rbi->bi_iter.bi_sector = (sh->sector
                                                   + rrdev->data_offset);
+                        if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+                                WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+                        sh->dev[i].rvec.bv_page = sh->dev[i].page;
                         rbi->bi_vcnt = 1;
                         rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                         rbi->bi_io_vec[0].bv_offset = 0;
@@ -927,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 }
 
 static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page *page,
-        sector_t sector, struct dma_async_tx_descriptor *tx)
+async_copy_data(int frombio, struct bio *bio, struct page **page,
+        sector_t sector, struct dma_async_tx_descriptor *tx,
+        struct stripe_head *sh)
 {
         struct bio_vec bvl;
         struct bvec_iter iter;
@@ -965,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
                 if (clen > 0) {
                         b_offset += bvl.bv_offset;
                         bio_page = bvl.bv_page;
-                        if (frombio)
-                                tx = async_memcpy(page, bio_page, page_offset,
+                        if (frombio) {
+                                if (sh->raid_conf->skip_copy &&
+                                    b_offset == 0 && page_offset == 0 &&
+                                    clen == STRIPE_SIZE)
+                                        *page = bio_page;
+                                else
+                                        tx = async_memcpy(*page, bio_page, page_offset,
                                                   b_offset, clen, &submit);
-                        else
-                                tx = async_memcpy(bio_page, page, b_offset,
+                        } else
+                                tx = async_memcpy(bio_page, *page, b_offset,
                                                   page_offset, clen, &submit);
                 }
                 /* chain the operations */
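
When skip_copy is set and a write covers one full stripe page (clen == STRIPE_SIZE with both offsets zero), async_copy_data() now swaps the bio's page into the stripe-cache slot instead of issuing an async memcpy; partial or misaligned spans still copy. The caller can tell the swap happened because dev->page no longer equals dev->orig_page. A userspace model of just that decision (PAGE_SZ and names are illustrative):

#include <string.h>
#include <stdio.h>

#define PAGE_SZ 4096

/* Returns 1 if the bio page was borrowed, 0 if data was copied. */
static int copy_in(int skip_copy, char **cache_page, char *bio_page,
                   size_t page_off, size_t b_off, size_t clen)
{
        if (skip_copy && b_off == 0 && page_off == 0 && clen == PAGE_SZ) {
                *cache_page = bio_page; /* borrow the bio's page */
                return 1;               /* no copy issued */
        }
        memcpy(*cache_page + page_off, bio_page + b_off, clen);
        return 0;
}

int main(void)
{
        static char cache[PAGE_SZ], bio[PAGE_SZ];
        char *slot = cache;

        printf("full page: %s\n",
               copy_in(1, &slot, bio, 0, 0, PAGE_SZ) ? "swapped" : "copied");
        slot = cache;
        printf("partial:   %s\n",
               copy_in(1, &slot, bio, 0, 0, 512) ? "swapped" : "copied");
        return 0;
}
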
@@ -1045,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh)
                         spin_unlock_irq(&sh->stripe_lock);
                         while (rbi && rbi->bi_iter.bi_sector <
                                 dev->sector + STRIPE_SECTORS) {
-                                tx = async_copy_data(0, rbi, dev->page,
-                                        dev->sector, tx);
+                                tx = async_copy_data(0, rbi, &dev->page,
+                                        dev->sector, tx, sh);
                                 rbi = r5_next_bio(rbi, dev->sector);
                         }
                 }
@@ -1384,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                         BUG_ON(dev->written);
                         wbi = dev->written = chosen;
                         spin_unlock_irq(&sh->stripe_lock);
+                        WARN_ON(dev->page != dev->orig_page);
 
                         while (wbi && wbi->bi_iter.bi_sector <
                                 dev->sector + STRIPE_SECTORS) {
@@ -1393,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                                         set_bit(R5_SyncIO, &dev->flags);
                                 if (wbi->bi_rw & REQ_DISCARD)
                                         set_bit(R5_Discard, &dev->flags);
-                                else
-                                        tx = async_copy_data(1, wbi, dev->page,
-                                                dev->sector, tx);
+                                else {
+                                        tx = async_copy_data(1, wbi, &dev->page,
+                                                dev->sector, tx, sh);
+                                        if (dev->page != dev->orig_page) {
+                                                set_bit(R5_SkipCopy, &dev->flags);
+                                                clear_bit(R5_UPTODATE, &dev->flags);
+                                                clear_bit(R5_OVERWRITE, &dev->flags);
+                                        }
+                                }
                                 wbi = r5_next_bio(wbi, dev->sector);
                         }
                 }
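
ops_run_biodrain() detects the swap by comparing dev->page against dev->orig_page: a borrowed page means the stripe-cache copy is stale, so R5_SkipCopy is set and R5_UPTODATE/R5_OVERWRITE are cleared, and handle_stripe_clean_event() (further down) hands the page back once the write completes. A small userspace model of that bookkeeping, with plain bits standing in for the flag API:

#include <stdio.h>

#define R5_UPTODATE  (1 << 0)
#define R5_SKIPCOPY  (1 << 1)

struct dev { char *page, *orig_page; unsigned flags; };

static void after_drain(struct dev *d)
{
        if (d->page != d->orig_page) {          /* page was borrowed */
                d->flags |= R5_SKIPCOPY;
                d->flags &= ~R5_UPTODATE;       /* cache copy is stale */
        }
}

static void write_done(struct dev *d)
{
        if (d->flags & R5_SKIPCOPY) {
                d->flags &= ~R5_SKIPCOPY;
                d->page = d->orig_page;         /* give the page back */
        }
}

int main(void)
{
        char cache[1], bio[1];
        struct dev d = { cache, cache, R5_UPTODATE };

        d.page = bio;           /* the copy routine swapped it */
        after_drain(&d);
        write_done(&d);
        printf("restored: %s\n", d.page == d.orig_page ? "yes" : "no");
        return 0;
}
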
@@ -1426,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
                 struct r5dev *dev = &sh->dev[i];
 
                 if (dev->written || i == pd_idx || i == qd_idx) {
-                        if (!discard)
+                        if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
                                 set_bit(R5_UPTODATE, &dev->flags);
                         if (fua)
                                 set_bit(R5_WantFUA, &dev->flags);
@@ -1839,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
                 osh = get_free_stripe(conf, hash);
                 unlock_device_hash_lock(conf, hash);
                 atomic_set(&nsh->count, 1);
-                for(i=0; i<conf->pool_size; i++)
+                for(i=0; i<conf->pool_size; i++) {
                         nsh->dev[i].page = osh->dev[i].page;
+                        nsh->dev[i].orig_page = osh->dev[i].page;
+                }
                 for( ; i<newsize; i++)
                         nsh->dev[i].page = NULL;
                 nsh->hash_lock_index = hash;
@@ -1896,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
                         if (nsh->dev[i].page == NULL) {
                                 struct page *p = alloc_page(GFP_NOIO);
                                 nsh->dev[i].page = p;
+                                nsh->dev[i].orig_page = p;
                                 if (!p)
                                         err = -ENOMEM;
                         }
@@ -2133,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error)
 }
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
         struct r5dev *dev = &sh->dev[i];
 
         bio_init(&dev->req);
         dev->req.bi_io_vec = &dev->vec;
-        dev->req.bi_vcnt++;
-        dev->req.bi_max_vecs++;
+        dev->req.bi_max_vecs = 1;
         dev->req.bi_private = sh;
-        dev->vec.bv_page = dev->page;
 
         bio_init(&dev->rreq);
         dev->rreq.bi_io_vec = &dev->rvec;
-        dev->rreq.bi_vcnt++;
-        dev->rreq.bi_max_vecs++;
+        dev->rreq.bi_max_vecs = 1;
         dev->rreq.bi_private = sh;
-        dev->rvec.bv_page = dev->page;
 
         dev->flags = 0;
         dev->sector = compute_blocknr(sh, i, previous);
@@ -2750,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                 /* and fail all 'written' */
                 bi = sh->dev[i].written;
                 sh->dev[i].written = NULL;
+                if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
+                        WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+                        sh->dev[i].page = sh->dev[i].orig_page;
+                }
+
                 if (bi) bitmap_end = 1;
                 while (bi && bi->bi_iter.bi_sector <
                        sh->dev[i].sector + STRIPE_SECTORS) {
@@ -2886,8 +2919,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
              (s->failed >= 1 && fdev[0]->toread) ||
              (s->failed >= 2 && fdev[1]->toread) ||
              (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+              (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
               !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
-             (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
+             (sh->raid_conf->level == 6 && s->failed && s->to_write &&
+              s->to_write < sh->raid_conf->raid_disks - 2 &&
+              (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
                 /* we would like to get this block, possibly by computing it,
                  * otherwise read it if the backing disk is insync
                  */
@@ -2991,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                 dev = &sh->dev[i];
                 if (!test_bit(R5_LOCKED, &dev->flags) &&
                     (test_bit(R5_UPTODATE, &dev->flags) ||
-                     test_bit(R5_Discard, &dev->flags))) {
+                     test_bit(R5_Discard, &dev->flags) ||
+                     test_bit(R5_SkipCopy, &dev->flags))) {
                         /* We can return any write requests */
                         struct bio *wbi, *wbi2;
                         pr_debug("Return write for disc %d\n", i);
                         if (test_and_clear_bit(R5_Discard, &dev->flags))
                                 clear_bit(R5_UPTODATE, &dev->flags);
+                        if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
+                                WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
+                                dev->page = dev->orig_page;
+                        }
                         wbi = dev->written;
                         dev->written = NULL;
                         while (wbi && wbi->bi_iter.bi_sector <
@@ -3015,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                                 0);
                         } else if (test_bit(R5_Discard, &dev->flags))
                                 discard_pending = 1;
+                        WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
+                        WARN_ON(dev->page != dev->orig_page);
                 }
         if (!discard_pending &&
             test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
@@ -3086,7 +3129,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                     !test_bit(R5_LOCKED, &dev->flags) &&
                     !(test_bit(R5_UPTODATE, &dev->flags) ||
                       test_bit(R5_Wantcompute, &dev->flags))) {
-                        if (test_bit(R5_Insync, &dev->flags)) rcw++;
+                        if (test_bit(R5_Insync, &dev->flags))
+                                rcw++;
                         else
                                 rcw += 2*disks;
                 }
@@ -3107,10 +3151,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                             !(test_bit(R5_UPTODATE, &dev->flags) ||
                               test_bit(R5_Wantcompute, &dev->flags)) &&
                             test_bit(R5_Insync, &dev->flags)) {
-                                if (
-                                    test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                                        pr_debug("Read_old block "
-                                                 "%d for r-m-w\n", i);
+                                if (test_bit(STRIPE_PREREAD_ACTIVE,
+                                             &sh->state)) {
+                                        pr_debug("Read_old block %d for r-m-w\n",
+                                                 i);
                                         set_bit(R5_LOCKED, &dev->flags);
                                         set_bit(R5_Wantread, &dev->flags);
                                         s->locked++;
@@ -3133,10 +3177,9 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                             !(test_bit(R5_UPTODATE, &dev->flags) ||
                               test_bit(R5_Wantcompute, &dev->flags))) {
                                 rcw++;
-                                if (!test_bit(R5_Insync, &dev->flags))
-                                        continue; /* it's a failed drive */
-                                if (
-                                    test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+                                if (test_bit(R5_Insync, &dev->flags) &&
+                                    test_bit(STRIPE_PREREAD_ACTIVE,
+                                             &sh->state)) {
                                         pr_debug("Read_old block "
                                                  "%d for Reconstruct\n", i);
                                         set_bit(R5_LOCKED, &dev->flags);
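
handle_stripe_dirtying() picks between read-modify-write and reconstruct-write by counting the reads each strategy would need, with a heavy 2*disks penalty when a required disk is not in sync, and only issues reads of old blocks once STRIPE_PREREAD_ACTIVE is set. A reduced, illustrative model of the counting (the real code also accounts for the parity index, locking, and R5_Wantcompute):

#include <stdio.h>

struct disk { int want_write, uptodate, insync; };

static void weigh(struct disk *d, int disks, int *rmw, int *rcw)
{
        *rmw = *rcw = 0;
        for (int i = 0; i < disks; i++) {
                if (d[i].want_write && !d[i].uptodate)
                        /* rmw reads the old copy of each written block */
                        *rmw += d[i].insync ? 1 : 2 * disks;
                if (!d[i].want_write && !d[i].uptodate)
                        /* rcw reads every block it will not overwrite */
                        *rcw += d[i].insync ? 1 : 2 * disks;
        }
}

int main(void)
{
        struct disk d[4] = {
                { 1, 0, 1 },    /* block being written */
                { 0, 0, 1 },    /* would need reading for rcw */
                { 0, 1, 1 },    /* already cached */
                { 1, 0, 1 },    /* parity, treated as written */
        };
        int rmw, rcw;

        weigh(d, 4, &rmw, &rcw);
        printf("rmw=%d rcw=%d -> %s\n", rmw, rcw,
               rmw <= rcw ? "read-modify-write" : "reconstruct-write");
        return 0;
}
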
@@ -5031,8 +5074,8 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
         bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
 
         set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
+        set_bit(STRIPE_HANDLE, &sh->state);
 
-        handle_stripe(sh);
         release_stripe(sh);
 
         return STRIPE_SECTORS;
@@ -5072,7 +5115,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                         /* already done this stripe */
                         continue;
 
-                sh = get_active_stripe(conf, sector, 0, 1, 0);
+                sh = get_active_stripe(conf, sector, 0, 1, 1);
 
                 if (!sh) {
                         /* failed to get a stripe - must wait */
@@ -5355,6 +5398,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
                                         raid5_store_preread_threshold);
 
 static ssize_t
+raid5_show_skip_copy(struct mddev *mddev, char *page)
+{
+        struct r5conf *conf = mddev->private;
+        if (conf)
+                return sprintf(page, "%d\n", conf->skip_copy);
+        else
+                return 0;
+}
+
+static ssize_t
+raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
+{
+        struct r5conf *conf = mddev->private;
+        unsigned long new;
+        if (len >= PAGE_SIZE)
+                return -EINVAL;
+        if (!conf)
+                return -ENODEV;
+
+        if (kstrtoul(page, 10, &new))
+                return -EINVAL;
+        new = !!new;
+        if (new == conf->skip_copy)
+                return len;
+
+        mddev_suspend(mddev);
+        conf->skip_copy = new;
+        if (new)
+                mddev->queue->backing_dev_info.capabilities |=
+                                                BDI_CAP_STABLE_WRITES;
+        else
+                mddev->queue->backing_dev_info.capabilities &=
+                                                ~BDI_CAP_STABLE_WRITES;
+        mddev_resume(mddev);
+        return len;
+}
+
+static struct md_sysfs_entry
+raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
+                         raid5_show_skip_copy,
+                         raid5_store_skip_copy);
+
+
+static ssize_t
 stripe_cache_active_show(struct mddev *mddev, char *page)
 {
         struct r5conf *conf = mddev->private;
@@ -5439,6 +5526,7 @@ static struct attribute *raid5_attrs[] = {
         &raid5_stripecache_active.attr,
         &raid5_preread_bypass_threshold.attr,
         &raid5_group_thread_cnt.attr,
+        &raid5_skip_copy.attr,
         NULL,
 };
 static struct attribute_group raid5_attrs_group = {
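
The new attribute sits beside the other raid5 knobs, so it should appear as skip_copy in the array's md sysfs directory and, presumably, be togglable with something like `echo 1 > /sys/block/md0/md/skip_copy` (path assumed from md's usual sysfs layout, not shown in this diff). Note that the store handler suspends the array around the flip and turns on BDI_CAP_STABLE_WRITES while skip_copy is enabled: once bio pages are borrowed for parity computation and disk writes, upper layers must not modify them while the I/O is in flight.
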
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 01ad8ae8f578..bc72cd4be5f8 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -232,7 +232,7 @@ struct stripe_head {
          */
         struct bio      req, rreq;
         struct bio_vec  vec, rvec;
-        struct page     *page;
+        struct page     *page, *orig_page;
         struct bio      *toread, *read, *towrite, *written;
         sector_t        sector;         /* sector of this page */
         unsigned long   flags;
@@ -299,6 +299,7 @@ enum r5dev_flags {
                          * data in, and now is a good time to write it out.
                          */
         R5_Discard,     /* Discard the stripe */
+        R5_SkipCopy,    /* Don't copy data from bio to stripe cache */
 };
 
 /*
@@ -436,6 +437,7 @@ struct r5conf {
         atomic_t                pending_full_writes; /* full write backlog */
         int                     bypass_count; /* bypassed prereads */
         int                     bypass_threshold; /* preread nice */
+        int                     skip_copy; /* Don't copy data from bio to stripe cache */
         struct list_head        *last_hold; /* detect hold_list promotions */
 
         atomic_t                reshape_stripes; /* stripes with pending writes for reshape */