 drivers/md/bitmap.c |   6 
 drivers/md/md.c     |  12 
 drivers/md/raid5.c  | 158 
 drivers/md/raid5.h  |   4 
 4 files changed, 138 insertions(+), 42 deletions(-)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9a8e66ae04f5..67f8b31e2054 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -669,17 +669,13 @@ static inline unsigned long file_page_offset(struct bitmap_storage *store,
 /*
  * return a pointer to the page in the filemap that contains the given bit
  *
- * this lookup is complicated by the fact that the bitmap sb might be exactly
- * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
- * 0 or page 1
  */
 static inline struct page *filemap_get_page(struct bitmap_storage *store,
 					    unsigned long chunk)
 {
 	if (file_page_index(store, chunk) >= store->file_pages)
 		return NULL;
-	return store->filemap[file_page_index(store, chunk)
-			      - file_page_index(store, 0)];
+	return store->filemap[file_page_index(store, chunk)];
 }
 
 static int bitmap_storage_alloc(struct bitmap_storage *store,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2382cfc9bb3f..34846856dbc6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3448,6 +3448,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->level = LEVEL_NONE;
 		return rv;
 	}
+	if (mddev->ro)
+		return -EROFS;
 
 	/* request to change the personality. Need to ensure:
 	 *  - array is not engaged in resync/recovery/reshape
@@ -3634,6 +3636,8 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
 		int err;
 		if (mddev->pers->check_reshape == NULL)
 			return -EBUSY;
+		if (mddev->ro)
+			return -EROFS;
 		mddev->new_layout = n;
 		err = mddev->pers->check_reshape(mddev);
 		if (err) {
@@ -3723,6 +3727,8 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
 		int err;
 		if (mddev->pers->check_reshape == NULL)
 			return -EBUSY;
+		if (mddev->ro)
+			return -EROFS;
 		mddev->new_chunk_sectors = n >> 9;
 		err = mddev->pers->check_reshape(mddev);
 		if (err) {
@@ -6135,6 +6141,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
 	 */
 	if (mddev->sync_thread)
 		return -EBUSY;
+	if (mddev->ro)
+		return -EROFS;
 
 	rdev_for_each(rdev, mddev) {
 		sector_t avail = rdev->sectors;
@@ -6157,6 +6165,8 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
 	/* change the number of raid disks */
 	if (mddev->pers->check_reshape == NULL)
 		return -EINVAL;
+	if (mddev->ro)
+		return -EROFS;
 	if (raid_disks <= 0 ||
 	    (mddev->max_disks && raid_disks >= mddev->max_disks))
 		return -EINVAL;
@@ -8333,7 +8343,7 @@ static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
 		if (a < s) {
 			/* we need to split this range */
 			if (bb->count >= MD_MAX_BADBLOCKS) {
-				rv = 0;
+				rv = -ENOSPC;
 				goto out;
 			}
 			memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2afef4ec9312..6234b2e84587 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
 	BUG_ON(atomic_read(&conf->active_stripes)==0);
 	if (test_bit(STRIPE_HANDLE, &sh->state)) {
 		if (test_bit(STRIPE_DELAYED, &sh->state) &&
-		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 			list_add_tail(&sh->lru, &conf->delayed_list);
-		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+			if (atomic_read(&conf->preread_active_stripes)
+			    < IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 			   sh->bm_seq - conf->seq_write > 0)
 			list_add_tail(&sh->lru, &conf->bitmap_list);
 		else {
@@ -413,6 +416,11 @@ static void release_stripe(struct stripe_head *sh)
 	int hash;
 	bool wakeup;
 
+	/* Avoid release_list until the last reference.
+	 */
+	if (atomic_add_unless(&sh->count, -1, 1))
+		return;
+
 	if (unlikely(!conf->mddev->thread) ||
 	    test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
 		goto slow_path;
@@ -479,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh)
 	int num = sh->raid_conf->pool_size;
 
 	for (i = 0; i < num ; i++) {
+		WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
 		p = sh->dev[i].page;
 		if (!p)
 			continue;
@@ -499,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh)
 			return 1;
 		}
 		sh->dev[i].page = page;
+		sh->dev[i].orig_page = page;
 	}
 	return 0;
 }
@@ -855,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
 				bi->bi_rw |= REQ_NOMERGE;
 
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].vec.bv_page = sh->dev[i].page;
 			bi->bi_vcnt = 1;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			bi->bi_io_vec[0].bv_offset = 0;
@@ -899,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			else
 				rbi->bi_iter.bi_sector = (sh->sector
 						  + rrdev->data_offset);
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].rvec.bv_page = sh->dev[i].page;
 			rbi->bi_vcnt = 1;
 			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			rbi->bi_io_vec[0].bv_offset = 0;
@@ -927,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 }
 
 static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page *page,
-	sector_t sector, struct dma_async_tx_descriptor *tx)
+async_copy_data(int frombio, struct bio *bio, struct page **page,
+	sector_t sector, struct dma_async_tx_descriptor *tx,
+	struct stripe_head *sh)
 {
 	struct bio_vec bvl;
 	struct bvec_iter iter;
@@ -965,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page **page,
 		if (clen > 0) {
 			b_offset += bvl.bv_offset;
 			bio_page = bvl.bv_page;
-			if (frombio)
-				tx = async_memcpy(page, bio_page, page_offset,
+			if (frombio) {
+				if (sh->raid_conf->skip_copy &&
+				    b_offset == 0 && page_offset == 0 &&
+				    clen == STRIPE_SIZE)
+					*page = bio_page;
+				else
+					tx = async_memcpy(*page, bio_page, page_offset,
 						  b_offset, clen, &submit);
-			else
-				tx = async_memcpy(bio_page, page, b_offset,
+			} else
+				tx = async_memcpy(bio_page, *page, b_offset,
 						  page_offset, clen, &submit);
 		}
 		/* chain the operations */
@@ -1045,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh)
 			spin_unlock_irq(&sh->stripe_lock);
 			while (rbi && rbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
-				tx = async_copy_data(0, rbi, dev->page,
-					dev->sector, tx);
+				tx = async_copy_data(0, rbi, &dev->page,
+					dev->sector, tx, sh);
 				rbi = r5_next_bio(rbi, dev->sector);
 			}
 		}
@@ -1384,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 			BUG_ON(dev->written);
 			wbi = dev->written = chosen;
 			spin_unlock_irq(&sh->stripe_lock);
+			WARN_ON(dev->page != dev->orig_page);
 
 			while (wbi && wbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
@@ -1393,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 					set_bit(R5_SyncIO, &dev->flags);
 				if (wbi->bi_rw & REQ_DISCARD)
 					set_bit(R5_Discard, &dev->flags);
-				else
-					tx = async_copy_data(1, wbi, dev->page,
-						dev->sector, tx);
+				else {
+					tx = async_copy_data(1, wbi, &dev->page,
+						dev->sector, tx, sh);
+					if (dev->page != dev->orig_page) {
+						set_bit(R5_SkipCopy, &dev->flags);
+						clear_bit(R5_UPTODATE, &dev->flags);
+						clear_bit(R5_OVERWRITE, &dev->flags);
+					}
+				}
 				wbi = r5_next_bio(wbi, dev->sector);
 			}
 		}
@@ -1426,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
 		struct r5dev *dev = &sh->dev[i];
 
 		if (dev->written || i == pd_idx || i == qd_idx) {
-			if (!discard)
+			if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
 				set_bit(R5_UPTODATE, &dev->flags);
 			if (fua)
 				set_bit(R5_WantFUA, &dev->flags);
@@ -1839,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		osh = get_free_stripe(conf, hash);
 		unlock_device_hash_lock(conf, hash);
 		atomic_set(&nsh->count, 1);
-		for(i=0; i<conf->pool_size; i++)
+		for(i=0; i<conf->pool_size; i++) {
 			nsh->dev[i].page = osh->dev[i].page;
+			nsh->dev[i].orig_page = osh->dev[i].page;
+		}
 		for( ; i<newsize; i++)
 			nsh->dev[i].page = NULL;
 		nsh->hash_lock_index = hash;
@@ -1896,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 			if (nsh->dev[i].page == NULL) {
 				struct page *p = alloc_page(GFP_NOIO);
 				nsh->dev[i].page = p;
+				nsh->dev[i].orig_page = p;
 				if (!p)
 					err = -ENOMEM;
 			}
@@ -2133,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error)
 }
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
 	struct r5dev *dev = &sh->dev[i];
 
 	bio_init(&dev->req);
 	dev->req.bi_io_vec = &dev->vec;
-	dev->req.bi_vcnt++;
-	dev->req.bi_max_vecs++;
+	dev->req.bi_max_vecs = 1;
 	dev->req.bi_private = sh;
-	dev->vec.bv_page = dev->page;
 
 	bio_init(&dev->rreq);
 	dev->rreq.bi_io_vec = &dev->rvec;
-	dev->rreq.bi_vcnt++;
-	dev->rreq.bi_max_vecs++;
+	dev->rreq.bi_max_vecs = 1;
 	dev->rreq.bi_private = sh;
-	dev->rvec.bv_page = dev->page;
 
 	dev->flags = 0;
 	dev->sector = compute_blocknr(sh, i, previous);
@@ -2750,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		/* and fail all 'written' */
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
+		if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
+			WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].page = sh->dev[i].orig_page;
+		}
+
 		if (bi) bitmap_end = 1;
 		while (bi && bi->bi_iter.bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
@@ -2886,8 +2919,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
 	     (s->failed >= 1 && fdev[0]->toread) ||
 	     (s->failed >= 2 && fdev[1]->toread) ||
 	     (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
 	      !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
-	     (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
+	     (sh->raid_conf->level == 6 && s->failed && s->to_write &&
+	      s->to_write < sh->raid_conf->raid_disks - 2 &&
+	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
 		/* we would like to get this block, possibly by computing it,
 		 * otherwise read it if the backing disk is insync
 		 */
@@ -2991,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) &&
 			    (test_bit(R5_UPTODATE, &dev->flags) ||
-			     test_bit(R5_Discard, &dev->flags))) {
+			     test_bit(R5_Discard, &dev->flags) ||
+			     test_bit(R5_SkipCopy, &dev->flags))) {
 				/* We can return any write requests */
 				struct bio *wbi, *wbi2;
 				pr_debug("Return write for disc %d\n", i);
 				if (test_and_clear_bit(R5_Discard, &dev->flags))
 					clear_bit(R5_UPTODATE, &dev->flags);
+				if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
+					WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
+					dev->page = dev->orig_page;
+				}
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_iter.bi_sector <
@@ -3015,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 						 0);
 			} else if (test_bit(R5_Discard, &dev->flags))
 				discard_pending = 1;
+			WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
+			WARN_ON(dev->page != dev->orig_page);
 		}
 	if (!discard_pending &&
 	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
@@ -3086,7 +3129,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 		    !test_bit(R5_LOCKED, &dev->flags) &&
 		    !(test_bit(R5_UPTODATE, &dev->flags) ||
 		      test_bit(R5_Wantcompute, &dev->flags))) {
-			if (test_bit(R5_Insync, &dev->flags)) rcw++;
+			if (test_bit(R5_Insync, &dev->flags))
+				rcw++;
 			else
 				rcw += 2*disks;
 		}
@@ -3107,10 +3151,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
 			    test_bit(R5_Wantcompute, &dev->flags)) &&
 			    test_bit(R5_Insync, &dev->flags)) {
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-					pr_debug("Read_old block "
-						 "%d for r-m-w\n", i);
+				if (test_bit(STRIPE_PREREAD_ACTIVE,
+					     &sh->state)) {
+					pr_debug("Read_old block %d for r-m-w\n",
+						 i);
 					set_bit(R5_LOCKED, &dev->flags);
 					set_bit(R5_Wantread, &dev->flags);
 					s->locked++;
@@ -3133,10 +3177,9 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
 			      test_bit(R5_Wantcompute, &dev->flags))) {
 				rcw++;
-				if (!test_bit(R5_Insync, &dev->flags))
-					continue; /* it's a failed drive */
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+				if (test_bit(R5_Insync, &dev->flags) &&
+				    test_bit(STRIPE_PREREAD_ACTIVE,
+					     &sh->state)) {
 					pr_debug("Read_old block "
 						 "%d for Reconstruct\n", i);
 					set_bit(R5_LOCKED, &dev->flags);
@@ -5031,8 +5074,8 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
 	bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
 
 	set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
+	set_bit(STRIPE_HANDLE, &sh->state);
 
-	handle_stripe(sh);
 	release_stripe(sh);
 
 	return STRIPE_SECTORS;
@@ -5072,7 +5115,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 			/* already done this stripe */
 			continue;
 
-		sh = get_active_stripe(conf, sector, 0, 1, 0);
+		sh = get_active_stripe(conf, sector, 0, 1, 1);
 
 		if (!sh) {
 			/* failed to get a stripe - must wait */
@@ -5355,6 +5398,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
 					raid5_store_preread_threshold);
 
 static ssize_t
+raid5_show_skip_copy(struct mddev *mddev, char *page)
+{
+	struct r5conf *conf = mddev->private;
+	if (conf)
+		return sprintf(page, "%d\n", conf->skip_copy);
+	else
+		return 0;
+}
+
+static ssize_t
+raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
+{
+	struct r5conf *conf = mddev->private;
+	unsigned long new;
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	if (!conf)
+		return -ENODEV;
+
+	if (kstrtoul(page, 10, &new))
+		return -EINVAL;
+	new = !!new;
+	if (new == conf->skip_copy)
+		return len;
+
+	mddev_suspend(mddev);
+	conf->skip_copy = new;
+	if (new)
+		mddev->queue->backing_dev_info.capabilities |=
+						BDI_CAP_STABLE_WRITES;
+	else
+		mddev->queue->backing_dev_info.capabilities &=
+						~BDI_CAP_STABLE_WRITES;
+	mddev_resume(mddev);
+	return len;
+}
+
+static struct md_sysfs_entry
+raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
+					raid5_show_skip_copy,
+					raid5_store_skip_copy);
+
+
+static ssize_t
 stripe_cache_active_show(struct mddev *mddev, char *page)
 {
 	struct r5conf *conf = mddev->private;
@@ -5439,6 +5526,7 @@ static struct attribute *raid5_attrs[] = {
 	&raid5_stripecache_active.attr,
 	&raid5_preread_bypass_threshold.attr,
 	&raid5_group_thread_cnt.attr,
+	&raid5_skip_copy.attr,
 	NULL,
 };
 static struct attribute_group raid5_attrs_group = {
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 01ad8ae8f578..bc72cd4be5f8 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -232,7 +232,7 @@ struct stripe_head {
 		 */
 		struct bio	req, rreq;
 		struct bio_vec	vec, rvec;
-		struct page	*page;
+		struct page	*page, *orig_page;
 		struct bio	*toread, *read, *towrite, *written;
 		sector_t	sector;		/* sector of this page */
 		unsigned long	flags;
@@ -299,6 +299,7 @@ enum r5dev_flags {
 			 * data in, and now is a good time to write it out.
 			 */
 	R5_Discard,	/* Discard the stripe */
+	R5_SkipCopy,	/* Don't copy data from bio to stripe cache */
 };
 
 /*
@@ -436,6 +437,7 @@ struct r5conf {
 	atomic_t		pending_full_writes; /* full write backlog */
 	int			bypass_count; /* bypassed prereads */
 	int			bypass_threshold; /* preread nice */
+	int			skip_copy; /* Don't copy data from bio to stripe cache */
 	struct list_head	*last_hold; /* detect hold_list promotions */
 
 	atomic_t		reshape_stripes;/* stripes with pending writes for reshape */
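
---

Usage sketch (not part of the patch): the skip_copy attribute added above is a
per-array sysfs file. A minimal userspace toggle might look like the following;
the md0 device name is an assumption, and any nonzero value is normalized to 1
by raid5_store_skip_copy().

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* sysfs path for an assumed array named md0 */
		const char *path = "/sys/block/md0/md/skip_copy";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* enable zero-copy writes; while set, the patch forces
		 * stable pages via BDI_CAP_STABLE_WRITES */
		if (write(fd, "1", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}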

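Similarly (also not part of the patch): the md.c hunks make level, layout,
chunk_size, size and raid_disks changes fail on a read-only array. A quick
probe, again assuming an array named md0 that has been set read-only:

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/block/md0/md/raid_disks", O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* with the patch, update_raid_disks() returns -EROFS
		 * instead of attempting a reshape */
		if (write(fd, "5", 1) < 0 && errno == EROFS)
			printf("got EROFS as expected\n");
		close(fd);
		return 0;
	}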