diff options
| -rw-r--r-- | drivers/md/raid5.c | 119 | ||||
| -rw-r--r-- | drivers/md/raid5.h | 4 |
2 files changed, 101 insertions, 22 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 348a857ab0ff..d69fd9888c2c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -487,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh) | |||
| 487 | int num = sh->raid_conf->pool_size; | 487 | int num = sh->raid_conf->pool_size; |
| 488 | 488 | ||
| 489 | for (i = 0; i < num ; i++) { | 489 | for (i = 0; i < num ; i++) { |
| 490 | WARN_ON(sh->dev[i].page != sh->dev[i].orig_page); | ||
| 490 | p = sh->dev[i].page; | 491 | p = sh->dev[i].page; |
| 491 | if (!p) | 492 | if (!p) |
| 492 | continue; | 493 | continue; |
| @@ -507,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh) | |||
| 507 | return 1; | 508 | return 1; |
| 508 | } | 509 | } |
| 509 | sh->dev[i].page = page; | 510 | sh->dev[i].page = page; |
| 511 | sh->dev[i].orig_page = page; | ||
| 510 | } | 512 | } |
| 511 | return 0; | 513 | return 0; |
| 512 | } | 514 | } |
| @@ -863,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
| 863 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) | 865 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) |
| 864 | bi->bi_rw |= REQ_NOMERGE; | 866 | bi->bi_rw |= REQ_NOMERGE; |
| 865 | 867 | ||
| 868 | if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) | ||
| 869 | WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); | ||
| 870 | sh->dev[i].vec.bv_page = sh->dev[i].page; | ||
| 866 | bi->bi_vcnt = 1; | 871 | bi->bi_vcnt = 1; |
| 867 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 872 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
| 868 | bi->bi_io_vec[0].bv_offset = 0; | 873 | bi->bi_io_vec[0].bv_offset = 0; |
| @@ -907,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
| 907 | else | 912 | else |
| 908 | rbi->bi_iter.bi_sector = (sh->sector | 913 | rbi->bi_iter.bi_sector = (sh->sector |
| 909 | + rrdev->data_offset); | 914 | + rrdev->data_offset); |
| 915 | if (test_bit(R5_SkipCopy, &sh->dev[i].flags)) | ||
| 916 | WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); | ||
| 917 | sh->dev[i].rvec.bv_page = sh->dev[i].page; | ||
| 910 | rbi->bi_vcnt = 1; | 918 | rbi->bi_vcnt = 1; |
| 911 | rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 919 | rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
| 912 | rbi->bi_io_vec[0].bv_offset = 0; | 920 | rbi->bi_io_vec[0].bv_offset = 0; |
| @@ -935,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
| 935 | } | 943 | } |
| 936 | 944 | ||
| 937 | static struct dma_async_tx_descriptor * | 945 | static struct dma_async_tx_descriptor * |
| 938 | async_copy_data(int frombio, struct bio *bio, struct page *page, | 946 | async_copy_data(int frombio, struct bio *bio, struct page **page, |
| 939 | sector_t sector, struct dma_async_tx_descriptor *tx) | 947 | sector_t sector, struct dma_async_tx_descriptor *tx, |
| 948 | struct stripe_head *sh) | ||
| 940 | { | 949 | { |
| 941 | struct bio_vec bvl; | 950 | struct bio_vec bvl; |
| 942 | struct bvec_iter iter; | 951 | struct bvec_iter iter; |
| @@ -973,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, | |||
| 973 | if (clen > 0) { | 982 | if (clen > 0) { |
| 974 | b_offset += bvl.bv_offset; | 983 | b_offset += bvl.bv_offset; |
| 975 | bio_page = bvl.bv_page; | 984 | bio_page = bvl.bv_page; |
| 976 | if (frombio) | 985 | if (frombio) { |
| 977 | tx = async_memcpy(page, bio_page, page_offset, | 986 | if (sh->raid_conf->skip_copy && |
| 987 | b_offset == 0 && page_offset == 0 && | ||
| 988 | clen == STRIPE_SIZE) | ||
| 989 | *page = bio_page; | ||
| 990 | else | ||
| 991 | tx = async_memcpy(*page, bio_page, page_offset, | ||
| 978 | b_offset, clen, &submit); | 992 | b_offset, clen, &submit); |
| 979 | else | 993 | } else |
| 980 | tx = async_memcpy(bio_page, page, b_offset, | 994 | tx = async_memcpy(bio_page, *page, b_offset, |
| 981 | page_offset, clen, &submit); | 995 | page_offset, clen, &submit); |
| 982 | } | 996 | } |
| 983 | /* chain the operations */ | 997 | /* chain the operations */ |
| @@ -1053,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
| 1053 | spin_unlock_irq(&sh->stripe_lock); | 1067 | spin_unlock_irq(&sh->stripe_lock); |
| 1054 | while (rbi && rbi->bi_iter.bi_sector < | 1068 | while (rbi && rbi->bi_iter.bi_sector < |
| 1055 | dev->sector + STRIPE_SECTORS) { | 1069 | dev->sector + STRIPE_SECTORS) { |
| 1056 | tx = async_copy_data(0, rbi, dev->page, | 1070 | tx = async_copy_data(0, rbi, &dev->page, |
| 1057 | dev->sector, tx); | 1071 | dev->sector, tx, sh); |
| 1058 | rbi = r5_next_bio(rbi, dev->sector); | 1072 | rbi = r5_next_bio(rbi, dev->sector); |
| 1059 | } | 1073 | } |
| 1060 | } | 1074 | } |
| @@ -1392,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
| 1392 | BUG_ON(dev->written); | 1406 | BUG_ON(dev->written); |
| 1393 | wbi = dev->written = chosen; | 1407 | wbi = dev->written = chosen; |
| 1394 | spin_unlock_irq(&sh->stripe_lock); | 1408 | spin_unlock_irq(&sh->stripe_lock); |
| 1409 | WARN_ON(dev->page != dev->orig_page); | ||
| 1395 | 1410 | ||
| 1396 | while (wbi && wbi->bi_iter.bi_sector < | 1411 | while (wbi && wbi->bi_iter.bi_sector < |
| 1397 | dev->sector + STRIPE_SECTORS) { | 1412 | dev->sector + STRIPE_SECTORS) { |
| @@ -1401,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
| 1401 | set_bit(R5_SyncIO, &dev->flags); | 1416 | set_bit(R5_SyncIO, &dev->flags); |
| 1402 | if (wbi->bi_rw & REQ_DISCARD) | 1417 | if (wbi->bi_rw & REQ_DISCARD) |
| 1403 | set_bit(R5_Discard, &dev->flags); | 1418 | set_bit(R5_Discard, &dev->flags); |
| 1404 | else | 1419 | else { |
| 1405 | tx = async_copy_data(1, wbi, dev->page, | 1420 | tx = async_copy_data(1, wbi, &dev->page, |
| 1406 | dev->sector, tx); | 1421 | dev->sector, tx, sh); |
| 1422 | if (dev->page != dev->orig_page) { | ||
| 1423 | set_bit(R5_SkipCopy, &dev->flags); | ||
| 1424 | clear_bit(R5_UPTODATE, &dev->flags); | ||
| 1425 | clear_bit(R5_OVERWRITE, &dev->flags); | ||
| 1426 | } | ||
| 1427 | } | ||
| 1407 | wbi = r5_next_bio(wbi, dev->sector); | 1428 | wbi = r5_next_bio(wbi, dev->sector); |
| 1408 | } | 1429 | } |
| 1409 | } | 1430 | } |
| @@ -1434,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref) | |||
| 1434 | struct r5dev *dev = &sh->dev[i]; | 1455 | struct r5dev *dev = &sh->dev[i]; |
| 1435 | 1456 | ||
| 1436 | if (dev->written || i == pd_idx || i == qd_idx) { | 1457 | if (dev->written || i == pd_idx || i == qd_idx) { |
| 1437 | if (!discard) | 1458 | if (!discard && !test_bit(R5_SkipCopy, &dev->flags)) |
| 1438 | set_bit(R5_UPTODATE, &dev->flags); | 1459 | set_bit(R5_UPTODATE, &dev->flags); |
| 1439 | if (fua) | 1460 | if (fua) |
| 1440 | set_bit(R5_WantFUA, &dev->flags); | 1461 | set_bit(R5_WantFUA, &dev->flags); |
| @@ -1847,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize) | |||
| 1847 | osh = get_free_stripe(conf, hash); | 1868 | osh = get_free_stripe(conf, hash); |
| 1848 | unlock_device_hash_lock(conf, hash); | 1869 | unlock_device_hash_lock(conf, hash); |
| 1849 | atomic_set(&nsh->count, 1); | 1870 | atomic_set(&nsh->count, 1); |
| 1850 | for(i=0; i<conf->pool_size; i++) | 1871 | for(i=0; i<conf->pool_size; i++) { |
| 1851 | nsh->dev[i].page = osh->dev[i].page; | 1872 | nsh->dev[i].page = osh->dev[i].page; |
| 1873 | nsh->dev[i].orig_page = osh->dev[i].page; | ||
| 1874 | } | ||
| 1852 | for( ; i<newsize; i++) | 1875 | for( ; i<newsize; i++) |
| 1853 | nsh->dev[i].page = NULL; | 1876 | nsh->dev[i].page = NULL; |
| 1854 | nsh->hash_lock_index = hash; | 1877 | nsh->hash_lock_index = hash; |
| @@ -1904,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) | |||
| 1904 | if (nsh->dev[i].page == NULL) { | 1927 | if (nsh->dev[i].page == NULL) { |
| 1905 | struct page *p = alloc_page(GFP_NOIO); | 1928 | struct page *p = alloc_page(GFP_NOIO); |
| 1906 | nsh->dev[i].page = p; | 1929 | nsh->dev[i].page = p; |
| 1930 | nsh->dev[i].orig_page = p; | ||
| 1907 | if (!p) | 1931 | if (!p) |
| 1908 | err = -ENOMEM; | 1932 | err = -ENOMEM; |
| 1909 | } | 1933 | } |
| @@ -2141,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error) | |||
| 2141 | } | 2165 | } |
| 2142 | 2166 | ||
| 2143 | static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous); | 2167 | static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous); |
| 2144 | 2168 | ||
| 2145 | static void raid5_build_block(struct stripe_head *sh, int i, int previous) | 2169 | static void raid5_build_block(struct stripe_head *sh, int i, int previous) |
| 2146 | { | 2170 | { |
| 2147 | struct r5dev *dev = &sh->dev[i]; | 2171 | struct r5dev *dev = &sh->dev[i]; |
| 2148 | 2172 | ||
| 2149 | bio_init(&dev->req); | 2173 | bio_init(&dev->req); |
| 2150 | dev->req.bi_io_vec = &dev->vec; | 2174 | dev->req.bi_io_vec = &dev->vec; |
| 2151 | dev->req.bi_vcnt++; | 2175 | dev->req.bi_max_vecs = 1; |
| 2152 | dev->req.bi_max_vecs++; | ||
| 2153 | dev->req.bi_private = sh; | 2176 | dev->req.bi_private = sh; |
| 2154 | dev->vec.bv_page = dev->page; | ||
| 2155 | 2177 | ||
| 2156 | bio_init(&dev->rreq); | 2178 | bio_init(&dev->rreq); |
| 2157 | dev->rreq.bi_io_vec = &dev->rvec; | 2179 | dev->rreq.bi_io_vec = &dev->rvec; |
| 2158 | dev->rreq.bi_vcnt++; | 2180 | dev->rreq.bi_max_vecs = 1; |
| 2159 | dev->rreq.bi_max_vecs++; | ||
| 2160 | dev->rreq.bi_private = sh; | 2181 | dev->rreq.bi_private = sh; |
| 2161 | dev->rvec.bv_page = dev->page; | ||
| 2162 | 2182 | ||
| 2163 | dev->flags = 0; | 2183 | dev->flags = 0; |
| 2164 | dev->sector = compute_blocknr(sh, i, previous); | 2184 | dev->sector = compute_blocknr(sh, i, previous); |
| @@ -2758,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
| 2758 | /* and fail all 'written' */ | 2778 | /* and fail all 'written' */ |
| 2759 | bi = sh->dev[i].written; | 2779 | bi = sh->dev[i].written; |
| 2760 | sh->dev[i].written = NULL; | 2780 | sh->dev[i].written = NULL; |
| 2781 | if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) { | ||
| 2782 | WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); | ||
| 2783 | sh->dev[i].page = sh->dev[i].orig_page; | ||
| 2784 | } | ||
| 2785 | |||
| 2761 | if (bi) bitmap_end = 1; | 2786 | if (bi) bitmap_end = 1; |
| 2762 | while (bi && bi->bi_iter.bi_sector < | 2787 | while (bi && bi->bi_iter.bi_sector < |
| 2763 | sh->dev[i].sector + STRIPE_SECTORS) { | 2788 | sh->dev[i].sector + STRIPE_SECTORS) { |
| @@ -3002,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
| 3002 | dev = &sh->dev[i]; | 3027 | dev = &sh->dev[i]; |
| 3003 | if (!test_bit(R5_LOCKED, &dev->flags) && | 3028 | if (!test_bit(R5_LOCKED, &dev->flags) && |
| 3004 | (test_bit(R5_UPTODATE, &dev->flags) || | 3029 | (test_bit(R5_UPTODATE, &dev->flags) || |
| 3005 | test_bit(R5_Discard, &dev->flags))) { | 3030 | test_bit(R5_Discard, &dev->flags) || |
| 3031 | test_bit(R5_SkipCopy, &dev->flags))) { | ||
| 3006 | /* We can return any write requests */ | 3032 | /* We can return any write requests */ |
| 3007 | struct bio *wbi, *wbi2; | 3033 | struct bio *wbi, *wbi2; |
| 3008 | pr_debug("Return write for disc %d\n", i); | 3034 | pr_debug("Return write for disc %d\n", i); |
| 3009 | if (test_and_clear_bit(R5_Discard, &dev->flags)) | 3035 | if (test_and_clear_bit(R5_Discard, &dev->flags)) |
| 3010 | clear_bit(R5_UPTODATE, &dev->flags); | 3036 | clear_bit(R5_UPTODATE, &dev->flags); |
| 3037 | if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) { | ||
| 3038 | WARN_ON(test_bit(R5_UPTODATE, &dev->flags)); | ||
| 3039 | dev->page = dev->orig_page; | ||
| 3040 | } | ||
| 3011 | wbi = dev->written; | 3041 | wbi = dev->written; |
| 3012 | dev->written = NULL; | 3042 | dev->written = NULL; |
| 3013 | while (wbi && wbi->bi_iter.bi_sector < | 3043 | while (wbi && wbi->bi_iter.bi_sector < |
| @@ -3026,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
| 3026 | 0); | 3056 | 0); |
| 3027 | } else if (test_bit(R5_Discard, &dev->flags)) | 3057 | } else if (test_bit(R5_Discard, &dev->flags)) |
| 3028 | discard_pending = 1; | 3058 | discard_pending = 1; |
| 3059 | WARN_ON(test_bit(R5_SkipCopy, &dev->flags)); | ||
| 3060 | WARN_ON(dev->page != dev->orig_page); | ||
| 3029 | } | 3061 | } |
| 3030 | if (!discard_pending && | 3062 | if (!discard_pending && |
| 3031 | test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { | 3063 | test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { |
| @@ -5366,6 +5398,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, | |||
| 5366 | raid5_store_preread_threshold); | 5398 | raid5_store_preread_threshold); |
| 5367 | 5399 | ||
| 5368 | static ssize_t | 5400 | static ssize_t |
| 5401 | raid5_show_skip_copy(struct mddev *mddev, char *page) | ||
| 5402 | { | ||
| 5403 | struct r5conf *conf = mddev->private; | ||
| 5404 | if (conf) | ||
| 5405 | return sprintf(page, "%d\n", conf->skip_copy); | ||
| 5406 | else | ||
| 5407 | return 0; | ||
| 5408 | } | ||
| 5409 | |||
| 5410 | static ssize_t | ||
| 5411 | raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) | ||
| 5412 | { | ||
| 5413 | struct r5conf *conf = mddev->private; | ||
| 5414 | unsigned long new; | ||
| 5415 | if (len >= PAGE_SIZE) | ||
| 5416 | return -EINVAL; | ||
| 5417 | if (!conf) | ||
| 5418 | return -ENODEV; | ||
| 5419 | |||
| 5420 | if (kstrtoul(page, 10, &new)) | ||
| 5421 | return -EINVAL; | ||
| 5422 | new = !!new; | ||
| 5423 | if (new == conf->skip_copy) | ||
| 5424 | return len; | ||
| 5425 | |||
| 5426 | mddev_suspend(mddev); | ||
| 5427 | conf->skip_copy = new; | ||
| 5428 | if (new) | ||
| 5429 | mddev->queue->backing_dev_info.capabilities |= | ||
| 5430 | BDI_CAP_STABLE_WRITES; | ||
| 5431 | else | ||
| 5432 | mddev->queue->backing_dev_info.capabilities &= | ||
| 5433 | ~BDI_CAP_STABLE_WRITES; | ||
| 5434 | mddev_resume(mddev); | ||
| 5435 | return len; | ||
| 5436 | } | ||
| 5437 | |||
| 5438 | static struct md_sysfs_entry | ||
| 5439 | raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR, | ||
| 5440 | raid5_show_skip_copy, | ||
| 5441 | raid5_store_skip_copy); | ||
| 5442 | |||
| 5443 | |||
| 5444 | static ssize_t | ||
| 5369 | stripe_cache_active_show(struct mddev *mddev, char *page) | 5445 | stripe_cache_active_show(struct mddev *mddev, char *page) |
| 5370 | { | 5446 | { |
| 5371 | struct r5conf *conf = mddev->private; | 5447 | struct r5conf *conf = mddev->private; |
| @@ -5450,6 +5526,7 @@ static struct attribute *raid5_attrs[] = { | |||
| 5450 | &raid5_stripecache_active.attr, | 5526 | &raid5_stripecache_active.attr, |
| 5451 | &raid5_preread_bypass_threshold.attr, | 5527 | &raid5_preread_bypass_threshold.attr, |
| 5452 | &raid5_group_thread_cnt.attr, | 5528 | &raid5_group_thread_cnt.attr, |
| 5529 | &raid5_skip_copy.attr, | ||
| 5453 | NULL, | 5530 | NULL, |
| 5454 | }; | 5531 | }; |
| 5455 | static struct attribute_group raid5_attrs_group = { | 5532 | static struct attribute_group raid5_attrs_group = { |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 01ad8ae8f578..bc72cd4be5f8 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
| @@ -232,7 +232,7 @@ struct stripe_head { | |||
| 232 | */ | 232 | */ |
| 233 | struct bio req, rreq; | 233 | struct bio req, rreq; |
| 234 | struct bio_vec vec, rvec; | 234 | struct bio_vec vec, rvec; |
| 235 | struct page *page; | 235 | struct page *page, *orig_page; |
| 236 | struct bio *toread, *read, *towrite, *written; | 236 | struct bio *toread, *read, *towrite, *written; |
| 237 | sector_t sector; /* sector of this page */ | 237 | sector_t sector; /* sector of this page */ |
| 238 | unsigned long flags; | 238 | unsigned long flags; |
| @@ -299,6 +299,7 @@ enum r5dev_flags { | |||
| 299 | * data in, and now is a good time to write it out. | 299 | * data in, and now is a good time to write it out. |
| 300 | */ | 300 | */ |
| 301 | R5_Discard, /* Discard the stripe */ | 301 | R5_Discard, /* Discard the stripe */ |
| 302 | R5_SkipCopy, /* Don't copy data from bio to stripe cache */ | ||
| 302 | }; | 303 | }; |
| 303 | 304 | ||
| 304 | /* | 305 | /* |
| @@ -436,6 +437,7 @@ struct r5conf { | |||
| 436 | atomic_t pending_full_writes; /* full write backlog */ | 437 | atomic_t pending_full_writes; /* full write backlog */ |
| 437 | int bypass_count; /* bypassed prereads */ | 438 | int bypass_count; /* bypassed prereads */ |
| 438 | int bypass_threshold; /* preread nice */ | 439 | int bypass_threshold; /* preread nice */ |
| 440 | int skip_copy; /* Don't copy data from bio to stripe cache */ | ||
| 439 | struct list_head *last_hold; /* detect hold_list promotions */ | 441 | struct list_head *last_hold; /* detect hold_list promotions */ |
| 440 | 442 | ||
| 441 | atomic_t reshape_stripes; /* stripes with pending writes for reshape */ | 443 | atomic_t reshape_stripes; /* stripes with pending writes for reshape */ |
