 drivers/md/raid5.c | 119 ++++++++++++++++++++++++++++++++++++++++---------
 drivers/md/raid5.h |   4 +++-
 2 files changed, 101 insertions(+), 22 deletions(-)
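
In short: this patch adds an optional zero-copy write path to RAID5. With the new skip_copy sysfs attribute enabled, a write bio that fully covers a stripe page is not copied into the stripe cache; instead async_copy_data() points dev->page at the bio's own page and remembers the cache page in dev->orig_page, restoring it once the write completes or fails. Since parity is then computed directly from bio pages, the store routine also flags the queue with BDI_CAP_STABLE_WRITES so upper layers keep those pages unmodified while I/O is in flight.
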
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 348a857ab0ff..d69fd9888c2c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -487,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh)
 	int num = sh->raid_conf->pool_size;
 
 	for (i = 0; i < num ; i++) {
+		WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
 		p = sh->dev[i].page;
 		if (!p)
 			continue;
@@ -507,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh)
 			return 1;
 		}
 		sh->dev[i].page = page;
+		sh->dev[i].orig_page = page;
 	}
 	return 0;
 }
@@ -863,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
 				bi->bi_rw |= REQ_NOMERGE;
 
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].vec.bv_page = sh->dev[i].page;
 			bi->bi_vcnt = 1;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			bi->bi_io_vec[0].bv_offset = 0;
@@ -907,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			else
 				rbi->bi_iter.bi_sector = (sh->sector
 						  + rrdev->data_offset);
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].rvec.bv_page = sh->dev[i].page;
 			rbi->bi_vcnt = 1;
 			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			rbi->bi_io_vec[0].bv_offset = 0;
@@ -935,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 }
 
 static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page *page,
-	sector_t sector, struct dma_async_tx_descriptor *tx)
+async_copy_data(int frombio, struct bio *bio, struct page **page,
+	sector_t sector, struct dma_async_tx_descriptor *tx,
+	struct stripe_head *sh)
 {
 	struct bio_vec bvl;
 	struct bvec_iter iter;
@@ -973,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
 		if (clen > 0) {
 			b_offset += bvl.bv_offset;
 			bio_page = bvl.bv_page;
-			if (frombio)
-				tx = async_memcpy(page, bio_page, page_offset,
+			if (frombio) {
+				if (sh->raid_conf->skip_copy &&
+				    b_offset == 0 && page_offset == 0 &&
+				    clen == STRIPE_SIZE)
+					*page = bio_page;
+				else
+					tx = async_memcpy(*page, bio_page, page_offset,
 						  b_offset, clen, &submit);
-			else
-				tx = async_memcpy(bio_page, page, b_offset,
+			} else
+				tx = async_memcpy(bio_page, *page, b_offset,
 						  page_offset, clen, &submit);
 		}
 		/* chain the operations */
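
Note the guard on the zero-copy branch: the page is only borrowed when b_offset == 0, page_offset == 0 and clen == STRIPE_SIZE, i.e. when the bio segment is page-aligned and covers the whole stripe page. Partial or misaligned segments still fall back to async_memcpy().
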
@@ -1053,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh)
 			spin_unlock_irq(&sh->stripe_lock);
 			while (rbi && rbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
-				tx = async_copy_data(0, rbi, dev->page,
-					dev->sector, tx);
+				tx = async_copy_data(0, rbi, &dev->page,
+					dev->sector, tx, sh);
 				rbi = r5_next_bio(rbi, dev->sector);
 			}
 		}
@@ -1392,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 			BUG_ON(dev->written);
 			wbi = dev->written = chosen;
 			spin_unlock_irq(&sh->stripe_lock);
+			WARN_ON(dev->page != dev->orig_page);
 
 			while (wbi && wbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
@@ -1401,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 					set_bit(R5_SyncIO, &dev->flags);
 				if (wbi->bi_rw & REQ_DISCARD)
 					set_bit(R5_Discard, &dev->flags);
-				else
-					tx = async_copy_data(1, wbi, dev->page,
-						dev->sector, tx);
+				else {
+					tx = async_copy_data(1, wbi, &dev->page,
+						dev->sector, tx, sh);
+					if (dev->page != dev->orig_page) {
+						set_bit(R5_SkipCopy, &dev->flags);
+						clear_bit(R5_UPTODATE, &dev->flags);
+						clear_bit(R5_OVERWRITE, &dev->flags);
+					}
+				}
 				wbi = r5_next_bio(wbi, dev->sector);
 			}
 		}
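
After the drain, dev->page != dev->orig_page is the tell-tale that the stripe entry now aliases bio memory: R5_SkipCopy records that, and R5_UPTODATE/R5_OVERWRITE are cleared, presumably because the borrowed page is handed back once the write finishes, so the stripe cache must not later serve reads as if it still held this data.
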
@@ -1434,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
 		struct r5dev *dev = &sh->dev[i];
 
 		if (dev->written || i == pd_idx || i == qd_idx) {
-			if (!discard)
+			if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
 				set_bit(R5_UPTODATE, &dev->flags);
 			if (fua)
 				set_bit(R5_WantFUA, &dev->flags);
@@ -1847,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		osh = get_free_stripe(conf, hash);
 		unlock_device_hash_lock(conf, hash);
 		atomic_set(&nsh->count, 1);
-		for(i=0; i<conf->pool_size; i++)
+		for(i=0; i<conf->pool_size; i++) {
 			nsh->dev[i].page = osh->dev[i].page;
+			nsh->dev[i].orig_page = osh->dev[i].page;
+		}
 		for( ; i<newsize; i++)
 			nsh->dev[i].page = NULL;
 		nsh->hash_lock_index = hash;
@@ -1904,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 			if (nsh->dev[i].page == NULL) {
 				struct page *p = alloc_page(GFP_NOIO);
 				nsh->dev[i].page = p;
+				nsh->dev[i].orig_page = p;
 				if (!p)
 					err = -ENOMEM;
 			}
@@ -2141,24 +2165,20 @@ static void raid5_end_write_request(struct bio *bi, int error)
 }
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
 	struct r5dev *dev = &sh->dev[i];
 
 	bio_init(&dev->req);
 	dev->req.bi_io_vec = &dev->vec;
-	dev->req.bi_vcnt++;
-	dev->req.bi_max_vecs++;
+	dev->req.bi_max_vecs = 1;
 	dev->req.bi_private = sh;
-	dev->vec.bv_page = dev->page;
 
 	bio_init(&dev->rreq);
 	dev->rreq.bi_io_vec = &dev->rvec;
-	dev->rreq.bi_vcnt++;
-	dev->rreq.bi_max_vecs++;
+	dev->rreq.bi_max_vecs = 1;
 	dev->rreq.bi_private = sh;
-	dev->rvec.bv_page = dev->page;
 
 	dev->flags = 0;
 	dev->sector = compute_blocknr(sh, i, previous);
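
With skip_copy the page backing a device can change between I/Os, so bv_page is no longer frozen at stripe-build time; ops_run_io (above) now refreshes vec.bv_page and rvec.bv_page just before each submission. Dropping the bi_vcnt increment here is safe because ops_run_io already sets bi_vcnt = 1 on every submission, and bi_max_vecs = 1 replaces the old increment-from-zero idiom since bio_init() has cleared the bio either way.
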
@@ -2758,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 		/* and fail all 'written' */
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
+		if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
+			WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].page = sh->dev[i].orig_page;
+		}
+
 		if (bi) bitmap_end = 1;
 		while (bi && bi->bi_iter.bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
@@ -3002,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 			dev = &sh->dev[i];
 			if (!test_bit(R5_LOCKED, &dev->flags) &&
 			    (test_bit(R5_UPTODATE, &dev->flags) ||
-			     test_bit(R5_Discard, &dev->flags))) {
+			     test_bit(R5_Discard, &dev->flags) ||
+			     test_bit(R5_SkipCopy, &dev->flags))) {
 				/* We can return any write requests */
 				struct bio *wbi, *wbi2;
 				pr_debug("Return write for disc %d\n", i);
 				if (test_and_clear_bit(R5_Discard, &dev->flags))
 					clear_bit(R5_UPTODATE, &dev->flags);
+				if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
+					WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
+					dev->page = dev->orig_page;
+				}
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_iter.bi_sector <
@@ -3026,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 						0);
 			} else if (test_bit(R5_Discard, &dev->flags))
 				discard_pending = 1;
+			WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
+			WARN_ON(dev->page != dev->orig_page);
 		}
 		if (!discard_pending &&
 		    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
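
Completion mirrors the drain path: a stripe marked R5_SkipCopy is now eligible to have its write bios returned even though R5_UPTODATE is clear, and dev->page is put back to dev->orig_page before the device is reused. handle_failed_stripe() above performs the same restoration for aborted writes, and the two WARN_ONs assert the invariant that no device leaves this path still holding a borrowed page.
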
@@ -5366,6 +5398,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
 					raid5_store_preread_threshold);
 
 static ssize_t
+raid5_show_skip_copy(struct mddev *mddev, char *page)
+{
+	struct r5conf *conf = mddev->private;
+	if (conf)
+		return sprintf(page, "%d\n", conf->skip_copy);
+	else
+		return 0;
+}
+
+static ssize_t
+raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
+{
+	struct r5conf *conf = mddev->private;
+	unsigned long new;
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	if (!conf)
+		return -ENODEV;
+
+	if (kstrtoul(page, 10, &new))
+		return -EINVAL;
+	new = !!new;
+	if (new == conf->skip_copy)
+		return len;
+
+	mddev_suspend(mddev);
+	conf->skip_copy = new;
+	if (new)
+		mddev->queue->backing_dev_info.capabilities |=
+						BDI_CAP_STABLE_WRITES;
+	else
+		mddev->queue->backing_dev_info.capabilities &=
+						~BDI_CAP_STABLE_WRITES;
+	mddev_resume(mddev);
+	return len;
+}
+
+static struct md_sysfs_entry
+raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
+					raid5_show_skip_copy,
+					raid5_store_skip_copy);
+
+
+static ssize_t
 stripe_cache_active_show(struct mddev *mddev, char *page)
 {
 	struct r5conf *conf = mddev->private;
@@ -5450,6 +5526,7 @@ static struct attribute *raid5_attrs[] = {
 	&raid5_stripecache_active.attr,
 	&raid5_preread_bypass_threshold.attr,
 	&raid5_group_thread_cnt.attr,
+	&raid5_skip_copy.attr,
 	NULL,
 };
 static struct attribute_group raid5_attrs_group = {
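
For illustration, a minimal userspace sketch of flipping the new knob; the path assumes the usual md sysfs layout under /sys/block/<dev>/md/, and md0 is a hypothetical array name. Note the store routine suspends the array around the switch, so in-flight writes quiesce before the copy behavior changes.

/* Hypothetical example: enable skip_copy on /dev/md0.
 * Assumes the standard md sysfs layout; adjust the device name as needed.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/block/md0/md/skip_copy", O_WRONLY);

	if (fd < 0) {
		perror("open skip_copy");
		return 1;
	}
	/* the kernel parses this with kstrtoul() and normalizes it to 0/1 */
	if (write(fd, "1", 1) != 1) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
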
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 01ad8ae8f578..bc72cd4be5f8 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -232,7 +232,7 @@ struct stripe_head {
 		 */
 		struct bio	req, rreq;
 		struct bio_vec	vec, rvec;
-		struct page	*page;
+		struct page	*page, *orig_page;
 		struct bio	*toread, *read, *towrite, *written;
 		sector_t	sector;		/* sector of this page */
 		unsigned long	flags;
@@ -299,6 +299,7 @@ enum r5dev_flags {
 			 * data in, and now is a good time to write it out.
 			 */
 	R5_Discard,	/* Discard the stripe */
+	R5_SkipCopy,	/* Don't copy data from bio to stripe cache */
 };
 
 /*
@@ -436,6 +437,7 @@ struct r5conf {
 	atomic_t		pending_full_writes; /* full write backlog */
 	int			bypass_count; /* bypassed prereads */
 	int			bypass_threshold; /* preread nice */
+	int			skip_copy; /* Don't copy data from bio to stripe cache */
 	struct list_head	*last_hold; /* detect hold_list promotions */
 
 	atomic_t		reshape_stripes; /* stripes with pending writes for reshape */