aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-18 12:32:44 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-18 12:32:44 -0500
commit: ea88eeac0cb8328014b53d80ca631e8dc0dc18dc (patch)
tree: de605c1d0e7ab1e342eedad47c56061b65a6b790 /drivers/md
parent: 848b81415c42ff3dc9a4204749087b015c37ef66 (diff)
parent: a9add5d92b64ea57fb4c3b557c3891cdeb15fa0c (diff)
Merge tag 'md-3.8' of git://neil.brown.name/md
Pull md update from Neil Brown:
 "Mostly just little fixes. Probably biggest part is AVX accelerated
  RAID6 calculations."

* tag 'md-3.8' of git://neil.brown.name/md:
  md/raid5: add blktrace calls
  md/raid5: use async_tx_quiesce() instead of open-coding it.
  md: Use ->curr_resync as last completed request when cleanly aborting resync.
  lib/raid6: build proper files on corresponding arch
  lib/raid6: Add AVX2 optimized gen_syndrome functions
  lib/raid6: Add AVX2 optimized recovery functions
  md: Update checkpoint of resync/recovery based on time.
  md:Add place to update ->recovery_cp.
  md.c: re-indent various 'switch' statements.
  md: close race between removing and adding a device.
  md: removed unused variable in calc_sb_1_csm.
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/md.c     256
-rw-r--r--  drivers/md/md.h       2
-rw-r--r--  drivers/md/raid5.c   43
3 files changed, 171 insertions, 130 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4843b004c55..3db3d1b271f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1414,12 +1414,11 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1414 unsigned long long newcsum; 1414 unsigned long long newcsum;
1415 int size = 256 + le32_to_cpu(sb->max_dev)*2; 1415 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1416 __le32 *isuper = (__le32*)sb; 1416 __le32 *isuper = (__le32*)sb;
1417 int i;
1418 1417
1419 disk_csum = sb->sb_csum; 1418 disk_csum = sb->sb_csum;
1420 sb->sb_csum = 0; 1419 sb->sb_csum = 0;
1421 newcsum = 0; 1420 newcsum = 0;
1422 for (i=0; size>=4; size -= 4 ) 1421 for (; size >= 4; size -= 4)
1423 newcsum += le32_to_cpu(*isuper++); 1422 newcsum += le32_to_cpu(*isuper++);
1424 1423
1425 if (size == 2) 1424 if (size == 2)
@@ -4753,6 +4752,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
4753 } 4752 }
4754 mddev_get(mddev); 4753 mddev_get(mddev);
4755 spin_unlock(&all_mddevs_lock); 4754 spin_unlock(&all_mddevs_lock);
4755 if (entry->store == new_dev_store)
4756 flush_workqueue(md_misc_wq);
4756 rv = mddev_lock(mddev); 4757 rv = mddev_lock(mddev);
4757 if (!rv) { 4758 if (!rv) {
4758 rv = entry->store(mddev, page, length); 4759 rv = entry->store(mddev, page, length);
@@ -6346,24 +6347,23 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6346 * Commands dealing with the RAID driver but not any 6347 * Commands dealing with the RAID driver but not any
6347 * particular array: 6348 * particular array:
6348 */ 6349 */
6349 switch (cmd) 6350 switch (cmd) {
6350 { 6351 case RAID_VERSION:
6351 case RAID_VERSION: 6352 err = get_version(argp);
6352 err = get_version(argp); 6353 goto done;
6353 goto done;
6354 6354
6355 case PRINT_RAID_DEBUG: 6355 case PRINT_RAID_DEBUG:
6356 err = 0; 6356 err = 0;
6357 md_print_devices(); 6357 md_print_devices();
6358 goto done; 6358 goto done;
6359 6359
6360#ifndef MODULE 6360#ifndef MODULE
6361 case RAID_AUTORUN: 6361 case RAID_AUTORUN:
6362 err = 0; 6362 err = 0;
6363 autostart_arrays(arg); 6363 autostart_arrays(arg);
6364 goto done; 6364 goto done;
6365#endif 6365#endif
6366 default:; 6366 default:;
6367 } 6367 }
6368 6368
6369 /* 6369 /*
@@ -6398,6 +6398,10 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6398 goto abort; 6398 goto abort;
6399 } 6399 }
6400 6400
6401 if (cmd == ADD_NEW_DISK)
6402 /* need to ensure md_delayed_delete() has completed */
6403 flush_workqueue(md_misc_wq);
6404
6401 err = mddev_lock(mddev); 6405 err = mddev_lock(mddev);
6402 if (err) { 6406 if (err) {
6403 printk(KERN_INFO 6407 printk(KERN_INFO
@@ -6406,50 +6410,44 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6406 goto abort; 6410 goto abort;
6407 } 6411 }
6408 6412
6409 switch (cmd) 6413 if (cmd == SET_ARRAY_INFO) {
6410 { 6414 mdu_array_info_t info;
6411 case SET_ARRAY_INFO: 6415 if (!arg)
6412 { 6416 memset(&info, 0, sizeof(info));
6413 mdu_array_info_t info; 6417 else if (copy_from_user(&info, argp, sizeof(info))) {
6414 if (!arg) 6418 err = -EFAULT;
6415 memset(&info, 0, sizeof(info)); 6419 goto abort_unlock;
6416 else if (copy_from_user(&info, argp, sizeof(info))) { 6420 }
6417 err = -EFAULT; 6421 if (mddev->pers) {
6418 goto abort_unlock; 6422 err = update_array_info(mddev, &info);
6419 } 6423 if (err) {
6420 if (mddev->pers) { 6424 printk(KERN_WARNING "md: couldn't update"
6421 err = update_array_info(mddev, &info); 6425 " array info. %d\n", err);
6422 if (err) { 6426 goto abort_unlock;
6423 printk(KERN_WARNING "md: couldn't update"
6424 " array info. %d\n", err);
6425 goto abort_unlock;
6426 }
6427 goto done_unlock;
6428 }
6429 if (!list_empty(&mddev->disks)) {
6430 printk(KERN_WARNING
6431 "md: array %s already has disks!\n",
6432 mdname(mddev));
6433 err = -EBUSY;
6434 goto abort_unlock;
6435 }
6436 if (mddev->raid_disks) {
6437 printk(KERN_WARNING
6438 "md: array %s already initialised!\n",
6439 mdname(mddev));
6440 err = -EBUSY;
6441 goto abort_unlock;
6442 }
6443 err = set_array_info(mddev, &info);
6444 if (err) {
6445 printk(KERN_WARNING "md: couldn't set"
6446 " array info. %d\n", err);
6447 goto abort_unlock;
6448 }
6449 } 6427 }
6450 goto done_unlock; 6428 goto done_unlock;
6451 6429 }
6452 default:; 6430 if (!list_empty(&mddev->disks)) {
6431 printk(KERN_WARNING
6432 "md: array %s already has disks!\n",
6433 mdname(mddev));
6434 err = -EBUSY;
6435 goto abort_unlock;
6436 }
6437 if (mddev->raid_disks) {
6438 printk(KERN_WARNING
6439 "md: array %s already initialised!\n",
6440 mdname(mddev));
6441 err = -EBUSY;
6442 goto abort_unlock;
6443 }
6444 err = set_array_info(mddev, &info);
6445 if (err) {
6446 printk(KERN_WARNING "md: couldn't set"
6447 " array info. %d\n", err);
6448 goto abort_unlock;
6449 }
6450 goto done_unlock;
6453 } 6451 }
6454 6452
6455 /* 6453 /*
@@ -6468,52 +6466,51 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6468 /* 6466 /*
6469 * Commands even a read-only array can execute: 6467 * Commands even a read-only array can execute:
6470 */ 6468 */
6471 switch (cmd) 6469 switch (cmd) {
6472 { 6470 case GET_BITMAP_FILE:
6473 case GET_BITMAP_FILE: 6471 err = get_bitmap_file(mddev, argp);
6474 err = get_bitmap_file(mddev, argp); 6472 goto done_unlock;
6475 goto done_unlock;
6476 6473
6477 case RESTART_ARRAY_RW: 6474 case RESTART_ARRAY_RW:
6478 err = restart_array(mddev); 6475 err = restart_array(mddev);
6479 goto done_unlock; 6476 goto done_unlock;
6480 6477
6481 case STOP_ARRAY: 6478 case STOP_ARRAY:
6482 err = do_md_stop(mddev, 0, bdev); 6479 err = do_md_stop(mddev, 0, bdev);
6483 goto done_unlock; 6480 goto done_unlock;
6484 6481
6485 case STOP_ARRAY_RO: 6482 case STOP_ARRAY_RO:
6486 err = md_set_readonly(mddev, bdev); 6483 err = md_set_readonly(mddev, bdev);
6487 goto done_unlock; 6484 goto done_unlock;
6488 6485
6489 case BLKROSET: 6486 case BLKROSET:
6490 if (get_user(ro, (int __user *)(arg))) { 6487 if (get_user(ro, (int __user *)(arg))) {
6491 err = -EFAULT; 6488 err = -EFAULT;
6492 goto done_unlock; 6489 goto done_unlock;
6493 } 6490 }
6494 err = -EINVAL; 6491 err = -EINVAL;
6495 6492
6496 /* if the bdev is going readonly the value of mddev->ro 6493 /* if the bdev is going readonly the value of mddev->ro
6497 * does not matter, no writes are coming 6494 * does not matter, no writes are coming
6498 */ 6495 */
6499 if (ro) 6496 if (ro)
6500 goto done_unlock; 6497 goto done_unlock;
6501 6498
6502 /* are we are already prepared for writes? */ 6499 /* are we are already prepared for writes? */
6503 if (mddev->ro != 1) 6500 if (mddev->ro != 1)
6504 goto done_unlock; 6501 goto done_unlock;
6505 6502
6506 /* transitioning to readauto need only happen for 6503 /* transitioning to readauto need only happen for
6507 * arrays that call md_write_start 6504 * arrays that call md_write_start
6508 */ 6505 */
6509 if (mddev->pers) { 6506 if (mddev->pers) {
6510 err = restart_array(mddev); 6507 err = restart_array(mddev);
6511 if (err == 0) { 6508 if (err == 0) {
6512 mddev->ro = 2; 6509 mddev->ro = 2;
6513 set_disk_ro(mddev->gendisk, 0); 6510 set_disk_ro(mddev->gendisk, 0);
6514 }
6515 } 6511 }
6516 goto done_unlock; 6512 }
6513 goto done_unlock;
6517 } 6514 }
6518 6515
6519 /* 6516 /*
@@ -6535,37 +6532,36 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6535 } 6532 }
6536 } 6533 }
6537 6534
6538 switch (cmd) 6535 switch (cmd) {
6536 case ADD_NEW_DISK:
6539 { 6537 {
6540 case ADD_NEW_DISK: 6538 mdu_disk_info_t info;
6541 { 6539 if (copy_from_user(&info, argp, sizeof(info)))
6542 mdu_disk_info_t info; 6540 err = -EFAULT;
6543 if (copy_from_user(&info, argp, sizeof(info))) 6541 else
6544 err = -EFAULT; 6542 err = add_new_disk(mddev, &info);
6545 else 6543 goto done_unlock;
6546 err = add_new_disk(mddev, &info); 6544 }
6547 goto done_unlock;
6548 }
6549 6545
6550 case HOT_REMOVE_DISK: 6546 case HOT_REMOVE_DISK:
6551 err = hot_remove_disk(mddev, new_decode_dev(arg)); 6547 err = hot_remove_disk(mddev, new_decode_dev(arg));
6552 goto done_unlock; 6548 goto done_unlock;
6553 6549
6554 case HOT_ADD_DISK: 6550 case HOT_ADD_DISK:
6555 err = hot_add_disk(mddev, new_decode_dev(arg)); 6551 err = hot_add_disk(mddev, new_decode_dev(arg));
6556 goto done_unlock; 6552 goto done_unlock;
6557 6553
6558 case RUN_ARRAY: 6554 case RUN_ARRAY:
6559 err = do_md_run(mddev); 6555 err = do_md_run(mddev);
6560 goto done_unlock; 6556 goto done_unlock;
6561 6557
6562 case SET_BITMAP_FILE: 6558 case SET_BITMAP_FILE:
6563 err = set_bitmap_file(mddev, (int)arg); 6559 err = set_bitmap_file(mddev, (int)arg);
6564 goto done_unlock; 6560 goto done_unlock;
6565 6561
6566 default: 6562 default:
6567 err = -EINVAL; 6563 err = -EINVAL;
6568 goto abort_unlock; 6564 goto abort_unlock;
6569 } 6565 }
6570 6566
6571done_unlock: 6567done_unlock:
@@ -7184,6 +7180,7 @@ void md_done_sync(struct mddev *mddev, int blocks, int ok)
7184 wake_up(&mddev->recovery_wait); 7180 wake_up(&mddev->recovery_wait);
7185 if (!ok) { 7181 if (!ok) {
7186 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 7182 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7183 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7187 md_wakeup_thread(mddev->thread); 7184 md_wakeup_thread(mddev->thread);
7188 // stop recovery, signal do_sync .... 7185 // stop recovery, signal do_sync ....
7189 } 7186 }
@@ -7281,6 +7278,7 @@ EXPORT_SYMBOL_GPL(md_allow_write);
7281 7278
7282#define SYNC_MARKS 10 7279#define SYNC_MARKS 10
7283#define SYNC_MARK_STEP (3*HZ) 7280#define SYNC_MARK_STEP (3*HZ)
7281#define UPDATE_FREQUENCY (5*60*HZ)
7284void md_do_sync(struct md_thread *thread) 7282void md_do_sync(struct md_thread *thread)
7285{ 7283{
7286 struct mddev *mddev = thread->mddev; 7284 struct mddev *mddev = thread->mddev;
@@ -7289,6 +7287,7 @@ void md_do_sync(struct md_thread *thread)
7289 window; 7287 window;
7290 sector_t max_sectors,j, io_sectors; 7288 sector_t max_sectors,j, io_sectors;
7291 unsigned long mark[SYNC_MARKS]; 7289 unsigned long mark[SYNC_MARKS];
7290 unsigned long update_time;
7292 sector_t mark_cnt[SYNC_MARKS]; 7291 sector_t mark_cnt[SYNC_MARKS];
7293 int last_mark,m; 7292 int last_mark,m;
7294 struct list_head *tmp; 7293 struct list_head *tmp;
@@ -7448,6 +7447,7 @@ void md_do_sync(struct md_thread *thread)
7448 mddev->curr_resync_completed = j; 7447 mddev->curr_resync_completed = j;
7449 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 7448 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7450 md_new_event(mddev); 7449 md_new_event(mddev);
7450 update_time = jiffies;
7451 7451
7452 blk_start_plug(&plug); 7452 blk_start_plug(&plug);
7453 while (j < max_sectors) { 7453 while (j < max_sectors) {
@@ -7459,6 +7459,7 @@ void md_do_sync(struct md_thread *thread)
7459 ((mddev->curr_resync > mddev->curr_resync_completed && 7459 ((mddev->curr_resync > mddev->curr_resync_completed &&
7460 (mddev->curr_resync - mddev->curr_resync_completed) 7460 (mddev->curr_resync - mddev->curr_resync_completed)
7461 > (max_sectors >> 4)) || 7461 > (max_sectors >> 4)) ||
7462 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7462 (j - mddev->curr_resync_completed)*2 7463 (j - mddev->curr_resync_completed)*2
7463 >= mddev->resync_max - mddev->curr_resync_completed 7464 >= mddev->resync_max - mddev->curr_resync_completed
7464 )) { 7465 )) {
@@ -7466,6 +7467,10 @@ void md_do_sync(struct md_thread *thread)
7466 wait_event(mddev->recovery_wait, 7467 wait_event(mddev->recovery_wait,
7467 atomic_read(&mddev->recovery_active) == 0); 7468 atomic_read(&mddev->recovery_active) == 0);
7468 mddev->curr_resync_completed = j; 7469 mddev->curr_resync_completed = j;
7470 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7471 j > mddev->recovery_cp)
7472 mddev->recovery_cp = j;
7473 update_time = jiffies;
7469 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 7474 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7470 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 7475 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7471 } 7476 }
@@ -7570,8 +7575,13 @@ void md_do_sync(struct md_thread *thread)
7570 printk(KERN_INFO 7575 printk(KERN_INFO
7571 "md: checkpointing %s of %s.\n", 7576 "md: checkpointing %s of %s.\n",
7572 desc, mdname(mddev)); 7577 desc, mdname(mddev));
7573 mddev->recovery_cp = 7578 if (test_bit(MD_RECOVERY_ERROR,
7574 mddev->curr_resync_completed; 7579 &mddev->recovery))
7580 mddev->recovery_cp =
7581 mddev->curr_resync_completed;
7582 else
7583 mddev->recovery_cp =
7584 mddev->curr_resync;
7575 } 7585 }
7576 } else 7586 } else
7577 mddev->recovery_cp = MaxSector; 7587 mddev->recovery_cp = MaxSector;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1e2fc3d9c74..eca59c3074e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -307,6 +307,7 @@ struct mddev {
307 * REQUEST: user-space has requested a sync (used with SYNC) 307 * REQUEST: user-space has requested a sync (used with SYNC)
308 * CHECK: user-space request for check-only, no repair 308 * CHECK: user-space request for check-only, no repair
309 * RESHAPE: A reshape is happening 309 * RESHAPE: A reshape is happening
310 * ERROR: sync-action interrupted because io-error
310 * 311 *
311 * If neither SYNC or RESHAPE are set, then it is a recovery. 312 * If neither SYNC or RESHAPE are set, then it is a recovery.
312 */ 313 */
@@ -320,6 +321,7 @@ struct mddev {
320#define MD_RECOVERY_CHECK 7 321#define MD_RECOVERY_CHECK 7
321#define MD_RECOVERY_RESHAPE 8 322#define MD_RECOVERY_RESHAPE 8
322#define MD_RECOVERY_FROZEN 9 323#define MD_RECOVERY_FROZEN 9
324#define MD_RECOVERY_ERROR 10
323 325
324 unsigned long recovery; 326 unsigned long recovery;
325 /* If a RAID personality determines that recovery (of a particular 327 /* If a RAID personality determines that recovery (of a particular
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8d8555bf3e1..19d77a02663 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,8 @@
53#include <linux/cpu.h> 53#include <linux/cpu.h>
54#include <linux/slab.h> 54#include <linux/slab.h>
55#include <linux/ratelimit.h> 55#include <linux/ratelimit.h>
56#include <trace/events/block.h>
57
56#include "md.h" 58#include "md.h"
57#include "raid5.h" 59#include "raid5.h"
58#include "raid0.h" 60#include "raid0.h"
@@ -182,6 +184,8 @@ static void return_io(struct bio *return_bi)
182 return_bi = bi->bi_next; 184 return_bi = bi->bi_next;
183 bi->bi_next = NULL; 185 bi->bi_next = NULL;
184 bi->bi_size = 0; 186 bi->bi_size = 0;
187 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
188 bi, 0);
185 bio_endio(bi, 0); 189 bio_endio(bi, 0);
186 bi = return_bi; 190 bi = return_bi;
187 } 191 }
@@ -670,6 +674,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
670 bi->bi_next = NULL; 674 bi->bi_next = NULL;
671 if (rrdev) 675 if (rrdev)
672 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); 676 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
677 trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
678 bi, disk_devt(conf->mddev->gendisk),
679 sh->dev[i].sector);
673 generic_make_request(bi); 680 generic_make_request(bi);
674 } 681 }
675 if (rrdev) { 682 if (rrdev) {
@@ -697,6 +704,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
697 rbi->bi_io_vec[0].bv_offset = 0; 704 rbi->bi_io_vec[0].bv_offset = 0;
698 rbi->bi_size = STRIPE_SIZE; 705 rbi->bi_size = STRIPE_SIZE;
699 rbi->bi_next = NULL; 706 rbi->bi_next = NULL;
707 trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
708 rbi, disk_devt(conf->mddev->gendisk),
709 sh->dev[i].sector);
700 generic_make_request(rbi); 710 generic_make_request(rbi);
701 } 711 }
702 if (!rdev && !rrdev) { 712 if (!rdev && !rrdev) {
@@ -2853,8 +2863,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2853 pr_debug("for sector %llu, rmw=%d rcw=%d\n", 2863 pr_debug("for sector %llu, rmw=%d rcw=%d\n",
2854 (unsigned long long)sh->sector, rmw, rcw); 2864 (unsigned long long)sh->sector, rmw, rcw);
2855 set_bit(STRIPE_HANDLE, &sh->state); 2865 set_bit(STRIPE_HANDLE, &sh->state);
2856 if (rmw < rcw && rmw > 0) 2866 if (rmw < rcw && rmw > 0) {
2857 /* prefer read-modify-write, but need to get some data */ 2867 /* prefer read-modify-write, but need to get some data */
2868 blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
2869 (unsigned long long)sh->sector, rmw);
2858 for (i = disks; i--; ) { 2870 for (i = disks; i--; ) {
2859 struct r5dev *dev = &sh->dev[i]; 2871 struct r5dev *dev = &sh->dev[i];
2860 if ((dev->towrite || i == sh->pd_idx) && 2872 if ((dev->towrite || i == sh->pd_idx) &&
@@ -2865,7 +2877,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2865 if ( 2877 if (
2866 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 2878 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2867 pr_debug("Read_old block " 2879 pr_debug("Read_old block "
2868 "%d for r-m-w\n", i); 2880 "%d for r-m-w\n", i);
2869 set_bit(R5_LOCKED, &dev->flags); 2881 set_bit(R5_LOCKED, &dev->flags);
2870 set_bit(R5_Wantread, &dev->flags); 2882 set_bit(R5_Wantread, &dev->flags);
2871 s->locked++; 2883 s->locked++;
@@ -2875,8 +2887,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2875 } 2887 }
2876 } 2888 }
2877 } 2889 }
2890 }
2878 if (rcw <= rmw && rcw > 0) { 2891 if (rcw <= rmw && rcw > 0) {
2879 /* want reconstruct write, but need to get some data */ 2892 /* want reconstruct write, but need to get some data */
2893 int qread =0;
2880 rcw = 0; 2894 rcw = 0;
2881 for (i = disks; i--; ) { 2895 for (i = disks; i--; ) {
2882 struct r5dev *dev = &sh->dev[i]; 2896 struct r5dev *dev = &sh->dev[i];
@@ -2895,12 +2909,17 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2895 set_bit(R5_LOCKED, &dev->flags); 2909 set_bit(R5_LOCKED, &dev->flags);
2896 set_bit(R5_Wantread, &dev->flags); 2910 set_bit(R5_Wantread, &dev->flags);
2897 s->locked++; 2911 s->locked++;
2912 qread++;
2898 } else { 2913 } else {
2899 set_bit(STRIPE_DELAYED, &sh->state); 2914 set_bit(STRIPE_DELAYED, &sh->state);
2900 set_bit(STRIPE_HANDLE, &sh->state); 2915 set_bit(STRIPE_HANDLE, &sh->state);
2901 } 2916 }
2902 } 2917 }
2903 } 2918 }
2919 if (rcw)
2920 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
2921 (unsigned long long)sh->sector,
2922 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
2904 } 2923 }
2905 /* now if nothing is locked, and if we have enough data, 2924 /* now if nothing is locked, and if we have enough data,
2906 * we can start a write request 2925 * we can start a write request
@@ -3222,10 +3241,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
3222 3241
3223 } 3242 }
3224 /* done submitting copies, wait for them to complete */ 3243 /* done submitting copies, wait for them to complete */
3225 if (tx) { 3244 async_tx_quiesce(&tx);
3226 async_tx_ack(tx);
3227 dma_wait_for_async_tx(tx);
3228 }
3229} 3245}
3230 3246
3231/* 3247/*
@@ -3901,6 +3917,8 @@ static void raid5_align_endio(struct bio *bi, int error)
3901 rdev_dec_pending(rdev, conf->mddev); 3917 rdev_dec_pending(rdev, conf->mddev);
3902 3918
3903 if (!error && uptodate) { 3919 if (!error && uptodate) {
3920 trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
3921 raid_bi, 0);
3904 bio_endio(raid_bi, 0); 3922 bio_endio(raid_bi, 0);
3905 if (atomic_dec_and_test(&conf->active_aligned_reads)) 3923 if (atomic_dec_and_test(&conf->active_aligned_reads))
3906 wake_up(&conf->wait_for_stripe); 3924 wake_up(&conf->wait_for_stripe);
@@ -4005,6 +4023,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
4005 atomic_inc(&conf->active_aligned_reads); 4023 atomic_inc(&conf->active_aligned_reads);
4006 spin_unlock_irq(&conf->device_lock); 4024 spin_unlock_irq(&conf->device_lock);
4007 4025
4026 trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
4027 align_bi, disk_devt(mddev->gendisk),
4028 raid_bio->bi_sector);
4008 generic_make_request(align_bi); 4029 generic_make_request(align_bi);
4009 return 1; 4030 return 1;
4010 } else { 4031 } else {
@@ -4079,6 +4100,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
4079 struct stripe_head *sh; 4100 struct stripe_head *sh;
4080 struct mddev *mddev = cb->cb.data; 4101 struct mddev *mddev = cb->cb.data;
4081 struct r5conf *conf = mddev->private; 4102 struct r5conf *conf = mddev->private;
4103 int cnt = 0;
4082 4104
4083 if (cb->list.next && !list_empty(&cb->list)) { 4105 if (cb->list.next && !list_empty(&cb->list)) {
4084 spin_lock_irq(&conf->device_lock); 4106 spin_lock_irq(&conf->device_lock);
@@ -4093,9 +4115,11 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
4093 smp_mb__before_clear_bit(); 4115 smp_mb__before_clear_bit();
4094 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state); 4116 clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
4095 __release_stripe(conf, sh); 4117 __release_stripe(conf, sh);
4118 cnt++;
4096 } 4119 }
4097 spin_unlock_irq(&conf->device_lock); 4120 spin_unlock_irq(&conf->device_lock);
4098 } 4121 }
4122 trace_block_unplug(mddev->queue, cnt, !from_schedule);
4099 kfree(cb); 4123 kfree(cb);
4100} 4124}
4101 4125
@@ -4353,6 +4377,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
4353 if ( rw == WRITE ) 4377 if ( rw == WRITE )
4354 md_write_end(mddev); 4378 md_write_end(mddev);
4355 4379
4380 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
4381 bi, 0);
4356 bio_endio(bi, 0); 4382 bio_endio(bi, 0);
4357 } 4383 }
4358} 4384}
@@ -4729,8 +4755,11 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
4729 handled++; 4755 handled++;
4730 } 4756 }
4731 remaining = raid5_dec_bi_active_stripes(raid_bio); 4757 remaining = raid5_dec_bi_active_stripes(raid_bio);
4732 if (remaining == 0) 4758 if (remaining == 0) {
4759 trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
4760 raid_bio, 0);
4733 bio_endio(raid_bio, 0); 4761 bio_endio(raid_bio, 0);
4762 }
4734 if (atomic_dec_and_test(&conf->active_aligned_reads)) 4763 if (atomic_dec_and_test(&conf->active_aligned_reads))
4735 wake_up(&conf->wait_for_stripe); 4764 wake_up(&conf->wait_for_stripe);
4736 return handled; 4765 return handled;