diff options
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- | drivers/md/raid1.c | 76 |
1 files changed, 52 insertions, 24 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ba643e4bfac9..28839a8193f2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -360,13 +360,14 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
360 | { | 360 | { |
361 | const unsigned long this_sector = r1_bio->sector; | 361 | const unsigned long this_sector = r1_bio->sector; |
362 | int new_disk = conf->last_used, disk = new_disk; | 362 | int new_disk = conf->last_used, disk = new_disk; |
363 | int wonly_disk = -1; | ||
363 | const int sectors = r1_bio->sectors; | 364 | const int sectors = r1_bio->sectors; |
364 | sector_t new_distance, current_distance; | 365 | sector_t new_distance, current_distance; |
365 | mdk_rdev_t *new_rdev, *rdev; | 366 | mdk_rdev_t *rdev; |
366 | 367 | ||
367 | rcu_read_lock(); | 368 | rcu_read_lock(); |
368 | /* | 369 | /* |
369 | * Check if it if we can balance. We can balance on the whole | 370 | * Check if we can balance. We can balance on the whole |
370 | * device if no resync is going on, or below the resync window. | 371 | * device if no resync is going on, or below the resync window. |
371 | * We take the first readable disk when above the resync window. | 372 | * We take the first readable disk when above the resync window. |
372 | */ | 373 | */ |
@@ -376,11 +377,16 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
376 | /* Choose the first operation device, for consistency */ | 377 | /* Choose the first operation device, for consistency */ |
377 | new_disk = 0; | 378 | new_disk = 0; |
378 | 379 | ||
379 | while ((new_rdev=conf->mirrors[new_disk].rdev) == NULL || | 380 | for (rdev = conf->mirrors[new_disk].rdev; |
380 | !new_rdev->in_sync) { | 381 | !rdev || !rdev->in_sync |
381 | new_disk++; | 382 | || test_bit(WriteMostly, &rdev->flags); |
382 | if (new_disk == conf->raid_disks) { | 383 | rdev = conf->mirrors[++new_disk].rdev) { |
383 | new_disk = -1; | 384 | |
385 | if (rdev && rdev->in_sync) | ||
386 | wonly_disk = new_disk; | ||
387 | |||
388 | if (new_disk == conf->raid_disks - 1) { | ||
389 | new_disk = wonly_disk; | ||
384 | break; | 390 | break; |
385 | } | 391 | } |
386 | } | 392 | } |
@@ -389,16 +395,26 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
389 | 395 | ||
390 | 396 | ||
391 | /* make sure the disk is operational */ | 397 | /* make sure the disk is operational */ |
392 | while ((new_rdev=conf->mirrors[new_disk].rdev) == NULL || | 398 | for (rdev = conf->mirrors[new_disk].rdev; |
393 | !new_rdev->in_sync) { | 399 | !rdev || !rdev->in_sync || |
400 | test_bit(WriteMostly, &rdev->flags); | ||
401 | rdev = conf->mirrors[new_disk].rdev) { | ||
402 | |||
403 | if (rdev && rdev->in_sync) | ||
404 | wonly_disk = new_disk; | ||
405 | |||
394 | if (new_disk <= 0) | 406 | if (new_disk <= 0) |
395 | new_disk = conf->raid_disks; | 407 | new_disk = conf->raid_disks; |
396 | new_disk--; | 408 | new_disk--; |
397 | if (new_disk == disk) { | 409 | if (new_disk == disk) { |
398 | new_disk = -1; | 410 | new_disk = wonly_disk; |
399 | goto rb_out; | 411 | break; |
400 | } | 412 | } |
401 | } | 413 | } |
414 | |||
415 | if (new_disk < 0) | ||
416 | goto rb_out; | ||
417 | |||
402 | disk = new_disk; | 418 | disk = new_disk; |
403 | /* now disk == new_disk == starting point for search */ | 419 | /* now disk == new_disk == starting point for search */ |
404 | 420 | ||
@@ -419,37 +435,41 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
419 | disk = conf->raid_disks; | 435 | disk = conf->raid_disks; |
420 | disk--; | 436 | disk--; |
421 | 437 | ||
422 | if ((rdev=conf->mirrors[disk].rdev) == NULL || | 438 | rdev = conf->mirrors[disk].rdev; |
423 | !rdev->in_sync) | 439 | |
440 | if (!rdev || | ||
441 | !rdev->in_sync || | ||
442 | test_bit(WriteMostly, &rdev->flags)) | ||
424 | continue; | 443 | continue; |
425 | 444 | ||
426 | if (!atomic_read(&rdev->nr_pending)) { | 445 | if (!atomic_read(&rdev->nr_pending)) { |
427 | new_disk = disk; | 446 | new_disk = disk; |
428 | new_rdev = rdev; | ||
429 | break; | 447 | break; |
430 | } | 448 | } |
431 | new_distance = abs(this_sector - conf->mirrors[disk].head_position); | 449 | new_distance = abs(this_sector - conf->mirrors[disk].head_position); |
432 | if (new_distance < current_distance) { | 450 | if (new_distance < current_distance) { |
433 | current_distance = new_distance; | 451 | current_distance = new_distance; |
434 | new_disk = disk; | 452 | new_disk = disk; |
435 | new_rdev = rdev; | ||
436 | } | 453 | } |
437 | } while (disk != conf->last_used); | 454 | } while (disk != conf->last_used); |
438 | 455 | ||
439 | rb_out: | 456 | rb_out: |
440 | 457 | ||
441 | 458 | ||
442 | if (new_disk >= 0) { | 459 | if (new_disk >= 0) { |
443 | conf->next_seq_sect = this_sector + sectors; | 460 | rdev = conf->mirrors[new_disk].rdev; |
444 | conf->last_used = new_disk; | 461 | if (!rdev) |
445 | atomic_inc(&new_rdev->nr_pending); | 462 | goto retry; |
446 | if (!new_rdev->in_sync) { | 463 | atomic_inc(&rdev->nr_pending); |
464 | if (!rdev->in_sync) { | ||
447 | /* cannot risk returning a device that failed | 465 | /* cannot risk returning a device that failed |
448 | * before we inc'ed nr_pending | 466 | * before we inc'ed nr_pending |
449 | */ | 467 | */ |
450 | atomic_dec(&new_rdev->nr_pending); | 468 | atomic_dec(&rdev->nr_pending); |
451 | goto retry; | 469 | goto retry; |
452 | } | 470 | } |
471 | conf->next_seq_sect = this_sector + sectors; | ||
472 | conf->last_used = new_disk; | ||
453 | } | 473 | } |
454 | rcu_read_unlock(); | 474 | rcu_read_unlock(); |
455 | 475 | ||
@@ -1109,6 +1129,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1109 | sector_t max_sector, nr_sectors; | 1129 | sector_t max_sector, nr_sectors; |
1110 | int disk; | 1130 | int disk; |
1111 | int i; | 1131 | int i; |
1132 | int wonly; | ||
1112 | int write_targets = 0; | 1133 | int write_targets = 0; |
1113 | int sync_blocks; | 1134 | int sync_blocks; |
1114 | int still_degraded = 0; | 1135 | int still_degraded = 0; |
@@ -1164,14 +1185,21 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1164 | */ | 1185 | */ |
1165 | disk = conf->last_used; | 1186 | disk = conf->last_used; |
1166 | /* make sure disk is operational */ | 1187 | /* make sure disk is operational */ |
1167 | 1188 | wonly = disk; | |
1168 | while (conf->mirrors[disk].rdev == NULL || | 1189 | while (conf->mirrors[disk].rdev == NULL || |
1169 | !conf->mirrors[disk].rdev->in_sync) { | 1190 | !conf->mirrors[disk].rdev->in_sync || |
1191 | test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags) | ||
1192 | ) { | ||
1193 | if (conf->mirrors[disk].rdev && | ||
1194 | conf->mirrors[disk].rdev->in_sync) | ||
1195 | wonly = disk; | ||
1170 | if (disk <= 0) | 1196 | if (disk <= 0) |
1171 | disk = conf->raid_disks; | 1197 | disk = conf->raid_disks; |
1172 | disk--; | 1198 | disk--; |
1173 | if (disk == conf->last_used) | 1199 | if (disk == conf->last_used) { |
1200 | disk = wonly; | ||
1174 | break; | 1201 | break; |
1202 | } | ||
1175 | } | 1203 | } |
1176 | conf->last_used = disk; | 1204 | conf->last_used = disk; |
1177 | atomic_inc(&conf->mirrors[disk].rdev->nr_pending); | 1205 | atomic_inc(&conf->mirrors[disk].rdev->nr_pending); |