aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@cse.unsw.edu.au>2005-06-21 20:17:12 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 22:07:43 -0400
commit06d91a5fe0b50c9060e70bdf7786f8a3c66249db (patch)
tree95a8b9228534cebb12eb31c1cc9cc0c45f685410
parentfca4d848f0e6fafdc2b25f8a0cf1e76935f13ac2 (diff)
[PATCH] md: improve locking on 'safemode' and move superblock writes
When md marks the superblock dirty before a write, it calls generic_make_request (to write the superblock) from within generic_make_request (to write the first dirty block), which could cause problems later. With this patch, the superblock write is always done by the helper thread, and write requests are delayed until that write completes. Also, the locking around marking the array dirty and writing the superblock is improved to avoid possible races. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/md/md.c73
-rw-r--r--drivers/md/raid1.c4
-rw-r--r--drivers/md/raid10.c5
-rw-r--r--drivers/md/raid5.c6
-rw-r--r--drivers/md/raid6main.c6
-rw-r--r--include/linux/raid/md.h2
-rw-r--r--include/linux/raid/md_k.h7
7 files changed, 82 insertions, 21 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c842e34d850e..177d2a7d7cea 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -218,6 +218,8 @@ static mddev_t * mddev_find(dev_t unit)
218 INIT_LIST_HEAD(&new->all_mddevs); 218 INIT_LIST_HEAD(&new->all_mddevs);
219 init_timer(&new->safemode_timer); 219 init_timer(&new->safemode_timer);
220 atomic_set(&new->active, 1); 220 atomic_set(&new->active, 1);
221 bio_list_init(&new->write_list);
222 spin_lock_init(&new->write_lock);
221 223
222 new->queue = blk_alloc_queue(GFP_KERNEL); 224 new->queue = blk_alloc_queue(GFP_KERNEL);
223 if (!new->queue) { 225 if (!new->queue) {
@@ -1251,9 +1253,11 @@ static void md_update_sb(mddev_t * mddev)
1251 int err, count = 100; 1253 int err, count = 100;
1252 struct list_head *tmp; 1254 struct list_head *tmp;
1253 mdk_rdev_t *rdev; 1255 mdk_rdev_t *rdev;
1256 int sync_req;
1254 1257
1255 mddev->sb_dirty = 0;
1256repeat: 1258repeat:
1259 spin_lock(&mddev->write_lock);
1260 sync_req = mddev->in_sync;
1257 mddev->utime = get_seconds(); 1261 mddev->utime = get_seconds();
1258 mddev->events ++; 1262 mddev->events ++;
1259 1263
@@ -1272,8 +1276,12 @@ repeat:
1272 * do not write anything to disk if using 1276 * do not write anything to disk if using
1273 * nonpersistent superblocks 1277 * nonpersistent superblocks
1274 */ 1278 */
1275 if (!mddev->persistent) 1279 if (!mddev->persistent) {
1280 mddev->sb_dirty = 0;
1281 spin_unlock(&mddev->write_lock);
1276 return; 1282 return;
1283 }
1284 spin_unlock(&mddev->write_lock);
1277 1285
1278 dprintk(KERN_INFO 1286 dprintk(KERN_INFO
1279 "md: updating %s RAID superblock on device (in sync %d)\n", 1287 "md: updating %s RAID superblock on device (in sync %d)\n",
@@ -1304,6 +1312,15 @@ repeat:
1304 printk(KERN_ERR \ 1312 printk(KERN_ERR \
1305 "md: excessive errors occurred during superblock update, exiting\n"); 1313 "md: excessive errors occurred during superblock update, exiting\n");
1306 } 1314 }
1315 spin_lock(&mddev->write_lock);
1316 if (mddev->in_sync != sync_req) {
1317 /* have to write it out again */
1318 spin_unlock(&mddev->write_lock);
1319 goto repeat;
1320 }
1321 mddev->sb_dirty = 0;
1322 spin_unlock(&mddev->write_lock);
1323
1307} 1324}
1308 1325
1309/* 1326/*
@@ -3178,19 +3195,31 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
3178} 3195}
3179 3196
3180 3197
3181void md_write_start(mddev_t *mddev) 3198/* md_write_start(mddev, bi)
3199 * If we need to update some array metadata (e.g. 'active' flag
3200 * in superblock) before writing, queue bi for later writing
3201 * and return 0, else return 1 and it will be written now
3202 */
3203int md_write_start(mddev_t *mddev, struct bio *bi)
3182{ 3204{
3183 if (!atomic_read(&mddev->writes_pending)) { 3205 if (bio_data_dir(bi) != WRITE)
3184 mddev_lock_uninterruptible(mddev); 3206 return 1;
3185 if (mddev->in_sync) { 3207
3186 mddev->in_sync = 0; 3208 atomic_inc(&mddev->writes_pending);
3187 del_timer(&mddev->safemode_timer); 3209 spin_lock(&mddev->write_lock);
3188 md_update_sb(mddev); 3210 if (mddev->in_sync == 0 && mddev->sb_dirty == 0) {
3189 } 3211 spin_unlock(&mddev->write_lock);
3190 atomic_inc(&mddev->writes_pending); 3212 return 1;
3191 mddev_unlock(mddev); 3213 }
3192 } else 3214 bio_list_add(&mddev->write_list, bi);
3193 atomic_inc(&mddev->writes_pending); 3215
3216 if (mddev->in_sync) {
3217 mddev->in_sync = 0;
3218 mddev->sb_dirty = 1;
3219 }
3220 spin_unlock(&mddev->write_lock);
3221 md_wakeup_thread(mddev->thread);
3222 return 0;
3194} 3223}
3195 3224
3196void md_write_end(mddev_t *mddev) 3225void md_write_end(mddev_t *mddev)
@@ -3472,6 +3501,7 @@ void md_check_recovery(mddev_t *mddev)
3472 mddev->sb_dirty || 3501 mddev->sb_dirty ||
3473 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || 3502 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
3474 test_bit(MD_RECOVERY_DONE, &mddev->recovery) || 3503 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
3504 mddev->write_list.head ||
3475 (mddev->safemode == 1) || 3505 (mddev->safemode == 1) ||
3476 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) 3506 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
3477 && !mddev->in_sync && mddev->recovery_cp == MaxSector) 3507 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
@@ -3480,7 +3510,9 @@ void md_check_recovery(mddev_t *mddev)
3480 3510
3481 if (mddev_trylock(mddev)==0) { 3511 if (mddev_trylock(mddev)==0) {
3482 int spares =0; 3512 int spares =0;
3513 struct bio *blist;
3483 3514
3515 spin_lock(&mddev->write_lock);
3484 if (mddev->safemode && !atomic_read(&mddev->writes_pending) && 3516 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
3485 !mddev->in_sync && mddev->recovery_cp == MaxSector) { 3517 !mddev->in_sync && mddev->recovery_cp == MaxSector) {
3486 mddev->in_sync = 1; 3518 mddev->in_sync = 1;
@@ -3488,9 +3520,22 @@ void md_check_recovery(mddev_t *mddev)
3488 } 3520 }
3489 if (mddev->safemode == 1) 3521 if (mddev->safemode == 1)
3490 mddev->safemode = 0; 3522 mddev->safemode = 0;
3523 blist = bio_list_get(&mddev->write_list);
3524 spin_unlock(&mddev->write_lock);
3491 3525
3492 if (mddev->sb_dirty) 3526 if (mddev->sb_dirty)
3493 md_update_sb(mddev); 3527 md_update_sb(mddev);
3528
3529 while (blist) {
3530 struct bio *b = blist;
3531 blist = blist->bi_next;
3532 b->bi_next = NULL;
3533 generic_make_request(b);
3534 /* we already counted this, so need to un-count */
3535 md_write_end(mddev);
3536 }
3537
3538
3494 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && 3539 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
3495 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { 3540 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
3496 /* resync/recovery still happening */ 3541 /* resync/recovery still happening */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b34ad56362df..3f1280bbaf39 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -530,6 +530,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
530 * thread has put up a bar for new requests. 530 * thread has put up a bar for new requests.
531 * Continue immediately if no resync is active currently. 531 * Continue immediately if no resync is active currently.
532 */ 532 */
533 if (md_write_start(mddev, bio)==0)
534 return 0;
533 spin_lock_irq(&conf->resync_lock); 535 spin_lock_irq(&conf->resync_lock);
534 wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); 536 wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
535 conf->nr_pending++; 537 conf->nr_pending++;
@@ -611,7 +613,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
611 rcu_read_unlock(); 613 rcu_read_unlock();
612 614
613 atomic_set(&r1_bio->remaining, 1); 615 atomic_set(&r1_bio->remaining, 1);
614 md_write_start(mddev); 616
615 for (i = 0; i < disks; i++) { 617 for (i = 0; i < disks; i++) {
616 struct bio *mbio; 618 struct bio *mbio;
617 if (!r1_bio->bios[i]) 619 if (!r1_bio->bios[i])
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9ae21504db8a..bfc9f52f0ecf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -700,6 +700,9 @@ static int make_request(request_queue_t *q, struct bio * bio)
700 return 0; 700 return 0;
701 } 701 }
702 702
703 if (md_write_start(mddev, bio) == 0)
704 return 0;
705
703 /* 706 /*
704 * Register the new request and wait if the reconstruction 707 * Register the new request and wait if the reconstruction
705 * thread has put up a bar for new requests. 708 * thread has put up a bar for new requests.
@@ -774,7 +777,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
774 rcu_read_unlock(); 777 rcu_read_unlock();
775 778
776 atomic_set(&r10_bio->remaining, 1); 779 atomic_set(&r10_bio->remaining, 1);
777 md_write_start(mddev); 780
778 for (i = 0; i < conf->copies; i++) { 781 for (i = 0; i < conf->copies; i++) {
779 struct bio *mbio; 782 struct bio *mbio;
780 int d = r10_bio->devs[i].devnum; 783 int d = r10_bio->devs[i].devnum;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 63b1c59d36ff..677ce49078da 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1411,6 +1411,9 @@ static int make_request (request_queue_t *q, struct bio * bi)
1411 sector_t logical_sector, last_sector; 1411 sector_t logical_sector, last_sector;
1412 struct stripe_head *sh; 1412 struct stripe_head *sh;
1413 1413
1414 if (md_write_start(mddev, bi)==0)
1415 return 0;
1416
1414 if (bio_data_dir(bi)==WRITE) { 1417 if (bio_data_dir(bi)==WRITE) {
1415 disk_stat_inc(mddev->gendisk, writes); 1418 disk_stat_inc(mddev->gendisk, writes);
1416 disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi)); 1419 disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
@@ -1423,8 +1426,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
1423 last_sector = bi->bi_sector + (bi->bi_size>>9); 1426 last_sector = bi->bi_sector + (bi->bi_size>>9);
1424 bi->bi_next = NULL; 1427 bi->bi_next = NULL;
1425 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ 1428 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
1426 if ( bio_data_dir(bi) == WRITE ) 1429
1427 md_write_start(mddev);
1428 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { 1430 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
1429 DEFINE_WAIT(w); 1431 DEFINE_WAIT(w);
1430 1432
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 9d0e0e42a3be..fede16c4e8f3 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1570,6 +1570,9 @@ static int make_request (request_queue_t *q, struct bio * bi)
1570 sector_t logical_sector, last_sector; 1570 sector_t logical_sector, last_sector;
1571 struct stripe_head *sh; 1571 struct stripe_head *sh;
1572 1572
1573 if (md_write_start(mddev, bi)==0)
1574 return 0;
1575
1573 if (bio_data_dir(bi)==WRITE) { 1576 if (bio_data_dir(bi)==WRITE) {
1574 disk_stat_inc(mddev->gendisk, writes); 1577 disk_stat_inc(mddev->gendisk, writes);
1575 disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi)); 1578 disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
@@ -1583,8 +1586,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
1583 1586
1584 bi->bi_next = NULL; 1587 bi->bi_next = NULL;
1585 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ 1588 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
1586 if ( bio_data_dir(bi) == WRITE ) 1589
1587 md_write_start(mddev);
1588 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { 1590 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
1589 DEFINE_WAIT(w); 1591 DEFINE_WAIT(w);
1590 1592
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index a6a67d102bfa..cfde8f497d6d 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
69extern void md_unregister_thread (mdk_thread_t *thread); 69extern void md_unregister_thread (mdk_thread_t *thread);
70extern void md_wakeup_thread(mdk_thread_t *thread); 70extern void md_wakeup_thread(mdk_thread_t *thread);
71extern void md_check_recovery(mddev_t *mddev); 71extern void md_check_recovery(mddev_t *mddev);
72extern void md_write_start(mddev_t *mddev); 72extern int md_write_start(mddev_t *mddev, struct bio *bi);
73extern void md_write_end(mddev_t *mddev); 73extern void md_write_end(mddev_t *mddev);
74extern void md_handle_safemode(mddev_t *mddev); 74extern void md_handle_safemode(mddev_t *mddev);
75extern void md_done_sync(mddev_t *mddev, int blocks, int ok); 75extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index c9a0d4013be7..d92db54255a3 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -15,6 +15,9 @@
15#ifndef _MD_K_H 15#ifndef _MD_K_H
16#define _MD_K_H 16#define _MD_K_H
17 17
18/* and dm-bio-list.h is not under include/linux because.... ??? */
19#include "../../../drivers/md/dm-bio-list.h"
20
18#define MD_RESERVED 0UL 21#define MD_RESERVED 0UL
19#define LINEAR 1UL 22#define LINEAR 1UL
20#define RAID0 2UL 23#define RAID0 2UL
@@ -252,6 +255,10 @@ struct mddev_s
252 atomic_t recovery_active; /* blocks scheduled, but not written */ 255 atomic_t recovery_active; /* blocks scheduled, but not written */
253 wait_queue_head_t recovery_wait; 256 wait_queue_head_t recovery_wait;
254 sector_t recovery_cp; 257 sector_t recovery_cp;
258
259 spinlock_t write_lock;
260 struct bio_list write_list;
261
255 unsigned int safemode; /* if set, update "clean" superblock 262 unsigned int safemode; /* if set, update "clean" superblock
256 * when no writes pending. 263 * when no writes pending.
257 */ 264 */