diff options
author | NeilBrown <neilb@cse.unsw.edu.au> | 2005-06-21 20:17:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 22:07:47 -0400 |
commit | 7bfa19f2748000d646dbdf8f48258cfe1d257b52 (patch) | |
tree | 2f7e6b0a0cba4ac01d7809224023a7dc73b94840 | |
parent | a654b9d8f851f4ca02649d5825cbe6c608adb10c (diff) |
[PATCH] md: allow md to update multiple superblocks in parallel.
Currently, md updates all superblocks (one on each device) in series. It
waits for one write to complete before starting the next. This isn't a big
problem as superblock updates don't happen that often.
However it is neater to do it in parallel, and if the drives in the array have
gone to "sleep" after a period of idleness, then waking them in parallel is
faster (and someone else should be worrying about power drain).
Further, we will need parallel superblock updates for a future patch which
keeps the intent-logging bitmap near the superblock.
Also remove the silly code that retried superblock updates 100 times. This
simply never made sense.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/md/md.c | 85 | ||||
-rw-r--r-- | include/linux/raid/md_k.h | 1 |
2 files changed, 49 insertions, 37 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index fde8acfac320..ef3ad99562c1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -328,6 +328,40 @@ static void free_disk_sb(mdk_rdev_t * rdev) | |||
328 | } | 328 | } |
329 | 329 | ||
330 | 330 | ||
331 | static int super_written(struct bio *bio, unsigned int bytes_done, int error) | ||
332 | { | ||
333 | mdk_rdev_t *rdev = bio->bi_private; | ||
334 | if (bio->bi_size) | ||
335 | return 1; | ||
336 | |||
337 | if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
338 | md_error(rdev->mddev, rdev); | ||
339 | |||
340 | if (atomic_dec_and_test(&rdev->mddev->pending_writes)) | ||
341 | wake_up(&rdev->mddev->sb_wait); | ||
342 | return 0; | ||
343 | } | ||
344 | |||
345 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | ||
346 | sector_t sector, int size, struct page *page) | ||
347 | { | ||
348 | /* write first size bytes of page to sector of rdev | ||
349 | * Increment mddev->pending_writes before returning | ||
350 | * and decrement it on completion, waking up sb_wait | ||
351 | * if zero is reached. | ||
352 | * If an error occurred, call md_error | ||
353 | */ | ||
354 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | ||
355 | |||
356 | bio->bi_bdev = rdev->bdev; | ||
357 | bio->bi_sector = sector; | ||
358 | bio_add_page(bio, page, size, 0); | ||
359 | bio->bi_private = rdev; | ||
360 | bio->bi_end_io = super_written; | ||
361 | atomic_inc(&mddev->pending_writes); | ||
362 | submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio); | ||
363 | } | ||
364 | |||
331 | static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) | 365 | static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) |
332 | { | 366 | { |
333 | if (bio->bi_size) | 367 | if (bio->bi_size) |
@@ -1268,30 +1302,6 @@ void md_print_devices(void) | |||
1268 | } | 1302 | } |
1269 | 1303 | ||
1270 | 1304 | ||
1271 | static int write_disk_sb(mdk_rdev_t * rdev) | ||
1272 | { | ||
1273 | char b[BDEVNAME_SIZE]; | ||
1274 | if (!rdev->sb_loaded) { | ||
1275 | MD_BUG(); | ||
1276 | return 1; | ||
1277 | } | ||
1278 | if (rdev->faulty) { | ||
1279 | MD_BUG(); | ||
1280 | return 1; | ||
1281 | } | ||
1282 | |||
1283 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | ||
1284 | bdevname(rdev->bdev,b), | ||
1285 | (unsigned long long)rdev->sb_offset); | ||
1286 | |||
1287 | if (sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE)) | ||
1288 | return 0; | ||
1289 | |||
1290 | printk("md: write_disk_sb failed for device %s\n", | ||
1291 | bdevname(rdev->bdev,b)); | ||
1292 | return 1; | ||
1293 | } | ||
1294 | |||
1295 | static void sync_sbs(mddev_t * mddev) | 1305 | static void sync_sbs(mddev_t * mddev) |
1296 | { | 1306 | { |
1297 | mdk_rdev_t *rdev; | 1307 | mdk_rdev_t *rdev; |
@@ -1306,7 +1316,7 @@ static void sync_sbs(mddev_t * mddev) | |||
1306 | 1316 | ||
1307 | static void md_update_sb(mddev_t * mddev) | 1317 | static void md_update_sb(mddev_t * mddev) |
1308 | { | 1318 | { |
1309 | int err, count = 100; | 1319 | int err; |
1310 | struct list_head *tmp; | 1320 | struct list_head *tmp; |
1311 | mdk_rdev_t *rdev; | 1321 | mdk_rdev_t *rdev; |
1312 | int sync_req; | 1322 | int sync_req; |
@@ -1326,6 +1336,7 @@ repeat: | |||
1326 | MD_BUG(); | 1336 | MD_BUG(); |
1327 | mddev->events --; | 1337 | mddev->events --; |
1328 | } | 1338 | } |
1339 | mddev->sb_dirty = 2; | ||
1329 | sync_sbs(mddev); | 1340 | sync_sbs(mddev); |
1330 | 1341 | ||
1331 | /* | 1342 | /* |
@@ -1353,24 +1364,24 @@ repeat: | |||
1353 | 1364 | ||
1354 | dprintk("%s ", bdevname(rdev->bdev,b)); | 1365 | dprintk("%s ", bdevname(rdev->bdev,b)); |
1355 | if (!rdev->faulty) { | 1366 | if (!rdev->faulty) { |
1356 | err += write_disk_sb(rdev); | 1367 | md_super_write(mddev,rdev, |
1368 | rdev->sb_offset<<1, MD_SB_BYTES, | ||
1369 | rdev->sb_page); | ||
1370 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | ||
1371 | bdevname(rdev->bdev,b), | ||
1372 | (unsigned long long)rdev->sb_offset); | ||
1373 | |||
1357 | } else | 1374 | } else |
1358 | dprintk(")\n"); | 1375 | dprintk(")\n"); |
1359 | if (!err && mddev->level == LEVEL_MULTIPATH) | 1376 | if (mddev->level == LEVEL_MULTIPATH) |
1360 | /* only need to write one superblock... */ | 1377 | /* only need to write one superblock... */ |
1361 | break; | 1378 | break; |
1362 | } | 1379 | } |
1363 | if (err) { | 1380 | wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); |
1364 | if (--count) { | 1381 | /* if there was a failure, sb_dirty was set to 1, and we re-write super */ |
1365 | printk(KERN_ERR "md: errors occurred during superblock" | 1382 | |
1366 | " update, repeating\n"); | ||
1367 | goto repeat; | ||
1368 | } | ||
1369 | printk(KERN_ERR \ | ||
1370 | "md: excessive errors occurred during superblock update, exiting\n"); | ||
1371 | } | ||
1372 | spin_lock(&mddev->write_lock); | 1383 | spin_lock(&mddev->write_lock); |
1373 | if (mddev->in_sync != sync_req) { | 1384 | if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) { |
1374 | /* have to write it out again */ | 1385 | /* have to write it out again */ |
1375 | spin_unlock(&mddev->write_lock); | 1386 | spin_unlock(&mddev->write_lock); |
1376 | goto repeat; | 1387 | goto repeat; |
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index a3725b57fb7d..8c14ba565a45 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h | |||
@@ -262,6 +262,7 @@ struct mddev_s | |||
262 | 262 | ||
263 | spinlock_t write_lock; | 263 | spinlock_t write_lock; |
264 | wait_queue_head_t sb_wait; /* for waiting on superblock updates */ | 264 | wait_queue_head_t sb_wait; /* for waiting on superblock updates */ |
265 | atomic_t pending_writes; /* number of active superblock writes */ | ||
265 | 266 | ||
266 | unsigned int safemode; /* if set, update "clean" superblock | 267 | unsigned int safemode; /* if set, update "clean" superblock |
267 | * when no writes pending. | 268 | * when no writes pending. |