aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@cse.unsw.edu.au>2005-06-21 20:17:28 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 22:07:47 -0400
commit7bfa19f2748000d646dbdf8f48258cfe1d257b52 (patch)
tree2f7e6b0a0cba4ac01d7809224023a7dc73b94840
parenta654b9d8f851f4ca02649d5825cbe6c608adb10c (diff)
[PATCH] md: allow md to update multiple superblocks in parallel.
Currently, md updates all superblocks (one on each device) in series. It waits for one write to complete before starting the next. This isn't a big problem as superblock updates don't happen that often. However it is neater to do it in parallel, and if the drives in the array have gone to "sleep" after a period of idleness, then waking them in parallel is faster (and someone else should be worrying about power drain). Further, we will need parallel superblock updates for a future patch which keeps the intent-logging bitmap near the superblock. Also remove the silly code that retried superblock updates 100 times. This simply never made sense. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/md/md.c85
-rw-r--r--include/linux/raid/md_k.h1
2 files changed, 49 insertions, 37 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fde8acfac320..ef3ad99562c1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -328,6 +328,40 @@ static void free_disk_sb(mdk_rdev_t * rdev)
328} 328}
329 329
330 330
331static int super_written(struct bio *bio, unsigned int bytes_done, int error)
332{
333 mdk_rdev_t *rdev = bio->bi_private;
334 if (bio->bi_size)
335 return 1;
336
337 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags))
338 md_error(rdev->mddev, rdev);
339
340 if (atomic_dec_and_test(&rdev->mddev->pending_writes))
341 wake_up(&rdev->mddev->sb_wait);
342 return 0;
343}
344
345void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
346 sector_t sector, int size, struct page *page)
347{
348 /* write first size bytes of page to sector of rdev
349 * Increment mddev->pending_writes before returning
350 * and decrement it on completion, waking up sb_wait
351 * if zero is reached.
352 * If an error occurred, call md_error
353 */
354 struct bio *bio = bio_alloc(GFP_NOIO, 1);
355
356 bio->bi_bdev = rdev->bdev;
357 bio->bi_sector = sector;
358 bio_add_page(bio, page, size, 0);
359 bio->bi_private = rdev;
360 bio->bi_end_io = super_written;
361 atomic_inc(&mddev->pending_writes);
362 submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio);
363}
364
331static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) 365static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
332{ 366{
333 if (bio->bi_size) 367 if (bio->bi_size)
@@ -1268,30 +1302,6 @@ void md_print_devices(void)
1268} 1302}
1269 1303
1270 1304
1271static int write_disk_sb(mdk_rdev_t * rdev)
1272{
1273 char b[BDEVNAME_SIZE];
1274 if (!rdev->sb_loaded) {
1275 MD_BUG();
1276 return 1;
1277 }
1278 if (rdev->faulty) {
1279 MD_BUG();
1280 return 1;
1281 }
1282
1283 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
1284 bdevname(rdev->bdev,b),
1285 (unsigned long long)rdev->sb_offset);
1286
1287 if (sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE))
1288 return 0;
1289
1290 printk("md: write_disk_sb failed for device %s\n",
1291 bdevname(rdev->bdev,b));
1292 return 1;
1293}
1294
1295static void sync_sbs(mddev_t * mddev) 1305static void sync_sbs(mddev_t * mddev)
1296{ 1306{
1297 mdk_rdev_t *rdev; 1307 mdk_rdev_t *rdev;
@@ -1306,7 +1316,7 @@ static void sync_sbs(mddev_t * mddev)
1306 1316
1307static void md_update_sb(mddev_t * mddev) 1317static void md_update_sb(mddev_t * mddev)
1308{ 1318{
1309 int err, count = 100; 1319 int err;
1310 struct list_head *tmp; 1320 struct list_head *tmp;
1311 mdk_rdev_t *rdev; 1321 mdk_rdev_t *rdev;
1312 int sync_req; 1322 int sync_req;
@@ -1326,6 +1336,7 @@ repeat:
1326 MD_BUG(); 1336 MD_BUG();
1327 mddev->events --; 1337 mddev->events --;
1328 } 1338 }
1339 mddev->sb_dirty = 2;
1329 sync_sbs(mddev); 1340 sync_sbs(mddev);
1330 1341
1331 /* 1342 /*
@@ -1353,24 +1364,24 @@ repeat:
1353 1364
1354 dprintk("%s ", bdevname(rdev->bdev,b)); 1365 dprintk("%s ", bdevname(rdev->bdev,b));
1355 if (!rdev->faulty) { 1366 if (!rdev->faulty) {
1356 err += write_disk_sb(rdev); 1367 md_super_write(mddev,rdev,
1368 rdev->sb_offset<<1, MD_SB_BYTES,
1369 rdev->sb_page);
1370 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
1371 bdevname(rdev->bdev,b),
1372 (unsigned long long)rdev->sb_offset);
1373
1357 } else 1374 } else
1358 dprintk(")\n"); 1375 dprintk(")\n");
1359 if (!err && mddev->level == LEVEL_MULTIPATH) 1376 if (mddev->level == LEVEL_MULTIPATH)
1360 /* only need to write one superblock... */ 1377 /* only need to write one superblock... */
1361 break; 1378 break;
1362 } 1379 }
1363 if (err) { 1380 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
1364 if (--count) { 1381 /* if there was a failure, sb_dirty was set to 1, and we re-write super */
1365 printk(KERN_ERR "md: errors occurred during superblock" 1382
1366 " update, repeating\n");
1367 goto repeat;
1368 }
1369 printk(KERN_ERR \
1370 "md: excessive errors occurred during superblock update, exiting\n");
1371 }
1372 spin_lock(&mddev->write_lock); 1383 spin_lock(&mddev->write_lock);
1373 if (mddev->in_sync != sync_req) { 1384 if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) {
1374 /* have to write it out again */ 1385 /* have to write it out again */
1375 spin_unlock(&mddev->write_lock); 1386 spin_unlock(&mddev->write_lock);
1376 goto repeat; 1387 goto repeat;
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index a3725b57fb7d..8c14ba565a45 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -262,6 +262,7 @@ struct mddev_s
262 262
263 spinlock_t write_lock; 263 spinlock_t write_lock;
264 wait_queue_head_t sb_wait; /* for waiting on superblock updates */ 264 wait_queue_head_t sb_wait; /* for waiting on superblock updates */
265 atomic_t pending_writes; /* number of active superblock writes */
265 266
266 unsigned int safemode; /* if set, update "clean" superblock 267 unsigned int safemode; /* if set, update "clean" superblock
267 * when no writes pending. 268 * when no writes pending.