aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2006-06-26 03:27:57 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 12:58:39 -0400
commit42543769142d2375f2b5f8fc9cac999f84bd4c4c (patch)
tree404606c37da872639111c5b641f07fff2ded1dff
parent07d84d109d8beedd68df9da2e4e9f25c8217e7fb (diff)
[PATCH] md: Don't write dirty/clean update to spares - leave them alone
- record the 'event' count on each individual device (they might sometimes be slightly different now) - add a new value for 'sb_dirty': '3' means that the super block only needs to be updated to record a clean<->dirty transition. - Prefer odd event numbers for dirty states and even numbers for clean states - Using all the above, don't update the superblock on a spare device if the update is just doing a clean<->dirty transition. To accommodate this, a transition from dirty back to clean might now decrement the events counter if nothing else has changed. The net effect of this is that spare drives will not see any IO requests during normal running of the array, so they can go to sleep if that is what they want to do. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/md/md.c65
-rw-r--r--include/linux/raid/md_k.h1
2 files changed, 58 insertions, 8 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6f97817d28b9..34b6902cda46 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1558,15 +1558,30 @@ static void md_print_devices(void)
1558} 1558}
1559 1559
1560 1560
1561static void sync_sbs(mddev_t * mddev) 1561static void sync_sbs(mddev_t * mddev, int nospares)
1562{ 1562{
1563 /* Update each superblock (in-memory image), but
1564 * if we are allowed to, skip spares which already
1565 * have the right event counter, or have one earlier
1566 * (which would mean they aren't being marked as dirty
1567 * with the rest of the array)
1568 */
1563 mdk_rdev_t *rdev; 1569 mdk_rdev_t *rdev;
1564 struct list_head *tmp; 1570 struct list_head *tmp;
1565 1571
1566 ITERATE_RDEV(mddev,rdev,tmp) { 1572 ITERATE_RDEV(mddev,rdev,tmp) {
1567 super_types[mddev->major_version]. 1573 if (rdev->sb_events == mddev->events ||
1568 sync_super(mddev, rdev); 1574 (nospares &&
1569 rdev->sb_loaded = 1; 1575 rdev->raid_disk < 0 &&
1576 (rdev->sb_events&1)==0 &&
1577 rdev->sb_events+1 == mddev->events)) {
1578 /* Don't update this superblock */
1579 rdev->sb_loaded = 2;
1580 } else {
1581 super_types[mddev->major_version].
1582 sync_super(mddev, rdev);
1583 rdev->sb_loaded = 1;
1584 }
1570 } 1585 }
1571} 1586}
1572 1587
@@ -1576,12 +1591,42 @@ void md_update_sb(mddev_t * mddev)
1576 struct list_head *tmp; 1591 struct list_head *tmp;
1577 mdk_rdev_t *rdev; 1592 mdk_rdev_t *rdev;
1578 int sync_req; 1593 int sync_req;
1594 int nospares = 0;
1579 1595
1580repeat: 1596repeat:
1581 spin_lock_irq(&mddev->write_lock); 1597 spin_lock_irq(&mddev->write_lock);
1582 sync_req = mddev->in_sync; 1598 sync_req = mddev->in_sync;
1583 mddev->utime = get_seconds(); 1599 mddev->utime = get_seconds();
1584 mddev->events ++; 1600 if (mddev->sb_dirty == 3)
1601 /* just a clean<-> dirty transition, possibly leave spares alone,
1602 * though if events isn't the right even/odd, we will have to do
1603 * spares after all
1604 */
1605 nospares = 1;
1606
1607 /* If this is just a dirty<->clean transition, and the array is clean
1608 * and 'events' is odd, we can roll back to the previous clean state */
1609 if (mddev->sb_dirty == 3
1610 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
1611 && (mddev->events & 1))
1612 mddev->events--;
1613 else {
1614 /* otherwise we have to go forward and ... */
1615 mddev->events ++;
1616 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
1617 /* .. if the array isn't clean, insist on an odd 'events' */
1618 if ((mddev->events&1)==0) {
1619 mddev->events++;
1620 nospares = 0;
1621 }
1622 } else {
1623 /* otherwise insist on an even 'events' (for clean states) */
1624 if ((mddev->events&1)) {
1625 mddev->events++;
1626 nospares = 0;
1627 }
1628 }
1629 }
1585 1630
1586 if (!mddev->events) { 1631 if (!mddev->events) {
1587 /* 1632 /*
@@ -1593,7 +1638,7 @@ repeat:
1593 mddev->events --; 1638 mddev->events --;
1594 } 1639 }
1595 mddev->sb_dirty = 2; 1640 mddev->sb_dirty = 2;
1596 sync_sbs(mddev); 1641 sync_sbs(mddev, nospares);
1597 1642
1598 /* 1643 /*
1599 * do not write anything to disk if using 1644 * do not write anything to disk if using
@@ -1615,6 +1660,8 @@ repeat:
1615 ITERATE_RDEV(mddev,rdev,tmp) { 1660 ITERATE_RDEV(mddev,rdev,tmp) {
1616 char b[BDEVNAME_SIZE]; 1661 char b[BDEVNAME_SIZE];
1617 dprintk(KERN_INFO "md: "); 1662 dprintk(KERN_INFO "md: ");
1663 if (rdev->sb_loaded != 1)
1664 continue; /* no noise on spare devices */
1618 if (test_bit(Faulty, &rdev->flags)) 1665 if (test_bit(Faulty, &rdev->flags))
1619 dprintk("(skipping faulty "); 1666 dprintk("(skipping faulty ");
1620 1667
@@ -1626,6 +1673,7 @@ repeat:
1626 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", 1673 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
1627 bdevname(rdev->bdev,b), 1674 bdevname(rdev->bdev,b),
1628 (unsigned long long)rdev->sb_offset); 1675 (unsigned long long)rdev->sb_offset);
1676 rdev->sb_events = mddev->events;
1629 1677
1630 } else 1678 } else
1631 dprintk(")\n"); 1679 dprintk(")\n");
@@ -1895,6 +1943,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
1895 rdev->desc_nr = -1; 1943 rdev->desc_nr = -1;
1896 rdev->flags = 0; 1944 rdev->flags = 0;
1897 rdev->data_offset = 0; 1945 rdev->data_offset = 0;
1946 rdev->sb_events = 0;
1898 atomic_set(&rdev->nr_pending, 0); 1947 atomic_set(&rdev->nr_pending, 0);
1899 atomic_set(&rdev->read_errors, 0); 1948 atomic_set(&rdev->read_errors, 0);
1900 atomic_set(&rdev->corrected_errors, 0); 1949 atomic_set(&rdev->corrected_errors, 0);
@@ -4708,7 +4757,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
4708 spin_lock_irq(&mddev->write_lock); 4757 spin_lock_irq(&mddev->write_lock);
4709 if (mddev->in_sync) { 4758 if (mddev->in_sync) {
4710 mddev->in_sync = 0; 4759 mddev->in_sync = 0;
4711 mddev->sb_dirty = 1; 4760 mddev->sb_dirty = 3;
4712 md_wakeup_thread(mddev->thread); 4761 md_wakeup_thread(mddev->thread);
4713 } 4762 }
4714 spin_unlock_irq(&mddev->write_lock); 4763 spin_unlock_irq(&mddev->write_lock);
@@ -5055,7 +5104,7 @@ void md_check_recovery(mddev_t *mddev)
5055 if (mddev->safemode && !atomic_read(&mddev->writes_pending) && 5104 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
5056 !mddev->in_sync && mddev->recovery_cp == MaxSector) { 5105 !mddev->in_sync && mddev->recovery_cp == MaxSector) {
5057 mddev->in_sync = 1; 5106 mddev->in_sync = 1;
5058 mddev->sb_dirty = 1; 5107 mddev->sb_dirty = 3;
5059 } 5108 }
5060 if (mddev->safemode == 1) 5109 if (mddev->safemode == 1)
5061 mddev->safemode = 0; 5110 mddev->safemode = 0;
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 682574f3bd36..c1e0ac55bab5 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -58,6 +58,7 @@ struct mdk_rdev_s
58 58
59 struct page *sb_page; 59 struct page *sb_page;
60 int sb_loaded; 60 int sb_loaded;
61 __u64 sb_events;
61 sector_t data_offset; /* start of data in array */ 62 sector_t data_offset; /* start of data in array */
62 sector_t sb_offset; 63 sector_t sb_offset;
63 int sb_size; /* bytes in the superblock */ 64 int sb_size; /* bytes in the superblock */