aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2011-02-24 01:25:47 -0500
committer NeilBrown <neilb@suse.de> 2011-02-24 01:25:47 -0500
commit 93b270f76e7ef3b81001576860c2701931cdc78b (patch)
tree abaca0e4d3e86721815498fafd06295dd9cfd002
parent da9cf5050a2e3dbc3cf26a8d908482eb4485ed49 (diff)
Fix over-zealous flush_disk when changing device size.
There are two cases when we call flush_disk. In one, the device has disappeared (check_disk_change) so any data we hold becomes irrelevant. In the other, the device has changed size (check_disk_size_change) so data we hold may be irrelevant. In both cases it makes sense to discard any 'clean' buffers, so they will be read back from the device if needed. In the former case it makes sense to discard 'dirty' buffers as there will never be anywhere safe to write the data. In the second case it *does*not* make sense to discard dirty buffers as that will lead to file system corruption when you simply enlarge the containing devices. flush_disk calls __invalidate_device. __invalidate_device calls both invalidate_inodes and invalidate_bdev. invalidate_inodes *does* discard I_DIRTY inodes and this does lead to fs corruption. invalidate_bdev *does*not* discard dirty pages, but I don't really care about that at present. So this patch adds a flag to __invalidate_device (calling it __invalidate_device2) to indicate whether dirty buffers should be killed, and this is passed to invalidate_inodes which can choose to skip dirty inodes. flush_disk then passes true from check_disk_change and false from check_disk_size_change. dm avoids tripping over this problem by calling i_size_write directly rather than using check_disk_size_change. md does use check_disk_size_change and so is affected. This regression was introduced by commit 608aeef17a which causes check_disk_size_change to call flush_disk, so this fix is suitable for any kernel since 2.6.27. Cc: stable@kernel.org Acked-by: Jeff Moyer <jmoyer@redhat.com> Cc: Andrew Patterson <andrew.patterson@hp.com> Cc: Jens Axboe <axboe@kernel.dk> Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--block/genhd.c2
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--fs/block_dev.c12
-rw-r--r--fs/inode.c9
-rw-r--r--fs/internal.h2
-rw-r--r--include/linux/fs.h2
6 files changed, 18 insertions, 11 deletions
diff --git a/block/genhd.c b/block/genhd.c
index 6a5b772aa201..cbf1112a885c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1355,7 +1355,7 @@ int invalidate_partition(struct gendisk *disk, int partno)
1355 struct block_device *bdev = bdget_disk(disk, partno); 1355 struct block_device *bdev = bdget_disk(disk, partno);
1356 if (bdev) { 1356 if (bdev) {
1357 fsync_bdev(bdev); 1357 fsync_bdev(bdev);
1358 res = __invalidate_device(bdev); 1358 res = __invalidate_device(bdev, true);
1359 bdput(bdev); 1359 bdput(bdev);
1360 } 1360 }
1361 return res; 1361 return res;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index b9ba04fc2b34..77fc76f8aea9 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3281,7 +3281,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
3281 struct block_device *bdev = opened_bdev[cnt]; 3281 struct block_device *bdev = opened_bdev[cnt];
3282 if (!bdev || ITYPE(drive_state[cnt].fd_device) != type) 3282 if (!bdev || ITYPE(drive_state[cnt].fd_device) != type)
3283 continue; 3283 continue;
3284 __invalidate_device(bdev); 3284 __invalidate_device(bdev, true);
3285 } 3285 }
3286 mutex_unlock(&open_lock); 3286 mutex_unlock(&open_lock);
3287 } else { 3287 } else {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb4cb9c..5e23152d04ad 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -927,9 +927,9 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
927 * when a disk has been changed -- either by a media change or online 927 * when a disk has been changed -- either by a media change or online
928 * resize. 928 * resize.
929 */ 929 */
930static void flush_disk(struct block_device *bdev) 930static void flush_disk(struct block_device *bdev, bool kill_dirty)
931{ 931{
932 if (__invalidate_device(bdev)) { 932 if (__invalidate_device(bdev, kill_dirty)) {
933 char name[BDEVNAME_SIZE] = ""; 933 char name[BDEVNAME_SIZE] = "";
934 934
935 if (bdev->bd_disk) 935 if (bdev->bd_disk)
@@ -966,7 +966,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
966 "%s: detected capacity change from %lld to %lld\n", 966 "%s: detected capacity change from %lld to %lld\n",
967 name, bdev_size, disk_size); 967 name, bdev_size, disk_size);
968 i_size_write(bdev->bd_inode, disk_size); 968 i_size_write(bdev->bd_inode, disk_size);
969 flush_disk(bdev); 969 flush_disk(bdev, false);
970 } 970 }
971} 971}
972EXPORT_SYMBOL(check_disk_size_change); 972EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1019,7 @@ int check_disk_change(struct block_device *bdev)
1019 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1019 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1020 return 0; 1020 return 0;
1021 1021
1022 flush_disk(bdev); 1022 flush_disk(bdev, true);
1023 if (bdops->revalidate_disk) 1023 if (bdops->revalidate_disk)
1024 bdops->revalidate_disk(bdev->bd_disk); 1024 bdops->revalidate_disk(bdev->bd_disk);
1025 return 1; 1025 return 1;
@@ -1601,7 +1601,7 @@ fail:
1601} 1601}
1602EXPORT_SYMBOL(lookup_bdev); 1602EXPORT_SYMBOL(lookup_bdev);
1603 1603
1604int __invalidate_device(struct block_device *bdev) 1604int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1605{ 1605{
1606 struct super_block *sb = get_super(bdev); 1606 struct super_block *sb = get_super(bdev);
1607 int res = 0; 1607 int res = 0;
@@ -1614,7 +1614,7 @@ int __invalidate_device(struct block_device *bdev)
1614 * hold). 1614 * hold).
1615 */ 1615 */
1616 shrink_dcache_sb(sb); 1616 shrink_dcache_sb(sb);
1617 res = invalidate_inodes(sb); 1617 res = invalidate_inodes(sb, kill_dirty);
1618 drop_super(sb); 1618 drop_super(sb);
1619 } 1619 }
1620 invalidate_bdev(bdev); 1620 invalidate_bdev(bdev);
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..c50d7feb87b1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -540,11 +540,14 @@ void evict_inodes(struct super_block *sb)
540/** 540/**
541 * invalidate_inodes - attempt to free all inodes on a superblock 541 * invalidate_inodes - attempt to free all inodes on a superblock
542 * @sb: superblock to operate on 542 * @sb: superblock to operate on
543 * @kill_dirty: flag to guide handling of dirty inodes
543 * 544 *
544 * Attempts to free all inodes for a given superblock. If there were any 545 * Attempts to free all inodes for a given superblock. If there were any
545 * busy inodes return a non-zero value, else zero. 546 * busy inodes return a non-zero value, else zero.
547 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
548 * them as busy.
546 */ 549 */
547int invalidate_inodes(struct super_block *sb) 550int invalidate_inodes(struct super_block *sb, bool kill_dirty)
548{ 551{
549 int busy = 0; 552 int busy = 0;
550 struct inode *inode, *next; 553 struct inode *inode, *next;
@@ -556,6 +559,10 @@ int invalidate_inodes(struct super_block *sb)
556 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 559 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 560 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
558 continue; 561 continue;
562 if (inode->i_state & I_DIRTY && !kill_dirty) {
563 busy = 1;
564 continue;
565 }
559 if (atomic_read(&inode->i_count)) { 566 if (atomic_read(&inode->i_count)) {
560 busy = 1; 567 busy = 1;
561 continue; 568 continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..9b976b57d7fe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,4 +112,4 @@ extern void release_open_intent(struct nameidata *);
112 */ 112 */
113extern int get_nr_dirty_inodes(void); 113extern int get_nr_dirty_inodes(void);
114extern void evict_inodes(struct super_block *); 114extern void evict_inodes(struct super_block *);
115extern int invalidate_inodes(struct super_block *); 115extern int invalidate_inodes(struct super_block *, bool);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 32b38cd829d3..683f4c566c82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2139,7 +2139,7 @@ extern void check_disk_size_change(struct gendisk *disk,
2139 struct block_device *bdev); 2139 struct block_device *bdev);
2140extern int revalidate_disk(struct gendisk *); 2140extern int revalidate_disk(struct gendisk *);
2141extern int check_disk_change(struct block_device *); 2141extern int check_disk_change(struct block_device *);
2142extern int __invalidate_device(struct block_device *); 2142extern int __invalidate_device(struct block_device *, bool);
2143extern int invalidate_partition(struct gendisk *, int); 2143extern int invalidate_partition(struct gendisk *, int);
2144#endif 2144#endif
2145unsigned long invalidate_mapping_pages(struct address_space *mapping, 2145unsigned long invalidate_mapping_pages(struct address_space *mapping,