diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2008-04-29 08:11:12 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-04-29 08:11:12 -0400 |
commit | 267e4db9ac28a09973476e7ec2cb6807e609d35a (patch) | |
tree | 54eae44c7c0086800901866424ef1526e0ce863e | |
parent | 3977c965ec35ce1a7eac988ad313f0fc9aee9660 (diff) |
ext4: Fix race between migration and mmap write
Fail migrate if we allocated new blocks via mmap write.
If we write to holes in the file via mmap, we end up allocating
new blocks. This block allocation happens without taking inode->i_mutex.
Since migrate is protected by i_mutex and migrate expects that no
new blocks get allocated during migrate, fail migrate if new blocks
get allocated.
We can't take inode->i_mutex in the mmap write path because that
would result in a locking order violation between i_mutex and mmap_sem.
Also, adding a separate rw_semaphore for protection is really high overhead
for a rare operation such as migrate.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/inode.c | 13 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 39 | ||||
-rw-r--r-- | include/linux/ext4_fs.h | 1 |
3 files changed, 47 insertions, 6 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8fab233cb05f..24a2604dde7b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
985 | } else { | 985 | } else { |
986 | retval = ext4_get_blocks_handle(handle, inode, block, | 986 | retval = ext4_get_blocks_handle(handle, inode, block, |
987 | max_blocks, bh, create, extend_disksize); | 987 | max_blocks, bh, create, extend_disksize); |
988 | |||
989 | if (retval > 0 && buffer_new(bh)) { | ||
990 | /* | ||
991 | * We allocated new blocks which will result in | ||
992 | * i_data's format changing. Force the migrate | ||
993 | * to fail by clearing migrate flags | ||
994 | */ | ||
995 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | ||
996 | ~EXT4_EXT_MIGRATE; | ||
997 | } | ||
988 | } | 998 | } |
989 | up_write((&EXT4_I(inode)->i_data_sem)); | 999 | up_write((&EXT4_I(inode)->i_data_sem)); |
990 | return retval; | 1000 | return retval; |
@@ -2976,7 +2986,8 @@ static int ext4_do_update_inode(handle_t *handle, | |||
2976 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) | 2986 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
2977 | goto out_brelse; | 2987 | goto out_brelse; |
2978 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 2988 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
2979 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); | 2989 | /* clear the migrate flag in the raw_inode */ |
2990 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); | ||
2980 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 2991 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
2981 | cpu_to_le32(EXT4_OS_HURD)) | 2992 | cpu_to_le32(EXT4_OS_HURD)) |
2982 | raw_inode->i_file_acl_high = | 2993 | raw_inode->i_file_acl_high = |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 5c1e27de7755..9b4fb07d192c 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) | |||
327 | } | 327 | } |
328 | 328 | ||
329 | static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | 329 | static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, |
330 | struct inode *tmp_inode) | 330 | struct inode *tmp_inode) |
331 | { | 331 | { |
332 | int retval; | 332 | int retval; |
333 | __le32 i_data[3]; | 333 | __le32 i_data[3]; |
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
339 | * i_data field of the original inode | 339 | * i_data field of the original inode |
340 | */ | 340 | */ |
341 | retval = ext4_journal_extend(handle, 1); | 341 | retval = ext4_journal_extend(handle, 1); |
342 | if (retval != 0) { | 342 | if (retval) { |
343 | retval = ext4_journal_restart(handle, 1); | 343 | retval = ext4_journal_restart(handle, 1); |
344 | if (retval) | 344 | if (retval) |
345 | goto err_out; | 345 | goto err_out; |
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
351 | 351 | ||
352 | down_write(&EXT4_I(inode)->i_data_sem); | 352 | down_write(&EXT4_I(inode)->i_data_sem); |
353 | /* | 353 | /* |
354 | * if EXT4_EXT_MIGRATE is cleared a block allocation | ||
355 | * happened after we started the migrate. We need to | ||
356 | * fail the migrate | ||
357 | */ | ||
358 | if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { | ||
359 | retval = -EAGAIN; | ||
360 | up_write(&EXT4_I(inode)->i_data_sem); | ||
361 | goto err_out; | ||
362 | } else | ||
363 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | ||
364 | ~EXT4_EXT_MIGRATE; | ||
365 | /* | ||
354 | * We have the extent map build with the tmp inode. | 366 | * We have the extent map build with the tmp inode. |
355 | * Now copy the i_data across | 367 | * Now copy the i_data across |
356 | */ | 368 | */ |
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
508 | * switch the inode format to prevent read. | 520 | * switch the inode format to prevent read. |
509 | */ | 521 | */ |
510 | mutex_lock(&(inode->i_mutex)); | 522 | mutex_lock(&(inode->i_mutex)); |
523 | /* | ||
524 | * Even though we take i_mutex we can still cause block allocation | ||
525 | * via mmap write to holes. If we have allocated new blocks we fail | ||
526 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | ||
527 | * The flag is updated with i_data_sem held to prevent racing with | ||
528 | * block allocation. | ||
529 | */ | ||
530 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
531 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; | ||
532 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
533 | |||
511 | handle = ext4_journal_start(inode, 1); | 534 | handle = ext4_journal_start(inode, 1); |
512 | 535 | ||
513 | ei = EXT4_I(inode); | 536 | ei = EXT4_I(inode); |
@@ -559,9 +582,15 @@ err_out: | |||
559 | * tmp_inode | 582 | * tmp_inode |
560 | */ | 583 | */ |
561 | free_ext_block(handle, tmp_inode); | 584 | free_ext_block(handle, tmp_inode); |
562 | else | 585 | else { |
563 | retval = ext4_ext_swap_inode_data(handle, inode, | 586 | retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode); |
564 | tmp_inode); | 587 | if (retval) |
588 | /* | ||
589 | * if we fail to swap inode data free the extent | ||
590 | * details of the tmp inode | ||
591 | */ | ||
592 | free_ext_block(handle, tmp_inode); | ||
593 | } | ||
565 | 594 | ||
566 | /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ | 595 | /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ |
567 | if (ext4_journal_extend(handle, 1) != 0) | 596 | if (ext4_journal_extend(handle, 1) != 0) |
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 250032548597..105337ca9ed0 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h | |||
@@ -231,6 +231,7 @@ struct ext4_group_desc | |||
231 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 231 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
232 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ | 232 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ |
233 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ | 233 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ |
234 | #define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */ | ||
234 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 235 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
235 | 236 | ||
236 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 237 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |