diff options
-rw-r--r-- | fs/ext4/balloc.c | 2 | ||||
-rw-r--r-- | fs/ext4/extents.c | 13 | ||||
-rw-r--r-- | fs/ext4/file.c | 4 | ||||
-rw-r--r-- | fs/ext4/inode.c | 40 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 4 | ||||
-rw-r--r-- | fs/ext4/super.c | 2 | ||||
-rw-r--r-- | include/linux/ext4_fs.h | 25 | ||||
-rw-r--r-- | include/linux/ext4_fs_i.h | 6 |
8 files changed, 53 insertions, 43 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d460223b8e1d..7ae223ed152f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -526,7 +526,7 @@ static inline int rsv_is_empty(struct ext4_reserve_window *rsv) | |||
526 | * when setting the reservation window size through ioctl before the file | 526 | * when setting the reservation window size through ioctl before the file |
527 | * is open for write (needs block allocation). | 527 | * is open for write (needs block allocation). |
528 | * | 528 | * |
529 | * Needs truncate_mutex protection prior to call this function. | 529 | * Needs down_write(i_data_sem) protection prior to call this function. |
530 | */ | 530 | */ |
531 | void ext4_init_block_alloc_info(struct inode *inode) | 531 | void ext4_init_block_alloc_info(struct inode *inode) |
532 | { | 532 | { |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ec5019fa552f..03d1bbb78a2f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1565,7 +1565,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1565 | * This routine returns max. credits that the extent tree can consume. | 1565 | * This routine returns max. credits that the extent tree can consume. |
1566 | * It should be OK for low-performance paths like ->writepage() | 1566 | * It should be OK for low-performance paths like ->writepage() |
1567 | * To allow many writing processes to fit into a single transaction, | 1567 | * To allow many writing processes to fit into a single transaction, |
1568 | * the caller should calculate credits under truncate_mutex and | 1568 | * the caller should calculate credits under i_data_sem and |
1569 | * pass the actual path. | 1569 | * pass the actual path. |
1570 | */ | 1570 | */ |
1571 | int ext4_ext_calc_credits_for_insert(struct inode *inode, | 1571 | int ext4_ext_calc_credits_for_insert(struct inode *inode, |
@@ -2131,7 +2131,8 @@ out: | |||
2131 | 2131 | ||
2132 | /* | 2132 | /* |
2133 | * Need to be called with | 2133 | * Need to be called with |
2134 | * mutex_lock(&EXT4_I(inode)->truncate_mutex); | 2134 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block |
2135 | * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) | ||
2135 | */ | 2136 | */ |
2136 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 2137 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
2137 | ext4_lblk_t iblock, | 2138 | ext4_lblk_t iblock, |
@@ -2350,7 +2351,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) | |||
2350 | if (page) | 2351 | if (page) |
2351 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); | 2352 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); |
2352 | 2353 | ||
2353 | mutex_lock(&EXT4_I(inode)->truncate_mutex); | 2354 | down_write(&EXT4_I(inode)->i_data_sem); |
2354 | ext4_ext_invalidate_cache(inode); | 2355 | ext4_ext_invalidate_cache(inode); |
2355 | 2356 | ||
2356 | /* | 2357 | /* |
@@ -2386,7 +2387,7 @@ out_stop: | |||
2386 | if (inode->i_nlink) | 2387 | if (inode->i_nlink) |
2387 | ext4_orphan_del(handle, inode); | 2388 | ext4_orphan_del(handle, inode); |
2388 | 2389 | ||
2389 | mutex_unlock(&EXT4_I(inode)->truncate_mutex); | 2390 | up_write(&EXT4_I(inode)->i_data_sem); |
2390 | ext4_journal_stop(handle); | 2391 | ext4_journal_stop(handle); |
2391 | } | 2392 | } |
2392 | 2393 | ||
@@ -2450,7 +2451,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
2450 | * modify 1 super block, 1 block bitmap and 1 group descriptor. | 2451 | * modify 1 super block, 1 block bitmap and 1 group descriptor. |
2451 | */ | 2452 | */ |
2452 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; | 2453 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; |
2453 | mutex_lock(&EXT4_I(inode)->truncate_mutex) | 2454 | down_write((&EXT4_I(inode)->i_data_sem)); |
2454 | retry: | 2455 | retry: |
2455 | while (ret >= 0 && ret < max_blocks) { | 2456 | while (ret >= 0 && ret < max_blocks) { |
2456 | block = block + ret; | 2457 | block = block + ret; |
@@ -2507,7 +2508,7 @@ retry: | |||
2507 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2508 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
2508 | goto retry; | 2509 | goto retry; |
2509 | 2510 | ||
2510 | mutex_unlock(&EXT4_I(inode)->truncate_mutex) | 2511 | up_write((&EXT4_I(inode)->i_data_sem)); |
2511 | /* | 2512 | /* |
2512 | * Time to update the file size. | 2513 | * Time to update the file size. |
2513 | * Update only when preallocation was requested beyond the file size. | 2514 | * Update only when preallocation was requested beyond the file size. |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a6b2aa14626e..ac35ec58db55 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -37,9 +37,9 @@ static int ext4_release_file (struct inode * inode, struct file * filp) | |||
37 | if ((filp->f_mode & FMODE_WRITE) && | 37 | if ((filp->f_mode & FMODE_WRITE) && |
38 | (atomic_read(&inode->i_writecount) == 1)) | 38 | (atomic_read(&inode->i_writecount) == 1)) |
39 | { | 39 | { |
40 | mutex_lock(&EXT4_I(inode)->truncate_mutex); | 40 | down_write(&EXT4_I(inode)->i_data_sem); |
41 | ext4_discard_reservation(inode); | 41 | ext4_discard_reservation(inode); |
42 | mutex_unlock(&EXT4_I(inode)->truncate_mutex); | 42 | up_write(&EXT4_I(inode)->i_data_sem); |
43 | } | 43 | } |
44 | if (is_dx(inode) && filp->private_data) | 44 | if (is_dx(inode) && filp->private_data) |
45 | ext4_htree_free_dir_info(filp->private_data); | 45 | ext4_htree_free_dir_info(filp->private_data); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 71c7ad0c6723..a7eb8bb4bdd4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -308,7 +308,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
308 | final = ptrs; | 308 | final = ptrs; |
309 | } else { | 309 | } else { |
310 | ext4_warning(inode->i_sb, "ext4_block_to_path", | 310 | ext4_warning(inode->i_sb, "ext4_block_to_path", |
311 | "block %u > max", | 311 | "block %lu > max", |
312 | i_block + direct_blocks + | 312 | i_block + direct_blocks + |
313 | indirect_blocks + double_blocks); | 313 | indirect_blocks + double_blocks); |
314 | } | 314 | } |
@@ -345,7 +345,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
345 | * the whole chain, all way to the data (returns %NULL, *err == 0). | 345 | * the whole chain, all way to the data (returns %NULL, *err == 0). |
346 | * | 346 | * |
347 | * Need to be called with | 347 | * Need to be called with |
348 | * mutex_lock(&EXT4_I(inode)->truncate_mutex) | 348 | * down_read(&EXT4_I(inode)->i_data_sem) |
349 | */ | 349 | */ |
350 | static Indirect *ext4_get_branch(struct inode *inode, int depth, | 350 | static Indirect *ext4_get_branch(struct inode *inode, int depth, |
351 | ext4_lblk_t *offsets, | 351 | ext4_lblk_t *offsets, |
@@ -777,7 +777,8 @@ err_out: | |||
777 | * | 777 | * |
778 | * | 778 | * |
779 | * Need to be called with | 779 | * Need to be called with |
780 | * mutex_lock(&EXT4_I(inode)->truncate_mutex) | 780 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block |
781 | * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) | ||
781 | */ | 782 | */ |
782 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 783 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, |
783 | ext4_lblk_t iblock, unsigned long maxblocks, | 784 | ext4_lblk_t iblock, unsigned long maxblocks, |
@@ -865,7 +866,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
865 | err = ext4_splice_branch(handle, inode, iblock, | 866 | err = ext4_splice_branch(handle, inode, iblock, |
866 | partial, indirect_blks, count); | 867 | partial, indirect_blks, count); |
867 | /* | 868 | /* |
868 | * i_disksize growing is protected by truncate_mutex. Don't forget to | 869 | * i_disksize growing is protected by i_data_sem. Don't forget to |
869 | * protect it if you're about to implement concurrent | 870 | * protect it if you're about to implement concurrent |
870 | * ext4_get_block() -bzzz | 871 | * ext4_get_block() -bzzz |
871 | */ | 872 | */ |
@@ -895,6 +896,31 @@ out: | |||
895 | 896 | ||
896 | #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) | 897 | #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) |
897 | 898 | ||
899 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | ||
900 | unsigned long max_blocks, struct buffer_head *bh, | ||
901 | int create, int extend_disksize) | ||
902 | { | ||
903 | int retval; | ||
904 | if (create) { | ||
905 | down_write((&EXT4_I(inode)->i_data_sem)); | ||
906 | } else { | ||
907 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
908 | } | ||
909 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | ||
910 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | ||
911 | bh, create, extend_disksize); | ||
912 | } else { | ||
913 | retval = ext4_get_blocks_handle(handle, inode, block, | ||
914 | max_blocks, bh, create, extend_disksize); | ||
915 | } | ||
916 | if (create) { | ||
917 | up_write((&EXT4_I(inode)->i_data_sem)); | ||
918 | } else { | ||
919 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
920 | } | ||
921 | return retval; | ||
922 | } | ||
923 | |||
898 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 924 | static int ext4_get_block(struct inode *inode, sector_t iblock, |
899 | struct buffer_head *bh_result, int create) | 925 | struct buffer_head *bh_result, int create) |
900 | { | 926 | { |
@@ -1399,7 +1425,7 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | |||
1399 | * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... | 1425 | * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... |
1400 | * | 1426 | * |
1401 | * Same applies to ext4_get_block(). We will deadlock on various things like | 1427 | * Same applies to ext4_get_block(). We will deadlock on various things like |
1402 | * lock_journal and i_truncate_mutex. | 1428 | * lock_journal and i_data_sem |
1403 | * | 1429 | * |
1404 | * Setting PF_MEMALLOC here doesn't work - too many internal memory | 1430 | * Setting PF_MEMALLOC here doesn't work - too many internal memory |
1405 | * allocations fail. | 1431 | * allocations fail. |
@@ -2325,7 +2351,7 @@ void ext4_truncate(struct inode *inode) | |||
2325 | * From here we block out all ext4_get_block() callers who want to | 2351 | * From here we block out all ext4_get_block() callers who want to |
2326 | * modify the block allocation tree. | 2352 | * modify the block allocation tree. |
2327 | */ | 2353 | */ |
2328 | mutex_lock(&ei->truncate_mutex); | 2354 | down_write(&ei->i_data_sem); |
2329 | 2355 | ||
2330 | if (n == 1) { /* direct blocks */ | 2356 | if (n == 1) { /* direct blocks */ |
2331 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 2357 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
@@ -2389,7 +2415,7 @@ do_indirects: | |||
2389 | 2415 | ||
2390 | ext4_discard_reservation(inode); | 2416 | ext4_discard_reservation(inode); |
2391 | 2417 | ||
2392 | mutex_unlock(&ei->truncate_mutex); | 2418 | up_write(&ei->i_data_sem); |
2393 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 2419 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
2394 | ext4_mark_inode_dirty(handle, inode); | 2420 | ext4_mark_inode_dirty(handle, inode); |
2395 | 2421 | ||
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e7f894bdb420..c0e5b8cf635c 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -199,7 +199,7 @@ flags_err: | |||
199 | * need to allocate reservation structure for this inode | 199 | * need to allocate reservation structure for this inode |
200 | * before set the window size | 200 | * before set the window size |
201 | */ | 201 | */ |
202 | mutex_lock(&ei->truncate_mutex); | 202 | down_write(&ei->i_data_sem); |
203 | if (!ei->i_block_alloc_info) | 203 | if (!ei->i_block_alloc_info) |
204 | ext4_init_block_alloc_info(inode); | 204 | ext4_init_block_alloc_info(inode); |
205 | 205 | ||
@@ -207,7 +207,7 @@ flags_err: | |||
207 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; | 207 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; |
208 | rsv->rsv_goal_size = rsv_window_size; | 208 | rsv->rsv_goal_size = rsv_window_size; |
209 | } | 209 | } |
210 | mutex_unlock(&ei->truncate_mutex); | 210 | up_write(&ei->i_data_sem); |
211 | return 0; | 211 | return 0; |
212 | } | 212 | } |
213 | case EXT4_IOC_GROUP_EXTEND: { | 213 | case EXT4_IOC_GROUP_EXTEND: { |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index effd375ece80..c7305443e100 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -593,7 +593,7 @@ static void init_once(struct kmem_cache *cachep, void *foo) | |||
593 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 593 | #ifdef CONFIG_EXT4DEV_FS_XATTR |
594 | init_rwsem(&ei->xattr_sem); | 594 | init_rwsem(&ei->xattr_sem); |
595 | #endif | 595 | #endif |
596 | mutex_init(&ei->truncate_mutex); | 596 | init_rwsem(&ei->i_data_sem); |
597 | inode_init_once(&ei->vfs_inode); | 597 | inode_init_once(&ei->vfs_inode); |
598 | } | 598 | } |
599 | 599 | ||
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 583049c1d366..300cc5a5adb9 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h | |||
@@ -1107,27 +1107,10 @@ extern void ext4_ext_init(struct super_block *); | |||
1107 | extern void ext4_ext_release(struct super_block *); | 1107 | extern void ext4_ext_release(struct super_block *); |
1108 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | 1108 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, |
1109 | loff_t len); | 1109 | loff_t len); |
1110 | static inline int | 1110 | extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, |
1111 | ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | 1111 | sector_t block, unsigned long max_blocks, |
1112 | unsigned long max_blocks, struct buffer_head *bh, | 1112 | struct buffer_head *bh, int create, |
1113 | int create, int extend_disksize) | 1113 | int extend_disksize); |
1114 | { | ||
1115 | int retval; | ||
1116 | mutex_lock(&EXT4_I(inode)->truncate_mutex); | ||
1117 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | ||
1118 | retval = ext4_ext_get_blocks(handle, inode, | ||
1119 | (ext4_lblk_t)block, max_blocks, | ||
1120 | bh, create, extend_disksize); | ||
1121 | } else { | ||
1122 | retval = ext4_get_blocks_handle(handle, inode, | ||
1123 | (ext4_lblk_t)block, max_blocks, | ||
1124 | bh, create, extend_disksize); | ||
1125 | } | ||
1126 | mutex_unlock(&EXT4_I(inode)->truncate_mutex); | ||
1127 | return retval; | ||
1128 | } | ||
1129 | |||
1130 | |||
1131 | #endif /* __KERNEL__ */ | 1114 | #endif /* __KERNEL__ */ |
1132 | 1115 | ||
1133 | #endif /* _LINUX_EXT4_FS_H */ | 1116 | #endif /* _LINUX_EXT4_FS_H */ |
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index f1cd4934e46f..4377d249d378 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h | |||
@@ -139,16 +139,16 @@ struct ext4_inode_info { | |||
139 | __u16 i_extra_isize; | 139 | __u16 i_extra_isize; |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * truncate_mutex is for serialising ext4_truncate() against | 142 | * i_data_sem is for serialising ext4_truncate() against |
143 | * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's | 143 | * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's |
144 | * data tree are chopped off during truncate. We can't do that in | 144 | * data tree are chopped off during truncate. We can't do that in |
145 | * ext4 because whenever we perform intermediate commits during | 145 | * ext4 because whenever we perform intermediate commits during |
146 | * truncate, the inode and all the metadata blocks *must* be in a | 146 | * truncate, the inode and all the metadata blocks *must* be in a |
147 | * consistent state which allows truncation of the orphans to restart | 147 | * consistent state which allows truncation of the orphans to restart |
148 | * during recovery. Hence we must fix the get_block-vs-truncate race | 148 | * during recovery. Hence we must fix the get_block-vs-truncate race |
149 | * by other means, so we have truncate_mutex. | 149 | * by other means, so we have i_data_sem. |
150 | */ | 150 | */ |
151 | struct mutex truncate_mutex; | 151 | struct rw_semaphore i_data_sem; |
152 | struct inode vfs_inode; | 152 | struct inode vfs_inode; |
153 | 153 | ||
154 | unsigned long i_ext_generation; | 154 | unsigned long i_ext_generation; |