aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2008-01-28 23:58:26 -0500
committer Theodore Ts'o <tytso@mit.edu> 2008-01-28 23:58:26 -0500
commit 0e855ac8b103ef579052936b59fe7c599ac422a4 (patch)
tree ec29f82e1d7bb1987dcadc00497daf69d6955483
parent c278bfecebfb1ed67c326ef472660878baa745cd (diff)
ext4: Convert truncate_mutex to read write semaphore.
We are currently taking the truncate_mutex for every read. This has a performance impact on large CPU configurations. Convert the lock to a read-write semaphore and take the read lock when we are only trying to read the file.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
-rw-r--r-- fs/ext4/balloc.c 2
-rw-r--r-- fs/ext4/extents.c 13
-rw-r--r-- fs/ext4/file.c 4
-rw-r--r-- fs/ext4/inode.c 40
-rw-r--r-- fs/ext4/ioctl.c 4
-rw-r--r-- fs/ext4/super.c 2
-rw-r--r-- include/linux/ext4_fs.h 25
-rw-r--r-- include/linux/ext4_fs_i.h 6
8 files changed, 53 insertions, 43 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d460223b8e1d..7ae223ed152f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -526,7 +526,7 @@ static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
526 * when setting the reservation window size through ioctl before the file 526 * when setting the reservation window size through ioctl before the file
527 * is open for write (needs block allocation). 527 * is open for write (needs block allocation).
528 * 528 *
529 * Needs truncate_mutex protection prior to call this function. 529 * Needs down_write(i_data_sem) protection prior to call this function.
530 */ 530 */
531void ext4_init_block_alloc_info(struct inode *inode) 531void ext4_init_block_alloc_info(struct inode *inode)
532{ 532{
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ec5019fa552f..03d1bbb78a2f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1565,7 +1565,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1565 * This routine returns max. credits that the extent tree can consume. 1565 * This routine returns max. credits that the extent tree can consume.
1566 * It should be OK for low-performance paths like ->writepage() 1566 * It should be OK for low-performance paths like ->writepage()
1567 * To allow many writing processes to fit into a single transaction, 1567 * To allow many writing processes to fit into a single transaction,
1568 * the caller should calculate credits under truncate_mutex and 1568 * the caller should calculate credits under i_data_sem and
1569 * pass the actual path. 1569 * pass the actual path.
1570 */ 1570 */
1571int ext4_ext_calc_credits_for_insert(struct inode *inode, 1571int ext4_ext_calc_credits_for_insert(struct inode *inode,
@@ -2131,7 +2131,8 @@ out:
2131 2131
2132/* 2132/*
2133 * Need to be called with 2133 * Need to be called with
2134 * mutex_lock(&EXT4_I(inode)->truncate_mutex); 2134 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
2135 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
2135 */ 2136 */
2136int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 2137int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2137 ext4_lblk_t iblock, 2138 ext4_lblk_t iblock,
@@ -2350,7 +2351,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
2350 if (page) 2351 if (page)
2351 ext4_block_truncate_page(handle, page, mapping, inode->i_size); 2352 ext4_block_truncate_page(handle, page, mapping, inode->i_size);
2352 2353
2353 mutex_lock(&EXT4_I(inode)->truncate_mutex); 2354 down_write(&EXT4_I(inode)->i_data_sem);
2354 ext4_ext_invalidate_cache(inode); 2355 ext4_ext_invalidate_cache(inode);
2355 2356
2356 /* 2357 /*
@@ -2386,7 +2387,7 @@ out_stop:
2386 if (inode->i_nlink) 2387 if (inode->i_nlink)
2387 ext4_orphan_del(handle, inode); 2388 ext4_orphan_del(handle, inode);
2388 2389
2389 mutex_unlock(&EXT4_I(inode)->truncate_mutex); 2390 up_write(&EXT4_I(inode)->i_data_sem);
2390 ext4_journal_stop(handle); 2391 ext4_journal_stop(handle);
2391} 2392}
2392 2393
@@ -2450,7 +2451,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2450 * modify 1 super block, 1 block bitmap and 1 group descriptor. 2451 * modify 1 super block, 1 block bitmap and 1 group descriptor.
2451 */ 2452 */
2452 credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; 2453 credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
2453 mutex_lock(&EXT4_I(inode)->truncate_mutex) 2454 down_write((&EXT4_I(inode)->i_data_sem));
2454retry: 2455retry:
2455 while (ret >= 0 && ret < max_blocks) { 2456 while (ret >= 0 && ret < max_blocks) {
2456 block = block + ret; 2457 block = block + ret;
@@ -2507,7 +2508,7 @@ retry:
2507 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2508 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
2508 goto retry; 2509 goto retry;
2509 2510
2510 mutex_unlock(&EXT4_I(inode)->truncate_mutex) 2511 up_write((&EXT4_I(inode)->i_data_sem));
2511 /* 2512 /*
2512 * Time to update the file size. 2513 * Time to update the file size.
2513 * Update only when preallocation was requested beyond the file size. 2514 * Update only when preallocation was requested beyond the file size.
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a6b2aa14626e..ac35ec58db55 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,9 +37,9 @@ static int ext4_release_file (struct inode * inode, struct file * filp)
37 if ((filp->f_mode & FMODE_WRITE) && 37 if ((filp->f_mode & FMODE_WRITE) &&
38 (atomic_read(&inode->i_writecount) == 1)) 38 (atomic_read(&inode->i_writecount) == 1))
39 { 39 {
40 mutex_lock(&EXT4_I(inode)->truncate_mutex); 40 down_write(&EXT4_I(inode)->i_data_sem);
41 ext4_discard_reservation(inode); 41 ext4_discard_reservation(inode);
42 mutex_unlock(&EXT4_I(inode)->truncate_mutex); 42 up_write(&EXT4_I(inode)->i_data_sem);
43 } 43 }
44 if (is_dx(inode) && filp->private_data) 44 if (is_dx(inode) && filp->private_data)
45 ext4_htree_free_dir_info(filp->private_data); 45 ext4_htree_free_dir_info(filp->private_data);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 71c7ad0c6723..a7eb8bb4bdd4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -308,7 +308,7 @@ static int ext4_block_to_path(struct inode *inode,
308 final = ptrs; 308 final = ptrs;
309 } else { 309 } else {
310 ext4_warning(inode->i_sb, "ext4_block_to_path", 310 ext4_warning(inode->i_sb, "ext4_block_to_path",
311 "block %u > max", 311 "block %lu > max",
312 i_block + direct_blocks + 312 i_block + direct_blocks +
313 indirect_blocks + double_blocks); 313 indirect_blocks + double_blocks);
314 } 314 }
@@ -345,7 +345,7 @@ static int ext4_block_to_path(struct inode *inode,
345 * the whole chain, all way to the data (returns %NULL, *err == 0). 345 * the whole chain, all way to the data (returns %NULL, *err == 0).
346 * 346 *
347 * Need to be called with 347 * Need to be called with
348 * mutex_lock(&EXT4_I(inode)->truncate_mutex) 348 * down_read(&EXT4_I(inode)->i_data_sem)
349 */ 349 */
350static Indirect *ext4_get_branch(struct inode *inode, int depth, 350static Indirect *ext4_get_branch(struct inode *inode, int depth,
351 ext4_lblk_t *offsets, 351 ext4_lblk_t *offsets,
@@ -777,7 +777,8 @@ err_out:
777 * 777 *
778 * 778 *
779 * Need to be called with 779 * Need to be called with
780 * mutex_lock(&EXT4_I(inode)->truncate_mutex) 780 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
781 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
781 */ 782 */
782int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, 783int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
783 ext4_lblk_t iblock, unsigned long maxblocks, 784 ext4_lblk_t iblock, unsigned long maxblocks,
@@ -865,7 +866,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
865 err = ext4_splice_branch(handle, inode, iblock, 866 err = ext4_splice_branch(handle, inode, iblock,
866 partial, indirect_blks, count); 867 partial, indirect_blks, count);
867 /* 868 /*
868 * i_disksize growing is protected by truncate_mutex. Don't forget to 869 * i_disksize growing is protected by i_data_sem. Don't forget to
869 * protect it if you're about to implement concurrent 870 * protect it if you're about to implement concurrent
870 * ext4_get_block() -bzzz 871 * ext4_get_block() -bzzz
871 */ 872 */
@@ -895,6 +896,31 @@ out:
895 896
896#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) 897#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
897 898
899int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
900 unsigned long max_blocks, struct buffer_head *bh,
901 int create, int extend_disksize)
902{
903 int retval;
904 if (create) {
905 down_write((&EXT4_I(inode)->i_data_sem));
906 } else {
907 down_read((&EXT4_I(inode)->i_data_sem));
908 }
909 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
910 retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
911 bh, create, extend_disksize);
912 } else {
913 retval = ext4_get_blocks_handle(handle, inode, block,
914 max_blocks, bh, create, extend_disksize);
915 }
916 if (create) {
917 up_write((&EXT4_I(inode)->i_data_sem));
918 } else {
919 up_read((&EXT4_I(inode)->i_data_sem));
920 }
921 return retval;
922}
923
898static int ext4_get_block(struct inode *inode, sector_t iblock, 924static int ext4_get_block(struct inode *inode, sector_t iblock,
899 struct buffer_head *bh_result, int create) 925 struct buffer_head *bh_result, int create)
900{ 926{
@@ -1399,7 +1425,7 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
1399 * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... 1425 * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
1400 * 1426 *
1401 * Same applies to ext4_get_block(). We will deadlock on various things like 1427 * Same applies to ext4_get_block(). We will deadlock on various things like
1402 * lock_journal and i_truncate_mutex. 1428 * lock_journal and i_data_sem
1403 * 1429 *
1404 * Setting PF_MEMALLOC here doesn't work - too many internal memory 1430 * Setting PF_MEMALLOC here doesn't work - too many internal memory
1405 * allocations fail. 1431 * allocations fail.
@@ -2325,7 +2351,7 @@ void ext4_truncate(struct inode *inode)
2325 * From here we block out all ext4_get_block() callers who want to 2351 * From here we block out all ext4_get_block() callers who want to
2326 * modify the block allocation tree. 2352 * modify the block allocation tree.
2327 */ 2353 */
2328 mutex_lock(&ei->truncate_mutex); 2354 down_write(&ei->i_data_sem);
2329 2355
2330 if (n == 1) { /* direct blocks */ 2356 if (n == 1) { /* direct blocks */
2331 ext4_free_data(handle, inode, NULL, i_data+offsets[0], 2357 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
@@ -2389,7 +2415,7 @@ do_indirects:
2389 2415
2390 ext4_discard_reservation(inode); 2416 ext4_discard_reservation(inode);
2391 2417
2392 mutex_unlock(&ei->truncate_mutex); 2418 up_write(&ei->i_data_sem);
2393 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 2419 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2394 ext4_mark_inode_dirty(handle, inode); 2420 ext4_mark_inode_dirty(handle, inode);
2395 2421
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e7f894bdb420..c0e5b8cf635c 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -199,7 +199,7 @@ flags_err:
199 * need to allocate reservation structure for this inode 199 * need to allocate reservation structure for this inode
200 * before set the window size 200 * before set the window size
201 */ 201 */
202 mutex_lock(&ei->truncate_mutex); 202 down_write(&ei->i_data_sem);
203 if (!ei->i_block_alloc_info) 203 if (!ei->i_block_alloc_info)
204 ext4_init_block_alloc_info(inode); 204 ext4_init_block_alloc_info(inode);
205 205
@@ -207,7 +207,7 @@ flags_err:
207 struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; 207 struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
208 rsv->rsv_goal_size = rsv_window_size; 208 rsv->rsv_goal_size = rsv_window_size;
209 } 209 }
210 mutex_unlock(&ei->truncate_mutex); 210 up_write(&ei->i_data_sem);
211 return 0; 211 return 0;
212 } 212 }
213 case EXT4_IOC_GROUP_EXTEND: { 213 case EXT4_IOC_GROUP_EXTEND: {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index effd375ece80..c7305443e100 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -593,7 +593,7 @@ static void init_once(struct kmem_cache *cachep, void *foo)
593#ifdef CONFIG_EXT4DEV_FS_XATTR 593#ifdef CONFIG_EXT4DEV_FS_XATTR
594 init_rwsem(&ei->xattr_sem); 594 init_rwsem(&ei->xattr_sem);
595#endif 595#endif
596 mutex_init(&ei->truncate_mutex); 596 init_rwsem(&ei->i_data_sem);
597 inode_init_once(&ei->vfs_inode); 597 inode_init_once(&ei->vfs_inode);
598} 598}
599 599
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 583049c1d366..300cc5a5adb9 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -1107,27 +1107,10 @@ extern void ext4_ext_init(struct super_block *);
1107extern void ext4_ext_release(struct super_block *); 1107extern void ext4_ext_release(struct super_block *);
1108extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, 1108extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1109 loff_t len); 1109 loff_t len);
1110static inline int 1110extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1111ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 1111 sector_t block, unsigned long max_blocks,
1112 unsigned long max_blocks, struct buffer_head *bh, 1112 struct buffer_head *bh, int create,
1113 int create, int extend_disksize) 1113 int extend_disksize);
1114{
1115 int retval;
1116 mutex_lock(&EXT4_I(inode)->truncate_mutex);
1117 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
1118 retval = ext4_ext_get_blocks(handle, inode,
1119 (ext4_lblk_t)block, max_blocks,
1120 bh, create, extend_disksize);
1121 } else {
1122 retval = ext4_get_blocks_handle(handle, inode,
1123 (ext4_lblk_t)block, max_blocks,
1124 bh, create, extend_disksize);
1125 }
1126 mutex_unlock(&EXT4_I(inode)->truncate_mutex);
1127 return retval;
1128}
1129
1130
1131#endif /* __KERNEL__ */ 1114#endif /* __KERNEL__ */
1132 1115
1133#endif /* _LINUX_EXT4_FS_H */ 1116#endif /* _LINUX_EXT4_FS_H */
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
index f1cd4934e46f..4377d249d378 100644
--- a/include/linux/ext4_fs_i.h
+++ b/include/linux/ext4_fs_i.h
@@ -139,16 +139,16 @@ struct ext4_inode_info {
139 __u16 i_extra_isize; 139 __u16 i_extra_isize;
140 140
141 /* 141 /*
142 * truncate_mutex is for serialising ext4_truncate() against 142 * i_data_sem is for serialising ext4_truncate() against
143 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's 143 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
144 * data tree are chopped off during truncate. We can't do that in 144 * data tree are chopped off during truncate. We can't do that in
145 * ext4 because whenever we perform intermediate commits during 145 * ext4 because whenever we perform intermediate commits during
146 * truncate, the inode and all the metadata blocks *must* be in a 146 * truncate, the inode and all the metadata blocks *must* be in a
147 * consistent state which allows truncation of the orphans to restart 147 * consistent state which allows truncation of the orphans to restart
148 * during recovery. Hence we must fix the get_block-vs-truncate race 148 * during recovery. Hence we must fix the get_block-vs-truncate race
149 * by other means, so we have truncate_mutex. 149 * by other means, so we have i_data_sem.
150 */ 150 */
151 struct mutex truncate_mutex; 151 struct rw_semaphore i_data_sem;
152 struct inode vfs_inode; 152 struct inode vfs_inode;
153 153
154 unsigned long i_ext_generation; 154 unsigned long i_ext_generation;