aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2011-06-24 14:29:43 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-07-20 20:47:46 -0400
commitbd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 (patch)
treeef5341c7747f809aec7ae233f6e3ef90af39be5f
parentf9b5570d7fdedff32a2e78102bfb54cd1b12b289 (diff)
fs: kill i_alloc_sem
i_alloc_sem is a rather special rw_semaphore. It's the last one that may be released by a non-owner, and its write side is always mirrored by real exclusion. Its intended use is to wait for all pending direct I/O requests to finish before starting a truncate. Replace it with a hand-grown construct: - exclusion for truncates is already guaranteed by i_mutex, so it can simply fall away - the reader side is replaced by an i_dio_count member in struct inode that counts the number of pending direct I/O requests. Truncate can't proceed as long as it's non-zero - when i_dio_count reaches zero we wake up a pending truncate using wake_up_bit on a new bit in i_state - new references to i_dio_count can't appear while we are waiting for it to reach zero because the direct I/O code always needs i_mutex (or an equivalent like XFS's i_iolock) for starting a new operation. This scheme is much simpler, and saves the space of a spinlock_t and a struct list_head in struct inode (typically 160 bits on a non-debug 64-bit system). Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/attr.c5
-rw-r--r--fs/direct-io.c65
-rw-r--r--fs/inode.c3
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ntfs/inode.c10
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/file.c15
-rw-r--r--fs/reiserfs/xattr.c3
-rw-r--r--include/linux/fs.h11
-rw-r--r--mm/filemap.c3
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/rmap.c1
-rw-r--r--mm/truncate.c3
13 files changed, 78 insertions, 53 deletions
diff --git a/fs/attr.c b/fs/attr.c
index caf2aa521e2..f177ac86fa4 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -233,16 +233,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
233 return error; 233 return error;
234 234
235 if (ia_valid & ATTR_SIZE) 235 if (ia_valid & ATTR_SIZE)
236 down_write(&dentry->d_inode->i_alloc_sem); 236 inode_dio_wait(inode);
237 237
238 if (inode->i_op->setattr) 238 if (inode->i_op->setattr)
239 error = inode->i_op->setattr(dentry, attr); 239 error = inode->i_op->setattr(dentry, attr);
240 else 240 else
241 error = simple_setattr(dentry, attr); 241 error = simple_setattr(dentry, attr);
242 242
243 if (ia_valid & ATTR_SIZE)
244 up_write(&dentry->d_inode->i_alloc_sem);
245
246 if (!error) 243 if (!error)
247 fsnotify_change(dentry, ia_valid); 244 fsnotify_change(dentry, ia_valid);
248 245
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 98ce3ac0d94..354cbdbc14b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -135,6 +135,50 @@ struct dio {
135 struct page *pages[DIO_PAGES]; /* page buffer */ 135 struct page *pages[DIO_PAGES]; /* page buffer */
136}; 136};
137 137
138static void __inode_dio_wait(struct inode *inode)
139{
140 wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
141 DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
142
143 do {
144 prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
145 if (atomic_read(&inode->i_dio_count))
146 schedule();
147 } while (atomic_read(&inode->i_dio_count));
148 finish_wait(wq, &q.wait);
149}
150
151/**
152 * inode_dio_wait - wait for outstanding DIO requests to finish
153 * @inode: inode to wait for
154 *
155 * Waits for all pending direct I/O requests to finish so that we can
156 * proceed with a truncate or equivalent operation.
157 *
158 * Must be called under a lock that serializes taking new references
159 * to i_dio_count, usually by inode->i_mutex.
160 */
161void inode_dio_wait(struct inode *inode)
162{
163 if (atomic_read(&inode->i_dio_count))
164 __inode_dio_wait(inode);
165}
166EXPORT_SYMBOL_GPL(inode_dio_wait);
167
168/*
169 * inode_dio_done - signal finish of a direct I/O request
170 * @inode: inode the direct I/O happens on
171 *
172 * This is called once we've finished processing a direct I/O request,
173 * and is used to wake up callers waiting for direct I/O to be quiesced.
174 */
175void inode_dio_done(struct inode *inode)
176{
177 if (atomic_dec_and_test(&inode->i_dio_count))
178 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
179}
180EXPORT_SYMBOL_GPL(inode_dio_done);
181
138/* 182/*
139 * How many pages are in the queue? 183 * How many pages are in the queue?
140 */ 184 */
@@ -254,9 +298,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
254 } 298 }
255 299
256 if (dio->flags & DIO_LOCKING) 300 if (dio->flags & DIO_LOCKING)
257 /* lockdep: non-owner release */ 301 inode_dio_done(dio->inode);
258 up_read_non_owner(&dio->inode->i_alloc_sem);
259
260 return ret; 302 return ret;
261} 303}
262 304
@@ -980,9 +1022,6 @@ out:
980 return ret; 1022 return ret;
981} 1023}
982 1024
983/*
984 * Releases both i_mutex and i_alloc_sem
985 */
986static ssize_t 1025static ssize_t
987direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 1026direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
988 const struct iovec *iov, loff_t offset, unsigned long nr_segs, 1027 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
@@ -1146,15 +1185,14 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1146 * For writes this function is called under i_mutex and returns with 1185 * For writes this function is called under i_mutex and returns with
1147 * i_mutex held, for reads, i_mutex is not held on entry, but it is 1186 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1148 * taken and dropped again before returning. 1187 * taken and dropped again before returning.
1149 * For reads and writes i_alloc_sem is taken in shared mode and released 1188 * The i_dio_count counter keeps track of the number of outstanding
1150 * on I/O completion (which may happen asynchronously after returning to 1189 * direct I/O requests, and truncate waits for it to reach zero.
1151 * the caller). 1190 * New references to i_dio_count must only be grabbed with i_mutex
1191 * held.
1152 * 1192 *
1153 * - if the flags value does NOT contain DIO_LOCKING we don't use any 1193 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1154 * internal locking but rather rely on the filesystem to synchronize 1194 * internal locking but rather rely on the filesystem to synchronize
1155 * direct I/O reads/writes versus each other and truncate. 1195 * direct I/O reads/writes versus each other and truncate.
1156 * For reads and writes both i_mutex and i_alloc_sem are not held on
1157 * entry and are never taken.
1158 */ 1196 */
1159ssize_t 1197ssize_t
1160__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1198__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
@@ -1234,10 +1272,9 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1234 } 1272 }
1235 1273
1236 /* 1274 /*
1237 * Will be released at I/O completion, possibly in a 1275 * Will be decremented at I/O completion time.
1238 * different thread.
1239 */ 1276 */
1240 down_read_non_owner(&inode->i_alloc_sem); 1277 atomic_inc(&inode->i_dio_count);
1241 } 1278 }
1242 1279
1243 /* 1280 /*
diff --git a/fs/inode.c b/fs/inode.c
index cf81baf1898..96c77b81167 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -168,8 +168,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
168 mutex_init(&inode->i_mutex); 168 mutex_init(&inode->i_mutex);
169 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); 169 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
170 170
171 init_rwsem(&inode->i_alloc_sem); 171 atomic_set(&inode->i_dio_count, 0);
172 lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
173 172
174 mapping->a_ops = &empty_aops; 173 mapping->a_ops = &empty_aops;
175 mapping->host = inode; 174 mapping->host = inode;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f4b1057abdd..b59f5ac26be 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1832,9 +1832,8 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1832 * fails again. 1832 * fails again.
1833 */ 1833 */
1834 if (unlikely(NInoTruncateFailed(ni))) { 1834 if (unlikely(NInoTruncateFailed(ni))) {
1835 down_write(&vi->i_alloc_sem); 1835 inode_dio_wait(vi);
1836 err = ntfs_truncate(vi); 1836 err = ntfs_truncate(vi);
1837 up_write(&vi->i_alloc_sem);
1838 if (err || NInoTruncateFailed(ni)) { 1837 if (err || NInoTruncateFailed(ni)) {
1839 if (!err) 1838 if (!err)
1840 err = -EIO; 1839 err = -EIO;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index c05d6dcf77a..1371487da95 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2357,12 +2357,7 @@ static const char *es = " Leaving inconsistent metadata. Unmount and run "
2357 * 2357 *
2358 * Returns 0 on success or -errno on error. 2358 * Returns 0 on success or -errno on error.
2359 * 2359 *
2360 * Called with ->i_mutex held. In all but one case ->i_alloc_sem is held for 2360 * Called with ->i_mutex held.
2361 * writing. The only case in the kernel where ->i_alloc_sem is not held is
2362 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
2363 * with the current i_size as the offset. The analogous place in NTFS is in
2364 * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
2365 * without holding ->i_alloc_sem.
2366 */ 2361 */
2367int ntfs_truncate(struct inode *vi) 2362int ntfs_truncate(struct inode *vi)
2368{ 2363{
@@ -2887,8 +2882,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
2887 * We also abort all changes of user, group, and mode as we do not implement 2882 * We also abort all changes of user, group, and mode as we do not implement
2888 * the NTFS ACLs yet. 2883 * the NTFS ACLs yet.
2889 * 2884 *
2890 * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also 2885 * Called with ->i_mutex held.
2891 * called with ->i_alloc_sem held for writing.
2892 */ 2886 */
2893int ntfs_setattr(struct dentry *dentry, struct iattr *attr) 2887int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2894{ 2888{
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index ac97bca282d..de1d3953599 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -551,9 +551,8 @@ bail:
551 551
552/* 552/*
553 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're 553 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
554 * particularly interested in the aio/dio case. Like the core uses 554 * particularly interested in the aio/dio case. We use the rw_lock DLM lock
555 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from 555 * to protect io on one node from truncation on another.
556 * truncation on another.
557 */ 556 */
558static void ocfs2_dio_end_io(struct kiocb *iocb, 557static void ocfs2_dio_end_io(struct kiocb *iocb,
559 loff_t offset, 558 loff_t offset,
@@ -569,7 +568,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
569 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 568 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
570 569
571 if (ocfs2_iocb_is_sem_locked(iocb)) { 570 if (ocfs2_iocb_is_sem_locked(iocb)) {
572 up_read(&inode->i_alloc_sem); 571 inode_dio_done(inode);
573 ocfs2_iocb_clear_sem_locked(iocb); 572 ocfs2_iocb_clear_sem_locked(iocb);
574 } 573 }
575 574
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1406c37a572..2c3a465514a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2236,9 +2236,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2236 ocfs2_iocb_clear_sem_locked(iocb); 2236 ocfs2_iocb_clear_sem_locked(iocb);
2237 2237
2238relock: 2238relock:
2239 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2239 /* to match setattr's i_mutex -> rw_lock ordering */
2240 if (direct_io) { 2240 if (direct_io) {
2241 down_read(&inode->i_alloc_sem); 2241 atomic_inc(&inode->i_dio_count);
2242 have_alloc_sem = 1; 2242 have_alloc_sem = 1;
2243 /* communicate with ocfs2_dio_end_io */ 2243 /* communicate with ocfs2_dio_end_io */
2244 ocfs2_iocb_set_sem_locked(iocb); 2244 ocfs2_iocb_set_sem_locked(iocb);
@@ -2290,7 +2290,7 @@ relock:
2290 */ 2290 */
2291 if (direct_io && !can_do_direct) { 2291 if (direct_io && !can_do_direct) {
2292 ocfs2_rw_unlock(inode, rw_level); 2292 ocfs2_rw_unlock(inode, rw_level);
2293 up_read(&inode->i_alloc_sem); 2293 inode_dio_done(inode);
2294 2294
2295 have_alloc_sem = 0; 2295 have_alloc_sem = 0;
2296 rw_level = -1; 2296 rw_level = -1;
@@ -2361,8 +2361,7 @@ out_dio:
2361 /* 2361 /*
2362 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io 2362 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
2363 * function pointer which is called when o_direct io completes so that 2363 * function pointer which is called when o_direct io completes so that
2364 * it can unlock our rw lock. (it's the clustered equivalent of 2364 * it can unlock our rw lock.
2365 * i_alloc_sem; protects truncate from racing with pending ios).
2366 * Unfortunately there are error cases which call end_io and others 2365 * Unfortunately there are error cases which call end_io and others
2367 * that don't. so we don't have to unlock the rw_lock if either an 2366 * that don't. so we don't have to unlock the rw_lock if either an
2368 * async dio is going to do it in the future or an end_io after an 2367 * async dio is going to do it in the future or an end_io after an
@@ -2379,7 +2378,7 @@ out:
2379 2378
2380out_sems: 2379out_sems:
2381 if (have_alloc_sem) { 2380 if (have_alloc_sem) {
2382 up_read(&inode->i_alloc_sem); 2381 inode_dio_done(inode);
2383 ocfs2_iocb_clear_sem_locked(iocb); 2382 ocfs2_iocb_clear_sem_locked(iocb);
2384 } 2383 }
2385 2384
@@ -2531,8 +2530,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2531 * need locks to protect pending reads from racing with truncate. 2530 * need locks to protect pending reads from racing with truncate.
2532 */ 2531 */
2533 if (filp->f_flags & O_DIRECT) { 2532 if (filp->f_flags & O_DIRECT) {
2534 down_read(&inode->i_alloc_sem);
2535 have_alloc_sem = 1; 2533 have_alloc_sem = 1;
2534 atomic_inc(&inode->i_dio_count);
2536 ocfs2_iocb_set_sem_locked(iocb); 2535 ocfs2_iocb_set_sem_locked(iocb);
2537 2536
2538 ret = ocfs2_rw_lock(inode, 0); 2537 ret = ocfs2_rw_lock(inode, 0);
@@ -2575,7 +2574,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2575 2574
2576bail: 2575bail:
2577 if (have_alloc_sem) { 2576 if (have_alloc_sem) {
2578 up_read(&inode->i_alloc_sem); 2577 inode_dio_done(inode);
2579 ocfs2_iocb_clear_sem_locked(iocb); 2578 ocfs2_iocb_clear_sem_locked(iocb);
2580 } 2579 }
2581 if (rw_level != -1) 2580 if (rw_level != -1)
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4ea2ab41fde..6938d8c68d6 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -555,11 +555,10 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
555 555
556 reiserfs_write_unlock(inode->i_sb); 556 reiserfs_write_unlock(inode->i_sb);
557 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); 557 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
558 down_write(&dentry->d_inode->i_alloc_sem); 558 inode_dio_wait(dentry->d_inode);
559 reiserfs_write_lock(inode->i_sb); 559 reiserfs_write_lock(inode->i_sb);
560 560
561 err = reiserfs_setattr(dentry, &newattrs); 561 err = reiserfs_setattr(dentry, &newattrs);
562 up_write(&dentry->d_inode->i_alloc_sem);
563 mutex_unlock(&dentry->d_inode->i_mutex); 562 mutex_unlock(&dentry->d_inode->i_mutex);
564 } else 563 } else
565 update_ctime(inode); 564 update_ctime(inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1393742bba9..2fe920774ab 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -779,7 +779,7 @@ struct inode {
779 struct timespec i_ctime; 779 struct timespec i_ctime;
780 blkcnt_t i_blocks; 780 blkcnt_t i_blocks;
781 unsigned short i_bytes; 781 unsigned short i_bytes;
782 struct rw_semaphore i_alloc_sem; 782 atomic_t i_dio_count;
783 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 783 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
784 struct file_lock *i_flock; 784 struct file_lock *i_flock;
785 struct address_space *i_mapping; 785 struct address_space *i_mapping;
@@ -1705,6 +1705,10 @@ struct super_operations {
1705 * set during data writeback, and cleared with a wakeup 1705 * set during data writeback, and cleared with a wakeup
1706 * on the bit address once it is done. 1706 * on the bit address once it is done.
1707 * 1707 *
1708 * I_REFERENCED Marks the inode as recently referenced on the LRU list.
1709 *
1710 * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit().
1711 *
1708 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1712 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1709 */ 1713 */
1710#define I_DIRTY_SYNC (1 << 0) 1714#define I_DIRTY_SYNC (1 << 0)
@@ -1718,6 +1722,8 @@ struct super_operations {
1718#define __I_SYNC 7 1722#define __I_SYNC 7
1719#define I_SYNC (1 << __I_SYNC) 1723#define I_SYNC (1 << __I_SYNC)
1720#define I_REFERENCED (1 << 8) 1724#define I_REFERENCED (1 << 8)
1725#define __I_DIO_WAKEUP 9
1726#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP)
1721 1727
1722#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1728#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1723 1729
@@ -1828,7 +1834,6 @@ struct file_system_type {
1828 struct lock_class_key i_lock_key; 1834 struct lock_class_key i_lock_key;
1829 struct lock_class_key i_mutex_key; 1835 struct lock_class_key i_mutex_key;
1830 struct lock_class_key i_mutex_dir_key; 1836 struct lock_class_key i_mutex_dir_key;
1831 struct lock_class_key i_alloc_sem_key;
1832}; 1837};
1833 1838
1834extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, 1839extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
@@ -2404,6 +2409,8 @@ enum {
2404}; 2409};
2405 2410
2406void dio_end_io(struct bio *bio, int error); 2411void dio_end_io(struct bio *bio, int error);
2412void inode_dio_wait(struct inode *inode);
2413void inode_dio_done(struct inode *inode);
2407 2414
2408ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 2415ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2409 struct block_device *bdev, const struct iovec *iov, loff_t offset, 2416 struct block_device *bdev, const struct iovec *iov, loff_t offset,
diff --git a/mm/filemap.c b/mm/filemap.c
index a8251a8d345..f820e600f1a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -78,9 +78,6 @@
78 * ->i_mutex (generic_file_buffered_write) 78 * ->i_mutex (generic_file_buffered_write)
79 * ->mmap_sem (fault_in_pages_readable->do_page_fault) 79 * ->mmap_sem (fault_in_pages_readable->do_page_fault)
80 * 80 *
81 * ->i_mutex
82 * ->i_alloc_sem (various)
83 *
84 * inode_wb_list_lock 81 * inode_wb_list_lock
85 * sb_lock (fs/fs-writeback.c) 82 * sb_lock (fs/fs-writeback.c)
86 * ->mapping->tree_lock (__sync_single_inode) 83 * ->mapping->tree_lock (__sync_single_inode)
diff --git a/mm/madvise.c b/mm/madvise.c
index 2221491ed50..74bf193eff0 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -218,7 +218,7 @@ static long madvise_remove(struct vm_area_struct *vma,
218 endoff = (loff_t)(end - vma->vm_start - 1) 218 endoff = (loff_t)(end - vma->vm_start - 1)
219 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 219 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
220 220
221 /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ 221 /* vmtruncate_range needs to take i_mutex */
222 up_read(&current->mm->mmap_sem); 222 up_read(&current->mm->mmap_sem);
223 error = vmtruncate_range(mapping->host, offset, endoff); 223 error = vmtruncate_range(mapping->host, offset, endoff);
224 down_read(&current->mm->mmap_sem); 224 down_read(&current->mm->mmap_sem);
diff --git a/mm/rmap.c b/mm/rmap.c
index 23295f65ae4..2540a39eea4 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -21,7 +21,6 @@
21 * Lock ordering in mm: 21 * Lock ordering in mm:
22 * 22 *
23 * inode->i_mutex (while writing or truncating, not reading or faulting) 23 * inode->i_mutex (while writing or truncating, not reading or faulting)
24 * inode->i_alloc_sem (vmtruncate_range)
25 * mm->mmap_sem 24 * mm->mmap_sem
26 * page->flags PG_locked (lock_page) 25 * page->flags PG_locked (lock_page)
27 * mapping->i_mmap_mutex 26 * mapping->i_mmap_mutex
diff --git a/mm/truncate.c b/mm/truncate.c
index e13f22efaad..003c6c685fc 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -622,12 +622,11 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
622 return -ENOSYS; 622 return -ENOSYS;
623 623
624 mutex_lock(&inode->i_mutex); 624 mutex_lock(&inode->i_mutex);
625 down_write(&inode->i_alloc_sem); 625 inode_dio_wait(inode);
626 unmap_mapping_range(mapping, offset, (end - offset), 1); 626 unmap_mapping_range(mapping, offset, (end - offset), 1);
627 inode->i_op->truncate_range(inode, offset, end); 627 inode->i_op->truncate_range(inode, offset, end);
628 /* unmap again to remove racily COWed private pages */ 628 /* unmap again to remove racily COWed private pages */
629 unmap_mapping_range(mapping, offset, (end - offset), 1); 629 unmap_mapping_range(mapping, offset, (end - offset), 1);
630 up_write(&inode->i_alloc_sem);
631 mutex_unlock(&inode->i_mutex); 630 mutex_unlock(&inode->i_mutex);
632 631
633 return 0; 632 return 0;