aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornpiggin@suse.de <npiggin@suse.de>2010-05-26 11:05:33 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2010-05-27 22:15:33 -0400
commit7bb46a6734a7e1ad4beaecc11cae7ed3ff81d30f (patch)
treee575d9c55e2a6ccc645dcb3ae2564de458b428f2
parent7000d3c424e5bb350e502a477fb0e1ed42f8b10e (diff)
fs: introduce new truncate sequence
Introduce a new truncate calling sequence into fs/mm subsystems. Rather than setattr > vmtruncate > truncate, have filesystems call their truncate sequence from ->setattr if filesystem specific operations are required. vmtruncate is deprecated, and truncate_pagecache and inode_newsize_ok helpers introduced previously should be used. simple_setattr is introduced for simple in-ram filesystems to implement the new truncate sequence. Eventually all filesystems should be converted to implement a setattr, and the default code in notify_change should go away. simple_setsize is also introduced to perform just the ATTR_SIZE portion of simple_setattr (ie. changing i_size and trimming pagecache). To implement the new truncate sequence: - filesystem specific manipulations (eg freeing blocks) must be done in the setattr method rather than ->truncate. - vmtruncate can not be used by core code to trim blocks past i_size in the event of write failure after allocation, so this must be performed in the fs code. - convert usage of helpers block_write_begin, nobh_write_begin, cont_write_begin, and *blockdev_direct_IO* to use _newtrunc postfixed variants. These avoid calling vmtruncate to trim blocks (see previous). - inode_setattr should not be used. generic_setattr is a new function to be used to copy simple attributes into the generic inode. - make use of the better opportunity to handle errors with the new sequence. Big problem with the previous calling sequence: the filesystem is not called until i_size has already changed. This means it is not allowed to fail the call, and also it does not know what the previous i_size was. Also, generic code calling vmtruncate to truncate allocated blocks in case of error had no good way to return a meaningful error (or, for example, atomically handle block deallocation). Cc: Christoph Hellwig <hch@lst.de> Acked-by: Jan Kara <jack@suse.cz> Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--Documentation/filesystems/vfs.txt7
-rw-r--r--fs/attr.c50
-rw-r--r--fs/buffer.c123
-rw-r--r--fs/direct-io.c61
-rw-r--r--fs/libfs.c76
-rw-r--r--include/linux/buffer_head.h9
-rw-r--r--include/linux/fs.h27
-rw-r--r--mm/truncate.c10
8 files changed, 300 insertions, 63 deletions
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index d4f5731dcbbb..94677e7dcb13 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -401,11 +401,16 @@ otherwise noted.
401 started might not be in the page cache at the end of the 401 started might not be in the page cache at the end of the
402 walk). 402 walk).
403 403
404 truncate: called by the VFS to change the size of a file. The 404 truncate: Deprecated. This will not be called if ->setsize is defined.
405 Called by the VFS to change the size of a file. The
405 i_size field of the inode is set to the desired size by the 406 i_size field of the inode is set to the desired size by the
406 VFS before this method is called. This method is called by 407 VFS before this method is called. This method is called by
407 the truncate(2) system call and related functionality. 408 the truncate(2) system call and related functionality.
408 409
410 Note: ->truncate and vmtruncate are deprecated. Do not add new
411 instances/calls of these. Filesystems should be converted to do their
412 truncate sequence via ->setattr().
413
409 permission: called by the VFS to check for access rights on a POSIX-like 414 permission: called by the VFS to check for access rights on a POSIX-like
410 filesystem. 415 filesystem.
411 416
diff --git a/fs/attr.c b/fs/attr.c
index 0815e93bb487..b4fa3b0aa596 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -67,14 +67,14 @@ EXPORT_SYMBOL(inode_change_ok);
67 * @offset: the new size to assign to the inode 67 * @offset: the new size to assign to the inode
68 * @Returns: 0 on success, -ve errno on failure 68 * @Returns: 0 on success, -ve errno on failure
69 * 69 *
70 * inode_newsize_ok must be called with i_mutex held.
71 *
70 * inode_newsize_ok will check filesystem limits and ulimits to check that the 72 * inode_newsize_ok will check filesystem limits and ulimits to check that the
71 * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ 73 * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
72 * when necessary. Caller must not proceed with inode size change if failure is 74 * when necessary. Caller must not proceed with inode size change if failure is
73 * returned. @inode must be a file (not directory), with appropriate 75 * returned. @inode must be a file (not directory), with appropriate
74 * permissions to allow truncate (inode_newsize_ok does NOT check these 76 * permissions to allow truncate (inode_newsize_ok does NOT check these
75 * conditions). 77 * conditions).
76 *
77 * inode_newsize_ok must be called with i_mutex held.
78 */ 78 */
79int inode_newsize_ok(const struct inode *inode, loff_t offset) 79int inode_newsize_ok(const struct inode *inode, loff_t offset)
80{ 80{
@@ -104,17 +104,25 @@ out_big:
104} 104}
105EXPORT_SYMBOL(inode_newsize_ok); 105EXPORT_SYMBOL(inode_newsize_ok);
106 106
107int inode_setattr(struct inode * inode, struct iattr * attr) 107/**
108 * generic_setattr - copy simple metadata updates into the generic inode
109 * @inode: the inode to be updated
110 * @attr: the new attributes
111 *
112 * generic_setattr must be called with i_mutex held.
113 *
114 * generic_setattr updates the inode's metadata with that specified
115 * in attr. Noticeably missing is inode size update, which is more complex
116 * as it requires pagecache updates. See simple_setsize.
117 *
118 * The inode is not marked as dirty after this operation. The rationale is
119 * that for "simple" filesystems, the struct inode is the inode storage.
120 * The caller is free to mark the inode dirty afterwards if needed.
121 */
122void generic_setattr(struct inode *inode, const struct iattr *attr)
108{ 123{
109 unsigned int ia_valid = attr->ia_valid; 124 unsigned int ia_valid = attr->ia_valid;
110 125
111 if (ia_valid & ATTR_SIZE &&
112 attr->ia_size != i_size_read(inode)) {
113 int error = vmtruncate(inode, attr->ia_size);
114 if (error)
115 return error;
116 }
117
118 if (ia_valid & ATTR_UID) 126 if (ia_valid & ATTR_UID)
119 inode->i_uid = attr->ia_uid; 127 inode->i_uid = attr->ia_uid;
120 if (ia_valid & ATTR_GID) 128 if (ia_valid & ATTR_GID)
@@ -135,6 +143,28 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
135 mode &= ~S_ISGID; 143 mode &= ~S_ISGID;
136 inode->i_mode = mode; 144 inode->i_mode = mode;
137 } 145 }
146}
147EXPORT_SYMBOL(generic_setattr);
148
149/*
150 * note this function is deprecated, the new truncate sequence should be
151 * used instead -- see eg. simple_setsize, generic_setattr.
152 */
153int inode_setattr(struct inode *inode, const struct iattr *attr)
154{
155 unsigned int ia_valid = attr->ia_valid;
156
157 if (ia_valid & ATTR_SIZE &&
158 attr->ia_size != i_size_read(inode)) {
159 int error;
160
161 error = vmtruncate(inode, attr->ia_size);
162 if (error)
163 return error;
164 }
165
166 generic_setattr(inode, attr);
167
138 mark_inode_dirty(inode); 168 mark_inode_dirty(inode);
139 169
140 return 0; 170 return 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index e8aa7081d25c..d54812b198e9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1949,14 +1949,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1949} 1949}
1950 1950
1951/* 1951/*
1952 * block_write_begin takes care of the basic task of block allocation and 1952 * Filesystems implementing the new truncate sequence should use the
1953 * bringing partial write blocks uptodate first. 1953 * _newtrunc postfix variant which won't incorrectly call vmtruncate.
1954 * 1954 * The filesystem needs to handle block truncation upon failure.
1955 * If *pagep is not NULL, then block_write_begin uses the locked page
1956 * at *pagep rather than allocating its own. In this case, the page will
1957 * not be unlocked or deallocated on failure.
1958 */ 1955 */
1959int block_write_begin(struct file *file, struct address_space *mapping, 1956int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
1960 loff_t pos, unsigned len, unsigned flags, 1957 loff_t pos, unsigned len, unsigned flags,
1961 struct page **pagep, void **fsdata, 1958 struct page **pagep, void **fsdata,
1962 get_block_t *get_block) 1959 get_block_t *get_block)
@@ -1992,20 +1989,50 @@ int block_write_begin(struct file *file, struct address_space *mapping,
1992 unlock_page(page); 1989 unlock_page(page);
1993 page_cache_release(page); 1990 page_cache_release(page);
1994 *pagep = NULL; 1991 *pagep = NULL;
1995
1996 /*
1997 * prepare_write() may have instantiated a few blocks
1998 * outside i_size. Trim these off again. Don't need
1999 * i_size_read because we hold i_mutex.
2000 */
2001 if (pos + len > inode->i_size)
2002 vmtruncate(inode, inode->i_size);
2003 } 1992 }
2004 } 1993 }
2005 1994
2006out: 1995out:
2007 return status; 1996 return status;
2008} 1997}
1998EXPORT_SYMBOL(block_write_begin_newtrunc);
1999
2000/*
2001 * block_write_begin takes care of the basic task of block allocation and
2002 * bringing partial write blocks uptodate first.
2003 *
2004 * If *pagep is not NULL, then block_write_begin uses the locked page
2005 * at *pagep rather than allocating its own. In this case, the page will
2006 * not be unlocked or deallocated on failure.
2007 */
2008int block_write_begin(struct file *file, struct address_space *mapping,
2009 loff_t pos, unsigned len, unsigned flags,
2010 struct page **pagep, void **fsdata,
2011 get_block_t *get_block)
2012{
2013 int ret;
2014
2015 ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
2016 pagep, fsdata, get_block);
2017
2018 /*
2019 * prepare_write() may have instantiated a few blocks
2020 * outside i_size. Trim these off again. Don't need
2021 * i_size_read because we hold i_mutex.
2022 *
2023 * Filesystems which pass down their own page also cannot
2024 * call into vmtruncate here because it would lead to lock
2025 * inversion problems (*pagep is locked). This is a further
2026 * example of where the old truncate sequence is inadequate.
2027 */
2028 if (unlikely(ret) && *pagep == NULL) {
2029 loff_t isize = mapping->host->i_size;
2030 if (pos + len > isize)
2031 vmtruncate(mapping->host, isize);
2032 }
2033
2034 return ret;
2035}
2009EXPORT_SYMBOL(block_write_begin); 2036EXPORT_SYMBOL(block_write_begin);
2010 2037
2011int block_write_end(struct file *file, struct address_space *mapping, 2038int block_write_end(struct file *file, struct address_space *mapping,
@@ -2324,7 +2351,7 @@ out:
2324 * For moronic filesystems that do not allow holes in file. 2351 * For moronic filesystems that do not allow holes in file.
2325 * We may have to extend the file. 2352 * We may have to extend the file.
2326 */ 2353 */
2327int cont_write_begin(struct file *file, struct address_space *mapping, 2354int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2328 loff_t pos, unsigned len, unsigned flags, 2355 loff_t pos, unsigned len, unsigned flags,
2329 struct page **pagep, void **fsdata, 2356 struct page **pagep, void **fsdata,
2330 get_block_t *get_block, loff_t *bytes) 2357 get_block_t *get_block, loff_t *bytes)
@@ -2345,11 +2372,30 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
2345 } 2372 }
2346 2373
2347 *pagep = NULL; 2374 *pagep = NULL;
2348 err = block_write_begin(file, mapping, pos, len, 2375 err = block_write_begin_newtrunc(file, mapping, pos, len,
2349 flags, pagep, fsdata, get_block); 2376 flags, pagep, fsdata, get_block);
2350out: 2377out:
2351 return err; 2378 return err;
2352} 2379}
2380EXPORT_SYMBOL(cont_write_begin_newtrunc);
2381
2382int cont_write_begin(struct file *file, struct address_space *mapping,
2383 loff_t pos, unsigned len, unsigned flags,
2384 struct page **pagep, void **fsdata,
2385 get_block_t *get_block, loff_t *bytes)
2386{
2387 int ret;
2388
2389 ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
2390 pagep, fsdata, get_block, bytes);
2391 if (unlikely(ret)) {
2392 loff_t isize = mapping->host->i_size;
2393 if (pos + len > isize)
2394 vmtruncate(mapping->host, isize);
2395 }
2396
2397 return ret;
2398}
2353EXPORT_SYMBOL(cont_write_begin); 2399EXPORT_SYMBOL(cont_write_begin);
2354 2400
2355int block_prepare_write(struct page *page, unsigned from, unsigned to, 2401int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -2381,7 +2427,7 @@ EXPORT_SYMBOL(block_commit_write);
2381 * 2427 *
2382 * We are not allowed to take the i_mutex here so we have to play games to 2428 * We are not allowed to take the i_mutex here so we have to play games to
2383 * protect against truncate races as the page could now be beyond EOF. Because 2429 * protect against truncate races as the page could now be beyond EOF. Because
2384 * vmtruncate() writes the inode size before removing pages, once we have the 2430 * truncate writes the inode size before removing pages, once we have the
2385 * page lock we can determine safely if the page is beyond EOF. If it is not 2431 * page lock we can determine safely if the page is beyond EOF. If it is not
2386 * beyond EOF, then the page is guaranteed safe against truncation until we 2432 * beyond EOF, then the page is guaranteed safe against truncation until we
2387 * unlock the page. 2433 * unlock the page.
@@ -2464,10 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2464} 2510}
2465 2511
2466/* 2512/*
2467 * On entry, the page is fully not uptodate. 2513 * Filesystems implementing the new truncate sequence should use the
2468 * On exit the page is fully uptodate in the areas outside (from,to) 2514 * _newtrunc postfix variant which won't incorrectly call vmtruncate.
2515 * The filesystem needs to handle block truncation upon failure.
2469 */ 2516 */
2470int nobh_write_begin(struct file *file, struct address_space *mapping, 2517int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2471 loff_t pos, unsigned len, unsigned flags, 2518 loff_t pos, unsigned len, unsigned flags,
2472 struct page **pagep, void **fsdata, 2519 struct page **pagep, void **fsdata,
2473 get_block_t *get_block) 2520 get_block_t *get_block)
@@ -2500,8 +2547,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
2500 unlock_page(page); 2547 unlock_page(page);
2501 page_cache_release(page); 2548 page_cache_release(page);
2502 *pagep = NULL; 2549 *pagep = NULL;
2503 return block_write_begin(file, mapping, pos, len, flags, pagep, 2550 return block_write_begin_newtrunc(file, mapping, pos, len,
2504 fsdata, get_block); 2551 flags, pagep, fsdata, get_block);
2505 } 2552 }
2506 2553
2507 if (PageMappedToDisk(page)) 2554 if (PageMappedToDisk(page))
@@ -2605,8 +2652,34 @@ out_release:
2605 page_cache_release(page); 2652 page_cache_release(page);
2606 *pagep = NULL; 2653 *pagep = NULL;
2607 2654
2608 if (pos + len > inode->i_size) 2655 return ret;
2609 vmtruncate(inode, inode->i_size); 2656}
2657EXPORT_SYMBOL(nobh_write_begin_newtrunc);
2658
2659/*
2660 * On entry, the page is fully not uptodate.
2661 * On exit the page is fully uptodate in the areas outside (from,to)
2662 */
2663int nobh_write_begin(struct file *file, struct address_space *mapping,
2664 loff_t pos, unsigned len, unsigned flags,
2665 struct page **pagep, void **fsdata,
2666 get_block_t *get_block)
2667{
2668 int ret;
2669
2670 ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
2671 pagep, fsdata, get_block);
2672
2673 /*
2674 * prepare_write() may have instantiated a few blocks
2675 * outside i_size. Trim these off again. Don't need
2676 * i_size_read because we hold i_mutex.
2677 */
2678 if (unlikely(ret)) {
2679 loff_t isize = mapping->host->i_size;
2680 if (pos + len > isize)
2681 vmtruncate(mapping->host, isize);
2682 }
2610 2683
2611 return ret; 2684 return ret;
2612} 2685}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da111aacb46e..7600aacf531d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1134,27 +1134,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1134 return ret; 1134 return ret;
1135} 1135}
1136 1136
1137/*
1138 * This is a library function for use by filesystem drivers.
1139 *
1140 * The locking rules are governed by the flags parameter:
1141 * - if the flags value contains DIO_LOCKING we use a fancy locking
1142 * scheme for dumb filesystems.
1143 * For writes this function is called under i_mutex and returns with
1144 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1145 * taken and dropped again before returning.
1146 * For reads and writes i_alloc_sem is taken in shared mode and released
1147 * on I/O completion (which may happen asynchronously after returning to
1148 * the caller).
1149 *
1150 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1151 * internal locking but rather rely on the filesystem to synchronize
1152 * direct I/O reads/writes versus each other and truncate.
1153 * For reads and writes both i_mutex and i_alloc_sem are not held on
1154 * entry and are never taken.
1155 */
1156ssize_t 1137ssize_t
1157__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1138__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
1158 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1139 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1159 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1140 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1160 dio_submit_t submit_io, int flags) 1141 dio_submit_t submit_io, int flags)
@@ -1247,9 +1228,46 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1247 nr_segs, blkbits, get_block, end_io, 1228 nr_segs, blkbits, get_block, end_io,
1248 submit_io, dio); 1229 submit_io, dio);
1249 1230
1231out:
1232 return retval;
1233}
1234EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
1235
1236/*
1237 * This is a library function for use by filesystem drivers.
1238 *
1239 * The locking rules are governed by the flags parameter:
1240 * - if the flags value contains DIO_LOCKING we use a fancy locking
1241 * scheme for dumb filesystems.
1242 * For writes this function is called under i_mutex and returns with
1243 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1244 * taken and dropped again before returning.
1245 * For reads and writes i_alloc_sem is taken in shared mode and released
1246 * on I/O completion (which may happen asynchronously after returning to
1247 * the caller).
1248 *
1249 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1250 * internal locking but rather rely on the filesystem to synchronize
1251 * direct I/O reads/writes versus each other and truncate.
1252 * For reads and writes both i_mutex and i_alloc_sem are not held on
1253 * entry and are never taken.
1254 */
1255ssize_t
1256__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1257 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1258 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1259 dio_submit_t submit_io, int flags)
1260{
1261 ssize_t retval;
1262
1263 retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
1264 offset, nr_segs, get_block, end_io, submit_io, flags);
1250 /* 1265 /*
1251 * In case of error extending write may have instantiated a few 1266 * In case of error extending write may have instantiated a few
1252 * blocks outside i_size. Trim these off again for DIO_LOCKING. 1267 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1268 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
1269 * their own manner. This is a further example of where the old
1270 * truncate sequence is inadequate.
1253 * 1271 *
1254 * NOTE: filesystems with their own locking have to handle this 1272 * NOTE: filesystems with their own locking have to handle this
1255 * on their own. 1273 * on their own.
@@ -1257,12 +1275,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1257 if (flags & DIO_LOCKING) { 1275 if (flags & DIO_LOCKING) {
1258 if (unlikely((rw & WRITE) && retval < 0)) { 1276 if (unlikely((rw & WRITE) && retval < 0)) {
1259 loff_t isize = i_size_read(inode); 1277 loff_t isize = i_size_read(inode);
1278 loff_t end = offset + iov_length(iov, nr_segs);
1279
1260 if (end > isize) 1280 if (end > isize)
1261 vmtruncate(inode, isize); 1281 vmtruncate(inode, isize);
1262 } 1282 }
1263 } 1283 }
1264 1284
1265out:
1266 return retval; 1285 return retval;
1267} 1286}
1268EXPORT_SYMBOL(__blockdev_direct_IO); 1287EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/libfs.c b/fs/libfs.c
index b84d0a7a2204..09e1016eb774 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -8,6 +8,7 @@
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/vfs.h> 10#include <linux/vfs.h>
11#include <linux/quotaops.h>
11#include <linux/mutex.h> 12#include <linux/mutex.h>
12#include <linux/exportfs.h> 13#include <linux/exportfs.h>
13#include <linux/writeback.h> 14#include <linux/writeback.h>
@@ -325,6 +326,81 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
325 return 0; 326 return 0;
326} 327}
327 328
329/**
330 * simple_setsize - handle core mm and vfs requirements for file size change
331 * @inode: inode
332 * @newsize: new file size
333 *
334 * Returns 0 on success, -error on failure.
335 *
336 * simple_setsize must be called with i_mutex held.
337 *
338 * simple_setsize will check that the requested new size is OK (see
339 * inode_newsize_ok), and then will perform the necessary i_size update
340 * and pagecache truncation (if necessary). It will be typically be called
341 * from the filesystem's setattr function when ATTR_SIZE is passed in.
342 *
343 * The inode itself must have correct permissions and attributes to allow
344 * i_size to be changed, this function then just checks that the new size
345 * requested is valid.
346 *
347 * In the case of simple in-memory filesystems with inodes stored solely
348 * in the inode cache, and file data in the pagecache, nothing more needs
349 * to be done to satisfy a truncate request. Filesystems with on-disk
350 * blocks for example will need to free them in the case of truncate, in
351 * that case it may be easier not to use simple_setsize (but each of its
352 * components will likely be required at some point to update pagecache
353 * and inode etc).
354 */
355int simple_setsize(struct inode *inode, loff_t newsize)
356{
357 loff_t oldsize;
358 int error;
359
360 error = inode_newsize_ok(inode, newsize);
361 if (error)
362 return error;
363
364 oldsize = inode->i_size;
365 i_size_write(inode, newsize);
366 truncate_pagecache(inode, oldsize, newsize);
367
368 return error;
369}
370EXPORT_SYMBOL(simple_setsize);
371
372/**
373 * simple_setattr - setattr for simple in-memory filesystem
374 * @dentry: dentry
375 * @iattr: iattr structure
376 *
377 * Returns 0 on success, -error on failure.
378 *
379 * simple_setattr implements setattr for an in-memory filesystem which
380 * does not store its own file data or metadata (eg. uses the page cache
381 * and inode cache as its data store).
382 */
383int simple_setattr(struct dentry *dentry, struct iattr *iattr)
384{
385 struct inode *inode = dentry->d_inode;
386 int error;
387
388 error = inode_change_ok(inode, iattr);
389 if (error)
390 return error;
391
392 if (iattr->ia_valid & ATTR_SIZE) {
393 error = simple_setsize(inode, iattr->ia_size);
394 if (error)
395 return error;
396 }
397
398 generic_setattr(inode, iattr);
399
400 return error;
401}
402EXPORT_SYMBOL(simple_setattr);
403
328int simple_readpage(struct file *file, struct page *page) 404int simple_readpage(struct file *file, struct page *page)
329{ 405{
330 clear_highpage(page); 406 clear_highpage(page);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 05e5f5996216..1b9ba193b789 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -203,6 +203,9 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
203int block_read_full_page(struct page*, get_block_t*); 203int block_read_full_page(struct page*, get_block_t*);
204int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, 204int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
205 unsigned long from); 205 unsigned long from);
206int block_write_begin_newtrunc(struct file *, struct address_space *,
207 loff_t, unsigned, unsigned,
208 struct page **, void **, get_block_t*);
206int block_write_begin(struct file *, struct address_space *, 209int block_write_begin(struct file *, struct address_space *,
207 loff_t, unsigned, unsigned, 210 loff_t, unsigned, unsigned,
208 struct page **, void **, get_block_t*); 211 struct page **, void **, get_block_t*);
@@ -214,6 +217,9 @@ int generic_write_end(struct file *, struct address_space *,
214 struct page *, void *); 217 struct page *, void *);
215void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); 218void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
216int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); 219int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
220int cont_write_begin_newtrunc(struct file *, struct address_space *, loff_t,
221 unsigned, unsigned, struct page **, void **,
222 get_block_t *, loff_t *);
217int cont_write_begin(struct file *, struct address_space *, loff_t, 223int cont_write_begin(struct file *, struct address_space *, loff_t,
218 unsigned, unsigned, struct page **, void **, 224 unsigned, unsigned, struct page **, void **,
219 get_block_t *, loff_t *); 225 get_block_t *, loff_t *);
@@ -225,6 +231,9 @@ void block_sync_page(struct page *);
225sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); 231sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
226int block_truncate_page(struct address_space *, loff_t, get_block_t *); 232int block_truncate_page(struct address_space *, loff_t, get_block_t *);
227int file_fsync(struct file *, int); 233int file_fsync(struct file *, int);
234int nobh_write_begin_newtrunc(struct file *, struct address_space *,
235 loff_t, unsigned, unsigned,
236 struct page **, void **, get_block_t*);
228int nobh_write_begin(struct file *, struct address_space *, 237int nobh_write_begin(struct file *, struct address_space *,
229 loff_t, unsigned, unsigned, 238 loff_t, unsigned, unsigned,
230 struct page **, void **, get_block_t*); 239 struct page **, void **, get_block_t*);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index acf6c52a50dd..3428393942a6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2257,6 +2257,10 @@ typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
2257 loff_t file_offset); 2257 loff_t file_offset);
2258void dio_end_io(struct bio *bio, int error); 2258void dio_end_io(struct bio *bio, int error);
2259 2259
2260ssize_t __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
2261 struct block_device *bdev, const struct iovec *iov, loff_t offset,
2262 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
2263 dio_submit_t submit_io, int lock_type);
2260ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 2264ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2261 struct block_device *bdev, const struct iovec *iov, loff_t offset, 2265 struct block_device *bdev, const struct iovec *iov, loff_t offset,
2262 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 2266 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
@@ -2270,6 +2274,24 @@ enum {
2270 DIO_SKIP_HOLES = 0x02, 2274 DIO_SKIP_HOLES = 0x02,
2271}; 2275};
2272 2276
2277static inline ssize_t blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb,
2278 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2279 loff_t offset, unsigned long nr_segs, get_block_t get_block,
2280 dio_iodone_t end_io)
2281{
2282 return __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, offset,
2283 nr_segs, get_block, end_io, NULL,
2284 DIO_LOCKING | DIO_SKIP_HOLES);
2285}
2286
2287static inline ssize_t blockdev_direct_IO_no_locking_newtrunc(int rw, struct kiocb *iocb,
2288 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2289 loff_t offset, unsigned long nr_segs, get_block_t get_block,
2290 dio_iodone_t end_io)
2291{
2292 return __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, offset,
2293 nr_segs, get_block, end_io, NULL, 0);
2294}
2273static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, 2295static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
2274 struct inode *inode, struct block_device *bdev, const struct iovec *iov, 2296 struct inode *inode, struct block_device *bdev, const struct iovec *iov,
2275 loff_t offset, unsigned long nr_segs, get_block_t get_block, 2297 loff_t offset, unsigned long nr_segs, get_block_t get_block,
@@ -2342,12 +2364,14 @@ extern int dcache_dir_open(struct inode *, struct file *);
2342extern int dcache_dir_close(struct inode *, struct file *); 2364extern int dcache_dir_close(struct inode *, struct file *);
2343extern loff_t dcache_dir_lseek(struct file *, loff_t, int); 2365extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
2344extern int dcache_readdir(struct file *, void *, filldir_t); 2366extern int dcache_readdir(struct file *, void *, filldir_t);
2367extern int simple_setattr(struct dentry *, struct iattr *);
2345extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); 2368extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
2346extern int simple_statfs(struct dentry *, struct kstatfs *); 2369extern int simple_statfs(struct dentry *, struct kstatfs *);
2347extern int simple_link(struct dentry *, struct inode *, struct dentry *); 2370extern int simple_link(struct dentry *, struct inode *, struct dentry *);
2348extern int simple_unlink(struct inode *, struct dentry *); 2371extern int simple_unlink(struct inode *, struct dentry *);
2349extern int simple_rmdir(struct inode *, struct dentry *); 2372extern int simple_rmdir(struct inode *, struct dentry *);
2350extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 2373extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
2374extern int simple_setsize(struct inode *, loff_t);
2351extern int noop_fsync(struct file *, int); 2375extern int noop_fsync(struct file *, int);
2352extern int simple_empty(struct dentry *); 2376extern int simple_empty(struct dentry *);
2353extern int simple_readpage(struct file *file, struct page *page); 2377extern int simple_readpage(struct file *file, struct page *page);
@@ -2384,7 +2408,8 @@ extern int buffer_migrate_page(struct address_space *,
2384 2408
2385extern int inode_change_ok(const struct inode *, struct iattr *); 2409extern int inode_change_ok(const struct inode *, struct iattr *);
2386extern int inode_newsize_ok(const struct inode *, loff_t offset); 2410extern int inode_newsize_ok(const struct inode *, loff_t offset);
2387extern int __must_check inode_setattr(struct inode *, struct iattr *); 2411extern int __must_check inode_setattr(struct inode *, const struct iattr *);
2412extern void generic_setattr(struct inode *inode, const struct iattr *attr);
2388 2413
2389extern void file_update_time(struct file *file); 2414extern void file_update_time(struct file *file);
2390 2415
diff --git a/mm/truncate.c b/mm/truncate.c
index f42675a3615d..937571b8b233 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -548,18 +548,18 @@ EXPORT_SYMBOL(truncate_pagecache);
548 * NOTE! We have to be ready to update the memory sharing 548 * NOTE! We have to be ready to update the memory sharing
549 * between the file and the memory map for a potential last 549 * between the file and the memory map for a potential last
550 * incomplete page. Ugly, but necessary. 550 * incomplete page. Ugly, but necessary.
551 *
552 * This function is deprecated and simple_setsize or truncate_pagecache
553 * should be used instead.
551 */ 554 */
552int vmtruncate(struct inode *inode, loff_t offset) 555int vmtruncate(struct inode *inode, loff_t offset)
553{ 556{
554 loff_t oldsize;
555 int error; 557 int error;
556 558
557 error = inode_newsize_ok(inode, offset); 559 error = simple_setsize(inode, offset);
558 if (error) 560 if (error)
559 return error; 561 return error;
560 oldsize = inode->i_size; 562
561 i_size_write(inode, offset);
562 truncate_pagecache(inode, oldsize, offset);
563 if (inode->i_op->truncate) 563 if (inode->i_op->truncate)
564 inode->i_op->truncate(inode); 564 inode->i_op->truncate(inode);
565 565