aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/attr.c50
-rw-r--r--fs/buffer.c123
-rw-r--r--fs/direct-io.c61
-rw-r--r--fs/libfs.c76
4 files changed, 254 insertions, 56 deletions
diff --git a/fs/attr.c b/fs/attr.c
index 0815e93bb487..b4fa3b0aa596 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -67,14 +67,14 @@ EXPORT_SYMBOL(inode_change_ok);
67 * @offset: the new size to assign to the inode 67 * @offset: the new size to assign to the inode
68 * @Returns: 0 on success, -ve errno on failure 68 * @Returns: 0 on success, -ve errno on failure
69 * 69 *
70 * inode_newsize_ok must be called with i_mutex held.
71 *
70 * inode_newsize_ok will check filesystem limits and ulimits to check that the 72 * inode_newsize_ok will check filesystem limits and ulimits to check that the
71 * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ 73 * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
72 * when necessary. Caller must not proceed with inode size change if failure is 74 * when necessary. Caller must not proceed with inode size change if failure is
73 * returned. @inode must be a file (not directory), with appropriate 75 * returned. @inode must be a file (not directory), with appropriate
74 * permissions to allow truncate (inode_newsize_ok does NOT check these 76 * permissions to allow truncate (inode_newsize_ok does NOT check these
75 * conditions). 77 * conditions).
76 *
77 * inode_newsize_ok must be called with i_mutex held.
78 */ 78 */
79int inode_newsize_ok(const struct inode *inode, loff_t offset) 79int inode_newsize_ok(const struct inode *inode, loff_t offset)
80{ 80{
@@ -104,17 +104,25 @@ out_big:
104} 104}
105EXPORT_SYMBOL(inode_newsize_ok); 105EXPORT_SYMBOL(inode_newsize_ok);
106 106
107int inode_setattr(struct inode * inode, struct iattr * attr) 107/**
108 * generic_setattr - copy simple metadata updates into the generic inode
109 * @inode: the inode to be updated
110 * @attr: the new attributes
111 *
112 * generic_setattr must be called with i_mutex held.
113 *
114 * generic_setattr updates the inode's metadata with that specified
115 * in attr. Noticably missing is inode size update, which is more complex
116 * as it requires pagecache updates. See simple_setsize.
117 *
118 * The inode is not marked as dirty after this operation. The rationale is
119 * that for "simple" filesystems, the struct inode is the inode storage.
120 * The caller is free to mark the inode dirty afterwards if needed.
121 */
122void generic_setattr(struct inode *inode, const struct iattr *attr)
108{ 123{
109 unsigned int ia_valid = attr->ia_valid; 124 unsigned int ia_valid = attr->ia_valid;
110 125
111 if (ia_valid & ATTR_SIZE &&
112 attr->ia_size != i_size_read(inode)) {
113 int error = vmtruncate(inode, attr->ia_size);
114 if (error)
115 return error;
116 }
117
118 if (ia_valid & ATTR_UID) 126 if (ia_valid & ATTR_UID)
119 inode->i_uid = attr->ia_uid; 127 inode->i_uid = attr->ia_uid;
120 if (ia_valid & ATTR_GID) 128 if (ia_valid & ATTR_GID)
@@ -135,6 +143,28 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
135 mode &= ~S_ISGID; 143 mode &= ~S_ISGID;
136 inode->i_mode = mode; 144 inode->i_mode = mode;
137 } 145 }
146}
147EXPORT_SYMBOL(generic_setattr);
148
149/*
150 * note this function is deprecated, the new truncate sequence should be
151 * used instead -- see eg. simple_setsize, generic_setattr.
152 */
153int inode_setattr(struct inode *inode, const struct iattr *attr)
154{
155 unsigned int ia_valid = attr->ia_valid;
156
157 if (ia_valid & ATTR_SIZE &&
158 attr->ia_size != i_size_read(inode)) {
159 int error;
160
161 error = vmtruncate(inode, attr->ia_size);
162 if (error)
163 return error;
164 }
165
166 generic_setattr(inode, attr);
167
138 mark_inode_dirty(inode); 168 mark_inode_dirty(inode);
139 169
140 return 0; 170 return 0;
diff --git a/fs/buffer.c b/fs/buffer.c
index e8aa7081d25c..d54812b198e9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1949,14 +1949,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1949} 1949}
1950 1950
1951/* 1951/*
1952 * block_write_begin takes care of the basic task of block allocation and 1952 * Filesystems implementing the new truncate sequence should use the
1953 * bringing partial write blocks uptodate first. 1953 * _newtrunc postfix variant which won't incorrectly call vmtruncate.
1954 * 1954 * The filesystem needs to handle block truncation upon failure.
1955 * If *pagep is not NULL, then block_write_begin uses the locked page
1956 * at *pagep rather than allocating its own. In this case, the page will
1957 * not be unlocked or deallocated on failure.
1958 */ 1955 */
1959int block_write_begin(struct file *file, struct address_space *mapping, 1956int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
1960 loff_t pos, unsigned len, unsigned flags, 1957 loff_t pos, unsigned len, unsigned flags,
1961 struct page **pagep, void **fsdata, 1958 struct page **pagep, void **fsdata,
1962 get_block_t *get_block) 1959 get_block_t *get_block)
@@ -1992,20 +1989,50 @@ int block_write_begin(struct file *file, struct address_space *mapping,
1992 unlock_page(page); 1989 unlock_page(page);
1993 page_cache_release(page); 1990 page_cache_release(page);
1994 *pagep = NULL; 1991 *pagep = NULL;
1995
1996 /*
1997 * prepare_write() may have instantiated a few blocks
1998 * outside i_size. Trim these off again. Don't need
1999 * i_size_read because we hold i_mutex.
2000 */
2001 if (pos + len > inode->i_size)
2002 vmtruncate(inode, inode->i_size);
2003 } 1992 }
2004 } 1993 }
2005 1994
2006out: 1995out:
2007 return status; 1996 return status;
2008} 1997}
1998EXPORT_SYMBOL(block_write_begin_newtrunc);
1999
2000/*
2001 * block_write_begin takes care of the basic task of block allocation and
2002 * bringing partial write blocks uptodate first.
2003 *
2004 * If *pagep is not NULL, then block_write_begin uses the locked page
2005 * at *pagep rather than allocating its own. In this case, the page will
2006 * not be unlocked or deallocated on failure.
2007 */
2008int block_write_begin(struct file *file, struct address_space *mapping,
2009 loff_t pos, unsigned len, unsigned flags,
2010 struct page **pagep, void **fsdata,
2011 get_block_t *get_block)
2012{
2013 int ret;
2014
2015 ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
2016 pagep, fsdata, get_block);
2017
2018 /*
2019 * prepare_write() may have instantiated a few blocks
2020 * outside i_size. Trim these off again. Don't need
2021 * i_size_read because we hold i_mutex.
2022 *
2023 * Filesystems which pass down their own page also cannot
2024 * call into vmtruncate here because it would lead to lock
2025 * inversion problems (*pagep is locked). This is a further
2026 * example of where the old truncate sequence is inadequate.
2027 */
2028 if (unlikely(ret) && *pagep == NULL) {
2029 loff_t isize = mapping->host->i_size;
2030 if (pos + len > isize)
2031 vmtruncate(mapping->host, isize);
2032 }
2033
2034 return ret;
2035}
2009EXPORT_SYMBOL(block_write_begin); 2036EXPORT_SYMBOL(block_write_begin);
2010 2037
2011int block_write_end(struct file *file, struct address_space *mapping, 2038int block_write_end(struct file *file, struct address_space *mapping,
@@ -2324,7 +2351,7 @@ out:
2324 * For moronic filesystems that do not allow holes in file. 2351 * For moronic filesystems that do not allow holes in file.
2325 * We may have to extend the file. 2352 * We may have to extend the file.
2326 */ 2353 */
2327int cont_write_begin(struct file *file, struct address_space *mapping, 2354int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2328 loff_t pos, unsigned len, unsigned flags, 2355 loff_t pos, unsigned len, unsigned flags,
2329 struct page **pagep, void **fsdata, 2356 struct page **pagep, void **fsdata,
2330 get_block_t *get_block, loff_t *bytes) 2357 get_block_t *get_block, loff_t *bytes)
@@ -2345,11 +2372,30 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
2345 } 2372 }
2346 2373
2347 *pagep = NULL; 2374 *pagep = NULL;
2348 err = block_write_begin(file, mapping, pos, len, 2375 err = block_write_begin_newtrunc(file, mapping, pos, len,
2349 flags, pagep, fsdata, get_block); 2376 flags, pagep, fsdata, get_block);
2350out: 2377out:
2351 return err; 2378 return err;
2352} 2379}
2380EXPORT_SYMBOL(cont_write_begin_newtrunc);
2381
2382int cont_write_begin(struct file *file, struct address_space *mapping,
2383 loff_t pos, unsigned len, unsigned flags,
2384 struct page **pagep, void **fsdata,
2385 get_block_t *get_block, loff_t *bytes)
2386{
2387 int ret;
2388
2389 ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
2390 pagep, fsdata, get_block, bytes);
2391 if (unlikely(ret)) {
2392 loff_t isize = mapping->host->i_size;
2393 if (pos + len > isize)
2394 vmtruncate(mapping->host, isize);
2395 }
2396
2397 return ret;
2398}
2353EXPORT_SYMBOL(cont_write_begin); 2399EXPORT_SYMBOL(cont_write_begin);
2354 2400
2355int block_prepare_write(struct page *page, unsigned from, unsigned to, 2401int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -2381,7 +2427,7 @@ EXPORT_SYMBOL(block_commit_write);
2381 * 2427 *
2382 * We are not allowed to take the i_mutex here so we have to play games to 2428 * We are not allowed to take the i_mutex here so we have to play games to
2383 * protect against truncate races as the page could now be beyond EOF. Because 2429 * protect against truncate races as the page could now be beyond EOF. Because
2384 * vmtruncate() writes the inode size before removing pages, once we have the 2430 * truncate writes the inode size before removing pages, once we have the
2385 * page lock we can determine safely if the page is beyond EOF. If it is not 2431 * page lock we can determine safely if the page is beyond EOF. If it is not
2386 * beyond EOF, then the page is guaranteed safe against truncation until we 2432 * beyond EOF, then the page is guaranteed safe against truncation until we
2387 * unlock the page. 2433 * unlock the page.
@@ -2464,10 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2464} 2510}
2465 2511
2466/* 2512/*
2467 * On entry, the page is fully not uptodate. 2513 * Filesystems implementing the new truncate sequence should use the
2468 * On exit the page is fully uptodate in the areas outside (from,to) 2514 * _newtrunc postfix variant which won't incorrectly call vmtruncate.
2515 * The filesystem needs to handle block truncation upon failure.
2469 */ 2516 */
2470int nobh_write_begin(struct file *file, struct address_space *mapping, 2517int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2471 loff_t pos, unsigned len, unsigned flags, 2518 loff_t pos, unsigned len, unsigned flags,
2472 struct page **pagep, void **fsdata, 2519 struct page **pagep, void **fsdata,
2473 get_block_t *get_block) 2520 get_block_t *get_block)
@@ -2500,8 +2547,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
2500 unlock_page(page); 2547 unlock_page(page);
2501 page_cache_release(page); 2548 page_cache_release(page);
2502 *pagep = NULL; 2549 *pagep = NULL;
2503 return block_write_begin(file, mapping, pos, len, flags, pagep, 2550 return block_write_begin_newtrunc(file, mapping, pos, len,
2504 fsdata, get_block); 2551 flags, pagep, fsdata, get_block);
2505 } 2552 }
2506 2553
2507 if (PageMappedToDisk(page)) 2554 if (PageMappedToDisk(page))
@@ -2605,8 +2652,34 @@ out_release:
2605 page_cache_release(page); 2652 page_cache_release(page);
2606 *pagep = NULL; 2653 *pagep = NULL;
2607 2654
2608 if (pos + len > inode->i_size) 2655 return ret;
2609 vmtruncate(inode, inode->i_size); 2656}
2657EXPORT_SYMBOL(nobh_write_begin_newtrunc);
2658
2659/*
2660 * On entry, the page is fully not uptodate.
2661 * On exit the page is fully uptodate in the areas outside (from,to)
2662 */
2663int nobh_write_begin(struct file *file, struct address_space *mapping,
2664 loff_t pos, unsigned len, unsigned flags,
2665 struct page **pagep, void **fsdata,
2666 get_block_t *get_block)
2667{
2668 int ret;
2669
2670 ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
2671 pagep, fsdata, get_block);
2672
2673 /*
2674 * prepare_write() may have instantiated a few blocks
2675 * outside i_size. Trim these off again. Don't need
2676 * i_size_read because we hold i_mutex.
2677 */
2678 if (unlikely(ret)) {
2679 loff_t isize = mapping->host->i_size;
2680 if (pos + len > isize)
2681 vmtruncate(mapping->host, isize);
2682 }
2610 2683
2611 return ret; 2684 return ret;
2612} 2685}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da111aacb46e..7600aacf531d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1134,27 +1134,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1134 return ret; 1134 return ret;
1135} 1135}
1136 1136
1137/*
1138 * This is a library function for use by filesystem drivers.
1139 *
1140 * The locking rules are governed by the flags parameter:
1141 * - if the flags value contains DIO_LOCKING we use a fancy locking
1142 * scheme for dumb filesystems.
1143 * For writes this function is called under i_mutex and returns with
1144 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1145 * taken and dropped again before returning.
1146 * For reads and writes i_alloc_sem is taken in shared mode and released
1147 * on I/O completion (which may happen asynchronously after returning to
1148 * the caller).
1149 *
1150 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1151 * internal locking but rather rely on the filesystem to synchronize
1152 * direct I/O reads/writes versus each other and truncate.
1153 * For reads and writes both i_mutex and i_alloc_sem are not held on
1154 * entry and are never taken.
1155 */
1156ssize_t 1137ssize_t
1157__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1138__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
1158 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1139 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1159 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1140 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1160 dio_submit_t submit_io, int flags) 1141 dio_submit_t submit_io, int flags)
@@ -1247,9 +1228,46 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1247 nr_segs, blkbits, get_block, end_io, 1228 nr_segs, blkbits, get_block, end_io,
1248 submit_io, dio); 1229 submit_io, dio);
1249 1230
1231out:
1232 return retval;
1233}
1234EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
1235
1236/*
1237 * This is a library function for use by filesystem drivers.
1238 *
1239 * The locking rules are governed by the flags parameter:
1240 * - if the flags value contains DIO_LOCKING we use a fancy locking
1241 * scheme for dumb filesystems.
1242 * For writes this function is called under i_mutex and returns with
1243 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1244 * taken and dropped again before returning.
1245 * For reads and writes i_alloc_sem is taken in shared mode and released
1246 * on I/O completion (which may happen asynchronously after returning to
1247 * the caller).
1248 *
1249 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1250 * internal locking but rather rely on the filesystem to synchronize
1251 * direct I/O reads/writes versus each other and truncate.
1252 * For reads and writes both i_mutex and i_alloc_sem are not held on
1253 * entry and are never taken.
1254 */
1255ssize_t
1256__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1257 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1258 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1259 dio_submit_t submit_io, int flags)
1260{
1261 ssize_t retval;
1262
1263 retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
1264 offset, nr_segs, get_block, end_io, submit_io, flags);
1250 /* 1265 /*
1251 * In case of error extending write may have instantiated a few 1266 * In case of error extending write may have instantiated a few
1252 * blocks outside i_size. Trim these off again for DIO_LOCKING. 1267 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1268 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
1269 * their own manner. This is a further example of where the old
1270 * truncate sequence is inadequate.
1253 * 1271 *
1254 * NOTE: filesystems with their own locking have to handle this 1272 * NOTE: filesystems with their own locking have to handle this
1255 * on their own. 1273 * on their own.
@@ -1257,12 +1275,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1257 if (flags & DIO_LOCKING) { 1275 if (flags & DIO_LOCKING) {
1258 if (unlikely((rw & WRITE) && retval < 0)) { 1276 if (unlikely((rw & WRITE) && retval < 0)) {
1259 loff_t isize = i_size_read(inode); 1277 loff_t isize = i_size_read(inode);
1278 loff_t end = offset + iov_length(iov, nr_segs);
1279
1260 if (end > isize) 1280 if (end > isize)
1261 vmtruncate(inode, isize); 1281 vmtruncate(inode, isize);
1262 } 1282 }
1263 } 1283 }
1264 1284
1265out:
1266 return retval; 1285 return retval;
1267} 1286}
1268EXPORT_SYMBOL(__blockdev_direct_IO); 1287EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/libfs.c b/fs/libfs.c
index b84d0a7a2204..09e1016eb774 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -8,6 +8,7 @@
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/vfs.h> 10#include <linux/vfs.h>
11#include <linux/quotaops.h>
11#include <linux/mutex.h> 12#include <linux/mutex.h>
12#include <linux/exportfs.h> 13#include <linux/exportfs.h>
13#include <linux/writeback.h> 14#include <linux/writeback.h>
@@ -325,6 +326,81 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
325 return 0; 326 return 0;
326} 327}
327 328
329/**
330 * simple_setsize - handle core mm and vfs requirements for file size change
331 * @inode: inode
332 * @newsize: new file size
333 *
334 * Returns 0 on success, -error on failure.
335 *
336 * simple_setsize must be called with inode_mutex held.
337 *
338 * simple_setsize will check that the requested new size is OK (see
339 * inode_newsize_ok), and then will perform the necessary i_size update
340 * and pagecache truncation (if necessary). It will be typically be called
341 * from the filesystem's setattr function when ATTR_SIZE is passed in.
342 *
343 * The inode itself must have correct permissions and attributes to allow
344 * i_size to be changed, this function then just checks that the new size
345 * requested is valid.
346 *
347 * In the case of simple in-memory filesystems with inodes stored solely
348 * in the inode cache, and file data in the pagecache, nothing more needs
349 * to be done to satisfy a truncate request. Filesystems with on-disk
350 * blocks for example will need to free them in the case of truncate, in
351 * that case it may be easier not to use simple_setsize (but each of its
352 * components will likely be required at some point to update pagecache
353 * and inode etc).
354 */
355int simple_setsize(struct inode *inode, loff_t newsize)
356{
357 loff_t oldsize;
358 int error;
359
360 error = inode_newsize_ok(inode, newsize);
361 if (error)
362 return error;
363
364 oldsize = inode->i_size;
365 i_size_write(inode, newsize);
366 truncate_pagecache(inode, oldsize, newsize);
367
368 return error;
369}
370EXPORT_SYMBOL(simple_setsize);
371
372/**
373 * simple_setattr - setattr for simple in-memory filesystem
374 * @dentry: dentry
375 * @iattr: iattr structure
376 *
377 * Returns 0 on success, -error on failure.
378 *
379 * simple_setattr implements setattr for an in-memory filesystem which
380 * does not store its own file data or metadata (eg. uses the page cache
381 * and inode cache as its data store).
382 */
383int simple_setattr(struct dentry *dentry, struct iattr *iattr)
384{
385 struct inode *inode = dentry->d_inode;
386 int error;
387
388 error = inode_change_ok(inode, iattr);
389 if (error)
390 return error;
391
392 if (iattr->ia_valid & ATTR_SIZE) {
393 error = simple_setsize(inode, iattr->ia_size);
394 if (error)
395 return error;
396 }
397
398 generic_setattr(inode, iattr);
399
400 return error;
401}
402EXPORT_SYMBOL(simple_setattr);
403
328int simple_readpage(struct file *file, struct page *page) 404int simple_readpage(struct file *file, struct page *page)
329{ 405{
330 clear_highpage(page); 406 clear_highpage(page);