diff options
author | npiggin@suse.de <npiggin@suse.de> | 2010-05-26 11:05:33 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2010-05-27 22:15:33 -0400 |
commit | 7bb46a6734a7e1ad4beaecc11cae7ed3ff81d30f (patch) | |
tree | e575d9c55e2a6ccc645dcb3ae2564de458b428f2 /fs/direct-io.c | |
parent | 7000d3c424e5bb350e502a477fb0e1ed42f8b10e (diff) |
fs: introduce new truncate sequence
Introduce a new truncate calling sequence into the fs/mm subsystems. Rather than
the setattr > vmtruncate > truncate call chain, have filesystems call their
truncate sequence from ->setattr if filesystem-specific operations are required.
vmtruncate is deprecated; the truncate_pagecache and inode_newsize_ok helpers
introduced previously should be used instead.
simple_setattr is introduced for simple in-ram filesystems to implement
the new truncate sequence. Eventually all filesystems should be converted
to implement a setattr, and the default code in notify_change should go
away.
simple_setsize is also introduced to perform just the ATTR_SIZE portion
of simple_setattr (i.e. changing i_size and trimming the pagecache).
To implement the new truncate sequence:
- filesystem-specific manipulations (e.g. freeing blocks) must be done in
the ->setattr method rather than ->truncate.
- vmtruncate cannot be used by core code to trim blocks past i_size in
the event of write failure after allocation, so this trimming must be
performed in the fs code.
- convert usage of helpers block_write_begin, nobh_write_begin,
cont_write_begin, and *blockdev_direct_IO* to use _newtrunc postfixed
variants. These avoid calling vmtruncate to trim blocks (see previous).
- inode_setattr should not be used. generic_setattr is a new function
to be used to copy simple attributes into the generic inode.
- make use of the better opportunity to handle errors with the new sequence.
The big problem with the previous calling sequence is that the filesystem is not
called until i_size has already changed. This means it is not allowed to fail
the call, and it does not know what the previous i_size was. Also, generic
code calling vmtruncate to truncate allocated blocks in case of error had
no good way to return a meaningful error (or, for example, atomically handle
block deallocation).
Cc: Christoph Hellwig <hch@lst.de>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- | fs/direct-io.c | 61 |
1 files changed, 40 insertions, 21 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index da111aacb46e..7600aacf531d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -1134,27 +1134,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1134 | return ret; | 1134 | return ret; |
1135 | } | 1135 | } |
1136 | 1136 | ||
1137 | /* | ||
1138 | * This is a library function for use by filesystem drivers. | ||
1139 | * | ||
1140 | * The locking rules are governed by the flags parameter: | ||
1141 | * - if the flags value contains DIO_LOCKING we use a fancy locking | ||
1142 | * scheme for dumb filesystems. | ||
1143 | * For writes this function is called under i_mutex and returns with | ||
1144 | * i_mutex held, for reads, i_mutex is not held on entry, but it is | ||
1145 | * taken and dropped again before returning. | ||
1146 | * For reads and writes i_alloc_sem is taken in shared mode and released | ||
1147 | * on I/O completion (which may happen asynchronously after returning to | ||
1148 | * the caller). | ||
1149 | * | ||
1150 | * - if the flags value does NOT contain DIO_LOCKING we don't use any | ||
1151 | * internal locking but rather rely on the filesystem to synchronize | ||
1152 | * direct I/O reads/writes versus each other and truncate. | ||
1153 | * For reads and writes both i_mutex and i_alloc_sem are not held on | ||
1154 | * entry and are never taken. | ||
1155 | */ | ||
1156 | ssize_t | 1137 | ssize_t |
1157 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1138 | __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, |
1158 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1139 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
1159 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1140 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
1160 | dio_submit_t submit_io, int flags) | 1141 | dio_submit_t submit_io, int flags) |
@@ -1247,9 +1228,46 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1247 | nr_segs, blkbits, get_block, end_io, | 1228 | nr_segs, blkbits, get_block, end_io, |
1248 | submit_io, dio); | 1229 | submit_io, dio); |
1249 | 1230 | ||
1231 | out: | ||
1232 | return retval; | ||
1233 | } | ||
1234 | EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc); | ||
1235 | |||
1236 | /* | ||
1237 | * This is a library function for use by filesystem drivers. | ||
1238 | * | ||
1239 | * The locking rules are governed by the flags parameter: | ||
1240 | * - if the flags value contains DIO_LOCKING we use a fancy locking | ||
1241 | * scheme for dumb filesystems. | ||
1242 | * For writes this function is called under i_mutex and returns with | ||
1243 | * i_mutex held, for reads, i_mutex is not held on entry, but it is | ||
1244 | * taken and dropped again before returning. | ||
1245 | * For reads and writes i_alloc_sem is taken in shared mode and released | ||
1246 | * on I/O completion (which may happen asynchronously after returning to | ||
1247 | * the caller). | ||
1248 | * | ||
1249 | * - if the flags value does NOT contain DIO_LOCKING we don't use any | ||
1250 | * internal locking but rather rely on the filesystem to synchronize | ||
1251 | * direct I/O reads/writes versus each other and truncate. | ||
1252 | * For reads and writes both i_mutex and i_alloc_sem are not held on | ||
1253 | * entry and are never taken. | ||
1254 | */ | ||
1255 | ssize_t | ||
1256 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | ||
1257 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | ||
1258 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | ||
1259 | dio_submit_t submit_io, int flags) | ||
1260 | { | ||
1261 | ssize_t retval; | ||
1262 | |||
1263 | retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, | ||
1264 | offset, nr_segs, get_block, end_io, submit_io, flags); | ||
1250 | /* | 1265 | /* |
1251 | * In case of error extending write may have instantiated a few | 1266 | * In case of error extending write may have instantiated a few |
1252 | * blocks outside i_size. Trim these off again for DIO_LOCKING. | 1267 | * blocks outside i_size. Trim these off again for DIO_LOCKING. |
1268 | * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in | ||
1269 | * their own manner. This is a further example of where the old | ||
1270 | * truncate sequence is inadequate. | ||
1253 | * | 1271 | * |
1254 | * NOTE: filesystems with their own locking have to handle this | 1272 | * NOTE: filesystems with their own locking have to handle this |
1255 | * on their own. | 1273 | * on their own. |
@@ -1257,12 +1275,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1257 | if (flags & DIO_LOCKING) { | 1275 | if (flags & DIO_LOCKING) { |
1258 | if (unlikely((rw & WRITE) && retval < 0)) { | 1276 | if (unlikely((rw & WRITE) && retval < 0)) { |
1259 | loff_t isize = i_size_read(inode); | 1277 | loff_t isize = i_size_read(inode); |
1278 | loff_t end = offset + iov_length(iov, nr_segs); | ||
1279 | |||
1260 | if (end > isize) | 1280 | if (end > isize) |
1261 | vmtruncate(inode, isize); | 1281 | vmtruncate(inode, isize); |
1262 | } | 1282 | } |
1263 | } | 1283 | } |
1264 | 1284 | ||
1265 | out: | ||
1266 | return retval; | 1285 | return retval; |
1267 | } | 1286 | } |
1268 | EXPORT_SYMBOL(__blockdev_direct_IO); | 1287 | EXPORT_SYMBOL(__blockdev_direct_IO); |