aboutsummaryrefslogtreecommitdiffstats
path: root/fs/direct-io.c
diff options
context:
space:
mode:
author: npiggin@suse.de <npiggin@suse.de> 2010-05-26 11:05:33 -0400
committer: Al Viro <viro@zeniv.linux.org.uk> 2010-05-27 22:15:33 -0400
commit: 7bb46a6734a7e1ad4beaecc11cae7ed3ff81d30f (patch)
tree: e575d9c55e2a6ccc645dcb3ae2564de458b428f2 /fs/direct-io.c
parent: 7000d3c424e5bb350e502a477fb0e1ed42f8b10e (diff)
fs: introduce new truncate sequence
Introduce a new truncate calling sequence into fs/mm subsystems. Rather than setattr > vmtruncate > truncate, have filesystems call their truncate sequence from ->setattr if filesystem specific operations are required. vmtruncate is deprecated, and truncate_pagecache and inode_newsize_ok helpers introduced previously should be used.

simple_setattr is introduced for simple in-ram filesystems to implement the new truncate sequence. Eventually all filesystems should be converted to implement a setattr, and the default code in notify_change should go away.

simple_setsize is also introduced to perform just the ATTR_SIZE portion of simple_setattr (i.e. changing i_size and trimming pagecache).

To implement the new truncate sequence:
- filesystem specific manipulations (e.g. freeing blocks) must be done in the setattr method rather than ->truncate.
- vmtruncate cannot be used by core code to trim blocks past i_size in the event of write failure after allocation, so this must be performed in the fs code.
- convert usage of helpers block_write_begin, nobh_write_begin, cont_write_begin, and *blockdev_direct_IO* to use _newtrunc postfixed variants. These avoid calling vmtruncate to trim blocks (see previous).
- inode_setattr should not be used. generic_setattr is a new function to be used to copy simple attributes into the generic inode.
- make use of the better opportunity to handle errors with the new sequence.

Big problem with the previous calling sequence: the filesystem is not called until i_size has already changed. This means it is not allowed to fail the call, and also it does not know what the previous i_size was. Also, generic code calling vmtruncate to truncate allocated blocks in case of error had no good way to return a meaningful error (or, for example, atomically handle block deallocation).

Cc: Christoph Hellwig <hch@lst.de>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- fs/direct-io.c 61
1 files changed, 40 insertions, 21 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da111aacb46e..7600aacf531d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1134,27 +1134,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1134 return ret; 1134 return ret;
1135} 1135}
1136 1136
1137/*
1138 * This is a library function for use by filesystem drivers.
1139 *
1140 * The locking rules are governed by the flags parameter:
1141 * - if the flags value contains DIO_LOCKING we use a fancy locking
1142 * scheme for dumb filesystems.
1143 * For writes this function is called under i_mutex and returns with
1144 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1145 * taken and dropped again before returning.
1146 * For reads and writes i_alloc_sem is taken in shared mode and released
1147 * on I/O completion (which may happen asynchronously after returning to
1148 * the caller).
1149 *
1150 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1151 * internal locking but rather rely on the filesystem to synchronize
1152 * direct I/O reads/writes versus each other and truncate.
1153 * For reads and writes both i_mutex and i_alloc_sem are not held on
1154 * entry and are never taken.
1155 */
1156ssize_t 1137ssize_t
1157__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1138__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
1158 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1139 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1159 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1140 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1160 dio_submit_t submit_io, int flags) 1141 dio_submit_t submit_io, int flags)
@@ -1247,9 +1228,46 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1247 nr_segs, blkbits, get_block, end_io, 1228 nr_segs, blkbits, get_block, end_io,
1248 submit_io, dio); 1229 submit_io, dio);
1249 1230
1231out:
1232 return retval;
1233}
1234EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
1235
1236/*
1237 * This is a library function for use by filesystem drivers.
1238 *
1239 * The locking rules are governed by the flags parameter:
1240 * - if the flags value contains DIO_LOCKING we use a fancy locking
1241 * scheme for dumb filesystems.
1242 * For writes this function is called under i_mutex and returns with
1243 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1244 * taken and dropped again before returning.
1245 * For reads and writes i_alloc_sem is taken in shared mode and released
1246 * on I/O completion (which may happen asynchronously after returning to
1247 * the caller).
1248 *
1249 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1250 * internal locking but rather rely on the filesystem to synchronize
1251 * direct I/O reads/writes versus each other and truncate.
1252 * For reads and writes both i_mutex and i_alloc_sem are not held on
1253 * entry and are never taken.
1254 */
1255ssize_t
1256__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1257 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1258 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1259 dio_submit_t submit_io, int flags)
1260{
1261 ssize_t retval;
1262
1263 retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
1264 offset, nr_segs, get_block, end_io, submit_io, flags);
1250 /* 1265 /*
1251 * In case of error extending write may have instantiated a few 1266 * In case of error extending write may have instantiated a few
1252 * blocks outside i_size. Trim these off again for DIO_LOCKING. 1267 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1268 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
1269 * their own manner. This is a further example of where the old
1270 * truncate sequence is inadequate.
1253 * 1271 *
1254 * NOTE: filesystems with their own locking have to handle this 1272 * NOTE: filesystems with their own locking have to handle this
1255 * on their own. 1273 * on their own.
@@ -1257,12 +1275,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1257 if (flags & DIO_LOCKING) { 1275 if (flags & DIO_LOCKING) {
1258 if (unlikely((rw & WRITE) && retval < 0)) { 1276 if (unlikely((rw & WRITE) && retval < 0)) {
1259 loff_t isize = i_size_read(inode); 1277 loff_t isize = i_size_read(inode);
1278 loff_t end = offset + iov_length(iov, nr_segs);
1279
1260 if (end > isize) 1280 if (end > isize)
1261 vmtruncate(inode, isize); 1281 vmtruncate(inode, isize);
1262 } 1282 }
1263 } 1283 }
1264 1284
1265out:
1266 return retval; 1285 return retval;
1267} 1286}
1268EXPORT_SYMBOL(__blockdev_direct_IO); 1287EXPORT_SYMBOL(__blockdev_direct_IO);