author      Christoph Hellwig <hch@lst.de>          2011-01-14 07:07:43 -0500
committer   Al Viro <viro@zeniv.linux.org.uk>       2011-01-17 02:25:31 -0500
commit      2fe17c1075836b66678ed2a305fd09b6773883aa (patch)
tree        eb5287be8138686682eef9622872cfc7657e0664 /fs/btrfs
parent      64c23e86873ee410554d6d1c76b60da47025e96f (diff)
fallocate should be a file operation
Currently all filesystems except XFS implement fallocate asynchronously,
while XFS forces a commit. Both of these are suboptimal - in the case of
O_SYNC I/O we really want our allocation on disk, especially for the
!KEEP_SIZE case where we actually grow the file with user-visible zeroes.
On the other hand, always committing the transaction is a bad idea for
fast-path uses of fallocate, such as in recent Samba versions. Given
that block allocation is a data-plane operation anyway, change it from
an inode operation to a file operation so that we have the file structure
available, which lets us check for O_SYNC.

This also includes moving the code around for a few of the filesystems,
and removing the already unneeded S_ISDIR checks, given that we only wire
up fallocate for regular files.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
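
For orientation before the diff, here is a minimal sketch of the method-table
change the message describes. The structs are abbreviated to the one hook in
question, not the full kernel definitions:

    /* Before this commit the hook hangs off the inode, so the
     * implementation never sees the struct file (or its O_SYNC flag): */
    struct inode_operations {
            /* ... */
            long (*fallocate)(struct inode *inode, int mode,
                              loff_t offset, loff_t len);
    };

    /* After it, fallocate is a file operation, so the implementation
     * can inspect file->f_flags & O_SYNC: */
    struct file_operations {
            /* ... */
            long (*fallocate)(struct file *file, int mode,
                              loff_t offset, loff_t len);
    };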
Diffstat (limited to 'fs/btrfs')

-rw-r--r--   fs/btrfs/file.c   | 113
-rw-r--r--   fs/btrfs/inode.c  | 111

2 files changed, 113 insertions, 111 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 66836d85763b..a9e0a4eaf3d9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
+#include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/statfs.h>
@@ -1237,6 +1238,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
 	return 0;
 }
 
+static long btrfs_fallocate(struct file *file, int mode,
+			    loff_t offset, loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct extent_state *cached_state = NULL;
+	u64 cur_offset;
+	u64 last_byte;
+	u64 alloc_start;
+	u64 alloc_end;
+	u64 alloc_hint = 0;
+	u64 locked_end;
+	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+	struct extent_map *em;
+	int ret;
+
+	alloc_start = offset & ~mask;
+	alloc_end = (offset + len + mask) & ~mask;
+
+	/* We only support the FALLOC_FL_KEEP_SIZE mode */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	/*
+	 * wait for ordered IO before we have any locks. We'll loop again
+	 * below with the locks held.
+	 */
+	btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
+	mutex_lock(&inode->i_mutex);
+	ret = inode_newsize_ok(inode, alloc_end);
+	if (ret)
+		goto out;
+
+	if (alloc_start > inode->i_size) {
+		ret = btrfs_cont_expand(inode, alloc_start);
+		if (ret)
+			goto out;
+	}
+
+	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+	if (ret)
+		goto out;
+
+	locked_end = alloc_end - 1;
+	while (1) {
+		struct btrfs_ordered_extent *ordered;
+
+		/* the extent lock is ordered inside the running
+		 * transaction
+		 */
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
+				 locked_end, 0, &cached_state, GFP_NOFS);
+		ordered = btrfs_lookup_first_ordered_extent(inode,
+							    alloc_end - 1);
+		if (ordered &&
+		    ordered->file_offset + ordered->len > alloc_start &&
+		    ordered->file_offset < alloc_end) {
+			btrfs_put_ordered_extent(ordered);
+			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+					     alloc_start, locked_end,
+					     &cached_state, GFP_NOFS);
+			/*
+			 * we can't wait on the range with the transaction
+			 * running or with the extent lock held
+			 */
+			btrfs_wait_ordered_range(inode, alloc_start,
+						 alloc_end - alloc_start);
+		} else {
+			if (ordered)
+				btrfs_put_ordered_extent(ordered);
+			break;
+		}
+	}
+
+	cur_offset = alloc_start;
+	while (1) {
+		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+				      alloc_end - cur_offset, 0);
+		BUG_ON(IS_ERR(em) || !em);
+		last_byte = min(extent_map_end(em), alloc_end);
+		last_byte = (last_byte + mask) & ~mask;
+		if (em->block_start == EXTENT_MAP_HOLE ||
+		    (cur_offset >= inode->i_size &&
+		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
+							last_byte - cur_offset,
+							1 << inode->i_blkbits,
+							offset + len,
+							&alloc_hint);
+			if (ret < 0) {
+				free_extent_map(em);
+				break;
+			}
+		}
+		free_extent_map(em);
+
+		cur_offset = last_byte;
+		if (cur_offset >= alloc_end) {
+			ret = 0;
+			break;
+		}
+	}
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+			     &cached_state, GFP_NOFS);
+
+	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
+out:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 const struct file_operations btrfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -1248,6 +1360,7 @@ const struct file_operations btrfs_file_operations = {
 	.open		= generic_file_open,
 	.release	= btrfs_release_file,
 	.fsync		= btrfs_sync_file,
+	.fallocate	= btrfs_fallocate,
 	.unlocked_ioctl	= btrfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_ioctl,
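
Note that the hunks above only wire up the btrfs side; the generic call site
lives in fs/open.c, which is outside this fs/btrfs-limited diffstat. After
this change it dispatches through the file operations table, roughly as in
this abbreviated sketch of do_fallocate() (offset/len sanity checks omitted):

    if (!(file->f_mode & FMODE_WRITE))
            return -EBADF;          /* must be open for writing */
    if (!file->f_op->fallocate)
            return -EOPNOTSUPP;     /* filesystem has no fallocate hook */
    return file->f_op->fallocate(file, mode, offset, len);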
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 64daf2acd0d5..902afbf50811 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7098,116 +7098,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
 			   min_size, actual_len, alloc_hint, trans);
 }
 
-static long btrfs_fallocate(struct inode *inode, int mode,
-			    loff_t offset, loff_t len)
-{
-	struct extent_state *cached_state = NULL;
-	u64 cur_offset;
-	u64 last_byte;
-	u64 alloc_start;
-	u64 alloc_end;
-	u64 alloc_hint = 0;
-	u64 locked_end;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
-	struct extent_map *em;
-	int ret;
-
-	alloc_start = offset & ~mask;
-	alloc_end = (offset + len + mask) & ~mask;
-
-	/* We only support the FALLOC_FL_KEEP_SIZE mode */
-	if (mode & ~FALLOC_FL_KEEP_SIZE)
-		return -EOPNOTSUPP;
-
-	/*
-	 * wait for ordered IO before we have any locks. We'll loop again
-	 * below with the locks held.
-	 */
-	btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
-
-	mutex_lock(&inode->i_mutex);
-	ret = inode_newsize_ok(inode, alloc_end);
-	if (ret)
-		goto out;
-
-	if (alloc_start > inode->i_size) {
-		ret = btrfs_cont_expand(inode, alloc_start);
-		if (ret)
-			goto out;
-	}
-
-	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
-	if (ret)
-		goto out;
-
-	locked_end = alloc_end - 1;
-	while (1) {
-		struct btrfs_ordered_extent *ordered;
-
-		/* the extent lock is ordered inside the running
-		 * transaction
-		 */
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
-				 locked_end, 0, &cached_state, GFP_NOFS);
-		ordered = btrfs_lookup_first_ordered_extent(inode,
-							    alloc_end - 1);
-		if (ordered &&
-		    ordered->file_offset + ordered->len > alloc_start &&
-		    ordered->file_offset < alloc_end) {
-			btrfs_put_ordered_extent(ordered);
-			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-					     alloc_start, locked_end,
-					     &cached_state, GFP_NOFS);
-			/*
-			 * we can't wait on the range with the transaction
-			 * running or with the extent lock held
-			 */
-			btrfs_wait_ordered_range(inode, alloc_start,
-						 alloc_end - alloc_start);
-		} else {
-			if (ordered)
-				btrfs_put_ordered_extent(ordered);
-			break;
-		}
-	}
-
-	cur_offset = alloc_start;
-	while (1) {
-		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
-				      alloc_end - cur_offset, 0);
-		BUG_ON(IS_ERR(em) || !em);
-		last_byte = min(extent_map_end(em), alloc_end);
-		last_byte = (last_byte + mask) & ~mask;
-		if (em->block_start == EXTENT_MAP_HOLE ||
-		    (cur_offset >= inode->i_size &&
-		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
-							last_byte - cur_offset,
-							1 << inode->i_blkbits,
-							offset + len,
-							&alloc_hint);
-			if (ret < 0) {
-				free_extent_map(em);
-				break;
-			}
-		}
-		free_extent_map(em);
-
-		cur_offset = last_byte;
-		if (cur_offset >= alloc_end) {
-			ret = 0;
-			break;
-		}
-	}
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
-			     &cached_state, GFP_NOFS);
-
-	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
-out:
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
 static int btrfs_set_page_dirty(struct page *page)
 {
 	return __set_page_dirty_nobuffers(page);
@@ -7310,7 +7200,6 @@ static const struct inode_operations btrfs_file_inode_operations = {
 	.listxattr	= btrfs_listxattr,
 	.removexattr	= btrfs_removexattr,
 	.permission	= btrfs_permission,
-	.fallocate	= btrfs_fallocate,
 	.fiemap		= btrfs_fiemap,
 };
 static const struct inode_operations btrfs_special_inode_operations = {
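
As a usage footnote, btrfs_fallocate above rejects any mode bits other than
FALLOC_FL_KEEP_SIZE with -EOPNOTSUPP, and only the default mode grows i_size
with user-visible zeroes. A minimal userspace caller exercising both accepted
modes could look like the sketch below; the file name and the 1 MiB lengths
are illustrative, not taken from the patch:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <linux/falloc.h>       /* FALLOC_FL_KEEP_SIZE */

    int main(void)
    {
            int fd = open("testfile", O_WRONLY | O_CREAT, 0644);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }

            /* mode 0: allocate blocks and extend i_size to 1 MiB */
            if (fallocate(fd, 0, 0, 1 << 20) < 0)
                    perror("fallocate(mode=0)");

            /* KEEP_SIZE: preallocate a further 1 MiB past EOF
             * without changing the visible file size */
            if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 1 << 20, 1 << 20) < 0)
                    perror("fallocate(KEEP_SIZE)");

            close(fd);
            return 0;
    }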