diff options
author | Christoph Hellwig <hch@lst.de> | 2011-01-14 07:07:43 -0500 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-01-17 02:25:31 -0500 |
commit | 2fe17c1075836b66678ed2a305fd09b6773883aa (patch) | |
tree | eb5287be8138686682eef9622872cfc7657e0664 /fs/gfs2/ops_inode.c | |
parent | 64c23e86873ee410554d6d1c76b60da47025e96f (diff) |
fallocate should be a file operation
Currently all filesystems except XFS implement fallocate asynchronously,
while XFS forced a commit. Both of these are suboptimal - in case of O_SYNC
I/O we really want our allocation on disk, especially for the !KEEP_SIZE
case where we actually grow the file with user-visible zeroes. On the
other hand, always committing the transaction is a bad idea for fast-path
uses of fallocate like for example in recent Samba versions. Given
that block allocation is a data plane operation anyway change it from
an inode operation to a file operation so that we have the file structure
available that lets us check for O_SYNC.
This also includes moving the code around for a few of the filesystems,
and removing the already unneeded S_ISDIR checks, given that we only wire
up fallocate for regular files.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/gfs2/ops_inode.c')
-rw-r--r-- | fs/gfs2/ops_inode.c | 258 |
1 files changed, 0 insertions, 258 deletions
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index c09528c07f3d..d8b26ac2e20b 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -18,8 +18,6 @@ | |||
18 | #include <linux/gfs2_ondisk.h> | 18 | #include <linux/gfs2_ondisk.h> |
19 | #include <linux/crc32.h> | 19 | #include <linux/crc32.h> |
20 | #include <linux/fiemap.h> | 20 | #include <linux/fiemap.h> |
21 | #include <linux/swap.h> | ||
22 | #include <linux/falloc.h> | ||
23 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
24 | 22 | ||
25 | #include "gfs2.h" | 23 | #include "gfs2.h" |
@@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
1257 | return ret; | 1255 | return ret; |
1258 | } | 1256 | } |
1259 | 1257 | ||
1260 | static void empty_write_end(struct page *page, unsigned from, | ||
1261 | unsigned to) | ||
1262 | { | ||
1263 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
1264 | |||
1265 | page_zero_new_buffers(page, from, to); | ||
1266 | flush_dcache_page(page); | ||
1267 | mark_page_accessed(page); | ||
1268 | |||
1269 | if (!gfs2_is_writeback(ip)) | ||
1270 | gfs2_page_add_databufs(ip, page, from, to); | ||
1271 | |||
1272 | block_commit_write(page, from, to); | ||
1273 | } | ||
1274 | |||
1275 | |||
1276 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | ||
1277 | { | ||
1278 | unsigned start, end, next; | ||
1279 | struct buffer_head *bh, *head; | ||
1280 | int error; | ||
1281 | |||
1282 | if (!page_has_buffers(page)) { | ||
1283 | error = __block_write_begin(page, from, to - from, gfs2_block_map); | ||
1284 | if (unlikely(error)) | ||
1285 | return error; | ||
1286 | |||
1287 | empty_write_end(page, from, to); | ||
1288 | return 0; | ||
1289 | } | ||
1290 | |||
1291 | bh = head = page_buffers(page); | ||
1292 | next = end = 0; | ||
1293 | while (next < from) { | ||
1294 | next += bh->b_size; | ||
1295 | bh = bh->b_this_page; | ||
1296 | } | ||
1297 | start = next; | ||
1298 | do { | ||
1299 | next += bh->b_size; | ||
1300 | if (buffer_mapped(bh)) { | ||
1301 | if (end) { | ||
1302 | error = __block_write_begin(page, start, end - start, | ||
1303 | gfs2_block_map); | ||
1304 | if (unlikely(error)) | ||
1305 | return error; | ||
1306 | empty_write_end(page, start, end); | ||
1307 | end = 0; | ||
1308 | } | ||
1309 | start = next; | ||
1310 | } | ||
1311 | else | ||
1312 | end = next; | ||
1313 | bh = bh->b_this_page; | ||
1314 | } while (next < to); | ||
1315 | |||
1316 | if (end) { | ||
1317 | error = __block_write_begin(page, start, end - start, gfs2_block_map); | ||
1318 | if (unlikely(error)) | ||
1319 | return error; | ||
1320 | empty_write_end(page, start, end); | ||
1321 | } | ||
1322 | |||
1323 | return 0; | ||
1324 | } | ||
1325 | |||
1326 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | ||
1327 | int mode) | ||
1328 | { | ||
1329 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1330 | struct buffer_head *dibh; | ||
1331 | int error; | ||
1332 | u64 start = offset >> PAGE_CACHE_SHIFT; | ||
1333 | unsigned int start_offset = offset & ~PAGE_CACHE_MASK; | ||
1334 | u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; | ||
1335 | pgoff_t curr; | ||
1336 | struct page *page; | ||
1337 | unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; | ||
1338 | unsigned int from, to; | ||
1339 | |||
1340 | if (!end_offset) | ||
1341 | end_offset = PAGE_CACHE_SIZE; | ||
1342 | |||
1343 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1344 | if (unlikely(error)) | ||
1345 | goto out; | ||
1346 | |||
1347 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1348 | |||
1349 | if (gfs2_is_stuffed(ip)) { | ||
1350 | error = gfs2_unstuff_dinode(ip, NULL); | ||
1351 | if (unlikely(error)) | ||
1352 | goto out; | ||
1353 | } | ||
1354 | |||
1355 | curr = start; | ||
1356 | offset = start << PAGE_CACHE_SHIFT; | ||
1357 | from = start_offset; | ||
1358 | to = PAGE_CACHE_SIZE; | ||
1359 | while (curr <= end) { | ||
1360 | page = grab_cache_page_write_begin(inode->i_mapping, curr, | ||
1361 | AOP_FLAG_NOFS); | ||
1362 | if (unlikely(!page)) { | ||
1363 | error = -ENOMEM; | ||
1364 | goto out; | ||
1365 | } | ||
1366 | |||
1367 | if (curr == end) | ||
1368 | to = end_offset; | ||
1369 | error = write_empty_blocks(page, from, to); | ||
1370 | if (!error && offset + to > inode->i_size && | ||
1371 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
1372 | i_size_write(inode, offset + to); | ||
1373 | } | ||
1374 | unlock_page(page); | ||
1375 | page_cache_release(page); | ||
1376 | if (error) | ||
1377 | goto out; | ||
1378 | curr++; | ||
1379 | offset += PAGE_CACHE_SIZE; | ||
1380 | from = 0; | ||
1381 | } | ||
1382 | |||
1383 | gfs2_dinode_out(ip, dibh->b_data); | ||
1384 | mark_inode_dirty(inode); | ||
1385 | |||
1386 | brelse(dibh); | ||
1387 | |||
1388 | out: | ||
1389 | return error; | ||
1390 | } | ||
1391 | |||
1392 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | ||
1393 | unsigned int *data_blocks, unsigned int *ind_blocks) | ||
1394 | { | ||
1395 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1396 | unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; | ||
1397 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | ||
1398 | |||
1399 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | ||
1400 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | ||
1401 | max_data -= tmp; | ||
1402 | } | ||
1403 | /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, | ||
1404 | so it might end up with fewer data blocks */ | ||
1405 | if (max_data <= *data_blocks) | ||
1406 | return; | ||
1407 | *data_blocks = max_data; | ||
1408 | *ind_blocks = max_blocks - max_data; | ||
1409 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; | ||
1410 | if (*len > max) { | ||
1411 | *len = max; | ||
1412 | gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); | ||
1413 | } | ||
1414 | } | ||
1415 | |||
1416 | static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, | ||
1417 | loff_t len) | ||
1418 | { | ||
1419 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1420 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1421 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | ||
1422 | loff_t bytes, max_bytes; | ||
1423 | struct gfs2_alloc *al; | ||
1424 | int error; | ||
1425 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | ||
1426 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | ||
1427 | |||
1428 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
1429 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
1430 | return -EOPNOTSUPP; | ||
1431 | |||
1432 | offset = (offset >> sdp->sd_sb.sb_bsize_shift) << | ||
1433 | sdp->sd_sb.sb_bsize_shift; | ||
1434 | |||
1435 | len = next - offset; | ||
1436 | bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; | ||
1437 | if (!bytes) | ||
1438 | bytes = UINT_MAX; | ||
1439 | |||
1440 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | ||
1441 | error = gfs2_glock_nq(&ip->i_gh); | ||
1442 | if (unlikely(error)) | ||
1443 | goto out_uninit; | ||
1444 | |||
1445 | if (!gfs2_write_alloc_required(ip, offset, len)) | ||
1446 | goto out_unlock; | ||
1447 | |||
1448 | while (len > 0) { | ||
1449 | if (len < bytes) | ||
1450 | bytes = len; | ||
1451 | al = gfs2_alloc_get(ip); | ||
1452 | if (!al) { | ||
1453 | error = -ENOMEM; | ||
1454 | goto out_unlock; | ||
1455 | } | ||
1456 | |||
1457 | error = gfs2_quota_lock_check(ip); | ||
1458 | if (error) | ||
1459 | goto out_alloc_put; | ||
1460 | |||
1461 | retry: | ||
1462 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | ||
1463 | |||
1464 | al->al_requested = data_blocks + ind_blocks; | ||
1465 | error = gfs2_inplace_reserve(ip); | ||
1466 | if (error) { | ||
1467 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | ||
1468 | bytes >>= 1; | ||
1469 | goto retry; | ||
1470 | } | ||
1471 | goto out_qunlock; | ||
1472 | } | ||
1473 | max_bytes = bytes; | ||
1474 | calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); | ||
1475 | al->al_requested = data_blocks + ind_blocks; | ||
1476 | |||
1477 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | ||
1478 | RES_RG_HDR + gfs2_rg_blocks(al); | ||
1479 | if (gfs2_is_jdata(ip)) | ||
1480 | rblocks += data_blocks ? data_blocks : 1; | ||
1481 | |||
1482 | error = gfs2_trans_begin(sdp, rblocks, | ||
1483 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | ||
1484 | if (error) | ||
1485 | goto out_trans_fail; | ||
1486 | |||
1487 | error = fallocate_chunk(inode, offset, max_bytes, mode); | ||
1488 | gfs2_trans_end(sdp); | ||
1489 | |||
1490 | if (error) | ||
1491 | goto out_trans_fail; | ||
1492 | |||
1493 | len -= max_bytes; | ||
1494 | offset += max_bytes; | ||
1495 | gfs2_inplace_release(ip); | ||
1496 | gfs2_quota_unlock(ip); | ||
1497 | gfs2_alloc_put(ip); | ||
1498 | } | ||
1499 | goto out_unlock; | ||
1500 | |||
1501 | out_trans_fail: | ||
1502 | gfs2_inplace_release(ip); | ||
1503 | out_qunlock: | ||
1504 | gfs2_quota_unlock(ip); | ||
1505 | out_alloc_put: | ||
1506 | gfs2_alloc_put(ip); | ||
1507 | out_unlock: | ||
1508 | gfs2_glock_dq(&ip->i_gh); | ||
1509 | out_uninit: | ||
1510 | gfs2_holder_uninit(&ip->i_gh); | ||
1511 | return error; | ||
1512 | } | ||
1513 | |||
1514 | |||
1515 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1258 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1516 | u64 start, u64 len) | 1259 | u64 start, u64 len) |
1517 | { | 1260 | { |
@@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = { | |||
1562 | .getxattr = gfs2_getxattr, | 1305 | .getxattr = gfs2_getxattr, |
1563 | .listxattr = gfs2_listxattr, | 1306 | .listxattr = gfs2_listxattr, |
1564 | .removexattr = gfs2_removexattr, | 1307 | .removexattr = gfs2_removexattr, |
1565 | .fallocate = gfs2_fallocate, | ||
1566 | .fiemap = gfs2_fiemap, | 1308 | .fiemap = gfs2_fiemap, |
1567 | }; | 1309 | }; |
1568 | 1310 | ||