about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Benjamin Marzinski <bmarzins@redhat.com> 2010-08-20 01:21:02 -0400
committer: Steven Whitehouse <swhiteho@redhat.com> 2010-09-20 06:19:17 -0400
commit: 3921120e757f9167f3fcd3a1781239824471b14d (patch)
tree: 4b5f8e9e5376ae6a64b9757a62392b89a6316e26
parent: 9a3f236d40a99ea8dca3df40d8ef67631057cad6 (diff)
GFS2: fallocate support
This patch adds support for fallocate to gfs2. Since gfs2 does not support uninitialized data blocks, it must write out zeros to all the blocks. However, since it does not need to lock any pages to read from, gfs2 can write out the zero blocks much more efficiently. On a moderately full filesystem, fallocate works around 5 times faster on average. The fallocate call also allows gfs2 to add blocks to the file without changing the file size, which will make it possible for gfs2 to preallocate space for the rindex file, so that gfs2 can grow a completely full filesystem.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- fs/gfs2/aops.c 4
-rw-r--r-- fs/gfs2/incore.h 1
-rw-r--r-- fs/gfs2/inode.h 2
-rw-r--r-- fs/gfs2/ops_inode.c 254
-rw-r--r-- fs/gfs2/rgrp.c 12
-rw-r--r-- fs/gfs2/trans.h 1
6 files changed, 272 insertions, 2 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index c92f36ba3fc9..180ef8a6de6b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -36,8 +36,8 @@
36#include "glops.h" 36#include "glops.h"
37 37
38 38
39static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, 39void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
40 unsigned int from, unsigned int to) 40 unsigned int from, unsigned int to)
41{ 41{
42 struct buffer_head *head = page_buffers(page); 42 struct buffer_head *head = page_buffers(page);
43 unsigned int bsize = head->b_size; 43 unsigned int bsize = head->b_size;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c11971775275..578234bb03f8 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -571,6 +571,7 @@ struct gfs2_sbd {
571 struct list_head sd_rindex_mru_list; 571 struct list_head sd_rindex_mru_list;
572 struct gfs2_rgrpd *sd_rindex_forward; 572 struct gfs2_rgrpd *sd_rindex_forward;
573 unsigned int sd_rgrps; 573 unsigned int sd_rgrps;
574 unsigned int sd_max_rg_data;
574 575
575 /* Journal index stuff */ 576 /* Journal index stuff */
576 577
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 15ff4df20aab..6720d7d5fbc6 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -19,6 +19,8 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
19extern int gfs2_internal_read(struct gfs2_inode *ip, 19extern int gfs2_internal_read(struct gfs2_inode *ip,
20 struct file_ra_state *ra_state, 20 struct file_ra_state *ra_state,
21 char *buf, loff_t *pos, unsigned size); 21 char *buf, loff_t *pos, unsigned size);
22extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
23 unsigned int from, unsigned int to);
22extern void gfs2_set_aops(struct inode *inode); 24extern void gfs2_set_aops(struct inode *inode);
23 25
24static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) 26static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index ee6ffd590418..f6da0d7676e2 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -18,6 +18,8 @@
18#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
19#include <linux/crc32.h> 19#include <linux/crc32.h>
20#include <linux/fiemap.h> 20#include <linux/fiemap.h>
21#include <linux/swap.h>
22#include <linux/falloc.h>
21#include <asm/uaccess.h> 23#include <asm/uaccess.h>
22 24
23#include "gfs2.h" 25#include "gfs2.h"
@@ -1277,6 +1279,257 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
1277 return ret; 1279 return ret;
1278} 1280}
1279 1281
1282static void empty_write_end(struct page *page, unsigned from,
1283 unsigned to)
1284{
1285 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
1286
1287 page_zero_new_buffers(page, from, to);
1288 flush_dcache_page(page);
1289 mark_page_accessed(page);
1290
1291 if (!gfs2_is_writeback(ip))
1292 gfs2_page_add_databufs(ip, page, from, to);
1293
1294 block_commit_write(page, from, to);
1295}
1296
1297
1298static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
1299{
1300 unsigned start, end, next;
1301 struct buffer_head *bh, *head;
1302 int error;
1303
1304 if (!page_has_buffers(page)) {
1305 error = block_prepare_write(page, from, to, gfs2_block_map);
1306 if (unlikely(error))
1307 return error;
1308
1309 empty_write_end(page, from, to);
1310 return 0;
1311 }
1312
1313 bh = head = page_buffers(page);
1314 next = end = 0;
1315 while (next < from) {
1316 next += bh->b_size;
1317 bh = bh->b_this_page;
1318 }
1319 start = next;
1320 do {
1321 next += bh->b_size;
1322 if (buffer_mapped(bh)) {
1323 if (end) {
1324 error = block_prepare_write(page, start, end,
1325 gfs2_block_map);
1326 if (unlikely(error))
1327 return error;
1328 empty_write_end(page, start, end);
1329 end = 0;
1330 }
1331 start = next;
1332 }
1333 else
1334 end = next;
1335 bh = bh->b_this_page;
1336 } while (next < to);
1337
1338 if (end) {
1339 error = block_prepare_write(page, start, end, gfs2_block_map);
1340 if (unlikely(error))
1341 return error;
1342 empty_write_end(page, start, end);
1343 }
1344
1345 return 0;
1346}
1347
1348static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
1349 int mode)
1350{
1351 struct gfs2_inode *ip = GFS2_I(inode);
1352 struct buffer_head *dibh;
1353 int error;
1354 u64 start = offset >> PAGE_CACHE_SHIFT;
1355 unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
1356 u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
1357 pgoff_t curr;
1358 struct page *page;
1359 unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
1360 unsigned int from, to;
1361
1362 if (!end_offset)
1363 end_offset = PAGE_CACHE_SIZE;
1364
1365 error = gfs2_meta_inode_buffer(ip, &dibh);
1366 if (unlikely(error))
1367 goto out;
1368
1369 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1370
1371 if (gfs2_is_stuffed(ip)) {
1372 error = gfs2_unstuff_dinode(ip, NULL);
1373 if (unlikely(error))
1374 goto out;
1375 }
1376
1377 curr = start;
1378 offset = start << PAGE_CACHE_SHIFT;
1379 from = start_offset;
1380 to = PAGE_CACHE_SIZE;
1381 while (curr <= end) {
1382 page = grab_cache_page_write_begin(inode->i_mapping, curr,
1383 AOP_FLAG_NOFS);
1384 if (unlikely(!page)) {
1385 error = -ENOMEM;
1386 goto out;
1387 }
1388
1389 if (curr == end)
1390 to = end_offset;
1391 error = write_empty_blocks(page, from, to);
1392 if (!error && offset + to > inode->i_size &&
1393 !(mode & FALLOC_FL_KEEP_SIZE)) {
1394 i_size_write(inode, offset + to);
1395 }
1396 unlock_page(page);
1397 page_cache_release(page);
1398 if (error)
1399 goto out;
1400 curr++;
1401 offset += PAGE_CACHE_SIZE;
1402 from = 0;
1403 }
1404
1405 gfs2_dinode_out(ip, dibh->b_data);
1406 mark_inode_dirty(inode);
1407
1408 brelse(dibh);
1409
1410out:
1411 return error;
1412}
1413
1414static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
1415 unsigned int *data_blocks, unsigned int *ind_blocks)
1416{
1417 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1418 unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
1419 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
1420
1421 for (tmp = max_data; tmp > sdp->sd_diptrs;) {
1422 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
1423 max_data -= tmp;
1424 }
1425 /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
1426 so it might end up with fewer data blocks */
1427 if (max_data <= *data_blocks)
1428 return;
1429 *data_blocks = max_data;
1430 *ind_blocks = max_blocks - max_data;
1431 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
1432 if (*len > max) {
1433 *len = max;
1434 gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
1435 }
1436}
1437
1438static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset,
1439 loff_t len)
1440{
1441 struct gfs2_sbd *sdp = GFS2_SB(inode);
1442 struct gfs2_inode *ip = GFS2_I(inode);
1443 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
1444 loff_t bytes, max_bytes;
1445 struct gfs2_alloc *al;
1446 int error;
1447 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
1448 next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
1449
1450 offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
1451 sdp->sd_sb.sb_bsize_shift;
1452
1453 len = next - offset;
1454 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
1455 if (!bytes)
1456 bytes = UINT_MAX;
1457
1458 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
1459 error = gfs2_glock_nq(&ip->i_gh);
1460 if (unlikely(error))
1461 goto out_uninit;
1462
1463 if (!gfs2_write_alloc_required(ip, offset, len))
1464 goto out_unlock;
1465
1466 while (len > 0) {
1467 if (len < bytes)
1468 bytes = len;
1469 al = gfs2_alloc_get(ip);
1470 if (!al) {
1471 error = -ENOMEM;
1472 goto out_unlock;
1473 }
1474
1475 error = gfs2_quota_lock_check(ip);
1476 if (error)
1477 goto out_alloc_put;
1478
1479retry:
1480 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
1481
1482 al->al_requested = data_blocks + ind_blocks;
1483 error = gfs2_inplace_reserve(ip);
1484 if (error) {
1485 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
1486 bytes >>= 1;
1487 goto retry;
1488 }
1489 goto out_qunlock;
1490 }
1491 max_bytes = bytes;
1492 calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
1493 al->al_requested = data_blocks + ind_blocks;
1494
1495 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
1496 RES_RG_HDR + ip->i_alloc->al_rgd->rd_length;
1497 if (gfs2_is_jdata(ip))
1498 rblocks += data_blocks ? data_blocks : 1;
1499
1500 error = gfs2_trans_begin(sdp, rblocks,
1501 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
1502 if (error)
1503 goto out_trans_fail;
1504
1505 error = fallocate_chunk(inode, offset, max_bytes, mode);
1506 gfs2_trans_end(sdp);
1507
1508 if (error)
1509 goto out_trans_fail;
1510
1511 len -= max_bytes;
1512 offset += max_bytes;
1513 gfs2_inplace_release(ip);
1514 gfs2_quota_unlock(ip);
1515 gfs2_alloc_put(ip);
1516 }
1517 goto out_unlock;
1518
1519out_trans_fail:
1520 gfs2_inplace_release(ip);
1521out_qunlock:
1522 gfs2_quota_unlock(ip);
1523out_alloc_put:
1524 gfs2_alloc_put(ip);
1525out_unlock:
1526 gfs2_glock_dq(&ip->i_gh);
1527out_uninit:
1528 gfs2_holder_uninit(&ip->i_gh);
1529 return error;
1530}
1531
1532
1280static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1533static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1281 u64 start, u64 len) 1534 u64 start, u64 len)
1282{ 1535{
@@ -1327,6 +1580,7 @@ const struct inode_operations gfs2_file_iops = {
1327 .getxattr = gfs2_getxattr, 1580 .getxattr = gfs2_getxattr,
1328 .listxattr = gfs2_listxattr, 1581 .listxattr = gfs2_listxattr,
1329 .removexattr = gfs2_removexattr, 1582 .removexattr = gfs2_removexattr,
1583 .fallocate = gfs2_fallocate,
1330 .fiemap = gfs2_fiemap, 1584 .fiemap = gfs2_fiemap,
1331}; 1585};
1332 1586
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 66b6d4d8b1d2..f9ddcf401753 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -589,6 +589,8 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
589 struct inode *inode = &ip->i_inode; 589 struct inode *inode = &ip->i_inode;
590 struct file_ra_state ra_state; 590 struct file_ra_state ra_state;
591 u64 rgrp_count = i_size_read(inode); 591 u64 rgrp_count = i_size_read(inode);
592 struct gfs2_rgrpd *rgd;
593 unsigned int max_data = 0;
592 int error; 594 int error;
593 595
594 do_div(rgrp_count, sizeof(struct gfs2_rindex)); 596 do_div(rgrp_count, sizeof(struct gfs2_rindex));
@@ -603,6 +605,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
603 } 605 }
604 } 606 }
605 607
608 list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
609 if (rgd->rd_data > max_data)
610 max_data = rgd->rd_data;
611 sdp->sd_max_rg_data = max_data;
606 sdp->sd_rindex_uptodate = 1; 612 sdp->sd_rindex_uptodate = 1;
607 return 0; 613 return 0;
608} 614}
@@ -622,6 +628,8 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
622 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 628 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
623 struct inode *inode = &ip->i_inode; 629 struct inode *inode = &ip->i_inode;
624 struct file_ra_state ra_state; 630 struct file_ra_state ra_state;
631 struct gfs2_rgrpd *rgd;
632 unsigned int max_data = 0;
625 int error; 633 int error;
626 634
627 file_ra_state_init(&ra_state, inode->i_mapping); 635 file_ra_state_init(&ra_state, inode->i_mapping);
@@ -636,6 +644,10 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)
636 return error; 644 return error;
637 } 645 }
638 } 646 }
647 list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
648 if (rgd->rd_data > max_data)
649 max_data = rgd->rd_data;
650 sdp->sd_max_rg_data = max_data;
639 651
640 sdp->sd_rindex_uptodate = 1; 652 sdp->sd_rindex_uptodate = 1;
641 return 0; 653 return 0;
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index edf9d4bd908e..b849eb7ad37d 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -20,6 +20,7 @@ struct gfs2_glock;
20#define RES_JDATA 1 20#define RES_JDATA 1
21#define RES_DATA 1 21#define RES_DATA 1
22#define RES_LEAF 1 22#define RES_LEAF 1
23#define RES_RG_HDR 1
23#define RES_RG_BIT 2 24#define RES_RG_BIT 2
24#define RES_EATTR 1 25#define RES_EATTR 1
25#define RES_STATFS 1 26#define RES_STATFS 1