diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2012-10-31 06:37:10 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2012-11-07 08:33:17 -0500 |
commit | 9dbe9610b9df4efe0946299804ed46bb8f91dec2 (patch) | |
tree | 8d54797420ed9d0aef1c6bdd8f3b8dd5e9938d0a /fs/gfs2 | |
parent | c9aecf73717f55e41ac11682a50bef8594547025 (diff) |
GFS2: Add Orlov allocator
Just like ext3, this works on the root directory and any directory
with the +T flag set. Also, just like ext3, any subdirectory created
in one of those directories will be allocated to a random
resource group (the GFS2 equivalent of a block group).
If you are creating a set of directories, each of which will contain a
job running on a different node, then by setting +T on the parent
directory before creating the subdirectories, each will generally end up
in a different resource group, and thus resource group contention between
nodes will be kept to a minimum.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/aops.c | 2 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 2 | ||||
-rw-r--r-- | fs/gfs2/file.c | 4 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 17 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 4 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 19 | ||||
-rw-r--r-- | fs/gfs2/rgrp.h | 3 | ||||
-rw-r--r-- | fs/gfs2/xattr.c | 2 |
8 files changed, 38 insertions, 15 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 01c4975da4bc..30de4f2a2ea9 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -643,7 +643,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
643 | goto out_unlock; | 643 | goto out_unlock; |
644 | 644 | ||
645 | requested = data_blocks + ind_blocks; | 645 | requested = data_blocks + ind_blocks; |
646 | error = gfs2_inplace_reserve(ip, requested); | 646 | error = gfs2_inplace_reserve(ip, requested, 0); |
647 | if (error) | 647 | if (error) |
648 | goto out_qunlock; | 648 | goto out_qunlock; |
649 | } | 649 | } |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1fd3ae237bdd..de70e52caf3a 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -1178,7 +1178,7 @@ static int do_grow(struct inode *inode, u64 size) | |||
1178 | if (error) | 1178 | if (error) |
1179 | return error; | 1179 | return error; |
1180 | 1180 | ||
1181 | error = gfs2_inplace_reserve(ip, 1); | 1181 | error = gfs2_inplace_reserve(ip, 1, 0); |
1182 | if (error) | 1182 | if (error) |
1183 | goto do_grow_qunlock; | 1183 | goto do_grow_qunlock; |
1184 | unstuff = 1; | 1184 | unstuff = 1; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e056b4ce4877..dfe2d8cb9b2c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -432,7 +432,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
432 | if (ret) | 432 | if (ret) |
433 | goto out_unlock; | 433 | goto out_unlock; |
434 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); | 434 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); |
435 | ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); | 435 | ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); |
436 | if (ret) | 436 | if (ret) |
437 | goto out_quota_unlock; | 437 | goto out_quota_unlock; |
438 | 438 | ||
@@ -825,7 +825,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | |||
825 | retry: | 825 | retry: |
826 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | 826 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); |
827 | 827 | ||
828 | error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); | 828 | error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); |
829 | if (error) { | 829 | if (error) { |
830 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | 830 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { |
831 | bytes >>= 1; | 831 | bytes >>= 1; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 749b05a960ef..ef3ce00bb528 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -385,13 +385,13 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip, | |||
385 | inode->i_gid = current_fsgid(); | 385 | inode->i_gid = current_fsgid(); |
386 | } | 386 | } |
387 | 387 | ||
388 | static int alloc_dinode(struct gfs2_inode *ip) | 388 | static int alloc_dinode(struct gfs2_inode *ip, u32 flags) |
389 | { | 389 | { |
390 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 390 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
391 | int error; | 391 | int error; |
392 | int dblocks = 1; | 392 | int dblocks = 1; |
393 | 393 | ||
394 | error = gfs2_inplace_reserve(ip, RES_DINODE); | 394 | error = gfs2_inplace_reserve(ip, RES_DINODE, flags); |
395 | if (error) | 395 | if (error) |
396 | goto out; | 396 | goto out; |
397 | 397 | ||
@@ -560,7 +560,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
560 | if (error) | 560 | if (error) |
561 | goto fail_quota_locks; | 561 | goto fail_quota_locks; |
562 | 562 | ||
563 | error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); | 563 | error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); |
564 | if (error) | 564 | if (error) |
565 | goto fail_quota_locks; | 565 | goto fail_quota_locks; |
566 | 566 | ||
@@ -650,6 +650,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
650 | struct gfs2_glock *io_gl; | 650 | struct gfs2_glock *io_gl; |
651 | int error; | 651 | int error; |
652 | struct buffer_head *bh = NULL; | 652 | struct buffer_head *bh = NULL; |
653 | u32 aflags = 0; | ||
653 | 654 | ||
654 | if (!name->len || name->len > GFS2_FNAMESIZE) | 655 | if (!name->len || name->len > GFS2_FNAMESIZE) |
655 | return -ENAMETOOLONG; | 656 | return -ENAMETOOLONG; |
@@ -685,7 +686,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
685 | munge_mode_uid_gid(dip, inode); | 686 | munge_mode_uid_gid(dip, inode); |
686 | ip->i_goal = dip->i_goal; | 687 | ip->i_goal = dip->i_goal; |
687 | 688 | ||
688 | error = alloc_dinode(ip); | 689 | if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) || |
690 | (dip->i_diskflags & GFS2_DIF_TOPDIR)) | ||
691 | aflags |= GFS2_AF_ORLOV; | ||
692 | |||
693 | error = alloc_dinode(ip, aflags); | ||
689 | if (error) | 694 | if (error) |
690 | goto fail_free_inode; | 695 | goto fail_free_inode; |
691 | 696 | ||
@@ -897,7 +902,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
897 | if (error) | 902 | if (error) |
898 | goto out_gunlock; | 903 | goto out_gunlock; |
899 | 904 | ||
900 | error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); | 905 | error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); |
901 | if (error) | 906 | if (error) |
902 | goto out_gunlock_q; | 907 | goto out_gunlock_q; |
903 | 908 | ||
@@ -1378,7 +1383,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1378 | if (error) | 1383 | if (error) |
1379 | goto out_gunlock; | 1384 | goto out_gunlock; |
1380 | 1385 | ||
1381 | error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); | 1386 | error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); |
1382 | if (error) | 1387 | if (error) |
1383 | goto out_gunlock_q; | 1388 | goto out_gunlock_q; |
1384 | 1389 | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c5af8e18f27a..6bbf64f0f5b6 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -816,7 +816,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
816 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; | 816 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; |
817 | 817 | ||
818 | reserved = 1 + (nalloc * (data_blocks + ind_blocks)); | 818 | reserved = 1 + (nalloc * (data_blocks + ind_blocks)); |
819 | error = gfs2_inplace_reserve(ip, reserved); | 819 | error = gfs2_inplace_reserve(ip, reserved, 0); |
820 | if (error) | 820 | if (error) |
821 | goto out_alloc; | 821 | goto out_alloc; |
822 | 822 | ||
@@ -1605,7 +1605,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, | |||
1605 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), | 1605 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), |
1606 | &data_blocks, &ind_blocks); | 1606 | &data_blocks, &ind_blocks); |
1607 | blocks = 1 + data_blocks + ind_blocks; | 1607 | blocks = 1 + data_blocks + ind_blocks; |
1608 | error = gfs2_inplace_reserve(ip, blocks); | 1608 | error = gfs2_inplace_reserve(ip, blocks, 0); |
1609 | if (error) | 1609 | if (error) |
1610 | goto out_i; | 1610 | goto out_i; |
1611 | blocks += gfs2_rg_blocks(ip, blocks); | 1611 | blocks += gfs2_rg_blocks(ip, blocks); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bdf3e644baae..99a619788c65 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/prefetch.h> | 16 | #include <linux/prefetch.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/rbtree.h> | 18 | #include <linux/rbtree.h> |
19 | #include <linux/random.h> | ||
19 | 20 | ||
20 | #include "gfs2.h" | 21 | #include "gfs2.h" |
21 | #include "incore.h" | 22 | #include "incore.h" |
@@ -1763,6 +1764,15 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, | |||
1763 | return tdiff > (msecs * 1000 * 1000); | 1764 | return tdiff > (msecs * 1000 * 1000); |
1764 | } | 1765 | } |
1765 | 1766 | ||
1767 | static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) | ||
1768 | { | ||
1769 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1770 | u32 skip; | ||
1771 | |||
1772 | get_random_bytes(&skip, sizeof(skip)); | ||
1773 | return skip % sdp->sd_rgrps; | ||
1774 | } | ||
1775 | |||
1766 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) | 1776 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) |
1767 | { | 1777 | { |
1768 | struct gfs2_rgrpd *rgd = *pos; | 1778 | struct gfs2_rgrpd *rgd = *pos; |
@@ -1784,7 +1794,7 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b | |||
1784 | * Returns: errno | 1794 | * Returns: errno |
1785 | */ | 1795 | */ |
1786 | 1796 | ||
1787 | int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | 1797 | int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) |
1788 | { | 1798 | { |
1789 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1799 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1790 | struct gfs2_rgrpd *begin = NULL; | 1800 | struct gfs2_rgrpd *begin = NULL; |
@@ -1792,6 +1802,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1792 | int error = 0, rg_locked, flags = 0; | 1802 | int error = 0, rg_locked, flags = 0; |
1793 | u64 last_unlinked = NO_BLOCK; | 1803 | u64 last_unlinked = NO_BLOCK; |
1794 | int loops = 0; | 1804 | int loops = 0; |
1805 | u32 skip = 0; | ||
1795 | 1806 | ||
1796 | if (sdp->sd_args.ar_rgrplvb) | 1807 | if (sdp->sd_args.ar_rgrplvb) |
1797 | flags |= GL_SKIP; | 1808 | flags |= GL_SKIP; |
@@ -1805,6 +1816,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1805 | } else { | 1816 | } else { |
1806 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); | 1817 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); |
1807 | } | 1818 | } |
1819 | if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) | ||
1820 | skip = gfs2_orlov_skip(ip); | ||
1808 | if (rs->rs_rbm.rgd == NULL) | 1821 | if (rs->rs_rbm.rgd == NULL) |
1809 | return -EBADSLT; | 1822 | return -EBADSLT; |
1810 | 1823 | ||
@@ -1813,6 +1826,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1813 | 1826 | ||
1814 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { | 1827 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { |
1815 | rg_locked = 0; | 1828 | rg_locked = 0; |
1829 | if (skip && skip--) | ||
1830 | goto next_rgrp; | ||
1816 | if (!gfs2_rs_active(rs) && (loops < 2) && | 1831 | if (!gfs2_rs_active(rs) && (loops < 2) && |
1817 | gfs2_rgrp_used_recently(rs, 1000) && | 1832 | gfs2_rgrp_used_recently(rs, 1000) && |
1818 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) | 1833 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) |
@@ -1871,6 +1886,8 @@ next_rgrp: | |||
1871 | /* Find the next rgrp, and continue looking */ | 1886 | /* Find the next rgrp, and continue looking */ |
1872 | if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) | 1887 | if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) |
1873 | continue; | 1888 | continue; |
1889 | if (skip) | ||
1890 | continue; | ||
1874 | 1891 | ||
1875 | /* If we've scanned all the rgrps, but found no free blocks | 1892 | /* If we've scanned all the rgrps, but found no free blocks |
1876 | * then this checks for some less likely conditions before | 1893 | * then this checks for some less likely conditions before |
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 24077958dcf6..842185853f6b 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -39,7 +39,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); | |||
39 | 39 | ||
40 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); | 40 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); |
41 | 41 | ||
42 | extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); | 42 | #define GFS2_AF_ORLOV 1 |
43 | extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); | ||
43 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 44 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
44 | 45 | ||
45 | extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, | 46 | extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, |
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index db330e5518cd..76c144b3c9bb 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
734 | if (error) | 734 | if (error) |
735 | return error; | 735 | return error; |
736 | 736 | ||
737 | error = gfs2_inplace_reserve(ip, blks); | 737 | error = gfs2_inplace_reserve(ip, blks, 0); |
738 | if (error) | 738 | if (error) |
739 | goto out_gunlock_q; | 739 | goto out_gunlock_q; |
740 | 740 | ||