aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2012-10-31 06:37:10 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2012-11-07 08:33:17 -0500
commit9dbe9610b9df4efe0946299804ed46bb8f91dec2 (patch)
tree8d54797420ed9d0aef1c6bdd8f3b8dd5e9938d0a
parentc9aecf73717f55e41ac11682a50bef8594547025 (diff)
GFS2: Add Orlov allocator
Just like ext3, this works on the root directory and on any directory with the +T flag set. Also, just like ext3, any subdirectory created in either of those cases will be allocated to a random resource group (the GFS2 equivalent of a block group). If you are creating a set of directories, each of which will contain a job running on a different node, then by setting +T on the parent directory before creating the subdirectories, each will end up in a different resource group, and thus resource group contention between nodes will be kept to a minimum. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/inode.c17
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/gfs2/rgrp.c19
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/xattr.c2
8 files changed, 38 insertions, 15 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 01c4975da4b..30de4f2a2ea 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -643,7 +643,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
643 goto out_unlock; 643 goto out_unlock;
644 644
645 requested = data_blocks + ind_blocks; 645 requested = data_blocks + ind_blocks;
646 error = gfs2_inplace_reserve(ip, requested); 646 error = gfs2_inplace_reserve(ip, requested, 0);
647 if (error) 647 if (error)
648 goto out_qunlock; 648 goto out_qunlock;
649 } 649 }
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1fd3ae237bd..de70e52caf3 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1178,7 +1178,7 @@ static int do_grow(struct inode *inode, u64 size)
1178 if (error) 1178 if (error)
1179 return error; 1179 return error;
1180 1180
1181 error = gfs2_inplace_reserve(ip, 1); 1181 error = gfs2_inplace_reserve(ip, 1, 0);
1182 if (error) 1182 if (error)
1183 goto do_grow_qunlock; 1183 goto do_grow_qunlock;
1184 unstuff = 1; 1184 unstuff = 1;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e056b4ce487..dfe2d8cb9b2 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -432,7 +432,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
432 if (ret) 432 if (ret)
433 goto out_unlock; 433 goto out_unlock;
434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
435 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 435 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
436 if (ret) 436 if (ret)
437 goto out_quota_unlock; 437 goto out_quota_unlock;
438 438
@@ -825,7 +825,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
825retry: 825retry:
826 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 826 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
827 827
828 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 828 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
829 if (error) { 829 if (error) {
830 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 830 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
831 bytes >>= 1; 831 bytes >>= 1;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 749b05a960e..ef3ce00bb52 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -385,13 +385,13 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip,
385 inode->i_gid = current_fsgid(); 385 inode->i_gid = current_fsgid();
386} 386}
387 387
388static int alloc_dinode(struct gfs2_inode *ip) 388static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
389{ 389{
390 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 390 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
391 int error; 391 int error;
392 int dblocks = 1; 392 int dblocks = 1;
393 393
394 error = gfs2_inplace_reserve(ip, RES_DINODE); 394 error = gfs2_inplace_reserve(ip, RES_DINODE, flags);
395 if (error) 395 if (error)
396 goto out; 396 goto out;
397 397
@@ -560,7 +560,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
560 if (error) 560 if (error)
561 goto fail_quota_locks; 561 goto fail_quota_locks;
562 562
563 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 563 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
564 if (error) 564 if (error)
565 goto fail_quota_locks; 565 goto fail_quota_locks;
566 566
@@ -650,6 +650,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
650 struct gfs2_glock *io_gl; 650 struct gfs2_glock *io_gl;
651 int error; 651 int error;
652 struct buffer_head *bh = NULL; 652 struct buffer_head *bh = NULL;
653 u32 aflags = 0;
653 654
654 if (!name->len || name->len > GFS2_FNAMESIZE) 655 if (!name->len || name->len > GFS2_FNAMESIZE)
655 return -ENAMETOOLONG; 656 return -ENAMETOOLONG;
@@ -685,7 +686,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
685 munge_mode_uid_gid(dip, inode); 686 munge_mode_uid_gid(dip, inode);
686 ip->i_goal = dip->i_goal; 687 ip->i_goal = dip->i_goal;
687 688
688 error = alloc_dinode(ip); 689 if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) ||
690 (dip->i_diskflags & GFS2_DIF_TOPDIR))
691 aflags |= GFS2_AF_ORLOV;
692
693 error = alloc_dinode(ip, aflags);
689 if (error) 694 if (error)
690 goto fail_free_inode; 695 goto fail_free_inode;
691 696
@@ -897,7 +902,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
897 if (error) 902 if (error)
898 goto out_gunlock; 903 goto out_gunlock;
899 904
900 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 905 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
901 if (error) 906 if (error)
902 goto out_gunlock_q; 907 goto out_gunlock_q;
903 908
@@ -1378,7 +1383,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1378 if (error) 1383 if (error)
1379 goto out_gunlock; 1384 goto out_gunlock;
1380 1385
1381 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); 1386 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0);
1382 if (error) 1387 if (error)
1383 goto out_gunlock_q; 1388 goto out_gunlock_q;
1384 1389
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c5af8e18f27..6bbf64f0f5b 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -816,7 +816,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
816 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; 816 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
817 817
818 reserved = 1 + (nalloc * (data_blocks + ind_blocks)); 818 reserved = 1 + (nalloc * (data_blocks + ind_blocks));
819 error = gfs2_inplace_reserve(ip, reserved); 819 error = gfs2_inplace_reserve(ip, reserved, 0);
820 if (error) 820 if (error)
821 goto out_alloc; 821 goto out_alloc;
822 822
@@ -1605,7 +1605,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
1605 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 1605 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
1606 &data_blocks, &ind_blocks); 1606 &data_blocks, &ind_blocks);
1607 blocks = 1 + data_blocks + ind_blocks; 1607 blocks = 1 + data_blocks + ind_blocks;
1608 error = gfs2_inplace_reserve(ip, blocks); 1608 error = gfs2_inplace_reserve(ip, blocks, 0);
1609 if (error) 1609 if (error)
1610 goto out_i; 1610 goto out_i;
1611 blocks += gfs2_rg_blocks(ip, blocks); 1611 blocks += gfs2_rg_blocks(ip, blocks);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bdf3e644baa..99a619788c6 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -16,6 +16,7 @@
16#include <linux/prefetch.h> 16#include <linux/prefetch.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/rbtree.h> 18#include <linux/rbtree.h>
19#include <linux/random.h>
19 20
20#include "gfs2.h" 21#include "gfs2.h"
21#include "incore.h" 22#include "incore.h"
@@ -1763,6 +1764,15 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1763 return tdiff > (msecs * 1000 * 1000); 1764 return tdiff > (msecs * 1000 * 1000);
1764} 1765}
1765 1766
/**
 * gfs2_orlov_skip - Produce a random value bounded by the resource group count
 * @ip: The inode; only used to reach its superblock via GFS2_SB()
 *
 * Fills a u32 with random bytes and reduces it modulo sdp->sd_rgrps. In this
 * diff the only caller is gfs2_inplace_reserve(), which calls it for
 * directories when GFS2_AF_ORLOV is set and treats the result as a count of
 * resource groups to step past before attempting a reservation.
 *
 * NOTE(review): the modulo assumes sdp->sd_rgrps is non-zero; nothing in
 * this function checks that -- confirm it holds for every caller.
 *
 * Returns: the random value modulo sdp->sd_rgrps
 */
1767static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1768{
1769 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1770 u32 skip;
1771
1772 get_random_bytes(&skip, sizeof(skip));
 /* Reduce the raw random value to fewer than sd_rgrps steps. */
1773 return skip % sdp->sd_rgrps;
1774}
1775
1766static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1776static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1767{ 1777{
1768 struct gfs2_rgrpd *rgd = *pos; 1778 struct gfs2_rgrpd *rgd = *pos;
@@ -1784,7 +1794,7 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
1784 * Returns: errno 1794 * Returns: errno
1785 */ 1795 */
1786 1796
1787int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1797int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags)
1788{ 1798{
1789 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1799 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1790 struct gfs2_rgrpd *begin = NULL; 1800 struct gfs2_rgrpd *begin = NULL;
@@ -1792,6 +1802,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1792 int error = 0, rg_locked, flags = 0; 1802 int error = 0, rg_locked, flags = 0;
1793 u64 last_unlinked = NO_BLOCK; 1803 u64 last_unlinked = NO_BLOCK;
1794 int loops = 0; 1804 int loops = 0;
1805 u32 skip = 0;
1795 1806
1796 if (sdp->sd_args.ar_rgrplvb) 1807 if (sdp->sd_args.ar_rgrplvb)
1797 flags |= GL_SKIP; 1808 flags |= GL_SKIP;
@@ -1805,6 +1816,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1805 } else { 1816 } else {
1806 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1817 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1807 } 1818 }
1819 if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV))
1820 skip = gfs2_orlov_skip(ip);
1808 if (rs->rs_rbm.rgd == NULL) 1821 if (rs->rs_rbm.rgd == NULL)
1809 return -EBADSLT; 1822 return -EBADSLT;
1810 1823
@@ -1813,6 +1826,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1813 1826
1814 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { 1827 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1815 rg_locked = 0; 1828 rg_locked = 0;
1829 if (skip && skip--)
1830 goto next_rgrp;
1816 if (!gfs2_rs_active(rs) && (loops < 2) && 1831 if (!gfs2_rs_active(rs) && (loops < 2) &&
1817 gfs2_rgrp_used_recently(rs, 1000) && 1832 gfs2_rgrp_used_recently(rs, 1000) &&
1818 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) 1833 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
@@ -1871,6 +1886,8 @@ next_rgrp:
1871 /* Find the next rgrp, and continue looking */ 1886 /* Find the next rgrp, and continue looking */
1872 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) 1887 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1873 continue; 1888 continue;
1889 if (skip)
1890 continue;
1874 1891
1875 /* If we've scanned all the rgrps, but found no free blocks 1892 /* If we've scanned all the rgrps, but found no free blocks
1876 * then this checks for some less likely conditions before 1893 * then this checks for some less likely conditions before
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 24077958dcf..842185853f6 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -39,7 +39,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
39 39
40extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 40extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
41 41
42extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); 42#define GFS2_AF_ORLOV 1
43extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags);
43extern void gfs2_inplace_release(struct gfs2_inode *ip); 44extern void gfs2_inplace_release(struct gfs2_inode *ip);
44 45
45extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, 46extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index db330e5518c..76c144b3c9b 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
734 if (error) 734 if (error)
735 return error; 735 return error;
736 736
737 error = gfs2_inplace_reserve(ip, blks); 737 error = gfs2_inplace_reserve(ip, blks, 0);
738 if (error) 738 if (error)
739 goto out_gunlock_q; 739 goto out_gunlock_q;
740 740