about summary refs log tree commit diff stats
path: root/fs/gfs2
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-12-15 15:34:21 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-12-15 15:34:21 -0500
commit 08242bc2210938761230f79c5288dbcf72e94808 (patch)
tree f0e880673fb5139c692bd4004e9f165c249613ed /fs/gfs2
parent be354f40812314dee2b1e3aa272528c056bb827d (diff)
parent 1e2d9d44f3ceb7dac7cb14d2476d0a8128c8e169 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw
Pull GFS2 updates from Steven Whitehouse: "The main feature this time is the new Orlov allocator and the patches leading up to it which allow us to allocate new inodes from their own allocation context, rather than borrowing that of their parent directory. It is this change which then allows us to choose a different location for subdirectories when required. This works exactly as per the ext3 implementation from the user's point of view. In addition to that, we've got a speed-up in gfs2_rbm_from_block() from Bob Peterson, three locking-related improvements from Dave Teigland, plus a selection of smaller bug fixes and cleanups." * git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw: GFS2: Set gl_object during inode create GFS2: add error check while allocating new inodes GFS2: don't reference inode's glock during block allocation trace GFS2: remove redundant lvb pointer GFS2: only use lvb on glocks that need it GFS2: skip dlm_unlock calls in unmount GFS2: Fix one RG corner case GFS2: Eliminate redundant buffer_head manipulation in gfs2_unlink_inode GFS2: Use dirty_inode in gfs2_dir_add GFS2: Fix truncation of journaled data files GFS2: Add Orlov allocator GFS2: Use proper allocation context for new inodes GFS2: Add test for resource group congestion status GFS2: Rename glops go_xmote_th to go_sync GFS2: Speed up gfs2_rbm_from_block GFS2: Review bug traps in glops.c
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- fs/gfs2/aops.c 2
-rw-r--r-- fs/gfs2/bmap.c 54
-rw-r--r-- fs/gfs2/dir.c 7
-rw-r--r-- fs/gfs2/file.c 4
-rw-r--r-- fs/gfs2/glock.c 40
-rw-r--r-- fs/gfs2/glock.h 54
-rw-r--r-- fs/gfs2/glops.c 19
-rw-r--r-- fs/gfs2/incore.h 6
-rw-r--r-- fs/gfs2/inode.c 209
-rw-r--r-- fs/gfs2/lock_dlm.c 20
-rw-r--r-- fs/gfs2/ops_fstype.c 3
-rw-r--r-- fs/gfs2/quota.c 10
-rw-r--r-- fs/gfs2/rgrp.c 139
-rw-r--r-- fs/gfs2/rgrp.h 3
-rw-r--r-- fs/gfs2/trace_gfs2.h 2
-rw-r--r-- fs/gfs2/xattr.c 2
16 files changed, 380 insertions, 194 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 01c4975da4bc..30de4f2a2ea9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -643,7 +643,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
643 goto out_unlock; 643 goto out_unlock;
644 644
645 requested = data_blocks + ind_blocks; 645 requested = data_blocks + ind_blocks;
646 error = gfs2_inplace_reserve(ip, requested); 646 error = gfs2_inplace_reserve(ip, requested, 0);
647 if (error) 647 if (error)
648 goto out_qunlock; 648 goto out_qunlock;
649 } 649 }
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1fd3ae237bdd..a68e91bcef3d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -991,6 +991,41 @@ unlock:
991 return err; 991 return err;
992} 992}
993 993
994/**
995 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
996 * @inode: The inode being truncated
997 * @oldsize: The original (larger) size
998 * @newsize: The new smaller size
999 *
1000 * With jdata files, we have to journal a revoke for each block which is
1001 * truncated. As a result, we need to split this into separate transactions
1002 * if the number of pages being truncated gets too large.
1003 */
1004
1005#define GFS2_JTRUNC_REVOKES 8192
1006
1007static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
1008{
1009 struct gfs2_sbd *sdp = GFS2_SB(inode);
1010 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1011 u64 chunk;
1012 int error;
1013
1014 while (oldsize != newsize) {
1015 chunk = oldsize - newsize;
1016 if (chunk > max_chunk)
1017 chunk = max_chunk;
1018 truncate_pagecache(inode, oldsize, oldsize - chunk);
1019 oldsize -= chunk;
1020 gfs2_trans_end(sdp);
1021 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1022 if (error)
1023 return error;
1024 }
1025
1026 return 0;
1027}
1028
994static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) 1029static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
995{ 1030{
996 struct gfs2_inode *ip = GFS2_I(inode); 1031 struct gfs2_inode *ip = GFS2_I(inode);
@@ -1000,8 +1035,10 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1000 int journaled = gfs2_is_jdata(ip); 1035 int journaled = gfs2_is_jdata(ip);
1001 int error; 1036 int error;
1002 1037
1003 error = gfs2_trans_begin(sdp, 1038 if (journaled)
1004 RES_DINODE + (journaled ? RES_JDATA : 0), 0); 1039 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1040 else
1041 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1005 if (error) 1042 if (error)
1006 return error; 1043 return error;
1007 1044
@@ -1026,7 +1063,16 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1026 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1063 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1027 gfs2_dinode_out(ip, dibh->b_data); 1064 gfs2_dinode_out(ip, dibh->b_data);
1028 1065
1029 truncate_pagecache(inode, oldsize, newsize); 1066 if (journaled)
1067 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1068 else
1069 truncate_pagecache(inode, oldsize, newsize);
1070
1071 if (error) {
1072 brelse(dibh);
1073 return error;
1074 }
1075
1030out_brelse: 1076out_brelse:
1031 brelse(dibh); 1077 brelse(dibh);
1032out: 1078out:
@@ -1178,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size)
1178 if (error) 1224 if (error)
1179 return error; 1225 return error;
1180 1226
1181 error = gfs2_inplace_reserve(ip, 1); 1227 error = gfs2_inplace_reserve(ip, 1, 0);
1182 if (error) 1228 if (error)
1183 goto do_grow_qunlock; 1229 goto do_grow_qunlock;
1184 unstuff = 1; 1230 unstuff = 1;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 259b088cfc4c..9a35670fdc38 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1676,16 +1676,11 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1676 be16_add_cpu(&leaf->lf_entries, 1); 1676 be16_add_cpu(&leaf->lf_entries, 1);
1677 } 1677 }
1678 brelse(bh); 1678 brelse(bh);
1679 error = gfs2_meta_inode_buffer(ip, &bh);
1680 if (error)
1681 break;
1682 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1683 ip->i_entries++; 1679 ip->i_entries++;
1684 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1680 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1685 if (S_ISDIR(nip->i_inode.i_mode)) 1681 if (S_ISDIR(nip->i_inode.i_mode))
1686 inc_nlink(&ip->i_inode); 1682 inc_nlink(&ip->i_inode);
1687 gfs2_dinode_out(ip, bh->b_data); 1683 mark_inode_dirty(inode);
1688 brelse(bh);
1689 error = 0; 1684 error = 0;
1690 break; 1685 break;
1691 } 1686 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e056b4ce4877..dfe2d8cb9b2c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -432,7 +432,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
432 if (ret) 432 if (ret)
433 goto out_unlock; 433 goto out_unlock;
434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 434 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
435 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 435 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
436 if (ret) 436 if (ret)
437 goto out_quota_unlock; 437 goto out_quota_unlock;
438 438
@@ -825,7 +825,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
825retry: 825retry:
826 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 826 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
827 827
828 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 828 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
829 if (error) { 829 if (error) {
830 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 830 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
831 bytes >>= 1; 831 bytes >>= 1;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 0f22d09f358d..992c5c0cb504 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -55,8 +55,6 @@ struct gfs2_glock_iter {
55 55
56typedef void (*glock_examiner) (struct gfs2_glock * gl); 56typedef void (*glock_examiner) (struct gfs2_glock * gl);
57 57
58static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
59#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
60static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 58static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
61 59
62static struct dentry *gfs2_root; 60static struct dentry *gfs2_root;
@@ -107,10 +105,12 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
107{ 105{
108 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); 106 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
109 107
110 if (gl->gl_ops->go_flags & GLOF_ASPACE) 108 if (gl->gl_ops->go_flags & GLOF_ASPACE) {
111 kmem_cache_free(gfs2_glock_aspace_cachep, gl); 109 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
112 else 110 } else {
111 kfree(gl->gl_lksb.sb_lvbptr);
113 kmem_cache_free(gfs2_glock_cachep, gl); 112 kmem_cache_free(gfs2_glock_cachep, gl);
113 }
114} 114}
115 115
116void gfs2_glock_free(struct gfs2_glock *gl) 116void gfs2_glock_free(struct gfs2_glock *gl)
@@ -537,8 +537,8 @@ __acquires(&gl->gl_spin)
537 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) 537 (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
538 clear_bit(GLF_BLOCKING, &gl->gl_flags); 538 clear_bit(GLF_BLOCKING, &gl->gl_flags);
539 spin_unlock(&gl->gl_spin); 539 spin_unlock(&gl->gl_spin);
540 if (glops->go_xmote_th) 540 if (glops->go_sync)
541 glops->go_xmote_th(gl); 541 glops->go_sync(gl);
542 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) 542 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
543 glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); 543 glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
544 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 544 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
@@ -547,7 +547,10 @@ __acquires(&gl->gl_spin)
547 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 547 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
548 /* lock_dlm */ 548 /* lock_dlm */
549 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 549 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
550 GLOCK_BUG_ON(gl, ret); 550 if (ret) {
551 printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret);
552 GLOCK_BUG_ON(gl, 1);
553 }
551 } else { /* lock_nolock */ 554 } else { /* lock_nolock */
552 finish_xmote(gl, target); 555 finish_xmote(gl, target);
553 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 556 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
@@ -736,6 +739,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
736 if (!gl) 739 if (!gl)
737 return -ENOMEM; 740 return -ENOMEM;
738 741
742 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
743
744 if (glops->go_flags & GLOF_LVB) {
745 gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL);
746 if (!gl->gl_lksb.sb_lvbptr) {
747 kmem_cache_free(cachep, gl);
748 return -ENOMEM;
749 }
750 }
751
739 atomic_inc(&sdp->sd_glock_disposal); 752 atomic_inc(&sdp->sd_glock_disposal);
740 gl->gl_sbd = sdp; 753 gl->gl_sbd = sdp;
741 gl->gl_flags = 0; 754 gl->gl_flags = 0;
@@ -753,9 +766,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
753 preempt_enable(); 766 preempt_enable();
754 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; 767 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
755 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; 768 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
756 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
757 memset(gl->gl_lvb, 0, 32 * sizeof(char));
758 gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
759 gl->gl_tchange = jiffies; 769 gl->gl_tchange = jiffies;
760 gl->gl_object = NULL; 770 gl->gl_object = NULL;
761 gl->gl_hold_time = GL_GLOCK_DFT_HOLD; 771 gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
@@ -777,6 +787,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
777 tmp = search_bucket(hash, sdp, &name); 787 tmp = search_bucket(hash, sdp, &name);
778 if (tmp) { 788 if (tmp) {
779 spin_unlock_bucket(hash); 789 spin_unlock_bucket(hash);
790 kfree(gl->gl_lksb.sb_lvbptr);
780 kmem_cache_free(cachep, gl); 791 kmem_cache_free(cachep, gl);
781 atomic_dec(&sdp->sd_glock_disposal); 792 atomic_dec(&sdp->sd_glock_disposal);
782 gl = tmp; 793 gl = tmp;
@@ -1013,7 +1024,7 @@ trap_recursive:
1013 printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); 1024 printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid));
1014 printk(KERN_ERR "lock type: %d req lock state : %d\n", 1025 printk(KERN_ERR "lock type: %d req lock state : %d\n",
1015 gh->gh_gl->gl_name.ln_type, gh->gh_state); 1026 gh->gh_gl->gl_name.ln_type, gh->gh_state);
1016 __dump_glock(NULL, gl); 1027 gfs2_dump_glock(NULL, gl);
1017 BUG(); 1028 BUG();
1018} 1029}
1019 1030
@@ -1508,7 +1519,7 @@ static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1508{ 1519{
1509 int ret; 1520 int ret;
1510 spin_lock(&gl->gl_spin); 1521 spin_lock(&gl->gl_spin);
1511 ret = __dump_glock(seq, gl); 1522 ret = gfs2_dump_glock(seq, gl);
1512 spin_unlock(&gl->gl_spin); 1523 spin_unlock(&gl->gl_spin);
1513 return ret; 1524 return ret;
1514} 1525}
@@ -1528,6 +1539,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
1528 1539
1529void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1540void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1530{ 1541{
1542 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
1531 glock_hash_walk(clear_glock, sdp); 1543 glock_hash_walk(clear_glock, sdp);
1532 flush_workqueue(glock_workqueue); 1544 flush_workqueue(glock_workqueue);
1533 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); 1545 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
@@ -1655,7 +1667,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1655} 1667}
1656 1668
1657/** 1669/**
1658 * __dump_glock - print information about a glock 1670 * gfs2_dump_glock - print information about a glock
1659 * @seq: The seq_file struct 1671 * @seq: The seq_file struct
1660 * @gl: the glock 1672 * @gl: the glock
1661 * 1673 *
@@ -1672,7 +1684,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1672 * Returns: 0 on success, -ENOBUFS when we run out of space 1684 * Returns: 0 on success, -ENOBUFS when we run out of space
1673 */ 1685 */
1674 1686
1675static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) 1687int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1676{ 1688{
1677 const struct gfs2_glock_operations *glops = gl->gl_ops; 1689 const struct gfs2_glock_operations *glops = gl->gl_ops;
1678 unsigned long long dtime; 1690 unsigned long long dtime;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 307ac31df781..fd580b7861d5 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -178,33 +178,33 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
178 return NULL; 178 return NULL;
179} 179}
180 180
181int gfs2_glock_get(struct gfs2_sbd *sdp, 181extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
182 u64 number, const struct gfs2_glock_operations *glops, 182 const struct gfs2_glock_operations *glops,
183 int create, struct gfs2_glock **glp); 183 int create, struct gfs2_glock **glp);
184void gfs2_glock_hold(struct gfs2_glock *gl); 184extern void gfs2_glock_hold(struct gfs2_glock *gl);
185void gfs2_glock_put_nolock(struct gfs2_glock *gl); 185extern void gfs2_glock_put_nolock(struct gfs2_glock *gl);
186void gfs2_glock_put(struct gfs2_glock *gl); 186extern void gfs2_glock_put(struct gfs2_glock *gl);
187void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 187extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
188 struct gfs2_holder *gh); 188 unsigned flags, struct gfs2_holder *gh);
189void gfs2_holder_reinit(unsigned int state, unsigned flags, 189extern void gfs2_holder_reinit(unsigned int state, unsigned flags,
190 struct gfs2_holder *gh); 190 struct gfs2_holder *gh);
191void gfs2_holder_uninit(struct gfs2_holder *gh); 191extern void gfs2_holder_uninit(struct gfs2_holder *gh);
192int gfs2_glock_nq(struct gfs2_holder *gh); 192extern int gfs2_glock_nq(struct gfs2_holder *gh);
193int gfs2_glock_poll(struct gfs2_holder *gh); 193extern int gfs2_glock_poll(struct gfs2_holder *gh);
194int gfs2_glock_wait(struct gfs2_holder *gh); 194extern int gfs2_glock_wait(struct gfs2_holder *gh);
195void gfs2_glock_dq(struct gfs2_holder *gh); 195extern void gfs2_glock_dq(struct gfs2_holder *gh);
196void gfs2_glock_dq_wait(struct gfs2_holder *gh); 196extern void gfs2_glock_dq_wait(struct gfs2_holder *gh);
197 197extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
198void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 198extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
199int gfs2_glock_nq_num(struct gfs2_sbd *sdp, 199 const struct gfs2_glock_operations *glops,
200 u64 number, const struct gfs2_glock_operations *glops, 200 unsigned int state, int flags,
201 unsigned int state, int flags, struct gfs2_holder *gh); 201 struct gfs2_holder *gh);
202 202extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
203int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 203extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
204void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 204extern void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
205void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 205extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
206 206#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0)
207__printf(2, 3) 207extern __printf(2, 3)
208void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); 208void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
209 209
210/** 210/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 32cc4fde975c..78d4184ffc7d 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -74,7 +74,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
74 74
75 gfs2_trans_add_revoke(sdp, bd); 75 gfs2_trans_add_revoke(sdp, bd);
76 } 76 }
77 BUG_ON(!fsync && atomic_read(&gl->gl_ail_count)); 77 GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
78 spin_unlock(&sdp->sd_ail_lock); 78 spin_unlock(&sdp->sd_ail_lock);
79 gfs2_log_unlock(sdp); 79 gfs2_log_unlock(sdp);
80} 80}
@@ -96,7 +96,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
96 tr.tr_ip = (unsigned long)__builtin_return_address(0); 96 tr.tr_ip = (unsigned long)__builtin_return_address(0);
97 sb_start_intwrite(sdp->sd_vfs); 97 sb_start_intwrite(sdp->sd_vfs);
98 gfs2_log_reserve(sdp, tr.tr_reserved); 98 gfs2_log_reserve(sdp, tr.tr_reserved);
99 BUG_ON(current->journal_info); 99 WARN_ON_ONCE(current->journal_info);
100 current->journal_info = &tr; 100 current->journal_info = &tr;
101 101
102 __gfs2_ail_flush(gl, 0); 102 __gfs2_ail_flush(gl, 0);
@@ -139,7 +139,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
139 139
140 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 140 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
141 return; 141 return;
142 BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); 142 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
143 143
144 gfs2_log_flush(gl->gl_sbd, gl); 144 gfs2_log_flush(gl->gl_sbd, gl);
145 filemap_fdatawrite(metamapping); 145 filemap_fdatawrite(metamapping);
@@ -168,7 +168,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
168{ 168{
169 struct address_space *mapping = gfs2_glock2aspace(gl); 169 struct address_space *mapping = gfs2_glock2aspace(gl);
170 170
171 BUG_ON(!(flags & DIO_METADATA)); 171 WARN_ON_ONCE(!(flags & DIO_METADATA));
172 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); 172 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
173 truncate_inode_pages(mapping, 0); 173 truncate_inode_pages(mapping, 0);
174 174
@@ -197,7 +197,7 @@ static void inode_go_sync(struct gfs2_glock *gl)
197 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 197 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
198 return; 198 return;
199 199
200 BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); 200 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
201 201
202 gfs2_log_flush(gl->gl_sbd, gl); 202 gfs2_log_flush(gl->gl_sbd, gl);
203 filemap_fdatawrite(metamapping); 203 filemap_fdatawrite(metamapping);
@@ -536,7 +536,7 @@ const struct gfs2_glock_operations gfs2_meta_glops = {
536}; 536};
537 537
538const struct gfs2_glock_operations gfs2_inode_glops = { 538const struct gfs2_glock_operations gfs2_inode_glops = {
539 .go_xmote_th = inode_go_sync, 539 .go_sync = inode_go_sync,
540 .go_inval = inode_go_inval, 540 .go_inval = inode_go_inval,
541 .go_demote_ok = inode_go_demote_ok, 541 .go_demote_ok = inode_go_demote_ok,
542 .go_lock = inode_go_lock, 542 .go_lock = inode_go_lock,
@@ -546,17 +546,17 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
546}; 546};
547 547
548const struct gfs2_glock_operations gfs2_rgrp_glops = { 548const struct gfs2_glock_operations gfs2_rgrp_glops = {
549 .go_xmote_th = rgrp_go_sync, 549 .go_sync = rgrp_go_sync,
550 .go_inval = rgrp_go_inval, 550 .go_inval = rgrp_go_inval,
551 .go_lock = gfs2_rgrp_go_lock, 551 .go_lock = gfs2_rgrp_go_lock,
552 .go_unlock = gfs2_rgrp_go_unlock, 552 .go_unlock = gfs2_rgrp_go_unlock,
553 .go_dump = gfs2_rgrp_dump, 553 .go_dump = gfs2_rgrp_dump,
554 .go_type = LM_TYPE_RGRP, 554 .go_type = LM_TYPE_RGRP,
555 .go_flags = GLOF_ASPACE, 555 .go_flags = GLOF_ASPACE | GLOF_LVB,
556}; 556};
557 557
558const struct gfs2_glock_operations gfs2_trans_glops = { 558const struct gfs2_glock_operations gfs2_trans_glops = {
559 .go_xmote_th = trans_go_sync, 559 .go_sync = trans_go_sync,
560 .go_xmote_bh = trans_go_xmote_bh, 560 .go_xmote_bh = trans_go_xmote_bh,
561 .go_demote_ok = trans_go_demote_ok, 561 .go_demote_ok = trans_go_demote_ok,
562 .go_type = LM_TYPE_NONDISK, 562 .go_type = LM_TYPE_NONDISK,
@@ -577,6 +577,7 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = {
577 577
578const struct gfs2_glock_operations gfs2_quota_glops = { 578const struct gfs2_glock_operations gfs2_quota_glops = {
579 .go_type = LM_TYPE_QUOTA, 579 .go_type = LM_TYPE_QUOTA,
580 .go_flags = GLOF_LVB,
580}; 581};
581 582
582const struct gfs2_glock_operations gfs2_journal_glops = { 583const struct gfs2_glock_operations gfs2_journal_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 3d469d37345e..c373a24fedd9 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -205,7 +205,7 @@ struct lm_lockname {
205 205
206 206
207struct gfs2_glock_operations { 207struct gfs2_glock_operations {
208 void (*go_xmote_th) (struct gfs2_glock *gl); 208 void (*go_sync) (struct gfs2_glock *gl);
209 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); 209 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
210 void (*go_inval) (struct gfs2_glock *gl, int flags); 210 void (*go_inval) (struct gfs2_glock *gl, int flags);
211 int (*go_demote_ok) (const struct gfs2_glock *gl); 211 int (*go_demote_ok) (const struct gfs2_glock *gl);
@@ -216,6 +216,7 @@ struct gfs2_glock_operations {
216 const int go_type; 216 const int go_type;
217 const unsigned long go_flags; 217 const unsigned long go_flags;
218#define GLOF_ASPACE 1 218#define GLOF_ASPACE 1
219#define GLOF_LVB 2
219}; 220};
220 221
221enum { 222enum {
@@ -321,7 +322,6 @@ struct gfs2_glock {
321 ktime_t gl_dstamp; 322 ktime_t gl_dstamp;
322 struct gfs2_lkstats gl_stats; 323 struct gfs2_lkstats gl_stats;
323 struct dlm_lksb gl_lksb; 324 struct dlm_lksb gl_lksb;
324 char gl_lvb[32];
325 unsigned long gl_tchange; 325 unsigned long gl_tchange;
326 void *gl_object; 326 void *gl_object;
327 327
@@ -539,6 +539,7 @@ enum {
539 SDF_DEMOTE = 5, 539 SDF_DEMOTE = 5,
540 SDF_NOJOURNALID = 6, 540 SDF_NOJOURNALID = 6,
541 SDF_RORECOVERY = 7, /* read only recovery */ 541 SDF_RORECOVERY = 7, /* read only recovery */
542 SDF_SKIP_DLM_UNLOCK = 8,
542}; 543};
543 544
544#define GFS2_FSNAME_LEN 256 545#define GFS2_FSNAME_LEN 256
@@ -621,6 +622,7 @@ struct gfs2_sbd {
621 u32 sd_hash_bsize_shift; 622 u32 sd_hash_bsize_shift;
622 u32 sd_hash_ptrs; /* Number of pointers in a hash block */ 623 u32 sd_hash_ptrs; /* Number of pointers in a hash block */
623 u32 sd_qc_per_block; 624 u32 sd_qc_per_block;
625 u32 sd_blocks_per_bitmap;
624 u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ 626 u32 sd_max_dirres; /* Max blocks needed to add a directory entry */
625 u32 sd_max_height; /* Max height of a file's metadata tree */ 627 u32 sd_max_height; /* Max height of a file's metadata tree */
626 u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; 628 u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 381893ceefa4..2b6f5698ef18 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -364,34 +364,34 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
364 return 0; 364 return 0;
365} 365}
366 366
367static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode, 367static void munge_mode_uid_gid(const struct gfs2_inode *dip,
368 unsigned int *uid, unsigned int *gid) 368 struct inode *inode)
369{ 369{
370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) {
372 if (S_ISDIR(*mode)) 372 if (S_ISDIR(inode->i_mode))
373 *mode |= S_ISUID; 373 inode->i_mode |= S_ISUID;
374 else if (dip->i_inode.i_uid != current_fsuid()) 374 else if (dip->i_inode.i_uid != current_fsuid())
375 *mode &= ~07111; 375 inode->i_mode &= ~07111;
376 *uid = dip->i_inode.i_uid; 376 inode->i_uid = dip->i_inode.i_uid;
377 } else 377 } else
378 *uid = current_fsuid(); 378 inode->i_uid = current_fsuid();
379 379
380 if (dip->i_inode.i_mode & S_ISGID) { 380 if (dip->i_inode.i_mode & S_ISGID) {
381 if (S_ISDIR(*mode)) 381 if (S_ISDIR(inode->i_mode))
382 *mode |= S_ISGID; 382 inode->i_mode |= S_ISGID;
383 *gid = dip->i_inode.i_gid; 383 inode->i_gid = dip->i_inode.i_gid;
384 } else 384 } else
385 *gid = current_fsgid(); 385 inode->i_gid = current_fsgid();
386} 386}
387 387
388static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 388static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
389{ 389{
390 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 390 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
391 int error; 391 int error;
392 int dblocks = 1; 392 int dblocks = 1;
393 393
394 error = gfs2_inplace_reserve(dip, RES_DINODE); 394 error = gfs2_inplace_reserve(ip, RES_DINODE, flags);
395 if (error) 395 if (error)
396 goto out; 396 goto out;
397 397
@@ -399,12 +399,15 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
399 if (error) 399 if (error)
400 goto out_ipreserv; 400 goto out_ipreserv;
401 401
402 error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation); 402 error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation);
403 ip->i_no_formal_ino = ip->i_generation;
404 ip->i_inode.i_ino = ip->i_no_addr;
405 ip->i_goal = ip->i_no_addr;
403 406
404 gfs2_trans_end(sdp); 407 gfs2_trans_end(sdp);
405 408
406out_ipreserv: 409out_ipreserv:
407 gfs2_inplace_release(dip); 410 gfs2_inplace_release(ip);
408out: 411out:
409 return error; 412 return error;
410} 413}
@@ -429,52 +432,42 @@ static void gfs2_init_dir(struct buffer_head *dibh,
429/** 432/**
430 * init_dinode - Fill in a new dinode structure 433 * init_dinode - Fill in a new dinode structure
431 * @dip: The directory this inode is being created in 434 * @dip: The directory this inode is being created in
432 * @gl: The glock covering the new inode 435 * @ip: The inode
433 * @inum: The inode number
434 * @mode: The file permissions
435 * @uid: The uid of the new inode
436 * @gid: The gid of the new inode
437 * @generation: The generation number of the new inode
438 * @dev: The device number (if a device node)
439 * @symname: The symlink destination (if a symlink) 436 * @symname: The symlink destination (if a symlink)
440 * @size: The inode size (ignored for directories)
441 * @bhp: The buffer head (returned to caller) 437 * @bhp: The buffer head (returned to caller)
442 * 438 *
443 */ 439 */
444 440
445static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 441static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
446 const struct gfs2_inum_host *inum, umode_t mode, 442 const char *symname, struct buffer_head **bhp)
447 unsigned int uid, unsigned int gid,
448 const u64 *generation, dev_t dev, const char *symname,
449 unsigned size, struct buffer_head **bhp)
450{ 443{
451 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 444 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
452 struct gfs2_dinode *di; 445 struct gfs2_dinode *di;
453 struct buffer_head *dibh; 446 struct buffer_head *dibh;
454 struct timespec tv = CURRENT_TIME; 447 struct timespec tv = CURRENT_TIME;
455 448
456 dibh = gfs2_meta_new(gl, inum->no_addr); 449 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
457 gfs2_trans_add_bh(gl, dibh, 1); 450 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
458 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 451 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
459 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 452 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
460 di = (struct gfs2_dinode *)dibh->b_data; 453 di = (struct gfs2_dinode *)dibh->b_data;
461 454
462 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); 455 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
463 di->di_num.no_addr = cpu_to_be64(inum->no_addr); 456 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
464 di->di_mode = cpu_to_be32(mode); 457 di->di_mode = cpu_to_be32(ip->i_inode.i_mode);
465 di->di_uid = cpu_to_be32(uid); 458 di->di_uid = cpu_to_be32(ip->i_inode.i_uid);
466 di->di_gid = cpu_to_be32(gid); 459 di->di_gid = cpu_to_be32(ip->i_inode.i_gid);
467 di->di_nlink = 0; 460 di->di_nlink = 0;
468 di->di_size = cpu_to_be64(size); 461 di->di_size = cpu_to_be64(ip->i_inode.i_size);
469 di->di_blocks = cpu_to_be64(1); 462 di->di_blocks = cpu_to_be64(1);
470 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 463 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
471 di->di_major = cpu_to_be32(MAJOR(dev)); 464 di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev));
472 di->di_minor = cpu_to_be32(MINOR(dev)); 465 di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
473 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 466 di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr);
474 di->di_generation = cpu_to_be64(*generation); 467 di->di_generation = cpu_to_be64(ip->i_generation);
475 di->di_flags = 0; 468 di->di_flags = 0;
476 di->__pad1 = 0; 469 di->__pad1 = 0;
477 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 470 di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? GFS2_FORMAT_DE : 0);
478 di->di_height = 0; 471 di->di_height = 0;
479 di->__pad2 = 0; 472 di->__pad2 = 0;
480 di->__pad3 = 0; 473 di->__pad3 = 0;
@@ -487,7 +480,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
487 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 480 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
488 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 481 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
489 482
490 switch(mode & S_IFMT) { 483 switch(ip->i_inode.i_mode & S_IFMT) {
491 case S_IFREG: 484 case S_IFREG:
492 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || 485 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
493 gfs2_tune_get(sdp, gt_new_files_jdata)) 486 gfs2_tune_get(sdp, gt_new_files_jdata))
@@ -502,7 +495,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
502 gfs2_init_dir(dibh, dip); 495 gfs2_init_dir(dibh, dip);
503 break; 496 break;
504 case S_IFLNK: 497 case S_IFLNK:
505 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); 498 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, ip->i_inode.i_size);
506 break; 499 break;
507 } 500 }
508 501
@@ -511,25 +504,22 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
511 *bhp = dibh; 504 *bhp = dibh;
512} 505}
513 506
514static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 507static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
515 umode_t mode, const struct gfs2_inum_host *inum, 508 const char *symname, struct buffer_head **bhp)
516 const u64 *generation, dev_t dev, const char *symname,
517 unsigned int size, struct buffer_head **bhp)
518{ 509{
510 struct inode *inode = &ip->i_inode;
519 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 511 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
520 unsigned int uid, gid;
521 int error; 512 int error;
522 513
523 munge_mode_uid_gid(dip, &mode, &uid, &gid);
524 error = gfs2_rindex_update(sdp); 514 error = gfs2_rindex_update(sdp);
525 if (error) 515 if (error)
526 return error; 516 return error;
527 517
528 error = gfs2_quota_lock(dip, uid, gid); 518 error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid);
529 if (error) 519 if (error)
530 return error; 520 return error;
531 521
532 error = gfs2_quota_check(dip, uid, gid); 522 error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid);
533 if (error) 523 if (error)
534 goto out_quota; 524 goto out_quota;
535 525
@@ -537,8 +527,8 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
537 if (error) 527 if (error)
538 goto out_quota; 528 goto out_quota;
539 529
540 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp); 530 init_dinode(dip, ip, symname, bhp);
541 gfs2_quota_change(dip, +1, uid, gid); 531 gfs2_quota_change(dip, +1, inode->i_uid, inode->i_gid);
542 gfs2_trans_end(sdp); 532 gfs2_trans_end(sdp);
543 533
544out_quota: 534out_quota:
@@ -570,7 +560,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
570 if (error) 560 if (error)
571 goto fail_quota_locks; 561 goto fail_quota_locks;
572 562
573 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 563 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
574 if (error) 564 if (error)
575 goto fail_quota_locks; 565 goto fail_quota_locks;
576 566
@@ -657,19 +647,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
657 struct inode *inode = NULL; 647 struct inode *inode = NULL;
658 struct gfs2_inode *dip = GFS2_I(dir), *ip; 648 struct gfs2_inode *dip = GFS2_I(dir), *ip;
659 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 649 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
660 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 650 struct gfs2_glock *io_gl;
661 int error; 651 int error;
662 u64 generation;
663 struct buffer_head *bh = NULL; 652 struct buffer_head *bh = NULL;
653 u32 aflags = 0;
664 654
665 if (!name->len || name->len > GFS2_FNAMESIZE) 655 if (!name->len || name->len > GFS2_FNAMESIZE)
666 return -ENAMETOOLONG; 656 return -ENAMETOOLONG;
667 657
668 /* We need a reservation to allocate the new dinode block. The
669 directory ip temporarily points to the reservation, but this is
670 being done to get a set of contiguous blocks for the new dinode.
671 Since this is a create, we don't have a sizehint yet, so it will
672 have to use the minimum reservation size. */
673 error = gfs2_rs_alloc(dip); 658 error = gfs2_rs_alloc(dip);
674 if (error) 659 if (error)
675 return error; 660 return error;
@@ -688,45 +673,72 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
688 if (error) 673 if (error)
689 goto fail_gunlock; 674 goto fail_gunlock;
690 675
691 error = alloc_dinode(dip, &inum.no_addr, &generation); 676 inode = new_inode(sdp->sd_vfs);
677 if (!inode) {
678 gfs2_glock_dq_uninit(ghs);
679 return -ENOMEM;
680 }
681 ip = GFS2_I(inode);
682 error = gfs2_rs_alloc(ip);
692 if (error) 683 if (error)
693 goto fail_gunlock; 684 goto fail_free_inode;
694 inum.no_formal_ino = generation; 685
686 set_bit(GIF_INVALID, &ip->i_flags);
687 inode->i_mode = mode;
688 inode->i_rdev = dev;
689 inode->i_size = size;
690 munge_mode_uid_gid(dip, inode);
691 ip->i_goal = dip->i_goal;
695 692
696 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 693 if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) ||
697 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 694 (dip->i_diskflags & GFS2_DIF_TOPDIR))
695 aflags |= GFS2_AF_ORLOV;
696
697 error = alloc_dinode(ip, aflags);
698 if (error) 698 if (error)
699 goto fail_gunlock; 699 goto fail_free_inode;
700 700
701 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh); 701 error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
702 if (error) 702 if (error)
703 goto fail_gunlock2; 703 goto fail_free_inode;
704 704
705 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, 705 ip->i_gl->gl_object = ip;
706 inum.no_formal_ino, 0); 706 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
707 if (IS_ERR(inode)) 707 if (error)
708 goto fail_free_inode;
709
710 error = make_dinode(dip, ip, symname, &bh);
711 if (error)
708 goto fail_gunlock2; 712 goto fail_gunlock2;
709 713
710 ip = GFS2_I(inode); 714 error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
711 error = gfs2_inode_refresh(ip);
712 if (error) 715 if (error)
713 goto fail_gunlock2; 716 goto fail_gunlock2;
714 717
715 error = gfs2_rs_alloc(ip); 718 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
716 if (error) 719 if (error)
717 goto fail_gunlock2; 720 goto fail_gunlock2;
718 721
722 ip->i_iopen_gh.gh_gl->gl_object = ip;
723 gfs2_glock_put(io_gl);
724 gfs2_set_iop(inode);
725 insert_inode_hash(inode);
726
727 error = gfs2_inode_refresh(ip);
728 if (error)
729 goto fail_gunlock3;
730
719 error = gfs2_acl_create(dip, inode); 731 error = gfs2_acl_create(dip, inode);
720 if (error) 732 if (error)
721 goto fail_gunlock2; 733 goto fail_gunlock3;
722 734
723 error = gfs2_security_init(dip, ip, name); 735 error = gfs2_security_init(dip, ip, name);
724 if (error) 736 if (error)
725 goto fail_gunlock2; 737 goto fail_gunlock3;
726 738
727 error = link_dinode(dip, name, ip); 739 error = link_dinode(dip, name, ip);
728 if (error) 740 if (error)
729 goto fail_gunlock2; 741 goto fail_gunlock3;
730 742
731 if (bh) 743 if (bh)
732 brelse(bh); 744 brelse(bh);
@@ -739,8 +751,20 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
739 d_instantiate(dentry, inode); 751 d_instantiate(dentry, inode);
740 return 0; 752 return 0;
741 753
754fail_gunlock3:
755 gfs2_glock_dq_uninit(ghs + 1);
756 if (ip->i_gl)
757 gfs2_glock_put(ip->i_gl);
758 goto fail_gunlock;
759
742fail_gunlock2: 760fail_gunlock2:
743 gfs2_glock_dq_uninit(ghs + 1); 761 gfs2_glock_dq_uninit(ghs + 1);
762fail_free_inode:
763 if (ip->i_gl)
764 gfs2_glock_put(ip->i_gl);
765 gfs2_rs_delete(ip);
766 free_inode_nonrcu(inode);
767 inode = NULL;
744fail_gunlock: 768fail_gunlock:
745 gfs2_glock_dq_uninit(ghs); 769 gfs2_glock_dq_uninit(ghs);
746 if (inode && !IS_ERR(inode)) { 770 if (inode && !IS_ERR(inode)) {
@@ -748,7 +772,6 @@ fail_gunlock:
748 iput(inode); 772 iput(inode);
749 } 773 }
750fail: 774fail:
751 gfs2_rs_delete(dip);
752 if (bh) 775 if (bh)
753 brelse(bh); 776 brelse(bh);
754 return error; 777 return error;
@@ -884,7 +907,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
884 if (error) 907 if (error)
885 goto out_gunlock; 908 goto out_gunlock;
886 909
887 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 910 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
888 if (error) 911 if (error)
889 goto out_gunlock_q; 912 goto out_gunlock_q;
890 913
@@ -977,7 +1000,6 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
977 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it 1000 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
978 * @dip: The parent directory 1001 * @dip: The parent directory
979 * @name: The name of the entry in the parent directory 1002 * @name: The name of the entry in the parent directory
980 * @bh: The inode buffer for the inode to be removed
981 * @inode: The inode to be removed 1003 * @inode: The inode to be removed
982 * 1004 *
983 * Called with all the locks and in a transaction. This will only be 1005 * Called with all the locks and in a transaction. This will only be
@@ -987,8 +1009,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
987 */ 1009 */
988 1010
989static int gfs2_unlink_inode(struct gfs2_inode *dip, 1011static int gfs2_unlink_inode(struct gfs2_inode *dip,
990 const struct dentry *dentry, 1012 const struct dentry *dentry)
991 struct buffer_head *bh)
992{ 1013{
993 struct inode *inode = dentry->d_inode; 1014 struct inode *inode = dentry->d_inode;
994 struct gfs2_inode *ip = GFS2_I(inode); 1015 struct gfs2_inode *ip = GFS2_I(inode);
@@ -1028,7 +1049,6 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1028 struct gfs2_sbd *sdp = GFS2_SB(dir); 1049 struct gfs2_sbd *sdp = GFS2_SB(dir);
1029 struct inode *inode = dentry->d_inode; 1050 struct inode *inode = dentry->d_inode;
1030 struct gfs2_inode *ip = GFS2_I(inode); 1051 struct gfs2_inode *ip = GFS2_I(inode);
1031 struct buffer_head *bh;
1032 struct gfs2_holder ghs[3]; 1052 struct gfs2_holder ghs[3];
1033 struct gfs2_rgrpd *rgd; 1053 struct gfs2_rgrpd *rgd;
1034 int error; 1054 int error;
@@ -1077,14 +1097,9 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1077 1097
1078 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); 1098 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
1079 if (error) 1099 if (error)
1080 goto out_gunlock;
1081
1082 error = gfs2_meta_inode_buffer(ip, &bh);
1083 if (error)
1084 goto out_end_trans; 1100 goto out_end_trans;
1085 1101
1086 error = gfs2_unlink_inode(dip, dentry, bh); 1102 error = gfs2_unlink_inode(dip, dentry);
1087 brelse(bh);
1088 1103
1089out_end_trans: 1104out_end_trans:
1090 gfs2_trans_end(sdp); 1105 gfs2_trans_end(sdp);
@@ -1365,7 +1380,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1365 if (error) 1380 if (error)
1366 goto out_gunlock; 1381 goto out_gunlock;
1367 1382
1368 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); 1383 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0);
1369 if (error) 1384 if (error)
1370 goto out_gunlock_q; 1385 goto out_gunlock_q;
1371 1386
@@ -1384,14 +1399,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1384 1399
1385 /* Remove the target file, if it exists */ 1400 /* Remove the target file, if it exists */
1386 1401
1387 if (nip) { 1402 if (nip)
1388 struct buffer_head *bh; 1403 error = gfs2_unlink_inode(ndip, ndentry);
1389 error = gfs2_meta_inode_buffer(nip, &bh);
1390 if (error)
1391 goto out_end_trans;
1392 error = gfs2_unlink_inode(ndip, ndentry, bh);
1393 brelse(bh);
1394 }
1395 1404
1396 if (dir_rename) { 1405 if (dir_rename) {
1397 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); 1406 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0fb6539b0c8c..8dad6b093716 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -120,8 +120,8 @@ static void gdlm_ast(void *arg)
120 gfs2_update_reply_times(gl); 120 gfs2_update_reply_times(gl);
121 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 121 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
122 122
123 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) 123 if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr)
124 memset(gl->gl_lvb, 0, GDLM_LVB_SIZE); 124 memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
125 125
126 switch (gl->gl_lksb.sb_status) { 126 switch (gl->gl_lksb.sb_status) {
127 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ 127 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
@@ -203,8 +203,10 @@ static int make_mode(const unsigned int lmstate)
203static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, 203static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
204 const int req) 204 const int req)
205{ 205{
206 u32 lkf = DLM_LKF_VALBLK; 206 u32 lkf = 0;
207 u32 lkid = gl->gl_lksb.sb_lkid; 207
208 if (gl->gl_lksb.sb_lvbptr)
209 lkf |= DLM_LKF_VALBLK;
208 210
209 if (gfs_flags & LM_FLAG_TRY) 211 if (gfs_flags & LM_FLAG_TRY)
210 lkf |= DLM_LKF_NOQUEUE; 212 lkf |= DLM_LKF_NOQUEUE;
@@ -228,7 +230,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
228 BUG(); 230 BUG();
229 } 231 }
230 232
231 if (lkid != 0) { 233 if (gl->gl_lksb.sb_lkid != 0) {
232 lkf |= DLM_LKF_CONVERT; 234 lkf |= DLM_LKF_CONVERT;
233 if (test_bit(GLF_BLOCKING, &gl->gl_flags)) 235 if (test_bit(GLF_BLOCKING, &gl->gl_flags))
234 lkf |= DLM_LKF_QUECVT; 236 lkf |= DLM_LKF_QUECVT;
@@ -289,6 +291,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
289 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); 291 gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
290 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); 292 gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
291 gfs2_update_request_times(gl); 293 gfs2_update_request_times(gl);
294
295 /* don't want to skip dlm_unlock writing the lvb when lock is ex */
296 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
297 gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) {
298 gfs2_glock_free(gl);
299 return;
300 }
301
292 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, 302 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
293 NULL, gl); 303 NULL, gl);
294 if (error) { 304 if (error) {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e443966c8106..0e3554edb8f2 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -278,6 +278,9 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
278 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - 278 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
279 sizeof(struct gfs2_meta_header)) / 279 sizeof(struct gfs2_meta_header)) /
280 sizeof(struct gfs2_quota_change); 280 sizeof(struct gfs2_quota_change);
281 sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize -
282 sizeof(struct gfs2_meta_header))
283 * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */
281 284
282 /* Compute maximum reservation required to add an entry to a directory */ 285 /* Compute maximum reservation required to add an entry to a directory */
283 286
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c5af8e18f27a..ae55e248c3b7 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -816,7 +816,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
816 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; 816 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
817 817
818 reserved = 1 + (nalloc * (data_blocks + ind_blocks)); 818 reserved = 1 + (nalloc * (data_blocks + ind_blocks));
819 error = gfs2_inplace_reserve(ip, reserved); 819 error = gfs2_inplace_reserve(ip, reserved, 0);
820 if (error) 820 if (error)
821 goto out_alloc; 821 goto out_alloc;
822 822
@@ -869,7 +869,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
869 if (error < 0) 869 if (error < 0)
870 return error; 870 return error;
871 871
872 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 872 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
873 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); 873 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
874 qlvb->__pad = 0; 874 qlvb->__pad = 0;
875 qlvb->qb_limit = q.qu_limit; 875 qlvb->qb_limit = q.qu_limit;
@@ -893,7 +893,7 @@ restart:
893 if (error) 893 if (error)
894 return error; 894 return error;
895 895
896 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 896 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
897 897
898 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 898 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
899 gfs2_glock_dq_uninit(q_gh); 899 gfs2_glock_dq_uninit(q_gh);
@@ -1506,7 +1506,7 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1506 if (error) 1506 if (error)
1507 goto out; 1507 goto out;
1508 1508
1509 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 1509 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
1510 fdq->d_version = FS_DQUOT_VERSION; 1510 fdq->d_version = FS_DQUOT_VERSION;
1511 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1511 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
1512 fdq->d_id = from_kqid(&init_user_ns, qid); 1512 fdq->d_id = from_kqid(&init_user_ns, qid);
@@ -1605,7 +1605,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
1605 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 1605 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
1606 &data_blocks, &ind_blocks); 1606 &data_blocks, &ind_blocks);
1607 blocks = 1 + data_blocks + ind_blocks; 1607 blocks = 1 + data_blocks + ind_blocks;
1608 error = gfs2_inplace_reserve(ip, blocks); 1608 error = gfs2_inplace_reserve(ip, blocks, 0);
1609 if (error) 1609 if (error)
1610 goto out_i; 1610 goto out_i;
1611 blocks += gfs2_rg_blocks(ip, blocks); 1611 blocks += gfs2_rg_blocks(ip, blocks);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 38fe18f2f055..37ee061d899e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -16,6 +16,7 @@
16#include <linux/prefetch.h> 16#include <linux/prefetch.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/rbtree.h> 18#include <linux/rbtree.h>
19#include <linux/random.h>
19 20
20#include "gfs2.h" 21#include "gfs2.h"
21#include "incore.h" 22#include "incore.h"
@@ -251,22 +252,25 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
251static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) 252static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
252{ 253{
253 u64 rblock = block - rbm->rgd->rd_data0; 254 u64 rblock = block - rbm->rgd->rd_data0;
254 u32 goal = (u32)rblock; 255 u32 x;
255 int x;
256 256
257 if (WARN_ON_ONCE(rblock > UINT_MAX)) 257 if (WARN_ON_ONCE(rblock > UINT_MAX))
258 return -EINVAL; 258 return -EINVAL;
259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) 259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
260 return -E2BIG; 260 return -E2BIG;
261 261
262 for (x = 0; x < rbm->rgd->rd_length; x++) { 262 rbm->bi = rbm->rgd->rd_bits;
263 rbm->bi = rbm->rgd->rd_bits + x; 263 rbm->offset = (u32)(rblock);
264 if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { 264 /* Check if the block is within the first block */
265 rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); 265 if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY)
266 break; 266 return 0;
267 }
268 }
269 267
268 /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
269 rbm->offset += (sizeof(struct gfs2_rgrp) -
270 sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
271 x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
272 rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
273 rbm->bi += x;
270 return 0; 274 return 0;
271} 275}
272 276
@@ -875,7 +879,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
875 goto fail; 879 goto fail;
876 880
877 rgd->rd_gl->gl_object = rgd; 881 rgd->rd_gl->gl_object = rgd;
878 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; 882 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
879 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 883 rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
880 if (rgd->rd_data > sdp->sd_max_rg_data) 884 if (rgd->rd_data > sdp->sd_max_rg_data)
881 sdp->sd_max_rg_data = rgd->rd_data; 885 sdp->sd_max_rg_data = rgd->rd_data;
@@ -1678,13 +1682,105 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
1678 return; 1682 return;
1679} 1683}
1680 1684
1685/**
1686 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1687 * @rgd: The rgrp in question
1688 * @loops: An indication of how picky we can be (0=very, 1=less so)
1689 *
1690 * This function uses the recently added glock statistics in order to
1691 * figure out whether a particular resource group is suffering from
1692 * contention from multiple nodes. This is done purely on the basis
1693 * of timings, since this is the only data we have to work with and
1694 * our aim here is to reject a resource group which is highly contended
1695 * but (very important) not to do this too often in order to ensure that
1696 * we do not land up introducing fragmentation by changing resource
1697 * groups when not actually required.
1698 *
1699 * The calculation is fairly simple, we want to know whether the SRTTB
1700 * (i.e. smoothed round trip time for blocking operations) to acquire
1701 * the lock for this rgrp's glock is significantly greater than the
1702 * time taken for resource groups on average. We introduce a margin in
1703 * the form of the variable @var which is computed as the sum of the two
1704 * respective variances, and multiplied by a factor depending on @loops
1705 * and whether we have a lot of data to base the decision on. This is
1706 * then tested against the square difference of the means in order to
1707 * decide whether the result is statistically significant or not.
1708 *
1709 * Returns: A boolean verdict on the congestion status
1710 */
1711
1712static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1713{
1714 const struct gfs2_glock *gl = rgd->rd_gl;
1715 const struct gfs2_sbd *sdp = gl->gl_sbd;
1716 struct gfs2_lkstats *st;
1717 s64 r_dcount, l_dcount;
1718 s64 r_srttb, l_srttb;
1719 s64 srttb_diff;
1720 s64 sqr_diff;
1721 s64 var;
1722
1723 preempt_disable();
1724 st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1725 r_srttb = st->stats[GFS2_LKS_SRTTB];
1726 r_dcount = st->stats[GFS2_LKS_DCOUNT];
1727 var = st->stats[GFS2_LKS_SRTTVARB] +
1728 gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1729 preempt_enable();
1730
1731 l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1732 l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1733
1734 if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
1735 return false;
1736
1737 srttb_diff = r_srttb - l_srttb;
1738 sqr_diff = srttb_diff * srttb_diff;
1739
1740 var *= 2;
1741 if (l_dcount < 8 || r_dcount < 8)
1742 var *= 2;
1743 if (loops == 1)
1744 var *= 2;
1745
1746 return ((srttb_diff < 0) && (sqr_diff > var));
1747}
1748
1749/**
1750 * gfs2_rgrp_used_recently
1751 * @rs: The block reservation with the rgrp to test
1752 * @msecs: The time limit in milliseconds
1753 *
1754 * Returns: True if the rgrp glock has been used within the time limit
1755 */
1756static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1757 u64 msecs)
1758{
1759 u64 tdiff;
1760
1761 tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1762 rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1763
1764 return tdiff > (msecs * 1000 * 1000);
1765}
1766
1767static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1768{
1769 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1770 u32 skip;
1771
1772 get_random_bytes(&skip, sizeof(skip));
1773 return skip % sdp->sd_rgrps;
1774}
1775
1681static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1776static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1682{ 1777{
1683 struct gfs2_rgrpd *rgd = *pos; 1778 struct gfs2_rgrpd *rgd = *pos;
1779 struct gfs2_sbd *sdp = rgd->rd_sbd;
1684 1780
1685 rgd = gfs2_rgrpd_get_next(rgd); 1781 rgd = gfs2_rgrpd_get_next(rgd);
1686 if (rgd == NULL) 1782 if (rgd == NULL)
1687 rgd = gfs2_rgrpd_get_next(NULL); 1783 rgd = gfs2_rgrpd_get_first(sdp);
1688 *pos = rgd; 1784 *pos = rgd;
1689 if (rgd != begin) /* If we didn't wrap */ 1785 if (rgd != begin) /* If we didn't wrap */
1690 return true; 1786 return true;
@@ -1699,14 +1795,15 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
1699 * Returns: errno 1795 * Returns: errno
1700 */ 1796 */
1701 1797
1702int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1798int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags)
1703{ 1799{
1704 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1800 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1705 struct gfs2_rgrpd *begin = NULL; 1801 struct gfs2_rgrpd *begin = NULL;
1706 struct gfs2_blkreserv *rs = ip->i_res; 1802 struct gfs2_blkreserv *rs = ip->i_res;
1707 int error = 0, rg_locked, flags = LM_FLAG_TRY; 1803 int error = 0, rg_locked, flags = 0;
1708 u64 last_unlinked = NO_BLOCK; 1804 u64 last_unlinked = NO_BLOCK;
1709 int loops = 0; 1805 int loops = 0;
1806 u32 skip = 0;
1710 1807
1711 if (sdp->sd_args.ar_rgrplvb) 1808 if (sdp->sd_args.ar_rgrplvb)
1712 flags |= GL_SKIP; 1809 flags |= GL_SKIP;
@@ -1720,6 +1817,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1720 } else { 1817 } else {
1721 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1818 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1722 } 1819 }
1820 if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV))
1821 skip = gfs2_orlov_skip(ip);
1723 if (rs->rs_rbm.rgd == NULL) 1822 if (rs->rs_rbm.rgd == NULL)
1724 return -EBADSLT; 1823 return -EBADSLT;
1725 1824
@@ -1728,13 +1827,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1728 1827
1729 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { 1828 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1730 rg_locked = 0; 1829 rg_locked = 0;
1830 if (skip && skip--)
1831 goto next_rgrp;
1832 if (!gfs2_rs_active(rs) && (loops < 2) &&
1833 gfs2_rgrp_used_recently(rs, 1000) &&
1834 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1835 goto next_rgrp;
1731 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, 1836 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1732 LM_ST_EXCLUSIVE, flags, 1837 LM_ST_EXCLUSIVE, flags,
1733 &rs->rs_rgd_gh); 1838 &rs->rs_rgd_gh);
1734 if (error == GLR_TRYFAILED)
1735 goto next_rgrp;
1736 if (unlikely(error)) 1839 if (unlikely(error))
1737 return error; 1840 return error;
1841 if (!gfs2_rs_active(rs) && (loops < 2) &&
1842 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1843 goto skip_rgrp;
1738 if (sdp->sd_args.ar_rgrplvb) { 1844 if (sdp->sd_args.ar_rgrplvb) {
1739 error = update_rgrp_lvb(rs->rs_rbm.rgd); 1845 error = update_rgrp_lvb(rs->rs_rbm.rgd);
1740 if (unlikely(error)) { 1846 if (unlikely(error)) {
@@ -1781,12 +1887,13 @@ next_rgrp:
1781 /* Find the next rgrp, and continue looking */ 1887 /* Find the next rgrp, and continue looking */
1782 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) 1888 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1783 continue; 1889 continue;
1890 if (skip)
1891 continue;
1784 1892
1785 /* If we've scanned all the rgrps, but found no free blocks 1893 /* If we've scanned all the rgrps, but found no free blocks
1786 * then this checks for some less likely conditions before 1894 * then this checks for some less likely conditions before
1787 * trying again. 1895 * trying again.
1788 */ 1896 */
1789 flags &= ~LM_FLAG_TRY;
1790 loops++; 1897 loops++;
1791 /* Check that fs hasn't grown if writing to rindex */ 1898 /* Check that fs hasn't grown if writing to rindex */
1792 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { 1899 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 24077958dcf6..842185853f6b 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -39,7 +39,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
39 39
40extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 40extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
41 41
42extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); 42#define GFS2_AF_ORLOV 1
43extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags);
43extern void gfs2_inplace_release(struct gfs2_inode *ip); 44extern void gfs2_inplace_release(struct gfs2_inode *ip);
44 45
45extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, 46extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index bbdc78af60ca..2ee13e841e9f 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -486,7 +486,7 @@ TRACE_EVENT(gfs2_block_alloc,
486 ), 486 ),
487 487
488 TP_fast_assign( 488 TP_fast_assign(
489 __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; 489 __entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev;
490 __entry->start = block; 490 __entry->start = block;
491 __entry->inum = ip->i_no_addr; 491 __entry->inum = ip->i_no_addr;
492 __entry->len = len; 492 __entry->len = len;
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index db330e5518cd..76c144b3c9bb 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
734 if (error) 734 if (error)
735 return error; 735 return error;
736 736
737 error = gfs2_inplace_reserve(ip, blks); 737 error = gfs2_inplace_reserve(ip, blks, 0);
738 if (error) 738 if (error)
739 goto out_gunlock_q; 739 goto out_gunlock_q;
740 740