From cd81a4bac67d44742ab0aa1848f4a78e9d7e1093 Mon Sep 17 00:00:00 2001 From: Robert Peterson Date: Mon, 14 May 2007 12:42:18 -0500 Subject: [GFS2] Addendum patch 2 for gfs2_grow This addendum patch 2 corrects three things: 1. It fixes a stupid mistake in the previous addendum that broke gfs2. Ref: https://www.redhat.com/archives/cluster-devel/2007-May/msg00162.html 2. It fixes a problem that Dave Teigland pointed out regarding the external declarations in ops_address.h being in the wrong place. 3. It recasts a couple more %llu printks to (unsigned long long) as requested by Steve Whitehouse. I would have loved to put this all in one revised patch, but there was a rush to get some patches for RHEL5. Therefore, the previous patches were applied to the git tree "as is" and therefore, I'm posting another addendum. Sorry. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/gfs2/glock.c') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1815429a2978..c66c718013ef 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1823,7 +1823,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) print_dbg(gi, " Inode:\n"); print_dbg(gi, " num = %llu/%llu\n", - ip->i_num.no_formal_ino, ip->i_num.no_addr); + (unsigned long long)ip->i_num.no_formal_ino, + (unsigned long long)ip->i_num.no_addr); print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); print_dbg(gi, " i_flags ="); for (x = 0; x < 32; x++) @@ -1909,8 +1910,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) } if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", - gl->gl_demote_state, - (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ)); + gl->gl_demote_state, (unsigned long long) + (jiffies - gl->gl_demote_time)*(1000000/HZ)); } if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { if (!test_bit(GLF_LOCK, &gl->gl_flags) && -- cgit v1.2.2 From dbb7cae2a36170cd17ffbe286ec0c91a998740ff Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 May 2007 15:37:50 +0100 Subject: [GFS2] Clean up inode number handling This patch cleans up the inode number handling code. The main difference is that instead of looking up the inodes using a struct gfs2_inum_host we now use just the no_addr member of this structure. The tests relating to no_formal_ino can then be done by the calling code. This has advantages in that we want to do different things in different code paths if the no_formal_ino doesn't match. In the NFS patch we want to return -ESTALE, but in the ->lookup() path, its a bug in the fs if the no_formal_ino doesn't match and thus we can withdraw in this case. In order to later fix bz #201012, we need to be able to look up an inode without knowing no_formal_ino, as the only information that is known to us is the on-disk location of the inode in question. This patch will also help us to fix bz #236099 at a later date by cleaning up a lot of the code in that area. There are no user visible changes as a result of this patch and there are no changes to the on-disk format either. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/gfs2/glock.c') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c66c718013ef..b3ed58551e74 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1823,8 +1823,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) print_dbg(gi, " Inode:\n"); print_dbg(gi, " num = %llu/%llu\n", - (unsigned long long)ip->i_num.no_formal_ino, - (unsigned long long)ip->i_num.no_addr); + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr); print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); print_dbg(gi, " i_flags ="); for (x = 0; x < 32; x++) -- cgit v1.2.2 From d93cfa9884354dac2d8ccd894594a43e0b962b6f Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Mon, 11 Jun 2007 08:22:32 +0100 Subject: [GFS2] Fix deallocation issues There were two issues during deallocation of unlinked inodes. The first was relating to the use of a "try" lock which in the case of the inode lock wasn't trying hard enough to deallocate in all circumstances (now changed to a normal glock) and in the case of the iopen lock didn't wait for the demotion of the shared lock before attempting to get the exclusive lock, and thereby sometimes (timing dependent) not completing the deallocation when it should have done. The second issue related to the lack of a way to invalidate dcache entries on remote nodes (now fixed by this patch) which meant that unlinks were taking a long time to return disk space to the fs. By adding some code to invalidate the dcache entries across the cluster for unlinked inodes, that is now fixed. This patch was written jointly by Abhijith Das and Steven Whitehouse. Signed-off-by: Abhijith Das Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 52 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 11 deletions(-) (limited to 'fs/gfs2/glock.c') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b3ed58551e74..384cae623ed3 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) static void gfs2_holder_wake(struct gfs2_holder *gh) { clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb(); + smp_mb__after_clear_bit(); wake_up_bit(&gh->gh_iflags, HIF_WAIT); } -static int holder_wait(void *word) +static int just_schedule(void *word) { schedule(); return 0; @@ -435,7 +435,20 @@ static int holder_wait(void *word) static void wait_on_holder(struct gfs2_holder *gh) { might_sleep(); - wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); +} + +static void gfs2_demote_wake(struct gfs2_glock *gl) +{ + clear_bit(GLF_DEMOTE, &gl->gl_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&gl->gl_flags, GLF_DEMOTE); +} + +static void wait_on_demote(struct gfs2_glock *gl) +{ + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); } /** @@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl) if (gl->gl_state == gl->gl_demote_state || gl->gl_state == LM_ST_UNLOCKED) { - clear_bit(GLF_DEMOTE, &gl->gl_flags); + gfs2_demote_wake(gl); return 0; } set_bit(GLF_LOCK, &gl->gl_flags); @@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ -static void handle_callback(struct gfs2_glock *gl, unsigned int state) +static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote) { spin_lock(&gl->gl_spin); if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; + if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && + gl->gl_object) { + struct inode *inode = igrab(gl->gl_object); + spin_unlock(&gl->gl_spin); + if (inode) { + d_prune_aliases(inode); + iput(inode); + } + return; + } } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { gl->gl_demote_state = state; } @@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) if (ret & LM_OUT_CANCELED) op_done = 0; else - clear_bit(GLF_DEMOTE, &gl->gl_flags); + gfs2_demote_wake(gl); } else { spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); @@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_assert_warn(sdp, !ret); state_change(gl, LM_ST_UNLOCKED); - clear_bit(GLF_DEMOTE, &gl->gl_flags); + gfs2_demote_wake(gl); if (glops->go_inval) glops->go_inval(gl, DIO_METADATA); @@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) const struct gfs2_glock_operations *glops = gl->gl_ops; if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED); + handle_callback(gl, LM_ST_UNLOCKED, 0); gfs2_glmutex_lock(gl); @@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh) spin_unlock(&gl->gl_spin); } +void gfs2_glock_dq_wait(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + gfs2_glock_dq(gh); + wait_on_demote(gl); +} + /** * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it * @gh: the holder structure @@ -1456,7 +1486,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, if (!gl) return; - handle_callback(gl, state); + handle_callback(gl, state, 1); spin_lock(&gl->gl_spin); run_queue(gl); @@ -1596,7 +1626,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) if (gfs2_glmutex_trylock(gl)) { if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED); + handle_callback(gl, LM_ST_UNLOCKED, 0); gfs2_glmutex_unlock(gl); } @@ -1709,7 +1739,7 @@ static void clear_glock(struct gfs2_glock *gl) if (gfs2_glmutex_trylock(gl)) { if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED); + handle_callback(gl, LM_ST_UNLOCKED, 0); gfs2_glmutex_unlock(gl); } } -- cgit v1.2.2 From eaf5bd3cac92126e5825c6ebc10bee0fba35d555 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 Jun 2007 15:38:17 +0100 Subject: [GFS2] Simplify multiple glock aquisition There is a bug in the code which acquires multiple glocks where if the initial out-of-order attempt fails part way though we can land up trying to acquire the wrong number of glocks. This is part of the fix for red hat bz #239737. The other part of the bz doesn't apply to upstream kernels since it was fixed by: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=d3717bdf8f08a0e1039158c8bab2c24d20f492b6 Since the out-of-order code doesn't appear to add anything to the performance of GFS2, this patch just removed it rather than trying to fix it. It should be much easier to see whats going on here now. In addition, we don't allocate any memory unless we are using a lot of glocks (which is a relatively uncommon case). Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 64 ++++++++++++--------------------------------------------- 1 file changed, 13 insertions(+), 51 deletions(-) (limited to 'fs/gfs2/glock.c') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 384cae623ed3..3f0974e1afef 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1327,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, * @num_gh: the number of structures * @ghs: an array of struct gfs2_holder structures * - * Figure out how big an impact this function has. Either: - * 1) Replace this code with code that calls gfs2_glock_prefetch() - * 2) Forget async stuff and just call nq_m_sync() - * 3) Leave it like it is * * Returns: 0 on success (all glocks acquired), * errno on failure (no glocks acquired) @@ -1338,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) { - int *e; - unsigned int x; - int borked = 0, serious = 0; + struct gfs2_holder *tmp[4]; + struct gfs2_holder **pph = tmp; int error = 0; - if (!num_gh) + switch(num_gh) { + case 0: return 0; - - if (num_gh == 1) { + case 1: ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); return gfs2_glock_nq(ghs); - } - - e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL); - if (!e) - return -ENOMEM; - - for (x = 0; x < num_gh; x++) { - ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC; - error = gfs2_glock_nq(&ghs[x]); - if (error) { - borked = 1; - serious = error; - num_gh = x; + default: + if (num_gh <= 4) break; - } + pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS); + if (!pph) + return -ENOMEM; } - for (x = 0; x < num_gh; x++) { - error = e[x] = glock_wait_internal(&ghs[x]); - if (error) { - borked = 1; - if (error != GLR_TRYFAILED && error != GLR_CANCELED) - serious = error; - } - } - - if (!borked) { - kfree(e); - return 0; - } - - for (x = 0; x < num_gh; x++) - if (!e[x]) - gfs2_glock_dq(&ghs[x]); - - if (serious) - error = serious; - else { - for (x = 0; x < num_gh; x++) - gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags, - &ghs[x]); - error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e); - } + error = nq_m_sync(num_gh, ghs, pph); - kfree(e); + if (pph != tmp) + kfree(pph); return error; } -- cgit v1.2.2